Index: projects/clang700-import/contrib/compiler-rt =================================================================== --- projects/clang700-import/contrib/compiler-rt (revision 337644) +++ projects/clang700-import/contrib/compiler-rt (revision 337645) Property changes on: projects/clang700-import/contrib/compiler-rt ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /vendor/compiler-rt/dist-release_70:r337313-337643 Index: projects/clang700-import/contrib/libc++ =================================================================== --- projects/clang700-import/contrib/libc++ (revision 337644) +++ projects/clang700-import/contrib/libc++ (revision 337645) Property changes on: projects/clang700-import/contrib/libc++ ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /vendor/libc++/dist-release_70:r337314-337643 Index: projects/clang700-import/contrib/llvm/lib/Analysis/InstructionSimplify.cpp =================================================================== --- projects/clang700-import/contrib/llvm/lib/Analysis/InstructionSimplify.cpp (revision 337644) +++ projects/clang700-import/contrib/llvm/lib/Analysis/InstructionSimplify.cpp (revision 337645) @@ -1,5147 +1,5181 @@ //===- InstructionSimplify.cpp - Fold instruction operands ----------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements routines for folding instructions into simpler forms // that do not require creating new instructions. This does constant folding // ("add i32 1, 1" -> "2") but can also handle non-constant operands, either // returning a constant ("and i32 %x, 0" -> "0") or an already existing value // ("and i32 %x, %x" -> "%x"). 
All operands are assumed to have already been // simplified: This is usually true and assuming it simplifies the logic (if // they have not been simplified then results are correct but maybe suboptimal). // //===----------------------------------------------------------------------===// #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/CmpInstAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/KnownBits.h" #include using namespace llvm; using namespace llvm::PatternMatch; #define DEBUG_TYPE "instsimplify" enum { RecursionLimit = 3 }; STATISTIC(NumExpand, "Number of expansions"); STATISTIC(NumReassoc, "Number of reassociations"); static Value *SimplifyAndInst(Value *, Value *, const SimplifyQuery &, unsigned); static Value *SimplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &, unsigned); static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &, const SimplifyQuery &, unsigned); static Value *SimplifyCmpInst(unsigned, Value *, Value *, const SimplifyQuery &, unsigned); static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse); static Value *SimplifyOrInst(Value *, Value *, const SimplifyQuery &, unsigned); static Value *SimplifyXorInst(Value *, Value *, const SimplifyQuery &, unsigned); static Value 
*SimplifyCastInst(unsigned, Value *, Type *, const SimplifyQuery &, unsigned); static Value *SimplifyGEPInst(Type *, ArrayRef, const SimplifyQuery &, unsigned); static Value *foldSelectWithBinaryOp(Value *Cond, Value *TrueVal, Value *FalseVal) { BinaryOperator::BinaryOps BinOpCode; if (auto *BO = dyn_cast(Cond)) BinOpCode = BO->getOpcode(); else return nullptr; CmpInst::Predicate ExpectedPred, Pred1, Pred2; if (BinOpCode == BinaryOperator::Or) { ExpectedPred = ICmpInst::ICMP_NE; } else if (BinOpCode == BinaryOperator::And) { ExpectedPred = ICmpInst::ICMP_EQ; } else return nullptr; // %A = icmp eq %TV, %FV // %B = icmp eq %X, %Y (and one of these is a select operand) // %C = and %A, %B // %D = select %C, %TV, %FV // --> // %FV // %A = icmp ne %TV, %FV // %B = icmp ne %X, %Y (and one of these is a select operand) // %C = or %A, %B // %D = select %C, %TV, %FV // --> // %TV Value *X, *Y; if (!match(Cond, m_c_BinOp(m_c_ICmp(Pred1, m_Specific(TrueVal), m_Specific(FalseVal)), m_ICmp(Pred2, m_Value(X), m_Value(Y)))) || Pred1 != Pred2 || Pred1 != ExpectedPred) return nullptr; if (X == TrueVal || X == FalseVal || Y == TrueVal || Y == FalseVal) return BinOpCode == BinaryOperator::Or ? TrueVal : FalseVal; return nullptr; } /// For a boolean type or a vector of boolean type, return false or a vector /// with every element false. static Constant *getFalse(Type *Ty) { return ConstantInt::getFalse(Ty); } /// For a boolean type or a vector of boolean type, return true or a vector /// with every element true. static Constant *getTrue(Type *Ty) { return ConstantInt::getTrue(Ty); } /// isSameCompare - Is V equivalent to the comparison "LHS Pred RHS"? 
static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS, Value *RHS) { CmpInst *Cmp = dyn_cast(V); if (!Cmp) return false; CmpInst::Predicate CPred = Cmp->getPredicate(); Value *CLHS = Cmp->getOperand(0), *CRHS = Cmp->getOperand(1); if (CPred == Pred && CLHS == LHS && CRHS == RHS) return true; return CPred == CmpInst::getSwappedPredicate(Pred) && CLHS == RHS && CRHS == LHS; } /// Does the given value dominate the specified phi node? static bool valueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { Instruction *I = dyn_cast(V); if (!I) // Arguments and constants dominate all instructions. return true; // If we are processing instructions (and/or basic blocks) that have not been // fully added to a function, the parent nodes may still be null. Simply // return the conservative answer in these cases. if (!I->getParent() || !P->getParent() || !I->getFunction()) return false; // If we have a DominatorTree then do a precise test. if (DT) return DT->dominates(I, P); // Otherwise, if the instruction is in the entry block and is not an invoke, // then it obviously dominates all phi nodes. if (I->getParent() == &I->getFunction()->getEntryBlock() && !isa(I)) return true; return false; } /// Simplify "A op (B op' C)" by distributing op over op', turning it into /// "(A op B) op' (A op C)". Here "op" is given by Opcode and "op'" is /// given by OpcodeToExpand, while "A" corresponds to LHS and "B op' C" to RHS. /// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)". /// Returns the simplified value, or null if no simplification was performed. static Value *ExpandBinOp(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS, Instruction::BinaryOps OpcodeToExpand, const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) return nullptr; // Check whether the expression has the form "(A op' B) op C". 
if (BinaryOperator *Op0 = dyn_cast(LHS)) if (Op0->getOpcode() == OpcodeToExpand) { // It does! Try turning it into "(A op C) op' (B op C)". Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS; // Do "A op C" and "B op C" both simplify? if (Value *L = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse)) if (Value *R = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) { // They do! Return "L op' R" if it simplifies or is already available. // If "L op' R" equals "A op' B" then "L op' R" is just the LHS. if ((L == A && R == B) || (Instruction::isCommutative(OpcodeToExpand) && L == B && R == A)) { ++NumExpand; return LHS; } // Otherwise return "L op' R" if it simplifies. if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) { ++NumExpand; return V; } } } // Check whether the expression has the form "A op (B op' C)". if (BinaryOperator *Op1 = dyn_cast(RHS)) if (Op1->getOpcode() == OpcodeToExpand) { // It does! Try turning it into "(A op B) op' (A op C)". Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1); // Do "A op B" and "A op C" both simplify? if (Value *L = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse)) if (Value *R = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse)) { // They do! Return "L op' R" if it simplifies or is already available. // If "L op' R" equals "B op' C" then "L op' R" is just the RHS. if ((L == B && R == C) || (Instruction::isCommutative(OpcodeToExpand) && L == C && R == B)) { ++NumExpand; return RHS; } // Otherwise return "L op' R" if it simplifies. if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) { ++NumExpand; return V; } } } return nullptr; } /// Generic simplifications for associative binary operations. /// Returns the simpler value, or null if none was found. 
static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { assert(Instruction::isAssociative(Opcode) && "Not an associative operation!"); // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) return nullptr; BinaryOperator *Op0 = dyn_cast(LHS); BinaryOperator *Op1 = dyn_cast(RHS); // Transform: "(A op B) op C" ==> "A op (B op C)" if it simplifies completely. if (Op0 && Op0->getOpcode() == Opcode) { Value *A = Op0->getOperand(0); Value *B = Op0->getOperand(1); Value *C = RHS; // Does "B op C" simplify? if (Value *V = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) { // It does! Return "A op V" if it simplifies or is already available. // If V equals B then "A op V" is just the LHS. if (V == B) return LHS; // Otherwise return "A op V" if it simplifies. if (Value *W = SimplifyBinOp(Opcode, A, V, Q, MaxRecurse)) { ++NumReassoc; return W; } } } // Transform: "A op (B op C)" ==> "(A op B) op C" if it simplifies completely. if (Op1 && Op1->getOpcode() == Opcode) { Value *A = LHS; Value *B = Op1->getOperand(0); Value *C = Op1->getOperand(1); // Does "A op B" simplify? if (Value *V = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse)) { // It does! Return "V op C" if it simplifies or is already available. // If V equals B then "V op C" is just the RHS. if (V == B) return RHS; // Otherwise return "V op C" if it simplifies. if (Value *W = SimplifyBinOp(Opcode, V, C, Q, MaxRecurse)) { ++NumReassoc; return W; } } } // The remaining transforms require commutativity as well as associativity. if (!Instruction::isCommutative(Opcode)) return nullptr; // Transform: "(A op B) op C" ==> "(C op A) op B" if it simplifies completely. if (Op0 && Op0->getOpcode() == Opcode) { Value *A = Op0->getOperand(0); Value *B = Op0->getOperand(1); Value *C = RHS; // Does "C op A" simplify? if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) { // It does! 
Return "V op B" if it simplifies or is already available. // If V equals A then "V op B" is just the LHS. if (V == A) return LHS; // Otherwise return "V op B" if it simplifies. if (Value *W = SimplifyBinOp(Opcode, V, B, Q, MaxRecurse)) { ++NumReassoc; return W; } } } // Transform: "A op (B op C)" ==> "B op (C op A)" if it simplifies completely. if (Op1 && Op1->getOpcode() == Opcode) { Value *A = LHS; Value *B = Op1->getOperand(0); Value *C = Op1->getOperand(1); // Does "C op A" simplify? if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) { // It does! Return "B op V" if it simplifies or is already available. // If V equals C then "B op V" is just the RHS. if (V == C) return RHS; // Otherwise return "B op V" if it simplifies. if (Value *W = SimplifyBinOp(Opcode, B, V, Q, MaxRecurse)) { ++NumReassoc; return W; } } } return nullptr; } /// In the case of a binary operation with a select instruction as an operand, /// try to simplify the binop by seeing whether evaluating it on both branches /// of the select results in the same value. Returns the common value if so, /// otherwise returns null. static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) return nullptr; SelectInst *SI; if (isa(LHS)) { SI = cast(LHS); } else { assert(isa(RHS) && "No select instruction operand!"); SI = cast(RHS); } // Evaluate the BinOp on the true and false branches of the select. Value *TV; Value *FV; if (SI == LHS) { TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, Q, MaxRecurse); FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, Q, MaxRecurse); } else { TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), Q, MaxRecurse); FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), Q, MaxRecurse); } // If they simplified to the same value, then return the common value. 
// If they both failed to simplify then return null. if (TV == FV) return TV; // If one branch simplified to undef, return the other one. if (TV && isa(TV)) return FV; if (FV && isa(FV)) return TV; // If applying the operation did not change the true and false select values, // then the result of the binop is the select itself. if (TV == SI->getTrueValue() && FV == SI->getFalseValue()) return SI; // If one branch simplified and the other did not, and the simplified // value is equal to the unsimplified one, return the simplified value. // For example, select (cond, X, X & Z) & Z -> X & Z. if ((FV && !TV) || (TV && !FV)) { // Check that the simplified value has the form "X op Y" where "op" is the // same as the original operation. Instruction *Simplified = dyn_cast(FV ? FV : TV); if (Simplified && Simplified->getOpcode() == unsigned(Opcode)) { // The value that didn't simplify is "UnsimplifiedLHS op UnsimplifiedRHS". // We already know that "op" is the same as for the simplified value. See // if the operands match too. If so, return the simplified value. Value *UnsimplifiedBranch = FV ? SI->getTrueValue() : SI->getFalseValue(); Value *UnsimplifiedLHS = SI == LHS ? UnsimplifiedBranch : LHS; Value *UnsimplifiedRHS = SI == LHS ? RHS : UnsimplifiedBranch; if (Simplified->getOperand(0) == UnsimplifiedLHS && Simplified->getOperand(1) == UnsimplifiedRHS) return Simplified; if (Simplified->isCommutative() && Simplified->getOperand(1) == UnsimplifiedLHS && Simplified->getOperand(0) == UnsimplifiedRHS) return Simplified; } } return nullptr; } /// In the case of a comparison with a select instruction, try to simplify the /// comparison by seeing whether both branches of the select result in the same /// value. Returns the common value if so, otherwise returns null. static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. 
if (!MaxRecurse--) return nullptr; // Make sure the select is on the LHS. if (!isa(LHS)) { std::swap(LHS, RHS); Pred = CmpInst::getSwappedPredicate(Pred); } assert(isa(LHS) && "Not comparing with a select instruction!"); SelectInst *SI = cast(LHS); Value *Cond = SI->getCondition(); Value *TV = SI->getTrueValue(); Value *FV = SI->getFalseValue(); // Now that we have "cmp select(Cond, TV, FV), RHS", analyse it. // Does "cmp TV, RHS" simplify? Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, Q, MaxRecurse); if (TCmp == Cond) { // It not only simplified, it simplified to the select condition. Replace // it with 'true'. TCmp = getTrue(Cond->getType()); } else if (!TCmp) { // It didn't simplify. However if "cmp TV, RHS" is equal to the select // condition then we can replace it with 'true'. Otherwise give up. if (!isSameCompare(Cond, Pred, TV, RHS)) return nullptr; TCmp = getTrue(Cond->getType()); } // Does "cmp FV, RHS" simplify? Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, Q, MaxRecurse); if (FCmp == Cond) { // It not only simplified, it simplified to the select condition. Replace // it with 'false'. FCmp = getFalse(Cond->getType()); } else if (!FCmp) { // It didn't simplify. However if "cmp FV, RHS" is equal to the select // condition then we can replace it with 'false'. Otherwise give up. if (!isSameCompare(Cond, Pred, FV, RHS)) return nullptr; FCmp = getFalse(Cond->getType()); } // If both sides simplified to the same value, then use it as the result of // the original comparison. if (TCmp == FCmp) return TCmp; // The remaining cases only make sense if the select condition has the same // type as the result of the comparison, so bail out if this is not so. if (Cond->getType()->isVectorTy() != RHS->getType()->isVectorTy()) return nullptr; // If the false value simplified to false, then the result of the compare // is equal to "Cond && TCmp". This also catches the case when the false // value simplified to false and the true value to true, returning "Cond". 
if (match(FCmp, m_Zero())) if (Value *V = SimplifyAndInst(Cond, TCmp, Q, MaxRecurse)) return V; // If the true value simplified to true, then the result of the compare // is equal to "Cond || FCmp". if (match(TCmp, m_One())) if (Value *V = SimplifyOrInst(Cond, FCmp, Q, MaxRecurse)) return V; // Finally, if the false value simplified to true and the true value to // false, then the result of the compare is equal to "!Cond". if (match(FCmp, m_One()) && match(TCmp, m_Zero())) if (Value *V = SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()), Q, MaxRecurse)) return V; return nullptr; } /// In the case of a binary operation with an operand that is a PHI instruction, /// try to simplify the binop by seeing whether evaluating it on the incoming /// phi values yields the same result for every value. If so returns the common /// value, otherwise returns null. static Value *ThreadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) return nullptr; PHINode *PI; if (isa(LHS)) { PI = cast(LHS); // Bail out if RHS and the phi may be mutually interdependent due to a loop. if (!valueDominatesPHI(RHS, PI, Q.DT)) return nullptr; } else { assert(isa(RHS) && "No PHI instruction operand!"); PI = cast(RHS); // Bail out if LHS and the phi may be mutually interdependent due to a loop. if (!valueDominatesPHI(LHS, PI, Q.DT)) return nullptr; } // Evaluate the BinOp on the incoming phi values. Value *CommonValue = nullptr; for (Value *Incoming : PI->incoming_values()) { // If the incoming value is the phi node itself, it can safely be skipped. if (Incoming == PI) continue; Value *V = PI == LHS ? SimplifyBinOp(Opcode, Incoming, RHS, Q, MaxRecurse) : SimplifyBinOp(Opcode, LHS, Incoming, Q, MaxRecurse); // If the operation failed to simplify, or simplified to a different value // to previously, then give up. 
if (!V || (CommonValue && V != CommonValue)) return nullptr; CommonValue = V; } return CommonValue; } /// In the case of a comparison with a PHI instruction, try to simplify the /// comparison by seeing whether comparing with all of the incoming phi values /// yields the same result every time. If so returns the common result, /// otherwise returns null. static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) return nullptr; // Make sure the phi is on the LHS. if (!isa(LHS)) { std::swap(LHS, RHS); Pred = CmpInst::getSwappedPredicate(Pred); } assert(isa(LHS) && "Not comparing with a phi instruction!"); PHINode *PI = cast(LHS); // Bail out if RHS and the phi may be mutually interdependent due to a loop. if (!valueDominatesPHI(RHS, PI, Q.DT)) return nullptr; // Evaluate the BinOp on the incoming phi values. Value *CommonValue = nullptr; for (Value *Incoming : PI->incoming_values()) { // If the incoming value is the phi node itself, it can safely be skipped. if (Incoming == PI) continue; Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q, MaxRecurse); // If the operation failed to simplify, or simplified to a different value // to previously, then give up. if (!V || (CommonValue && V != CommonValue)) return nullptr; CommonValue = V; } return CommonValue; } static Constant *foldOrCommuteConstant(Instruction::BinaryOps Opcode, Value *&Op0, Value *&Op1, const SimplifyQuery &Q) { if (auto *CLHS = dyn_cast(Op0)) { if (auto *CRHS = dyn_cast(Op1)) return ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, Q.DL); // Canonicalize the constant to the RHS if this is a commutative operation. if (Instruction::isCommutative(Opcode)) std::swap(Op0, Op1); } return nullptr; } /// Given operands for an Add, see if we can fold the result. /// If not, this returns null. 
static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::Add, Op0, Op1, Q)) return C; // X + undef -> undef if (match(Op1, m_Undef())) return Op1; // X + 0 -> X if (match(Op1, m_Zero())) return Op0; // If two operands are negative, return 0. if (isKnownNegation(Op0, Op1)) return Constant::getNullValue(Op0->getType()); // X + (Y - X) -> Y // (Y - X) + X -> Y // Eg: X + -X -> 0 Value *Y = nullptr; if (match(Op1, m_Sub(m_Value(Y), m_Specific(Op0))) || match(Op0, m_Sub(m_Value(Y), m_Specific(Op1)))) return Y; // X + ~X -> -1 since ~X = -X-1 Type *Ty = Op0->getType(); if (match(Op0, m_Not(m_Specific(Op1))) || match(Op1, m_Not(m_Specific(Op0)))) return Constant::getAllOnesValue(Ty); // add nsw/nuw (xor Y, signmask), signmask --> Y // The no-wrapping add guarantees that the top bit will be set by the add. // Therefore, the xor must be clearing the already set sign bit of Y. if ((IsNSW || IsNUW) && match(Op1, m_SignMask()) && match(Op0, m_Xor(m_Value(Y), m_SignMask()))) return Y; // add nuw %x, -1 -> -1, because %x can only be 0. if (IsNUW && match(Op1, m_AllOnes())) return Op1; // Which is -1. /// i1 add -> xor. if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1)) if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1)) return V; // Try some generic simplifications for associative operations. if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, Q, MaxRecurse)) return V; // Threading Add over selects and phi nodes is pointless, so don't bother. // Threading over the select in "A + select(cond, B, C)" means evaluating // "A+B" and "A+C" and seeing if they are equal; but they are equal if and // only if B and C are equal. If B and C are equal then (since we assume // that operands have already been simplified) "select(cond, B, C)" should // have been simplified to the common value of B and C already. 
Analysing // "A+B" and "A+C" thus gains nothing, but costs compile time. Similarly // for threading over phi nodes. return nullptr; } Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW, const SimplifyQuery &Query) { return ::SimplifyAddInst(Op0, Op1, IsNSW, IsNUW, Query, RecursionLimit); } /// Compute the base pointer and cumulative constant offsets for V. /// /// This strips all constant offsets off of V, leaving it the base pointer, and /// accumulates the total constant offset applied in the returned constant. It /// returns 0 if V is not a pointer, and returns the constant '0' if there are /// no constant offsets applied. /// /// This is very similar to GetPointerBaseWithConstantOffset except it doesn't /// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc. /// folding. static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V, bool AllowNonInbounds = false) { assert(V->getType()->isPtrOrPtrVectorTy()); Type *IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType(); APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth()); // Even though we don't look through PHI nodes, we could be called on an // instruction in an unreachable block, which may be on a cycle. 
SmallPtrSet Visited; Visited.insert(V); do { if (GEPOperator *GEP = dyn_cast(V)) { if ((!AllowNonInbounds && !GEP->isInBounds()) || !GEP->accumulateConstantOffset(DL, Offset)) break; V = GEP->getPointerOperand(); } else if (Operator::getOpcode(V) == Instruction::BitCast) { V = cast(V)->getOperand(0); } else if (GlobalAlias *GA = dyn_cast(V)) { if (GA->isInterposable()) break; V = GA->getAliasee(); } else { if (auto CS = CallSite(V)) if (Value *RV = CS.getReturnedArgOperand()) { V = RV; continue; } break; } assert(V->getType()->isPtrOrPtrVectorTy() && "Unexpected operand type!"); } while (Visited.insert(V).second); Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset); if (V->getType()->isVectorTy()) return ConstantVector::getSplat(V->getType()->getVectorNumElements(), OffsetIntPtr); return OffsetIntPtr; } /// Compute the constant difference between two pointer values. /// If the difference is not a constant, returns zero. static Constant *computePointerDifference(const DataLayout &DL, Value *LHS, Value *RHS) { Constant *LHSOffset = stripAndComputeConstantOffsets(DL, LHS); Constant *RHSOffset = stripAndComputeConstantOffsets(DL, RHS); // If LHS and RHS are not related via constant offsets to the same base // value, there is nothing we can do here. if (LHS != RHS) return nullptr; // Otherwise, the difference of LHS - RHS can be computed as: // LHS - RHS // = (LHSOffset + Base) - (RHSOffset + Base) // = LHSOffset - RHSOffset return ConstantExpr::getSub(LHSOffset, RHSOffset); } /// Given operands for a Sub, see if we can fold the result. /// If not, this returns null. 
static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::Sub, Op0, Op1, Q)) return C; // X - undef -> undef // undef - X -> undef if (match(Op0, m_Undef()) || match(Op1, m_Undef())) return UndefValue::get(Op0->getType()); // X - 0 -> X if (match(Op1, m_Zero())) return Op0; // X - X -> 0 if (Op0 == Op1) return Constant::getNullValue(Op0->getType()); // Is this a negation? if (match(Op0, m_Zero())) { // 0 - X -> 0 if the sub is NUW. if (isNUW) return Constant::getNullValue(Op0->getType()); KnownBits Known = computeKnownBits(Op1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); if (Known.Zero.isMaxSignedValue()) { // Op1 is either 0 or the minimum signed value. If the sub is NSW, then // Op1 must be 0 because negating the minimum signed value is undefined. if (isNSW) return Constant::getNullValue(Op0->getType()); // 0 - X -> X if X is 0 or the minimum signed value. return Op1; } } // (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies. // For example, (X + Y) - Y -> X; (Y + X) - Y -> X Value *X = nullptr, *Y = nullptr, *Z = Op1; if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z // See if "V === Y - Z" simplifies. if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse-1)) // It does! Now see if "X + V" simplifies. if (Value *W = SimplifyBinOp(Instruction::Add, X, V, Q, MaxRecurse-1)) { // It does, we successfully reassociated! ++NumReassoc; return W; } // See if "V === X - Z" simplifies. if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1)) // It does! Now see if "Y + V" simplifies. if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, Q, MaxRecurse-1)) { // It does, we successfully reassociated! ++NumReassoc; return W; } } // X - (Y + Z) -> (X - Y) - Z or (X - Z) - Y if everything simplifies. 
// For example, X - (X + 1) -> -1 X = Op0; if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z) // See if "V === X - Y" simplifies. if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1)) // It does! Now see if "V - Z" simplifies. if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, Q, MaxRecurse-1)) { // It does, we successfully reassociated! ++NumReassoc; return W; } // See if "V === X - Z" simplifies. if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1)) // It does! Now see if "V - Y" simplifies. if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, Q, MaxRecurse-1)) { // It does, we successfully reassociated! ++NumReassoc; return W; } } // Z - (X - Y) -> (Z - X) + Y if everything simplifies. // For example, X - (X - Y) -> Y. Z = Op0; if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y) // See if "V === Z - X" simplifies. if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, Q, MaxRecurse-1)) // It does! Now see if "V + Y" simplifies. if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, Q, MaxRecurse-1)) { // It does, we successfully reassociated! ++NumReassoc; return W; } // trunc(X) - trunc(Y) -> trunc(X - Y) if everything simplifies. if (MaxRecurse && match(Op0, m_Trunc(m_Value(X))) && match(Op1, m_Trunc(m_Value(Y)))) if (X->getType() == Y->getType()) // See if "V === X - Y" simplifies. if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1)) // It does! Now see if "trunc V" simplifies. if (Value *W = SimplifyCastInst(Instruction::Trunc, V, Op0->getType(), Q, MaxRecurse - 1)) // It does, return the simplified "trunc V". return W; // Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...). if (match(Op0, m_PtrToInt(m_Value(X))) && match(Op1, m_PtrToInt(m_Value(Y)))) if (Constant *Result = computePointerDifference(Q.DL, X, Y)) return ConstantExpr::getIntegerCast(Result, Op0->getType(), true); // i1 sub -> xor. 
if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1)) if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1)) return V; // Threading Sub over selects and phi nodes is pointless, so don't bother. // Threading over the select in "A - select(cond, B, C)" means evaluating // "A-B" and "A-C" and seeing if they are equal; but they are equal if and // only if B and C are equal. If B and C are equal then (since we assume // that operands have already been simplified) "select(cond, B, C)" should // have been simplified to the common value of B and C already. Analysing // "A-B" and "A-C" thus gains nothing, but costs compile time. Similarly // for threading over phi nodes. return nullptr; } Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const SimplifyQuery &Q) { return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit); } /// Given operands for a Mul, see if we can fold the result. /// If not, this returns null. static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::Mul, Op0, Op1, Q)) return C; // X * undef -> 0 // X * 0 -> 0 if (match(Op1, m_CombineOr(m_Undef(), m_Zero()))) return Constant::getNullValue(Op0->getType()); // X * 1 -> X if (match(Op1, m_One())) return Op0; // (X / Y) * Y -> X if the division is exact. Value *X = nullptr; if (match(Op0, m_Exact(m_IDiv(m_Value(X), m_Specific(Op1)))) || // (X / Y) * Y match(Op1, m_Exact(m_IDiv(m_Value(X), m_Specific(Op0))))) // Y * (X / Y) return X; // i1 mul -> and. if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1)) if (Value *V = SimplifyAndInst(Op0, Op1, Q, MaxRecurse-1)) return V; // Try some generic simplifications for associative operations. if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, Q, MaxRecurse)) return V; // Mul distributes over Add. Try some generic simplifications based on this. 
if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add, Q, MaxRecurse)) return V; // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. if (isa(Op0) || isa(Op1)) if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa(Op0) || isa(Op1)) if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, Q, MaxRecurse)) return V; return nullptr; } Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { return ::SimplifyMulInst(Op0, Op1, Q, RecursionLimit); } /// Check for common or similar folds of integer division or integer remainder. /// This applies to all 4 opcodes (sdiv/udiv/srem/urem). static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv) { Type *Ty = Op0->getType(); // X / undef -> undef // X % undef -> undef if (match(Op1, m_Undef())) return Op1; // X / 0 -> undef // X % 0 -> undef // We don't need to preserve faults! if (match(Op1, m_Zero())) return UndefValue::get(Ty); // If any element of a constant divisor vector is zero or undef, the whole op // is undef. auto *Op1C = dyn_cast(Op1); if (Op1C && Ty->isVectorTy()) { unsigned NumElts = Ty->getVectorNumElements(); for (unsigned i = 0; i != NumElts; ++i) { Constant *Elt = Op1C->getAggregateElement(i); if (Elt && (Elt->isNullValue() || isa(Elt))) return UndefValue::get(Ty); } } // undef / X -> 0 // undef % X -> 0 if (match(Op0, m_Undef())) return Constant::getNullValue(Ty); // 0 / X -> 0 // 0 % X -> 0 if (match(Op0, m_Zero())) return Constant::getNullValue(Op0->getType()); // X / X -> 1 // X % X -> 0 if (Op0 == Op1) return IsDiv ? 
ConstantInt::get(Ty, 1) : Constant::getNullValue(Ty); // X / 1 -> X // X % 1 -> 0 // If this is a boolean op (single-bit element type), we can't have // division-by-zero or remainder-by-zero, so assume the divisor is 1. // Similarly, if we're zero-extending a boolean divisor, then assume it's a 1. Value *X; if (match(Op1, m_One()) || Ty->isIntOrIntVectorTy(1) || (match(Op1, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))) return IsDiv ? Op0 : Constant::getNullValue(Ty); return nullptr; } /// Given a predicate and two operands, return true if the comparison is true. /// This is a helper for div/rem simplification where we return some other value /// when we can prove a relationship between the operands. static bool isICmpTrue(ICmpInst::Predicate Pred, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { Value *V = SimplifyICmpInst(Pred, LHS, RHS, Q, MaxRecurse); Constant *C = dyn_cast_or_null(V); return (C && C->isAllOnesValue()); } /// Return true if we can simplify X / Y to 0. Remainder can adapt that answer /// to simplify X % Y to X. static bool isDivZero(Value *X, Value *Y, const SimplifyQuery &Q, unsigned MaxRecurse, bool IsSigned) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) return false; if (IsSigned) { // |X| / |Y| --> 0 // // We require that 1 operand is a simple constant. That could be extended to // 2 variables if we computed the sign bit for each. // // Make sure that a constant is not the minimum signed value because taking // the abs() of that is undefined. Type *Ty = X->getType(); const APInt *C; if (match(X, m_APInt(C)) && !C->isMinSignedValue()) { // Is the variable divisor magnitude always greater than the constant // dividend magnitude? 
// |Y| > |C| --> Y < -abs(C) or Y > abs(C) Constant *PosDividendC = ConstantInt::get(Ty, C->abs()); Constant *NegDividendC = ConstantInt::get(Ty, -C->abs()); if (isICmpTrue(CmpInst::ICMP_SLT, Y, NegDividendC, Q, MaxRecurse) || isICmpTrue(CmpInst::ICMP_SGT, Y, PosDividendC, Q, MaxRecurse)) return true; } if (match(Y, m_APInt(C))) { // Special-case: we can't take the abs() of a minimum signed value. If // that's the divisor, then all we have to do is prove that the dividend // is also not the minimum signed value. if (C->isMinSignedValue()) return isICmpTrue(CmpInst::ICMP_NE, X, Y, Q, MaxRecurse); // Is the variable dividend magnitude always less than the constant // divisor magnitude? // |X| < |C| --> X > -abs(C) and X < abs(C) Constant *PosDivisorC = ConstantInt::get(Ty, C->abs()); Constant *NegDivisorC = ConstantInt::get(Ty, -C->abs()); if (isICmpTrue(CmpInst::ICMP_SGT, X, NegDivisorC, Q, MaxRecurse) && isICmpTrue(CmpInst::ICMP_SLT, X, PosDivisorC, Q, MaxRecurse)) return true; } return false; } // IsSigned == false. // Is the dividend unsigned less than the divisor? return isICmpTrue(ICmpInst::ICMP_ULT, X, Y, Q, MaxRecurse); } /// These are simplifications common to SDiv and UDiv. static Value *simplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q)) return C; if (Value *V = simplifyDivRem(Op0, Op1, true)) return V; bool IsSigned = Opcode == Instruction::SDiv; // (X * Y) / Y -> X if the multiplication does not overflow. Value *X; if (match(Op0, m_c_Mul(m_Value(X), m_Specific(Op1)))) { auto *Mul = cast(Op0); // If the Mul does not overflow, then we are good to go. if ((IsSigned && Mul->hasNoSignedWrap()) || (!IsSigned && Mul->hasNoUnsignedWrap())) return X; // If X has the form X = A / Y, then X * Y cannot overflow. 
if ((IsSigned && match(X, m_SDiv(m_Value(), m_Specific(Op1)))) || (!IsSigned && match(X, m_UDiv(m_Value(), m_Specific(Op1))))) return X; } // (X rem Y) / Y -> 0 if ((IsSigned && match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) || (!IsSigned && match(Op0, m_URem(m_Value(), m_Specific(Op1))))) return Constant::getNullValue(Op0->getType()); // (X /u C1) /u C2 -> 0 if C1 * C2 overflow ConstantInt *C1, *C2; if (!IsSigned && match(Op0, m_UDiv(m_Value(X), m_ConstantInt(C1))) && match(Op1, m_ConstantInt(C2))) { bool Overflow; (void)C1->getValue().umul_ov(C2->getValue(), Overflow); if (Overflow) return Constant::getNullValue(Op0->getType()); } // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. if (isa(Op0) || isa(Op1)) if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa(Op0) || isa(Op1)) if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; if (isDivZero(Op0, Op1, Q, MaxRecurse, IsSigned)) return Constant::getNullValue(Op0->getType()); return nullptr; } /// These are simplifications common to SRem and URem. 
static Value *simplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
                          const SimplifyQuery &Q, unsigned MaxRecurse) {
  if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q))
    return C;

  // Folds shared with division; IsDiv=false selects the remainder results.
  if (Value *V = simplifyDivRem(Op0, Op1, false))
    return V;

  // (X % Y) % Y -> X % Y
  if ((Opcode == Instruction::SRem &&
       match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) ||
      (Opcode == Instruction::URem &&
       match(Op0, m_URem(m_Value(), m_Specific(Op1)))))
    return Op0;

  // (X << Y) % X -> 0
  // Requires the no-wrap flag that matches the signedness of the remainder.
  if ((Opcode == Instruction::SRem &&
       match(Op0, m_NSWShl(m_Specific(Op1), m_Value()))) ||
      (Opcode == Instruction::URem &&
       match(Op0, m_NUWShl(m_Specific(Op1), m_Value()))))
    return Constant::getNullValue(Op0->getType());

  // If the operation is with the result of a select instruction, check whether
  // operating on either branch of the select always yields the same value.
  if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
    if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
      return V;

  // If the operation is with the result of a phi instruction, check whether
  // operating on all incoming values of the phi always yields the same value.
  if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
    if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
      return V;

  // If X / Y == 0, then X % Y == X.
  if (isDivZero(Op0, Op1, Q, MaxRecurse, Opcode == Instruction::SRem))
    return Op0;

  return nullptr;
}

/// Given operands for an SDiv, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
                               unsigned MaxRecurse) {
  // If two operands are negated and no signed overflow, return -1.
  if (isKnownNegation(Op0, Op1, /*NeedNSW=*/true))
    return Constant::getAllOnesValue(Op0->getType());

  return simplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse);
}

Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
  return ::SimplifySDivInst(Op0, Op1, Q, RecursionLimit);
}

/// Given operands for a UDiv, see if we can fold the result.
/// If not, this returns null. static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { return simplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse); } Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { return ::SimplifyUDivInst(Op0, Op1, Q, RecursionLimit); } /// Given operands for an SRem, see if we can fold the result. /// If not, this returns null. static Value *SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { // If the divisor is 0, the result is undefined, so assume the divisor is -1. // srem Op0, (sext i1 X) --> srem Op0, -1 --> 0 Value *X; if (match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) return ConstantInt::getNullValue(Op0->getType()); // If the two operands are negated, return 0. if (isKnownNegation(Op0, Op1)) return ConstantInt::getNullValue(Op0->getType()); return simplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse); } Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { return ::SimplifySRemInst(Op0, Op1, Q, RecursionLimit); } /// Given operands for a URem, see if we can fold the result. /// If not, this returns null. static Value *SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { return simplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse); } Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { return ::SimplifyURemInst(Op0, Op1, Q, RecursionLimit); } /// Returns true if a shift by \c Amount always yields undef. static bool isUndefShift(Value *Amount) { Constant *C = dyn_cast(Amount); if (!C) return false; // X shift by undef -> undef because it may shift by the bitwidth. if (isa(C)) return true; // Shifting by the bitwidth or more is undefined. 
if (ConstantInt *CI = dyn_cast(C)) if (CI->getValue().getLimitedValue() >= CI->getType()->getScalarSizeInBits()) return true; // If all lanes of a vector shift are undefined the whole shift is. if (isa(C) || isa(C)) { for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E; ++I) if (!isUndefShift(C->getAggregateElement(I))) return false; return true; } return false; } /// Given operands for an Shl, LShr or AShr, see if we can fold the result. /// If not, this returns null. static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q)) return C; // 0 shift by X -> 0 if (match(Op0, m_Zero())) return Constant::getNullValue(Op0->getType()); // X shift by 0 -> X // Shift-by-sign-extended bool must be shift-by-0 because shift-by-all-ones // would be poison. Value *X; if (match(Op1, m_Zero()) || (match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))) return Op0; // Fold undefined shifts. if (isUndefShift(Op1)) return UndefValue::get(Op0->getType()); // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. if (isa(Op0) || isa(Op1)) if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa(Op0) || isa(Op1)) if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; // If any bits in the shift amount make that value greater than or equal to // the number of bits in the type, the shift is undefined. 
KnownBits Known = computeKnownBits(Op1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); if (Known.One.getLimitedValue() >= Known.getBitWidth()) return UndefValue::get(Op0->getType()); // If all valid bits in the shift amount are known zero, the first operand is // unchanged. unsigned NumValidShiftBits = Log2_32_Ceil(Known.getBitWidth()); if (Known.countMinTrailingZeros() >= NumValidShiftBits) return Op0; return nullptr; } /// Given operands for an Shl, LShr or AShr, see if we can /// fold the result. If not, this returns null. static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, bool isExact, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyShift(Opcode, Op0, Op1, Q, MaxRecurse)) return V; // X >> X -> 0 if (Op0 == Op1) return Constant::getNullValue(Op0->getType()); // undef >> X -> 0 // undef >> X -> undef (if it's exact) if (match(Op0, m_Undef())) return isExact ? Op0 : Constant::getNullValue(Op0->getType()); // The low bit cannot be shifted out of an exact shift if it is set. if (isExact) { KnownBits Op0Known = computeKnownBits(Op0, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); if (Op0Known.One[0]) return Op0; } return nullptr; } /// Given operands for an Shl, see if we can fold the result. /// If not, this returns null. static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, Q, MaxRecurse)) return V; // undef << X -> 0 // undef << X -> undef if (if it's NSW/NUW) if (match(Op0, m_Undef())) return isNSW || isNUW ? Op0 : Constant::getNullValue(Op0->getType()); // (X >> A) << A -> X Value *X; if (match(Op0, m_Exact(m_Shr(m_Value(X), m_Specific(Op1))))) return X; // shl nuw i8 C, %x -> C iff C has sign bit set. if (isNUW && match(Op0, m_Negative())) return Op0; // NOTE: could use computeKnownBits() / LazyValueInfo, // but the cost-benefit analysis suggests it isn't worth it. 
return nullptr; } Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const SimplifyQuery &Q) { return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit); } /// Given operands for an LShr, see if we can fold the result. /// If not, this returns null. static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyRightShift(Instruction::LShr, Op0, Op1, isExact, Q, MaxRecurse)) return V; // (X << A) >> A -> X Value *X; if (match(Op0, m_NUWShl(m_Value(X), m_Specific(Op1)))) return X; // ((X << A) | Y) >> A -> X if effective width of Y is not larger than A. // We can return X as we do in the above case since OR alters no bits in X. // SimplifyDemandedBits in InstCombine can do more general optimization for // bit manipulation. This pattern aims to provide opportunities for other // optimizers by supporting a simple but common case in InstSimplify. Value *Y; const APInt *ShRAmt, *ShLAmt; if (match(Op1, m_APInt(ShRAmt)) && match(Op0, m_c_Or(m_NUWShl(m_Value(X), m_APInt(ShLAmt)), m_Value(Y))) && *ShRAmt == *ShLAmt) { const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); const unsigned Width = Op0->getType()->getScalarSizeInBits(); const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros(); if (EffWidthY <= ShRAmt->getZExtValue()) return X; } return nullptr; } Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, const SimplifyQuery &Q) { return ::SimplifyLShrInst(Op0, Op1, isExact, Q, RecursionLimit); } /// Given operands for an AShr, see if we can fold the result. /// If not, this returns null. static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyRightShift(Instruction::AShr, Op0, Op1, isExact, Q, MaxRecurse)) return V; // all ones >>a X -> -1 // Do not return Op0 because it may contain undef elements if it's a vector. 
if (match(Op0, m_AllOnes())) return Constant::getAllOnesValue(Op0->getType()); // (X << A) >> A -> X Value *X; if (match(Op0, m_NSWShl(m_Value(X), m_Specific(Op1)))) return X; // Arithmetic shifting an all-sign-bit value is a no-op. unsigned NumSignBits = ComputeNumSignBits(Op0, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); if (NumSignBits == Op0->getType()->getScalarSizeInBits()) return Op0; return nullptr; } Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, const SimplifyQuery &Q) { return ::SimplifyAShrInst(Op0, Op1, isExact, Q, RecursionLimit); } /// Commuted variants are assumed to be handled by calling this function again /// with the parameters swapped. static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, ICmpInst *UnsignedICmp, bool IsAnd) { Value *X, *Y; ICmpInst::Predicate EqPred; if (!match(ZeroICmp, m_ICmp(EqPred, m_Value(Y), m_Zero())) || !ICmpInst::isEquality(EqPred)) return nullptr; ICmpInst::Predicate UnsignedPred; if (match(UnsignedICmp, m_ICmp(UnsignedPred, m_Value(X), m_Specific(Y))) && ICmpInst::isUnsigned(UnsignedPred)) ; else if (match(UnsignedICmp, m_ICmp(UnsignedPred, m_Specific(Y), m_Value(X))) && ICmpInst::isUnsigned(UnsignedPred)) UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred); else return nullptr; // X < Y && Y != 0 --> X < Y // X < Y || Y != 0 --> Y != 0 if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE) return IsAnd ? UnsignedICmp : ZeroICmp; // X >= Y || Y != 0 --> true // X >= Y || Y == 0 --> X >= Y if (UnsignedPred == ICmpInst::ICMP_UGE && !IsAnd) { if (EqPred == ICmpInst::ICMP_NE) return getTrue(UnsignedICmp->getType()); return UnsignedICmp; } // X < Y && Y == 0 --> false if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_EQ && IsAnd) return getFalse(UnsignedICmp->getType()); return nullptr; } /// Commuted variants are assumed to be handled by calling this function again /// with the parameters swapped. 
static Value *simplifyAndOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) {
  ICmpInst::Predicate Pred0, Pred1;
  Value *A, *B;
  if (!match(Op0, m_ICmp(Pred0, m_Value(A), m_Value(B))) ||
      !match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B))))
    return nullptr;

  // We have (icmp Pred0, A, B) & (icmp Pred1, A, B).
  // If Op1 is always implied true by Op0, then Op0 is a subset of Op1, and we
  // can eliminate Op1 from this 'and'.
  if (ICmpInst::isImpliedTrueByMatchingCmp(Pred0, Pred1))
    return Op0;

  // Check for any combination of predicates that are guaranteed to be disjoint.
  if ((Pred0 == ICmpInst::getInversePredicate(Pred1)) ||
      (Pred0 == ICmpInst::ICMP_EQ && ICmpInst::isFalseWhenEqual(Pred1)) ||
      (Pred0 == ICmpInst::ICMP_SLT && Pred1 == ICmpInst::ICMP_SGT) ||
      (Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_UGT))
    return getFalse(Op0->getType());

  return nullptr;
}

/// Commuted variants are assumed to be handled by calling this function again
/// with the parameters swapped.
static Value *simplifyOrOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) {
  ICmpInst::Predicate Pred0, Pred1;
  Value *A, *B;
  if (!match(Op0, m_ICmp(Pred0, m_Value(A), m_Value(B))) ||
      !match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B))))
    return nullptr;

  // We have (icmp Pred0, A, B) | (icmp Pred1, A, B).
  // If Op1 is always implied true by Op0, then Op0 is a subset of Op1, and we
  // can eliminate Op0 from this 'or'.
  if (ICmpInst::isImpliedTrueByMatchingCmp(Pred0, Pred1))
    return Op1;

  // Check for any combination of predicates that cover the entire range of
  // possibilities.
  if ((Pred0 == ICmpInst::getInversePredicate(Pred1)) ||
      (Pred0 == ICmpInst::ICMP_NE && ICmpInst::isTrueWhenEqual(Pred1)) ||
      (Pred0 == ICmpInst::ICMP_SLE && Pred1 == ICmpInst::ICMP_SGE) ||
      (Pred0 == ICmpInst::ICMP_ULE && Pred1 == ICmpInst::ICMP_UGE))
    return getTrue(Op0->getType());

  return nullptr;
}

/// Test if a pair of compares with a shared operand and 2 constants has an
/// empty set intersection, full set union, or if one compare is a superset of
/// the other.
static Value *simplifyAndOrOfICmpsWithConstants(ICmpInst *Cmp0, ICmpInst *Cmp1,
                                                bool IsAnd) {
  // Look for this pattern: {and/or} (icmp X, C0), (icmp X, C1)).
  if (Cmp0->getOperand(0) != Cmp1->getOperand(0))
    return nullptr;

  const APInt *C0, *C1;
  if (!match(Cmp0->getOperand(1), m_APInt(C0)) ||
      !match(Cmp1->getOperand(1), m_APInt(C1)))
    return nullptr;

  auto Range0 = ConstantRange::makeExactICmpRegion(Cmp0->getPredicate(), *C0);
  auto Range1 = ConstantRange::makeExactICmpRegion(Cmp1->getPredicate(), *C1);

  // For and-of-compares, check if the intersection is empty:
  // (icmp X, C0) && (icmp X, C1) --> empty set --> false
  if (IsAnd && Range0.intersectWith(Range1).isEmptySet())
    return getFalse(Cmp0->getType());

  // For or-of-compares, check if the union is full:
  // (icmp X, C0) || (icmp X, C1) --> full set --> true
  if (!IsAnd && Range0.unionWith(Range1).isFullSet())
    return getTrue(Cmp0->getType());

  // Is one range a superset of the other?
  // If this is and-of-compares, take the smaller set:
  // (icmp sgt X, 4) && (icmp sgt X, 42) --> icmp sgt X, 42
  // If this is or-of-compares, take the larger set:
  // (icmp sgt X, 4) || (icmp sgt X, 42) --> icmp sgt X, 4
  if (Range0.contains(Range1))
    return IsAnd ? Cmp1 : Cmp0;
  if (Range1.contains(Range0))
    return IsAnd ? Cmp0 : Cmp1;

  return nullptr;
}

/// Fold "(X == 0 || Y == 0)" or "(X != 0 && Y != 0)" to one of the two
/// compares when one compared value is a mask ('and') of the other.
/// Returns null if the pattern does not match.
static Value *simplifyAndOrOfICmpsWithZero(ICmpInst *Cmp0, ICmpInst *Cmp1,
                                           bool IsAnd) {
  ICmpInst::Predicate P0 = Cmp0->getPredicate(), P1 = Cmp1->getPredicate();
  if (!match(Cmp0->getOperand(1), m_Zero()) ||
      !match(Cmp1->getOperand(1), m_Zero()) || P0 != P1)
    return nullptr;

  if ((IsAnd && P0 != ICmpInst::ICMP_NE) || (!IsAnd && P1 != ICmpInst::ICMP_EQ))
    return nullptr;

  // We have either "(X == 0 || Y == 0)" or "(X != 0 && Y != 0)".
  Value *X = Cmp0->getOperand(0);
  Value *Y = Cmp1->getOperand(0);

  // If one of the compares is a masked version of a (not) null check, then
  // that compare implies the other, so we eliminate the other. Optionally, look
  // through a pointer-to-int cast to match a null check of a pointer type.

  // (X == 0) || (([ptrtoint] X & ?) == 0) --> ([ptrtoint] X & ?) == 0
  // (X == 0) || ((? & [ptrtoint] X) == 0) --> (? & [ptrtoint] X) == 0
  // (X != 0) && (([ptrtoint] X & ?) != 0) --> ([ptrtoint] X & ?) != 0
  // (X != 0) && ((? & [ptrtoint] X) != 0) --> (? & [ptrtoint] X) != 0
  if (match(Y, m_c_And(m_Specific(X), m_Value())) ||
      match(Y, m_c_And(m_PtrToInt(m_Specific(X)), m_Value())))
    return Cmp1;

  // (([ptrtoint] Y & ?) == 0) || (Y == 0) --> ([ptrtoint] Y & ?) == 0
  // ((? & [ptrtoint] Y) == 0) || (Y == 0) --> (? & [ptrtoint] Y) == 0
  // (([ptrtoint] Y & ?) != 0) && (Y != 0) --> ([ptrtoint] Y & ?) != 0
  // ((? & [ptrtoint] Y) != 0) && (Y != 0) --> (? & [ptrtoint] Y) != 0
  if (match(X, m_c_And(m_Specific(Y), m_Value())) ||
      match(X, m_c_And(m_PtrToInt(m_Specific(Y)), m_Value())))
    return Cmp0;

  return nullptr;
}

/// Try to fold "(icmp (add V, C0), C1) & (icmp V, C0)" to false for specific
/// predicate pairs and C1 - C0 deltas of 1 or 2. Returns null otherwise.
/// Commuted operands are handled by the caller invoking this twice.
static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) {
  // (icmp (add V, C0), C1) & (icmp V, C0)
  ICmpInst::Predicate Pred0, Pred1;
  const APInt *C0, *C1;
  Value *V;
  if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_APInt(C0)), m_APInt(C1))))
    return nullptr;

  if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Value())))
    return nullptr;

  // The second compare must be against the very same constant operand that is
  // added in the first compare.
  auto *AddInst = cast<OverflowingBinaryOperator>(Op0->getOperand(0));
  if (AddInst->getOperand(1) != Op1->getOperand(1))
    return nullptr;

  Type *ITy = Op0->getType();
  bool isNSW = AddInst->hasNoSignedWrap();
  bool isNUW = AddInst->hasNoUnsignedWrap();

  const APInt Delta = *C1 - *C0;
  if (C0->isStrictlyPositive()) {
    if (Delta == 2) {
      if (Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_SGT)
        return getFalse(ITy);
      if (Pred0 == ICmpInst::ICMP_SLT && Pred1 == ICmpInst::ICMP_SGT && isNSW)
        return getFalse(ITy);
    }
    if (Delta == 1) {
      if (Pred0 == ICmpInst::ICMP_ULE && Pred1 == ICmpInst::ICMP_SGT)
        return getFalse(ITy);
      if (Pred0 == ICmpInst::ICMP_SLE && Pred1 == ICmpInst::ICMP_SGT && isNSW)
        return getFalse(ITy);
    }
  }
  if (C0->getBoolValue() && isNUW) {
    if (Delta == 2)
      if (Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_UGT)
        return getFalse(ITy);
    if (Delta == 1)
      if (Pred0 == ICmpInst::ICMP_ULE && Pred1 == ICmpInst::ICMP_UGT)
        return getFalse(ITy);
  }

  return nullptr;
}

/// Try each and-of-icmps fold in turn, calling the non-commutative helpers
/// with both operand orders. Returns the first fold found, or null.
static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
  if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true))
    return X;
  if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true))
    return X;

  if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1))
    return X;
  if (Value *X = simplifyAndOfICmpsWithSameOperands(Op1, Op0))
    return X;

  if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, true))
    return X;

  if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, true))
    return X;

  if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1))
    return X;
  if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0))
    return X;

  return nullptr;
}

/// Try to fold "(icmp (add V, C0), C1) | (icmp V, C0)" to true for specific
/// predicate pairs and C1 - C0 deltas of 1 or 2. Returns null otherwise.
/// Commuted operands are handled by the caller invoking this twice.
static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) {
  // (icmp (add V, C0), C1) | (icmp V, C0)
  ICmpInst::Predicate Pred0, Pred1;
  const APInt *C0, *C1;
  Value *V;
  if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_APInt(C0)), m_APInt(C1))))
    return nullptr;

  if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Value())))
    return nullptr;

  // The second compare must be against the very same constant operand that is
  // added in the first compare.
  auto *AddInst = cast<OverflowingBinaryOperator>(Op0->getOperand(0));
  if (AddInst->getOperand(1) != Op1->getOperand(1))
    return nullptr;

  Type *ITy = Op0->getType();
  bool isNSW = AddInst->hasNoSignedWrap();
  bool isNUW = AddInst->hasNoUnsignedWrap();

  const APInt Delta = *C1 - *C0;
  if (C0->isStrictlyPositive()) {
    if (Delta == 2) {
      if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_SLE)
        return getTrue(ITy);
      if (Pred0 == ICmpInst::ICMP_SGE && Pred1 == ICmpInst::ICMP_SLE && isNSW)
        return getTrue(ITy);
    }
    if (Delta == 1) {
      if (Pred0 == ICmpInst::ICMP_UGT && Pred1 == ICmpInst::ICMP_SLE)
        return getTrue(ITy);
      if (Pred0 == ICmpInst::ICMP_SGT && Pred1 == ICmpInst::ICMP_SLE && isNSW)
        return getTrue(ITy);
    }
  }
  if (C0->getBoolValue() && isNUW) {
    if (Delta == 2)
      if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_ULE)
        return getTrue(ITy);
    if (Delta == 1)
      if (Pred0 == ICmpInst::ICMP_UGT && Pred1 == ICmpInst::ICMP_ULE)
        return getTrue(ITy);
  }

  return nullptr;
}

/// Try each or-of-icmps fold in turn, calling the non-commutative helpers
/// with both operand orders. Returns the first fold found, or null.
static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
  if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false))
    return X;
  if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false))
    return X;

  if (Value *X = simplifyOrOfICmpsWithSameOperands(Op0, Op1))
    return X;
  if (Value *X = simplifyOrOfICmpsWithSameOperands(Op1, Op0))
    return X;

  if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, false))
    return X;

  if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, false))
    return X;

  if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1))
    return X;
  if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0))
    return X;

  return nullptr;
}

/// Fold an 'and' of two "fcmp ord" or an 'or' of two "fcmp uno" when one
/// compare pairs a known-never-NaN operand with an operand that also appears
/// in the other compare; the other compare is then returned unchanged.
/// Returns null if no such fold applies.
static Value *simplifyAndOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS, bool IsAnd) {
  Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
  Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
  if (LHS0->getType() != RHS0->getType())
    return nullptr;

  FCmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
  if ((PredL == FCmpInst::FCMP_ORD && PredR == FCmpInst::FCMP_ORD && IsAnd) ||
      (PredL == FCmpInst::FCMP_UNO && PredR == FCmpInst::FCMP_UNO && !IsAnd)) {
    // (fcmp ord NNAN, X) & (fcmp ord X, Y) --> fcmp ord X, Y
    // (fcmp ord NNAN, X) & (fcmp ord Y, X) --> fcmp ord Y, X
    // (fcmp ord X, NNAN) & (fcmp ord X, Y) --> fcmp ord X, Y
    // (fcmp ord X, NNAN) & (fcmp ord Y, X) --> fcmp ord Y, X
    // (fcmp uno NNAN, X) | (fcmp uno X, Y) --> fcmp uno X, Y
    // (fcmp uno NNAN, X) | (fcmp uno Y, X) --> fcmp uno Y, X
    // (fcmp uno X, NNAN) | (fcmp uno X, Y) --> fcmp uno X, Y
    // (fcmp uno X, NNAN) | (fcmp uno Y, X) --> fcmp uno Y, X
    if ((isKnownNeverNaN(LHS0) && (LHS1 == RHS0 || LHS1 == RHS1)) ||
        (isKnownNeverNaN(LHS1) && (LHS0 == RHS0 || LHS0 == RHS1)))
      return RHS;

    // (fcmp ord X, Y) & (fcmp ord NNAN, X) --> fcmp ord X, Y
    // (fcmp ord Y, X) & (fcmp ord NNAN, X) --> fcmp ord Y, X
    // (fcmp ord X, Y) & (fcmp ord X, NNAN) --> fcmp ord X, Y
    // (fcmp ord Y, X) & (fcmp ord X, NNAN) --> fcmp ord Y, X
    // (fcmp uno X, Y) | (fcmp uno NNAN, X) --> fcmp uno X, Y
    // (fcmp uno Y, X) | (fcmp uno NNAN, X) --> fcmp uno Y, X
    // (fcmp uno X, Y) | (fcmp uno X, NNAN) --> fcmp uno X, Y
    // (fcmp uno Y, X) | (fcmp uno X, NNAN) --> fcmp uno Y, X
    if ((isKnownNeverNaN(RHS0) && (RHS1 == LHS0 || RHS1 == LHS1)) ||
        (isKnownNeverNaN(RHS1) && (RHS0 == LHS0 || RHS0 == LHS1)))
      return LHS;
  }

  return nullptr;
}

/// Simplify an and/or whose operands are both icmps or both fcmps, optionally
/// looking through a pair of matching casts. Returns null if nothing folds.
static Value *simplifyAndOrOfCmps(Value *Op0, Value *Op1, bool IsAnd) {
  // Look through casts of the 'and' operands to find compares.
  auto *Cast0 = dyn_cast<CastInst>(Op0);
  auto *Cast1 = dyn_cast<CastInst>(Op1);
  if (Cast0 && Cast1 && Cast0->getOpcode() == Cast1->getOpcode() &&
      Cast0->getSrcTy() == Cast1->getSrcTy()) {
    Op0 = Cast0->getOperand(0);
    Op1 = Cast1->getOperand(0);
  }

  Value *V = nullptr;
  auto *ICmp0 = dyn_cast<ICmpInst>(Op0);
  auto *ICmp1 = dyn_cast<ICmpInst>(Op1);
  if (ICmp0 && ICmp1)
    V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1)
              : simplifyOrOfICmps(ICmp0, ICmp1);

  auto *FCmp0 = dyn_cast<FCmpInst>(Op0);
  auto *FCmp1 = dyn_cast<FCmpInst>(Op1);
  if (FCmp0 && FCmp1)
    V = simplifyAndOrOfFCmps(FCmp0, FCmp1, IsAnd);

  if (!V)
    return nullptr;
  if (!Cast0)
    return V;

  // If we looked through casts, we can only handle a constant simplification
  // because we are not allowed to create a cast instruction here.
  if (auto *C = dyn_cast<Constant>(V))
    return ConstantExpr::getCast(Cast0->getOpcode(), C, Cast0->getType());

  return nullptr;
}

/// Given operands for an And, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
                              unsigned MaxRecurse) {
  if (Constant *C = foldOrCommuteConstant(Instruction::And, Op0, Op1, Q))
    return C;

  // X & undef -> 0
  if (match(Op1, m_Undef()))
    return Constant::getNullValue(Op0->getType());

  // X & X = X
  if (Op0 == Op1)
    return Op0;

  // X & 0 = 0
  if (match(Op1, m_Zero()))
    return Constant::getNullValue(Op0->getType());

  // X & -1 = X
  if (match(Op1, m_AllOnes()))
    return Op0;

  // A & ~A  =  ~A & A  =  0
  if (match(Op0, m_Not(m_Specific(Op1))) ||
      match(Op1, m_Not(m_Specific(Op0))))
    return Constant::getNullValue(Op0->getType());

  // (A | ?) & A = A
  if (match(Op0, m_c_Or(m_Specific(Op1), m_Value())))
    return Op1;

  // A & (A | ?) = A
  if (match(Op1, m_c_Or(m_Specific(Op0), m_Value())))
    return Op0;

  // A mask that only clears known zeros of a shifted value is a no-op.
  Value *X;
  const APInt *Mask;
  const APInt *ShAmt;
  if (match(Op1, m_APInt(Mask))) {
    // If all bits in the inverted and shifted mask are clear:
    // and (shl X, ShAmt), Mask --> shl X, ShAmt
    if (match(Op0, m_Shl(m_Value(X), m_APInt(ShAmt))) &&
        (~(*Mask)).lshr(*ShAmt).isNullValue())
      return Op0;

    // If all bits in the inverted and shifted mask are clear:
    // and (lshr X, ShAmt), Mask --> lshr X, ShAmt
    if (match(Op0, m_LShr(m_Value(X), m_APInt(ShAmt))) &&
        (~(*Mask)).shl(*ShAmt).isNullValue())
      return Op0;
  }

  // A & (-A) = A if A is a power of two or zero.
  if (match(Op0, m_Neg(m_Specific(Op1))) ||
      match(Op1, m_Neg(m_Specific(Op0)))) {
    if (isKnownToBeAPowerOfTwo(Op0, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI,
                               Q.DT))
      return Op0;
    if (isKnownToBeAPowerOfTwo(Op1, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI,
                               Q.DT))
      return Op1;
  }

  if (Value *V = simplifyAndOrOfCmps(Op0, Op1, true))
    return V;

  // Try some generic simplifications for associative operations.
  if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, Q,
                                          MaxRecurse))
    return V;

  // And distributes over Or.  Try some generic simplifications based on this.
  if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Or,
                             Q, MaxRecurse))
    return V;

  // And distributes over Xor.  Try some generic simplifications based on this.
  if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Xor,
                             Q, MaxRecurse))
    return V;

  // If the operation is with the result of a select instruction, check whether
  // operating on either branch of the select always yields the same value.
  if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
    if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, Q,
                                         MaxRecurse))
      return V;

  // If the operation is with the result of a phi instruction, check whether
  // operating on all incoming values of the phi always yields the same value.
  if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
    if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, Q,
                                      MaxRecurse))
      return V;

  // Assuming the effective width of Y is not larger than A, i.e. all bits
  // from X and Y are disjoint in (X << A) | Y,
  // if the mask of this AND op covers all bits of X or Y, while it covers
  // no bits from the other, we can bypass this AND op. E.g.,
  // ((X << A) | Y) & Mask -> Y,
  //     if Mask = ((1 << effective_width_of(Y)) - 1)
  // ((X << A) | Y) & Mask -> X << A,
  //     if Mask = ((1 << effective_width_of(X)) - 1) << A
  // SimplifyDemandedBits in InstCombine can optimize the general case.
  // This pattern aims to help other passes for a common case.
  Value *Y, *XShifted;
  if (match(Op1, m_APInt(Mask)) &&
      match(Op0, m_c_Or(m_CombineAnd(m_NUWShl(m_Value(X), m_APInt(ShAmt)),
                                     m_Value(XShifted)),
                        m_Value(Y)))) {
    const unsigned ShftCnt = ShAmt->getZExtValue();
    const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
    const unsigned Width = Op0->getType()->getScalarSizeInBits();
    const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros();
    if (EffWidthY <= ShftCnt) {
      const KnownBits XKnown = computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI,
                                                Q.DT);
      const unsigned EffWidthX = Width - XKnown.countMinLeadingZeros();
      const APInt EffBitsY = APInt::getLowBitsSet(Width, EffWidthY);
      const APInt EffBitsX = APInt::getLowBitsSet(Width, EffWidthX) << ShftCnt;
      // If the mask is extracting all bits from X or Y as is, we can skip
      // this AND op.
      if (EffBitsY.isSubsetOf(*Mask) && !EffBitsX.intersects(*Mask))
        return Y;
      if (EffBitsX.isSubsetOf(*Mask) && !EffBitsY.intersects(*Mask))
        return XShifted;
    }
  }

  return nullptr;
}

Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
  return ::SimplifyAndInst(Op0, Op1, Q, RecursionLimit);
}

/// Given operands for an Or, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
                             unsigned MaxRecurse) {
  if (Constant *C = foldOrCommuteConstant(Instruction::Or, Op0, Op1, Q))
    return C;

  // X | undef -> -1
  // X | -1 = -1
  // Do not return Op1 because it may contain undef elements if it's a vector.
  if (match(Op1, m_Undef()) || match(Op1, m_AllOnes()))
    return Constant::getAllOnesValue(Op0->getType());

  // X | X = X
  // X | 0 = X
  if (Op0 == Op1 || match(Op1, m_Zero()))
    return Op0;

  // A | ~A  =  ~A | A  =  -1
  if (match(Op0, m_Not(m_Specific(Op1))) ||
      match(Op1, m_Not(m_Specific(Op0))))
    return Constant::getAllOnesValue(Op0->getType());

  // (A & ?) | A = A
  if (match(Op0, m_c_And(m_Specific(Op1), m_Value())))
    return Op1;

  // A | (A & ?) = A
  if (match(Op1, m_c_And(m_Specific(Op0), m_Value())))
    return Op0;

  // ~(A & ?) | A = -1
  if (match(Op0, m_Not(m_c_And(m_Specific(Op1), m_Value()))))
    return Constant::getAllOnesValue(Op1->getType());

  // A | ~(A & ?) = -1
  // The 'and' is nested inside Op1 here, so the operand it must share with the
  // 'or' is Op0. Matching m_Specific(Op1) inside Op1 itself could never
  // succeed, which left this commuted form of the fold dead.
  if (match(Op1, m_Not(m_c_And(m_Specific(Op0), m_Value()))))
    return Constant::getAllOnesValue(Op0->getType());

  Value *A, *B;
  // (A & ~B) | (A ^ B) -> (A ^ B)
  // (~B & A) | (A ^ B) -> (A ^ B)
  // (A & ~B) | (B ^ A) -> (B ^ A)
  // (~B & A) | (B ^ A) -> (B ^ A)
  if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
      (match(Op0, m_c_And(m_Specific(A), m_Not(m_Specific(B)))) ||
       match(Op0, m_c_And(m_Not(m_Specific(A)), m_Specific(B)))))
    return Op1;

  // Commute the 'or' operands.
  // (A ^ B) | (A & ~B) -> (A ^ B)
  // (A ^ B) | (~B & A) -> (A ^ B)
  // (B ^ A) | (A & ~B) -> (B ^ A)
  // (B ^ A) | (~B & A) -> (B ^ A)
  if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
      (match(Op1, m_c_And(m_Specific(A), m_Not(m_Specific(B)))) ||
       match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B)))))
    return Op0;

  // (A & B) | (~A ^ B) -> (~A ^ B)
  // (B & A) | (~A ^ B) -> (~A ^ B)
  // (A & B) | (B ^ ~A) -> (B ^ ~A)
  // (B & A) | (B ^ ~A) -> (B ^ ~A)
  if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
      (match(Op1, m_c_Xor(m_Specific(A), m_Not(m_Specific(B)))) ||
       match(Op1, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B)))))
    return Op1;

  // (~A ^ B) | (A & B) -> (~A ^ B)
  // (~A ^ B) | (B & A) -> (~A ^ B)
  // (B ^ ~A) | (A & B) -> (B ^ ~A)
  // (B ^ ~A) | (B & A) -> (B ^ ~A)
  if (match(Op1, m_And(m_Value(A), m_Value(B))) &&
      (match(Op0, m_c_Xor(m_Specific(A), m_Not(m_Specific(B)))) ||
       match(Op0, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B)))))
    return Op0;

  if (Value *V = simplifyAndOrOfCmps(Op0, Op1, false))
    return V;

  // Try some generic simplifications for associative operations.
  if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q,
                                          MaxRecurse))
    return V;

  // Or distributes over And.  Try some generic simplifications based on this.
  if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And, Q,
                             MaxRecurse))
    return V;

  // If the operation is with the result of a select instruction, check whether
  // operating on either branch of the select always yields the same value.
  if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
    if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, Q,
                                         MaxRecurse))
      return V;

  // (A & C1)|(B & C2)
  const APInt *C1, *C2;
  if (match(Op0, m_And(m_Value(A), m_APInt(C1))) &&
      match(Op1, m_And(m_Value(B), m_APInt(C2)))) {
    if (*C1 == ~*C2) {
      // (A & C1)|(B & C2)
      // If we have: ((V + N) & C1) | (V & C2)
      // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
      // replace with V+N.
      Value *N;
      if (C2->isMask() && // C2 == 0+1+
          match(A, m_c_Add(m_Specific(B), m_Value(N)))) {
        // Add commutes, try both ways.
        if (MaskedValueIsZero(N, *C2, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
          return A;
      }
      // Or commutes, try both ways.
      if (C1->isMask() &&
          match(B, m_c_Add(m_Specific(A), m_Value(N)))) {
        // Add commutes, try both ways.
        if (MaskedValueIsZero(N, *C1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
          return B;
      }
    }
  }

  // If the operation is with the result of a phi instruction, check whether
  // operating on all incoming values of the phi always yields the same value.
  if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
    if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse))
      return V;

  return nullptr;
}

Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
  return ::SimplifyOrInst(Op0, Op1, Q, RecursionLimit);
}

/// Given operands for a Xor, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
                              unsigned MaxRecurse) {
  if (Constant *C = foldOrCommuteConstant(Instruction::Xor, Op0, Op1, Q))
    return C;

  // A ^ undef -> undef
  if (match(Op1, m_Undef()))
    return Op1;

  // A ^ 0 = A
  if (match(Op1, m_Zero()))
    return Op0;

  // A ^ A = 0
  if (Op0 == Op1)
    return Constant::getNullValue(Op0->getType());

  // A ^ ~A  =  ~A ^ A  =  -1
  if (match(Op0, m_Not(m_Specific(Op1))) ||
      match(Op1, m_Not(m_Specific(Op0))))
    return Constant::getAllOnesValue(Op0->getType());

  // Try some generic simplifications for associative operations.
  if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, Q,
                                          MaxRecurse))
    return V;

  // Threading Xor over selects and phi nodes is pointless, so don't bother.
  // Threading over the select in "A ^ select(cond, B, C)" means evaluating
  // "A^B" and "A^C" and seeing if they are equal; but they are equal if and
  // only if B and C are equal.
If B and C are equal then (since we assume // that operands have already been simplified) "select(cond, B, C)" should // have been simplified to the common value of B and C already. Analysing // "A^B" and "A^C" thus gains nothing, but costs compile time. Similarly // for threading over phi nodes. return nullptr; } Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { return ::SimplifyXorInst(Op0, Op1, Q, RecursionLimit); } static Type *GetCompareTy(Value *Op) { return CmpInst::makeCmpResultType(Op->getType()); } /// Rummage around inside V looking for something equivalent to the comparison /// "LHS Pred RHS". Return such a value if found, otherwise return null. /// Helper function for analyzing max/min idioms. static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, Value *LHS, Value *RHS) { SelectInst *SI = dyn_cast(V); if (!SI) return nullptr; CmpInst *Cmp = dyn_cast(SI->getCondition()); if (!Cmp) return nullptr; Value *CmpLHS = Cmp->getOperand(0), *CmpRHS = Cmp->getOperand(1); if (Pred == Cmp->getPredicate() && LHS == CmpLHS && RHS == CmpRHS) return Cmp; if (Pred == CmpInst::getSwappedPredicate(Cmp->getPredicate()) && LHS == CmpRHS && RHS == CmpLHS) return Cmp; return nullptr; } // A significant optimization not implemented here is assuming that alloca // addresses are not equal to incoming argument values. They don't *alias*, // as we say, but that doesn't mean they aren't equal, so we take a // conservative approach. // // This is inspired in part by C++11 5.10p1: // "Two pointers of the same type compare equal if and only if they are both // null, both point to the same function, or both represent the same // address." // // This is pretty permissive. 
// // It's also partly due to C11 6.5.9p6: // "Two pointers compare equal if and only if both are null pointers, both are // pointers to the same object (including a pointer to an object and a // subobject at its beginning) or function, both are pointers to one past the // last element of the same array object, or one is a pointer to one past the // end of one array object and the other is a pointer to the start of a // different array object that happens to immediately follow the first array // object in the address space.) // // C11's version is more restrictive, however there's no reason why an argument // couldn't be a one-past-the-end value for a stack object in the caller and be // equal to the beginning of a stack object in the callee. // // If the C and C++ standards are ever made sufficiently restrictive in this // area, it may be possible to update LLVM's semantics accordingly and reinstate // this optimization. static Constant * computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, CmpInst::Predicate Pred, AssumptionCache *AC, const Instruction *CxtI, Value *LHS, Value *RHS) { // First, skip past any trivial no-ops. LHS = LHS->stripPointerCasts(); RHS = RHS->stripPointerCasts(); // A non-null pointer is not equal to a null pointer. if (llvm::isKnownNonZero(LHS, DL) && isa(RHS) && (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE)) return ConstantInt::get(GetCompareTy(LHS), !CmpInst::isTrueWhenEqual(Pred)); // We can only fold certain predicates on pointer comparisons. switch (Pred) { default: return nullptr; // Equality comaprisons are easy to fold. case CmpInst::ICMP_EQ: case CmpInst::ICMP_NE: break; // We can only handle unsigned relational comparisons because 'inbounds' on // a GEP only protects against unsigned wrapping. 
case CmpInst::ICMP_UGT: case CmpInst::ICMP_UGE: case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE: // However, we have to switch them to their signed variants to handle // negative indices from the base pointer. Pred = ICmpInst::getSignedPredicate(Pred); break; } // Strip off any constant offsets so that we can reason about them. // It's tempting to use getUnderlyingObject or even just stripInBoundsOffsets // here and compare base addresses like AliasAnalysis does, however there are // numerous hazards. AliasAnalysis and its utilities rely on special rules // governing loads and stores which don't apply to icmps. Also, AliasAnalysis // doesn't need to guarantee pointer inequality when it says NoAlias. Constant *LHSOffset = stripAndComputeConstantOffsets(DL, LHS); Constant *RHSOffset = stripAndComputeConstantOffsets(DL, RHS); // If LHS and RHS are related via constant offsets to the same base // value, we can replace it with an icmp which just compares the offsets. if (LHS == RHS) return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset); // Various optimizations for (in)equality comparisons. if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) { // Different non-empty allocations that exist at the same time have // different addresses (if the program can tell). Global variables always // exist, so they always exist during the lifetime of each other and all // allocas. Two different allocas usually have different addresses... // // However, if there's an @llvm.stackrestore dynamically in between two // allocas, they may have the same address. It's tempting to reduce the // scope of the problem by only looking at *static* allocas here. That would // cover the majority of allocas while significantly reducing the likelihood // of having an @llvm.stackrestore pop up in the middle. However, it's not // actually impossible for an @llvm.stackrestore to pop up in the middle of // an entry block. 
Also, if we have a block that's not attached to a // function, we can't tell if it's "static" under the current definition. // Theoretically, this problem could be fixed by creating a new kind of // instruction kind specifically for static allocas. Such a new instruction // could be required to be at the top of the entry block, thus preventing it // from being subject to a @llvm.stackrestore. Instcombine could even // convert regular allocas into these special allocas. It'd be nifty. // However, until then, this problem remains open. // // So, we'll assume that two non-empty allocas have different addresses // for now. // // With all that, if the offsets are within the bounds of their allocations // (and not one-past-the-end! so we can't use inbounds!), and their // allocations aren't the same, the pointers are not equal. // // Note that it's not necessary to check for LHS being a global variable // address, due to canonicalization and constant folding. if (isa(LHS) && (isa(RHS) || isa(RHS))) { ConstantInt *LHSOffsetCI = dyn_cast(LHSOffset); ConstantInt *RHSOffsetCI = dyn_cast(RHSOffset); uint64_t LHSSize, RHSSize; ObjectSizeOpts Opts; Opts.NullIsUnknownSize = NullPointerIsDefined(cast(LHS)->getFunction()); if (LHSOffsetCI && RHSOffsetCI && getObjectSize(LHS, LHSSize, DL, TLI, Opts) && getObjectSize(RHS, RHSSize, DL, TLI, Opts)) { const APInt &LHSOffsetValue = LHSOffsetCI->getValue(); const APInt &RHSOffsetValue = RHSOffsetCI->getValue(); if (!LHSOffsetValue.isNegative() && !RHSOffsetValue.isNegative() && LHSOffsetValue.ult(LHSSize) && RHSOffsetValue.ult(RHSSize)) { return ConstantInt::get(GetCompareTy(LHS), !CmpInst::isTrueWhenEqual(Pred)); } } // Repeat the above check but this time without depending on DataLayout // or being able to compute a precise size. 
if (!cast(LHS->getType())->isEmptyTy() && !cast(RHS->getType())->isEmptyTy() && LHSOffset->isNullValue() && RHSOffset->isNullValue()) return ConstantInt::get(GetCompareTy(LHS), !CmpInst::isTrueWhenEqual(Pred)); } // Even if an non-inbounds GEP occurs along the path we can still optimize // equality comparisons concerning the result. We avoid walking the whole // chain again by starting where the last calls to // stripAndComputeConstantOffsets left off and accumulate the offsets. Constant *LHSNoBound = stripAndComputeConstantOffsets(DL, LHS, true); Constant *RHSNoBound = stripAndComputeConstantOffsets(DL, RHS, true); if (LHS == RHS) return ConstantExpr::getICmp(Pred, ConstantExpr::getAdd(LHSOffset, LHSNoBound), ConstantExpr::getAdd(RHSOffset, RHSNoBound)); // If one side of the equality comparison must come from a noalias call // (meaning a system memory allocation function), and the other side must // come from a pointer that cannot overlap with dynamically-allocated // memory within the lifetime of the current function (allocas, byval // arguments, globals), then determine the comparison result here. SmallVector LHSUObjs, RHSUObjs; GetUnderlyingObjects(LHS, LHSUObjs, DL); GetUnderlyingObjects(RHS, RHSUObjs, DL); // Is the set of underlying objects all noalias calls? auto IsNAC = [](ArrayRef Objects) { return all_of(Objects, isNoAliasCall); }; // Is the set of underlying objects all things which must be disjoint from // noalias calls. For allocas, we consider only static ones (dynamic // allocas might be transformed into calls to malloc not simultaneously // live with the compared-to allocation). For globals, we exclude symbols // that might be resolve lazily to symbols in another dynamically-loaded // library (and, thus, could be malloc'ed by the implementation). 
auto IsAllocDisjoint = [](ArrayRef Objects) { return all_of(Objects, [](Value *V) { if (const AllocaInst *AI = dyn_cast(V)) return AI->getParent() && AI->getFunction() && AI->isStaticAlloca(); if (const GlobalValue *GV = dyn_cast(V)) return (GV->hasLocalLinkage() || GV->hasHiddenVisibility() || GV->hasProtectedVisibility() || GV->hasGlobalUnnamedAddr()) && !GV->isThreadLocal(); if (const Argument *A = dyn_cast(V)) return A->hasByValAttr(); return false; }); }; if ((IsNAC(LHSUObjs) && IsAllocDisjoint(RHSUObjs)) || (IsNAC(RHSUObjs) && IsAllocDisjoint(LHSUObjs))) return ConstantInt::get(GetCompareTy(LHS), !CmpInst::isTrueWhenEqual(Pred)); // Fold comparisons for non-escaping pointer even if the allocation call // cannot be elided. We cannot fold malloc comparison to null. Also, the // dynamic allocation call could be either of the operands. Value *MI = nullptr; if (isAllocLikeFn(LHS, TLI) && llvm::isKnownNonZero(RHS, DL, 0, nullptr, CxtI, DT)) MI = LHS; else if (isAllocLikeFn(RHS, TLI) && llvm::isKnownNonZero(LHS, DL, 0, nullptr, CxtI, DT)) MI = RHS; // FIXME: We should also fold the compare when the pointer escapes, but the // compare dominates the pointer escape if (MI && !PointerMayBeCaptured(MI, true, true)) return ConstantInt::get(GetCompareTy(LHS), CmpInst::isFalseWhenEqual(Pred)); } // Otherwise, fail. return nullptr; } /// Fold an icmp when its operands have i1 scalar type. static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const SimplifyQuery &Q) { Type *ITy = GetCompareTy(LHS); // The return type. Type *OpTy = LHS->getType(); // The operand type. if (!OpTy->isIntOrIntVectorTy(1)) return nullptr; // A boolean compared to true/false can be simplified in 14 out of the 20 // (10 predicates * 2 constants) possible combinations. Cases not handled here // require a 'not' of the LHS, so those must be transformed in InstCombine. 
if (match(RHS, m_Zero())) { switch (Pred) { case CmpInst::ICMP_NE: // X != 0 -> X case CmpInst::ICMP_UGT: // X >u 0 -> X case CmpInst::ICMP_SLT: // X X return LHS; case CmpInst::ICMP_ULT: // X false case CmpInst::ICMP_SGT: // X >s 0 -> false return getFalse(ITy); case CmpInst::ICMP_UGE: // X >=u 0 -> true case CmpInst::ICMP_SLE: // X <=s 0 -> true return getTrue(ITy); default: break; } } else if (match(RHS, m_One())) { switch (Pred) { case CmpInst::ICMP_EQ: // X == 1 -> X case CmpInst::ICMP_UGE: // X >=u 1 -> X case CmpInst::ICMP_SLE: // X <=s -1 -> X return LHS; case CmpInst::ICMP_UGT: // X >u 1 -> false case CmpInst::ICMP_SLT: // X false return getFalse(ITy); case CmpInst::ICMP_ULE: // X <=u 1 -> true case CmpInst::ICMP_SGE: // X >=s -1 -> true return getTrue(ITy); default: break; } } switch (Pred) { default: break; case ICmpInst::ICMP_UGE: if (isImpliedCondition(RHS, LHS, Q.DL).getValueOr(false)) return getTrue(ITy); break; case ICmpInst::ICMP_SGE: /// For signed comparison, the values for an i1 are 0 and -1 /// respectively. This maps into a truth table of: /// LHS | RHS | LHS >=s RHS | LHS implies RHS /// 0 | 0 | 1 (0 >= 0) | 1 /// 0 | 1 | 1 (0 >= -1) | 1 /// 1 | 0 | 0 (-1 >= 0) | 0 /// 1 | 1 | 1 (-1 >= -1) | 1 if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false)) return getTrue(ITy); break; case ICmpInst::ICMP_ULE: if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false)) return getTrue(ITy); break; } return nullptr; } /// Try hard to fold icmp with zero RHS because this is a common case. static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const SimplifyQuery &Q) { if (!match(RHS, m_Zero())) return nullptr; Type *ITy = GetCompareTy(LHS); // The return type. 
switch (Pred) { default: llvm_unreachable("Unknown ICmp predicate!"); case ICmpInst::ICMP_ULT: return getFalse(ITy); case ICmpInst::ICMP_UGE: return getTrue(ITy); case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULE: if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return getFalse(ITy); break; case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGT: if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return getTrue(ITy); break; case ICmpInst::ICMP_SLT: { KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); if (LHSKnown.isNegative()) return getTrue(ITy); if (LHSKnown.isNonNegative()) return getFalse(ITy); break; } case ICmpInst::ICMP_SLE: { KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); if (LHSKnown.isNegative()) return getTrue(ITy); if (LHSKnown.isNonNegative() && isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return getFalse(ITy); break; } case ICmpInst::ICMP_SGE: { KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); if (LHSKnown.isNegative()) return getFalse(ITy); if (LHSKnown.isNonNegative()) return getTrue(ITy); break; } case ICmpInst::ICMP_SGT: { KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); if (LHSKnown.isNegative()) return getFalse(ITy); if (LHSKnown.isNonNegative() && isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return getTrue(ITy); break; } } return nullptr; } /// Many binary operators with a constant operand have an easy-to-compute /// range of outputs. This can be used to fold a comparison to always true or /// always false. static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) { unsigned Width = Lower.getBitWidth(); const APInt *C; switch (BO.getOpcode()) { case Instruction::Add: if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) { // FIXME: If we have both nuw and nsw, we should reduce the range further. if (BO.hasNoUnsignedWrap()) { // 'add nuw x, C' produces [C, UINT_MAX]. 
Lower = *C; } else if (BO.hasNoSignedWrap()) { if (C->isNegative()) { // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C]. Lower = APInt::getSignedMinValue(Width); Upper = APInt::getSignedMaxValue(Width) + *C + 1; } else { // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX]. Lower = APInt::getSignedMinValue(Width) + *C; Upper = APInt::getSignedMaxValue(Width) + 1; } } } break; case Instruction::And: if (match(BO.getOperand(1), m_APInt(C))) // 'and x, C' produces [0, C]. Upper = *C + 1; break; case Instruction::Or: if (match(BO.getOperand(1), m_APInt(C))) // 'or x, C' produces [C, UINT_MAX]. Lower = *C; break; case Instruction::AShr: if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) { // 'ashr x, C' produces [INT_MIN >> C, INT_MAX >> C]. Lower = APInt::getSignedMinValue(Width).ashr(*C); Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1; } else if (match(BO.getOperand(0), m_APInt(C))) { unsigned ShiftAmount = Width - 1; if (!C->isNullValue() && BO.isExact()) ShiftAmount = C->countTrailingZeros(); if (C->isNegative()) { // 'ashr C, x' produces [C, C >> (Width-1)] Lower = *C; Upper = C->ashr(ShiftAmount) + 1; } else { // 'ashr C, x' produces [C >> (Width-1), C] Lower = C->ashr(ShiftAmount); Upper = *C + 1; } } break; case Instruction::LShr: if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) { // 'lshr x, C' produces [0, UINT_MAX >> C]. Upper = APInt::getAllOnesValue(Width).lshr(*C) + 1; } else if (match(BO.getOperand(0), m_APInt(C))) { // 'lshr C, x' produces [C >> (Width-1), C]. unsigned ShiftAmount = Width - 1; if (!C->isNullValue() && BO.isExact()) ShiftAmount = C->countTrailingZeros(); Lower = C->lshr(ShiftAmount); Upper = *C + 1; } break; case Instruction::Shl: if (match(BO.getOperand(0), m_APInt(C))) { if (BO.hasNoUnsignedWrap()) { // 'shl nuw C, x' produces [C, C << CLZ(C)] Lower = *C; Upper = Lower.shl(Lower.countLeadingZeros()) + 1; } else if (BO.hasNoSignedWrap()) { // TODO: What if both nuw+nsw? 
if (C->isNegative()) { // 'shl nsw C, x' produces [C << CLO(C)-1, C] unsigned ShiftAmount = C->countLeadingOnes() - 1; Lower = C->shl(ShiftAmount); Upper = *C + 1; } else { // 'shl nsw C, x' produces [C, C << CLZ(C)-1] unsigned ShiftAmount = C->countLeadingZeros() - 1; Lower = *C; Upper = C->shl(ShiftAmount) + 1; } } } break; case Instruction::SDiv: if (match(BO.getOperand(1), m_APInt(C))) { APInt IntMin = APInt::getSignedMinValue(Width); APInt IntMax = APInt::getSignedMaxValue(Width); if (C->isAllOnesValue()) { // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX] // where C != -1 and C != 0 and C != 1 Lower = IntMin + 1; Upper = IntMax + 1; } else if (C->countLeadingZeros() < Width - 1) { // 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C] // where C != -1 and C != 0 and C != 1 Lower = IntMin.sdiv(*C); Upper = IntMax.sdiv(*C); if (Lower.sgt(Upper)) std::swap(Lower, Upper); Upper = Upper + 1; assert(Upper != Lower && "Upper part of range has wrapped!"); } } else if (match(BO.getOperand(0), m_APInt(C))) { if (C->isMinSignedValue()) { // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2]. Lower = *C; Upper = Lower.lshr(1) + 1; } else { // 'sdiv C, x' produces [-|C|, |C|]. Upper = C->abs() + 1; Lower = (-Upper) + 1; } } break; case Instruction::UDiv: if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) { // 'udiv x, C' produces [0, UINT_MAX / C]. Upper = APInt::getMaxValue(Width).udiv(*C) + 1; } else if (match(BO.getOperand(0), m_APInt(C))) { // 'udiv C, x' produces [0, C]. Upper = *C + 1; } break; case Instruction::SRem: if (match(BO.getOperand(1), m_APInt(C))) { // 'srem x, C' produces (-|C|, |C|). Upper = C->abs(); Lower = (-Upper) + 1; } break; case Instruction::URem: if (match(BO.getOperand(1), m_APInt(C))) // 'urem x, C' produces [0, C). Upper = *C; break; default: break; } } static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS, Value *RHS) { Type *ITy = GetCompareTy(RHS); // The return type. 
Value *X; // Sign-bit checks can be optimized to true/false after unsigned // floating-point casts: // icmp slt (bitcast (uitofp X)), 0 --> false // icmp sgt (bitcast (uitofp X)), -1 --> true if (match(LHS, m_BitCast(m_UIToFP(m_Value(X))))) { if (Pred == ICmpInst::ICMP_SLT && match(RHS, m_Zero())) return ConstantInt::getFalse(ITy); if (Pred == ICmpInst::ICMP_SGT && match(RHS, m_AllOnes())) return ConstantInt::getTrue(ITy); } const APInt *C; if (!match(RHS, m_APInt(C))) return nullptr; // Rule out tautological comparisons (eg., ult 0 or uge 0). ConstantRange RHS_CR = ConstantRange::makeExactICmpRegion(Pred, *C); if (RHS_CR.isEmptySet()) return ConstantInt::getFalse(ITy); if (RHS_CR.isFullSet()) return ConstantInt::getTrue(ITy); // Find the range of possible values for binary operators. unsigned Width = C->getBitWidth(); APInt Lower = APInt(Width, 0); APInt Upper = APInt(Width, 0); if (auto *BO = dyn_cast(LHS)) setLimitsForBinOp(*BO, Lower, Upper); ConstantRange LHS_CR = Lower != Upper ? ConstantRange(Lower, Upper) : ConstantRange(Width, true); if (auto *I = dyn_cast(LHS)) if (auto *Ranges = I->getMetadata(LLVMContext::MD_range)) LHS_CR = LHS_CR.intersectWith(getConstantRangeFromMetadata(*Ranges)); if (!LHS_CR.isFullSet()) { if (RHS_CR.contains(LHS_CR)) return ConstantInt::getTrue(ITy); if (RHS_CR.inverse().contains(LHS_CR)) return ConstantInt::getFalse(ITy); } return nullptr; } /// TODO: A large part of this logic is duplicated in InstCombine's /// foldICmpBinOp(). We should be able to share that and avoid the code /// duplication. static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { Type *ITy = GetCompareTy(LHS); // The return type. BinaryOperator *LBO = dyn_cast(LHS); BinaryOperator *RBO = dyn_cast(RHS); if (MaxRecurse && (LBO || RBO)) { // Analyze the case when either LHS or RHS is an add instruction. 
Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; // LHS = A + B (or A and B are null); RHS = C + D (or C and D are null). bool NoLHSWrapProblem = false, NoRHSWrapProblem = false; if (LBO && LBO->getOpcode() == Instruction::Add) { A = LBO->getOperand(0); B = LBO->getOperand(1); NoLHSWrapProblem = ICmpInst::isEquality(Pred) || (CmpInst::isUnsigned(Pred) && LBO->hasNoUnsignedWrap()) || (CmpInst::isSigned(Pred) && LBO->hasNoSignedWrap()); } if (RBO && RBO->getOpcode() == Instruction::Add) { C = RBO->getOperand(0); D = RBO->getOperand(1); NoRHSWrapProblem = ICmpInst::isEquality(Pred) || (CmpInst::isUnsigned(Pred) && RBO->hasNoUnsignedWrap()) || (CmpInst::isSigned(Pred) && RBO->hasNoSignedWrap()); } // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow. if ((A == RHS || B == RHS) && NoLHSWrapProblem) if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A, Constant::getNullValue(RHS->getType()), Q, MaxRecurse - 1)) return V; // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow. if ((C == LHS || D == LHS) && NoRHSWrapProblem) if (Value *V = SimplifyICmpInst(Pred, Constant::getNullValue(LHS->getType()), C == LHS ? D : C, Q, MaxRecurse - 1)) return V; // icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow. if (A && C && (A == C || A == D || B == C || B == D) && NoLHSWrapProblem && NoRHSWrapProblem) { // Determine Y and Z in the form icmp (X+Y), (X+Z). 
Value *Y, *Z; if (A == C) { // C + B == C + D -> B == D Y = B; Z = D; } else if (A == D) { // D + B == C + D -> B == C Y = B; Z = C; } else if (B == C) { // A + C == C + D -> A == D Y = A; Z = D; } else { assert(B == D); // A + D == C + D -> A == C Y = A; Z = C; } if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse - 1)) return V; } } { Value *Y = nullptr; // icmp pred (or X, Y), X if (LBO && match(LBO, m_c_Or(m_Value(Y), m_Specific(RHS)))) { if (Pred == ICmpInst::ICMP_ULT) return getFalse(ITy); if (Pred == ICmpInst::ICMP_UGE) return getTrue(ITy); if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) { KnownBits RHSKnown = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); if (RHSKnown.isNonNegative() && YKnown.isNegative()) return Pred == ICmpInst::ICMP_SLT ? getTrue(ITy) : getFalse(ITy); if (RHSKnown.isNegative() || YKnown.isNonNegative()) return Pred == ICmpInst::ICMP_SLT ? getFalse(ITy) : getTrue(ITy); } } // icmp pred X, (or X, Y) if (RBO && match(RBO, m_c_Or(m_Value(Y), m_Specific(LHS)))) { if (Pred == ICmpInst::ICMP_ULE) return getTrue(ITy); if (Pred == ICmpInst::ICMP_UGT) return getFalse(ITy); if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE) { KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); if (LHSKnown.isNonNegative() && YKnown.isNegative()) return Pred == ICmpInst::ICMP_SGT ? getTrue(ITy) : getFalse(ITy); if (LHSKnown.isNegative() || YKnown.isNonNegative()) return Pred == ICmpInst::ICMP_SGT ? 
getFalse(ITy) : getTrue(ITy); } } } // icmp pred (and X, Y), X if (LBO && match(LBO, m_c_And(m_Value(), m_Specific(RHS)))) { if (Pred == ICmpInst::ICMP_UGT) return getFalse(ITy); if (Pred == ICmpInst::ICMP_ULE) return getTrue(ITy); } // icmp pred X, (and X, Y) if (RBO && match(RBO, m_c_And(m_Value(), m_Specific(LHS)))) { if (Pred == ICmpInst::ICMP_UGE) return getTrue(ITy); if (Pred == ICmpInst::ICMP_ULT) return getFalse(ITy); } // 0 - (zext X) pred C if (!CmpInst::isUnsigned(Pred) && match(LHS, m_Neg(m_ZExt(m_Value())))) { if (ConstantInt *RHSC = dyn_cast(RHS)) { if (RHSC->getValue().isStrictlyPositive()) { if (Pred == ICmpInst::ICMP_SLT) return ConstantInt::getTrue(RHSC->getContext()); if (Pred == ICmpInst::ICMP_SGE) return ConstantInt::getFalse(RHSC->getContext()); if (Pred == ICmpInst::ICMP_EQ) return ConstantInt::getFalse(RHSC->getContext()); if (Pred == ICmpInst::ICMP_NE) return ConstantInt::getTrue(RHSC->getContext()); } if (RHSC->getValue().isNonNegative()) { if (Pred == ICmpInst::ICMP_SLE) return ConstantInt::getTrue(RHSC->getContext()); if (Pred == ICmpInst::ICMP_SGT) return ConstantInt::getFalse(RHSC->getContext()); } } } // icmp pred (urem X, Y), Y if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) { switch (Pred) { default: break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: { KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); if (!Known.isNonNegative()) break; LLVM_FALLTHROUGH; } case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: return getFalse(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: { KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); if (!Known.isNonNegative()) break; LLVM_FALLTHROUGH; } case ICmpInst::ICMP_NE: case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: return getTrue(ITy); } } // icmp pred X, (urem Y, X) if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) { switch (Pred) { default: break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: { 
KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); if (!Known.isNonNegative()) break; LLVM_FALLTHROUGH; } case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: return getTrue(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: { KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); if (!Known.isNonNegative()) break; LLVM_FALLTHROUGH; } case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: return getFalse(ITy); } } // x >> y <=u x // x udiv y <=u x. if (LBO && (match(LBO, m_LShr(m_Specific(RHS), m_Value())) || match(LBO, m_UDiv(m_Specific(RHS), m_Value())))) { // icmp pred (X op Y), X if (Pred == ICmpInst::ICMP_UGT) return getFalse(ITy); if (Pred == ICmpInst::ICMP_ULE) return getTrue(ITy); } // x >=u x >> y // x >=u x udiv y. if (RBO && (match(RBO, m_LShr(m_Specific(LHS), m_Value())) || match(RBO, m_UDiv(m_Specific(LHS), m_Value())))) { // icmp pred X, (X op Y) if (Pred == ICmpInst::ICMP_ULT) return getFalse(ITy); if (Pred == ICmpInst::ICMP_UGE) return getTrue(ITy); } // handle: // CI2 << X == CI // CI2 << X != CI // // where CI2 is a power of 2 and CI isn't if (auto *CI = dyn_cast(RHS)) { const APInt *CI2Val, *CIVal = &CI->getValue(); if (LBO && match(LBO, m_Shl(m_APInt(CI2Val), m_Value())) && CI2Val->isPowerOf2()) { if (!CIVal->isPowerOf2()) { // CI2 << X can equal zero in some circumstances, // this simplification is unsafe if CI is zero. // // We know it is safe if: // - The shift is nsw, we can't shift out the one bit. // - The shift is nuw, we can't shift out the one bit. 
// - CI2 is one // - CI isn't zero if (LBO->hasNoSignedWrap() || LBO->hasNoUnsignedWrap() || CI2Val->isOneValue() || !CI->isZero()) { if (Pred == ICmpInst::ICMP_EQ) return ConstantInt::getFalse(RHS->getContext()); if (Pred == ICmpInst::ICMP_NE) return ConstantInt::getTrue(RHS->getContext()); } } if (CIVal->isSignMask() && CI2Val->isOneValue()) { if (Pred == ICmpInst::ICMP_UGT) return ConstantInt::getFalse(RHS->getContext()); if (Pred == ICmpInst::ICMP_ULE) return ConstantInt::getTrue(RHS->getContext()); } } } if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() && LBO->getOperand(1) == RBO->getOperand(1)) { switch (LBO->getOpcode()) { default: break; case Instruction::UDiv: case Instruction::LShr: if (ICmpInst::isSigned(Pred) || !LBO->isExact() || !RBO->isExact()) break; if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), RBO->getOperand(0), Q, MaxRecurse - 1)) return V; break; case Instruction::SDiv: if (!ICmpInst::isEquality(Pred) || !LBO->isExact() || !RBO->isExact()) break; if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), RBO->getOperand(0), Q, MaxRecurse - 1)) return V; break; case Instruction::AShr: if (!LBO->isExact() || !RBO->isExact()) break; if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), RBO->getOperand(0), Q, MaxRecurse - 1)) return V; break; case Instruction::Shl: { bool NUW = LBO->hasNoUnsignedWrap() && RBO->hasNoUnsignedWrap(); bool NSW = LBO->hasNoSignedWrap() && RBO->hasNoSignedWrap(); if (!NUW && !NSW) break; if (!NSW && ICmpInst::isSigned(Pred)) break; if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), RBO->getOperand(0), Q, MaxRecurse - 1)) return V; break; } } } return nullptr; } /// Simplify integer comparisons where at least one operand of the compare /// matches an integer min/max idiom. static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { Type *ITy = GetCompareTy(LHS); // The return type. 
Value *A, *B; CmpInst::Predicate P = CmpInst::BAD_ICMP_PREDICATE; CmpInst::Predicate EqP; // Chosen so that "A == max/min(A,B)" iff "A EqP B". // Signed variants on "max(a,b)>=a -> true". if (match(LHS, m_SMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) { if (A != RHS) std::swap(A, B); // smax(A, B) pred A. EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B". // We analyze this as smax(A, B) pred A. P = Pred; } else if (match(RHS, m_SMax(m_Value(A), m_Value(B))) && (A == LHS || B == LHS)) { if (A != LHS) std::swap(A, B); // A pred smax(A, B). EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B". // We analyze this as smax(A, B) swapped-pred A. P = CmpInst::getSwappedPredicate(Pred); } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) { if (A != RHS) std::swap(A, B); // smin(A, B) pred A. EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B". // We analyze this as smax(-A, -B) swapped-pred -A. // Note that we do not need to actually form -A or -B thanks to EqP. P = CmpInst::getSwappedPredicate(Pred); } else if (match(RHS, m_SMin(m_Value(A), m_Value(B))) && (A == LHS || B == LHS)) { if (A != LHS) std::swap(A, B); // A pred smin(A, B). EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B". // We analyze this as smax(-A, -B) pred -A. // Note that we do not need to actually form -A or -B thanks to EqP. P = Pred; } if (P != CmpInst::BAD_ICMP_PREDICATE) { // Cases correspond to "max(A, B) p A". switch (P) { default: break; case CmpInst::ICMP_EQ: case CmpInst::ICMP_SLE: // Equivalent to "A EqP B". This may be the same as the condition tested // in the max/min; if so, we can just return that. if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B)) return V; if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B)) return V; // Otherwise, see if "A EqP B" simplifies. 
if (MaxRecurse) if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1)) return V; break; case CmpInst::ICMP_NE: case CmpInst::ICMP_SGT: { CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP); // Equivalent to "A InvEqP B". This may be the same as the condition // tested in the max/min; if so, we can just return that. if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B)) return V; if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B)) return V; // Otherwise, see if "A InvEqP B" simplifies. if (MaxRecurse) if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1)) return V; break; } case CmpInst::ICMP_SGE: // Always true. return getTrue(ITy); case CmpInst::ICMP_SLT: // Always false. return getFalse(ITy); } } // Unsigned variants on "max(a,b)>=a -> true". P = CmpInst::BAD_ICMP_PREDICATE; if (match(LHS, m_UMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) { if (A != RHS) std::swap(A, B); // umax(A, B) pred A. EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B". // We analyze this as umax(A, B) pred A. P = Pred; } else if (match(RHS, m_UMax(m_Value(A), m_Value(B))) && (A == LHS || B == LHS)) { if (A != LHS) std::swap(A, B); // A pred umax(A, B). EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B". // We analyze this as umax(A, B) swapped-pred A. P = CmpInst::getSwappedPredicate(Pred); } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) { if (A != RHS) std::swap(A, B); // umin(A, B) pred A. EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B". // We analyze this as umax(-A, -B) swapped-pred -A. // Note that we do not need to actually form -A or -B thanks to EqP. P = CmpInst::getSwappedPredicate(Pred); } else if (match(RHS, m_UMin(m_Value(A), m_Value(B))) && (A == LHS || B == LHS)) { if (A != LHS) std::swap(A, B); // A pred umin(A, B). EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B". // We analyze this as umax(-A, -B) pred -A. 
// Note that we do not need to actually form -A or -B thanks to EqP. P = Pred; } if (P != CmpInst::BAD_ICMP_PREDICATE) { // Cases correspond to "max(A, B) p A". switch (P) { default: break; case CmpInst::ICMP_EQ: case CmpInst::ICMP_ULE: // Equivalent to "A EqP B". This may be the same as the condition tested // in the max/min; if so, we can just return that. if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B)) return V; if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B)) return V; // Otherwise, see if "A EqP B" simplifies. if (MaxRecurse) if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1)) return V; break; case CmpInst::ICMP_NE: case CmpInst::ICMP_UGT: { CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP); // Equivalent to "A InvEqP B". This may be the same as the condition // tested in the max/min; if so, we can just return that. if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B)) return V; if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B)) return V; // Otherwise, see if "A InvEqP B" simplifies. if (MaxRecurse) if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1)) return V; break; } case CmpInst::ICMP_UGE: // Always true. return getTrue(ITy); case CmpInst::ICMP_ULT: // Always false. return getFalse(ITy); } } // Variants on "max(x,y) >= min(x,z)". Value *C, *D; if (match(LHS, m_SMax(m_Value(A), m_Value(B))) && match(RHS, m_SMin(m_Value(C), m_Value(D))) && (A == C || A == D || B == C || B == D)) { // max(x, ?) pred min(x, ?). if (Pred == CmpInst::ICMP_SGE) // Always true. return getTrue(ITy); if (Pred == CmpInst::ICMP_SLT) // Always false. return getFalse(ITy); } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) && match(RHS, m_SMax(m_Value(C), m_Value(D))) && (A == C || A == D || B == C || B == D)) { // min(x, ?) pred max(x, ?). if (Pred == CmpInst::ICMP_SLE) // Always true. return getTrue(ITy); if (Pred == CmpInst::ICMP_SGT) // Always false. 
return getFalse(ITy); } else if (match(LHS, m_UMax(m_Value(A), m_Value(B))) && match(RHS, m_UMin(m_Value(C), m_Value(D))) && (A == C || A == D || B == C || B == D)) { // max(x, ?) pred min(x, ?). if (Pred == CmpInst::ICMP_UGE) // Always true. return getTrue(ITy); if (Pred == CmpInst::ICMP_ULT) // Always false. return getFalse(ITy); } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) && match(RHS, m_UMax(m_Value(C), m_Value(D))) && (A == C || A == D || B == C || B == D)) { // min(x, ?) pred max(x, ?). if (Pred == CmpInst::ICMP_ULE) // Always true. return getTrue(ITy); if (Pred == CmpInst::ICMP_UGT) // Always false. return getFalse(ITy); } return nullptr; } /// Given operands for an ICmpInst, see if we can fold the result. /// If not, this returns null. static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!"); if (Constant *CLHS = dyn_cast(LHS)) { if (Constant *CRHS = dyn_cast(RHS)) return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI); // If we have a constant, make sure it is on the RHS. std::swap(LHS, RHS); Pred = CmpInst::getSwappedPredicate(Pred); } Type *ITy = GetCompareTy(LHS); // The return type. // icmp X, X -> true/false // icmp X, undef -> true/false because undef could be X. if (LHS == RHS || isa(RHS)) return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred)); if (Value *V = simplifyICmpOfBools(Pred, LHS, RHS, Q)) return V; if (Value *V = simplifyICmpWithZero(Pred, LHS, RHS, Q)) return V; if (Value *V = simplifyICmpWithConstant(Pred, LHS, RHS)) return V; // If both operands have range metadata, use the metadata // to simplify the comparison. 
if (isa(RHS) && isa(LHS)) { auto RHS_Instr = cast(RHS); auto LHS_Instr = cast(LHS); if (RHS_Instr->getMetadata(LLVMContext::MD_range) && LHS_Instr->getMetadata(LLVMContext::MD_range)) { auto RHS_CR = getConstantRangeFromMetadata( *RHS_Instr->getMetadata(LLVMContext::MD_range)); auto LHS_CR = getConstantRangeFromMetadata( *LHS_Instr->getMetadata(LLVMContext::MD_range)); auto Satisfied_CR = ConstantRange::makeSatisfyingICmpRegion(Pred, RHS_CR); if (Satisfied_CR.contains(LHS_CR)) return ConstantInt::getTrue(RHS->getContext()); auto InversedSatisfied_CR = ConstantRange::makeSatisfyingICmpRegion( CmpInst::getInversePredicate(Pred), RHS_CR); if (InversedSatisfied_CR.contains(LHS_CR)) return ConstantInt::getFalse(RHS->getContext()); } } // Compare of cast, for example (zext X) != 0 -> X != 0 if (isa(LHS) && (isa(RHS) || isa(RHS))) { Instruction *LI = cast(LHS); Value *SrcOp = LI->getOperand(0); Type *SrcTy = SrcOp->getType(); Type *DstTy = LI->getType(); // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input // if the integer type is the same size as the pointer type. if (MaxRecurse && isa(LI) && Q.DL.getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) { if (Constant *RHSC = dyn_cast(RHS)) { // Transfer the cast to the constant. if (Value *V = SimplifyICmpInst(Pred, SrcOp, ConstantExpr::getIntToPtr(RHSC, SrcTy), Q, MaxRecurse-1)) return V; } else if (PtrToIntInst *RI = dyn_cast(RHS)) { if (RI->getOperand(0)->getType() == SrcTy) // Compare without the cast. if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0), Q, MaxRecurse-1)) return V; } } if (isa(LHS)) { // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have the // same type. if (ZExtInst *RI = dyn_cast(RHS)) { if (MaxRecurse && SrcTy == RI->getOperand(0)->getType()) // Compare X and Y. Note that signed predicates become unsigned. 
if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), SrcOp, RI->getOperand(0), Q, MaxRecurse-1)) return V; } // Turn icmp (zext X), Cst into a compare of X and Cst if Cst is extended // too. If not, then try to deduce the result of the comparison. else if (ConstantInt *CI = dyn_cast(RHS)) { // Compute the constant that would happen if we truncated to SrcTy then // reextended to DstTy. Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy); Constant *RExt = ConstantExpr::getCast(CastInst::ZExt, Trunc, DstTy); // If the re-extended constant didn't change then this is effectively // also a case of comparing two zero-extended values. if (RExt == CI && MaxRecurse) if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), SrcOp, Trunc, Q, MaxRecurse-1)) return V; // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit // there. Use this to work out the result of the comparison. if (RExt != CI) { switch (Pred) { default: llvm_unreachable("Unknown ICmp predicate!"); // LHS getContext()); case ICmpInst::ICMP_NE: case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: return ConstantInt::getTrue(CI->getContext()); // LHS is non-negative. If RHS is negative then LHS >s LHS. If RHS // is non-negative then LHS getValue().isNegative() ? ConstantInt::getTrue(CI->getContext()) : ConstantInt::getFalse(CI->getContext()); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: return CI->getValue().isNegative() ? ConstantInt::getFalse(CI->getContext()) : ConstantInt::getTrue(CI->getContext()); } } } } if (isa(LHS)) { // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the // same type. if (SExtInst *RI = dyn_cast(RHS)) { if (MaxRecurse && SrcTy == RI->getOperand(0)->getType()) // Compare X and Y. Note that the predicate does not change. if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0), Q, MaxRecurse-1)) return V; } // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended // too. 
If not, then try to deduce the result of the comparison. else if (ConstantInt *CI = dyn_cast(RHS)) { // Compute the constant that would happen if we truncated to SrcTy then // reextended to DstTy. Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy); Constant *RExt = ConstantExpr::getCast(CastInst::SExt, Trunc, DstTy); // If the re-extended constant didn't change then this is effectively // also a case of comparing two sign-extended values. if (RExt == CI && MaxRecurse) if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse-1)) return V; // Otherwise the upper bits of LHS are all equal, while RHS has varying // bits there. Use this to work out the result of the comparison. if (RExt != CI) { switch (Pred) { default: llvm_unreachable("Unknown ICmp predicate!"); case ICmpInst::ICMP_EQ: return ConstantInt::getFalse(CI->getContext()); case ICmpInst::ICMP_NE: return ConstantInt::getTrue(CI->getContext()); // If RHS is non-negative then LHS s RHS. case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: return CI->getValue().isNegative() ? ConstantInt::getTrue(CI->getContext()) : ConstantInt::getFalse(CI->getContext()); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: return CI->getValue().isNegative() ? ConstantInt::getFalse(CI->getContext()) : ConstantInt::getTrue(CI->getContext()); // If LHS is non-negative then LHS u RHS. case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: // Comparison is true iff the LHS =s 0. if (MaxRecurse) if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp, Constant::getNullValue(SrcTy), Q, MaxRecurse-1)) return V; break; } } } } } // icmp eq|ne X, Y -> false|true if X != Y if (ICmpInst::isEquality(Pred) && isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) { return Pred == ICmpInst::ICMP_NE ? 
getTrue(ITy) : getFalse(ITy); } if (Value *V = simplifyICmpWithBinOp(Pred, LHS, RHS, Q, MaxRecurse)) return V; if (Value *V = simplifyICmpWithMinMax(Pred, LHS, RHS, Q, MaxRecurse)) return V; // Simplify comparisons of related pointers using a powerful, recursive // GEP-walk when we have target data available.. if (LHS->getType()->isPointerTy()) if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.AC, Q.CxtI, LHS, RHS)) return C; if (auto *CLHS = dyn_cast(LHS)) if (auto *CRHS = dyn_cast(RHS)) if (Q.DL.getTypeSizeInBits(CLHS->getPointerOperandType()) == Q.DL.getTypeSizeInBits(CLHS->getType()) && Q.DL.getTypeSizeInBits(CRHS->getPointerOperandType()) == Q.DL.getTypeSizeInBits(CRHS->getType())) if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.AC, Q.CxtI, CLHS->getPointerOperand(), CRHS->getPointerOperand())) return C; if (GetElementPtrInst *GLHS = dyn_cast(LHS)) { if (GEPOperator *GRHS = dyn_cast(RHS)) { if (GLHS->getPointerOperand() == GRHS->getPointerOperand() && GLHS->hasAllConstantIndices() && GRHS->hasAllConstantIndices() && (ICmpInst::isEquality(Pred) || (GLHS->isInBounds() && GRHS->isInBounds() && Pred == ICmpInst::getSignedPredicate(Pred)))) { // The bases are equal and the indices are constant. Build a constant // expression GEP with the same indices and a null base pointer to see // what constant folding can make out of it. Constant *Null = Constant::getNullValue(GLHS->getPointerOperandType()); SmallVector IndicesLHS(GLHS->idx_begin(), GLHS->idx_end()); Constant *NewLHS = ConstantExpr::getGetElementPtr( GLHS->getSourceElementType(), Null, IndicesLHS); SmallVector IndicesRHS(GRHS->idx_begin(), GRHS->idx_end()); Constant *NewRHS = ConstantExpr::getGetElementPtr( GLHS->getSourceElementType(), Null, IndicesRHS); return ConstantExpr::getICmp(Pred, NewLHS, NewRHS); } } } // If the comparison is with the result of a select instruction, check whether // comparing with either branch of the select always yields the same value. 
if (isa(LHS) || isa(RHS)) if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse)) return V; // If the comparison is with the result of a phi instruction, check whether // doing the compare with each incoming phi value yields a common result. if (isa(LHS) || isa(RHS)) if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse)) return V; return nullptr; } Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q) { return ::SimplifyICmpInst(Predicate, LHS, RHS, Q, RecursionLimit); } /// Given operands for an FCmpInst, see if we can fold the result. /// If not, this returns null. static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, unsigned MaxRecurse) { CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!"); if (Constant *CLHS = dyn_cast(LHS)) { if (Constant *CRHS = dyn_cast(RHS)) return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI); // If we have a constant, make sure it is on the RHS. std::swap(LHS, RHS); Pred = CmpInst::getSwappedPredicate(Pred); } // Fold trivial predicates. Type *RetTy = GetCompareTy(LHS); if (Pred == FCmpInst::FCMP_FALSE) return getFalse(RetTy); if (Pred == FCmpInst::FCMP_TRUE) return getTrue(RetTy); // UNO/ORD predicates can be trivially folded if NaNs are ignored. if (FMF.noNaNs()) { if (Pred == FCmpInst::FCMP_UNO) return getFalse(RetTy); if (Pred == FCmpInst::FCMP_ORD) return getTrue(RetTy); } // NaN is unordered; NaN is not ordered. 
assert((FCmpInst::isOrdered(Pred) || FCmpInst::isUnordered(Pred)) && "Comparison must be either ordered or unordered"); if (match(RHS, m_NaN())) return ConstantInt::get(RetTy, CmpInst::isUnordered(Pred)); // fcmp pred x, undef and fcmp pred undef, x // fold to true if unordered, false if ordered if (isa(LHS) || isa(RHS)) { // Choosing NaN for the undef will always make unordered comparison succeed // and ordered comparison fail. return ConstantInt::get(RetTy, CmpInst::isUnordered(Pred)); } // fcmp x,x -> true/false. Not all compares are foldable. if (LHS == RHS) { if (CmpInst::isTrueWhenEqual(Pred)) return getTrue(RetTy); if (CmpInst::isFalseWhenEqual(Pred)) return getFalse(RetTy); } // Handle fcmp with constant RHS. const APFloat *C; if (match(RHS, m_APFloat(C))) { // Check whether the constant is an infinity. if (C->isInfinity()) { if (C->isNegative()) { switch (Pred) { case FCmpInst::FCMP_OLT: // No value is ordered and less than negative infinity. return getFalse(RetTy); case FCmpInst::FCMP_UGE: // All values are unordered with or at least negative infinity. return getTrue(RetTy); default: break; } } else { switch (Pred) { case FCmpInst::FCMP_OGT: // No value is ordered and greater than infinity. return getFalse(RetTy); case FCmpInst::FCMP_ULE: // All values are unordered with and at most infinity. return getTrue(RetTy); default: break; } } } if (C->isZero()) { switch (Pred) { case FCmpInst::FCMP_UGE: if (CannotBeOrderedLessThanZero(LHS, Q.TLI)) return getTrue(RetTy); break; case FCmpInst::FCMP_OLT: // X < 0 if (CannotBeOrderedLessThanZero(LHS, Q.TLI)) return getFalse(RetTy); break; default: break; } } else if (C->isNegative()) { assert(!C->isNaN() && "Unexpected NaN constant!"); // TODO: We can catch more cases by using a range check rather than // relying on CannotBeOrderedLessThanZero. 
switch (Pred) { case FCmpInst::FCMP_UGE: case FCmpInst::FCMP_UGT: case FCmpInst::FCMP_UNE: // (X >= 0) implies (X > C) when (C < 0) if (CannotBeOrderedLessThanZero(LHS, Q.TLI)) return getTrue(RetTy); break; case FCmpInst::FCMP_OEQ: case FCmpInst::FCMP_OLE: case FCmpInst::FCMP_OLT: // (X >= 0) implies !(X < C) when (C < 0) if (CannotBeOrderedLessThanZero(LHS, Q.TLI)) return getFalse(RetTy); break; default: break; } } } // If the comparison is with the result of a select instruction, check whether // comparing with either branch of the select always yields the same value. if (isa(LHS) || isa(RHS)) if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse)) return V; // If the comparison is with the result of a phi instruction, check whether // doing the compare with each incoming phi value yields a common result. if (isa(LHS) || isa(RHS)) if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse)) return V; return nullptr; } Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q) { return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, Q, RecursionLimit); } /// See if V simplifies when its operand Op is replaced with RepOp. static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, const SimplifyQuery &Q, unsigned MaxRecurse) { // Trivial replacement. if (V == Op) return RepOp; // We cannot replace a constant, and shouldn't even try. if (isa(Op)) return nullptr; auto *I = dyn_cast(V); if (!I) return nullptr; // If this is a binary operator, try to simplify it with the replaced op. if (auto *B = dyn_cast(I)) { // Consider: // %cmp = icmp eq i32 %x, 2147483647 // %add = add nsw i32 %x, 1 // %sel = select i1 %cmp, i32 -2147483648, i32 %add // // We can't replace %sel with %add unless we strip away the flags. 
if (isa(B)) if (B->hasNoSignedWrap() || B->hasNoUnsignedWrap()) return nullptr; if (isa(B)) if (B->isExact()) return nullptr; if (MaxRecurse) { if (B->getOperand(0) == Op) return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), Q, MaxRecurse - 1); if (B->getOperand(1) == Op) return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, Q, MaxRecurse - 1); } } // Same for CmpInsts. if (CmpInst *C = dyn_cast(I)) { if (MaxRecurse) { if (C->getOperand(0) == Op) return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), Q, MaxRecurse - 1); if (C->getOperand(1) == Op) return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, Q, MaxRecurse - 1); } } // Same for GEPs. if (auto *GEP = dyn_cast(I)) { if (MaxRecurse) { SmallVector NewOps(GEP->getNumOperands()); transform(GEP->operands(), NewOps.begin(), [&](Value *V) { return V == Op ? RepOp : V; }); return SimplifyGEPInst(GEP->getSourceElementType(), NewOps, Q, MaxRecurse - 1); } } // TODO: We could hand off more cases to instsimplify here. // If all operands are constant after substituting Op for RepOp then we can // constant fold the instruction. if (Constant *CRepOp = dyn_cast(RepOp)) { // Build a list of all constant operands. SmallVector ConstOps; for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { if (I->getOperand(i) == Op) ConstOps.push_back(CRepOp); else if (Constant *COp = dyn_cast(I->getOperand(i))) ConstOps.push_back(COp); else break; } // All operands were constants, fold it. 
if (ConstOps.size() == I->getNumOperands()) { if (CmpInst *C = dyn_cast(I)) return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0], ConstOps[1], Q.DL, Q.TLI); if (LoadInst *LI = dyn_cast(I)) if (!LI->isVolatile()) return ConstantFoldLoadFromConstPtr(ConstOps[0], LI->getType(), Q.DL); return ConstantFoldInstOperands(I, ConstOps, Q.DL, Q.TLI); } } return nullptr; } /// Try to simplify a select instruction when its condition operand is an /// integer comparison where one operand of the compare is a constant. static Value *simplifySelectBitTest(Value *TrueVal, Value *FalseVal, Value *X, const APInt *Y, bool TrueWhenUnset) { const APInt *C; // (X & Y) == 0 ? X & ~Y : X --> X // (X & Y) != 0 ? X & ~Y : X --> X & ~Y if (FalseVal == X && match(TrueVal, m_And(m_Specific(X), m_APInt(C))) && *Y == ~*C) return TrueWhenUnset ? FalseVal : TrueVal; // (X & Y) == 0 ? X : X & ~Y --> X & ~Y // (X & Y) != 0 ? X : X & ~Y --> X if (TrueVal == X && match(FalseVal, m_And(m_Specific(X), m_APInt(C))) && *Y == ~*C) return TrueWhenUnset ? FalseVal : TrueVal; if (Y->isPowerOf2()) { // (X & Y) == 0 ? X | Y : X --> X | Y // (X & Y) != 0 ? X | Y : X --> X if (FalseVal == X && match(TrueVal, m_Or(m_Specific(X), m_APInt(C))) && *Y == *C) return TrueWhenUnset ? TrueVal : FalseVal; // (X & Y) == 0 ? X : X | Y --> X // (X & Y) != 0 ? X : X | Y --> X | Y if (TrueVal == X && match(FalseVal, m_Or(m_Specific(X), m_APInt(C))) && *Y == *C) return TrueWhenUnset ? TrueVal : FalseVal; } return nullptr; } /// An alternative way to test if a bit is set or not uses sgt/slt instead of /// eq/ne. 
static Value *simplifySelectWithFakeICmpEq(Value *CmpLHS, Value *CmpRHS,
                                           ICmpInst::Predicate Pred,
                                           Value *TrueVal, Value *FalseVal) {
  // Ask decomposeBitTestICmp to rewrite the compare as a bit test on X with
  // Mask; if it cannot, there is nothing to simplify here.
  Value *X;
  APInt Mask;
  if (!decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, X, Mask))
    return nullptr;

  return simplifySelectBitTest(TrueVal, FalseVal, X, &Mask,
                               Pred == ICmpInst::ICMP_EQ);
}

/// Try to simplify a select instruction when its condition operand is an
/// integer comparison.
///
/// \param CondVal     Select condition; null is returned unless it is an icmp.
/// \param TrueVal     Value selected when the condition is true.
/// \param FalseVal    Value selected when the condition is false.
/// \param Q           Query context forwarded to SimplifyWithOpReplaced.
/// \param MaxRecurse  Recursion budget forwarded to SimplifyWithOpReplaced.
/// \returns the simplified value, or null if no fold applies.
static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
                                         Value *FalseVal, const SimplifyQuery &Q,
                                         unsigned MaxRecurse) {
  ICmpInst::Predicate Pred;
  Value *CmpLHS, *CmpRHS;
  if (!match(CondVal, m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS))))
    return nullptr;

  if (ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero())) {
    Value *X;
    const APInt *Y;
    if (match(CmpLHS, m_And(m_Value(X), m_APInt(Y))))
      if (Value *V = simplifySelectBitTest(TrueVal, FalseVal, X, Y,
                                           Pred == ICmpInst::ICMP_EQ))
        return V;
  }

  // Check for other compares that behave like bit test.
  if (Value *V = simplifySelectWithFakeICmpEq(CmpLHS, CmpRHS, Pred,
                                              TrueVal, FalseVal))
    return V;

  // If we have an equality comparison, then we know the value in one of the
  // arms of the select. See if substituting this value into the arm and
  // simplifying the result yields the same value as the other arm.
  if (Pred == ICmpInst::ICMP_EQ) {
    if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
            TrueVal ||
        SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
            TrueVal)
      return FalseVal;
    if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
            FalseVal ||
        SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
            FalseVal)
      return FalseVal;
  } else if (Pred == ICmpInst::ICMP_NE) {
    if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
            FalseVal ||
        SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
            FalseVal)
      return TrueVal;
    if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
            TrueVal ||
        SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
            TrueVal)
      return TrueVal;
  }

  return nullptr;
}

/// Given operands for a SelectInst, see if we can fold the result.
/// If not, this returns null.
///
/// \param Cond        Select condition.
/// \param TrueVal     Value selected when the condition is true.
/// \param FalseVal    Value selected when the condition is false.
/// \param Q           Query context forwarded to simplifySelectWithICmpCond.
/// \param MaxRecurse  Recursion budget forwarded to
///                    simplifySelectWithICmpCond.
static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
                                 const SimplifyQuery &Q, unsigned MaxRecurse) {
  if (auto *CondC = dyn_cast<Constant>(Cond)) {
    if (auto *TrueC = dyn_cast<Constant>(TrueVal))
      if (auto *FalseC = dyn_cast<Constant>(FalseVal))
        return ConstantFoldSelectInstruction(CondC, TrueC, FalseC);

    // select undef, X, Y -> X or Y
    if (isa<UndefValue>(CondC))
      return isa<Constant>(FalseVal) ? FalseVal : TrueVal;

    // TODO: Vector constants with undef elements don't simplify.

    // select true, X, Y  -> X
    if (CondC->isAllOnesValue())
      return TrueVal;
    // select false, X, Y -> Y
    if (CondC->isNullValue())
      return FalseVal;
  }

  // select ?, X, X -> X
  if (TrueVal == FalseVal)
    return TrueVal;

  if (isa<UndefValue>(TrueVal)) // select ?, undef, X -> X
    return FalseVal;
  if (isa<UndefValue>(FalseVal)) // select ?, X, undef -> X
    return TrueVal;

  if (Value *V =
          simplifySelectWithICmpCond(Cond, TrueVal, FalseVal, Q, MaxRecurse))
    return V;

  if (Value *V = foldSelectWithBinaryOp(Cond, TrueVal, FalseVal))
    return V;

  return nullptr;
}

/// Public entry point for select simplification: forwards to the file-local
/// implementation with the default recursion budget.
Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
                                const SimplifyQuery &Q) {
  return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Q, RecursionLimit);
}

/// Given operands for an GetElementPtrInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
                              const SimplifyQuery &Q, unsigned) {
  // The type of the GEP pointer operand.
  unsigned AS =
      cast<PointerType>(Ops[0]->getType()->getScalarType())->getAddressSpace();

  // getelementptr P -> P.
  if (Ops.size() == 1)
    return Ops[0];

  // Compute the (pointer) type returned by the GEP instruction.
  Type *LastType = GetElementPtrInst::getIndexedType(SrcTy, Ops.slice(1));
  Type *GEPTy = PointerType::get(LastType, AS);
  if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType()))
    GEPTy = VectorType::get(GEPTy, VT->getNumElements());
  else if (VectorType *VT = dyn_cast<VectorType>(Ops[1]->getType()))
    GEPTy = VectorType::get(GEPTy, VT->getNumElements());

  if (isa<UndefValue>(Ops[0]))
    return UndefValue::get(GEPTy);

  if (Ops.size() == 2) {
    // getelementptr P, 0 -> P.
    if (match(Ops[1], m_Zero()) && Ops[0]->getType() == GEPTy)
      return Ops[0];

    Type *Ty = SrcTy;
    if (Ty->isSized()) {
      Value *P;
      uint64_t C;
      uint64_t TyAllocSize = Q.DL.getTypeAllocSize(Ty);
      // getelementptr P, N -> P if P points to a type of zero size.
      if (TyAllocSize == 0 && Ops[0]->getType() == GEPTy)
        return Ops[0];

      // The following transforms are only safe if the ptrtoint cast
      // doesn't truncate the pointers.
if (Ops[1]->getType()->getScalarSizeInBits() == Q.DL.getIndexSizeInBits(AS)) { auto PtrToIntOrZero = [GEPTy](Value *P) -> Value * { if (match(P, m_Zero())) return Constant::getNullValue(GEPTy); Value *Temp; if (match(P, m_PtrToInt(m_Value(Temp)))) if (Temp->getType() == GEPTy) return Temp; return nullptr; }; // getelementptr V, (sub P, V) -> P if P points to a type of size 1. if (TyAllocSize == 1 && match(Ops[1], m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0]))))) if (Value *R = PtrToIntOrZero(P)) return R; // getelementptr V, (ashr (sub P, V), C) -> Q // if P points to a type of size 1 << C. if (match(Ops[1], m_AShr(m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0]))), m_ConstantInt(C))) && TyAllocSize == 1ULL << C) if (Value *R = PtrToIntOrZero(P)) return R; // getelementptr V, (sdiv (sub P, V), C) -> Q // if P points to a type of size C. if (match(Ops[1], m_SDiv(m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0]))), m_SpecificInt(TyAllocSize)))) if (Value *R = PtrToIntOrZero(P)) return R; } } } if (Q.DL.getTypeAllocSize(LastType) == 1 && all_of(Ops.slice(1).drop_back(1), [](Value *Idx) { return match(Idx, m_Zero()); })) { unsigned IdxWidth = Q.DL.getIndexSizeInBits(Ops[0]->getType()->getPointerAddressSpace()); if (Q.DL.getTypeSizeInBits(Ops.back()->getType()) == IdxWidth) { APInt BasePtrOffset(IdxWidth, 0); Value *StrippedBasePtr = Ops[0]->stripAndAccumulateInBoundsConstantOffsets(Q.DL, BasePtrOffset); // gep (gep V, C), (sub 0, V) -> C if (match(Ops.back(), m_Sub(m_Zero(), m_PtrToInt(m_Specific(StrippedBasePtr))))) { auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset); return ConstantExpr::getIntToPtr(CI, GEPTy); } // gep (gep V, C), (xor V, -1) -> C-1 if (match(Ops.back(), m_Xor(m_PtrToInt(m_Specific(StrippedBasePtr)), m_AllOnes()))) { auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset - 1); return ConstantExpr::getIntToPtr(CI, GEPTy); } } } // Check to see if this is constant foldable. 
if (!all_of(Ops, [](Value *V) { return isa(V); })) return nullptr; auto *CE = ConstantExpr::getGetElementPtr(SrcTy, cast(Ops[0]), Ops.slice(1)); if (auto *CEFolded = ConstantFoldConstant(CE, Q.DL)) return CEFolded; return CE; } Value *llvm::SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, const SimplifyQuery &Q) { return ::SimplifyGEPInst(SrcTy, Ops, Q, RecursionLimit); } /// Given operands for an InsertValueInst, see if we can fold the result. /// If not, this returns null. static Value *SimplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef Idxs, const SimplifyQuery &Q, unsigned) { if (Constant *CAgg = dyn_cast(Agg)) if (Constant *CVal = dyn_cast(Val)) return ConstantFoldInsertValueInstruction(CAgg, CVal, Idxs); // insertvalue x, undef, n -> x if (match(Val, m_Undef())) return Agg; // insertvalue x, (extractvalue y, n), n if (ExtractValueInst *EV = dyn_cast(Val)) if (EV->getAggregateOperand()->getType() == Agg->getType() && EV->getIndices() == Idxs) { // insertvalue undef, (extractvalue y, n), n -> y if (match(Agg, m_Undef())) return EV->getAggregateOperand(); // insertvalue y, (extractvalue y, n), n -> y if (Agg == EV->getAggregateOperand()) return Agg; } return nullptr; } Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef Idxs, const SimplifyQuery &Q) { return ::SimplifyInsertValueInst(Agg, Val, Idxs, Q, RecursionLimit); } Value *llvm::SimplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx, const SimplifyQuery &Q) { // Try to constant fold. auto *VecC = dyn_cast(Vec); auto *ValC = dyn_cast(Val); auto *IdxC = dyn_cast(Idx); if (VecC && ValC && IdxC) return ConstantFoldInsertElementInstruction(VecC, ValC, IdxC); // Fold into undef if index is out of bounds. 
if (auto *CI = dyn_cast(Idx)) { uint64_t NumElements = cast(Vec->getType())->getNumElements(); if (CI->uge(NumElements)) return UndefValue::get(Vec->getType()); } // If index is undef, it might be out of bounds (see above case) if (isa(Idx)) return UndefValue::get(Vec->getType()); return nullptr; } /// Given operands for an ExtractValueInst, see if we can fold the result. /// If not, this returns null. static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef Idxs, const SimplifyQuery &, unsigned) { if (auto *CAgg = dyn_cast(Agg)) return ConstantFoldExtractValueInstruction(CAgg, Idxs); // extractvalue x, (insertvalue y, elt, n), n -> elt unsigned NumIdxs = Idxs.size(); for (auto *IVI = dyn_cast(Agg); IVI != nullptr; IVI = dyn_cast(IVI->getAggregateOperand())) { ArrayRef InsertValueIdxs = IVI->getIndices(); unsigned NumInsertValueIdxs = InsertValueIdxs.size(); unsigned NumCommonIdxs = std::min(NumInsertValueIdxs, NumIdxs); if (InsertValueIdxs.slice(0, NumCommonIdxs) == Idxs.slice(0, NumCommonIdxs)) { if (NumIdxs == NumInsertValueIdxs) return IVI->getInsertedValueOperand(); break; } } return nullptr; } Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef Idxs, const SimplifyQuery &Q) { return ::SimplifyExtractValueInst(Agg, Idxs, Q, RecursionLimit); } /// Given operands for an ExtractElementInst, see if we can fold the result. /// If not, this returns null. static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQuery &, unsigned) { if (auto *CVec = dyn_cast(Vec)) { if (auto *CIdx = dyn_cast(Idx)) return ConstantFoldExtractElementInstruction(CVec, CIdx); // The index is not relevant if our vector is a splat. if (auto *Splat = CVec->getSplatValue()) return Splat; if (isa(Vec)) return UndefValue::get(Vec->getType()->getVectorElementType()); } // If extracting a specified index from the vector, see if we can recursively // find a previously computed scalar that was inserted into the vector. 
if (auto *IdxC = dyn_cast(Idx)) { if (IdxC->getValue().uge(Vec->getType()->getVectorNumElements())) // definitely out of bounds, thus undefined result return UndefValue::get(Vec->getType()->getVectorElementType()); if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue())) return Elt; } // An undef extract index can be arbitrarily chosen to be an out-of-range // index value, which would result in the instruction being undef. if (isa(Idx)) return UndefValue::get(Vec->getType()->getVectorElementType()); return nullptr; } Value *llvm::SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQuery &Q) { return ::SimplifyExtractElementInst(Vec, Idx, Q, RecursionLimit); } /// See if we can fold the given phi. If not, returns null. static Value *SimplifyPHINode(PHINode *PN, const SimplifyQuery &Q) { // If all of the PHI's incoming values are the same then replace the PHI node // with the common value. Value *CommonValue = nullptr; bool HasUndefInput = false; for (Value *Incoming : PN->incoming_values()) { // If the incoming value is the phi node itself, it can safely be skipped. if (Incoming == PN) continue; if (isa(Incoming)) { // Remember that we saw an undef value, but otherwise ignore them. HasUndefInput = true; continue; } if (CommonValue && Incoming != CommonValue) return nullptr; // Not the same, bail out. CommonValue = Incoming; } // If CommonValue is null then all of the incoming values were either undef or // equal to the phi node itself. if (!CommonValue) return UndefValue::get(PN->getType()); // If we have a PHI node like phi(X, undef, X), where X is defined by some // instruction, we cannot return X as the result of the PHI node unless it // dominates the PHI block. if (HasUndefInput) return valueDominatesPHI(CommonValue, PN, Q.DT) ? 
CommonValue : nullptr; return CommonValue; } static Value *SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, const SimplifyQuery &Q, unsigned MaxRecurse) { if (auto *C = dyn_cast(Op)) return ConstantFoldCastOperand(CastOpc, C, Ty, Q.DL); if (auto *CI = dyn_cast(Op)) { auto *Src = CI->getOperand(0); Type *SrcTy = Src->getType(); Type *MidTy = CI->getType(); Type *DstTy = Ty; if (Src->getType() == Ty) { auto FirstOp = static_cast(CI->getOpcode()); auto SecondOp = static_cast(CastOpc); Type *SrcIntPtrTy = SrcTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(SrcTy) : nullptr; Type *MidIntPtrTy = MidTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(MidTy) : nullptr; Type *DstIntPtrTy = DstTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(DstTy) : nullptr; if (CastInst::isEliminableCastPair(FirstOp, SecondOp, SrcTy, MidTy, DstTy, SrcIntPtrTy, MidIntPtrTy, DstIntPtrTy) == Instruction::BitCast) return Src; } } // bitcast x -> x if (CastOpc == Instruction::BitCast) if (Op->getType() == Ty) return Op; return nullptr; } Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, const SimplifyQuery &Q) { return ::SimplifyCastInst(CastOpc, Op, Ty, Q, RecursionLimit); } /// For the given destination element of a shuffle, peek through shuffles to /// match a root vector source operand that contains that element in the same /// vector lane (ie, the same mask index), so we can eliminate the shuffle(s). static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1, int MaskVal, Value *RootVec, unsigned MaxRecurse) { if (!MaxRecurse--) return nullptr; // Bail out if any mask value is undefined. That kind of shuffle may be // simplified further based on demanded bits or other folds. if (MaskVal == -1) return nullptr; // The mask value chooses which source operand we need to look at next. 
int InVecNumElts = Op0->getType()->getVectorNumElements(); int RootElt = MaskVal; Value *SourceOp = Op0; if (MaskVal >= InVecNumElts) { RootElt = MaskVal - InVecNumElts; SourceOp = Op1; } // If the source operand is a shuffle itself, look through it to find the // matching root vector. if (auto *SourceShuf = dyn_cast(SourceOp)) { return foldIdentityShuffles( DestElt, SourceShuf->getOperand(0), SourceShuf->getOperand(1), SourceShuf->getMaskValue(RootElt), RootVec, MaxRecurse); } // TODO: Look through bitcasts? What if the bitcast changes the vector element // size? // The source operand is not a shuffle. Initialize the root vector value for // this shuffle if that has not been done yet. if (!RootVec) RootVec = SourceOp; // Give up as soon as a source operand does not match the existing root value. if (RootVec != SourceOp) return nullptr; // The element must be coming from the same lane in the source vector // (although it may have crossed lanes in intermediate shuffles). if (RootElt != DestElt) return nullptr; return RootVec; } static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, Type *RetTy, const SimplifyQuery &Q, unsigned MaxRecurse) { if (isa(Mask)) return UndefValue::get(RetTy); Type *InVecTy = Op0->getType(); unsigned MaskNumElts = Mask->getType()->getVectorNumElements(); unsigned InVecNumElts = InVecTy->getVectorNumElements(); SmallVector Indices; ShuffleVectorInst::getShuffleMask(Mask, Indices); assert(MaskNumElts == Indices.size() && "Size of Indices not same as number of mask elements?"); // Canonicalization: If mask does not select elements from an input vector, // replace that input vector with undef. 
bool MaskSelects0 = false, MaskSelects1 = false; for (unsigned i = 0; i != MaskNumElts; ++i) { if (Indices[i] == -1) continue; if ((unsigned)Indices[i] < InVecNumElts) MaskSelects0 = true; else MaskSelects1 = true; } if (!MaskSelects0) Op0 = UndefValue::get(InVecTy); if (!MaskSelects1) Op1 = UndefValue::get(InVecTy); auto *Op0Const = dyn_cast(Op0); auto *Op1Const = dyn_cast(Op1); // If all operands are constant, constant fold the shuffle. if (Op0Const && Op1Const) return ConstantFoldShuffleVectorInstruction(Op0Const, Op1Const, Mask); // Canonicalization: if only one input vector is constant, it shall be the // second one. if (Op0Const && !Op1Const) { std::swap(Op0, Op1); ShuffleVectorInst::commuteShuffleMask(Indices, InVecNumElts); } // A shuffle of a splat is always the splat itself. Legal if the shuffle's // value type is same as the input vectors' type. if (auto *OpShuf = dyn_cast(Op0)) if (isa(Op1) && RetTy == InVecTy && OpShuf->getMask()->getSplatValue()) return Op0; // Don't fold a shuffle with undef mask elements. This may get folded in a // better way using demanded bits or other analysis. // TODO: Should we allow this? if (find(Indices, -1) != Indices.end()) return nullptr; // Check if every element of this shuffle can be mapped back to the // corresponding element of a single root vector. If so, we don't need this // shuffle. This handles simple identity shuffles as well as chains of // shuffles that may widen/narrow and/or move elements across lanes and back. Value *RootVec = nullptr; for (unsigned i = 0; i != MaskNumElts; ++i) { // Note that recursion is limited for each vector element, so if any element // exceeds the limit, this will fail to simplify. RootVec = foldIdentityShuffles(i, Op0, Op1, Indices[i], RootVec, MaxRecurse); // We can't replace a widening/narrowing shuffle with one of its operands. 
if (!RootVec || RootVec->getType() != RetTy) return nullptr; } return RootVec; } /// Given operands for a ShuffleVectorInst, fold the result or return null. Value *llvm::SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, Type *RetTy, const SimplifyQuery &Q) { return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit); } static Constant *propagateNaN(Constant *In) { // If the input is a vector with undef elements, just return a default NaN. if (!In->isNaN()) return ConstantFP::getNaN(In->getType()); // Propagate the existing NaN constant when possible. // TODO: Should we quiet a signaling NaN? return In; } static Constant *simplifyFPBinop(Value *Op0, Value *Op1) { if (isa(Op0) || isa(Op1)) return ConstantFP::getNaN(Op0->getType()); if (match(Op0, m_NaN())) return propagateNaN(cast(Op0)); if (match(Op1, m_NaN())) return propagateNaN(cast(Op1)); return nullptr; } /// Given operands for an FAdd, see if we can fold the result. If not, this /// returns null. static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q)) return C; if (Constant *C = simplifyFPBinop(Op0, Op1)) return C; // fadd X, -0 ==> X if (match(Op1, m_NegZeroFP())) return Op0; // fadd X, 0 ==> X, when we know X is not -0 if (match(Op1, m_PosZeroFP()) && (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI))) return Op0; // With nnan: (+/-0.0 - X) + X --> 0.0 (and commuted variant) // We don't have to explicitly exclude infinities (ninf): INF + -INF == NaN. 
// Negative zeros are allowed because we always end up with positive zero: // X = -0.0: (-0.0 - (-0.0)) + (-0.0) == ( 0.0) + (-0.0) == 0.0 // X = -0.0: ( 0.0 - (-0.0)) + (-0.0) == ( 0.0) + (-0.0) == 0.0 // X = 0.0: (-0.0 - ( 0.0)) + ( 0.0) == (-0.0) + ( 0.0) == 0.0 // X = 0.0: ( 0.0 - ( 0.0)) + ( 0.0) == ( 0.0) + ( 0.0) == 0.0 if (FMF.noNaNs() && (match(Op0, m_FSub(m_AnyZeroFP(), m_Specific(Op1))) || match(Op1, m_FSub(m_AnyZeroFP(), m_Specific(Op0))))) return ConstantFP::getNullValue(Op0->getType()); return nullptr; } /// Given operands for an FSub, see if we can fold the result. If not, this /// returns null. static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q)) return C; if (Constant *C = simplifyFPBinop(Op0, Op1)) return C; // fsub X, +0 ==> X if (match(Op1, m_PosZeroFP())) return Op0; // fsub X, -0 ==> X, when we know X is not -0 if (match(Op1, m_NegZeroFP()) && (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI))) return Op0; // fsub -0.0, (fsub -0.0, X) ==> X Value *X; if (match(Op0, m_NegZeroFP()) && match(Op1, m_FSub(m_NegZeroFP(), m_Value(X)))) return X; // fsub 0.0, (fsub 0.0, X) ==> X if signed zeros are ignored. 
if (FMF.noSignedZeros() && match(Op0, m_AnyZeroFP()) && match(Op1, m_FSub(m_AnyZeroFP(), m_Value(X)))) return X; // fsub nnan x, x ==> 0.0 if (FMF.noNaNs() && Op0 == Op1) return Constant::getNullValue(Op0->getType()); return nullptr; } /// Given the operands for an FMul, see if we can fold the result static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q)) return C; if (Constant *C = simplifyFPBinop(Op0, Op1)) return C; // fmul X, 1.0 ==> X if (match(Op1, m_FPOne())) return Op0; // fmul nnan nsz X, 0 ==> 0 if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZeroFP())) return ConstantFP::getNullValue(Op0->getType()); // sqrt(X) * sqrt(X) --> X, if we can: // 1. Remove the intermediate rounding (reassociate). // 2. Ignore non-zero negative numbers because sqrt would produce NAN. // 3. Ignore -0.0 because sqrt(-0.0) == -0.0, but -0.0 * -0.0 == 0.0. 
Value *X; if (Op0 == Op1 && match(Op0, m_Intrinsic(m_Value(X))) && FMF.allowReassoc() && FMF.noNaNs() && FMF.noSignedZeros()) return X; return nullptr; } Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q) { return ::SimplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit); } Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q) { return ::SimplifyFSubInst(Op0, Op1, FMF, Q, RecursionLimit); } Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q) { return ::SimplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit); } static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q, unsigned) { if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q)) return C; if (Constant *C = simplifyFPBinop(Op0, Op1)) return C; // X / 1.0 -> X if (match(Op1, m_FPOne())) return Op0; // 0 / X -> 0 // Requires that NaNs are off (X could be zero) and signed zeroes are // ignored (X could be positive or negative, so the output sign is unknown). if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZeroFP())) return ConstantFP::getNullValue(Op0->getType()); if (FMF.noNaNs()) { // X / X -> 1.0 is legal when NaNs are ignored. // We can ignore infinities because INF/INF is NaN. if (Op0 == Op1) return ConstantFP::get(Op0->getType(), 1.0); // (X * Y) / Y --> X if we can reassociate to the above form. Value *X; if (FMF.allowReassoc() && match(Op0, m_c_FMul(m_Value(X), m_Specific(Op1)))) return X; // -X / X -> -1.0 and // X / -X -> -1.0 are legal when NaNs are ignored. // We can ignore signed zeros because +-0.0/+-0.0 is NaN and ignored. 
if ((BinaryOperator::isFNeg(Op0, /*IgnoreZeroSign=*/true) && BinaryOperator::getFNegArgument(Op0) == Op1) || (BinaryOperator::isFNeg(Op1, /*IgnoreZeroSign=*/true) && BinaryOperator::getFNegArgument(Op1) == Op0)) return ConstantFP::get(Op0->getType(), -1.0); } return nullptr; } Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q) { return ::SimplifyFDivInst(Op0, Op1, FMF, Q, RecursionLimit); } static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q, unsigned) { if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q)) return C; if (Constant *C = simplifyFPBinop(Op0, Op1)) return C; // Unlike fdiv, the result of frem always matches the sign of the dividend. // The constant match may include undef elements in a vector, so return a full // zero constant as the result. if (FMF.noNaNs()) { // +0 % X -> 0 if (match(Op0, m_PosZeroFP())) return ConstantFP::getNullValue(Op0->getType()); // -0 % X -> -0 if (match(Op0, m_NegZeroFP())) return ConstantFP::getNegativeZero(Op0->getType()); } return nullptr; } Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q) { return ::SimplifyFRemInst(Op0, Op1, FMF, Q, RecursionLimit); } //=== Helper functions for higher up the class hierarchy. /// Given operands for a BinaryOperator, see if we can fold the result. /// If not, this returns null. 
static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
                            const SimplifyQuery &Q, unsigned MaxRecurse) {
  // This entry point receives only an opcode and two operands, so there are
  // no instruction flags to forward. Every simplifier that accepts flags is
  // therefore given the weakest setting: no nsw/nuw, not exact, and
  // default-constructed fast-math flags.
  const bool NoSignedWrap = false;
  const bool NoUnsignedWrap = false;
  const bool Exact = false;
  FastMathFlags NoFMF = FastMathFlags();

  switch (Opcode) {
  // Integer add/sub/mul.
  case Instruction::Add:
    return SimplifyAddInst(LHS, RHS, NoSignedWrap, NoUnsignedWrap, Q,
                           MaxRecurse);
  case Instruction::Sub:
    return SimplifySubInst(LHS, RHS, NoSignedWrap, NoUnsignedWrap, Q,
                           MaxRecurse);
  case Instruction::Mul:
    return SimplifyMulInst(LHS, RHS, Q, MaxRecurse);

  // Integer division and remainder.
  case Instruction::SDiv:
    return SimplifySDivInst(LHS, RHS, Q, MaxRecurse);
  case Instruction::UDiv:
    return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse);
  case Instruction::SRem:
    return SimplifySRemInst(LHS, RHS, Q, MaxRecurse);
  case Instruction::URem:
    return SimplifyURemInst(LHS, RHS, Q, MaxRecurse);

  // Shifts.
  case Instruction::Shl:
    return SimplifyShlInst(LHS, RHS, NoSignedWrap, NoUnsignedWrap, Q,
                           MaxRecurse);
  case Instruction::LShr:
    return SimplifyLShrInst(LHS, RHS, Exact, Q, MaxRecurse);
  case Instruction::AShr:
    return SimplifyAShrInst(LHS, RHS, Exact, Q, MaxRecurse);

  // Bitwise logic.
  case Instruction::And:
    return SimplifyAndInst(LHS, RHS, Q, MaxRecurse);
  case Instruction::Or:
    return SimplifyOrInst(LHS, RHS, Q, MaxRecurse);
  case Instruction::Xor:
    return SimplifyXorInst(LHS, RHS, Q, MaxRecurse);

  // Floating point.
  case Instruction::FAdd:
    return SimplifyFAddInst(LHS, RHS, NoFMF, Q, MaxRecurse);
  case Instruction::FSub:
    return SimplifyFSubInst(LHS, RHS, NoFMF, Q, MaxRecurse);
  case Instruction::FMul:
    return SimplifyFMulInst(LHS, RHS, NoFMF, Q, MaxRecurse);
  case Instruction::FDiv:
    return SimplifyFDivInst(LHS, RHS, NoFMF, Q, MaxRecurse);
  case Instruction::FRem:
    return SimplifyFRemInst(LHS, RHS, NoFMF, Q, MaxRecurse);

  default:
    break;
  }

  // Any opcode not listed above is a caller error.
  llvm_unreachable("Unexpected opcode");
}

/// Given operands for a BinaryOperator, see if we can fold the result.
/// If not, this returns null.
/// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the
/// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp.
static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, const FastMathFlags &FMF, const SimplifyQuery &Q, unsigned MaxRecurse) { switch (Opcode) { case Instruction::FAdd: return SimplifyFAddInst(LHS, RHS, FMF, Q, MaxRecurse); case Instruction::FSub: return SimplifyFSubInst(LHS, RHS, FMF, Q, MaxRecurse); case Instruction::FMul: return SimplifyFMulInst(LHS, RHS, FMF, Q, MaxRecurse); case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, FMF, Q, MaxRecurse); default: return SimplifyBinOp(Opcode, LHS, RHS, Q, MaxRecurse); } } Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q) { return ::SimplifyBinOp(Opcode, LHS, RHS, Q, RecursionLimit); } Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q) { return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit); } /// Given operands for a CmpInst, see if we can fold the result. static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate)) return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse); return SimplifyFCmpInst(Predicate, LHS, RHS, FastMathFlags(), Q, MaxRecurse); } Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q) { return ::SimplifyCmpInst(Predicate, LHS, RHS, Q, RecursionLimit); } static bool IsIdempotent(Intrinsic::ID ID) { switch (ID) { default: return false; // Unary idempotent: f(f(x)) = f(x) case Intrinsic::fabs: case Intrinsic::floor: case Intrinsic::ceil: case Intrinsic::trunc: case Intrinsic::rint: case Intrinsic::nearbyint: case Intrinsic::round: case Intrinsic::canonicalize: return true; } } static Value *SimplifyRelativeLoad(Constant *Ptr, Constant *Offset, const DataLayout &DL) { GlobalValue *PtrSym; APInt PtrOffset; if (!IsConstantOffsetFromGlobal(Ptr, PtrSym, PtrOffset, DL)) return nullptr; Type *Int8PtrTy = 
Type::getInt8PtrTy(Ptr->getContext()); Type *Int32Ty = Type::getInt32Ty(Ptr->getContext()); Type *Int32PtrTy = Int32Ty->getPointerTo(); Type *Int64Ty = Type::getInt64Ty(Ptr->getContext()); auto *OffsetConstInt = dyn_cast(Offset); if (!OffsetConstInt || OffsetConstInt->getType()->getBitWidth() > 64) return nullptr; uint64_t OffsetInt = OffsetConstInt->getSExtValue(); if (OffsetInt % 4 != 0) return nullptr; Constant *C = ConstantExpr::getGetElementPtr( Int32Ty, ConstantExpr::getBitCast(Ptr, Int32PtrTy), ConstantInt::get(Int64Ty, OffsetInt / 4)); Constant *Loaded = ConstantFoldLoadFromConstPtr(C, Int32Ty, DL); if (!Loaded) return nullptr; auto *LoadedCE = dyn_cast(Loaded); if (!LoadedCE) return nullptr; if (LoadedCE->getOpcode() == Instruction::Trunc) { LoadedCE = dyn_cast(LoadedCE->getOperand(0)); if (!LoadedCE) return nullptr; } if (LoadedCE->getOpcode() != Instruction::Sub) return nullptr; auto *LoadedLHS = dyn_cast(LoadedCE->getOperand(0)); if (!LoadedLHS || LoadedLHS->getOpcode() != Instruction::PtrToInt) return nullptr; auto *LoadedLHSPtr = LoadedLHS->getOperand(0); Constant *LoadedRHS = LoadedCE->getOperand(1); GlobalValue *LoadedRHSSym; APInt LoadedRHSOffset; if (!IsConstantOffsetFromGlobal(LoadedRHS, LoadedRHSSym, LoadedRHSOffset, DL) || PtrSym != LoadedRHSSym || PtrOffset != LoadedRHSOffset) return nullptr; return ConstantExpr::getBitCast(LoadedLHSPtr, Int8PtrTy); } static bool maskIsAllZeroOrUndef(Value *Mask) { auto *ConstMask = dyn_cast(Mask); if (!ConstMask) return false; if (ConstMask->isNullValue() || isa(ConstMask)) return true; for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E; ++I) { if (auto *MaskElt = ConstMask->getAggregateElement(I)) if (MaskElt->isNullValue() || isa(MaskElt)) continue; return false; } return true; } static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0, const SimplifyQuery &Q) { // Idempotent functions return the same result when called repeatedly. 
Intrinsic::ID IID = F->getIntrinsicID(); if (IsIdempotent(IID)) if (auto *II = dyn_cast(Op0)) if (II->getIntrinsicID() == IID) return II; Value *X; switch (IID) { case Intrinsic::fabs: if (SignBitMustBeZero(Op0, Q.TLI)) return Op0; break; case Intrinsic::bswap: // bswap(bswap(x)) -> x if (match(Op0, m_BSwap(m_Value(X)))) return X; break; case Intrinsic::bitreverse: // bitreverse(bitreverse(x)) -> x if (match(Op0, m_BitReverse(m_Value(X)))) return X; break; case Intrinsic::exp: // exp(log(x)) -> x if (Q.CxtI->hasAllowReassoc() && match(Op0, m_Intrinsic(m_Value(X)))) return X; break; case Intrinsic::exp2: // exp2(log2(x)) -> x if (Q.CxtI->hasAllowReassoc() && match(Op0, m_Intrinsic(m_Value(X)))) return X; break; case Intrinsic::log: // log(exp(x)) -> x if (Q.CxtI->hasAllowReassoc() && match(Op0, m_Intrinsic(m_Value(X)))) return X; break; case Intrinsic::log2: // log2(exp2(x)) -> x if (Q.CxtI->hasAllowReassoc() && match(Op0, m_Intrinsic(m_Value(X)))) return X; break; default: break; } return nullptr; } static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, const SimplifyQuery &Q) { Intrinsic::ID IID = F->getIntrinsicID(); Type *ReturnType = F->getReturnType(); switch (IID) { case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: // X - X -> { 0, false } if (Op0 == Op1) return Constant::getNullValue(ReturnType); // X - undef -> undef // undef - X -> undef if (isa(Op0) || isa(Op1)) return UndefValue::get(ReturnType); break; case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: // X + undef -> undef if (isa(Op0) || isa(Op1)) return UndefValue::get(ReturnType); break; case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: // 0 * X -> { 0, false } // X * 0 -> { 0, false } if (match(Op0, m_Zero()) || match(Op1, m_Zero())) return Constant::getNullValue(ReturnType); // undef * X -> { 0, false } // X * undef -> { 0, false } if (match(Op0, m_Undef()) || match(Op1, m_Undef())) return 
Constant::getNullValue(ReturnType); break; case Intrinsic::load_relative: if (auto *C0 = dyn_cast(Op0)) if (auto *C1 = dyn_cast(Op1)) return SimplifyRelativeLoad(C0, C1, Q.DL); break; case Intrinsic::powi: if (auto *Power = dyn_cast(Op1)) { // powi(x, 0) -> 1.0 if (Power->isZero()) return ConstantFP::get(Op0->getType(), 1.0); // powi(x, 1) -> x if (Power->isOne()) return Op0; } break; case Intrinsic::maxnum: case Intrinsic::minnum: // If one argument is NaN, return the other argument. if (match(Op0, m_NaN())) return Op1; if (match(Op1, m_NaN())) return Op0; break; default: break; } return nullptr; } template static Value *simplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, const SimplifyQuery &Q) { // Intrinsics with no operands have some kind of side effect. Don't simplify. unsigned NumOperands = std::distance(ArgBegin, ArgEnd); if (NumOperands == 0) return nullptr; Intrinsic::ID IID = F->getIntrinsicID(); if (NumOperands == 1) return simplifyUnaryIntrinsic(F, ArgBegin[0], Q); if (NumOperands == 2) return simplifyBinaryIntrinsic(F, ArgBegin[0], ArgBegin[1], Q); // Handle intrinsics with 3 or more arguments. switch (IID) { case Intrinsic::masked_load: { Value *MaskArg = ArgBegin[2]; Value *PassthruArg = ArgBegin[3]; // If the mask is all zeros or undef, the "passthru" argument is the result. if (maskIsAllZeroOrUndef(MaskArg)) return PassthruArg; return nullptr; } case Intrinsic::fshl: case Intrinsic::fshr: { Value *ShAmtArg = ArgBegin[2]; const APInt *ShAmtC; if (match(ShAmtArg, m_APInt(ShAmtC))) { // If there's effectively no shift, return the 1st arg or 2nd arg. // TODO: For vectors, we could check each element of a non-splat constant. APInt BitWidth = APInt(ShAmtC->getBitWidth(), ShAmtC->getBitWidth()); if (ShAmtC->urem(BitWidth).isNullValue()) return ArgBegin[IID == Intrinsic::fshl ? 
0 : 1]; } return nullptr; } default: return nullptr; } } template static Value *SimplifyCall(ImmutableCallSite CS, Value *V, IterTy ArgBegin, IterTy ArgEnd, const SimplifyQuery &Q, unsigned MaxRecurse) { Type *Ty = V->getType(); if (PointerType *PTy = dyn_cast(Ty)) Ty = PTy->getElementType(); FunctionType *FTy = cast(Ty); // call undef -> undef // call null -> undef if (isa(V) || isa(V)) return UndefValue::get(FTy->getReturnType()); Function *F = dyn_cast(V); if (!F) return nullptr; if (F->isIntrinsic()) if (Value *Ret = simplifyIntrinsic(F, ArgBegin, ArgEnd, Q)) return Ret; if (!canConstantFoldCallTo(CS, F)) return nullptr; SmallVector ConstantArgs; ConstantArgs.reserve(ArgEnd - ArgBegin); for (IterTy I = ArgBegin, E = ArgEnd; I != E; ++I) { Constant *C = dyn_cast(*I); if (!C) return nullptr; ConstantArgs.push_back(C); } return ConstantFoldCall(CS, F, ConstantArgs, Q.TLI); } Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V, User::op_iterator ArgBegin, User::op_iterator ArgEnd, const SimplifyQuery &Q) { return ::SimplifyCall(CS, V, ArgBegin, ArgEnd, Q, RecursionLimit); } Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V, ArrayRef Args, const SimplifyQuery &Q) { return ::SimplifyCall(CS, V, Args.begin(), Args.end(), Q, RecursionLimit); } Value *llvm::SimplifyCall(ImmutableCallSite ICS, const SimplifyQuery &Q) { CallSite CS(const_cast(ICS.getInstruction())); return ::SimplifyCall(CS, CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), Q, RecursionLimit); } /// See if we can compute a simplified version of this instruction. /// If not, this returns null. Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ, OptimizationRemarkEmitter *ORE) { const SimplifyQuery Q = SQ.CxtI ? 
SQ : SQ.getWithInstruction(I); Value *Result; switch (I->getOpcode()) { default: Result = ConstantFoldInstruction(I, Q.DL, Q.TLI); break; case Instruction::FAdd: Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1), I->getFastMathFlags(), Q); break; case Instruction::Add: Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1), cast(I)->hasNoSignedWrap(), cast(I)->hasNoUnsignedWrap(), Q); break; case Instruction::FSub: Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1), I->getFastMathFlags(), Q); break; case Instruction::Sub: Result = SimplifySubInst(I->getOperand(0), I->getOperand(1), cast(I)->hasNoSignedWrap(), cast(I)->hasNoUnsignedWrap(), Q); break; case Instruction::FMul: Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1), I->getFastMathFlags(), Q); break; case Instruction::Mul: Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::SDiv: Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::UDiv: Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::FDiv: Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), I->getFastMathFlags(), Q); break; case Instruction::SRem: Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::URem: Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::FRem: Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), I->getFastMathFlags(), Q); break; case Instruction::Shl: Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1), cast(I)->hasNoSignedWrap(), cast(I)->hasNoUnsignedWrap(), Q); break; case Instruction::LShr: Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1), cast(I)->isExact(), Q); break; case Instruction::AShr: Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1), cast(I)->isExact(), Q); break; case Instruction::And: Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), 
Q); break; case Instruction::Or: Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::Xor: Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::ICmp: Result = SimplifyICmpInst(cast(I)->getPredicate(), I->getOperand(0), I->getOperand(1), Q); break; case Instruction::FCmp: Result = SimplifyFCmpInst(cast(I)->getPredicate(), I->getOperand(0), I->getOperand(1), I->getFastMathFlags(), Q); break; case Instruction::Select: Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1), I->getOperand(2), Q); break; case Instruction::GetElementPtr: { SmallVector Ops(I->op_begin(), I->op_end()); Result = SimplifyGEPInst(cast(I)->getSourceElementType(), Ops, Q); break; } case Instruction::InsertValue: { InsertValueInst *IV = cast(I); Result = SimplifyInsertValueInst(IV->getAggregateOperand(), IV->getInsertedValueOperand(), IV->getIndices(), Q); break; } case Instruction::InsertElement: { auto *IE = cast(I); Result = SimplifyInsertElementInst(IE->getOperand(0), IE->getOperand(1), IE->getOperand(2), Q); break; } case Instruction::ExtractValue: { auto *EVI = cast(I); Result = SimplifyExtractValueInst(EVI->getAggregateOperand(), EVI->getIndices(), Q); break; } case Instruction::ExtractElement: { auto *EEI = cast(I); Result = SimplifyExtractElementInst(EEI->getVectorOperand(), EEI->getIndexOperand(), Q); break; } case Instruction::ShuffleVector: { auto *SVI = cast(I); Result = SimplifyShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1), SVI->getMask(), SVI->getType(), Q); break; } case Instruction::PHI: Result = SimplifyPHINode(cast(I), Q); break; case Instruction::Call: { CallSite CS(cast(I)); Result = SimplifyCall(CS, Q); break; } #define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc: #include "llvm/IR/Instruction.def" #undef HANDLE_CAST_INST Result = SimplifyCastInst(I->getOpcode(), I->getOperand(0), I->getType(), Q); break; case Instruction::Alloca: // No simplifications for Alloca and it 
can't be constant folded. Result = nullptr; break; } // In general, it is possible for computeKnownBits to determine all bits in a // value even when the operands are not all constants. if (!Result && I->getType()->isIntOrIntVectorTy()) { KnownBits Known = computeKnownBits(I, Q.DL, /*Depth*/ 0, Q.AC, I, Q.DT, ORE); if (Known.isConstant()) Result = ConstantInt::get(I->getType(), Known.getConstant()); } /// If called on unreachable code, the above logic may report that the /// instruction simplified to itself. Make life easier for users by /// detecting that case here, returning a safe value instead. return Result == I ? UndefValue::get(I->getType()) : Result; } /// Implementation of recursive simplification through an instruction's /// uses. /// /// This is the common implementation of the recursive simplification routines. /// If we have a pre-simplified value in 'SimpleV', that is forcibly used to /// replace the instruction 'I'. Otherwise, we simply add 'I' to the list of /// instructions to process and attempt to simplify it using /// InstructionSimplify. /// /// This routine returns 'true' only when *it* simplifies something. The passed /// in simplified value does not count toward this. static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC) { bool Simplified = false; SmallSetVector Worklist; const DataLayout &DL = I->getModule()->getDataLayout(); // If we have an explicit value to collapse to, do that round of the // simplification loop by hand initially. if (SimpleV) { for (User *U : I->users()) if (U != I) Worklist.insert(cast(U)); // Replace the instruction with its simplified value. I->replaceAllUsesWith(SimpleV); // Gracefully handle edge cases where the instruction is not wired into any // parent block. 
if (I->getParent() && !I->isEHPad() && !isa(I) && !I->mayHaveSideEffects()) I->eraseFromParent(); } else { Worklist.insert(I); } // Note that we must test the size on each iteration, the worklist can grow. for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) { I = Worklist[Idx]; // See if this instruction simplifies. SimpleV = SimplifyInstruction(I, {DL, TLI, DT, AC}); if (!SimpleV) continue; Simplified = true; // Stash away all the uses of the old instruction so we can check them for // recursive simplifications after a RAUW. This is cheaper than checking all // uses of To on the recursive step in most cases. for (User *U : I->users()) Worklist.insert(cast(U)); // Replace the instruction with its simplified value. I->replaceAllUsesWith(SimpleV); // Gracefully handle edge cases where the instruction is not wired into any // parent block. if (I->getParent() && !I->isEHPad() && !isa(I) && !I->mayHaveSideEffects()) I->eraseFromParent(); } return Simplified; } bool llvm::recursivelySimplifyInstruction(Instruction *I, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC) { return replaceAndRecursivelySimplifyImpl(I, nullptr, TLI, DT, AC); } bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC) { assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!"); assert(SimpleV && "Must provide a simplified value."); return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC); } namespace llvm { const SimplifyQuery getBestSimplifyQuery(Pass &P, Function &F) { auto *DTWP = P.getAnalysisIfAvailable(); auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; auto *TLIWP = P.getAnalysisIfAvailable(); auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr; auto *ACWP = P.getAnalysisIfAvailable(); auto *AC = ACWP ? 
&ACWP->getAssumptionCache(F) : nullptr; return {F.getParent()->getDataLayout(), TLI, DT, AC}; } const SimplifyQuery getBestSimplifyQuery(LoopStandardAnalysisResults &AR, const DataLayout &DL) { return {DL, &AR.TLI, &AR.DT, &AR.AC}; } template const SimplifyQuery getBestSimplifyQuery(AnalysisManager &AM, Function &F) { auto *DT = AM.template getCachedResult(F); auto *TLI = AM.template getCachedResult(F); auto *AC = AM.template getCachedResult(F); return {F.getParent()->getDataLayout(), TLI, DT, AC}; } template const SimplifyQuery getBestSimplifyQuery(AnalysisManager &, Function &); } Index: projects/clang700-import/contrib/llvm/lib/Analysis/ValueTracking.cpp =================================================================== --- projects/clang700-import/contrib/llvm/lib/Analysis/ValueTracking.cpp (revision 337644) +++ projects/clang700-import/contrib/llvm/lib/Analysis/ValueTracking.cpp (revision 337645) @@ -1,5132 +1,5135 @@ //===- ValueTracking.cpp - Walk computations to compute properties --------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file contains routines that help analyze properties that chains of // computations have. 
// //===----------------------------------------------------------------------===// #include "llvm/Analysis/ValueTracking.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include #include #include #include #include #include using namespace llvm; 
using namespace llvm::PatternMatch; const unsigned MaxDepth = 6; // Controls the number of uses of the value searched for possible // dominating comparisons. static cl::opt DomConditionsMaxUses("dom-conditions-max-uses", cl::Hidden, cl::init(20)); /// Returns the bitwidth of the given scalar or pointer type. For vector types, /// returns the element type's bitwidth. static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { if (unsigned BitWidth = Ty->getScalarSizeInBits()) return BitWidth; return DL.getIndexTypeSizeInBits(Ty); } namespace { // Simplifying using an assume can only be done in a particular control-flow // context (the context instruction provides that context). If an assume and // the context instruction are not in the same block then the DT helps in // figuring out if we can use it. struct Query { const DataLayout &DL; AssumptionCache *AC; const Instruction *CxtI; const DominatorTree *DT; // Unlike the other analyses, this may be a nullptr because not all clients // provide it currently. OptimizationRemarkEmitter *ORE; /// Set of assumptions that should be excluded from further queries. /// This is because of the potential for mutual recursion to cause /// computeKnownBits to repeatedly visit the same assume intrinsic. The /// classic case of this is assume(x = y), which will attempt to determine /// bits in x from bits in y, which will attempt to determine bits in y from /// bits in x, etc. Regarding the mutual recursion, computeKnownBits can call /// isKnownNonZero, which calls computeKnownBits and isKnownToBeAPowerOfTwo /// (all of which can call computeKnownBits), and so on. 
std::array Excluded; unsigned NumExcluded = 0; Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, OptimizationRemarkEmitter *ORE = nullptr) : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE) {} Query(const Query &Q, const Value *NewExcl) : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE), NumExcluded(Q.NumExcluded) { Excluded = Q.Excluded; Excluded[NumExcluded++] = NewExcl; assert(NumExcluded <= Excluded.size()); } bool isExcluded(const Value *Value) const { if (NumExcluded == 0) return false; auto End = Excluded.begin() + NumExcluded; return std::find(Excluded.begin(), End, Value) != End; } }; } // end anonymous namespace // Given the provided Value and, potentially, a context instruction, return // the preferred context instruction (if any). static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) { // If we've been provided with a context instruction, then use that (provided // it has been inserted). if (CxtI && CxtI->getParent()) return CxtI; // If the value is really an already-inserted instruction, then use that. 
CxtI = dyn_cast(V); if (CxtI && CxtI->getParent()) return CxtI; return nullptr; } static void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, const Query &Q); void llvm::computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, OptimizationRemarkEmitter *ORE) { ::computeKnownBits(V, Known, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, ORE)); } static KnownBits computeKnownBits(const Value *V, unsigned Depth, const Query &Q); KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, OptimizationRemarkEmitter *ORE) { return ::computeKnownBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, ORE)); } bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { assert(LHS->getType() == RHS->getType() && "LHS and RHS should have the same type"); assert(LHS->getType()->isIntOrIntVectorTy() && "LHS and RHS should be integers"); // Look for an inverted mask: (X & ~M) op (Y & M). 
Value *M; if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) && match(RHS, m_c_And(m_Specific(M), m_Value()))) return true; if (match(RHS, m_c_And(m_Not(m_Value(M)), m_Value())) && match(LHS, m_c_And(m_Specific(M), m_Value()))) return true; IntegerType *IT = cast(LHS->getType()->getScalarType()); KnownBits LHSKnown(IT->getBitWidth()); KnownBits RHSKnown(IT->getBitWidth()); computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT); computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT); return (LHSKnown.Zero | RHSKnown.Zero).isAllOnesValue(); } bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI) { for (const User *U : CxtI->users()) { if (const ICmpInst *IC = dyn_cast(U)) if (IC->isEquality()) if (Constant *C = dyn_cast(IC->getOperand(1))) if (C->isNullValue()) continue; return false; } return true; } static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, const Query &Q); bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { return ::isKnownToBeAPowerOfTwo(V, OrZero, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT)); } static bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q); bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { return ::isKnownNonZero(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT)); } bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT); return Known.isNonNegative(); } bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { if (auto *CI = dyn_cast(V)) return CI->getValue().isStrictlyPositive(); // TODO: We'd doing two 
recursive queries here. We should factor this such // that only a single query is needed. return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT) && isKnownNonZero(V, DL, Depth, AC, CxtI, DT); } bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT); return Known.isNegative(); } static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q); bool llvm::isKnownNonEqual(const Value *V1, const Value *V2, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { return ::isKnownNonEqual(V1, V2, Query(DL, AC, safeCxtI(V1, safeCxtI(V2, CxtI)), DT)); } static bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth, const Query &Q); bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { return ::MaskedValueIsZero(V, Mask, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT)); } static unsigned ComputeNumSignBits(const Value *V, unsigned Depth, const Query &Q); unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { return ::ComputeNumSignBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT)); } static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1, bool NSW, KnownBits &KnownOut, KnownBits &Known2, unsigned Depth, const Query &Q) { unsigned BitWidth = KnownOut.getBitWidth(); // If an initial sequence of bits in the result is not needed, the // corresponding bits in the operands are not needed. 
KnownBits LHSKnown(BitWidth); computeKnownBits(Op0, LHSKnown, Depth + 1, Q); computeKnownBits(Op1, Known2, Depth + 1, Q); KnownOut = KnownBits::computeForAddSub(Add, NSW, LHSKnown, Known2); } static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, KnownBits &Known, KnownBits &Known2, unsigned Depth, const Query &Q) { unsigned BitWidth = Known.getBitWidth(); computeKnownBits(Op1, Known, Depth + 1, Q); computeKnownBits(Op0, Known2, Depth + 1, Q); bool isKnownNegative = false; bool isKnownNonNegative = false; // If the multiplication is known not to overflow, compute the sign bit. if (NSW) { if (Op0 == Op1) { // The product of a number with itself is non-negative. isKnownNonNegative = true; } else { bool isKnownNonNegativeOp1 = Known.isNonNegative(); bool isKnownNonNegativeOp0 = Known2.isNonNegative(); bool isKnownNegativeOp1 = Known.isNegative(); bool isKnownNegativeOp0 = Known2.isNegative(); // The product of two numbers with the same sign is non-negative. isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) || (isKnownNonNegativeOp1 && isKnownNonNegativeOp0); // The product of a negative number and a non-negative number is either // negative or zero. if (!isKnownNonNegative) isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 && isKnownNonZero(Op0, Depth, Q)) || (isKnownNegativeOp0 && isKnownNonNegativeOp1 && isKnownNonZero(Op1, Depth, Q)); } } assert(!Known.hasConflict() && !Known2.hasConflict()); // Compute a conservative estimate for high known-0 bits. unsigned LeadZ = std::max(Known.countMinLeadingZeros() + Known2.countMinLeadingZeros(), BitWidth) - BitWidth; LeadZ = std::min(LeadZ, BitWidth); // The result of the bottom bits of an integer multiply can be // inferred by looking at the bottom bits of both operands and // multiplying them together. // We can infer at least the minimum number of known trailing bits // of both operands. 
Depending on number of trailing zeros, we can // infer more bits, because (a*b) <=> ((a/m) * (b/n)) * (m*n) assuming // a and b are divisible by m and n respectively. // We then calculate how many of those bits are inferrable and set // the output. For example, the i8 mul: // a = XXXX1100 (12) // b = XXXX1110 (14) // We know the bottom 3 bits are zero since the first can be divided by // 4 and the second by 2, thus having ((12/4) * (14/2)) * (2*4). // Applying the multiplication to the trimmed arguments gets: // XX11 (3) // X111 (7) // ------- // XX11 // XX11 // XX11 // XX11 // ------- // XXXXX01 // Which allows us to infer the 2 LSBs. Since we're multiplying the result // by 8, the bottom 3 bits will be 0, so we can infer a total of 5 bits. // The proof for this can be described as: // Pre: (C1 >= 0) && (C1 < (1 << C5)) && (C2 >= 0) && (C2 < (1 << C6)) && // (C7 == (1 << (umin(countTrailingZeros(C1), C5) + // umin(countTrailingZeros(C2), C6) + // umin(C5 - umin(countTrailingZeros(C1), C5), // C6 - umin(countTrailingZeros(C2), C6)))) - 1) // %aa = shl i8 %a, C5 // %bb = shl i8 %b, C6 // %aaa = or i8 %aa, C1 // %bbb = or i8 %bb, C2 // %mul = mul i8 %aaa, %bbb // %mask = and i8 %mul, C7 // => // %mask = i8 ((C1*C2)&C7) // Where C5, C6 describe the known bits of %a, %b // C1, C2 describe the known bottom bits of %a, %b. // C7 describes the mask of the known bits of the result. APInt Bottom0 = Known.One; APInt Bottom1 = Known2.One; // How many times we'd be able to divide each argument by 2 (shr by 1). // This gives us the number of trailing zeros on the multiplication result. unsigned TrailBitsKnown0 = (Known.Zero | Known.One).countTrailingOnes(); unsigned TrailBitsKnown1 = (Known2.Zero | Known2.One).countTrailingOnes(); unsigned TrailZero0 = Known.countMinTrailingZeros(); unsigned TrailZero1 = Known2.countMinTrailingZeros(); unsigned TrailZ = TrailZero0 + TrailZero1; // Figure out the fewest known-bits operand. 
unsigned SmallestOperand = std::min(TrailBitsKnown0 - TrailZero0, TrailBitsKnown1 - TrailZero1); unsigned ResultBitsKnown = std::min(SmallestOperand + TrailZ, BitWidth); APInt BottomKnown = Bottom0.getLoBits(TrailBitsKnown0) * Bottom1.getLoBits(TrailBitsKnown1); Known.resetAll(); Known.Zero.setHighBits(LeadZ); Known.Zero |= (~BottomKnown).getLoBits(ResultBitsKnown); Known.One |= BottomKnown.getLoBits(ResultBitsKnown); // Only make use of no-wrap flags if we failed to compute the sign bit // directly. This matters if the multiplication always overflows, in // which case we prefer to follow the result of the direct computation, // though as the program is invoking undefined behaviour we can choose // whatever we like here. if (isKnownNonNegative && !Known.isNegative()) Known.makeNonNegative(); else if (isKnownNegative && !Known.isNonNegative()) Known.makeNegative(); } void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges, KnownBits &Known) { unsigned BitWidth = Known.getBitWidth(); unsigned NumRanges = Ranges.getNumOperands() / 2; assert(NumRanges >= 1); Known.Zero.setAllBits(); Known.One.setAllBits(); for (unsigned i = 0; i < NumRanges; ++i) { ConstantInt *Lower = mdconst::extract(Ranges.getOperand(2 * i + 0)); ConstantInt *Upper = mdconst::extract(Ranges.getOperand(2 * i + 1)); ConstantRange Range(Lower->getValue(), Upper->getValue()); // The first CommonPrefixBits of all values in Range are equal. unsigned CommonPrefixBits = (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countLeadingZeros(); APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits); Known.One &= Range.getUnsignedMax() & Mask; Known.Zero &= ~Range.getUnsignedMax() & Mask; } } static bool isEphemeralValueOf(const Instruction *I, const Value *E) { SmallVector WorkSet(1, I); SmallPtrSet Visited; SmallPtrSet EphValues; // The instruction defining an assumption's condition itself is always // considered ephemeral to that assumption (even if it has other // non-ephemeral users). 
See r246696's test case for an example. if (is_contained(I->operands(), E)) return true; while (!WorkSet.empty()) { const Value *V = WorkSet.pop_back_val(); if (!Visited.insert(V).second) continue; // If all uses of this value are ephemeral, then so is this value. if (llvm::all_of(V->users(), [&](const User *U) { return EphValues.count(U); })) { if (V == E) return true; if (V == I || isSafeToSpeculativelyExecute(V)) { EphValues.insert(V); if (const User *U = dyn_cast(V)) for (User::const_op_iterator J = U->op_begin(), JE = U->op_end(); J != JE; ++J) WorkSet.push_back(*J); } } } return false; } // Is this an intrinsic that cannot be speculated but also cannot trap? bool llvm::isAssumeLikeIntrinsic(const Instruction *I) { if (const CallInst *CI = dyn_cast(I)) if (Function *F = CI->getCalledFunction()) switch (F->getIntrinsicID()) { default: break; // FIXME: This list is repeated from NoTTI::getIntrinsicCost. case Intrinsic::assume: case Intrinsic::sideeffect: case Intrinsic::dbg_declare: case Intrinsic::dbg_value: case Intrinsic::dbg_label: case Intrinsic::invariant_start: case Intrinsic::invariant_end: case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: case Intrinsic::objectsize: case Intrinsic::ptr_annotation: case Intrinsic::var_annotation: return true; } return false; } bool llvm::isValidAssumeForContext(const Instruction *Inv, const Instruction *CxtI, const DominatorTree *DT) { // There are two restrictions on the use of an assume: // 1. The assume must dominate the context (or the control flow must // reach the assume whenever it reaches the context). // 2. The context must not be in the assume's set of ephemeral values // (otherwise we will use the assume to prove that the condition // feeding the assume is trivially true, thus causing the removal of // the assume). if (DT) { if (DT->dominates(Inv, CxtI)) return true; } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) { // We don't have a DT, but this trivially dominates. 
return true; } // With or without a DT, the only remaining case we will check is if the // instructions are in the same BB. Give up if that is not the case. if (Inv->getParent() != CxtI->getParent()) return false; // If we have a dom tree, then we now know that the assume doesn't dominate // the other instruction. If we don't have a dom tree then we can check if // the assume is first in the BB. if (!DT) { // Search forward from the assume until we reach the context (or the end // of the block); the common case is that the assume will come first. for (auto I = std::next(BasicBlock::const_iterator(Inv)), IE = Inv->getParent()->end(); I != IE; ++I) if (&*I == CxtI) return true; } // The context comes first, but they're both in the same block. Make sure // there is nothing in between that might interrupt the control flow. for (BasicBlock::const_iterator I = std::next(BasicBlock::const_iterator(CxtI)), IE(Inv); I != IE; ++I) if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I)) return false; return !isEphemeralValueOf(Inv, CxtI); } static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, unsigned Depth, const Query &Q) { // Use of assumptions is context-sensitive. If we don't have a context, we // cannot use them! if (!Q.AC || !Q.CxtI) return; unsigned BitWidth = Known.getBitWidth(); // Note that the patterns below need to be kept in sync with the code // in AssumptionCache::updateAffectedValues. for (auto &AssumeVH : Q.AC->assumptionsFor(V)) { if (!AssumeVH) continue; CallInst *I = cast(AssumeVH); assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() && "Got assumption for the wrong function!"); if (Q.isExcluded(I)) continue; // Warning: This loop can end up being somewhat performance sensitive. // We're running this loop for once for each value queried resulting in a // runtime of ~O(#assumes * #values). 
assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume && "must be an assume intrinsic"); Value *Arg = I->getArgOperand(0); if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { assert(BitWidth == 1 && "assume operand is not i1?"); Known.setAllOnes(); return; } if (match(Arg, m_Not(m_Specific(V))) && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { assert(BitWidth == 1 && "assume operand is not i1?"); Known.setAllZero(); return; } // The remaining tests are all recursive, so bail out if we hit the limit. if (Depth == MaxDepth) continue; Value *A, *B; auto m_V = m_CombineOr(m_Specific(V), m_CombineOr(m_PtrToInt(m_Specific(V)), m_BitCast(m_Specific(V)))); CmpInst::Predicate Pred; uint64_t C; // assume(v = a) if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); Known.Zero |= RHSKnown.Zero; Known.One |= RHSKnown.One; // assume(v & b = a) } else if (match(Arg, m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); KnownBits MaskKnown(BitWidth); computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I)); // For those bits in the mask that are known to be one, we can propagate // known bits from the RHS to V. Known.Zero |= RHSKnown.Zero & MaskKnown.One; Known.One |= RHSKnown.One & MaskKnown.One; // assume(~(v & b) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); KnownBits MaskKnown(BitWidth); computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I)); // For those bits in the mask that are known to be one, we can propagate // inverted known bits from the RHS to V. 
Known.Zero |= RHSKnown.One & MaskKnown.One; Known.One |= RHSKnown.Zero & MaskKnown.One; // assume(v | b = a) } else if (match(Arg, m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); KnownBits BKnown(BitWidth); computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); // For those bits in B that are known to be zero, we can propagate known // bits from the RHS to V. Known.Zero |= RHSKnown.Zero & BKnown.Zero; Known.One |= RHSKnown.One & BKnown.Zero; // assume(~(v | b) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); KnownBits BKnown(BitWidth); computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); // For those bits in B that are known to be zero, we can propagate // inverted known bits from the RHS to V. Known.Zero |= RHSKnown.One & BKnown.Zero; Known.One |= RHSKnown.Zero & BKnown.Zero; // assume(v ^ b = a) } else if (match(Arg, m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); KnownBits BKnown(BitWidth); computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); // For those bits in B that are known to be zero, we can propagate known // bits from the RHS to V. For those bits in B that are known to be one, // we can propagate inverted known bits from the RHS to V. 
Known.Zero |= RHSKnown.Zero & BKnown.Zero; Known.One |= RHSKnown.One & BKnown.Zero; Known.Zero |= RHSKnown.One & BKnown.One; Known.One |= RHSKnown.Zero & BKnown.One; // assume(~(v ^ b) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); KnownBits BKnown(BitWidth); computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); // For those bits in B that are known to be zero, we can propagate // inverted known bits from the RHS to V. For those bits in B that are // known to be one, we can propagate known bits from the RHS to V. Known.Zero |= RHSKnown.One & BKnown.Zero; Known.One |= RHSKnown.Zero & BKnown.Zero; Known.Zero |= RHSKnown.Zero & BKnown.One; Known.One |= RHSKnown.One & BKnown.One; // assume(v << c = a) } else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them to known // bits in V shifted to the right by C. RHSKnown.Zero.lshrInPlace(C); Known.Zero |= RHSKnown.Zero; RHSKnown.One.lshrInPlace(C); Known.One |= RHSKnown.One; // assume(~(v << c) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them inverted // to known bits in V shifted to the right by C. 
RHSKnown.One.lshrInPlace(C); Known.Zero |= RHSKnown.One; RHSKnown.Zero.lshrInPlace(C); Known.One |= RHSKnown.Zero; // assume(v >> c = a) } else if (match(Arg, m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them to known // bits in V shifted to the right by C. Known.Zero |= RHSKnown.Zero << C; Known.One |= RHSKnown.One << C; // assume(~(v >> c) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them inverted // to known bits in V shifted to the right by C. Known.Zero |= RHSKnown.One << C; Known.One |= RHSKnown.Zero << C; // assume(v >=_s c) where c is non-negative } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SGE && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); if (RHSKnown.isNonNegative()) { // We know that the sign bit is zero. Known.makeNonNegative(); } // assume(v >_s c) where c is at least -1. } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SGT && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); if (RHSKnown.isAllOnes() || RHSKnown.isNonNegative()) { // We know that the sign bit is zero. 
Known.makeNonNegative(); } // assume(v <=_s c) where c is negative } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SLE && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); if (RHSKnown.isNegative()) { // We know that the sign bit is one. Known.makeNegative(); } // assume(v <_s c) where c is non-positive } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SLT && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); if (RHSKnown.isZero() || RHSKnown.isNegative()) { // We know that the sign bit is one. Known.makeNegative(); } // assume(v <=_u c) } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_ULE && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // Whatever high bits in c are zero are known to be zero. Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros()); // assume(v <_u c) } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_ULT && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // If the RHS is known zero, then this assumption must be wrong (nothing // is unsigned less than zero). Signal a conflict and get out of here. if (RHSKnown.isZero()) { Known.Zero.setAllBits(); Known.One.setAllBits(); break; } // Whatever high bits in c are zero are known to be zero (if c is a power // of 2, then one more). if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I))) Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros() + 1); else Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros()); } } // If assumptions conflict with each other or previous known bits, then we // have a logical fallacy. 
It's possible that the assumption is not reachable, // so this isn't a real bug. On the other hand, the program may have undefined // behavior, or we might have a bug in the compiler. We can't assert/crash, so // clear out the known bits, try to warn the user, and hope for the best. if (Known.Zero.intersects(Known.One)) { Known.resetAll(); if (Q.ORE) Q.ORE->emit([&]() { auto *CxtI = const_cast(Q.CxtI); return OptimizationRemarkAnalysis("value-tracking", "BadAssumption", CxtI) << "Detected conflicting code assumptions. Program may " "have undefined behavior, or compiler may have " "internal error."; }); } }

/// Compute known bits from a shift operator, including those with a
/// non-constant shift amount. Known is the output of this function. Known2 is a
/// pre-allocated temporary with the same bit width as Known. KZF and KOF are
/// operator-specific functions that, given the known-zero or known-one bits
/// respectively, and a shift amount, compute the implied known-zero or
/// known-one bits of the shift operator's result respectively for that shift
/// amount. The results from calling KZF and KOF are conservatively combined for
/// all permitted shift amounts.
//
// NOTE(review): the template argument lists in this function (on function_ref,
// dyn_cast and Optional) appear to have been stripped when this text was
// extracted; they must be restored from the upstream file before this
// compiles.
static void computeKnownBitsFromShiftOperator(
    const Operator *I, KnownBits &Known, KnownBits &Known2,
    unsigned Depth, const Query &Q,
    function_ref KZF,
    function_ref KOF) {
  unsigned BitWidth = Known.getBitWidth();

  // Fast path: operand 1 (the shift amount) casts successfully. The cast's
  // target type is missing here; the use of getLimitedValue suggests a
  // constant integer -- TODO confirm against upstream.
  if (auto *SA = dyn_cast(I->getOperand(1))) {
    unsigned ShiftAmt = SA->getLimitedValue(BitWidth-1);

    // With a single shift amount, KZF/KOF are applied once to the known bits
    // of the shifted operand (operand 0).
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    Known.Zero = KZF(Known.Zero, ShiftAmt);
    Known.One = KOF(Known.One, ShiftAmt);
    // If the known bits conflict, this must be an overflowing left shift, so
    // the shift result is poison. We can return anything we want. Choose 0 for
    // the best folding opportunity.
    if (Known.hasConflict())
      Known.setAllZero();

    return;
  }

  // Slow path: Known temporarily holds the known bits of the shift amount
  // (operand 1), not of the result.
  computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);

  // If the shift amount could be greater than or equal to the bit-width of the
  // LHS, the value could be poison, but bail out because the check below is
  // expensive. TODO: Should we just carry on?
  // (~Known.Zero is the largest value the shift amount can take given the bits
  // of it that are known to be zero.)
  if ((~Known.Zero).uge(BitWidth)) {
    Known.resetAll();
    return;
  }

  // Note: We cannot use Known.Zero.getLimitedValue() here, because if
  // BitWidth > 64 and any upper bits are known, we'll end up returning the
  // limit value (which implies all bits are known).
  uint64_t ShiftAmtKZ = Known.Zero.zextOrTrunc(64).getZExtValue();
  uint64_t ShiftAmtKO = Known.One.zextOrTrunc(64).getZExtValue();

  // It would be more-clearly correct to use the two temporaries for this
  // calculation. Reusing the APInts here to prevent unnecessary allocations.
  Known.resetAll();

  // If we know the shifter operand is nonzero, we can sometimes infer more
  // known bits. However this is expensive to compute, so be lazy about it and
  // only compute it when absolutely necessary.
  Optional ShifterOperandIsNonZero;

  // Early exit if we can't constrain any well-defined shift amount.
  // That is: none of the shift-amount bits that matter for an in-range shift
  // are known, and the shift amount is not known to be non-zero either.
  if (!(ShiftAmtKZ & (PowerOf2Ceil(BitWidth) - 1)) &&
      !(ShiftAmtKO & (PowerOf2Ceil(BitWidth) - 1))) {
    ShifterOperandIsNonZero = isKnownNonZero(I->getOperand(1), Depth + 1, Q);
    if (!*ShifterOperandIsNonZero)
      return;
  }

  computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);

  // Start from "every bit known both ways" so that the &= in the loop below
  // intersects the results over every feasible shift amount.
  Known.Zero.setAllBits();
  Known.One.setAllBits();
  for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) {
    // Combine the shifted known input bits only for those shift amounts
    // compatible with its known constraints.
    // Reject amounts that set a bit known to be zero...
    if ((ShiftAmt & ~ShiftAmtKZ) != ShiftAmt)
      continue;
    // ...or that clear a bit known to be one.
    if ((ShiftAmt | ShiftAmtKO) != ShiftAmt)
      continue;
    // If we know the shifter is nonzero, we may be able to infer more known
    // bits. This check is sunk down as far as possible to avoid the expensive
    // call to isKnownNonZero if the cheaper checks above fail.
    if (ShiftAmt == 0) {
      if (!ShifterOperandIsNonZero.hasValue())
        ShifterOperandIsNonZero =
            isKnownNonZero(I->getOperand(1), Depth + 1, Q);
      if (*ShifterOperandIsNonZero)
        continue;
    }

    Known.Zero &= KZF(Known2.Zero, ShiftAmt);
    Known.One &= KOF(Known2.One, ShiftAmt);
  }

  // If the known bits conflict, the result is poison. Return a 0 and hope the
  // caller can further optimize that.
  if (Known.hasConflict())
    Known.setAllZero();
}

/// Compute the known bits of the value produced by operator I by dispatching
/// on its opcode. Known is updated in place; opcodes that are not handled
/// leave it untouched. Known2 starts as a copy of Known and serves as scratch
/// space for the known bits of a second operand.
//
// NOTE(review): every cast<>/dyn_cast<>/dyn_cast_or_null<> below has lost its
// template argument list in extraction (and possibly some explicit template
// arguments on std::min/std::max as well); restore them from the upstream
// file before compiling.
static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
                                         unsigned Depth, const Query &Q) {
  unsigned BitWidth = Known.getBitWidth();

  KnownBits Known2(Known);
  switch (I->getOpcode()) {
  default: break;
  case Instruction::Load:
    // A load contributes only what its !range metadata (if any) implies.
    if (MDNode *MD = cast(I)->getMetadata(LLVMContext::MD_range))
      computeKnownBitsFromRangeMetadata(*MD, Known);
    break;
  case Instruction::And: {
    // If either the LHS or the RHS are Zero, the result is zero.
    computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);

    // Output known-1 bits are only known if set in both the LHS & RHS.
    Known.One &= Known2.One;
    // Output known-0 are known to be clear if zero in either the LHS | RHS.
    Known.Zero |= Known2.Zero;

    // and(x, add (x, -1)) is a common idiom that always clears the low bit;
    // here we handle the more general case of adding any odd number by
    // matching the form and(x, add(x, y)) where y is odd.
    // TODO: This could be generalized to clearing any bit set in y where the
    // following bit is known to be unset in y.
    Value *X = nullptr, *Y = nullptr;
    // Only bother when bit 0 of the result is still unknown.
    if (!Known.Zero[0] && !Known.One[0] &&
        match(I, m_c_BinOp(m_Value(X), m_Add(m_Deferred(X), m_Value(Y))))) {
      Known2.resetAll();
      computeKnownBits(Y, Known2, Depth + 1, Q);
      // y is odd exactly when its lowest bit is known to be one.
      if (Known2.countMinTrailingOnes() > 0)
        Known.Zero.setBit(0);
    }
    break;
  }
  case Instruction::Or:
    computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);

    // Output known-0 bits are only known if clear in both the LHS & RHS.
    Known.Zero &= Known2.Zero;
    // Output known-1 are known to be set if set in either the LHS | RHS.
    Known.One |= Known2.One;
    break;
  case Instruction::Xor: {
    computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);

    // Output known-0 bits are known if clear or set in both the LHS & RHS.
    // (Held in a temporary because Known.Zero is still needed to compute
    // Known.One on the next statement.)
    APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
    // Output known-1 are known to be set if set in only one of the LHS, RHS.
    Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
    Known.Zero = std::move(KnownZeroOut);
    break;
  }
  case Instruction::Mul: {
    bool NSW = cast(I)->hasNoSignedWrap();
    computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, Known,
                        Known2, Depth, Q);
    break;
  }
  case Instruction::UDiv: {
    // For the purposes of computing leading zeros we can conservatively
    // treat a udiv as a logical right shift by the power of 2 known to
    // be less than the denominator.
    computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
    unsigned LeadZ = Known2.countMinLeadingZeros();

    Known2.resetAll();
    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
    unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros();
    // RHSMaxLeadingZeros == BitWidth means the divisor has no bit known to be
    // one, so it adds no leading zeros beyond the dividend's.
    if (RHSMaxLeadingZeros != BitWidth)
      LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1);

    Known.Zero.setHighBits(LeadZ);
    break;
  }
  case Instruction::Select: {
    const Value *LHS, *RHS;
    SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
    // For min/max patterns analyse the matched operands; otherwise analyse the
    // select's two value operands (operands 2 and 1).
    if (SelectPatternResult::isMinOrMax(SPF)) {
      computeKnownBits(RHS, Known, Depth + 1, Q);
      computeKnownBits(LHS, Known2, Depth + 1, Q);
    } else {
      computeKnownBits(I->getOperand(2), Known, Depth + 1, Q);
      computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
    }

    // Extra high bits implied by the pattern flavor; applied after the
    // intersection below.
    unsigned MaxHighOnes = 0;
    unsigned MaxHighZeros = 0;
    if (SPF == SPF_SMAX) {
      // If both sides are negative, the result is negative.
      if (Known.isNegative() && Known2.isNegative())
        // We can derive a lower bound on the result by taking the max of the
        // leading one bits.
        MaxHighOnes = std::max(Known.countMinLeadingOnes(),
                               Known2.countMinLeadingOnes());
      // If either side is non-negative, the result is non-negative.
      else if (Known.isNonNegative() || Known2.isNonNegative())
        MaxHighZeros = 1;
    } else if (SPF == SPF_SMIN) {
      // If both sides are non-negative, the result is non-negative.
      if (Known.isNonNegative() && Known2.isNonNegative())
        // We can derive an upper bound on the result by taking the max of the
        // leading zero bits.
        MaxHighZeros = std::max(Known.countMinLeadingZeros(),
                                Known2.countMinLeadingZeros());
      // If either side is negative, the result is negative.
      else if (Known.isNegative() || Known2.isNegative())
        MaxHighOnes = 1;
    } else if (SPF == SPF_UMAX) {
      // We can derive a lower bound on the result by taking the max of the
      // leading one bits.
      MaxHighOnes =
          std::max(Known.countMinLeadingOnes(), Known2.countMinLeadingOnes());
    } else if (SPF == SPF_UMIN) {
      // We can derive an upper bound on the result by taking the max of the
      // leading zero bits.
      MaxHighZeros =
          std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
    } else if (SPF == SPF_ABS) {
      // RHS from matchSelectPattern returns the negation part of abs pattern.
      // If the negate has an NSW flag we can assume the sign bit of the result
      // will be 0 because that makes abs(INT_MIN) undefined.
      if (cast(RHS)->hasNoSignedWrap())
        MaxHighZeros = 1;
    }

    // Only known if known in both the LHS and RHS.
    Known.One &= Known2.One;
    Known.Zero &= Known2.Zero;
    if (MaxHighOnes > 0)
      Known.One.setHighBits(MaxHighOnes);
    if (MaxHighZeros > 0)
      Known.Zero.setHighBits(MaxHighZeros);
    break;
  }
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
    break; // Can't work with floating point.
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
    // Fall through and handle them the same as zext/trunc.
    LLVM_FALLTHROUGH;
  case Instruction::ZExt:
  case Instruction::Trunc: {
    Type *SrcTy = I->getOperand(0)->getType();

    unsigned SrcBitWidth;
    // Note that we handle pointer operands here because of inttoptr/ptrtoint
    // which fall through here.
    Type *ScalarTy = SrcTy->getScalarType();
    SrcBitWidth = ScalarTy->isPointerTy() ?
      Q.DL.getIndexTypeSizeInBits(ScalarTy) :
      Q.DL.getTypeSizeInBits(ScalarTy);

    assert(SrcBitWidth && "SrcBitWidth can't be zero");
    // Resize Known to the source width, analyse the source operand, then
    // resize back to the result width.
    Known = Known.zextOrTrunc(SrcBitWidth);
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    Known = Known.zextOrTrunc(BitWidth);
    // Any top bits are known to be zero.
    if (BitWidth > SrcBitWidth)
      Known.Zero.setBitsFrom(SrcBitWidth);
    break;
  }
  case Instruction::BitCast: {
    Type *SrcTy = I->getOperand(0)->getType();
    if (SrcTy->isIntOrPtrTy() &&
        // TODO: For now, not handling conversions like:
        // (bitcast i64 %x to <2 x i32>)
        !I->getType()->isVectorTy()) {
      computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
      break;
    }
    break;
  }
  case Instruction::SExt: {
    // Compute the bits in the result that are not present in the input.
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();

    Known = Known.trunc(SrcBitWidth);
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.
    Known = Known.sext(BitWidth);
    break;
  }
  case Instruction::Shl: {
    // (shl X, C1) & C2 == 0   iff   (X & C2 >>u C1) == 0
    bool NSW = cast(I)->hasNoSignedWrap();
    auto KZF = [NSW](const APInt &KnownZero, unsigned ShiftAmt) {
      APInt KZResult = KnownZero << ShiftAmt;
      KZResult.setLowBits(ShiftAmt); // Low bits known 0.
      // If this shift has "nsw" keyword, then the result is either a poison
      // value or has the same sign bit as the first operand.
      if (NSW && KnownZero.isSignBitSet())
        KZResult.setSignBit();
      return KZResult;
    };

    auto KOF = [NSW](const APInt &KnownOne, unsigned ShiftAmt) {
      APInt KOResult = KnownOne << ShiftAmt;
      // Same "nsw" reasoning as in KZF, for a sign bit known to be one.
      if (NSW && KnownOne.isSignBitSet())
        KOResult.setSignBit();
      return KOResult;
    };

    computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF);
    break;
  }
  case Instruction::LShr: {
    // (lshr X, C1) & C2 == 0   iff  (-1 >> C1) & C2 == 0
    auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) {
      APInt KZResult = KnownZero.lshr(ShiftAmt);
      // High bits known zero.
      KZResult.setHighBits(ShiftAmt);
      return KZResult;
    };

    auto KOF = [](const APInt &KnownOne, unsigned ShiftAmt) {
      return KnownOne.lshr(ShiftAmt);
    };

    computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF);
    break;
  }
  case Instruction::AShr: {
    // (ashr X, C1) & C2 == 0   iff  (-1 >> C1) & C2 == 0
    // Both known-bit sets are shifted arithmetically.
    auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) {
      return KnownZero.ashr(ShiftAmt);
    };

    auto KOF = [](const APInt &KnownOne, unsigned ShiftAmt) {
      return KnownOne.ashr(ShiftAmt);
    };

    computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF);
    break;
  }
  case Instruction::Sub: {
    bool NSW = cast(I)->hasNoSignedWrap();
    // First argument false selects the subtract form of the shared helper.
    computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
                           Known, Known2, Depth, Q);
    break;
  }
  case Instruction::Add: {
    bool NSW = cast(I)->hasNoSignedWrap();
    // First argument true selects the add form of the shared helper.
    computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
                           Known, Known2, Depth, Q);
    break;
  }
  case Instruction::SRem:
    if (ConstantInt *Rem = dyn_cast(I->getOperand(1))) {
      APInt RA = Rem->getValue().abs();
      if (RA.isPowerOf2()) {
        APInt LowBits = RA - 1;
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);

        // The low bits of the first operand are unchanged by the srem.
        Known.Zero = Known2.Zero & LowBits;
        Known.One = Known2.One & LowBits;

        // If the first operand is non-negative or has all low bits zero, then
        // the upper bits are all zero.
        if (Known2.isNonNegative() || LowBits.isSubsetOf(Known2.Zero))
          Known.Zero |= ~LowBits;

        // If the first operand is negative and not all low bits are zero, then
        // the upper bits are all one.
        if (Known2.isNegative() && LowBits.intersects(Known2.One))
          Known.One |= ~LowBits;

        assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
        break;
      }
    }

    // Reached when the divisor is not a constant power of two (in absolute
    // value).
    // The sign bit is the LHS's sign bit, except when the result of the
    // remainder is zero.
    computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
    // If it's known zero, our sign bit is also zero.
    if (Known2.isNonNegative())
      Known.makeNonNegative();

    break;
  case Instruction::URem: {
    if (ConstantInt *Rem = dyn_cast(I->getOperand(1))) {
      const APInt &RA = Rem->getValue();
      if (RA.isPowerOf2()) {
        // urem by a power of two keeps only the low bits of operand 0.
        APInt LowBits = (RA - 1);
        computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
        Known.Zero |= ~LowBits;
        Known.One &= LowBits;
        break;
      }
    }

    // Since the result is less than or equal to either operand, any leading
    // zero bits in either operand must also exist in the result.
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);

    unsigned Leaders =
        std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
    // Known held operand 0's bits only to compute Leaders; discard them.
    Known.resetAll();
    Known.Zero.setHighBits(Leaders);
    break;
  }

  case Instruction::Alloca: {
    const AllocaInst *AI = cast(I);
    unsigned Align = AI->getAlignment();
    // An alignment of 0 is replaced by the ABI alignment of the allocated
    // type.
    if (Align == 0)
      Align = Q.DL.getABITypeAlignment(AI->getAllocatedType());

    if (Align > 0)
      Known.Zero.setLowBits(countTrailingZeros(Align));
    break;
  }
  case Instruction::GetElementPtr: {
    // Analyze all of the subscripts of this getelementptr instruction
    // to determine if we can prove known low zero bits.
    KnownBits LocalKnown(BitWidth);
    computeKnownBits(I->getOperand(0), LocalKnown, Depth + 1, Q);
    // TrailZ is the running minimum of trailing zero bits over the base
    // pointer and every index contribution.
    unsigned TrailZ = LocalKnown.countMinTrailingZeros();

    gep_type_iterator GTI = gep_type_begin(I);
    for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
      Value *Index = I->getOperand(i);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // Handle struct member offset arithmetic.

        // Handle case when index is vector zeroinitializer
        Constant *CIndex = cast(Index);
        if (CIndex->isZeroValue())
          continue;

        if (CIndex->getType()->isVectorTy())
          Index = CIndex->getSplatValue();

        unsigned Idx = cast(Index)->getZExtValue();
        const StructLayout *SL = Q.DL.getStructLayout(STy);
        uint64_t Offset = SL->getElementOffset(Idx);
        TrailZ = std::min(TrailZ,
                          countTrailingZeros(Offset));
      } else {
        // Handle array index arithmetic.
        Type *IndexedTy = GTI.getIndexedType();
        if (!IndexedTy->isSized()) {
          TrailZ = 0;
          break;
        }
        unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
        uint64_t TypeSize = Q.DL.getTypeAllocSize(IndexedTy);
        // Re-create LocalKnown at the index's own bit width before analysing
        // it.
        LocalKnown.Zero = LocalKnown.One = APInt(GEPOpiBits, 0);
        computeKnownBits(Index, LocalKnown, Depth + 1, Q);
        TrailZ = std::min(TrailZ,
                          unsigned(countTrailingZeros(TypeSize) +
                                   LocalKnown.countMinTrailingZeros()));
      }
    }

    Known.Zero.setLowBits(TrailZ);
    break;
  }
  case Instruction::PHI: {
    const PHINode *P = cast(I);
    // Handle the case of a simple two-predecessor recurrence PHI.
    // There's a lot more that could theoretically be done here, but
    // this is sufficient to catch some interesting cases.
    if (P->getNumIncomingValues() == 2) {
      // Try each incoming value in turn as the recurrence step (L), with the
      // other one (R) as the start value.
      for (unsigned i = 0; i != 2; ++i) {
        Value *L = P->getIncomingValue(i);
        Value *R = P->getIncomingValue(!i);
        Operator *LU = dyn_cast(L);
        if (!LU)
          continue;
        unsigned Opcode = LU->getOpcode();
        // Check for operations that have the property that if
        // both their operands have low zero bits, the result
        // will have low zero bits.
        if (Opcode == Instruction::Add ||
            Opcode == Instruction::Sub ||
            Opcode == Instruction::And ||
            Opcode == Instruction::Or ||
            Opcode == Instruction::Mul) {
          Value *LL = LU->getOperand(0);
          Value *LR = LU->getOperand(1);
          // Find a recurrence.
          // From here on L is the operand of LU that is not the PHI itself.
          if (LL == I)
            L = LR;
          else if (LR == I)
            L = LL;
          else
            break;
          // Ok, we have a PHI of the form L op= R. Check for low
          // zero bits.
          computeKnownBits(R, Known2, Depth + 1, Q);

          // We need to take the minimum number of known bits
          KnownBits Known3(Known);
          computeKnownBits(L, Known3, Depth + 1, Q);

          Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(),
                                         Known3.countMinTrailingZeros()));

          auto *OverflowOp = dyn_cast(LU);
          if (OverflowOp && OverflowOp->hasNoSignedWrap()) {
            // If initial value of recurrence is nonnegative, and we are adding
            // a nonnegative number with nsw, the result can only be nonnegative
            // or poison value regardless of the number of times we execute the
            // add in phi recurrence. If initial value is negative and we are
            // adding a negative number with nsw, the result can only be
            // negative or poison value. Similar arguments apply to sub and mul.
            //
            // (add non-negative, non-negative) --> non-negative
            // (add negative, negative) --> negative
            if (Opcode == Instruction::Add) {
              if (Known2.isNonNegative() && Known3.isNonNegative())
                Known.makeNonNegative();
              else if (Known2.isNegative() && Known3.isNegative())
                Known.makeNegative();
            }

            // (sub nsw non-negative, negative) --> non-negative
            // (sub nsw negative, non-negative) --> negative
            // (Requires the PHI to be the first operand of the sub: LL == I.)
            else if (Opcode == Instruction::Sub && LL == I) {
              if (Known2.isNonNegative() && Known3.isNegative())
                Known.makeNonNegative();
              else if (Known2.isNegative() && Known3.isNonNegative())
                Known.makeNegative();
            }

            // (mul nsw non-negative, non-negative) --> non-negative
            else if (Opcode == Instruction::Mul && Known2.isNonNegative() &&
                     Known3.isNonNegative())
              Known.makeNonNegative();
          }

          break;
        }
      }
    }

    // Unreachable blocks may have zero-operand PHI nodes.
    if (P->getNumIncomingValues() == 0)
      break;

    // Otherwise take the unions of the known bit sets of the operands,
    // taking conservative care to avoid excessive recursion.
    // (Only attempted when nothing at all is known yet.)
    if (Depth < MaxDepth - 1 && !Known.Zero && !Known.One) {
      // Skip if every incoming value references to ourself.
      if (dyn_cast_or_null(P->hasConstantValue()))
        break;

      // Start from "every bit known both ways" and intersect with each
      // incoming value.
      Known.Zero.setAllBits();
      Known.One.setAllBits();
      for (Value *IncValue : P->incoming_values()) {
        // Skip direct self references.
        if (IncValue == P) continue;

        Known2 = KnownBits(BitWidth);
        // Recurse, but cap the recursion to one level, because we don't
        // want to waste time spinning around in loops.
        computeKnownBits(IncValue, Known2, MaxDepth - 1, Q);
        Known.Zero &= Known2.Zero;
        Known.One &= Known2.One;
        // If all bits have been ruled out, there's no need to check
        // more operands.
        if (!Known.Zero && !Known.One)
          break;
      }
    }
    break;
  }
  case Instruction::Call:
  case Instruction::Invoke:
    // If range metadata is attached to this call, set known bits from that,
    // and then intersect with known bits based on other properties of the
    // function.
    if (MDNode *MD = cast(I)->getMetadata(LLVMContext::MD_range))
      computeKnownBitsFromRangeMetadata(*MD, Known);
    // If the call reports a returned argument operand, that operand's known
    // bits are merged in as well.
    if (const Value *RV = ImmutableCallSite(I).getReturnedArgOperand()) {
      computeKnownBits(RV, Known2, Depth + 1, Q);
      Known.Zero |= Known2.Zero;
      Known.One |= Known2.One;
    }
    if (const IntrinsicInst *II = dyn_cast(I)) {
      switch (II->getIntrinsicID()) {
      default: break;
      case Intrinsic::bitreverse:
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
        Known.Zero |= Known2.Zero.reverseBits();
        Known.One |= Known2.One.reverseBits();
        break;
      case Intrinsic::bswap:
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
        Known.Zero |= Known2.Zero.byteSwap();
        Known.One |= Known2.One.byteSwap();
        break;
      case Intrinsic::ctlz: {
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
        // If we have a known 1, its position is our upper bound.
        unsigned PossibleLZ = Known2.One.countLeadingZeros();
        // If this call is undefined for 0, the result will be less than 2^n.
        if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
          PossibleLZ = std::min(PossibleLZ, BitWidth - 1);
        // Bits at or above LowBits cannot be set in a value <= PossibleLZ.
        unsigned LowBits = Log2_32(PossibleLZ)+1;
        Known.Zero.setBitsFrom(LowBits);
        break;
      }
      case Intrinsic::cttz: {
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
        // If we have a known 1, its position is our upper bound.
        unsigned PossibleTZ = Known2.One.countTrailingZeros();
        // If this call is undefined for 0, the result will be less than 2^n.
        if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
          PossibleTZ = std::min(PossibleTZ, BitWidth - 1);
        // Bits at or above LowBits cannot be set in a value <= PossibleTZ.
        unsigned LowBits = Log2_32(PossibleTZ)+1;
        Known.Zero.setBitsFrom(LowBits);
        break;
      }
      case Intrinsic::ctpop: {
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
        // We can bound the space the count needs.  Also, bits known to be zero
        // can't contribute to the population.
        unsigned BitsPossiblySet = Known2.countMaxPopulation();
        unsigned LowBits = Log2_32(BitsPossiblySet)+1;
        Known.Zero.setBitsFrom(LowBits);
        // TODO: we could bound KnownOne using the lower bound on the number
        // of bits which might be set provided by popcnt KnownOne2.
        break;
      }
      case Intrinsic::x86_sse42_crc32_64_64:
        // Bits 32 and above of the result are marked known zero.
        Known.Zero.setBitsFrom(32);
        break;
      }
    }
    break;
  case Instruction::ExtractElement:
    // Look through extract element. At the moment we keep this simple and skip
    // tracking the specific element. But at least we might find information
    // valid for all elements of the vector (for example if vector is sign
    // extended, shifted, etc).
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    break;
  case Instruction::ExtractValue:
    if (IntrinsicInst *II = dyn_cast(I->getOperand(0))) {
      const ExtractValueInst *EVI = cast(I);
      if (EVI->getNumIndices() != 1) break;
      // Only index 0 of the aggregate is analysed; the *_with_overflow
      // intrinsics below are treated as plain add/sub/mul with the NSW
      // argument passed as false.
      if (EVI->getIndices()[0] == 0) {
        switch (II->getIntrinsicID()) {
        default: break;
        case Intrinsic::uadd_with_overflow:
        case Intrinsic::sadd_with_overflow:
          computeKnownBitsAddSub(true, II->getArgOperand(0),
                                 II->getArgOperand(1), false, Known, Known2,
                                 Depth, Q);
          break;
        case Intrinsic::usub_with_overflow:
        case Intrinsic::ssub_with_overflow:
          computeKnownBitsAddSub(false, II->getArgOperand(0),
                                 II->getArgOperand(1), false, Known, Known2,
                                 Depth, Q);
          break;
        case Intrinsic::umul_with_overflow:
        case Intrinsic::smul_with_overflow:
          computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false,
                              Known, Known2, Depth, Q);
          break;
        }
      }
    }
  }
}

/// Determine which bits of V are known to be either zero or one and return
/// them.
///
/// Convenience overload: builds a KnownBits of the width reported by
/// getBitWidth for V's type and forwards to the in/out overload.
KnownBits computeKnownBits(const Value *V, unsigned Depth, const Query &Q) {
  KnownBits Known(getBitWidth(V->getType(), Q.DL));
  computeKnownBits(V, Known, Depth, Q);
  return Known;
}

/// Determine which bits of V are known to be either zero or one and return
/// them in the Known bit set.
///
/// NOTE: we cannot consider 'undef' to be "IsZero" here.  The problem is that
/// we cannot optimize based on the assumption that it is zero without changing
/// it to be an explicit zero.  If we don't change it to zero, other code could
/// be optimized based on the contradictory assumption that it is non-zero.
/// Because instcombine aggressively folds operations with undef args anyway,
/// this won't lose us code quality.
///
/// This function is defined on values with integer type, values with pointer
/// type, and vectors of integers.  In the case
/// where V is a vector, known zero, and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, const Query &Q) { assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); unsigned BitWidth = Known.getBitWidth(); assert((V->getType()->isIntOrIntVectorTy(BitWidth) || V->getType()->isPtrOrPtrVectorTy()) && "Not integer or pointer type!"); Type *ScalarTy = V->getType()->getScalarType(); unsigned ExpectedWidth = ScalarTy->isPointerTy() ? Q.DL.getIndexTypeSizeInBits(ScalarTy) : Q.DL.getTypeSizeInBits(ScalarTy); assert(ExpectedWidth == BitWidth && "V and Known should have same BitWidth"); (void)BitWidth; (void)ExpectedWidth; const APInt *C; if (match(V, m_APInt(C))) { // We know all of the bits for a scalar constant or a splat vector constant! Known.One = *C; Known.Zero = ~Known.One; return; } // Null and aggregate-zero are all-zeros. if (isa(V) || isa(V)) { Known.setAllZero(); return; } // Handle a constant vector by taking the intersection of the known bits of // each element. if (const ConstantDataSequential *CDS = dyn_cast(V)) { // We know that CDS must be a vector of integers. Take the intersection of // each element. Known.Zero.setAllBits(); Known.One.setAllBits(); for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { APInt Elt = CDS->getElementAsAPInt(i); Known.Zero &= ~Elt; Known.One &= Elt; } return; } if (const auto *CV = dyn_cast(V)) { // We know that CV must be a vector of integers. Take the intersection of // each element. Known.Zero.setAllBits(); Known.One.setAllBits(); for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { Constant *Element = CV->getAggregateElement(i); auto *ElementCI = dyn_cast_or_null(Element); if (!ElementCI) { Known.resetAll(); return; } const APInt &Elt = ElementCI->getValue(); Known.Zero &= ~Elt; Known.One &= Elt; } return; } // Start out not knowing anything. Known.resetAll(); // We can't imply anything about undefs. 
if (isa(V)) return; // There's no point in looking through other users of ConstantData for // assumptions. Confirm that we've handled them all. assert(!isa(V) && "Unhandled constant data!"); // Limit search depth. // All recursive calls that increase depth must come after this. if (Depth == MaxDepth) return; // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has // the bits of its aliasee. if (const GlobalAlias *GA = dyn_cast(V)) { if (!GA->isInterposable()) computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q); return; } if (const Operator *I = dyn_cast(V)) computeKnownBitsFromOperator(I, Known, Depth, Q); // Aligned pointers have trailing zeros - refine Known.Zero set if (V->getType()->isPointerTy()) { unsigned Align = V->getPointerAlignment(Q.DL); if (Align) Known.Zero.setLowBits(countTrailingZeros(Align)); } // computeKnownBitsFromAssume strictly refines Known. // Therefore, we run them after computeKnownBitsFromOperator. // Check whether a nearby assume intrinsic can determine some known bits. computeKnownBitsFromAssume(V, Known, Depth, Q); assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); } /// Return true if the given value is known to have exactly one /// bit set when defined. For vectors return true if every element is known to /// be a power of two when defined. Supports values with integer or pointer /// types and vectors of integers. bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, const Query &Q) { assert(Depth <= MaxDepth && "Limit Search Depth"); // Attempt to match against constants. if (OrZero && match(V, m_Power2OrZero())) return true; if (match(V, m_Power2())) return true; // 1 << X is clearly a power of two if the one is not shifted off the end. If // it is shifted off the end then the result is undefined. if (match(V, m_Shl(m_One(), m_Value()))) return true; // (signmask) >>l X is clearly a power of two if the one is not shifted off // the bottom. 
If it is shifted off the bottom then the result is undefined. if (match(V, m_LShr(m_SignMask(), m_Value()))) return true; // The remaining tests are all recursive, so bail out if we hit the limit. if (Depth++ == MaxDepth) return false; Value *X = nullptr, *Y = nullptr; // A shift left or a logical shift right of a power of two is a power of two // or zero. if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) || match(V, m_LShr(m_Value(X), m_Value())))) return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q); if (const ZExtInst *ZI = dyn_cast(V)) return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q); if (const SelectInst *SI = dyn_cast(V)) return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q) && isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q); if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) { // A power of two and'd with anything is a power of two or zero. if (isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q) || isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, Depth, Q)) return true; // X & (-X) is always a power of two or zero. if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X)))) return true; return false; } // Adding a power-of-two or zero to the same power-of-two or zero yields // either the original power-of-two, a larger power-of-two or zero. 
if (match(V, m_Add(m_Value(X), m_Value(Y)))) { const OverflowingBinaryOperator *VOBO = cast(V); if (OrZero || VOBO->hasNoUnsignedWrap() || VOBO->hasNoSignedWrap()) { if (match(X, m_And(m_Specific(Y), m_Value())) || match(X, m_And(m_Value(), m_Specific(Y)))) if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q)) return true; if (match(Y, m_And(m_Specific(X), m_Value())) || match(Y, m_And(m_Value(), m_Specific(X)))) if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q)) return true; unsigned BitWidth = V->getType()->getScalarSizeInBits(); KnownBits LHSBits(BitWidth); computeKnownBits(X, LHSBits, Depth, Q); KnownBits RHSBits(BitWidth); computeKnownBits(Y, RHSBits, Depth, Q); // If i8 V is a power of two or zero: // ZeroBits: 1 1 1 0 1 1 1 1 // ~ZeroBits: 0 0 0 1 0 0 0 0 if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2()) // If OrZero isn't set, we cannot give back a zero result. // Make sure either the LHS or RHS has a bit set. if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue()) return true; } } // An exact divide or right shift can only shift off zero bits, so the result // is a power of two only if the first operand is a power of two and not // copying a sign bit (sdiv int_min, 2). if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) || match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) { return isKnownToBeAPowerOfTwo(cast(V)->getOperand(0), OrZero, Depth, Q); } return false; } /// Test whether a GEP's result is known to be non-null. /// /// Uses properties inherent in a GEP to try to determine whether it is known /// to be non-null. /// /// Currently this routine does not support vector GEPs. static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth, const Query &Q) { const Function *F = nullptr; if (const Instruction *I = dyn_cast(GEP)) F = I->getFunction(); if (!GEP->isInBounds() || NullPointerIsDefined(F, GEP->getPointerAddressSpace())) return false; // FIXME: Support vector-GEPs. 
assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP"); // If the base pointer is non-null, we cannot walk to a null address with an // inbounds GEP in address space zero. if (isKnownNonZero(GEP->getPointerOperand(), Depth, Q)) return true; // Walk the GEP operands and see if any operand introduces a non-zero offset. // If so, then the GEP cannot produce a null pointer, as doing so would // inherently violate the inbounds contract within address space zero. for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); GTI != GTE; ++GTI) { // Struct types are easy -- they must always be indexed by a constant. if (StructType *STy = GTI.getStructTypeOrNull()) { ConstantInt *OpC = cast(GTI.getOperand()); unsigned ElementIdx = OpC->getZExtValue(); const StructLayout *SL = Q.DL.getStructLayout(STy); uint64_t ElementOffset = SL->getElementOffset(ElementIdx); if (ElementOffset > 0) return true; continue; } // If we have a zero-sized type, the index doesn't matter. Keep looping. if (Q.DL.getTypeAllocSize(GTI.getIndexedType()) == 0) continue; // Fast path the constant operand case both for efficiency and so we don't // increment Depth when just zipping down an all-constant GEP. if (ConstantInt *OpC = dyn_cast(GTI.getOperand())) { if (!OpC->isZero()) return true; continue; } // We post-increment Depth here because while isKnownNonZero increments it // as well, when we pop back up that increment won't persist. We don't want // to recurse 10k times just because we have 10k GEP operands. We don't // bail completely out because we want to handle constant GEPs regardless // of depth. 
if (Depth++ >= MaxDepth) continue; if (isKnownNonZero(GTI.getOperand(), Depth, Q)) return true; } return false; } static bool isKnownNonNullFromDominatingCondition(const Value *V, const Instruction *CtxI, const DominatorTree *DT) { assert(V->getType()->isPointerTy() && "V must be pointer type"); assert(!isa(V) && "Did not expect ConstantPointerNull"); if (!CtxI || !DT) return false; unsigned NumUsesExplored = 0; for (auto *U : V->users()) { // Avoid massive lists if (NumUsesExplored >= DomConditionsMaxUses) break; NumUsesExplored++; // If the value is used as an argument to a call or invoke, then argument // attributes may provide an answer about null-ness. if (auto CS = ImmutableCallSite(U)) if (auto *CalledFunc = CS.getCalledFunction()) for (const Argument &Arg : CalledFunc->args()) if (CS.getArgOperand(Arg.getArgNo()) == V && Arg.hasNonNullAttr() && DT->dominates(CS.getInstruction(), CtxI)) return true; // Consider only compare instructions uniquely controlling a branch CmpInst::Predicate Pred; if (!match(const_cast(U), m_c_ICmp(Pred, m_Specific(V), m_Zero())) || (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)) continue; for (auto *CmpU : U->users()) { if (const BranchInst *BI = dyn_cast(CmpU)) { assert(BI->isConditional() && "uses a comparison!"); BasicBlock *NonNullSuccessor = BI->getSuccessor(Pred == ICmpInst::ICMP_EQ ? 1 : 0); BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor); if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent())) return true; } else if (Pred == ICmpInst::ICMP_NE && match(CmpU, m_Intrinsic()) && DT->dominates(cast(CmpU), CtxI)) { return true; } } } return false; } /// Does the 'Range' metadata (which must be a valid MD_range operand list) /// ensure that the value it's attached to is never Value? 'RangeType' is /// is the type of the value described by the range. 
static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) { const unsigned NumRanges = Ranges->getNumOperands() / 2; assert(NumRanges >= 1); for (unsigned i = 0; i < NumRanges; ++i) { ConstantInt *Lower = mdconst::extract(Ranges->getOperand(2 * i + 0)); ConstantInt *Upper = mdconst::extract(Ranges->getOperand(2 * i + 1)); ConstantRange Range(Lower->getValue(), Upper->getValue()); if (Range.contains(Value)) return false; } return true; } /// Return true if the given value is known to be non-zero when defined. For /// vectors, return true if every element is known to be non-zero when /// defined. For pointers, if the context instruction and dominator tree are /// specified, perform context-sensitive analysis and return true if the /// pointer couldn't possibly be null at the specified instruction. /// Supports values with integer or pointer type and vectors of integers. bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { if (auto *C = dyn_cast(V)) { if (C->isNullValue()) return false; if (isa(C)) // Must be non-zero due to null test above. return true; // For constant vectors, check that all elements are undefined or known // non-zero to determine that the whole vector is known non-zero. if (auto *VecTy = dyn_cast(C->getType())) { for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) { Constant *Elt = C->getAggregateElement(i); if (!Elt || Elt->isNullValue()) return false; if (!isa(Elt) && !isa(Elt)) return false; } return true; } // A global variable in address space 0 is non null unless extern weak // or an absolute symbol reference. Other address spaces may have null as a // valid address for a global, so we can't assume anything. 
if (const GlobalValue *GV = dyn_cast(V)) { if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() && GV->getType()->getAddressSpace() == 0) return true; } else return false; } if (auto *I = dyn_cast(V)) { if (MDNode *Ranges = I->getMetadata(LLVMContext::MD_range)) { // If the possible ranges don't contain zero, then the value is // definitely non-zero. if (auto *Ty = dyn_cast(V->getType())) { const APInt ZeroValue(Ty->getBitWidth(), 0); if (rangeMetadataExcludesValue(Ranges, ZeroValue)) return true; } } } // Some of the tests below are recursive, so bail out if we hit the limit. if (Depth++ >= MaxDepth) return false; // Check for pointer simplifications. if (V->getType()->isPointerTy()) { // Alloca never returns null, malloc might. if (isa(V) && Q.DL.getAllocaAddrSpace() == 0) return true; // A byval, inalloca, or nonnull argument is never null. if (const Argument *A = dyn_cast(V)) if (A->hasByValOrInAllocaAttr() || A->hasNonNullAttr()) return true; // A Load tagged with nonnull metadata is never null. if (const LoadInst *LI = dyn_cast(V)) if (LI->getMetadata(LLVMContext::MD_nonnull)) return true; if (auto CS = ImmutableCallSite(V)) { if (CS.isReturnNonNull()) return true; if (const auto *RP = getArgumentAliasingToReturnedPointer(CS)) return isKnownNonZero(RP, Depth, Q); } } // Check for recursive pointer simplifications. if (V->getType()->isPointerTy()) { if (isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT)) return true; if (const GEPOperator *GEP = dyn_cast(V)) if (isGEPKnownNonNull(GEP, Depth, Q)) return true; } unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), Q.DL); // X | Y != 0 if X != 0 or Y != 0. Value *X = nullptr, *Y = nullptr; if (match(V, m_Or(m_Value(X), m_Value(Y)))) return isKnownNonZero(X, Depth, Q) || isKnownNonZero(Y, Depth, Q); // ext X != 0 if X != 0. if (isa(V) || isa(V)) return isKnownNonZero(cast(V)->getOperand(0), Depth, Q); // shl X, Y != 0 if X is odd. 
Note that the value of the shift is undefined // if the lowest bit is shifted off the end. if (match(V, m_Shl(m_Value(X), m_Value(Y)))) { // shl nuw can't remove any non-zero bits. const OverflowingBinaryOperator *BO = cast(V); if (BO->hasNoUnsignedWrap()) return isKnownNonZero(X, Depth, Q); KnownBits Known(BitWidth); computeKnownBits(X, Known, Depth, Q); if (Known.One[0]) return true; } // shr X, Y != 0 if X is negative. Note that the value of the shift is not // defined if the sign bit is shifted off the end. else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) { // shr exact can only shift out zero bits. const PossiblyExactOperator *BO = cast(V); if (BO->isExact()) return isKnownNonZero(X, Depth, Q); KnownBits Known = computeKnownBits(X, Depth, Q); if (Known.isNegative()) return true; // If the shifter operand is a constant, and all of the bits shifted // out are known to be zero, and X is known non-zero then at least one // non-zero bit must remain. if (ConstantInt *Shift = dyn_cast(Y)) { auto ShiftVal = Shift->getLimitedValue(BitWidth - 1); // Is there a known one in the portion not shifted out? if (Known.countMaxLeadingZeros() < BitWidth - ShiftVal) return true; // Are all the bits to be shifted out known zero? if (Known.countMinTrailingZeros() >= ShiftVal) return isKnownNonZero(X, Depth, Q); } } // div exact can only produce a zero if the dividend is zero. else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) { return isKnownNonZero(X, Depth, Q); } // X + Y. else if (match(V, m_Add(m_Value(X), m_Value(Y)))) { KnownBits XKnown = computeKnownBits(X, Depth, Q); KnownBits YKnown = computeKnownBits(Y, Depth, Q); // If X and Y are both non-negative (as signed values) then their sum is not // zero unless both X and Y are zero. 
if (XKnown.isNonNegative() && YKnown.isNonNegative()) if (isKnownNonZero(X, Depth, Q) || isKnownNonZero(Y, Depth, Q)) return true; // If X and Y are both negative (as signed values) then their sum is not // zero unless both X and Y equal INT_MIN. if (XKnown.isNegative() && YKnown.isNegative()) { APInt Mask = APInt::getSignedMaxValue(BitWidth); // The sign bit of X is set. If some other bit is set then X is not equal // to INT_MIN. if (XKnown.One.intersects(Mask)) return true; // The sign bit of Y is set. If some other bit is set then Y is not equal // to INT_MIN. if (YKnown.One.intersects(Mask)) return true; } // The sum of a non-negative number and a power of two is not zero. if (XKnown.isNonNegative() && isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q)) return true; if (YKnown.isNonNegative() && isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q)) return true; } // X * Y. else if (match(V, m_Mul(m_Value(X), m_Value(Y)))) { const OverflowingBinaryOperator *BO = cast(V); // If X and Y are non-zero then so is X * Y as long as the multiplication // does not overflow. if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) && isKnownNonZero(X, Depth, Q) && isKnownNonZero(Y, Depth, Q)) return true; } // (C ? X : Y) != 0 if X != 0 and Y != 0. else if (const SelectInst *SI = dyn_cast(V)) { if (isKnownNonZero(SI->getTrueValue(), Depth, Q) && isKnownNonZero(SI->getFalseValue(), Depth, Q)) return true; } // PHI else if (const PHINode *PN = dyn_cast(V)) { // Try and detect a recurrence that monotonically increases from a // starting value, as these are common as induction variables. 
if (PN->getNumIncomingValues() == 2) { Value *Start = PN->getIncomingValue(0); Value *Induction = PN->getIncomingValue(1); if (isa(Induction) && !isa(Start)) std::swap(Start, Induction); if (ConstantInt *C = dyn_cast(Start)) { if (!C->isZero() && !C->isNegative()) { ConstantInt *X; if ((match(Induction, m_NSWAdd(m_Specific(PN), m_ConstantInt(X))) || match(Induction, m_NUWAdd(m_Specific(PN), m_ConstantInt(X)))) && !X->isNegative()) return true; } } } // Check if all incoming values are non-zero constant. bool AllNonZeroConstants = llvm::all_of(PN->operands(), [](Value *V) { return isa(V) && !cast(V)->isZero(); }); if (AllNonZeroConstants) return true; } KnownBits Known(BitWidth); computeKnownBits(V, Known, Depth, Q); return Known.One != 0; } /// Return true if V2 == V1 + X, where X is known non-zero. static bool isAddOfNonZero(const Value *V1, const Value *V2, const Query &Q) { const BinaryOperator *BO = dyn_cast(V1); if (!BO || BO->getOpcode() != Instruction::Add) return false; Value *Op = nullptr; if (V2 == BO->getOperand(0)) Op = BO->getOperand(1); else if (V2 == BO->getOperand(1)) Op = BO->getOperand(0); else return false; return isKnownNonZero(Op, 0, Q); } /// Return true if it is known that V1 != V2. static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) { if (V1 == V2) return false; if (V1->getType() != V2->getType()) // We can't look through casts yet. return false; if (isAddOfNonZero(V1, V2, Q) || isAddOfNonZero(V2, V1, Q)) return true; if (V1->getType()->isIntOrIntVectorTy()) { // Are any known bits in V1 contradictory to known bits in V2? If V1 // has a known zero where V2 has a known one, they must not be equal. KnownBits Known1 = computeKnownBits(V1, 0, Q); KnownBits Known2 = computeKnownBits(V2, 0, Q); if (Known1.Zero.intersects(Known2.One) || Known2.Zero.intersects(Known1.One)) return true; } return false; } /// Return true if 'V & Mask' is known to be zero. We use this predicate to /// simplify operations downstream. 
Mask is known to be zero for bits that V /// cannot have. /// /// This function is defined on values with integer type, values with pointer /// type, and vectors of integers. In the case /// where V is a vector, the mask, known zero, and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth, const Query &Q) { KnownBits Known(Mask.getBitWidth()); computeKnownBits(V, Known, Depth, Q); return Mask.isSubsetOf(Known.Zero); } /// For vector constants, loop over the elements and find the constant with the /// minimum number of sign bits. Return 0 if the value is not a vector constant /// or if any element was not analyzed; otherwise, return the count for the /// element with the minimum number of sign bits. static unsigned computeNumSignBitsVectorConstant(const Value *V, unsigned TyBits) { const auto *CV = dyn_cast(V); if (!CV || !CV->getType()->isVectorTy()) return 0; unsigned MinSignBits = TyBits; unsigned NumElts = CV->getType()->getVectorNumElements(); for (unsigned i = 0; i != NumElts; ++i) { // If we find a non-ConstantInt, bail out. auto *Elt = dyn_cast_or_null(CV->getAggregateElement(i)); if (!Elt) return 0; MinSignBits = std::min(MinSignBits, Elt->getValue().getNumSignBits()); } return MinSignBits; } static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, const Query &Q); static unsigned ComputeNumSignBits(const Value *V, unsigned Depth, const Query &Q) { unsigned Result = ComputeNumSignBitsImpl(V, Depth, Q); assert(Result > 0 && "At least one sign bit needs to be present!"); return Result; } /// Return the number of times the sign bit of the register is replicated into /// the other bits. We know that at least 1 bit is always equal to the sign bit /// (itself), but other cases can give us information. 
For example, immediately /// after an "ashr X, 2", we know that the top 3 bits are all equal to each /// other, so we return 3. For vectors, return the number of sign bits for the /// vector element with the minimum number of known sign bits. static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, const Query &Q) { assert(Depth <= MaxDepth && "Limit Search Depth"); // We return the minimum number of sign bits that are guaranteed to be present // in V, so for undef we have to conservatively return 1. We don't have the // same behavior for poison though -- that's a FIXME today. Type *ScalarTy = V->getType()->getScalarType(); unsigned TyBits = ScalarTy->isPointerTy() ? Q.DL.getIndexTypeSizeInBits(ScalarTy) : Q.DL.getTypeSizeInBits(ScalarTy); unsigned Tmp, Tmp2; unsigned FirstAnswer = 1; // Note that ConstantInt is handled by the general computeKnownBits case // below. if (Depth == MaxDepth) return 1; // Limit search depth. const Operator *U = dyn_cast(V); switch (Operator::getOpcode(V)) { default: break; case Instruction::SExt: Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp; case Instruction::SDiv: { const APInt *Denominator; // sdiv X, C -> adds log(C) sign bits. if (match(U->getOperand(1), m_APInt(Denominator))) { // Ignore non-positive denominator. if (!Denominator->isStrictlyPositive()) break; // Calculate the incoming numerator bits. unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); // Add floor(log(C)) bits to the numerator bits. return std::min(TyBits, NumBits + Denominator->logBase2()); } break; } case Instruction::SRem: { const APInt *Denominator; // srem X, C -> we know that the result is within [-C+1,C) when C is a // positive constant. This let us put a lower bound on the number of sign // bits. if (match(U->getOperand(1), m_APInt(Denominator))) { // Ignore non-positive denominator. 
if (!Denominator->isStrictlyPositive()) break; // Calculate the incoming numerator bits. SRem by a positive constant // can't lower the number of sign bits. unsigned NumrBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); // Calculate the leading sign bit constraints by examining the // denominator. Given that the denominator is positive, there are two // cases: // // 1. the numerator is positive. The result range is [0,C) and [0,C) u< // (1 << ceilLogBase2(C)). // // 2. the numerator is negative. Then the result range is (-C,0] and // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)). // // Thus a lower bound on the number of sign bits is `TyBits - // ceilLogBase2(C)`. unsigned ResBits = TyBits - Denominator->ceilLogBase2(); return std::max(NumrBits, ResBits); } break; } case Instruction::AShr: { Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); // ashr X, C -> adds C sign bits. Vectors too. const APInt *ShAmt; if (match(U->getOperand(1), m_APInt(ShAmt))) { if (ShAmt->uge(TyBits)) break; // Bad shift. unsigned ShAmtLimited = ShAmt->getZExtValue(); Tmp += ShAmtLimited; if (Tmp > TyBits) Tmp = TyBits; } return Tmp; } case Instruction::Shl: { const APInt *ShAmt; if (match(U->getOperand(1), m_APInt(ShAmt))) { // shl destroys sign bits. Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); if (ShAmt->uge(TyBits) || // Bad shift. ShAmt->uge(Tmp)) break; // Shifted all sign bits out. Tmp2 = ShAmt->getZExtValue(); return Tmp - Tmp2; } break; } case Instruction::And: case Instruction::Or: case Instruction::Xor: // NOT is handled here. // Logical binary ops preserve the number of sign bits at the worst. Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); if (Tmp != 1) { Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); FirstAnswer = std::min(Tmp, Tmp2); // We computed what we know about the sign bits as our first // answer. Now proceed to the generic code that uses // computeKnownBits, and pick whichever answer is better. 
} break; case Instruction::Select: Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); if (Tmp == 1) break; Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q); return std::min(Tmp, Tmp2); case Instruction::Add: // Add can have at most one carry bit. Thus we know that the output // is, at worst, one more bit than the inputs. Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); if (Tmp == 1) break; // Special case decrementing a value (ADD X, -1): if (const auto *CRHS = dyn_cast(U->getOperand(1))) if (CRHS->isAllOnesValue()) { KnownBits Known(TyBits); computeKnownBits(U->getOperand(0), Known, Depth + 1, Q); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. if ((Known.Zero | 1).isAllOnesValue()) return TyBits; // If we are subtracting one from a positive number, there is no carry // out of the result. if (Known.isNonNegative()) return Tmp; } Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); if (Tmp2 == 1) break; return std::min(Tmp, Tmp2)-1; case Instruction::Sub: Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); if (Tmp2 == 1) break; // Handle NEG. if (const auto *CLHS = dyn_cast(U->getOperand(0))) if (CLHS->isNullValue()) { KnownBits Known(TyBits); computeKnownBits(U->getOperand(1), Known, Depth + 1, Q); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. if ((Known.Zero | 1).isAllOnesValue()) return TyBits; // If the input is known to be positive (the sign bit is known clear), // the output of the NEG has the same number of sign bits as the input. if (Known.isNonNegative()) return Tmp2; // Otherwise, we treat this like a SUB. } // Sub can have at most one carry bit. Thus we know that the output // is, at worst, one more bit than the inputs. Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); if (Tmp == 1) break; return std::min(Tmp, Tmp2)-1; case Instruction::Mul: { // The output of the Mul can be at most twice the valid bits in the inputs. 
unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); if (SignBitsOp0 == 1) break; unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); if (SignBitsOp1 == 1) break; unsigned OutValidBits = (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1); return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1; } case Instruction::PHI: { const PHINode *PN = cast(U); unsigned NumIncomingValues = PN->getNumIncomingValues(); // Don't analyze large in-degree PHIs. if (NumIncomingValues > 4) break; // Unreachable blocks may have zero-operand PHI nodes. if (NumIncomingValues == 0) break; // Take the minimum of all incoming values. This can't infinitely loop // because of our depth threshold. Tmp = ComputeNumSignBits(PN->getIncomingValue(0), Depth + 1, Q); for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) { if (Tmp == 1) return Tmp; Tmp = std::min( Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, Q)); } return Tmp; } case Instruction::Trunc: // FIXME: it's tricky to do anything useful for this, but it is an important // case for targets like X86. break; case Instruction::ExtractElement: // Look through extract element. At the moment we keep this simple and skip // tracking the specific element. But at least we might find information // valid for all elements of the vector (for example if vector is sign // extended, shifted, etc). return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); } // Finally, if we can prove that the top bits of the result are 0's or 1's, // use this information. // If we can examine all elements of a vector constant successfully, we're // done (we can't do any better than that). If not, keep trying. if (unsigned VecSignBits = computeNumSignBitsVectorConstant(V, TyBits)) return VecSignBits; KnownBits Known(TyBits); computeKnownBits(V, Known, Depth, Q); // If we know that the sign bit is either zero or one, determine the number of // identical bits in the top of the input value. 
return std::max(FirstAnswer, Known.countMinSignBits()); } /// This function computes the integer multiple of Base that equals V. /// If successful, it returns true and returns the multiple in /// Multiple. If unsuccessful, it returns false. It looks /// through SExt instructions only if LookThroughSExt is true. bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, bool LookThroughSExt, unsigned Depth) { const unsigned MaxDepth = 6; assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); assert(V->getType()->isIntegerTy() && "Not integer or pointer type!"); Type *T = V->getType(); ConstantInt *CI = dyn_cast(V); if (Base == 0) return false; if (Base == 1) { Multiple = V; return true; } ConstantExpr *CO = dyn_cast(V); Constant *BaseVal = ConstantInt::get(T, Base); if (CO && CO == BaseVal) { // Multiple is 1. Multiple = ConstantInt::get(T, 1); return true; } if (CI && CI->getZExtValue() % Base == 0) { Multiple = ConstantInt::get(T, CI->getZExtValue() / Base); return true; } if (Depth == MaxDepth) return false; // Limit search depth. 
Operator *I = dyn_cast(V); if (!I) return false; switch (I->getOpcode()) { default: break; case Instruction::SExt: if (!LookThroughSExt) return false; // otherwise fall through to ZExt LLVM_FALLTHROUGH; case Instruction::ZExt: return ComputeMultiple(I->getOperand(0), Base, Multiple, LookThroughSExt, Depth+1); case Instruction::Shl: case Instruction::Mul: { Value *Op0 = I->getOperand(0); Value *Op1 = I->getOperand(1); if (I->getOpcode() == Instruction::Shl) { ConstantInt *Op1CI = dyn_cast(Op1); if (!Op1CI) return false; // Turn Op0 << Op1 into Op0 * 2^Op1 APInt Op1Int = Op1CI->getValue(); uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1); APInt API(Op1Int.getBitWidth(), 0); API.setBit(BitToSet); Op1 = ConstantInt::get(V->getContext(), API); } Value *Mul0 = nullptr; if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) { if (Constant *Op1C = dyn_cast(Op1)) if (Constant *MulC = dyn_cast(Mul0)) { if (Op1C->getType()->getPrimitiveSizeInBits() < MulC->getType()->getPrimitiveSizeInBits()) Op1C = ConstantExpr::getZExt(Op1C, MulC->getType()); if (Op1C->getType()->getPrimitiveSizeInBits() > MulC->getType()->getPrimitiveSizeInBits()) MulC = ConstantExpr::getZExt(MulC, Op1C->getType()); // V == Base * (Mul0 * Op1), so return (Mul0 * Op1) Multiple = ConstantExpr::getMul(MulC, Op1C); return true; } if (ConstantInt *Mul0CI = dyn_cast(Mul0)) if (Mul0CI->getValue() == 1) { // V == Base * Op1, so return Op1 Multiple = Op1; return true; } } Value *Mul1 = nullptr; if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) { if (Constant *Op0C = dyn_cast(Op0)) if (Constant *MulC = dyn_cast(Mul1)) { if (Op0C->getType()->getPrimitiveSizeInBits() < MulC->getType()->getPrimitiveSizeInBits()) Op0C = ConstantExpr::getZExt(Op0C, MulC->getType()); if (Op0C->getType()->getPrimitiveSizeInBits() > MulC->getType()->getPrimitiveSizeInBits()) MulC = ConstantExpr::getZExt(MulC, Op0C->getType()); // V == Base * (Mul1 * Op0), so return (Mul1 * Op0) Multiple = 
ConstantExpr::getMul(MulC, Op0C); return true; } if (ConstantInt *Mul1CI = dyn_cast(Mul1)) if (Mul1CI->getValue() == 1) { // V == Base * Op0, so return Op0 Multiple = Op0; return true; } } } } // We could not determine if V is a multiple of Base. return false; } Intrinsic::ID llvm::getIntrinsicForCallSite(ImmutableCallSite ICS, const TargetLibraryInfo *TLI) { const Function *F = ICS.getCalledFunction(); if (!F) return Intrinsic::not_intrinsic; if (F->isIntrinsic()) return F->getIntrinsicID(); if (!TLI) return Intrinsic::not_intrinsic; LibFunc Func; // We're going to make assumptions on the semantics of the functions, check // that the target knows that it's available in this environment and it does // not have local linkage. if (!F || F->hasLocalLinkage() || !TLI->getLibFunc(*F, Func)) return Intrinsic::not_intrinsic; if (!ICS.onlyReadsMemory()) return Intrinsic::not_intrinsic; // Otherwise check if we have a call to a function that can be turned into a // vector intrinsic. switch (Func) { default: break; case LibFunc_sin: case LibFunc_sinf: case LibFunc_sinl: return Intrinsic::sin; case LibFunc_cos: case LibFunc_cosf: case LibFunc_cosl: return Intrinsic::cos; case LibFunc_exp: case LibFunc_expf: case LibFunc_expl: return Intrinsic::exp; case LibFunc_exp2: case LibFunc_exp2f: case LibFunc_exp2l: return Intrinsic::exp2; case LibFunc_log: case LibFunc_logf: case LibFunc_logl: return Intrinsic::log; case LibFunc_log10: case LibFunc_log10f: case LibFunc_log10l: return Intrinsic::log10; case LibFunc_log2: case LibFunc_log2f: case LibFunc_log2l: return Intrinsic::log2; case LibFunc_fabs: case LibFunc_fabsf: case LibFunc_fabsl: return Intrinsic::fabs; case LibFunc_fmin: case LibFunc_fminf: case LibFunc_fminl: return Intrinsic::minnum; case LibFunc_fmax: case LibFunc_fmaxf: case LibFunc_fmaxl: return Intrinsic::maxnum; case LibFunc_copysign: case LibFunc_copysignf: case LibFunc_copysignl: return Intrinsic::copysign; case LibFunc_floor: case LibFunc_floorf: case 
LibFunc_floorl: return Intrinsic::floor; case LibFunc_ceil: case LibFunc_ceilf: case LibFunc_ceill: return Intrinsic::ceil; case LibFunc_trunc: case LibFunc_truncf: case LibFunc_truncl: return Intrinsic::trunc; case LibFunc_rint: case LibFunc_rintf: case LibFunc_rintl: return Intrinsic::rint; case LibFunc_nearbyint: case LibFunc_nearbyintf: case LibFunc_nearbyintl: return Intrinsic::nearbyint; case LibFunc_round: case LibFunc_roundf: case LibFunc_roundl: return Intrinsic::round; case LibFunc_pow: case LibFunc_powf: case LibFunc_powl: return Intrinsic::pow; case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl: return Intrinsic::sqrt; } return Intrinsic::not_intrinsic; } /// Return true if we can prove that the specified FP value is never equal to /// -0.0. /// /// NOTE: this function will need to be revisited when we support non-default /// rounding modes! bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI, unsigned Depth) { if (auto *CFP = dyn_cast(V)) return !CFP->getValueAPF().isNegZero(); // Limit search depth. if (Depth == MaxDepth) return false; auto *Op = dyn_cast(V); if (!Op) return false; // Check if the nsz fast-math flag is set. if (auto *FPO = dyn_cast(Op)) if (FPO->hasNoSignedZeros()) return true; // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0. if (match(Op, m_FAdd(m_Value(), m_PosZeroFP()))) return true; // sitofp and uitofp turn into +0.0 for zero. if (isa(Op) || isa(Op)) return true; if (auto *Call = dyn_cast(Op)) { Intrinsic::ID IID = getIntrinsicForCallSite(Call, TLI); switch (IID) { default: break; // sqrt(-0.0) = -0.0, no other negative results are possible. case Intrinsic::sqrt: return CannotBeNegativeZero(Call->getArgOperand(0), TLI, Depth + 1); // fabs(x) != -0.0 case Intrinsic::fabs: return true; } } return false; } /// If \p SignBitOnly is true, test for a known 0 sign bit rather than a /// standard ordered compare. e.g. make -0.0 olt 0.0 be true because of the sign /// bit despite comparing equal. 
static bool cannotBeOrderedLessThanZeroImpl(const Value *V, const TargetLibraryInfo *TLI, bool SignBitOnly, unsigned Depth) { // TODO: This function does not do the right thing when SignBitOnly is true // and we're lowering to a hypothetical IEEE 754-compliant-but-evil platform // which flips the sign bits of NaNs. See // https://llvm.org/bugs/show_bug.cgi?id=31702. if (const ConstantFP *CFP = dyn_cast(V)) { return !CFP->getValueAPF().isNegative() || (!SignBitOnly && CFP->getValueAPF().isZero()); } // Handle vector of constants. if (auto *CV = dyn_cast(V)) { if (CV->getType()->isVectorTy()) { unsigned NumElts = CV->getType()->getVectorNumElements(); for (unsigned i = 0; i != NumElts; ++i) { auto *CFP = dyn_cast_or_null(CV->getAggregateElement(i)); if (!CFP) return false; if (CFP->getValueAPF().isNegative() && (SignBitOnly || !CFP->getValueAPF().isZero())) return false; } // All non-negative ConstantFPs. return true; } } if (Depth == MaxDepth) return false; // Limit search depth. const Operator *I = dyn_cast(V); if (!I) return false; switch (I->getOpcode()) { default: break; // Unsigned integers are always nonnegative. case Instruction::UIToFP: return true; case Instruction::FMul: // x*x is always non-negative or a NaN. if (I->getOperand(0) == I->getOperand(1) && (!SignBitOnly || cast(I)->hasNoNaNs())) return true; LLVM_FALLTHROUGH; case Instruction::FAdd: case Instruction::FDiv: case Instruction::FRem: return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, Depth + 1) && cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly, Depth + 1); case Instruction::Select: return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly, Depth + 1) && cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly, Depth + 1); case Instruction::FPExt: case Instruction::FPTrunc: // Widening/narrowing never change sign. 
return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, Depth + 1); case Instruction::ExtractElement: // Look through extract element. At the moment we keep this simple and skip // tracking the specific element. But at least we might find information // valid for all elements of the vector. return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, Depth + 1); case Instruction::Call: const auto *CI = cast(I); Intrinsic::ID IID = getIntrinsicForCallSite(CI, TLI); switch (IID) { default: break; case Intrinsic::maxnum: - return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, - Depth + 1) || - cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly, - Depth + 1); + return (isKnownNeverNaN(I->getOperand(0)) && + cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, + SignBitOnly, Depth + 1)) || + (isKnownNeverNaN(I->getOperand(1)) && + cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, + SignBitOnly, Depth + 1)); + case Intrinsic::minnum: return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, Depth + 1) && cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly, Depth + 1); case Intrinsic::exp: case Intrinsic::exp2: case Intrinsic::fabs: return true; case Intrinsic::sqrt: // sqrt(x) is always >= -0 or NaN. Moreover, sqrt(x) == -0 iff x == -0. if (!SignBitOnly) return true; return CI->hasNoNaNs() && (CI->hasNoSignedZeros() || CannotBeNegativeZero(CI->getOperand(0), TLI)); case Intrinsic::powi: if (ConstantInt *Exponent = dyn_cast(I->getOperand(1))) { // powi(x,n) is non-negative if n is even. if (Exponent->getBitWidth() <= 64 && Exponent->getSExtValue() % 2u == 0) return true; } // TODO: This is not correct. Given that exp is an integer, here are the // ways that pow can return a negative value: // // pow(x, exp) --> negative if exp is odd and x is negative. // pow(-0, exp) --> -inf if exp is negative odd. // pow(-0, exp) --> -0 if exp is positive odd. 
// pow(-inf, exp) --> -0 if exp is negative odd. // pow(-inf, exp) --> -inf if exp is positive odd. // // Therefore, if !SignBitOnly, we can return true if x >= +0 or x is NaN, // but we must return false if x == -0. Unfortunately we do not currently // have a way of expressing this constraint. See details in // https://llvm.org/bugs/show_bug.cgi?id=31702. return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, Depth + 1); case Intrinsic::fma: case Intrinsic::fmuladd: // x*x+y is non-negative if y is non-negative. return I->getOperand(0) == I->getOperand(1) && (!SignBitOnly || cast(I)->hasNoNaNs()) && cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly, Depth + 1); } break; } return false; } bool llvm::CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI) { return cannotBeOrderedLessThanZeroImpl(V, TLI, false, 0); } bool llvm::SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI) { return cannotBeOrderedLessThanZeroImpl(V, TLI, true, 0); } bool llvm::isKnownNeverNaN(const Value *V) { assert(V->getType()->isFPOrFPVectorTy() && "Querying for NaN on non-FP type"); // If we're told that NaNs won't happen, assume they won't. if (auto *FPMathOp = dyn_cast(V)) if (FPMathOp->hasNoNaNs()) return true; // TODO: Handle instructions and potentially recurse like other 'isKnown' // functions. For example, the result of sitofp is never NaN. // Handle scalar constants. if (auto *CFP = dyn_cast(V)) return !CFP->isNaN(); // Bail out for constant expressions, but try to handle vector constants. if (!V->getType()->isVectorTy() || !isa(V)) return false; // For vectors, verify that each element is not NaN. unsigned NumElts = V->getType()->getVectorNumElements(); for (unsigned i = 0; i != NumElts; ++i) { Constant *Elt = cast(V)->getAggregateElement(i); if (!Elt) return false; if (isa(Elt)) continue; auto *CElt = dyn_cast(Elt); if (!CElt || CElt->isNaN()) return false; } // All elements were confirmed not-NaN or undefined. 
return true; } /// If the specified value can be set by repeating the same byte in memory, /// return the i8 value that it is represented with. This is /// true for all i8 values obviously, but is also true for i32 0, i32 -1, /// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated /// byte store (e.g. i16 0x1234), return null. Value *llvm::isBytewiseValue(Value *V) { // All byte-wide stores are splatable, even of arbitrary variables. if (V->getType()->isIntegerTy(8)) return V; // Handle 'null' ConstantArrayZero etc. if (Constant *C = dyn_cast(V)) if (C->isNullValue()) return Constant::getNullValue(Type::getInt8Ty(V->getContext())); // Constant float and double values can be handled as integer values if the // corresponding integer value is "byteable". An important case is 0.0. if (ConstantFP *CFP = dyn_cast(V)) { if (CFP->getType()->isFloatTy()) V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(V->getContext())); if (CFP->getType()->isDoubleTy()) V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(V->getContext())); // Don't handle long double formats, which have strange constraints. } // We can handle constant integers that are multiple of 8 bits. if (ConstantInt *CI = dyn_cast(V)) { if (CI->getBitWidth() % 8 == 0) { assert(CI->getBitWidth() > 8 && "8 bits should be handled above!"); if (!CI->getValue().isSplat(8)) return nullptr; return ConstantInt::get(V->getContext(), CI->getValue().trunc(8)); } } // A ConstantDataArray/Vector is splatable if all its members are equal and // also splatable. 
if (ConstantDataSequential *CA = dyn_cast(V)) { Value *Elt = CA->getElementAsConstant(0); Value *Val = isBytewiseValue(Elt); if (!Val) return nullptr; for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I) if (CA->getElementAsConstant(I) != Elt) return nullptr; return Val; } // Conceptually, we could handle things like: // %a = zext i8 %X to i16 // %b = shl i16 %a, 8 // %c = or i16 %a, %b // but until there is an example that actually needs this, it doesn't seem // worth worrying about. return nullptr; } // This is the recursive version of BuildSubAggregate. It takes a few different // arguments. Idxs is the index within the nested struct From that we are // looking at now (which is of type IndexedType). IdxSkip is the number of // indices from Idxs that should be left out when inserting into the resulting // struct. To is the result struct built so far, new insertvalue instructions // build on that. static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType, SmallVectorImpl &Idxs, unsigned IdxSkip, Instruction *InsertBefore) { StructType *STy = dyn_cast(IndexedType); if (STy) { // Save the original To argument so we can modify it Value *OrigTo = To; // General case, the type indexed by Idxs is a struct for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { // Process each struct element recursively Idxs.push_back(i); Value *PrevTo = To; To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip, InsertBefore); Idxs.pop_back(); if (!To) { // Couldn't find any inserted value for this index? Cleanup while (PrevTo != OrigTo) { InsertValueInst* Del = cast(PrevTo); PrevTo = Del->getAggregateOperand(); Del->eraseFromParent(); } // Stop processing elements break; } } // If we successfully found a value for each of our subaggregates if (To) return To; } // Base case, the type indexed by SourceIdxs is not a struct, or not all of // the struct's elements had a value that was inserted directly. 
In the latter // case, perhaps we can't determine each of the subelements individually, but // we might be able to find the complete struct somewhere. // Find the value that is at that particular spot Value *V = FindInsertedValue(From, Idxs); if (!V) return nullptr; // Insert the value in the new (sub) aggregate return InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip), "tmp", InsertBefore); } // This helper takes a nested struct and extracts a part of it (which is again a // struct) into a new value. For example, given the struct: // { a, { b, { c, d }, e } } // and the indices "1, 1" this returns // { c, d }. // // It does this by inserting an insertvalue for each element in the resulting // struct, as opposed to just inserting a single struct. This will only work if // each of the elements of the substruct are known (ie, inserted into From by an // insertvalue instruction somewhere). // // All inserted insertvalue instructions are inserted before InsertBefore static Value *BuildSubAggregate(Value *From, ArrayRef idx_range, Instruction *InsertBefore) { assert(InsertBefore && "Must have someplace to insert!"); Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(), idx_range); Value *To = UndefValue::get(IndexedType); SmallVector Idxs(idx_range.begin(), idx_range.end()); unsigned IdxSkip = Idxs.size(); return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore); } /// Given an aggregate and a sequence of indices, see if the scalar value /// indexed is already around as a register, for example if it was inserted /// directly into the aggregate. /// /// If InsertBefore is not null, this function will duplicate (modified) /// insertvalues when a part of a nested struct is extracted. Value *llvm::FindInsertedValue(Value *V, ArrayRef idx_range, Instruction *InsertBefore) { // Nothing to index? Just return V then (this is useful at the end of our // recursion). 
if (idx_range.empty()) return V; // We have indices, so V should have an indexable type. assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) && "Not looking at a struct or array?"); assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) && "Invalid indices for type?"); if (Constant *C = dyn_cast(V)) { C = C->getAggregateElement(idx_range[0]); if (!C) return nullptr; return FindInsertedValue(C, idx_range.slice(1), InsertBefore); } if (InsertValueInst *I = dyn_cast(V)) { // Loop the indices for the insertvalue instruction in parallel with the // requested indices const unsigned *req_idx = idx_range.begin(); for (const unsigned *i = I->idx_begin(), *e = I->idx_end(); i != e; ++i, ++req_idx) { if (req_idx == idx_range.end()) { // We can't handle this without inserting insertvalues if (!InsertBefore) return nullptr; // The requested index identifies a part of a nested aggregate. Handle // this specially. For example, // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0 // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1 // %C = extractvalue {i32, { i32, i32 } } %B, 1 // This can be changed into // %A = insertvalue {i32, i32 } undef, i32 10, 0 // %C = insertvalue {i32, i32 } %A, i32 11, 1 // which allows the unused 0,0 element from the nested struct to be // removed. return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx), InsertBefore); } // This insert value inserts something else than what we are looking for. // See if the (aggregate) value inserted into has the value we are // looking for, then. if (*req_idx != *i) return FindInsertedValue(I->getAggregateOperand(), idx_range, InsertBefore); } // If we end up here, the indices of the insertvalue match with those // requested (though possibly only partially). Now we recursively look at // the inserted value, passing any remaining indices. 
return FindInsertedValue(I->getInsertedValueOperand(), makeArrayRef(req_idx, idx_range.end()), InsertBefore); } if (ExtractValueInst *I = dyn_cast(V)) { // If we're extracting a value from an aggregate that was extracted from // something else, we can extract from that something else directly instead. // However, we will need to chain I's indices with the requested indices. // Calculate the number of indices required unsigned size = I->getNumIndices() + idx_range.size(); // Allocate some space to put the new indices in SmallVector Idxs; Idxs.reserve(size); // Add indices from the extract value instruction Idxs.append(I->idx_begin(), I->idx_end()); // Add requested indices Idxs.append(idx_range.begin(), idx_range.end()); assert(Idxs.size() == size && "Number of indices added not correct?"); return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore); } // Otherwise, we don't know (such as, extracting from a function return value // or load instruction) return nullptr; } /// Analyze the specified pointer to see if it can be expressed as a base /// pointer plus a constant offset. Return the base and offset to the caller. Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL) { unsigned BitWidth = DL.getIndexTypeSizeInBits(Ptr->getType()); APInt ByteOffset(BitWidth, 0); // We walk up the defs but use a visited set to handle unreachable code. In // that case, we stop after accumulating the cycle once (not that it // matters). SmallPtrSet Visited; while (Visited.insert(Ptr).second) { if (Ptr->getType()->isVectorTy()) break; if (GEPOperator *GEP = dyn_cast(Ptr)) { // If one of the values we have visited is an addrspacecast, then // the pointer type of this GEP may be different from the type // of the Ptr parameter which was passed to this function. This // means when we construct GEPOffset, we need to use the size // of GEP's pointer type rather than the size of the original // pointer type. 
APInt GEPOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); if (!GEP->accumulateConstantOffset(DL, GEPOffset)) break; ByteOffset += GEPOffset.getSExtValue(); Ptr = GEP->getPointerOperand(); } else if (Operator::getOpcode(Ptr) == Instruction::BitCast || Operator::getOpcode(Ptr) == Instruction::AddrSpaceCast) { Ptr = cast(Ptr)->getOperand(0); } else if (GlobalAlias *GA = dyn_cast(Ptr)) { if (GA->isInterposable()) break; Ptr = GA->getAliasee(); } else { break; } } Offset = ByteOffset.getSExtValue(); return Ptr; } bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP, unsigned CharSize) { // Make sure the GEP has exactly three arguments. if (GEP->getNumOperands() != 3) return false; // Make sure the index-ee is a pointer to array of \p CharSize integers. // CharSize. ArrayType *AT = dyn_cast(GEP->getSourceElementType()); if (!AT || !AT->getElementType()->isIntegerTy(CharSize)) return false; // Check to make sure that the first operand of the GEP is an integer and // has value 0 so that we are sure we're indexing into the initializer. const ConstantInt *FirstIdx = dyn_cast(GEP->getOperand(1)); if (!FirstIdx || !FirstIdx->isZero()) return false; return true; } bool llvm::getConstantDataArrayInfo(const Value *V, ConstantDataArraySlice &Slice, unsigned ElementSize, uint64_t Offset) { assert(V); // Look through bitcast instructions and geps. V = V->stripPointerCasts(); // If the value is a GEP instruction or constant expression, treat it as an // offset. if (const GEPOperator *GEP = dyn_cast(V)) { // The GEP operator should be based on a pointer to string constant, and is // indexing into the string constant. if (!isGEPBasedOnPointerToString(GEP, ElementSize)) return false; // If the second index isn't a ConstantInt, then this is a variable index // into the array. If this occurs, we can't say anything meaningful about // the string. 
uint64_t StartIdx = 0; if (const ConstantInt *CI = dyn_cast(GEP->getOperand(2))) StartIdx = CI->getZExtValue(); else return false; return getConstantDataArrayInfo(GEP->getOperand(0), Slice, ElementSize, StartIdx + Offset); } // The GEP instruction, constant or instruction, must reference a global // variable that is a constant and is initialized. The referenced constant // initializer is the array that we'll use for optimization. const GlobalVariable *GV = dyn_cast(V); if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) return false; const ConstantDataArray *Array; ArrayType *ArrayTy; if (GV->getInitializer()->isNullValue()) { Type *GVTy = GV->getValueType(); if ( (ArrayTy = dyn_cast(GVTy)) ) { // A zeroinitializer for the array; there is no ConstantDataArray. Array = nullptr; } else { const DataLayout &DL = GV->getParent()->getDataLayout(); uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy); uint64_t Length = SizeInBytes / (ElementSize / 8); if (Length <= Offset) return false; Slice.Array = nullptr; Slice.Offset = 0; Slice.Length = Length - Offset; return true; } } else { // This must be a ConstantDataArray. Array = dyn_cast(GV->getInitializer()); if (!Array) return false; ArrayTy = Array->getType(); } if (!ArrayTy->getElementType()->isIntegerTy(ElementSize)) return false; uint64_t NumElts = ArrayTy->getArrayNumElements(); if (Offset > NumElts) return false; Slice.Array = Array; Slice.Offset = Offset; Slice.Length = NumElts - Offset; return true; } /// This function computes the length of a null-terminated C string pointed to /// by V. If successful, it returns true and returns the string in Str. /// If unsuccessful, it returns false. 
bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, uint64_t Offset, bool TrimAtNul) { ConstantDataArraySlice Slice; if (!getConstantDataArrayInfo(V, Slice, 8, Offset)) return false; if (Slice.Array == nullptr) { if (TrimAtNul) { Str = StringRef(); return true; } if (Slice.Length == 1) { Str = StringRef("", 1); return true; } // We cannot instantiate a StringRef as we do not have an appropriate string // of 0s at hand. return false; } // Start out with the entire array in the StringRef. Str = Slice.Array->getAsString(); // Skip over 'offset' bytes. Str = Str.substr(Slice.Offset); if (TrimAtNul) { // Trim off the \0 and anything after it. If the array is not nul // terminated, we just return the whole end of string. The client may know // some other way that the string is length-bound. Str = Str.substr(0, Str.find('\0')); } return true; } // These next two are very similar to the above, but also look through PHI // nodes. // TODO: See if we can integrate these two together. /// If we can compute the length of the string pointed to by /// the specified pointer, return 'len+1'. If we can't, return 0. static uint64_t GetStringLengthH(const Value *V, SmallPtrSetImpl &PHIs, unsigned CharSize) { // Look through noop bitcast instructions. V = V->stripPointerCasts(); // If this is a PHI node, there are two cases: either we have already seen it // or we haven't. if (const PHINode *PN = dyn_cast(V)) { if (!PHIs.insert(PN).second) return ~0ULL; // already in the set. // If it was new, see if all the input strings are the same length. uint64_t LenSoFar = ~0ULL; for (Value *IncValue : PN->incoming_values()) { uint64_t Len = GetStringLengthH(IncValue, PHIs, CharSize); if (Len == 0) return 0; // Unknown length -> unknown. if (Len == ~0ULL) continue; if (Len != LenSoFar && LenSoFar != ~0ULL) return 0; // Disagree -> unknown. LenSoFar = Len; } // Success, all agree. 
return LenSoFar; } // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y) if (const SelectInst *SI = dyn_cast(V)) { uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs, CharSize); if (Len1 == 0) return 0; uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs, CharSize); if (Len2 == 0) return 0; if (Len1 == ~0ULL) return Len2; if (Len2 == ~0ULL) return Len1; if (Len1 != Len2) return 0; return Len1; } // Otherwise, see if we can read the string. ConstantDataArraySlice Slice; if (!getConstantDataArrayInfo(V, Slice, CharSize)) return 0; if (Slice.Array == nullptr) return 1; // Search for nul characters unsigned NullIndex = 0; for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) { if (Slice.Array->getElementAsInteger(Slice.Offset + NullIndex) == 0) break; } return NullIndex + 1; } /// If we can compute the length of the string pointed to by /// the specified pointer, return 'len+1'. If we can't, return 0. uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) { if (!V->getType()->isPointerTy()) return 0; SmallPtrSet PHIs; uint64_t Len = GetStringLengthH(V, PHIs, CharSize); // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return // an empty string as a length. return Len == ~0ULL ? 1 : Len; } const Value *llvm::getArgumentAliasingToReturnedPointer(ImmutableCallSite CS) { assert(CS && "getArgumentAliasingToReturnedPointer only works on nonnull CallSite"); if (const Value *RV = CS.getReturnedArgOperand()) return RV; // This can be used only as a aliasing property. if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(CS)) return CS.getArgOperand(0); return nullptr; } bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( ImmutableCallSite CS) { return CS.getIntrinsicID() == Intrinsic::launder_invariant_group || CS.getIntrinsicID() == Intrinsic::strip_invariant_group; } /// \p PN defines a loop-variant pointer to an object. 
Check if the /// previous iteration of the loop was referring to the same object as \p PN. static bool isSameUnderlyingObjectInLoop(const PHINode *PN, const LoopInfo *LI) { // Find the loop-defined value. Loop *L = LI->getLoopFor(PN->getParent()); if (PN->getNumIncomingValues() != 2) return true; // Find the value from previous iteration. auto *PrevValue = dyn_cast(PN->getIncomingValue(0)); if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L) PrevValue = dyn_cast(PN->getIncomingValue(1)); if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L) return true; // If a new pointer is loaded in the loop, the pointer references a different // object in every iteration. E.g.: // for (i) // int *p = a[i]; // ... if (auto *Load = dyn_cast(PrevValue)) if (!L->isLoopInvariant(Load->getPointerOperand())) return false; return true; } Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup) { if (!V->getType()->isPointerTy()) return V; for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { if (GEPOperator *GEP = dyn_cast(V)) { V = GEP->getPointerOperand(); } else if (Operator::getOpcode(V) == Instruction::BitCast || Operator::getOpcode(V) == Instruction::AddrSpaceCast) { V = cast(V)->getOperand(0); } else if (GlobalAlias *GA = dyn_cast(V)) { if (GA->isInterposable()) return V; V = GA->getAliasee(); } else if (isa(V)) { // An alloca can't be further simplified. return V; } else { if (auto CS = CallSite(V)) { // CaptureTracking can know about special capturing properties of some // intrinsics like launder.invariant.group, that can't be expressed with // the attributes, but have properties like returning aliasing pointer. // Because some analysis may assume that nocaptured pointer is not // returned from some special intrinsic (because function would have to // be marked with returns attribute), it is crucial to use this function // because it should be in sync with CaptureTracking. 
Not using it may // cause weird miscompilations where 2 aliasing pointers are assumed to // noalias. if (auto *RP = getArgumentAliasingToReturnedPointer(CS)) { V = RP; continue; } } // See if InstructionSimplify knows any relevant tricks. if (Instruction *I = dyn_cast(V)) // TODO: Acquire a DominatorTree and AssumptionCache and use them. if (Value *Simplified = SimplifyInstruction(I, {DL, I})) { V = Simplified; continue; } return V; } assert(V->getType()->isPointerTy() && "Unexpected operand type!"); } return V; } void llvm::GetUnderlyingObjects(Value *V, SmallVectorImpl &Objects, const DataLayout &DL, LoopInfo *LI, unsigned MaxLookup) { SmallPtrSet Visited; SmallVector Worklist; Worklist.push_back(V); do { Value *P = Worklist.pop_back_val(); P = GetUnderlyingObject(P, DL, MaxLookup); if (!Visited.insert(P).second) continue; if (SelectInst *SI = dyn_cast(P)) { Worklist.push_back(SI->getTrueValue()); Worklist.push_back(SI->getFalseValue()); continue; } if (PHINode *PN = dyn_cast(P)) { // If this PHI changes the underlying object in every iteration of the // loop, don't look through it. Consider: // int **A; // for (i) { // Prev = Curr; // Prev = PHI (Prev_0, Curr) // Curr = A[i]; // *Prev, *Curr; // // Prev is tracking Curr one iteration behind so they refer to different // underlying objects. if (!LI || !LI->isLoopHeader(PN->getParent()) || isSameUnderlyingObjectInLoop(PN, LI)) for (Value *IncValue : PN->incoming_values()) Worklist.push_back(IncValue); continue; } Objects.push_back(P); } while (!Worklist.empty()); } /// This is the function that does the work of looking through basic /// ptrtoint+arithmetic+inttoptr sequences. static const Value *getUnderlyingObjectFromInt(const Value *V) { do { if (const Operator *U = dyn_cast(V)) { // If we find a ptrtoint, we can transfer control back to the // regular getUnderlyingObjectFromInt. 
if (U->getOpcode() == Instruction::PtrToInt) return U->getOperand(0); // If we find an add of a constant, a multiplied value, or a phi, it's // likely that the other operand will lead us to the base // object. We don't have to worry about the case where the // object address is somehow being computed by the multiply, // because our callers only care when the result is an // identifiable object. if (U->getOpcode() != Instruction::Add || (!isa(U->getOperand(1)) && Operator::getOpcode(U->getOperand(1)) != Instruction::Mul && !isa(U->getOperand(1)))) return V; V = U->getOperand(0); } else { return V; } assert(V->getType()->isIntegerTy() && "Unexpected operand type!"); } while (true); } /// This is a wrapper around GetUnderlyingObjects and adds support for basic /// ptrtoint+arithmetic+inttoptr sequences. /// It returns false if unidentified object is found in GetUnderlyingObjects. bool llvm::getUnderlyingObjectsForCodeGen(const Value *V, SmallVectorImpl &Objects, const DataLayout &DL) { SmallPtrSet Visited; SmallVector Working(1, V); do { V = Working.pop_back_val(); SmallVector Objs; GetUnderlyingObjects(const_cast(V), Objs, DL); for (Value *V : Objs) { if (!Visited.insert(V).second) continue; if (Operator::getOpcode(V) == Instruction::IntToPtr) { const Value *O = getUnderlyingObjectFromInt(cast(V)->getOperand(0)); if (O->getType()->isPointerTy()) { Working.push_back(O); continue; } } // If GetUnderlyingObjects fails to find an identifiable object, // getUnderlyingObjectsForCodeGen also fails for safety. if (!isIdentifiedObject(V)) { Objects.clear(); return false; } Objects.push_back(const_cast(V)); } } while (!Working.empty()); return true; } /// Return true if the only users of this pointer are lifetime markers. 
bool llvm::onlyUsedByLifetimeMarkers(const Value *V) { for (const User *U : V->users()) { const IntrinsicInst *II = dyn_cast(U); if (!II) return false; if (II->getIntrinsicID() != Intrinsic::lifetime_start && II->getIntrinsicID() != Intrinsic::lifetime_end) return false; } return true; } bool llvm::isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI, const DominatorTree *DT) { const Operator *Inst = dyn_cast(V); if (!Inst) return false; for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) if (Constant *C = dyn_cast(Inst->getOperand(i))) if (C->canTrap()) return false; switch (Inst->getOpcode()) { default: return true; case Instruction::UDiv: case Instruction::URem: { // x / y is undefined if y == 0. const APInt *V; if (match(Inst->getOperand(1), m_APInt(V))) return *V != 0; return false; } case Instruction::SDiv: case Instruction::SRem: { // x / y is undefined if y == 0 or x == INT_MIN and y == -1 const APInt *Numerator, *Denominator; if (!match(Inst->getOperand(1), m_APInt(Denominator))) return false; // We cannot hoist this division if the denominator is 0. if (*Denominator == 0) return false; // It's safe to hoist if the denominator is not 0 or -1. if (*Denominator != -1) return true; // At this point we know that the denominator is -1. It is safe to hoist as // long we know that the numerator is not INT_MIN. if (match(Inst->getOperand(0), m_APInt(Numerator))) return !Numerator->isMinSignedValue(); // The numerator *might* be MinSignedValue. return false; } case Instruction::Load: { const LoadInst *LI = cast(Inst); if (!LI->isUnordered() || // Speculative load may create a race that did not exist in the source. LI->getFunction()->hasFnAttribute(Attribute::SanitizeThread) || // Speculative load may load data from dirty regions. 
LI->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) || LI->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress)) return false; const DataLayout &DL = LI->getModule()->getDataLayout(); return isDereferenceableAndAlignedPointer(LI->getPointerOperand(), LI->getAlignment(), DL, CtxI, DT); } case Instruction::Call: { auto *CI = cast(Inst); const Function *Callee = CI->getCalledFunction(); // The called function could have undefined behavior or side-effects, even // if marked readnone nounwind. return Callee && Callee->isSpeculatable(); } case Instruction::VAArg: case Instruction::Alloca: case Instruction::Invoke: case Instruction::PHI: case Instruction::Store: case Instruction::Ret: case Instruction::Br: case Instruction::IndirectBr: case Instruction::Switch: case Instruction::Unreachable: case Instruction::Fence: case Instruction::AtomicRMW: case Instruction::AtomicCmpXchg: case Instruction::LandingPad: case Instruction::Resume: case Instruction::CatchSwitch: case Instruction::CatchPad: case Instruction::CatchRet: case Instruction::CleanupPad: case Instruction::CleanupRet: return false; // Misc instructions which have effects } } bool llvm::mayBeMemoryDependent(const Instruction &I) { return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I); } OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { // Multiplying n * m significant bits yields a result of n + m significant // bits. If the total number of significant bits does not exceed the // result bit width (minus 1), there is no overflow. // This means if we have enough leading zero bits in the operands // we can guarantee that the result does not overflow. 
// Ref: "Hacker's Delight" by Henry Warren unsigned BitWidth = LHS->getType()->getScalarSizeInBits(); KnownBits LHSKnown(BitWidth); KnownBits RHSKnown(BitWidth); computeKnownBits(LHS, LHSKnown, DL, /*Depth=*/0, AC, CxtI, DT); computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT); // Note that underestimating the number of zero bits gives a more // conservative answer. unsigned ZeroBits = LHSKnown.countMinLeadingZeros() + RHSKnown.countMinLeadingZeros(); // First handle the easy case: if we have enough zero bits there's // definitely no overflow. if (ZeroBits >= BitWidth) return OverflowResult::NeverOverflows; // Get the largest possible values for each operand. APInt LHSMax = ~LHSKnown.Zero; APInt RHSMax = ~RHSKnown.Zero; // We know the multiply operation doesn't overflow if the maximum values for // each operand will not overflow after we multiply them together. bool MaxOverflow; (void)LHSMax.umul_ov(RHSMax, MaxOverflow); if (!MaxOverflow) return OverflowResult::NeverOverflows; // We know it always overflows if multiplying the smallest possible values for // the operands also results in overflow. bool MinOverflow; (void)LHSKnown.One.umul_ov(RHSKnown.One, MinOverflow); if (MinOverflow) return OverflowResult::AlwaysOverflows; return OverflowResult::MayOverflow; } OverflowResult llvm::computeOverflowForSignedMul(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { // Multiplying n * m significant bits yields a result of n + m significant // bits. If the total number of significant bits does not exceed the // result bit width (minus 1), there is no overflow. // This means if we have enough leading sign bits in the operands // we can guarantee that the result does not overflow. // Ref: "Hacker's Delight" by Henry Warren unsigned BitWidth = LHS->getType()->getScalarSizeInBits(); // Note that underestimating the number of sign bits gives a more // conservative answer. 
unsigned SignBits = ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) + ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT); // First handle the easy case: if we have enough sign bits there's // definitely no overflow. if (SignBits > BitWidth + 1) return OverflowResult::NeverOverflows; // There are two ambiguous cases where there can be no overflow: // SignBits == BitWidth + 1 and // SignBits == BitWidth // The second case is difficult to check, therefore we only handle the // first case. if (SignBits == BitWidth + 1) { // It overflows only when both arguments are negative and the true // product is exactly the minimum negative number. // E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000 // For simplicity we just check if at least one side is not negative. KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT); KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT); if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) return OverflowResult::NeverOverflows; } return OverflowResult::MayOverflow; } OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT); if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) { KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT); if (LHSKnown.isNegative() && RHSKnown.isNegative()) { // The sign bit is set in both cases: this MUST overflow. // Create a simple add instruction, and insert it into the struct. return OverflowResult::AlwaysOverflows; } if (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()) { // The sign bit is clear in both cases: this CANNOT overflow. // Create a simple add instruction, and insert it into the struct. 
return OverflowResult::NeverOverflows; } } return OverflowResult::MayOverflow; } /// Return true if we can prove that adding the two values of the /// knownbits will not overflow. /// Otherwise return false. static bool checkRippleForSignedAdd(const KnownBits &LHSKnown, const KnownBits &RHSKnown) { // Addition of two 2's complement numbers having opposite signs will never // overflow. if ((LHSKnown.isNegative() && RHSKnown.isNonNegative()) || (LHSKnown.isNonNegative() && RHSKnown.isNegative())) return true; // If either of the values is known to be non-negative, adding them can only // overflow if the second is also non-negative, so we can assume that. // Two non-negative numbers will only overflow if there is a carry to the // sign bit, so we can check if even when the values are as big as possible // there is no overflow to the sign bit. if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) { APInt MaxLHS = ~LHSKnown.Zero; MaxLHS.clearSignBit(); APInt MaxRHS = ~RHSKnown.Zero; MaxRHS.clearSignBit(); APInt Result = std::move(MaxLHS) + std::move(MaxRHS); return Result.isSignBitClear(); } // If either of the values is known to be negative, adding them can only // overflow if the second is also negative, so we can assume that. // Two negative number will only overflow if there is no carry to the sign // bit, so we can check if even when the values are as small as possible // there is overflow to the sign bit. if (LHSKnown.isNegative() || RHSKnown.isNegative()) { APInt MinLHS = LHSKnown.One; MinLHS.clearSignBit(); APInt MinRHS = RHSKnown.One; MinRHS.clearSignBit(); APInt Result = std::move(MinLHS) + std::move(MinRHS); return Result.isSignBitSet(); } // If we reached here it means that we know nothing about the sign bits. // In this case we can't know if there will be an overflow, since by // changing the sign bits any two values can be made to overflow. 
return false; } static OverflowResult computeOverflowForSignedAdd(const Value *LHS, const Value *RHS, const AddOperator *Add, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { if (Add && Add->hasNoSignedWrap()) { return OverflowResult::NeverOverflows; } // If LHS and RHS each have at least two sign bits, the addition will look // like // // XX..... + // YY..... // // If the carry into the most significant position is 0, X and Y can't both // be 1 and therefore the carry out of the addition is also 0. // // If the carry into the most significant position is 1, X and Y can't both // be 0 and therefore the carry out of the addition is also 1. // // Since the carry into the most significant position is always equal to // the carry out of the addition, there is no signed overflow. if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 && ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1) return OverflowResult::NeverOverflows; KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT); KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT); if (checkRippleForSignedAdd(LHSKnown, RHSKnown)) return OverflowResult::NeverOverflows; // The remaining code needs Add to be available. Early returns if not so. if (!Add) return OverflowResult::MayOverflow; // If the sign of Add is the same as at least one of the operands, this add // CANNOT overflow. This is particularly useful when the sum is // @llvm.assume'ed non-negative rather than proved so from analyzing its // operands. 
bool LHSOrRHSKnownNonNegative = (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()); bool LHSOrRHSKnownNegative = (LHSKnown.isNegative() || RHSKnown.isNegative()); if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) { KnownBits AddKnown = computeKnownBits(Add, DL, /*Depth=*/0, AC, CxtI, DT); if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) || (AddKnown.isNegative() && LHSOrRHSKnownNegative)) { return OverflowResult::NeverOverflows; } } return OverflowResult::MayOverflow; } OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { // If the LHS is negative and the RHS is non-negative, no unsigned wrap. KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT); KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT); if (LHSKnown.isNegative() && RHSKnown.isNonNegative()) return OverflowResult::NeverOverflows; return OverflowResult::MayOverflow; } OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { // If LHS and RHS each have at least two sign bits, the subtraction // cannot overflow. if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 && ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1) return OverflowResult::NeverOverflows; KnownBits LHSKnown = computeKnownBits(LHS, DL, 0, AC, CxtI, DT); KnownBits RHSKnown = computeKnownBits(RHS, DL, 0, AC, CxtI, DT); // Subtraction of two 2's complement numbers having identical signs will // never overflow. 
if ((LHSKnown.isNegative() && RHSKnown.isNegative()) || (LHSKnown.isNonNegative() && RHSKnown.isNonNegative())) return OverflowResult::NeverOverflows; // TODO: implement logic similar to checkRippleForAdd return OverflowResult::MayOverflow; } bool llvm::isOverflowIntrinsicNoWrap(const IntrinsicInst *II, const DominatorTree &DT) { #ifndef NDEBUG auto IID = II->getIntrinsicID(); assert((IID == Intrinsic::sadd_with_overflow || IID == Intrinsic::uadd_with_overflow || IID == Intrinsic::ssub_with_overflow || IID == Intrinsic::usub_with_overflow || IID == Intrinsic::smul_with_overflow || IID == Intrinsic::umul_with_overflow) && "Not an overflow intrinsic!"); #endif SmallVector GuardingBranches; SmallVector Results; for (const User *U : II->users()) { if (const auto *EVI = dyn_cast(U)) { assert(EVI->getNumIndices() == 1 && "Obvious from CI's type"); if (EVI->getIndices()[0] == 0) Results.push_back(EVI); else { assert(EVI->getIndices()[0] == 1 && "Obvious from CI's type"); for (const auto *U : EVI->users()) if (const auto *B = dyn_cast(U)) { assert(B->isConditional() && "How else is it using an i1?"); GuardingBranches.push_back(B); } } } else { // We are using the aggregate directly in a way we don't want to analyze // here (storing it to a global, say). return false; } } auto AllUsesGuardedByBranch = [&](const BranchInst *BI) { BasicBlockEdge NoWrapEdge(BI->getParent(), BI->getSuccessor(1)); if (!NoWrapEdge.isSingleEdge()) return false; // Check if all users of the add are provably no-wrap. for (const auto *Result : Results) { // If the extractvalue itself is not executed on overflow, the we don't // need to check each use separately, since domination is transitive. 
if (DT.dominates(NoWrapEdge, Result->getParent())) continue; for (auto &RU : Result->uses()) if (!DT.dominates(NoWrapEdge, RU)) return false; } return true; }; return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch); } OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1), Add, DL, AC, CxtI, DT); } OverflowResult llvm::computeOverflowForSignedAdd(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, CxtI, DT); } bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { // A memory operation returns normally if it isn't volatile. A volatile // operation is allowed to trap. // // An atomic operation isn't guaranteed to return in a reasonable amount of // time because it's possible for another thread to interfere with it for an // arbitrary length of time, but programs aren't allowed to rely on that. if (const LoadInst *LI = dyn_cast(I)) return !LI->isVolatile(); if (const StoreInst *SI = dyn_cast(I)) return !SI->isVolatile(); if (const AtomicCmpXchgInst *CXI = dyn_cast(I)) return !CXI->isVolatile(); if (const AtomicRMWInst *RMWI = dyn_cast(I)) return !RMWI->isVolatile(); if (const MemIntrinsic *MII = dyn_cast(I)) return !MII->isVolatile(); // If there is no successor, then execution can't transfer to it. if (const auto *CRI = dyn_cast(I)) return !CRI->unwindsToCaller(); if (const auto *CatchSwitch = dyn_cast(I)) return !CatchSwitch->unwindsToCaller(); if (isa(I)) return false; if (isa(I)) return false; if (isa(I)) return false; // Calls can throw, or contain an infinite loop, or kill the process. if (auto CS = ImmutableCallSite(I)) { // Call sites that throw have implicit non-local control flow. 
if (!CS.doesNotThrow()) return false; // Non-throwing call sites can loop infinitely, call exit/pthread_exit // etc. and thus not return. However, LLVM already assumes that // // - Thread exiting actions are modeled as writes to memory invisible to // the program. // // - Loops that don't have side effects (side effects are volatile/atomic // stores and IO) always terminate (see http://llvm.org/PR965). // Furthermore IO itself is also modeled as writes to memory invisible to // the program. // // We rely on those assumptions here, and use the memory effects of the call // target as a proxy for checking that it always returns. // FIXME: This isn't aggressive enough; a call which only writes to a global // is guaranteed to return. return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory() || match(I, m_Intrinsic()) || match(I, m_Intrinsic()); } // Other instructions return normally. return true; } bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) { // TODO: This is slightly consdervative for invoke instruction since exiting // via an exception *is* normal control for them. for (auto I = BB->begin(), E = BB->end(); I != E; ++I) if (!isGuaranteedToTransferExecutionToSuccessor(&*I)) return false; return true; } bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I, const Loop *L) { // The loop header is guaranteed to be executed for every iteration. // // FIXME: Relax this constraint to cover all basic blocks that are // guaranteed to be executed at every iteration. 
if (I->getParent() != L->getHeader()) return false; for (const Instruction &LI : *L->getHeader()) { if (&LI == I) return true; if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false; } llvm_unreachable("Instruction not contained in its own parent basic block."); } bool llvm::propagatesFullPoison(const Instruction *I) { switch (I->getOpcode()) { case Instruction::Add: case Instruction::Sub: case Instruction::Xor: case Instruction::Trunc: case Instruction::BitCast: case Instruction::AddrSpaceCast: case Instruction::Mul: case Instruction::Shl: case Instruction::GetElementPtr: // These operations all propagate poison unconditionally. Note that poison // is not any particular value, so xor or subtraction of poison with // itself still yields poison, not zero. return true; case Instruction::AShr: case Instruction::SExt: // For these operations, one bit of the input is replicated across // multiple output bits. A replicated poison bit is still poison. return true; case Instruction::ICmp: // Comparing poison with any value yields poison. This is why, for // instance, x s< (x +nsw 1) can be folded to true. return true; default: return false; } } const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) { switch (I->getOpcode()) { case Instruction::Store: return cast(I)->getPointerOperand(); case Instruction::Load: return cast(I)->getPointerOperand(); case Instruction::AtomicCmpXchg: return cast(I)->getPointerOperand(); case Instruction::AtomicRMW: return cast(I)->getPointerOperand(); case Instruction::UDiv: case Instruction::SDiv: case Instruction::URem: case Instruction::SRem: return I->getOperand(1); default: return nullptr; } } bool llvm::programUndefinedIfFullPoison(const Instruction *PoisonI) { // We currently only look for uses of poison values within the same basic // block, as that makes it easier to guarantee that the uses will be // executed given that PoisonI is executed. 
// // FIXME: Expand this to consider uses beyond the same basic block. To do // this, look out for the distinction between post-dominance and strong // post-dominance. const BasicBlock *BB = PoisonI->getParent(); // Set of instructions that we have proved will yield poison if PoisonI // does. SmallSet YieldsPoison; SmallSet Visited; YieldsPoison.insert(PoisonI); Visited.insert(PoisonI->getParent()); BasicBlock::const_iterator Begin = PoisonI->getIterator(), End = BB->end(); unsigned Iter = 0; while (Iter++ < MaxDepth) { for (auto &I : make_range(Begin, End)) { if (&I != PoisonI) { const Value *NotPoison = getGuaranteedNonFullPoisonOp(&I); if (NotPoison != nullptr && YieldsPoison.count(NotPoison)) return true; if (!isGuaranteedToTransferExecutionToSuccessor(&I)) return false; } // Mark poison that propagates from I through uses of I. if (YieldsPoison.count(&I)) { for (const User *User : I.users()) { const Instruction *UserI = cast(User); if (propagatesFullPoison(UserI)) YieldsPoison.insert(User); } } } if (auto *NextBB = BB->getSingleSuccessor()) { if (Visited.insert(NextBB).second) { BB = NextBB; Begin = BB->getFirstNonPHI()->getIterator(); End = BB->end(); continue; } } break; } return false; } static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) { if (FMF.noNaNs()) return true; if (auto *C = dyn_cast(V)) return !C->isNaN(); return false; } static bool isKnownNonZero(const Value *V) { if (auto *C = dyn_cast(V)) return !C->isZero(); return false; } /// Match clamp pattern for float types without care about NaNs or signed zeros. /// Given non-min/max outer cmp/select from the clamp pattern this /// function recognizes if it can be substitued by a "canonical" min/max /// pattern. static SelectPatternResult matchFastFloatClamp(CmpInst::Predicate Pred, Value *CmpLHS, Value *CmpRHS, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS) { // Try to match // X < C1 ? C1 : Min(X, C2) --> Max(C1, Min(X, C2)) // X > C1 ? 
C1 : Max(X, C2) --> Min(C1, Max(X, C2)) // and return description of the outer Max/Min. // First, check if select has inverse order: if (CmpRHS == FalseVal) { std::swap(TrueVal, FalseVal); Pred = CmpInst::getInversePredicate(Pred); } // Assume success now. If there's no match, callers should not use these anyway. LHS = TrueVal; RHS = FalseVal; const APFloat *FC1; if (CmpRHS != TrueVal || !match(CmpRHS, m_APFloat(FC1)) || !FC1->isFinite()) return {SPF_UNKNOWN, SPNB_NA, false}; const APFloat *FC2; switch (Pred) { case CmpInst::FCMP_OLT: case CmpInst::FCMP_OLE: case CmpInst::FCMP_ULT: case CmpInst::FCMP_ULE: if (match(FalseVal, m_CombineOr(m_OrdFMin(m_Specific(CmpLHS), m_APFloat(FC2)), m_UnordFMin(m_Specific(CmpLHS), m_APFloat(FC2)))) && FC1->compare(*FC2) == APFloat::cmpResult::cmpLessThan) return {SPF_FMAXNUM, SPNB_RETURNS_ANY, false}; break; case CmpInst::FCMP_OGT: case CmpInst::FCMP_OGE: case CmpInst::FCMP_UGT: case CmpInst::FCMP_UGE: if (match(FalseVal, m_CombineOr(m_OrdFMax(m_Specific(CmpLHS), m_APFloat(FC2)), m_UnordFMax(m_Specific(CmpLHS), m_APFloat(FC2)))) && FC1->compare(*FC2) == APFloat::cmpResult::cmpGreaterThan) return {SPF_FMINNUM, SPNB_RETURNS_ANY, false}; break; default: break; } return {SPF_UNKNOWN, SPNB_NA, false}; } /// Recognize variations of: /// CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v))) static SelectPatternResult matchClamp(CmpInst::Predicate Pred, Value *CmpLHS, Value *CmpRHS, Value *TrueVal, Value *FalseVal) { // Swap the select operands and predicate to match the patterns below. if (CmpRHS != TrueVal) { Pred = ICmpInst::getSwappedPredicate(Pred); std::swap(TrueVal, FalseVal); } const APInt *C1; if (CmpRHS == TrueVal && match(CmpRHS, m_APInt(C1))) { const APInt *C2; // (X SMAX(SMIN(X, C2), C1) if (match(FalseVal, m_SMin(m_Specific(CmpLHS), m_APInt(C2))) && C1->slt(*C2) && Pred == CmpInst::ICMP_SLT) return {SPF_SMAX, SPNB_NA, false}; // (X >s C1) ? 
C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1) if (match(FalseVal, m_SMax(m_Specific(CmpLHS), m_APInt(C2))) && C1->sgt(*C2) && Pred == CmpInst::ICMP_SGT) return {SPF_SMIN, SPNB_NA, false}; // (X UMAX(UMIN(X, C2), C1) if (match(FalseVal, m_UMin(m_Specific(CmpLHS), m_APInt(C2))) && C1->ult(*C2) && Pred == CmpInst::ICMP_ULT) return {SPF_UMAX, SPNB_NA, false}; // (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1) if (match(FalseVal, m_UMax(m_Specific(CmpLHS), m_APInt(C2))) && C1->ugt(*C2) && Pred == CmpInst::ICMP_UGT) return {SPF_UMIN, SPNB_NA, false}; } return {SPF_UNKNOWN, SPNB_NA, false}; } /// Recognize variations of: /// a < c ? min(a,b) : min(b,c) ==> min(min(a,b),min(b,c)) static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred, Value *CmpLHS, Value *CmpRHS, Value *TVal, Value *FVal, unsigned Depth) { // TODO: Allow FP min/max with nnan/nsz. assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison"); Value *A, *B; SelectPatternResult L = matchSelectPattern(TVal, A, B, nullptr, Depth + 1); if (!SelectPatternResult::isMinOrMax(L.Flavor)) return {SPF_UNKNOWN, SPNB_NA, false}; Value *C, *D; SelectPatternResult R = matchSelectPattern(FVal, C, D, nullptr, Depth + 1); if (L.Flavor != R.Flavor) return {SPF_UNKNOWN, SPNB_NA, false}; // We have something like: x Pred y ? min(a, b) : min(c, d). // Try to match the compare to the min/max operations of the select operands. // First, make sure we have the right compare predicate. 
switch (L.Flavor) { case SPF_SMIN: if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) { Pred = ICmpInst::getSwappedPredicate(Pred); std::swap(CmpLHS, CmpRHS); } if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) break; return {SPF_UNKNOWN, SPNB_NA, false}; case SPF_SMAX: if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) { Pred = ICmpInst::getSwappedPredicate(Pred); std::swap(CmpLHS, CmpRHS); } if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) break; return {SPF_UNKNOWN, SPNB_NA, false}; case SPF_UMIN: if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) { Pred = ICmpInst::getSwappedPredicate(Pred); std::swap(CmpLHS, CmpRHS); } if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) break; return {SPF_UNKNOWN, SPNB_NA, false}; case SPF_UMAX: if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) { Pred = ICmpInst::getSwappedPredicate(Pred); std::swap(CmpLHS, CmpRHS); } if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) break; return {SPF_UNKNOWN, SPNB_NA, false}; default: return {SPF_UNKNOWN, SPNB_NA, false}; } // If there is a common operand in the already matched min/max and the other // min/max operands match the compare operands (either directly or inverted), // then this is min/max of the same flavor. // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b)) // ~c pred ~a ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b)) if (D == B) { if ((CmpLHS == A && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) && match(A, m_Not(m_Specific(CmpRHS))))) return {L.Flavor, SPNB_NA, false}; } // a pred d ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d)) // ~d pred ~a ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d)) if (C == B) { if ((CmpLHS == A && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) && match(A, m_Not(m_Specific(CmpRHS))))) return {L.Flavor, SPNB_NA, false}; } // b pred c ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a)) // ~c pred ~b ? 
m(a, b) : m(c, a) --> m(m(a, b), m(c, a)) if (D == A) { if ((CmpLHS == B && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) && match(B, m_Not(m_Specific(CmpRHS))))) return {L.Flavor, SPNB_NA, false}; } // b pred d ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d)) // ~d pred ~b ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d)) if (C == A) { if ((CmpLHS == B && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) && match(B, m_Not(m_Specific(CmpRHS))))) return {L.Flavor, SPNB_NA, false}; } return {SPF_UNKNOWN, SPNB_NA, false}; } /// Match non-obvious integer minimum and maximum sequences. static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, Value *CmpLHS, Value *CmpRHS, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS, unsigned Depth) { // Assume success. If there's no match, callers should not use these anyway. LHS = TrueVal; RHS = FalseVal; SelectPatternResult SPR = matchClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal); if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN) return SPR; SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, Depth); if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN) return SPR; if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT) return {SPF_UNKNOWN, SPNB_NA, false}; // Z = X -nsw Y // (X >s Y) ? 0 : Z ==> (Z >s 0) ? 0 : Z ==> SMIN(Z, 0) // (X (Z SMAX(Z, 0) if (match(TrueVal, m_Zero()) && match(FalseVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false}; // Z = X -nsw Y // (X >s Y) ? Z : 0 ==> (Z >s 0) ? Z : 0 ==> SMAX(Z, 0) // (X (Z SMIN(Z, 0) if (match(FalseVal, m_Zero()) && match(TrueVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false}; const APInt *C1; if (!match(CmpRHS, m_APInt(C1))) return {SPF_UNKNOWN, SPNB_NA, false}; // An unsigned min/max can be written with a signed compare. 
const APInt *C2; if ((CmpLHS == TrueVal && match(FalseVal, m_APInt(C2))) || (CmpLHS == FalseVal && match(TrueVal, m_APInt(C2)))) { // Is the sign bit set? // (X (X >u MAXVAL) ? X : MAXVAL ==> UMAX // (X (X >u MAXVAL) ? MAXVAL : X ==> UMIN if (Pred == CmpInst::ICMP_SLT && C1->isNullValue() && C2->isMaxSignedValue()) return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false}; // Is the sign bit clear? // (X >s -1) ? MINVAL : X ==> (X UMAX // (X >s -1) ? X : MINVAL ==> (X UMIN if (Pred == CmpInst::ICMP_SGT && C1->isAllOnesValue() && C2->isMinSignedValue()) return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false}; } // Look through 'not' ops to find disguised signed min/max. // (X >s C) ? ~X : ~C ==> (~X SMIN(~X, ~C) // (X (~X >s ~C) ? ~X : ~C ==> SMAX(~X, ~C) if (match(TrueVal, m_Not(m_Specific(CmpLHS))) && match(FalseVal, m_APInt(C2)) && ~(*C1) == *C2) return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false}; // (X >s C) ? ~C : ~X ==> (~X SMAX(~C, ~X) // (X (~X >s ~C) ? ~C : ~X ==> SMIN(~C, ~X) if (match(FalseVal, m_Not(m_Specific(CmpLHS))) && match(TrueVal, m_APInt(C2)) && ~(*C1) == *C2) return {Pred == CmpInst::ICMP_SGT ? 
SPF_SMAX : SPF_SMIN, SPNB_NA, false}; return {SPF_UNKNOWN, SPNB_NA, false}; } bool llvm::isKnownNegation(const Value *X, const Value *Y, bool NeedNSW) { assert(X && Y && "Invalid operand"); // X = sub (0, Y) || X = sub nsw (0, Y) if ((!NeedNSW && match(X, m_Sub(m_ZeroInt(), m_Specific(Y)))) || (NeedNSW && match(X, m_NSWSub(m_ZeroInt(), m_Specific(Y))))) return true; // Y = sub (0, X) || Y = sub nsw (0, X) if ((!NeedNSW && match(Y, m_Sub(m_ZeroInt(), m_Specific(X)))) || (NeedNSW && match(Y, m_NSWSub(m_ZeroInt(), m_Specific(X))))) return true; // X = sub (A, B), Y = sub (B, A) || X = sub nsw (A, B), Y = sub nsw (B, A) Value *A, *B; return (!NeedNSW && (match(X, m_Sub(m_Value(A), m_Value(B))) && match(Y, m_Sub(m_Specific(B), m_Specific(A))))) || (NeedNSW && (match(X, m_NSWSub(m_Value(A), m_Value(B))) && match(Y, m_NSWSub(m_Specific(B), m_Specific(A))))); } static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, FastMathFlags FMF, Value *CmpLHS, Value *CmpRHS, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS, unsigned Depth) { LHS = CmpLHS; RHS = CmpRHS; // Signed zero may return inconsistent results between implementations. // (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0 // minNum(0.0, -0.0) // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1) // Therefore, we behave conservatively and only proceed if at least one of the // operands is known to not be zero or if we don't care about signed zero. switch (Pred) { default: break; // FIXME: Include OGT/OLT/UGT/ULT. case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE: case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE: if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) && !isKnownNonZero(CmpRHS)) return {SPF_UNKNOWN, SPNB_NA, false}; } SelectPatternNaNBehavior NaNBehavior = SPNB_NA; bool Ordered = false; // When given one NaN and one non-NaN input: // - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input. // - A simple C99 (a < b ? 
a : b) construction will return 'b' (as the // ordered comparison fails), which could be NaN or non-NaN. // so here we discover exactly what NaN behavior is required/accepted. if (CmpInst::isFPPredicate(Pred)) { bool LHSSafe = isKnownNonNaN(CmpLHS, FMF); bool RHSSafe = isKnownNonNaN(CmpRHS, FMF); if (LHSSafe && RHSSafe) { // Both operands are known non-NaN. NaNBehavior = SPNB_RETURNS_ANY; } else if (CmpInst::isOrdered(Pred)) { // An ordered comparison will return false when given a NaN, so it // returns the RHS. Ordered = true; if (LHSSafe) // LHS is non-NaN, so if RHS is NaN then NaN will be returned. NaNBehavior = SPNB_RETURNS_NAN; else if (RHSSafe) NaNBehavior = SPNB_RETURNS_OTHER; else // Completely unsafe. return {SPF_UNKNOWN, SPNB_NA, false}; } else { Ordered = false; // An unordered comparison will return true when given a NaN, so it // returns the LHS. if (LHSSafe) // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned. NaNBehavior = SPNB_RETURNS_OTHER; else if (RHSSafe) NaNBehavior = SPNB_RETURNS_NAN; else // Completely unsafe. return {SPF_UNKNOWN, SPNB_NA, false}; } } if (TrueVal == CmpRHS && FalseVal == CmpLHS) { std::swap(CmpLHS, CmpRHS); Pred = CmpInst::getSwappedPredicate(Pred); if (NaNBehavior == SPNB_RETURNS_NAN) NaNBehavior = SPNB_RETURNS_OTHER; else if (NaNBehavior == SPNB_RETURNS_OTHER) NaNBehavior = SPNB_RETURNS_NAN; Ordered = !Ordered; } // ([if]cmp X, Y) ? X : Y if (TrueVal == CmpLHS && FalseVal == CmpRHS) { switch (Pred) { default: return {SPF_UNKNOWN, SPNB_NA, false}; // Equality. 
case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: return {SPF_UMAX, SPNB_NA, false}; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: return {SPF_SMAX, SPNB_NA, false}; case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: return {SPF_UMIN, SPNB_NA, false}; case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: return {SPF_SMIN, SPNB_NA, false}; case FCmpInst::FCMP_UGT: case FCmpInst::FCMP_UGE: case FCmpInst::FCMP_OGT: case FCmpInst::FCMP_OGE: return {SPF_FMAXNUM, NaNBehavior, Ordered}; case FCmpInst::FCMP_ULT: case FCmpInst::FCMP_ULE: case FCmpInst::FCMP_OLT: case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered}; } } if (isKnownNegation(TrueVal, FalseVal)) { // Sign-extending LHS does not change its sign, so TrueVal/FalseVal can // match against either LHS or sext(LHS). auto MaybeSExtCmpLHS = m_CombineOr(m_Specific(CmpLHS), m_SExt(m_Specific(CmpLHS))); auto ZeroOrAllOnes = m_CombineOr(m_ZeroInt(), m_AllOnes()); auto ZeroOrOne = m_CombineOr(m_ZeroInt(), m_One()); if (match(TrueVal, MaybeSExtCmpLHS)) { // Set the return values. If the compare uses the negated value (-X >s 0), // swap the return values because the negated value is always 'RHS'. LHS = TrueVal; RHS = FalseVal; if (match(CmpLHS, m_Neg(m_Specific(FalseVal)))) std::swap(LHS, RHS); // (X >s 0) ? X : -X or (X >s -1) ? X : -X --> ABS(X) // (-X >s 0) ? -X : X or (-X >s -1) ? -X : X --> ABS(X) if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes)) return {SPF_ABS, SPNB_NA, false}; // (X NABS(X) // (-X NABS(X) if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne)) return {SPF_NABS, SPNB_NA, false}; } else if (match(FalseVal, MaybeSExtCmpLHS)) { // Set the return values. If the compare uses the negated value (-X >s 0), // swap the return values because the negated value is always 'RHS'. LHS = FalseVal; RHS = TrueVal; if (match(CmpLHS, m_Neg(m_Specific(TrueVal)))) std::swap(LHS, RHS); // (X >s 0) ? -X : X or (X >s -1) ? -X : X --> NABS(X) // (-X >s 0) ? X : -X or (-X >s -1) ? 
X : -X --> NABS(X) if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes)) return {SPF_NABS, SPNB_NA, false}; // (X ABS(X) // (-X ABS(X) if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne)) return {SPF_ABS, SPNB_NA, false}; } } if (CmpInst::isIntPredicate(Pred)) return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS, Depth); // According to (IEEE 754-2008 5.3.1), minNum(0.0, -0.0) and similar // may return either -0.0 or 0.0, so fcmp/select pair has stricter // semantics than minNum. Be conservative in such case. if (NaNBehavior != SPNB_RETURNS_ANY || (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) && !isKnownNonZero(CmpRHS))) return {SPF_UNKNOWN, SPNB_NA, false}; return matchFastFloatClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS); } /// Helps to match a select pattern in case of a type mismatch. /// /// The function processes the case when type of true and false values of a /// select instruction differs from type of the cmp instruction operands because /// of a cast instruction. The function checks if it is legal to move the cast /// operation after "select". If yes, it returns the new second value of /// "select" (with the assumption that cast is moved): /// 1. As operand of cast instruction when both values of "select" are same cast /// instructions. /// 2. As restored constant (by applying reverse cast operation) when the first /// value of the "select" is a cast operation and the second value is a /// constant. /// NOTE: We return only the new second value because the first value could be /// accessed as operand of cast instruction. static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2, Instruction::CastOps *CastOp) { auto *Cast1 = dyn_cast(V1); if (!Cast1) return nullptr; *CastOp = Cast1->getOpcode(); Type *SrcTy = Cast1->getSrcTy(); if (auto *Cast2 = dyn_cast(V2)) { // If V1 and V2 are both the same cast from the same type, look through V1. 
if (*CastOp == Cast2->getOpcode() && SrcTy == Cast2->getSrcTy()) return Cast2->getOperand(0); return nullptr; } auto *C = dyn_cast(V2); if (!C) return nullptr; Constant *CastedTo = nullptr; switch (*CastOp) { case Instruction::ZExt: if (CmpI->isUnsigned()) CastedTo = ConstantExpr::getTrunc(C, SrcTy); break; case Instruction::SExt: if (CmpI->isSigned()) CastedTo = ConstantExpr::getTrunc(C, SrcTy, true); break; case Instruction::Trunc: Constant *CmpConst; if (match(CmpI->getOperand(1), m_Constant(CmpConst)) && CmpConst->getType() == SrcTy) { // Here we have the following case: // // %cond = cmp iN %x, CmpConst // %tr = trunc iN %x to iK // %narrowsel = select i1 %cond, iK %t, iK C // // We can always move trunc after select operation: // // %cond = cmp iN %x, CmpConst // %widesel = select i1 %cond, iN %x, iN CmpConst // %tr = trunc iN %widesel to iK // // Note that C could be extended in any way because we don't care about // upper bits after truncation. It can't be abs pattern, because it would // look like: // // select i1 %cond, x, -x. // // So only min/max pattern could be matched. Such match requires widened C // == CmpConst. That is why set widened C = CmpConst, condition trunc // CmpConst == C is checked below. CastedTo = CmpConst; } else { CastedTo = ConstantExpr::getIntegerCast(C, SrcTy, CmpI->isSigned()); } break; case Instruction::FPTrunc: CastedTo = ConstantExpr::getFPExtend(C, SrcTy, true); break; case Instruction::FPExt: CastedTo = ConstantExpr::getFPTrunc(C, SrcTy, true); break; case Instruction::FPToUI: CastedTo = ConstantExpr::getUIToFP(C, SrcTy, true); break; case Instruction::FPToSI: CastedTo = ConstantExpr::getSIToFP(C, SrcTy, true); break; case Instruction::UIToFP: CastedTo = ConstantExpr::getFPToUI(C, SrcTy, true); break; case Instruction::SIToFP: CastedTo = ConstantExpr::getFPToSI(C, SrcTy, true); break; default: break; } if (!CastedTo) return nullptr; // Make sure the cast doesn't lose any information. 
Constant *CastedBack = ConstantExpr::getCast(*CastOp, CastedTo, C->getType(), true); if (CastedBack != C) return nullptr; return CastedTo; } SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp, unsigned Depth) { if (Depth >= MaxDepth) return {SPF_UNKNOWN, SPNB_NA, false}; SelectInst *SI = dyn_cast(V); if (!SI) return {SPF_UNKNOWN, SPNB_NA, false}; CmpInst *CmpI = dyn_cast(SI->getCondition()); if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false}; CmpInst::Predicate Pred = CmpI->getPredicate(); Value *CmpLHS = CmpI->getOperand(0); Value *CmpRHS = CmpI->getOperand(1); Value *TrueVal = SI->getTrueValue(); Value *FalseVal = SI->getFalseValue(); FastMathFlags FMF; if (isa(CmpI)) FMF = CmpI->getFastMathFlags(); // Bail out early. if (CmpI->isEquality()) return {SPF_UNKNOWN, SPNB_NA, false}; // Deal with type mismatches. if (CastOp && CmpLHS->getType() != TrueVal->getType()) { if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) { // If this is a potential fmin/fmax with a cast to integer, then ignore // -0.0 because there is no corresponding integer value. if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI) FMF.setNoSignedZeros(); return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, cast(TrueVal)->getOperand(0), C, LHS, RHS, Depth); } if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) { // If this is a potential fmin/fmax with a cast to integer, then ignore // -0.0 because there is no corresponding integer value. 
if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI) FMF.setNoSignedZeros(); return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, C, cast(FalseVal)->getOperand(0), LHS, RHS, Depth); } } return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS, Depth); } CmpInst::Predicate llvm::getMinMaxPred(SelectPatternFlavor SPF, bool Ordered) { if (SPF == SPF_SMIN) return ICmpInst::ICMP_SLT; if (SPF == SPF_UMIN) return ICmpInst::ICMP_ULT; if (SPF == SPF_SMAX) return ICmpInst::ICMP_SGT; if (SPF == SPF_UMAX) return ICmpInst::ICMP_UGT; if (SPF == SPF_FMINNUM) return Ordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; if (SPF == SPF_FMAXNUM) return Ordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT; llvm_unreachable("unhandled!"); } SelectPatternFlavor llvm::getInverseMinMaxFlavor(SelectPatternFlavor SPF) { if (SPF == SPF_SMIN) return SPF_SMAX; if (SPF == SPF_UMIN) return SPF_UMAX; if (SPF == SPF_SMAX) return SPF_SMIN; if (SPF == SPF_UMAX) return SPF_UMIN; llvm_unreachable("unhandled!"); } CmpInst::Predicate llvm::getInverseMinMaxPred(SelectPatternFlavor SPF) { return getMinMaxPred(getInverseMinMaxFlavor(SPF)); } /// Return true if "icmp Pred LHS RHS" is always true. 
static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS, const Value *RHS, const DataLayout &DL, unsigned Depth) { assert(!LHS->getType()->isVectorTy() && "TODO: extend to handle vectors!"); if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS) return true; switch (Pred) { default: return false; case CmpInst::ICMP_SLE: { const APInt *C; // LHS s<= LHS +_{nsw} C if C >= 0 if (match(RHS, m_NSWAdd(m_Specific(LHS), m_APInt(C)))) return !C->isNegative(); return false; } case CmpInst::ICMP_ULE: { const APInt *C; // LHS u<= LHS +_{nuw} C for any C if (match(RHS, m_NUWAdd(m_Specific(LHS), m_APInt(C)))) return true; // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB) auto MatchNUWAddsToSameValue = [&](const Value *A, const Value *B, const Value *&X, const APInt *&CA, const APInt *&CB) { if (match(A, m_NUWAdd(m_Value(X), m_APInt(CA))) && match(B, m_NUWAdd(m_Specific(X), m_APInt(CB)))) return true; // If X & C == 0 then (X | C) == X +_{nuw} C if (match(A, m_Or(m_Value(X), m_APInt(CA))) && match(B, m_Or(m_Specific(X), m_APInt(CB)))) { KnownBits Known(CA->getBitWidth()); computeKnownBits(X, Known, DL, Depth + 1, /*AC*/ nullptr, /*CxtI*/ nullptr, /*DT*/ nullptr); if (CA->isSubsetOf(Known.Zero) && CB->isSubsetOf(Known.Zero)) return true; } return false; }; const Value *X; const APInt *CLHS, *CRHS; if (MatchNUWAddsToSameValue(LHS, RHS, X, CLHS, CRHS)) return CLHS->ule(*CRHS); return false; } } } /// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred /// ALHS ARHS" is true. Otherwise, return None. 
static Optional isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS, const Value *ARHS, const Value *BLHS, const Value *BRHS, const DataLayout &DL, unsigned Depth) { switch (Pred) { default: return None; case CmpInst::ICMP_SLT: case CmpInst::ICMP_SLE: if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth) && isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth)) return true; return None; case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE: if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth) && isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth)) return true; return None; } } /// Return true if the operands of the two compares match. IsSwappedOps is true /// when the operands match, but are swapped. static bool isMatchingOps(const Value *ALHS, const Value *ARHS, const Value *BLHS, const Value *BRHS, bool &IsSwappedOps) { bool IsMatchingOps = (ALHS == BLHS && ARHS == BRHS); IsSwappedOps = (ALHS == BRHS && ARHS == BLHS); return IsMatchingOps || IsSwappedOps; } /// Return true if "icmp1 APred ALHS ARHS" implies "icmp2 BPred BLHS BRHS" is /// true. Return false if "icmp1 APred ALHS ARHS" implies "icmp2 BPred BLHS /// BRHS" is false. Otherwise, return None if we can't infer anything. static Optional isImpliedCondMatchingOperands(CmpInst::Predicate APred, const Value *ALHS, const Value *ARHS, CmpInst::Predicate BPred, const Value *BLHS, const Value *BRHS, bool IsSwappedOps) { // Canonicalize the operands so they're matching. if (IsSwappedOps) { std::swap(BLHS, BRHS); BPred = ICmpInst::getSwappedPredicate(BPred); } if (CmpInst::isImpliedTrueByMatchingCmp(APred, BPred)) return true; if (CmpInst::isImpliedFalseByMatchingCmp(APred, BPred)) return false; return None; } /// Return true if "icmp1 APred ALHS C1" implies "icmp2 BPred BLHS C2" is /// true. Return false if "icmp1 APred ALHS C1" implies "icmp2 BPred BLHS /// C2" is false. Otherwise, return None if we can't infer anything. 
static Optional isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, const Value *ALHS, const ConstantInt *C1, CmpInst::Predicate BPred, const Value *BLHS, const ConstantInt *C2) { assert(ALHS == BLHS && "LHS operands must match."); ConstantRange DomCR = ConstantRange::makeExactICmpRegion(APred, C1->getValue()); ConstantRange CR = ConstantRange::makeAllowedICmpRegion(BPred, C2->getValue()); ConstantRange Intersection = DomCR.intersectWith(CR); ConstantRange Difference = DomCR.difference(CR); if (Intersection.isEmptySet()) return false; if (Difference.isEmptySet()) return true; return None; } /// Return true if LHS implies RHS is true. Return false if LHS implies RHS is /// false. Otherwise, return None if we can't infer anything. static Optional isImpliedCondICmps(const ICmpInst *LHS, const ICmpInst *RHS, const DataLayout &DL, bool LHSIsTrue, unsigned Depth) { Value *ALHS = LHS->getOperand(0); Value *ARHS = LHS->getOperand(1); // The rest of the logic assumes the LHS condition is true. If that's not the // case, invert the predicate to make it so. ICmpInst::Predicate APred = LHSIsTrue ? LHS->getPredicate() : LHS->getInversePredicate(); Value *BLHS = RHS->getOperand(0); Value *BRHS = RHS->getOperand(1); ICmpInst::Predicate BPred = RHS->getPredicate(); // Can we infer anything when the two compares have matching operands? bool IsSwappedOps; if (isMatchingOps(ALHS, ARHS, BLHS, BRHS, IsSwappedOps)) { if (Optional Implication = isImpliedCondMatchingOperands( APred, ALHS, ARHS, BPred, BLHS, BRHS, IsSwappedOps)) return Implication; // No amount of additional analysis will infer the second condition, so // early exit. return None; } // Can we infer anything when the LHS operands match and the RHS operands are // constants (not necessarily matching)? 
if (ALHS == BLHS && isa(ARHS) && isa(BRHS)) { if (Optional Implication = isImpliedCondMatchingImmOperands( APred, ALHS, cast(ARHS), BPred, BLHS, cast(BRHS))) return Implication; // No amount of additional analysis will infer the second condition, so // early exit. return None; } if (APred == BPred) return isImpliedCondOperands(APred, ALHS, ARHS, BLHS, BRHS, DL, Depth); return None; } /// Return true if LHS implies RHS is true. Return false if LHS implies RHS is /// false. Otherwise, return None if we can't infer anything. We expect the /// RHS to be an icmp and the LHS to be an 'and' or an 'or' instruction. static Optional isImpliedCondAndOr(const BinaryOperator *LHS, const ICmpInst *RHS, const DataLayout &DL, bool LHSIsTrue, unsigned Depth) { // The LHS must be an 'or' or an 'and' instruction. assert((LHS->getOpcode() == Instruction::And || LHS->getOpcode() == Instruction::Or) && "Expected LHS to be 'and' or 'or'."); assert(Depth <= MaxDepth && "Hit recursion limit"); // If the result of an 'or' is false, then we know both legs of the 'or' are // false. Similarly, if the result of an 'and' is true, then we know both // legs of the 'and' are true. Value *ALHS, *ARHS; if ((!LHSIsTrue && match(LHS, m_Or(m_Value(ALHS), m_Value(ARHS)))) || (LHSIsTrue && match(LHS, m_And(m_Value(ALHS), m_Value(ARHS))))) { // FIXME: Make this non-recursion. if (Optional Implication = isImpliedCondition(ALHS, RHS, DL, LHSIsTrue, Depth + 1)) return Implication; if (Optional Implication = isImpliedCondition(ARHS, RHS, DL, LHSIsTrue, Depth + 1)) return Implication; return None; } return None; } Optional llvm::isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool LHSIsTrue, unsigned Depth) { // Bail out when we hit the limit. if (Depth == MaxDepth) return None; // A mismatch occurs when we compare a scalar cmp to a vector cmp, for // example. 
if (LHS->getType() != RHS->getType()) return None; Type *OpTy = LHS->getType(); assert(OpTy->isIntOrIntVectorTy(1) && "Expected integer type only!"); // LHS ==> RHS by definition if (LHS == RHS) return LHSIsTrue; // FIXME: Extending the code below to handle vectors. if (OpTy->isVectorTy()) return None; assert(OpTy->isIntegerTy(1) && "implied by above"); // Both LHS and RHS are icmps. const ICmpInst *LHSCmp = dyn_cast(LHS); const ICmpInst *RHSCmp = dyn_cast(RHS); if (LHSCmp && RHSCmp) return isImpliedCondICmps(LHSCmp, RHSCmp, DL, LHSIsTrue, Depth); // The LHS should be an 'or' or an 'and' instruction. We expect the RHS to be // an icmp. FIXME: Add support for and/or on the RHS. const BinaryOperator *LHSBO = dyn_cast(LHS); if (LHSBO && RHSCmp) { if ((LHSBO->getOpcode() == Instruction::And || LHSBO->getOpcode() == Instruction::Or)) return isImpliedCondAndOr(LHSBO, RHSCmp, DL, LHSIsTrue, Depth); } return None; } Index: projects/clang700-import/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- projects/clang700-import/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (revision 337644) +++ projects/clang700-import/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (revision 337645) @@ -1,4793 +1,4789 @@ //===- LegalizeDAG.cpp - Implement SelectionDAG::Legalize -----------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the SelectionDAG::Legalize method. 
// //===----------------------------------------------------------------------===// #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Type.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include #include #include #include #include using namespace llvm; #define DEBUG_TYPE "legalizedag" namespace { /// Keeps track of state when getting the sign of a floating-point value as an /// integer. struct FloatSignAsInt { EVT FloatVT; SDValue Chain; SDValue FloatPtr; SDValue IntPtr; MachinePointerInfo IntPointerInfo; MachinePointerInfo FloatPointerInfo; SDValue IntValue; APInt SignMask; uint8_t SignBit; }; //===----------------------------------------------------------------------===// /// This takes an arbitrary SelectionDAG as input and /// hacks on it until the target machine can handle it. 
This involves /// eliminating value sizes the machine cannot handle (promoting small sizes to /// large sizes or splitting up large values into small values) as well as /// eliminating operations the machine cannot handle. /// /// This code also does a small amount of optimization and recognition of idioms /// as part of its processing. For example, if a target does not support a /// 'setcc' instruction efficiently, but does support 'brcc' instruction, this /// will attempt merge setcc and brc instructions into brcc's. class SelectionDAGLegalize { const TargetMachine &TM; const TargetLowering &TLI; SelectionDAG &DAG; /// The set of nodes which have already been legalized. We hold a /// reference to it in order to update as necessary on node deletion. SmallPtrSetImpl &LegalizedNodes; /// A set of all the nodes updated during legalization. SmallSetVector *UpdatedNodes; EVT getSetCCResultType(EVT VT) const { return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } // Libcall insertion helpers. public: SelectionDAGLegalize(SelectionDAG &DAG, SmallPtrSetImpl &LegalizedNodes, SmallSetVector *UpdatedNodes = nullptr) : TM(DAG.getTarget()), TLI(DAG.getTargetLoweringInfo()), DAG(DAG), LegalizedNodes(LegalizedNodes), UpdatedNodes(UpdatedNodes) {} /// Legalizes the given operation. void LegalizeOp(SDNode *Node); private: SDValue OptimizeFloatStore(StoreSDNode *ST); void LegalizeLoadOps(SDNode *Node); void LegalizeStoreOps(SDNode *Node); /// Some targets cannot handle a variable /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it /// is necessary to spill the vector being inserted into to memory, perform /// the insert there, and then read the result back. 
SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, const SDLoc &dl); SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, const SDLoc &dl); /// Return a vector shuffle operation which /// performs the same shuffe in terms of order or result bytes, but on a type /// whose vector element type is narrower than the original shuffle type. /// e.g. <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef Mask) const; bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, bool &NeedInvert, const SDLoc &dl); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, unsigned NumOps, bool isSigned, const SDLoc &dl); std::pair ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128); SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8, RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128); void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl &Results); void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl &Results); SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, const SDLoc &dl); SDValue ExpandBUILD_VECTOR(SDNode *Node); SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node); void ExpandDYNAMIC_STACKALLOC(SDNode *Node, SmallVectorImpl &Results); void getSignAsIntValue(FloatSignAsInt &State, const SDLoc &DL, SDValue Value) const; SDValue modifySignAsInt(const FloatSignAsInt &State, const SDLoc &DL, SDValue NewIntValue) const; SDValue ExpandFCOPYSIGN(SDNode *Node) const; SDValue ExpandFABS(SDNode *Node) const; SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, const 
SDLoc &dl); SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, const SDLoc &dl); SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, const SDLoc &dl); SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl); SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl); SDValue ExpandBitCount(unsigned Opc, SDValue Op, const SDLoc &dl); SDValue ExpandExtractFromVectorThroughStack(SDValue Op); SDValue ExpandInsertToVectorThroughStack(SDValue Op); SDValue ExpandVectorBuildThroughStack(SDNode* Node); SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP); SDValue ExpandConstant(ConstantSDNode *CP); // if ExpandNode returns false, LegalizeOp falls back to ConvertNodeToLibcall bool ExpandNode(SDNode *Node); void ConvertNodeToLibcall(SDNode *Node); void PromoteNode(SDNode *Node); public: // Node replacement helpers void ReplacedNode(SDNode *N) { LegalizedNodes.erase(N); if (UpdatedNodes) UpdatedNodes->insert(N); } void ReplaceNode(SDNode *Old, SDNode *New) { LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); dbgs() << " with: "; New->dump(&DAG)); assert(Old->getNumValues() == New->getNumValues() && "Replacing one node with another that produces a different number " "of values!"); DAG.ReplaceAllUsesWith(Old, New); if (UpdatedNodes) UpdatedNodes->insert(New); ReplacedNode(Old); } void ReplaceNode(SDValue Old, SDValue New) { LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); dbgs() << " with: "; New->dump(&DAG)); DAG.ReplaceAllUsesWith(Old, New); if (UpdatedNodes) UpdatedNodes->insert(New.getNode()); ReplacedNode(Old.getNode()); } void ReplaceNode(SDNode *Old, const SDValue *New) { LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG)); DAG.ReplaceAllUsesWith(Old, New); for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) { LLVM_DEBUG(dbgs() << (i == 0 ? 
" with: " : " and: "); New[i]->dump(&DAG)); if (UpdatedNodes) UpdatedNodes->insert(New[i].getNode()); } ReplacedNode(Old); } }; } // end anonymous namespace /// Return a vector shuffle operation which /// performs the same shuffe in terms of order or result bytes, but on a type /// whose vector element type is narrower than the original shuffle type. /// e.g. <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> SDValue SelectionDAGLegalize::ShuffleWithNarrowerEltType( EVT NVT, EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef Mask) const { unsigned NumMaskElts = VT.getVectorNumElements(); unsigned NumDestElts = NVT.getVectorNumElements(); unsigned NumEltsGrowth = NumDestElts / NumMaskElts; assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!"); if (NumEltsGrowth == 1) return DAG.getVectorShuffle(NVT, dl, N1, N2, Mask); SmallVector NewMask; for (unsigned i = 0; i != NumMaskElts; ++i) { int Idx = Mask[i]; for (unsigned j = 0; j != NumEltsGrowth; ++j) { if (Idx < 0) NewMask.push_back(-1); else NewMask.push_back(Idx * NumEltsGrowth + j); } } assert(NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?"); assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?"); return DAG.getVectorShuffle(NVT, dl, N1, N2, NewMask); } /// Expands the ConstantFP node to an integer constant or /// a load from the constant pool. SDValue SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { bool Extend = false; SDLoc dl(CFP); // If a FP immediate is precise when represented as a float and if the // target can do an extending load from float to double, we put it into // the constant pool as a float, even if it's is statically typed as a // double. This shrinks FP constants and canonicalizes them for targets where // an FP extending load is the same cost as a normal load (such as on the x87 // fp stack or PPC FP unit). 
EVT VT = CFP->getValueType(0); ConstantFP *LLVMC = const_cast(CFP->getConstantFPValue()); if (!UseCP) { assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion"); return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(), dl, (VT == MVT::f64) ? MVT::i64 : MVT::i32); } APFloat APF = CFP->getValueAPF(); EVT OrigVT = VT; EVT SVT = VT; // We don't want to shrink SNaNs. Converting the SNaN back to its real type // can cause it to be changed into a QNaN on some platforms (e.g. on SystemZ). if (!APF.isSignaling()) { while (SVT != MVT::f32 && SVT != MVT::f16) { SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1); if (ConstantFPSDNode::isValueValidForType(SVT, APF) && // Only do this if the target has a native EXTLOAD instruction from // smaller type. TLI.isLoadExtLegal(ISD::EXTLOAD, OrigVT, SVT) && TLI.ShouldShrinkFPConstant(OrigVT)) { Type *SType = SVT.getTypeForEVT(*DAG.getContext()); LLVMC = cast(ConstantExpr::getFPTrunc(LLVMC, SType)); VT = SVT; Extend = true; } } } SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast(CPIdx)->getAlignment(); if (Extend) { SDValue Result = DAG.getExtLoad( ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), VT, Alignment); return Result; } SDValue Result = DAG.getLoad( OrigVT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment); return Result; } /// Expands the Constant node to a load from the constant pool. 
SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) {
  SDLoc dl(CP);
  EVT VT = CP->getValueType(0);
  SDValue CPIdx = DAG.getConstantPool(CP->getConstantIntValue(),
                                      TLI.getPointerTy(DAG.getDataLayout()));
  unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
  SDValue Result = DAG.getLoad(
      VT, dl, DAG.getEntryNode(), CPIdx,
      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment);
  return Result;
}

/// Some targets cannot handle a variable insertion index for the
/// INSERT_VECTOR_ELT instruction.  In this case, it
/// is necessary to spill the vector being inserted into to memory, perform
/// the insert there, and then read the result back.
///
/// \p Vec is the vector, \p Val the scalar to insert and \p Idx the element
/// index. Returns the load of the updated vector.
SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec,
                                                             SDValue Val,
                                                             SDValue Idx,
                                                             const SDLoc &dl) {
  // If the target doesn't support this, we have to spill the input vector
  // to a temporary stack slot, update the element, then reload it.  This is
  // badness.  We could also load the value into a vector register (either
  // with a "move to register" or "extload into register" instruction, then
  // permute it into place, if the idx is a constant and if the idx is
  // supported by the target.
  EVT VT = Vec.getValueType();
  EVT EltVT = VT.getVectorElementType();
  SDValue StackPtr = DAG.CreateStackTemporary(VT);

  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

  // Store the vector.
  SDValue Ch = DAG.getStore(
      DAG.getEntryNode(), dl, Vec, StackPtr,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI));

  // Address of the element selected by Idx inside the stack slot.
  SDValue StackPtr2 = TLI.getVectorElementPointer(DAG, StackPtr, VT, Idx);

  // Store the scalar value.
  Ch = DAG.getTruncStore(Ch, dl, Val, StackPtr2, MachinePointerInfo(), EltVT);
  // Load the updated vector.
  return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack(
                                               DAG.getMachineFunction(), SPFI));
}

/// Expand INSERT_VECTOR_ELT. With a constant index and a compatible scalar
/// type this becomes SCALAR_TO_VECTOR plus a vector shuffle; every other case
/// falls back to PerformInsertVectorEltInMemory.
SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
                                                      SDValue Idx,
                                                      const SDLoc &dl) {
  if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) {
    // SCALAR_TO_VECTOR requires that the type of the value being inserted
    // match the element type of the vector being created, except for
    // integers in which case the inserted value can be over width.
    EVT EltVT = Vec.getValueType().getVectorElementType();
    if (Val.getValueType() == EltVT ||
        (EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) {
      SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
                                  Vec.getValueType(), Val);

      unsigned NumElts = Vec.getValueType().getVectorNumElements();
      // We generate a shuffle of InVec and ScVec, so the shuffle mask
      // should be 0,1,2,3,4,5... with the appropriate element replaced with
      // elt 0 of the RHS.
      SmallVector<int, 8> ShufOps;
      for (unsigned i = 0; i != NumElts; ++i)
        ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts);

      return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec, ShufOps);
    }
  }
  return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl);
}

/// Try to rewrite a store of a floating-point constant as an integer store of
/// the same bit pattern. Returns a null SDValue when no rewrite applies.
SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
  LLVM_DEBUG(dbgs() << "Optimizing float store operations\n");
  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  // FIXME: We shouldn't do this for TargetConstantFP's.
  // FIXME: move this to the DAG Combiner!  Note that we can't regress due
  // to phase ordering between legalized code and the dag combiner.  This
  // probably means that we need to integrate dag combiner and legalizer
  // together.
  // We generally can't do this one for long doubles.
SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); unsigned Alignment = ST->getAlignment(); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); SDLoc dl(ST); if (ConstantFPSDNode *CFP = dyn_cast(ST->getValue())) { if (CFP->getValueType(0) == MVT::f32 && TLI.isTypeLegal(MVT::i32)) { SDValue Con = DAG.getConstant(CFP->getValueAPF(). bitcastToAPInt().zextOrTrunc(32), SDLoc(CFP), MVT::i32); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), Alignment, MMOFlags, AAInfo); } if (CFP->getValueType(0) == MVT::f64) { // If this target supports 64-bit registers, do a single 64-bit store. if (TLI.isTypeLegal(MVT::i64)) { SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). zextOrTrunc(64), SDLoc(CFP), MVT::i64); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), Alignment, MMOFlags, AAInfo); } if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) { // Otherwise, if the target supports 32-bit registers, use 2 32-bit // stores. If the target supports neither 32- nor 64-bits, this // xform is certainly not worth it. 
const APInt &IntVal = CFP->getValueAPF().bitcastToAPInt(); SDValue Lo = DAG.getConstant(IntVal.trunc(32), dl, MVT::i32); SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), dl, MVT::i32); if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), Alignment, MMOFlags, AAInfo); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(4, dl, Ptr.getValueType())); Hi = DAG.getStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), MinAlign(Alignment, 4U), MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } } } return SDValue(nullptr, 0); } void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { StoreSDNode *ST = cast(Node); SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); SDLoc dl(Node); unsigned Alignment = ST->getAlignment(); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); if (!ST->isTruncatingStore()) { LLVM_DEBUG(dbgs() << "Legalizing store operation\n"); if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { ReplaceNode(ST, OptStore); return; } SDValue Value = ST->getValue(); MVT VT = Value.getSimpleValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { // If this is an unaligned store and the target doesn't support it, // expand it. 
EVT MemVT = ST->getMemoryVT(); unsigned AS = ST->getAddressSpace(); unsigned Align = ST->getAlignment(); const DataLayout &DL = DAG.getDataLayout(); if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned store\n"); SDValue Result = TLI.expandUnalignedStore(ST, DAG); ReplaceNode(SDValue(ST, 0), Result); } else LLVM_DEBUG(dbgs() << "Legal store\n"); break; } case TargetLowering::Custom: { LLVM_DEBUG(dbgs() << "Trying custom lowering\n"); SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Res && Res != SDValue(Node, 0)) ReplaceNode(SDValue(Node, 0), Res); return; } case TargetLowering::Promote: { MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT); assert(NVT.getSizeInBits() == VT.getSizeInBits() && "Can only promote stores to same size type"); Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value); SDValue Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), Alignment, MMOFlags, AAInfo); ReplaceNode(SDValue(Node, 0), Result); break; } } return; } LLVM_DEBUG(dbgs() << "Legalizing truncating store operations\n"); SDValue Value = ST->getValue(); EVT StVT = ST->getMemoryVT(); unsigned StWidth = StVT.getSizeInBits(); auto &DL = DAG.getDataLayout(); if (StWidth != StVT.getStoreSizeInBits()) { // Promote to a byte-sized store with upper bits zero if not // storing an integral number of bytes. For example, promote // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits()); Value = DAG.getZeroExtendInReg(Value, dl, StVT); SDValue Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT, Alignment, MMOFlags, AAInfo); ReplaceNode(SDValue(Node, 0), Result); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. 
assert(!StVT.isVector() && "Unsupported truncstore!"); unsigned RoundWidth = 1 << Log2_32(StWidth); assert(RoundWidth < StWidth); unsigned ExtraWidth = StWidth - RoundWidth; assert(ExtraWidth < RoundWidth); assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && "Store size not an integral number of bytes!"); EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); SDValue Lo, Hi; unsigned IncrementSize; if (DL.isLittleEndian()) { // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) // Store the bottom RoundWidth bits. Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), RoundVT, Alignment, MMOFlags, AAInfo); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getNode( ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(RoundWidth, dl, TLI.getShiftAmountTy(Value.getValueType(), DL))); Hi = DAG.getTruncStore( Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); } else { // Big endian - avoid unaligned stores. // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X // Store the top RoundWidth bits. Hi = DAG.getNode( ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(ExtraWidth, dl, TLI.getShiftAmountTy(Value.getValueType(), DL))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), RoundVT, Alignment, MMOFlags, AAInfo); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Lo = DAG.getTruncStore( Chain, dl, Value, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); } // The order of the stores doesn't matter. 
SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); ReplaceNode(SDValue(Node, 0), Result); } else { switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { EVT MemVT = ST->getMemoryVT(); unsigned AS = ST->getAddressSpace(); unsigned Align = ST->getAlignment(); // If this is an unaligned store and the target doesn't support it, // expand it. if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { SDValue Result = TLI.expandUnalignedStore(ST, DAG); ReplaceNode(SDValue(ST, 0), Result); } break; } case TargetLowering::Custom: { SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Res && Res != SDValue(Node, 0)) ReplaceNode(SDValue(Node, 0), Res); return; } case TargetLowering::Expand: assert(!StVT.isVector() && "Vector Stores are handled in LegalizeVectorOps"); SDValue Result; // TRUNCSTORE:i16 i32 -> STORE i16 if (TLI.isTypeLegal(StVT)) { Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), Alignment, MMOFlags, AAInfo); } else { // The in-memory type isn't legal. Truncate to the type it would promote // to, and then do a truncstore. Value = DAG.getNode(ISD::TRUNCATE, dl, TLI.getTypeToTransformTo(*DAG.getContext(), StVT), Value); Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), StVT, Alignment, MMOFlags, AAInfo); } ReplaceNode(SDValue(Node, 0), Result); break; } } } void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { LoadSDNode *LD = cast(Node); SDValue Chain = LD->getChain(); // The chain. SDValue Ptr = LD->getBasePtr(); // The base pointer. SDValue Value; // The value returned by the load op. 
SDLoc dl(Node); ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { LLVM_DEBUG(dbgs() << "Legalizing non-extending load operation\n"); MVT VT = Node->getSimpleValueType(0); SDValue RVal = SDValue(Node, 0); SDValue RChain = SDValue(Node, 1); switch (TLI.getOperationAction(Node->getOpcode(), VT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { EVT MemVT = LD->getMemoryVT(); unsigned AS = LD->getAddressSpace(); unsigned Align = LD->getAlignment(); const DataLayout &DL = DAG.getDataLayout(); // If this is an unaligned load and the target doesn't support it, // expand it. if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG); } break; } case TargetLowering::Custom: if (SDValue Res = TLI.LowerOperation(RVal, DAG)) { RVal = Res; RChain = Res.getValue(1); } break; case TargetLowering::Promote: { MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); assert(NVT.getSizeInBits() == VT.getSizeInBits() && "Can only promote loads to same size type"); SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getMemOperand()); RVal = DAG.getNode(ISD::BITCAST, dl, VT, Res); RChain = Res.getValue(1); break; } } if (RChain.getNode() != Node) { assert(RVal.getNode() != Node && "Load must be completely replaced"); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), RVal); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), RChain); if (UpdatedNodes) { UpdatedNodes->insert(RVal.getNode()); UpdatedNodes->insert(RChain.getNode()); } ReplacedNode(Node); } return; } LLVM_DEBUG(dbgs() << "Legalizing extending load operation\n"); EVT SrcVT = LD->getMemoryVT(); unsigned SrcWidth = SrcVT.getSizeInBits(); unsigned Alignment = LD->getAlignment(); MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); if (SrcWidth != SrcVT.getStoreSizeInBits() && // Some targets pretend to have an i1 loading operation, and 
actually // load an i8. This trick is correct for ZEXTLOAD because the top 7 // bits are guaranteed to be zero; it helps the optimizers understand // that these bits are zero. It is also useful for EXTLOAD, since it // tells the optimizers that those bits are undefined. It would be // nice to have an effective generic way of getting these benefits... // Until such a way is found, don't insist on promoting i1 here. (SrcVT != MVT::i1 || TLI.getLoadExtAction(ExtType, Node->getValueType(0), MVT::i1) == TargetLowering::Promote)) { // Promote to a byte-sized load if not loading an integral number of // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. unsigned NewWidth = SrcVT.getStoreSizeInBits(); EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth); SDValue Ch; // The extra bits are guaranteed to be zero, since we stored them that // way. A zext load from NVT thus automatically gives zext from SrcVT. ISD::LoadExtType NewExtType = ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; SDValue Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), NVT, Alignment, MMOFlags, AAInfo); Ch = Result.getValue(1); // The chain. if (ExtType == ISD::SEXTLOAD) // Having the top bits zero doesn't help when sign extending. Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Result.getValueType(), Result, DAG.getValueType(SrcVT)); else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) // All the top bits are guaranteed to be zero - inform the optimizers. Result = DAG.getNode(ISD::AssertZext, dl, Result.getValueType(), Result, DAG.getValueType(SrcVT)); Value = Result; Chain = Ch; } else if (SrcWidth & (SrcWidth - 1)) { // If not loading a power-of-2 number of bits, expand as two loads. 
assert(!SrcVT.isVector() && "Unsupported extload!"); unsigned RoundWidth = 1 << Log2_32(SrcWidth); assert(RoundWidth < SrcWidth); unsigned ExtraWidth = SrcWidth - RoundWidth; assert(ExtraWidth < RoundWidth); assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && "Load size not an integral number of bytes!"); EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); SDValue Lo, Hi, Ch; unsigned IncrementSize; auto &DL = DAG.getDataLayout(); if (DL.isLittleEndian()) { // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) // Load the bottom RoundWidth bits. Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), RoundVT, Alignment, MMOFlags, AAInfo); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); // Build a factor node to remember that this load is independent of // the other one. Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); // Move the top bits to the right place. Hi = DAG.getNode( ISD::SHL, dl, Hi.getValueType(), Hi, DAG.getConstant(RoundWidth, dl, TLI.getShiftAmountTy(Hi.getValueType(), DL))); // Join the hi and lo parts. Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); } else { // Big endian - avoid unaligned loads. // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 // Load the top RoundWidth bits. Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), RoundVT, Alignment, MMOFlags, AAInfo); // Load the remaining ExtraWidth bits. 
IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); // Build a factor node to remember that this load is independent of // the other one. Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); // Move the top bits to the right place. Hi = DAG.getNode( ISD::SHL, dl, Hi.getValueType(), Hi, DAG.getConstant(ExtraWidth, dl, TLI.getShiftAmountTy(Hi.getValueType(), DL))); // Join the hi and lo parts. Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); } Chain = Ch; } else { bool isCustom = false; switch (TLI.getLoadExtAction(ExtType, Node->getValueType(0), SrcVT.getSimpleVT())) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: isCustom = true; LLVM_FALLTHROUGH; case TargetLowering::Legal: Value = SDValue(Node, 0); Chain = SDValue(Node, 1); if (isCustom) { if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) { Value = Res; Chain = Res.getValue(1); } } else { // If this is an unaligned load and the target doesn't support it, // expand it. EVT MemVT = LD->getMemoryVT(); unsigned AS = LD->getAddressSpace(); unsigned Align = LD->getAlignment(); const DataLayout &DL = DAG.getDataLayout(); if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { std::tie(Value, Chain) = TLI.expandUnalignedLoad(LD, DAG); } } break; case TargetLowering::Expand: { EVT DestVT = Node->getValueType(0); if (!TLI.isLoadExtLegal(ISD::EXTLOAD, DestVT, SrcVT)) { // If the source type is not legal, see if there is a legal extload to // an intermediate type that we can then extend further. EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT()); if (TLI.isTypeLegal(SrcVT) || // Same as SrcVT == LoadVT? 
TLI.isLoadExtLegal(ExtType, LoadVT, SrcVT)) { // If we are loading a legal type, this is a non-extload followed by a // full extend. ISD::LoadExtType MidExtType = (LoadVT == SrcVT) ? ISD::NON_EXTLOAD : ExtType; SDValue Load = DAG.getExtLoad(MidExtType, dl, LoadVT, Chain, Ptr, SrcVT, LD->getMemOperand()); unsigned ExtendOp = ISD::getExtForLoadExtType(SrcVT.isFloatingPoint(), ExtType); Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); Chain = Load.getValue(1); break; } // Handle the special case of fp16 extloads. EXTLOAD doesn't have the // normal undefined upper bits behavior to allow using an in-reg extend // with the illegal FP type, so load as an integer and do the // from-integer conversion. if (SrcVT.getScalarType() == MVT::f16) { EVT ISrcVT = SrcVT.changeTypeToInteger(); EVT IDestVT = DestVT.changeTypeToInteger(); EVT LoadVT = TLI.getRegisterType(IDestVT.getSimpleVT()); SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, LoadVT, Chain, Ptr, ISrcVT, LD->getMemOperand()); Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result); Chain = Result.getValue(1); break; } } assert(!SrcVT.isVector() && "Vector Loads are handled in LegalizeVectorOps"); // FIXME: This does not work for vectors on most targets. Sign- // and zero-extend operations are currently folded into extending // loads, whether they are legal or not, and then we end up here // without any support for legalizing them. assert(ExtType != ISD::EXTLOAD && "EXTLOAD should always be supported!"); // Turn the unsupported load into an EXTLOAD followed by an // explicit zero/sign extend inreg. 
SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), Chain, Ptr, SrcVT, LD->getMemOperand()); SDValue ValRes; if (ExtType == ISD::SEXTLOAD) ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Result.getValueType(), Result, DAG.getValueType(SrcVT)); else ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); Value = ValRes; Chain = Result.getValue(1); break; } } } // Since loads produce two values, make sure to remember that we legalized // both of them. if (Chain.getNode() != Node) { assert(Value.getNode() != Node && "Load must be completely replaced"); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Value); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); if (UpdatedNodes) { UpdatedNodes->insert(Value.getNode()); UpdatedNodes->insert(Chain.getNode()); } ReplacedNode(Node); } } /// Return a legal replacement for the given operation, with all legal operands. void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { LLVM_DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG)); // Allow illegal target nodes and illegal registers. 
if (Node->getOpcode() == ISD::TargetConstant || Node->getOpcode() == ISD::Register) return; #ifndef NDEBUG for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) assert((TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == TargetLowering::TypeLegal || TLI.isTypeLegal(Node->getValueType(i))) && "Unexpected illegal type!"); for (const SDValue &Op : Node->op_values()) assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == TargetLowering::TypeLegal || TLI.isTypeLegal(Op.getValueType()) || Op.getOpcode() == ISD::TargetConstant || Op.getOpcode() == ISD::Register) && "Unexpected illegal type!"); #endif // Figure out the correct action; the way to query this varies by opcode TargetLowering::LegalizeAction Action = TargetLowering::Legal; bool SimpleFinishLegalizing = true; switch (Node->getOpcode()) { case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: case ISD::STACKSAVE: Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); break; case ISD::GET_DYNAMIC_AREA_OFFSET: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); break; case ISD::VAARG: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); if (Action != TargetLowering::Promote) Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); break; case ISD::FP_TO_FP16: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::EXTRACT_VECTOR_ELT: Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(0).getValueType()); break; case ISD::FP_ROUND_INREG: case ISD::SIGN_EXTEND_INREG: { EVT InnerType = cast(Node->getOperand(1))->getVT(); Action = TLI.getOperationAction(Node->getOpcode(), InnerType); break; } case ISD::ATOMIC_STORE: Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(2).getValueType()); break; case ISD::SELECT_CC: case ISD::SETCC: case ISD::BR_CC: { unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : Node->getOpcode() == ISD::SETCC ? 
2 : 1; unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0; MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType(); ISD::CondCode CCCode = cast(Node->getOperand(CCOperand))->get(); Action = TLI.getCondCodeAction(CCCode, OpVT); if (Action == TargetLowering::Legal) { if (Node->getOpcode() == ISD::SELECT_CC) Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); else Action = TLI.getOperationAction(Node->getOpcode(), OpVT); } break; } case ISD::LOAD: case ISD::STORE: // FIXME: Model these properly. LOAD and STORE are complicated, and // STORE expects the unlegalized operand in some cases. SimpleFinishLegalizing = false; break; case ISD::CALLSEQ_START: case ISD::CALLSEQ_END: // FIXME: This shouldn't be necessary. These nodes have special properties // dealing with the recursive nature of legalization. Removing this // special case should be done as part of making LegalizeDAG non-recursive. SimpleFinishLegalizing = false; break; case ISD::EXTRACT_ELEMENT: case ISD::FLT_ROUNDS_: case ISD::MERGE_VALUES: case ISD::EH_RETURN: case ISD::FRAME_TO_ARGS_OFFSET: case ISD::EH_DWARF_CFA: case ISD::EH_SJLJ_SETJMP: case ISD::EH_SJLJ_LONGJMP: case ISD::EH_SJLJ_SETUP_DISPATCH: // These operations lie about being legal: when they claim to be legal, // they should actually be expanded. Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); if (Action == TargetLowering::Legal) Action = TargetLowering::Expand; break; case ISD::INIT_TRAMPOLINE: case ISD::ADJUST_TRAMPOLINE: case ISD::FRAMEADDR: case ISD::RETURNADDR: case ISD::ADDROFRETURNADDR: // These operations lie about being legal: when they claim to be legal, // they should actually be custom-lowered. 
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); if (Action == TargetLowering::Legal) Action = TargetLowering::Custom; break; case ISD::READCYCLECOUNTER: // READCYCLECOUNTER returns an i64, even if type legalization might have // expanded that to several smaller types. Action = TLI.getOperationAction(Node->getOpcode(), MVT::i64); break; case ISD::READ_REGISTER: case ISD::WRITE_REGISTER: // Named register is legal in the DAG, but blocked by register name // selection if not implemented by target (to chose the correct register) // They'll be converted to Copy(To/From)Reg. Action = TargetLowering::Legal; break; case ISD::DEBUGTRAP: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); if (Action == TargetLowering::Expand) { // replace ISD::DEBUGTRAP with ISD::TRAP SDValue NewVal; NewVal = DAG.getNode(ISD::TRAP, SDLoc(Node), Node->getVTList(), Node->getOperand(0)); ReplaceNode(Node, NewVal.getNode()); LegalizeOp(NewVal.getNode()); return; } break; case ISD::STRICT_FADD: case ISD::STRICT_FSUB: case ISD::STRICT_FMUL: case ISD::STRICT_FDIV: case ISD::STRICT_FSQRT: case ISD::STRICT_FMA: case ISD::STRICT_FPOW: case ISD::STRICT_FPOWI: case ISD::STRICT_FSIN: case ISD::STRICT_FCOS: case ISD::STRICT_FEXP: case ISD::STRICT_FEXP2: case ISD::STRICT_FLOG: case ISD::STRICT_FLOG10: case ISD::STRICT_FLOG2: case ISD::STRICT_FRINT: case ISD::STRICT_FNEARBYINT: // These pseudo-ops get legalized as if they were their non-strict // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT // is also legal, but if ISD::FSQRT requires expansion then so does // ISD::STRICT_FSQRT. 
Action = TLI.getStrictFPOperationAction(Node->getOpcode(), Node->getValueType(0)); break; default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { Action = TargetLowering::Legal; } else { Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); } break; } if (SimpleFinishLegalizing) { SDNode *NewNode = Node; switch (Node->getOpcode()) { default: break; case ISD::SHL: case ISD::SRL: case ISD::SRA: case ISD::ROTL: case ISD::ROTR: { // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. SDValue Op0 = Node->getOperand(0); SDValue Op1 = Node->getOperand(1); if (!Op1.getValueType().isVector()) { SDValue SAO = DAG.getShiftAmountOperand(Op0.getValueType(), Op1); // The getShiftAmountOperand() may create a new operand node or // return the existing one. If new operand is created we need // to update the parent node. // Do not try to legalize SAO here! It will be automatically legalized // in the next round. if (SAO != Op1) NewNode = DAG.UpdateNodeOperands(Node, Op0, SAO); } } break; case ISD::SRL_PARTS: case ISD::SRA_PARTS: case ISD::SHL_PARTS: { // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. SDValue Op0 = Node->getOperand(0); SDValue Op1 = Node->getOperand(1); SDValue Op2 = Node->getOperand(2); if (!Op2.getValueType().isVector()) { SDValue SAO = DAG.getShiftAmountOperand(Op0.getValueType(), Op2); // The getShiftAmountOperand() may create a new operand node or // return the existing one. If new operand is created we need // to update the parent node. 
if (SAO != Op2) NewNode = DAG.UpdateNodeOperands(Node, Op0, Op1, SAO); } break; } } if (NewNode != Node) { ReplaceNode(Node, NewNode); Node = NewNode; } switch (Action) { case TargetLowering::Legal: LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n"); return; case TargetLowering::Custom: LLVM_DEBUG(dbgs() << "Trying custom legalization\n"); // FIXME: The handling for custom lowering with multiple results is // a complete mess. if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) { if (!(Res.getNode() != Node || Res.getResNo() != 0)) return; if (Node->getNumValues() == 1) { LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n"); // We can just directly replace this node with the lowered value. ReplaceNode(SDValue(Node, 0), Res); return; } SmallVector ResultVals; for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) ResultVals.push_back(Res.getValue(i)); LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n"); ReplaceNode(Node, ResultVals.data()); return; } LLVM_DEBUG(dbgs() << "Could not custom legalize node\n"); LLVM_FALLTHROUGH; case TargetLowering::Expand: if (ExpandNode(Node)) return; LLVM_FALLTHROUGH; case TargetLowering::LibCall: ConvertNodeToLibcall(Node); return; case TargetLowering::Promote: PromoteNode(Node); return; } } switch (Node->getOpcode()) { default: #ifndef NDEBUG dbgs() << "NODE: "; Node->dump( &DAG); dbgs() << "\n"; #endif llvm_unreachable("Do not know how to legalize this operator!"); case ISD::CALLSEQ_START: case ISD::CALLSEQ_END: break; case ISD::LOAD: return LegalizeLoadOps(Node); case ISD::STORE: return LegalizeStoreOps(Node); } } SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { SDValue Vec = Op.getOperand(0); SDValue Idx = Op.getOperand(1); SDLoc dl(Op); // Before we generate a new store to a temporary stack slot, see if there is // already one that we can use. 
There often is because when we scalarize // vector operations (using SelectionDAG::UnrollVectorOp for example) a whole // series of EXTRACT_VECTOR_ELT nodes are generated, one for each element in // the vector. If all are expanded here, we don't want one store per vector // element. // Caches for hasPredecessorHelper SmallPtrSet Visited; SmallVector Worklist; Worklist.push_back(Idx.getNode()); SDValue StackPtr, Ch; for (SDNode::use_iterator UI = Vec.getNode()->use_begin(), UE = Vec.getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (StoreSDNode *ST = dyn_cast(User)) { if (ST->isIndexed() || ST->isTruncatingStore() || ST->getValue() != Vec) continue; // Make sure that nothing else could have stored into the destination of // this store. if (!ST->getChain().reachesChainWithoutSideEffects(DAG.getEntryNode())) continue; // If the index is dependent on the store we will introduce a cycle when // creating the load (the load uses the index, and by replacing the chain // we will make the index dependent on the load). Also, the store might be // dependent on the extractelement and introduce a cycle when creating // the load. if (SDNode::hasPredecessorHelper(ST, Visited, Worklist) || ST->hasPredecessor(Op.getNode())) continue; StackPtr = ST->getBasePtr(); Ch = SDValue(ST, 0); break; } } EVT VecVT = Vec.getValueType(); if (!Ch.getNode()) { // Store the value to a temporary stack slot, then LOAD the returned part. StackPtr = DAG.CreateStackTemporary(VecVT); Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); } StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); SDValue NewLoad; if (Op.getValueType().isVector()) NewLoad = DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, MachinePointerInfo()); else NewLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(), VecVT.getVectorElementType()); // Replace the chain going out of the store, by the one out of the load. 
DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1)); // We introduced a cycle though, so update the loads operands, making sure // to use the original store's chain as an incoming chain. SmallVector NewLoadOperands(NewLoad->op_begin(), NewLoad->op_end()); NewLoadOperands[0] = Ch; NewLoad = SDValue(DAG.UpdateNodeOperands(NewLoad.getNode(), NewLoadOperands), 0); return NewLoad; } SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { assert(Op.getValueType().isVector() && "Non-vector insert subvector!"); SDValue Vec = Op.getOperand(0); SDValue Part = Op.getOperand(1); SDValue Idx = Op.getOperand(2); SDLoc dl(Op); // Store the value to a temporary stack slot, then LOAD the returned part. EVT VecVT = Vec.getValueType(); SDValue StackPtr = DAG.CreateStackTemporary(VecVT); int FI = cast(StackPtr.getNode())->getIndex(); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); // First store the whole vector. SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); // Then store the inserted part. SDValue SubStackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); // Store the subvector. Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, MachinePointerInfo()); // Finally, load the updated vector. return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo); } SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { // We can't handle this case efficiently. Allocate a sufficiently // aligned object on the stack, store each element into it, then load // the result as a vector. // Create the stack frame object. EVT VT = Node->getValueType(0); EVT EltVT = VT.getVectorElementType(); SDLoc dl(Node); SDValue FIPtr = DAG.CreateStackTemporary(VT); int FI = cast(FIPtr.getNode())->getIndex(); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); // Emit a store of each element to the stack slot. 
SmallVector Stores; unsigned TypeByteSize = EltVT.getSizeInBits() / 8; // Store (in the right endianness) the elements to memory. for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { // Ignore undef elements. if (Node->getOperand(i).isUndef()) continue; unsigned Offset = TypeByteSize*i; SDValue Idx = DAG.getConstant(Offset, dl, FIPtr.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx); // If the destination vector element type is narrower than the source // element type, only store the bits necessary. if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) { Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(i), Idx, PtrInfo.getWithOffset(Offset), EltVT)); } else Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i), Idx, PtrInfo.getWithOffset(Offset))); } SDValue StoreChain; if (!Stores.empty()) // Not all undef elements? StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); else StoreChain = DAG.getEntryNode(); // Result is a load from the stack slot. return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo); } /// Bitcast a floating-point value to an integer value. Only bitcast the part /// containing the sign bit if the target has no integer value capable of /// holding all bits of the floating-point value. void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, const SDLoc &DL, SDValue Value) const { EVT FloatVT = Value.getValueType(); unsigned NumBits = FloatVT.getSizeInBits(); State.FloatVT = FloatVT; EVT IVT = EVT::getIntegerVT(*DAG.getContext(), NumBits); // Convert to an integer of the same size. if (TLI.isTypeLegal(IVT)) { State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value); State.SignMask = APInt::getSignMask(NumBits); State.SignBit = NumBits - 1; return; } auto &DataLayout = DAG.getDataLayout(); // Store the float to memory, then load the sign part out as an integer. 
MVT LoadTy = TLI.getRegisterType(*DAG.getContext(), MVT::i8); // First create a temporary that is aligned for both the load and store. SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy); int FI = cast(StackPtr.getNode())->getIndex(); // Then store the float to it. State.FloatPtr = StackPtr; MachineFunction &MF = DAG.getMachineFunction(); State.FloatPointerInfo = MachinePointerInfo::getFixedStack(MF, FI); State.Chain = DAG.getStore(DAG.getEntryNode(), DL, Value, State.FloatPtr, State.FloatPointerInfo); SDValue IntPtr; if (DataLayout.isBigEndian()) { assert(FloatVT.isByteSized() && "Unsupported floating point type!"); // Load out a legal integer with the same sign bit as the float. IntPtr = StackPtr; State.IntPointerInfo = State.FloatPointerInfo; } else { // Advance the pointer so that the loaded byte will contain the sign bit. unsigned ByteOffset = (FloatVT.getSizeInBits() / 8) - 1; IntPtr = DAG.getNode(ISD::ADD, DL, StackPtr.getValueType(), StackPtr, DAG.getConstant(ByteOffset, DL, StackPtr.getValueType())); State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI, ByteOffset); } State.IntPtr = IntPtr; State.IntValue = DAG.getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain, IntPtr, State.IntPointerInfo, MVT::i8); State.SignMask = APInt::getOneBitSet(LoadTy.getSizeInBits(), 7); State.SignBit = 7; } /// Replace the integer value produced by getSignAsIntValue() with a new value /// and cast the result back to a floating-point type. SDValue SelectionDAGLegalize::modifySignAsInt(const FloatSignAsInt &State, const SDLoc &DL, SDValue NewIntValue) const { if (!State.Chain) return DAG.getNode(ISD::BITCAST, DL, State.FloatVT, NewIntValue); // Override the part containing the sign bit in the value stored on the stack. 
SDValue Chain = DAG.getTruncStore(State.Chain, DL, NewIntValue, State.IntPtr, State.IntPointerInfo, MVT::i8); return DAG.getLoad(State.FloatVT, DL, Chain, State.FloatPtr, State.FloatPointerInfo); } SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const { SDLoc DL(Node); SDValue Mag = Node->getOperand(0); SDValue Sign = Node->getOperand(1); // Get sign bit into an integer value. FloatSignAsInt SignAsInt; getSignAsIntValue(SignAsInt, DL, Sign); EVT IntVT = SignAsInt.IntValue.getValueType(); SDValue SignMask = DAG.getConstant(SignAsInt.SignMask, DL, IntVT); SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, SignAsInt.IntValue, SignMask); // If FABS is legal transform FCOPYSIGN(x, y) => sign(x) ? -FABS(x) : FABS(X) EVT FloatVT = Mag.getValueType(); if (TLI.isOperationLegalOrCustom(ISD::FABS, FloatVT) && TLI.isOperationLegalOrCustom(ISD::FNEG, FloatVT)) { SDValue AbsValue = DAG.getNode(ISD::FABS, DL, FloatVT, Mag); SDValue NegValue = DAG.getNode(ISD::FNEG, DL, FloatVT, AbsValue); SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(IntVT), SignBit, DAG.getConstant(0, DL, IntVT), ISD::SETNE); return DAG.getSelect(DL, FloatVT, Cond, NegValue, AbsValue); } // Transform Mag value to integer, and clear the sign bit. FloatSignAsInt MagAsInt; getSignAsIntValue(MagAsInt, DL, Mag); EVT MagVT = MagAsInt.IntValue.getValueType(); SDValue ClearSignMask = DAG.getConstant(~MagAsInt.SignMask, DL, MagVT); SDValue ClearedSign = DAG.getNode(ISD::AND, DL, MagVT, MagAsInt.IntValue, ClearSignMask); // Get the signbit at the right position for MagAsInt. 
int ShiftAmount = SignAsInt.SignBit - MagAsInt.SignBit; + EVT ShiftVT = IntVT; + if (SignBit.getValueSizeInBits() < ClearedSign.getValueSizeInBits()) { + SignBit = DAG.getNode(ISD::ZERO_EXTEND, DL, MagVT, SignBit); + ShiftVT = MagVT; + } + if (ShiftAmount > 0) { + SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, ShiftVT); + SignBit = DAG.getNode(ISD::SRL, DL, ShiftVT, SignBit, ShiftCnst); + } else if (ShiftAmount < 0) { + SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, ShiftVT); + SignBit = DAG.getNode(ISD::SHL, DL, ShiftVT, SignBit, ShiftCnst); + } if (SignBit.getValueSizeInBits() > ClearedSign.getValueSizeInBits()) { - if (ShiftAmount > 0) { - SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, IntVT); - SignBit = DAG.getNode(ISD::SRL, DL, IntVT, SignBit, ShiftCnst); - } else if (ShiftAmount < 0) { - SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, IntVT); - SignBit = DAG.getNode(ISD::SHL, DL, IntVT, SignBit, ShiftCnst); - } SignBit = DAG.getNode(ISD::TRUNCATE, DL, MagVT, SignBit); - } else if (SignBit.getValueSizeInBits() < ClearedSign.getValueSizeInBits()) { - SignBit = DAG.getNode(ISD::ZERO_EXTEND, DL, MagVT, SignBit); - if (ShiftAmount > 0) { - SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, MagVT); - SignBit = DAG.getNode(ISD::SRL, DL, MagVT, SignBit, ShiftCnst); - } else if (ShiftAmount < 0) { - SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, MagVT); - SignBit = DAG.getNode(ISD::SHL, DL, MagVT, SignBit, ShiftCnst); - } } // Store the part with the modified sign and convert back to float. SDValue CopiedSign = DAG.getNode(ISD::OR, DL, MagVT, ClearedSign, SignBit); return modifySignAsInt(MagAsInt, DL, CopiedSign); } SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const { SDLoc DL(Node); SDValue Value = Node->getOperand(0); // Transform FABS(x) => FCOPYSIGN(x, 0.0) if FCOPYSIGN is legal. 
EVT FloatVT = Value.getValueType(); if (TLI.isOperationLegalOrCustom(ISD::FCOPYSIGN, FloatVT)) { SDValue Zero = DAG.getConstantFP(0.0, DL, FloatVT); return DAG.getNode(ISD::FCOPYSIGN, DL, FloatVT, Value, Zero); } // Transform value to integer, clear the sign bit and transform back. FloatSignAsInt ValueAsInt; getSignAsIntValue(ValueAsInt, DL, Value); EVT IntVT = ValueAsInt.IntValue.getValueType(); SDValue ClearSignMask = DAG.getConstant(~ValueAsInt.SignMask, DL, IntVT); SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, ValueAsInt.IntValue, ClearSignMask); return modifySignAsInt(ValueAsInt, DL, ClearedSign); } void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, SmallVectorImpl &Results) { unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and" " not tell us which reg is the stack pointer!"); SDLoc dl(Node); EVT VT = Node->getValueType(0); SDValue Tmp1 = SDValue(Node, 0); SDValue Tmp2 = SDValue(Node, 1); SDValue Tmp3 = Node->getOperand(2); SDValue Chain = Tmp1.getOperand(0); // Chain the dynamic stack allocation so that it doesn't modify the stack // pointer when other instructions are using the stack. 
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl); SDValue Size = Tmp2.getOperand(1); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); unsigned Align = cast(Tmp3)->getZExtValue(); unsigned StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlignment(); Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value if (Align > StackAlign) Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, DAG.getConstant(-(uint64_t)Align, dl, VT)); Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true), DAG.getIntPtrConstant(0, dl, true), SDValue(), dl); Results.push_back(Tmp1); Results.push_back(Tmp2); } /// Legalize a SETCC with given LHS and RHS and condition code CC on the current /// target. /// /// If the SETCC has been legalized using AND / OR, then the legalized node /// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert /// will be set to false. /// /// If the SETCC has been legalized by using getSetCCSwappedOperands(), /// then the values of LHS and RHS will be swapped, CC will be set to the /// new condition, and NeedInvert will be set to false. /// /// If the SETCC has been legalized using the inverse condcode, then LHS and /// RHS will be unchanged, CC will set to the inverted condcode, and NeedInvert /// will be set to true. The caller must invert the result of the SETCC with /// SelectionDAG::getLogicalNOT() or take equivalent action to swap the effect /// of a true/false result. /// /// \returns true if the SetCC has been legalized, false if it hasn't. 
bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, bool &NeedInvert, const SDLoc &dl) { MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast(CC)->get(); NeedInvert = false; bool NeedSwap = false; switch (TLI.getCondCodeAction(CCCode, OpVT)) { default: llvm_unreachable("Unknown condition code action!"); case TargetLowering::Legal: // Nothing to do. break; case TargetLowering::Expand: { ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode); if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) { std::swap(LHS, RHS); CC = DAG.getCondCode(InvCC); return true; } // Swapping operands didn't work. Try inverting the condition. InvCC = getSetCCInverse(CCCode, OpVT.isInteger()); if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) { // If inverting the condition is not enough, try swapping operands // on top of it. InvCC = ISD::getSetCCSwappedOperands(InvCC); NeedSwap = true; } if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) { CC = DAG.getCondCode(InvCC); NeedInvert = true; if (NeedSwap) std::swap(LHS, RHS); return true; } ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; unsigned Opc = 0; switch (CCCode) { default: llvm_unreachable("Don't know how to expand this condition!"); case ISD::SETO: assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) && "If SETO is expanded, SETOEQ must be legal!"); CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break; case ISD::SETUO: assert(TLI.isCondCodeLegal(ISD::SETUNE, OpVT) && "If SETUO is expanded, SETUNE must be legal!"); CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; break; case ISD::SETOEQ: case ISD::SETOGT: case ISD::SETOGE: case ISD::SETOLT: case ISD::SETOLE: case ISD::SETONE: case ISD::SETUEQ: case ISD::SETUNE: case ISD::SETUGT: case ISD::SETUGE: case ISD::SETULT: case ISD::SETULE: // If we are floating point, assign and break, otherwise fall through. 
if (!OpVT.isInteger()) { // We can use the 4th bit to tell if we are the unordered // or ordered version of the opcode. CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO; Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND; CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10); break; } // Fallthrough if we are unsigned integer. LLVM_FALLTHROUGH; case ISD::SETLE: case ISD::SETGT: case ISD::SETGE: case ISD::SETLT: case ISD::SETNE: case ISD::SETEQ: // If all combinations of inverting the condition and swapping operands // didn't work then we have no means to expand the condition. llvm_unreachable("Don't know how to expand this condition!"); } SDValue SetCC1, SetCC2; if (CCCode != ISD::SETO && CCCode != ISD::SETUO) { // If we aren't the ordered or unorder operation, // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS). SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1); SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2); } else { // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS) SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1); SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2); } LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2); RHS = SDValue(); CC = SDValue(); return true; } } return false; } /// Emit a store/load combination to the stack. This stores /// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does /// a load from the stack slot to DestVT, extending it if needed. /// The resultant code need not be legal. SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, const SDLoc &dl) { // Create the stack frame object. 
unsigned SrcAlign = DAG.getDataLayout().getPrefTypeAlignment( SrcOp.getValueType().getTypeForEVT(*DAG.getContext())); SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign); FrameIndexSDNode *StackPtrFI = cast(FIPtr); int SPFI = StackPtrFI->getIndex(); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); unsigned SrcSize = SrcOp.getValueSizeInBits(); unsigned SlotSize = SlotVT.getSizeInBits(); unsigned DestSize = DestVT.getSizeInBits(); Type *DestType = DestVT.getTypeForEVT(*DAG.getContext()); unsigned DestAlign = DAG.getDataLayout().getPrefTypeAlignment(DestType); // Emit a store to the stack slot. Use a truncstore if the input value is // later than DestVT. SDValue Store; if (SrcSize > SlotSize) Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo, SlotVT, SrcAlign); else { assert(SrcSize == SlotSize && "Invalid store"); Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo, SrcAlign); } // Result is a load from the stack slot. if (SlotSize == DestSize) return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, DestAlign); assert(SlotSize < DestSize && "Unknown extension!"); return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, PtrInfo, SlotVT, DestAlign); } SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { SDLoc dl(Node); // Create a vector sized/aligned stack slot, store the value to element #0, // then load the whole vector back out. 
SDValue StackPtr = DAG.CreateStackTemporary(Node->getValueType(0)); FrameIndexSDNode *StackPtrFI = cast(StackPtr); int SPFI = StackPtrFI->getIndex(); SDValue Ch = DAG.getTruncStore( DAG.getEntryNode(), dl, Node->getOperand(0), StackPtr, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), Node->getValueType(0).getVectorElementType()); return DAG.getLoad( Node->getValueType(0), dl, Ch, StackPtr, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI)); } static bool ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG, const TargetLowering &TLI, SDValue &Res) { unsigned NumElems = Node->getNumOperands(); SDLoc dl(Node); EVT VT = Node->getValueType(0); // Try to group the scalars into pairs, shuffle the pairs together, then // shuffle the pairs of pairs together, etc. until the vector has // been built. This will work only if all of the necessary shuffle masks // are legal. // We do this in two phases; first to check the legality of the shuffles, // and next, assuming that all shuffles are legal, to create the new nodes. for (int Phase = 0; Phase < 2; ++Phase) { SmallVector>, 16> IntermedVals, NewIntermedVals; for (unsigned i = 0; i < NumElems; ++i) { SDValue V = Node->getOperand(i); if (V.isUndef()) continue; SDValue Vec; if (Phase) Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, V); IntermedVals.push_back(std::make_pair(Vec, SmallVector(1, i))); } while (IntermedVals.size() > 2) { NewIntermedVals.clear(); for (unsigned i = 0, e = (IntermedVals.size() & ~1u); i < e; i += 2) { // This vector and the next vector are shuffled together (simply to // append the one to the other). 
SmallVector ShuffleVec(NumElems, -1); SmallVector FinalIndices; FinalIndices.reserve(IntermedVals[i].second.size() + IntermedVals[i+1].second.size()); int k = 0; for (unsigned j = 0, f = IntermedVals[i].second.size(); j != f; ++j, ++k) { ShuffleVec[k] = j; FinalIndices.push_back(IntermedVals[i].second[j]); } for (unsigned j = 0, f = IntermedVals[i+1].second.size(); j != f; ++j, ++k) { ShuffleVec[k] = NumElems + j; FinalIndices.push_back(IntermedVals[i+1].second[j]); } SDValue Shuffle; if (Phase) Shuffle = DAG.getVectorShuffle(VT, dl, IntermedVals[i].first, IntermedVals[i+1].first, ShuffleVec); else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT)) return false; NewIntermedVals.push_back( std::make_pair(Shuffle, std::move(FinalIndices))); } // If we had an odd number of defined values, then append the last // element to the array of new vectors. if ((IntermedVals.size() & 1) != 0) NewIntermedVals.push_back(IntermedVals.back()); IntermedVals.swap(NewIntermedVals); } assert(IntermedVals.size() <= 2 && IntermedVals.size() > 0 && "Invalid number of intermediate vectors"); SDValue Vec1 = IntermedVals[0].first; SDValue Vec2; if (IntermedVals.size() > 1) Vec2 = IntermedVals[1].first; else if (Phase) Vec2 = DAG.getUNDEF(VT); SmallVector ShuffleVec(NumElems, -1); for (unsigned i = 0, e = IntermedVals[0].second.size(); i != e; ++i) ShuffleVec[IntermedVals[0].second[i]] = i; for (unsigned i = 0, e = IntermedVals[1].second.size(); i != e; ++i) ShuffleVec[IntermedVals[1].second[i]] = NumElems + i; if (Phase) Res = DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec); else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT)) return false; } return true; } /// Expand a BUILD_VECTOR node on targets that don't /// support the operation, but do support the resultant vector type. 
SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { unsigned NumElems = Node->getNumOperands(); SDValue Value1, Value2; SDLoc dl(Node); EVT VT = Node->getValueType(0); EVT OpVT = Node->getOperand(0).getValueType(); EVT EltVT = VT.getVectorElementType(); // If the only non-undef value is the low element, turn this into a // SCALAR_TO_VECTOR node. If this is { X, X, X, X }, determine X. bool isOnlyLowElement = true; bool MoreThanTwoValues = false; bool isConstant = true; for (unsigned i = 0; i < NumElems; ++i) { SDValue V = Node->getOperand(i); if (V.isUndef()) continue; if (i > 0) isOnlyLowElement = false; if (!isa(V) && !isa(V)) isConstant = false; if (!Value1.getNode()) { Value1 = V; } else if (!Value2.getNode()) { if (V != Value1) Value2 = V; } else if (V != Value1 && V != Value2) { MoreThanTwoValues = true; } } if (!Value1.getNode()) return DAG.getUNDEF(VT); if (isOnlyLowElement) return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Node->getOperand(0)); // If all elements are constants, create a load from the constant pool. if (isConstant) { SmallVector CV; for (unsigned i = 0, e = NumElems; i != e; ++i) { if (ConstantFPSDNode *V = dyn_cast(Node->getOperand(i))) { CV.push_back(const_cast(V->getConstantFPValue())); } else if (ConstantSDNode *V = dyn_cast(Node->getOperand(i))) { if (OpVT==EltVT) CV.push_back(const_cast(V->getConstantIntValue())); else { // If OpVT and EltVT don't match, EltVT is not legal and the // element values have been promoted/truncated earlier. Undo this; // we don't want a v16i8 to become a v16i32 for example. 
const ConstantInt *CI = V->getConstantIntValue(); CV.push_back(ConstantInt::get(EltVT.getTypeForEVT(*DAG.getContext()), CI->getZExtValue())); } } else { assert(Node->getOperand(i).isUndef()); Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext()); CV.push_back(UndefValue::get(OpNTy)); } } Constant *CP = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast(CPIdx)->getAlignment(); return DAG.getLoad( VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment); } SmallSet DefinedValues; for (unsigned i = 0; i < NumElems; ++i) { if (Node->getOperand(i).isUndef()) continue; DefinedValues.insert(Node->getOperand(i)); } if (TLI.shouldExpandBuildVectorWithShuffles(VT, DefinedValues.size())) { if (!MoreThanTwoValues) { SmallVector ShuffleVec(NumElems, -1); for (unsigned i = 0; i < NumElems; ++i) { SDValue V = Node->getOperand(i); if (V.isUndef()) continue; ShuffleVec[i] = V == Value1 ? 0 : NumElems; } if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) { // Get the splatted value into the low element of a vector register. SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1); SDValue Vec2; if (Value2.getNode()) Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2); else Vec2 = DAG.getUNDEF(VT); // Return shuffle(LowValVec, undef, <0,0,0,0>) return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec); } } else { SDValue Res; if (ExpandBVWithShuffles(Node, DAG, TLI, Res)) return Res; } } // Otherwise, we can't handle this case efficiently. return ExpandVectorBuildThroughStack(Node); } // Expand a node into a call to a libcall. If the result value // does not fit into a register, return the lo part and set the hi part to the // by-reg argument. If it does fit into a single register, return the result // and leave the Hi part unset. 
SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (const SDValue &Op : Node->op_values()) { EVT ArgVT = Op.getValueType(); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); Entry.Node = Op; Entry.Ty = ArgTy; Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, isSigned); Entry.IsZExt = !TLI.shouldSignExtendTypeInLibCall(ArgVT, isSigned); Args.push_back(Entry); } SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy(DAG.getDataLayout())); EVT RetVT = Node->getValueType(0); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); // By default, the input chain to this libcall is the entry node of the // function. If the libcall is going to be emitted as a tail call then // TLI.isUsedByReturnOnly will change it to the right chain if the return // node which is being folded has a non-entry input chain. SDValue InChain = DAG.getEntryNode(); // isTailCall may be true since the callee does not reference caller stack // frame. Check if it's in the right position and that the return types match. SDValue TCChain = InChain; const Function &F = DAG.getMachineFunction().getFunction(); bool isTailCall = TLI.isInTailCallPosition(DAG, Node, TCChain) && (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy()); if (isTailCall) InChain = TCChain; TargetLowering::CallLoweringInfo CLI(DAG); bool signExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, isSigned); CLI.setDebugLoc(SDLoc(Node)) .setChain(InChain) .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setTailCall(isTailCall) .setSExtResult(signExtend) .setZExtResult(!signExtend) .setIsPostTypeLegalization(true); std::pair CallInfo = TLI.LowerCallTo(CLI); if (!CallInfo.second.getNode()) { LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump()); // It's a tailcall, return the chain (which is the DAG root). 
return DAG.getRoot(); } LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump()); return CallInfo.first; } /// Generate a libcall taking the given operands as arguments /// and returning a result of type RetVT. SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, unsigned NumOps, bool isSigned, const SDLoc &dl) { TargetLowering::ArgListTy Args; Args.reserve(NumOps); TargetLowering::ArgListEntry Entry; for (unsigned i = 0; i != NumOps; ++i) { Entry.Node = Ops[i]; Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); Entry.IsSExt = isSigned; Entry.IsZExt = !isSigned; Args.push_back(Entry); } SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy(DAG.getDataLayout())); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl) .setChain(DAG.getEntryNode()) .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setSExtResult(isSigned) .setZExtResult(!isSigned) .setIsPostTypeLegalization(true); std::pair CallInfo = TLI.LowerCallTo(CLI); return CallInfo.first; } // Expand a node into a call to a libcall. Similar to // ExpandLibCall except that the first operand is the in-chain. 
std::pair SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { SDValue InChain = Node->getOperand(0); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { EVT ArgVT = Node->getOperand(i).getValueType(); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy; Entry.IsSExt = isSigned; Entry.IsZExt = !isSigned; Args.push_back(Entry); } SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy(DAG.getDataLayout())); Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(SDLoc(Node)) .setChain(InChain) .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setSExtResult(isSigned) .setZExtResult(!isSigned); std::pair CallInfo = TLI.LowerCallTo(CLI); return CallInfo; } SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128) { if (Node->isStrictFPOpcode()) Node = DAG.mutateStrictFPToFP(Node); RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = Call_F32; break; case MVT::f64: LC = Call_F64; break; case MVT::f80: LC = Call_F80; break; case MVT::f128: LC = Call_F128; break; case MVT::ppcf128: LC = Call_PPCF128; break; } return ExpandLibCall(LC, Node, false); } SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, RTLIB::Libcall Call_I8, RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128) { RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC = Call_I8; break; case MVT::i16: LC = Call_I16; break; case 
MVT::i32: LC = Call_I32; break; case MVT::i64: LC = Call_I64; break; case MVT::i128: LC = Call_I128; break; } return ExpandLibCall(LC, Node, isSigned); } /// Issue libcalls to __{u}divmod to compute div / rem pairs. void SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl &Results) { unsigned Opcode = Node->getOpcode(); bool isSigned = Opcode == ISD::SDIVREM; RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; } // The input chain to this libcall is the entry node of the function. // Legalizing the call will automatically add the previous call to the // dependence. SDValue InChain = DAG.getEntryNode(); EVT RetVT = Node->getValueType(0); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (const SDValue &Op : Node->op_values()) { EVT ArgVT = Op.getValueType(); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); Entry.Node = Op; Entry.Ty = ArgTy; Entry.IsSExt = isSigned; Entry.IsZExt = !isSigned; Args.push_back(Entry); } // Also pass the return address of the remainder. 
SDValue FIPtr = DAG.CreateStackTemporary(RetVT); Entry.Node = FIPtr; Entry.Ty = RetTy->getPointerTo(); Entry.IsSExt = isSigned; Entry.IsZExt = !isSigned; Args.push_back(Entry); SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy(DAG.getDataLayout())); SDLoc dl(Node); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl) .setChain(InChain) .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setSExtResult(isSigned) .setZExtResult(!isSigned); std::pair CallInfo = TLI.LowerCallTo(CLI); // Remainder is loaded back from the stack frame. SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, MachinePointerInfo()); Results.push_back(CallInfo.first); Results.push_back(Rem); } /// Return true if sincos libcall is available. static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = RTLIB::SINCOS_F32; break; case MVT::f64: LC = RTLIB::SINCOS_F64; break; case MVT::f80: LC = RTLIB::SINCOS_F80; break; case MVT::f128: LC = RTLIB::SINCOS_F128; break; case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break; } return TLI.getLibcallName(LC) != nullptr; } /// Only issue sincos libcall if both sin and cos are needed. static bool useSinCos(SDNode *Node) { unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN ? ISD::FCOS : ISD::FSIN; SDValue Op0 = Node->getOperand(0); for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), UE = Op0.getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User == Node) continue; // The other user might have been turned into sincos already. if (User->getOpcode() == OtherOpcode || User->getOpcode() == ISD::FSINCOS) return true; } return false; } /// Issue libcalls to sincos to compute sin / cos pairs. 
void SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl &Results) { RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = RTLIB::SINCOS_F32; break; case MVT::f64: LC = RTLIB::SINCOS_F64; break; case MVT::f80: LC = RTLIB::SINCOS_F80; break; case MVT::f128: LC = RTLIB::SINCOS_F128; break; case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break; } // The input chain to this libcall is the entry node of the function. // Legalizing the call will automatically add the previous call to the // dependence. SDValue InChain = DAG.getEntryNode(); EVT RetVT = Node->getValueType(0); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; // Pass the argument. Entry.Node = Node->getOperand(0); Entry.Ty = RetTy; Entry.IsSExt = false; Entry.IsZExt = false; Args.push_back(Entry); // Pass the return address of sin. SDValue SinPtr = DAG.CreateStackTemporary(RetVT); Entry.Node = SinPtr; Entry.Ty = RetTy->getPointerTo(); Entry.IsSExt = false; Entry.IsZExt = false; Args.push_back(Entry); // Also pass the return address of the cos. 
SDValue CosPtr = DAG.CreateStackTemporary(RetVT); Entry.Node = CosPtr; Entry.Ty = RetTy->getPointerTo(); Entry.IsSExt = false; Entry.IsZExt = false; Args.push_back(Entry); SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy(DAG.getDataLayout())); SDLoc dl(Node); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(InChain).setLibCallee( TLI.getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args)); std::pair CallInfo = TLI.LowerCallTo(CLI); Results.push_back( DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr, MachinePointerInfo())); Results.push_back( DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr, MachinePointerInfo())); } /// This function is responsible for legalizing a /// INT_TO_FP operation of the specified operand when the target requests that /// we expand it. At this point, we know that the result and operand types are /// legal for the target. SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, const SDLoc &dl) { // TODO: Should any fast-math-flags be set for the created nodes? LLVM_DEBUG(dbgs() << "Legalizing INT_TO_FP\n"); if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { LLVM_DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double " "expansion\n"); // Get the stack frame index of a 8 byte buffer. 
SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64); // word offset constant for Hi/Lo address computation SDValue WordOff = DAG.getConstant(sizeof(int), dl, StackSlot.getValueType()); // set up Hi and Lo (into buffer) address based on endian SDValue Hi = StackSlot; SDValue Lo = DAG.getNode(ISD::ADD, dl, StackSlot.getValueType(), StackSlot, WordOff); if (DAG.getDataLayout().isLittleEndian()) std::swap(Hi, Lo); // if signed map to unsigned space SDValue Op0Mapped; if (isSigned) { // constant used to invert sign bit (signed to unsigned mapping) SDValue SignBit = DAG.getConstant(0x80000000u, dl, MVT::i32); Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit); } else { Op0Mapped = Op0; } // store the lo of the constructed double - based on integer input SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op0Mapped, Lo, MachinePointerInfo()); // initial hi portion of constructed double SDValue InitialHi = DAG.getConstant(0x43300000u, dl, MVT::i32); // store the hi of the constructed double - biased exponent SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi, MachinePointerInfo()); // load the constructed double SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, MachinePointerInfo()); // FP constant to bias correct the final result SDValue Bias = DAG.getConstantFP(isSigned ? BitsToDouble(0x4330000080000000ULL) : BitsToDouble(0x4330000000000000ULL), dl, MVT::f64); // subtract the bias SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias); // final result SDValue Result; // handle final rounding if (DestVT == MVT::f64) { // do nothing Result = Sub; } else if (DestVT.bitsLT(MVT::f64)) { Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub, DAG.getIntPtrConstant(0, dl)); } else if (DestVT.bitsGT(MVT::f64)) { Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub); } return Result; } assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet"); // Code below here assumes !isSigned without checking again. 
// Implementation of unsigned i64 to f64 following the algorithm in // __floatundidf in compiler_rt. This implementation has the advantage // of performing rounding correctly, both in the default rounding mode // and in all alternate rounding modes. // TODO: Generalize this for use with other types. if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) { LLVM_DEBUG(dbgs() << "Converting unsigned i64 to f64\n"); SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, MVT::i64); SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), dl, MVT::f64); SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, MVT::i64); SDValue Lo = DAG.getZeroExtendInReg(Op0, dl, MVT::i32); SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, DAG.getConstant(32, dl, MVT::i64)); SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52); SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84); SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr); SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, HiOr); SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt, TwoP84PlusTwoP52); return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub); } // TODO: Generalize this for use with other types. if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) { LLVM_DEBUG(dbgs() << "Converting unsigned i64 to f32\n"); // For unsigned conversions, convert them to signed conversions using the // algorithm from the x86_64 __floatundidf in compiler_rt. 
if (!isSigned) { SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0); SDValue ShiftConst = DAG.getConstant( 1, dl, TLI.getShiftAmountTy(Op0.getValueType(), DAG.getDataLayout())); SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst); SDValue AndConst = DAG.getConstant(1, dl, MVT::i64); SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst); SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr); SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or); SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt); // TODO: This really should be implemented using a branch rather than a // select. We happen to get lucky and machinesink does the right // thing most of the time. This would be a good candidate for a //pseudo-op, or, even better, for whole-function isel. SDValue SignBitTest = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), Op0, DAG.getConstant(0, dl, MVT::i64), ISD::SETLT); return DAG.getSelect(dl, MVT::f32, SignBitTest, Slow, Fast); } // Otherwise, implement the fully general conversion. 
SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, DAG.getConstant(UINT64_C(0xfffffffffffff800), dl, MVT::i64)); SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, DAG.getConstant(UINT64_C(0x800), dl, MVT::i64)); SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, DAG.getConstant(UINT64_C(0x7ff), dl, MVT::i64)); SDValue Ne = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), And2, DAG.getConstant(UINT64_C(0), dl, MVT::i64), ISD::SETNE); SDValue Sel = DAG.getSelect(dl, MVT::i64, Ne, Or, Op0); SDValue Ge = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), Op0, DAG.getConstant(UINT64_C(0x0020000000000000), dl, MVT::i64), ISD::SETUGE); SDValue Sel2 = DAG.getSelect(dl, MVT::i64, Ge, Sel, Op0); EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType(), DAG.getDataLayout()); SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2, DAG.getConstant(32, dl, SHVT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh); SDValue Fcvt = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Trunc); SDValue TwoP32 = DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), dl, MVT::f64); SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt); SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2); SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo); SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2); return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd, DAG.getIntPtrConstant(0, dl)); } SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(Op0.getValueType()), Op0, DAG.getConstant(0, dl, Op0.getValueType()), ISD::SETLT); SDValue Zero = DAG.getIntPtrConstant(0, dl), Four = DAG.getIntPtrConstant(4, dl); SDValue CstOffset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Four, Zero); // If the sign bit of the integer is set, the large number will be treated // as a negative number. To counteract this, the dynamic code adds an // offset depending on the data type. 
uint64_t FF; switch (Op0.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unsupported integer type!"); case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float) case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float) } if (DAG.getDataLayout().isLittleEndian()) FF <<= 32; Constant *FudgeFactor = ConstantInt::get( Type::getInt64Ty(*DAG.getContext()), FF); SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast(CPIdx)->getAlignment(); CPIdx = DAG.getNode(ISD::ADD, dl, CPIdx.getValueType(), CPIdx, CstOffset); Alignment = std::min(Alignment, 4u); SDValue FudgeInReg; if (DestVT == MVT::f32) FudgeInReg = DAG.getLoad( MVT::f32, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment); else { SDValue Load = DAG.getExtLoad( ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, Alignment); HandleSDNode Handle(Load); LegalizeOp(Load.getNode()); FudgeInReg = Handle.getValue(); } return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); } /// This function is responsible for legalizing a /// *INT_TO_FP operation of the specified operand when the target requests that /// we promote it. At this point, we know that the result and operand types are /// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP /// operation that takes a larger input. SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, const SDLoc &dl) { // First step, figure out the appropriate *INT_TO_FP operation to use. EVT NewInTy = LegalOp.getValueType(); unsigned OpToUse = 0; // Scan for the appropriate larger type to use. 
while (true) { NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT().SimpleTy+1); assert(NewInTy.isInteger() && "Ran out of possibilities!"); // If the target supports SINT_TO_FP of this type, use it. if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) { OpToUse = ISD::SINT_TO_FP; break; } if (isSigned) continue; // If the target supports UINT_TO_FP of this type, use it. if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) { OpToUse = ISD::UINT_TO_FP; break; } // Otherwise, try a larger type. } // Okay, we found the operation and type to use. Zero extend our input to the // desired type then run the operation on it. return DAG.getNode(OpToUse, dl, DestVT, DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, NewInTy, LegalOp)); } /// This function is responsible for legalizing a /// FP_TO_*INT operation of the specified operand when the target requests that /// we promote it. At this point, we know that the result and operand types are /// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT /// operation that returns a larger result. SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, const SDLoc &dl) { // First step, figure out the appropriate FP_TO*INT operation to use. EVT NewOutTy = DestVT; unsigned OpToUse = 0; // Scan for the appropriate larger type to use. while (true) { NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1); assert(NewOutTy.isInteger() && "Ran out of possibilities!"); // A larger signed type can hold all unsigned values of the requested type, // so using FP_TO_SINT is valid if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) { OpToUse = ISD::FP_TO_SINT; break; } // However, if the value may be < 0.0, we *must* use some FP_TO_SINT. if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) { OpToUse = ISD::FP_TO_UINT; break; } // Otherwise, try a larger type. } // Okay, we found the operation and type to use. 
SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp); // Truncate the result of the extended FP_TO_*INT operation to the desired // size. return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); } /// Legalize a BITREVERSE scalar/vector operation as a series of mask + shifts. SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) { EVT VT = Op.getValueType(); EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); unsigned Sz = VT.getScalarSizeInBits(); SDValue Tmp, Tmp2, Tmp3; // If we can, perform BSWAP first and then the mask+swap the i4, then i2 // and finally the i1 pairs. // TODO: We can easily support i4/i2 legal types if any target ever does. if (Sz >= 8 && isPowerOf2_32(Sz)) { // Create the masks - repeating the pattern every byte. APInt MaskHi4(Sz, 0), MaskHi2(Sz, 0), MaskHi1(Sz, 0); APInt MaskLo4(Sz, 0), MaskLo2(Sz, 0), MaskLo1(Sz, 0); for (unsigned J = 0; J != Sz; J += 8) { MaskHi4 = MaskHi4 | (0xF0ull << J); MaskLo4 = MaskLo4 | (0x0Full << J); MaskHi2 = MaskHi2 | (0xCCull << J); MaskLo2 = MaskLo2 | (0x33ull << J); MaskHi1 = MaskHi1 | (0xAAull << J); MaskLo1 = MaskLo1 | (0x55ull << J); } // BSWAP if the type is wider than a single byte. Tmp = (Sz > 8 ? 
DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op); // swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4) Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT)); Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT)); Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, VT)); Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, VT)); Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); // swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2) Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT)); Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT)); Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, VT)); Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, VT)); Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); // swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1) Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT)); Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT)); Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, VT)); Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, VT)); Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); return Tmp; } Tmp = DAG.getConstant(0, dl, VT); for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) { if (I < J) Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT)); else Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT)); APInt Shift(Sz, 1); Shift <<= J; Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT)); Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2); } return Tmp; } /// Open code the operations for BSWAP of the specified operation. 
SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) { EVT VT = Op.getValueType(); EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; switch (VT.getSimpleVT().getScalarType().SimpleTy) { default: llvm_unreachable("Unhandled Expand type in BSWAP!"); case MVT::i16: Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); case MVT::i32: Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(0xFF0000, dl, VT)); Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT)); Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); case MVT::i64: Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT)); Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT)); Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT)); Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT)); Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, DAG.getConstant(255ULL<<48, dl, VT)); Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, DAG.getConstant(255ULL<<40, dl, VT)); Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, DAG.getConstant(255ULL<<32, dl, VT)); Tmp4 = 
DAG.getNode(ISD::AND, dl, VT, Tmp4, DAG.getConstant(255ULL<<24, dl, VT)); Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(255ULL<<16, dl, VT)); Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(255ULL<<8 , dl, VT)); Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7); Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5); Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6); Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4); } } /// Expand the specified bitcount instruction into operations. SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, const SDLoc &dl) { switch (Opc) { default: llvm_unreachable("Cannot expand this yet!"); case ISD::CTPOP: { EVT VT = Op.getValueType(); EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); unsigned Len = VT.getSizeInBits(); assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 && "CTPOP not implemented for this type."); // This is the "best" algorithm from // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel SDValue Mask55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT); SDValue Mask33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT); SDValue Mask0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT); SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT); // v = v - ((v >> 1) & 0x55555555...) Op = DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(1, dl, ShVT)), Mask55)); // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...) Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33), DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(2, dl, ShVT)), Mask33)); // v = (v + (v >> 4)) & 0x0F0F0F0F... 
Op = DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::ADD, dl, VT, Op, DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT))), Mask0F); // v = (v * 0x01010101...) >> (Len - 8) Op = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), DAG.getConstant(Len - 8, dl, ShVT)); return Op; } case ISD::CTLZ_ZERO_UNDEF: // This trivially expands to CTLZ. return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op); case ISD::CTLZ: { EVT VT = Op.getValueType(); unsigned Len = VT.getSizeInBits(); if (TLI.isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) { EVT SetCCVT = getSetCCResultType(VT); SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op); SDValue Zero = DAG.getConstant(0, dl, VT); SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ); return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero, DAG.getConstant(Len, dl, VT), CTLZ); } // for now, we do this: // x = x | (x >> 1); // x = x | (x >> 2); // ... // x = x | (x >>16); // x = x | (x >>32); // for 64-bit input // return popcount(~x); // // Ref: "Hacker's Delight" by Henry Warren EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); for (unsigned i = 0; (1U << i) <= (Len / 2); ++i) { SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT); Op = DAG.getNode(ISD::OR, dl, VT, Op, DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3)); } Op = DAG.getNOT(dl, Op, VT); return DAG.getNode(ISD::CTPOP, dl, VT, Op); } case ISD::CTTZ_ZERO_UNDEF: // This trivially expands to CTTZ. 
return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op); case ISD::CTTZ: { EVT VT = Op.getValueType(); unsigned Len = VT.getSizeInBits(); if (TLI.isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) { EVT SetCCVT = getSetCCResultType(VT); SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op); SDValue Zero = DAG.getConstant(0, dl, VT); SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ); return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero, DAG.getConstant(Len, dl, VT), CTTZ); } // for now, we use: { return popcount(~x & (x - 1)); } // unless the target has ctlz but not ctpop, in which case we use: // { return 32 - nlz(~x & (x-1)); } // Ref: "Hacker's Delight" by Henry Warren SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT), DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT))); // If ISD::CTLZ is legal and CTPOP isn't, then do that instead. if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) && TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(VT.getSizeInBits(), dl, VT), DAG.getNode(ISD::CTLZ, dl, VT, Tmp3)); return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3); } } } bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { LLVM_DEBUG(dbgs() << "Trying to expand node\n"); SmallVector Results; SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3, Tmp4; bool NeedInvert; switch (Node->getOpcode()) { case ISD::CTPOP: case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl); Results.push_back(Tmp1); break; case ISD::BITREVERSE: Results.push_back(ExpandBITREVERSE(Node->getOperand(0), dl)); break; case ISD::BSWAP: Results.push_back(ExpandBSWAP(Node->getOperand(0), dl)); break; case ISD::FRAMEADDR: case ISD::RETURNADDR: case ISD::FRAME_TO_ARGS_OFFSET: Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0))); break; case ISD::EH_DWARF_CFA: { SDValue CfaArg = DAG.getSExtOrTrunc(Node->getOperand(0), dl, 
TLI.getPointerTy(DAG.getDataLayout())); SDValue Offset = DAG.getNode(ISD::ADD, dl, CfaArg.getValueType(), DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl, CfaArg.getValueType()), CfaArg); SDValue FA = DAG.getNode( ISD::FRAMEADDR, dl, TLI.getPointerTy(DAG.getDataLayout()), DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()))); Results.push_back(DAG.getNode(ISD::ADD, dl, FA.getValueType(), FA, Offset)); break; } case ISD::FLT_ROUNDS_: Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0))); break; case ISD::EH_RETURN: case ISD::EH_LABEL: case ISD::PREFETCH: case ISD::VAEND: case ISD::EH_SJLJ_LONGJMP: // If the target didn't expand these, there's nothing to do, so just // preserve the chain and be done. Results.push_back(Node->getOperand(0)); break; case ISD::READCYCLECOUNTER: // If the target didn't expand this, just return 'zero' and preserve the // chain. Results.append(Node->getNumValues() - 1, DAG.getConstant(0, dl, Node->getValueType(0))); Results.push_back(Node->getOperand(0)); break; case ISD::EH_SJLJ_SETJMP: // If the target didn't expand this, just return 'zero' and preserve the // chain. Results.push_back(DAG.getConstant(0, dl, MVT::i32)); Results.push_back(Node->getOperand(0)); break; case ISD::ATOMIC_LOAD: { // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP. SDValue Zero = DAG.getConstant(0, dl, Node->getValueType(0)); SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other); SDValue Swap = DAG.getAtomicCmpSwap( ISD::ATOMIC_CMP_SWAP, dl, cast(Node)->getMemoryVT(), VTs, Node->getOperand(0), Node->getOperand(1), Zero, Zero, cast(Node)->getMemOperand()); Results.push_back(Swap.getValue(0)); Results.push_back(Swap.getValue(1)); break; } case ISD::ATOMIC_STORE: { // There is no libcall for atomic store; fake it with ATOMIC_SWAP. 
SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, cast(Node)->getMemoryVT(), Node->getOperand(0), Node->getOperand(1), Node->getOperand(2), cast(Node)->getMemOperand()); Results.push_back(Swap.getValue(1)); break; } case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { // Expanding an ATOMIC_CMP_SWAP_WITH_SUCCESS produces an ATOMIC_CMP_SWAP and // splits out the success value as a comparison. Expanding the resulting // ATOMIC_CMP_SWAP will produce a libcall. SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other); SDValue Res = DAG.getAtomicCmpSwap( ISD::ATOMIC_CMP_SWAP, dl, cast(Node)->getMemoryVT(), VTs, Node->getOperand(0), Node->getOperand(1), Node->getOperand(2), Node->getOperand(3), cast(Node)->getMemOperand()); SDValue ExtRes = Res; SDValue LHS = Res; SDValue RHS = Node->getOperand(1); EVT AtomicType = cast(Node)->getMemoryVT(); EVT OuterType = Node->getValueType(0); switch (TLI.getExtendForAtomicOps()) { case ISD::SIGN_EXTEND: LHS = DAG.getNode(ISD::AssertSext, dl, OuterType, Res, DAG.getValueType(AtomicType)); RHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OuterType, Node->getOperand(2), DAG.getValueType(AtomicType)); ExtRes = LHS; break; case ISD::ZERO_EXTEND: LHS = DAG.getNode(ISD::AssertZext, dl, OuterType, Res, DAG.getValueType(AtomicType)); RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType); ExtRes = LHS; break; case ISD::ANY_EXTEND: LHS = DAG.getZeroExtendInReg(Res, dl, AtomicType); RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType); break; default: llvm_unreachable("Invalid atomic op extension"); } SDValue Success = DAG.getSetCC(dl, Node->getValueType(1), LHS, RHS, ISD::SETEQ); Results.push_back(ExtRes.getValue(0)); Results.push_back(Success); Results.push_back(Res.getValue(1)); break; } case ISD::DYNAMIC_STACKALLOC: ExpandDYNAMIC_STACKALLOC(Node, Results); break; case ISD::MERGE_VALUES: for (unsigned i = 0; i < Node->getNumValues(); i++) Results.push_back(Node->getOperand(i)); break; case ISD::UNDEF: { EVT VT = 
Node->getValueType(0); if (VT.isInteger()) Results.push_back(DAG.getConstant(0, dl, VT)); else { assert(VT.isFloatingPoint() && "Unknown value type!"); Results.push_back(DAG.getConstantFP(0, dl, VT)); } break; } case ISD::FP_ROUND: case ISD::BITCAST: Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0), Node->getValueType(0), dl); Results.push_back(Tmp1); break; case ISD::FP_EXTEND: Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getOperand(0).getValueType(), Node->getValueType(0), dl); Results.push_back(Tmp1); break; case ISD::SIGN_EXTEND_INREG: { EVT ExtraVT = cast(Node->getOperand(1))->getVT(); EVT VT = Node->getValueType(0); // An in-register sign-extend of a boolean is a negation: // 'true' (1) sign-extended is -1. // 'false' (0) sign-extended is 0. // However, we must mask the high bits of the source operand because the // SIGN_EXTEND_INREG does not guarantee that the high bits are already zero. // TODO: Do this for vectors too? if (ExtraVT.getSizeInBits() == 1) { SDValue One = DAG.getConstant(1, dl, VT); SDValue And = DAG.getNode(ISD::AND, dl, VT, Node->getOperand(0), One); SDValue Zero = DAG.getConstant(0, dl, VT); SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, Zero, And); Results.push_back(Neg); break; } // NOTE: we could fall back on load/store here too for targets without // SRA. However, it is doubtful that any exist. EVT ShiftAmountTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); unsigned BitsDiff = VT.getScalarSizeInBits() - ExtraVT.getScalarSizeInBits(); SDValue ShiftCst = DAG.getConstant(BitsDiff, dl, ShiftAmountTy); Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0), Node->getOperand(0), ShiftCst); Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst); Results.push_back(Tmp1); break; } case ISD::FP_ROUND_INREG: { // The only way we can lower this is to turn it into a TRUNCSTORE, // EXTLOAD pair, targeting a temporary location (a stack slot). 
// NOTE: there is a choice here between constantly creating new stack // slots and always reusing the same one. We currently always create // new ones, as reuse may inhibit scheduling. EVT ExtraVT = cast(Node->getOperand(1))->getVT(); Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT, Node->getValueType(0), dl); Results.push_back(Tmp1); break; } case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP, Node->getOperand(0), Node->getValueType(0), dl); Results.push_back(Tmp1); break; case ISD::FP_TO_SINT: if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) Results.push_back(Tmp1); break; case ISD::FP_TO_UINT: { SDValue True, False; EVT VT = Node->getOperand(0).getValueType(); EVT NVT = Node->getValueType(0); APFloat apf(DAG.EVTToAPFloatSemantics(VT), APInt::getNullValue(VT.getSizeInBits())); APInt x = APInt::getSignMask(NVT.getSizeInBits()); (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven); Tmp1 = DAG.getConstantFP(apf, dl, VT); Tmp2 = DAG.getSetCC(dl, getSetCCResultType(VT), Node->getOperand(0), Tmp1, ISD::SETLT); True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0)); // TODO: Should any fast-math-flags be set for the FSUB? False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, DAG.getNode(ISD::FSUB, dl, VT, Node->getOperand(0), Tmp1)); False = DAG.getNode(ISD::XOR, dl, NVT, False, DAG.getConstant(x, dl, NVT)); Tmp1 = DAG.getSelect(dl, NVT, Tmp2, True, False); Results.push_back(Tmp1); break; } case ISD::VAARG: Results.push_back(DAG.expandVAArg(Node)); Results.push_back(Results[0].getValue(1)); break; case ISD::VACOPY: Results.push_back(DAG.expandVACopy(Node)); break; case ISD::EXTRACT_VECTOR_ELT: if (Node->getOperand(0).getValueType().getVectorNumElements() == 1) // This must be an access of the only element. Return it. 
Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Node->getOperand(0)); else Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0)); Results.push_back(Tmp1); break; case ISD::EXTRACT_SUBVECTOR: Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0))); break; case ISD::INSERT_SUBVECTOR: Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0))); break; case ISD::CONCAT_VECTORS: Results.push_back(ExpandVectorBuildThroughStack(Node)); break; case ISD::SCALAR_TO_VECTOR: Results.push_back(ExpandSCALAR_TO_VECTOR(Node)); break; case ISD::INSERT_VECTOR_ELT: Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0), Node->getOperand(1), Node->getOperand(2), dl)); break; case ISD::VECTOR_SHUFFLE: { SmallVector NewMask; ArrayRef Mask = cast(Node)->getMask(); EVT VT = Node->getValueType(0); EVT EltVT = VT.getVectorElementType(); SDValue Op0 = Node->getOperand(0); SDValue Op1 = Node->getOperand(1); if (!TLI.isTypeLegal(EltVT)) { EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); // BUILD_VECTOR operands are allowed to be wider than the element type. // But if NewEltVT is smaller that EltVT the BUILD_VECTOR does not accept // it. if (NewEltVT.bitsLT(EltVT)) { // Convert shuffle node. // If original node was v4i64 and the new EltVT is i32, // cast operands to v8i32 and re-build the mask. // Calculate new VT, the size of the new VT should be equal to original. 
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, VT.getSizeInBits() / NewEltVT.getSizeInBits()); assert(NewVT.bitsEq(VT)); // cast operands to new VT Op0 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op0); Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1); // Convert the shuffle mask unsigned int factor = NewVT.getVectorNumElements()/VT.getVectorNumElements(); // EltVT gets smaller assert(factor > 0); for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) { if (Mask[i] < 0) { for (unsigned fi = 0; fi < factor; ++fi) NewMask.push_back(Mask[i]); } else { for (unsigned fi = 0; fi < factor; ++fi) NewMask.push_back(Mask[i]*factor+fi); } } Mask = NewMask; VT = NewVT; } EltVT = NewEltVT; } unsigned NumElems = VT.getVectorNumElements(); SmallVector Ops; for (unsigned i = 0; i != NumElems; ++i) { if (Mask[i] < 0) { Ops.push_back(DAG.getUNDEF(EltVT)); continue; } unsigned Idx = Mask[i]; if (Idx < NumElems) Ops.push_back(DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); else Ops.push_back(DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1, DAG.getConstant(Idx - NumElems, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); } Tmp1 = DAG.getBuildVector(VT, dl, Ops); // We may have changed the BUILD_VECTOR type. Cast it back to the Node type. 
Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1); Results.push_back(Tmp1); break; } case ISD::EXTRACT_ELEMENT: { EVT OpTy = Node->getOperand(0).getValueType(); if (cast(Node->getOperand(1))->getZExtValue()) { // 1 -> Hi Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0), DAG.getConstant(OpTy.getSizeInBits() / 2, dl, TLI.getShiftAmountTy( Node->getOperand(0).getValueType(), DAG.getDataLayout()))); Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1); } else { // 0 -> Lo Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Node->getOperand(0)); } Results.push_back(Tmp1); break; } case ISD::STACKSAVE: // Expand to CopyFromReg if the target set // StackPointerRegisterToSaveRestore. if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) { Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, SP, Node->getValueType(0))); Results.push_back(Results[0].getValue(1)); } else { Results.push_back(DAG.getUNDEF(Node->getValueType(0))); Results.push_back(Node->getOperand(0)); } break; case ISD::STACKRESTORE: // Expand to CopyToReg if the target set // StackPointerRegisterToSaveRestore. if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) { Results.push_back(DAG.getCopyToReg(Node->getOperand(0), dl, SP, Node->getOperand(1))); } else { Results.push_back(Node->getOperand(0)); } break; case ISD::GET_DYNAMIC_AREA_OFFSET: Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0))); Results.push_back(Results[0].getValue(0)); break; case ISD::FCOPYSIGN: Results.push_back(ExpandFCOPYSIGN(Node)); break; case ISD::FNEG: // Expand Y = FNEG(X) -> Y = SUB -0.0, X Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0)); // TODO: If FNEG has fast-math-flags, propagate them to the FSUB. 
Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1, Node->getOperand(0)); Results.push_back(Tmp1); break; case ISD::FABS: Results.push_back(ExpandFABS(Node)); break; case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: case ISD::UMAX: { // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B ISD::CondCode Pred; switch (Node->getOpcode()) { default: llvm_unreachable("How did we get here?"); case ISD::SMAX: Pred = ISD::SETGT; break; case ISD::SMIN: Pred = ISD::SETLT; break; case ISD::UMAX: Pred = ISD::SETUGT; break; case ISD::UMIN: Pred = ISD::SETULT; break; } Tmp1 = Node->getOperand(0); Tmp2 = Node->getOperand(1); Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp1, Tmp2, Pred); Results.push_back(Tmp1); break; } case ISD::FSIN: case ISD::FCOS: { EVT VT = Node->getValueType(0); // Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin / // fcos which share the same operand and both are used. if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) || isSinCosLibcallAvailable(Node, TLI)) && useSinCos(Node)) { SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0)); if (Node->getOpcode() == ISD::FCOS) Tmp1 = Tmp1.getValue(1); Results.push_back(Tmp1); } break; } case ISD::FMAD: llvm_unreachable("Illegal fmad should never be formed"); case ISD::FP16_TO_FP: if (Node->getValueType(0) != MVT::f32) { // We can extend to types bigger than f32 in two steps without changing // the result. Since "f16 -> f32" is much more commonly available, give // CodeGen the option of emitting that before resorting to a libcall. 
SDValue Res = DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0)); Results.push_back( DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res)); } break; case ISD::FP_TO_FP16: LLVM_DEBUG(dbgs() << "Legalizing FP_TO_FP16\n"); if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) { SDValue Op = Node->getOperand(0); MVT SVT = Op.getSimpleValueType(); if ((SVT == MVT::f64 || SVT == MVT::f80) && TLI.isOperationLegalOrCustom(ISD::FP_TO_FP16, MVT::f32)) { // Under fastmath, we can expand this node into a fround followed by // a float-half conversion. SDValue FloatVal = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op, DAG.getIntPtrConstant(0, dl)); Results.push_back( DAG.getNode(ISD::FP_TO_FP16, dl, Node->getValueType(0), FloatVal)); } } break; case ISD::ConstantFP: { ConstantFPSDNode *CFP = cast(Node); // Check to see if this FP immediate is already legal. // If this is a legal constant, turn it into a TargetConstantFP node. if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0))) Results.push_back(ExpandConstantFP(CFP, true)); break; } case ISD::Constant: { ConstantSDNode *CP = cast(Node); Results.push_back(ExpandConstant(CP)); break; } case ISD::FSUB: { EVT VT = Node->getValueType(0); if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) && TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) { const SDNodeFlags Flags = Node->getFlags(); Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1, Flags); Results.push_back(Tmp1); } break; } case ISD::SUB: { EVT VT = Node->getValueType(0); assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) && TLI.isOperationLegalOrCustom(ISD::XOR, VT) && "Don't know how to expand this subtraction!"); Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1), DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl, VT)); Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, dl, VT)); Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1)); 
break; } case ISD::UREM: case ISD::SREM: { EVT VT = Node->getValueType(0); bool isSigned = Node->getOpcode() == ISD::SREM; unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV; unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; Tmp2 = Node->getOperand(0); Tmp3 = Node->getOperand(1); if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) { SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); Results.push_back(Tmp1); } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { // X % Y -> X-X/Y*Y Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3); Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3); Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1); Results.push_back(Tmp1); } break; } case ISD::UDIV: case ISD::SDIV: { bool isSigned = Node->getOpcode() == ISD::SDIV; unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; EVT VT = Node->getValueType(0); if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) { SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), Node->getOperand(1)); Results.push_back(Tmp1); } break; } case ISD::MULHU: case ISD::MULHS: { unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI : ISD::SMUL_LOHI; EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0), Node->getOperand(1)); Results.push_back(Tmp1.getValue(1)); break; } case ISD::UMUL_LOHI: case ISD::SMUL_LOHI: { SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); MVT VT = LHS.getSimpleValueType(); unsigned MULHOpcode = Node->getOpcode() == ISD::UMUL_LOHI ? 
ISD::MULHU : ISD::MULHS; if (TLI.isOperationLegalOrCustom(MULHOpcode, VT)) { Results.push_back(DAG.getNode(ISD::MUL, dl, VT, LHS, RHS)); Results.push_back(DAG.getNode(MULHOpcode, dl, VT, LHS, RHS)); break; } SmallVector Halves; EVT HalfType = EVT(VT).getHalfSizedIntegerVT(*DAG.getContext()); assert(TLI.isTypeLegal(HalfType)); if (TLI.expandMUL_LOHI(Node->getOpcode(), VT, Node, LHS, RHS, Halves, HalfType, DAG, TargetLowering::MulExpansionKind::Always)) { for (unsigned i = 0; i < 2; ++i) { SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Halves[2 * i]); SDValue Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Halves[2 * i + 1]); SDValue Shift = DAG.getConstant( HalfType.getScalarSizeInBits(), dl, TLI.getShiftAmountTy(HalfType, DAG.getDataLayout())); Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi)); } break; } break; } case ISD::MUL: { EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); // See if multiply or divide can be lowered using two-result operations. // We just need the low half of the multiply; try both the signed // and unsigned forms. If the target supports both SMUL_LOHI and // UMUL_LOHI, form a preference by checking which forms of plain // MULH it supports. 
bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, VT); bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, VT); bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, VT); bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, VT); unsigned OpToUse = 0; if (HasSMUL_LOHI && !HasMULHS) { OpToUse = ISD::SMUL_LOHI; } else if (HasUMUL_LOHI && !HasMULHU) { OpToUse = ISD::UMUL_LOHI; } else if (HasSMUL_LOHI) { OpToUse = ISD::SMUL_LOHI; } else if (HasUMUL_LOHI) { OpToUse = ISD::UMUL_LOHI; } if (OpToUse) { Results.push_back(DAG.getNode(OpToUse, dl, VTs, Node->getOperand(0), Node->getOperand(1))); break; } SDValue Lo, Hi; EVT HalfType = VT.getHalfSizedIntegerVT(*DAG.getContext()); if (TLI.isOperationLegalOrCustom(ISD::ZERO_EXTEND, VT) && TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND, VT) && TLI.isOperationLegalOrCustom(ISD::SHL, VT) && TLI.isOperationLegalOrCustom(ISD::OR, VT) && TLI.expandMUL(Node, Lo, Hi, HalfType, DAG, TargetLowering::MulExpansionKind::OnlyLegalOrCustom)) { Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo); Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Hi); SDValue Shift = DAG.getConstant(HalfType.getSizeInBits(), dl, TLI.getShiftAmountTy(HalfType, DAG.getDataLayout())); Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi)); } break; } case ISD::SADDO: case ISD::SSUBO: { SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ? 
ISD::ADD : ISD::SUB, dl, LHS.getValueType(), LHS, RHS); Results.push_back(Sum); EVT ResultType = Node->getValueType(1); EVT OType = getSetCCResultType(Node->getValueType(0)); SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType()); // LHSSign -> LHS >= 0 // RHSSign -> RHS >= 0 // SumSign -> Sum >= 0 // // Add: // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) // Sub: // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, Node->getOpcode() == ISD::SADDO ? ISD::SETEQ : ISD::SETNE); SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE); SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); Results.push_back(DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType)); break; } case ISD::UADDO: case ISD::USUBO: { SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); bool IsAdd = Node->getOpcode() == ISD::UADDO; // If ADD/SUBCARRY is legal, use that instead. unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY; if (TLI.isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) { SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1)); SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(), { LHS, RHS, CarryIn }); Results.push_back(SDValue(NodeCarry.getNode(), 0)); Results.push_back(SDValue(NodeCarry.getNode(), 1)); break; } SDValue Sum = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl, LHS.getValueType(), LHS, RHS); Results.push_back(Sum); EVT ResultType = Node->getValueType(1); EVT SetCCType = getSetCCResultType(Node->getValueType(0)); ISD::CondCode CC = IsAdd ? 
ISD::SETULT : ISD::SETUGT; SDValue SetCC = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC); Results.push_back(DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType)); break; } case ISD::UMULO: case ISD::SMULO: { EVT VT = Node->getValueType(0); EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); SDValue BottomHalf; SDValue TopHalf; static const unsigned Ops[2][3] = { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND }, { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }}; bool isSigned = Node->getOpcode() == ISD::SMULO; if (TLI.isOperationLegalOrCustom(Ops[isSigned][0], VT)) { BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS); } else if (TLI.isOperationLegalOrCustom(Ops[isSigned][1], VT)) { BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS, RHS); TopHalf = BottomHalf.getValue(1); } else if (TLI.isTypeLegal(WideVT)) { LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, DAG.getIntPtrConstant(0, dl)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, DAG.getIntPtrConstant(1, dl)); } else { // We can fall back to a libcall with an illegal type for the MUL if we // have a libcall big enough. // Also, we can fall back to a division in some cases, but that's a big // performance hit in the general case. 
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (WideVT == MVT::i16) LC = RTLIB::MUL_I16; else if (WideVT == MVT::i32) LC = RTLIB::MUL_I32; else if (WideVT == MVT::i64) LC = RTLIB::MUL_I64; else if (WideVT == MVT::i128) LC = RTLIB::MUL_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!"); SDValue HiLHS; SDValue HiRHS; if (isSigned) { // The high part is obtained by SRA'ing all but one of the bits of low // part. unsigned LoSize = VT.getSizeInBits(); HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy(DAG.getDataLayout()))); HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy(DAG.getDataLayout()))); } else { HiLHS = DAG.getConstant(0, dl, VT); HiRHS = DAG.getConstant(0, dl, VT); } // Here we're passing the 2 arguments explicitly as 4 arguments that are // pre-lowered to the correct types. This all depends upon WideVT not // being a legal type for the architecture and thus has to be split to // two arguments. SDValue Ret; if(DAG.getDataLayout().isLittleEndian()) { // Halves of WideVT are packed into registers in different order // depending on platform endianness. This is usually handled by // the C calling convention, but we can't defer to it in // the legalizer. 
SDValue Args[] = { LHS, HiLHS, RHS, HiRHS }; Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); } else { SDValue Args[] = { HiLHS, LHS, HiRHS, RHS }; Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); } assert(Ret.getOpcode() == ISD::MERGE_VALUES && "Ret value is a collection of constituent nodes holding result."); BottomHalf = Ret.getOperand(0); TopHalf = Ret.getOperand(1); } if (isSigned) { Tmp1 = DAG.getConstant( VT.getSizeInBits() - 1, dl, TLI.getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout())); Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1); TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, Tmp1, ISD::SETNE); } else { TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, DAG.getConstant(0, dl, VT), ISD::SETNE); } // Truncate the result if SetCC returns a larger type than needed. EVT RType = Node->getValueType(1); if (RType.getSizeInBits() < TopHalf.getValueSizeInBits()) TopHalf = DAG.getNode(ISD::TRUNCATE, dl, RType, TopHalf); assert(RType.getSizeInBits() == TopHalf.getValueSizeInBits() && "Unexpected result type for S/UMULO legalization"); Results.push_back(BottomHalf); Results.push_back(TopHalf); break; } case ISD::BUILD_PAIR: { EVT PairTy = Node->getValueType(0); Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1)); Tmp2 = DAG.getNode( ISD::SHL, dl, PairTy, Tmp2, DAG.getConstant(PairTy.getSizeInBits() / 2, dl, TLI.getShiftAmountTy(PairTy, DAG.getDataLayout()))); Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2)); break; } case ISD::SELECT: Tmp1 = Node->getOperand(0); Tmp2 = Node->getOperand(1); Tmp3 = Node->getOperand(2); if (Tmp1.getOpcode() == ISD::SETCC) { Tmp1 = DAG.getSelectCC(dl, Tmp1.getOperand(0), Tmp1.getOperand(1), Tmp2, Tmp3, cast(Tmp1.getOperand(2))->get()); } else { Tmp1 = DAG.getSelectCC(dl, Tmp1, DAG.getConstant(0, dl, Tmp1.getValueType()), Tmp2, Tmp3, ISD::SETNE); } Results.push_back(Tmp1); 
break; case ISD::BR_JT: { SDValue Chain = Node->getOperand(0); SDValue Table = Node->getOperand(1); SDValue Index = Node->getOperand(2); const DataLayout &TD = DAG.getDataLayout(); EVT PTy = TLI.getPointerTy(TD); unsigned EntrySize = DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); // For power-of-two jumptable entry sizes convert multiplication to a shift. // This transformation needs to be done here since otherwise the MIPS // backend will end up emitting a three instruction multiply sequence // instead of a single shift and MSP430 will call a runtime function. if (llvm::isPowerOf2_32(EntrySize)) Index = DAG.getNode( ISD::SHL, dl, Index.getValueType(), Index, DAG.getConstant(llvm::Log2_32(EntrySize), dl, Index.getValueType())); else Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, DAG.getConstant(EntrySize, dl, Index.getValueType())); SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, Table); EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); SDValue LD = DAG.getExtLoad( ISD::SEXTLOAD, dl, PTy, Chain, Addr, MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT); Addr = LD; if (TLI.isJumpTableRelative()) { // For PIC, the sequence is: // BRIND(load(Jumptable + index) + RelocBase) // RelocBase can be JumpTable, GOT or some sort of global base. Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, TLI.getPICJumpTableRelocBase(Table, DAG)); } Tmp1 = TLI.expandIndirectJTBranch(dl, LD.getValue(1), Addr, DAG); Results.push_back(Tmp1); break; } case ISD::BRCOND: // Expand brcond's setcc into its constituent parts and create a BR_CC // Node. Tmp1 = Node->getOperand(0); Tmp2 = Node->getOperand(1); if (Tmp2.getOpcode() == ISD::SETCC) { Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, Tmp2.getOperand(2), Tmp2.getOperand(0), Tmp2.getOperand(1), Node->getOperand(2)); } else { // We test only the i1 bit. Skip the AND if UNDEF or another AND. 
if (Tmp2.isUndef() || (Tmp2.getOpcode() == ISD::AND && isa(Tmp2.getOperand(1)) && cast(Tmp2.getOperand(1))->getZExtValue() == 1)) Tmp3 = Tmp2; else Tmp3 = DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2, DAG.getConstant(1, dl, Tmp2.getValueType())); Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, DAG.getCondCode(ISD::SETNE), Tmp3, DAG.getConstant(0, dl, Tmp3.getValueType()), Node->getOperand(2)); } Results.push_back(Tmp1); break; case ISD::SETCC: { Tmp1 = Node->getOperand(0); Tmp2 = Node->getOperand(1); Tmp3 = Node->getOperand(2); bool Legalized = LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, NeedInvert, dl); if (Legalized) { // If we expanded the SETCC by swapping LHS and RHS, or by inverting the // condition code, create a new SETCC node. if (Tmp3.getNode()) Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), Tmp1, Tmp2, Tmp3); // If we expanded the SETCC by inverting the condition code, then wrap // the existing SETCC in a NOT to restore the intended condition. if (NeedInvert) Tmp1 = DAG.getLogicalNOT(dl, Tmp1, Tmp1->getValueType(0)); Results.push_back(Tmp1); break; } // Otherwise, SETCC for the given comparison type must be completely // illegal; expand it into a SELECT_CC. 
EVT VT = Node->getValueType(0); int TrueValue; switch (TLI.getBooleanContents(Tmp1.getValueType())) { case TargetLowering::ZeroOrOneBooleanContent: case TargetLowering::UndefinedBooleanContent: TrueValue = 1; break; case TargetLowering::ZeroOrNegativeOneBooleanContent: TrueValue = -1; break; } Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2, DAG.getConstant(TrueValue, dl, VT), DAG.getConstant(0, dl, VT), Tmp3); Results.push_back(Tmp1); break; } case ISD::SELECT_CC: { Tmp1 = Node->getOperand(0); // LHS Tmp2 = Node->getOperand(1); // RHS Tmp3 = Node->getOperand(2); // True Tmp4 = Node->getOperand(3); // False EVT VT = Node->getValueType(0); SDValue CC = Node->getOperand(4); ISD::CondCode CCOp = cast(CC)->get(); if (TLI.isCondCodeLegalOrCustom(CCOp, Tmp1.getSimpleValueType())) { // If the condition code is legal, then we need to expand this // node using SETCC and SELECT. EVT CmpVT = Tmp1.getValueType(); assert(!TLI.isOperationExpand(ISD::SELECT, VT) && "Cannot expand ISD::SELECT_CC when ISD::SELECT also needs to be " "expanded."); EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT); SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC); Results.push_back(DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4)); break; } // SELECT_CC is legal, so the condition code must not be. bool Legalized = false; // Try to legalize by inverting the condition. This is for targets that // might support an ordered version of a condition, but not the unordered // version (or vice versa). ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp, Tmp1.getValueType().isInteger()); if (TLI.isCondCodeLegalOrCustom(InvCC, Tmp1.getSimpleValueType())) { // Use the new condition code and swap true and false Legalized = true; Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC); } else { // If The inverse is not legal, then try to swap the arguments using // the inverse condition code. 
ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InvCC); if (TLI.isCondCodeLegalOrCustom(SwapInvCC, Tmp1.getSimpleValueType())) { // The swapped inverse condition is legal, so swap true and false, // lhs and rhs. Legalized = true; Tmp1 = DAG.getSelectCC(dl, Tmp2, Tmp1, Tmp4, Tmp3, SwapInvCC); } } if (!Legalized) { Legalized = LegalizeSetCCCondCode( getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, NeedInvert, dl); assert(Legalized && "Can't legalize SELECT_CC with legal condition!"); // If we expanded the SETCC by inverting the condition code, then swap // the True/False operands to match. if (NeedInvert) std::swap(Tmp3, Tmp4); // If we expanded the SETCC by swapping LHS and RHS, or by inverting the // condition code, create a new SELECT_CC node. if (CC.getNode()) { Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, Tmp3, Tmp4, CC); } else { Tmp2 = DAG.getConstant(0, dl, Tmp1.getValueType()); CC = DAG.getCondCode(ISD::SETNE); Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, Tmp3, Tmp4, CC); } } Results.push_back(Tmp1); break; } case ISD::BR_CC: { Tmp1 = Node->getOperand(0); // Chain Tmp2 = Node->getOperand(2); // LHS Tmp3 = Node->getOperand(3); // RHS Tmp4 = Node->getOperand(1); // CC bool Legalized = LegalizeSetCCCondCode(getSetCCResultType( Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, NeedInvert, dl); (void)Legalized; assert(Legalized && "Can't legalize BR_CC with legal condition!"); // If we expanded the SETCC by inverting the condition code, then wrap // the existing SETCC in a NOT to restore the intended condition. if (NeedInvert) Tmp4 = DAG.getNOT(dl, Tmp4, Tmp4->getValueType(0)); // If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC // node. 
if (Tmp4.getNode()) { Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, Tmp3, Node->getOperand(4)); } else { Tmp3 = DAG.getConstant(0, dl, Tmp2.getValueType()); Tmp4 = DAG.getCondCode(ISD::SETNE); Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, Tmp3, Node->getOperand(4)); } Results.push_back(Tmp1); break; } case ISD::BUILD_VECTOR: Results.push_back(ExpandBUILD_VECTOR(Node)); break; case ISD::SRA: case ISD::SRL: case ISD::SHL: { // Scalarize vector SRA/SRL/SHL. EVT VT = Node->getValueType(0); assert(VT.isVector() && "Unable to legalize non-vector shift"); assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal"); unsigned NumElem = VT.getVectorNumElements(); SmallVector Scalars; for (unsigned Idx = 0; Idx < NumElem; Idx++) { SDValue Ex = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(0), DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); SDValue Sh = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(1), DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, VT.getScalarType(), Ex, Sh)); } SDValue Result = DAG.getBuildVector(Node->getValueType(0), dl, Scalars); ReplaceNode(SDValue(Node, 0), Result); break; } case ISD::ROTL: case ISD::ROTR: { bool IsLeft = Node->getOpcode() == ISD::ROTL; SDValue Op0 = Node->getOperand(0), Op1 = Node->getOperand(1); EVT ResVT = Node->getValueType(0); EVT OpVT = Op0.getValueType(); assert(OpVT == ResVT && "The result and the operand types of rotate should match"); EVT ShVT = Op1.getValueType(); SDValue Width = DAG.getConstant(OpVT.getScalarSizeInBits(), dl, ShVT); // If a rotate in the other direction is legal, use it. unsigned RevRot = IsLeft ? 
ISD::ROTR : ISD::ROTL;
    if (TLI.isOperationLegal(RevRot, ResVT)) {
      SDValue Sub = DAG.getNode(ISD::SUB, dl, ShVT, Width, Op1);
      Results.push_back(DAG.getNode(RevRot, dl, ResVT, Op0, Sub));
      break;
    }

    // Otherwise,
    // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1)))
    // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1)))
    //
    assert(isPowerOf2_32(OpVT.getScalarSizeInBits()) &&
           "Expecting the type bitwidth to be a power of 2");
    unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
    unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
    SDValue Width1 = DAG.getNode(ISD::SUB, dl, ShVT,
                                 Width, DAG.getConstant(1, dl, ShVT));
    SDValue NegOp1 = DAG.getNode(ISD::SUB, dl, ShVT, Width, Op1);
    SDValue And0 = DAG.getNode(ISD::AND, dl, ShVT, Op1, Width1);
    SDValue And1 = DAG.getNode(ISD::AND, dl, ShVT, NegOp1, Width1);

    SDValue Or = DAG.getNode(ISD::OR, dl, ResVT,
                             DAG.getNode(ShOpc, dl, ResVT, Op0, And0),
                             DAG.getNode(HsOpc, dl, ResVT, Op0, And1));
    Results.push_back(Or);
    break;
  }

  case ISD::GLOBAL_OFFSET_TABLE:
  case ISD::GlobalAddress:
  case ISD::GlobalTLSAddress:
  case ISD::ExternalSymbol:
  case ISD::ConstantPool:
  case ISD::JumpTable:
  case ISD::INTRINSIC_W_CHAIN:
  case ISD::INTRINSIC_WO_CHAIN:
  case ISD::INTRINSIC_VOID:
    // FIXME: Custom lowering for these operations shouldn't return null!
    break;
  }

  // Replace the original node with the legalized result.
  if (Results.empty()) {
    LLVM_DEBUG(dbgs() << "Cannot expand node\n");
    return false;
  }

  // NOTE(review): "Succesfully" is misspelled in the debug string below
  // (the libcall and promote paths spell it "Successfully").
  LLVM_DEBUG(dbgs() << "Succesfully expanded node\n");
  ReplaceNode(Node, Results.data());
  return true;
}

/// Try to replace \p Node with a call to a runtime library routine selected
/// from the node's opcode (and, for most cases, its value type).
///
/// Each handled opcode pushes the replacement value(s) onto Results; if
/// Results is non-empty at the end, the node is replaced via ReplaceNode.
/// Opcodes without a case in the switch below (there is no default label)
/// leave Results empty, and only a debug message is emitted.
///
/// NOTE(review): this copy of the file reads "SmallVector Results",
/// "std::pair CallResult" and "cast(Node)" with no template arguments, which
/// will not compile as written. They look stripped by whatever produced this
/// text -- restore them from the upstream file before building.
void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
  LLVM_DEBUG(dbgs() << "Trying to convert node to libcall\n");
  SmallVector Results;
  SDLoc dl(Node);
  // FIXME: Check flags on the node to see if we can use a finite call.
  // The *_FINITE libcall variants below are only chosen when both global
  // no-infs and no-NaNs FP options are set (and the library provides them).
  bool CanUseFiniteLibCall = TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath;
  unsigned Opc = Node->getOpcode();
  switch (Opc) {
  case ISD::ATOMIC_FENCE: {
    // If the target didn't lower this, lower it to '__sync_synchronize()' call
    // FIXME: handle "fence singlethread" more efficiently.
    TargetLowering::ArgListTy Args;

    TargetLowering::CallLoweringInfo CLI(DAG);
    CLI.setDebugLoc(dl)
        .setChain(Node->getOperand(0))
        .setLibCallee(
            CallingConv::C, Type::getVoidTy(*DAG.getContext()),
            DAG.getExternalSymbol("__sync_synchronize",
                                  TLI.getPointerTy(DAG.getDataLayout())),
            std::move(Args));

    std::pair CallResult = TLI.LowerCallTo(CLI);

    // Only the second member of the lowered call's result pair replaces the
    // node (presumably the output chain, since the callee returns void --
    // confirm against TargetLowering::LowerCallTo).
    Results.push_back(CallResult.second);
    break;
  }
  // By default, atomic intrinsics are marked Legal and lowered. Targets
  // which don't support them directly, however, may want libcalls, in which
  // case they mark them Expand, and we get here.
  case ISD::ATOMIC_SWAP:
  case ISD::ATOMIC_LOAD_ADD:
  case ISD::ATOMIC_LOAD_SUB:
  case ISD::ATOMIC_LOAD_AND:
  case ISD::ATOMIC_LOAD_CLR:
  case ISD::ATOMIC_LOAD_OR:
  case ISD::ATOMIC_LOAD_XOR:
  case ISD::ATOMIC_LOAD_NAND:
  case ISD::ATOMIC_LOAD_MIN:
  case ISD::ATOMIC_LOAD_MAX:
  case ISD::ATOMIC_LOAD_UMIN:
  case ISD::ATOMIC_LOAD_UMAX:
  case ISD::ATOMIC_CMP_SWAP: {
    // The libcall is keyed on the opcode and the node's memory type.
    MVT VT = cast(Node)->getMemoryVT().getSimpleVT();
    RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");

    // Both members of the returned pair become results of the replacement.
    std::pair Tmp = ExpandChainLibCall(LC, Node, false);
    Results.push_back(Tmp.first);
    Results.push_back(Tmp.second);
    break;
  }
  case ISD::TRAP: {
    // If this operation is not supported, lower it to 'abort()' call
    TargetLowering::ArgListTy Args;
    TargetLowering::CallLoweringInfo CLI(DAG);
    CLI.setDebugLoc(dl)
        .setChain(Node->getOperand(0))
        .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
                      DAG.getExternalSymbol(
                          "abort", TLI.getPointerTy(DAG.getDataLayout())),
                      std::move(Args));
    std::pair CallResult = TLI.LowerCallTo(CLI);

    Results.push_back(CallResult.second);
    break;
  }
  // Floating-point operations: ExpandFPLibCall receives one libcall id per
  // supported FP type (f32, f64, f80, f128, ppcf128).
  case ISD::FMINNUM:
    Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
                                      RTLIB::FMIN_F80, RTLIB::FMIN_F128,
                                      RTLIB::FMIN_PPCF128));
    break;
  case ISD::FMAXNUM:
    Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
                                      RTLIB::FMAX_F80, RTLIB::FMAX_F128,
                                      RTLIB::FMAX_PPCF128));
    break;
  case ISD::FSQRT:
  case ISD::STRICT_FSQRT:
    Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
                                      RTLIB::SQRT_F80, RTLIB::SQRT_F128,
                                      RTLIB::SQRT_PPCF128));
    break;
  case ISD::FSIN:
  case ISD::STRICT_FSIN:
    Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
                                      RTLIB::SIN_F80, RTLIB::SIN_F128,
                                      RTLIB::SIN_PPCF128));
    break;
  case ISD::FCOS:
  case ISD::STRICT_FCOS:
    Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
                                      RTLIB::COS_F80, RTLIB::COS_F128,
                                      RTLIB::COS_PPCF128));
    break;
  case ISD::FSINCOS:
    // Expand into sincos libcall.
    ExpandSinCosLibCall(Node, Results);
    break;
  case ISD::FLOG:
  case ISD::STRICT_FLOG:
    if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log_finite))
      Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_FINITE_F32,
                                        RTLIB::LOG_FINITE_F64,
                                        RTLIB::LOG_FINITE_F80,
                                        RTLIB::LOG_FINITE_F128,
                                        RTLIB::LOG_FINITE_PPCF128));
    else
      Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
                                        RTLIB::LOG_F80, RTLIB::LOG_F128,
                                        RTLIB::LOG_PPCF128));
    break;
  case ISD::FLOG2:
  case ISD::STRICT_FLOG2:
    if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log2_finite))
      Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_FINITE_F32,
                                        RTLIB::LOG2_FINITE_F64,
                                        RTLIB::LOG2_FINITE_F80,
                                        RTLIB::LOG2_FINITE_F128,
                                        RTLIB::LOG2_FINITE_PPCF128));
    else
      Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
                                        RTLIB::LOG2_F80, RTLIB::LOG2_F128,
                                        RTLIB::LOG2_PPCF128));
    break;
  case ISD::FLOG10:
  case ISD::STRICT_FLOG10:
    if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log10_finite))
      Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_FINITE_F32,
                                        RTLIB::LOG10_FINITE_F64,
                                        RTLIB::LOG10_FINITE_F80,
                                        RTLIB::LOG10_FINITE_F128,
                                        RTLIB::LOG10_FINITE_PPCF128));
    else
      Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
                                        RTLIB::LOG10_F80, RTLIB::LOG10_F128,
                                        RTLIB::LOG10_PPCF128));
    break;
  case ISD::FEXP:
  case ISD::STRICT_FEXP:
    if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp_finite))
      Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_FINITE_F32,
                                        RTLIB::EXP_FINITE_F64,
                                        RTLIB::EXP_FINITE_F80,
                                        RTLIB::EXP_FINITE_F128,
                                        RTLIB::EXP_FINITE_PPCF128));
    else
      Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
                                        RTLIB::EXP_F80, RTLIB::EXP_F128,
                                        RTLIB::EXP_PPCF128));
    break;
  case ISD::FEXP2:
  case ISD::STRICT_FEXP2:
    if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp2_finite))
      Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_FINITE_F32,
                                        RTLIB::EXP2_FINITE_F64,
                                        RTLIB::EXP2_FINITE_F80,
                                        RTLIB::EXP2_FINITE_F128,
                                        RTLIB::EXP2_FINITE_PPCF128));
    else
      Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
                                        RTLIB::EXP2_F80, RTLIB::EXP2_F128,
                                        RTLIB::EXP2_PPCF128));
    break;
  case ISD::FTRUNC:
    Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
                                      RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
                                      RTLIB::TRUNC_PPCF128));
    break;
  case ISD::FFLOOR:
    Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
                                      RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
                                      RTLIB::FLOOR_PPCF128));
    break;
  case ISD::FCEIL:
    Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
                                      RTLIB::CEIL_F80, RTLIB::CEIL_F128,
                                      RTLIB::CEIL_PPCF128));
    break;
  case ISD::FRINT:
  case ISD::STRICT_FRINT:
    Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
                                      RTLIB::RINT_F80, RTLIB::RINT_F128,
                                      RTLIB::RINT_PPCF128));
    break;
  case ISD::FNEARBYINT:
  case ISD::STRICT_FNEARBYINT:
    Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
                                      RTLIB::NEARBYINT_F64,
                                      RTLIB::NEARBYINT_F80,
                                      RTLIB::NEARBYINT_F128,
                                      RTLIB::NEARBYINT_PPCF128));
    break;
  case ISD::FROUND:
    Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32,
                                      RTLIB::ROUND_F64,
                                      RTLIB::ROUND_F80,
                                      RTLIB::ROUND_F128,
                                      RTLIB::ROUND_PPCF128));
    break;
  case ISD::FPOWI:
  case ISD::STRICT_FPOWI:
    Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
                                      RTLIB::POWI_F80, RTLIB::POWI_F128,
                                      RTLIB::POWI_PPCF128));
    break;
  case ISD::FPOW:
  case ISD::STRICT_FPOW:
    if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_pow_finite))
      Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_FINITE_F32,
                                        RTLIB::POW_FINITE_F64,
                                        RTLIB::POW_FINITE_F80,
                                        RTLIB::POW_FINITE_F128,
                                        RTLIB::POW_FINITE_PPCF128));
    else
      Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
                                        RTLIB::POW_F80, RTLIB::POW_F128,
                                        RTLIB::POW_PPCF128));
    break;
  case ISD::FDIV:
    Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
                                      RTLIB::DIV_F80, RTLIB::DIV_F128,
                                      RTLIB::DIV_PPCF128));
    break;
  case ISD::FREM:
    Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
                                      RTLIB::REM_F80, RTLIB::REM_F128,
                                      RTLIB::REM_PPCF128));
    break;
  case ISD::FMA:
  case ISD::STRICT_FMA:
    Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
                                      RTLIB::FMA_F80, RTLIB::FMA_F128,
                                      RTLIB::FMA_PPCF128));
    break;
  case ISD::FADD:
    Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
                                      RTLIB::ADD_F80, RTLIB::ADD_F128,
                                      RTLIB::ADD_PPCF128));
    break;
  case ISD::FMUL:
    Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
                                      RTLIB::MUL_F80, RTLIB::MUL_F128,
                                      RTLIB::MUL_PPCF128));
    break;
  case ISD::FP16_TO_FP:
    // Only an f32 result is handled here; any other result type leaves
    // Results empty.
    if (Node->getValueType(0) == MVT::f32) {
      Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
    }
    break;
  case ISD::FP_TO_FP16: {
    // The rounding libcall is looked up from the source operand's type.
    RTLIB::Libcall LC =
        RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16");
    Results.push_back(ExpandLibCall(LC, Node, false));
    break;
  }
  case ISD::FSUB:
    Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
                                      RTLIB::SUB_F80, RTLIB::SUB_F128,
                                      RTLIB::SUB_PPCF128));
    break;
  // Integer operations: ExpandIntLibCall takes a signedness flag followed by
  // one libcall id per integer width (i8, i16, i32, i64, i128).
  case ISD::SREM:
    Results.push_back(ExpandIntLibCall(Node, true,
                                       RTLIB::SREM_I8,
                                       RTLIB::SREM_I16, RTLIB::SREM_I32,
                                       RTLIB::SREM_I64, RTLIB::SREM_I128));
    break;
  case ISD::UREM:
    Results.push_back(ExpandIntLibCall(Node, false,
                                       RTLIB::UREM_I8,
                                       RTLIB::UREM_I16, RTLIB::UREM_I32,
                                       RTLIB::UREM_I64, RTLIB::UREM_I128));
    break;
  case ISD::SDIV:
    Results.push_back(ExpandIntLibCall(Node, true,
                                       RTLIB::SDIV_I8,
                                       RTLIB::SDIV_I16, RTLIB::SDIV_I32,
                                       RTLIB::SDIV_I64, RTLIB::SDIV_I128));
    break;
  case ISD::UDIV:
    Results.push_back(ExpandIntLibCall(Node, false,
                                       RTLIB::UDIV_I8,
                                       RTLIB::UDIV_I16, RTLIB::UDIV_I32,
                                       RTLIB::UDIV_I64, RTLIB::UDIV_I128));
    break;
  case ISD::SDIVREM:
  case ISD::UDIVREM:
    // Expand into divrem libcall
    ExpandDivRemLibCall(Node, Results);
    break;
  case ISD::MUL:
    Results.push_back(ExpandIntLibCall(Node, false,
                                       RTLIB::MUL_I8,
                                       RTLIB::MUL_I16, RTLIB::MUL_I32,
                                       RTLIB::MUL_I64, RTLIB::MUL_I128));
    break;
  }

  // Replace the original node with the legalized result.
  if (!Results.empty()) {
    LLVM_DEBUG(dbgs() << "Successfully converted node to libcall\n");
    ReplaceNode(Node, Results.data());
  } else
    LLVM_DEBUG(dbgs() << "Could not convert node to libcall\n");
}

// Determine the vector type to use in place of an original scalar element when
// promoting equally sized vectors.
// Returns a vector of NewEltVT elements with the same total bit size as one
// EltVT element; the result is asserted to be a legal type for the target.
static MVT getPromotedVectorElementType(const TargetLowering &TLI,
                                        MVT EltVT, MVT NewEltVT) {
  // Despite its name, this is the number of NewEltVT elements that fit in one
  // EltVT element (old element bits / new element bits). Callers store the
  // resulting element count in a variable called NewEltsPerOldElt.
  unsigned OldEltsPerNewElt = EltVT.getSizeInBits() / NewEltVT.getSizeInBits();
  MVT MidVT = MVT::getVectorVT(NewEltVT, OldEltsPerNewElt);
  assert(TLI.isTypeLegal(MidVT) && "unexpected");
  return MidVT;
}

/// Legalize \p Node by performing its operation in the type NVT that
/// TLI.getTypeToPromoteTo reports for (opcode, OVT), then converting the
/// result back to the original type.
///
/// Each handled opcode pushes its replacement value(s) onto Results, and the
/// node is replaced at the end if Results is non-empty. Opcodes without a
/// case (there is no default label) only produce a debug message.
///
/// NOTE(review): this copy of the file reads "SmallVector Results",
/// "ArrayRef Mask" and "cast(Node)" with no template arguments, which will
/// not compile as written. They look stripped by whatever produced this
/// text -- restore them from the upstream file before building.
void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
  LLVM_DEBUG(dbgs() << "Trying to promote node\n");
  SmallVector Results;
  MVT OVT = Node->getSimpleValueType(0);
  // For these opcodes the type that selects the promotion is taken from
  // operand 0 (operand 2 for BR_CC) instead of from the node's result.
  if (Node->getOpcode() == ISD::UINT_TO_FP ||
      Node->getOpcode() == ISD::SINT_TO_FP ||
      Node->getOpcode() == ISD::SETCC ||
      Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
      Node->getOpcode() == ISD::INSERT_VECTOR_ELT) {
    OVT = Node->getOperand(0).getSimpleValueType();
  }
  if (Node->getOpcode() == ISD::BR_CC)
    OVT = Node->getOperand(2).getSimpleValueType();
  MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
  SDLoc dl(Node);
  SDValue Tmp1, Tmp2, Tmp3;
  switch (Node->getOpcode()) {
  case ISD::CTTZ:
  case ISD::CTTZ_ZERO_UNDEF:
  case ISD::CTLZ:
  case ISD::CTLZ_ZERO_UNDEF:
  case ISD::CTPOP:
    // Zero extend the argument.
    Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
    if (Node->getOpcode() == ISD::CTTZ) {
      // The count is the same in the promoted type except if the original
      // value was zero. This can be handled by setting the bit just off
      // the top of the original type.
      auto TopBit = APInt::getOneBitSet(NVT.getSizeInBits(),
                                        OVT.getSizeInBits());
      Tmp1 = DAG.getNode(ISD::OR, dl, NVT, Tmp1,
                         DAG.getConstant(TopBit, dl, NVT));
    }
    // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is
    // already the correct result.
    Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
    if (Node->getOpcode() == ISD::CTLZ ||
        Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
      // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
      Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
                          DAG.getConstant(NVT.getSizeInBits() -
                                          OVT.getSizeInBits(), dl, NVT));
    }
    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
    break;
  case ISD::BITREVERSE:
  case ISD::BSWAP: {
    // Reversing the zero-extended value moves the original bits to the top of
    // the wide type, so shift right by the width difference before truncating.
    unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
    Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
    Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
    Tmp1 = DAG.getNode(
        ISD::SRL, dl, NVT, Tmp1,
        DAG.getConstant(DiffBits, dl,
                        TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));

    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
    break;
  }
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:
    // Delegated to a helper; the third argument says whether the conversion
    // is signed.
    Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0),
                                 Node->getOpcode() == ISD::FP_TO_SINT, dl);
    Results.push_back(Tmp1);
    break;
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:
    Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0),
                                 Node->getOpcode() == ISD::SINT_TO_FP, dl);
    Results.push_back(Tmp1);
    break;
  case ISD::VAARG: {
    SDValue Chain = Node->getOperand(0); // Get the chain.
    SDValue Ptr = Node->getOperand(1); // Get the pointer.

    unsigned TruncOp;
    if (OVT.isVector()) {
      TruncOp = ISD::BITCAST;
    } else {
      assert(OVT.isInteger()
        && "VAARG promotion is supported only for vectors or integer types");
      TruncOp = ISD::TRUNCATE;
    }

    // Perform the larger operation, then convert back
    Tmp1 = DAG.getVAArg(NVT, dl, Chain, Ptr, Node->getOperand(2),
             Node->getConstantOperandVal(3));
    Chain = Tmp1.getValue(1);

    Tmp2 = DAG.getNode(TruncOp, dl, OVT, Tmp1);

    // Modified the chain result - switch anything that used the old chain to
    // use the new one.
    DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
    if (UpdatedNodes) {
      UpdatedNodes->insert(Tmp2.getNode());
      UpdatedNodes->insert(Chain.getNode());
    }
    // NOTE(review): this case replaces the node's uses itself and leaves
    // Results empty, so the code after the switch prints "Could not promote
    // node" even though the promotion happened.
    ReplacedNode(Node);
    break;
  }
  case ISD::MUL:
  case ISD::SDIV:
  case ISD::SREM:
  case ISD::UDIV:
  case ISD::UREM:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR: {
    unsigned ExtOp, TruncOp;
    if (OVT.isVector()) {
      // Vectors are reinterpreted, not extended.
      ExtOp   = ISD::BITCAST;
      TruncOp = ISD::BITCAST;
    } else {
      assert(OVT.isInteger() && "Cannot promote logic operation");

      // Signed division/remainder sign-extend, unsigned ones zero-extend;
      // every other opcode in this group uses ANY_EXTEND.
      switch (Node->getOpcode()) {
      default:
        ExtOp = ISD::ANY_EXTEND;
        break;
      case ISD::SDIV:
      case ISD::SREM:
        ExtOp = ISD::SIGN_EXTEND;
        break;
      case ISD::UDIV:
      case ISD::UREM:
        ExtOp = ISD::ZERO_EXTEND;
        break;
      }
      TruncOp = ISD::TRUNCATE;
    }
    // Promote each of the values to the new type.
    Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
    Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
    // Perform the larger operation, then convert back
    Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
    Results.push_back(DAG.getNode(TruncOp, dl, OVT, Tmp1));
    break;
  }
  case ISD::UMUL_LOHI:
  case ISD::SMUL_LOHI: {
    // Promote to a multiply in a wider integer type.
    unsigned ExtOp = Node->getOpcode() == ISD::UMUL_LOHI ? ISD::ZERO_EXTEND
                                                         : ISD::SIGN_EXTEND;
    Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
    Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
    Tmp1 = DAG.getNode(ISD::MUL, dl, NVT, Tmp1, Tmp2);

    // Result 0 is the truncated product; result 1 is the product shifted
    // right by the original scalar width, then truncated.
    auto &DL = DAG.getDataLayout();
    unsigned OriginalSize = OVT.getScalarSizeInBits();
    Tmp2 = DAG.getNode(
        ISD::SRL, dl, NVT, Tmp1,
        DAG.getConstant(OriginalSize, dl,
                        TLI.getScalarShiftAmountTy(DL, NVT)));
    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp2));
    break;
  }
  case ISD::SELECT: {
    unsigned ExtOp, TruncOp;
    // Vectors and same-sized types are bitcast; narrower integers are
    // any-extended/truncated; everything else is treated as floating point.
    if (Node->getValueType(0).isVector() ||
        Node->getValueType(0).getSizeInBits() == NVT.getSizeInBits()) {
      ExtOp   = ISD::BITCAST;
      TruncOp = ISD::BITCAST;
    } else if (Node->getValueType(0).isInteger()) {
      ExtOp   = ISD::ANY_EXTEND;
      TruncOp = ISD::TRUNCATE;
    } else {
      ExtOp   = ISD::FP_EXTEND;
      TruncOp = ISD::FP_ROUND;
    }
    Tmp1 = Node->getOperand(0);
    // Promote each of the values to the new type.
    Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
    Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
    // Perform the larger operation, then round down.
    Tmp1 = DAG.getSelect(dl, NVT, Tmp1, Tmp2, Tmp3);
    // FP_ROUND is built with an extra constant operand (0 here).
    if (TruncOp != ISD::FP_ROUND)
      Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1);
    else
      Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1,
                         DAG.getIntPtrConstant(0, dl));
    Results.push_back(Tmp1);
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef Mask = cast(Node)->getMask();

    // Cast the two input vectors.
    Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0));
    Tmp2 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(1));

    // Convert the shuffle mask to the right # elements.
    Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask);
    Tmp1 = DAG.getNode(ISD::BITCAST, dl, OVT, Tmp1);
    Results.push_back(Tmp1);
    break;
  }
  case ISD::SETCC: {
    // Integer compares extend according to the signedness of the condition
    // code (operand 2); non-integer promoted types use FP_EXTEND.
    unsigned ExtOp = ISD::FP_EXTEND;
    if (NVT.isInteger()) {
      ISD::CondCode CCCode =
        cast(Node->getOperand(2))->get();
      ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    }
    Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
    Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
    Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
                                  Tmp1, Tmp2, Node->getOperand(2)));
    break;
  }
  case ISD::BR_CC: {
    // Same scheme as SETCC, but the condition code is operand 1 and the
    // compared values are operands 2 and 3.
    unsigned ExtOp = ISD::FP_EXTEND;
    if (NVT.isInteger()) {
      ISD::CondCode CCCode =
        cast(Node->getOperand(1))->get();
      ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    }
    Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
    Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(3));
    Results.push_back(DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0),
                                  Node->getOperand(0), Node->getOperand(1),
                                  Tmp1, Tmp2, Node->getOperand(4)));
    break;
  }
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM:
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
  case ISD::FPOW:
    // Binary FP ops: extend both operands, operate in NVT (keeping the node's
    // flags), then round back to OVT.
    Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
    Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
    Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2,
                       Node->getFlags());
    Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
                                  Tmp3, DAG.getIntPtrConstant(0, dl)));
    break;
  case ISD::FMA:
    // Ternary FP op: same scheme with three extended operands.
    Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
    Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
    Tmp3 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(2));
    Results.push_back(
        DAG.getNode(ISD::FP_ROUND, dl, OVT,
                    DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3),
                    DAG.getIntPtrConstant(0, dl)));
    break;
  case ISD::FCOPYSIGN:
  case ISD::FPOWI: {
    // Only operand 0 is extended; operand 1 is passed through unchanged.
    Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
    Tmp2 = Node->getOperand(1);
    Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);

    // fcopysign doesn't change anything but the sign bit, so
    // (fp_round (fcopysign (fpext a), b))
    // is as precise as
    // (fp_round (fpext a))
    // which is a no-op. Mark it as a TRUNCating FP_ROUND.
    const bool isTrunc = (Node->getOpcode() == ISD::FCOPYSIGN);
    Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
                                  Tmp3, DAG.getIntPtrConstant(isTrunc, dl)));
    break;
  }
  case ISD::FFLOOR:
  case ISD::FCEIL:
  case ISD::FRINT:
  case ISD::FNEARBYINT:
  case ISD::FROUND:
  case ISD::FTRUNC:
  case ISD::FNEG:
  case ISD::FSQRT:
  case ISD::FSIN:
  case ISD::FCOS:
  case ISD::FLOG:
  case ISD::FLOG2:
  case ISD::FLOG10:
  case ISD::FABS:
  case ISD::FEXP:
  case ISD::FEXP2:
    // Unary FP ops: extend, operate in NVT, round back to OVT.
    Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
    Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
    Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
                                  Tmp2, DAG.getIntPtrConstant(0, dl)));
    break;
  case ISD::BUILD_VECTOR: {
    MVT EltVT = OVT.getVectorElementType();
    MVT NewEltVT = NVT.getVectorElementType();

    // Handle bitcasts to a different vector type with the same total bit size
    //
    // e.g. v2i64 = build_vector i64:x, i64:y => v4i32
    // =>
    // v4i32 = concat_vectors (v2i32 (bitcast i64:x)), (v2i32 (bitcast i64:y))

    assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() &&
           "Invalid promote type for build_vector");
    assert(NewEltVT.bitsLT(EltVT) && "not handled");

    MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);

    SmallVector NewOps;
    for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) {
      SDValue Op = Node->getOperand(I);
      NewOps.push_back(DAG.getNode(ISD::BITCAST, SDLoc(Op), MidVT, Op));
    }

    SDLoc SL(Node);
    SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewOps);
    SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat);
    Results.push_back(CvtVec);
    break;
  }
  case ISD::EXTRACT_VECTOR_ELT: {
    MVT EltVT = OVT.getVectorElementType();
    MVT NewEltVT = NVT.getVectorElementType();

    // Handle bitcasts to a different vector type with the same total bit size.
    //
    // e.g. i64 = extract_vector_elt x:v2i64, y:i32
    // =>
    // v4i32:castx = bitcast x:v2i64
    //
    // i64 = bitcast
    // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
    // (i32 (extract_vector_elt castx, (2 * y + 1)))
    //

    assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() &&
           "Invalid promote type for extract_vector_elt");
    assert(NewEltVT.bitsLT(EltVT) && "not handled");

    MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
    unsigned NewEltsPerOldElt = MidVT.getVectorNumElements();

    SDValue Idx = Node->getOperand(1);
    EVT IdxVT = Idx.getValueType();
    SDLoc SL(Node);
    // Index of the first narrow element that belongs to the requested one.
    SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SL, IdxVT);
    SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor);

    SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0));

    SmallVector NewOps;
    for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
      SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT);
      SDValue TmpIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset);

      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT,
                                CastVec, TmpIdx);
      NewOps.push_back(Elt);
    }

    SDValue NewVec = DAG.getBuildVector(MidVT, SL, NewOps);
    Results.push_back(DAG.getNode(ISD::BITCAST, SL, EltVT, NewVec));
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    MVT EltVT = OVT.getVectorElementType();
    MVT NewEltVT = NVT.getVectorElementType();

    // Handle bitcasts to a different vector type with the same total bit size
    //
    // e.g. v2i64 = insert_vector_elt x:v2i64, y:i64, z:i32
    // =>
    // v4i32:castx = bitcast x:v2i64
    // v2i32:casty = bitcast y:i64
    //
    // v2i64 = bitcast
    // (v4i32 insert_vector_elt
    // (v4i32 insert_vector_elt v4i32:castx,
    // (extract_vector_elt casty, 0), 2 * z),
    // (extract_vector_elt casty, 1), (2 * z + 1))

    assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() &&
           "Invalid promote type for insert_vector_elt");
    assert(NewEltVT.bitsLT(EltVT) && "not handled");

    MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
    unsigned NewEltsPerOldElt = MidVT.getVectorNumElements();

    SDValue Val = Node->getOperand(1);
    SDValue Idx = Node->getOperand(2);
    EVT IdxVT = Idx.getValueType();
    SDLoc SL(Node);

    // NOTE(review): this constant is built with an empty SDLoc() while the
    // matching line in the EXTRACT_VECTOR_ELT case passes SL -- confirm
    // whether the difference is intentional.
    SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SDLoc(), IdxVT);
    SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor);

    SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0));
    SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val);

    // Insert the narrow pieces of the value one at a time, threading the
    // partially updated vector through NewVec.
    SDValue NewVec = CastVec;
    for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
      SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT);
      SDValue InEltIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset);

      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT,
                                CastVal, IdxOffset);

      NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, NVT,
                           NewVec, Elt, InEltIdx);
    }

    Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewVec));
    break;
  }
  case ISD::SCALAR_TO_VECTOR: {
    MVT EltVT = OVT.getVectorElementType();
    MVT NewEltVT = NVT.getVectorElementType();

    // Handle bitcasts to different vector type with the same total bit size.
    //
    // e.g. v2i64 = scalar_to_vector x:i64
    // =>
    // concat_vectors (v2i32 bitcast x:i64), (v2i32 undef)
    //

    MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
    SDValue Val = Node->getOperand(0);
    SDLoc SL(Node);

    SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val);
    SDValue Undef = DAG.getUNDEF(MidVT);

    // First piece is the value; the remaining original elements are undef.
    SmallVector NewElts;
    NewElts.push_back(CastVal);
    for (unsigned I = 1, NElts = OVT.getVectorNumElements(); I != NElts; ++I)
      NewElts.push_back(Undef);

    SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewElts);
    SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat);
    Results.push_back(CvtVec);
    break;
  }
  }

  // Replace the original node with the legalized result.
  if (!Results.empty()) {
    LLVM_DEBUG(dbgs() << "Successfully promoted node\n");
    ReplaceNode(Node, Results.data());
  } else
    LLVM_DEBUG(dbgs() << "Could not promote node\n");
}

/// This is the entry point for the file.
///
/// Repeatedly sweeps the node list, legalizing every node not yet in
/// LegalizedNodes and deleting nodes that have no uses (other than the root),
/// until a full sweep legalizes nothing new; then removes dead nodes.
void SelectionDAG::Legalize() {
  AssignTopologicalOrder();

  SmallPtrSet LegalizedNodes;
  // Use a delete listener to remove nodes which were deleted during
  // legalization from LegalizedNodes. This is needed to handle the situation
  // where a new node is allocated by the object pool to the same address of a
  // previously deleted node.
  DAGNodeDeletedListener DeleteListener(
      *this,
      [&LegalizedNodes](SDNode *N, SDNode *E) { LegalizedNodes.erase(N); });

  SelectionDAGLegalize Legalizer(*this, LegalizedNodes);

  // Visit all the nodes. We start in topological order, so that we see
  // nodes with their original operands intact. Legalization can produce
  // new nodes which may themselves need to be legalized. Iterate until all
  // nodes have been legalized.
  while (true) {
    bool AnyLegalized = false;
    // The list is walked from the back towards the front.
    for (auto NI = allnodes_end(); NI != allnodes_begin();) {
      --NI;

      SDNode *N = &*NI;
      if (N->use_empty() && N != getRoot().getNode()) {
        // Step the iterator off N before deleting it.
        ++NI;
        DeleteNode(N);
        continue;
      }

      // insert().second is true only the first time N is seen.
      if (LegalizedNodes.insert(N).second) {
        AnyLegalized = true;
        Legalizer.LegalizeOp(N);

        // Legalization may have left N without uses; drop it right away.
        if (N->use_empty() && N != getRoot().getNode()) {
          ++NI;
          DeleteNode(N);
        }
      }
    }
    if (!AnyLegalized)
      break;

  }

  // Remove dead nodes now.
  RemoveDeadNodes();
}

/// Legalize the single node \p N using a fresh legalizer that reports into
/// \p UpdatedNodes.
///
/// Returns whether N is still present in the local LegalizedNodes set after
/// legalization. Presumably the legalizer removes N from that set when it
/// replaces the node -- confirm against SelectionDAGLegalize.
bool SelectionDAG::LegalizeOp(SDNode *N,
                              SmallSetVector &UpdatedNodes) {
  SmallPtrSet LegalizedNodes;
  SelectionDAGLegalize Legalizer(*this, LegalizedNodes, &UpdatedNodes);

  // Directly insert the node in question, and legalize it. This will recurse
  // as needed through operands.
  LegalizedNodes.insert(N);
  Legalizer.LegalizeOp(N);

  return LegalizedNodes.count(N);
}
Index: projects/clang700-import/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
===================================================================
--- projects/clang700-import/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (revision 337644)
+++ projects/clang700-import/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (revision 337645)
@@ -1,4194 +1,4211 @@
//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file performs vector type splitting and scalarization for LegalizeTypes.
// Scalarization is the act of changing a computation in an illegal one-element
// vector type to be a computation in its scalar element type. For example,
// implementing <1 x f32> arithmetic in a scalar f32 register.
This is needed
// as a base case when scalarizing vector arithmetic like <4 x f32>, which
// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32
// types.
// Splitting is the act of changing a computation in an invalid vector type to
// be a computation in two vectors of half the size. For example, implementing
// <128 x f32> operations in terms of two <64 x f32> operations.
//
//===----------------------------------------------------------------------===//

#include "LegalizeTypes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

#define DEBUG_TYPE "legalize-types"

//===----------------------------------------------------------------------===//
// Result Vector Scalarization: <1 x ty> -> ty.
//===----------------------------------------------------------------------===//

/// Scalarize result number \p ResNo of \p N by dispatching on the opcode to
/// the matching ScalarizeVecRes_* helper.
///
/// If the helper returns a non-null value it is recorded with
/// SetScalarizedVector; a null value means the helper registered the result
/// itself. Unknown opcodes are a fatal error.
///
/// NOTE(review): "cast(N)" below has no template argument in this copy of the
/// file; it looks stripped by whatever produced this text -- restore it from
/// the upstream file before building.
void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
  LLVM_DEBUG(dbgs() << "Scalarize node result " << ResNo << ": "; N->dump(&DAG);
             dbgs() << "\n");
  SDValue R = SDValue();

  switch (N->getOpcode()) {
  default:
#ifndef NDEBUG
    dbgs() << "ScalarizeVectorResult #" << ResNo << ": ";
    N->dump(&DAG);
    dbgs() << "\n";
#endif
    report_fatal_error("Do not know how to scalarize the result of this "
                       "operator!\n");

  case ISD::MERGE_VALUES:      R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break;
  case ISD::BITCAST:           R = ScalarizeVecRes_BITCAST(N); break;
  case ISD::BUILD_VECTOR:      R = ScalarizeVecRes_BUILD_VECTOR(N); break;
  case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
  case ISD::FP_ROUND:          R = ScalarizeVecRes_FP_ROUND(N); break;
  case ISD::FP_ROUND_INREG:    R = ScalarizeVecRes_InregOp(N); break;
  case ISD::FPOWI:             R = ScalarizeVecRes_FPOWI(N); break;
  case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
  case ISD::LOAD:           R = ScalarizeVecRes_LOAD(cast(N));break;
  case ISD::SCALAR_TO_VECTOR:  R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
  case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
  case ISD::VSELECT:           R = ScalarizeVecRes_VSELECT(N); break;
  case ISD::SELECT:            R = ScalarizeVecRes_SELECT(N); break;
  case ISD::SELECT_CC:         R = ScalarizeVecRes_SELECT_CC(N); break;
  case ISD::SETCC:             R = ScalarizeVecRes_SETCC(N); break;
  case ISD::UNDEF:             R = ScalarizeVecRes_UNDEF(N); break;
  case ISD::VECTOR_SHUFFLE:    R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    R = ScalarizeVecRes_VecInregOp(N);
    break;
  // Single-operand opcodes share one helper.
  case ISD::ANY_EXTEND:
  case ISD::BITREVERSE:
  case ISD::BSWAP:
  case ISD::CTLZ:
  case ISD::CTLZ_ZERO_UNDEF:
  case ISD::CTPOP:
  case ISD::CTTZ:
  case ISD::CTTZ_ZERO_UNDEF:
  case ISD::FABS:
  case ISD::FCEIL:
  case ISD::FCOS:
  case ISD::FEXP:
  case ISD::FEXP2:
  case ISD::FFLOOR:
  case ISD::FLOG:
  case ISD::FLOG10:
  case ISD::FLOG2:
  case ISD::FNEARBYINT:
  case ISD::FNEG:
  case ISD::FP_EXTEND:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::FRINT:
  case ISD::FROUND:
  case ISD::FSIN:
  case ISD::FSQRT:
  case ISD::FTRUNC:
  case ISD::SIGN_EXTEND:
  case ISD::SINT_TO_FP:
  case ISD::TRUNCATE:
  case ISD::UINT_TO_FP:
  case ISD::ZERO_EXTEND:
  case ISD::FCANONICALIZE:
    R = ScalarizeVecRes_UnaryOp(N);
    break;

  // Two-operand opcodes share one helper.
  case ISD::ADD:
  case ISD::AND:
  case ISD::FADD:
  case ISD::FCOPYSIGN:
  case ISD::FDIV:
  case ISD::FMUL:
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
  case ISD::FMINNAN:
  case ISD::FMAXNAN:
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:

  case ISD::FPOW:
  case ISD::FREM:
  case ISD::FSUB:
  case ISD::MUL:
  case ISD::OR:
  case ISD::SDIV:
  case ISD::SREM:
  case ISD::SUB:
  case ISD::UDIV:
  case ISD::UREM:
  case ISD::XOR:
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    R = ScalarizeVecRes_BinOp(N);
    break;
  case ISD::FMA:
    R = ScalarizeVecRes_TernaryOp(N);
    break;
  }

  // If R is null, the sub-method took care of registering the result.
  if (R.getNode())
    SetScalarizedVector(SDValue(N, ResNo), R);
}

/// Rebuild a two-operand node on the scalarized operands, using the scalar
/// LHS type as the result type and carrying over the node's flags.
SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
  SDValue LHS = GetScalarizedVector(N->getOperand(0));
  SDValue RHS = GetScalarizedVector(N->getOperand(1));
  return DAG.getNode(N->getOpcode(), SDLoc(N),
                     LHS.getValueType(), LHS, RHS, N->getFlags());
}

/// Rebuild a three-operand node on the scalarized operands. Unlike
/// ScalarizeVecRes_BinOp, no node flags are passed to the new node here.
SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
  SDValue Op0 = GetScalarizedVector(N->getOperand(0));
  SDValue Op1 = GetScalarizedVector(N->getOperand(1));
  SDValue Op2 = GetScalarizedVector(N->getOperand(2));
  return DAG.getNode(N->getOpcode(), SDLoc(N),
                     Op0.getValueType(), Op0, Op1, Op2);
}

/// Take the value DisintegrateMERGE_VALUES returns for result \p ResNo and
/// return its scalarized form.
SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,
                                                       unsigned ResNo) {
  SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
  return GetScalarizedVector(Op);
}

/// Bitcast the operand to the result's element type. A one-element vector
/// operand whose type is not a simple legal type is scalarized first.
SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
  SDValue Op = N->getOperand(0);
  if (Op.getValueType().isVector()
      && Op.getValueType().getVectorNumElements() == 1
      && !isSimpleLegalType(Op.getValueType()))
    Op = GetScalarizedVector(Op);
  EVT NewVT = N->getValueType(0).getVectorElementType();
  return DAG.getNode(ISD::BITCAST, SDLoc(N),
                     NewVT, Op);
}

/// Return operand 0 of the build_vector, truncated to the element type when
/// that type is an integer.
SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) {
  EVT EltVT = N->getValueType(0).getVectorElementType();
  SDValue InOp = N->getOperand(0);
  // The BUILD_VECTOR operands may be of wider element types and
  // we may need to truncate them back to the requested return type.
  if (EltVT.isInteger())
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp);
  return InOp;
}

/// Turn the subvector extraction into an EXTRACT_VECTOR_ELT with the same
/// source vector and index operands.
SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
                     N->getValueType(0).getVectorElementType(),
                     N->getOperand(0), N->getOperand(1));
}

/// Scalar FP_ROUND of the scalarized operand; operand 1 is passed through
/// unchanged.
SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
  EVT NewVT = N->getValueType(0).getVectorElementType();
  SDValue Op = GetScalarizedVector(N->getOperand(0));
  return DAG.getNode(ISD::FP_ROUND, SDLoc(N),
                     NewVT, Op, N->getOperand(1));
}

/// Scalar FPOWI: only operand 0 is scalarized, operand 1 is used as is.
SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
  SDValue Op = GetScalarizedVector(N->getOperand(0));
  return DAG.getNode(ISD::FPOWI, SDLoc(N),
                     Op.getValueType(), Op, N->getOperand(1));
}

/// The scalarized result is the inserted value itself (operand 1), truncated
/// when its type differs from the element type.
SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
  // The value to insert may have a wider type than the vector element type,
  // so be sure to truncate it to the element type if necessary.
  SDValue Op = N->getOperand(1);
  EVT EltVT = N->getValueType(0).getVectorElementType();
  if (Op.getValueType() != EltVT)
    // FIXME: Can this happen for floating point types?
    Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Op);
  return Op;
}

/// Re-issue an unindexed vector load as a load of the element type (both the
/// result type and the memory type use their vector element type), and
/// redirect users of the old chain result to the new load's chain.
SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
  assert(N->isUnindexed() && "Indexed vector load?");

  SDValue Result = DAG.getLoad(
      ISD::UNINDEXED, N->getExtensionType(),
      N->getValueType(0).getVectorElementType(), SDLoc(N), N->getChain(),
      N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()),
      N->getPointerInfo(), N->getMemoryVT().getVectorElementType(),
      N->getOriginalAlignment(), N->getMemOperand()->getFlags(),
      N->getAAInfo());

  // Legalize the chain result - switch anything that used the old chain to
  // use the new one.
  ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
  return Result;
}

/// Apply a single-operand opcode to the scalar form of operand 0. If the
/// operand's type is not itself being scalarized, element 0 is extracted
/// from it.
SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
  // Get the dest type - it doesn't always match the input type, e.g. int_to_fp.
  EVT DestVT = N->getValueType(0).getVectorElementType();
  SDValue Op = N->getOperand(0);
  EVT OpVT = Op.getValueType();
  SDLoc DL(N);
  // The result needs scalarizing, but it's not a given that the source does.
  // This is a workaround for targets where it's impossible to scalarize the
  // result of a conversion, because the source type is legal.
  // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32}
  // are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is
  // legal and was not scalarized.
  // See the similar logic in ScalarizeVecRes_SETCC
  if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
    Op = GetScalarizedVector(Op);
  } else {
    EVT VT = OpVT.getVectorElementType();
    Op = DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, VT, Op,
        DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
  }
  return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op);
}

/// Scalarize an *_INREG node: operand 0 is scalarized and the value-type
/// operand is replaced by its vector element type.
SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
  EVT EltVT = N->getValueType(0).getVectorElementType();
  EVT ExtVT = cast(N->getOperand(1))->getVT().getVectorElementType();
  SDValue LHS = GetScalarizedVector(N->getOperand(0));
  return DAG.getNode(N->getOpcode(), SDLoc(N), EltVT,
                     LHS, DAG.getValueType(ExtVT));
}

/// Scalarize an *_EXTEND_VECTOR_INREG node into the corresponding scalar
/// extend of the operand's scalar form (element 0 is extracted when the
/// operand type is not itself being scalarized).
SDValue DAGTypeLegalizer::ScalarizeVecRes_VecInregOp(SDNode *N) {
  SDLoc DL(N);
  SDValue Op = N->getOperand(0);

  EVT OpVT = Op.getValueType();
  EVT OpEltVT = OpVT.getVectorElementType();
  EVT EltVT = N->getValueType(0).getVectorElementType();

  if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
    Op = GetScalarizedVector(Op);
  } else {
    Op = DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, OpEltVT, Op,
        DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
  }

  switch (N->getOpcode()) {
  case ISD::ANY_EXTEND_VECTOR_INREG:
    return DAG.getNode(ISD::ANY_EXTEND, DL, EltVT, Op);
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    return DAG.getNode(ISD::SIGN_EXTEND, DL, EltVT, Op);
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    return DAG.getNode(ISD::ZERO_EXTEND, DL, EltVT, Op);
  }

  llvm_unreachable("Illegal extend_vector_inreg opcode");
}

/// The scalarized result is operand 0, truncated when its type differs from
/// the element type.
SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
  // If the operand is wider than the vector element type then it is implicitly
  // truncated. Make that explicit here.
  EVT EltVT = N->getValueType(0).getVectorElementType();
  SDValue InOp = N->getOperand(0);
  if (InOp.getValueType() != EltVT)
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp);
  return InOp;
}

SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  EVT OpVT = Cond.getValueType();
  SDLoc DL(N);
  // The vselect result and true/value operands needs scalarizing, but it's
  // not a given that the Cond does. For instance, in AVX512 v1i1 is legal.
  // See the similar logic in ScalarizeVecRes_SETCC
  if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
    Cond = GetScalarizedVector(Cond);
  } else {
    EVT VT = OpVT.getVectorElementType();
    Cond = DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, VT, Cond,
        DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
  }

  SDValue LHS = GetScalarizedVector(N->getOperand(1));
  TargetLowering::BooleanContent ScalarBool =
      TLI.getBooleanContents(false, false);
  TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true, false);

  // If integer and float booleans have different contents then we can't
  // reliably optimize in all cases. There is a full explanation for this in
  // DAGCombiner::visitSELECT() where the same issue affects folding
  // (select C, 0, 1) to (xor C, 1).
  if (TLI.getBooleanContents(false, false) !=
      TLI.getBooleanContents(false, true)) {
    // At least try the common case where the boolean is generated by a
    // comparison.
if (Cond->getOpcode() == ISD::SETCC) { EVT OpVT = Cond->getOperand(0).getValueType(); ScalarBool = TLI.getBooleanContents(OpVT.getScalarType()); VecBool = TLI.getBooleanContents(OpVT); } else ScalarBool = TargetLowering::UndefinedBooleanContent; } EVT CondVT = Cond.getValueType(); if (ScalarBool != VecBool) { switch (ScalarBool) { case TargetLowering::UndefinedBooleanContent: break; case TargetLowering::ZeroOrOneBooleanContent: assert(VecBool == TargetLowering::UndefinedBooleanContent || VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent); // Vector read from all ones, scalar expects a single 1 so mask. Cond = DAG.getNode(ISD::AND, SDLoc(N), CondVT, Cond, DAG.getConstant(1, SDLoc(N), CondVT)); break; case TargetLowering::ZeroOrNegativeOneBooleanContent: assert(VecBool == TargetLowering::UndefinedBooleanContent || VecBool == TargetLowering::ZeroOrOneBooleanContent); // Vector reads from a one, scalar from all ones so sign extend. Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), CondVT, Cond, DAG.getValueType(MVT::i1)); break; } } // Truncate the condition if needed auto BoolVT = getSetCCResultType(CondVT); if (BoolVT.bitsLT(CondVT)) Cond = DAG.getNode(ISD::TRUNCATE, SDLoc(N), BoolVT, Cond); return DAG.getSelect(SDLoc(N), LHS.getValueType(), Cond, LHS, GetScalarizedVector(N->getOperand(2))); } SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(1)); return DAG.getSelect(SDLoc(N), LHS.getValueType(), N->getOperand(0), LHS, GetScalarizedVector(N->getOperand(2))); } SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(2)); return DAG.getNode(ISD::SELECT_CC, SDLoc(N), LHS.getValueType(), N->getOperand(0), N->getOperand(1), LHS, GetScalarizedVector(N->getOperand(3)), N->getOperand(4)); } SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) { return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); } SDValue 
DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) { // Figure out if the scalar is the LHS or RHS and return it. SDValue Arg = N->getOperand(2).getOperand(0); if (Arg.isUndef()) return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); unsigned Op = !cast(Arg)->isNullValue(); return GetScalarizedVector(N->getOperand(Op)); } SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && "Operand types must be vectors"); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); EVT OpVT = LHS.getValueType(); EVT NVT = N->getValueType(0).getVectorElementType(); SDLoc DL(N); // The result needs scalarizing, but it's not a given that the source does. if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { LHS = GetScalarizedVector(LHS); RHS = GetScalarizedVector(RHS); } else { EVT VT = OpVT.getVectorElementType(); LHS = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS, DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); RHS = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS, DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); } // Turn it into a scalar SETCC. SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2)); // Vectors may have a different boolean contents to scalars. Promote the // value appropriately. ISD::NodeType ExtendCode = TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); return DAG.getNode(ExtendCode, DL, NVT, Res); } //===----------------------------------------------------------------------===// // Operand Vector Scalarization <1 x ty> -> ty. 
//===----------------------------------------------------------------------===// bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { LLVM_DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"); SDValue Res = SDValue(); if (!Res.getNode()) { switch (N->getOpcode()) { default: #ifndef NDEBUG dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"; #endif report_fatal_error("Do not know how to scalarize this operator's " "operand!\n"); case ISD::BITCAST: Res = ScalarizeVecOp_BITCAST(N); break; case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: case ISD::SIGN_EXTEND: case ISD::TRUNCATE: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: Res = ScalarizeVecOp_UnaryOp(N); break; case ISD::CONCAT_VECTORS: Res = ScalarizeVecOp_CONCAT_VECTORS(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::VSELECT: Res = ScalarizeVecOp_VSELECT(N); break; case ISD::SETCC: Res = ScalarizeVecOp_VSETCC(N); break; case ISD::STORE: Res = ScalarizeVecOp_STORE(cast(N), OpNo); break; case ISD::FP_ROUND: Res = ScalarizeVecOp_FP_ROUND(N, OpNo); break; } } // If the result is null, the sub-method took care of registering results etc. if (!Res.getNode()) return false; // If the result is N, the sub-method updated N in place. Tell the legalizer // core about this. if (Res.getNode() == N) return true; assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && "Invalid operand expansion"); ReplaceValueWith(SDValue(N, 0), Res); return false; } /// If the value to convert is a vector that needs to be scalarized, it must be /// <1 x ty>. Convert the element instead. 
SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
  // Bitcast the scalarized element straight to the node's result type.
  SDValue Elt = GetScalarizedVector(N->getOperand(0));
  return DAG.getNode(ISD::BITCAST, SDLoc(N),
                     N->getValueType(0), Elt);
}

/// If the input is a vector that needs to be scalarized, it must be <1 x ty>.
/// Do the operation on the element instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
  assert(N->getValueType(0).getVectorNumElements() == 1 &&
         "Unexpected vector type!");
  SDValue Elt = GetScalarizedVector(N->getOperand(0));
  SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N),
                           N->getValueType(0).getScalarType(), Elt);
  // Revectorize the result so the types line up with what the uses of this
  // expression expect.
  return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op);
}

/// The vectors to concatenate have length one - use a BUILD_VECTOR instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
  // One scalar per concatenated operand, in operand order.
  SmallVector<SDValue, 8> Ops(N->getNumOperands());
  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
    Ops[i] = GetScalarizedVector(N->getOperand(i));
  return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Ops);
}

/// If the input is a vector that needs to be scalarized, it must be <1 x ty>,
/// so just return the element, ignoring the index.
SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue Res = GetScalarizedVector(N->getOperand(0));
  // When the scalar's type differs from the result type, extend it:
  // FP_EXTEND for floating-point results, ANY_EXTEND otherwise.
  if (Res.getValueType() != VT)
    Res = VT.isFloatingPoint()
              ? DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Res)
              : DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Res);
  return Res;
}

/// If the input condition is a vector that needs to be scalarized, it must be
/// <1 x i1>, so just convert to a normal ISD::SELECT
/// (still with vector output type since that was acceptable if we got here).
SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) {
  // Only the condition is scalarized; both value operands and the result
  // type are taken from the node unchanged.
  SDValue Cond = GetScalarizedVector(N->getOperand(0));
  EVT ResultVT = N->getValueType(0);

  return DAG.getNode(ISD::SELECT, SDLoc(N), ResultVT, Cond, N->getOperand(1),
                     N->getOperand(2));
}

/// If the operand is a vector that needs to be scalarized then the
/// result must be v1i1, so just convert to a scalar SETCC and wrap
/// with a scalar_to_vector since the res type is legal if we got here
SDValue DAGTypeLegalizer::ScalarizeVecOp_VSETCC(SDNode *N) {
  assert(N->getValueType(0).isVector() &&
         N->getOperand(0).getValueType().isVector() &&
         "Operand types must be vectors");
  assert(N->getValueType(0) == MVT::v1i1 && "Expected v1i1 type");

  EVT ResultVT = N->getValueType(0);
  SDValue ScalarLHS = GetScalarizedVector(N->getOperand(0));
  SDValue ScalarRHS = GetScalarizedVector(N->getOperand(1));

  EVT VecOpVT = N->getOperand(0).getValueType();
  EVT ResultEltVT = ResultVT.getVectorElementType();
  SDLoc DL(N);

  // Compare the two scalars, producing an i1.
  SDValue Cmp = DAG.getNode(ISD::SETCC, DL, MVT::i1, ScalarLHS, ScalarRHS,
                            N->getOperand(2));

  // Scalar and vector booleans may be represented differently; extend the
  // i1 using the extension that matches the operand vector type's contents.
  ISD::NodeType ExtOpc =
      TargetLowering::getExtendForContent(TLI.getBooleanContents(VecOpVT));
  SDValue Extended = DAG.getNode(ExtOpc, DL, ResultEltVT, Cmp);

  // Wrap the scalar back into the one-element result vector.
  return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResultVT, Extended);
}

/// If the value to store is a vector that needs to be scalarized, it must be
/// <1 x ty>.  Just store the element.
SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ assert(N->isUnindexed() && "Indexed store of one-element vector?"); assert(OpNo == 1 && "Do not know how to scalarize this operand!"); SDLoc dl(N); if (N->isTruncatingStore()) return DAG.getTruncStore( N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), N->getBasePtr(), N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), N->getAlignment(), N->getMemOperand()->getFlags(), N->getAAInfo()); return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), N->getBasePtr(), N->getPointerInfo(), N->getOriginalAlignment(), N->getMemOperand()->getFlags(), N->getAAInfo()); } /// If the value to round is a vector that needs to be scalarized, it must be /// <1 x ty>. Convert the element instead. SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) { SDValue Elt = GetScalarizedVector(N->getOperand(0)); SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N), N->getValueType(0).getVectorElementType(), Elt, N->getOperand(1)); return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); } //===----------------------------------------------------------------------===// // Result Vector Splitting //===----------------------------------------------------------------------===// /// This method is called when the specified result of the specified node is /// found to need vector splitting. At this point, the node may also have /// invalid operands or may have other results that need legalization, we just /// know that (at least) one result needs vector splitting. void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { LLVM_DEBUG(dbgs() << "Split node result: "; N->dump(&DAG); dbgs() << "\n"); SDValue Lo, Hi; // See if the target wants to custom expand this node. 
if (CustomLowerNode(N, N->getValueType(ResNo), true)) return; switch (N->getOpcode()) { default: #ifndef NDEBUG dbgs() << "SplitVectorResult #" << ResNo << ": "; N->dump(&DAG); dbgs() << "\n"; #endif report_fatal_error("Do not know how to split the result of this " "operator!\n"); case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; case ISD::VSELECT: case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break; case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break; case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break; case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break; case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; case ISD::LOAD: SplitVecRes_LOAD(cast(N), Lo, Hi); break; case ISD::MLOAD: SplitVecRes_MLOAD(cast(N), Lo, Hi); break; case ISD::MGATHER: SplitVecRes_MGATHER(cast(N), Lo, Hi); break; case ISD::SETCC: SplitVecRes_SETCC(N, Lo, Hi); break; case ISD::VECTOR_SHUFFLE: SplitVecRes_VECTOR_SHUFFLE(cast(N), Lo, Hi); break; case ISD::ANY_EXTEND_VECTOR_INREG: case ISD::SIGN_EXTEND_VECTOR_INREG: case ISD::ZERO_EXTEND_VECTOR_INREG: SplitVecRes_ExtVecInRegOp(N, Lo, Hi); break; case ISD::BITREVERSE: case ISD::BSWAP: case ISD::CTLZ: case ISD::CTTZ: case ISD::CTLZ_ZERO_UNDEF: case ISD::CTTZ_ZERO_UNDEF: case ISD::CTPOP: case ISD::FABS: case ISD::FCEIL: case ISD::FCOS: case ISD::FEXP: case ISD::FEXP2: case ISD::FFLOOR: 
case ISD::FLOG: case ISD::FLOG10: case ISD::FLOG2: case ISD::FNEARBYINT: case ISD::FNEG: case ISD::FP_EXTEND: case ISD::FP_ROUND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FRINT: case ISD::FROUND: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: case ISD::SINT_TO_FP: case ISD::TRUNCATE: case ISD::UINT_TO_FP: case ISD::FCANONICALIZE: SplitVecRes_UnaryOp(N, Lo, Hi); break; case ISD::ANY_EXTEND: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: SplitVecRes_ExtendOp(N, Lo, Hi); break; case ISD::ADD: case ISD::SUB: case ISD::MUL: case ISD::MULHS: case ISD::MULHU: case ISD::FADD: case ISD::FSUB: case ISD::FMUL: case ISD::FMINNUM: case ISD::FMAXNUM: case ISD::FMINNAN: case ISD::FMAXNAN: case ISD::SDIV: case ISD::UDIV: case ISD::FDIV: case ISD::FPOW: case ISD::AND: case ISD::OR: case ISD::XOR: case ISD::SHL: case ISD::SRA: case ISD::SRL: case ISD::UREM: case ISD::SREM: case ISD::FREM: case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: case ISD::UMAX: SplitVecRes_BinOp(N, Lo, Hi); break; case ISD::FMA: SplitVecRes_TernaryOp(N, Lo, Hi); break; case ISD::STRICT_FADD: case ISD::STRICT_FSUB: case ISD::STRICT_FMUL: case ISD::STRICT_FDIV: case ISD::STRICT_FSQRT: case ISD::STRICT_FMA: case ISD::STRICT_FPOW: case ISD::STRICT_FPOWI: case ISD::STRICT_FSIN: case ISD::STRICT_FCOS: case ISD::STRICT_FEXP: case ISD::STRICT_FEXP2: case ISD::STRICT_FLOG: case ISD::STRICT_FLOG10: case ISD::STRICT_FLOG2: case ISD::STRICT_FRINT: case ISD::STRICT_FNEARBYINT: SplitVecRes_StrictFPOp(N, Lo, Hi); break; } // If Lo/Hi is null, the sub-method took care of registering results etc. 
if (Lo.getNode()) SetSplitVector(SDValue(N, ResNo), Lo, Hi); } void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHSLo, LHSHi; GetSplitVector(N->getOperand(0), LHSLo, LHSHi); SDValue RHSLo, RHSHi; GetSplitVector(N->getOperand(1), RHSLo, RHSHi); SDLoc dl(N); const SDNodeFlags Flags = N->getFlags(); unsigned Opcode = N->getOpcode(); Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags); Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags); } void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Op0Lo, Op0Hi; GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi); SDValue Op1Lo, Op1Hi; GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi); SDValue Op2Lo, Op2Hi; GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi); SDLoc dl(N); Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), Op0Lo, Op1Lo, Op2Lo); Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(), Op0Hi, Op1Hi, Op2Hi); } void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // We know the result is a vector. The input may be either a vector or a // scalar value. EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); SDLoc dl(N); SDValue InOp = N->getOperand(0); EVT InVT = InOp.getValueType(); // Handle some special cases efficiently. switch (getTypeAction(InVT)) { case TargetLowering::TypeLegal: case TargetLowering::TypePromoteInteger: case TargetLowering::TypePromoteFloat: case TargetLowering::TypeSoftenFloat: case TargetLowering::TypeScalarizeVector: case TargetLowering::TypeWidenVector: break; case TargetLowering::TypeExpandInteger: case TargetLowering::TypeExpandFloat: // A scalar to vector conversion, where the scalar needs expansion. // If the vector is being split in two then we can just convert the // expanded pieces. 
if (LoVT == HiVT) { GetExpandedOp(InOp, Lo, Hi); if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); return; } break; case TargetLowering::TypeSplitVector: // If the input is a vector that needs to be split, convert each split // piece of the input now. GetSplitVector(InOp, Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); return; } // In the general case, convert the input to an integer and split it by hand. EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); if (DAG.getDataLayout().isBigEndian()) std::swap(LoIntVT, HiIntVT); SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi); if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); } void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; SDLoc dl(N); std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); unsigned LoNumElts = LoVT.getVectorNumElements(); SmallVector LoOps(N->op_begin(), N->op_begin()+LoNumElts); Lo = DAG.getBuildVector(LoVT, dl, LoOps); SmallVector HiOps(N->op_begin()+LoNumElts, N->op_end()); Hi = DAG.getBuildVector(HiVT, dl, HiOps); } void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS"); SDLoc dl(N); unsigned NumSubvectors = N->getNumOperands() / 2; if (NumSubvectors == 1) { Lo = N->getOperand(0); Hi = N->getOperand(1); return; } EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); SmallVector LoOps(N->op_begin(), N->op_begin()+NumSubvectors); Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, LoOps); SmallVector HiOps(N->op_begin()+NumSubvectors, N->op_end()); Hi = 
DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, HiOps); } void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Vec = N->getOperand(0); SDValue Idx = N->getOperand(1); SDLoc dl(N); EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); uint64_t IdxVal = cast(Idx)->getZExtValue(); Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); } void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Vec = N->getOperand(0); SDValue SubVec = N->getOperand(1); SDValue Idx = N->getOperand(2); SDLoc dl(N); GetSplitVector(Vec, Lo, Hi); EVT VecVT = Vec.getValueType(); unsigned VecElems = VecVT.getVectorNumElements(); unsigned SubElems = SubVec.getValueType().getVectorNumElements(); // If we know the index is 0, and we know the subvector doesn't cross the // boundary between the halves, we can avoid spilling the vector, and insert // into the lower half of the split vector directly. // TODO: The IdxVal == 0 constraint is artificial, we could do this whenever // the index is constant and there is no boundary crossing. But those cases // don't seem to get hit in practice. if (ConstantSDNode *ConstIdx = dyn_cast(Idx)) { unsigned IdxVal = ConstIdx->getZExtValue(); if ((IdxVal == 0) && (IdxVal + SubElems <= VecElems / 2)) { EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx); return; } } // Spill the vector to the stack. SDValue StackPtr = DAG.CreateStackTemporary(VecVT); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); // Store the new subvector into the specified index. 
SDValue SubVecPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo()); // Load the Lo part from the stack slot. Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo()); // Increment the pointer to the other part. unsigned IncrementSize = Lo.getValueSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, DAG.getConstant(IncrementSize, dl, StackPtr.getValueType())); // Load the Hi part from the stack slot. Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), MinAlign(Alignment, IncrementSize)); } void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); GetSplitVector(N->getOperand(0), Lo, Hi); Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1)); Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1)); } void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHSLo, LHSHi; GetSplitVector(N->getOperand(0), LHSLo, LHSHi); SDLoc DL(N); SDValue RHSLo, RHSHi; SDValue RHS = N->getOperand(1); EVT RHSVT = RHS.getValueType(); if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector) GetSplitVector(RHS, RHSLo, RHSHi); else std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS)); Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLo.getValueType(), LHSLo, RHSLo); Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHi.getValueType(), LHSHi, RHSHi); } void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHSLo, LHSHi; GetSplitVector(N->getOperand(0), LHSLo, LHSHi); SDLoc dl(N); EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(cast(N->getOperand(1))->getVT()); Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, DAG.getValueType(LoVT)); Hi = 
DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, DAG.getValueType(HiVT)); } void DAGTypeLegalizer::SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi) { unsigned Opcode = N->getOpcode(); SDValue N0 = N->getOperand(0); SDLoc dl(N); SDValue InLo, InHi; if (getTypeAction(N0.getValueType()) == TargetLowering::TypeSplitVector) GetSplitVector(N0, InLo, InHi); else std::tie(InLo, InHi) = DAG.SplitVectorOperand(N, 0); EVT InLoVT = InLo.getValueType(); unsigned InNumElements = InLoVT.getVectorNumElements(); EVT OutLoVT, OutHiVT; std::tie(OutLoVT, OutHiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); unsigned OutNumElements = OutLoVT.getVectorNumElements(); assert((2 * OutNumElements) <= InNumElements && "Illegal extend vector in reg split"); // *_EXTEND_VECTOR_INREG instructions extend the lowest elements of the // input vector (i.e. we only use InLo): // OutLo will extend the first OutNumElements from InLo. // OutHi will extend the next OutNumElements from InLo. // Shuffle the elements from InLo for OutHi into the bottom elements to // create a 'fake' InHi. SmallVector SplitHi(InNumElements, -1); for (unsigned i = 0; i != OutNumElements; ++i) SplitHi[i] = i + OutNumElements; InHi = DAG.getVectorShuffle(InLoVT, dl, InLo, DAG.getUNDEF(InLoVT), SplitHi); Lo = DAG.getNode(Opcode, dl, OutLoVT, InLo); Hi = DAG.getNode(Opcode, dl, OutHiVT, InHi); } void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, SDValue &Hi) { unsigned NumOps = N->getNumOperands(); SDValue Chain = N->getOperand(0); EVT LoVT, HiVT; SDLoc dl(N); std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); SmallVector OpsLo; SmallVector OpsHi; // The Chain is the first operand. OpsLo.push_back(Chain); OpsHi.push_back(Chain); // Now process the remaining operands. 
for (unsigned i = 1; i < NumOps; ++i) { SDValue Op = N->getOperand(i); SDValue OpLo = Op; SDValue OpHi = Op; EVT InVT = Op.getValueType(); if (InVT.isVector()) { // If the input also splits, handle it directly for a // compile time speedup. Otherwise split it by hand. if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) GetSplitVector(Op, OpLo, OpHi); else std::tie(OpLo, OpHi) = DAG.SplitVectorOperand(N, i); } OpsLo.push_back(OpLo); OpsHi.push_back(OpHi); } EVT LoValueVTs[] = {LoVT, MVT::Other}; EVT HiValueVTs[] = {HiVT, MVT::Other}; Lo = DAG.getNode(N->getOpcode(), dl, LoValueVTs, OpsLo); Hi = DAG.getNode(N->getOpcode(), dl, HiValueVTs, OpsHi); // Build a factor node to remember that this Op is independent of the // other one. Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Chain); } void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Vec = N->getOperand(0); SDValue Elt = N->getOperand(1); SDValue Idx = N->getOperand(2); SDLoc dl(N); GetSplitVector(Vec, Lo, Hi); if (ConstantSDNode *CIdx = dyn_cast(Idx)) { unsigned IdxVal = CIdx->getZExtValue(); unsigned LoNumElts = Lo.getValueType().getVectorNumElements(); if (IdxVal < LoNumElts) Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Lo.getValueType(), Lo, Elt, Idx); else Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, DAG.getConstant(IdxVal - LoNumElts, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); return; } // See if the target wants to custom expand this node. if (CustomLowerNode(N, N->getValueType(0), true)) return; // Make the vector elements byte-addressable if they aren't already. 
EVT VecVT = Vec.getValueType(); EVT EltVT = VecVT.getVectorElementType(); if (VecVT.getScalarSizeInBits() < 8) { EltVT = MVT::i8; VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, VecVT.getVectorNumElements()); Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec); // Extend the element type to match if needed. if (EltVT.bitsGT(Elt.getValueType())) Elt = DAG.getNode(ISD::ANY_EXTEND, dl, EltVT, Elt); } // Spill the vector to the stack. SDValue StackPtr = DAG.CreateStackTemporary(VecVT); auto &MF = DAG.getMachineFunction(); auto FrameIndex = cast(StackPtr.getNode())->getIndex(); auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); // Store the new element. This may be larger than the vector element type, // so use a truncating store. SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo::getUnknownStack(MF), EltVT); EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT); // Load the Lo part from the stack slot. Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo); // Increment the pointer to the other part. unsigned IncrementSize = LoVT.getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, DAG.getConstant(IncrementSize, dl, StackPtr.getValueType())); // Load the Hi part from the stack slot. Hi = DAG.getLoad(HiVT, dl, Store, StackPtr, PtrInfo.getWithOffset(IncrementSize), MinAlign(Alignment, IncrementSize)); // If we adjusted the original type, we need to truncate the results. 
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); if (LoVT != Lo.getValueType()) Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Lo); if (HiVT != Hi.getValueType()) Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi); } void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; SDLoc dl(N); std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0)); Hi = DAG.getUNDEF(HiVT); } void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi) { assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); EVT LoVT, HiVT; SDLoc dl(LD); std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); ISD::LoadExtType ExtType = LD->getExtensionType(); SDValue Ch = LD->getChain(); SDValue Ptr = LD->getBasePtr(); SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); EVT MemoryVT = LD->getMemoryVT(); unsigned Alignment = LD->getOriginalAlignment(); MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, LD->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT, Alignment, MMOFlags, AAInfo); // Build a factor node to remember that this load is independent of the // other one. Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); // Legalize the chain result - switch anything that used the old chain to // use the new one. 
ReplaceValueWith(SDValue(LD, 1), Ch); } void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; SDLoc dl(MLD); std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); SDValue Ch = MLD->getChain(); SDValue Ptr = MLD->getBasePtr(); SDValue Mask = MLD->getMask(); SDValue Src0 = MLD->getSrc0(); unsigned Alignment = MLD->getOriginalAlignment(); ISD::LoadExtType ExtType = MLD->getExtensionType(); // if Alignment is equal to the vector size, // take the half of it for the second part unsigned SecondHalfAlignment = (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? Alignment/2 : Alignment; // Split Mask operand SDValue MaskLo, MaskHi; if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) GetSplitVector(Mask, MaskLo, MaskHi); else std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); EVT MemoryVT = MLD->getMemoryVT(); EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue Src0Lo, Src0Hi; if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector) GetSplitVector(Src0, Src0Lo, Src0Hi); else std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); MachineMemOperand *MMO = DAG.getMachineFunction(). 
getMachineMemOperand(MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, LoMemVT, MMO, ExtType, MLD->isExpandingLoad()); Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG, MLD->isExpandingLoad()); unsigned HiOffset = LoMemVT.getStoreSize(); MMO = DAG.getMachineFunction().getMachineMemOperand( MLD->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, ExtType, MLD->isExpandingLoad()); // Build a factor node to remember that this load is independent of the // other one. Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(MLD, 1), Ch); } void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; SDLoc dl(MGT); std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0)); SDValue Ch = MGT->getChain(); SDValue Ptr = MGT->getBasePtr(); SDValue Mask = MGT->getMask(); SDValue Src0 = MGT->getValue(); SDValue Index = MGT->getIndex(); SDValue Scale = MGT->getScale(); unsigned Alignment = MGT->getOriginalAlignment(); // Split Mask operand SDValue MaskLo, MaskHi; if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) GetSplitVector(Mask, MaskLo, MaskHi); else std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); EVT MemoryVT = MGT->getMemoryVT(); EVT LoMemVT, HiMemVT; // Split MemoryVT std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue Src0Lo, Src0Hi; if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector) GetSplitVector(Src0, Src0Lo, Src0Hi); else std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); SDValue 
IndexHi, IndexLo; if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) GetSplitVector(Index, IndexLo, IndexHi); else std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MGT->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MGT->getAAInfo(), MGT->getRanges()); SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo, Scale}; Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo, MMO); SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi, Scale}; Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi, MMO); // Build a factor node to remember that this load is independent of the // other one. Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(MGT, 1), Ch); } void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && "Operand types must be vectors"); EVT LoVT, HiVT; SDLoc DL(N); std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); // If the input also splits, handle it directly. Otherwise split it by hand. 
SDValue LL, LH, RL, RH; if (getTypeAction(N->getOperand(0).getValueType()) == TargetLowering::TypeSplitVector) GetSplitVector(N->getOperand(0), LL, LH); else std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); if (getTypeAction(N->getOperand(1).getValueType()) == TargetLowering::TypeSplitVector) GetSplitVector(N->getOperand(1), RL, RH); else std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); } void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi) { // Get the dest types - they may not match the input types, e.g. int_to_fp. EVT LoVT, HiVT; SDLoc dl(N); std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); // If the input also splits, handle it directly for a compile time speedup. // Otherwise split it by hand. EVT InVT = N->getOperand(0).getValueType(); if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) GetSplitVector(N->getOperand(0), Lo, Hi); else std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); if (N->getOpcode() == ISD::FP_ROUND) { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1)); } else { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); } } void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); EVT SrcVT = N->getOperand(0).getValueType(); EVT DestVT = N->getValueType(0); EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT); // We can do better than a generic split operation if the extend is doing // more than just doubling the width of the elements and the following are // true: // - The number of vector elements is even, // - the source type is legal, // - the type of a split source is illegal, // - the type of an extended (by doubling element size) source is legal, and // - the type of that extended 
source when split is legal. // // This won't necessarily completely legalize the operation, but it will // more effectively move in the right direction and prevent falling down // to scalarization in many cases due to the input vector being split too // far. unsigned NumElements = SrcVT.getVectorNumElements(); if ((NumElements & 1) == 0 && SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) { LLVMContext &Ctx = *DAG.getContext(); EVT NewSrcVT = SrcVT.widenIntegerVectorElementType(Ctx); EVT SplitSrcVT = SrcVT.getHalfNumVectorElementsVT(Ctx); EVT SplitLoVT, SplitHiVT; std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT); if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) && TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) { LLVM_DEBUG(dbgs() << "Split vector extend via incremental extend:"; N->dump(&DAG); dbgs() << "\n"); // Extend the source vector by one step. SDValue NewSrc = DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0)); // Get the low and high halves of the new, extended one step, vector. std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl); // Extend those vector halves the rest of the way. Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); return; } } // Fall back to the generic unary operator splitting otherwise. SplitVecRes_UnaryOp(N, Lo, Hi); } void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, SDValue &Hi) { // The low and high parts of the original input give four input vectors. SDValue Inputs[4]; SDLoc dl(N); GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]); GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]); EVT NewVT = Inputs[0].getValueType(); unsigned NewElts = NewVT.getVectorNumElements(); // If Lo or Hi uses elements from at most two of the four input vectors, then // express it as a vector shuffle of those two inputs. Otherwise extract the // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR. 
SmallVector Ops; for (unsigned High = 0; High < 2; ++High) { SDValue &Output = High ? Hi : Lo; // Build a shuffle mask for the output, discovering on the fly which // input vectors to use as shuffle operands (recorded in InputUsed). // If building a suitable shuffle vector proves too hard, then bail // out with useBuildVector set. unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered. unsigned FirstMaskIdx = High * NewElts; bool useBuildVector = false; for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { // The mask element. This indexes into the input. int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset); // The input vector this mask element indexes into. unsigned Input = (unsigned)Idx / NewElts; if (Input >= array_lengthof(Inputs)) { // The mask element does not index into any input vector. Ops.push_back(-1); continue; } // Turn the index into an offset from the start of the input vector. Idx -= Input * NewElts; // Find or create a shuffle vector operand to hold this input. unsigned OpNo; for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) { if (InputUsed[OpNo] == Input) { // This input vector is already an operand. break; } else if (InputUsed[OpNo] == -1U) { // Create a new operand for this input vector. InputUsed[OpNo] = Input; break; } } if (OpNo >= array_lengthof(InputUsed)) { // More than two input vectors used! Give up on trying to create a // shuffle vector. Insert all elements into a BUILD_VECTOR instead. useBuildVector = true; break; } // Add the mask index for the new shuffle vector. Ops.push_back(Idx + OpNo * NewElts); } if (useBuildVector) { EVT EltVT = NewVT.getVectorElementType(); SmallVector SVOps; // Extract the input elements by hand. for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { // The mask element. This indexes into the input. int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset); // The input vector this mask element indexes into. 
unsigned Input = (unsigned)Idx / NewElts; if (Input >= array_lengthof(Inputs)) { // The mask element is "undef" or indexes off the end of the input. SVOps.push_back(DAG.getUNDEF(EltVT)); continue; } // Turn the index into an offset from the start of the input vector. Idx -= Input * NewElts; // Extract the vector element by hand. SVOps.push_back(DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Inputs[Input], DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); } // Construct the Lo/Hi output using a BUILD_VECTOR. Output = DAG.getBuildVector(NewVT, dl, SVOps); } else if (InputUsed[0] == -1U) { // No input vectors were used! The result is undefined. Output = DAG.getUNDEF(NewVT); } else { SDValue Op0 = Inputs[InputUsed[0]]; // If only one input was used, use an undefined vector for the other. SDValue Op1 = InputUsed[1] == -1U ? DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]]; // At least one input vector was used. Create a new shuffle vector. Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, Ops); } Ops.clear(); } } //===----------------------------------------------------------------------===// // Operand Vector Splitting //===----------------------------------------------------------------------===// /// This method is called when the specified operand of the specified node is /// found to need vector splitting. At this point, all of the result types of /// the node are known to be legal, but other operands of the node may need /// legalization as well as the specified one. bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { LLVM_DEBUG(dbgs() << "Split node operand: "; N->dump(&DAG); dbgs() << "\n"); SDValue Res = SDValue(); // See if the target wants to custom split this node. 
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) return false; if (!Res.getNode()) { switch (N->getOpcode()) { default: #ifndef NDEBUG dbgs() << "SplitVectorOperand Op #" << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"; #endif report_fatal_error("Do not know how to split this operator's " "operand!\n"); case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break; case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break; case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; case ISD::TRUNCATE: Res = SplitVecOp_TruncateHelper(N); break; case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break; case ISD::STORE: Res = SplitVecOp_STORE(cast(N), OpNo); break; case ISD::MSTORE: Res = SplitVecOp_MSTORE(cast(N), OpNo); break; case ISD::MSCATTER: Res = SplitVecOp_MSCATTER(cast(N), OpNo); break; case ISD::MGATHER: Res = SplitVecOp_MGATHER(cast(N), OpNo); break; case ISD::VSELECT: Res = SplitVecOp_VSELECT(N, OpNo); break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: if (N->getValueType(0).bitsLT(N->getOperand(0).getValueType())) Res = SplitVecOp_TruncateHelper(N); else Res = SplitVecOp_UnaryOp(N); break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: if (N->getValueType(0).bitsLT(N->getOperand(0).getValueType())) Res = SplitVecOp_TruncateHelper(N); else Res = SplitVecOp_UnaryOp(N); break; case ISD::CTTZ: case ISD::CTLZ: case ISD::CTPOP: case ISD::FP_EXTEND: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: case ISD::FTRUNC: case ISD::FCANONICALIZE: Res = SplitVecOp_UnaryOp(N); break; case ISD::ANY_EXTEND_VECTOR_INREG: case ISD::SIGN_EXTEND_VECTOR_INREG: case ISD::ZERO_EXTEND_VECTOR_INREG: Res = SplitVecOp_ExtVecInRegOp(N); break; case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_FMUL: case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: case 
ISD::VECREDUCE_AND: case ISD::VECREDUCE_OR: case ISD::VECREDUCE_XOR: case ISD::VECREDUCE_SMAX: case ISD::VECREDUCE_SMIN: case ISD::VECREDUCE_UMAX: case ISD::VECREDUCE_UMIN: case ISD::VECREDUCE_FMAX: case ISD::VECREDUCE_FMIN: Res = SplitVecOp_VECREDUCE(N, OpNo); break; } } // If the result is null, the sub-method took care of registering results etc. if (!Res.getNode()) return false; // If the result is N, the sub-method updated N in place. Tell the legalizer // core about this. if (Res.getNode() == N) return true; assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && "Invalid operand expansion"); ReplaceValueWith(SDValue(N, 0), Res); return false; } SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) { // The only possibility for an illegal operand is the mask, since result type // legalization would have handled this node already otherwise. assert(OpNo == 0 && "Illegal operand must be mask"); SDValue Mask = N->getOperand(0); SDValue Src0 = N->getOperand(1); SDValue Src1 = N->getOperand(2); EVT Src0VT = Src0.getValueType(); SDLoc DL(N); assert(Mask.getValueType().isVector() && "VSELECT without a vector mask?"); SDValue Lo, Hi; GetSplitVector(N->getOperand(0), Lo, Hi); assert(Lo.getValueType() == Hi.getValueType() && "Lo and Hi have differing types"); EVT LoOpVT, HiOpVT; std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT); assert(LoOpVT == HiOpVT && "Asymmetric vector split?"); SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask; std::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL); std::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL); std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL); SDValue LoSelect = DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1); SDValue HiSelect = DAG.getNode(ISD::VSELECT, DL, HiOpVT, HiMask, HiOp0, HiOp1); return DAG.getNode(ISD::CONCAT_VECTORS, DL, Src0VT, LoSelect, HiSelect); } SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) { EVT ResVT = N->getValueType(0); 
SDValue Lo, Hi; SDLoc dl(N); SDValue VecOp = N->getOperand(OpNo); EVT VecVT = VecOp.getValueType(); assert(VecVT.isVector() && "Can only split reduce vector operand"); GetSplitVector(VecOp, Lo, Hi); EVT LoOpVT, HiOpVT; std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(VecVT); bool NoNaN = N->getFlags().hasNoNaNs(); unsigned CombineOpc = 0; switch (N->getOpcode()) { case ISD::VECREDUCE_FADD: CombineOpc = ISD::FADD; break; case ISD::VECREDUCE_FMUL: CombineOpc = ISD::FMUL; break; case ISD::VECREDUCE_ADD: CombineOpc = ISD::ADD; break; case ISD::VECREDUCE_MUL: CombineOpc = ISD::MUL; break; case ISD::VECREDUCE_AND: CombineOpc = ISD::AND; break; case ISD::VECREDUCE_OR: CombineOpc = ISD::OR; break; case ISD::VECREDUCE_XOR: CombineOpc = ISD::XOR; break; case ISD::VECREDUCE_SMAX: CombineOpc = ISD::SMAX; break; case ISD::VECREDUCE_SMIN: CombineOpc = ISD::SMIN; break; case ISD::VECREDUCE_UMAX: CombineOpc = ISD::UMAX; break; case ISD::VECREDUCE_UMIN: CombineOpc = ISD::UMIN; break; case ISD::VECREDUCE_FMAX: CombineOpc = NoNaN ? ISD::FMAXNUM : ISD::FMAXNAN; break; case ISD::VECREDUCE_FMIN: CombineOpc = NoNaN ? ISD::FMINNUM : ISD::FMINNAN; break; default: llvm_unreachable("Unexpected reduce ISD node"); } // Use the appropriate scalar instruction on the split subvectors before // reducing the now partially reduced smaller vector. SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi, N->getFlags()); return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, N->getFlags()); } SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { // The result has a legal vector type, but the input needs splitting. 
EVT ResVT = N->getValueType(0); SDValue Lo, Hi; SDLoc dl(N); GetSplitVector(N->getOperand(0), Lo, Hi); EVT InVT = Lo.getValueType(); EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), InVT.getVectorNumElements()); Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo); Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi); return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); } SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) { // For example, i64 = BITCAST v4i16 on alpha. Typically the vector will // end up being split all the way down to individual components. Convert the // split pieces into integers and reassemble. SDValue Lo, Hi; GetSplitVector(N->getOperand(0), Lo, Hi); Lo = BitConvertToInteger(Lo); Hi = BitConvertToInteger(Hi); if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), JoinIntegers(Lo, Hi)); } SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { // We know that the extracted result type is legal. 
EVT SubVT = N->getValueType(0); SDValue Idx = N->getOperand(1); SDLoc dl(N); SDValue Lo, Hi; GetSplitVector(N->getOperand(0), Lo, Hi); uint64_t LoElts = Lo.getValueType().getVectorNumElements(); uint64_t IdxVal = cast(Idx)->getZExtValue(); if (IdxVal < LoElts) { assert(IdxVal + SubVT.getVectorNumElements() <= LoElts && "Extracted subvector crosses vector split!"); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx); } else { return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi, DAG.getConstant(IdxVal - LoElts, dl, Idx.getValueType())); } } SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue Vec = N->getOperand(0); SDValue Idx = N->getOperand(1); EVT VecVT = Vec.getValueType(); if (isa(Idx)) { uint64_t IdxVal = cast(Idx)->getZExtValue(); assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!"); SDValue Lo, Hi; GetSplitVector(Vec, Lo, Hi); uint64_t LoElts = Lo.getValueType().getVectorNumElements(); if (IdxVal < LoElts) return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0); return SDValue(DAG.UpdateNodeOperands(N, Hi, DAG.getConstant(IdxVal - LoElts, SDLoc(N), Idx.getValueType())), 0); } // See if the target wants to custom expand this node. if (CustomLowerNode(N, N->getValueType(0), true)) return SDValue(); // Make the vector elements byte-addressable if they aren't already. SDLoc dl(N); EVT EltVT = VecVT.getVectorElementType(); if (VecVT.getScalarSizeInBits() < 8) { EltVT = MVT::i8; VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, VecVT.getVectorNumElements()); Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec); } // Store the vector to the stack. SDValue StackPtr = DAG.CreateStackTemporary(VecVT); auto &MF = DAG.getMachineFunction(); auto FrameIndex = cast(StackPtr.getNode())->getIndex(); auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); // Load back the required element. 
StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); return DAG.getExtLoad( ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT); } SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) { SDValue Lo, Hi; // *_EXTEND_VECTOR_INREG only reference the lower half of the input, so // splitting the result has the same effect as splitting the input operand. SplitVecRes_ExtVecInRegOp(N, Lo, Hi); return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), N->getValueType(0), Lo, Hi); } SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, unsigned OpNo) { EVT LoVT, HiVT; SDLoc dl(MGT); std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0)); SDValue Ch = MGT->getChain(); SDValue Ptr = MGT->getBasePtr(); SDValue Index = MGT->getIndex(); SDValue Scale = MGT->getScale(); SDValue Mask = MGT->getMask(); SDValue Src0 = MGT->getValue(); unsigned Alignment = MGT->getOriginalAlignment(); SDValue MaskLo, MaskHi; if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) // Split Mask operand GetSplitVector(Mask, MaskLo, MaskHi); else std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); EVT MemoryVT = MGT->getMemoryVT(); EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue Src0Lo, Src0Hi; if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector) GetSplitVector(Src0, Src0Lo, Src0Hi); else std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); SDValue IndexHi, IndexLo; if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) GetSplitVector(Index, IndexLo, IndexHi); else std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); MachineMemOperand *MMO = DAG.getMachineFunction(). 
getMachineMemOperand(MGT->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MGT->getAAInfo(), MGT->getRanges()); SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo, Scale}; SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo, MMO); MMO = DAG.getMachineFunction(). getMachineMemOperand(MGT->getPointerInfo(), MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment, MGT->getAAInfo(), MGT->getRanges()); SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi, Scale}; SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi, MMO); // Build a factor node to remember that this load is independent of the // other one. Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(MGT, 1), Ch); SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MGT->getValueType(0), Lo, Hi); ReplaceValueWith(SDValue(MGT, 0), Res); return SDValue(); } SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo) { SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); SDValue Mask = N->getMask(); SDValue Data = N->getValue(); EVT MemoryVT = N->getMemoryVT(); unsigned Alignment = N->getOriginalAlignment(); SDLoc DL(N); EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue DataLo, DataHi; if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector) // Split Data operand GetSplitVector(Data, DataLo, DataHi); else std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); SDValue MaskLo, MaskHi; if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) // Split Mask operand GetSplitVector(Mask, MaskLo, MaskHi); else std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); // if Alignment is equal to the vector size, // take the half of it for the second part unsigned SecondHalfAlignment = 
(Alignment == Data->getValueType(0).getSizeInBits()/8) ? Alignment/2 : Alignment; SDValue Lo, Hi; MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(N->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, N->isTruncatingStore(), N->isCompressingStore()); Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, N->isCompressingStore()); unsigned HiOffset = LoMemVT.getStoreSize(); MMO = DAG.getMachineFunction().getMachineMemOperand( N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment, N->getAAInfo(), N->getRanges()); Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, N->isTruncatingStore(), N->isCompressingStore()); // Build a factor node to remember that this store is independent of the // other one. return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); } SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo) { SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); SDValue Mask = N->getMask(); SDValue Index = N->getIndex(); SDValue Scale = N->getScale(); SDValue Data = N->getValue(); EVT MemoryVT = N->getMemoryVT(); unsigned Alignment = N->getOriginalAlignment(); SDLoc DL(N); // Split all operands EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue DataLo, DataHi; if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector) // Split Data operand GetSplitVector(Data, DataLo, DataHi); else std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); SDValue MaskLo, MaskHi; if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) // Split Mask operand GetSplitVector(Mask, MaskLo, MaskHi); else std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); SDValue IndexHi, IndexLo; if (getTypeAction(Index.getValueType()) == 
TargetLowering::TypeSplitVector) GetSplitVector(Index, IndexLo, IndexHi); else std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); SDValue Lo; MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(N->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale}; Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), DL, OpsLo, MMO); MMO = DAG.getMachineFunction(). getMachineMemOperand(N->getPointerInfo(), MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); // The order of the Scatter operation after split is well defined. The "Hi" // part comes after the "Lo". So these two operations should be chained one // after another. SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Scale}; return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), DL, OpsHi, MMO); } SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { assert(N->isUnindexed() && "Indexed store of vector?"); assert(OpNo == 1 && "Can only split the stored value"); SDLoc DL(N); bool isTruncating = N->isTruncatingStore(); SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); EVT MemoryVT = N->getMemoryVT(); unsigned Alignment = N->getOriginalAlignment(); MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags(); AAMDNodes AAInfo = N->getAAInfo(); SDValue Lo, Hi; GetSplitVector(N->getOperand(1), Lo, Hi); EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); // Scalarize if the split halves are not byte-sized. 
if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized()) return TLI.scalarizeVectorStore(N, DAG); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; if (isTruncating) Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo); else Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags, AAInfo); // Increment the pointer to the other half. Ptr = DAG.getObjectPtrOffset(DL, Ptr, IncrementSize); if (isTruncating) Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), HiMemVT, Alignment, MMOFlags, AAInfo); else Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), Alignment, MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); } SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { SDLoc DL(N); // The input operands all must have the same type, and we know the result // type is valid. Convert this to a buildvector which extracts all the // input elements. // TODO: If the input elements are power-two vectors, we could convert this to // a new CONCAT_VECTORS node with elements that are half-wide. SmallVector Elts; EVT EltVT = N->getValueType(0).getVectorElementType(); for (const SDValue &Op : N->op_values()) { for (unsigned i = 0, e = Op.getValueType().getVectorNumElements(); i != e; ++i) { Elts.push_back(DAG.getNode( ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op, DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())))); } } return DAG.getBuildVector(N->getValueType(0), DL, Elts); } SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { // The result type is legal, but the input type is illegal. If splitting // ends up with the result type of each half still being legal, just // do that. If, however, that would result in an illegal result type, // we can try to get more clever with power-two vectors. 
Specifically, // split the input type, but also widen the result element size, then // concatenate the halves and truncate again. For example, consider a target // where v8i8 is legal and v8i32 is not (ARM, which doesn't have 256-bit // vectors). To perform a "%res = v8i8 trunc v8i32 %in" we do: // %inlo = v4i32 extract_subvector %in, 0 // %inhi = v4i32 extract_subvector %in, 4 // %lo16 = v4i16 trunc v4i32 %inlo // %hi16 = v4i16 trunc v4i32 %inhi // %in16 = v8i16 concat_vectors v4i16 %lo16, v4i16 %hi16 // %res = v8i8 trunc v8i16 %in16 // // Without this transform, the original truncate would end up being // scalarized, which is pretty much always a last resort. SDValue InVec = N->getOperand(0); EVT InVT = InVec->getValueType(0); EVT OutVT = N->getValueType(0); unsigned NumElements = OutVT.getVectorNumElements(); bool IsFloat = OutVT.isFloatingPoint(); // Widening should have already made sure this is a power-two vector // if we're trying to split it at all. assert() that's true, just in case. assert(!(NumElements & 1) && "Splitting vector, but not in half!"); unsigned InElementSize = InVT.getScalarSizeInBits(); unsigned OutElementSize = OutVT.getScalarSizeInBits(); // If the input elements are only 1/2 the width of the result elements, // just use the normal splitting. Our trick only work if there's room // to split more than once. if (InElementSize <= OutElementSize * 2) return SplitVecOp_UnaryOp(N); SDLoc DL(N); // Get the split input vector. SDValue InLoVec, InHiVec; GetSplitVector(InVec, InLoVec, InHiVec); // Truncate them to 1/2 the element size. EVT HalfElementVT = IsFloat ? EVT::getFloatingPointVT(InElementSize/2) : EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements/2); SDValue HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec); SDValue HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec); // Concatenate them to get the full intermediate truncation result. 
EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements); SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo, HalfHi); // Now finish up by truncating all the way down to the original result // type. This should normally be something that ends up being legal directly, // but in theory if a target has very wide vectors and an annoyingly // restricted set of legal types, this split can chain to build things up. return IsFloat ? DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec, DAG.getTargetConstant( 0, DL, TLI.getPointerTy(DAG.getDataLayout()))) : DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec); } SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && "Operand types must be vectors"); // The result has a legal vector type, but the input needs splitting. SDValue Lo0, Hi0, Lo1, Hi1, LoRes, HiRes; SDLoc DL(N); GetSplitVector(N->getOperand(0), Lo0, Hi0); GetSplitVector(N->getOperand(1), Lo1, Hi1); unsigned PartElements = Lo0.getValueType().getVectorNumElements(); EVT PartResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartElements); EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 2*PartElements); LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2)); HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2)); SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes); return PromoteTargetBoolean(Con, N->getValueType(0)); } SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { // The result has a legal vector type, but the input needs splitting. 
EVT ResVT = N->getValueType(0); SDValue Lo, Hi; SDLoc DL(N); GetSplitVector(N->getOperand(0), Lo, Hi); EVT InVT = Lo.getValueType(); EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), InVT.getVectorNumElements()); Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1)); Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1)); return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); } SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) { // The result (and the first input) has a legal vector type, but the second // input needs splitting. return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements()); } //===----------------------------------------------------------------------===// // Result Vector Widening //===----------------------------------------------------------------------===// void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { LLVM_DEBUG(dbgs() << "Widen node result " << ResNo << ": "; N->dump(&DAG); dbgs() << "\n"); // See if the target wants to custom widen this node. 
if (CustomWidenLowerNode(N, N->getValueType(ResNo))) return; SDValue Res = SDValue(); switch (N->getOpcode()) { default: #ifndef NDEBUG dbgs() << "WidenVectorResult #" << ResNo << ": "; N->dump(&DAG); dbgs() << "\n"; #endif llvm_unreachable("Do not know how to widen the result of this operator!"); case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break; case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break; case ISD::VSELECT: case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; case ISD::SETCC: Res = WidenVecRes_SETCC(N); break; case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break; case ISD::VECTOR_SHUFFLE: Res = WidenVecRes_VECTOR_SHUFFLE(cast(N)); break; case ISD::MLOAD: Res = WidenVecRes_MLOAD(cast(N)); break; case ISD::MGATHER: Res = WidenVecRes_MGATHER(cast(N)); break; case ISD::ADD: case ISD::AND: case ISD::MUL: case ISD::MULHS: case ISD::MULHU: case ISD::OR: case ISD::SUB: case ISD::XOR: case ISD::FMINNUM: case ISD::FMAXNUM: case ISD::FMINNAN: case ISD::FMAXNAN: case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: case ISD::UMAX: Res = WidenVecRes_Binary(N); break; case ISD::FADD: case ISD::FMUL: case ISD::FPOW: case ISD::FSUB: case ISD::FDIV: case ISD::FREM: case ISD::SDIV: case ISD::UDIV: case ISD::SREM: case ISD::UREM: Res = WidenVecRes_BinaryCanTrap(N); break; case ISD::FCOPYSIGN: Res = WidenVecRes_FCOPYSIGN(N); break; case ISD::FPOWI: Res = 
WidenVecRes_POWI(N); break; case ISD::SHL: case ISD::SRA: case ISD::SRL: Res = WidenVecRes_Shift(N); break; case ISD::ANY_EXTEND_VECTOR_INREG: case ISD::SIGN_EXTEND_VECTOR_INREG: case ISD::ZERO_EXTEND_VECTOR_INREG: Res = WidenVecRes_EXTEND_VECTOR_INREG(N); break; case ISD::ANY_EXTEND: case ISD::FP_EXTEND: case ISD::FP_ROUND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SIGN_EXTEND: case ISD::SINT_TO_FP: case ISD::TRUNCATE: case ISD::UINT_TO_FP: case ISD::ZERO_EXTEND: Res = WidenVecRes_Convert(N); break; case ISD::BITREVERSE: case ISD::BSWAP: case ISD::CTLZ: case ISD::CTPOP: case ISD::CTTZ: case ISD::FABS: case ISD::FCEIL: case ISD::FCOS: case ISD::FEXP: case ISD::FEXP2: case ISD::FFLOOR: case ISD::FLOG: case ISD::FLOG10: case ISD::FLOG2: case ISD::FNEARBYINT: case ISD::FNEG: case ISD::FRINT: case ISD::FROUND: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: Res = WidenVecRes_Unary(N); break; case ISD::FMA: Res = WidenVecRes_Ternary(N); break; } // If Res is null, the sub-method took care of registering the result. if (Res.getNode()) SetWidenedVector(SDValue(N, ResNo), Res); } SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { // Ternary op widening. SDLoc dl(N); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); SDValue InOp3 = GetWidenedVector(N->getOperand(2)); return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3); } SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { // Binary op widening. SDLoc dl(N); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags()); } SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { // Binary op widening for operations that can trap. 
unsigned Opcode = N->getOpcode(); SDLoc dl(N); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); EVT WidenEltVT = WidenVT.getVectorElementType(); EVT VT = WidenVT; unsigned NumElts = VT.getVectorNumElements(); const SDNodeFlags Flags = N->getFlags(); while (!TLI.isTypeLegal(VT) && NumElts != 1) { NumElts = NumElts / 2; VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); } if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) { // Operation doesn't trap so just widen as normal. SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags); } // No legal vector version so unroll the vector operation and then widen. if (NumElts == 1) return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); // Since the operation can trap, apply operation on the original vector. EVT MaxVT = VT; SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); unsigned CurNumElts = N->getValueType(0).getVectorNumElements(); SmallVector ConcatOps(CurNumElts); unsigned ConcatEnd = 0; // Current ConcatOps index. int Idx = 0; // Current Idx into input vectors. // NumElts := greatest legal vector size (at most WidenVT) // while (orig. 
vector has unhandled elements) { // take munches of size NumElts from the beginning and add to ConcatOps // NumElts := next smaller supported vector size or 1 // } while (CurNumElts != 0) { while (CurNumElts >= NumElts) { SDValue EOp1 = DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); SDValue EOp2 = DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2, Flags); Idx += NumElts; CurNumElts -= NumElts; } do { NumElts = NumElts / 2; VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); } while (!TLI.isTypeLegal(VT) && NumElts != 1); if (NumElts == 1) { for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { SDValue EOp1 = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp1, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); SDValue EOp2 = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2, Flags); } CurNumElts = 0; } } // Check to see if we have a single operation with the widen type. if (ConcatEnd == 1) { VT = ConcatOps[0].getValueType(); if (VT == WidenVT) return ConcatOps[0]; } // while (Some element of ConcatOps is not of type MaxVT) { // From the end of ConcatOps, collect elements of the same type and put // them into an op of the next larger supported type // } while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) { Idx = ConcatEnd - 1; VT = ConcatOps[Idx--].getValueType(); while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT) Idx--; int NextSize = VT.isVector() ? 
VT.getVectorNumElements() : 1; EVT NextVT; do { NextSize *= 2; NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize); } while (!TLI.isTypeLegal(NextVT)); if (!VT.isVector()) { // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT SDValue VecOp = DAG.getUNDEF(NextVT); unsigned NumToInsert = ConcatEnd - Idx - 1; for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { VecOp = DAG.getNode( ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, ConcatOps[OpIdx], DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); } ConcatOps[Idx+1] = VecOp; ConcatEnd = Idx + 2; } else { // Vector type, create a CONCAT_VECTORS of type NextVT SDValue undefVec = DAG.getUNDEF(VT); unsigned OpsToConcat = NextSize/VT.getVectorNumElements(); SmallVector SubConcatOps(OpsToConcat); unsigned RealVals = ConcatEnd - Idx - 1; unsigned SubConcatEnd = 0; unsigned SubConcatIdx = Idx + 1; while (SubConcatEnd < RealVals) SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx]; while (SubConcatEnd < OpsToConcat) SubConcatOps[SubConcatEnd++] = undefVec; ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NextVT, SubConcatOps); ConcatEnd = SubConcatIdx + 1; } } // Check to see if we have a single operation with the widen type. 
if (ConcatEnd == 1) { VT = ConcatOps[0].getValueType(); if (VT == WidenVT) return ConcatOps[0]; } // add undefs of size MaxVT until ConcatOps grows to length of WidenVT unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements(); if (NumOps != ConcatEnd ) { SDValue UndefVal = DAG.getUNDEF(MaxVT); for (unsigned j = ConcatEnd; j < NumOps; ++j) ConcatOps[j] = UndefVal; } return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, makeArrayRef(ConcatOps.data(), NumOps)); } SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { SDValue InOp = N->getOperand(0); SDLoc DL(N); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); EVT InVT = InOp.getValueType(); EVT InEltVT = InVT.getVectorElementType(); EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts); unsigned Opcode = N->getOpcode(); unsigned InVTNumElts = InVT.getVectorNumElements(); const SDNodeFlags Flags = N->getFlags(); if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { InOp = GetWidenedVector(N->getOperand(0)); InVT = InOp.getValueType(); InVTNumElts = InVT.getVectorNumElements(); if (InVTNumElts == WidenNumElts) { if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InOp); return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags); } if (WidenVT.getSizeInBits() == InVT.getSizeInBits()) { // If both input and result vector types are of same width, extend // operations should be done with SIGN/ZERO_EXTEND_VECTOR_INREG, which // accepts fewer elements in the result than in the input. 
if (Opcode == ISD::SIGN_EXTEND) return DAG.getSignExtendVectorInReg(InOp, DL, WidenVT); if (Opcode == ISD::ZERO_EXTEND) return DAG.getZeroExtendVectorInReg(InOp, DL, WidenVT); } } if (TLI.isTypeLegal(InWidenVT)) { // Because the result and the input are different vector types, widening // the result could create a legal type but widening the input might make // it an illegal type that might lead to repeatedly splitting the input // and then widening it. To avoid this, we widen the input only if // it results in a legal type. if (WidenNumElts % InVTNumElts == 0) { // Widen the input and call convert on the widened input vector. unsigned NumConcat = WidenNumElts/InVTNumElts; SmallVector Ops(NumConcat); Ops[0] = InOp; SDValue UndefVal = DAG.getUNDEF(InVT); for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = UndefVal; SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops); if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVec); return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags); } if (InVTNumElts % WidenNumElts == 0) { SDValue InVal = DAG.getNode( ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp, DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); // Extract the input and convert the shorten input vector. if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVal); return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1), Flags); } } // Otherwise unroll into some nasty scalar code and rebuild the vector. 
SmallVector Ops(WidenNumElts); EVT EltVT = WidenVT.getVectorElementType(); unsigned MinElts = std::min(InVTNumElts, WidenNumElts); unsigned i; for (i=0; i < MinElts; ++i) { SDValue Val = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); if (N->getNumOperands() == 1) Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val); else Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags); } SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i < WidenNumElts; ++i) Ops[i] = UndefVal; return DAG.getBuildVector(WidenVT, DL, Ops); } SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) { unsigned Opcode = N->getOpcode(); SDValue InOp = N->getOperand(0); SDLoc DL(N); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); EVT WidenSVT = WidenVT.getVectorElementType(); unsigned WidenNumElts = WidenVT.getVectorNumElements(); EVT InVT = InOp.getValueType(); EVT InSVT = InVT.getVectorElementType(); unsigned InVTNumElts = InVT.getVectorNumElements(); if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { InOp = GetWidenedVector(InOp); InVT = InOp.getValueType(); if (InVT.getSizeInBits() == WidenVT.getSizeInBits()) { switch (Opcode) { case ISD::ANY_EXTEND_VECTOR_INREG: return DAG.getAnyExtendVectorInReg(InOp, DL, WidenVT); case ISD::SIGN_EXTEND_VECTOR_INREG: return DAG.getSignExtendVectorInReg(InOp, DL, WidenVT); case ISD::ZERO_EXTEND_VECTOR_INREG: return DAG.getZeroExtendVectorInReg(InOp, DL, WidenVT); } } } // Unroll, extend the scalars and rebuild the vector. 
SmallVector Ops; for (unsigned i = 0, e = std::min(InVTNumElts, WidenNumElts); i != e; ++i) { SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InSVT, InOp, DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); switch (Opcode) { case ISD::ANY_EXTEND_VECTOR_INREG: Val = DAG.getNode(ISD::ANY_EXTEND, DL, WidenSVT, Val); break; case ISD::SIGN_EXTEND_VECTOR_INREG: Val = DAG.getNode(ISD::SIGN_EXTEND, DL, WidenSVT, Val); break; case ISD::ZERO_EXTEND_VECTOR_INREG: Val = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenSVT, Val); break; default: llvm_unreachable("A *_EXTEND_VECTOR_INREG node was expected"); } Ops.push_back(Val); } while (Ops.size() != WidenNumElts) Ops.push_back(DAG.getUNDEF(WidenSVT)); return DAG.getBuildVector(WidenVT, DL, Ops); } SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) { // If this is an FCOPYSIGN with same input types, we can treat it as a // normal (can trap) binary op. if (N->getOperand(0).getValueType() == N->getOperand(1).getValueType()) return WidenVecRes_BinaryCanTrap(N); // If the types are different, fall back to unrolling. 
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); } SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); SDValue ShOp = N->getOperand(1); return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp); } SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); SDValue ShOp = N->getOperand(1); EVT ShVT = ShOp.getValueType(); if (getTypeAction(ShVT) == TargetLowering::TypeWidenVector) { ShOp = GetWidenedVector(ShOp); ShVT = ShOp.getValueType(); } EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(), ShVT.getVectorElementType(), WidenVT.getVectorNumElements()); if (ShVT != ShWidenVT) ShOp = ModifyToType(ShOp, ShWidenVT); return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp); } SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) { // Unary op widening. 
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp); } SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); EVT ExtVT = EVT::getVectorVT(*DAG.getContext(), cast(N->getOperand(1))->getVT() .getVectorElementType(), WidenVT.getVectorNumElements()); SDValue WidenLHS = GetWidenedVector(N->getOperand(0)); return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, WidenLHS, DAG.getValueType(ExtVT)); } SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) { SDValue WidenVec = DisintegrateMERGE_VALUES(N, ResNo); return GetWidenedVector(WidenVec); } SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { SDValue InOp = N->getOperand(0); EVT InVT = InOp.getValueType(); EVT VT = N->getValueType(0); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDLoc dl(N); switch (getTypeAction(InVT)) { case TargetLowering::TypeLegal: break; case TargetLowering::TypePromoteInteger: // If the incoming type is a vector that is being promoted, then // we know that the elements are arranged differently and that we // must perform the conversion using a stack slot. if (InVT.isVector()) break; // If the InOp is promoted to the same size, convert it. Otherwise, // fall out of the switch and widen the promoted input. InOp = GetPromotedInteger(InOp); InVT = InOp.getValueType(); if (WidenVT.bitsEq(InVT)) return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); break; case TargetLowering::TypeSoftenFloat: case TargetLowering::TypePromoteFloat: case TargetLowering::TypeExpandInteger: case TargetLowering::TypeExpandFloat: case TargetLowering::TypeScalarizeVector: case TargetLowering::TypeSplitVector: break; case TargetLowering::TypeWidenVector: // If the InOp is widened to the same size, convert it. 
Otherwise, fall // out of the switch and widen the widened input. InOp = GetWidenedVector(InOp); InVT = InOp.getValueType(); if (WidenVT.bitsEq(InVT)) // The input widens to the same size. Convert to the widen value. return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); break; } unsigned WidenSize = WidenVT.getSizeInBits(); unsigned InSize = InVT.getSizeInBits(); // x86mmx is not an acceptable vector element type, so don't try. if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) { // Determine new input vector type. The new input vector type will use // the same element type (if its a vector) or use the input type as a // vector. It is the same size as the type to widen to. EVT NewInVT; unsigned NewNumElts = WidenSize / InSize; if (InVT.isVector()) { EVT InEltVT = InVT.getVectorElementType(); NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenSize / InEltVT.getSizeInBits()); } else { NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts); } if (TLI.isTypeLegal(NewInVT)) { // Because the result and the input are different vector types, widening // the result could create a legal type but widening the input might make // it an illegal type that might lead to repeatedly splitting the input // and then widening it. To avoid this, we widen the input only if // it results in a legal type. SmallVector Ops(NewNumElts); SDValue UndefVal = DAG.getUNDEF(InVT); Ops[0] = InOp; for (unsigned i = 1; i < NewNumElts; ++i) Ops[i] = UndefVal; SDValue NewVec; if (InVT.isVector()) NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops); else NewVec = DAG.getBuildVector(NewInVT, dl, Ops); return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec); } } return CreateStackStoreLoad(InOp, WidenVT); } SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { SDLoc dl(N); // Build a vector with undefined for the new nodes. EVT VT = N->getValueType(0); // Integer BUILD_VECTOR operands may be larger than the node's vector element // type. 
The UNDEFs need to have the same type as the existing operands. EVT EltVT = N->getOperand(0).getValueType(); unsigned NumElts = VT.getVectorNumElements(); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); unsigned WidenNumElts = WidenVT.getVectorNumElements(); SmallVector NewOps(N->op_begin(), N->op_end()); assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!"); NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT)); return DAG.getBuildVector(WidenVT, dl, NewOps); } SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { EVT InVT = N->getOperand(0).getValueType(); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); unsigned WidenNumElts = WidenVT.getVectorNumElements(); unsigned NumInElts = InVT.getVectorNumElements(); unsigned NumOperands = N->getNumOperands(); bool InputWidened = false; // Indicates we need to widen the input. if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) { if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) { // Add undef vectors to widen to correct length. unsigned NumConcat = WidenVT.getVectorNumElements() / InVT.getVectorNumElements(); SDValue UndefVal = DAG.getUNDEF(InVT); SmallVector Ops(NumConcat); for (unsigned i=0; i < NumOperands; ++i) Ops[i] = N->getOperand(i); for (unsigned i = NumOperands; i != NumConcat; ++i) Ops[i] = UndefVal; return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Ops); } } else { InputWidened = true; if (WidenVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) { // The inputs and the result are widen to the same value. unsigned i; for (i=1; i < NumOperands; ++i) if (!N->getOperand(i).isUndef()) break; if (i == NumOperands) // Everything but the first operand is an UNDEF so just return the // widened first operand. return GetWidenedVector(N->getOperand(0)); if (NumOperands == 2) { // Replace concat of two operands with a shuffle. 
SmallVector MaskOps(WidenNumElts, -1); for (unsigned i = 0; i < NumInElts; ++i) { MaskOps[i] = i; MaskOps[i + NumInElts] = i + WidenNumElts; } return DAG.getVectorShuffle(WidenVT, dl, GetWidenedVector(N->getOperand(0)), GetWidenedVector(N->getOperand(1)), MaskOps); } } } // Fall back to use extracts and build vector. EVT EltVT = WidenVT.getVectorElementType(); SmallVector Ops(WidenNumElts); unsigned Idx = 0; for (unsigned i=0; i < NumOperands; ++i) { SDValue InOp = N->getOperand(i); if (InputWidened) InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) Ops[Idx++] = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); } SDValue UndefVal = DAG.getUNDEF(EltVT); for (; Idx < WidenNumElts; ++Idx) Ops[Idx] = UndefVal; return DAG.getBuildVector(WidenVT, dl, Ops); } SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { EVT VT = N->getValueType(0); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); unsigned WidenNumElts = WidenVT.getVectorNumElements(); SDValue InOp = N->getOperand(0); SDValue Idx = N->getOperand(1); SDLoc dl(N); if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) InOp = GetWidenedVector(InOp); EVT InVT = InOp.getValueType(); // Check if we can just return the input vector after widening. uint64_t IdxVal = cast(Idx)->getZExtValue(); if (IdxVal == 0 && InVT == WidenVT) return InOp; // Check if we can extract from the vector. unsigned InNumElts = InVT.getVectorNumElements(); if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts) return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx); // We could try widening the input to the right length but for now, extract // the original elements, fill the rest with undefs and build a vector. 
SmallVector Ops(WidenNumElts); EVT EltVT = VT.getVectorElementType(); unsigned NumElts = VT.getVectorNumElements(); unsigned i; for (i=0; i < NumElts; ++i) Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, DAG.getConstant(IdxVal + i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i < WidenNumElts; ++i) Ops[i] = UndefVal; return DAG.getBuildVector(WidenVT, dl, Ops); } SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { SDValue InOp = GetWidenedVector(N->getOperand(0)); return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), InOp.getValueType(), InOp, N->getOperand(1), N->getOperand(2)); } SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { LoadSDNode *LD = cast(N); ISD::LoadExtType ExtType = LD->getExtensionType(); SDValue Result; SmallVector LdChain; // Chain for the series of load if (ExtType != ISD::NON_EXTLOAD) Result = GenWidenVectorExtLoads(LdChain, LD, ExtType); else Result = GenWidenVectorLoads(LdChain, LD); // If we generate a single load, we can use that for the chain. Otherwise, // build a factor node to remember the multiple loads are independent and // chain to that. SDValue NewChain; if (LdChain.size() == 1) NewChain = LdChain[0]; else NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain); // Modified the chain - switch anything that used the old chain to use // the new one. 
ReplaceValueWith(SDValue(N, 1), NewChain); return Result; } SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0)); SDValue Mask = N->getMask(); EVT MaskVT = Mask.getValueType(); SDValue Src0 = GetWidenedVector(N->getSrc0()); ISD::LoadExtType ExtType = N->getExtensionType(); SDLoc dl(N); // The mask should be widened as well EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MaskVT.getVectorElementType(), WidenVT.getVectorNumElements()); Mask = ModifyToType(Mask, WideMaskVT, true); SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(), Mask, Src0, N->getMemoryVT(), N->getMemOperand(), ExtType, N->isExpandingLoad()); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; } SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Mask = N->getMask(); EVT MaskVT = Mask.getValueType(); SDValue Src0 = GetWidenedVector(N->getValue()); SDValue Scale = N->getScale(); unsigned NumElts = WideVT.getVectorNumElements(); SDLoc dl(N); // The mask should be widened as well EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MaskVT.getVectorElementType(), WideVT.getVectorNumElements()); Mask = ModifyToType(Mask, WideMaskVT, true); // Widen the Index operand SDValue Index = N->getIndex(); EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(), Index.getValueType().getScalarType(), NumElts); Index = ModifyToType(Index, WideIndexVT); SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index, Scale }; SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), N->getMemoryVT(), dl, Ops, N->getMemOperand()); // Legalize the chain result - switch anything that used the old chain to // use the new one. 
ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; } SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), WidenVT, N->getOperand(0)); } // Return true if this is a node that could have two SETCCs as operands. static inline bool isLogicalMaskOp(unsigned Opcode) { switch (Opcode) { case ISD::AND: case ISD::OR: case ISD::XOR: return true; } return false; } // This is used just for the assert in convertMask(). Check that this either // a SETCC or a previously handled SETCC by convertMask(). #ifndef NDEBUG static inline bool isSETCCorConvertedSETCC(SDValue N) { if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR) N = N.getOperand(0); else if (N.getOpcode() == ISD::CONCAT_VECTORS) { for (unsigned i = 1; i < N->getNumOperands(); ++i) if (!N->getOperand(i)->isUndef()) return false; N = N.getOperand(0); } if (N.getOpcode() == ISD::TRUNCATE) N = N.getOperand(0); else if (N.getOpcode() == ISD::SIGN_EXTEND) N = N.getOperand(0); if (isLogicalMaskOp(N.getOpcode())) return isSETCCorConvertedSETCC(N.getOperand(0)) && isSETCCorConvertedSETCC(N.getOperand(1)); return (N.getOpcode() == ISD::SETCC || ISD::isBuildVectorOfConstantSDNodes(N.getNode())); } #endif // Return a mask of vector type MaskVT to replace InMask. Also adjust MaskVT // to ToMaskVT if needed with vector extension or truncation. SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT, EVT ToMaskVT) { // Currently a SETCC or a AND/OR/XOR with two SETCCs are handled. // FIXME: This code seems to be too restrictive, we might consider // generalizing it or dropping it. assert(isSETCCorConvertedSETCC(InMask) && "Unexpected mask argument."); // Make a new Mask node, with a legal result VT. 
SmallVector Ops; for (unsigned i = 0, e = InMask->getNumOperands(); i < e; ++i) Ops.push_back(InMask->getOperand(i)); SDValue Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops); // If MaskVT has smaller or bigger elements than ToMaskVT, a vector sign // extend or truncate is needed. LLVMContext &Ctx = *DAG.getContext(); unsigned MaskScalarBits = MaskVT.getScalarSizeInBits(); unsigned ToMaskScalBits = ToMaskVT.getScalarSizeInBits(); if (MaskScalarBits < ToMaskScalBits) { EVT ExtVT = EVT::getVectorVT(Ctx, ToMaskVT.getVectorElementType(), MaskVT.getVectorNumElements()); Mask = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Mask), ExtVT, Mask); } else if (MaskScalarBits > ToMaskScalBits) { EVT TruncVT = EVT::getVectorVT(Ctx, ToMaskVT.getVectorElementType(), MaskVT.getVectorNumElements()); Mask = DAG.getNode(ISD::TRUNCATE, SDLoc(Mask), TruncVT, Mask); } assert(Mask->getValueType(0).getScalarSizeInBits() == ToMaskVT.getScalarSizeInBits() && "Mask should have the right element size by now."); // Adjust Mask to the right number of elements. unsigned CurrMaskNumEls = Mask->getValueType(0).getVectorNumElements(); if (CurrMaskNumEls > ToMaskVT.getVectorNumElements()) { MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); SDValue ZeroIdx = DAG.getConstant(0, SDLoc(Mask), IdxTy); Mask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Mask), ToMaskVT, Mask, ZeroIdx); } else if (CurrMaskNumEls < ToMaskVT.getVectorNumElements()) { unsigned NumSubVecs = (ToMaskVT.getVectorNumElements() / CurrMaskNumEls); EVT SubVT = Mask->getValueType(0); SmallVector SubOps(NumSubVecs, DAG.getUNDEF(SubVT)); SubOps[0] = Mask; Mask = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Mask), ToMaskVT, SubOps); } assert((Mask->getValueType(0) == ToMaskVT) && "A mask of ToMaskVT should have been produced by now."); return Mask; } // Get the target mask VT, and widen if needed. 
EVT DAGTypeLegalizer::getSETCCWidenedResultTy(SDValue SetCC) { assert(SetCC->getOpcode() == ISD::SETCC); LLVMContext &Ctx = *DAG.getContext(); EVT MaskVT = getSetCCResultType(SetCC->getOperand(0).getValueType()); if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector) MaskVT = TLI.getTypeToTransformTo(Ctx, MaskVT); return MaskVT; } // This method tries to handle VSELECT and its mask by legalizing operands // (which may require widening) and if needed adjusting the mask vector type // to match that of the VSELECT. Without it, many cases end up with // scalarization of the SETCC, with many unnecessary instructions. SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) { LLVMContext &Ctx = *DAG.getContext(); SDValue Cond = N->getOperand(0); if (N->getOpcode() != ISD::VSELECT) return SDValue(); if (Cond->getOpcode() != ISD::SETCC && !isLogicalMaskOp(Cond->getOpcode())) return SDValue(); // If this is a splitted VSELECT that was previously already handled, do // nothing. EVT CondVT = Cond->getValueType(0); if (CondVT.getScalarSizeInBits() != 1) return SDValue(); EVT VSelVT = N->getValueType(0); // Only handle vector types which are a power of 2. if (!isPowerOf2_64(VSelVT.getSizeInBits())) return SDValue(); // Don't touch if this will be scalarized. EVT FinalVT = VSelVT; while (getTypeAction(FinalVT) == TargetLowering::TypeSplitVector) FinalVT = FinalVT.getHalfNumVectorElementsVT(Ctx); if (FinalVT.getVectorNumElements() == 1) return SDValue(); // If there is support for an i1 vector mask, don't touch. if (Cond.getOpcode() == ISD::SETCC) { EVT SetCCOpVT = Cond->getOperand(0).getValueType(); while (TLI.getTypeAction(Ctx, SetCCOpVT) != TargetLowering::TypeLegal) SetCCOpVT = TLI.getTypeToTransformTo(Ctx, SetCCOpVT); EVT SetCCResVT = getSetCCResultType(SetCCOpVT); if (SetCCResVT.getScalarSizeInBits() == 1) return SDValue(); } else if (CondVT.getScalarType() == MVT::i1) { // If there is support for an i1 vector mask (or only scalar i1 conditions), // don't touch. 
while (TLI.getTypeAction(Ctx, CondVT) != TargetLowering::TypeLegal) CondVT = TLI.getTypeToTransformTo(Ctx, CondVT); if (CondVT.getScalarType() == MVT::i1) return SDValue(); } // Get the VT and operands for VSELECT, and widen if needed. SDValue VSelOp1 = N->getOperand(1); SDValue VSelOp2 = N->getOperand(2); if (getTypeAction(VSelVT) == TargetLowering::TypeWidenVector) { VSelVT = TLI.getTypeToTransformTo(Ctx, VSelVT); VSelOp1 = GetWidenedVector(VSelOp1); VSelOp2 = GetWidenedVector(VSelOp2); } // The mask of the VSELECT should have integer elements. EVT ToMaskVT = VSelVT; if (!ToMaskVT.getScalarType().isInteger()) ToMaskVT = ToMaskVT.changeVectorElementTypeToInteger(); SDValue Mask; if (Cond->getOpcode() == ISD::SETCC) { EVT MaskVT = getSETCCWidenedResultTy(Cond); Mask = convertMask(Cond, MaskVT, ToMaskVT); } else if (isLogicalMaskOp(Cond->getOpcode()) && Cond->getOperand(0).getOpcode() == ISD::SETCC && Cond->getOperand(1).getOpcode() == ISD::SETCC) { // Cond is (AND/OR/XOR (SETCC, SETCC)) SDValue SETCC0 = Cond->getOperand(0); SDValue SETCC1 = Cond->getOperand(1); EVT VT0 = getSETCCWidenedResultTy(SETCC0); EVT VT1 = getSETCCWidenedResultTy(SETCC1); unsigned ScalarBits0 = VT0.getScalarSizeInBits(); unsigned ScalarBits1 = VT1.getScalarSizeInBits(); unsigned ScalarBits_ToMask = ToMaskVT.getScalarSizeInBits(); EVT MaskVT; // If the two SETCCs have different VTs, either extend/truncate one of // them to the other "towards" ToMaskVT, or truncate one and extend the // other to ToMaskVT. if (ScalarBits0 != ScalarBits1) { EVT NarrowVT = ((ScalarBits0 < ScalarBits1) ? VT0 : VT1); EVT WideVT = ((NarrowVT == VT0) ? VT1 : VT0); if (ScalarBits_ToMask >= WideVT.getScalarSizeInBits()) MaskVT = WideVT; else if (ScalarBits_ToMask <= NarrowVT.getScalarSizeInBits()) MaskVT = NarrowVT; else MaskVT = ToMaskVT; } else // If the two SETCCs have the same VT, don't change it. MaskVT = VT0; // Make new SETCCs and logical nodes. 
SETCC0 = convertMask(SETCC0, VT0, MaskVT); SETCC1 = convertMask(SETCC1, VT1, MaskVT); Cond = DAG.getNode(Cond->getOpcode(), SDLoc(Cond), MaskVT, SETCC0, SETCC1); // Convert the logical op for VSELECT if needed. Mask = convertMask(Cond, MaskVT, ToMaskVT); } else return SDValue(); return DAG.getNode(ISD::VSELECT, SDLoc(N), VSelVT, Mask, VSelOp1, VSelOp2); } SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); SDValue Cond1 = N->getOperand(0); EVT CondVT = Cond1.getValueType(); if (CondVT.isVector()) { if (SDValue Res = WidenVSELECTAndMask(N)) return Res; EVT CondEltVT = CondVT.getVectorElementType(); EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenNumElts); if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector) Cond1 = GetWidenedVector(Cond1); // If we have to split the condition there is no point in widening the // select. This would result in an cycle of widening the select -> // widening the condition operand -> splitting the condition operand -> // splitting the select -> widening the select. Instead split this select // further and widen the resulting type. 
if (getTypeAction(CondVT) == TargetLowering::TypeSplitVector) { SDValue SplitSelect = SplitVecOp_VSELECT(N, 0); SDValue Res = ModifyToType(SplitSelect, WidenVT); return Res; } if (Cond1.getValueType() != CondWidenVT) Cond1 = ModifyToType(Cond1, CondWidenVT); } SDValue InOp1 = GetWidenedVector(N->getOperand(1)); SDValue InOp2 = GetWidenedVector(N->getOperand(2)); assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT); return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, Cond1, InOp1, InOp2); } SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) { SDValue InOp1 = GetWidenedVector(N->getOperand(2)); SDValue InOp2 = GetWidenedVector(N->getOperand(3)); return DAG.getNode(ISD::SELECT_CC, SDLoc(N), InOp1.getValueType(), N->getOperand(0), N->getOperand(1), InOp1, InOp2, N->getOperand(4)); } SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); return DAG.getUNDEF(WidenVT); } SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) { EVT VT = N->getValueType(0); SDLoc dl(N); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); unsigned NumElts = VT.getVectorNumElements(); unsigned WidenNumElts = WidenVT.getVectorNumElements(); SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); // Adjust mask based on new input vector length. 
SmallVector NewMask; for (unsigned i = 0; i != NumElts; ++i) { int Idx = N->getMaskElt(i); if (Idx < (int)NumElts) NewMask.push_back(Idx); else NewMask.push_back(Idx - NumElts + WidenNumElts); } for (unsigned i = NumElts; i != WidenNumElts; ++i) NewMask.push_back(-1); return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, NewMask); } SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && "Operands must be vectors"); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); SDValue InOp1 = N->getOperand(0); EVT InVT = InOp1.getValueType(); assert(InVT.isVector() && "can not widen non-vector type"); EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenNumElts); // The input and output types often differ here, and it could be that while // we'd prefer to widen the result type, the input operands have been split. // In this case, we also need to split the result of this node as well. if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) { SDValue SplitVSetCC = SplitVecOp_VSETCC(N); SDValue Res = ModifyToType(SplitVSetCC, WidenVT); return Res; } InOp1 = GetWidenedVector(InOp1); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); // Assume that the input and output will be widen appropriately. If not, // we will have to unroll it at some point. 
assert(InOp1.getValueType() == WidenInVT && InOp2.getValueType() == WidenInVT && "Input not widened to expected type!"); (void)WidenInVT; return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT, InOp1, InOp2, N->getOperand(2)); } //===----------------------------------------------------------------------===// // Widen Vector Operand //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { LLVM_DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"); SDValue Res = SDValue(); // See if the target wants to custom widen this node. if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) return false; switch (N->getOpcode()) { default: #ifndef NDEBUG dbgs() << "WidenVectorOperand op #" << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"; #endif llvm_unreachable("Do not know how to widen this operator's operand!"); case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break; case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break; case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break; case ISD::ANY_EXTEND: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: Res = WidenVecOp_EXTEND(N); break; case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::TRUNCATE: Res = WidenVecOp_Convert(N); break; } // If Res is null, the sub-method took care of registering the result. if (!Res.getNode()) return false; // If the result is N, the sub-method updated N in place. Tell the legalizer // core about this. 
if (Res.getNode() == N) return true; assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && "Invalid operand expansion"); ReplaceValueWith(SDValue(N, 0), Res); return false; } SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { SDLoc DL(N); EVT VT = N->getValueType(0); SDValue InOp = N->getOperand(0); assert(getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector && "Unexpected type action"); InOp = GetWidenedVector(InOp); assert(VT.getVectorNumElements() < InOp.getValueType().getVectorNumElements() && "Input wasn't widened!"); // We may need to further widen the operand until it has the same total // vector size as the result. EVT InVT = InOp.getValueType(); if (InVT.getSizeInBits() != VT.getSizeInBits()) { EVT InEltVT = InVT.getVectorElementType(); for (int i = MVT::FIRST_VECTOR_VALUETYPE, e = MVT::LAST_VECTOR_VALUETYPE; i < e; ++i) { EVT FixedVT = (MVT::SimpleValueType)i; EVT FixedEltVT = FixedVT.getVectorElementType(); if (TLI.isTypeLegal(FixedVT) && FixedVT.getSizeInBits() == VT.getSizeInBits() && FixedEltVT == InEltVT) { assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() && "Not enough elements in the fixed type for the operand!"); assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() && "We can't have the same type as we started with!"); if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements()) InOp = DAG.getNode( ISD::INSERT_SUBVECTOR, DL, FixedVT, DAG.getUNDEF(FixedVT), InOp, DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); else InOp = DAG.getNode( ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp, DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); break; } } InVT = InOp.getValueType(); if (InVT.getSizeInBits() != VT.getSizeInBits()) // We couldn't find a legal vector type that was a widening of the input // and could be extended in-register to the result type, so we have to // scalarize. 
return WidenVecOp_Convert(N); } // Use special DAG nodes to represent the operation of extending the // low lanes. switch (N->getOpcode()) { default: llvm_unreachable("Extend legalization on extend operation!"); case ISD::ANY_EXTEND: return DAG.getAnyExtendVectorInReg(InOp, DL, VT); case ISD::SIGN_EXTEND: return DAG.getSignExtendVectorInReg(InOp, DL, VT); case ISD::ZERO_EXTEND: return DAG.getZeroExtendVectorInReg(InOp, DL, VT); } } SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) { // The result (and first input) is legal, but the second input is illegal. // We can't do much to fix that, so just unroll and let the extracts off of // the second input be widened as needed later. return DAG.UnrollVectorOp(N); } SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { // Since the result is legal and the input is illegal. EVT VT = N->getValueType(0); EVT EltVT = VT.getVectorElementType(); SDLoc dl(N); unsigned NumElts = VT.getVectorNumElements(); SDValue InOp = N->getOperand(0); assert(getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector && "Unexpected type action"); InOp = GetWidenedVector(InOp); EVT InVT = InOp.getValueType(); unsigned Opcode = N->getOpcode(); // See if a widened result type would be legal, if so widen the node. EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, InVT.getVectorNumElements()); if (TLI.isTypeLegal(WideVT)) { SDValue Res = DAG.getNode(Opcode, dl, WideVT, InOp); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res, DAG.getIntPtrConstant(0, dl)); } EVT InEltVT = InVT.getVectorElementType(); // Unroll the convert into some scalar code and create a nasty build vector. 
SmallVector Ops(NumElts); for (unsigned i=0; i < NumElts; ++i) Ops[i] = DAG.getNode( Opcode, dl, EltVT, DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); return DAG.getBuildVector(VT, dl, Ops); } SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { EVT VT = N->getValueType(0); SDValue InOp = GetWidenedVector(N->getOperand(0)); EVT InWidenVT = InOp.getValueType(); SDLoc dl(N); // Check if we can convert between two legal vector types and extract. unsigned InWidenSize = InWidenVT.getSizeInBits(); unsigned Size = VT.getSizeInBits(); // x86mmx is not an acceptable vector element type, so don't try. if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) { unsigned NewNumElts = InWidenSize / Size; EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts); if (TLI.isTypeLegal(NewVT)) { SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); return DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); } } return CreateStackStoreLoad(InOp, VT); } SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { // If the input vector is not legal, it is likely that we will not find a // legal vector of the same size. Replace the concatenate vector with a // nasty build vector. 
EVT VT = N->getValueType(0); EVT EltVT = VT.getVectorElementType(); SDLoc dl(N); unsigned NumElts = VT.getVectorNumElements(); SmallVector Ops(NumElts); EVT InVT = N->getOperand(0).getValueType(); unsigned NumInElts = InVT.getVectorNumElements(); unsigned Idx = 0; unsigned NumOperands = N->getNumOperands(); for (unsigned i=0; i < NumOperands; ++i) { SDValue InOp = N->getOperand(i); assert(getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector && "Unexpected type action"); InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) Ops[Idx++] = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); } return DAG.getBuildVector(VT, dl, Ops); } SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) { SDValue InOp = GetWidenedVector(N->getOperand(0)); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0), InOp, N->getOperand(1)); } SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue InOp = GetWidenedVector(N->getOperand(0)); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), N->getValueType(0), InOp, N->getOperand(1)); } SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { // We have to widen the value, but we want only to store the original // vector type. 
StoreSDNode *ST = cast(N); if (!ST->getMemoryVT().getScalarType().isByteSized()) return TLI.scalarizeVectorStore(ST, DAG); SmallVector StChain; if (ST->isTruncatingStore()) GenWidenVectorTruncStores(StChain, ST); else GenWidenVectorStores(StChain, ST); if (StChain.size() == 1) return StChain[0]; else return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain); } SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { - assert(OpNo == 3 && "Can widen only data operand of mstore"); + assert((OpNo == 2 || OpNo == 3) && + "Can widen only data or mask operand of mstore"); MaskedStoreSDNode *MST = cast(N); SDValue Mask = MST->getMask(); EVT MaskVT = Mask.getValueType(); SDValue StVal = MST->getValue(); - // Widen the value - SDValue WideVal = GetWidenedVector(StVal); SDLoc dl(N); - // The mask should be widened as well. - EVT WideVT = WideVal.getValueType(); - EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), - MaskVT.getVectorElementType(), - WideVT.getVectorNumElements()); - Mask = ModifyToType(Mask, WideMaskVT, true); + if (OpNo == 3) { + // Widen the value + StVal = GetWidenedVector(StVal); + // The mask should be widened as well. 
+ EVT WideVT = StVal.getValueType(); + EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), + MaskVT.getVectorElementType(), + WideVT.getVectorNumElements()); + Mask = ModifyToType(Mask, WideMaskVT, true); + } else { + EVT WideMaskVT = TLI.getTypeToTransformTo(*DAG.getContext(), MaskVT); + Mask = ModifyToType(Mask, WideMaskVT, true); + + EVT ValueVT = StVal.getValueType(); + if (getTypeAction(ValueVT) == TargetLowering::TypeWidenVector) + StVal = GetWidenedVector(StVal); + else { + EVT WideVT = EVT::getVectorVT(*DAG.getContext(), + ValueVT.getVectorElementType(), + WideMaskVT.getVectorNumElements()); + StVal = ModifyToType(StVal, WideVT); + } + } + assert(Mask.getValueType().getVectorNumElements() == - WideVal.getValueType().getVectorNumElements() && + StVal.getValueType().getVectorNumElements() && "Mask and data vectors should have the same number of elements"); - return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(), + return DAG.getMaskedStore(MST->getChain(), dl, StVal, MST->getBasePtr(), Mask, MST->getMemoryVT(), MST->getMemOperand(), false, MST->isCompressingStore()); } SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { assert(OpNo == 1 && "Can widen only data operand of mscatter"); MaskedScatterSDNode *MSC = cast(N); SDValue DataOp = MSC->getValue(); SDValue Mask = MSC->getMask(); EVT MaskVT = Mask.getValueType(); SDValue Scale = MSC->getScale(); // Widen the value. SDValue WideVal = GetWidenedVector(DataOp); EVT WideVT = WideVal.getValueType(); unsigned NumElts = WideVT.getVectorNumElements(); SDLoc dl(N); // The mask should be widened as well. EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MaskVT.getVectorElementType(), NumElts); Mask = ModifyToType(Mask, WideMaskVT, true); // Widen index. 
SDValue Index = MSC->getIndex(); EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(), Index.getValueType().getScalarType(), NumElts); Index = ModifyToType(Index, WideIndexVT); SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index, Scale}; return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), dl, Ops, MSC->getMemOperand()); } SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { SDValue InOp0 = GetWidenedVector(N->getOperand(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(1)); SDLoc dl(N); EVT VT = N->getValueType(0); // WARNING: In this code we widen the compare instruction with garbage. // This garbage may contain denormal floats which may be slow. Is this a real // concern ? Should we zero the unused lanes if this is a float compare ? // Get a new SETCC node to compare the newly widened operands. // Only some of the compared elements are legal. EVT SVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), InOp0.getValueType()); // The result type is legal, if its vXi1, keep vXi1 for the new SETCC. if (VT.getScalarType() == MVT::i1) SVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, SVT.getVectorNumElements()); SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N), SVT, InOp0, InOp1, N->getOperand(2)); // Extract the needed results from the result vector. EVT ResVT = EVT::getVectorVT(*DAG.getContext(), SVT.getVectorElementType(), VT.getVectorNumElements()); SDValue CC = DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); return PromoteTargetBoolean(CC, VT); } //===----------------------------------------------------------------------===// // Vector Widening Utilities //===----------------------------------------------------------------------===// // Utility function to find the type to chop up a widen vector for load/store // TLI: Target lowering used to determine legal types. // Width: Width left need to load/store. 
// WidenVT: The widen vector type to load to/store from // Align: If 0, don't allow use of a wider type // WidenEx: If Align is not 0, the amount additional we can load/store from. static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, unsigned Width, EVT WidenVT, unsigned Align = 0, unsigned WidenEx = 0) { EVT WidenEltVT = WidenVT.getVectorElementType(); unsigned WidenWidth = WidenVT.getSizeInBits(); unsigned WidenEltWidth = WidenEltVT.getSizeInBits(); unsigned AlignInBits = Align*8; // If we have one element to load/store, return it. EVT RetVT = WidenEltVT; if (Width == WidenEltWidth) return RetVT; // See if there is larger legal integer than the element type to load/store. unsigned VT; for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE; VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) { EVT MemVT((MVT::SimpleValueType) VT); unsigned MemVTWidth = MemVT.getSizeInBits(); if (MemVT.getSizeInBits() <= WidenEltWidth) break; auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT); if ((Action == TargetLowering::TypeLegal || Action == TargetLowering::TypePromoteInteger) && (WidenWidth % MemVTWidth) == 0 && isPowerOf2_32(WidenWidth / MemVTWidth) && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { RetVT = MemVT; break; } } // See if there is a larger vector type to load/store that has the same vector // element type and is evenly divisible with the WidenVT. 
for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE; VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) { EVT MemVT = (MVT::SimpleValueType) VT; unsigned MemVTWidth = MemVT.getSizeInBits(); if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() && (WidenWidth % MemVTWidth) == 0 && isPowerOf2_32(WidenWidth / MemVTWidth) && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT) return MemVT; } } return RetVT; } // Builds a vector type from scalar loads // VecTy: Resulting Vector type // LDOps: Load operators to build a vector type // [Start,End) the list of loads to use. static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, SmallVectorImpl &LdOps, unsigned Start, unsigned End) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc dl(LdOps[Start]); EVT LdTy = LdOps[Start].getValueType(); unsigned Width = VecTy.getSizeInBits(); unsigned NumElts = Width / LdTy.getSizeInBits(); EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts); unsigned Idx = 1; SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT,LdOps[Start]); for (unsigned i = Start + 1; i != End; ++i) { EVT NewLdTy = LdOps[i].getValueType(); if (NewLdTy != LdTy) { NumElts = Width / NewLdTy.getSizeInBits(); NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts); VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp); // Readjust position and vector position based on new load type. Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits(); LdTy = NewLdTy; } VecOp = DAG.getNode( ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); } return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp); } SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, LoadSDNode *LD) { // The strategy assumes that we can efficiently load power-of-two widths. 
// The routine chops the vector into the largest vector loads with the same // element type or scalar loads and then recombines it to the widen vector // type. EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); unsigned WidenWidth = WidenVT.getSizeInBits(); EVT LdVT = LD->getMemoryVT(); SDLoc dl(LD); assert(LdVT.isVector() && WidenVT.isVector()); assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); // Load information SDValue Chain = LD->getChain(); SDValue BasePtr = LD->getBasePtr(); unsigned Align = LD->getAlignment(); MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); int LdWidth = LdVT.getSizeInBits(); int WidthDiff = WidenWidth - LdWidth; unsigned LdAlign = LD->isVolatile() ? 0 : Align; // Allow wider loads. // Find the vector type that can load from. EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); int NewVTWidth = NewVT.getSizeInBits(); SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), Align, MMOFlags, AAInfo); LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction. if (LdWidth <= NewVTWidth) { if (!NewVT.isVector()) { unsigned NumElts = WidenWidth / NewVTWidth; EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); } if (NewVT == WidenVT) return LdOp; assert(WidenWidth % NewVTWidth == 0); unsigned NumConcat = WidenWidth / NewVTWidth; SmallVector ConcatOps(NumConcat); SDValue UndefVal = DAG.getUNDEF(NewVT); ConcatOps[0] = LdOp; for (unsigned i = 1; i != NumConcat; ++i) ConcatOps[i] = UndefVal; return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps); } // Load vector by using multiple loads from largest vector to scalar. 
SmallVector LdOps; LdOps.push_back(LdOp); LdWidth -= NewVTWidth; unsigned Offset = 0; while (LdWidth > 0) { unsigned Increment = NewVTWidth / 8; Offset += Increment; BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment); SDValue L; if (LdWidth < NewVTWidth) { // The current type we are using is too large. Find a better size. NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); NewVTWidth = NewVT.getSizeInBits(); L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), MinAlign(Align, Increment), MMOFlags, AAInfo); LdChain.push_back(L.getValue(1)); if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) { // Later code assumes the vector loads produced will be mergeable, so we // must pad the final entry up to the previous width. Scalars are // combined separately. SmallVector Loads; Loads.push_back(L); unsigned size = L->getValueSizeInBits(0); while (size < LdOp->getValueSizeInBits(0)) { Loads.push_back(DAG.getUNDEF(L->getValueType(0))); size += L->getValueSizeInBits(0); } L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), Loads); } } else { L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), MinAlign(Align, Increment), MMOFlags, AAInfo); LdChain.push_back(L.getValue(1)); } LdOps.push_back(L); LdOp = L; LdWidth -= NewVTWidth; } // Build the vector from the load operations. unsigned End = LdOps.size(); if (!LdOps[0].getValueType().isVector()) // All the loads are scalar loads. return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End); // If the load contains vectors, build the vector using concat vector. // All of the vectors used to load are power-of-2, and the scalar loads can be // combined to make a power-of-2 vector. SmallVector ConcatOps(End); int i = End - 1; int Idx = End; EVT LdTy = LdOps[i].getValueType(); // First, combine the scalar loads to a vector. 
if (!LdTy.isVector()) { for (--i; i >= 0; --i) { LdTy = LdOps[i].getValueType(); if (LdTy.isVector()) break; } ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i + 1, End); } ConcatOps[--Idx] = LdOps[i]; for (--i; i >= 0; --i) { EVT NewLdTy = LdOps[i].getValueType(); if (NewLdTy != LdTy) { // Create a larger vector. ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy, makeArrayRef(&ConcatOps[Idx], End - Idx)); Idx = End - 1; LdTy = NewLdTy; } ConcatOps[--Idx] = LdOps[i]; } if (WidenWidth == LdTy.getSizeInBits() * (End - Idx)) return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, makeArrayRef(&ConcatOps[Idx], End - Idx)); // We need to fill the rest with undefs to build the vector. unsigned NumOps = WidenWidth / LdTy.getSizeInBits(); SmallVector WidenOps(NumOps); SDValue UndefVal = DAG.getUNDEF(LdTy); { unsigned i = 0; for (; i != End-Idx; ++i) WidenOps[i] = ConcatOps[Idx+i]; for (; i != NumOps; ++i) WidenOps[i] = UndefVal; } return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, WidenOps); } SDValue DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl &LdChain, LoadSDNode *LD, ISD::LoadExtType ExtType) { // For extension loads, it may not be more efficient to chop up the vector // and then extend it. Instead, we unroll the load and build a new vector. EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); EVT LdVT = LD->getMemoryVT(); SDLoc dl(LD); assert(LdVT.isVector() && WidenVT.isVector()); // Load information SDValue Chain = LD->getChain(); SDValue BasePtr = LD->getBasePtr(); unsigned Align = LD->getAlignment(); MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); EVT EltVT = WidenVT.getVectorElementType(); EVT LdEltVT = LdVT.getVectorElementType(); unsigned NumElts = LdVT.getVectorNumElements(); // Load each element and widen. 
unsigned WidenNumElts = WidenVT.getVectorNumElements(); SmallVector Ops(WidenNumElts); unsigned Increment = LdEltVT.getSizeInBits() / 8; Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, LD->getPointerInfo(), LdEltVT, Align, MMOFlags, AAInfo); LdChain.push_back(Ops[0].getValue(1)); unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset); Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, LD->getPointerInfo().getWithOffset(Offset), LdEltVT, Align, MMOFlags, AAInfo); LdChain.push_back(Ops[i].getValue(1)); } // Fill the rest with undefs. SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i != WidenNumElts; ++i) Ops[i] = UndefVal; return DAG.getBuildVector(WidenVT, dl, Ops); } void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl &StChain, StoreSDNode *ST) { // The strategy assumes that we can efficiently store power-of-two widths. // The routine chops the vector into the largest vector stores with the same // element type or scalar stores. SDValue Chain = ST->getChain(); SDValue BasePtr = ST->getBasePtr(); unsigned Align = ST->getAlignment(); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); SDValue ValOp = GetWidenedVector(ST->getValue()); SDLoc dl(ST); EVT StVT = ST->getMemoryVT(); unsigned StWidth = StVT.getSizeInBits(); EVT ValVT = ValOp.getValueType(); unsigned ValWidth = ValVT.getSizeInBits(); EVT ValEltVT = ValVT.getVectorElementType(); unsigned ValEltWidth = ValEltVT.getSizeInBits(); assert(StVT.getVectorElementType() == ValEltVT); int Idx = 0; // current index to store unsigned Offset = 0; // offset from base to store while (StWidth != 0) { // Find the largest vector type we can store with. 
EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT); unsigned NewVTWidth = NewVT.getSizeInBits(); unsigned Increment = NewVTWidth / 8; if (NewVT.isVector()) { unsigned NumVTElts = NewVT.getVectorNumElements(); do { SDValue EOp = DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); StChain.push_back(DAG.getStore( Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), MinAlign(Align, Offset), MMOFlags, AAInfo)); StWidth -= NewVTWidth; Offset += Increment; Idx += NumVTElts; BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment); } while (StWidth != 0 && StWidth >= NewVTWidth); } else { // Cast the vector to the scalar type we can store. unsigned NumElts = ValWidth / NewVTWidth; EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp); // Readjust index position based on new vector type. Idx = Idx * ValEltWidth / NewVTWidth; do { SDValue EOp = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); StChain.push_back(DAG.getStore( Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), MinAlign(Align, Offset), MMOFlags, AAInfo)); StWidth -= NewVTWidth; Offset += Increment; BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment); } while (StWidth != 0 && StWidth >= NewVTWidth); // Restore index back to be relative to the original widen element type. Idx = Idx * NewVTWidth / ValEltWidth; } } } void DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl &StChain, StoreSDNode *ST) { // For extension loads, it may not be more efficient to truncate the vector // and then store it. Instead, we extract each element and then store it. 
SDValue Chain = ST->getChain(); SDValue BasePtr = ST->getBasePtr(); unsigned Align = ST->getAlignment(); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); SDValue ValOp = GetWidenedVector(ST->getValue()); SDLoc dl(ST); EVT StVT = ST->getMemoryVT(); EVT ValVT = ValOp.getValueType(); // It must be true that the wide vector type is bigger than where we need to // store. assert(StVT.isVector() && ValOp.getValueType().isVector()); assert(StVT.bitsLT(ValOp.getValueType())); // For truncating stores, we can not play the tricks of chopping legal vector // types and bitcast it to the right type. Instead, we unroll the store. EVT StEltVT = StVT.getVectorElementType(); EVT ValEltVT = ValVT.getVectorElementType(); unsigned Increment = ValEltVT.getSizeInBits() / 8; unsigned NumElts = StVT.getVectorNumElements(); SDValue EOp = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo(), StEltVT, Align, MMOFlags, AAInfo)); unsigned Offset = Increment; for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset); SDValue EOp = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); StChain.push_back(DAG.getTruncStore( Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset), StEltVT, MinAlign(Align, Offset), MMOFlags, AAInfo)); } } /// Modifies a vector input (widen or narrows) to a vector of NVT. The /// input vector must have the same element type as NVT. /// FillWithZeroes specifies that the vector should be widened with zeroes. SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT, bool FillWithZeroes) { // Note that InOp might have been widened so it might already have // the right width or it might need be narrowed. 
EVT InVT = InOp.getValueType(); assert(InVT.getVectorElementType() == NVT.getVectorElementType() && "input and widen element type must match"); SDLoc dl(InOp); // Check if InOp already has the right width. if (InVT == NVT) return InOp; unsigned InNumElts = InVT.getVectorNumElements(); unsigned WidenNumElts = NVT.getVectorNumElements(); if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) { unsigned NumConcat = WidenNumElts / InNumElts; SmallVector Ops(NumConcat); SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, InVT) : DAG.getUNDEF(InVT); Ops[0] = InOp; for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = FillVal; return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops); } if (WidenNumElts < InNumElts && InNumElts % WidenNumElts) return DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); // Fall back to extract and build. SmallVector Ops(WidenNumElts); EVT EltVT = NVT.getVectorElementType(); unsigned MinNumElts = std::min(WidenNumElts, InNumElts); unsigned Idx; for (Idx = 0; Idx < MinNumElts; ++Idx) Ops[Idx] = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) : DAG.getUNDEF(EltVT); for ( ; Idx < WidenNumElts; ++Idx) Ops[Idx] = FillVal; return DAG.getBuildVector(NVT, dl, Ops); } Index: projects/clang700-import/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp =================================================================== --- projects/clang700-import/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp (revision 337644) +++ projects/clang700-import/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp (revision 337645) @@ -1,913 +1,893 @@ //===- ELFAsmParser.cpp - ELF Assembly Parser -----------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. 
See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCAsmParserExtension.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Casting.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SMLoc.h" #include #include #include using namespace llvm; namespace { class ELFAsmParser : public MCAsmParserExtension { template void addDirectiveHandler(StringRef Directive) { MCAsmParser::ExtensionDirectiveHandler Handler = std::make_pair( this, HandleDirective); getParser().addDirectiveHandler(Directive, Handler); } bool ParseSectionSwitch(StringRef Section, unsigned Type, unsigned Flags, SectionKind Kind); public: ELFAsmParser() { BracketExpressionsSupported = true; } void Initialize(MCAsmParser &Parser) override { // Call the base implementation. 
this->MCAsmParserExtension::Initialize(Parser); addDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveData>(".data"); addDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveText>(".text"); addDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveBSS>(".bss"); addDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveRoData>(".rodata"); addDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTData>(".tdata"); addDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTBSS>(".tbss"); addDirectiveHandler< &ELFAsmParser::ParseSectionDirectiveDataRel>(".data.rel"); addDirectiveHandler< &ELFAsmParser::ParseSectionDirectiveDataRelRo>(".data.rel.ro"); addDirectiveHandler< &ELFAsmParser::ParseSectionDirectiveEhFrame>(".eh_frame"); addDirectiveHandler<&ELFAsmParser::ParseDirectiveSection>(".section"); addDirectiveHandler< &ELFAsmParser::ParseDirectivePushSection>(".pushsection"); addDirectiveHandler<&ELFAsmParser::ParseDirectivePopSection>(".popsection"); addDirectiveHandler<&ELFAsmParser::ParseDirectiveSize>(".size"); addDirectiveHandler<&ELFAsmParser::ParseDirectivePrevious>(".previous"); addDirectiveHandler<&ELFAsmParser::ParseDirectiveType>(".type"); addDirectiveHandler<&ELFAsmParser::ParseDirectiveIdent>(".ident"); addDirectiveHandler<&ELFAsmParser::ParseDirectiveSymver>(".symver"); addDirectiveHandler<&ELFAsmParser::ParseDirectiveVersion>(".version"); addDirectiveHandler<&ELFAsmParser::ParseDirectiveWeakref>(".weakref"); addDirectiveHandler<&ELFAsmParser::ParseDirectiveSymbolAttribute>(".weak"); addDirectiveHandler<&ELFAsmParser::ParseDirectiveSymbolAttribute>(".local"); addDirectiveHandler< &ELFAsmParser::ParseDirectiveSymbolAttribute>(".protected"); addDirectiveHandler< &ELFAsmParser::ParseDirectiveSymbolAttribute>(".internal"); addDirectiveHandler< &ELFAsmParser::ParseDirectiveSymbolAttribute>(".hidden"); addDirectiveHandler<&ELFAsmParser::ParseDirectiveSubsection>(".subsection"); addDirectiveHandler<&ELFAsmParser::ParseDirectiveCGProfile>(".cg_profile"); } // FIXME: 
Part of this logic is duplicated in the MCELFStreamer. What is // the best way for us to get access to it? bool ParseSectionDirectiveData(StringRef, SMLoc) { return ParseSectionSwitch(".data", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC, SectionKind::getData()); } bool ParseSectionDirectiveText(StringRef, SMLoc) { return ParseSectionSwitch(".text", ELF::SHT_PROGBITS, ELF::SHF_EXECINSTR | ELF::SHF_ALLOC, SectionKind::getText()); } bool ParseSectionDirectiveBSS(StringRef, SMLoc) { return ParseSectionSwitch(".bss", ELF::SHT_NOBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC, SectionKind::getBSS()); } bool ParseSectionDirectiveRoData(StringRef, SMLoc) { return ParseSectionSwitch(".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC, SectionKind::getReadOnly()); } bool ParseSectionDirectiveTData(StringRef, SMLoc) { return ParseSectionSwitch(".tdata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_TLS | ELF::SHF_WRITE, SectionKind::getThreadData()); } bool ParseSectionDirectiveTBSS(StringRef, SMLoc) { return ParseSectionSwitch(".tbss", ELF::SHT_NOBITS, ELF::SHF_ALLOC | ELF::SHF_TLS | ELF::SHF_WRITE, SectionKind::getThreadBSS()); } bool ParseSectionDirectiveDataRel(StringRef, SMLoc) { return ParseSectionSwitch(".data.rel", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_WRITE, SectionKind::getData()); } bool ParseSectionDirectiveDataRelRo(StringRef, SMLoc) { return ParseSectionSwitch(".data.rel.ro", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_WRITE, SectionKind::getReadOnlyWithRel()); } bool ParseSectionDirectiveEhFrame(StringRef, SMLoc) { return ParseSectionSwitch(".eh_frame", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_WRITE, SectionKind::getData()); } bool ParseDirectivePushSection(StringRef, SMLoc); bool ParseDirectivePopSection(StringRef, SMLoc); bool ParseDirectiveSection(StringRef, SMLoc); bool ParseDirectiveSize(StringRef, SMLoc); bool ParseDirectivePrevious(StringRef, SMLoc); bool ParseDirectiveType(StringRef, SMLoc); bool ParseDirectiveIdent(StringRef, SMLoc); bool 
ParseDirectiveSymver(StringRef, SMLoc); bool ParseDirectiveVersion(StringRef, SMLoc); bool ParseDirectiveWeakref(StringRef, SMLoc); bool ParseDirectiveSymbolAttribute(StringRef, SMLoc); bool ParseDirectiveSubsection(StringRef, SMLoc); bool ParseDirectiveCGProfile(StringRef, SMLoc); private: bool ParseSectionName(StringRef &SectionName); bool ParseSectionArguments(bool IsPush, SMLoc loc); unsigned parseSunStyleSectionFlags(); bool maybeParseSectionType(StringRef &TypeName); bool parseMergeSize(int64_t &Size); bool parseGroup(StringRef &GroupName); bool parseMetadataSym(MCSymbolELF *&Associated); bool maybeParseUniqueID(int64_t &UniqueID); }; } // end anonymous namespace /// ParseDirectiveSymbolAttribute /// ::= { ".local", ".weak", ... } [ identifier ( , identifier )* ] bool ELFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) { MCSymbolAttr Attr = StringSwitch(Directive) .Case(".weak", MCSA_Weak) .Case(".local", MCSA_Local) .Case(".hidden", MCSA_Hidden) .Case(".internal", MCSA_Internal) .Case(".protected", MCSA_Protected) .Default(MCSA_Invalid); assert(Attr != MCSA_Invalid && "unexpected symbol attribute directive!"); if (getLexer().isNot(AsmToken::EndOfStatement)) { while (true) { StringRef Name; if (getParser().parseIdentifier(Name)) return TokError("expected identifier in directive"); MCSymbol *Sym = getContext().getOrCreateSymbol(Name); getStreamer().EmitSymbolAttribute(Sym, Attr); if (getLexer().is(AsmToken::EndOfStatement)) break; if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Lex(); } } Lex(); return false; } bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type, unsigned Flags, SectionKind Kind) { const MCExpr *Subsection = nullptr; if (getLexer().isNot(AsmToken::EndOfStatement)) { if (getParser().parseExpression(Subsection)) return true; } Lex(); getStreamer().SwitchSection(getContext().getELFSection(Section, Type, Flags), Subsection); return false; } bool 
ELFAsmParser::ParseDirectiveSize(StringRef, SMLoc) {
  // Parses "<symbol> , <expression>" and emits the ELF size for the symbol.
  StringRef SymName;
  if (getParser().parseIdentifier(SymName))
    return TokError("expected identifier in directive");
  // The symbol is created before the rest of the directive is validated.
  MCSymbolELF *Symbol = cast(getContext().getOrCreateSymbol(SymName));

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("unexpected token in directive");
  Lex();

  const MCExpr *SizeExpr;
  if (getParser().parseExpression(SizeExpr))
    return true;

  if (getLexer().isNot(AsmToken::EndOfStatement))
    return TokError("unexpected token in directive");
  Lex();

  getStreamer().emitELFSize(Symbol, SizeExpr);
  return false;
}

bool ELFAsmParser::ParseSectionName(StringRef &SectionName) {
  // A section name can contain -, so we cannot just use
  // parseIdentifier.
  SMLoc FirstLoc = getLexer().getLoc();
  unsigned Size = 0;

  // A quoted name is taken as-is.
  if (getLexer().is(AsmToken::String)) {
    SectionName = getTok().getIdentifier();
    Lex();
    return false;
  }

  // Otherwise glue together adjacent tokens until a comma, the end of the
  // statement, or a gap in the source text is reached.
  while (!getParser().hasPendingError()) {
    SMLoc PrevLoc = getLexer().getLoc();
    if (getLexer().is(AsmToken::Comma) ||
        getLexer().is(AsmToken::EndOfStatement))
      break;

    // Measure the current token, then consume it.
    unsigned CurSize;
    if (getLexer().is(AsmToken::String))
      CurSize = getTok().getIdentifier().size() + 2; // Add the two quotes.
    else if (getLexer().is(AsmToken::Identifier))
      CurSize = getTok().getIdentifier().size();
    else
      CurSize = getTok().getString().size();
    Lex();

    Size += CurSize;
    SectionName = StringRef(FirstLoc.getPointer(), Size);

    // Make sure the following token is adjacent.
if (PrevLoc.getPointer() + CurSize != getTok().getLoc().getPointer()) break; } if (Size == 0) return true; return false; } static unsigned parseSectionFlags(StringRef flagsStr, bool *UseLastGroup) { unsigned flags = 0; // If a valid numerical value is set for the section flag, use it verbatim if (!flagsStr.getAsInteger(0, flags)) return flags; for (char i : flagsStr) { switch (i) { case 'a': flags |= ELF::SHF_ALLOC; break; case 'e': flags |= ELF::SHF_EXCLUDE; break; case 'x': flags |= ELF::SHF_EXECINSTR; break; case 'w': flags |= ELF::SHF_WRITE; break; case 'o': flags |= ELF::SHF_LINK_ORDER; break; case 'M': flags |= ELF::SHF_MERGE; break; case 'S': flags |= ELF::SHF_STRINGS; break; case 'T': flags |= ELF::SHF_TLS; break; case 'c': flags |= ELF::XCORE_SHF_CP_SECTION; break; case 'd': flags |= ELF::XCORE_SHF_DP_SECTION; break; case 'y': flags |= ELF::SHF_ARM_PURECODE; break; case 'G': flags |= ELF::SHF_GROUP; break; case '?': *UseLastGroup = true; break; default: return -1U; } } return flags; } unsigned ELFAsmParser::parseSunStyleSectionFlags() { unsigned flags = 0; while (getLexer().is(AsmToken::Hash)) { Lex(); // Eat the #. if (!getLexer().is(AsmToken::Identifier)) return -1U; StringRef flagId = getTok().getIdentifier(); if (flagId == "alloc") flags |= ELF::SHF_ALLOC; else if (flagId == "execinstr") flags |= ELF::SHF_EXECINSTR; else if (flagId == "write") flags |= ELF::SHF_WRITE; else if (flagId == "tls") flags |= ELF::SHF_TLS; else return -1U; Lex(); // Eat the flag. if (!getLexer().is(AsmToken::Comma)) break; Lex(); // Eat the comma. 
} return flags; } bool ELFAsmParser::ParseDirectivePushSection(StringRef s, SMLoc loc) { getStreamer().PushSection(); if (ParseSectionArguments(/*IsPush=*/true, loc)) { getStreamer().PopSection(); return true; } return false; } bool ELFAsmParser::ParseDirectivePopSection(StringRef, SMLoc) { if (!getStreamer().PopSection()) return TokError(".popsection without corresponding .pushsection"); return false; } bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc loc) { return ParseSectionArguments(/*IsPush=*/false, loc); } bool ELFAsmParser::maybeParseSectionType(StringRef &TypeName) { MCAsmLexer &L = getLexer(); if (L.isNot(AsmToken::Comma)) return false; Lex(); if (L.isNot(AsmToken::At) && L.isNot(AsmToken::Percent) && L.isNot(AsmToken::String)) { if (L.getAllowAtInIdentifier()) return TokError("expected '@', '%' or \"\""); else return TokError("expected '%' or \"\""); } if (!L.is(AsmToken::String)) Lex(); if (L.is(AsmToken::Integer)) { TypeName = getTok().getString(); Lex(); } else if (getParser().parseIdentifier(TypeName)) return TokError("expected identifier in directive"); return false; } bool ELFAsmParser::parseMergeSize(int64_t &Size) { if (getLexer().isNot(AsmToken::Comma)) return TokError("expected the entry size"); Lex(); if (getParser().parseAbsoluteExpression(Size)) return true; if (Size <= 0) return TokError("entry size must be positive"); return false; } bool ELFAsmParser::parseGroup(StringRef &GroupName) { MCAsmLexer &L = getLexer(); if (L.isNot(AsmToken::Comma)) return TokError("expected group name"); Lex(); if (L.is(AsmToken::Integer)) { GroupName = getTok().getString(); Lex(); } else if (getParser().parseIdentifier(GroupName)) { return TokError("invalid group name"); } if (L.is(AsmToken::Comma)) { Lex(); StringRef Linkage; if (getParser().parseIdentifier(Linkage)) return TokError("invalid linkage"); if (Linkage != "comdat") return TokError("Linkage must be 'comdat'"); } return false; } bool ELFAsmParser::parseMetadataSym(MCSymbolELF *&Associated) 
{ MCAsmLexer &L = getLexer(); if (L.isNot(AsmToken::Comma)) return TokError("expected metadata symbol"); Lex(); StringRef Name; if (getParser().parseIdentifier(Name)) return TokError("invalid metadata symbol"); Associated = dyn_cast_or_null(getContext().lookupSymbol(Name)); if (!Associated || !Associated->isInSection()) return TokError("symbol is not in a section: " + Name); return false; } bool ELFAsmParser::maybeParseUniqueID(int64_t &UniqueID) { MCAsmLexer &L = getLexer(); if (L.isNot(AsmToken::Comma)) return false; Lex(); StringRef UniqueStr; if (getParser().parseIdentifier(UniqueStr)) return TokError("expected identifier in directive"); if (UniqueStr != "unique") return TokError("expected 'unique'"); if (L.isNot(AsmToken::Comma)) return TokError("expected commma"); Lex(); if (getParser().parseAbsoluteExpression(UniqueID)) return true; if (UniqueID < 0) return TokError("unique id must be positive"); if (!isUInt<32>(UniqueID) || UniqueID == ~0U) return TokError("unique id is too large"); return false; } static bool hasPrefix(StringRef SectionName, StringRef Prefix) { return SectionName.startswith(Prefix) || SectionName == Prefix.drop_back(); } -// Return a set of section flags based on the section name that can then -// be augmented later, otherwise return 0 if we don't have any reasonable -// defaults. 
-static unsigned defaultSectionFlags(StringRef SectionName) { - - if (hasPrefix(SectionName, ".rodata.cst")) - return ELF::SHF_ALLOC | ELF::SHF_MERGE; - - if (hasPrefix(SectionName, ".rodata.") || SectionName == ".rodata1") - return ELF::SHF_ALLOC; - - if (SectionName == ".fini" || SectionName == ".init" || - hasPrefix(SectionName, ".text.")) - return ELF::SHF_ALLOC | ELF::SHF_EXECINSTR; - - if (hasPrefix(SectionName, ".data.") || SectionName == ".data1" || - hasPrefix(SectionName, ".bss.") || - hasPrefix(SectionName, ".init_array.") || - hasPrefix(SectionName, ".fini_array.") || - hasPrefix(SectionName, ".preinit_array.")) - return ELF::SHF_ALLOC | ELF::SHF_WRITE; - - if (hasPrefix(SectionName, ".tdata.") || hasPrefix(SectionName, ".tbss.")) - return ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_TLS; - - return 0; -} - bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) { StringRef SectionName; if (ParseSectionName(SectionName)) return TokError("expected identifier in directive"); StringRef TypeName; int64_t Size = 0; StringRef GroupName; + unsigned Flags = 0; const MCExpr *Subsection = nullptr; bool UseLastGroup = false; MCSymbolELF *Associated = nullptr; int64_t UniqueID = ~0; - // Set the default section flags first in case no others are given. - unsigned Flags = defaultSectionFlags(SectionName); + // Set the defaults first. 
+ if (hasPrefix(SectionName, ".rodata.") || SectionName == ".rodata1") + Flags |= ELF::SHF_ALLOC; + else if (SectionName == ".fini" || SectionName == ".init" || + hasPrefix(SectionName, ".text.")) + Flags |= ELF::SHF_ALLOC | ELF::SHF_EXECINSTR; + else if (hasPrefix(SectionName, ".data.") || SectionName == ".data1" || + hasPrefix(SectionName, ".bss.") || + hasPrefix(SectionName, ".init_array.") || + hasPrefix(SectionName, ".fini_array.") || + hasPrefix(SectionName, ".preinit_array.")) + Flags |= ELF::SHF_ALLOC | ELF::SHF_WRITE; + else if (hasPrefix(SectionName, ".tdata.") || + hasPrefix(SectionName, ".tbss.")) + Flags |= ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_TLS; if (getLexer().is(AsmToken::Comma)) { Lex(); if (IsPush && getLexer().isNot(AsmToken::String)) { if (getParser().parseExpression(Subsection)) return true; if (getLexer().isNot(AsmToken::Comma)) goto EndStmt; Lex(); } unsigned extraFlags; if (getLexer().isNot(AsmToken::String)) { if (!getContext().getAsmInfo()->usesSunStyleELFSectionSwitchSyntax() || getLexer().isNot(AsmToken::Hash)) return TokError("expected string in directive"); extraFlags = parseSunStyleSectionFlags(); } else { StringRef FlagsStr = getTok().getStringContents(); Lex(); extraFlags = parseSectionFlags(FlagsStr, &UseLastGroup); } if (extraFlags == -1U) return TokError("unknown flag"); - - // If we found additional section flags on a known section then give a - // warning. 
- if (Flags && Flags != extraFlags) - Warning(loc, "setting incorrect section attributes for " + SectionName); - Flags |= extraFlags; bool Mergeable = Flags & ELF::SHF_MERGE; bool Group = Flags & ELF::SHF_GROUP; if (Group && UseLastGroup) return TokError("Section cannot specifiy a group name while also acting " "as a member of the last group"); if (maybeParseSectionType(TypeName)) return true; MCAsmLexer &L = getLexer(); if (TypeName.empty()) { if (Mergeable) return TokError("Mergeable section must specify the type"); if (Group) return TokError("Group section must specify the type"); if (L.isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); } if (Mergeable) if (parseMergeSize(Size)) return true; if (Group) if (parseGroup(GroupName)) return true; if (Flags & ELF::SHF_LINK_ORDER) if (parseMetadataSym(Associated)) return true; if (maybeParseUniqueID(UniqueID)) return true; } EndStmt: if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); Lex(); unsigned Type = ELF::SHT_PROGBITS; if (TypeName.empty()) { if (SectionName.startswith(".note")) Type = ELF::SHT_NOTE; else if (hasPrefix(SectionName, ".init_array.")) Type = ELF::SHT_INIT_ARRAY; else if (hasPrefix(SectionName, ".bss.")) Type = ELF::SHT_NOBITS; else if (hasPrefix(SectionName, ".tbss.")) Type = ELF::SHT_NOBITS; else if (hasPrefix(SectionName, ".fini_array.")) Type = ELF::SHT_FINI_ARRAY; else if (hasPrefix(SectionName, ".preinit_array.")) Type = ELF::SHT_PREINIT_ARRAY; } else { if (TypeName == "init_array") Type = ELF::SHT_INIT_ARRAY; else if (TypeName == "fini_array") Type = ELF::SHT_FINI_ARRAY; else if (TypeName == "preinit_array") Type = ELF::SHT_PREINIT_ARRAY; else if (TypeName == "nobits") Type = ELF::SHT_NOBITS; else if (TypeName == "progbits") Type = ELF::SHT_PROGBITS; else if (TypeName == "note") Type = ELF::SHT_NOTE; else if (TypeName == "unwind") Type = ELF::SHT_X86_64_UNWIND; else if (TypeName == "llvm_odrtab") Type = 
ELF::SHT_LLVM_ODRTAB; else if (TypeName == "llvm_linker_options") Type = ELF::SHT_LLVM_LINKER_OPTIONS; else if (TypeName == "llvm_call_graph_profile") Type = ELF::SHT_LLVM_CALL_GRAPH_PROFILE; else if (TypeName.getAsInteger(0, Type)) return TokError("unknown section type"); } if (UseLastGroup) { MCSectionSubPair CurrentSection = getStreamer().getCurrentSection(); if (const MCSectionELF *Section = cast_or_null(CurrentSection.first)) if (const MCSymbol *Group = Section->getGroup()) { GroupName = Group->getName(); Flags |= ELF::SHF_GROUP; } } MCSection *ELFSection = getContext().getELFSection(SectionName, Type, Flags, Size, GroupName, UniqueID, Associated); getStreamer().SwitchSection(ELFSection, Subsection); if (getContext().getGenDwarfForAssembly()) { bool InsertResult = getContext().addGenDwarfSection(ELFSection); if (InsertResult) { if (getContext().getDwarfVersion() <= 2) Warning(loc, "DWARF2 only supports one section per compilation unit"); if (!ELFSection->getBeginSymbol()) { MCSymbol *SectionStartSymbol = getContext().createTempSymbol(); getStreamer().EmitLabel(SectionStartSymbol); ELFSection->setBeginSymbol(SectionStartSymbol); } } } return false; } bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) { MCSectionSubPair PreviousSection = getStreamer().getPreviousSection(); if (PreviousSection.first == nullptr) return TokError(".previous without corresponding .section"); getStreamer().SwitchSection(PreviousSection.first, PreviousSection.second); return false; } static MCSymbolAttr MCAttrForString(StringRef Type) { return StringSwitch(Type) .Cases("STT_FUNC", "function", MCSA_ELF_TypeFunction) .Cases("STT_OBJECT", "object", MCSA_ELF_TypeObject) .Cases("STT_TLS", "tls_object", MCSA_ELF_TypeTLS) .Cases("STT_COMMON", "common", MCSA_ELF_TypeCommon) .Cases("STT_NOTYPE", "notype", MCSA_ELF_TypeNoType) .Cases("STT_GNU_IFUNC", "gnu_indirect_function", MCSA_ELF_TypeIndFunction) .Case("gnu_unique_object", MCSA_ELF_TypeGnuUniqueObject) 
.Default(MCSA_Invalid); } /// ParseDirectiveELFType /// ::= .type identifier , STT_ /// ::= .type identifier , #attribute /// ::= .type identifier , @attribute /// ::= .type identifier , %attribute /// ::= .type identifier , "attribute" bool ELFAsmParser::ParseDirectiveType(StringRef, SMLoc) { StringRef Name; if (getParser().parseIdentifier(Name)) return TokError("expected identifier in directive"); // Handle the identifier as the key symbol. MCSymbol *Sym = getContext().getOrCreateSymbol(Name); // NOTE the comma is optional in all cases. It is only documented as being // optional in the first case, however, GAS will silently treat the comma as // optional in all cases. Furthermore, although the documentation states that // the first form only accepts STT_, in reality, GAS // accepts both the upper case name as well as the lower case aliases. if (getLexer().is(AsmToken::Comma)) Lex(); if (getLexer().isNot(AsmToken::Identifier) && getLexer().isNot(AsmToken::Hash) && getLexer().isNot(AsmToken::Percent) && getLexer().isNot(AsmToken::String)) { if (!getLexer().getAllowAtInIdentifier()) return TokError("expected STT_, '#', " "'%' or \"\""); else if (getLexer().isNot(AsmToken::At)) return TokError("expected STT_, '#', '@', " "'%' or \"\""); } if (getLexer().isNot(AsmToken::String) && getLexer().isNot(AsmToken::Identifier)) Lex(); SMLoc TypeLoc = getLexer().getLoc(); StringRef Type; if (getParser().parseIdentifier(Type)) return TokError("expected symbol type in directive"); MCSymbolAttr Attr = MCAttrForString(Type); if (Attr == MCSA_Invalid) return Error(TypeLoc, "unsupported attribute in '.type' directive"); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.type' directive"); Lex(); getStreamer().EmitSymbolAttribute(Sym, Attr); return false; } /// ParseDirectiveIdent /// ::= .ident string bool ELFAsmParser::ParseDirectiveIdent(StringRef, SMLoc) { if (getLexer().isNot(AsmToken::String)) return TokError("unexpected token in '.ident' 
directive"); StringRef Data = getTok().getIdentifier(); Lex(); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.ident' directive"); Lex(); getStreamer().EmitIdent(Data); return false; } /// ParseDirectiveSymver /// ::= .symver foo, bar2@zed bool ELFAsmParser::ParseDirectiveSymver(StringRef, SMLoc) { StringRef Name; if (getParser().parseIdentifier(Name)) return TokError("expected identifier in directive"); if (getLexer().isNot(AsmToken::Comma)) return TokError("expected a comma"); // ARM assembly uses @ for a comment... // except when parsing the second parameter of the .symver directive. // Force the next symbol to allow @ in the identifier, which is // required for this directive and then reset it to its initial state. const bool AllowAtInIdentifier = getLexer().getAllowAtInIdentifier(); getLexer().setAllowAtInIdentifier(true); Lex(); getLexer().setAllowAtInIdentifier(AllowAtInIdentifier); StringRef AliasName; if (getParser().parseIdentifier(AliasName)) return TokError("expected identifier in directive"); if (AliasName.find('@') == StringRef::npos) return TokError("expected a '@' in the name"); MCSymbol *Sym = getContext().getOrCreateSymbol(Name); getStreamer().emitELFSymverDirective(AliasName, Sym); return false; } /// ParseDirectiveVersion /// ::= .version string bool ELFAsmParser::ParseDirectiveVersion(StringRef, SMLoc) { if (getLexer().isNot(AsmToken::String)) return TokError("unexpected token in '.version' directive"); StringRef Data = getTok().getIdentifier(); Lex(); MCSection *Note = getContext().getELFSection(".note", ELF::SHT_NOTE, 0); getStreamer().PushSection(); getStreamer().SwitchSection(Note); getStreamer().EmitIntValue(Data.size()+1, 4); // namesz. getStreamer().EmitIntValue(0, 4); // descsz = 0 (no description). getStreamer().EmitIntValue(1, 4); // type = NT_VERSION. getStreamer().EmitBytes(Data); // name. getStreamer().EmitIntValue(0, 1); // terminate the string. 
getStreamer().EmitValueToAlignment(4); // ensure 4 byte alignment. getStreamer().PopSection(); return false; } /// ParseDirectiveWeakref /// ::= .weakref foo, bar bool ELFAsmParser::ParseDirectiveWeakref(StringRef, SMLoc) { // FIXME: Share code with the other alias building directives. StringRef AliasName; if (getParser().parseIdentifier(AliasName)) return TokError("expected identifier in directive"); if (getLexer().isNot(AsmToken::Comma)) return TokError("expected a comma"); Lex(); StringRef Name; if (getParser().parseIdentifier(Name)) return TokError("expected identifier in directive"); MCSymbol *Alias = getContext().getOrCreateSymbol(AliasName); MCSymbol *Sym = getContext().getOrCreateSymbol(Name); getStreamer().EmitWeakReference(Alias, Sym); return false; } bool ELFAsmParser::ParseDirectiveSubsection(StringRef, SMLoc) { const MCExpr *Subsection = nullptr; if (getLexer().isNot(AsmToken::EndOfStatement)) { if (getParser().parseExpression(Subsection)) return true; } if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); Lex(); getStreamer().SubSection(Subsection); return false; } /// ParseDirectiveCGProfile /// ::= .cg_profile identifier, identifier, bool ELFAsmParser::ParseDirectiveCGProfile(StringRef, SMLoc) { StringRef From; SMLoc FromLoc = getLexer().getLoc(); if (getParser().parseIdentifier(From)) return TokError("expected identifier in directive"); if (getLexer().isNot(AsmToken::Comma)) return TokError("expected a comma"); Lex(); StringRef To; SMLoc ToLoc = getLexer().getLoc(); if (getParser().parseIdentifier(To)) return TokError("expected identifier in directive"); if (getLexer().isNot(AsmToken::Comma)) return TokError("expected a comma"); Lex(); int64_t Count; if (getParser().parseIntToken( Count, "expected integer count in '.cg_profile' directive")) return true; if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); MCSymbol *FromSym = 
getContext().getOrCreateSymbol(From); MCSymbol *ToSym = getContext().getOrCreateSymbol(To); getStreamer().emitCGProfileEntry( MCSymbolRefExpr::create(FromSym, MCSymbolRefExpr::VK_None, getContext(), FromLoc), MCSymbolRefExpr::create(ToSym, MCSymbolRefExpr::VK_None, getContext(), ToLoc), Count); return false; } namespace llvm { MCAsmParserExtension *createELFAsmParser() { return new ELFAsmParser; } } // end namespace llvm Index: projects/clang700-import/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td =================================================================== --- projects/clang700-import/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td (revision 337644) +++ projects/clang700-import/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td (revision 337645) @@ -1,746 +1,737 @@ //===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===------------------------------------------------------------===// include "llvm/TableGen/SearchableTable.td" include "llvm/Target/Target.td" include "AMDGPUFeatures.td" //===------------------------------------------------------------===// // Subtarget Features (device properties) //===------------------------------------------------------------===// def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf", "FastFMAF32", "true", "Assuming f32 fma is at least as fast as mul + add" >; def FeatureMIMG_R128 : SubtargetFeature<"mimg-r128", "MIMG_R128", "true", "Support 128-bit texture resources" >; def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops", "HalfRate64Ops", "true", "Most fp64 instructions are half rate instead of quarter" >; def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space", "FlatAddressSpace", "true", "Support flat address space" >; def FeatureFlatInstOffsets : SubtargetFeature<"flat-inst-offsets", "FlatInstOffsets", "true", "Flat instructions have immediate 
offset addressing mode" >; def FeatureFlatGlobalInsts : SubtargetFeature<"flat-global-insts", "FlatGlobalInsts", "true", "Have global_* flat memory instructions" >; def FeatureFlatScratchInsts : SubtargetFeature<"flat-scratch-insts", "FlatScratchInsts", "true", "Have scratch_* flat memory instructions" >; def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts", "AddNoCarryInsts", "true", "Have VALU add/sub instructions without carry out" >; def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access", "UnalignedBufferAccess", "true", "Support unaligned global loads and stores" >; def FeatureTrapHandler: SubtargetFeature<"trap-handler", "TrapHandler", "true", "Trap handler support" >; def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access", "UnalignedScratchAccess", "true", "Support unaligned scratch loads and stores" >; def FeatureApertureRegs : SubtargetFeature<"aperture-regs", "HasApertureRegs", "true", "Has Memory Aperture Base and Size Registers" >; def FeatureMadMixInsts : SubtargetFeature<"mad-mix-insts", "HasMadMixInsts", "true", "Has v_mad_mix_f32, v_mad_mixlo_f16, v_mad_mixhi_f16 instructions" >; def FeatureFmaMixInsts : SubtargetFeature<"fma-mix-insts", "HasFmaMixInsts", "true", "Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions" >; // XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support // XNACK. The current default kernel driver setting is: // - graphics ring: XNACK disabled // - compute ring: XNACK enabled // // If XNACK is enabled, the VMEM latency can be worse. // If XNACK is disabled, the 2 SGPRs can be used for general purposes. 
def FeatureXNACK : SubtargetFeature<"xnack", "EnableXNACK", "true", "Enable XNACK support" >; def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug", "SGPRInitBug", "true", "VI SGPR initialization bug requiring a fixed SGPR allocation size" >; class SubtargetFeatureLDSBankCount : SubtargetFeature < "ldsbankcount"#Value, "LDSBankCount", !cast(Value), "The number of LDS banks per compute unit." >; def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>; def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>; def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding", "GCN3Encoding", "true", "Encoding format for VI" >; def FeatureCIInsts : SubtargetFeature<"ci-insts", "CIInsts", "true", "Additional instructions for CI+" >; def FeatureGFX9Insts : SubtargetFeature<"gfx9-insts", "GFX9Insts", "true", "Additional instructions for GFX9+" >; def FeatureSMemRealTime : SubtargetFeature<"s-memrealtime", "HasSMemRealTime", "true", "Has s_memrealtime instruction" >; def FeatureInv2PiInlineImm : SubtargetFeature<"inv-2pi-inline-imm", "HasInv2PiInlineImm", "true", "Has 1 / (2 * pi) as inline immediate" >; def Feature16BitInsts : SubtargetFeature<"16-bit-insts", "Has16BitInsts", "true", "Has i16/f16 instructions" >; def FeatureVOP3P : SubtargetFeature<"vop3p", "HasVOP3PInsts", "true", "Has VOP3P packed instructions" >; def FeatureMovrel : SubtargetFeature<"movrel", "HasMovrel", "true", "Has v_movrel*_b32 instructions" >; def FeatureVGPRIndexMode : SubtargetFeature<"vgpr-index-mode", "HasVGPRIndexMode", "true", "Has VGPR mode register indexing" >; def FeatureScalarStores : SubtargetFeature<"scalar-stores", "HasScalarStores", "true", "Has store scalar memory instructions" >; def FeatureScalarAtomics : SubtargetFeature<"scalar-atomics", "HasScalarAtomics", "true", "Has atomic scalar memory instructions" >; def FeatureSDWA : SubtargetFeature<"sdwa", "HasSDWA", "true", "Support SDWA (Sub-DWORD Addressing) extension" >; def FeatureSDWAOmod : SubtargetFeature<"sdwa-omod", 
"HasSDWAOmod", "true", "Support OMod with SDWA (Sub-DWORD Addressing) extension" >; def FeatureSDWAScalar : SubtargetFeature<"sdwa-scalar", "HasSDWAScalar", "true", "Support scalar register with SDWA (Sub-DWORD Addressing) extension" >; def FeatureSDWASdst : SubtargetFeature<"sdwa-sdst", "HasSDWASdst", "true", "Support scalar dst for VOPC with SDWA (Sub-DWORD Addressing) extension" >; def FeatureSDWAMac : SubtargetFeature<"sdwa-mav", "HasSDWAMac", "true", "Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension" >; def FeatureSDWAOutModsVOPC : SubtargetFeature<"sdwa-out-mods-vopc", "HasSDWAOutModsVOPC", "true", "Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension" >; def FeatureDPP : SubtargetFeature<"dpp", "HasDPP", "true", "Support DPP (Data Parallel Primitives) extension" >; def FeatureIntClamp : SubtargetFeature<"int-clamp-insts", "HasIntClamp", "true", "Support clamp for integer destination" >; def FeatureUnpackedD16VMem : SubtargetFeature<"unpacked-d16-vmem", "HasUnpackedD16VMem", "true", "Has unpacked d16 vmem instructions" >; def FeatureDLInsts : SubtargetFeature<"dl-insts", "HasDLInsts", "true", "Has deep learning instructions" >; def FeatureD16PreservesUnusedBits : SubtargetFeature< "d16-preserves-unused-bits", "D16PreservesUnusedBits", "true", "If present, then instructions defined by HasD16LoadStore predicate preserve " "unused bits. Otherwise instructions defined by HasD16LoadStore predicate " "zero unused bits." >; //===------------------------------------------------------------===// // Subtarget Features (options and debugging) //===------------------------------------------------------------===// -// Some instructions do not support denormals despite this flag. Using -// fp32 denormals also causes instructions to run at the double -// precision rate for the device. 
-def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals", - "FP32Denormals", - "true", - "Enable single precision denormal handling" ->; - // Denormal handling for fp64 and fp16 is controlled by the same // config register when fp16 supported. // TODO: Do we need a separate f16 setting when not legal? def FeatureFP64FP16Denormals : SubtargetFeature<"fp64-fp16-denormals", "FP64FP16Denormals", "true", "Enable double and half precision denormal handling", [FeatureFP64] >; def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals", "FP64FP16Denormals", "true", "Enable double and half precision denormal handling", [FeatureFP64, FeatureFP64FP16Denormals] >; def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals", "FP64FP16Denormals", "true", "Enable half precision denormal handling", [FeatureFP64FP16Denormals] >; def FeatureFPExceptions : SubtargetFeature<"fp-exceptions", "FPExceptions", "true", "Enable floating point exceptions" >; class FeatureMaxPrivateElementSize : SubtargetFeature< "max-private-element-size-"#size, "MaxPrivateElementSize", !cast(size), "Maximum private access size may be "#size >; def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>; def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>; def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>; def FeatureEnableHugePrivateBuffer : SubtargetFeature< "huge-private-buffer", "EnableHugePrivateBuffer", "true", "Enable private/scratch buffer sizes greater than 128 GB" >; def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling", "EnableVGPRSpilling", "true", "Enable spilling of VGPRs to scratch memory" >; def FeatureDumpCode : SubtargetFeature <"DumpCode", "DumpCode", "true", "Dump MachineInstrs in the CodeEmitter" >; def FeatureDumpCodeLower : SubtargetFeature <"dumpcode", "DumpCode", "true", "Dump MachineInstrs in the CodeEmitter" >; // XXX - This should probably be removed once enabled by default def FeatureEnableLoadStoreOpt : SubtargetFeature 
<"load-store-opt", "EnableLoadStoreOpt", "true", "Enable SI load/store optimizer pass" >; // Performance debugging feature. Allow using DS instruction immediate // offsets even if the base pointer can't be proven to be base. On SI, // base pointer values that won't give the same result as a 16-bit add // are not safe to fold, but this will override the conservative test // for the base pointer. def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature < "unsafe-ds-offset-folding", "EnableUnsafeDSOffsetFolding", "true", "Force using DS instruction immediate offsets on SI" >; def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler", "EnableSIScheduler", "true", "Enable SI Machine Scheduler" >; def FeatureEnableDS128 : SubtargetFeature<"enable-ds128", "EnableDS128", "true", "Use ds_{read|write}_b128" >; // Unless +-flat-for-global is specified, turn on FlatForGlobal for // all OS-es on VI and newer hardware to avoid assertion failures due // to missing ADDR64 variants of MUBUF instructions. // FIXME: moveToVALU should be able to handle converting addr64 MUBUF // instructions. def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global", "FlatForGlobal", "true", "Force to generate flat instruction for global" >; def FeatureAutoWaitcntBeforeBarrier : SubtargetFeature < "auto-waitcnt-before-barrier", "AutoWaitcntBeforeBarrier", "true", "Hardware automatically inserts waitcnt before barrier" >; def FeatureCodeObjectV3 : SubtargetFeature < "code-object-v3", "CodeObjectV3", "true", "Generate code object version 3" >; // Dummy feature used to disable assembler instructions. 
def FeatureDisable : SubtargetFeature<"", "FeatureDisable","true", "Dummy feature to disable assembler instructions" >; def FeatureGCN : SubtargetFeature<"gcn", "IsGCN", "true", "GCN or newer GPU" >; class GCNSubtargetFeatureGeneration Implies> : SubtargetFeatureGeneration ; def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS", [FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128, FeatureWavefrontSize64, FeatureGCN, FeatureLDSBankCount32, FeatureMovrel] >; def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS", [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128, FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace, FeatureCIInsts, FeatureMovrel] >; def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS", [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128, FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel, FeatureScalarStores, FeatureInv2PiInlineImm, FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP, FeatureIntClamp ] >; def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9", [FeatureFP64, FeatureLocalMemorySize65536, FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm, FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode, FeatureFastFMAF32, FeatureDPP, FeatureIntClamp, FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst, FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts, FeatureAddNoCarryInsts, FeatureScalarAtomics ] >; class SubtargetFeatureISAVersion Implies> : SubtargetFeature < "isaver"#Major#"."#Minor#"."#Stepping, "IsaVersion", "ISAVersion"#Major#"_"#Minor#"_"#Stepping, "Instruction set version number", Implies >; def FeatureISAVersion6_0_0 : 
SubtargetFeatureISAVersion <6,0,0, [FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops, FeatureLDSBankCount32]>; def FeatureISAVersion6_0_1 : SubtargetFeatureISAVersion <6,0,1, [FeatureSouthernIslands, FeatureLDSBankCount32]>; def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0, [FeatureSeaIslands, FeatureLDSBankCount32]>; def FeatureISAVersion7_0_1 : SubtargetFeatureISAVersion <7,0,1, [FeatureSeaIslands, HalfRate64Ops, FeatureLDSBankCount32, FeatureFastFMAF32]>; def FeatureISAVersion7_0_2 : SubtargetFeatureISAVersion <7,0,2, [FeatureSeaIslands, FeatureLDSBankCount16, FeatureFastFMAF32]>; def FeatureISAVersion7_0_3 : SubtargetFeatureISAVersion <7,0,3, [FeatureSeaIslands, FeatureLDSBankCount16]>; def FeatureISAVersion7_0_4 : SubtargetFeatureISAVersion <7,0,4, [FeatureSeaIslands, FeatureLDSBankCount32]>; def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1, [FeatureVolcanicIslands, FeatureFastFMAF32, HalfRate64Ops, FeatureLDSBankCount32, FeatureXNACK, FeatureUnpackedD16VMem]>; def FeatureISAVersion8_0_2 : SubtargetFeatureISAVersion <8,0,2, [FeatureVolcanicIslands, FeatureLDSBankCount32, FeatureSGPRInitBug, FeatureUnpackedD16VMem]>; def FeatureISAVersion8_0_3 : SubtargetFeatureISAVersion <8,0,3, [FeatureVolcanicIslands, FeatureLDSBankCount32, FeatureUnpackedD16VMem]>; def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0, [FeatureVolcanicIslands, FeatureLDSBankCount16, FeatureXNACK]>; def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0, [FeatureGFX9, FeatureMadMixInsts, FeatureLDSBankCount32, FeatureD16PreservesUnusedBits]>; def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2, [FeatureGFX9, FeatureMadMixInsts, FeatureLDSBankCount32, FeatureXNACK, FeatureD16PreservesUnusedBits]>; def FeatureISAVersion9_0_4 : SubtargetFeatureISAVersion <9,0,4, [FeatureGFX9, FeatureLDSBankCount32, FeatureFmaMixInsts, FeatureD16PreservesUnusedBits]>; def FeatureISAVersion9_0_6 : SubtargetFeatureISAVersion <9,0,6, [FeatureGFX9, 
HalfRate64Ops, FeatureFmaMixInsts, FeatureLDSBankCount32, FeatureDLInsts]>; //===----------------------------------------------------------------------===// // Debugger related subtarget features. //===----------------------------------------------------------------------===// def FeatureDebuggerInsertNops : SubtargetFeature< "amdgpu-debugger-insert-nops", "DebuggerInsertNops", "true", "Insert one nop instruction for each high level source statement" >; def FeatureDebuggerEmitPrologue : SubtargetFeature< "amdgpu-debugger-emit-prologue", "DebuggerEmitPrologue", "true", "Emit debugger prologue" >; //===----------------------------------------------------------------------===// def AMDGPUInstrInfo : InstrInfo { let guessInstructionProperties = 1; let noNamedPositionallyEncodedOperands = 1; } def AMDGPUAsmParser : AsmParser { // Some of the R600 registers have the same name, so this crashes. // For example T0_XYZW and T0_XY both have the asm name T0. let ShouldEmitMatchRegisterName = 0; } def AMDGPUAsmWriter : AsmWriter { int PassSubtarget = 1; } def AMDGPUAsmVariants { string Default = "Default"; int Default_ID = 0; string VOP3 = "VOP3"; int VOP3_ID = 1; string SDWA = "SDWA"; int SDWA_ID = 2; string SDWA9 = "SDWA9"; int SDWA9_ID = 3; string DPP = "DPP"; int DPP_ID = 4; string Disable = "Disable"; int Disable_ID = 5; } def DefaultAMDGPUAsmParserVariant : AsmParserVariant { let Variant = AMDGPUAsmVariants.Default_ID; let Name = AMDGPUAsmVariants.Default; } def VOP3AsmParserVariant : AsmParserVariant { let Variant = AMDGPUAsmVariants.VOP3_ID; let Name = AMDGPUAsmVariants.VOP3; } def SDWAAsmParserVariant : AsmParserVariant { let Variant = AMDGPUAsmVariants.SDWA_ID; let Name = AMDGPUAsmVariants.SDWA; } def SDWA9AsmParserVariant : AsmParserVariant { let Variant = AMDGPUAsmVariants.SDWA9_ID; let Name = AMDGPUAsmVariants.SDWA9; } def DPPAsmParserVariant : AsmParserVariant { let Variant = AMDGPUAsmVariants.DPP_ID; let Name = AMDGPUAsmVariants.DPP; } def AMDGPU : Target { // 
Pull in Instruction Info: let InstructionSet = AMDGPUInstrInfo; let AssemblyParsers = [AMDGPUAsmParser]; let AssemblyParserVariants = [DefaultAMDGPUAsmParserVariant, VOP3AsmParserVariant, SDWAAsmParserVariant, SDWA9AsmParserVariant, DPPAsmParserVariant]; let AssemblyWriters = [AMDGPUAsmWriter]; let AllowRegisterRenaming = 1; } // Dummy Instruction itineraries for pseudo instructions def ALU_NULL : FuncUnit; def NullALU : InstrItinClass; //===----------------------------------------------------------------------===// // Predicate helper class //===----------------------------------------------------------------------===// def isSICI : Predicate< "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS" >, AssemblerPredicate<"!FeatureGCN3Encoding">; def isVI : Predicate < "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">, AssemblerPredicate<"FeatureGCN3Encoding">; def isGFX9 : Predicate < "Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">, AssemblerPredicate<"FeatureGFX9Insts">; // TODO: Either the name to be changed or we simply use IsCI! 
def isCIVI : Predicate < "Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">, AssemblerPredicate<"FeatureCIInsts">; def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, AssemblerPredicate<"FeatureFlatAddressSpace">; def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">, AssemblerPredicate<"FeatureFlatGlobalInsts">; def HasFlatScratchInsts : Predicate<"Subtarget->hasFlatScratchInsts()">, AssemblerPredicate<"FeatureFlatScratchInsts">; def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">, AssemblerPredicate<"FeatureGFX9Insts">; def HasUnpackedD16VMem : Predicate<"Subtarget->hasUnpackedD16VMem()">, AssemblerPredicate<"FeatureUnpackedD16VMem">; def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">, AssemblerPredicate<"!FeatureUnpackedD16VMem">; def D16PreservesUnusedBits : Predicate<"Subtarget->d16PreservesUnusedBits()">, AssemblerPredicate<"FeatureD16PreservesUnusedBits">; def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">; def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">; def HasDSAddTid : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">, AssemblerPredicate<"FeatureGFX9Insts">; def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarryInsts()">, AssemblerPredicate<"FeatureAddNoCarryInsts">; def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarryInsts()">, AssemblerPredicate<"!FeatureAddNoCarryInsts">; def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">, AssemblerPredicate<"Feature16BitInsts">; def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">, AssemblerPredicate<"FeatureVOP3P">; def NotHasVOP3PInsts : Predicate<"!Subtarget->hasVOP3PInsts()">, AssemblerPredicate<"!FeatureVOP3P">; def HasSDWA : Predicate<"Subtarget->hasSDWA()">, AssemblerPredicate<"FeatureSDWA,FeatureVolcanicIslands">; def HasSDWA9 : Predicate<"Subtarget->hasSDWA()">, AssemblerPredicate<"FeatureSDWA,FeatureGFX9">; def HasDPP : 
Predicate<"Subtarget->hasDPP()">, AssemblerPredicate<"FeatureDPP">; def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">, AssemblerPredicate<"FeatureIntClamp">; def HasMadMixInsts : Predicate<"Subtarget->hasMadMixInsts()">, AssemblerPredicate<"FeatureMadMixInsts">; def HasScalarAtomics : Predicate<"Subtarget->hasScalarAtomics()">, AssemblerPredicate<"FeatureScalarAtomics">; def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">; def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">; def HasVGPRIndexMode : Predicate<"Subtarget->hasVGPRIndexMode()">, AssemblerPredicate<"FeatureVGPRIndexMode">; def HasMovrel : Predicate<"Subtarget->hasMovrel()">, AssemblerPredicate<"FeatureMovrel">; def HasFmaMixInsts : Predicate<"Subtarget->hasFmaMixInsts()">, AssemblerPredicate<"FeatureFmaMixInsts">; def HasDLInsts : Predicate<"Subtarget->hasDLInsts()">, AssemblerPredicate<"FeatureDLInsts">; def EnableLateCFGStructurize : Predicate< "EnableLateStructurizeCFG">; // Include AMDGPU TD files include "SISchedule.td" include "GCNProcessors.td" include "AMDGPUInstrInfo.td" include "AMDGPUIntrinsics.td" include "SIIntrinsics.td" include "AMDGPURegisterInfo.td" include "AMDGPURegisterBanks.td" include "AMDGPUInstructions.td" include "SIInstrInfo.td" include "AMDGPUCallingConv.td" include "AMDGPUSearchableTables.td" Index: projects/clang700-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td =================================================================== --- projects/clang700-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td (revision 337644) +++ projects/clang700-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td (revision 337645) @@ -1,60 +1,69 @@ //===-- AMDGPUFeatures.td - AMDGPU Feature Definitions -----*- tablegen -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// def FeatureFP64 : SubtargetFeature<"fp64", "FP64", "true", "Enable double precision operations" >; def FeatureFMA : SubtargetFeature<"fmaf", "FMA", "true", "Enable single precision FMA (not as fast as mul+add, but fused)" >; +// Some instructions do not support denormals despite this flag. Using +// fp32 denormals also causes instructions to run at the double +// precision rate for the device. +def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals", + "FP32Denormals", + "true", + "Enable single precision denormal handling" +>; + class SubtargetFeatureLocalMemorySize : SubtargetFeature< "localmemorysize"#Value, "LocalMemorySize", !cast(Value), "The size of local memory in bytes" >; def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>; def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>; def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>; class SubtargetFeatureWavefrontSize : SubtargetFeature< "wavefrontsize"#Value, "WavefrontSize", !cast(Value), "The number of threads per wavefront" >; def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>; def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>; def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>; class SubtargetFeatureGeneration Implies> : SubtargetFeature ; def FeatureDX10Clamp : SubtargetFeature<"dx10-clamp", "DX10Clamp", "true", "clamp modifier clamps NaNs to 0.0" >; def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca", "EnablePromoteAlloca", "true", "Enable promote alloca pass" >; Index: projects/clang700-import/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp =================================================================== --- projects/clang700-import/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp (revision 337644) +++ projects/clang700-import/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp (revision 337645) @@ 
-1,2265 +1,2305 @@ //===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // /// \file /// Custom DAG lowering for R600 // //===----------------------------------------------------------------------===// #include "R600ISelLowering.h" #include "AMDGPUFrameLowering.h" #include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600FrameLowering.h" #include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" #include #include #include #include #include using namespace llvm; #include "R600GenCallingConv.inc" R600TargetLowering::R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI) : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) { addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass); addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass); addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass); 
addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass); addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass); addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass); computeRegisterProperties(Subtarget->getRegisterInfo()); // Legalize loads and stores to the private address space. setOperationAction(ISD::LOAD, MVT::i32, Custom); setOperationAction(ISD::LOAD, MVT::v2i32, Custom); setOperationAction(ISD::LOAD, MVT::v4i32, Custom); // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address // spaces, so it is custom lowered to handle those where it isn't. for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom); setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom); setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom); } // Workaround for LegalizeDAG asserting on expansion of i1 vector loads. 
setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i1, Expand); setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i1, Expand); setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i1, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i1, Expand); setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand); setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand); setOperationAction(ISD::STORE, MVT::i8, Custom); setOperationAction(ISD::STORE, MVT::i32, Custom); setOperationAction(ISD::STORE, MVT::v2i32, Custom); setOperationAction(ISD::STORE, MVT::v4i32, Custom); setTruncStoreAction(MVT::i32, MVT::i8, Custom); setTruncStoreAction(MVT::i32, MVT::i16, Custom); // We need to include these since trunc STORES to PRIVATE need // special handling to accommodate RMW setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom); setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom); setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom); setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom); setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom); setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom); setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom); setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom); setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom); setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom); // Workaround for LegalizeDAG asserting on expansion of i1 vector stores. 
setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand); setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand); // Set condition code actions setCondCodeAction(ISD::SETO, MVT::f32, Expand); setCondCodeAction(ISD::SETUO, MVT::f32, Expand); setCondCodeAction(ISD::SETLT, MVT::f32, Expand); setCondCodeAction(ISD::SETLE, MVT::f32, Expand); setCondCodeAction(ISD::SETOLT, MVT::f32, Expand); setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); setCondCodeAction(ISD::SETONE, MVT::f32, Expand); setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand); setCondCodeAction(ISD::SETUGE, MVT::f32, Expand); setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); setCondCodeAction(ISD::SETULT, MVT::f32, Expand); setCondCodeAction(ISD::SETULE, MVT::f32, Expand); setCondCodeAction(ISD::SETLE, MVT::i32, Expand); setCondCodeAction(ISD::SETLT, MVT::i32, Expand); setCondCodeAction(ISD::SETULE, MVT::i32, Expand); setCondCodeAction(ISD::SETULT, MVT::i32, Expand); setOperationAction(ISD::FCOS, MVT::f32, Custom); setOperationAction(ISD::FSIN, MVT::f32, Custom); setOperationAction(ISD::SETCC, MVT::v4i32, Expand); setOperationAction(ISD::SETCC, MVT::v2i32, Expand); setOperationAction(ISD::BR_CC, MVT::i32, Expand); setOperationAction(ISD::BR_CC, MVT::f32, Expand); setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::FSUB, MVT::f32, Expand); setOperationAction(ISD::FCEIL, MVT::f64, Custom); setOperationAction(ISD::FTRUNC, MVT::f64, Custom); setOperationAction(ISD::FRINT, MVT::f64, Custom); setOperationAction(ISD::FFLOOR, MVT::f64, Custom); setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); setOperationAction(ISD::SETCC, MVT::i32, Expand); setOperationAction(ISD::SETCC, MVT::f32, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); setOperationAction(ISD::SELECT, 
MVT::i32, Expand); setOperationAction(ISD::SELECT, MVT::f32, Expand); setOperationAction(ISD::SELECT, MVT::v2i32, Expand); setOperationAction(ISD::SELECT, MVT::v4i32, Expand); // ADD, SUB overflow. // TODO: turn these into Legal? if (Subtarget->hasCARRY()) setOperationAction(ISD::UADDO, MVT::i32, Custom); if (Subtarget->hasBORROW()) setOperationAction(ISD::USUBO, MVT::i32, Custom); // Expand sign extension of vectors if (!Subtarget->hasBFE()) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand); if (!Subtarget->hasBFE()) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand); if (!Subtarget->hasBFE()) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand); setOperationAction(ISD::FrameIndex, MVT::i32, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); // We don't have 64-bit shifts. 
Thus we need either SHX i64 or SHX_PARTS i32 // to be Legal/Custom in order to avoid library calls. setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); if (!Subtarget->hasFMA()) { setOperationAction(ISD::FMA, MVT::f32, Expand); setOperationAction(ISD::FMA, MVT::f64, Expand); } // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we // need it for R600. if (!Subtarget->hasFP32Denormals()) setOperationAction(ISD::FMAD, MVT::f32, Legal); if (!Subtarget->hasBFI()) { // fcopysign can be done in a single instruction with BFI. setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); } if (!Subtarget->hasBCNT(32)) setOperationAction(ISD::CTPOP, MVT::i32, Expand); if (!Subtarget->hasBCNT(64)) setOperationAction(ISD::CTPOP, MVT::i64, Expand); if (Subtarget->hasFFBH()) setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom); if (Subtarget->hasFFBL()) setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom); // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we // need it for R600. if (Subtarget->hasBFE()) setHasExtractBitsInsn(true); setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; for (MVT VT : ScalarIntVTs) { setOperationAction(ISD::ADDC, VT, Expand); setOperationAction(ISD::SUBC, VT, Expand); setOperationAction(ISD::ADDE, VT, Expand); setOperationAction(ISD::SUBE, VT, Expand); } // LLVM will expand these to atomic_cmp_swap(0) // and atomic_swap, respectively. 
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); // We need to custom lower some of the intrinsics setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setSchedulingPreference(Sched::Source); setTargetDAGCombine(ISD::FP_ROUND); setTargetDAGCombine(ISD::FP_TO_SINT); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); setTargetDAGCombine(ISD::LOAD); } static inline bool isEOP(MachineBasicBlock::iterator I) { if (std::next(I) == I->getParent()->end()) return false; return std::next(I)->getOpcode() == R600::RETURN; } MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); MachineBasicBlock::iterator I = MI; const R600InstrInfo *TII = Subtarget->getInstrInfo(); switch (MI.getOpcode()) { default: // Replace LDS_*_RET instruction that don't have any uses with the // equivalent LDS_*_NORET instruction. if (TII->isLDSRetInstr(MI.getOpcode())) { int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst); assert(DstIdx != -1); MachineInstrBuilder NewMI; // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add // LDS_1A2D support and remove this special case. 
if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) || MI.getOpcode() == R600::LDS_CMPST_RET) return BB; NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::getLDSNoRetOp(MI.getOpcode()))); for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) { NewMI.add(MI.getOperand(i)); } } else { return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); } break; case R600::FABS_R600: { MachineInstr *NewMI = TII->buildDefaultInstruction( *BB, I, R600::MOV, MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); TII->addFlag(*NewMI, 0, MO_FLAG_ABS); break; } case R600::FNEG_R600: { MachineInstr *NewMI = TII->buildDefaultInstruction( *BB, I, R600::MOV, MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); TII->addFlag(*NewMI, 0, MO_FLAG_NEG); break; } case R600::MASK_WRITE: { unsigned maskedRegister = MI.getOperand(0).getReg(); assert(TargetRegisterInfo::isVirtualRegister(maskedRegister)); MachineInstr * defInstr = MRI.getVRegDef(maskedRegister); TII->addFlag(*defInstr, 0, MO_FLAG_MASK); break; } case R600::MOV_IMM_F32: TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1) .getFPImm() ->getValueAPF() .bitcastToAPInt() .getZExtValue()); break; case R600::MOV_IMM_I32: TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1).getImm()); break; case R600::MOV_IMM_GLOBAL_ADDR: { //TODO: Perhaps combine this instruction with the next if possible auto MIB = TII->buildDefaultInstruction( *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X); int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal); //TODO: Ugh this is rather ugly MIB->getOperand(Idx) = MI.getOperand(1); break; } case R600::CONST_COPY: { MachineInstr *NewMI = TII->buildDefaultInstruction( *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST); TII->setImmOperand(*NewMI, R600::OpName::src0_sel, MI.getOperand(1).getImm()); break; } case R600::RAT_WRITE_CACHELESS_32_eg: case R600::RAT_WRITE_CACHELESS_64_eg: case R600::RAT_WRITE_CACHELESS_128_eg: 
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode())) .add(MI.getOperand(0)) .add(MI.getOperand(1)) .addImm(isEOP(I)); // Set End of program bit break; case R600::RAT_STORE_TYPED_eg: BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode())) .add(MI.getOperand(0)) .add(MI.getOperand(1)) .add(MI.getOperand(2)) .addImm(isEOP(I)); // Set End of program bit break; case R600::BRANCH: BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP)) .add(MI.getOperand(0)); break; case R600::BRANCH_COND_f32: { MachineInstr *NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X), R600::PREDICATE_BIT) .add(MI.getOperand(1)) .addImm(R600::PRED_SETNE) .addImm(0); // Flags TII->addFlag(*NewMI, 0, MO_FLAG_PUSH); BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND)) .add(MI.getOperand(0)) .addReg(R600::PREDICATE_BIT, RegState::Kill); break; } case R600::BRANCH_COND_i32: { MachineInstr *NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X), R600::PREDICATE_BIT) .add(MI.getOperand(1)) .addImm(R600::PRED_SETNE_INT) .addImm(0); // Flags TII->addFlag(*NewMI, 0, MO_FLAG_PUSH); BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND)) .add(MI.getOperand(0)) .addReg(R600::PREDICATE_BIT, RegState::Kill); break; } case R600::EG_ExportSwz: case R600::R600_ExportSwz: { // Instruction is left unmodified if its not the last one of its type bool isLastInstructionOfItsType = true; unsigned InstExportType = MI.getOperand(1).getImm(); for (MachineBasicBlock::iterator NextExportInst = std::next(I), EndBlock = BB->end(); NextExportInst != EndBlock; NextExportInst = std::next(NextExportInst)) { if (NextExportInst->getOpcode() == R600::EG_ExportSwz || NextExportInst->getOpcode() == R600::R600_ExportSwz) { unsigned CurrentInstExportType = NextExportInst->getOperand(1) .getImm(); if (CurrentInstExportType == InstExportType) { isLastInstructionOfItsType = false; break; } } } bool EOP = isEOP(I); if (!EOP && !isLastInstructionOfItsType) return BB; unsigned 
CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40; BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode())) .add(MI.getOperand(0)) .add(MI.getOperand(1)) .add(MI.getOperand(2)) .add(MI.getOperand(3)) .add(MI.getOperand(4)) .add(MI.getOperand(5)) .add(MI.getOperand(6)) .addImm(CfInst) .addImm(EOP); break; } case R600::RETURN: { return BB; } } MI.eraseFromParent(); return BB; } //===----------------------------------------------------------------------===// // Custom DAG Lowering Operations //===----------------------------------------------------------------------===// SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); R600MachineFunctionInfo *MFI = MF.getInfo(); switch (Op.getOpcode()) { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG); case ISD::SRA_PARTS: case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG); case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY); case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW); case ISD::FCOS: case ISD::FSIN: return LowerTrig(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::LOAD: { SDValue Result = LowerLOAD(Op, DAG); assert((!Result.getNode() || Result.getNode()->getNumValues() == 2) && "Load should return a value and a chain"); return Result; } case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG); case ISD::FrameIndex: return lowerFrameIndex(Op, DAG); case ISD::INTRINSIC_VOID: { SDValue Chain = Op.getOperand(0); unsigned IntrinsicID = cast(Op.getOperand(1))->getZExtValue(); switch (IntrinsicID) { case Intrinsic::r600_store_swizzle: { SDLoc DL(Op); const SDValue 
Args[8] = { Chain, Op.getOperand(2), // Export Value Op.getOperand(3), // ArrayBase Op.getOperand(4), // Type DAG.getConstant(0, DL, MVT::i32), // SWZ_X DAG.getConstant(1, DL, MVT::i32), // SWZ_Y DAG.getConstant(2, DL, MVT::i32), // SWZ_Z DAG.getConstant(3, DL, MVT::i32) // SWZ_W }; return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args); } // default for switch(IntrinsicID) default: break; } // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode()) break; } case ISD::INTRINSIC_WO_CHAIN: { unsigned IntrinsicID = cast(Op.getOperand(0))->getZExtValue(); EVT VT = Op.getValueType(); SDLoc DL(Op); switch (IntrinsicID) { case Intrinsic::r600_tex: case Intrinsic::r600_texc: { unsigned TextureOp; switch (IntrinsicID) { case Intrinsic::r600_tex: TextureOp = 0; break; case Intrinsic::r600_texc: TextureOp = 1; break; default: llvm_unreachable("unhandled texture operation"); } SDValue TexArgs[19] = { DAG.getConstant(TextureOp, DL, MVT::i32), Op.getOperand(1), DAG.getConstant(0, DL, MVT::i32), DAG.getConstant(1, DL, MVT::i32), DAG.getConstant(2, DL, MVT::i32), DAG.getConstant(3, DL, MVT::i32), Op.getOperand(2), Op.getOperand(3), Op.getOperand(4), DAG.getConstant(0, DL, MVT::i32), DAG.getConstant(1, DL, MVT::i32), DAG.getConstant(2, DL, MVT::i32), DAG.getConstant(3, DL, MVT::i32), Op.getOperand(5), Op.getOperand(6), Op.getOperand(7), Op.getOperand(8), Op.getOperand(9), Op.getOperand(10) }; return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs); } case Intrinsic::r600_dot4: { SDValue Args[8] = { DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), DAG.getConstant(0, DL, MVT::i32)), DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), DAG.getConstant(0, DL, MVT::i32)), DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), DAG.getConstant(1, DL, MVT::i32)), DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), DAG.getConstant(1, DL, MVT::i32)), DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 
DL, MVT::f32, Op.getOperand(1), DAG.getConstant(2, DL, MVT::i32)), DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), DAG.getConstant(2, DL, MVT::i32)), DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), DAG.getConstant(3, DL, MVT::i32)), DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), DAG.getConstant(3, DL, MVT::i32)) }; return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args); } case Intrinsic::r600_implicitarg_ptr: { MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUASI.PARAM_I_ADDRESS); uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT); return DAG.getConstant(ByteOffset, DL, PtrVT); } case Intrinsic::r600_read_ngroups_x: return LowerImplicitParameter(DAG, VT, DL, 0); case Intrinsic::r600_read_ngroups_y: return LowerImplicitParameter(DAG, VT, DL, 1); case Intrinsic::r600_read_ngroups_z: return LowerImplicitParameter(DAG, VT, DL, 2); case Intrinsic::r600_read_global_size_x: return LowerImplicitParameter(DAG, VT, DL, 3); case Intrinsic::r600_read_global_size_y: return LowerImplicitParameter(DAG, VT, DL, 4); case Intrinsic::r600_read_global_size_z: return LowerImplicitParameter(DAG, VT, DL, 5); case Intrinsic::r600_read_local_size_x: return LowerImplicitParameter(DAG, VT, DL, 6); case Intrinsic::r600_read_local_size_y: return LowerImplicitParameter(DAG, VT, DL, 7); case Intrinsic::r600_read_local_size_z: return LowerImplicitParameter(DAG, VT, DL, 8); case Intrinsic::r600_read_tgid_x: return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, R600::T1_X, VT); case Intrinsic::r600_read_tgid_y: return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, R600::T1_Y, VT); case Intrinsic::r600_read_tgid_z: return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, R600::T1_Z, VT); case Intrinsic::r600_read_tidig_x: return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, R600::T0_X, VT); case Intrinsic::r600_read_tidig_y: return CreateLiveInRegisterRaw(DAG, 
&R600::R600_TReg32RegClass, R600::T0_Y, VT); case Intrinsic::r600_read_tidig_z: return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, R600::T0_Z, VT); case Intrinsic::r600_recipsqrt_ieee: return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1)); case Intrinsic::r600_recipsqrt_clamped: return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1)); default: return Op; } // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode()) break; } } // end switch(Op.getOpcode()) return SDValue(); } void R600TargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { switch (N->getOpcode()) { default: AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG); return; case ISD::FP_TO_UINT: if (N->getValueType(0) == MVT::i1) { Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG)); return; } // Since we don't care about out of bounds values we can use FP_TO_SINT for // uints too. The DAGLegalizer code for uint considers some extra cases // which are not necessary here. 
LLVM_FALLTHROUGH; case ISD::FP_TO_SINT: { if (N->getValueType(0) == MVT::i1) { Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG)); return; } SDValue Result; if (expandFP_TO_SINT(N, Result, DAG)) Results.push_back(Result); return; } case ISD::SDIVREM: { SDValue Op = SDValue(N, 1); SDValue RES = LowerSDIVREM(Op, DAG); Results.push_back(RES); Results.push_back(RES.getValue(1)); break; } case ISD::UDIVREM: { SDValue Op = SDValue(N, 0); LowerUDIVREM64(Op, DAG, Results); break; } } } SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG, SDValue Vector) const { SDLoc DL(Vector); EVT VecVT = Vector.getValueType(); EVT EltVT = VecVT.getVectorElementType(); SmallVector Args; for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) { Args.push_back(DAG.getNode( ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector, DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout())))); } return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args); } SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); SDValue Vector = Op.getOperand(0); SDValue Index = Op.getOperand(1); if (isa(Index) || Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR) return Op; Vector = vectorToVerticalVector(DAG, Vector); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(), Vector, Index); } SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); SDValue Vector = Op.getOperand(0); SDValue Value = Op.getOperand(1); SDValue Index = Op.getOperand(2); if (isa(Index) || Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR) return Op; Vector = vectorToVerticalVector(DAG, Vector); SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), Vector, Value, Index); return vectorToVerticalVector(DAG, Insert); } SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const { GlobalAddressSDNode *GSD = 
cast(Op); if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS) return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG); const DataLayout &DL = DAG.getDataLayout(); const GlobalValue *GV = GSD->getGlobal(); MVT ConstPtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS); SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT); return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA); } SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const { // On hw >= R700, COS/SIN input must be between -1. and 1. // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5) EVT VT = Op.getValueType(); SDValue Arg = Op.getOperand(0); SDLoc DL(Op); // TODO: Should this propagate fast-math-flags? SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT, DAG.getNode(ISD::FADD, DL, VT, DAG.getNode(ISD::FMUL, DL, VT, Arg, DAG.getConstantFP(0.15915494309, DL, MVT::f32)), DAG.getConstantFP(0.5, DL, MVT::f32))); unsigned TrigNode; switch (Op.getOpcode()) { case ISD::FCOS: TrigNode = AMDGPUISD::COS_HW; break; case ISD::FSIN: TrigNode = AMDGPUISD::SIN_HW; break; default: llvm_unreachable("Wrong trig opcode"); } SDValue TrigVal = DAG.getNode(TrigNode, DL, VT, DAG.getNode(ISD::FADD, DL, VT, FractPart, DAG.getConstantFP(-0.5, DL, MVT::f32))); if (Gen >= AMDGPUSubtarget::R700) return TrigVal; // On R600 hw, COS/SIN input must be between -Pi and Pi. 
return DAG.getNode(ISD::FMUL, DL, VT, TrigVal, DAG.getConstantFP(3.14159265359, DL, MVT::f32)); }

/// Custom lowering for ISD::SHL_PARTS: a left shift of a value held as a
/// (Lo, Hi) pair of VT-sized parts.
///
/// \param Op   The SHL_PARTS node; operands are Lo, Hi and the shift amount.
/// \param DAG  DAG in which the replacement nodes are created.
/// \returns    A MERGE_VALUES node carrying the new (Lo, Hi) pair. Each part
///             is a select between the "shift < width" and "shift >= width"
///             results, compared unsigned against the part width.
SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LoIn = Op.getOperand(0);
  SDValue HiIn = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);

  SDValue ConstZero = DAG.getConstant(0, DL, VT);
  SDValue ConstOne = DAG.getConstant(1, DL, VT);
  SDValue BitWidth = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue BitWidthLess1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);

  SDValue AmtMinusWidth = DAG.getNode(ISD::SUB, DL, VT, Amt, BitWidth);
  SDValue ComplementAmt = DAG.getNode(ISD::SUB, DL, VT, BitWidthLess1, Amt);

  // The bits carried from Lo into Hi are Lo >> (Width - Amt). For Amt == 0
  // that would be a shift by the full width, so the shift is split in two:
  // first by (Width - 1 - Amt), then by one more bit. The alternative would
  // be a conditional that filters out the zero case.
  SDValue Carry = DAG.getNode(ISD::SRL, DL, VT, LoIn, ComplementAmt);
  Carry = DAG.getNode(ISD::SRL, DL, VT, Carry, ConstOne);

  // Results for a shift amount below the part width.
  SDValue HiShifted = DAG.getNode(ISD::SHL, DL, VT, HiIn, Amt);
  SDValue HiSmall = DAG.getNode(ISD::OR, DL, VT, HiShifted, Carry);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, LoIn, Amt);

  // Results for a shift amount of at least the part width: Lo becomes zero
  // and Hi receives Lo shifted by the excess.
  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, LoIn, AmtMinusWidth);

  SDValue HiOut =
      DAG.getSelectCC(DL, Amt, BitWidth, HiSmall, HiBig, ISD::SETULT);
  SDValue LoOut =
      DAG.getSelectCC(DL, Amt, BitWidth, LoSmall, ConstZero, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), LoOut,
                     HiOut);
}

SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); EVT VT = Op.getValueType(); SDValue Lo = Op.getOperand(0); SDValue Hi = Op.getOperand(1); SDValue Shift = Op.getOperand(2); SDValue Zero = DAG.getConstant(0, DL, VT); SDValue One = DAG.getConstant(1, DL, VT); const bool SRA = Op.getOpcode() == ISD::SRA_PARTS; SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT); SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT); SDValue
BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width); SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift); // The dance around Width1 is necessary for 0 special case. // Without it the CompShift might be 32, producing incorrect results in // Overflow. So we do the shift in two steps, the alternative is to // add a conditional to filter the special case. SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift); Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One); SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift); SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift); LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow); SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift); SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero; Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT); Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT); return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi); } SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG, unsigned mainop, unsigned ovf) const { SDLoc DL(Op); EVT VT = Op.getValueType(); SDValue Lo = Op.getOperand(0); SDValue Hi = Op.getOperand(1); SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi); // Extend sign. 
OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF, DAG.getValueType(MVT::i1)); SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi); return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF); }

/// Builds the i1 result used for an FP_TO_UINT conversion: a SETCC node that
/// tests whether \p Op equals the f32 constant 1.0.
///
/// \param Op   The floating-point value being converted.
/// \param DAG  DAG in which the comparison is created.
/// \returns    An MVT::i1 SETCC node comparing \p Op with 1.0f using SETEQ.
SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(ISD::SETCC, DL, MVT::i1, Op,
                     DAG.getConstantFP(1.0f, DL, MVT::f32),
                     DAG.getCondCode(ISD::SETEQ));
}

/// Builds the i1 result used for an FP_TO_SINT conversion: a SETCC node that
/// tests whether \p Op equals the f32 constant -1.0.
///
/// \param Op   The floating-point value being converted.
/// \param DAG  DAG in which the comparison is created.
/// \returns    An MVT::i1 SETCC node comparing \p Op with -1.0f using SETEQ.
SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(ISD::SETCC, DL, MVT::i1, Op,
                     DAG.getConstantFP(-1.0f, DL, MVT::f32),
                     DAG.getCondCode(ISD::SETEQ));
}

SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT, const SDLoc &DL, unsigned DwordOffset) const { unsigned ByteOffset = DwordOffset * 4; PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), - AMDGPUASI.CONSTANT_BUFFER_0); + AMDGPUASI.PARAM_I_ADDRESS); // We shouldn't be using an offset wider than 16-bits for implicit parameters.
assert(isInt<16>(ByteOffset)); return DAG.getLoad(VT, DL, DAG.getEntryNode(), DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR MachinePointerInfo(ConstantPointerNull::get(PtrType))); } bool R600TargetLowering::isZero(SDValue Op) const { if(ConstantSDNode *Cst = dyn_cast(Op)) { return Cst->isNullValue(); } else if(ConstantFPSDNode *CstFP = dyn_cast(Op)){ return CstFP->isZero(); } else { return false; } } bool R600TargetLowering::isHWTrueValue(SDValue Op) const { if (ConstantFPSDNode * CFP = dyn_cast(Op)) { return CFP->isExactlyValue(1.0); } return isAllOnesConstant(Op); } bool R600TargetLowering::isHWFalseValue(SDValue Op) const { if (ConstantFPSDNode * CFP = dyn_cast(Op)) { return CFP->getValueAPF().isZero(); } return isNullConstant(Op); } SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); EVT VT = Op.getValueType(); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDValue True = Op.getOperand(2); SDValue False = Op.getOperand(3); SDValue CC = Op.getOperand(4); SDValue Temp; if (VT == MVT::f32) { DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr); SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI); if (MinMax) return MinMax; } // LHS and RHS are guaranteed to be the same value type EVT CompareVT = LHS.getValueType(); // Check if we can lower this to a native operation. // Try to lower to a SET* instruction: // // SET* can match the following patterns: // // select_cc f32, f32, -1, 0, cc_supported // select_cc f32, f32, 1.0f, 0.0f, cc_supported // select_cc i32, i32, -1, 0, cc_supported // // Move hardware True/False values to the correct operand. 
ISD::CondCode CCOpcode = cast(CC)->get(); ISD::CondCode InverseCC = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32); if (isHWTrueValue(False) && isHWFalseValue(True)) { if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) { std::swap(False, True); CC = DAG.getCondCode(InverseCC); } else { ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC); if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) { std::swap(False, True); std::swap(LHS, RHS); CC = DAG.getCondCode(SwapInvCC); } } } if (isHWTrueValue(True) && isHWFalseValue(False) && (CompareVT == VT || VT == MVT::i32)) { // This can be matched by a SET* instruction. return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC); } // Try to lower to a CND* instruction: // // CND* can match the following patterns: // // select_cc f32, 0.0, f32, f32, cc_supported // select_cc f32, 0.0, i32, i32, cc_supported // select_cc i32, 0, f32, f32, cc_supported // select_cc i32, 0, i32, i32, cc_supported // // Try to move the zero value to the RHS if (isZero(LHS)) { ISD::CondCode CCOpcode = cast(CC)->get(); // Try swapping the operands ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode); if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) { std::swap(LHS, RHS); CC = DAG.getCondCode(CCSwapped); } else { // Try inverting the conditon and then swapping the operands ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger()); CCSwapped = ISD::getSetCCSwappedOperands(CCInv); if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) { std::swap(True, False); std::swap(LHS, RHS); CC = DAG.getCondCode(CCSwapped); } } } if (isZero(RHS)) { SDValue Cond = LHS; SDValue Zero = RHS; ISD::CondCode CCOpcode = cast(CC)->get(); if (CompareVT != VT) { // Bitcast True / False to the correct types. 
This will end up being // a nop, but it allows us to define only a single pattern in the // .TD files for each CND* instruction rather than having to have // one pattern for integer True/False and one for fp True/False True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True); False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False); } switch (CCOpcode) { case ISD::SETONE: case ISD::SETUNE: case ISD::SETNE: CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32); Temp = True; True = False; False = Temp; break; default: break; } SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, Cond, Zero, True, False, DAG.getCondCode(CCOpcode)); return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode); } // If we make it this for it means we have no native instructions to handle // this SELECT_CC, so we must lower it. SDValue HWTrue, HWFalse; if (CompareVT == MVT::f32) { HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT); HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT); } else if (CompareVT == MVT::i32) { HWTrue = DAG.getConstant(-1, DL, CompareVT); HWFalse = DAG.getConstant(0, DL, CompareVT); } else { llvm_unreachable("Unhandled value type in LowerSELECT_CC"); } // Lower this unsupported SELECT_CC into a combination of two supported // SELECT_CC operations. SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC); return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond, HWFalse, True, False, DAG.getCondCode(ISD::SETNE)); } /// LLVM generates byte-addressed pointers. For indirect addressing, we need to /// convert these pointers to a register index. Each register holds /// 16 bytes, (4 x 32bit sub-register), but we need to take into account the /// \p StackWidth, which tells us how many of the 4 sub-registrers will be used /// for indirect addressing. 
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  // Right-shift amount applied to the pointer for each supported width:
  // 1 -> 2 bits, 2 -> 3 bits, 4 -> 4 bits. Any other width is a bug.
  unsigned ShiftBits;
  if (StackWidth == 1)
    ShiftBits = 2;
  else if (StackWidth == 2)
    ShiftBits = 3;
  else if (StackWidth == 4)
    ShiftBits = 4;
  else
    llvm_unreachable("Invalid stack width");

  SDLoc DL(Ptr);
  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
                     DAG.getConstant(ShiftBits, DL, MVT::i32));
}

/// Computes the channel and pointer increment for element \p ElemIdx given
/// the stack width \p StackWidth.
///
/// \param StackWidth    Stack width; 2 and 4 are handled specially, every
///                      other value is handled like a width of 1.
/// \param ElemIdx       Index of the element being addressed.
/// \param[out] Channel  Receives the channel for the element.
/// \param[out] PtrIncr  Receives the pointer increment (0 or 1).
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  if (StackWidth == 4) {
    // The element index is used directly as the channel.
    Channel = ElemIdx;
    PtrIncr = 0;
    return;
  }

  if (StackWidth == 2) {
    // Channel alternates between 0 and 1; only index 2 bumps the pointer.
    Channel = ElemIdx % 2;
    PtrIncr = (ElemIdx == 2) ? 1 : 0;
    return;
  }

  // Width 1 and any unrecognised width: always channel 0, and every element
  // after the first bumps the pointer.
  Channel = 0;
  PtrIncr = (ElemIdx > 0) ? 1 : 0;
}

SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store, SelectionDAG &DAG) const { SDLoc DL(Store); //TODO: Who creates the i8 stores? assert(Store->isTruncatingStore() || Store->getValue().getValueType() == MVT::i8); assert(Store->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS); SDValue Mask; if (Store->getMemoryVT() == MVT::i8) { assert(Store->getAlignment() >= 1); Mask = DAG.getConstant(0xff, DL, MVT::i32); } else if (Store->getMemoryVT() == MVT::i16) { assert(Store->getAlignment() >= 2); Mask = DAG.getConstant(0xffff, DL, MVT::i32); } else { llvm_unreachable("Unsupported private trunc store"); } SDValue OldChain = Store->getChain(); bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN); // Skip dummy SDValue Chain = VectorTrunc ?
OldChain->getOperand(0) : OldChain; SDValue BasePtr = Store->getBasePtr(); SDValue Offset = Store->getOffset(); EVT MemVT = Store->getMemoryVT(); SDValue LoadPtr = BasePtr; if (!Offset.isUndef()) { LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset); } // Get dword location // TODO: this should be eliminated by the future SHR ptr, 2 SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr, DAG.getConstant(0xfffffffc, DL, MVT::i32)); // Load dword // TODO: can we be smarter about machine pointer info? MachinePointerInfo PtrInfo(UndefValue::get( Type::getInt32PtrTy(*DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS))); SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo); Chain = Dst.getValue(1); // Get offset in dword SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr, DAG.getConstant(0x3, DL, MVT::i32)); // Convert byte offset to bit shift SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx, DAG.getConstant(3, DL, MVT::i32)); // TODO: Contrary to the name of the functiom, // it also handles sub i32 non-truncating stores (like i1) SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32, Store->getValue()); // Mask the value to the right type SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT); // Shift the value in place SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32, MaskedValue, ShiftAmt); // Shift the mask in place SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt); // Invert the mask. NOTE: if we had native ROL instructions we could // use inverted mask DstMask = DAG.getNOT(DL, DstMask, MVT::i32); // Cleanup the target bits Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask); // Add the new bits SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue); // Store dword // TODO: Can we be smarter about MachinePointerInfo? 
SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo); // If we are part of expanded vector, make our neighbors depend on this store if (VectorTrunc) { // Make all other vector elements depend on this store Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore); DAG.ReplaceAllUsesOfValueWith(OldChain, Chain); } return NewStore; } SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { StoreSDNode *StoreNode = cast(Op); unsigned AS = StoreNode->getAddressSpace(); SDValue Chain = StoreNode->getChain(); SDValue Ptr = StoreNode->getBasePtr(); SDValue Value = StoreNode->getValue(); EVT VT = Value.getValueType(); EVT MemVT = StoreNode->getMemoryVT(); EVT PtrVT = Ptr.getValueType(); SDLoc DL(Op); // Neither LOCAL nor PRIVATE can do vectors at the moment if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS) && VT.isVector()) { if ((AS == AMDGPUASI.PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) { // Add an extra level of chain to isolate this vector SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain); // TODO: can the chain be replaced without creating a new store? 
SDValue NewStore = DAG.getTruncStore( NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(), MemVT, StoreNode->getAlignment(), StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo()); StoreNode = cast(NewStore); } return scalarizeVectorStore(StoreNode, DAG); } unsigned Align = StoreNode->getAlignment(); if (Align < MemVT.getStoreSize() && !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) { return expandUnalignedStore(StoreNode, DAG); } SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr, DAG.getConstant(2, DL, PtrVT)); if (AS == AMDGPUASI.GLOBAL_ADDRESS) { // It is beneficial to create MSKOR here instead of combiner to avoid // artificial dependencies introduced by RMW if (StoreNode->isTruncatingStore()) { assert(VT.bitsLE(MVT::i32)); SDValue MaskConstant; if (MemVT == MVT::i8) { MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32); } else { assert(MemVT == MVT::i16); assert(StoreNode->getAlignment() >= 2); MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32); } SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr, DAG.getConstant(0x00000003, DL, PtrVT)); SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex, DAG.getConstant(3, DL, VT)); // Put the mask in correct place SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift); // Put the value bits in correct place SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant); SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift); // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32 // vector instead. 
SDValue Src[4] = { ShiftedValue, DAG.getConstant(0, DL, MVT::i32), DAG.getConstant(0, DL, MVT::i32), Mask }; SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src); SDValue Args[3] = { Chain, Input, DWordAddr }; return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL, Op->getVTList(), Args, MemVT, StoreNode->getMemOperand()); } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) { // Convert pointer from byte address to dword address. Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr); if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) { llvm_unreachable("Truncated and indexed stores not supported yet"); } else { Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand()); } return Chain; } } // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes if (AS != AMDGPUASI.PRIVATE_ADDRESS) return SDValue(); if (MemVT.bitsLT(MVT::i32)) return lowerPrivateTruncStore(StoreNode, DAG); // Standard i32+ store, tag it with DWORDADDR to note that the address // has been shifted if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) { Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr); return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand()); } // Tagged i32+ stores will be matched by patterns return SDValue(); } // return (512 + (kc_bank << 12) static int ConstantAddressBlock(unsigned AddressSpace) { switch (AddressSpace) { case AMDGPUAS::CONSTANT_BUFFER_0: return 512; case AMDGPUAS::CONSTANT_BUFFER_1: return 512 + 4096; case AMDGPUAS::CONSTANT_BUFFER_2: return 512 + 4096 * 2; case AMDGPUAS::CONSTANT_BUFFER_3: return 512 + 4096 * 3; case AMDGPUAS::CONSTANT_BUFFER_4: return 512 + 4096 * 4; case AMDGPUAS::CONSTANT_BUFFER_5: return 512 + 4096 * 5; case AMDGPUAS::CONSTANT_BUFFER_6: return 512 + 4096 * 6; case AMDGPUAS::CONSTANT_BUFFER_7: return 512 + 4096 * 7; case AMDGPUAS::CONSTANT_BUFFER_8: return 512 + 4096 * 8; case AMDGPUAS::CONSTANT_BUFFER_9: return 512 + 4096 * 9; case 
AMDGPUAS::CONSTANT_BUFFER_10: return 512 + 4096 * 10; case AMDGPUAS::CONSTANT_BUFFER_11: return 512 + 4096 * 11; case AMDGPUAS::CONSTANT_BUFFER_12: return 512 + 4096 * 12; case AMDGPUAS::CONSTANT_BUFFER_13: return 512 + 4096 * 13; case AMDGPUAS::CONSTANT_BUFFER_14: return 512 + 4096 * 14; case AMDGPUAS::CONSTANT_BUFFER_15: return 512 + 4096 * 15; default: return -1; } } SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); LoadSDNode *Load = cast(Op); ISD::LoadExtType ExtType = Load->getExtensionType(); EVT MemVT = Load->getMemoryVT(); assert(Load->getAlignment() >= MemVT.getStoreSize()); SDValue BasePtr = Load->getBasePtr(); SDValue Chain = Load->getChain(); SDValue Offset = Load->getOffset(); SDValue LoadPtr = BasePtr; if (!Offset.isUndef()) { LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset); } // Get dword location // NOTE: this should be eliminated by the future SHR ptr, 2 SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr, DAG.getConstant(0xfffffffc, DL, MVT::i32)); // Load dword // TODO: can we be smarter about machine pointer info? MachinePointerInfo PtrInfo(UndefValue::get( Type::getInt32PtrTy(*DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS))); SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo); // Get offset within the register. SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr, DAG.getConstant(0x3, DL, MVT::i32)); // Bit offset of target byte (byteIdx * 8). SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx, DAG.getConstant(3, DL, MVT::i32)); // Shift to the right. SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt); // Eliminate the upper bits by setting them to ... EVT MemEltVT = MemVT.getScalarType(); if (ExtType == ISD::SEXTLOAD) { // ... ones. SDValue MemEltVTNode = DAG.getValueType(MemEltVT); Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode); } else { // ... or zeros. 
Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT); } SDValue Ops[] = { Ret, Read.getValue(1) // This should be our output chain }; return DAG.getMergeValues(Ops, DL); } SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { LoadSDNode *LoadNode = cast(Op); unsigned AS = LoadNode->getAddressSpace(); EVT MemVT = LoadNode->getMemoryVT(); ISD::LoadExtType ExtType = LoadNode->getExtensionType(); if (AS == AMDGPUASI.PRIVATE_ADDRESS && ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) { return lowerPrivateExtLoad(Op, DAG); } SDLoc DL(Op); EVT VT = Op.getValueType(); SDValue Chain = LoadNode->getChain(); SDValue Ptr = LoadNode->getBasePtr(); if ((LoadNode->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS || LoadNode->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS) && VT.isVector()) { return scalarizeVectorLoad(LoadNode, DAG); } + // This is still used for explicit load from addrspace(8) int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace()); if (ConstantBlock > -1 && ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) || (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) { SDValue Result; - if (isa(LoadNode->getMemOperand()->getValue()) || - isa(LoadNode->getMemOperand()->getValue()) || + if (isa(LoadNode->getMemOperand()->getValue()) || isa(Ptr)) { - SDValue Slots[4]; - for (unsigned i = 0; i < 4; i++) { - // We want Const position encoded with the following formula : - // (((512 + (kc_bank << 12) + const_index) << 2) + chan) - // const_index is Ptr computed by llvm using an alignment of 16. 
- // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and - // then div by 4 at the ISel step - SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32)); - Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr); - } - EVT NewVT = MVT::v4i32; - unsigned NumElements = 4; - if (VT.isVector()) { - NewVT = VT; - NumElements = VT.getVectorNumElements(); - } - Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements)); + return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG); } else { + //TODO: Does this even work? // non-constant ptr can't be folded, keeps it as a v4f32 load Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32, DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, DL, MVT::i32)), DAG.getConstant(LoadNode->getAddressSpace() - AMDGPUASI.CONSTANT_BUFFER_0, DL, MVT::i32) ); } if (!VT.isVector()) { Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result, DAG.getConstant(0, DL, MVT::i32)); } SDValue MergedValues[2] = { Result, Chain }; return DAG.getMergeValues(MergedValues, DL); } // For most operations returning SDValue() will result in the node being // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we // need to manually expand loads that may be legal in some address spaces and // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for // compute shaders, since the data is sign extended when it is uploaded to the // buffer. However SEXT loads from other address spaces are not supported, so // we need to expand them here. 
if (LoadNode->getExtensionType() == ISD::SEXTLOAD) { EVT MemVT = LoadNode->getMemoryVT(); assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8)); SDValue NewLoad = DAG.getExtLoad( ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT, LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags()); SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad, DAG.getValueType(MemVT)); SDValue MergedValues[2] = { Res, Chain }; return DAG.getMergeValues(MergedValues, DL); } if (LoadNode->getAddressSpace() != AMDGPUASI.PRIVATE_ADDRESS) { return SDValue(); } // DWORDADDR ISD marks already shifted address if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) { assert(VT == MVT::i32); Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32)); Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr); return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand()); } return SDValue(); } SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Cond = Op.getOperand(1); SDValue Jump = Op.getOperand(2); return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(), Chain, Jump, Cond); } SDValue R600TargetLowering::lowerFrameIndex(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); const R600FrameLowering *TFL = Subtarget->getFrameLowering(); FrameIndexSDNode *FIN = cast(Op); unsigned FrameIndex = FIN->getIndex(); unsigned IgnoredFrameReg; unsigned Offset = TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg); return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op), Op.getValueType()); } CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const { switch (CC) { case CallingConv::AMDGPU_KERNEL: case CallingConv::SPIR_KERNEL: case CallingConv::C: case CallingConv::Fast: case CallingConv::Cold: llvm_unreachable("kernels should not be handled here"); case 
CallingConv::AMDGPU_VS: case CallingConv::AMDGPU_GS: case CallingConv::AMDGPU_PS: case CallingConv::AMDGPU_CS: case CallingConv::AMDGPU_HS: case CallingConv::AMDGPU_ES: case CallingConv::AMDGPU_LS: return CC_R600; default: report_fatal_error("Unsupported calling convention."); } } /// XXX Only kernel functions are supported, so we can assume for now that /// every function is a kernel function, but in the future we should use /// separate calling conventions for kernel and non-kernel functions. SDValue R600TargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl &InVals) const { SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); MachineFunction &MF = DAG.getMachineFunction(); SmallVector LocalIns; if (AMDGPU::isShader(CallConv)) { CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg)); } else { analyzeFormalArgumentsCompute(CCInfo, Ins); } for (unsigned i = 0, e = Ins.size(); i < e; ++i) { CCValAssign &VA = ArgLocs[i]; const ISD::InputArg &In = Ins[i]; EVT VT = In.VT; EVT MemVT = VA.getLocVT(); if (!VT.isVector() && MemVT.isVector()) { // Get load source type if scalarized. MemVT = MemVT.getVectorElementType(); } if (AMDGPU::isShader(CallConv)) { unsigned Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass); SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT); InVals.push_back(Register); continue; } PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), - AMDGPUASI.CONSTANT_BUFFER_0); + AMDGPUASI.PARAM_I_ADDRESS); // i64 isn't a legal type, so the register type used ends up as i32, which // isn't expected here. It attempts to create this sextload, but it ends up // being invalid. Somehow this seems to work with i64 arguments, but breaks // for <1 x i64>. 
// The first 36 bytes of the input buffer contains information about // thread group and global sizes. ISD::LoadExtType Ext = ISD::NON_EXTLOAD; if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) { // FIXME: This should really check the extload type, but the handling of // extload vector parameters seems to be broken. // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD; Ext = ISD::SEXTLOAD; } // Compute the offset from the value. // XXX - I think PartOffset should give you this, but it seems to give the // size of the register which isn't useful. unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset(); unsigned PartOffset = VA.getLocMemOffset(); + unsigned Alignment = MinAlign(VT.getStoreSize(), PartOffset); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase); SDValue Arg = DAG.getLoad( ISD::UNINDEXED, Ext, VT, DL, Chain, DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32), PtrInfo, - MemVT, /* Alignment = */ 4, MachineMemOperand::MONonTemporal | + MemVT, Alignment, MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant); - // 4 is the preferred alignment for the CONSTANT memory space. InVals.push_back(Arg); } return Chain; } EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const { if (!VT.isVector()) return MVT::i32; return VT.changeVectorElementTypeToInteger(); } bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT, const SelectionDAG &DAG) const { // Local and Private addresses do not handle vectors. 
Limit to i32 if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS)) { return (MemVT.getSizeInBits() <= 32); } return true; } bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align, bool *IsFast) const { if (IsFast) *IsFast = false; if (!VT.isSimple() || VT == MVT::Other) return false; if (VT.bitsLT(MVT::i32)) return false; // TODO: This is a rough estimate. if (IsFast) *IsFast = true; return VT.bitsGT(MVT::i32) && Align % 4 == 0; } static SDValue CompactSwizzlableVector( SelectionDAG &DAG, SDValue VectorEntry, DenseMap &RemapSwizzle) { assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR); assert(RemapSwizzle.empty()); SDValue NewBldVec[4] = { VectorEntry.getOperand(0), VectorEntry.getOperand(1), VectorEntry.getOperand(2), VectorEntry.getOperand(3) }; for (unsigned i = 0; i < 4; i++) { if (NewBldVec[i].isUndef()) // We mask write here to teach later passes that the ith element of this // vector is undef. Thus we can use it to reduce 128 bits reg usage, // break false dependencies and additionnaly make assembly easier to read. 
RemapSwizzle[i] = 7; // SEL_MASK_WRITE if (ConstantFPSDNode *C = dyn_cast(NewBldVec[i])) { if (C->isZero()) { RemapSwizzle[i] = 4; // SEL_0 NewBldVec[i] = DAG.getUNDEF(MVT::f32); } else if (C->isExactlyValue(1.0)) { RemapSwizzle[i] = 5; // SEL_1 NewBldVec[i] = DAG.getUNDEF(MVT::f32); } } if (NewBldVec[i].isUndef()) continue; for (unsigned j = 0; j < i; j++) { if (NewBldVec[i] == NewBldVec[j]) { NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType()); RemapSwizzle[i] = j; break; } } } return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry), NewBldVec); } static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap &RemapSwizzle) { assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR); assert(RemapSwizzle.empty()); SDValue NewBldVec[4] = { VectorEntry.getOperand(0), VectorEntry.getOperand(1), VectorEntry.getOperand(2), VectorEntry.getOperand(3) }; bool isUnmovable[4] = { false, false, false, false }; for (unsigned i = 0; i < 4; i++) { RemapSwizzle[i] = i; if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) { unsigned Idx = dyn_cast(NewBldVec[i].getOperand(1)) ->getZExtValue(); if (i == Idx) isUnmovable[Idx] = true; } } for (unsigned i = 0; i < 4; i++) { if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) { unsigned Idx = dyn_cast(NewBldVec[i].getOperand(1)) ->getZExtValue(); if (isUnmovable[Idx]) continue; // Swap i and Idx std::swap(NewBldVec[Idx], NewBldVec[i]); std::swap(RemapSwizzle[i], RemapSwizzle[Idx]); break; } } return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry), NewBldVec); } SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4], SelectionDAG &DAG, const SDLoc &DL) const { assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR); // Old -> New swizzle values DenseMap SwizzleRemap; BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap); for (unsigned i = 0; i < 4; i++) { unsigned Idx = cast(Swz[i])->getZExtValue(); if (SwizzleRemap.find(Idx) != 
SwizzleRemap.end()) Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32); } SwizzleRemap.clear(); BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap); for (unsigned i = 0; i < 4; i++) { unsigned Idx = cast(Swz[i])->getZExtValue(); if (SwizzleRemap.find(Idx) != SwizzleRemap.end()) Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32); } return BuildVector; } +SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block, + SelectionDAG &DAG) const { + SDLoc DL(LoadNode); + EVT VT = LoadNode->getValueType(0); + SDValue Chain = LoadNode->getChain(); + SDValue Ptr = LoadNode->getBasePtr(); + assert (isa(Ptr)); + + //TODO: Support smaller loads + if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode)) + return SDValue(); + + if (LoadNode->getAlignment() < 4) + return SDValue(); + + int ConstantBlock = ConstantAddressBlock(Block); + + SDValue Slots[4]; + for (unsigned i = 0; i < 4; i++) { + // We want Const position encoded with the following formula : + // (((512 + (kc_bank << 12) + const_index) << 2) + chan) + // const_index is Ptr computed by llvm using an alignment of 16. 
+ // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and + // then div by 4 at the ISel step + SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32)); + Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr); + } + EVT NewVT = MVT::v4i32; + unsigned NumElements = 4; + if (VT.isVector()) { + NewVT = VT; + NumElements = VT.getVectorNumElements(); + } + SDValue Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements)); + if (!VT.isVector()) { + Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result, + DAG.getConstant(0, DL, MVT::i32)); + } + SDValue MergedValues[2] = { + Result, + Chain + }; + return DAG.getMergeValues(MergedValues, DL); +} + //===----------------------------------------------------------------------===// // Custom DAG Optimizations //===----------------------------------------------------------------------===// SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc DL(N); switch (N->getOpcode()) { // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a) case ISD::FP_ROUND: { SDValue Arg = N->getOperand(0); if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) { return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0), Arg.getOperand(0)); } break; } // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) -> // (i32 select_cc f32, f32, -1, 0 cc) // // Mesa's GLSL frontend generates the above pattern a lot and we can lower // this to one of the SET*_DX10 instructions. 
case ISD::FP_TO_SINT: { SDValue FNeg = N->getOperand(0); if (FNeg.getOpcode() != ISD::FNEG) { return SDValue(); } SDValue SelectCC = FNeg.getOperand(0); if (SelectCC.getOpcode() != ISD::SELECT_CC || SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS SelectCC.getOperand(2).getValueType() != MVT::f32 || // True !isHWTrueValue(SelectCC.getOperand(2)) || !isHWFalseValue(SelectCC.getOperand(3))) { return SDValue(); } return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0), SelectCC.getOperand(0), // LHS SelectCC.getOperand(1), // RHS DAG.getConstant(-1, DL, MVT::i32), // True DAG.getConstant(0, DL, MVT::i32), // False SelectCC.getOperand(4)); // CC break; } // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx // => build_vector elt0, ... , NewEltIdx, ... , eltN case ISD::INSERT_VECTOR_ELT: { SDValue InVec = N->getOperand(0); SDValue InVal = N->getOperand(1); SDValue EltNo = N->getOperand(2); // If the inserted element is an UNDEF, just use the input vector. if (InVal.isUndef()) return InVec; EVT VT = InVec.getValueType(); // If we can't generate a legal BUILD_VECTOR, exit if (!isOperationLegal(ISD::BUILD_VECTOR, VT)) return SDValue(); // Check that we know which element is being inserted if (!isa(EltNo)) return SDValue(); unsigned Elt = cast(EltNo)->getZExtValue(); // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially // be converted to a BUILD_VECTOR). Fill in the Ops vector with the // vector elements. SmallVector Ops; if (InVec.getOpcode() == ISD::BUILD_VECTOR) { Ops.append(InVec.getNode()->op_begin(), InVec.getNode()->op_end()); } else if (InVec.isUndef()) { unsigned NElts = VT.getVectorNumElements(); Ops.append(NElts, DAG.getUNDEF(InVal.getValueType())); } else { return SDValue(); } // Insert the element if (Elt < Ops.size()) { // All the operands of BUILD_VECTOR must have the same type; // we enforce that here. 
EVT OpVT = Ops[0].getValueType(); if (InVal.getValueType() != OpVT) InVal = OpVT.bitsGT(InVal.getValueType()) ? DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) : DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal); Ops[Elt] = InVal; } // Return the new vector return DAG.getBuildVector(VT, DL, Ops); } // Extract_vec (Build_vector) generated by custom lowering // also needs to be customly combined case ISD::EXTRACT_VECTOR_ELT: { SDValue Arg = N->getOperand(0); if (Arg.getOpcode() == ISD::BUILD_VECTOR) { if (ConstantSDNode *Const = dyn_cast(N->getOperand(1))) { unsigned Element = Const->getZExtValue(); return Arg->getOperand(Element); } } if (Arg.getOpcode() == ISD::BITCAST && Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR && (Arg.getOperand(0).getValueType().getVectorNumElements() == Arg.getValueType().getVectorNumElements())) { if (ConstantSDNode *Const = dyn_cast(N->getOperand(1))) { unsigned Element = Const->getZExtValue(); return DAG.getNode(ISD::BITCAST, DL, N->getVTList(), Arg->getOperand(0).getOperand(Element)); } } break; } case ISD::SELECT_CC: { // Try common optimizations if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI)) return Ret; // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq -> // selectcc x, y, a, b, inv(cc) // // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne -> // selectcc x, y, a, b, cc SDValue LHS = N->getOperand(0); if (LHS.getOpcode() != ISD::SELECT_CC) { return SDValue(); } SDValue RHS = N->getOperand(1); SDValue True = N->getOperand(2); SDValue False = N->getOperand(3); ISD::CondCode NCC = cast(N->getOperand(4))->get(); if (LHS.getOperand(2).getNode() != True.getNode() || LHS.getOperand(3).getNode() != False.getNode() || RHS.getNode() != False.getNode()) { return SDValue(); } switch (NCC) { default: return SDValue(); case ISD::SETNE: return LHS; case ISD::SETEQ: { ISD::CondCode LHSCC = cast(LHS.getOperand(4))->get(); LHSCC = ISD::getSetCCInverse(LHSCC, LHS.getOperand(0).getValueType().isInteger()); if 
(DCI.isBeforeLegalizeOps() || isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType())) return DAG.getSelectCC(DL, LHS.getOperand(0), LHS.getOperand(1), LHS.getOperand(2), LHS.getOperand(3), LHSCC); break; } } return SDValue(); } case AMDGPUISD::R600_EXPORT: { SDValue Arg = N->getOperand(1); if (Arg.getOpcode() != ISD::BUILD_VECTOR) break; SDValue NewArgs[8] = { N->getOperand(0), // Chain SDValue(), N->getOperand(2), // ArrayBase N->getOperand(3), // Type N->getOperand(4), // SWZ_X N->getOperand(5), // SWZ_Y N->getOperand(6), // SWZ_Z N->getOperand(7) // SWZ_W }; NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL); return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs); } case AMDGPUISD::TEXTURE_FETCH: { SDValue Arg = N->getOperand(1); if (Arg.getOpcode() != ISD::BUILD_VECTOR) break; SDValue NewArgs[19] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), N->getOperand(4), N->getOperand(5), N->getOperand(6), N->getOperand(7), N->getOperand(8), N->getOperand(9), N->getOperand(10), N->getOperand(11), N->getOperand(12), N->getOperand(13), N->getOperand(14), N->getOperand(15), N->getOperand(16), N->getOperand(17), N->getOperand(18), }; NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL); return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs); } + + case ISD::LOAD: { + LoadSDNode *LoadNode = cast(N); + SDValue Ptr = LoadNode->getBasePtr(); + if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS && + isa(Ptr)) + return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG); + break; + } + default: break; } return AMDGPUTargetLowering::PerformDAGCombine(N, DCI); } bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg, SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) const { const R600InstrInfo *TII = Subtarget->getInstrInfo(); if (!Src.isMachineOpcode()) return false; switch (Src.getMachineOpcode()) { 
case R600::FNEG_R600: if (!Neg.getNode()) return false; Src = Src.getOperand(0); Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32); return true; case R600::FABS_R600: if (!Abs.getNode()) return false; Src = Src.getOperand(0); Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32); return true; case R600::CONST_COPY: { unsigned Opcode = ParentNode->getMachineOpcode(); bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1; if (!Sel.getNode()) return false; SDValue CstOffset = Src.getOperand(0); if (ParentNode->getValueType(0).isVector()) return false; // Gather constants values int SrcIndices[] = { TII->getOperandIdx(Opcode, R600::OpName::src0), TII->getOperandIdx(Opcode, R600::OpName::src1), TII->getOperandIdx(Opcode, R600::OpName::src2), TII->getOperandIdx(Opcode, R600::OpName::src0_X), TII->getOperandIdx(Opcode, R600::OpName::src0_Y), TII->getOperandIdx(Opcode, R600::OpName::src0_Z), TII->getOperandIdx(Opcode, R600::OpName::src0_W), TII->getOperandIdx(Opcode, R600::OpName::src1_X), TII->getOperandIdx(Opcode, R600::OpName::src1_Y), TII->getOperandIdx(Opcode, R600::OpName::src1_Z), TII->getOperandIdx(Opcode, R600::OpName::src1_W) }; std::vector Consts; for (int OtherSrcIdx : SrcIndices) { int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx); if (OtherSrcIdx < 0 || OtherSelIdx < 0) continue; if (HasDst) { OtherSrcIdx--; OtherSelIdx--; } if (RegisterSDNode *Reg = dyn_cast(ParentNode->getOperand(OtherSrcIdx))) { if (Reg->getReg() == R600::ALU_CONST) { ConstantSDNode *Cst = cast(ParentNode->getOperand(OtherSelIdx)); Consts.push_back(Cst->getZExtValue()); } } } ConstantSDNode *Cst = cast(CstOffset); Consts.push_back(Cst->getZExtValue()); if (!TII->fitsConstReadLimitations(Consts)) { return false; } Sel = CstOffset; Src = DAG.getRegister(R600::ALU_CONST, MVT::f32); return true; } case R600::MOV_IMM_GLOBAL_ADDR: // Check if the Imm slot is used. Taken from below. 
if (cast(Imm)->getZExtValue()) return false; Imm = Src.getOperand(0); Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32); return true; case R600::MOV_IMM_I32: case R600::MOV_IMM_F32: { unsigned ImmReg = R600::ALU_LITERAL_X; uint64_t ImmValue = 0; if (Src.getMachineOpcode() == R600::MOV_IMM_F32) { ConstantFPSDNode *FPC = dyn_cast(Src.getOperand(0)); float FloatValue = FPC->getValueAPF().convertToFloat(); if (FloatValue == 0.0) { ImmReg = R600::ZERO; } else if (FloatValue == 0.5) { ImmReg = R600::HALF; } else if (FloatValue == 1.0) { ImmReg = R600::ONE; } else { ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue(); } } else { ConstantSDNode *C = dyn_cast(Src.getOperand(0)); uint64_t Value = C->getZExtValue(); if (Value == 0) { ImmReg = R600::ZERO; } else if (Value == 1) { ImmReg = R600::ONE_INT; } else { ImmValue = Value; } } // Check that we aren't already using an immediate. // XXX: It's possible for an instruction to have more than one // immediate operand, but this is not supported yet. 
if (ImmReg == R600::ALU_LITERAL_X) { if (!Imm.getNode()) return false; ConstantSDNode *C = dyn_cast(Imm); assert(C); if (C->getZExtValue()) return false; Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32); } Src = DAG.getRegister(ImmReg, MVT::i32); return true; } default: return false; } } /// Fold the instructions after selecting them SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node, SelectionDAG &DAG) const { const R600InstrInfo *TII = Subtarget->getInstrInfo(); if (!Node->isMachineOpcode()) return Node; unsigned Opcode = Node->getMachineOpcode(); SDValue FakeOp; std::vector Ops(Node->op_begin(), Node->op_end()); if (Opcode == R600::DOT_4) { int OperandIdx[] = { TII->getOperandIdx(Opcode, R600::OpName::src0_X), TII->getOperandIdx(Opcode, R600::OpName::src0_Y), TII->getOperandIdx(Opcode, R600::OpName::src0_Z), TII->getOperandIdx(Opcode, R600::OpName::src0_W), TII->getOperandIdx(Opcode, R600::OpName::src1_X), TII->getOperandIdx(Opcode, R600::OpName::src1_Y), TII->getOperandIdx(Opcode, R600::OpName::src1_Z), TII->getOperandIdx(Opcode, R600::OpName::src1_W) }; int NegIdx[] = { TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X), TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y), TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z), TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W), TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X), TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y), TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z), TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W) }; int AbsIdx[] = { TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X), TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y), TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z), TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W), TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X), TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y), TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z), TII->getOperandIdx(Opcode, 
R600::OpName::src1_abs_W) }; for (unsigned i = 0; i < 8; i++) { if (OperandIdx[i] < 0) return Node; SDValue &Src = Ops[OperandIdx[i] - 1]; SDValue &Neg = Ops[NegIdx[i] - 1]; SDValue &Abs = Ops[AbsIdx[i] - 1]; bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1; int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]); if (HasDst) SelIdx--; SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp; if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG)) return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); } } else if (Opcode == R600::REG_SEQUENCE) { for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) { SDValue &Src = Ops[i]; if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG)) return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); } } else { if (!TII->hasInstrModifiers(Opcode)) return Node; int OperandIdx[] = { TII->getOperandIdx(Opcode, R600::OpName::src0), TII->getOperandIdx(Opcode, R600::OpName::src1), TII->getOperandIdx(Opcode, R600::OpName::src2) }; int NegIdx[] = { TII->getOperandIdx(Opcode, R600::OpName::src0_neg), TII->getOperandIdx(Opcode, R600::OpName::src1_neg), TII->getOperandIdx(Opcode, R600::OpName::src2_neg) }; int AbsIdx[] = { TII->getOperandIdx(Opcode, R600::OpName::src0_abs), TII->getOperandIdx(Opcode, R600::OpName::src1_abs), -1 }; for (unsigned i = 0; i < 3; i++) { if (OperandIdx[i] < 0) return Node; SDValue &Src = Ops[OperandIdx[i] - 1]; SDValue &Neg = Ops[NegIdx[i] - 1]; SDValue FakeAbs; SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs; bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1; int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]); int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal); if (HasDst) { SelIdx--; ImmIdx--; } SDValue &Sel = (SelIdx > -1) ? 
Ops[SelIdx] : FakeOp; SDValue &Imm = Ops[ImmIdx]; if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG)) return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); } } return Node; } Index: projects/clang700-import/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.h =================================================================== --- projects/clang700-import/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.h (revision 337644) +++ projects/clang700-import/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.h (revision 337645) @@ -1,110 +1,112 @@ //===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // /// \file /// R600 DAG Lowering interface definition // //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_AMDGPU_R600ISELLOWERING_H #define LLVM_LIB_TARGET_AMDGPU_R600ISELLOWERING_H #include "AMDGPUISelLowering.h" namespace llvm { class R600InstrInfo; class R600Subtarget; class R600TargetLowering final : public AMDGPUTargetLowering { const R600Subtarget *Subtarget; public: R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI); const R600Subtarget *getSubtarget() const; MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override; SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; void ReplaceNodeResults(SDNode * N, SmallVectorImpl &Results, SelectionDAG &DAG) const override; CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &DL, SelectionDAG &DAG, 
SmallVectorImpl &InVals) const override; EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const override; bool canMergeStoresTo(unsigned AS, EVT MemVT, const SelectionDAG &DAG) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, bool *IsFast) const override; private: unsigned Gen; /// Each OpenCL kernel has nine implicit parameters that are stored in the /// first nine dwords of a Vertex Buffer. These implicit parameters are /// lowered to load instructions which retrieve the values from the Vertex /// Buffer. SDValue LowerImplicitParameter(SelectionDAG &DAG, EVT VT, const SDLoc &DL, unsigned DwordOffset) const; void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB, MachineRegisterInfo & MRI, unsigned dword_offset) const; SDValue OptimizeSwizzle(SDValue BuildVector, SDValue Swz[], SelectionDAG &DAG, const SDLoc &DL) const; SDValue vectorToVerticalVector(SelectionDAG &DAG, SDValue Vector) const; SDValue lowerFrameIndex(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const override; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerPrivateTruncStore(StoreSDNode *Store, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerPrivateExtLoad(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSHLParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSRXParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUADDSUBO(SDValue Op, 
SelectionDAG &DAG, unsigned mainop, unsigned ovf) const; SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth, SelectionDAG &DAG) const; void getStackAddress(unsigned StackWidth, unsigned ElemIdx, unsigned &Channel, unsigned &PtrIncr) const; bool isZero(SDValue Op) const; bool isHWTrueValue(SDValue Op) const; bool isHWFalseValue(SDValue Op) const; - bool FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, - SDValue &Neg, SDValue &Abs, SDValue &Sel, SDValue &Imm, - SelectionDAG &DAG) const; + bool FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, + SDValue &Neg, SDValue &Abs, SDValue &Sel, SDValue &Imm, + SelectionDAG &DAG) const; + SDValue constBufferLoad(LoadSDNode *LoadNode, int Block, + SelectionDAG &DAG) const; SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override; }; } // End namespace llvm; #endif Index: projects/clang700-import/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td =================================================================== --- projects/clang700-import/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td (revision 337644) +++ projects/clang700-import/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td (revision 337645) @@ -1,861 +1,850 @@ //===-- VOP3Instructions.td - Vector Instruction Defintions ---------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // VOP3 Classes //===----------------------------------------------------------------------===// class getVOP3ModPat { dag src0 = !if(P.HasOMod, (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)); list ret3 = [(set P.DstVT:$vdst, (node (P.Src0VT src0), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))]; list ret2 = [(set P.DstVT:$vdst, (node (P.Src0VT src0), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))]; list ret1 = [(set P.DstVT:$vdst, (node (P.Src0VT src0)))]; list ret = !if(!eq(P.NumSrcArgs, 3), ret3, !if(!eq(P.NumSrcArgs, 2), ret2, ret1)); } class getVOP3PModPat { list ret3 = [(set P.DstVT:$vdst, (node (P.Src0VT !if(P.HasClamp, (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp), (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))), (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers)), (P.Src2VT (VOP3PMods P.Src2VT:$src2, i32:$src2_modifiers))))]; list ret2 = [(set P.DstVT:$vdst, (node !if(P.HasClamp, (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)), (P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))), (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers))))]; list ret1 = [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))]; list ret = !if(!eq(P.NumSrcArgs, 3), ret3, !if(!eq(P.NumSrcArgs, 2), ret2, ret1)); } class getVOP3OpSelPat { list ret3 = [(set P.DstVT:$vdst, (node (P.Src0VT !if(P.HasClamp, (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp), (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))), (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers)), (P.Src2VT (VOP3OpSel P.Src2VT:$src2, i32:$src2_modifiers))))]; list ret2 = [(set 
P.DstVT:$vdst, (node !if(P.HasClamp, (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)), (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))), (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers))))]; list ret1 = [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))]; list ret = !if(!eq(P.NumSrcArgs, 3), ret3, !if(!eq(P.NumSrcArgs, 2), ret2, ret1)); } class getVOP3OpSelModPat { list ret3 = [(set P.DstVT:$vdst, (node (P.Src0VT !if(P.HasClamp, (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp), (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))), (P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers)), (P.Src2VT (VOP3OpSelMods P.Src2VT:$src2, i32:$src2_modifiers))))]; list ret2 = [(set P.DstVT:$vdst, (node !if(P.HasClamp, (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)), (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))), (P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers))))]; list ret1 = [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))]; list ret = !if(!eq(P.NumSrcArgs, 3), ret3, !if(!eq(P.NumSrcArgs, 2), ret2, ret1)); } class getVOP3Pat { list ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))]; list ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]; list ret1 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]; list ret = !if(!eq(P.NumSrcArgs, 3), ret3, !if(!eq(P.NumSrcArgs, 2), ret2, ret1)); } class getVOP3ClampPat { list ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, i1:$clamp))]; list ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, i1:$clamp))]; list ret1 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, i1:$clamp))]; list ret = !if(!eq(P.NumSrcArgs, 3), ret3, !if(!eq(P.NumSrcArgs, 2), ret2, ret1)); } class VOP3Inst : VOP3_Pseudo.ret, getVOP3OpSelPat.ret), 
!if(P.HasModifiers, getVOP3ModPat.ret, !if(P.HasIntClamp, getVOP3ClampPat.ret, getVOP3Pat.ret))), VOP3Only, 0, P.HasOpSel> { let IntClamp = P.HasIntClamp; let AsmMatchConverter = !if(P.HasOpSel, "cvtVOP3OpSel", !if(!or(P.HasModifiers, !or(P.HasOMod, P.HasIntClamp)), "cvtVOP3", "")); } // Special case for v_div_fmas_{f32|f64}, since it seems to be the // only VOP instruction that implicitly reads VCC. let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod" in { def VOP_F32_F32_F32_F32_VCC : VOPProfile<[f32, f32, f32, f32]> { let Outs64 = (outs DstRC.RegClass:$vdst); } def VOP_F64_F64_F64_F64_VCC : VOPProfile<[f64, f64, f64, f64]> { let Outs64 = (outs DstRC.RegClass:$vdst); } } class getVOP3VCC { list ret = [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers)), (i1 VCC)))]; } class VOP3Features { bit HasClamp = Clamp; bit HasOpSel = OpSel; bit IsPacked = Packed; } def VOP3_REGULAR : VOP3Features<0, 0, 0>; def VOP3_CLAMP : VOP3Features<1, 0, 0>; def VOP3_OPSEL : VOP3Features<1, 1, 0>; def VOP3_PACKED : VOP3Features<1, 1, 1>; class VOP3_Profile : VOPProfile { let HasClamp = !if(Features.HasClamp, 1, P.HasClamp); let HasOpSel = !if(Features.HasOpSel, 1, P.HasOpSel); let IsPacked = !if(Features.IsPacked, 1, P.IsPacked); let HasModifiers = !if(Features.IsPacked, 1, P.HasModifiers); // FIXME: Hack to stop printing _e64 let Outs64 = (outs DstRC.RegClass:$vdst); let Asm64 = " " # !if(Features.HasOpSel, getAsmVOP3OpSel.ret, !if(Features.HasClamp, getAsm64.ret, P.Asm64)); } class VOP3b_Profile : VOPProfile<[vt, vt, vt, vt]> { // v_div_scale_{f32|f64} do not support input modifiers. 
let HasModifiers = 0; let HasOMod = 0; let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); let Asm64 = " $vdst, $sdst, $src0, $src1, $src2"; } def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile { // FIXME: Hack to stop printing _e64 let DstRC = RegisterOperand; } def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile { // FIXME: Hack to stop printing _e64 let DstRC = RegisterOperand; } def VOP3b_I64_I1_I32_I32_I64 : VOPProfile<[i64, i32, i32, i64]> { let HasClamp = 1; // FIXME: Hack to stop printing _e64 let DstRC = RegisterOperand; let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); let Asm64 = " $vdst, $sdst, $src0, $src1, $src2$clamp"; } //===----------------------------------------------------------------------===// // VOP3 INTERP //===----------------------------------------------------------------------===// class VOP3Interp : VOP3_Pseudo { let AsmMatchConverter = "cvtVOP3Interp"; } def VOP3_INTERP : VOPProfile<[f32, f32, i32, untyped]> { let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0, Attr:$attr, AttrChan:$attrchan, clampmod:$clamp, omod:$omod); let Asm64 = "$vdst, $src0_modifiers, $attr$attrchan$clamp$omod"; } def VOP3_INTERP_MOV : VOPProfile<[f32, i32, i32, untyped]> { let Ins64 = (ins InterpSlot:$src0, Attr:$attr, AttrChan:$attrchan, clampmod:$clamp, omod:$omod); let Asm64 = "$vdst, $src0, $attr$attrchan$clamp$omod"; let HasClamp = 1; } class getInterp16Asm { string src2 = !if(HasSrc2, ", $src2_modifiers", ""); string omod = !if(HasOMod, "$omod", ""); string ret = " $vdst, $src0_modifiers, $attr$attrchan"#src2#"$high$clamp"#omod; } class getInterp16Ins { dag ret = !if(HasSrc2, !if(HasOMod, (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0, Attr:$attr, AttrChan:$attrchan, Src2Mod:$src2_modifiers, VRegSrc_32:$src2, highmod:$high, clampmod:$clamp, omod:$omod), (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0, Attr:$attr, AttrChan:$attrchan, Src2Mod:$src2_modifiers, VRegSrc_32:$src2, highmod:$high, clampmod:$clamp) ), (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0, 
Attr:$attr, AttrChan:$attrchan, highmod:$high, clampmod:$clamp, omod:$omod) ); } class VOP3_INTERP16 ArgVT> : VOPProfile { let HasOMod = !if(!eq(DstVT.Value, f16.Value), 0, 1); let HasHigh = 1; let Outs64 = (outs VGPR_32:$vdst); let Ins64 = getInterp16Ins.ret; let Asm64 = getInterp16Asm.ret; } //===----------------------------------------------------------------------===// // VOP3 Instructions //===----------------------------------------------------------------------===// let isCommutable = 1 in { def V_MAD_LEGACY_F32 : VOP3Inst <"v_mad_legacy_f32", VOP3_Profile>; def V_MAD_F32 : VOP3Inst <"v_mad_f32", VOP3_Profile, fmad>; def V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile>; def V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile>; def V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile, fma>; def V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile, int_amdgcn_lerp>; let SchedRW = [WriteDoubleAdd] in { def V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile, fma>; def V_ADD_F64 : VOP3Inst <"v_add_f64", VOP3_Profile, fadd, 1>; def V_MUL_F64 : VOP3Inst <"v_mul_f64", VOP3_Profile, fmul, 1>; def V_MIN_F64 : VOP3Inst <"v_min_f64", VOP3_Profile, fminnum, 1>; def V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile, fmaxnum, 1>; } // End SchedRW = [WriteDoubleAdd] let SchedRW = [WriteQuarterRate32] in { def V_MUL_LO_U32 : VOP3Inst <"v_mul_lo_u32", VOP3_Profile>; def V_MUL_HI_U32 : VOP3Inst <"v_mul_hi_u32", VOP3_Profile, mulhu>; def V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", VOP3_Profile>; def V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", VOP3_Profile, mulhs>; } // End SchedRW = [WriteQuarterRate32] let Uses = [VCC, EXEC] in { // v_div_fmas_f32: // result = src0 * src1 + src2 // if (vcc) // result *= 2^32 // def V_DIV_FMAS_F32 : VOP3_Pseudo <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC, getVOP3VCC.ret> { let SchedRW = [WriteFloatFMA]; } // v_div_fmas_f64: // result = src0 * src1 + src2 // if (vcc) // result *= 2^64 // def V_DIV_FMAS_F64 : VOP3_Pseudo <"v_div_fmas_f64", 
VOP_F64_F64_F64_F64_VCC, getVOP3VCC.ret> { let SchedRW = [WriteDouble]; } } // End Uses = [VCC, EXEC] } // End isCommutable = 1 def V_CUBEID_F32 : VOP3Inst <"v_cubeid_f32", VOP3_Profile, int_amdgcn_cubeid>; def V_CUBESC_F32 : VOP3Inst <"v_cubesc_f32", VOP3_Profile, int_amdgcn_cubesc>; def V_CUBETC_F32 : VOP3Inst <"v_cubetc_f32", VOP3_Profile, int_amdgcn_cubetc>; def V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile, int_amdgcn_cubema>; def V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile, AMDGPUbfe_u32>; def V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile, AMDGPUbfe_i32>; def V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile, AMDGPUbfi>; def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile, int_amdgcn_alignbit>; def V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile, int_amdgcn_alignbyte>; def V_MIN3_F32 : VOP3Inst <"v_min3_f32", VOP3_Profile, AMDGPUfmin3>; def V_MIN3_I32 : VOP3Inst <"v_min3_i32", VOP3_Profile, AMDGPUsmin3>; def V_MIN3_U32 : VOP3Inst <"v_min3_u32", VOP3_Profile, AMDGPUumin3>; def V_MAX3_F32 : VOP3Inst <"v_max3_f32", VOP3_Profile, AMDGPUfmax3>; def V_MAX3_I32 : VOP3Inst <"v_max3_i32", VOP3_Profile, AMDGPUsmax3>; def V_MAX3_U32 : VOP3Inst <"v_max3_u32", VOP3_Profile, AMDGPUumax3>; def V_MED3_F32 : VOP3Inst <"v_med3_f32", VOP3_Profile, AMDGPUfmed3>; def V_MED3_I32 : VOP3Inst <"v_med3_i32", VOP3_Profile, AMDGPUsmed3>; def V_MED3_U32 : VOP3Inst <"v_med3_u32", VOP3_Profile, AMDGPUumed3>; def V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile>; def V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile>; def V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile>; def V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile>; def V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile, int_amdgcn_cvt_pk_u8_f32>; def V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", VOP3_Profile, AMDGPUdiv_fixup>; let SchedRW = [WriteDoubleAdd] in { def V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile, AMDGPUdiv_fixup>; def V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", 
VOP3_Profile, AMDGPUldexp, 1>; } // End SchedRW = [WriteDoubleAdd] def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> { let SchedRW = [WriteFloatFMA, WriteSALU]; let AsmMatchConverter = ""; } // Double precision division pre-scale. def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> { let SchedRW = [WriteDouble, WriteSALU]; let AsmMatchConverter = ""; } def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile>; let Constraints = "@earlyclobber $vdst" in { def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile>; } // End Constraints = "@earlyclobber $vdst" def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile, AMDGPUtrig_preop> { let SchedRW = [WriteDouble]; } let SchedRW = [Write64Bit] in { // These instructions only exist on SI and CI let SubtargetPredicate = isSICI in { def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile>; def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile>; def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile>; def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile>; } // End SubtargetPredicate = isSICI let SubtargetPredicate = isVI in { def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile>; def V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile>; def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile>; } // End SubtargetPredicate = isVI } // End SchedRW = [Write64Bit] let SubtargetPredicate = isCIVI in { let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in { def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile>; def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile>; } // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] let isCommutable = 1 in { let SchedRW = [WriteQuarterRate32, WriteSALU] in { def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>; def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>; } // End SchedRW = 
[WriteDouble, WriteSALU] } // End isCommutable = 1 } // End SubtargetPredicate = isCIVI def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile, AMDGPUdiv_fixup> { let Predicates = [Has16BitInsts, isVIOnly]; } def V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9", VOP3_Profile, AMDGPUdiv_fixup> { let renamedInGFX9 = 1; let Predicates = [Has16BitInsts, isGFX9]; } let SubtargetPredicate = Has16BitInsts, isCommutable = 1 in { let renamedInGFX9 = 1 in { def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile, fmad>; def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile>; def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile>; def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile, fma>; def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>>; } let SubtargetPredicate = isGFX9 in { def V_MAD_F16_gfx9 : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile>; def V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3_Profile>; def V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile>; def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile>; def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>; } // End SubtargetPredicate = isGFX9 def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>>; def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>>; } // End SubtargetPredicate = Has16BitInsts, isCommutable = 1 let SubtargetPredicate = isVI in { def V_INTERP_P1_F32_e64 : VOP3Interp <"v_interp_p1_f32", VOP3_INTERP>; def V_INTERP_P2_F32_e64 : VOP3Interp <"v_interp_p2_f32", VOP3_INTERP>; def V_INTERP_MOV_F32_e64 : VOP3Interp <"v_interp_mov_f32", VOP3_INTERP_MOV>; def V_PERM_B32 : VOP3Inst <"v_perm_b32", VOP3_Profile, AMDGPUperm>; } // End SubtargetPredicate = isVI let Predicates = [Has16BitInsts] in { multiclass Ternary_i16_Pats { def : GCNPat < (op2 (op1 i16:$src0, i16:$src1), i16:$src2), (inst i16:$src0, i16:$src1, i16:$src2, 
(i1 0)) >; -def : GCNPat< - (i32 (op3 (op2 (op1 i16:$src0, i16:$src1), i16:$src2))), - (inst i16:$src0, i16:$src1, i16:$src2, (i1 0)) ->; - -def : GCNPat< - (i64 (op3 (op2 (op1 i16:$src0, i16:$src1), i16:$src2))), - (REG_SEQUENCE VReg_64, - (inst i16:$src0, i16:$src1, i16:$src2, (i1 0)), sub0, - (V_MOV_B32_e32 (i32 0)), sub1) ->; } defm: Ternary_i16_Pats; defm: Ternary_i16_Pats; } // End Predicates = [Has16BitInsts] let SubtargetPredicate = isGFX9 in { def V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile>; def V_LSHL_ADD_U32 : VOP3Inst <"v_lshl_add_u32", VOP3_Profile>; def V_ADD_LSHL_U32 : VOP3Inst <"v_add_lshl_u32", VOP3_Profile>; def V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile>; def V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile>; def V_AND_OR_B32 : VOP3Inst <"v_and_or_b32", VOP3_Profile>; def V_OR3_B32 : VOP3Inst <"v_or3_b32", VOP3_Profile>; def V_XAD_U32 : VOP3Inst <"v_xad_u32", VOP3_Profile>; def V_MED3_F16 : VOP3Inst <"v_med3_f16", VOP3_Profile, AMDGPUfmed3>; def V_MED3_I16 : VOP3Inst <"v_med3_i16", VOP3_Profile, AMDGPUsmed3>; def V_MED3_U16 : VOP3Inst <"v_med3_u16", VOP3_Profile, AMDGPUumed3>; def V_MIN3_F16 : VOP3Inst <"v_min3_f16", VOP3_Profile, AMDGPUfmin3>; def V_MIN3_I16 : VOP3Inst <"v_min3_i16", VOP3_Profile, AMDGPUsmin3>; def V_MIN3_U16 : VOP3Inst <"v_min3_u16", VOP3_Profile, AMDGPUumin3>; def V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile, AMDGPUfmax3>; def V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile, AMDGPUsmax3>; def V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile, AMDGPUumax3>; def V_ADD_I16 : VOP3Inst <"v_add_i16", VOP3_Profile>; def V_SUB_I16 : VOP3Inst <"v_sub_i16", VOP3_Profile>; def V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile>; def V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile>; def V_CVT_PKNORM_I16_F16 : VOP3Inst <"v_cvt_pknorm_i16_f16", VOP3_Profile>; def V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile>; def V_ADD_I32_gfx9 : VOP3Inst <"v_add_i32_gfx9", 
VOP3_Profile>; def V_SUB_I32_gfx9 : VOP3Inst <"v_sub_i32_gfx9", VOP3_Profile>; } // End SubtargetPredicate = isGFX9 //===----------------------------------------------------------------------===// // Integer Clamp Patterns //===----------------------------------------------------------------------===// class getClampPat { dag ret3 = (P.DstVT (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2)); dag ret2 = (P.DstVT (node P.Src0VT:$src0, P.Src1VT:$src1)); dag ret1 = (P.DstVT (node P.Src0VT:$src0)); dag ret = !if(!eq(P.NumSrcArgs, 3), ret3, !if(!eq(P.NumSrcArgs, 2), ret2, ret1)); } class getClampRes { dag ret3 = (inst P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, (i1 0)); dag ret2 = (inst P.Src0VT:$src0, P.Src1VT:$src1, (i1 0)); dag ret1 = (inst P.Src0VT:$src0, (i1 0)); dag ret = !if(!eq(P.NumSrcArgs, 3), ret3, !if(!eq(P.NumSrcArgs, 2), ret2, ret1)); } class IntClampPat : GCNPat< getClampPat.ret, getClampRes.ret >; def : IntClampPat; def : IntClampPat; def : IntClampPat; def : IntClampPat; def : IntClampPat; def : IntClampPat; def : IntClampPat; def : IntClampPat; def : IntClampPat; //===----------------------------------------------------------------------===// // Target //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // SI //===----------------------------------------------------------------------===// let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in { multiclass VOP3_Real_si op> { def _si : VOP3_Real(NAME), SIEncodingFamily.SI>, VOP3e_si (NAME).Pfl>; } multiclass VOP3be_Real_si op> { def _si : VOP3_Real(NAME), SIEncodingFamily.SI>, VOP3be_si (NAME).Pfl>; } } // End AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" defm V_MAD_LEGACY_F32 : VOP3_Real_si <0x140>; defm V_MAD_F32 : VOP3_Real_si <0x141>; defm V_MAD_I32_I24 : VOP3_Real_si <0x142>; defm V_MAD_U32_U24 : VOP3_Real_si <0x143>; defm V_CUBEID_F32 : VOP3_Real_si <0x144>; 
// SI real-encoding instantiations (continued). Each defm applies the
// VOP3_Real_si multiclass to the pseudo of the same NAME, producing a `_si`
// def in SIEncodingFamily.SI; the hex literal is the multiclass's `op`
// argument. The multiclasses are declared under AssemblerPredicates = [isSICI]
// with DecoderNamespace = "SICI".
defm V_CUBESC_F32 : VOP3_Real_si <0x145>;
defm V_CUBETC_F32 : VOP3_Real_si <0x146>;
defm V_CUBEMA_F32 : VOP3_Real_si <0x147>;
defm V_BFE_U32 : VOP3_Real_si <0x148>;
defm V_BFE_I32 : VOP3_Real_si <0x149>;
defm V_BFI_B32 : VOP3_Real_si <0x14a>;
defm V_FMA_F32 : VOP3_Real_si <0x14b>;
defm V_FMA_F64 : VOP3_Real_si <0x14c>;
defm V_LERP_U8 : VOP3_Real_si <0x14d>;
defm V_ALIGNBIT_B32 : VOP3_Real_si <0x14e>;
defm V_ALIGNBYTE_B32 : VOP3_Real_si <0x14f>;
defm V_MULLIT_F32 : VOP3_Real_si <0x150>;
defm V_MIN3_F32 : VOP3_Real_si <0x151>;
defm V_MIN3_I32 : VOP3_Real_si <0x152>;
defm V_MIN3_U32 : VOP3_Real_si <0x153>;
defm V_MAX3_F32 : VOP3_Real_si <0x154>;
defm V_MAX3_I32 : VOP3_Real_si <0x155>;
defm V_MAX3_U32 : VOP3_Real_si <0x156>;
defm V_MED3_F32 : VOP3_Real_si <0x157>;
defm V_MED3_I32 : VOP3_Real_si <0x158>;
defm V_MED3_U32 : VOP3_Real_si <0x159>;
defm V_SAD_U8 : VOP3_Real_si <0x15a>;
defm V_SAD_HI_U8 : VOP3_Real_si <0x15b>;
defm V_SAD_U16 : VOP3_Real_si <0x15c>;
defm V_SAD_U32 : VOP3_Real_si <0x15d>;
defm V_CVT_PK_U8_F32 : VOP3_Real_si <0x15e>;
defm V_DIV_FIXUP_F32 : VOP3_Real_si <0x15f>;
defm V_DIV_FIXUP_F64 : VOP3_Real_si <0x160>;
defm V_LSHL_B64 : VOP3_Real_si <0x161>;
defm V_LSHR_B64 : VOP3_Real_si <0x162>;
defm V_ASHR_I64 : VOP3_Real_si <0x163>;
defm V_ADD_F64 : VOP3_Real_si <0x164>;
defm V_MUL_F64 : VOP3_Real_si <0x165>;
defm V_MIN_F64 : VOP3_Real_si <0x166>;
defm V_MAX_F64 : VOP3_Real_si <0x167>;
defm V_LDEXP_F64 : VOP3_Real_si <0x168>;
defm V_MUL_LO_U32 : VOP3_Real_si <0x169>;
defm V_MUL_HI_U32 : VOP3_Real_si <0x16a>;
defm V_MUL_LO_I32 : VOP3_Real_si <0x16b>;
defm V_MUL_HI_I32 : VOP3_Real_si <0x16c>;
// The two V_DIV_SCALE pseudos are built on VOP3b profiles, whose Outs64 adds
// an SReg_64:$sdst result, so they go through VOP3be_Real_si (VOP3be_si
// encoding) rather than VOP3_Real_si.
defm V_DIV_SCALE_F32 : VOP3be_Real_si <0x16d>;
defm V_DIV_SCALE_F64 : VOP3be_Real_si <0x16e>;
defm V_DIV_FMAS_F32 : VOP3_Real_si <0x16f>;
defm V_DIV_FMAS_F64 : VOP3_Real_si <0x170>;
defm V_MSAD_U8 : VOP3_Real_si <0x171>;
// 0x172 is intentionally absent from this list: the CI-only section assigns
// it to V_QSAD_PK_U16_U8 via VOP3_Real_ci.
defm V_MQSAD_PK_U16_U8 : VOP3_Real_si <0x173>;
defm V_TRIG_PREOP_F64 : VOP3_Real_si <0x174>;
//===----------------------------------------------------------------------===// // CI //===----------------------------------------------------------------------===// multiclass VOP3_Real_ci op> { def _ci : VOP3_Real(NAME), SIEncodingFamily.SI>, VOP3e_si (NAME).Pfl> { let AssemblerPredicates = [isCIOnly]; let DecoderNamespace = "CI"; } } multiclass VOP3be_Real_ci op> { def _ci : VOP3_Real(NAME), SIEncodingFamily.SI>, VOP3be_si (NAME).Pfl> { let AssemblerPredicates = [isCIOnly]; let DecoderNamespace = "CI"; } } defm V_QSAD_PK_U16_U8 : VOP3_Real_ci <0x172>; defm V_MQSAD_U32_U8 : VOP3_Real_ci <0x175>; defm V_MAD_U64_U32 : VOP3be_Real_ci <0x176>; defm V_MAD_I64_I32 : VOP3be_Real_ci <0x177>; //===----------------------------------------------------------------------===// // VI //===----------------------------------------------------------------------===// let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in { multiclass VOP3_Real_vi op> { def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, VOP3e_vi (NAME).Pfl>; } multiclass VOP3be_Real_vi op> { def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, VOP3be_vi (NAME).Pfl>; } multiclass VOP3OpSel_Real_gfx9 op> { def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, VOP3OpSel_gfx9 (NAME).Pfl>; } multiclass VOP3Interp_Real_vi op> { def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, VOP3Interp_vi (NAME).Pfl>; } } // End AssemblerPredicates = [isVI], DecoderNamespace = "VI" let AssemblerPredicates = [isVIOnly], DecoderNamespace = "VI" in { multiclass VOP3_F16_Real_vi op> { def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, VOP3e_vi (NAME).Pfl>; } multiclass VOP3Interp_F16_Real_vi op> { def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, VOP3Interp_vi (NAME).Pfl>; } } // End AssemblerPredicates = [isVIOnly], DecoderNamespace = "VI" let AssemblerPredicates = [isGFX9], DecoderNamespace = "GFX9" in { multiclass VOP3_F16_Real_gfx9 op, string OpName, string AsmName> { def _gfx9 : VOP3_Real(OpName), SIEncodingFamily.GFX9>, VOP3e_vi 
(OpName).Pfl> { VOP3_Pseudo ps = !cast(OpName); let AsmString = AsmName # ps.AsmOperands; } } multiclass VOP3OpSel_F16_Real_gfx9 op, string AsmName> { def _gfx9 : VOP3_Real(NAME), SIEncodingFamily.GFX9>, VOP3OpSel_gfx9 (NAME).Pfl> { VOP3_Pseudo ps = !cast(NAME); let AsmString = AsmName # ps.AsmOperands; } } multiclass VOP3Interp_F16_Real_gfx9 op, string OpName, string AsmName> { def _gfx9 : VOP3_Real(OpName), SIEncodingFamily.GFX9>, VOP3Interp_vi (OpName).Pfl> { VOP3_Pseudo ps = !cast(OpName); let AsmString = AsmName # ps.AsmOperands; } } multiclass VOP3_Real_gfx9 op, string AsmName> { def _gfx9 : VOP3_Real(NAME), SIEncodingFamily.GFX9>, VOP3e_vi (NAME).Pfl> { VOP_Pseudo ps = !cast(NAME); let AsmString = AsmName # ps.AsmOperands; } } } // End AssemblerPredicates = [isGFX9], DecoderNamespace = "GFX9" defm V_MAD_U64_U32 : VOP3be_Real_vi <0x1E8>; defm V_MAD_I64_I32 : VOP3be_Real_vi <0x1E9>; defm V_MAD_LEGACY_F32 : VOP3_Real_vi <0x1c0>; defm V_MAD_F32 : VOP3_Real_vi <0x1c1>; defm V_MAD_I32_I24 : VOP3_Real_vi <0x1c2>; defm V_MAD_U32_U24 : VOP3_Real_vi <0x1c3>; defm V_CUBEID_F32 : VOP3_Real_vi <0x1c4>; defm V_CUBESC_F32 : VOP3_Real_vi <0x1c5>; defm V_CUBETC_F32 : VOP3_Real_vi <0x1c6>; defm V_CUBEMA_F32 : VOP3_Real_vi <0x1c7>; defm V_BFE_U32 : VOP3_Real_vi <0x1c8>; defm V_BFE_I32 : VOP3_Real_vi <0x1c9>; defm V_BFI_B32 : VOP3_Real_vi <0x1ca>; defm V_FMA_F32 : VOP3_Real_vi <0x1cb>; defm V_FMA_F64 : VOP3_Real_vi <0x1cc>; defm V_LERP_U8 : VOP3_Real_vi <0x1cd>; defm V_ALIGNBIT_B32 : VOP3_Real_vi <0x1ce>; defm V_ALIGNBYTE_B32 : VOP3_Real_vi <0x1cf>; defm V_MIN3_F32 : VOP3_Real_vi <0x1d0>; defm V_MIN3_I32 : VOP3_Real_vi <0x1d1>; defm V_MIN3_U32 : VOP3_Real_vi <0x1d2>; defm V_MAX3_F32 : VOP3_Real_vi <0x1d3>; defm V_MAX3_I32 : VOP3_Real_vi <0x1d4>; defm V_MAX3_U32 : VOP3_Real_vi <0x1d5>; defm V_MED3_F32 : VOP3_Real_vi <0x1d6>; defm V_MED3_I32 : VOP3_Real_vi <0x1d7>; defm V_MED3_U32 : VOP3_Real_vi <0x1d8>; defm V_SAD_U8 : VOP3_Real_vi <0x1d9>; defm V_SAD_HI_U8 : VOP3_Real_vi 
<0x1da>; defm V_SAD_U16 : VOP3_Real_vi <0x1db>; defm V_SAD_U32 : VOP3_Real_vi <0x1dc>; defm V_CVT_PK_U8_F32 : VOP3_Real_vi <0x1dd>; defm V_DIV_FIXUP_F32 : VOP3_Real_vi <0x1de>; defm V_DIV_FIXUP_F64 : VOP3_Real_vi <0x1df>; defm V_DIV_SCALE_F32 : VOP3be_Real_vi <0x1e0>; defm V_DIV_SCALE_F64 : VOP3be_Real_vi <0x1e1>; defm V_DIV_FMAS_F32 : VOP3_Real_vi <0x1e2>; defm V_DIV_FMAS_F64 : VOP3_Real_vi <0x1e3>; defm V_MSAD_U8 : VOP3_Real_vi <0x1e4>; defm V_QSAD_PK_U16_U8 : VOP3_Real_vi <0x1e5>; defm V_MQSAD_PK_U16_U8 : VOP3_Real_vi <0x1e6>; defm V_MQSAD_U32_U8 : VOP3_Real_vi <0x1e7>; defm V_PERM_B32 : VOP3_Real_vi <0x1ed>; defm V_MAD_F16 : VOP3_F16_Real_vi <0x1ea>; defm V_MAD_U16 : VOP3_F16_Real_vi <0x1eb>; defm V_MAD_I16 : VOP3_F16_Real_vi <0x1ec>; defm V_FMA_F16 : VOP3_F16_Real_vi <0x1ee>; defm V_DIV_FIXUP_F16 : VOP3_F16_Real_vi <0x1ef>; defm V_INTERP_P2_F16 : VOP3Interp_F16_Real_vi <0x276>; defm V_MAD_LEGACY_F16 : VOP3_F16_Real_gfx9 <0x1ea, "V_MAD_F16", "v_mad_legacy_f16">; defm V_MAD_LEGACY_U16 : VOP3_F16_Real_gfx9 <0x1eb, "V_MAD_U16", "v_mad_legacy_u16">; defm V_MAD_LEGACY_I16 : VOP3_F16_Real_gfx9 <0x1ec, "V_MAD_I16", "v_mad_legacy_i16">; defm V_FMA_LEGACY_F16 : VOP3_F16_Real_gfx9 <0x1ee, "V_FMA_F16", "v_fma_legacy_f16">; defm V_DIV_FIXUP_LEGACY_F16 : VOP3_F16_Real_gfx9 <0x1ef, "V_DIV_FIXUP_F16", "v_div_fixup_legacy_f16">; defm V_INTERP_P2_LEGACY_F16 : VOP3Interp_F16_Real_gfx9 <0x276, "V_INTERP_P2_F16", "v_interp_p2_legacy_f16">; defm V_MAD_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x203, "v_mad_f16">; defm V_MAD_U16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x204, "v_mad_u16">; defm V_MAD_I16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x205, "v_mad_i16">; defm V_FMA_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x206, "v_fma_f16">; defm V_DIV_FIXUP_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x207, "v_div_fixup_f16">; defm V_INTERP_P2_F16_gfx9 : VOP3Interp_F16_Real_gfx9 <0x277, "V_INTERP_P2_F16_gfx9", "v_interp_p2_f16">; defm V_ADD_I32_gfx9 : VOP3_Real_gfx9 <0x29c, "v_add_i32">; defm V_SUB_I32_gfx9 : 
VOP3_Real_gfx9 <0x29d, "v_sub_i32">; defm V_INTERP_P1_F32_e64 : VOP3Interp_Real_vi <0x270>; defm V_INTERP_P2_F32_e64 : VOP3Interp_Real_vi <0x271>; defm V_INTERP_MOV_F32_e64 : VOP3Interp_Real_vi <0x272>; defm V_INTERP_P1LL_F16 : VOP3Interp_Real_vi <0x274>; defm V_INTERP_P1LV_F16 : VOP3Interp_Real_vi <0x275>; defm V_ADD_F64 : VOP3_Real_vi <0x280>; defm V_MUL_F64 : VOP3_Real_vi <0x281>; defm V_MIN_F64 : VOP3_Real_vi <0x282>; defm V_MAX_F64 : VOP3_Real_vi <0x283>; defm V_LDEXP_F64 : VOP3_Real_vi <0x284>; defm V_MUL_LO_U32 : VOP3_Real_vi <0x285>; // removed from VI as identical to V_MUL_LO_U32 let isAsmParserOnly = 1 in { defm V_MUL_LO_I32 : VOP3_Real_vi <0x285>; } defm V_MUL_HI_U32 : VOP3_Real_vi <0x286>; defm V_MUL_HI_I32 : VOP3_Real_vi <0x287>; defm V_LSHLREV_B64 : VOP3_Real_vi <0x28f>; defm V_LSHRREV_B64 : VOP3_Real_vi <0x290>; defm V_ASHRREV_I64 : VOP3_Real_vi <0x291>; defm V_TRIG_PREOP_F64 : VOP3_Real_vi <0x292>; defm V_LSHL_ADD_U32 : VOP3_Real_vi <0x1fd>; defm V_ADD_LSHL_U32 : VOP3_Real_vi <0x1fe>; defm V_ADD3_U32 : VOP3_Real_vi <0x1ff>; defm V_LSHL_OR_B32 : VOP3_Real_vi <0x200>; defm V_AND_OR_B32 : VOP3_Real_vi <0x201>; defm V_OR3_B32 : VOP3_Real_vi <0x202>; defm V_PACK_B32_F16 : VOP3OpSel_Real_gfx9 <0x2a0>; defm V_XAD_U32 : VOP3_Real_vi <0x1f3>; defm V_MIN3_F16 : VOP3OpSel_Real_gfx9 <0x1f4>; defm V_MIN3_I16 : VOP3OpSel_Real_gfx9 <0x1f5>; defm V_MIN3_U16 : VOP3OpSel_Real_gfx9 <0x1f6>; defm V_MAX3_F16 : VOP3OpSel_Real_gfx9 <0x1f7>; defm V_MAX3_I16 : VOP3OpSel_Real_gfx9 <0x1f8>; defm V_MAX3_U16 : VOP3OpSel_Real_gfx9 <0x1f9>; defm V_MED3_F16 : VOP3OpSel_Real_gfx9 <0x1fa>; defm V_MED3_I16 : VOP3OpSel_Real_gfx9 <0x1fb>; defm V_MED3_U16 : VOP3OpSel_Real_gfx9 <0x1fc>; defm V_ADD_I16 : VOP3OpSel_Real_gfx9 <0x29e>; defm V_SUB_I16 : VOP3OpSel_Real_gfx9 <0x29f>; defm V_MAD_U32_U16 : VOP3OpSel_Real_gfx9 <0x1f1>; defm V_MAD_I32_I16 : VOP3OpSel_Real_gfx9 <0x1f2>; defm V_CVT_PKNORM_I16_F16 : VOP3OpSel_Real_gfx9 <0x299>; defm V_CVT_PKNORM_U16_F16 : VOP3OpSel_Real_gfx9 <0x29a>; 
Index: projects/clang700-import/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp =================================================================== --- projects/clang700-import/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp (revision 337644) +++ projects/clang700-import/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp (revision 337645) @@ -1,3724 +1,3772 @@ //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines an instruction selector for the NVPTX target. // //===----------------------------------------------------------------------===// #include "NVPTXISelDAGToDAG.h" #include "NVPTXUtilities.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instructions.h" +#include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetIntrinsicInfo.h" using namespace llvm; #define DEBUG_TYPE "nvptx-isel" /// createNVPTXISelDag - This pass converts a legalized DAG into a /// NVPTX-specific DAG, ready for instruction scheduling. 
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel) { return new NVPTXDAGToDAGISel(TM, OptLevel); } NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, CodeGenOpt::Level OptLevel) : SelectionDAGISel(tm, OptLevel), TM(tm) { doMulWide = (OptLevel > 0); } bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { Subtarget = &static_cast(MF.getSubtarget()); return SelectionDAGISel::runOnMachineFunction(MF); } int NVPTXDAGToDAGISel::getDivF32Level() const { return Subtarget->getTargetLowering()->getDivF32Level(); } bool NVPTXDAGToDAGISel::usePrecSqrtF32() const { return Subtarget->getTargetLowering()->usePrecSqrtF32(); } bool NVPTXDAGToDAGISel::useF32FTZ() const { return Subtarget->getTargetLowering()->useF32FTZ(*MF); } bool NVPTXDAGToDAGISel::allowFMA() const { const NVPTXTargetLowering *TL = Subtarget->getTargetLowering(); return TL->allowFMA(*MF, OptLevel); } bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const { const NVPTXTargetLowering *TL = Subtarget->getTargetLowering(); return TL->allowUnsafeFPMath(*MF); } bool NVPTXDAGToDAGISel::useShortPointers() const { return TM.useShortPointers(); } /// Select - Select instructions not customized! Used for /// expanded, promoted and normal instructions. void NVPTXDAGToDAGISel::Select(SDNode *N) { if (N->isMachineOpcode()) { N->setNodeId(-1); return; // Already selected. 
} switch (N->getOpcode()) { case ISD::LOAD: + case ISD::ATOMIC_LOAD: if (tryLoad(N)) return; break; case ISD::STORE: + case ISD::ATOMIC_STORE: if (tryStore(N)) return; break; case ISD::EXTRACT_VECTOR_ELT: if (tryEXTRACT_VECTOR_ELEMENT(N)) return; break; case NVPTXISD::SETP_F16X2: SelectSETP_F16X2(N); return; case NVPTXISD::LoadV2: case NVPTXISD::LoadV4: if (tryLoadVector(N)) return; break; case NVPTXISD::LDGV2: case NVPTXISD::LDGV4: case NVPTXISD::LDUV2: case NVPTXISD::LDUV4: if (tryLDGLDU(N)) return; break; case NVPTXISD::StoreV2: case NVPTXISD::StoreV4: if (tryStoreVector(N)) return; break; case NVPTXISD::LoadParam: case NVPTXISD::LoadParamV2: case NVPTXISD::LoadParamV4: if (tryLoadParam(N)) return; break; case NVPTXISD::StoreRetval: case NVPTXISD::StoreRetvalV2: case NVPTXISD::StoreRetvalV4: if (tryStoreRetval(N)) return; break; case NVPTXISD::StoreParam: case NVPTXISD::StoreParamV2: case NVPTXISD::StoreParamV4: case NVPTXISD::StoreParamS32: case NVPTXISD::StoreParamU32: if (tryStoreParam(N)) return; break; case ISD::INTRINSIC_WO_CHAIN: if (tryIntrinsicNoChain(N)) return; break; case ISD::INTRINSIC_W_CHAIN: if (tryIntrinsicChain(N)) return; break; case NVPTXISD::Tex1DFloatS32: case NVPTXISD::Tex1DFloatFloat: case NVPTXISD::Tex1DFloatFloatLevel: case NVPTXISD::Tex1DFloatFloatGrad: case NVPTXISD::Tex1DS32S32: case NVPTXISD::Tex1DS32Float: case NVPTXISD::Tex1DS32FloatLevel: case NVPTXISD::Tex1DS32FloatGrad: case NVPTXISD::Tex1DU32S32: case NVPTXISD::Tex1DU32Float: case NVPTXISD::Tex1DU32FloatLevel: case NVPTXISD::Tex1DU32FloatGrad: case NVPTXISD::Tex1DArrayFloatS32: case NVPTXISD::Tex1DArrayFloatFloat: case NVPTXISD::Tex1DArrayFloatFloatLevel: case NVPTXISD::Tex1DArrayFloatFloatGrad: case NVPTXISD::Tex1DArrayS32S32: case NVPTXISD::Tex1DArrayS32Float: case NVPTXISD::Tex1DArrayS32FloatLevel: case NVPTXISD::Tex1DArrayS32FloatGrad: case NVPTXISD::Tex1DArrayU32S32: case NVPTXISD::Tex1DArrayU32Float: case NVPTXISD::Tex1DArrayU32FloatLevel: case 
NVPTXISD::Tex1DArrayU32FloatGrad: case NVPTXISD::Tex2DFloatS32: case NVPTXISD::Tex2DFloatFloat: case NVPTXISD::Tex2DFloatFloatLevel: case NVPTXISD::Tex2DFloatFloatGrad: case NVPTXISD::Tex2DS32S32: case NVPTXISD::Tex2DS32Float: case NVPTXISD::Tex2DS32FloatLevel: case NVPTXISD::Tex2DS32FloatGrad: case NVPTXISD::Tex2DU32S32: case NVPTXISD::Tex2DU32Float: case NVPTXISD::Tex2DU32FloatLevel: case NVPTXISD::Tex2DU32FloatGrad: case NVPTXISD::Tex2DArrayFloatS32: case NVPTXISD::Tex2DArrayFloatFloat: case NVPTXISD::Tex2DArrayFloatFloatLevel: case NVPTXISD::Tex2DArrayFloatFloatGrad: case NVPTXISD::Tex2DArrayS32S32: case NVPTXISD::Tex2DArrayS32Float: case NVPTXISD::Tex2DArrayS32FloatLevel: case NVPTXISD::Tex2DArrayS32FloatGrad: case NVPTXISD::Tex2DArrayU32S32: case NVPTXISD::Tex2DArrayU32Float: case NVPTXISD::Tex2DArrayU32FloatLevel: case NVPTXISD::Tex2DArrayU32FloatGrad: case NVPTXISD::Tex3DFloatS32: case NVPTXISD::Tex3DFloatFloat: case NVPTXISD::Tex3DFloatFloatLevel: case NVPTXISD::Tex3DFloatFloatGrad: case NVPTXISD::Tex3DS32S32: case NVPTXISD::Tex3DS32Float: case NVPTXISD::Tex3DS32FloatLevel: case NVPTXISD::Tex3DS32FloatGrad: case NVPTXISD::Tex3DU32S32: case NVPTXISD::Tex3DU32Float: case NVPTXISD::Tex3DU32FloatLevel: case NVPTXISD::Tex3DU32FloatGrad: case NVPTXISD::TexCubeFloatFloat: case NVPTXISD::TexCubeFloatFloatLevel: case NVPTXISD::TexCubeS32Float: case NVPTXISD::TexCubeS32FloatLevel: case NVPTXISD::TexCubeU32Float: case NVPTXISD::TexCubeU32FloatLevel: case NVPTXISD::TexCubeArrayFloatFloat: case NVPTXISD::TexCubeArrayFloatFloatLevel: case NVPTXISD::TexCubeArrayS32Float: case NVPTXISD::TexCubeArrayS32FloatLevel: case NVPTXISD::TexCubeArrayU32Float: case NVPTXISD::TexCubeArrayU32FloatLevel: case NVPTXISD::Tld4R2DFloatFloat: case NVPTXISD::Tld4G2DFloatFloat: case NVPTXISD::Tld4B2DFloatFloat: case NVPTXISD::Tld4A2DFloatFloat: case NVPTXISD::Tld4R2DS64Float: case NVPTXISD::Tld4G2DS64Float: case NVPTXISD::Tld4B2DS64Float: case NVPTXISD::Tld4A2DS64Float: case 
NVPTXISD::Tld4R2DU64Float: case NVPTXISD::Tld4G2DU64Float: case NVPTXISD::Tld4B2DU64Float: case NVPTXISD::Tld4A2DU64Float: case NVPTXISD::TexUnified1DFloatS32: case NVPTXISD::TexUnified1DFloatFloat: case NVPTXISD::TexUnified1DFloatFloatLevel: case NVPTXISD::TexUnified1DFloatFloatGrad: case NVPTXISD::TexUnified1DS32S32: case NVPTXISD::TexUnified1DS32Float: case NVPTXISD::TexUnified1DS32FloatLevel: case NVPTXISD::TexUnified1DS32FloatGrad: case NVPTXISD::TexUnified1DU32S32: case NVPTXISD::TexUnified1DU32Float: case NVPTXISD::TexUnified1DU32FloatLevel: case NVPTXISD::TexUnified1DU32FloatGrad: case NVPTXISD::TexUnified1DArrayFloatS32: case NVPTXISD::TexUnified1DArrayFloatFloat: case NVPTXISD::TexUnified1DArrayFloatFloatLevel: case NVPTXISD::TexUnified1DArrayFloatFloatGrad: case NVPTXISD::TexUnified1DArrayS32S32: case NVPTXISD::TexUnified1DArrayS32Float: case NVPTXISD::TexUnified1DArrayS32FloatLevel: case NVPTXISD::TexUnified1DArrayS32FloatGrad: case NVPTXISD::TexUnified1DArrayU32S32: case NVPTXISD::TexUnified1DArrayU32Float: case NVPTXISD::TexUnified1DArrayU32FloatLevel: case NVPTXISD::TexUnified1DArrayU32FloatGrad: case NVPTXISD::TexUnified2DFloatS32: case NVPTXISD::TexUnified2DFloatFloat: case NVPTXISD::TexUnified2DFloatFloatLevel: case NVPTXISD::TexUnified2DFloatFloatGrad: case NVPTXISD::TexUnified2DS32S32: case NVPTXISD::TexUnified2DS32Float: case NVPTXISD::TexUnified2DS32FloatLevel: case NVPTXISD::TexUnified2DS32FloatGrad: case NVPTXISD::TexUnified2DU32S32: case NVPTXISD::TexUnified2DU32Float: case NVPTXISD::TexUnified2DU32FloatLevel: case NVPTXISD::TexUnified2DU32FloatGrad: case NVPTXISD::TexUnified2DArrayFloatS32: case NVPTXISD::TexUnified2DArrayFloatFloat: case NVPTXISD::TexUnified2DArrayFloatFloatLevel: case NVPTXISD::TexUnified2DArrayFloatFloatGrad: case NVPTXISD::TexUnified2DArrayS32S32: case NVPTXISD::TexUnified2DArrayS32Float: case NVPTXISD::TexUnified2DArrayS32FloatLevel: case NVPTXISD::TexUnified2DArrayS32FloatGrad: case NVPTXISD::TexUnified2DArrayU32S32: 
case NVPTXISD::TexUnified2DArrayU32Float: case NVPTXISD::TexUnified2DArrayU32FloatLevel: case NVPTXISD::TexUnified2DArrayU32FloatGrad: case NVPTXISD::TexUnified3DFloatS32: case NVPTXISD::TexUnified3DFloatFloat: case NVPTXISD::TexUnified3DFloatFloatLevel: case NVPTXISD::TexUnified3DFloatFloatGrad: case NVPTXISD::TexUnified3DS32S32: case NVPTXISD::TexUnified3DS32Float: case NVPTXISD::TexUnified3DS32FloatLevel: case NVPTXISD::TexUnified3DS32FloatGrad: case NVPTXISD::TexUnified3DU32S32: case NVPTXISD::TexUnified3DU32Float: case NVPTXISD::TexUnified3DU32FloatLevel: case NVPTXISD::TexUnified3DU32FloatGrad: case NVPTXISD::TexUnifiedCubeFloatFloat: case NVPTXISD::TexUnifiedCubeFloatFloatLevel: case NVPTXISD::TexUnifiedCubeS32Float: case NVPTXISD::TexUnifiedCubeS32FloatLevel: case NVPTXISD::TexUnifiedCubeU32Float: case NVPTXISD::TexUnifiedCubeU32FloatLevel: case NVPTXISD::TexUnifiedCubeArrayFloatFloat: case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: case NVPTXISD::TexUnifiedCubeArrayS32Float: case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: case NVPTXISD::TexUnifiedCubeArrayU32Float: case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: case NVPTXISD::Tld4UnifiedR2DFloatFloat: case NVPTXISD::Tld4UnifiedG2DFloatFloat: case NVPTXISD::Tld4UnifiedB2DFloatFloat: case NVPTXISD::Tld4UnifiedA2DFloatFloat: case NVPTXISD::Tld4UnifiedR2DS64Float: case NVPTXISD::Tld4UnifiedG2DS64Float: case NVPTXISD::Tld4UnifiedB2DS64Float: case NVPTXISD::Tld4UnifiedA2DS64Float: case NVPTXISD::Tld4UnifiedR2DU64Float: case NVPTXISD::Tld4UnifiedG2DU64Float: case NVPTXISD::Tld4UnifiedB2DU64Float: case NVPTXISD::Tld4UnifiedA2DU64Float: if (tryTextureIntrinsic(N)) return; break; case NVPTXISD::Suld1DI8Clamp: case NVPTXISD::Suld1DI16Clamp: case NVPTXISD::Suld1DI32Clamp: case NVPTXISD::Suld1DI64Clamp: case NVPTXISD::Suld1DV2I8Clamp: case NVPTXISD::Suld1DV2I16Clamp: case NVPTXISD::Suld1DV2I32Clamp: case NVPTXISD::Suld1DV2I64Clamp: case NVPTXISD::Suld1DV4I8Clamp: case NVPTXISD::Suld1DV4I16Clamp: case 
NVPTXISD::Suld1DV4I32Clamp: case NVPTXISD::Suld1DArrayI8Clamp: case NVPTXISD::Suld1DArrayI16Clamp: case NVPTXISD::Suld1DArrayI32Clamp: case NVPTXISD::Suld1DArrayI64Clamp: case NVPTXISD::Suld1DArrayV2I8Clamp: case NVPTXISD::Suld1DArrayV2I16Clamp: case NVPTXISD::Suld1DArrayV2I32Clamp: case NVPTXISD::Suld1DArrayV2I64Clamp: case NVPTXISD::Suld1DArrayV4I8Clamp: case NVPTXISD::Suld1DArrayV4I16Clamp: case NVPTXISD::Suld1DArrayV4I32Clamp: case NVPTXISD::Suld2DI8Clamp: case NVPTXISD::Suld2DI16Clamp: case NVPTXISD::Suld2DI32Clamp: case NVPTXISD::Suld2DI64Clamp: case NVPTXISD::Suld2DV2I8Clamp: case NVPTXISD::Suld2DV2I16Clamp: case NVPTXISD::Suld2DV2I32Clamp: case NVPTXISD::Suld2DV2I64Clamp: case NVPTXISD::Suld2DV4I8Clamp: case NVPTXISD::Suld2DV4I16Clamp: case NVPTXISD::Suld2DV4I32Clamp: case NVPTXISD::Suld2DArrayI8Clamp: case NVPTXISD::Suld2DArrayI16Clamp: case NVPTXISD::Suld2DArrayI32Clamp: case NVPTXISD::Suld2DArrayI64Clamp: case NVPTXISD::Suld2DArrayV2I8Clamp: case NVPTXISD::Suld2DArrayV2I16Clamp: case NVPTXISD::Suld2DArrayV2I32Clamp: case NVPTXISD::Suld2DArrayV2I64Clamp: case NVPTXISD::Suld2DArrayV4I8Clamp: case NVPTXISD::Suld2DArrayV4I16Clamp: case NVPTXISD::Suld2DArrayV4I32Clamp: case NVPTXISD::Suld3DI8Clamp: case NVPTXISD::Suld3DI16Clamp: case NVPTXISD::Suld3DI32Clamp: case NVPTXISD::Suld3DI64Clamp: case NVPTXISD::Suld3DV2I8Clamp: case NVPTXISD::Suld3DV2I16Clamp: case NVPTXISD::Suld3DV2I32Clamp: case NVPTXISD::Suld3DV2I64Clamp: case NVPTXISD::Suld3DV4I8Clamp: case NVPTXISD::Suld3DV4I16Clamp: case NVPTXISD::Suld3DV4I32Clamp: case NVPTXISD::Suld1DI8Trap: case NVPTXISD::Suld1DI16Trap: case NVPTXISD::Suld1DI32Trap: case NVPTXISD::Suld1DI64Trap: case NVPTXISD::Suld1DV2I8Trap: case NVPTXISD::Suld1DV2I16Trap: case NVPTXISD::Suld1DV2I32Trap: case NVPTXISD::Suld1DV2I64Trap: case NVPTXISD::Suld1DV4I8Trap: case NVPTXISD::Suld1DV4I16Trap: case NVPTXISD::Suld1DV4I32Trap: case NVPTXISD::Suld1DArrayI8Trap: case NVPTXISD::Suld1DArrayI16Trap: case NVPTXISD::Suld1DArrayI32Trap: case 
NVPTXISD::Suld1DArrayI64Trap: case NVPTXISD::Suld1DArrayV2I8Trap: case NVPTXISD::Suld1DArrayV2I16Trap: case NVPTXISD::Suld1DArrayV2I32Trap: case NVPTXISD::Suld1DArrayV2I64Trap: case NVPTXISD::Suld1DArrayV4I8Trap: case NVPTXISD::Suld1DArrayV4I16Trap: case NVPTXISD::Suld1DArrayV4I32Trap: case NVPTXISD::Suld2DI8Trap: case NVPTXISD::Suld2DI16Trap: case NVPTXISD::Suld2DI32Trap: case NVPTXISD::Suld2DI64Trap: case NVPTXISD::Suld2DV2I8Trap: case NVPTXISD::Suld2DV2I16Trap: case NVPTXISD::Suld2DV2I32Trap: case NVPTXISD::Suld2DV2I64Trap: case NVPTXISD::Suld2DV4I8Trap: case NVPTXISD::Suld2DV4I16Trap: case NVPTXISD::Suld2DV4I32Trap: case NVPTXISD::Suld2DArrayI8Trap: case NVPTXISD::Suld2DArrayI16Trap: case NVPTXISD::Suld2DArrayI32Trap: case NVPTXISD::Suld2DArrayI64Trap: case NVPTXISD::Suld2DArrayV2I8Trap: case NVPTXISD::Suld2DArrayV2I16Trap: case NVPTXISD::Suld2DArrayV2I32Trap: case NVPTXISD::Suld2DArrayV2I64Trap: case NVPTXISD::Suld2DArrayV4I8Trap: case NVPTXISD::Suld2DArrayV4I16Trap: case NVPTXISD::Suld2DArrayV4I32Trap: case NVPTXISD::Suld3DI8Trap: case NVPTXISD::Suld3DI16Trap: case NVPTXISD::Suld3DI32Trap: case NVPTXISD::Suld3DI64Trap: case NVPTXISD::Suld3DV2I8Trap: case NVPTXISD::Suld3DV2I16Trap: case NVPTXISD::Suld3DV2I32Trap: case NVPTXISD::Suld3DV2I64Trap: case NVPTXISD::Suld3DV4I8Trap: case NVPTXISD::Suld3DV4I16Trap: case NVPTXISD::Suld3DV4I32Trap: case NVPTXISD::Suld1DI8Zero: case NVPTXISD::Suld1DI16Zero: case NVPTXISD::Suld1DI32Zero: case NVPTXISD::Suld1DI64Zero: case NVPTXISD::Suld1DV2I8Zero: case NVPTXISD::Suld1DV2I16Zero: case NVPTXISD::Suld1DV2I32Zero: case NVPTXISD::Suld1DV2I64Zero: case NVPTXISD::Suld1DV4I8Zero: case NVPTXISD::Suld1DV4I16Zero: case NVPTXISD::Suld1DV4I32Zero: case NVPTXISD::Suld1DArrayI8Zero: case NVPTXISD::Suld1DArrayI16Zero: case NVPTXISD::Suld1DArrayI32Zero: case NVPTXISD::Suld1DArrayI64Zero: case NVPTXISD::Suld1DArrayV2I8Zero: case NVPTXISD::Suld1DArrayV2I16Zero: case NVPTXISD::Suld1DArrayV2I32Zero: case NVPTXISD::Suld1DArrayV2I64Zero: case 
NVPTXISD::Suld1DArrayV4I8Zero: case NVPTXISD::Suld1DArrayV4I16Zero: case NVPTXISD::Suld1DArrayV4I32Zero: case NVPTXISD::Suld2DI8Zero: case NVPTXISD::Suld2DI16Zero: case NVPTXISD::Suld2DI32Zero: case NVPTXISD::Suld2DI64Zero: case NVPTXISD::Suld2DV2I8Zero: case NVPTXISD::Suld2DV2I16Zero: case NVPTXISD::Suld2DV2I32Zero: case NVPTXISD::Suld2DV2I64Zero: case NVPTXISD::Suld2DV4I8Zero: case NVPTXISD::Suld2DV4I16Zero: case NVPTXISD::Suld2DV4I32Zero: case NVPTXISD::Suld2DArrayI8Zero: case NVPTXISD::Suld2DArrayI16Zero: case NVPTXISD::Suld2DArrayI32Zero: case NVPTXISD::Suld2DArrayI64Zero: case NVPTXISD::Suld2DArrayV2I8Zero: case NVPTXISD::Suld2DArrayV2I16Zero: case NVPTXISD::Suld2DArrayV2I32Zero: case NVPTXISD::Suld2DArrayV2I64Zero: case NVPTXISD::Suld2DArrayV4I8Zero: case NVPTXISD::Suld2DArrayV4I16Zero: case NVPTXISD::Suld2DArrayV4I32Zero: case NVPTXISD::Suld3DI8Zero: case NVPTXISD::Suld3DI16Zero: case NVPTXISD::Suld3DI32Zero: case NVPTXISD::Suld3DI64Zero: case NVPTXISD::Suld3DV2I8Zero: case NVPTXISD::Suld3DV2I16Zero: case NVPTXISD::Suld3DV2I32Zero: case NVPTXISD::Suld3DV2I64Zero: case NVPTXISD::Suld3DV4I8Zero: case NVPTXISD::Suld3DV4I16Zero: case NVPTXISD::Suld3DV4I32Zero: if (trySurfaceIntrinsic(N)) return; break; case ISD::AND: case ISD::SRA: case ISD::SRL: // Try to select BFE if (tryBFE(N)) return; break; case ISD::ADDRSPACECAST: SelectAddrSpaceCast(N); return; case ISD::ConstantFP: if (tryConstantFP16(N)) return; break; default: break; } SelectCode(N); } bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) { unsigned IID = cast(N->getOperand(1))->getZExtValue(); switch (IID) { default: return false; case Intrinsic::nvvm_ldg_global_f: case Intrinsic::nvvm_ldg_global_i: case Intrinsic::nvvm_ldg_global_p: case Intrinsic::nvvm_ldu_global_f: case Intrinsic::nvvm_ldu_global_i: case Intrinsic::nvvm_ldu_global_p: return tryLDGLDU(N); } } // There's no way to specify FP16 immediates in .f16 ops, so we have to // load them into an .f16 register first. 
bool NVPTXDAGToDAGISel::tryConstantFP16(SDNode *N) { if (N->getValueType(0) != MVT::f16) return false; SDValue Val = CurDAG->getTargetConstantFP( cast(N)->getValueAPF(), SDLoc(N), MVT::f16); SDNode *LoadConstF16 = CurDAG->getMachineNode(NVPTX::LOAD_CONST_F16, SDLoc(N), MVT::f16, Val); ReplaceNode(N, LoadConstF16); return true; } // Map ISD:CONDCODE value to appropriate CmpMode expected by // NVPTXInstPrinter::printCmpMode() static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) { using NVPTX::PTXCmpMode::CmpMode; unsigned PTXCmpMode = [](ISD::CondCode CC) { switch (CC) { default: llvm_unreachable("Unexpected condition code."); case ISD::SETOEQ: return CmpMode::EQ; case ISD::SETOGT: return CmpMode::GT; case ISD::SETOGE: return CmpMode::GE; case ISD::SETOLT: return CmpMode::LT; case ISD::SETOLE: return CmpMode::LE; case ISD::SETONE: return CmpMode::NE; case ISD::SETO: return CmpMode::NUM; case ISD::SETUO: return CmpMode::NotANumber; case ISD::SETUEQ: return CmpMode::EQU; case ISD::SETUGT: return CmpMode::GTU; case ISD::SETUGE: return CmpMode::GEU; case ISD::SETULT: return CmpMode::LTU; case ISD::SETULE: return CmpMode::LEU; case ISD::SETUNE: return CmpMode::NEU; case ISD::SETEQ: return CmpMode::EQ; case ISD::SETGT: return CmpMode::GT; case ISD::SETGE: return CmpMode::GE; case ISD::SETLT: return CmpMode::LT; case ISD::SETLE: return CmpMode::LE; case ISD::SETNE: return CmpMode::NE; } }(CondCode.get()); if (FTZ) PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG; return PTXCmpMode; } bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) { unsigned PTXCmpMode = getPTXCmpMode(*cast(N->getOperand(2)), useF32FTZ()); SDLoc DL(N); SDNode *SetP = CurDAG->getMachineNode( NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0), N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32)); ReplaceNode(N, SetP); return true; } // Find all instances of extract_vector_elt that use this v2f16 vector // and coalesce them into a scattering move instruction. 
bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) { SDValue Vector = N->getOperand(0); // We only care about f16x2 as it's the only real vector type we // need to deal with. if (Vector.getSimpleValueType() != MVT::v2f16) return false; // Find and record all uses of this vector that extract element 0 or 1. SmallVector E0, E1; for (const auto &U : Vector.getNode()->uses()) { if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT) continue; if (U->getOperand(0) != Vector) continue; if (const ConstantSDNode *IdxConst = dyn_cast(U->getOperand(1))) { if (IdxConst->getZExtValue() == 0) E0.push_back(U); else if (IdxConst->getZExtValue() == 1) E1.push_back(U); else llvm_unreachable("Invalid vector index."); } } // There's no point scattering f16x2 if we only ever access one // element of it. if (E0.empty() || E1.empty()) return false; unsigned Op = NVPTX::SplitF16x2; // If the vector has been BITCAST'ed from i32, we can use original // value directly and avoid register-to-register move. SDValue Source = Vector; if (Vector->getOpcode() == ISD::BITCAST) { Op = NVPTX::SplitI32toF16x2; Source = Vector->getOperand(0); } // Merge (f16 extractelt(V, 0), f16 extractelt(V,1)) // into f16,f16 SplitF16x2(V) SDNode *ScatterOp = CurDAG->getMachineNode(Op, SDLoc(N), MVT::f16, MVT::f16, Source); for (auto *Node : E0) ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0)); for (auto *Node : E1) ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1)); return true; } static unsigned int getCodeAddrSpace(MemSDNode *N) { const Value *Src = N->getMemOperand()->getValue(); if (!Src) return NVPTX::PTXLdStInstCode::GENERIC; if (auto *PT = dyn_cast(Src->getType())) { switch (PT->getAddressSpace()) { case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL; case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL; case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED; case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC; case 
llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM; case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT; default: break; } } return NVPTX::PTXLdStInstCode::GENERIC; } static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget, unsigned CodeAddrSpace, MachineFunction *F) { // We use ldg (i.e. ld.global.nc) for invariant loads from the global address // space. // // We have two ways of identifying invariant loads: Loads may be explicitly // marked as invariant, or we may infer them to be invariant. // // We currently infer invariance for loads from // - constant global variables, and // - kernel function pointer params that are noalias (i.e. __restrict) and // never written to. // // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally // not during the SelectionDAG phase). // // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for // explicitly invariant loads because these are how clang tells us to use ldg // when the user uses a builtin. if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL) return false; if (N->isInvariant()) return true; bool IsKernelFn = isKernelFunction(F->getFunction()); // We use GetUnderlyingObjects() here instead of GetUnderlyingObject() mainly // because the former looks through phi nodes while the latter does not. We // need to look through phi nodes to handle pointer induction variables. 
SmallVector Objs; GetUnderlyingObjects(const_cast(N->getMemOperand()->getValue()), Objs, F->getDataLayout()); return all_of(Objs, [&](Value *V) { if (auto *A = dyn_cast(V)) return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr(); if (auto *GV = dyn_cast(V)) return GV->isConstant(); return false; }); } bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) { unsigned IID = cast(N->getOperand(0))->getZExtValue(); switch (IID) { default: return false; case Intrinsic::nvvm_texsurf_handle_internal: SelectTexSurfHandle(N); return true; } } void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) { // Op 0 is the intrinsic ID SDValue Wrapper = N->getOperand(1); SDValue GlobalVal = Wrapper.getOperand(0); ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64, GlobalVal)); } void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { SDValue Src = N->getOperand(0); AddrSpaceCastSDNode *CastN = cast(N); unsigned SrcAddrSpace = CastN->getSrcAddressSpace(); unsigned DstAddrSpace = CastN->getDestAddressSpace(); assert(SrcAddrSpace != DstAddrSpace && "addrspacecast must be between different address spaces"); if (DstAddrSpace == ADDRESS_SPACE_GENERIC) { // Specific to generic unsigned Opc; switch (SrcAddrSpace) { default: report_fatal_error("Bad address space in addrspacecast"); case ADDRESS_SPACE_GLOBAL: Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes; break; case ADDRESS_SPACE_SHARED: Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_shared_yes_6432 : NVPTX::cvta_shared_yes_64) : NVPTX::cvta_shared_yes; break; case ADDRESS_SPACE_CONST: Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_const_yes_6432 : NVPTX::cvta_const_yes_64) : NVPTX::cvta_const_yes; break; case ADDRESS_SPACE_LOCAL: Opc = TM.is64Bit() ? (useShortPointers() ? 
NVPTX::cvta_local_yes_6432 : NVPTX::cvta_local_yes_64) : NVPTX::cvta_local_yes; break; } ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src)); return; } else { // Generic to specific if (SrcAddrSpace != 0) report_fatal_error("Cannot cast between two non-generic address spaces"); unsigned Opc; switch (DstAddrSpace) { default: report_fatal_error("Bad address space in addrspacecast"); case ADDRESS_SPACE_GLOBAL: Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64 : NVPTX::cvta_to_global_yes; break; case ADDRESS_SPACE_SHARED: Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_shared_yes_3264 : NVPTX::cvta_to_shared_yes_64) : NVPTX::cvta_to_shared_yes; break; case ADDRESS_SPACE_CONST: Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_const_yes_3264 : NVPTX::cvta_to_const_yes_64) : NVPTX::cvta_to_const_yes; break; case ADDRESS_SPACE_LOCAL: Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_local_yes_3264 : NVPTX::cvta_to_local_yes_64) : NVPTX::cvta_to_local_yes; break; case ADDRESS_SPACE_PARAM: Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64 : NVPTX::nvvm_ptr_gen_to_param; break; } ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src)); return; } } // Helper function template to reduce amount of boilerplate code for // opcode selection. 
static Optional pickOpcodeForVT( MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16, unsigned Opcode_i32, Optional Opcode_i64, unsigned Opcode_f16, unsigned Opcode_f16x2, unsigned Opcode_f32, Optional Opcode_f64) { switch (VT) { case MVT::i1: case MVT::i8: return Opcode_i8; case MVT::i16: return Opcode_i16; case MVT::i32: return Opcode_i32; case MVT::i64: return Opcode_i64; case MVT::f16: return Opcode_f16; case MVT::v2f16: return Opcode_f16x2; case MVT::f32: return Opcode_f32; case MVT::f64: return Opcode_f64; default: return None; } } bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) { SDLoc dl(N); - LoadSDNode *LD = cast(N); + MemSDNode *LD = cast(N); + assert(LD->readMem() && "Expected load"); + LoadSDNode *PlainLoad = dyn_cast(N); EVT LoadedVT = LD->getMemoryVT(); SDNode *NVPTXLD = nullptr; // do not support pre/post inc/dec - if (LD->isIndexed()) + if (PlainLoad && PlainLoad->isIndexed()) return false; if (!LoadedVT.isSimple()) return false; + AtomicOrdering Ordering = LD->getOrdering(); + // In order to lower atomic loads with stronger guarantees we would need to + // use load.acquire or insert fences. However these features were only added + // with PTX ISA 6.0 / sm_70. + // TODO: Check if we can actually use the new instructions and implement them. 
+ if (isStrongerThanMonotonic(Ordering)) + return false; + // Address Space Setting unsigned int CodeAddrSpace = getCodeAddrSpace(LD); if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) { return tryLDGLDU(N); } unsigned int PointerSize = CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace()); // Volatile Setting - // - .volatile is only availalble for .global and .shared - bool isVolatile = LD->isVolatile(); + // - .volatile is only available for .global and .shared + // - .volatile has the same memory synchronization semantics as .relaxed.sys + bool isVolatile = LD->isVolatile() || Ordering == AtomicOrdering::Monotonic; if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) isVolatile = false; // Type Setting: fromType + fromTypeWidth // // Sign : ISD::SEXTLOAD // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the // type is integer // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float MVT SimpleVT = LoadedVT.getSimpleVT(); MVT ScalarVT = SimpleVT.getScalarType(); // Read at least 8 bits (predicates are stored as 8-bit values) unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits()); unsigned int fromType; // Vector Setting unsigned vecType = NVPTX::PTXLdStInstCode::Scalar; if (SimpleVT.isVector()) { assert(LoadedVT == MVT::v2f16 && "Unexpected vector type"); // v2f16 is loaded using ld.b32 fromTypeWidth = 32; } - if ((LD->getExtensionType() == ISD::SEXTLOAD)) + if (PlainLoad && (PlainLoad->getExtensionType() == ISD::SEXTLOAD)) fromType = NVPTX::PTXLdStInstCode::Signed; else if (ScalarVT.isFloatingPoint()) // f16 uses .b16 as its storage type. fromType = ScalarVT.SimpleTy == MVT::f16 ? 
NVPTX::PTXLdStInstCode::Untyped : NVPTX::PTXLdStInstCode::Float; else fromType = NVPTX::PTXLdStInstCode::Unsigned; // Create the machine instruction DAG SDValue Chain = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue Addr; SDValue Offset, Base; Optional Opcode; MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy; if (SelectDirectAddr(N1, Addr)) { Opcode = pickOpcodeForVT( TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar, NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar, NVPTX::LD_f32_avar, NVPTX::LD_f64_avar); if (!Opcode) return false; SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl), getI32Imm(fromType, dl), getI32Imm(fromTypeWidth, dl), Addr, Chain }; NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT, MVT::Other, Ops); } else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset) : SelectADDRsi(N1.getNode(), N1, Base, Offset)) { Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi, NVPTX::LD_i32_asi, NVPTX::LD_i64_asi, NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi, NVPTX::LD_f32_asi, NVPTX::LD_f64_asi); if (!Opcode) return false; SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl), getI32Imm(fromType, dl), getI32Imm(fromTypeWidth, dl), Base, Offset, Chain }; NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT, MVT::Other, Ops); } else if (PointerSize == 64 ? 
SelectADDRri64(N1.getNode(), N1, Base, Offset) : SelectADDRri(N1.getNode(), N1, Base, Offset)) { if (PointerSize == 64) Opcode = pickOpcodeForVT( TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64, NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64, NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64); else Opcode = pickOpcodeForVT( TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari, NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari, NVPTX::LD_f32_ari, NVPTX::LD_f64_ari); if (!Opcode) return false; SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl), getI32Imm(fromType, dl), getI32Imm(fromTypeWidth, dl), Base, Offset, Chain }; NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT, MVT::Other, Ops); } else { if (PointerSize == 64) Opcode = pickOpcodeForVT( TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64, NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64, NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64, NVPTX::LD_f64_areg_64); else Opcode = pickOpcodeForVT( TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg, NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg, NVPTX::LD_f32_areg, NVPTX::LD_f64_areg); if (!Opcode) return false; SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl), getI32Imm(fromType, dl), getI32Imm(fromTypeWidth, dl), N1, Chain }; NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT, MVT::Other, Ops); } if (!NVPTXLD) return false; MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); MemRefs0[0] = cast(N)->getMemOperand(); cast(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1); ReplaceNode(N, NVPTXLD); return true; } bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) { SDValue Chain = N->getOperand(0); SDValue Op1 = N->getOperand(1); SDValue Addr, Offset, Base; Optional Opcode; SDLoc DL(N); SDNode *LD; MemSDNode *MemSD = cast(N); EVT 
LoadedVT = MemSD->getMemoryVT(); if (!LoadedVT.isSimple()) return false; // Address Space Setting unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD); if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) { return tryLDGLDU(N); } unsigned int PointerSize = CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace()); // Volatile Setting // - .volatile is only availalble for .global and .shared bool IsVolatile = MemSD->isVolatile(); if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) IsVolatile = false; // Vector Setting MVT SimpleVT = LoadedVT.getSimpleVT(); // Type Setting: fromType + fromTypeWidth // // Sign : ISD::SEXTLOAD // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the // type is integer // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float MVT ScalarVT = SimpleVT.getScalarType(); // Read at least 8 bits (predicates are stored as 8-bit values) unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits()); unsigned int FromType; // The last operand holds the original LoadSDNode::getExtensionType() value unsigned ExtensionType = cast( N->getOperand(N->getNumOperands() - 1))->getZExtValue(); if (ExtensionType == ISD::SEXTLOAD) FromType = NVPTX::PTXLdStInstCode::Signed; else if (ScalarVT.isFloatingPoint()) FromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped : NVPTX::PTXLdStInstCode::Float; else FromType = NVPTX::PTXLdStInstCode::Unsigned; unsigned VecType; switch (N->getOpcode()) { case NVPTXISD::LoadV2: VecType = NVPTX::PTXLdStInstCode::V2; break; case NVPTXISD::LoadV4: VecType = NVPTX::PTXLdStInstCode::V4; break; default: return false; } EVT EltVT = N->getValueType(0); // v8f16 is a special case. PTX doesn't have ld.v8.f16 // instruction. Instead, we split the vector into v2f16 chunks and // load them with ld.v4.b32. 
if (EltVT == MVT::v2f16) { assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode."); EltVT = MVT::i32; FromType = NVPTX::PTXLdStInstCode::Untyped; FromTypeWidth = 32; } if (SelectDirectAddr(Op1, Addr)) { switch (N->getOpcode()) { default: return false; case NVPTXISD::LoadV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar, NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar, NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar, NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar); break; case NVPTXISD::LoadV4: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar, NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar, None, NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar, NVPTX::LDV_f32_v4_avar, None); break; } if (!Opcode) return false; SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL), getI32Imm(FromType, DL), getI32Imm(FromTypeWidth, DL), Addr, Chain }; LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops); } else if (PointerSize == 64 ? 
SelectADDRsi64(Op1.getNode(), Op1, Base, Offset) : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { switch (N->getOpcode()) { default: return false; case NVPTXISD::LoadV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi, NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi, NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi, NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi); break; case NVPTXISD::LoadV4: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi, NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi, None, NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi, NVPTX::LDV_f32_v4_asi, None); break; } if (!Opcode) return false; SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL), getI32Imm(FromType, DL), getI32Imm(FromTypeWidth, DL), Base, Offset, Chain }; LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops); } else if (PointerSize == 64 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset) : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { if (PointerSize == 64) { switch (N->getOpcode()) { default: return false; case NVPTXISD::LoadV2: Opcode = pickOpcodeForVT( EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari_64, NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64, NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64, NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64, NVPTX::LDV_f64_v2_ari_64); break; case NVPTXISD::LoadV4: Opcode = pickOpcodeForVT( EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64, NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, None, NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64, NVPTX::LDV_f32_v4_ari_64, None); break; } } else { switch (N->getOpcode()) { default: return false; case NVPTXISD::LoadV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari, NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari, NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari, NVPTX::LDV_f32_v2_ari, 
NVPTX::LDV_f64_v2_ari); break; case NVPTXISD::LoadV4: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari, NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari, None, NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari, NVPTX::LDV_f32_v4_ari, None); break; } } if (!Opcode) return false; SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL), getI32Imm(FromType, DL), getI32Imm(FromTypeWidth, DL), Base, Offset, Chain }; LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops); } else { if (PointerSize == 64) { switch (N->getOpcode()) { default: return false; case NVPTXISD::LoadV2: Opcode = pickOpcodeForVT( EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64, NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64, NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64, NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64, NVPTX::LDV_f64_v2_areg_64); break; case NVPTXISD::LoadV4: Opcode = pickOpcodeForVT( EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64, NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, None, NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64, NVPTX::LDV_f32_v4_areg_64, None); break; } } else { switch (N->getOpcode()) { default: return false; case NVPTXISD::LoadV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg, NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg, NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg, NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg, NVPTX::LDV_f64_v2_areg); break; case NVPTXISD::LoadV4: Opcode = pickOpcodeForVT( EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg, NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, None, NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg, NVPTX::LDV_f32_v4_areg, None); break; } } if (!Opcode) return false; SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL), getI32Imm(FromType, DL), getI32Imm(FromTypeWidth, DL), Op1, Chain }; LD = 
CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops); } MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); MemRefs0[0] = cast(N)->getMemOperand(); cast(LD)->setMemRefs(MemRefs0, MemRefs0 + 1); ReplaceNode(N, LD); return true; } bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { SDValue Chain = N->getOperand(0); SDValue Op1; MemSDNode *Mem; bool IsLDG = true; // If this is an LDG intrinsic, the address is the third operand. If its an // LDG/LDU SD node (from custom vector handling), then its the second operand if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { Op1 = N->getOperand(2); Mem = cast(N); unsigned IID = cast(N->getOperand(1))->getZExtValue(); switch (IID) { default: return false; case Intrinsic::nvvm_ldg_global_f: case Intrinsic::nvvm_ldg_global_i: case Intrinsic::nvvm_ldg_global_p: IsLDG = true; break; case Intrinsic::nvvm_ldu_global_f: case Intrinsic::nvvm_ldu_global_i: case Intrinsic::nvvm_ldu_global_p: IsLDG = false; break; } } else { Op1 = N->getOperand(1); Mem = cast(N); } Optional Opcode; SDLoc DL(N); SDNode *LD; SDValue Base, Offset, Addr; EVT EltVT = Mem->getMemoryVT(); unsigned NumElts = 1; if (EltVT.isVector()) { NumElts = EltVT.getVectorNumElements(); EltVT = EltVT.getVectorElementType(); // vectors of f16 are loaded/stored as multiples of v2f16 elements. if (EltVT == MVT::f16 && N->getValueType(0) == MVT::v2f16) { assert(NumElts % 2 == 0 && "Vector must have even number of elements"); EltVT = MVT::v2f16; NumElts /= 2; } } // Build the "promoted" result VTList for the load. If we are really loading // i8s, then the return type will be promoted to i16 since we do not expose // 8-bit registers in NVPTX. EVT NodeVT = (EltVT == MVT::i8) ? 
MVT::i16 : EltVT; SmallVector InstVTs; for (unsigned i = 0; i != NumElts; ++i) { InstVTs.push_back(NodeVT); } InstVTs.push_back(MVT::Other); SDVTList InstVTList = CurDAG->getVTList(InstVTs); if (SelectDirectAddr(Op1, Addr)) { switch (N->getOpcode()) { default: return false; case ISD::LOAD: case ISD::INTRINSIC_W_CHAIN: if (IsLDG) Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_GLOBAL_i8avar, NVPTX::INT_PTX_LDG_GLOBAL_i16avar, NVPTX::INT_PTX_LDG_GLOBAL_i32avar, NVPTX::INT_PTX_LDG_GLOBAL_i64avar, NVPTX::INT_PTX_LDG_GLOBAL_f16avar, NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar, NVPTX::INT_PTX_LDG_GLOBAL_f32avar, NVPTX::INT_PTX_LDG_GLOBAL_f64avar); else Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_GLOBAL_i8avar, NVPTX::INT_PTX_LDU_GLOBAL_i16avar, NVPTX::INT_PTX_LDU_GLOBAL_i32avar, NVPTX::INT_PTX_LDU_GLOBAL_i64avar, NVPTX::INT_PTX_LDU_GLOBAL_f16avar, NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar, NVPTX::INT_PTX_LDU_GLOBAL_f32avar, NVPTX::INT_PTX_LDU_GLOBAL_f64avar); break; case NVPTXISD::LoadV2: case NVPTXISD::LDGV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar, NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar, NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar, NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar, NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar, NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar, NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar, NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar); break; case NVPTXISD::LDUV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar, NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar, NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar, NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar, NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar, NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar, NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar, NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar); break; case NVPTXISD::LoadV4: case NVPTXISD::LDGV4: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar, NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar, 
NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, None, NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar, NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar, NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, None); break; case NVPTXISD::LDUV4: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar, NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar, NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, None, NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar, NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar, NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, None); break; } if (!Opcode) return false; SDValue Ops[] = { Addr, Chain }; LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops); } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset) : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { if (TM.is64Bit()) { switch (N->getOpcode()) { default: return false; case ISD::LOAD: case ISD::INTRINSIC_W_CHAIN: if (IsLDG) Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_GLOBAL_i8ari64, NVPTX::INT_PTX_LDG_GLOBAL_i16ari64, NVPTX::INT_PTX_LDG_GLOBAL_i32ari64, NVPTX::INT_PTX_LDG_GLOBAL_i64ari64, NVPTX::INT_PTX_LDG_GLOBAL_f16ari64, NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64, NVPTX::INT_PTX_LDG_GLOBAL_f32ari64, NVPTX::INT_PTX_LDG_GLOBAL_f64ari64); else Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_GLOBAL_i8ari64, NVPTX::INT_PTX_LDU_GLOBAL_i16ari64, NVPTX::INT_PTX_LDU_GLOBAL_i32ari64, NVPTX::INT_PTX_LDU_GLOBAL_i64ari64, NVPTX::INT_PTX_LDU_GLOBAL_f16ari64, NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64, NVPTX::INT_PTX_LDU_GLOBAL_f32ari64, NVPTX::INT_PTX_LDU_GLOBAL_f64ari64); break; case NVPTXISD::LoadV2: case NVPTXISD::LDGV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64, NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64, NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64, NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64, NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64, NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64, NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64, NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64); break; case 
NVPTXISD::LDUV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64, NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64, NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64, NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64, NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64, NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64, NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64, NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64); break; case NVPTXISD::LoadV4: case NVPTXISD::LDGV4: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64, NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64, NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, None, NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64, NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64, NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, None); break; case NVPTXISD::LDUV4: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64, NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64, NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, None, NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64, NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64, NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, None); break; } } else { switch (N->getOpcode()) { default: return false; case ISD::LOAD: case ISD::INTRINSIC_W_CHAIN: if (IsLDG) Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_GLOBAL_i8ari, NVPTX::INT_PTX_LDG_GLOBAL_i16ari, NVPTX::INT_PTX_LDG_GLOBAL_i32ari, NVPTX::INT_PTX_LDG_GLOBAL_i64ari, NVPTX::INT_PTX_LDG_GLOBAL_f16ari, NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari, NVPTX::INT_PTX_LDG_GLOBAL_f32ari, NVPTX::INT_PTX_LDG_GLOBAL_f64ari); else Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_GLOBAL_i8ari, NVPTX::INT_PTX_LDU_GLOBAL_i16ari, NVPTX::INT_PTX_LDU_GLOBAL_i32ari, NVPTX::INT_PTX_LDU_GLOBAL_i64ari, NVPTX::INT_PTX_LDU_GLOBAL_f16ari, NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari, NVPTX::INT_PTX_LDU_GLOBAL_f32ari, NVPTX::INT_PTX_LDU_GLOBAL_f64ari); break; case NVPTXISD::LoadV2: case NVPTXISD::LDGV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32, 
NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32, NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32, NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32, NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32, NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32, NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32, NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32); break; case NVPTXISD::LDUV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32, NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32, NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32, NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32, NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32, NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32, NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32, NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32); break; case NVPTXISD::LoadV4: case NVPTXISD::LDGV4: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32, NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32, NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, None, NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32, NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32, NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, None); break; case NVPTXISD::LDUV4: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32, NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32, NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, None, NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32, NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32, NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, None); break; } } if (!Opcode) return false; SDValue Ops[] = {Base, Offset, Chain}; LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops); } else { if (TM.is64Bit()) { switch (N->getOpcode()) { default: return false; case ISD::LOAD: case ISD::INTRINSIC_W_CHAIN: if (IsLDG) Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_GLOBAL_i8areg64, NVPTX::INT_PTX_LDG_GLOBAL_i16areg64, NVPTX::INT_PTX_LDG_GLOBAL_i32areg64, NVPTX::INT_PTX_LDG_GLOBAL_i64areg64, NVPTX::INT_PTX_LDG_GLOBAL_f16areg64, NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64, NVPTX::INT_PTX_LDG_GLOBAL_f32areg64, NVPTX::INT_PTX_LDG_GLOBAL_f64areg64); else Opcode = 
pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_GLOBAL_i8areg64, NVPTX::INT_PTX_LDU_GLOBAL_i16areg64, NVPTX::INT_PTX_LDU_GLOBAL_i32areg64, NVPTX::INT_PTX_LDU_GLOBAL_i64areg64, NVPTX::INT_PTX_LDU_GLOBAL_f16areg64, NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64, NVPTX::INT_PTX_LDU_GLOBAL_f32areg64, NVPTX::INT_PTX_LDU_GLOBAL_f64areg64); break; case NVPTXISD::LoadV2: case NVPTXISD::LDGV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64, NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64, NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64, NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64, NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64, NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64, NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64, NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64); break; case NVPTXISD::LDUV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64, NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64, NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64, NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64, NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64, NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64, NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64, NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64); break; case NVPTXISD::LoadV4: case NVPTXISD::LDGV4: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64, NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64, NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, None, NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64, NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64, NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, None); break; case NVPTXISD::LDUV4: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64, NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64, NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, None, NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64, NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64, NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, None); break; } } else { switch (N->getOpcode()) { default: return false; case ISD::LOAD: case ISD::INTRINSIC_W_CHAIN: if (IsLDG) Opcode = 
pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_GLOBAL_i8areg, NVPTX::INT_PTX_LDG_GLOBAL_i16areg, NVPTX::INT_PTX_LDG_GLOBAL_i32areg, NVPTX::INT_PTX_LDG_GLOBAL_i64areg, NVPTX::INT_PTX_LDG_GLOBAL_f16areg, NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg, NVPTX::INT_PTX_LDG_GLOBAL_f32areg, NVPTX::INT_PTX_LDG_GLOBAL_f64areg); else Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_GLOBAL_i8areg, NVPTX::INT_PTX_LDU_GLOBAL_i16areg, NVPTX::INT_PTX_LDU_GLOBAL_i32areg, NVPTX::INT_PTX_LDU_GLOBAL_i64areg, NVPTX::INT_PTX_LDU_GLOBAL_f16areg, NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg, NVPTX::INT_PTX_LDU_GLOBAL_f32areg, NVPTX::INT_PTX_LDU_GLOBAL_f64areg); break; case NVPTXISD::LoadV2: case NVPTXISD::LDGV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32, NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32, NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32, NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32, NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32, NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32, NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32, NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32); break; case NVPTXISD::LDUV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32, NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32, NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32, NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32, NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32, NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32, NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32, NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32); break; case NVPTXISD::LoadV4: case NVPTXISD::LDGV4: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32, NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32, NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, None, NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32, NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32, NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, None); break; case NVPTXISD::LDUV4: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32, 
NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32, NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, None, NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32, NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32, NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, None); break; } } if (!Opcode) return false; SDValue Ops[] = { Op1, Chain }; LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops); } MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); MemRefs0[0] = Mem->getMemOperand(); cast(LD)->setMemRefs(MemRefs0, MemRefs0 + 1); // For automatic generation of LDG (through SelectLoad[Vector], not the // intrinsics), we may have an extending load like: // // i32,ch = load t0, t7, undef:i64 // // In this case, the matching logic above will select a load for the original // memory type (in this case, i8) and our types will not match (the node needs // to return an i32 in this case). Our LDG/LDU nodes do not support the // concept of sign-/zero-extension, so emulate it here by adding an explicit // CVT instruction. Ptxas should clean up any redundancies here. EVT OrigType = N->getValueType(0); LoadSDNode *LdNode = dyn_cast(N); if (OrigType != EltVT && LdNode) { // We have an extending-load. The instruction we selected operates on the // smaller type, but the SDNode we are replacing has the larger type. We // need to emit a CVT to make the types match. bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD; unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(), EltVT.getSimpleVT(), IsSigned); // For each output value, apply the manual sign/zero-extension and make sure // all users of the load go through that CVT. 
for (unsigned i = 0; i != NumElts; ++i) { SDValue Res(LD, i); SDValue OrigVal(N, i); SDNode *CvtNode = CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res, CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32)); ReplaceUses(OrigVal, SDValue(CvtNode, 0)); } } ReplaceNode(N, LD); return true; } bool NVPTXDAGToDAGISel::tryStore(SDNode *N) { SDLoc dl(N); - StoreSDNode *ST = cast(N); + MemSDNode *ST = cast(N); + assert(ST->writeMem() && "Expected store"); + StoreSDNode *PlainStore = dyn_cast(N); + AtomicSDNode *AtomicStore = dyn_cast(N); + assert((PlainStore || AtomicStore) && "Expected store"); EVT StoreVT = ST->getMemoryVT(); SDNode *NVPTXST = nullptr; // do not support pre/post inc/dec - if (ST->isIndexed()) + if (PlainStore && PlainStore->isIndexed()) return false; if (!StoreVT.isSimple()) return false; + AtomicOrdering Ordering = ST->getOrdering(); + // In order to lower atomic loads with stronger guarantees we would need to + // use store.release or insert fences. However these features were only added + // with PTX ISA 6.0 / sm_70. + // TODO: Check if we can actually use the new instructions and implement them. 
+ if (isStrongerThanMonotonic(Ordering)) + return false; + // Address Space Setting unsigned int CodeAddrSpace = getCodeAddrSpace(ST); unsigned int PointerSize = CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace()); // Volatile Setting - // - .volatile is only availalble for .global and .shared - bool isVolatile = ST->isVolatile(); + // - .volatile is only available for .global and .shared + // - .volatile has the same memory synchronization semantics as .relaxed.sys + bool isVolatile = ST->isVolatile() || Ordering == AtomicOrdering::Monotonic; if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) isVolatile = false; // Vector Setting MVT SimpleVT = StoreVT.getSimpleVT(); unsigned vecType = NVPTX::PTXLdStInstCode::Scalar; // Type Setting: toType + toTypeWidth // - for integer type, always use 'u' // MVT ScalarVT = SimpleVT.getScalarType(); unsigned toTypeWidth = ScalarVT.getSizeInBits(); if (SimpleVT.isVector()) { assert(StoreVT == MVT::v2f16 && "Unexpected vector type"); // v2f16 is stored using st.b32 toTypeWidth = 32; } unsigned int toType; if (ScalarVT.isFloatingPoint()) // f16 uses .b16 as its storage type. toType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped : NVPTX::PTXLdStInstCode::Float; else toType = NVPTX::PTXLdStInstCode::Unsigned; // Create the machine instruction DAG - SDValue Chain = N->getOperand(0); - SDValue N1 = N->getOperand(1); - SDValue N2 = N->getOperand(2); + SDValue Chain = ST->getChain(); + SDValue Value = PlainStore ? 
PlainStore->getValue() : AtomicStore->getVal(); + SDValue BasePtr = ST->getBasePtr(); SDValue Addr; SDValue Offset, Base; Optional Opcode; - MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy; + MVT::SimpleValueType SourceVT = + Value.getNode()->getSimpleValueType(0).SimpleTy; - if (SelectDirectAddr(N2, Addr)) { + if (SelectDirectAddr(BasePtr, Addr)) { Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar, NVPTX::ST_i32_avar, NVPTX::ST_i64_avar, NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar, NVPTX::ST_f32_avar, NVPTX::ST_f64_avar); if (!Opcode) return false; - SDValue Ops[] = { N1, getI32Imm(isVolatile, dl), - getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl), - getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr, - Chain }; + SDValue Ops[] = {Value, + getI32Imm(isVolatile, dl), + getI32Imm(CodeAddrSpace, dl), + getI32Imm(vecType, dl), + getI32Imm(toType, dl), + getI32Imm(toTypeWidth, dl), + Addr, + Chain}; NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops); - } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset) - : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { + } else if (PointerSize == 64 + ? 
SelectADDRsi64(BasePtr.getNode(), BasePtr, Base, Offset) + : SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) { Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi, NVPTX::ST_i32_asi, NVPTX::ST_i64_asi, NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi, NVPTX::ST_f32_asi, NVPTX::ST_f64_asi); if (!Opcode) return false; - SDValue Ops[] = { N1, getI32Imm(isVolatile, dl), - getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl), - getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base, - Offset, Chain }; + SDValue Ops[] = {Value, + getI32Imm(isVolatile, dl), + getI32Imm(CodeAddrSpace, dl), + getI32Imm(vecType, dl), + getI32Imm(toType, dl), + getI32Imm(toTypeWidth, dl), + Base, + Offset, + Chain}; NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops); - } else if (PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset) - : SelectADDRri(N2.getNode(), N2, Base, Offset)) { + } else if (PointerSize == 64 + ? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset) + : SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)) { if (PointerSize == 64) Opcode = pickOpcodeForVT( SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64, NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64, NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64); else Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari, NVPTX::ST_i32_ari, NVPTX::ST_i64_ari, NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari, NVPTX::ST_f32_ari, NVPTX::ST_f64_ari); if (!Opcode) return false; - SDValue Ops[] = { N1, getI32Imm(isVolatile, dl), - getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl), - getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base, - Offset, Chain }; + SDValue Ops[] = {Value, + getI32Imm(isVolatile, dl), + getI32Imm(CodeAddrSpace, dl), + getI32Imm(vecType, dl), + getI32Imm(toType, dl), + getI32Imm(toTypeWidth, dl), + Base, + Offset, + Chain}; NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops); } else { 
if (PointerSize == 64) Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64, NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64, NVPTX::ST_f16_areg_64, NVPTX::ST_f16x2_areg_64, NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64); else Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg, NVPTX::ST_i32_areg, NVPTX::ST_i64_areg, NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg, NVPTX::ST_f32_areg, NVPTX::ST_f64_areg); if (!Opcode) return false; - SDValue Ops[] = { N1, getI32Imm(isVolatile, dl), - getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl), - getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2, - Chain }; + SDValue Ops[] = {Value, + getI32Imm(isVolatile, dl), + getI32Imm(CodeAddrSpace, dl), + getI32Imm(vecType, dl), + getI32Imm(toType, dl), + getI32Imm(toTypeWidth, dl), + BasePtr, + Chain}; NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops); } if (!NVPTXST) return false; MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); MemRefs0[0] = cast(N)->getMemOperand(); cast(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1); ReplaceNode(N, NVPTXST); return true; } bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) { SDValue Chain = N->getOperand(0); SDValue Op1 = N->getOperand(1); SDValue Addr, Offset, Base; Optional Opcode; SDLoc DL(N); SDNode *ST; EVT EltVT = Op1.getValueType(); MemSDNode *MemSD = cast(N); EVT StoreVT = MemSD->getMemoryVT(); // Address Space Setting unsigned CodeAddrSpace = getCodeAddrSpace(MemSD); if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) { report_fatal_error("Cannot store to pointer that points to constant " "memory space"); } unsigned int PointerSize = CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace()); // Volatile Setting // - .volatile is only availalble for .global and .shared bool IsVolatile = MemSD->isVolatile(); if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && CodeAddrSpace != 
NVPTX::PTXLdStInstCode::GENERIC) IsVolatile = false; // Type Setting: toType + toTypeWidth // - for integer type, always use 'u' assert(StoreVT.isSimple() && "Store value is not simple"); MVT ScalarVT = StoreVT.getSimpleVT().getScalarType(); unsigned ToTypeWidth = ScalarVT.getSizeInBits(); unsigned ToType; if (ScalarVT.isFloatingPoint()) ToType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped : NVPTX::PTXLdStInstCode::Float; else ToType = NVPTX::PTXLdStInstCode::Unsigned; SmallVector StOps; SDValue N2; unsigned VecType; switch (N->getOpcode()) { case NVPTXISD::StoreV2: VecType = NVPTX::PTXLdStInstCode::V2; StOps.push_back(N->getOperand(1)); StOps.push_back(N->getOperand(2)); N2 = N->getOperand(3); break; case NVPTXISD::StoreV4: VecType = NVPTX::PTXLdStInstCode::V4; StOps.push_back(N->getOperand(1)); StOps.push_back(N->getOperand(2)); StOps.push_back(N->getOperand(3)); StOps.push_back(N->getOperand(4)); N2 = N->getOperand(5); break; default: return false; } // v8f16 is a special case. PTX doesn't have st.v8.f16 // instruction. Instead, we split the vector into v2f16 chunks and // store them with st.v4.b32. 
if (EltVT == MVT::v2f16) { assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected load opcode."); EltVT = MVT::i32; ToType = NVPTX::PTXLdStInstCode::Untyped; ToTypeWidth = 32; } StOps.push_back(getI32Imm(IsVolatile, DL)); StOps.push_back(getI32Imm(CodeAddrSpace, DL)); StOps.push_back(getI32Imm(VecType, DL)); StOps.push_back(getI32Imm(ToType, DL)); StOps.push_back(getI32Imm(ToTypeWidth, DL)); if (SelectDirectAddr(N2, Addr)) { switch (N->getOpcode()) { default: return false; case NVPTXISD::StoreV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar, NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar, NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar, NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar); break; case NVPTXISD::StoreV4: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_avar, NVPTX::STV_i16_v4_avar, NVPTX::STV_i32_v4_avar, None, NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar, NVPTX::STV_f32_v4_avar, None); break; } StOps.push_back(Addr); } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset) : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { switch (N->getOpcode()) { default: return false; case NVPTXISD::StoreV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi, NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi, NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi, NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi); break; case NVPTXISD::StoreV4: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi, NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi, None, NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi, NVPTX::STV_f32_v4_asi, None); break; } StOps.push_back(Base); StOps.push_back(Offset); } else if (PointerSize == 64 ? 
SelectADDRri64(N2.getNode(), N2, Base, Offset) : SelectADDRri(N2.getNode(), N2, Base, Offset)) { if (PointerSize == 64) { switch (N->getOpcode()) { default: return false; case NVPTXISD::StoreV2: Opcode = pickOpcodeForVT( EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari_64, NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64, NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64, NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64, NVPTX::STV_f64_v2_ari_64); break; case NVPTXISD::StoreV4: Opcode = pickOpcodeForVT( EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64, NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, None, NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64, NVPTX::STV_f32_v4_ari_64, None); break; } } else { switch (N->getOpcode()) { default: return false; case NVPTXISD::StoreV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari, NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari, NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari, NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari); break; case NVPTXISD::StoreV4: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari, NVPTX::STV_i32_v4_ari, None, NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari, NVPTX::STV_f32_v4_ari, None); break; } } StOps.push_back(Base); StOps.push_back(Offset); } else { if (PointerSize == 64) { switch (N->getOpcode()) { default: return false; case NVPTXISD::StoreV2: Opcode = pickOpcodeForVT( EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64, NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64, NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64, NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64, NVPTX::STV_f64_v2_areg_64); break; case NVPTXISD::StoreV4: Opcode = pickOpcodeForVT( EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64, NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, None, NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64, NVPTX::STV_f32_v4_areg_64, None); break; } 
} else { switch (N->getOpcode()) { default: return false; case NVPTXISD::StoreV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg, NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg, NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg, NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg, NVPTX::STV_f64_v2_areg); break; case NVPTXISD::StoreV4: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg, NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, None, NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg, NVPTX::STV_f32_v4_areg, None); break; } } StOps.push_back(N2); } if (!Opcode) return false; StOps.push_back(Chain); ST = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, StOps); MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); MemRefs0[0] = cast(N)->getMemOperand(); cast(ST)->setMemRefs(MemRefs0, MemRefs0 + 1); ReplaceNode(N, ST); return true; } bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) { SDValue Chain = Node->getOperand(0); SDValue Offset = Node->getOperand(2); SDValue Flag = Node->getOperand(3); SDLoc DL(Node); MemSDNode *Mem = cast(Node); unsigned VecSize; switch (Node->getOpcode()) { default: return false; case NVPTXISD::LoadParam: VecSize = 1; break; case NVPTXISD::LoadParamV2: VecSize = 2; break; case NVPTXISD::LoadParamV4: VecSize = 4; break; } EVT EltVT = Node->getValueType(0); EVT MemVT = Mem->getMemoryVT(); Optional Opcode; switch (VecSize) { default: return false; case 1: Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16, NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64, NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2, NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64); break; case 2: Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8, NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32, NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16, NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32, 
NVPTX::LoadParamMemV2F64); break; case 4: Opcode = pickOpcodeForVT( MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8, NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, None, NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2, NVPTX::LoadParamMemV4F32, None); break; } if (!Opcode) return false; SDVTList VTs; if (VecSize == 1) { VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue); } else if (VecSize == 2) { VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue); } else { EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue }; VTs = CurDAG->getVTList(EVTs); } unsigned OffsetVal = cast(Offset)->getZExtValue(); SmallVector Ops; Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32)); Ops.push_back(Chain); Ops.push_back(Flag); ReplaceNode(Node, CurDAG->getMachineNode(Opcode.getValue(), DL, VTs, Ops)); return true; } bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) { SDLoc DL(N); SDValue Chain = N->getOperand(0); SDValue Offset = N->getOperand(1); unsigned OffsetVal = cast(Offset)->getZExtValue(); MemSDNode *Mem = cast(N); // How many elements do we have? unsigned NumElts = 1; switch (N->getOpcode()) { default: return false; case NVPTXISD::StoreRetval: NumElts = 1; break; case NVPTXISD::StoreRetvalV2: NumElts = 2; break; case NVPTXISD::StoreRetvalV4: NumElts = 4; break; } // Build vector of operands SmallVector Ops; for (unsigned i = 0; i < NumElts; ++i) Ops.push_back(N->getOperand(i + 2)); Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32)); Ops.push_back(Chain); // Determine target opcode // If we have an i1, use an 8-bit store. The lowering code in // NVPTXISelLowering will have already emitted an upcast. 
Optional Opcode = 0; switch (NumElts) { default: return false; case 1: Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16, NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64, NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2, NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64); break; case 2: Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16, NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64, NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2, NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64); break; case 4: Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16, NVPTX::StoreRetvalV4I32, None, NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2, NVPTX::StoreRetvalV4F32, None); break; } if (!Opcode) return false; SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, Ops); MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); MemRefs0[0] = cast(N)->getMemOperand(); cast(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1); ReplaceNode(N, Ret); return true; } bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) { SDLoc DL(N); SDValue Chain = N->getOperand(0); SDValue Param = N->getOperand(1); unsigned ParamVal = cast(Param)->getZExtValue(); SDValue Offset = N->getOperand(2); unsigned OffsetVal = cast(Offset)->getZExtValue(); MemSDNode *Mem = cast(N); SDValue Flag = N->getOperand(N->getNumOperands() - 1); // How many elements do we have? 
unsigned NumElts = 1; switch (N->getOpcode()) { default: return false; case NVPTXISD::StoreParamU32: case NVPTXISD::StoreParamS32: case NVPTXISD::StoreParam: NumElts = 1; break; case NVPTXISD::StoreParamV2: NumElts = 2; break; case NVPTXISD::StoreParamV4: NumElts = 4; break; } // Build vector of operands SmallVector Ops; for (unsigned i = 0; i < NumElts; ++i) Ops.push_back(N->getOperand(i + 3)); Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32)); Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32)); Ops.push_back(Chain); Ops.push_back(Flag); // Determine target opcode // If we have an i1, use an 8-bit store. The lowering code in // NVPTXISelLowering will have already emitted an upcast. Optional Opcode = 0; switch (N->getOpcode()) { default: switch (NumElts) { default: return false; case 1: Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, NVPTX::StoreParamI8, NVPTX::StoreParamI16, NVPTX::StoreParamI32, NVPTX::StoreParamI64, NVPTX::StoreParamF16, NVPTX::StoreParamF16x2, NVPTX::StoreParamF32, NVPTX::StoreParamF64); break; case 2: Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16, NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64, NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2, NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64); break; case 4: Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16, NVPTX::StoreParamV4I32, None, NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2, NVPTX::StoreParamV4F32, None); break; } if (!Opcode) return false; break; // Special case: if we have a sign-extend/zero-extend node, insert the // conversion instruction first, and use that as the value operand to // the selected StoreParam node. 
case NVPTXISD::StoreParamU32: { Opcode = NVPTX::StoreParamI32; SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32); SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL, MVT::i32, Ops[0], CvtNone); Ops[0] = SDValue(Cvt, 0); break; } case NVPTXISD::StoreParamS32: { Opcode = NVPTX::StoreParamI32; SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32); SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL, MVT::i32, Ops[0], CvtNone); Ops[0] = SDValue(Cvt, 0); break; } } SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue); SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, RetVTs, Ops); MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); MemRefs0[0] = cast(N)->getMemOperand(); cast(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1); ReplaceNode(N, Ret); return true; } bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) { unsigned Opc = 0; switch (N->getOpcode()) { default: return false; case NVPTXISD::Tex1DFloatS32: Opc = NVPTX::TEX_1D_F32_S32; break; case NVPTXISD::Tex1DFloatFloat: Opc = NVPTX::TEX_1D_F32_F32; break; case NVPTXISD::Tex1DFloatFloatLevel: Opc = NVPTX::TEX_1D_F32_F32_LEVEL; break; case NVPTXISD::Tex1DFloatFloatGrad: Opc = NVPTX::TEX_1D_F32_F32_GRAD; break; case NVPTXISD::Tex1DS32S32: Opc = NVPTX::TEX_1D_S32_S32; break; case NVPTXISD::Tex1DS32Float: Opc = NVPTX::TEX_1D_S32_F32; break; case NVPTXISD::Tex1DS32FloatLevel: Opc = NVPTX::TEX_1D_S32_F32_LEVEL; break; case NVPTXISD::Tex1DS32FloatGrad: Opc = NVPTX::TEX_1D_S32_F32_GRAD; break; case NVPTXISD::Tex1DU32S32: Opc = NVPTX::TEX_1D_U32_S32; break; case NVPTXISD::Tex1DU32Float: Opc = NVPTX::TEX_1D_U32_F32; break; case NVPTXISD::Tex1DU32FloatLevel: Opc = NVPTX::TEX_1D_U32_F32_LEVEL; break; case NVPTXISD::Tex1DU32FloatGrad: Opc = NVPTX::TEX_1D_U32_F32_GRAD; break; case NVPTXISD::Tex1DArrayFloatS32: Opc = NVPTX::TEX_1D_ARRAY_F32_S32; break; case NVPTXISD::Tex1DArrayFloatFloat: Opc = NVPTX::TEX_1D_ARRAY_F32_F32; 
break; case NVPTXISD::Tex1DArrayFloatFloatLevel: Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL; break; case NVPTXISD::Tex1DArrayFloatFloatGrad: Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD; break; case NVPTXISD::Tex1DArrayS32S32: Opc = NVPTX::TEX_1D_ARRAY_S32_S32; break; case NVPTXISD::Tex1DArrayS32Float: Opc = NVPTX::TEX_1D_ARRAY_S32_F32; break; case NVPTXISD::Tex1DArrayS32FloatLevel: Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL; break; case NVPTXISD::Tex1DArrayS32FloatGrad: Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD; break; case NVPTXISD::Tex1DArrayU32S32: Opc = NVPTX::TEX_1D_ARRAY_U32_S32; break; case NVPTXISD::Tex1DArrayU32Float: Opc = NVPTX::TEX_1D_ARRAY_U32_F32; break; case NVPTXISD::Tex1DArrayU32FloatLevel: Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL; break; case NVPTXISD::Tex1DArrayU32FloatGrad: Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD; break; case NVPTXISD::Tex2DFloatS32: Opc = NVPTX::TEX_2D_F32_S32; break; case NVPTXISD::Tex2DFloatFloat: Opc = NVPTX::TEX_2D_F32_F32; break; case NVPTXISD::Tex2DFloatFloatLevel: Opc = NVPTX::TEX_2D_F32_F32_LEVEL; break; case NVPTXISD::Tex2DFloatFloatGrad: Opc = NVPTX::TEX_2D_F32_F32_GRAD; break; case NVPTXISD::Tex2DS32S32: Opc = NVPTX::TEX_2D_S32_S32; break; case NVPTXISD::Tex2DS32Float: Opc = NVPTX::TEX_2D_S32_F32; break; case NVPTXISD::Tex2DS32FloatLevel: Opc = NVPTX::TEX_2D_S32_F32_LEVEL; break; case NVPTXISD::Tex2DS32FloatGrad: Opc = NVPTX::TEX_2D_S32_F32_GRAD; break; case NVPTXISD::Tex2DU32S32: Opc = NVPTX::TEX_2D_U32_S32; break; case NVPTXISD::Tex2DU32Float: Opc = NVPTX::TEX_2D_U32_F32; break; case NVPTXISD::Tex2DU32FloatLevel: Opc = NVPTX::TEX_2D_U32_F32_LEVEL; break; case NVPTXISD::Tex2DU32FloatGrad: Opc = NVPTX::TEX_2D_U32_F32_GRAD; break; case NVPTXISD::Tex2DArrayFloatS32: Opc = NVPTX::TEX_2D_ARRAY_F32_S32; break; case NVPTXISD::Tex2DArrayFloatFloat: Opc = NVPTX::TEX_2D_ARRAY_F32_F32; break; case NVPTXISD::Tex2DArrayFloatFloatLevel: Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL; break; case NVPTXISD::Tex2DArrayFloatFloatGrad: Opc = 
NVPTX::TEX_2D_ARRAY_F32_F32_GRAD; break; case NVPTXISD::Tex2DArrayS32S32: Opc = NVPTX::TEX_2D_ARRAY_S32_S32; break; case NVPTXISD::Tex2DArrayS32Float: Opc = NVPTX::TEX_2D_ARRAY_S32_F32; break; case NVPTXISD::Tex2DArrayS32FloatLevel: Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL; break; case NVPTXISD::Tex2DArrayS32FloatGrad: Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD; break; case NVPTXISD::Tex2DArrayU32S32: Opc = NVPTX::TEX_2D_ARRAY_U32_S32; break; case NVPTXISD::Tex2DArrayU32Float: Opc = NVPTX::TEX_2D_ARRAY_U32_F32; break; case NVPTXISD::Tex2DArrayU32FloatLevel: Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL; break; case NVPTXISD::Tex2DArrayU32FloatGrad: Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD; break; case NVPTXISD::Tex3DFloatS32: Opc = NVPTX::TEX_3D_F32_S32; break; case NVPTXISD::Tex3DFloatFloat: Opc = NVPTX::TEX_3D_F32_F32; break; case NVPTXISD::Tex3DFloatFloatLevel: Opc = NVPTX::TEX_3D_F32_F32_LEVEL; break; case NVPTXISD::Tex3DFloatFloatGrad: Opc = NVPTX::TEX_3D_F32_F32_GRAD; break; case NVPTXISD::Tex3DS32S32: Opc = NVPTX::TEX_3D_S32_S32; break; case NVPTXISD::Tex3DS32Float: Opc = NVPTX::TEX_3D_S32_F32; break; case NVPTXISD::Tex3DS32FloatLevel: Opc = NVPTX::TEX_3D_S32_F32_LEVEL; break; case NVPTXISD::Tex3DS32FloatGrad: Opc = NVPTX::TEX_3D_S32_F32_GRAD; break; case NVPTXISD::Tex3DU32S32: Opc = NVPTX::TEX_3D_U32_S32; break; case NVPTXISD::Tex3DU32Float: Opc = NVPTX::TEX_3D_U32_F32; break; case NVPTXISD::Tex3DU32FloatLevel: Opc = NVPTX::TEX_3D_U32_F32_LEVEL; break; case NVPTXISD::Tex3DU32FloatGrad: Opc = NVPTX::TEX_3D_U32_F32_GRAD; break; case NVPTXISD::TexCubeFloatFloat: Opc = NVPTX::TEX_CUBE_F32_F32; break; case NVPTXISD::TexCubeFloatFloatLevel: Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL; break; case NVPTXISD::TexCubeS32Float: Opc = NVPTX::TEX_CUBE_S32_F32; break; case NVPTXISD::TexCubeS32FloatLevel: Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL; break; case NVPTXISD::TexCubeU32Float: Opc = NVPTX::TEX_CUBE_U32_F32; break; case NVPTXISD::TexCubeU32FloatLevel: Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL; 
break; case NVPTXISD::TexCubeArrayFloatFloat: Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32; break; case NVPTXISD::TexCubeArrayFloatFloatLevel: Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL; break; case NVPTXISD::TexCubeArrayS32Float: Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32; break; case NVPTXISD::TexCubeArrayS32FloatLevel: Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL; break; case NVPTXISD::TexCubeArrayU32Float: Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32; break; case NVPTXISD::TexCubeArrayU32FloatLevel: Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL; break; case NVPTXISD::Tld4R2DFloatFloat: Opc = NVPTX::TLD4_R_2D_F32_F32; break; case NVPTXISD::Tld4G2DFloatFloat: Opc = NVPTX::TLD4_G_2D_F32_F32; break; case NVPTXISD::Tld4B2DFloatFloat: Opc = NVPTX::TLD4_B_2D_F32_F32; break; case NVPTXISD::Tld4A2DFloatFloat: Opc = NVPTX::TLD4_A_2D_F32_F32; break; case NVPTXISD::Tld4R2DS64Float: Opc = NVPTX::TLD4_R_2D_S32_F32; break; case NVPTXISD::Tld4G2DS64Float: Opc = NVPTX::TLD4_G_2D_S32_F32; break; case NVPTXISD::Tld4B2DS64Float: Opc = NVPTX::TLD4_B_2D_S32_F32; break; case NVPTXISD::Tld4A2DS64Float: Opc = NVPTX::TLD4_A_2D_S32_F32; break; case NVPTXISD::Tld4R2DU64Float: Opc = NVPTX::TLD4_R_2D_U32_F32; break; case NVPTXISD::Tld4G2DU64Float: Opc = NVPTX::TLD4_G_2D_U32_F32; break; case NVPTXISD::Tld4B2DU64Float: Opc = NVPTX::TLD4_B_2D_U32_F32; break; case NVPTXISD::Tld4A2DU64Float: Opc = NVPTX::TLD4_A_2D_U32_F32; break; case NVPTXISD::TexUnified1DFloatS32: Opc = NVPTX::TEX_UNIFIED_1D_F32_S32; break; case NVPTXISD::TexUnified1DFloatFloat: Opc = NVPTX::TEX_UNIFIED_1D_F32_F32; break; case NVPTXISD::TexUnified1DFloatFloatLevel: Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL; break; case NVPTXISD::TexUnified1DFloatFloatGrad: Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD; break; case NVPTXISD::TexUnified1DS32S32: Opc = NVPTX::TEX_UNIFIED_1D_S32_S32; break; case NVPTXISD::TexUnified1DS32Float: Opc = NVPTX::TEX_UNIFIED_1D_S32_F32; break; case NVPTXISD::TexUnified1DS32FloatLevel: Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL; break; case 
NVPTXISD::TexUnified1DS32FloatGrad: Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD; break; case NVPTXISD::TexUnified1DU32S32: Opc = NVPTX::TEX_UNIFIED_1D_U32_S32; break; case NVPTXISD::TexUnified1DU32Float: Opc = NVPTX::TEX_UNIFIED_1D_U32_F32; break; case NVPTXISD::TexUnified1DU32FloatLevel: Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL; break; case NVPTXISD::TexUnified1DU32FloatGrad: Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD; break; case NVPTXISD::TexUnified1DArrayFloatS32: Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32; break; case NVPTXISD::TexUnified1DArrayFloatFloat: Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32; break; case NVPTXISD::TexUnified1DArrayFloatFloatLevel: Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL; break; case NVPTXISD::TexUnified1DArrayFloatFloatGrad: Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD; break; case NVPTXISD::TexUnified1DArrayS32S32: Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32; break; case NVPTXISD::TexUnified1DArrayS32Float: Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32; break; case NVPTXISD::TexUnified1DArrayS32FloatLevel: Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL; break; case NVPTXISD::TexUnified1DArrayS32FloatGrad: Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD; break; case NVPTXISD::TexUnified1DArrayU32S32: Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32; break; case NVPTXISD::TexUnified1DArrayU32Float: Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32; break; case NVPTXISD::TexUnified1DArrayU32FloatLevel: Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL; break; case NVPTXISD::TexUnified1DArrayU32FloatGrad: Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD; break; case NVPTXISD::TexUnified2DFloatS32: Opc = NVPTX::TEX_UNIFIED_2D_F32_S32; break; case NVPTXISD::TexUnified2DFloatFloat: Opc = NVPTX::TEX_UNIFIED_2D_F32_F32; break; case NVPTXISD::TexUnified2DFloatFloatLevel: Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL; break; case NVPTXISD::TexUnified2DFloatFloatGrad: Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD; break; case NVPTXISD::TexUnified2DS32S32: Opc = 
NVPTX::TEX_UNIFIED_2D_S32_S32; break; case NVPTXISD::TexUnified2DS32Float: Opc = NVPTX::TEX_UNIFIED_2D_S32_F32; break; case NVPTXISD::TexUnified2DS32FloatLevel: Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL; break; case NVPTXISD::TexUnified2DS32FloatGrad: Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD; break; case NVPTXISD::TexUnified2DU32S32: Opc = NVPTX::TEX_UNIFIED_2D_U32_S32; break; case NVPTXISD::TexUnified2DU32Float: Opc = NVPTX::TEX_UNIFIED_2D_U32_F32; break; case NVPTXISD::TexUnified2DU32FloatLevel: Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL; break; case NVPTXISD::TexUnified2DU32FloatGrad: Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD; break; case NVPTXISD::TexUnified2DArrayFloatS32: Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32; break; case NVPTXISD::TexUnified2DArrayFloatFloat: Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32; break; case NVPTXISD::TexUnified2DArrayFloatFloatLevel: Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL; break; case NVPTXISD::TexUnified2DArrayFloatFloatGrad: Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD; break; case NVPTXISD::TexUnified2DArrayS32S32: Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32; break; case NVPTXISD::TexUnified2DArrayS32Float: Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32; break; case NVPTXISD::TexUnified2DArrayS32FloatLevel: Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL; break; case NVPTXISD::TexUnified2DArrayS32FloatGrad: Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD; break; case NVPTXISD::TexUnified2DArrayU32S32: Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32; break; case NVPTXISD::TexUnified2DArrayU32Float: Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32; break; case NVPTXISD::TexUnified2DArrayU32FloatLevel: Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL; break; case NVPTXISD::TexUnified2DArrayU32FloatGrad: Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD; break; case NVPTXISD::TexUnified3DFloatS32: Opc = NVPTX::TEX_UNIFIED_3D_F32_S32; break; case NVPTXISD::TexUnified3DFloatFloat: Opc = NVPTX::TEX_UNIFIED_3D_F32_F32; break; case 
NVPTXISD::TexUnified3DFloatFloatLevel: Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL; break; case NVPTXISD::TexUnified3DFloatFloatGrad: Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD; break; case NVPTXISD::TexUnified3DS32S32: Opc = NVPTX::TEX_UNIFIED_3D_S32_S32; break; case NVPTXISD::TexUnified3DS32Float: Opc = NVPTX::TEX_UNIFIED_3D_S32_F32; break; case NVPTXISD::TexUnified3DS32FloatLevel: Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL; break; case NVPTXISD::TexUnified3DS32FloatGrad: Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD; break; case NVPTXISD::TexUnified3DU32S32: Opc = NVPTX::TEX_UNIFIED_3D_U32_S32; break; case NVPTXISD::TexUnified3DU32Float: Opc = NVPTX::TEX_UNIFIED_3D_U32_F32; break; case NVPTXISD::TexUnified3DU32FloatLevel: Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL; break; case NVPTXISD::TexUnified3DU32FloatGrad: Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD; break; case NVPTXISD::TexUnifiedCubeFloatFloat: Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32; break; case NVPTXISD::TexUnifiedCubeFloatFloatLevel: Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL; break; case NVPTXISD::TexUnifiedCubeS32Float: Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32; break; case NVPTXISD::TexUnifiedCubeS32FloatLevel: Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL; break; case NVPTXISD::TexUnifiedCubeU32Float: Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32; break; case NVPTXISD::TexUnifiedCubeU32FloatLevel: Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL; break; case NVPTXISD::TexUnifiedCubeArrayFloatFloat: Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32; break; case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL; break; case NVPTXISD::TexUnifiedCubeArrayS32Float: Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32; break; case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL; break; case NVPTXISD::TexUnifiedCubeArrayU32Float: Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32; break; case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: Opc = 
NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL; break; case NVPTXISD::Tld4UnifiedR2DFloatFloat: Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32; break; case NVPTXISD::Tld4UnifiedG2DFloatFloat: Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32; break; case NVPTXISD::Tld4UnifiedB2DFloatFloat: Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32; break; case NVPTXISD::Tld4UnifiedA2DFloatFloat: Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32; break; case NVPTXISD::Tld4UnifiedR2DS64Float: Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32; break; case NVPTXISD::Tld4UnifiedG2DS64Float: Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32; break; case NVPTXISD::Tld4UnifiedB2DS64Float: Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32; break; case NVPTXISD::Tld4UnifiedA2DS64Float: Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32; break; case NVPTXISD::Tld4UnifiedR2DU64Float: Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32; break; case NVPTXISD::Tld4UnifiedG2DU64Float: Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32; break; case NVPTXISD::Tld4UnifiedB2DU64Float: Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32; break; case NVPTXISD::Tld4UnifiedA2DU64Float: Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32; break; } // Copy over operands SmallVector Ops(N->op_begin() + 1, N->op_end()); Ops.push_back(N->getOperand(0)); // Move chain to the back. 
ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops)); return true; } bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) { unsigned Opc = 0; switch (N->getOpcode()) { default: return false; case NVPTXISD::Suld1DI8Clamp: Opc = NVPTX::SULD_1D_I8_CLAMP; break; case NVPTXISD::Suld1DI16Clamp: Opc = NVPTX::SULD_1D_I16_CLAMP; break; case NVPTXISD::Suld1DI32Clamp: Opc = NVPTX::SULD_1D_I32_CLAMP; break; case NVPTXISD::Suld1DI64Clamp: Opc = NVPTX::SULD_1D_I64_CLAMP; break; case NVPTXISD::Suld1DV2I8Clamp: Opc = NVPTX::SULD_1D_V2I8_CLAMP; break; case NVPTXISD::Suld1DV2I16Clamp: Opc = NVPTX::SULD_1D_V2I16_CLAMP; break; case NVPTXISD::Suld1DV2I32Clamp: Opc = NVPTX::SULD_1D_V2I32_CLAMP; break; case NVPTXISD::Suld1DV2I64Clamp: Opc = NVPTX::SULD_1D_V2I64_CLAMP; break; case NVPTXISD::Suld1DV4I8Clamp: Opc = NVPTX::SULD_1D_V4I8_CLAMP; break; case NVPTXISD::Suld1DV4I16Clamp: Opc = NVPTX::SULD_1D_V4I16_CLAMP; break; case NVPTXISD::Suld1DV4I32Clamp: Opc = NVPTX::SULD_1D_V4I32_CLAMP; break; case NVPTXISD::Suld1DArrayI8Clamp: Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP; break; case NVPTXISD::Suld1DArrayI16Clamp: Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP; break; case NVPTXISD::Suld1DArrayI32Clamp: Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP; break; case NVPTXISD::Suld1DArrayI64Clamp: Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP; break; case NVPTXISD::Suld1DArrayV2I8Clamp: Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP; break; case NVPTXISD::Suld1DArrayV2I16Clamp: Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP; break; case NVPTXISD::Suld1DArrayV2I32Clamp: Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP; break; case NVPTXISD::Suld1DArrayV2I64Clamp: Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP; break; case NVPTXISD::Suld1DArrayV4I8Clamp: Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP; break; case NVPTXISD::Suld1DArrayV4I16Clamp: Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP; break; case NVPTXISD::Suld1DArrayV4I32Clamp: Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP; break; case NVPTXISD::Suld2DI8Clamp: Opc = NVPTX::SULD_2D_I8_CLAMP; break; case 
NVPTXISD::Suld2DI16Clamp: Opc = NVPTX::SULD_2D_I16_CLAMP; break; case NVPTXISD::Suld2DI32Clamp: Opc = NVPTX::SULD_2D_I32_CLAMP; break; case NVPTXISD::Suld2DI64Clamp: Opc = NVPTX::SULD_2D_I64_CLAMP; break; case NVPTXISD::Suld2DV2I8Clamp: Opc = NVPTX::SULD_2D_V2I8_CLAMP; break; case NVPTXISD::Suld2DV2I16Clamp: Opc = NVPTX::SULD_2D_V2I16_CLAMP; break; case NVPTXISD::Suld2DV2I32Clamp: Opc = NVPTX::SULD_2D_V2I32_CLAMP; break; case NVPTXISD::Suld2DV2I64Clamp: Opc = NVPTX::SULD_2D_V2I64_CLAMP; break; case NVPTXISD::Suld2DV4I8Clamp: Opc = NVPTX::SULD_2D_V4I8_CLAMP; break; case NVPTXISD::Suld2DV4I16Clamp: Opc = NVPTX::SULD_2D_V4I16_CLAMP; break; case NVPTXISD::Suld2DV4I32Clamp: Opc = NVPTX::SULD_2D_V4I32_CLAMP; break; case NVPTXISD::Suld2DArrayI8Clamp: Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP; break; case NVPTXISD::Suld2DArrayI16Clamp: Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP; break; case NVPTXISD::Suld2DArrayI32Clamp: Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP; break; case NVPTXISD::Suld2DArrayI64Clamp: Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP; break; case NVPTXISD::Suld2DArrayV2I8Clamp: Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP; break; case NVPTXISD::Suld2DArrayV2I16Clamp: Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP; break; case NVPTXISD::Suld2DArrayV2I32Clamp: Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP; break; case NVPTXISD::Suld2DArrayV2I64Clamp: Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP; break; case NVPTXISD::Suld2DArrayV4I8Clamp: Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP; break; case NVPTXISD::Suld2DArrayV4I16Clamp: Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP; break; case NVPTXISD::Suld2DArrayV4I32Clamp: Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP; break; case NVPTXISD::Suld3DI8Clamp: Opc = NVPTX::SULD_3D_I8_CLAMP; break; case NVPTXISD::Suld3DI16Clamp: Opc = NVPTX::SULD_3D_I16_CLAMP; break; case NVPTXISD::Suld3DI32Clamp: Opc = NVPTX::SULD_3D_I32_CLAMP; break; case NVPTXISD::Suld3DI64Clamp: Opc = NVPTX::SULD_3D_I64_CLAMP; break; case NVPTXISD::Suld3DV2I8Clamp: Opc = NVPTX::SULD_3D_V2I8_CLAMP; break; case 
NVPTXISD::Suld3DV2I16Clamp: Opc = NVPTX::SULD_3D_V2I16_CLAMP; break; case NVPTXISD::Suld3DV2I32Clamp: Opc = NVPTX::SULD_3D_V2I32_CLAMP; break; case NVPTXISD::Suld3DV2I64Clamp: Opc = NVPTX::SULD_3D_V2I64_CLAMP; break; case NVPTXISD::Suld3DV4I8Clamp: Opc = NVPTX::SULD_3D_V4I8_CLAMP; break; case NVPTXISD::Suld3DV4I16Clamp: Opc = NVPTX::SULD_3D_V4I16_CLAMP; break; case NVPTXISD::Suld3DV4I32Clamp: Opc = NVPTX::SULD_3D_V4I32_CLAMP; break; case NVPTXISD::Suld1DI8Trap: Opc = NVPTX::SULD_1D_I8_TRAP; break; case NVPTXISD::Suld1DI16Trap: Opc = NVPTX::SULD_1D_I16_TRAP; break; case NVPTXISD::Suld1DI32Trap: Opc = NVPTX::SULD_1D_I32_TRAP; break; case NVPTXISD::Suld1DI64Trap: Opc = NVPTX::SULD_1D_I64_TRAP; break; case NVPTXISD::Suld1DV2I8Trap: Opc = NVPTX::SULD_1D_V2I8_TRAP; break; case NVPTXISD::Suld1DV2I16Trap: Opc = NVPTX::SULD_1D_V2I16_TRAP; break; case NVPTXISD::Suld1DV2I32Trap: Opc = NVPTX::SULD_1D_V2I32_TRAP; break; case NVPTXISD::Suld1DV2I64Trap: Opc = NVPTX::SULD_1D_V2I64_TRAP; break; case NVPTXISD::Suld1DV4I8Trap: Opc = NVPTX::SULD_1D_V4I8_TRAP; break; case NVPTXISD::Suld1DV4I16Trap: Opc = NVPTX::SULD_1D_V4I16_TRAP; break; case NVPTXISD::Suld1DV4I32Trap: Opc = NVPTX::SULD_1D_V4I32_TRAP; break; case NVPTXISD::Suld1DArrayI8Trap: Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP; break; case NVPTXISD::Suld1DArrayI16Trap: Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP; break; case NVPTXISD::Suld1DArrayI32Trap: Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP; break; case NVPTXISD::Suld1DArrayI64Trap: Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP; break; case NVPTXISD::Suld1DArrayV2I8Trap: Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP; break; case NVPTXISD::Suld1DArrayV2I16Trap: Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP; break; case NVPTXISD::Suld1DArrayV2I32Trap: Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP; break; case NVPTXISD::Suld1DArrayV2I64Trap: Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP; break; case NVPTXISD::Suld1DArrayV4I8Trap: Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP; break; case NVPTXISD::Suld1DArrayV4I16Trap: Opc = 
NVPTX::SULD_1D_ARRAY_V4I16_TRAP; break; case NVPTXISD::Suld1DArrayV4I32Trap: Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP; break; case NVPTXISD::Suld2DI8Trap: Opc = NVPTX::SULD_2D_I8_TRAP; break; case NVPTXISD::Suld2DI16Trap: Opc = NVPTX::SULD_2D_I16_TRAP; break; case NVPTXISD::Suld2DI32Trap: Opc = NVPTX::SULD_2D_I32_TRAP; break; case NVPTXISD::Suld2DI64Trap: Opc = NVPTX::SULD_2D_I64_TRAP; break; case NVPTXISD::Suld2DV2I8Trap: Opc = NVPTX::SULD_2D_V2I8_TRAP; break; case NVPTXISD::Suld2DV2I16Trap: Opc = NVPTX::SULD_2D_V2I16_TRAP; break; case NVPTXISD::Suld2DV2I32Trap: Opc = NVPTX::SULD_2D_V2I32_TRAP; break; case NVPTXISD::Suld2DV2I64Trap: Opc = NVPTX::SULD_2D_V2I64_TRAP; break; case NVPTXISD::Suld2DV4I8Trap: Opc = NVPTX::SULD_2D_V4I8_TRAP; break; case NVPTXISD::Suld2DV4I16Trap: Opc = NVPTX::SULD_2D_V4I16_TRAP; break; case NVPTXISD::Suld2DV4I32Trap: Opc = NVPTX::SULD_2D_V4I32_TRAP; break; case NVPTXISD::Suld2DArrayI8Trap: Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP; break; case NVPTXISD::Suld2DArrayI16Trap: Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP; break; case NVPTXISD::Suld2DArrayI32Trap: Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP; break; case NVPTXISD::Suld2DArrayI64Trap: Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP; break; case NVPTXISD::Suld2DArrayV2I8Trap: Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP; break; case NVPTXISD::Suld2DArrayV2I16Trap: Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP; break; case NVPTXISD::Suld2DArrayV2I32Trap: Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP; break; case NVPTXISD::Suld2DArrayV2I64Trap: Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP; break; case NVPTXISD::Suld2DArrayV4I8Trap: Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP; break; case NVPTXISD::Suld2DArrayV4I16Trap: Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP; break; case NVPTXISD::Suld2DArrayV4I32Trap: Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP; break; case NVPTXISD::Suld3DI8Trap: Opc = NVPTX::SULD_3D_I8_TRAP; break; case NVPTXISD::Suld3DI16Trap: Opc = NVPTX::SULD_3D_I16_TRAP; break; case NVPTXISD::Suld3DI32Trap: Opc = NVPTX::SULD_3D_I32_TRAP; break; case 
NVPTXISD::Suld3DI64Trap: Opc = NVPTX::SULD_3D_I64_TRAP; break; case NVPTXISD::Suld3DV2I8Trap: Opc = NVPTX::SULD_3D_V2I8_TRAP; break; case NVPTXISD::Suld3DV2I16Trap: Opc = NVPTX::SULD_3D_V2I16_TRAP; break; case NVPTXISD::Suld3DV2I32Trap: Opc = NVPTX::SULD_3D_V2I32_TRAP; break; case NVPTXISD::Suld3DV2I64Trap: Opc = NVPTX::SULD_3D_V2I64_TRAP; break; case NVPTXISD::Suld3DV4I8Trap: Opc = NVPTX::SULD_3D_V4I8_TRAP; break; case NVPTXISD::Suld3DV4I16Trap: Opc = NVPTX::SULD_3D_V4I16_TRAP; break; case NVPTXISD::Suld3DV4I32Trap: Opc = NVPTX::SULD_3D_V4I32_TRAP; break; case NVPTXISD::Suld1DI8Zero: Opc = NVPTX::SULD_1D_I8_ZERO; break; case NVPTXISD::Suld1DI16Zero: Opc = NVPTX::SULD_1D_I16_ZERO; break; case NVPTXISD::Suld1DI32Zero: Opc = NVPTX::SULD_1D_I32_ZERO; break; case NVPTXISD::Suld1DI64Zero: Opc = NVPTX::SULD_1D_I64_ZERO; break; case NVPTXISD::Suld1DV2I8Zero: Opc = NVPTX::SULD_1D_V2I8_ZERO; break; case NVPTXISD::Suld1DV2I16Zero: Opc = NVPTX::SULD_1D_V2I16_ZERO; break; case NVPTXISD::Suld1DV2I32Zero: Opc = NVPTX::SULD_1D_V2I32_ZERO; break; case NVPTXISD::Suld1DV2I64Zero: Opc = NVPTX::SULD_1D_V2I64_ZERO; break; case NVPTXISD::Suld1DV4I8Zero: Opc = NVPTX::SULD_1D_V4I8_ZERO; break; case NVPTXISD::Suld1DV4I16Zero: Opc = NVPTX::SULD_1D_V4I16_ZERO; break; case NVPTXISD::Suld1DV4I32Zero: Opc = NVPTX::SULD_1D_V4I32_ZERO; break; case NVPTXISD::Suld1DArrayI8Zero: Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO; break; case NVPTXISD::Suld1DArrayI16Zero: Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO; break; case NVPTXISD::Suld1DArrayI32Zero: Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO; break; case NVPTXISD::Suld1DArrayI64Zero: Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO; break; case NVPTXISD::Suld1DArrayV2I8Zero: Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO; break; case NVPTXISD::Suld1DArrayV2I16Zero: Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO; break; case NVPTXISD::Suld1DArrayV2I32Zero: Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO; break; case NVPTXISD::Suld1DArrayV2I64Zero: Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO; break; case 
NVPTXISD::Suld1DArrayV4I8Zero: Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO; break; case NVPTXISD::Suld1DArrayV4I16Zero: Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO; break; case NVPTXISD::Suld1DArrayV4I32Zero: Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO; break; case NVPTXISD::Suld2DI8Zero: Opc = NVPTX::SULD_2D_I8_ZERO; break; case NVPTXISD::Suld2DI16Zero: Opc = NVPTX::SULD_2D_I16_ZERO; break; case NVPTXISD::Suld2DI32Zero: Opc = NVPTX::SULD_2D_I32_ZERO; break; case NVPTXISD::Suld2DI64Zero: Opc = NVPTX::SULD_2D_I64_ZERO; break; case NVPTXISD::Suld2DV2I8Zero: Opc = NVPTX::SULD_2D_V2I8_ZERO; break; case NVPTXISD::Suld2DV2I16Zero: Opc = NVPTX::SULD_2D_V2I16_ZERO; break; case NVPTXISD::Suld2DV2I32Zero: Opc = NVPTX::SULD_2D_V2I32_ZERO; break; case NVPTXISD::Suld2DV2I64Zero: Opc = NVPTX::SULD_2D_V2I64_ZERO; break; case NVPTXISD::Suld2DV4I8Zero: Opc = NVPTX::SULD_2D_V4I8_ZERO; break; case NVPTXISD::Suld2DV4I16Zero: Opc = NVPTX::SULD_2D_V4I16_ZERO; break; case NVPTXISD::Suld2DV4I32Zero: Opc = NVPTX::SULD_2D_V4I32_ZERO; break; case NVPTXISD::Suld2DArrayI8Zero: Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO; break; case NVPTXISD::Suld2DArrayI16Zero: Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO; break; case NVPTXISD::Suld2DArrayI32Zero: Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO; break; case NVPTXISD::Suld2DArrayI64Zero: Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO; break; case NVPTXISD::Suld2DArrayV2I8Zero: Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO; break; case NVPTXISD::Suld2DArrayV2I16Zero: Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO; break; case NVPTXISD::Suld2DArrayV2I32Zero: Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO; break; case NVPTXISD::Suld2DArrayV2I64Zero: Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO; break; case NVPTXISD::Suld2DArrayV4I8Zero: Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO; break; case NVPTXISD::Suld2DArrayV4I16Zero: Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO; break; case NVPTXISD::Suld2DArrayV4I32Zero: Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO; break; case NVPTXISD::Suld3DI8Zero: Opc = NVPTX::SULD_3D_I8_ZERO; break; case NVPTXISD::Suld3DI16Zero: 
Opc = NVPTX::SULD_3D_I16_ZERO; break; case NVPTXISD::Suld3DI32Zero: Opc = NVPTX::SULD_3D_I32_ZERO; break; case NVPTXISD::Suld3DI64Zero: Opc = NVPTX::SULD_3D_I64_ZERO; break; case NVPTXISD::Suld3DV2I8Zero: Opc = NVPTX::SULD_3D_V2I8_ZERO; break; case NVPTXISD::Suld3DV2I16Zero: Opc = NVPTX::SULD_3D_V2I16_ZERO; break; case NVPTXISD::Suld3DV2I32Zero: Opc = NVPTX::SULD_3D_V2I32_ZERO; break; case NVPTXISD::Suld3DV2I64Zero: Opc = NVPTX::SULD_3D_V2I64_ZERO; break; case NVPTXISD::Suld3DV4I8Zero: Opc = NVPTX::SULD_3D_V4I8_ZERO; break; case NVPTXISD::Suld3DV4I16Zero: Opc = NVPTX::SULD_3D_V4I16_ZERO; break; case NVPTXISD::Suld3DV4I32Zero: Opc = NVPTX::SULD_3D_V4I32_ZERO; break; } // Copy over operands SmallVector Ops(N->op_begin() + 1, N->op_end()); Ops.push_back(N->getOperand(0)); // Move chain to the back. ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops)); return true; } /// SelectBFE - Look for instruction sequences that can be made more efficient /// by using the 'bfe' (bit-field extract) PTX instruction bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) { SDLoc DL(N); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); SDValue Len; SDValue Start; SDValue Val; bool IsSigned = false; if (N->getOpcode() == ISD::AND) { // Canonicalize the operands // We want 'and %val, %mask' if (isa(LHS) && !isa(RHS)) { std::swap(LHS, RHS); } ConstantSDNode *Mask = dyn_cast(RHS); if (!Mask) { // We need a constant mask on the RHS of the AND return false; } // Extract the mask bits uint64_t MaskVal = Mask->getZExtValue(); if (!isMask_64(MaskVal)) { // We *could* handle shifted masks here, but doing so would require an // 'and' operation to fix up the low-order bits so we would trade // shr+and for bfe+and, which has the same throughput return false; } // How many bits are in our mask? 
uint64_t NumBits = countTrailingOnes(MaskVal); Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32); if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) { // We have a 'srl/and' pair, extract the effective start bit and length Val = LHS.getNode()->getOperand(0); Start = LHS.getNode()->getOperand(1); ConstantSDNode *StartConst = dyn_cast(Start); if (StartConst) { uint64_t StartVal = StartConst->getZExtValue(); // How many "good" bits do we have left? "good" is defined here as bits // that exist in the original value, not shifted in. uint64_t GoodBits = Start.getValueSizeInBits() - StartVal; if (NumBits > GoodBits) { // Do not handle the case where bits have been shifted in. In theory // we could handle this, but the cost is likely higher than just // emitting the srl/and pair. return false; } Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32); } else { // Do not handle the case where the shift amount (can be zero if no srl // was found) is not constant. We could handle this case, but it would // require run-time logic that would be more expensive than just // emitting the srl/and pair. return false; } } else { // Do not handle the case where the LHS of the and is not a shift. While // it would be trivial to handle this case, it would just transform // 'and' -> 'bfe', but 'and' has higher-throughput. 
return false; } } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) { if (LHS->getOpcode() == ISD::AND) { ConstantSDNode *ShiftCnst = dyn_cast(RHS); if (!ShiftCnst) { // Shift amount must be constant return false; } uint64_t ShiftAmt = ShiftCnst->getZExtValue(); SDValue AndLHS = LHS->getOperand(0); SDValue AndRHS = LHS->getOperand(1); // Canonicalize the AND to have the mask on the RHS if (isa(AndLHS)) { std::swap(AndLHS, AndRHS); } ConstantSDNode *MaskCnst = dyn_cast(AndRHS); if (!MaskCnst) { // Mask must be constant return false; } uint64_t MaskVal = MaskCnst->getZExtValue(); uint64_t NumZeros; uint64_t NumBits; if (isMask_64(MaskVal)) { NumZeros = 0; // The number of bits in the result bitfield will be the number of // trailing ones (the AND) minus the number of bits we shift off NumBits = countTrailingOnes(MaskVal) - ShiftAmt; } else if (isShiftedMask_64(MaskVal)) { NumZeros = countTrailingZeros(MaskVal); unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros); // The number of bits in the result bitfield will be the number of // trailing zeros plus the number of set bits in the mask minus the // number of bits we shift off NumBits = NumZeros + NumOnes - ShiftAmt; } else { // This is not a mask we can handle return false; } if (ShiftAmt < NumZeros) { // Handling this case would require extra logic that would make this // transformation non-profitable return false; } Val = AndLHS; Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32); Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32); } else if (LHS->getOpcode() == ISD::SHL) { // Here, we have a pattern like: // // (sra (shl val, NN), MM) // or // (srl (shl val, NN), MM) // // If MM >= NN, we can efficiently optimize this with bfe Val = LHS->getOperand(0); SDValue ShlRHS = LHS->getOperand(1); ConstantSDNode *ShlCnst = dyn_cast(ShlRHS); if (!ShlCnst) { // Shift amount must be constant return false; } uint64_t InnerShiftAmt = ShlCnst->getZExtValue(); SDValue ShrRHS = RHS; ConstantSDNode 
*ShrCnst = dyn_cast(ShrRHS); if (!ShrCnst) { // Shift amount must be constant return false; } uint64_t OuterShiftAmt = ShrCnst->getZExtValue(); // To avoid extra codegen and be profitable, we need Outer >= Inner if (OuterShiftAmt < InnerShiftAmt) { return false; } // If the outer shift is more than the type size, we have no bitfield to // extract (since we also check that the inner shift is <= the outer shift // then this also implies that the inner shift is < the type size) if (OuterShiftAmt >= Val.getValueSizeInBits()) { return false; } Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL, MVT::i32); Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt, DL, MVT::i32); if (N->getOpcode() == ISD::SRA) { // If we have a arithmetic right shift, we need to use the signed bfe // variant IsSigned = true; } } else { // No can do... return false; } } else { // No can do... return false; } unsigned Opc; // For the BFE operations we form here from "and" and "srl", always use the // unsigned variants. if (Val.getValueType() == MVT::i32) { if (IsSigned) { Opc = NVPTX::BFE_S32rii; } else { Opc = NVPTX::BFE_U32rii; } } else if (Val.getValueType() == MVT::i64) { if (IsSigned) { Opc = NVPTX::BFE_S64rii; } else { Opc = NVPTX::BFE_U64rii; } } else { // We cannot handle this type return false; } SDValue Ops[] = { Val, Start, Len }; ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops)); return true; } // SelectDirectAddr - Match a direct address for DAG. // A direct address could be a globaladdress or externalsymbol. bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) { // Return true if TGA or ES. 
if (N.getOpcode() == ISD::TargetGlobalAddress || N.getOpcode() == ISD::TargetExternalSymbol) { Address = N; return true; } if (N.getOpcode() == NVPTXISD::Wrapper) { Address = N.getOperand(0); return true; } // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol if (AddrSpaceCastSDNode *CastN = dyn_cast(N)) { if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC && CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM && CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam) return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address); } return false; } // symbol+offset bool NVPTXDAGToDAGISel::SelectADDRsi_imp( SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) { if (Addr.getOpcode() == ISD::ADD) { if (ConstantSDNode *CN = dyn_cast(Addr.getOperand(1))) { SDValue base = Addr.getOperand(0); if (SelectDirectAddr(base, Base)) { Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode), mvt); return true; } } } return false; } // symbol+offset bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset) { return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32); } // symbol+offset bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset) { return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64); } // register+offset bool NVPTXDAGToDAGISel::SelectADDRri_imp( SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) { if (FrameIndexSDNode *FIN = dyn_cast(Addr)) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt); Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt); return true; } if (Addr.getOpcode() == ISD::TargetExternalSymbol || Addr.getOpcode() == ISD::TargetGlobalAddress) return false; // direct calls. 
if (Addr.getOpcode() == ISD::ADD) { if (SelectDirectAddr(Addr.getOperand(0), Addr)) { return false; } if (ConstantSDNode *CN = dyn_cast(Addr.getOperand(1))) { if (FrameIndexSDNode *FIN = dyn_cast(Addr.getOperand(0))) // Constant offset from frame ref. Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt); else Base = Addr.getOperand(0); Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode), mvt); return true; } } return false; } // register+offset bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset) { return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32); } // register+offset bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset) { return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64); } bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const { const Value *Src = nullptr; if (MemSDNode *mN = dyn_cast(N)) { if (spN == 0 && mN->getMemOperand()->getPseudoValue()) return true; Src = mN->getMemOperand()->getValue(); } if (!Src) return false; if (auto *PT = dyn_cast(Src->getType())) return (PT->getAddressSpace() == spN); return false; } /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand( const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) { SDValue Op0, Op1; switch (ConstraintID) { default: return true; case InlineAsm::Constraint_m: // memory if (SelectDirectAddr(Op, Op0)) { OutOps.push_back(Op0); OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); return false; } if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) { OutOps.push_back(Op0); OutOps.push_back(Op1); return false; } break; } return true; } /// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a /// conversion from \p SrcTy to \p DestTy. 
unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy, bool IsSigned) { switch (SrcTy.SimpleTy) { default: llvm_unreachable("Unhandled source type"); case MVT::i8: switch (DestTy.SimpleTy) { default: llvm_unreachable("Unhandled dest type"); case MVT::i16: return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8; case MVT::i32: return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8; case MVT::i64: return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8; } case MVT::i16: switch (DestTy.SimpleTy) { default: llvm_unreachable("Unhandled dest type"); case MVT::i8: return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16; case MVT::i32: return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16; case MVT::i64: return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16; } case MVT::i32: switch (DestTy.SimpleTy) { default: llvm_unreachable("Unhandled dest type"); case MVT::i8: return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32; case MVT::i16: return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32; case MVT::i64: return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32; } case MVT::i64: switch (DestTy.SimpleTy) { default: llvm_unreachable("Unhandled dest type"); case MVT::i8: return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64; case MVT::i16: return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64; case MVT::i32: return IsSigned ? 
NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64; } } } Index: projects/clang700-import/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp =================================================================== --- projects/clang700-import/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp (revision 337644) +++ projects/clang700-import/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp (revision 337645) @@ -1,244 +1,248 @@ //===- BoundsChecking.cpp - Instrumentation for run-time bounds checking --===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "llvm/Transforms/Instrumentation/BoundsChecking.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Value.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include #include using namespace llvm; #define DEBUG_TYPE "bounds-checking" static cl::opt SingleTrapBB("bounds-checking-single-trap", cl::desc("Use one trap block per function")); STATISTIC(ChecksAdded, "Bounds checks added"); STATISTIC(ChecksSkipped, "Bounds checks skipped"); STATISTIC(ChecksUnable, "Bounds checks unable to add"); using BuilderTy = IRBuilder; -/// Adds run-time 
bounds checks to memory accessing instructions. +/// Gets the conditions under which memory accessing instructions will overflow. /// /// \p Ptr is the pointer that will be read/written, and \p InstVal is either /// the result from the load or the value being stored. It is used to determine /// the size of memory block that is touched. /// -/// \p GetTrapBB is a callable that returns the trap BB to use on failure. -/// -/// Returns true if any change was made to the IR, false otherwise. -template -static bool instrumentMemAccess(Value *Ptr, Value *InstVal, - const DataLayout &DL, TargetLibraryInfo &TLI, - ObjectSizeOffsetEvaluator &ObjSizeEval, - BuilderTy &IRB, GetTrapBBT GetTrapBB, - ScalarEvolution &SE) { +/// Returns the condition under which the access will overflow. +static Value *getBoundsCheckCond(Value *Ptr, Value *InstVal, + const DataLayout &DL, TargetLibraryInfo &TLI, + ObjectSizeOffsetEvaluator &ObjSizeEval, + BuilderTy &IRB, ScalarEvolution &SE) { uint64_t NeededSize = DL.getTypeStoreSize(InstVal->getType()); LLVM_DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize) << " bytes\n"); SizeOffsetEvalType SizeOffset = ObjSizeEval.compute(Ptr); if (!ObjSizeEval.bothKnown(SizeOffset)) { ++ChecksUnable; - return false; + return nullptr; } Value *Size = SizeOffset.first; Value *Offset = SizeOffset.second; ConstantInt *SizeCI = dyn_cast(Size); Type *IntTy = DL.getIntPtrType(Ptr->getType()); Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize); auto SizeRange = SE.getUnsignedRange(SE.getSCEV(Size)); auto OffsetRange = SE.getUnsignedRange(SE.getSCEV(Offset)); auto NeededSizeRange = SE.getUnsignedRange(SE.getSCEV(NeededSizeVal)); // three checks are required to ensure safety: // . Offset >= 0 (since the offset is given from the base ptr) // . Size >= Offset (unsigned) // . Size - Offset >= NeededSize (unsigned) // // optimization: if Size >= 0 (signed), skip 1st check // FIXME: add NSW/NUW here? 
-- we dont care if the subtraction overflows Value *ObjSize = IRB.CreateSub(Size, Offset); Value *Cmp2 = SizeRange.getUnsignedMin().uge(OffsetRange.getUnsignedMax()) ? ConstantInt::getFalse(Ptr->getContext()) : IRB.CreateICmpULT(Size, Offset); Value *Cmp3 = SizeRange.sub(OffsetRange) .getUnsignedMin() .uge(NeededSizeRange.getUnsignedMax()) ? ConstantInt::getFalse(Ptr->getContext()) : IRB.CreateICmpULT(ObjSize, NeededSizeVal); Value *Or = IRB.CreateOr(Cmp2, Cmp3); if ((!SizeCI || SizeCI->getValue().slt(0)) && !SizeRange.getSignedMin().isNonNegative()) { Value *Cmp1 = IRB.CreateICmpSLT(Offset, ConstantInt::get(IntTy, 0)); Or = IRB.CreateOr(Cmp1, Or); } + return Or; +} + +/// Adds run-time bounds checks to memory accessing instructions. +/// +/// \p Or is the condition that should guard the trap. +/// +/// \p GetTrapBB is a callable that returns the trap BB to use on failure. +template +static void insertBoundsCheck(Value *Or, BuilderTy IRB, GetTrapBBT GetTrapBB) { // check if the comparison is always false ConstantInt *C = dyn_cast_or_null(Or); if (C) { ++ChecksSkipped; // If non-zero, nothing to do. if (!C->getZExtValue()) - return true; + return; } ++ChecksAdded; BasicBlock::iterator SplitI = IRB.GetInsertPoint(); BasicBlock *OldBB = SplitI->getParent(); BasicBlock *Cont = OldBB->splitBasicBlock(SplitI); OldBB->getTerminator()->eraseFromParent(); if (C) { // If we have a constant zero, unconditionally branch. // FIXME: We should really handle this differently to bypass the splitting // the block. BranchInst::Create(GetTrapBB(IRB), OldBB); - return true; + return; } // Create the conditional branch. 
BranchInst::Create(GetTrapBB(IRB), Cont, Or, OldBB); - return true; } static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI, ScalarEvolution &SE) { const DataLayout &DL = F.getParent()->getDataLayout(); ObjectSizeOffsetEvaluator ObjSizeEval(DL, &TLI, F.getContext(), /*RoundToAlign=*/true); // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory // touching instructions - std::vector WorkList; + SmallVector, 4> TrapInfo; for (Instruction &I : instructions(F)) { - if (isa(I) || isa(I) || isa(I) || - isa(I)) - WorkList.push_back(&I); + Value *Or = nullptr; + BuilderTy IRB(I.getParent(), BasicBlock::iterator(&I), TargetFolder(DL)); + if (LoadInst *LI = dyn_cast(&I)) { + Or = getBoundsCheckCond(LI->getPointerOperand(), LI, DL, TLI, + ObjSizeEval, IRB, SE); + } else if (StoreInst *SI = dyn_cast(&I)) { + Or = getBoundsCheckCond(SI->getPointerOperand(), SI->getValueOperand(), + DL, TLI, ObjSizeEval, IRB, SE); + } else if (AtomicCmpXchgInst *AI = dyn_cast(&I)) { + Or = getBoundsCheckCond(AI->getPointerOperand(), AI->getCompareOperand(), + DL, TLI, ObjSizeEval, IRB, SE); + } else if (AtomicRMWInst *AI = dyn_cast(&I)) { + Or = getBoundsCheckCond(AI->getPointerOperand(), AI->getValOperand(), DL, + TLI, ObjSizeEval, IRB, SE); + } + if (Or) + TrapInfo.push_back(std::make_pair(&I, Or)); } // Create a trapping basic block on demand using a callback. Depending on // flags, this will either create a single block for the entire function or // will create a fresh block every time it is called. BasicBlock *TrapBB = nullptr; auto GetTrapBB = [&TrapBB](BuilderTy &IRB) { if (TrapBB && SingleTrapBB) return TrapBB; Function *Fn = IRB.GetInsertBlock()->getParent(); // FIXME: This debug location doesn't make a lot of sense in the // `SingleTrapBB` case. 
auto DebugLoc = IRB.getCurrentDebugLocation(); IRBuilder<>::InsertPointGuard Guard(IRB); TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn); IRB.SetInsertPoint(TrapBB); auto *F = Intrinsic::getDeclaration(Fn->getParent(), Intrinsic::trap); CallInst *TrapCall = IRB.CreateCall(F, {}); TrapCall->setDoesNotReturn(); TrapCall->setDoesNotThrow(); TrapCall->setDebugLoc(DebugLoc); IRB.CreateUnreachable(); return TrapBB; }; - bool MadeChange = false; - for (Instruction *Inst : WorkList) { + // Add the checks. + for (const auto &Entry : TrapInfo) { + Instruction *Inst = Entry.first; BuilderTy IRB(Inst->getParent(), BasicBlock::iterator(Inst), TargetFolder(DL)); - if (LoadInst *LI = dyn_cast(Inst)) { - MadeChange |= instrumentMemAccess(LI->getPointerOperand(), LI, DL, TLI, - ObjSizeEval, IRB, GetTrapBB, SE); - } else if (StoreInst *SI = dyn_cast(Inst)) { - MadeChange |= - instrumentMemAccess(SI->getPointerOperand(), SI->getValueOperand(), - DL, TLI, ObjSizeEval, IRB, GetTrapBB, SE); - } else if (AtomicCmpXchgInst *AI = dyn_cast(Inst)) { - MadeChange |= - instrumentMemAccess(AI->getPointerOperand(), AI->getCompareOperand(), - DL, TLI, ObjSizeEval, IRB, GetTrapBB, SE); - } else if (AtomicRMWInst *AI = dyn_cast(Inst)) { - MadeChange |= - instrumentMemAccess(AI->getPointerOperand(), AI->getValOperand(), DL, - TLI, ObjSizeEval, IRB, GetTrapBB, SE); - } else { - llvm_unreachable("unknown Instruction type"); - } + insertBoundsCheck(Entry.second, IRB, GetTrapBB); } - return MadeChange; + + return !TrapInfo.empty(); } PreservedAnalyses BoundsCheckingPass::run(Function &F, FunctionAnalysisManager &AM) { auto &TLI = AM.getResult(F); auto &SE = AM.getResult(F); if (!addBoundsChecking(F, TLI, SE)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); } namespace { struct BoundsCheckingLegacyPass : public FunctionPass { static char ID; BoundsCheckingLegacyPass() : FunctionPass(ID) { initializeBoundsCheckingLegacyPassPass(*PassRegistry::getPassRegistry()); } bool 
runOnFunction(Function &F) override { auto &TLI = getAnalysis().getTLI(); auto &SE = getAnalysis().getSE(); return addBoundsChecking(F, TLI, SE); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); } }; } // namespace char BoundsCheckingLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(BoundsCheckingLegacyPass, "bounds-checking", "Run-time bounds checking", false, false) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(BoundsCheckingLegacyPass, "bounds-checking", "Run-time bounds checking", false, false) FunctionPass *llvm::createBoundsCheckingLegacyPass() { return new BoundsCheckingLegacyPass(); } Index: projects/clang700-import/contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp =================================================================== --- projects/clang700-import/contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp (revision 337644) +++ projects/clang700-import/contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp (revision 337645) @@ -1,2666 +1,2690 @@ //===--- CGBlocks.cpp - Emit LLVM Code for declarations ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This contains code to emit blocks. 
// //===----------------------------------------------------------------------===// #include "CGBlocks.h" #include "CGDebugInfo.h" #include "CGObjCRuntime.h" #include "CGOpenCLRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" #include "TargetInfo.h" #include "clang/AST/DeclObjC.h" #include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/ADT/SmallSet.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Module.h" #include #include using namespace clang; using namespace CodeGen; CGBlockInfo::CGBlockInfo(const BlockDecl *block, StringRef name) : Name(name), CXXThisIndex(0), CanBeGlobal(false), NeedsCopyDispose(false), HasCXXObject(false), UsesStret(false), HasCapturedVariableLayout(false), LocalAddress(Address::invalid()), StructureType(nullptr), Block(block), DominatingIP(nullptr) { // Skip asm prefix, if any. 'name' is usually taken directly from // the mangled name of the enclosing function. if (!name.empty() && name[0] == '\01') name = name.substr(1); } // Anchor the vtable to this translation unit. BlockByrefHelpers::~BlockByrefHelpers() {} /// Build the given block as a global block. static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, const CGBlockInfo &blockInfo, llvm::Constant *blockFn); /// Build the helper function to copy a block. static llvm::Constant *buildCopyHelper(CodeGenModule &CGM, const CGBlockInfo &blockInfo) { return CodeGenFunction(CGM).GenerateCopyHelperFunction(blockInfo); } /// Build the helper function to dispose of a block. static llvm::Constant *buildDisposeHelper(CodeGenModule &CGM, const CGBlockInfo &blockInfo) { return CodeGenFunction(CGM).GenerateDestroyHelperFunction(blockInfo); } /// buildBlockDescriptor - Build the block descriptor meta-data for a block. /// buildBlockDescriptor is accessed from 5th field of the Block_literal /// meta-data and contains stationary information about the block literal. 
/// Its definition will have 4 (or optionally 6) words. /// \code /// struct Block_descriptor { /// unsigned long reserved; /// unsigned long size; // size of Block_literal metadata in bytes. /// void *copy_func_helper_decl; // optional copy helper. /// void *destroy_func_decl; // optioanl destructor helper. /// void *block_method_encoding_address; // @encode for block literal signature. /// void *block_layout_info; // encoding of captured block variables. /// }; /// \endcode static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM, const CGBlockInfo &blockInfo) { ASTContext &C = CGM.getContext(); llvm::IntegerType *ulong = cast(CGM.getTypes().ConvertType(C.UnsignedLongTy)); llvm::PointerType *i8p = nullptr; if (CGM.getLangOpts().OpenCL) i8p = llvm::Type::getInt8PtrTy( CGM.getLLVMContext(), C.getTargetAddressSpace(LangAS::opencl_constant)); else i8p = CGM.VoidPtrTy; ConstantInitBuilder builder(CGM); auto elements = builder.beginStruct(); // reserved elements.addInt(ulong, 0); // Size // FIXME: What is the right way to say this doesn't fit? We should give // a user diagnostic in that case. Better fix would be to change the // API to size_t. elements.addInt(ulong, blockInfo.BlockSize.getQuantity()); // Optional copy/dispose helpers. if (blockInfo.needsCopyDisposeHelpers()) { // copy_func_helper_decl elements.add(buildCopyHelper(CGM, blockInfo)); // destroy_func_decl elements.add(buildDisposeHelper(CGM, blockInfo)); } // Signature. Mandatory ObjC-style method descriptor @encode sequence. std::string typeAtEncoding = CGM.getContext().getObjCEncodingForBlock(blockInfo.getBlockExpr()); elements.add(llvm::ConstantExpr::getBitCast( CGM.GetAddrOfConstantCString(typeAtEncoding).getPointer(), i8p)); // GC layout. 
if (C.getLangOpts().ObjC1) { if (CGM.getLangOpts().getGC() != LangOptions::NonGC) elements.add(CGM.getObjCRuntime().BuildGCBlockLayout(CGM, blockInfo)); else elements.add(CGM.getObjCRuntime().BuildRCBlockLayout(CGM, blockInfo)); } else elements.addNullPointer(i8p); unsigned AddrSpace = 0; if (C.getLangOpts().OpenCL) AddrSpace = C.getTargetAddressSpace(LangAS::opencl_constant); llvm::GlobalVariable *global = elements.finishAndCreateGlobal("__block_descriptor_tmp", CGM.getPointerAlign(), /*constant*/ true, llvm::GlobalValue::InternalLinkage, AddrSpace); return llvm::ConstantExpr::getBitCast(global, CGM.getBlockDescriptorType()); } /* Purely notional variadic template describing the layout of a block. template struct Block_literal { /// Initialized to one of: /// extern void *_NSConcreteStackBlock[]; /// extern void *_NSConcreteGlobalBlock[]; /// /// In theory, we could start one off malloc'ed by setting /// BLOCK_NEEDS_FREE, giving it a refcount of 1, and using /// this isa: /// extern void *_NSConcreteMallocBlock[]; struct objc_class *isa; /// These are the flags (with corresponding bit number) that the /// compiler is actually supposed to know about. /// 23. BLOCK_IS_NOESCAPE - indicates that the block is non-escaping /// 25. BLOCK_HAS_COPY_DISPOSE - indicates that the block /// descriptor provides copy and dispose helper functions /// 26. BLOCK_HAS_CXX_OBJ - indicates that there's a captured /// object with a nontrivial destructor or copy constructor /// 28. BLOCK_IS_GLOBAL - indicates that the block is allocated /// as global memory /// 29. BLOCK_USE_STRET - indicates that the block function /// uses stret, which objc_msgSend needs to know about /// 30. BLOCK_HAS_SIGNATURE - indicates that the block has an /// @encoded signature string /// And we're not supposed to manipulate these: /// 24. BLOCK_NEEDS_FREE - indicates that the block has been moved /// to malloc'ed memory /// 27. 
BLOCK_IS_GC - indicates that the block has been moved to /// to GC-allocated memory /// Additionally, the bottom 16 bits are a reference count which /// should be zero on the stack. int flags; /// Reserved; should be zero-initialized. int reserved; /// Function pointer generated from block literal. _ResultType (*invoke)(Block_literal *, _ParamTypes...); /// Block description metadata generated from block literal. struct Block_descriptor *block_descriptor; /// Captured values follow. _CapturesTypes captures...; }; */ namespace { /// A chunk of data that we actually have to capture in the block. struct BlockLayoutChunk { CharUnits Alignment; CharUnits Size; Qualifiers::ObjCLifetime Lifetime; const BlockDecl::Capture *Capture; // null for 'this' llvm::Type *Type; QualType FieldType; BlockLayoutChunk(CharUnits align, CharUnits size, Qualifiers::ObjCLifetime lifetime, const BlockDecl::Capture *capture, llvm::Type *type, QualType fieldType) : Alignment(align), Size(size), Lifetime(lifetime), Capture(capture), Type(type), FieldType(fieldType) {} /// Tell the block info that this chunk has the given field index. void setIndex(CGBlockInfo &info, unsigned index, CharUnits offset) { if (!Capture) { info.CXXThisIndex = index; info.CXXThisOffset = offset; } else { auto C = CGBlockInfo::Capture::makeIndex(index, offset, FieldType); info.Captures.insert({Capture->getVariable(), C}); } } }; /// Order by 1) all __strong together 2) next, all byfref together 3) next, /// all __weak together. Preserve descending alignment in all situations. 
bool operator<(const BlockLayoutChunk &left, const BlockLayoutChunk &right) { if (left.Alignment != right.Alignment) return left.Alignment > right.Alignment; auto getPrefOrder = [](const BlockLayoutChunk &chunk) { if (chunk.Capture && chunk.Capture->isByRef()) return 1; if (chunk.Lifetime == Qualifiers::OCL_Strong) return 0; if (chunk.Lifetime == Qualifiers::OCL_Weak) return 2; return 3; }; return getPrefOrder(left) < getPrefOrder(right); } } // end anonymous namespace /// Determines if the given type is safe for constant capture in C++. static bool isSafeForCXXConstantCapture(QualType type) { const RecordType *recordType = type->getBaseElementTypeUnsafe()->getAs(); // Only records can be unsafe. if (!recordType) return true; const auto *record = cast(recordType->getDecl()); // Maintain semantics for classes with non-trivial dtors or copy ctors. if (!record->hasTrivialDestructor()) return false; if (record->hasNonTrivialCopyConstructor()) return false; // Otherwise, we just have to make sure there aren't any mutable // fields that might have changed since initialization. return !record->hasMutableFields(); } /// It is illegal to modify a const object after initialization. /// Therefore, if a const object has a constant initializer, we don't /// actually need to keep storage for it in the block; we'll just /// rematerialize it at the start of the block function. This is /// acceptable because we make no promises about address stability of /// captured variables. static llvm::Constant *tryCaptureAsConstant(CodeGenModule &CGM, CodeGenFunction *CGF, const VarDecl *var) { // Return if this is a function parameter. We shouldn't try to // rematerialize default arguments of function parameters. if (isa(var)) return nullptr; QualType type = var->getType(); // We can only do this if the variable is const. 
if (!type.isConstQualified()) return nullptr; // Furthermore, in C++ we have to worry about mutable fields: // C++ [dcl.type.cv]p4: // Except that any class member declared mutable can be // modified, any attempt to modify a const object during its // lifetime results in undefined behavior. if (CGM.getLangOpts().CPlusPlus && !isSafeForCXXConstantCapture(type)) return nullptr; // If the variable doesn't have any initializer (shouldn't this be // invalid?), it's not clear what we should do. Maybe capture as // zero? const Expr *init = var->getInit(); if (!init) return nullptr; return ConstantEmitter(CGM, CGF).tryEmitAbstractForInitializer(*var); } /// Get the low bit of a nonzero character count. This is the /// alignment of the nth byte if the 0th byte is universally aligned. static CharUnits getLowBit(CharUnits v) { return CharUnits::fromQuantity(v.getQuantity() & (~v.getQuantity() + 1)); } static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info, SmallVectorImpl &elementTypes) { assert(elementTypes.empty()); if (CGM.getLangOpts().OpenCL) { // The header is basically 'struct { int; int; // custom_fields; }'. Assert that struct is packed. elementTypes.push_back(CGM.IntTy); /* total size */ elementTypes.push_back(CGM.IntTy); /* align */ unsigned Offset = 2 * CGM.getIntSize().getQuantity(); unsigned BlockAlign = CGM.getIntAlign().getQuantity(); if (auto *Helper = CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { for (auto I : Helper->getCustomFieldTypes()) /* custom fields */ { // TargetOpenCLBlockHelp needs to make sure the struct is packed. // If necessary, add padding fields to the custom fields. 
unsigned Align = CGM.getDataLayout().getABITypeAlignment(I); if (BlockAlign < Align) BlockAlign = Align; assert(Offset % Align == 0); Offset += CGM.getDataLayout().getTypeAllocSize(I); elementTypes.push_back(I); } } info.BlockAlign = CharUnits::fromQuantity(BlockAlign); info.BlockSize = CharUnits::fromQuantity(Offset); } else { // The header is basically 'struct { void *; int; int; void *; void *; }'. // Assert that the struct is packed. assert(CGM.getIntSize() <= CGM.getPointerSize()); assert(CGM.getIntAlign() <= CGM.getPointerAlign()); assert((2 * CGM.getIntSize()).isMultipleOf(CGM.getPointerAlign())); info.BlockAlign = CGM.getPointerAlign(); info.BlockSize = 3 * CGM.getPointerSize() + 2 * CGM.getIntSize(); elementTypes.push_back(CGM.VoidPtrTy); elementTypes.push_back(CGM.IntTy); elementTypes.push_back(CGM.IntTy); elementTypes.push_back(CGM.VoidPtrTy); elementTypes.push_back(CGM.getBlockDescriptorType()); } } static QualType getCaptureFieldType(const CodeGenFunction &CGF, const BlockDecl::Capture &CI) { const VarDecl *VD = CI.getVariable(); // If the variable is captured by an enclosing block or lambda expression, // use the type of the capture field. if (CGF.BlockInfo && CI.isNested()) return CGF.BlockInfo->getCapture(VD).fieldType(); if (auto *FD = CGF.LambdaCaptureFields.lookup(VD)) return FD->getType(); return VD->getType(); } /// Compute the layout of the given block. Attempts to lay the block /// out with minimal space requirements. 
static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, CGBlockInfo &info) { ASTContext &C = CGM.getContext(); const BlockDecl *block = info.getBlockDecl(); SmallVector elementTypes; initializeForBlockHeader(CGM, info, elementTypes); bool hasNonConstantCustomFields = false; if (auto *OpenCLHelper = CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) hasNonConstantCustomFields = !OpenCLHelper->areAllCustomFieldValuesConstant(info); if (!block->hasCaptures() && !hasNonConstantCustomFields) { info.StructureType = llvm::StructType::get(CGM.getLLVMContext(), elementTypes, true); info.CanBeGlobal = true; return; } else if (C.getLangOpts().ObjC1 && CGM.getLangOpts().getGC() == LangOptions::NonGC) info.HasCapturedVariableLayout = true; // Collect the layout chunks. SmallVector layout; layout.reserve(block->capturesCXXThis() + (block->capture_end() - block->capture_begin())); CharUnits maxFieldAlign; // First, 'this'. if (block->capturesCXXThis()) { assert(CGF && CGF->CurFuncDecl && isa(CGF->CurFuncDecl) && "Can't capture 'this' outside a method"); QualType thisType = cast(CGF->CurFuncDecl)->getThisType(C); // Theoretically, this could be in a different address space, so // don't assume standard pointer size/align. llvm::Type *llvmType = CGM.getTypes().ConvertType(thisType); std::pair tinfo = CGM.getContext().getTypeInfoInChars(thisType); maxFieldAlign = std::max(maxFieldAlign, tinfo.second); layout.push_back(BlockLayoutChunk(tinfo.second, tinfo.first, Qualifiers::OCL_None, nullptr, llvmType, thisType)); } // Next, all the block captures. for (const auto &CI : block->captures()) { const VarDecl *variable = CI.getVariable(); if (CI.isByRef()) { // We have to copy/dispose of the __block reference. info.NeedsCopyDispose = true; // Just use void* instead of a pointer to the byref type. 
CharUnits align = CGM.getPointerAlign(); maxFieldAlign = std::max(maxFieldAlign, align); layout.push_back(BlockLayoutChunk(align, CGM.getPointerSize(), Qualifiers::OCL_None, &CI, CGM.VoidPtrTy, variable->getType())); continue; } // Otherwise, build a layout chunk with the size and alignment of // the declaration. if (llvm::Constant *constant = tryCaptureAsConstant(CGM, CGF, variable)) { info.Captures[variable] = CGBlockInfo::Capture::makeConstant(constant); continue; } // If we have a lifetime qualifier, honor it for capture purposes. // That includes *not* copying it if it's __unsafe_unretained. Qualifiers::ObjCLifetime lifetime = variable->getType().getObjCLifetime(); if (lifetime) { switch (lifetime) { case Qualifiers::OCL_None: llvm_unreachable("impossible"); case Qualifiers::OCL_ExplicitNone: case Qualifiers::OCL_Autoreleasing: break; case Qualifiers::OCL_Strong: case Qualifiers::OCL_Weak: info.NeedsCopyDispose = true; } // Block pointers require copy/dispose. So do Objective-C pointers. } else if (variable->getType()->isObjCRetainableType()) { // But honor the inert __unsafe_unretained qualifier, which doesn't // actually make it into the type system. if (variable->getType()->isObjCInertUnsafeUnretainedType()) { lifetime = Qualifiers::OCL_ExplicitNone; } else { info.NeedsCopyDispose = true; // used for mrr below. lifetime = Qualifiers::OCL_Strong; } // So do types that require non-trivial copy construction. } else if (CI.hasCopyExpr()) { info.NeedsCopyDispose = true; info.HasCXXObject = true; // So do C structs that require non-trivial copy construction or // destruction. } else if (variable->getType().isNonTrivialToPrimitiveCopy() == QualType::PCK_Struct || variable->getType().isDestructedType() == QualType::DK_nontrivial_c_struct) { info.NeedsCopyDispose = true; // And so do types with destructors. 
} else if (CGM.getLangOpts().CPlusPlus) { if (const CXXRecordDecl *record = variable->getType()->getAsCXXRecordDecl()) { if (!record->hasTrivialDestructor()) { info.HasCXXObject = true; info.NeedsCopyDispose = true; } } } QualType VT = getCaptureFieldType(*CGF, CI); CharUnits size = C.getTypeSizeInChars(VT); CharUnits align = C.getDeclAlign(variable); maxFieldAlign = std::max(maxFieldAlign, align); llvm::Type *llvmType = CGM.getTypes().ConvertTypeForMem(VT); layout.push_back( BlockLayoutChunk(align, size, lifetime, &CI, llvmType, VT)); } // If that was everything, we're done here. if (layout.empty()) { info.StructureType = llvm::StructType::get(CGM.getLLVMContext(), elementTypes, true); info.CanBeGlobal = true; return; } // Sort the layout by alignment. We have to use a stable sort here // to get reproducible results. There should probably be an // llvm::array_pod_stable_sort. std::stable_sort(layout.begin(), layout.end()); // Needed for blocks layout info. info.BlockHeaderForcedGapOffset = info.BlockSize; info.BlockHeaderForcedGapSize = CharUnits::Zero(); CharUnits &blockSize = info.BlockSize; info.BlockAlign = std::max(maxFieldAlign, info.BlockAlign); // Assuming that the first byte in the header is maximally aligned, // get the alignment of the first byte following the header. CharUnits endAlign = getLowBit(blockSize); // If the end of the header isn't satisfactorily aligned for the // maximum thing, look for things that are okay with the header-end // alignment, and keep appending them until we get something that's // aligned right. This algorithm is only guaranteed optimal if // that condition is satisfied at some point; otherwise we can get // things like: // header // next byte has alignment 4 // something_with_size_5; // next byte has alignment 1 // something_with_alignment_8; // which has 7 bytes of padding, as opposed to the naive solution // which might have less (?). 
if (endAlign < maxFieldAlign) { SmallVectorImpl::iterator li = layout.begin() + 1, le = layout.end(); // Look for something that the header end is already // satisfactorily aligned for. for (; li != le && endAlign < li->Alignment; ++li) ; // If we found something that's naturally aligned for the end of // the header, keep adding things... if (li != le) { SmallVectorImpl::iterator first = li; for (; li != le; ++li) { assert(endAlign >= li->Alignment); li->setIndex(info, elementTypes.size(), blockSize); elementTypes.push_back(li->Type); blockSize += li->Size; endAlign = getLowBit(blockSize); // ...until we get to the alignment of the maximum field. if (endAlign >= maxFieldAlign) { break; } } // Don't re-append everything we just appended. layout.erase(first, li); } } assert(endAlign == getLowBit(blockSize)); // At this point, we just have to add padding if the end align still // isn't aligned right. if (endAlign < maxFieldAlign) { CharUnits newBlockSize = blockSize.alignTo(maxFieldAlign); CharUnits padding = newBlockSize - blockSize; // If we haven't yet added any fields, remember that there was an // initial gap; this need to go into the block layout bit map. if (blockSize == info.BlockHeaderForcedGapOffset) { info.BlockHeaderForcedGapSize = padding; } elementTypes.push_back(llvm::ArrayType::get(CGM.Int8Ty, padding.getQuantity())); blockSize = newBlockSize; endAlign = getLowBit(blockSize); // might be > maxFieldAlign } assert(endAlign >= maxFieldAlign); assert(endAlign == getLowBit(blockSize)); // Slam everything else on now. This works because they have // strictly decreasing alignment and we expect that size is always a // multiple of alignment. for (SmallVectorImpl::iterator li = layout.begin(), le = layout.end(); li != le; ++li) { if (endAlign < li->Alignment) { // size may not be multiple of alignment. This can only happen with // an over-aligned variable. We will be adding a padding field to // make the size be multiple of alignment. 
CharUnits padding = li->Alignment - endAlign; elementTypes.push_back(llvm::ArrayType::get(CGM.Int8Ty, padding.getQuantity())); blockSize += padding; endAlign = getLowBit(blockSize); } assert(endAlign >= li->Alignment); li->setIndex(info, elementTypes.size(), blockSize); elementTypes.push_back(li->Type); blockSize += li->Size; endAlign = getLowBit(blockSize); } info.StructureType = llvm::StructType::get(CGM.getLLVMContext(), elementTypes, true); } /// Enter the scope of a block. This should be run at the entrance to /// a full-expression so that the block's cleanups are pushed at the /// right place in the stack. static void enterBlockScope(CodeGenFunction &CGF, BlockDecl *block) { assert(CGF.HaveInsertPoint()); // Allocate the block info and place it at the head of the list. CGBlockInfo &blockInfo = *new CGBlockInfo(block, CGF.CurFn->getName()); blockInfo.NextBlockInfo = CGF.FirstBlockInfo; CGF.FirstBlockInfo = &blockInfo; // Compute information about the layout, etc., of this block, // pushing cleanups as necessary. computeBlockInfo(CGF.CGM, &CGF, blockInfo); // Nothing else to do if it can be global. if (blockInfo.CanBeGlobal) return; // Make the allocation for the block. blockInfo.LocalAddress = CGF.CreateTempAlloca(blockInfo.StructureType, blockInfo.BlockAlign, "block"); // If there are cleanups to emit, enter them (but inactive). if (!blockInfo.NeedsCopyDispose) return; // Walk through the captures (in order) and find the ones not // captured by constant. for (const auto &CI : block->captures()) { // Ignore __block captures; there's nothing special in the // on-stack block that we need to do for them. if (CI.isByRef()) continue; // Ignore variables that are constant-captured. const VarDecl *variable = CI.getVariable(); CGBlockInfo::Capture &capture = blockInfo.getCapture(variable); if (capture.isConstant()) continue; // Ignore objects that aren't destructed. 
QualType VT = getCaptureFieldType(CGF, CI); QualType::DestructionKind dtorKind = VT.isDestructedType(); if (dtorKind == QualType::DK_none) continue; CodeGenFunction::Destroyer *destroyer; // Block captures count as local values and have imprecise semantics. // They also can't be arrays, so need to worry about that. // // For const-qualified captures, emit clang.arc.use to ensure the captured // object doesn't get released while we are still depending on its validity // within the block. if (VT.isConstQualified() && VT.getObjCLifetime() == Qualifiers::OCL_Strong && CGF.CGM.getCodeGenOpts().OptimizationLevel != 0) { assert(CGF.CGM.getLangOpts().ObjCAutoRefCount && "expected ObjC ARC to be enabled"); destroyer = CodeGenFunction::emitARCIntrinsicUse; } else if (dtorKind == QualType::DK_objc_strong_lifetime) { destroyer = CodeGenFunction::destroyARCStrongImprecise; } else { destroyer = CGF.getDestroyer(dtorKind); } // GEP down to the address. Address addr = CGF.Builder.CreateStructGEP(blockInfo.LocalAddress, capture.getIndex(), capture.getOffset()); // We can use that GEP as the dominating IP. if (!blockInfo.DominatingIP) blockInfo.DominatingIP = cast(addr.getPointer()); CleanupKind cleanupKind = InactiveNormalCleanup; bool useArrayEHCleanup = CGF.needsEHCleanup(dtorKind); if (useArrayEHCleanup) cleanupKind = InactiveNormalAndEHCleanup; CGF.pushDestroy(cleanupKind, addr, VT, destroyer, useArrayEHCleanup); // Remember where that cleanup was. capture.setCleanup(CGF.EHStack.stable_begin()); } } /// Enter a full-expression with a non-trivial number of objects to /// clean up. This is in this file because, at the moment, the only /// kind of cleanup object is a BlockDecl*. void CodeGenFunction::enterNonTrivialFullExpression(const ExprWithCleanups *E) { assert(E->getNumObjects() != 0); for (const ExprWithCleanups::CleanupObject &C : E->getObjects()) enterBlockScope(*this, C); } /// Find the layout for the given block in a linked list and remove it. 
static CGBlockInfo *findAndRemoveBlockInfo(CGBlockInfo **head, const BlockDecl *block) { while (true) { assert(head && *head); CGBlockInfo *cur = *head; // If this is the block we're looking for, splice it out of the list. if (cur->getBlockDecl() == block) { *head = cur->NextBlockInfo; return cur; } head = &cur->NextBlockInfo; } } /// Destroy a chain of block layouts. void CodeGenFunction::destroyBlockInfos(CGBlockInfo *head) { assert(head && "destroying an empty chain"); do { CGBlockInfo *cur = head; head = cur->NextBlockInfo; delete cur; } while (head != nullptr); } /// Emit a block literal expression in the current function. llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) { // If the block has no captures, we won't have a pre-computed // layout for it. if (!blockExpr->getBlockDecl()->hasCaptures()) { // The block literal is emitted as a global variable, and the block invoke // function has to be extracted from its initializer. if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr)) { return Block; } CGBlockInfo blockInfo(blockExpr->getBlockDecl(), CurFn->getName()); computeBlockInfo(CGM, this, blockInfo); blockInfo.BlockExpression = blockExpr; return EmitBlockLiteral(blockInfo); } // Find the block info for this block and take ownership of it. std::unique_ptr blockInfo; blockInfo.reset(findAndRemoveBlockInfo(&FirstBlockInfo, blockExpr->getBlockDecl())); blockInfo->BlockExpression = blockExpr; return EmitBlockLiteral(*blockInfo); } llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL; // Using the computed layout, generate the actual block function. 
bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda(); CodeGenFunction BlockCGF{CGM, true}; BlockCGF.SanOpts = SanOpts; auto *InvokeFn = BlockCGF.GenerateBlockFunction( CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal); // If there is nothing to capture, we can emit this as a global block. if (blockInfo.CanBeGlobal) return CGM.getAddrOfGlobalBlockIfEmitted(blockInfo.BlockExpression); // Otherwise, we have to emit this as a local block. Address blockAddr = blockInfo.LocalAddress; assert(blockAddr.isValid() && "block has no address!"); llvm::Constant *isa; llvm::Constant *descriptor; BlockFlags flags; if (!IsOpenCL) { // If the block is non-escaping, set field 'isa 'to NSConcreteGlobalBlock // and set the BLOCK_IS_GLOBAL bit of field 'flags'. Copying a non-escaping // block just returns the original block and releasing it is a no-op. llvm::Constant *blockISA = blockInfo.getBlockDecl()->doesNotEscape() ? CGM.getNSConcreteGlobalBlock() : CGM.getNSConcreteStackBlock(); isa = llvm::ConstantExpr::getBitCast(blockISA, VoidPtrTy); // Build the block descriptor. descriptor = buildBlockDescriptor(CGM, blockInfo); // Compute the initial on-stack block flags. flags = BLOCK_HAS_SIGNATURE; if (blockInfo.HasCapturedVariableLayout) flags |= BLOCK_HAS_EXTENDED_LAYOUT; if (blockInfo.needsCopyDisposeHelpers()) flags |= BLOCK_HAS_COPY_DISPOSE; if (blockInfo.HasCXXObject) flags |= BLOCK_HAS_CXX_OBJ; if (blockInfo.UsesStret) flags |= BLOCK_USE_STRET; if (blockInfo.getBlockDecl()->doesNotEscape()) flags |= BLOCK_IS_NOESCAPE | BLOCK_IS_GLOBAL; } auto projectField = [&](unsigned index, CharUnits offset, const Twine &name) -> Address { return Builder.CreateStructGEP(blockAddr, index, offset, name); }; auto storeField = [&](llvm::Value *value, unsigned index, CharUnits offset, const Twine &name) { Builder.CreateStore(value, projectField(index, offset, name)); }; // Initialize the block header. { // We assume all the header fields are densely packed. 
unsigned index = 0; CharUnits offset; auto addHeaderField = [&](llvm::Value *value, CharUnits size, const Twine &name) { storeField(value, index, offset, name); offset += size; index++; }; if (!IsOpenCL) { addHeaderField(isa, getPointerSize(), "block.isa"); addHeaderField(llvm::ConstantInt::get(IntTy, flags.getBitMask()), getIntSize(), "block.flags"); addHeaderField(llvm::ConstantInt::get(IntTy, 0), getIntSize(), "block.reserved"); } else { addHeaderField( llvm::ConstantInt::get(IntTy, blockInfo.BlockSize.getQuantity()), getIntSize(), "block.size"); addHeaderField( llvm::ConstantInt::get(IntTy, blockInfo.BlockAlign.getQuantity()), getIntSize(), "block.align"); } if (!IsOpenCL) { addHeaderField(llvm::ConstantExpr::getBitCast(InvokeFn, VoidPtrTy), getPointerSize(), "block.invoke"); addHeaderField(descriptor, getPointerSize(), "block.descriptor"); } else if (auto *Helper = CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { for (auto I : Helper->getCustomFieldValues(*this, blockInfo)) { addHeaderField( I.first, CharUnits::fromQuantity( CGM.getDataLayout().getTypeAllocSize(I.first->getType())), I.second); } } } // Finally, capture all the values into the block. const BlockDecl *blockDecl = blockInfo.getBlockDecl(); // First, 'this'. if (blockDecl->capturesCXXThis()) { Address addr = projectField(blockInfo.CXXThisIndex, blockInfo.CXXThisOffset, "block.captured-this.addr"); Builder.CreateStore(LoadCXXThis(), addr); } // Next, captured variables. for (const auto &CI : blockDecl->captures()) { const VarDecl *variable = CI.getVariable(); const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable); // Ignore constant captures. if (capture.isConstant()) continue; QualType type = capture.fieldType(); // This will be a [[type]]*, except that a byref entry will just be // an i8**. Address blockField = projectField(capture.getIndex(), capture.getOffset(), "block.captured"); // Compute the address of the thing we're going to move into the // block literal. 
Address src = Address::invalid(); if (blockDecl->isConversionFromLambda()) { // The lambda capture in a lambda's conversion-to-block-pointer is // special; we'll simply emit it directly. src = Address::invalid(); } else if (CI.isByRef()) { if (BlockInfo && CI.isNested()) { // We need to use the capture from the enclosing block. const CGBlockInfo::Capture &enclosingCapture = BlockInfo->getCapture(variable); // This is a [[type]]*, except that a byref entry will just be an i8**. src = Builder.CreateStructGEP(LoadBlockStruct(), enclosingCapture.getIndex(), enclosingCapture.getOffset(), "block.capture.addr"); } else { auto I = LocalDeclMap.find(variable); assert(I != LocalDeclMap.end()); src = I->second; } } else { DeclRefExpr declRef(const_cast(variable), /*RefersToEnclosingVariableOrCapture*/ CI.isNested(), type.getNonReferenceType(), VK_LValue, SourceLocation()); src = EmitDeclRefLValue(&declRef).getAddress(); }; // For byrefs, we just write the pointer to the byref struct into // the block field. There's no need to chase the forwarding // pointer at this point, since we're building something that will // live a shorter life than the stack byref anyway. if (CI.isByRef()) { // Get a void* that points to the byref struct. llvm::Value *byrefPointer; if (CI.isNested()) byrefPointer = Builder.CreateLoad(src, "byref.capture"); else byrefPointer = Builder.CreateBitCast(src.getPointer(), VoidPtrTy); // Write that void* into the capture field. Builder.CreateStore(byrefPointer, blockField); // If we have a copy constructor, evaluate that into the block field. } else if (const Expr *copyExpr = CI.getCopyExpr()) { if (blockDecl->isConversionFromLambda()) { // If we have a lambda conversion, emit the expression // directly into the block instead. 
AggValueSlot Slot = AggValueSlot::forAddr(blockField, Qualifiers(), AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, AggValueSlot::DoesNotOverlap); EmitAggExpr(copyExpr, Slot); } else { EmitSynthesizedCXXCopyCtor(blockField, src, copyExpr); } // If it's a reference variable, copy the reference into the block field. } else if (type->isReferenceType()) { Builder.CreateStore(src.getPointer(), blockField); // If type is const-qualified, copy the value into the block field. } else if (type.isConstQualified() && type.getObjCLifetime() == Qualifiers::OCL_Strong && CGM.getCodeGenOpts().OptimizationLevel != 0) { llvm::Value *value = Builder.CreateLoad(src, "captured"); Builder.CreateStore(value, blockField); // If this is an ARC __strong block-pointer variable, don't do a // block copy. // // TODO: this can be generalized into the normal initialization logic: // we should never need to do a block-copy when initializing a local // variable, because the local variable's lifetime should be strictly // contained within the stack block's. } else if (type.getObjCLifetime() == Qualifiers::OCL_Strong && type->isBlockPointerType()) { // Load the block and do a simple retain. llvm::Value *value = Builder.CreateLoad(src, "block.captured_block"); value = EmitARCRetainNonBlock(value); // Do a primitive store to the block field. Builder.CreateStore(value, blockField); // Otherwise, fake up a POD copy into the block field. } else { // Fake up a new variable so that EmitScalarInit doesn't think // we're referring to the variable in its own initializer. ImplicitParamDecl BlockFieldPseudoVar(getContext(), type, ImplicitParamDecl::Other); // We use one of these or the other depending on whether the // reference is nested. 
DeclRefExpr declRef(const_cast(variable), /*RefersToEnclosingVariableOrCapture*/ CI.isNested(), type, VK_LValue, SourceLocation()); ImplicitCastExpr l2r(ImplicitCastExpr::OnStack, type, CK_LValueToRValue, &declRef, VK_RValue); // FIXME: Pass a specific location for the expr init so that the store is // attributed to a reasonable location - otherwise it may be attributed to // locations of subexpressions in the initialization. EmitExprAsInit(&l2r, &BlockFieldPseudoVar, MakeAddrLValue(blockField, type, AlignmentSource::Decl), /*captured by init*/ false); } // Activate the cleanup if layout pushed one. if (!CI.isByRef()) { EHScopeStack::stable_iterator cleanup = capture.getCleanup(); if (cleanup.isValid()) ActivateCleanupBlock(cleanup, blockInfo.DominatingIP); } } // Cast to the converted block-pointer type, which happens (somewhat // unfortunately) to be a pointer to function type. llvm::Value *result = Builder.CreatePointerCast( blockAddr.getPointer(), ConvertType(blockInfo.getBlockExpr()->getType())); if (IsOpenCL) { CGM.getOpenCLRuntime().recordBlockInfo(blockInfo.BlockExpression, InvokeFn, result); } return result; } llvm::Type *CodeGenModule::getBlockDescriptorType() { if (BlockDescriptorType) return BlockDescriptorType; llvm::Type *UnsignedLongTy = getTypes().ConvertType(getContext().UnsignedLongTy); // struct __block_descriptor { // unsigned long reserved; // unsigned long block_size; // // // later, the following will be added // // struct { // void (*copyHelper)(); // void (*copyHelper)(); // } helpers; // !!! optional // // const char *signature; // the block signature // const char *layout; // reserved // }; BlockDescriptorType = llvm::StructType::create( "struct.__block_descriptor", UnsignedLongTy, UnsignedLongTy); // Now form a pointer to that. 
unsigned AddrSpace = 0; if (getLangOpts().OpenCL) AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_constant); BlockDescriptorType = llvm::PointerType::get(BlockDescriptorType, AddrSpace); return BlockDescriptorType; } llvm::Type *CodeGenModule::getGenericBlockLiteralType() { assert(!getLangOpts().OpenCL && "OpenCL does not need this"); if (GenericBlockLiteralType) return GenericBlockLiteralType; llvm::Type *BlockDescPtrTy = getBlockDescriptorType(); // struct __block_literal_generic { // void *__isa; // int __flags; // int __reserved; // void (*__invoke)(void *); // struct __block_descriptor *__descriptor; // }; GenericBlockLiteralType = llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy, IntTy, IntTy, VoidPtrTy, BlockDescPtrTy); return GenericBlockLiteralType; } RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue) { const BlockPointerType *BPT = E->getCallee()->getType()->getAs(); llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee()); llvm::Value *FuncPtr; if (!CGM.getLangOpts().OpenCL) { // Get a pointer to the generic block literal. llvm::Type *BlockLiteralTy = llvm::PointerType::get(CGM.getGenericBlockLiteralType(), 0); // Bitcast the callee to a block literal. BlockPtr = Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal"); // Get the function pointer from the literal. FuncPtr = Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, 3); } // Add the block literal. 
CallArgList Args; QualType VoidPtrQualTy = getContext().VoidPtrTy; llvm::Type *GenericVoidPtrTy = VoidPtrTy; if (getLangOpts().OpenCL) { GenericVoidPtrTy = CGM.getOpenCLRuntime().getGenericVoidPointerType(); VoidPtrQualTy = getContext().getPointerType(getContext().getAddrSpaceQualType( getContext().VoidTy, LangAS::opencl_generic)); } BlockPtr = Builder.CreatePointerCast(BlockPtr, GenericVoidPtrTy); Args.add(RValue::get(BlockPtr), VoidPtrQualTy); QualType FnType = BPT->getPointeeType(); // And the rest of the arguments. EmitCallArgs(Args, FnType->getAs(), E->arguments()); // Load the function. llvm::Value *Func; if (CGM.getLangOpts().OpenCL) Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee()); else Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); const FunctionType *FuncTy = FnType->castAs(); const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeBlockFunctionCall(Args, FuncTy); // Cast the function pointer to the right type. llvm::Type *BlockFTy = CGM.getTypes().GetFunctionType(FnInfo); llvm::Type *BlockFTyPtr = llvm::PointerType::getUnqual(BlockFTy); Func = Builder.CreatePointerCast(Func, BlockFTyPtr); // Prepare the callee. CGCallee Callee(CGCalleeInfo(), Func); // And call the block. return EmitCall(FnInfo, Callee, ReturnValue, Args); } Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable, bool isByRef) { assert(BlockInfo && "evaluating block ref without block information?"); const CGBlockInfo::Capture &capture = BlockInfo->getCapture(variable); // Handle constant captures. if (capture.isConstant()) return LocalDeclMap.find(variable)->second; Address addr = Builder.CreateStructGEP(LoadBlockStruct(), capture.getIndex(), capture.getOffset(), "block.capture.addr"); if (isByRef) { // addr should be a void** right now. Load, then cast the result // to byref*. 
auto &byrefInfo = getBlockByrefInfo(variable); addr = Address(Builder.CreateLoad(addr), byrefInfo.ByrefAlignment); auto byrefPointerType = llvm::PointerType::get(byrefInfo.Type, 0); addr = Builder.CreateBitCast(addr, byrefPointerType, "byref.addr"); addr = emitBlockByrefAddress(addr, byrefInfo, /*follow*/ true, variable->getName()); } if (capture.fieldType()->isReferenceType()) addr = EmitLoadOfReference(MakeAddrLValue(addr, capture.fieldType())); return addr; } void CodeGenModule::setAddrOfGlobalBlock(const BlockExpr *BE, llvm::Constant *Addr) { bool Ok = EmittedGlobalBlocks.insert(std::make_pair(BE, Addr)).second; (void)Ok; assert(Ok && "Trying to replace an already-existing global block!"); } llvm::Constant * CodeGenModule::GetAddrOfGlobalBlock(const BlockExpr *BE, StringRef Name) { if (llvm::Constant *Block = getAddrOfGlobalBlockIfEmitted(BE)) return Block; CGBlockInfo blockInfo(BE->getBlockDecl(), Name); blockInfo.BlockExpression = BE; // Compute information about the layout, etc., of this block. computeBlockInfo(*this, nullptr, blockInfo); // Using that metadata, generate the actual block function. { CodeGenFunction::DeclMapTy LocalDeclMap; CodeGenFunction(*this).GenerateBlockFunction( GlobalDecl(), blockInfo, LocalDeclMap, /*IsLambdaConversionToBlock*/ false, /*BuildGlobalBlock*/ true); } return getAddrOfGlobalBlockIfEmitted(BE); } static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, const CGBlockInfo &blockInfo, llvm::Constant *blockFn) { assert(blockInfo.CanBeGlobal); // Callers should detect this case on their own: calling this function // generally requires computing layout information, which is a waste of time // if we've already emitted this block. assert(!CGM.getAddrOfGlobalBlockIfEmitted(blockInfo.BlockExpression) && "Refusing to re-emit a global block."); // Generate the constants for the block literal initializer. 
ConstantInitBuilder builder(CGM); auto fields = builder.beginStruct(); bool IsOpenCL = CGM.getLangOpts().OpenCL; + bool IsWindows = CGM.getTarget().getTriple().isOSWindows(); if (!IsOpenCL) { // isa - fields.add(CGM.getNSConcreteGlobalBlock()); + if (IsWindows) + fields.addNullPointer(CGM.Int8PtrPtrTy); + else + fields.add(CGM.getNSConcreteGlobalBlock()); // __flags BlockFlags flags = BLOCK_IS_GLOBAL | BLOCK_HAS_SIGNATURE; if (blockInfo.UsesStret) flags |= BLOCK_USE_STRET; fields.addInt(CGM.IntTy, flags.getBitMask()); // Reserved fields.addInt(CGM.IntTy, 0); // Function fields.add(blockFn); } else { fields.addInt(CGM.IntTy, blockInfo.BlockSize.getQuantity()); fields.addInt(CGM.IntTy, blockInfo.BlockAlign.getQuantity()); } if (!IsOpenCL) { // Descriptor fields.add(buildBlockDescriptor(CGM, blockInfo)); } else if (auto *Helper = CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { for (auto I : Helper->getCustomFieldValues(CGM, blockInfo)) { fields.add(I); } } unsigned AddrSpace = 0; if (CGM.getContext().getLangOpts().OpenCL) AddrSpace = CGM.getContext().getTargetAddressSpace(LangAS::opencl_global); llvm::Constant *literal = fields.finishAndCreateGlobal( "__block_literal_global", blockInfo.BlockAlign, - /*constant*/ true, llvm::GlobalVariable::InternalLinkage, AddrSpace); + /*constant*/ !IsWindows, llvm::GlobalVariable::InternalLinkage, AddrSpace); + + // Windows does not allow globals to be initialised to point to globals in + // different DLLs. Any such variables must run code to initialise them. 
+ if (IsWindows) { + auto *Init = llvm::Function::Create(llvm::FunctionType::get(CGM.VoidTy, + {}), llvm::GlobalValue::InternalLinkage, ".block_isa_init", + &CGM.getModule()); + llvm::IRBuilder<> b(llvm::BasicBlock::Create(CGM.getLLVMContext(), "entry", + Init)); + b.CreateAlignedStore(CGM.getNSConcreteGlobalBlock(), + b.CreateStructGEP(literal, 0), CGM.getPointerAlign().getQuantity()); + b.CreateRetVoid(); + // We can't use the normal LLVM global initialisation array, because we + // need to specify that this runs early in library initialisation. + auto *InitVar = new llvm::GlobalVariable(CGM.getModule(), Init->getType(), + /*isConstant*/true, llvm::GlobalValue::InternalLinkage, + Init, ".block_isa_init_ptr"); + InitVar->setSection(".CRT$XCLa"); + CGM.addUsedGlobal(InitVar); + } // Return a constant of the appropriately-casted type. llvm::Type *RequiredType = CGM.getTypes().ConvertType(blockInfo.getBlockExpr()->getType()); llvm::Constant *Result = llvm::ConstantExpr::getPointerCast(literal, RequiredType); CGM.setAddrOfGlobalBlock(blockInfo.BlockExpression, Result); if (CGM.getContext().getLangOpts().OpenCL) CGM.getOpenCLRuntime().recordBlockInfo( blockInfo.BlockExpression, cast(blockFn->stripPointerCasts()), Result); return Result; } void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D, unsigned argNum, llvm::Value *arg) { assert(BlockInfo && "not emitting prologue of block invocation function?!"); // Allocate a stack slot like for any local variable to guarantee optimal // debug info at -O0. The mem2reg pass will eliminate it when optimizing. 
Address alloc = CreateMemTemp(D->getType(), D->getName() + ".addr"); Builder.CreateStore(arg, alloc); if (CGDebugInfo *DI = getDebugInfo()) { if (CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) { DI->setLocation(D->getLocation()); DI->EmitDeclareOfBlockLiteralArgVariable( *BlockInfo, D->getName(), argNum, cast(alloc.getPointer()), Builder); } } SourceLocation StartLoc = BlockInfo->getBlockExpr()->getBody()->getLocStart(); ApplyDebugLocation Scope(*this, StartLoc); // Instead of messing around with LocalDeclMap, just set the value // directly as BlockPointer. BlockPointer = Builder.CreatePointerCast( arg, BlockInfo->StructureType->getPointerTo( getContext().getLangOpts().OpenCL ? getContext().getTargetAddressSpace(LangAS::opencl_generic) : 0), "block"); } Address CodeGenFunction::LoadBlockStruct() { assert(BlockInfo && "not in a block invocation function!"); assert(BlockPointer && "no block pointer set!"); return Address(BlockPointer, BlockInfo->BlockAlign); } llvm::Function * CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, const CGBlockInfo &blockInfo, const DeclMapTy &ldm, bool IsLambdaConversionToBlock, bool BuildGlobalBlock) { const BlockDecl *blockDecl = blockInfo.getBlockDecl(); CurGD = GD; CurEHLocation = blockInfo.getBlockExpr()->getLocEnd(); BlockInfo = &blockInfo; // Arrange for local static and local extern declarations to appear // to be local to this function as well, in case they're directly // referenced in a block. for (DeclMapTy::const_iterator i = ldm.begin(), e = ldm.end(); i != e; ++i) { const auto *var = dyn_cast(i->first); if (var && !var->hasLocalStorage()) setAddrOfLocalVar(var, i->second); } // Begin building the function declaration. // Build the argument list. FunctionArgList args; // The first argument is the block pointer. Just take it as a void* // and cast it later. 
QualType selfTy = getContext().VoidPtrTy; // For OpenCL passed block pointer can be private AS local variable or // global AS program scope variable (for the case with and without captures). // Generic AS is used therefore to be able to accommodate both private and // generic AS in one implementation. if (getLangOpts().OpenCL) selfTy = getContext().getPointerType(getContext().getAddrSpaceQualType( getContext().VoidTy, LangAS::opencl_generic)); IdentifierInfo *II = &CGM.getContext().Idents.get(".block_descriptor"); ImplicitParamDecl SelfDecl(getContext(), const_cast(blockDecl), SourceLocation(), II, selfTy, ImplicitParamDecl::ObjCSelf); args.push_back(&SelfDecl); // Now add the rest of the parameters. args.append(blockDecl->param_begin(), blockDecl->param_end()); // Create the function declaration. const FunctionProtoType *fnType = blockInfo.getBlockExpr()->getFunctionType(); const CGFunctionInfo &fnInfo = CGM.getTypes().arrangeBlockFunctionDeclaration(fnType, args); if (CGM.ReturnSlotInterferesWithArgs(fnInfo)) blockInfo.UsesStret = true; llvm::FunctionType *fnLLVMType = CGM.getTypes().GetFunctionType(fnInfo); StringRef name = CGM.getBlockMangledName(GD, blockDecl); llvm::Function *fn = llvm::Function::Create( fnLLVMType, llvm::GlobalValue::InternalLinkage, name, &CGM.getModule()); CGM.SetInternalFunctionAttributes(blockDecl, fn, fnInfo); if (BuildGlobalBlock) { auto GenVoidPtrTy = getContext().getLangOpts().OpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy; buildGlobalBlock(CGM, blockInfo, llvm::ConstantExpr::getPointerCast(fn, GenVoidPtrTy)); } // Begin generating the function. StartFunction(blockDecl, fnType->getReturnType(), fn, fnInfo, args, blockDecl->getLocation(), blockInfo.getBlockExpr()->getBody()->getLocStart()); // Okay. Undo some of what StartFunction did. // At -O0 we generate an explicit alloca for the BlockPointer, so the RA // won't delete the dbg.declare intrinsics for captured variables. 
llvm::Value *BlockPointerDbgLoc = BlockPointer; if (CGM.getCodeGenOpts().OptimizationLevel == 0) { // Allocate a stack slot for it, so we can point the debugger to it Address Alloca = CreateTempAlloca(BlockPointer->getType(), getPointerAlign(), "block.addr"); // Set the DebugLocation to empty, so the store is recognized as a // frame setup instruction by llvm::DwarfDebug::beginFunction(). auto NL = ApplyDebugLocation::CreateEmpty(*this); Builder.CreateStore(BlockPointer, Alloca); BlockPointerDbgLoc = Alloca.getPointer(); } // If we have a C++ 'this' reference, go ahead and force it into // existence now. if (blockDecl->capturesCXXThis()) { Address addr = Builder.CreateStructGEP(LoadBlockStruct(), blockInfo.CXXThisIndex, blockInfo.CXXThisOffset, "block.captured-this"); CXXThisValue = Builder.CreateLoad(addr, "this"); } // Also force all the constant captures. for (const auto &CI : blockDecl->captures()) { const VarDecl *variable = CI.getVariable(); const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable); if (!capture.isConstant()) continue; CharUnits align = getContext().getDeclAlign(variable); Address alloca = CreateMemTemp(variable->getType(), align, "block.captured-const"); Builder.CreateStore(capture.getConstant(), alloca); setAddrOfLocalVar(variable, alloca); } // Save a spot to insert the debug information for all the DeclRefExprs. llvm::BasicBlock *entry = Builder.GetInsertBlock(); llvm::BasicBlock::iterator entry_ptr = Builder.GetInsertPoint(); --entry_ptr; if (IsLambdaConversionToBlock) EmitLambdaBlockInvokeBody(); else { PGO.assignRegionCounters(GlobalDecl(blockDecl), fn); incrementProfileCounter(blockDecl->getBody()); EmitStmt(blockDecl->getBody()); } // Remember where we were... llvm::BasicBlock *resume = Builder.GetInsertBlock(); // Go back to the entry. ++entry_ptr; Builder.SetInsertPoint(entry, entry_ptr); // Emit debug information for all the DeclRefExprs. 
// FIXME: also for 'this' if (CGDebugInfo *DI = getDebugInfo()) { for (const auto &CI : blockDecl->captures()) { const VarDecl *variable = CI.getVariable(); DI->EmitLocation(Builder, variable->getLocation()); if (CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) { const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable); if (capture.isConstant()) { auto addr = LocalDeclMap.find(variable)->second; (void)DI->EmitDeclareOfAutoVariable(variable, addr.getPointer(), Builder); continue; } DI->EmitDeclareOfBlockDeclRefVariable( variable, BlockPointerDbgLoc, Builder, blockInfo, entry_ptr == entry->end() ? nullptr : &*entry_ptr); } } // Recover location if it was changed in the above loop. DI->EmitLocation(Builder, cast(blockDecl->getBody())->getRBracLoc()); } // And resume where we left off. if (resume == nullptr) Builder.ClearInsertionPoint(); else Builder.SetInsertPoint(resume); FinishFunction(cast(blockDecl->getBody())->getRBracLoc()); return fn; } namespace { /// Represents a type of copy/destroy operation that should be performed for an /// entity that's captured by a block. enum class BlockCaptureEntityKind { CXXRecord, // Copy or destroy ARCWeak, ARCStrong, NonTrivialCStruct, BlockObject, // Assign or release None }; /// Represents a captured entity that requires extra operations in order for /// this entity to be copied or destroyed correctly. 
struct BlockCaptureManagedEntity { BlockCaptureEntityKind Kind; BlockFieldFlags Flags; const BlockDecl::Capture &CI; const CGBlockInfo::Capture &Capture; BlockCaptureManagedEntity(BlockCaptureEntityKind Type, BlockFieldFlags Flags, const BlockDecl::Capture &CI, const CGBlockInfo::Capture &Capture) : Kind(Type), Flags(Flags), CI(CI), Capture(Capture) {} }; } // end anonymous namespace static std::pair computeCopyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T, const LangOptions &LangOpts) { if (CI.getCopyExpr()) { assert(!CI.isByRef()); // don't bother computing flags return std::make_pair(BlockCaptureEntityKind::CXXRecord, BlockFieldFlags()); } BlockFieldFlags Flags; if (CI.isByRef()) { Flags = BLOCK_FIELD_IS_BYREF; if (T.isObjCGCWeak()) Flags |= BLOCK_FIELD_IS_WEAK; return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags); } Flags = BLOCK_FIELD_IS_OBJECT; bool isBlockPointer = T->isBlockPointerType(); if (isBlockPointer) Flags = BLOCK_FIELD_IS_BLOCK; switch (T.isNonTrivialToPrimitiveCopy()) { case QualType::PCK_Struct: return std::make_pair(BlockCaptureEntityKind::NonTrivialCStruct, BlockFieldFlags()); case QualType::PCK_ARCWeak: // We need to register __weak direct captures with the runtime. return std::make_pair(BlockCaptureEntityKind::ARCWeak, Flags); case QualType::PCK_ARCStrong: // We need to retain the copied value for __strong direct captures. // If it's a block pointer, we have to copy the block and assign that to // the destination pointer, so we might as well use _Block_object_assign. // Otherwise we can avoid that. return std::make_pair(!isBlockPointer ? BlockCaptureEntityKind::ARCStrong : BlockCaptureEntityKind::BlockObject, Flags); case QualType::PCK_Trivial: case QualType::PCK_VolatileTrivial: { if (!T->isObjCRetainableType()) // For all other types, the memcpy is fine. 
return std::make_pair(BlockCaptureEntityKind::None, BlockFieldFlags()); // Special rules for ARC captures: Qualifiers QS = T.getQualifiers(); // Non-ARC captures of retainable pointers are strong and // therefore require a call to _Block_object_assign. if (!QS.getObjCLifetime() && !LangOpts.ObjCAutoRefCount) return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags); // Otherwise the memcpy is fine. return std::make_pair(BlockCaptureEntityKind::None, BlockFieldFlags()); } } llvm_unreachable("after exhaustive PrimitiveCopyKind switch"); } /// Find the set of block captures that need to be explicitly copied or destroy. static void findBlockCapturedManagedEntities( const CGBlockInfo &BlockInfo, const LangOptions &LangOpts, SmallVectorImpl &ManagedCaptures, llvm::function_ref( const BlockDecl::Capture &, QualType, const LangOptions &)> Predicate) { for (const auto &CI : BlockInfo.getBlockDecl()->captures()) { const VarDecl *Variable = CI.getVariable(); const CGBlockInfo::Capture &Capture = BlockInfo.getCapture(Variable); if (Capture.isConstant()) continue; auto Info = Predicate(CI, Variable->getType(), LangOpts); if (Info.first != BlockCaptureEntityKind::None) ManagedCaptures.emplace_back(Info.first, Info.second, CI, Capture); } } namespace { /// Release a __block variable. 
struct CallBlockRelease final : EHScopeStack::Cleanup { Address Addr; BlockFieldFlags FieldFlags; bool LoadBlockVarAddr; CallBlockRelease(Address Addr, BlockFieldFlags Flags, bool LoadValue) : Addr(Addr), FieldFlags(Flags), LoadBlockVarAddr(LoadValue) {} void Emit(CodeGenFunction &CGF, Flags flags) override { llvm::Value *BlockVarAddr; if (LoadBlockVarAddr) { BlockVarAddr = CGF.Builder.CreateLoad(Addr); BlockVarAddr = CGF.Builder.CreateBitCast(BlockVarAddr, CGF.VoidPtrTy); } else { BlockVarAddr = Addr.getPointer(); } CGF.BuildBlockRelease(BlockVarAddr, FieldFlags); } }; } // end anonymous namespace static void pushCaptureCleanup(BlockCaptureEntityKind CaptureKind, Address Field, QualType CaptureType, BlockFieldFlags Flags, bool EHOnly, CodeGenFunction &CGF) { switch (CaptureKind) { case BlockCaptureEntityKind::CXXRecord: case BlockCaptureEntityKind::ARCWeak: case BlockCaptureEntityKind::NonTrivialCStruct: case BlockCaptureEntityKind::ARCStrong: { if (CaptureType.isDestructedType() && (!EHOnly || CGF.needsEHCleanup(CaptureType.isDestructedType()))) { CodeGenFunction::Destroyer *Destroyer = CaptureKind == BlockCaptureEntityKind::ARCStrong ? CodeGenFunction::destroyARCStrongImprecise : CGF.getDestroyer(CaptureType.isDestructedType()); CleanupKind Kind = EHOnly ? EHCleanup : CGF.getCleanupKind(CaptureType.isDestructedType()); CGF.pushDestroy(Kind, Field, CaptureType, Destroyer, Kind & EHCleanup); } break; } case BlockCaptureEntityKind::BlockObject: { if (!EHOnly || CGF.getLangOpts().Exceptions) { CleanupKind Kind = EHOnly ? EHCleanup : NormalAndEHCleanup; CGF.enterByrefCleanup(Kind, Field, Flags, /*LoadBlockVarAddr*/ true); } break; } case BlockCaptureEntityKind::None: llvm_unreachable("unexpected BlockCaptureEntityKind"); } } /// Generate the copy-helper function for a block closure object: /// static void block_copy_helper(block_t *dst, block_t *src); /// The runtime will have previously initialized 'dst' by doing a /// bit-copy of 'src'. 
/// /// Note that this copies an entire block closure object to the heap; /// it should not be confused with a 'byref copy helper', which moves /// the contents of an individual __block variable to the heap. llvm::Constant * CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { ASTContext &C = getContext(); FunctionArgList args; ImplicitParamDecl DstDecl(getContext(), C.VoidPtrTy, ImplicitParamDecl::Other); args.push_back(&DstDecl); ImplicitParamDecl SrcDecl(getContext(), C.VoidPtrTy, ImplicitParamDecl::Other); args.push_back(&SrcDecl); const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); // FIXME: it would be nice if these were mergeable with things with // identical semantics. llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI); llvm::Function *Fn = llvm::Function::Create(LTy, llvm::GlobalValue::InternalLinkage, "__copy_helper_block_", &CGM.getModule()); IdentifierInfo *II = &CGM.getContext().Idents.get("__copy_helper_block_"); FunctionDecl *FD = FunctionDecl::Create(C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, C.VoidTy, nullptr, SC_Static, false, false); CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI); StartFunction(FD, C.VoidTy, Fn, FI, args); ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getLocStart()}; llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo(); Address src = GetAddrOfLocalVar(&SrcDecl); src = Address(Builder.CreateLoad(src), blockInfo.BlockAlign); src = Builder.CreateBitCast(src, structPtrTy, "block.source"); Address dst = GetAddrOfLocalVar(&DstDecl); dst = Address(Builder.CreateLoad(dst), blockInfo.BlockAlign); dst = Builder.CreateBitCast(dst, structPtrTy, "block.dest"); SmallVector CopiedCaptures; findBlockCapturedManagedEntities(blockInfo, getLangOpts(), CopiedCaptures, computeCopyInfoForBlockCapture); for (const auto &CopiedCapture : CopiedCaptures) { const BlockDecl::Capture &CI = CopiedCapture.CI; const 
CGBlockInfo::Capture &capture = CopiedCapture.Capture; QualType captureType = CI.getVariable()->getType(); BlockFieldFlags flags = CopiedCapture.Flags; unsigned index = capture.getIndex(); Address srcField = Builder.CreateStructGEP(src, index, capture.getOffset()); Address dstField = Builder.CreateStructGEP(dst, index, capture.getOffset()); // If there's an explicit copy expression, we do that. if (CI.getCopyExpr()) { assert(CopiedCapture.Kind == BlockCaptureEntityKind::CXXRecord); EmitSynthesizedCXXCopyCtor(dstField, srcField, CI.getCopyExpr()); } else if (CopiedCapture.Kind == BlockCaptureEntityKind::ARCWeak) { EmitARCCopyWeak(dstField, srcField); // If this is a C struct that requires non-trivial copy construction, emit a // call to its copy constructor. } else if (CopiedCapture.Kind == BlockCaptureEntityKind::NonTrivialCStruct) { QualType varType = CI.getVariable()->getType(); callCStructCopyConstructor(MakeAddrLValue(dstField, varType), MakeAddrLValue(srcField, varType)); } else { llvm::Value *srcValue = Builder.CreateLoad(srcField, "blockcopy.src"); if (CopiedCapture.Kind == BlockCaptureEntityKind::ARCStrong) { // At -O0, store null into the destination field (so that the // storeStrong doesn't over-release) and then call storeStrong. // This is a workaround to not having an initStrong call. if (CGM.getCodeGenOpts().OptimizationLevel == 0) { auto *ty = cast(srcValue->getType()); llvm::Value *null = llvm::ConstantPointerNull::get(ty); Builder.CreateStore(null, dstField); EmitARCStoreStrongCall(dstField, srcValue, true); // With optimization enabled, take advantage of the fact that // the blocks runtime guarantees a memcpy of the block data, and // just emit a retain of the src field. } else { EmitARCRetainNonBlock(srcValue); // Unless EH cleanup is required, we don't need this anymore, so kill // it. It's not quite worth the annoyance to avoid creating it in the // first place. 
if (!needsEHCleanup(captureType.isDestructedType())) cast(dstField.getPointer())->eraseFromParent(); } } else { assert(CopiedCapture.Kind == BlockCaptureEntityKind::BlockObject); srcValue = Builder.CreateBitCast(srcValue, VoidPtrTy); llvm::Value *dstAddr = Builder.CreateBitCast(dstField.getPointer(), VoidPtrTy); llvm::Value *args[] = { dstAddr, srcValue, llvm::ConstantInt::get(Int32Ty, flags.getBitMask()) }; const VarDecl *variable = CI.getVariable(); bool copyCanThrow = false; if (CI.isByRef() && variable->getType()->getAsCXXRecordDecl()) { const Expr *copyExpr = CGM.getContext().getBlockVarCopyInits(variable); if (copyExpr) { copyCanThrow = true; // FIXME: reuse the noexcept logic } } if (copyCanThrow) { EmitRuntimeCallOrInvoke(CGM.getBlockObjectAssign(), args); } else { EmitNounwindRuntimeCall(CGM.getBlockObjectAssign(), args); } } } // Ensure that we destroy the copied object if an exception is thrown later // in the helper function. pushCaptureCleanup(CopiedCapture.Kind, dstField, captureType, flags, /*EHOnly*/ true, *this); } FinishFunction(); return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy); } static BlockFieldFlags getBlockFieldFlagsForObjCObjectPointer(const BlockDecl::Capture &CI, QualType T) { BlockFieldFlags Flags = BLOCK_FIELD_IS_OBJECT; if (T->isBlockPointerType()) Flags = BLOCK_FIELD_IS_BLOCK; return Flags; } static std::pair computeDestroyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T, const LangOptions &LangOpts) { if (CI.isByRef()) { BlockFieldFlags Flags = BLOCK_FIELD_IS_BYREF; if (T.isObjCGCWeak()) Flags |= BLOCK_FIELD_IS_WEAK; return std::make_pair(BlockCaptureEntityKind::BlockObject, Flags); } switch (T.isDestructedType()) { case QualType::DK_cxx_destructor: return std::make_pair(BlockCaptureEntityKind::CXXRecord, BlockFieldFlags()); case QualType::DK_objc_strong_lifetime: // Use objc_storeStrong for __strong direct captures; the // dynamic tools really like it when we do this. 
return std::make_pair(BlockCaptureEntityKind::ARCStrong, getBlockFieldFlagsForObjCObjectPointer(CI, T)); case QualType::DK_objc_weak_lifetime: // Support __weak direct captures. return std::make_pair(BlockCaptureEntityKind::ARCWeak, getBlockFieldFlagsForObjCObjectPointer(CI, T)); case QualType::DK_nontrivial_c_struct: return std::make_pair(BlockCaptureEntityKind::NonTrivialCStruct, BlockFieldFlags()); case QualType::DK_none: { // Non-ARC captures are strong, and we need to use _Block_object_dispose. if (T->isObjCRetainableType() && !T.getQualifiers().hasObjCLifetime() && !LangOpts.ObjCAutoRefCount) return std::make_pair(BlockCaptureEntityKind::BlockObject, getBlockFieldFlagsForObjCObjectPointer(CI, T)); // Otherwise, we have nothing to do. return std::make_pair(BlockCaptureEntityKind::None, BlockFieldFlags()); } } llvm_unreachable("after exhaustive DestructionKind switch"); } /// Generate the destroy-helper function for a block closure object: /// static void block_destroy_helper(block_t *theBlock); /// /// Note that this destroys a heap-allocated block closure object; /// it should not be confused with a 'byref destroy helper', which /// destroys the heap-allocated contents of an individual __block /// variable. llvm::Constant * CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { ASTContext &C = getContext(); FunctionArgList args; ImplicitParamDecl SrcDecl(getContext(), C.VoidPtrTy, ImplicitParamDecl::Other); args.push_back(&SrcDecl); const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); // FIXME: We'd like to put these into a mergable by content, with // internal linkage. 
llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI); llvm::Function *Fn = llvm::Function::Create(LTy, llvm::GlobalValue::InternalLinkage, "__destroy_helper_block_", &CGM.getModule()); IdentifierInfo *II = &CGM.getContext().Idents.get("__destroy_helper_block_"); FunctionDecl *FD = FunctionDecl::Create(C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, C.VoidTy, nullptr, SC_Static, false, false); CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI); StartFunction(FD, C.VoidTy, Fn, FI, args); ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getLocStart()}; llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo(); Address src = GetAddrOfLocalVar(&SrcDecl); src = Address(Builder.CreateLoad(src), blockInfo.BlockAlign); src = Builder.CreateBitCast(src, structPtrTy, "block"); CodeGenFunction::RunCleanupsScope cleanups(*this); SmallVector DestroyedCaptures; findBlockCapturedManagedEntities(blockInfo, getLangOpts(), DestroyedCaptures, computeDestroyInfoForBlockCapture); for (const auto &DestroyedCapture : DestroyedCaptures) { const BlockDecl::Capture &CI = DestroyedCapture.CI; const CGBlockInfo::Capture &capture = DestroyedCapture.Capture; BlockFieldFlags flags = DestroyedCapture.Flags; Address srcField = Builder.CreateStructGEP(src, capture.getIndex(), capture.getOffset()); pushCaptureCleanup(DestroyedCapture.Kind, srcField, CI.getVariable()->getType(), flags, /*EHOnly*/ false, *this); } cleanups.ForceCleanup(); FinishFunction(); return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy); } namespace { /// Emits the copy/dispose helper functions for a __block object of id type. 
class ObjectByrefHelpers final : public BlockByrefHelpers { BlockFieldFlags Flags; public: ObjectByrefHelpers(CharUnits alignment, BlockFieldFlags flags) : BlockByrefHelpers(alignment), Flags(flags) {} void emitCopy(CodeGenFunction &CGF, Address destField, Address srcField) override { destField = CGF.Builder.CreateBitCast(destField, CGF.VoidPtrTy); srcField = CGF.Builder.CreateBitCast(srcField, CGF.VoidPtrPtrTy); llvm::Value *srcValue = CGF.Builder.CreateLoad(srcField); unsigned flags = (Flags | BLOCK_BYREF_CALLER).getBitMask(); llvm::Value *flagsVal = llvm::ConstantInt::get(CGF.Int32Ty, flags); llvm::Value *fn = CGF.CGM.getBlockObjectAssign(); llvm::Value *args[] = { destField.getPointer(), srcValue, flagsVal }; CGF.EmitNounwindRuntimeCall(fn, args); } void emitDispose(CodeGenFunction &CGF, Address field) override { field = CGF.Builder.CreateBitCast(field, CGF.Int8PtrTy->getPointerTo(0)); llvm::Value *value = CGF.Builder.CreateLoad(field); CGF.BuildBlockRelease(value, Flags | BLOCK_BYREF_CALLER); } void profileImpl(llvm::FoldingSetNodeID &id) const override { id.AddInteger(Flags.getBitMask()); } }; /// Emits the copy/dispose helpers for an ARC __block __weak variable. class ARCWeakByrefHelpers final : public BlockByrefHelpers { public: ARCWeakByrefHelpers(CharUnits alignment) : BlockByrefHelpers(alignment) {} void emitCopy(CodeGenFunction &CGF, Address destField, Address srcField) override { CGF.EmitARCMoveWeak(destField, srcField); } void emitDispose(CodeGenFunction &CGF, Address field) override { CGF.EmitARCDestroyWeak(field); } void profileImpl(llvm::FoldingSetNodeID &id) const override { // 0 is distinguishable from all pointers and byref flags id.AddInteger(0); } }; /// Emits the copy/dispose helpers for an ARC __block __strong variable /// that's not of block-pointer type. 
class ARCStrongByrefHelpers final : public BlockByrefHelpers { public: ARCStrongByrefHelpers(CharUnits alignment) : BlockByrefHelpers(alignment) {} void emitCopy(CodeGenFunction &CGF, Address destField, Address srcField) override { // Do a "move" by copying the value and then zeroing out the old // variable. llvm::Value *value = CGF.Builder.CreateLoad(srcField); llvm::Value *null = llvm::ConstantPointerNull::get(cast(value->getType())); if (CGF.CGM.getCodeGenOpts().OptimizationLevel == 0) { CGF.Builder.CreateStore(null, destField); CGF.EmitARCStoreStrongCall(destField, value, /*ignored*/ true); CGF.EmitARCStoreStrongCall(srcField, null, /*ignored*/ true); return; } CGF.Builder.CreateStore(value, destField); CGF.Builder.CreateStore(null, srcField); } void emitDispose(CodeGenFunction &CGF, Address field) override { CGF.EmitARCDestroyStrong(field, ARCImpreciseLifetime); } void profileImpl(llvm::FoldingSetNodeID &id) const override { // 1 is distinguishable from all pointers and byref flags id.AddInteger(1); } }; /// Emits the copy/dispose helpers for an ARC __block __strong /// variable that's of block-pointer type. class ARCStrongBlockByrefHelpers final : public BlockByrefHelpers { public: ARCStrongBlockByrefHelpers(CharUnits alignment) : BlockByrefHelpers(alignment) {} void emitCopy(CodeGenFunction &CGF, Address destField, Address srcField) override { // Do the copy with objc_retainBlock; that's all that // _Block_object_assign would do anyway, and we'd have to pass the // right arguments to make sure it doesn't get no-op'ed. 
llvm::Value *oldValue = CGF.Builder.CreateLoad(srcField); llvm::Value *copy = CGF.EmitARCRetainBlock(oldValue, /*mandatory*/ true); CGF.Builder.CreateStore(copy, destField); } void emitDispose(CodeGenFunction &CGF, Address field) override { CGF.EmitARCDestroyStrong(field, ARCImpreciseLifetime); } void profileImpl(llvm::FoldingSetNodeID &id) const override { // 2 is distinguishable from all pointers and byref flags id.AddInteger(2); } }; /// Emits the copy/dispose helpers for a __block variable with a /// nontrivial copy constructor or destructor. class CXXByrefHelpers final : public BlockByrefHelpers { QualType VarType; const Expr *CopyExpr; public: CXXByrefHelpers(CharUnits alignment, QualType type, const Expr *copyExpr) : BlockByrefHelpers(alignment), VarType(type), CopyExpr(copyExpr) {} bool needsCopy() const override { return CopyExpr != nullptr; } void emitCopy(CodeGenFunction &CGF, Address destField, Address srcField) override { if (!CopyExpr) return; CGF.EmitSynthesizedCXXCopyCtor(destField, srcField, CopyExpr); } void emitDispose(CodeGenFunction &CGF, Address field) override { EHScopeStack::stable_iterator cleanupDepth = CGF.EHStack.stable_begin(); CGF.PushDestructorCleanup(VarType, field); CGF.PopCleanupBlocks(cleanupDepth); } void profileImpl(llvm::FoldingSetNodeID &id) const override { id.AddPointer(VarType.getCanonicalType().getAsOpaquePtr()); } }; /// Emits the copy/dispose helpers for a __block variable that is a non-trivial /// C struct. 
class NonTrivialCStructByrefHelpers final : public BlockByrefHelpers { QualType VarType; public: NonTrivialCStructByrefHelpers(CharUnits alignment, QualType type) : BlockByrefHelpers(alignment), VarType(type) {} void emitCopy(CodeGenFunction &CGF, Address destField, Address srcField) override { CGF.callCStructMoveConstructor(CGF.MakeAddrLValue(destField, VarType), CGF.MakeAddrLValue(srcField, VarType)); } bool needsDispose() const override { return VarType.isDestructedType(); } void emitDispose(CodeGenFunction &CGF, Address field) override { EHScopeStack::stable_iterator cleanupDepth = CGF.EHStack.stable_begin(); CGF.pushDestroy(VarType.isDestructedType(), field, VarType); CGF.PopCleanupBlocks(cleanupDepth); } void profileImpl(llvm::FoldingSetNodeID &id) const override { id.AddPointer(VarType.getCanonicalType().getAsOpaquePtr()); } }; } // end anonymous namespace static llvm::Constant * generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo, BlockByrefHelpers &generator) { ASTContext &Context = CGF.getContext(); QualType R = Context.VoidTy; FunctionArgList args; ImplicitParamDecl Dst(CGF.getContext(), Context.VoidPtrTy, ImplicitParamDecl::Other); args.push_back(&Dst); ImplicitParamDecl Src(CGF.getContext(), Context.VoidPtrTy, ImplicitParamDecl::Other); args.push_back(&Src); const CGFunctionInfo &FI = CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(R, args); llvm::FunctionType *LTy = CGF.CGM.getTypes().GetFunctionType(FI); // FIXME: We'd like to put these into a mergable by content, with // internal linkage. 
llvm::Function *Fn = llvm::Function::Create(LTy, llvm::GlobalValue::InternalLinkage, "__Block_byref_object_copy_", &CGF.CGM.getModule()); IdentifierInfo *II = &Context.Idents.get("__Block_byref_object_copy_"); FunctionDecl *FD = FunctionDecl::Create(Context, Context.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, R, nullptr, SC_Static, false, false); CGF.CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI); CGF.StartFunction(FD, R, Fn, FI, args); if (generator.needsCopy()) { llvm::Type *byrefPtrType = byrefInfo.Type->getPointerTo(0); // dst->x Address destField = CGF.GetAddrOfLocalVar(&Dst); destField = Address(CGF.Builder.CreateLoad(destField), byrefInfo.ByrefAlignment); destField = CGF.Builder.CreateBitCast(destField, byrefPtrType); destField = CGF.emitBlockByrefAddress(destField, byrefInfo, false, "dest-object"); // src->x Address srcField = CGF.GetAddrOfLocalVar(&Src); srcField = Address(CGF.Builder.CreateLoad(srcField), byrefInfo.ByrefAlignment); srcField = CGF.Builder.CreateBitCast(srcField, byrefPtrType); srcField = CGF.emitBlockByrefAddress(srcField, byrefInfo, false, "src-object"); generator.emitCopy(CGF, destField, srcField); } CGF.FinishFunction(); return llvm::ConstantExpr::getBitCast(Fn, CGF.Int8PtrTy); } /// Build the copy helper for a __block variable. static llvm::Constant *buildByrefCopyHelper(CodeGenModule &CGM, const BlockByrefInfo &byrefInfo, BlockByrefHelpers &generator) { CodeGenFunction CGF(CGM); return generateByrefCopyHelper(CGF, byrefInfo, generator); } /// Generate code for a __block variable's dispose helper. 
static llvm::Constant *
generateByrefDisposeHelper(CodeGenFunction &CGF,
                           const BlockByrefInfo &byrefInfo,
                           BlockByrefHelpers &generator) {
  ASTContext &Ctx = CGF.getContext();
  CodeGenModule &CGM = CGF.CGM;
  QualType ResultTy = Ctx.VoidTy;

  // Single void* parameter: the byref structure to dispose.
  FunctionArgList Params;
  ImplicitParamDecl SrcParam(Ctx, Ctx.VoidPtrTy, ImplicitParamDecl::Other);
  Params.push_back(&SrcParam);

  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(ResultTy, Params);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);

  // FIXME: We'd like to put these into a mergeable by content, with
  // internal linkage.
  const char *const HelperName = "__Block_byref_object_dispose_";
  llvm::Function *Helper = llvm::Function::Create(
      FnTy, llvm::GlobalValue::InternalLinkage, HelperName, &CGM.getModule());

  // Synthesize a static FunctionDecl so StartFunction has a declaration.
  IdentifierInfo *HelperId = &Ctx.Idents.get(HelperName);
  FunctionDecl *HelperDecl = FunctionDecl::Create(
      Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(),
      HelperId, ResultTy, nullptr, SC_Static, false, false);

  CGM.SetInternalFunctionAttributes(GlobalDecl(), Helper, FnInfo);
  CGF.StartFunction(HelperDecl, ResultTy, Helper, FnInfo, Params);

  if (generator.needsDispose()) {
    // Load the void* parameter, view it as a pointer to the byref struct,
    // and project out the variable's field (without following the
    // forwarding pointer).
    Address Slot = CGF.GetAddrOfLocalVar(&SrcParam);
    Address Byref(CGF.Builder.CreateLoad(Slot), byrefInfo.ByrefAlignment);
    llvm::PointerType *ByrefPtrTy = byrefInfo.Type->getPointerTo(0);
    Byref = CGF.Builder.CreateBitCast(Byref, ByrefPtrTy);
    Address Object = CGF.emitBlockByrefAddress(Byref, byrefInfo, false,
                                               "object");

    generator.emitDispose(CGF, Object);
  }

  CGF.FinishFunction();

  return llvm::ConstantExpr::getBitCast(Helper, CGF.Int8PtrTy);
}

/// Build the dispose helper for a __block variable.
static llvm::Constant *buildByrefDisposeHelper(CodeGenModule &CGM,
                                               const BlockByrefInfo &byrefInfo,
                                               BlockByrefHelpers &generator) {
  // A fresh CodeGenFunction is used so the helper is emitted independently.
  CodeGenFunction HelperCGF(CGM);
  return generateByrefDisposeHelper(HelperCGF, byrefInfo, generator);
}

/// Lazily build the copy and dispose helpers for a __block variable
/// with the given information.
template static T *buildByrefHelpers(CodeGenModule &CGM, const BlockByrefInfo &byrefInfo, T &&generator) { llvm::FoldingSetNodeID id; generator.Profile(id); void *insertPos; BlockByrefHelpers *node = CGM.ByrefHelpersCache.FindNodeOrInsertPos(id, insertPos); if (node) return static_cast(node); generator.CopyHelper = buildByrefCopyHelper(CGM, byrefInfo, generator); generator.DisposeHelper = buildByrefDisposeHelper(CGM, byrefInfo, generator); T *copy = new (CGM.getContext()) T(std::forward(generator)); CGM.ByrefHelpersCache.InsertNode(copy, insertPos); return copy; } /// Build the copy and dispose helpers for the given __block variable /// emission. Places the helpers in the global cache. Returns null /// if no helpers are required. BlockByrefHelpers * CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType, const AutoVarEmission &emission) { const VarDecl &var = *emission.Variable; QualType type = var.getType(); auto &byrefInfo = getBlockByrefInfo(&var); // The alignment we care about for the purposes of uniquing byref // helpers is the alignment of the actual byref value field. CharUnits valueAlignment = byrefInfo.ByrefAlignment.alignmentAtOffset(byrefInfo.FieldOffset); if (const CXXRecordDecl *record = type->getAsCXXRecordDecl()) { const Expr *copyExpr = CGM.getContext().getBlockVarCopyInits(&var); if (!copyExpr && record->hasTrivialDestructor()) return nullptr; return ::buildByrefHelpers( CGM, byrefInfo, CXXByrefHelpers(valueAlignment, type, copyExpr)); } // If type is a non-trivial C struct type that is non-trivial to // destructly move or destroy, build the copy and dispose helpers. if (type.isNonTrivialToPrimitiveDestructiveMove() == QualType::PCK_Struct || type.isDestructedType() == QualType::DK_nontrivial_c_struct) return ::buildByrefHelpers( CGM, byrefInfo, NonTrivialCStructByrefHelpers(valueAlignment, type)); // Otherwise, if we don't have a retainable type, there's nothing to do. // that the runtime does extra copies. 
if (!type->isObjCRetainableType()) return nullptr; Qualifiers qs = type.getQualifiers(); // If we have lifetime, that dominates. if (Qualifiers::ObjCLifetime lifetime = qs.getObjCLifetime()) { switch (lifetime) { case Qualifiers::OCL_None: llvm_unreachable("impossible"); // These are just bits as far as the runtime is concerned. case Qualifiers::OCL_ExplicitNone: case Qualifiers::OCL_Autoreleasing: return nullptr; // Tell the runtime that this is ARC __weak, called by the // byref routines. case Qualifiers::OCL_Weak: return ::buildByrefHelpers(CGM, byrefInfo, ARCWeakByrefHelpers(valueAlignment)); // ARC __strong __block variables need to be retained. case Qualifiers::OCL_Strong: // Block pointers need to be copied, and there's no direct // transfer possible. if (type->isBlockPointerType()) { return ::buildByrefHelpers(CGM, byrefInfo, ARCStrongBlockByrefHelpers(valueAlignment)); // Otherwise, we transfer ownership of the retain from the stack // to the heap. } else { return ::buildByrefHelpers(CGM, byrefInfo, ARCStrongByrefHelpers(valueAlignment)); } } llvm_unreachable("fell out of lifetime switch!"); } BlockFieldFlags flags; if (type->isBlockPointerType()) { flags |= BLOCK_FIELD_IS_BLOCK; } else if (CGM.getContext().isObjCNSObjectType(type) || type->isObjCObjectPointerType()) { flags |= BLOCK_FIELD_IS_OBJECT; } else { return nullptr; } if (type.isObjCGCWeak()) flags |= BLOCK_FIELD_IS_WEAK; return ::buildByrefHelpers(CGM, byrefInfo, ObjectByrefHelpers(valueAlignment, flags)); } Address CodeGenFunction::emitBlockByrefAddress(Address baseAddr, const VarDecl *var, bool followForward) { auto &info = getBlockByrefInfo(var); return emitBlockByrefAddress(baseAddr, info, followForward, var->getName()); } Address CodeGenFunction::emitBlockByrefAddress(Address baseAddr, const BlockByrefInfo &info, bool followForward, const llvm::Twine &name) { // Chase the forwarding address if requested. 
if (followForward) { Address forwardingAddr = Builder.CreateStructGEP(baseAddr, 1, getPointerSize(), "forwarding"); baseAddr = Address(Builder.CreateLoad(forwardingAddr), info.ByrefAlignment); } return Builder.CreateStructGEP(baseAddr, info.FieldIndex, info.FieldOffset, name); } /// BuildByrefInfo - This routine changes a __block variable declared as T x /// into: /// /// struct { /// void *__isa; /// void *__forwarding; /// int32_t __flags; /// int32_t __size; /// void *__copy_helper; // only if needed /// void *__destroy_helper; // only if needed /// void *__byref_variable_layout;// only if needed /// char padding[X]; // only if needed /// T x; /// } x /// const BlockByrefInfo &CodeGenFunction::getBlockByrefInfo(const VarDecl *D) { auto it = BlockByrefInfos.find(D); if (it != BlockByrefInfos.end()) return it->second; llvm::StructType *byrefType = llvm::StructType::create(getLLVMContext(), "struct.__block_byref_" + D->getNameAsString()); QualType Ty = D->getType(); CharUnits size; SmallVector types; // void *__isa; types.push_back(Int8PtrTy); size += getPointerSize(); // void *__forwarding; types.push_back(llvm::PointerType::getUnqual(byrefType)); size += getPointerSize(); // int32_t __flags; types.push_back(Int32Ty); size += CharUnits::fromQuantity(4); // int32_t __size; types.push_back(Int32Ty); size += CharUnits::fromQuantity(4); // Note that this must match *exactly* the logic in buildByrefHelpers. 
bool hasCopyAndDispose = getContext().BlockRequiresCopying(Ty, D); if (hasCopyAndDispose) { /// void *__copy_helper; types.push_back(Int8PtrTy); size += getPointerSize(); /// void *__destroy_helper; types.push_back(Int8PtrTy); size += getPointerSize(); } bool HasByrefExtendedLayout = false; Qualifiers::ObjCLifetime Lifetime; if (getContext().getByrefLifetime(Ty, Lifetime, HasByrefExtendedLayout) && HasByrefExtendedLayout) { /// void *__byref_variable_layout; types.push_back(Int8PtrTy); size += CharUnits::fromQuantity(PointerSizeInBytes); } // T x; llvm::Type *varTy = ConvertTypeForMem(Ty); bool packed = false; CharUnits varAlign = getContext().getDeclAlign(D); CharUnits varOffset = size.alignTo(varAlign); // We may have to insert padding. if (varOffset != size) { llvm::Type *paddingTy = llvm::ArrayType::get(Int8Ty, (varOffset - size).getQuantity()); types.push_back(paddingTy); size = varOffset; // Conversely, we might have to prevent LLVM from inserting padding. } else if (CGM.getDataLayout().getABITypeAlignment(varTy) > varAlign.getQuantity()) { packed = true; } types.push_back(varTy); byrefType->setBody(types, packed); BlockByrefInfo info; info.Type = byrefType; info.FieldIndex = types.size() - 1; info.FieldOffset = varOffset; info.ByrefAlignment = std::max(varAlign, getPointerAlign()); auto pair = BlockByrefInfos.insert({D, info}); assert(pair.second && "info was inserted recursively?"); return pair.first->second; } /// Initialize the structural components of a __block variable, i.e. /// everything but the actual object. void CodeGenFunction::emitByrefStructureInit(const AutoVarEmission &emission) { // Find the address of the local. Address addr = emission.Addr; // That's an alloca of the byref structure type. 
llvm::StructType *byrefType = cast( cast(addr.getPointer()->getType())->getElementType()); unsigned nextHeaderIndex = 0; CharUnits nextHeaderOffset; auto storeHeaderField = [&](llvm::Value *value, CharUnits fieldSize, const Twine &name) { auto fieldAddr = Builder.CreateStructGEP(addr, nextHeaderIndex, nextHeaderOffset, name); Builder.CreateStore(value, fieldAddr); nextHeaderIndex++; nextHeaderOffset += fieldSize; }; // Build the byref helpers if necessary. This is null if we don't need any. BlockByrefHelpers *helpers = buildByrefHelpers(*byrefType, emission); const VarDecl &D = *emission.Variable; QualType type = D.getType(); bool HasByrefExtendedLayout; Qualifiers::ObjCLifetime ByrefLifetime; bool ByRefHasLifetime = getContext().getByrefLifetime(type, ByrefLifetime, HasByrefExtendedLayout); llvm::Value *V; // Initialize the 'isa', which is just 0 or 1. int isa = 0; if (type.isObjCGCWeak()) isa = 1; V = Builder.CreateIntToPtr(Builder.getInt32(isa), Int8PtrTy, "isa"); storeHeaderField(V, getPointerSize(), "byref.isa"); // Store the address of the variable into its own forwarding pointer. 
storeHeaderField(addr.getPointer(), getPointerSize(), "byref.forwarding"); // Blocks ABI: // c) the flags field is set to either 0 if no helper functions are // needed or BLOCK_BYREF_HAS_COPY_DISPOSE if they are, BlockFlags flags; if (helpers) flags |= BLOCK_BYREF_HAS_COPY_DISPOSE; if (ByRefHasLifetime) { if (HasByrefExtendedLayout) flags |= BLOCK_BYREF_LAYOUT_EXTENDED; else switch (ByrefLifetime) { case Qualifiers::OCL_Strong: flags |= BLOCK_BYREF_LAYOUT_STRONG; break; case Qualifiers::OCL_Weak: flags |= BLOCK_BYREF_LAYOUT_WEAK; break; case Qualifiers::OCL_ExplicitNone: flags |= BLOCK_BYREF_LAYOUT_UNRETAINED; break; case Qualifiers::OCL_None: if (!type->isObjCObjectPointerType() && !type->isBlockPointerType()) flags |= BLOCK_BYREF_LAYOUT_NON_OBJECT; break; default: break; } if (CGM.getLangOpts().ObjCGCBitmapPrint) { printf("\n Inline flag for BYREF variable layout (%d):", flags.getBitMask()); if (flags & BLOCK_BYREF_HAS_COPY_DISPOSE) printf(" BLOCK_BYREF_HAS_COPY_DISPOSE"); if (flags & BLOCK_BYREF_LAYOUT_MASK) { BlockFlags ThisFlag(flags.getBitMask() & BLOCK_BYREF_LAYOUT_MASK); if (ThisFlag == BLOCK_BYREF_LAYOUT_EXTENDED) printf(" BLOCK_BYREF_LAYOUT_EXTENDED"); if (ThisFlag == BLOCK_BYREF_LAYOUT_STRONG) printf(" BLOCK_BYREF_LAYOUT_STRONG"); if (ThisFlag == BLOCK_BYREF_LAYOUT_WEAK) printf(" BLOCK_BYREF_LAYOUT_WEAK"); if (ThisFlag == BLOCK_BYREF_LAYOUT_UNRETAINED) printf(" BLOCK_BYREF_LAYOUT_UNRETAINED"); if (ThisFlag == BLOCK_BYREF_LAYOUT_NON_OBJECT) printf(" BLOCK_BYREF_LAYOUT_NON_OBJECT"); } printf("\n"); } } storeHeaderField(llvm::ConstantInt::get(IntTy, flags.getBitMask()), getIntSize(), "byref.flags"); CharUnits byrefSize = CGM.GetTargetTypeStoreSize(byrefType); V = llvm::ConstantInt::get(IntTy, byrefSize.getQuantity()); storeHeaderField(V, getIntSize(), "byref.size"); if (helpers) { storeHeaderField(helpers->CopyHelper, getPointerSize(), "byref.copyHelper"); storeHeaderField(helpers->DisposeHelper, getPointerSize(), "byref.disposeHelper"); } if 
(ByRefHasLifetime && HasByrefExtendedLayout) { auto layoutInfo = CGM.getObjCRuntime().BuildByrefLayout(CGM, type); storeHeaderField(layoutInfo, getPointerSize(), "byref.layout"); } } void CodeGenFunction::BuildBlockRelease(llvm::Value *V, BlockFieldFlags flags) { llvm::Value *F = CGM.getBlockObjectDispose(); llvm::Value *args[] = { Builder.CreateBitCast(V, Int8PtrTy), llvm::ConstantInt::get(Int32Ty, flags.getBitMask()) }; EmitNounwindRuntimeCall(F, args); // FIXME: throwing destructors? } void CodeGenFunction::enterByrefCleanup(CleanupKind Kind, Address Addr, BlockFieldFlags Flags, bool LoadBlockVarAddr) { EHStack.pushCleanup(Kind, Addr, Flags, LoadBlockVarAddr); } /// Adjust the declaration of something from the blocks API. static void configureBlocksRuntimeObject(CodeGenModule &CGM, llvm::Constant *C) { auto *GV = cast(C->stripPointerCasts()); if (CGM.getTarget().getTriple().isOSBinFormatCOFF()) { IdentifierInfo &II = CGM.getContext().Idents.get(C->getName()); TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); assert((isa(C->stripPointerCasts()) || isa(C->stripPointerCasts())) && "expected Function or GlobalVariable"); const NamedDecl *ND = nullptr; for (const auto &Result : DC->lookup(&II)) if ((ND = dyn_cast(Result)) || (ND = dyn_cast(Result))) break; // TODO: support static blocks runtime if (GV->isDeclaration() && (!ND || !ND->hasAttr())) { GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); GV->setLinkage(llvm::GlobalValue::ExternalLinkage); } else { GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); GV->setLinkage(llvm::GlobalValue::ExternalLinkage); } } if (CGM.getLangOpts().BlocksRuntimeOptional && GV->isDeclaration() && GV->hasExternalLinkage()) GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage); CGM.setDSOLocal(GV); } llvm::Constant *CodeGenModule::getBlockObjectDispose() { if (BlockObjectDispose) return BlockObjectDispose; llvm::Type 
*args[] = { Int8PtrTy, Int32Ty }; llvm::FunctionType *fty = llvm::FunctionType::get(VoidTy, args, false); BlockObjectDispose = CreateRuntimeFunction(fty, "_Block_object_dispose"); configureBlocksRuntimeObject(*this, BlockObjectDispose); return BlockObjectDispose; } llvm::Constant *CodeGenModule::getBlockObjectAssign() { if (BlockObjectAssign) return BlockObjectAssign; llvm::Type *args[] = { Int8PtrTy, Int8PtrTy, Int32Ty }; llvm::FunctionType *fty = llvm::FunctionType::get(VoidTy, args, false); BlockObjectAssign = CreateRuntimeFunction(fty, "_Block_object_assign"); configureBlocksRuntimeObject(*this, BlockObjectAssign); return BlockObjectAssign; } llvm::Constant *CodeGenModule::getNSConcreteGlobalBlock() { if (NSConcreteGlobalBlock) return NSConcreteGlobalBlock; NSConcreteGlobalBlock = GetOrCreateLLVMGlobal("_NSConcreteGlobalBlock", Int8PtrTy->getPointerTo(), nullptr); configureBlocksRuntimeObject(*this, NSConcreteGlobalBlock); return NSConcreteGlobalBlock; } llvm::Constant *CodeGenModule::getNSConcreteStackBlock() { if (NSConcreteStackBlock) return NSConcreteStackBlock; NSConcreteStackBlock = GetOrCreateLLVMGlobal("_NSConcreteStackBlock", Int8PtrTy->getPointerTo(), nullptr); configureBlocksRuntimeObject(*this, NSConcreteStackBlock); return NSConcreteStackBlock; } Index: projects/clang700-import/contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp =================================================================== --- projects/clang700-import/contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp (revision 337644) +++ projects/clang700-import/contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp (revision 337645) @@ -1,3939 +1,3909 @@ //===------- CGObjCGNU.cpp - Emit LLVM Code from ASTs for a Module --------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// // // This provides Objective-C code generation targeting the GNU runtime. The // class in this file generates structures used by the GNU Objective-C runtime // library. These structures are defined in objc/objc.h and objc/objc-api.h in // the GNU runtime distribution. // //===----------------------------------------------------------------------===// #include "CGObjCRuntime.h" #include "CGCleanup.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtObjC.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/SourceManager.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ConvertUTF.h" #include using namespace clang; using namespace CodeGen; namespace { std::string SymbolNameForMethod( StringRef ClassName, StringRef CategoryName, const Selector MethodName, bool isClassMethod) { std::string MethodNameColonStripped = MethodName.getAsString(); std::replace(MethodNameColonStripped.begin(), MethodNameColonStripped.end(), ':', '_'); return (Twine(isClassMethod ? "_c_" : "_i_") + ClassName + "_" + CategoryName + "_" + MethodNameColonStripped).str(); } /// Class that lazily initialises the runtime function. Avoids inserting the /// types and the function declaration into a module if they're not used, and /// avoids constructing the type more than once if it's used more than once. 
class LazyRuntimeFunction { CodeGenModule *CGM; llvm::FunctionType *FTy; const char *FunctionName; llvm::Constant *Function; public: /// Constructor leaves this class uninitialized, because it is intended to /// be used as a field in another class and not all of the types that are /// used as arguments will necessarily be available at construction time. LazyRuntimeFunction() : CGM(nullptr), FunctionName(nullptr), Function(nullptr) {} /// Initialises the lazy function with the name, return type, and the types /// of the arguments. template void init(CodeGenModule *Mod, const char *name, llvm::Type *RetTy, Tys *... Types) { CGM = Mod; FunctionName = name; Function = nullptr; if(sizeof...(Tys)) { SmallVector ArgTys({Types...}); FTy = llvm::FunctionType::get(RetTy, ArgTys, false); } else { FTy = llvm::FunctionType::get(RetTy, None, false); } } llvm::FunctionType *getType() { return FTy; } /// Overloaded cast operator, allows the class to be implicitly cast to an /// LLVM constant. operator llvm::Constant *() { if (!Function) { if (!FunctionName) return nullptr; Function = CGM->CreateRuntimeFunction(FTy, FunctionName); } return Function; } operator llvm::Function *() { return cast((llvm::Constant *)*this); } }; /// GNU Objective-C runtime code generation. This class implements the parts of /// Objective-C support that are specific to the GNU family of runtimes (GCC, /// GNUstep and ObjFW). class CGObjCGNU : public CGObjCRuntime { protected: /// The LLVM module into which output is inserted llvm::Module &TheModule; /// strut objc_super. Used for sending messages to super. This structure /// contains the receiver (object) and the expected class. llvm::StructType *ObjCSuperTy; /// struct objc_super*. The type of the argument to the superclass message /// lookup functions. llvm::PointerType *PtrToObjCSuperTy; /// LLVM type for selectors. 
Opaque pointer (i8*) unless a header declaring /// SEL is included in a header somewhere, in which case it will be whatever /// type is declared in that header, most likely {i8*, i8*}. llvm::PointerType *SelectorTy; /// LLVM i8 type. Cached here to avoid repeatedly getting it in all of the /// places where it's used llvm::IntegerType *Int8Ty; /// Pointer to i8 - LLVM type of char*, for all of the places where the /// runtime needs to deal with C strings. llvm::PointerType *PtrToInt8Ty; /// struct objc_protocol type llvm::StructType *ProtocolTy; /// Protocol * type. llvm::PointerType *ProtocolPtrTy; /// Instance Method Pointer type. This is a pointer to a function that takes, /// at a minimum, an object and a selector, and is the generic type for /// Objective-C methods. Due to differences between variadic / non-variadic /// calling conventions, it must always be cast to the correct type before /// actually being used. llvm::PointerType *IMPTy; /// Type of an untyped Objective-C object. Clang treats id as a built-in type /// when compiling Objective-C code, so this may be an opaque pointer (i8*), /// but if the runtime header declaring it is included then it may be a /// pointer to a structure. llvm::PointerType *IdTy; /// Pointer to a pointer to an Objective-C object. Used in the new ABI /// message lookup function and some GC-related functions. llvm::PointerType *PtrToIdTy; /// The clang type of id. Used when using the clang CGCall infrastructure to /// call Objective-C methods. CanQualType ASTIdTy; /// LLVM type for C int type. llvm::IntegerType *IntTy; /// LLVM type for an opaque pointer. This is identical to PtrToInt8Ty, but is /// used in the code to document the difference between i8* meaning a pointer /// to a C string and i8* meaning a pointer to some opaque type. llvm::PointerType *PtrTy; /// LLVM type for C long type. 
The runtime uses this in a lot of places where /// it should be using intptr_t, but we can't fix this without breaking /// compatibility with GCC... llvm::IntegerType *LongTy; /// LLVM type for C size_t. Used in various runtime data structures. llvm::IntegerType *SizeTy; /// LLVM type for C intptr_t. llvm::IntegerType *IntPtrTy; /// LLVM type for C ptrdiff_t. Mainly used in property accessor functions. llvm::IntegerType *PtrDiffTy; /// LLVM type for C int*. Used for GCC-ABI-compatible non-fragile instance /// variables. llvm::PointerType *PtrToIntTy; /// LLVM type for Objective-C BOOL type. llvm::Type *BoolTy; /// 32-bit integer type, to save us needing to look it up every time it's used. llvm::IntegerType *Int32Ty; /// 64-bit integer type, to save us needing to look it up every time it's used. llvm::IntegerType *Int64Ty; /// The type of struct objc_property. llvm::StructType *PropertyMetadataTy; /// Metadata kind used to tie method lookups to message sends. The GNUstep /// runtime provides some LLVM passes that can use this to do things like /// automatic IMP caching and speculative inlining. unsigned msgSendMDKind; /// Helper to check if we are targeting a specific runtime version or later. bool isRuntime(ObjCRuntime::Kind kind, unsigned major, unsigned minor=0) { const ObjCRuntime &R = CGM.getLangOpts().ObjCRuntime; return (R.getKind() == kind) && (R.getVersion() >= VersionTuple(major, minor)); } std::string SymbolForProtocol(StringRef Name) { return (StringRef("._OBJC_PROTOCOL_") + Name).str(); } std::string SymbolForProtocolRef(StringRef Name) { return (StringRef("._OBJC_REF_PROTOCOL_") + Name).str(); } /// Helper function that generates a constant string and returns a pointer to /// the start of the string. The result of this function can be used anywhere /// where the C code specifies const char*. 
llvm::Constant *MakeConstantString(StringRef Str, const char *Name = "") {
  // Emit (or reuse) the C string global, then form a GEP to its first
  // character.
  ConstantAddress CStr = CGM.GetAddrOfConstantCString(Str, Name);
  llvm::Type *ElemTy = CStr.getElementType();
  return llvm::ConstantExpr::getGetElementPtr(ElemTy, CStr.getPointer(),
                                              Zeros);
}

/// Emits a linkonce_odr string, whose name is the prefix followed by the
/// string value.  This allows the linker to combine the strings between
/// different modules.  Used for EH typeinfo names, selector strings, and a
/// few other things.
llvm::Constant *ExportUniqueString(const std::string &Str,
                                   const std::string &prefix,
                                   bool Private=false) {
  const std::string SymName = prefix + Str;

  // Reuse the global if this module already has one with that name.
  llvm::GlobalVariable *Global = TheModule.getGlobalVariable(SymName);
  if (Global == nullptr) {
    llvm::Constant *Init = llvm::ConstantDataArray::getString(VMContext, Str);
    Global = new llvm::GlobalVariable(TheModule, Init->getType(), true,
                                      llvm::GlobalValue::LinkOnceODRLinkage,
                                      Init, SymName);
    if (Private)
      Global->setVisibility(llvm::GlobalValue::HiddenVisibility);
  }

  // Pointer to the first character of the string.
  return llvm::ConstantExpr::getGetElementPtr(Global->getValueType(), Global,
                                              Zeros);
}

/// Returns a property name and encoding string.
llvm::Constant *MakePropertyEncodingString(const ObjCPropertyDecl *PD,
                                           const Decl *Container) {
  assert(!isRuntime(ObjCRuntime::GNUstep, 2));

  // Runtimes other than GNUstep 1.6 or later only get the bare property name.
  if (!isRuntime(ObjCRuntime::GNUstep, 1, 6))
    return MakeConstantString(PD->getNameAsString());

  // Layout: NUL, one byte holding (encoding length + 3), the type encoding,
  // NUL, then the property name.
  std::string Encoding =
      CGM.getContext().getObjCEncodingForPropertyDecl(PD, Container);
  std::string Packed;
  Packed.push_back('\0');
  Packed.push_back(static_cast<char>(Encoding.length() + 3));
  Packed.append(Encoding);
  Packed.push_back('\0');
  Packed.append(PD->getNameAsString());
  return MakeConstantString(Packed);
}

/// Push the property attributes into two structure fields.
void PushPropertyAttributes(ConstantStructBuilder &Fields, const ObjCPropertyDecl *property, bool isSynthesized=true, bool isDynamic=true) { int attrs = property->getPropertyAttributes(); // For read-only properties, clear the copy and retain flags if (attrs & ObjCPropertyDecl::OBJC_PR_readonly) { attrs &= ~ObjCPropertyDecl::OBJC_PR_copy; attrs &= ~ObjCPropertyDecl::OBJC_PR_retain; attrs &= ~ObjCPropertyDecl::OBJC_PR_weak; attrs &= ~ObjCPropertyDecl::OBJC_PR_strong; } // The first flags field has the same attribute values as clang uses internally Fields.addInt(Int8Ty, attrs & 0xff); attrs >>= 8; attrs <<= 2; // For protocol properties, synthesized and dynamic have no meaning, so we // reuse these flags to indicate that this is a protocol property (both set // has no meaning, as a property can't be both synthesized and dynamic) attrs |= isSynthesized ? (1<<0) : 0; attrs |= isDynamic ? (1<<1) : 0; // The second field is the next four fields left shifted by two, with the // low bit set to indicate whether the field is synthesized or dynamic. Fields.addInt(Int8Ty, attrs & 0xff); // Two padding fields Fields.addInt(Int8Ty, 0); Fields.addInt(Int8Ty, 0); } virtual ConstantArrayBuilder PushPropertyListHeader(ConstantStructBuilder &Fields, int count) { // int count; Fields.addInt(IntTy, count); // int size; (only in GNUstep v2 ABI. 
if (isRuntime(ObjCRuntime::GNUstep, 2)) { llvm::DataLayout td(&TheModule); Fields.addInt(IntTy, td.getTypeSizeInBits(PropertyMetadataTy) / CGM.getContext().getCharWidth()); } // struct objc_property_list *next; Fields.add(NULLPtr); // struct objc_property properties[] return Fields.beginArray(PropertyMetadataTy); } virtual void PushProperty(ConstantArrayBuilder &PropertiesArray, const ObjCPropertyDecl *property, const Decl *OCD, bool isSynthesized=true, bool isDynamic=true) { auto Fields = PropertiesArray.beginStruct(PropertyMetadataTy); ASTContext &Context = CGM.getContext(); Fields.add(MakePropertyEncodingString(property, OCD)); PushPropertyAttributes(Fields, property, isSynthesized, isDynamic); auto addPropertyMethod = [&](const ObjCMethodDecl *accessor) { if (accessor) { std::string TypeStr = Context.getObjCEncodingForMethodDecl(accessor); llvm::Constant *TypeEncoding = MakeConstantString(TypeStr); Fields.add(MakeConstantString(accessor->getSelector().getAsString())); Fields.add(TypeEncoding); } else { Fields.add(NULLPtr); Fields.add(NULLPtr); } }; addPropertyMethod(property->getGetterMethodDecl()); addPropertyMethod(property->getSetterMethodDecl()); Fields.finishAndAddTo(PropertiesArray); } /// Ensures that the value has the required type, by inserting a bitcast if /// required. This function lets us avoid inserting bitcasts that are /// redundant. llvm::Value* EnforceType(CGBuilderTy &B, llvm::Value *V, llvm::Type *Ty) { if (V->getType() == Ty) return V; return B.CreateBitCast(V, Ty); } Address EnforceType(CGBuilderTy &B, Address V, llvm::Type *Ty) { if (V.getType() == Ty) return V; return B.CreateBitCast(V, Ty); } // Some zeros used for GEPs in lots of places. llvm::Constant *Zeros[2]; /// Null pointer value. Mainly used as a terminator in various arrays. llvm::Constant *NULLPtr; /// LLVM context. llvm::LLVMContext &VMContext; protected: /// Placeholder for the class. Lots of things refer to the class before we've /// actually emitted it. 
We use this alias as a placeholder, and then replace /// it with a pointer to the class structure before finally emitting the /// module. llvm::GlobalAlias *ClassPtrAlias; /// Placeholder for the metaclass. Lots of things refer to the class before /// we've actually emitted it. We use this alias as a placeholder, and then /// replace it with a pointer to the metaclass structure before finally /// emitting the module. llvm::GlobalAlias *MetaClassPtrAlias; /// NOTE(review): the template arguments of the std::vector, llvm::StringMap, /// std::pair and llvm::DenseMap declarations that follow appear to have been /// stripped during extraction -- confirm against the upstream file. /// All of the classes that have been generated for this compilation unit. std::vector Classes; /// All of the categories that have been generated for this compilation unit. std::vector Categories; /// All of the Objective-C constant strings that have been generated for this /// compilation unit. std::vector ConstantStrings; /// Map from string values to Objective-C constant strings in the output. /// Used to prevent emitting Objective-C strings more than once. This should /// not be required at all - CodeGenModule should manage this list. llvm::StringMap ObjCStrings; /// All of the protocols that have been declared. llvm::StringMap ExistingProtocols; /// For each variant of a selector, we store the type encoding and a /// placeholder value. For an untyped selector, the type will be the empty /// string. Selector references are all done via the module's selector table, /// so we create an alias as a placeholder and then replace it with the real /// value later. typedef std::pair TypedSelector; /// Type of the selector map. This is roughly equivalent to the structure /// used in the GNUstep runtime, which maintains a list of all of the valid /// types for a selector in a table. typedef llvm::DenseMap > SelectorMap; /// A map from selectors to selector types. This allows us to emit all /// selectors of the same name and type together. SelectorMap SelectorTable; /// Selectors related to memory management. When compiling in GC mode, we /// omit these.
Selector RetainSel, ReleaseSel, AutoreleaseSel; /// Runtime functions used for memory management in GC mode. Note that clang /// supports code generation for calling these functions, but neither GNU /// runtime actually supports this API properly yet. LazyRuntimeFunction IvarAssignFn, StrongCastAssignFn, MemMoveFn, WeakReadFn, WeakAssignFn, GlobalAssignFn; /// NOTE(review): the template arguments of the std::pair and std::vector /// declarations that follow appear to have been stripped during extraction /// -- confirm against the upstream file. typedef std::pair ClassAliasPair; /// All classes that have aliases set for them. std::vector ClassAliases; protected: /// Function used for throwing Objective-C exceptions. LazyRuntimeFunction ExceptionThrowFn; /// Function used for rethrowing exceptions, used at the end of \@finally or /// \@synchronized blocks. LazyRuntimeFunction ExceptionReThrowFn; /// Function called when entering a catch function. This is required for /// differentiating Objective-C exceptions and foreign exceptions. LazyRuntimeFunction EnterCatchFn; /// Function called when exiting from a catch block. Used to do exception /// cleanup. LazyRuntimeFunction ExitCatchFn; /// Function called when entering an \@synchronized block. Acquires the lock. LazyRuntimeFunction SyncEnterFn; /// Function called when exiting an \@synchronized block. Releases the lock. LazyRuntimeFunction SyncExitFn; private: /// Function called if fast enumeration detects that the collection is /// modified during the update. LazyRuntimeFunction EnumerationMutationFn; /// Function for implementing synthesized property getters that return an /// object. LazyRuntimeFunction GetPropertyFn; /// Function for implementing synthesized property setters that return an /// object. LazyRuntimeFunction SetPropertyFn; /// Function used for non-object declared property getters. LazyRuntimeFunction GetStructPropertyFn; /// Function used for non-object declared property setters. LazyRuntimeFunction SetStructPropertyFn; protected: /// The version of the runtime that this class targets. Must match the /// version in the runtime. int RuntimeVersion; /// The version of the protocol class.
Used to differentiate between ObjC1 /// and ObjC2 protocols. Objective-C 1 protocols cannot contain optional /// components and cannot contain declared properties. We always emit /// Objective-C 2 property structures, but we have to pretend that they're /// Objective-C 1 property structures when targeting the GCC runtime or it /// will abort. const int ProtocolVersion; /// The version of the class ABI. This value is used in the class structure /// and indicates how various fields should be interpreted. const int ClassABIVersion; /// Generates an instance variable list structure. This is a structure /// containing a size and an array of structures containing instance variable /// metadata. This is used purely for introspection in the fragile ABI. In /// the non-fragile ABI, it's used for instance variable fixup. /// NOTE(review): the element types of the ArrayRef parameters in the /// declarations that follow appear to have been stripped during extraction /// -- confirm against the upstream file. virtual llvm::Constant *GenerateIvarList(ArrayRef IvarNames, ArrayRef IvarTypes, ArrayRef IvarOffsets, ArrayRef IvarAlign, ArrayRef IvarOwnership); /// Generates a method list structure. This is a structure containing a size /// and an array of structures containing method metadata. /// /// This structure is used by both classes and categories, and contains a next /// pointer allowing them to be chained together in a linked list. llvm::Constant *GenerateMethodList(StringRef ClassName, StringRef CategoryName, ArrayRef Methods, bool isClassMethodList); /// Emits an empty protocol. This is used for \@protocol() where no protocol /// is found. The runtime will (hopefully) fix up the pointer to refer to the /// real protocol. virtual llvm::Constant *GenerateEmptyProtocol(StringRef ProtocolName); /// Generates a list of property metadata structures. This follows the same /// pattern as method and instance variable metadata lists. llvm::Constant *GeneratePropertyList(const Decl *Container, const ObjCContainerDecl *OCD, bool isClassProperty=false, bool protocolOptionalProperties=false); /// Generates a list of referenced protocols.
Classes, categories, and /// protocols all use this structure. /// NOTE(review): the element types of the ArrayRef parameters, the template /// parameter list of EmitProtocolMethodList and the SmallVector arguments /// in the declarations that follow appear to have been stripped during /// extraction -- confirm against the upstream file. llvm::Constant *GenerateProtocolList(ArrayRef Protocols); /// To ensure that all protocols are seen by the runtime, we add a category on /// a class defined in the runtime, declaring no methods, but adopting the /// protocols. This is a horribly ugly hack, but it allows us to collect all /// of the protocols without changing the ABI. void GenerateProtocolHolderCategory(); /// Generates a class structure. llvm::Constant *GenerateClassStructure( llvm::Constant *MetaClass, llvm::Constant *SuperClass, unsigned info, const char *Name, llvm::Constant *Version, llvm::Constant *InstanceSize, llvm::Constant *IVars, llvm::Constant *Methods, llvm::Constant *Protocols, llvm::Constant *IvarOffsets, llvm::Constant *Properties, llvm::Constant *StrongIvarBitmap, llvm::Constant *WeakIvarBitmap, bool isMeta=false); /// Generates a method list. This is used by protocols to define the required /// and optional methods. virtual llvm::Constant *GenerateProtocolMethodList( ArrayRef Methods); /// Emits optional and required method lists. Each element of Methods is /// sorted into one of two local vectors according to isOptional(); each /// vector is then passed to GenerateProtocolMethodList and the results are /// stored into Required and Optional respectively. template void EmitProtocolMethodList(T &&Methods, llvm::Constant *&Required, llvm::Constant *&Optional) { SmallVector RequiredMethods; SmallVector OptionalMethods; for (const auto *I : Methods) if (I->isOptional()) OptionalMethods.push_back(I); else RequiredMethods.push_back(I); Required = GenerateProtocolMethodList(RequiredMethods); Optional = GenerateProtocolMethodList(OptionalMethods); } /// Returns a selector with the specified type encoding. An empty string is /// used to return an untyped selector (with the types field set to NULL). virtual llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel, const std::string &TypeEncoding); /// Returns the name of ivar offset variables. In the GNUstep v1 ABI, this /// contains the class and ivar names, in the v2 ABI this contains the type /// encoding as well.
virtual std::string GetIVarOffsetVariableName(const ObjCInterfaceDecl *ID, const ObjCIvarDecl *Ivar) { const std::string Name = "__objc_ivar_offset_" + ID->getNameAsString() + '.' + Ivar->getNameAsString(); return Name; } /// Returns the variable used to store the offset of an instance variable. llvm::GlobalVariable *ObjCIvarOffsetVariable(const ObjCInterfaceDecl *ID, const ObjCIvarDecl *Ivar); /// Emits a reference to a class. This allows the linker to object if there /// is no class of the matching name. void EmitClassRef(const std::string &className); /// Emits a pointer to the named class virtual llvm::Value *GetClassNamed(CodeGenFunction &CGF, const std::string &Name, bool isWeak); /// Looks up the method for sending a message to the specified object. This /// mechanism differs between the GCC and GNU runtimes, so this method must be /// overridden in subclasses. virtual llvm::Value *LookupIMP(CodeGenFunction &CGF, llvm::Value *&Receiver, llvm::Value *cmd, llvm::MDNode *node, MessageSendInfo &MSI) = 0; /// Looks up the method for sending a message to a superclass. This /// mechanism differs between the GCC and GNU runtimes, so this method must /// be overridden in subclasses. virtual llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper, llvm::Value *cmd, MessageSendInfo &MSI) = 0; /// Libobjc2 uses a bitfield representation where small(ish) bitfields are /// stored in a 64-bit value with the low bit set to 1 and the remaining 63 /// bits set to their values, LSB first, while larger ones are stored in a /// structure of this / form: /// /// struct { int32_t length; int32_t values[length]; }; /// /// The values in the array are stored in host-endian format, with the least /// significant bit being assumed to come first in the bitfield. Therefore, /// a bitfield with the 64th bit set will be (int64_t)&{ 2, [0, 1<<31] }, /// while a bitfield / with the 63rd bit set will be 1<<64. 
// NOTE(review): the element type of the ArrayRef parameter appears to have
// been stripped during extraction -- confirm against the upstream file.
llvm::Constant *MakeBitField(ArrayRef bits);

public:
// Constructor.  classABI defaults to 1 when omitted.
CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion,
    unsigned protocolClassVersion, unsigned classABI=1);

// Constant strings and message sends (declared here; bodies are outside this
// part of the file).
ConstantAddress GenerateConstantString(const StringLiteral *) override;

RValue
GenerateMessageSend(CodeGenFunction &CGF, ReturnValueSlot Return,
                    QualType ResultType, Selector Sel,
                    llvm::Value *Receiver, const CallArgList &CallArgs,
                    const ObjCInterfaceDecl *Class,
                    const ObjCMethodDecl *Method) override;
RValue
GenerateMessageSendSuper(CodeGenFunction &CGF, ReturnValueSlot Return,
                         QualType ResultType, Selector Sel,
                         const ObjCInterfaceDecl *Class,
                         bool isCategoryImpl, llvm::Value *Receiver,
                         bool IsClassMessage, const CallArgList &CallArgs,
                         const ObjCMethodDecl *Method) override;

// Class and selector lookup.
llvm::Value *GetClass(CodeGenFunction &CGF,
                      const ObjCInterfaceDecl *OID) override;
llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel) override;
Address GetAddrOfSelector(CodeGenFunction &CGF, Selector Sel) override;
llvm::Value *GetSelector(CodeGenFunction &CGF,
                         const ObjCMethodDecl *Method) override;

// Base implementation is unreachable: it aborts with the message below, so
// only a subclass that overrides it can produce constant selectors.
virtual llvm::Constant *GetConstantSelector(Selector Sel,
                                            const std::string &TypeEncoding) {
  llvm_unreachable("Runtime unable to generate constant selector");
}

// Convenience overload: forwards the method's selector and its Objective-C
// type encoding to the virtual overload above.
llvm::Constant *GetConstantSelector(const ObjCMethodDecl *M) {
  return GetConstantSelector(M->getSelector(),
      CGM.getContext().getObjCEncodingForMethodDecl(M));
}

// Metadata generation entry points.
llvm::Constant *GetEHType(QualType T) override;

llvm::Function *GenerateMethod(const ObjCMethodDecl *OMD,
                               const ObjCContainerDecl *CD) override;
void GenerateCategory(const ObjCCategoryImplDecl *CMD) override;
void GenerateClass(const ObjCImplementationDecl *ClassDecl) override;
void RegisterAlias(const ObjCCompatibleAliasDecl *OAD) override;
llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF,
                                 const ObjCProtocolDecl *PD) override;
void GenerateProtocol(const ObjCProtocolDecl *PD) override;
llvm::Function *ModuleInitFunction() override;

// Accessors for runtime helper functions.
llvm::Constant *GetPropertyGetFunction() override;
llvm::Constant *GetPropertySetFunction() override;
llvm::Constant *GetOptimizedPropertySetFunction(bool atomic,
                                                bool copy) override;
llvm::Constant *GetSetStructFunction() override;
llvm::Constant *GetGetStructFunction() override;
llvm::Constant *GetCppAtomicObjectGetFunction() override;
llvm::Constant *GetCppAtomicObjectSetFunction() override;
llvm::Constant *EnumerationMutationFunction() override;

// Statement emission.
void EmitTryStmt(CodeGenFunction &CGF,
                 const ObjCAtTryStmt &S) override;
void EmitSynchronizedStmt(CodeGenFunction &CGF,
                          const ObjCAtSynchronizedStmt &S) override;
void EmitThrowStmt(CodeGenFunction &CGF,
                   const ObjCAtThrowStmt &S,
                   bool ClearInsertionPoint=true) override;

// Weak reads, assignment helpers and collectable memmove.
llvm::Value * EmitObjCWeakRead(CodeGenFunction &CGF,
                               Address AddrWeakObj) override;
void EmitObjCWeakAssign(CodeGenFunction &CGF,
                        llvm::Value *src, Address dst) override;
void EmitObjCGlobalAssign(CodeGenFunction &CGF,
                          llvm::Value *src, Address dest,
                          bool threadlocal=false) override;
void EmitObjCIvarAssign(CodeGenFunction &CGF, llvm::Value *src,
                        Address dest, llvm::Value *ivarOffset) override;
void EmitObjCStrongCastAssign(CodeGenFunction &CGF,
                              llvm::Value *src, Address dest) override;
void EmitGCMemmoveCollectable(CodeGenFunction &CGF, Address DestPtr,
                              Address SrcPtr,
                              llvm::Value *Size) override;

// Instance variable access.
LValue EmitObjCValueForIvar(CodeGenFunction &CGF, QualType ObjectTy,
                            llvm::Value *BaseValue, const ObjCIvarDecl *Ivar,
                            unsigned CVRQualifiers) override;
llvm::Value *EmitIvarOffset(CodeGenFunction &CGF,
                            const ObjCInterfaceDecl *Interface,
                            const ObjCIvarDecl *Ivar) override;
llvm::Value *EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF) override;

// The three layout builders below ignore their arguments and always return
// NULLPtr.
llvm::Constant *BuildGCBlockLayout(CodeGenModule &CGM,
                                   const CGBlockInfo &blockInfo) override {
  return NULLPtr;
}
llvm::Constant *BuildRCBlockLayout(CodeGenModule &CGM,
                                   const CGBlockInfo &blockInfo) override {
  return NULLPtr;
}

llvm::Constant *BuildByrefLayout(CodeGenModule &CGM, QualType T) override {
  return NULLPtr;
}
};

/// Class representing the legacy GCC Objective-C ABI.
This is the default when /// -fobjc-nonfragile-abi is not specified. /// /// The GCC ABI target actually generates code that is approximately compatible /// with the new GNUstep runtime ABI, but refrains from using any features that /// would not work with the GCC runtime. For example, clang always generates /// the extended form of the class structure, and the extra fields are simply /// ignored by GCC libobjc. class CGObjCGCC : public CGObjCGNU { /// The GCC ABI message lookup function. Returns an IMP pointing to the /// method implementation for this message. LazyRuntimeFunction MsgLookupFn; /// The GCC ABI superclass message lookup function. Takes a pointer to a /// structure describing the receiver and the class, and a selector as /// arguments. Returns the IMP for the corresponding method. LazyRuntimeFunction MsgLookupSuperFn; protected: llvm::Value *LookupIMP(CodeGenFunction &CGF, llvm::Value *&Receiver, llvm::Value *cmd, llvm::MDNode *node, MessageSendInfo &MSI) override { CGBuilderTy &Builder = CGF.Builder; llvm::Value *args[] = { EnforceType(Builder, Receiver, IdTy), EnforceType(Builder, cmd, SelectorTy) }; llvm::CallSite imp = CGF.EmitRuntimeCallOrInvoke(MsgLookupFn, args); imp->setMetadata(msgSendMDKind, node); return imp.getInstruction(); } llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper, llvm::Value *cmd, MessageSendInfo &MSI) override { CGBuilderTy &Builder = CGF.Builder; llvm::Value *lookupArgs[] = {EnforceType(Builder, ObjCSuper, PtrToObjCSuperTy).getPointer(), cmd}; return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFn, lookupArgs); } public: CGObjCGCC(CodeGenModule &Mod) : CGObjCGNU(Mod, 8, 2) { // IMP objc_msg_lookup(id, SEL); MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy); // IMP objc_msg_lookup_super(struct objc_super*, SEL); MsgLookupSuperFn.init(&CGM, "objc_msg_lookup_super", IMPTy, PtrToObjCSuperTy, SelectorTy); } }; /// Class used when targeting the new GNUstep runtime ABI. 
class CGObjCGNUstep : public CGObjCGNU { /// The slot lookup function. Returns a pointer to a cacheable structure /// that contains (among other things) the IMP. LazyRuntimeFunction SlotLookupFn; /// The GNUstep ABI superclass message lookup function. Takes a pointer to /// a structure describing the receiver and the class, and a selector as /// arguments. Returns the slot for the corresponding method. Superclass /// message lookup rarely changes, so this is a good caching opportunity. LazyRuntimeFunction SlotLookupSuperFn; /// Specialised function for setting atomic retain properties LazyRuntimeFunction SetPropertyAtomic; /// Specialised function for setting atomic copy properties LazyRuntimeFunction SetPropertyAtomicCopy; /// Specialised function for setting nonatomic retain properties LazyRuntimeFunction SetPropertyNonAtomic; /// Specialised function for setting nonatomic copy properties LazyRuntimeFunction SetPropertyNonAtomicCopy; /// Function to perform atomic copies of C++ objects with nontrivial copy /// constructors from Objective-C ivars. LazyRuntimeFunction CxxAtomicObjectGetFn; /// Function to perform atomic copies of C++ objects with nontrivial copy /// constructors to Objective-C ivars. LazyRuntimeFunction CxxAtomicObjectSetFn; /// Type of an slot structure pointer. This is returned by the various /// lookup functions. 
llvm::Type *SlotTy; public: llvm::Constant *GetEHType(QualType T) override; protected: llvm::Value *LookupIMP(CodeGenFunction &CGF, llvm::Value *&Receiver, llvm::Value *cmd, llvm::MDNode *node, MessageSendInfo &MSI) override { CGBuilderTy &Builder = CGF.Builder; llvm::Function *LookupFn = SlotLookupFn; // Store the receiver on the stack so that we can reload it later Address ReceiverPtr = CGF.CreateTempAlloca(Receiver->getType(), CGF.getPointerAlign()); Builder.CreateStore(Receiver, ReceiverPtr); llvm::Value *self; if (isa(CGF.CurCodeDecl)) { self = CGF.LoadObjCSelf(); } else { self = llvm::ConstantPointerNull::get(IdTy); } // The lookup function is guaranteed not to capture the receiver pointer. LookupFn->addParamAttr(0, llvm::Attribute::NoCapture); llvm::Value *args[] = { EnforceType(Builder, ReceiverPtr.getPointer(), PtrToIdTy), EnforceType(Builder, cmd, SelectorTy), EnforceType(Builder, self, IdTy) }; llvm::CallSite slot = CGF.EmitRuntimeCallOrInvoke(LookupFn, args); slot.setOnlyReadsMemory(); slot->setMetadata(msgSendMDKind, node); // Load the imp from the slot llvm::Value *imp = Builder.CreateAlignedLoad( Builder.CreateStructGEP(nullptr, slot.getInstruction(), 4), CGF.getPointerAlign()); // The lookup function may have changed the receiver, so make sure we use // the new one. 
Receiver = Builder.CreateLoad(ReceiverPtr, true); return imp; } llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper, llvm::Value *cmd, MessageSendInfo &MSI) override { CGBuilderTy &Builder = CGF.Builder; llvm::Value *lookupArgs[] = {ObjCSuper.getPointer(), cmd}; llvm::CallInst *slot = CGF.EmitNounwindRuntimeCall(SlotLookupSuperFn, lookupArgs); slot->setOnlyReadsMemory(); return Builder.CreateAlignedLoad(Builder.CreateStructGEP(nullptr, slot, 4), CGF.getPointerAlign()); } public: CGObjCGNUstep(CodeGenModule &Mod) : CGObjCGNUstep(Mod, 9, 3, 1) {} CGObjCGNUstep(CodeGenModule &Mod, unsigned ABI, unsigned ProtocolABI, unsigned ClassABI) : CGObjCGNU(Mod, ABI, ProtocolABI, ClassABI) { const ObjCRuntime &R = CGM.getLangOpts().ObjCRuntime; llvm::StructType *SlotStructTy = llvm::StructType::get(PtrTy, PtrTy, PtrTy, IntTy, IMPTy); SlotTy = llvm::PointerType::getUnqual(SlotStructTy); // Slot_t objc_msg_lookup_sender(id *receiver, SEL selector, id sender); SlotLookupFn.init(&CGM, "objc_msg_lookup_sender", SlotTy, PtrToIdTy, SelectorTy, IdTy); // Slot_t objc_slot_lookup_super(struct objc_super*, SEL); SlotLookupSuperFn.init(&CGM, "objc_slot_lookup_super", SlotTy, PtrToObjCSuperTy, SelectorTy); // If we're in ObjC++ mode, then we want to make if (CGM.getLangOpts().CPlusPlus) { llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); // void *__cxa_begin_catch(void *e) EnterCatchFn.init(&CGM, "__cxa_begin_catch", PtrTy, PtrTy); // void __cxa_end_catch(void) ExitCatchFn.init(&CGM, "__cxa_end_catch", VoidTy); // void _Unwind_Resume_or_Rethrow(void*) ExceptionReThrowFn.init(&CGM, "_Unwind_Resume_or_Rethrow", VoidTy, PtrTy); } else if (R.getVersion() >= VersionTuple(1, 7)) { llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); // id objc_begin_catch(void *e) EnterCatchFn.init(&CGM, "objc_begin_catch", IdTy, PtrTy); // void objc_end_catch(void) ExitCatchFn.init(&CGM, "objc_end_catch", VoidTy); // void _Unwind_Resume_or_Rethrow(void*) ExceptionReThrowFn.init(&CGM, 
"objc_exception_rethrow", VoidTy, PtrTy); } llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); SetPropertyAtomic.init(&CGM, "objc_setProperty_atomic", VoidTy, IdTy, SelectorTy, IdTy, PtrDiffTy); SetPropertyAtomicCopy.init(&CGM, "objc_setProperty_atomic_copy", VoidTy, IdTy, SelectorTy, IdTy, PtrDiffTy); SetPropertyNonAtomic.init(&CGM, "objc_setProperty_nonatomic", VoidTy, IdTy, SelectorTy, IdTy, PtrDiffTy); SetPropertyNonAtomicCopy.init(&CGM, "objc_setProperty_nonatomic_copy", VoidTy, IdTy, SelectorTy, IdTy, PtrDiffTy); // void objc_setCppObjectAtomic(void *dest, const void *src, void // *helper); CxxAtomicObjectSetFn.init(&CGM, "objc_setCppObjectAtomic", VoidTy, PtrTy, PtrTy, PtrTy); // void objc_getCppObjectAtomic(void *dest, const void *src, void // *helper); CxxAtomicObjectGetFn.init(&CGM, "objc_getCppObjectAtomic", VoidTy, PtrTy, PtrTy, PtrTy); } llvm::Constant *GetCppAtomicObjectGetFunction() override { // The optimised functions were added in version 1.7 of the GNUstep // runtime. assert (CGM.getLangOpts().ObjCRuntime.getVersion() >= VersionTuple(1, 7)); return CxxAtomicObjectGetFn; } llvm::Constant *GetCppAtomicObjectSetFunction() override { // The optimised functions were added in version 1.7 of the GNUstep // runtime. assert (CGM.getLangOpts().ObjCRuntime.getVersion() >= VersionTuple(1, 7)); return CxxAtomicObjectSetFn; } llvm::Constant *GetOptimizedPropertySetFunction(bool atomic, bool copy) override { // The optimised property functions omit the GC check, and so are not // safe to use in GC mode. The standard functions are fast in GC mode, // so there is less advantage in using them. assert ((CGM.getLangOpts().getGC() == LangOptions::NonGC)); // The optimised functions were added in version 1.7 of the GNUstep // runtime. assert (CGM.getLangOpts().ObjCRuntime.getVersion() >= VersionTuple(1, 7)); if (atomic) { if (copy) return SetPropertyAtomicCopy; return SetPropertyAtomic; } return copy ? 
SetPropertyNonAtomicCopy : SetPropertyNonAtomic; } }; /// GNUstep Objective-C ABI version 2 implementation. /// This is the ABI that provides a clean break with the legacy GCC ABI and /// cleans up a number of things that were added to work around 1980s linkers. class CGObjCGNUstep2 : public CGObjCGNUstep { /// The section for selectors. static constexpr const char *const SelSection = "__objc_selectors"; /// The section for classes. static constexpr const char *const ClsSection = "__objc_classes"; /// The section for references to classes. static constexpr const char *const ClsRefSection = "__objc_class_refs"; /// The section for categories. static constexpr const char *const CatSection = "__objc_cats"; /// The section for protocols. static constexpr const char *const ProtocolSection = "__objc_protocols"; /// The section for protocol references. static constexpr const char *const ProtocolRefSection = "__objc_protocol_refs"; /// The section for class aliases static constexpr const char *const ClassAliasSection = "__objc_class_aliases"; /// The section for constexpr constant strings static constexpr const char *const ConstantStringSection = "__objc_constant_string"; /// The GCC ABI superclass message lookup function. Takes a pointer to a /// structure describing the receiver and the class, and a selector as /// arguments. Returns the IMP for the corresponding method. LazyRuntimeFunction MsgLookupSuperFn; /// A flag indicating if we've emitted at least one protocol. /// If we haven't, then we need to emit an empty protocol, to ensure that the /// __start__objc_protocols and __stop__objc_protocols sections exist. bool EmittedProtocol = false; /// A flag indicating if we've emitted at least one protocol reference. /// If we haven't, then we need to emit an empty protocol, to ensure that the /// __start__objc_protocol_refs and __stop__objc_protocol_refs sections /// exist. bool EmittedProtocolRef = false; /// A flag indicating if we've emitted at least one class. 
/// If we haven't, then we need to emit null class entries, to ensure that the /// __start__objc_classes and __stop__objc_classes sections exist. bool EmittedClass = false; /// Generate the name of a symbol for a reference to a class. Accesses to /// classes should be indirected via this. std::string SymbolForClassRef(StringRef Name, bool isWeak) { if (isWeak) return (StringRef("._OBJC_WEAK_REF_CLASS_") + Name).str(); else return (StringRef("._OBJC_REF_CLASS_") + Name).str(); } /// Generate the name of a class symbol. std::string SymbolForClass(StringRef Name) { return (StringRef("._OBJC_CLASS_") + Name).str(); } void CallRuntimeFunction(CGBuilderTy &B, StringRef FunctionName, ArrayRef Args) { SmallVector Types; for (auto *Arg : Args) Types.push_back(Arg->getType()); llvm::FunctionType *FT = llvm::FunctionType::get(B.getVoidTy(), Types, false); llvm::Value *Fn = CGM.CreateRuntimeFunction(FT, FunctionName); B.CreateCall(Fn, Args); } ConstantAddress GenerateConstantString(const StringLiteral *SL) override { auto Str = SL->getString(); CharUnits Align = CGM.getPointerAlign(); // Look for an existing one llvm::StringMap::iterator old = ObjCStrings.find(Str); if (old != ObjCStrings.end()) return ConstantAddress(old->getValue(), Align); bool isNonASCII = SL->containsNonAscii(); auto LiteralLength = SL->getLength(); if ((CGM.getTarget().getPointerWidth(0) == 64) && (LiteralLength < 9) && !isNonASCII) { // Tiny strings are only used on 64-bit platforms. They store 8 7-bit // ASCII characters in the high 56 bits, followed by a 4-bit length and a // 3-bit tag (which is always 4). 
uint64_t str = 0; // Fill in the characters for (unsigned i=0 ; igetCodeUnit(i)) << ((64 - 4 - 3) - (i*7)); // Fill in the length str |= LiteralLength << 3; // Set the tag str |= 4; auto *ObjCStr = llvm::ConstantExpr::getIntToPtr( llvm::ConstantInt::get(Int64Ty, str), IdTy); ObjCStrings[Str] = ObjCStr; return ConstantAddress(ObjCStr, Align); } StringRef StringClass = CGM.getLangOpts().ObjCConstantStringClass; if (StringClass.empty()) StringClass = "NSConstantString"; std::string Sym = SymbolForClass(StringClass); llvm::Constant *isa = TheModule.getNamedGlobal(Sym); if (!isa) isa = new llvm::GlobalVariable(TheModule, IdTy, /* isConstant */false, llvm::GlobalValue::ExternalLinkage, nullptr, Sym); else if (isa->getType() != PtrToIdTy) isa = llvm::ConstantExpr::getBitCast(isa, PtrToIdTy); // struct // { // Class isa; // uint32_t flags; // uint32_t length; // Number of codepoints // uint32_t size; // Number of bytes // uint32_t hash; // const char *data; // }; ConstantInitBuilder Builder(CGM); auto Fields = Builder.beginStruct(); Fields.add(isa); // For now, all non-ASCII strings are represented as UTF-16. As such, the // number of bytes is simply double the number of UTF-16 codepoints. In // ASCII strings, the number of bytes is equal to the number of ASCII // codepoints. if (isNonASCII) { unsigned NumU8CodeUnits = Str.size(); // A UTF-16 representation of a unicode string contains at most the same // number of code units as a UTF-8 representation. Allocate that much // space, plus one for the final null character. 
SmallVector ToBuf(NumU8CodeUnits + 1); const llvm::UTF8 *FromPtr = (const llvm::UTF8 *)Str.data(); llvm::UTF16 *ToPtr = &ToBuf[0]; (void)llvm::ConvertUTF8toUTF16(&FromPtr, FromPtr + NumU8CodeUnits, &ToPtr, ToPtr + NumU8CodeUnits, llvm::strictConversion); uint32_t StringLength = ToPtr - &ToBuf[0]; // Add null terminator *ToPtr = 0; // Flags: 2 indicates UTF-16 encoding Fields.addInt(Int32Ty, 2); // Number of UTF-16 codepoints Fields.addInt(Int32Ty, StringLength); // Number of bytes Fields.addInt(Int32Ty, StringLength * 2); // Hash. Not currently initialised by the compiler. Fields.addInt(Int32Ty, 0); // pointer to the data string. auto Arr = llvm::makeArrayRef(&ToBuf[0], ToPtr+1); auto *C = llvm::ConstantDataArray::get(VMContext, Arr); auto *Buffer = new llvm::GlobalVariable(TheModule, C->getType(), /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, C, ".str"); Buffer->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); Fields.add(Buffer); } else { // Flags: 0 indicates ASCII encoding Fields.addInt(Int32Ty, 0); // Number of UTF-16 codepoints, each ASCII byte is a UTF-16 codepoint Fields.addInt(Int32Ty, Str.size()); // Number of bytes Fields.addInt(Int32Ty, Str.size()); // Hash. Not currently initialised by the compiler. 
Fields.addInt(Int32Ty, 0); // Data pointer Fields.add(MakeConstantString(Str)); } std::string StringName; bool isNamed = !isNonASCII; if (isNamed) { StringName = ".objc_str_"; for (int i=0,e=Str.size() ; isetSection(ConstantStringSection); if (isNamed) { ObjCStrGV->setComdat(TheModule.getOrInsertComdat(StringName)); ObjCStrGV->setVisibility(llvm::GlobalValue::HiddenVisibility); } llvm::Constant *ObjCStr = llvm::ConstantExpr::getBitCast(ObjCStrGV, IdTy); ObjCStrings[Str] = ObjCStr; ConstantStrings.push_back(ObjCStr); return ConstantAddress(ObjCStr, Align); } void PushProperty(ConstantArrayBuilder &PropertiesArray, const ObjCPropertyDecl *property, const Decl *OCD, bool isSynthesized=true, bool isDynamic=true) override { // struct objc_property // { // const char *name; // const char *attributes; // const char *type; // SEL getter; // SEL setter; // }; auto Fields = PropertiesArray.beginStruct(PropertyMetadataTy); ASTContext &Context = CGM.getContext(); Fields.add(MakeConstantString(property->getNameAsString())); std::string TypeStr = CGM.getContext().getObjCEncodingForPropertyDecl(property, OCD); Fields.add(MakeConstantString(TypeStr)); std::string typeStr; Context.getObjCEncodingForType(property->getType(), typeStr); Fields.add(MakeConstantString(typeStr)); auto addPropertyMethod = [&](const ObjCMethodDecl *accessor) { if (accessor) { std::string TypeStr = Context.getObjCEncodingForMethodDecl(accessor); Fields.add(GetConstantSelector(accessor->getSelector(), TypeStr)); } else { Fields.add(NULLPtr); } }; addPropertyMethod(property->getGetterMethodDecl()); addPropertyMethod(property->getSetterMethodDecl()); Fields.finishAndAddTo(PropertiesArray); } llvm::Constant * GenerateProtocolMethodList(ArrayRef Methods) override { // struct objc_protocol_method_description // { // SEL selector; // const char *types; // }; llvm::StructType *ObjCMethodDescTy = llvm::StructType::get(CGM.getLLVMContext(), { PtrToInt8Ty, PtrToInt8Ty }); ASTContext &Context = CGM.getContext(); 
ConstantInitBuilder Builder(CGM); // struct objc_protocol_method_description_list // { // int count; // int size; // struct objc_protocol_method_description methods[]; // }; auto MethodList = Builder.beginStruct(); // int count; MethodList.addInt(IntTy, Methods.size()); // int size; // sizeof(struct objc_method_description) llvm::DataLayout td(&TheModule); MethodList.addInt(IntTy, td.getTypeSizeInBits(ObjCMethodDescTy) / CGM.getContext().getCharWidth()); // struct objc_method_description[] auto MethodArray = MethodList.beginArray(ObjCMethodDescTy); for (auto *M : Methods) { auto Method = MethodArray.beginStruct(ObjCMethodDescTy); Method.add(CGObjCGNU::GetConstantSelector(M)); Method.add(GetTypeString(Context.getObjCEncodingForMethodDecl(M, true))); Method.finishAndAddTo(MethodArray); } MethodArray.finishAndAddTo(MethodList); return MethodList.finishAndCreateGlobal(".objc_protocol_method_list", CGM.getPointerAlign()); } llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper, llvm::Value *cmd, MessageSendInfo &MSI) override { // Don't access the slot unless we're trying to cache the result. CGBuilderTy &Builder = CGF.Builder; llvm::Value *lookupArgs[] = {CGObjCGNU::EnforceType(Builder, ObjCSuper, PtrToObjCSuperTy).getPointer(), cmd}; return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFn, lookupArgs); } llvm::GlobalVariable *GetClassVar(StringRef Name, bool isWeak=false) { std::string SymbolName = SymbolForClassRef(Name, isWeak); auto *ClassSymbol = TheModule.getNamedGlobal(SymbolName); if (ClassSymbol) return ClassSymbol; ClassSymbol = new llvm::GlobalVariable(TheModule, IdTy, false, llvm::GlobalValue::ExternalLinkage, nullptr, SymbolName); // If this is a weak symbol, then we are creating a valid definition for // the symbol, pointing to a weak definition of the real class pointer. If // this is not a weak reference, then we are expecting another compilation // unit to provide the real indirection symbol. 
if (isWeak) ClassSymbol->setInitializer(new llvm::GlobalVariable(TheModule, Int8Ty, false, llvm::GlobalValue::ExternalWeakLinkage, nullptr, SymbolForClass(Name))); assert(ClassSymbol->getName() == SymbolName); return ClassSymbol; } llvm::Value *GetClassNamed(CodeGenFunction &CGF, const std::string &Name, bool isWeak) override { return CGF.Builder.CreateLoad(Address(GetClassVar(Name, isWeak), CGM.getPointerAlign())); } int32_t FlagsForOwnership(Qualifiers::ObjCLifetime Ownership) { // typedef enum { // ownership_invalid = 0, // ownership_strong = 1, // ownership_weak = 2, // ownership_unsafe = 3 // } ivar_ownership; int Flag; switch (Ownership) { case Qualifiers::OCL_Strong: Flag = 1; break; case Qualifiers::OCL_Weak: Flag = 2; break; case Qualifiers::OCL_ExplicitNone: Flag = 3; break; case Qualifiers::OCL_None: case Qualifiers::OCL_Autoreleasing: assert(Ownership != Qualifiers::OCL_Autoreleasing); Flag = 0; } return Flag; } llvm::Constant *GenerateIvarList(ArrayRef IvarNames, ArrayRef IvarTypes, ArrayRef IvarOffsets, ArrayRef IvarAlign, ArrayRef IvarOwnership) override { llvm_unreachable("Method should not be called!"); } llvm::Constant *GenerateEmptyProtocol(StringRef ProtocolName) override { std::string Name = SymbolForProtocol(ProtocolName); auto *GV = TheModule.getGlobalVariable(Name); if (!GV) { // Emit a placeholder symbol. GV = new llvm::GlobalVariable(TheModule, ProtocolTy, false, llvm::GlobalValue::ExternalLinkage, nullptr, Name); GV->setAlignment(CGM.getPointerAlign().getQuantity()); } return llvm::ConstantExpr::getBitCast(GV, ProtocolPtrTy); } /// Existing protocol references. 
llvm::StringMap ExistingProtocolRefs; llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF, const ObjCProtocolDecl *PD) override { auto Name = PD->getNameAsString(); auto *&Ref = ExistingProtocolRefs[Name]; if (!Ref) { auto *&Protocol = ExistingProtocols[Name]; if (!Protocol) Protocol = GenerateProtocolRef(PD); std::string RefName = SymbolForProtocolRef(Name); assert(!TheModule.getGlobalVariable(RefName)); // Emit a reference symbol. auto GV = new llvm::GlobalVariable(TheModule, ProtocolPtrTy, false, llvm::GlobalValue::ExternalLinkage, llvm::ConstantExpr::getBitCast(Protocol, ProtocolPtrTy), RefName); GV->setSection(ProtocolRefSection); GV->setAlignment(CGM.getPointerAlign().getQuantity()); Ref = GV; } EmittedProtocolRef = true; return CGF.Builder.CreateAlignedLoad(Ref, CGM.getPointerAlign()); } llvm::Constant *GenerateProtocolList(ArrayRef Protocols) { llvm::ArrayType *ProtocolArrayTy = llvm::ArrayType::get(ProtocolPtrTy, Protocols.size()); llvm::Constant * ProtocolArray = llvm::ConstantArray::get(ProtocolArrayTy, Protocols); ConstantInitBuilder builder(CGM); auto ProtocolBuilder = builder.beginStruct(); ProtocolBuilder.addNullPointer(PtrTy); ProtocolBuilder.addInt(SizeTy, Protocols.size()); ProtocolBuilder.add(ProtocolArray); return ProtocolBuilder.finishAndCreateGlobal(".objc_protocol_list", CGM.getPointerAlign(), false, llvm::GlobalValue::InternalLinkage); } void GenerateProtocol(const ObjCProtocolDecl *PD) override { // Do nothing - we only emit referenced protocols. } llvm::Constant *GenerateProtocolRef(const ObjCProtocolDecl *PD) { std::string ProtocolName = PD->getNameAsString(); auto *&Protocol = ExistingProtocols[ProtocolName]; if (Protocol) return Protocol; EmittedProtocol = true; // Use the protocol definition, if there is one. 
if (const ObjCProtocolDecl *Def = PD->getDefinition()) PD = Def; SmallVector Protocols; for (const auto *PI : PD->protocols()) Protocols.push_back( llvm::ConstantExpr::getBitCast(GenerateProtocolRef(PI), ProtocolPtrTy)); llvm::Constant *ProtocolList = GenerateProtocolList(Protocols); // Collect information about methods llvm::Constant *InstanceMethodList, *OptionalInstanceMethodList; llvm::Constant *ClassMethodList, *OptionalClassMethodList; EmitProtocolMethodList(PD->instance_methods(), InstanceMethodList, OptionalInstanceMethodList); EmitProtocolMethodList(PD->class_methods(), ClassMethodList, OptionalClassMethodList); auto SymName = SymbolForProtocol(ProtocolName); auto *OldGV = TheModule.getGlobalVariable(SymName); // The isa pointer must be set to a magic number so the runtime knows it's // the correct layout. ConstantInitBuilder builder(CGM); auto ProtocolBuilder = builder.beginStruct(); ProtocolBuilder.add(llvm::ConstantExpr::getIntToPtr( llvm::ConstantInt::get(Int32Ty, ProtocolVersion), IdTy)); ProtocolBuilder.add(MakeConstantString(ProtocolName)); ProtocolBuilder.add(ProtocolList); ProtocolBuilder.add(InstanceMethodList); ProtocolBuilder.add(ClassMethodList); ProtocolBuilder.add(OptionalInstanceMethodList); ProtocolBuilder.add(OptionalClassMethodList); // Required instance properties ProtocolBuilder.add(GeneratePropertyList(nullptr, PD, false, false)); // Optional instance properties ProtocolBuilder.add(GeneratePropertyList(nullptr, PD, false, true)); // Required class properties ProtocolBuilder.add(GeneratePropertyList(nullptr, PD, true, false)); // Optional class properties ProtocolBuilder.add(GeneratePropertyList(nullptr, PD, true, true)); auto *GV = ProtocolBuilder.finishAndCreateGlobal(SymName, CGM.getPointerAlign(), false, llvm::GlobalValue::ExternalLinkage); GV->setSection(ProtocolSection); GV->setComdat(TheModule.getOrInsertComdat(SymName)); if (OldGV) { OldGV->replaceAllUsesWith(llvm::ConstantExpr::getBitCast(GV, OldGV->getType())); 
OldGV->removeFromParent(); GV->setName(SymName); } Protocol = GV; return GV; } llvm::Constant *EnforceType(llvm::Constant *Val, llvm::Type *Ty) { if (Val->getType() == Ty) return Val; return llvm::ConstantExpr::getBitCast(Val, Ty); } llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel, const std::string &TypeEncoding) override { return GetConstantSelector(Sel, TypeEncoding); } llvm::Constant *GetTypeString(llvm::StringRef TypeEncoding) { if (TypeEncoding.empty()) return NULLPtr; std::string MangledTypes = TypeEncoding; std::replace(MangledTypes.begin(), MangledTypes.end(), '@', '\1'); std::string TypesVarName = ".objc_sel_types_" + MangledTypes; auto *TypesGlobal = TheModule.getGlobalVariable(TypesVarName); if (!TypesGlobal) { llvm::Constant *Init = llvm::ConstantDataArray::getString(VMContext, TypeEncoding); auto *GV = new llvm::GlobalVariable(TheModule, Init->getType(), true, llvm::GlobalValue::LinkOnceODRLinkage, Init, TypesVarName); GV->setVisibility(llvm::GlobalValue::HiddenVisibility); TypesGlobal = GV; } return llvm::ConstantExpr::getGetElementPtr(TypesGlobal->getValueType(), TypesGlobal, Zeros); } llvm::Constant *GetConstantSelector(Selector Sel, const std::string &TypeEncoding) override { // @ is used as a special character in symbol names (used for symbol // versioning), so mangle the name to not include it. Replace it with a // character that is not a valid type encoding character (and, being // non-printable, never will be!) 
std::string MangledTypes = TypeEncoding; std::replace(MangledTypes.begin(), MangledTypes.end(), '@', '\1'); auto SelVarName = (StringRef(".objc_selector_") + Sel.getAsString() + "_" + MangledTypes).str(); if (auto *GV = TheModule.getNamedGlobal(SelVarName)) return EnforceType(GV, SelectorTy); ConstantInitBuilder builder(CGM); auto SelBuilder = builder.beginStruct(); SelBuilder.add(ExportUniqueString(Sel.getAsString(), ".objc_sel_name_", true)); SelBuilder.add(GetTypeString(TypeEncoding)); auto *GV = SelBuilder.finishAndCreateGlobal(SelVarName, CGM.getPointerAlign(), false, llvm::GlobalValue::LinkOnceODRLinkage); GV->setComdat(TheModule.getOrInsertComdat(SelVarName)); GV->setVisibility(llvm::GlobalValue::HiddenVisibility); GV->setSection(SelSection); auto *SelVal = EnforceType(GV, SelectorTy); return SelVal; } std::pair GetSectionBounds(StringRef Section) { auto *Start = new llvm::GlobalVariable(TheModule, PtrTy, /*isConstant*/false, llvm::GlobalValue::ExternalLinkage, nullptr, StringRef("__start_") + Section); Start->setVisibility(llvm::GlobalValue::HiddenVisibility); auto *Stop = new llvm::GlobalVariable(TheModule, PtrTy, /*isConstant*/false, llvm::GlobalValue::ExternalLinkage, nullptr, StringRef("__stop_") + Section); Stop->setVisibility(llvm::GlobalValue::HiddenVisibility); return { Start, Stop }; } llvm::Function *ModuleInitFunction() override { llvm::Function *LoadFunction = llvm::Function::Create( llvm::FunctionType::get(llvm::Type::getVoidTy(VMContext), false), llvm::GlobalValue::LinkOnceODRLinkage, ".objcv2_load_function", &TheModule); LoadFunction->setVisibility(llvm::GlobalValue::HiddenVisibility); LoadFunction->setComdat(TheModule.getOrInsertComdat(".objcv2_load_function")); llvm::BasicBlock *EntryBB = llvm::BasicBlock::Create(VMContext, "entry", LoadFunction); CGBuilderTy B(CGM, VMContext); B.SetInsertPoint(EntryBB); ConstantInitBuilder builder(CGM); auto InitStructBuilder = builder.beginStruct(); InitStructBuilder.addInt(Int64Ty, 0); auto addSection = 
[&](const char *section) { auto bounds = GetSectionBounds(section); InitStructBuilder.add(bounds.first); InitStructBuilder.add(bounds.second); }; addSection(SelSection); addSection(ClsSection); addSection(ClsRefSection); addSection(CatSection); addSection(ProtocolSection); addSection(ProtocolRefSection); addSection(ClassAliasSection); addSection(ConstantStringSection); auto *InitStruct = InitStructBuilder.finishAndCreateGlobal(".objc_init", CGM.getPointerAlign(), false, llvm::GlobalValue::LinkOnceODRLinkage); InitStruct->setVisibility(llvm::GlobalValue::HiddenVisibility); InitStruct->setComdat(TheModule.getOrInsertComdat(".objc_init")); CallRuntimeFunction(B, "__objc_load", {InitStruct});; B.CreateRetVoid(); // Make sure that the optimisers don't delete this function. CGM.addCompilerUsedGlobal(LoadFunction); // FIXME: Currently ELF only! // We have to do this by hand, rather than with @llvm.ctors, so that the // linker can remove the duplicate invocations. auto *InitVar = new llvm::GlobalVariable(TheModule, LoadFunction->getType(), /*isConstant*/true, llvm::GlobalValue::LinkOnceAnyLinkage, LoadFunction, ".objc_ctor"); // Check that this hasn't been renamed. This shouldn't happen, because // this function should be called precisely once. assert(InitVar->getName() == ".objc_ctor"); InitVar->setSection(".ctors"); InitVar->setVisibility(llvm::GlobalValue::HiddenVisibility); InitVar->setComdat(TheModule.getOrInsertComdat(".objc_ctor")); CGM.addCompilerUsedGlobal(InitVar); for (auto *C : Categories) { auto *Cat = cast(C->stripPointerCasts()); Cat->setSection(CatSection); CGM.addUsedGlobal(Cat); } // Add a null value for each special section so that we can always // guarantee that the _start and _stop symbols will exist and be // meaningful. 
auto createNullGlobal = [&](StringRef Name, ArrayRef Init, StringRef Section) { auto nullBuilder = builder.beginStruct(); for (auto *F : Init) nullBuilder.add(F); auto GV = nullBuilder.finishAndCreateGlobal(Name, CGM.getPointerAlign(), false, llvm::GlobalValue::LinkOnceODRLinkage); GV->setSection(Section); GV->setComdat(TheModule.getOrInsertComdat(Name)); GV->setVisibility(llvm::GlobalValue::HiddenVisibility); CGM.addUsedGlobal(GV); return GV; }; createNullGlobal(".objc_null_selector", {NULLPtr, NULLPtr}, SelSection); if (Categories.empty()) createNullGlobal(".objc_null_category", {NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr}, CatSection); if (!EmittedClass) { createNullGlobal(".objc_null_cls_init_ref", NULLPtr, ClsSection); createNullGlobal(".objc_null_class_ref", { NULLPtr, NULLPtr }, ClsRefSection); } if (!EmittedProtocol) createNullGlobal(".objc_null_protocol", {NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr}, ProtocolSection); if (!EmittedProtocolRef) createNullGlobal(".objc_null_protocol_ref", {NULLPtr}, ProtocolRefSection); if (!ClassAliases.empty()) for (auto clsAlias : ClassAliases) createNullGlobal(std::string(".objc_class_alias") + clsAlias.second, { MakeConstantString(clsAlias.second), GetClassVar(clsAlias.first) }, ClassAliasSection); else createNullGlobal(".objc_null_class_alias", { NULLPtr, NULLPtr }, ClassAliasSection); if (ConstantStrings.empty()) { auto i32Zero = llvm::ConstantInt::get(Int32Ty, 0); createNullGlobal(".objc_null_constant_string", { NULLPtr, i32Zero, i32Zero, i32Zero, i32Zero, NULLPtr }, ConstantStringSection); } ConstantStrings.clear(); Categories.clear(); Classes.clear(); return nullptr;//CGObjCGNU::ModuleInitFunction(); } /// In the v2 ABI, ivar offset variables use the type encoding in their name /// to trigger linker failures if the types don't match. 
std::string GetIVarOffsetVariableName(const ObjCInterfaceDecl *ID, const ObjCIvarDecl *Ivar) override { std::string TypeEncoding; CGM.getContext().getObjCEncodingForType(Ivar->getType(), TypeEncoding); // Prevent the @ from being interpreted as a symbol version. std::replace(TypeEncoding.begin(), TypeEncoding.end(), '@', '\1'); const std::string Name = "__objc_ivar_offset_" + ID->getNameAsString() + '.' + Ivar->getNameAsString() + '.' + TypeEncoding; return Name; } llvm::Value *EmitIvarOffset(CodeGenFunction &CGF, const ObjCInterfaceDecl *Interface, const ObjCIvarDecl *Ivar) override { const std::string Name = GetIVarOffsetVariableName(Ivar->getContainingInterface(), Ivar); llvm::GlobalVariable *IvarOffsetPointer = TheModule.getNamedGlobal(Name); if (!IvarOffsetPointer) IvarOffsetPointer = new llvm::GlobalVariable(TheModule, IntTy, false, llvm::GlobalValue::ExternalLinkage, nullptr, Name); CharUnits Align = CGM.getIntAlign(); llvm::Value *Offset = CGF.Builder.CreateAlignedLoad(IvarOffsetPointer, Align); if (Offset->getType() != PtrDiffTy) Offset = CGF.Builder.CreateZExtOrBitCast(Offset, PtrDiffTy); return Offset; } void GenerateClass(const ObjCImplementationDecl *OID) override { ASTContext &Context = CGM.getContext(); // Get the class name ObjCInterfaceDecl *classDecl = const_cast(OID->getClassInterface()); std::string className = classDecl->getNameAsString(); auto *classNameConstant = MakeConstantString(className); ConstantInitBuilder builder(CGM); auto metaclassFields = builder.beginStruct(); // struct objc_class *isa; metaclassFields.addNullPointer(PtrTy); // struct objc_class *super_class; metaclassFields.addNullPointer(PtrTy); // const char *name; metaclassFields.add(classNameConstant); // long version; metaclassFields.addInt(LongTy, 0); // unsigned long info; // objc_class_flag_meta metaclassFields.addInt(LongTy, 1); // long instance_size; // Setting this to zero is consistent with the older ABI, but it might be // more sensible to set this to sizeof(struct 
objc_class) metaclassFields.addInt(LongTy, 0); // struct objc_ivar_list *ivars; metaclassFields.addNullPointer(PtrTy); // struct objc_method_list *methods // FIXME: Almost identical code is copied and pasted below for the // class, but refactoring it cleanly requires C++14 generic lambdas. if (OID->classmeth_begin() == OID->classmeth_end()) metaclassFields.addNullPointer(PtrTy); else { SmallVector ClassMethods; ClassMethods.insert(ClassMethods.begin(), OID->classmeth_begin(), OID->classmeth_end()); metaclassFields.addBitCast( GenerateMethodList(className, "", ClassMethods, true), PtrTy); } // void *dtable; metaclassFields.addNullPointer(PtrTy); // IMP cxx_construct; metaclassFields.addNullPointer(PtrTy); // IMP cxx_destruct; metaclassFields.addNullPointer(PtrTy); // struct objc_class *subclass_list metaclassFields.addNullPointer(PtrTy); // struct objc_class *sibling_class metaclassFields.addNullPointer(PtrTy); // struct objc_protocol_list *protocols; metaclassFields.addNullPointer(PtrTy); // struct reference_list *extra_data; metaclassFields.addNullPointer(PtrTy); // long abi_version; metaclassFields.addInt(LongTy, 0); // struct objc_property_list *properties metaclassFields.add(GeneratePropertyList(OID, classDecl, /*isClassProperty*/true)); auto *metaclass = metaclassFields.finishAndCreateGlobal("._OBJC_METACLASS_" + className, CGM.getPointerAlign()); auto classFields = builder.beginStruct(); // struct objc_class *isa; classFields.add(metaclass); // struct objc_class *super_class; // Get the superclass name. 
const ObjCInterfaceDecl * SuperClassDecl = OID->getClassInterface()->getSuperClass(); if (SuperClassDecl) { auto SuperClassName = SymbolForClass(SuperClassDecl->getNameAsString()); llvm::Constant *SuperClass = TheModule.getNamedGlobal(SuperClassName); if (!SuperClass) { SuperClass = new llvm::GlobalVariable(TheModule, PtrTy, false, llvm::GlobalValue::ExternalLinkage, nullptr, SuperClassName); } classFields.add(llvm::ConstantExpr::getBitCast(SuperClass, PtrTy)); } else classFields.addNullPointer(PtrTy); // const char *name; classFields.add(classNameConstant); // long version; classFields.addInt(LongTy, 0); // unsigned long info; // !objc_class_flag_meta classFields.addInt(LongTy, 0); // long instance_size; int superInstanceSize = !SuperClassDecl ? 0 : Context.getASTObjCInterfaceLayout(SuperClassDecl).getSize().getQuantity(); // Instance size is negative for classes that have not yet had their ivar // layout calculated. classFields.addInt(LongTy, 0 - (Context.getASTObjCImplementationLayout(OID).getSize().getQuantity() - superInstanceSize)); if (classDecl->all_declared_ivar_begin() == nullptr) classFields.addNullPointer(PtrTy); else { int ivar_count = 0; for (const ObjCIvarDecl *IVD = classDecl->all_declared_ivar_begin(); IVD; IVD = IVD->getNextIvar()) ivar_count++; llvm::DataLayout td(&TheModule); // struct objc_ivar_list *ivars; ConstantInitBuilder b(CGM); auto ivarListBuilder = b.beginStruct(); // int count; ivarListBuilder.addInt(IntTy, ivar_count); // size_t size; llvm::StructType *ObjCIvarTy = llvm::StructType::get( PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty, Int32Ty, Int32Ty); ivarListBuilder.addInt(SizeTy, td.getTypeSizeInBits(ObjCIvarTy) / CGM.getContext().getCharWidth()); // struct objc_ivar ivars[] auto ivarArrayBuilder = ivarListBuilder.beginArray(); CodeGenTypes &Types = CGM.getTypes(); for (const ObjCIvarDecl *IVD = classDecl->all_declared_ivar_begin(); IVD; IVD = IVD->getNextIvar()) { auto ivarTy = IVD->getType(); auto ivarBuilder = 
ivarArrayBuilder.beginStruct(); // const char *name; ivarBuilder.add(MakeConstantString(IVD->getNameAsString())); // const char *type; std::string TypeStr; //Context.getObjCEncodingForType(ivarTy, TypeStr, IVD, true); Context.getObjCEncodingForMethodParameter(Decl::OBJC_TQ_None, ivarTy, TypeStr, true); ivarBuilder.add(MakeConstantString(TypeStr)); // int *offset; uint64_t BaseOffset = ComputeIvarBaseOffset(CGM, OID, IVD); uint64_t Offset = BaseOffset - superInstanceSize; llvm::Constant *OffsetValue = llvm::ConstantInt::get(IntTy, Offset); std::string OffsetName = GetIVarOffsetVariableName(classDecl, IVD); llvm::GlobalVariable *OffsetVar = TheModule.getGlobalVariable(OffsetName); if (OffsetVar) OffsetVar->setInitializer(OffsetValue); else OffsetVar = new llvm::GlobalVariable(TheModule, IntTy, false, llvm::GlobalValue::ExternalLinkage, OffsetValue, OffsetName); auto ivarVisibility = (IVD->getAccessControl() == ObjCIvarDecl::Private || IVD->getAccessControl() == ObjCIvarDecl::Package || classDecl->getVisibility() == HiddenVisibility) ? llvm::GlobalValue::HiddenVisibility : llvm::GlobalValue::DefaultVisibility; OffsetVar->setVisibility(ivarVisibility); ivarBuilder.add(OffsetVar); // Ivar size ivarBuilder.addInt(Int32Ty, td.getTypeSizeInBits(Types.ConvertType(ivarTy)) / CGM.getContext().getCharWidth()); // Alignment will be stored as a base-2 log of the alignment. int align = llvm::Log2_32(Context.getTypeAlignInChars(ivarTy).getQuantity()); // Objects that require more than 2^64-byte alignment should be impossible! assert(align < 64); // uint32_t flags; // Bits 0-1 are ownership. 
// Bit 2 indicates an extended type encoding // Bits 3-8 contain log2(alignment) ivarBuilder.addInt(Int32Ty, (align << 3) | (1<<2) | FlagsForOwnership(ivarTy.getQualifiers().getObjCLifetime())); ivarBuilder.finishAndAddTo(ivarArrayBuilder); } ivarArrayBuilder.finishAndAddTo(ivarListBuilder); auto ivarList = ivarListBuilder.finishAndCreateGlobal(".objc_ivar_list", CGM.getPointerAlign(), /*constant*/ false, llvm::GlobalValue::PrivateLinkage); classFields.add(ivarList); } // struct objc_method_list *methods SmallVector InstanceMethods; InstanceMethods.insert(InstanceMethods.begin(), OID->instmeth_begin(), OID->instmeth_end()); for (auto *propImpl : OID->property_impls()) if (propImpl->getPropertyImplementation() == ObjCPropertyImplDecl::Synthesize) { ObjCPropertyDecl *prop = propImpl->getPropertyDecl(); auto addIfExists = [&](const ObjCMethodDecl* OMD) { if (OMD) InstanceMethods.push_back(OMD); }; addIfExists(prop->getGetterMethodDecl()); addIfExists(prop->getSetterMethodDecl()); } if (InstanceMethods.size() == 0) classFields.addNullPointer(PtrTy); else classFields.addBitCast( GenerateMethodList(className, "", InstanceMethods, false), PtrTy); // void *dtable; classFields.addNullPointer(PtrTy); // IMP cxx_construct; classFields.addNullPointer(PtrTy); // IMP cxx_destruct; classFields.addNullPointer(PtrTy); // struct objc_class *subclass_list classFields.addNullPointer(PtrTy); // struct objc_class *sibling_class classFields.addNullPointer(PtrTy); // struct objc_protocol_list *protocols; SmallVector Protocols; for (const auto *I : classDecl->protocols()) Protocols.push_back( llvm::ConstantExpr::getBitCast(GenerateProtocolRef(I), ProtocolPtrTy)); if (Protocols.empty()) classFields.addNullPointer(PtrTy); else classFields.add(GenerateProtocolList(Protocols)); // struct reference_list *extra_data; classFields.addNullPointer(PtrTy); // long abi_version; classFields.addInt(LongTy, 0); // struct objc_property_list *properties classFields.add(GeneratePropertyList(OID, classDecl)); 
auto *classStruct = classFields.finishAndCreateGlobal(SymbolForClass(className), CGM.getPointerAlign(), false, llvm::GlobalValue::ExternalLinkage); if (CGM.getTriple().isOSBinFormatCOFF()) { auto Storage = llvm::GlobalValue::DefaultStorageClass; if (OID->getClassInterface()->hasAttr()) Storage = llvm::GlobalValue::DLLImportStorageClass; else if (OID->getClassInterface()->hasAttr()) Storage = llvm::GlobalValue::DLLExportStorageClass; cast(classStruct)->setDLLStorageClass(Storage); } auto *classRefSymbol = GetClassVar(className); classRefSymbol->setSection(ClsRefSection); classRefSymbol->setInitializer(llvm::ConstantExpr::getBitCast(classStruct, IdTy)); // Resolve the class aliases, if they exist. // FIXME: Class pointer aliases shouldn't exist! if (ClassPtrAlias) { ClassPtrAlias->replaceAllUsesWith( llvm::ConstantExpr::getBitCast(classStruct, IdTy)); ClassPtrAlias->eraseFromParent(); ClassPtrAlias = nullptr; } if (auto Placeholder = TheModule.getNamedGlobal(SymbolForClass(className))) if (Placeholder != classStruct) { Placeholder->replaceAllUsesWith( llvm::ConstantExpr::getBitCast(classStruct, Placeholder->getType())); Placeholder->eraseFromParent(); classStruct->setName(SymbolForClass(className)); } if (MetaClassPtrAlias) { MetaClassPtrAlias->replaceAllUsesWith( llvm::ConstantExpr::getBitCast(metaclass, IdTy)); MetaClassPtrAlias->eraseFromParent(); MetaClassPtrAlias = nullptr; } assert(classStruct->getName() == SymbolForClass(className)); auto classInitRef = new llvm::GlobalVariable(TheModule, classStruct->getType(), false, llvm::GlobalValue::ExternalLinkage, classStruct, "._OBJC_INIT_CLASS_" + className); classInitRef->setSection(ClsSection); CGM.addUsedGlobal(classInitRef); EmittedClass = true; } public: CGObjCGNUstep2(CodeGenModule &Mod) : CGObjCGNUstep(Mod, 10, 4, 2) { MsgLookupSuperFn.init(&CGM, "objc_msg_lookup_super", IMPTy, PtrToObjCSuperTy, SelectorTy); // struct objc_property // { // const char *name; // const char *attributes; // const char *type; // SEL 
getter; // SEL setter; // } PropertyMetadataTy = llvm::StructType::get(CGM.getLLVMContext(), { PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty }); } }; /// Support for the ObjFW runtime. class CGObjCObjFW: public CGObjCGNU { protected: /// The GCC ABI message lookup function. Returns an IMP pointing to the /// method implementation for this message. LazyRuntimeFunction MsgLookupFn; /// stret lookup function. While this does not seem to make sense at the /// first look, this is required to call the correct forwarding function. LazyRuntimeFunction MsgLookupFnSRet; /// The GCC ABI superclass message lookup function. Takes a pointer to a /// structure describing the receiver and the class, and a selector as /// arguments. Returns the IMP for the corresponding method. LazyRuntimeFunction MsgLookupSuperFn, MsgLookupSuperFnSRet; llvm::Value *LookupIMP(CodeGenFunction &CGF, llvm::Value *&Receiver, llvm::Value *cmd, llvm::MDNode *node, MessageSendInfo &MSI) override { CGBuilderTy &Builder = CGF.Builder; llvm::Value *args[] = { EnforceType(Builder, Receiver, IdTy), EnforceType(Builder, cmd, SelectorTy) }; llvm::CallSite imp; if (CGM.ReturnTypeUsesSRet(MSI.CallInfo)) imp = CGF.EmitRuntimeCallOrInvoke(MsgLookupFnSRet, args); else imp = CGF.EmitRuntimeCallOrInvoke(MsgLookupFn, args); imp->setMetadata(msgSendMDKind, node); return imp.getInstruction(); } llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper, llvm::Value *cmd, MessageSendInfo &MSI) override { CGBuilderTy &Builder = CGF.Builder; llvm::Value *lookupArgs[] = { EnforceType(Builder, ObjCSuper.getPointer(), PtrToObjCSuperTy), cmd, }; if (CGM.ReturnTypeUsesSRet(MSI.CallInfo)) return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFnSRet, lookupArgs); else return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFn, lookupArgs); } llvm::Value *GetClassNamed(CodeGenFunction &CGF, const std::string &Name, bool isWeak) override { if (isWeak) return CGObjCGNU::GetClassNamed(CGF, Name, isWeak); 
EmitClassRef(Name); std::string SymbolName = "_OBJC_CLASS_" + Name; llvm::GlobalVariable *ClassSymbol = TheModule.getGlobalVariable(SymbolName); if (!ClassSymbol) ClassSymbol = new llvm::GlobalVariable(TheModule, LongTy, false, llvm::GlobalValue::ExternalLinkage, nullptr, SymbolName); return ClassSymbol; } public: CGObjCObjFW(CodeGenModule &Mod): CGObjCGNU(Mod, 9, 3) { // IMP objc_msg_lookup(id, SEL); MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy); MsgLookupFnSRet.init(&CGM, "objc_msg_lookup_stret", IMPTy, IdTy, SelectorTy); // IMP objc_msg_lookup_super(struct objc_super*, SEL); MsgLookupSuperFn.init(&CGM, "objc_msg_lookup_super", IMPTy, PtrToObjCSuperTy, SelectorTy); MsgLookupSuperFnSRet.init(&CGM, "objc_msg_lookup_super_stret", IMPTy, PtrToObjCSuperTy, SelectorTy); } }; } // end anonymous namespace /// Emits a reference to a dummy variable which is emitted with each class. /// This ensures that a linker error will be generated when trying to link /// together modules where a referenced class is not defined. 
void CGObjCGNU::EmitClassRef(const std::string &className) {
  std::string symbolRef = "__objc_class_ref_" + className;
  // Don't emit two copies of the same symbol
  if (TheModule.getGlobalVariable(symbolRef))
    return;
  std::string symbolName = "__objc_class_name_" + className;
  // Reuse the __objc_class_name_<class> global if the module already has one;
  // otherwise declare it as an external with no initializer.
  llvm::GlobalVariable *ClassSymbol = TheModule.getGlobalVariable(symbolName);
  if (!ClassSymbol) {
    ClassSymbol = new llvm::GlobalVariable(TheModule, LongTy, false,
        llvm::GlobalValue::ExternalLinkage, nullptr, symbolName);
  }
  // The reference itself is a weak, constant global whose initializer is the
  // class-name symbol.
  new llvm::GlobalVariable(TheModule, ClassSymbol->getType(), true,
    llvm::GlobalValue::WeakAnyLinkage, ClassSymbol, symbolRef);
}

/// Initialises the LLVM types, constants and lazily-created runtime function
/// descriptors shared by all GNU-family runtimes.
///
/// \param cgm                  module being generated.
/// \param runtimeABIVersion    stored in RuntimeVersion (overridden to 10
///                             below when GC or ARC is enabled).
/// \param protocolClassVersion stored in ProtocolVersion.
/// \param classABI             stored in ClassABIVersion.
CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion,
                     unsigned protocolClassVersion, unsigned classABI)
  : CGObjCRuntime(cgm), TheModule(CGM.getModule()),
    VMContext(cgm.getLLVMContext()), ClassPtrAlias(nullptr),
    MetaClassPtrAlias(nullptr), RuntimeVersion(runtimeABIVersion),
    ProtocolVersion(protocolClassVersion), ClassABIVersion(classABI) {

  msgSendMDKind = VMContext.getMDKindID("GNUObjCMessageSend");

  // NOTE(review): the cast calls in this function carry no explicit template
  // argument in this copy of the text -- presumably lost during extraction;
  // confirm against the upstream file.
  CodeGenTypes &Types = CGM.getTypes();
  IntTy = cast(
      Types.ConvertType(CGM.getContext().IntTy));
  LongTy = cast(
      Types.ConvertType(CGM.getContext().LongTy));
  SizeTy = cast(
      Types.ConvertType(CGM.getContext().getSizeType()));
  PtrDiffTy = cast(
      Types.ConvertType(CGM.getContext().getPointerDiffType()));
  BoolTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy);

  Int8Ty = llvm::Type::getInt8Ty(VMContext);
  // C string type.  Used in lots of places.
  PtrToInt8Ty = llvm::PointerType::getUnqual(Int8Ty);
  ProtocolPtrTy = llvm::PointerType::getUnqual(
      Types.ConvertType(CGM.getContext().getObjCProtoType()));

  Zeros[0] = llvm::ConstantInt::get(LongTy, 0);
  Zeros[1] = Zeros[0];
  NULLPtr = llvm::ConstantPointerNull::get(PtrToInt8Ty);
  // Get the selector Type.  Fall back to i8* when the AST has no selector
  // type.
  QualType selTy = CGM.getContext().getObjCSelType();
  if (QualType() == selTy) {
    SelectorTy = PtrToInt8Ty;
  } else {
    SelectorTy = cast(CGM.getTypes().ConvertType(selTy));
  }

  PtrToIntTy = llvm::PointerType::getUnqual(IntTy);
  PtrTy = PtrToInt8Ty;

  Int32Ty = llvm::Type::getInt32Ty(VMContext);
  Int64Ty = llvm::Type::getInt64Ty(VMContext);

  // Pointer-sized integer: i32 when pointers are 32 bits wide, i64 otherwise.
  IntPtrTy =
      CGM.getDataLayout().getPointerSizeInBits() == 32 ? Int32Ty : Int64Ty;

  // Object type.  Fall back to i8* when the AST has no 'id' type.
  QualType UnqualIdTy = CGM.getContext().getObjCIdType();
  ASTIdTy = CanQualType();
  if (UnqualIdTy != QualType()) {
    ASTIdTy = CGM.getContext().getCanonicalType(UnqualIdTy);
    IdTy = cast(CGM.getTypes().ConvertType(ASTIdTy));
  } else {
    IdTy = PtrToInt8Ty;
  }
  PtrToIdTy = llvm::PointerType::getUnqual(IdTy);
  ProtocolTy = llvm::StructType::get(IdTy,
      PtrToInt8Ty, // name
      PtrToInt8Ty, // protocols
      PtrToInt8Ty, // instance methods
      PtrToInt8Ty, // class methods
      PtrToInt8Ty, // optional instance methods
      PtrToInt8Ty, // optional class methods
      PtrToInt8Ty, // properties
      PtrToInt8Ty);// optional properties

  // struct objc_property_gsv1
  // {
  //   const char *name;
  //   char attributes;
  //   char attributes2;
  //   char unused1;
  //   char unused2;
  //   const char *getter_name;
  //   const char *getter_types;
  //   const char *setter_name;
  //   const char *setter_types;
  // }
  PropertyMetadataTy = llvm::StructType::get(CGM.getLLVMContext(), {
      PtrToInt8Ty, Int8Ty, Int8Ty, Int8Ty, Int8Ty, PtrToInt8Ty, PtrToInt8Ty,
      PtrToInt8Ty, PtrToInt8Ty });

  ObjCSuperTy = llvm::StructType::get(IdTy, IdTy);
  PtrToObjCSuperTy = llvm::PointerType::getUnqual(ObjCSuperTy);

  llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext);

  // void objc_exception_throw(id);
  // Both the throw and the rethrow descriptor are bound to the same runtime
  // symbol here.
  ExceptionThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy);
  ExceptionReThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy);
  // int objc_sync_enter(id);
  SyncEnterFn.init(&CGM, "objc_sync_enter", IntTy, IdTy);
  // int objc_sync_exit(id);
  SyncExitFn.init(&CGM, "objc_sync_exit", IntTy, IdTy);

  // void objc_enumerationMutation (id)
  EnumerationMutationFn.init(&CGM, "objc_enumerationMutation", VoidTy, IdTy);

  // id objc_getProperty(id, SEL, ptrdiff_t, BOOL)
  GetPropertyFn.init(&CGM, "objc_getProperty", IdTy, IdTy, SelectorTy,
                     PtrDiffTy, BoolTy);
  // void objc_setProperty(id, SEL, ptrdiff_t, id, BOOL, BOOL)
  SetPropertyFn.init(&CGM, "objc_setProperty", VoidTy, IdTy, SelectorTy,
                     PtrDiffTy, IdTy, BoolTy, BoolTy);
  // void objc_getPropertyStruct(void*, void*, ptrdiff_t, BOOL, BOOL)
  GetStructPropertyFn.init(&CGM, "objc_getPropertyStruct", VoidTy, PtrTy, PtrTy,
                           PtrDiffTy, BoolTy, BoolTy);
  // void objc_setPropertyStruct(void*, void*, ptrdiff_t, BOOL, BOOL)
  SetStructPropertyFn.init(&CGM, "objc_setPropertyStruct", VoidTy, PtrTy, PtrTy,
                           PtrDiffTy, BoolTy, BoolTy);

  // IMP type: pointer to a variadic function taking (id, SEL) and returning
  // id.
  llvm::Type *IMPArgs[] = { IdTy, SelectorTy };
  IMPTy = llvm::PointerType::getUnqual(llvm::FunctionType::get(IdTy, IMPArgs,
              true));

  // GC or ARC forces runtime version 10, whatever the caller requested.
  const LangOptions &Opts = CGM.getLangOpts();
  if ((Opts.getGC() != LangOptions::NonGC) || Opts.ObjCAutoRefCount)
    RuntimeVersion = 10;

  // Don't bother initialising the GC stuff unless we're compiling in GC mode
  if (Opts.getGC() != LangOptions::NonGC) {
    // This is a bit of a hack.  We should sort this out by having a proper
    // CGObjCGNUstep subclass for GC, but we may want to really support the old
    // ABI and GC added in ObjectiveC2.framework, so we fudge it a bit for now
    // Get selectors needed in GC mode
    RetainSel = GetNullarySelector("retain", CGM.getContext());
    ReleaseSel = GetNullarySelector("release", CGM.getContext());
    AutoreleaseSel = GetNullarySelector("autorelease", CGM.getContext());

    // Get functions needed in GC mode

    // id objc_assign_ivar(id, id, ptrdiff_t);
    IvarAssignFn.init(&CGM, "objc_assign_ivar", IdTy, IdTy, IdTy, PtrDiffTy);
    // id objc_assign_strongCast (id, id*)
    StrongCastAssignFn.init(&CGM, "objc_assign_strongCast", IdTy, IdTy,
                            PtrToIdTy);
    // id objc_assign_global(id, id*);
    GlobalAssignFn.init(&CGM, "objc_assign_global", IdTy, IdTy, PtrToIdTy);
    // id objc_assign_weak(id, id*);
    WeakAssignFn.init(&CGM, "objc_assign_weak", IdTy, IdTy, PtrToIdTy);
    // id objc_read_weak(id*);
    WeakReadFn.init(&CGM, "objc_read_weak", IdTy, PtrToIdTy);
    // void *objc_memmove_collectable(void*, void *, size_t);
    MemMoveFn.init(&CGM, "objc_memmove_collectable", PtrTy, PtrTy, PtrTy,
                   SizeTy);
  }
}

/// Emits a runtime call to objc_lookup_class for the class called \p Name and
/// returns its result.  For non-weak classes a class reference symbol is
/// emitted as well (see EmitClassRef).
llvm::Value *CGObjCGNU::GetClassNamed(CodeGenFunction &CGF,
                                      const std::string &Name, bool isWeak) {
  llvm::Constant *ClassName = MakeConstantString(Name);
  // With the incompatible ABI, this will need to be replaced with a direct
  // reference to the class symbol.  For the compatible nonfragile ABI we are
  // still performing this lookup at run time but emitting the symbol for the
  // class externally so that we can make the switch later.
  //
  // Libobjc2 contains an LLVM pass that replaces calls to objc_lookup_class
  // with memoized versions or with static references if it's safe to do so.
  if (!isWeak)
    EmitClassRef(Name);

  llvm::Constant *ClassLookupFn =
    CGM.CreateRuntimeFunction(llvm::FunctionType::get(IdTy, PtrToInt8Ty, true),
                              "objc_lookup_class");
  return CGF.EmitNounwindRuntimeCall(ClassLookupFn, ClassName);
}

// This has to perform the lookup every time, since posing and related
// techniques can modify the name -> class mapping.
//
// When the value returned by GetClassNamed passes the dyn_cast below, the
// declaration's global-value properties are applied to it.
// NOTE(review): the dyn_cast target type is missing in this copy of the text
// (presumably a global variable type) -- confirm against upstream.
llvm::Value *CGObjCGNU::GetClass(CodeGenFunction &CGF,
                                 const ObjCInterfaceDecl *OID) {
  auto *Value =
      GetClassNamed(CGF, OID->getNameAsString(), OID->isWeakImported());
  if (auto *ClassSymbol = dyn_cast(Value))
    CGM.setGVProperties(ClassSymbol, OID);
  return Value;
}

/// Returns a (non-weak) reference to the NSAutoreleasePool class.  On COFF
/// targets, when the reference passes the dyn_cast below, the translation
/// unit is searched for a declaration named NSAutoreleasePool and its
/// global-value properties are applied to the symbol.
llvm::Value *CGObjCGNU::EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF) {
  auto *Value  = GetClassNamed(CGF, "NSAutoreleasePool", false);
  if (CGM.getTriple().isOSBinFormatCOFF()) {
    if (auto *ClassSymbol = dyn_cast(Value)) {
      IdentifierInfo &II = CGF.CGM.getContext().Idents.get("NSAutoreleasePool");
      TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl();
      DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl);

      // Take the first lookup result that passes the dyn_cast; VD stays null
      // if none does.
      const VarDecl *VD = nullptr;
      for (const auto &Result : DC->lookup(&II))
        if ((VD = dyn_cast(Result)))
          break;

      CGM.setGVProperties(ClassSymbol, VD);
    }
  }
  return Value;
}

/// Returns the selector value for \p Sel with the given type encoding.
///
/// Selectors are cached per (selector, type encoding) pair in SelectorTable;
/// on a miss a private alias named ".objc_selector_<sel>" is created with no
/// aliasee at this point and recorded in the table.
llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, Selector Sel,
                                    const std::string &TypeEncoding) {
  SmallVectorImpl &Types = SelectorTable[Sel];
  llvm::GlobalAlias *SelValue = nullptr;

  // Linear search of the entries already recorded for this selector.
  for (SmallVectorImpl::iterator i = Types.begin(),
      e = Types.end() ; i!=e ; i++) {
    if (i->first == TypeEncoding) {
      SelValue = i->second;
      break;
    }
  }
  if (!SelValue) {
    SelValue = llvm::GlobalAlias::create(
        SelectorTy->getElementType(), 0, llvm::GlobalValue::PrivateLinkage,
        ".objc_selector_" + Sel.getAsString(), &TheModule);
    Types.emplace_back(TypeEncoding, SelValue);
  }

  return SelValue;
}

/// Returns the address of a fresh temporary that holds the (untyped) selector
/// value for \p Sel.
Address CGObjCGNU::GetAddrOfSelector(CodeGenFunction &CGF, Selector Sel) {
  llvm::Value *SelValue = GetSelector(CGF, Sel);

  // Store it to a temporary.  Does this satisfy the semantics of
  // GetAddrOfSelector?  Hopefully.
  Address tmp = CGF.CreateTempAlloca(SelValue->getType(),
                                     CGF.getPointerAlign());
  CGF.Builder.CreateStore(SelValue, tmp);
  return tmp;
}

/// Untyped selector lookup: forwards with an empty type encoding.
llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, Selector Sel) {
  return GetSelector(CGF, Sel, std::string());
}

/// Typed selector lookup: uses the method's Objective-C type encoding.
llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF,
                                    const ObjCMethodDecl *Method) {
  std::string SelTypes = CGM.getContext().getObjCEncodingForMethodDecl(Method);
  return GetSelector(CGF, Method->getSelector(), SelTypes);
}

/// Returns the exception-type descriptor used for an @catch of type \p T.
///
/// For 'id' and qualified 'id' the result is the constant string "@id" on
/// non-fragile runtimes and null otherwise; for any other type it is a
/// constant string holding the interface name.
llvm::Constant *CGObjCGNU::GetEHType(QualType T) {
  if (T->isObjCIdType() || T->isObjCQualifiedIdType()) {
    // With the old ABI, there was only one kind of catchall, which broke
    // foreign exceptions.  With the new ABI, we use __objc_id_typeinfo as
    // a pointer indicating object catchalls, and NULL to indicate real
    // catchalls
    if (CGM.getLangOpts().ObjCRuntime.isNonFragile()) {
      return MakeConstantString("@id");
    } else {
      return nullptr;
    }
  }

  // All other types should be Objective-C interface pointer types.
  const ObjCObjectPointerType *OPT = T->getAs();
  assert(OPT && "Invalid @catch type.");
  const ObjCInterfaceDecl *IDecl = OPT->getObjectType()->getInterface();
  assert(IDecl && "Invalid @catch type.");
  return MakeConstantString(IDecl->getIdentifier()->getName());
}

/// GNUstep flavour of GetEHType.  Outside C++ mode this defers to
/// CGObjCGNU::GetEHType; in Objective-C++ it returns a type-info object
/// (bitcast to i8*) instead of a plain string.
llvm::Constant *CGObjCGNUstep::GetEHType(QualType T) {
  if (!CGM.getLangOpts().CPlusPlus)
    return CGObjCGNU::GetEHType(T);

  // For Objective-C++, we want to provide the ability to catch both C++ and
  // Objective-C objects in the same function.

  // There's a particular fixed type info for 'id'.
  if (T->isObjCIdType() || T->isObjCQualifiedIdType()) {
    llvm::Constant *IDEHType =
      CGM.getModule().getGlobalVariable("__objc_id_type_info");
    if (!IDEHType)
      IDEHType =
        new llvm::GlobalVariable(CGM.getModule(), PtrToInt8Ty,
                                 false,
                                 llvm::GlobalValue::ExternalLinkage,
                                 nullptr, "__objc_id_type_info");
    return llvm::ConstantExpr::getBitCast(IDEHType, PtrToInt8Ty);
  }

  const ObjCObjectPointerType *PT =
    T->getAs();
  assert(PT && "Invalid @catch type.");
  const ObjCInterfaceType *IT = PT->getInterfaceType();
  assert(IT && "Invalid @catch type.");
  std::string className = IT->getDecl()->getIdentifier()->getName();

  std::string typeinfoName = "__objc_eh_typeinfo_" + className;

  // Return the existing typeinfo if it exists
  llvm::Constant *typeinfo = TheModule.getGlobalVariable(typeinfoName);
  if (typeinfo)
    return llvm::ConstantExpr::getBitCast(typeinfo, PtrToInt8Ty);

  // Otherwise create it.

  // vtable for gnustep::libobjc::__objc_class_type_info
  // It's quite ugly hard-coding this.  Ideally we'd generate it using the host
  // platform's name mangling.
  const char *vtableName = "_ZTVN7gnustep7libobjc22__objc_class_type_infoE";
  auto *Vtable = TheModule.getGlobalVariable(vtableName);
  if (!Vtable) {
    Vtable = new llvm::GlobalVariable(TheModule, PtrToInt8Ty, true,
                                      llvm::GlobalValue::ExternalLinkage,
                                      nullptr, vtableName);
  }
  // The first field of the type info is the address of element 2 of the
  // vtable symbol.
  llvm::Constant *Two = llvm::ConstantInt::get(IntTy, 2);
  auto *BVtable = llvm::ConstantExpr::getBitCast(
      llvm::ConstantExpr::getGetElementPtr(Vtable->getValueType(), Vtable, Two),
      PtrToInt8Ty);

  llvm::Constant *typeName =
    ExportUniqueString(className, "__objc_eh_typename_");

  // The type info is a { vtable pointer, type name } pair emitted as a
  // non-constant linkonce_odr global.
  ConstantInitBuilder builder(CGM);
  auto fields = builder.beginStruct();
  fields.add(BVtable);
  fields.add(typeName);
  llvm::Constant *TI =
    fields.finishAndCreateGlobal("__objc_eh_typeinfo_" + className,
                                 CGM.getPointerAlign(),
                                 /*constant*/ false,
                                 llvm::GlobalValue::LinkOnceODRLinkage);
  return llvm::ConstantExpr::getBitCast(TI, PtrToInt8Ty);
}

/// Generate an NSConstantString object.
///
/// String objects are uniqued by content: a literal whose text has been seen
/// before yields the previously emitted object.  A new object is a
/// { isa, C string, length } struct emitted as the global ".objc_str"; it is
/// recorded both in the uniquing map and in the list of constant strings.
ConstantAddress CGObjCGNU::GenerateConstantString(const StringLiteral *SL) {
  const std::string Contents = SL->getString().str();
  const CharUnits PtrAlign = CGM.getPointerAlign();

  // Reuse the object emitted for an identical literal, if there is one.
  auto Cached = ObjCStrings.find(Contents);
  if (Cached != ObjCStrings.end())
    return ConstantAddress(Cached->getValue(), PtrAlign);

  // The class is configurable; NSConstantString is the default.
  StringRef ClassName = CGM.getLangOpts().ObjCConstantStringClass;
  if (ClassName.empty())
    ClassName = "NSConstantString";

  std::string ClassSym = "_OBJC_CLASS_";
  ClassSym += ClassName;

  // The isa field refers to the class symbol: declare it as an extern-weak
  // global if the module lacks it, otherwise make sure it has type id*.
  llvm::Constant *IsaRef = TheModule.getNamedGlobal(ClassSym);
  if (IsaRef) {
    if (IsaRef->getType() != PtrToIdTy)
      IsaRef = llvm::ConstantExpr::getBitCast(IsaRef, PtrToIdTy);
  } else {
    IsaRef = new llvm::GlobalVariable(TheModule, IdTy, /* isConstant */false,
                                      llvm::GlobalValue::ExternalWeakLinkage,
                                      nullptr, ClassSym);
  }

  ConstantInitBuilder Init(CGM);
  auto Layout = Init.beginStruct();
  Layout.add(IsaRef);
  Layout.add(MakeConstantString(Contents));
  Layout.addInt(IntTy, Contents.size());

  llvm::Constant *StrObj = llvm::ConstantExpr::getBitCast(
      Layout.finishAndCreateGlobal(".objc_str", PtrAlign), PtrToInt8Ty);

  ObjCStrings[Contents] = StrObj;
  ConstantStrings.push_back(StrObj);
  return ConstantAddress(StrObj, PtrAlign);
}

///Generates a message send where the super is the receiver.  This is a message
///send to self with special delivery semantics indicating which class's method
///should be called.
RValue
CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF,
                                    ReturnValueSlot Return,
                                    QualType ResultType,
                                    Selector Sel,
                                    const ObjCInterfaceDecl *Class,
                                    bool isCategoryImpl,
                                    llvm::Value *Receiver,
                                    bool IsClassMessage,
                                    const CallArgList &CallArgs,
                                    const ObjCMethodDecl *Method) {
  CGBuilderTy &Builder = CGF.Builder;
  // In GC-only mode retain/autorelease collapse to the receiver itself and
  // release produces no value; no call is emitted for any of them.
  if (CGM.getLangOpts().getGC() == LangOptions::GCOnly) {
    if (Sel == RetainSel || Sel == AutoreleaseSel) {
      return RValue::get(EnforceType(Builder, Receiver,
                  CGM.getTypes().ConvertType(ResultType)));
    }
    if (Sel == ReleaseSel) {
      return RValue::get(nullptr);
    }
  }

  llvm::Value *cmd = GetSelector(CGF, Sel);
  CallArgList ActualArgs;

  // The implicit arguments (self, _cmd) come first, then the user arguments.
  ActualArgs.add(RValue::get(EnforceType(Builder, Receiver, IdTy)), ASTIdTy);
  ActualArgs.add(RValue::get(cmd), CGF.getContext().getObjCSelType());
  ActualArgs.addFrom(CallArgs);

  MessageSendInfo MSI = getMessageSendInfo(Method, ResultType, ActualArgs);

  llvm::Value *ReceiverClass = nullptr;
  bool isV2ABI = isRuntime(ObjCRuntime::GNUstep, 2);
  if (isV2ABI) {
    // GNUstep v2: reference the superclass directly by name.
    ReceiverClass = GetClassNamed(CGF,
        Class->getSuperClass()->getNameAsString(), /*isWeak*/false);
    if (IsClassMessage)  {
      // Load the isa pointer of the superclass if this is a class method.
      ReceiverClass = Builder.CreateBitCast(ReceiverClass,
                                            llvm::PointerType::getUnqual(IdTy));
      ReceiverClass =
        Builder.CreateAlignedLoad(ReceiverClass, CGF.getPointerAlign());
    }
    ReceiverClass = EnforceType(Builder, ReceiverClass, IdTy);
  } else {
    if (isCategoryImpl) {
      // Inside a category the class (or metaclass) is looked up by name at
      // run time.
      llvm::Constant *classLookupFunction = nullptr;
      if (IsClassMessage)  {
        classLookupFunction = CGM.CreateRuntimeFunction(llvm::FunctionType::get(
              IdTy, PtrTy, true), "objc_get_meta_class");
      } else {
        classLookupFunction = CGM.CreateRuntimeFunction(llvm::FunctionType::get(
              IdTy, PtrTy, true), "objc_get_class");
      }
      ReceiverClass = Builder.CreateCall(classLookupFunction,
          MakeConstantString(Class->getNameAsString()));
    } else {
      // Set up global aliases for the metaclass or class pointer if they do not
      // already exist.  These are forward references which will be set to
      // pointers to the class and metaclass structure created for the runtime
      // load function.  To send a message to super, we look up the value of the
      // super_class pointer from either the class or metaclass structure.
      //
      // Each alias is created at most once and then cached in the
      // corresponding member; later calls reuse the cached alias.
      if (IsClassMessage)  {
        if (!MetaClassPtrAlias) {
          MetaClassPtrAlias = llvm::GlobalAlias::create(
              IdTy->getElementType(), 0, llvm::GlobalValue::InternalLinkage,
              ".objc_metaclass_ref" + Class->getNameAsString(), &TheModule);
        }
        ReceiverClass = MetaClassPtrAlias;
      } else {
        if (!ClassPtrAlias) {
          ClassPtrAlias = llvm::GlobalAlias::create(
              IdTy->getElementType(), 0, llvm::GlobalValue::InternalLinkage,
              ".objc_class_ref" + Class->getNameAsString(), &TheModule);
        }
        ReceiverClass = ClassPtrAlias;
      }
    }
    // Cast the pointer to a simplified version of the class structure
    llvm::Type *CastTy = llvm::StructType::get(IdTy, IdTy);
    ReceiverClass = Builder.CreateBitCast(ReceiverClass,
                                          llvm::PointerType::getUnqual(CastTy));
    // Get the superclass pointer
    ReceiverClass = Builder.CreateStructGEP(CastTy, ReceiverClass, 1);
    // Load the superclass pointer
    ReceiverClass =
      Builder.CreateAlignedLoad(ReceiverClass, CGF.getPointerAlign());
  }
  // Construct the structure used to look up the IMP
  llvm::StructType *ObjCSuperTy =
      llvm::StructType::get(Receiver->getType(), IdTy);

  Address ObjCSuper = CGF.CreateTempAlloca(ObjCSuperTy,
                              CGF.getPointerAlign());

  // Field 0 is the receiver, field 1 the class to start the lookup from.
  Builder.CreateStore(Receiver,
                   Builder.CreateStructGEP(ObjCSuper, 0, CharUnits::Zero()));
  Builder.CreateStore(ReceiverClass,
                   Builder.CreateStructGEP(ObjCSuper, 1, CGF.getPointerSize()));

  ObjCSuper = EnforceType(Builder, ObjCSuper, PtrToObjCSuperTy);

  // Get the IMP
  llvm::Value *imp = LookupIMPSuper(CGF, ObjCSuper, cmd, MSI);
  imp = EnforceType(Builder, imp, MSI.MessengerType);

  // Metadata attached to the call: selector name, superclass name and whether
  // this is a class message.
  llvm::Metadata *impMD[] = {
      llvm::MDString::get(VMContext, Sel.getAsString()),
      llvm::MDString::get(VMContext, Class->getSuperClass()->getNameAsString()),
      llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
          llvm::Type::getInt1Ty(VMContext), IsClassMessage))};
  llvm::MDNode *node = llvm::MDNode::get(VMContext, impMD);

  CGCallee callee(CGCalleeInfo(), imp);

  llvm::Instruction *call;
  RValue msgRet = CGF.EmitCall(MSI.CallInfo, callee, Return, ActualArgs, &call);
  call->setMetadata(msgSendMDKind, node);
  return msgRet;
}

/// Generate code for a message send expression.
///
/// For return types that are not pointers, integers/enums or void, the send
/// is wrapped in an explicit nil check on the receiver and the result is
/// merged with a zero value through PHI nodes.
RValue
CGObjCGNU::GenerateMessageSend(CodeGenFunction &CGF,
                               ReturnValueSlot Return,
                               QualType ResultType,
                               Selector Sel,
                               llvm::Value *Receiver,
                               const CallArgList &CallArgs,
                               const ObjCInterfaceDecl *Class,
                               const ObjCMethodDecl *Method) {
  CGBuilderTy &Builder = CGF.Builder;

  // Strip out message sends to retain / release in GC mode
  if (CGM.getLangOpts().getGC() == LangOptions::GCOnly) {
    if (Sel == RetainSel || Sel == AutoreleaseSel) {
      return RValue::get(EnforceType(Builder, Receiver,
                  CGM.getTypes().ConvertType(ResultType)));
    }
    if (Sel == ReleaseSel) {
      return RValue::get(nullptr);
    }
  }

  // If the return type is something that goes in an integer register, the
  // runtime will handle 0 returns.  For other cases, we fill in the 0 value
  // ourselves.
  //
  // The language spec says the result of this kind of message send is
  // undefined, but lots of people seem to have forgotten to read that
  // paragraph and insist on sending messages to nil that have structure
  // returns.  With GCC, this generates a random return value (whatever happens
  // to be on the stack / in those registers at the time) on most platforms,
  // and generates an illegal instruction trap on SPARC.  With LLVM it corrupts
  // the stack.
  bool isPointerSizedReturn = (ResultType->isAnyPointerType() ||
      ResultType->isIntegralOrEnumerationType() || ResultType->isVoidType());

  llvm::BasicBlock *startBB = nullptr;
  llvm::BasicBlock *messageBB = nullptr;
  llvm::BasicBlock *continueBB = nullptr;

  if (!isPointerSizedReturn) {
    // Branch around the send when the receiver is nil.
    startBB = Builder.GetInsertBlock();
    messageBB = CGF.createBasicBlock("msgSend");
    continueBB = CGF.createBasicBlock("continue");

    llvm::Value *isNil = Builder.CreateICmpEQ(Receiver,
            llvm::Constant::getNullValue(Receiver->getType()));
    Builder.CreateCondBr(isNil, continueBB, messageBB);
    CGF.EmitBlock(messageBB);
  }

  // Note that this re-assigns the IdTy member from the AST 'id' type.
  IdTy = cast(CGM.getTypes().ConvertType(ASTIdTy));
  // Prefer the typed selector when the method declaration is known.
  llvm::Value *cmd;
  if (Method)
    cmd = GetSelector(CGF, Method);
  else
    cmd = GetSelector(CGF, Sel);
  cmd = EnforceType(Builder, cmd, SelectorTy);
  Receiver = EnforceType(Builder, Receiver, IdTy);

  // Metadata attached to the lookup and the call: selector name, class name
  // (empty when unknown) and whether a class was supplied.
  llvm::Metadata *impMD[] = {
      llvm::MDString::get(VMContext, Sel.getAsString()),
      llvm::MDString::get(VMContext, Class ? Class->getNameAsString() : ""),
      llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
          llvm::Type::getInt1Ty(VMContext), Class != nullptr))};
  llvm::MDNode *node = llvm::MDNode::get(VMContext, impMD);

  CallArgList ActualArgs;
  ActualArgs.add(RValue::get(Receiver), ASTIdTy);
  ActualArgs.add(RValue::get(cmd), CGF.getContext().getObjCSelType());
  ActualArgs.addFrom(CallArgs);

  MessageSendInfo MSI = getMessageSendInfo(Method, ResultType, ActualArgs);

  // Get the IMP to call
  llvm::Value *imp;

  // If we have non-legacy dispatch specified, we try using the objc_msgSend()
  // functions.  These are not supported on all platforms (or all runtimes on a
  // given platform), so we switch on the configured dispatch method.
  switch (CGM.getCodeGenOpts().getObjCDispatchMethod()) {
    case CodeGenOptions::Legacy:
      imp = LookupIMP(CGF, Receiver, cmd, node, MSI);
      break;
    case CodeGenOptions::Mixed:
    case CodeGenOptions::NonLegacy:
      if (CGM.ReturnTypeUsesFPRet(ResultType)) {
        imp = CGM.CreateRuntimeFunction(llvm::FunctionType::get(IdTy, IdTy, true),
                                  "objc_msgSend_fpret");
      } else if (CGM.ReturnTypeUsesSRet(MSI.CallInfo)) {
        // The actual types here don't matter - we're going to bitcast the
        // function anyway
        imp = CGM.CreateRuntimeFunction(llvm::FunctionType::get(IdTy, IdTy, true),
                                  "objc_msgSend_stret");
      } else {
        imp = CGM.CreateRuntimeFunction(llvm::FunctionType::get(IdTy, IdTy, true),
                                  "objc_msgSend");
      }
  }

  // Reset the receiver in case the lookup modified it
  ActualArgs[0] = CallArg(RValue::get(Receiver), ASTIdTy);

  imp = EnforceType(Builder, imp, MSI.MessengerType);

  llvm::Instruction *call;
  CGCallee callee(CGCalleeInfo(), imp);
  RValue msgRet = CGF.EmitCall(MSI.CallInfo, callee, Return, ActualArgs, &call);
  call->setMetadata(msgSendMDKind, node);


  if (!isPointerSizedReturn) {
    // Re-read the insert block: emitting the call may have moved it.
    messageBB = CGF.Builder.GetInsertBlock();
    CGF.Builder.CreateBr(continueBB);
    CGF.EmitBlock(continueBB);
    // Merge the real result (from messageBB) with a zero value (from the
    // nil-receiver edge out of startBB).
    if (msgRet.isScalar()) {
      llvm::Value *v = msgRet.getScalarVal();
      llvm::PHINode *phi = Builder.CreatePHI(v->getType(), 2);
      phi->addIncoming(v, messageBB);
      phi->addIncoming(llvm::Constant::getNullValue(v->getType()), startBB);
      msgRet = RValue::get(phi);
    } else if (msgRet.isAggregate()) {
      // Aggregates are merged by address: the nil path points at a temporary
      // initialised to all zeroes.
      Address v = msgRet.getAggregateAddress();
      llvm::PHINode *phi = Builder.CreatePHI(v.getType(), 2);
      llvm::Type *RetTy = v.getElementType();
      Address NullVal = CGF.CreateTempAlloca(RetTy, v.getAlignment(), "null");
      CGF.InitTempAlloca(NullVal, llvm::Constant::getNullValue(RetTy));
      phi->addIncoming(v.getPointer(), messageBB);
      phi->addIncoming(NullVal.getPointer(), startBB);
      msgRet = RValue::getAggregate(Address(phi, v.getAlignment()));
    } else /* isComplex() */ {
      // Complex values get one PHI per component.
      std::pair v = msgRet.getComplexVal();
      llvm::PHINode *phi = Builder.CreatePHI(v.first->getType(), 2);
      phi->addIncoming(v.first, messageBB);
      phi->addIncoming(llvm::Constant::getNullValue(v.first->getType()),
          startBB);
      llvm::PHINode *phi2 = Builder.CreatePHI(v.second->getType(), 2);
      phi2->addIncoming(v.second, messageBB);
      phi2->addIncoming(llvm::Constant::getNullValue(v.second->getType()),
          startBB);
      msgRet = RValue::getComplex(phi, phi2);
    }
  }
  return msgRet;
}

/// Generates a MethodList.  Used in construction of a objc_class and
/// objc_category structures.
///
/// Returns NULLPtr for an empty method set.  Otherwise emits the global
/// ".objc_method_list": a null pointer, the method count, (GNUstep v2 only)
/// the size of one method entry, and then the array of method entries, whose
/// field order depends on the ABI.
llvm::Constant *CGObjCGNU::
GenerateMethodList(StringRef ClassName,
                   StringRef CategoryName,
                   ArrayRef Methods,
                   bool isClassMethodList) {
  if (Methods.empty())
    return NULLPtr;

  ConstantInitBuilder Builder(CGM);

  auto MethodList = Builder.beginStruct();
  MethodList.addNullPointer(CGM.Int8PtrTy);
  MethodList.addInt(Int32Ty, Methods.size());

  // Get the method structure type.
  // NOTE(review): this initial value is only read by the size computation in
  // the v2 branch below; both branches then assign ObjCMethodTy again.
  llvm::StructType *ObjCMethodTy =
    llvm::StructType::get(CGM.getLLVMContext(), {
      PtrToInt8Ty, // Really a selector, but the runtime creates it for us.
      PtrToInt8Ty, // Method types
      IMPTy        // Method pointer
    });
  bool isV2ABI = isRuntime(ObjCRuntime::GNUstep, 2);
  if (isV2ABI) {
    // size_t size;
    llvm::DataLayout td(&TheModule);
    MethodList.addInt(SizeTy, td.getTypeSizeInBits(ObjCMethodTy) /
        CGM.getContext().getCharWidth());
    ObjCMethodTy =
      llvm::StructType::get(CGM.getLLVMContext(), {
        IMPTy,       // Method pointer
        PtrToInt8Ty, // Selector
        PtrToInt8Ty  // Extended type encoding
      });
  } else {
    ObjCMethodTy =
      llvm::StructType::get(CGM.getLLVMContext(), {
        PtrToInt8Ty, // Really a selector, but the runtime creates it for us.
        PtrToInt8Ty, // Method types
        IMPTy        // Method pointer
      });
  }
  auto MethodArray = MethodList.beginArray();
  ASTContext &Context = CGM.getContext();
  for (const auto *OMD : Methods) {
    // The implementation must already exist in the module under its mangled
    // symbol name.
    llvm::Constant *FnPtr =
      TheModule.getFunction(SymbolNameForMethod(ClassName, CategoryName,
                                                OMD->getSelector(),
                                                isClassMethodList));
    assert(FnPtr && "Can't generate metadata for method that doesn't exist");
    auto Method = MethodArray.beginStruct(ObjCMethodTy);
    if (isV2ABI) {
      Method.addBitCast(FnPtr, IMPTy);
      Method.add(GetConstantSelector(OMD->getSelector(),
          Context.getObjCEncodingForMethodDecl(OMD)));
      Method.add(MakeConstantString(Context.getObjCEncodingForMethodDecl(OMD, true)));
    } else {
      Method.add(MakeConstantString(OMD->getSelector().getAsString()));
      Method.add(MakeConstantString(Context.getObjCEncodingForMethodDecl(OMD)));
      Method.addBitCast(FnPtr, IMPTy);
    }
    Method.finishAndAddTo(MethodArray);
  }
  MethodArray.finishAndAddTo(MethodList);

  // Create an instance of the structure
  return MethodList.finishAndCreateGlobal(".objc_method_list",
                                          CGM.getPointerAlign());
}

/// Generates an IvarList.  Used in construction of a objc_class.
///
/// Returns NULLPtr when there are no ivars.  Otherwise emits the global
/// ".objc_ivar_list": the ivar count followed by an array of
/// { name, type, offset } entries built from the parallel input arrays.
/// NOTE(review): IvarAlign and IvarOwnership are not read in this function.
llvm::Constant *CGObjCGNU::
GenerateIvarList(ArrayRef IvarNames,
                 ArrayRef IvarTypes,
                 ArrayRef IvarOffsets,
                 ArrayRef IvarAlign,
                 ArrayRef IvarOwnership) {
  if (IvarNames.empty())
    return NULLPtr;

  ConstantInitBuilder Builder(CGM);

  // Structure containing array count followed by array.
  auto IvarList = Builder.beginStruct();
  IvarList.addInt(IntTy, (int)IvarNames.size());

  // Get the ivar structure type.
  llvm::StructType *ObjCIvarTy =
      llvm::StructType::get(PtrToInt8Ty, PtrToInt8Ty, IntTy);

  // Array of ivar structures.
  auto Ivars = IvarList.beginArray(ObjCIvarTy);
  for (unsigned int i = 0, e = IvarNames.size() ; i < e ; i++) {
    auto Ivar = Ivars.beginStruct(ObjCIvarTy);
    Ivar.add(IvarNames[i]);
    Ivar.add(IvarTypes[i]);
    Ivar.add(IvarOffsets[i]);
    Ivar.finishAndAddTo(Ivars);
  }
  Ivars.finishAndAddTo(IvarList);

  // Create an instance of the structure
  return IvarList.finishAndCreateGlobal(".objc_ivar_list",
                                        CGM.getPointerAlign());
}

/// Generate a class structure
///
/// Emits the class (or, when \p isMeta is set, metaclass) structure as the
/// external global _OBJC_CLASS_<Name> / _OBJC_METACLASS_<Name> and returns it.
/// NOTE(review): the Version parameter is not read; the version field is
/// always written as 0.
llvm::Constant *CGObjCGNU::GenerateClassStructure(
    llvm::Constant *MetaClass,
    llvm::Constant *SuperClass,
    unsigned info,
    const char *Name,
    llvm::Constant *Version,
    llvm::Constant *InstanceSize,
    llvm::Constant *IVars,
    llvm::Constant *Methods,
    llvm::Constant *Protocols,
    llvm::Constant *IvarOffsets,
    llvm::Constant *Properties,
    llvm::Constant *StrongIvarBitmap,
    llvm::Constant *WeakIvarBitmap,
    bool isMeta) {
  // Set up the class structure
  // Note:  Several of these are char*s when they should be ids.  This is
  // because the runtime performs this translation on load.
  //
  // Fields marked New ABI are part of the GNUstep runtime.  We emit them
  // anyway; the classes will still work with the GNU runtime, they will just
  // be ignored.
  llvm::StructType *ClassTy = llvm::StructType::get(
      PtrToInt8Ty,        // isa
      PtrToInt8Ty,        // super_class
      PtrToInt8Ty,        // name
      LongTy,             // version
      LongTy,             // info
      LongTy,             // instance_size
      IVars->getType(),   // ivars
      Methods->getType(), // methods
      // These are all filled in by the runtime, so we pretend
      PtrTy, // dtable
      PtrTy, // subclass_list
      PtrTy, // sibling_class
      PtrTy, // protocols
      PtrTy, // gc_object_type
      // New ABI:
      LongTy,                 // abi_version
      IvarOffsets->getType(), // ivar_offsets
      Properties->getType(),  // properties
      IntPtrTy,               // strong_pointers
      IntPtrTy                // weak_pointers
      );

  ConstantInitBuilder Builder(CGM);
  auto Elements = Builder.beginStruct(ClassTy);

  // Fill in the structure

  // isa
  Elements.addBitCast(MetaClass, PtrToInt8Ty);
  // super_class
  Elements.add(SuperClass);
  // name
  Elements.add(MakeConstantString(Name, ".class_name"));
  // version
  Elements.addInt(LongTy, 0);
  // info
  Elements.addInt(LongTy, info);
  // instance_size: for a metaclass this is the size of the class structure
  // itself; otherwise the caller-supplied instance size.
  if (isMeta) {
    llvm::DataLayout td(&TheModule);
    Elements.addInt(LongTy,
                    td.getTypeSizeInBits(ClassTy) /
                      CGM.getContext().getCharWidth());
  } else
    Elements.add(InstanceSize);
  // ivars
  Elements.add(IVars);
  // methods
  Elements.add(Methods);
  // These are all filled in by the runtime, so we pretend
  // dtable
  Elements.add(NULLPtr);
  // subclass_list
  Elements.add(NULLPtr);
  // sibling_class
  Elements.add(NULLPtr);
  // protocols
  Elements.addBitCast(Protocols, PtrTy);
  // gc_object_type
  Elements.add(NULLPtr);
  // abi_version
  Elements.addInt(LongTy, ClassABIVersion);
  // ivar_offsets
  Elements.add(IvarOffsets);
  // properties
  Elements.add(Properties);
  // strong_pointers
  Elements.add(StrongIvarBitmap);
  // weak_pointers
  Elements.add(WeakIvarBitmap);
  // Create an instance of the structure
  // This is now an externally visible symbol, so that we can speed up class
  // messages in the next ABI.  We may already have some weak references to
  // this, so check and fix them properly.
  std::string ClassSym((isMeta ? "_OBJC_METACLASS_": "_OBJC_CLASS_") +
          std::string(Name));
  llvm::GlobalVariable *ClassRef = TheModule.getNamedGlobal(ClassSym);
  llvm::Constant *Class =
    Elements.finishAndCreateGlobal(ClassSym, CGM.getPointerAlign(), false,
                                   llvm::GlobalValue::ExternalLinkage);
  if (ClassRef) {
    // A global with this name already existed: redirect its users to the new
    // definition, detach it from the module and give the new global the name.
    // NOTE(review): removeFromParent presumably only unlinks the old global
    // without deleting it -- confirm whether eraseFromParent was intended.
    ClassRef->replaceAllUsesWith(llvm::ConstantExpr::getBitCast(Class,
                  ClassRef->getType()));
    ClassRef->removeFromParent();
    Class->setName(ClassSym);
  }
  return Class;
}

/// Emits the method description list for a protocol as the global
/// ".objc_method_list": the method count followed by an array of
/// { selector name, type encoding } string pairs.
llvm::Constant *CGObjCGNU::
GenerateProtocolMethodList(ArrayRef Methods) {
  // Get the method structure type.
  llvm::StructType *ObjCMethodDescTy =
    llvm::StructType::get(CGM.getLLVMContext(), { PtrToInt8Ty, PtrToInt8Ty });
  ASTContext &Context = CGM.getContext();
  ConstantInitBuilder Builder(CGM);
  auto MethodList = Builder.beginStruct();
  MethodList.addInt(IntTy, Methods.size());
  auto MethodArray = MethodList.beginArray(ObjCMethodDescTy);
  for (auto *M : Methods) {
    auto Method = MethodArray.beginStruct(ObjCMethodDescTy);
    Method.add(MakeConstantString(M->getSelector().getAsString()));
    Method.add(MakeConstantString(Context.getObjCEncodingForMethodDecl(M)));
    Method.finishAndAddTo(MethodArray);
  }
  MethodArray.finishAndAddTo(MethodList);
  return MethodList.finishAndCreateGlobal(".objc_method_list",
                                          CGM.getPointerAlign());
}

// Create the protocol list structure used in classes, categories and so on
//
// The list is emitted as the global ".objc_protocol_list": a null pointer,
// the protocol count, and one i8* per protocol.  Names that are not yet in
// ExistingProtocols get an empty placeholder protocol.
llvm::Constant *
CGObjCGNU::GenerateProtocolList(ArrayRef Protocols) {

  ConstantInitBuilder Builder(CGM);
  auto ProtocolList = Builder.beginStruct();
  ProtocolList.add(NULLPtr);
  ProtocolList.addInt(LongTy, Protocols.size());

  auto Elements = ProtocolList.beginArray(PtrToInt8Ty);
  for (const std::string *iter = Protocols.begin(), *endIter = Protocols.end();
      iter != endIter ; iter++) {
    llvm::Constant *protocol = nullptr;
    llvm::StringMap::iterator value =
      ExistingProtocols.find(*iter);
    if (value == ExistingProtocols.end()) {
      protocol = GenerateEmptyProtocol(*iter);
    } else {
      protocol = value->getValue();
    }
    Elements.addBitCast(protocol, PtrToInt8Ty);
  }
  Elements.finishAndAddTo(ProtocolList);
  return ProtocolList.finishAndCreateGlobal(".objc_protocol_list",
                                            CGM.getPointerAlign());
}

/// Returns the protocol object for \p PD, generating it first if its entry in
/// ExistingProtocols is still null, bitcast to a pointer to the converted
/// ObjCProtoType.
llvm::Value *CGObjCGNU::GenerateProtocolRef(CodeGenFunction &CGF,
                                            const ObjCProtocolDecl *PD) {
  // 'protocol' is a reference into the map, so it observes the value that
  // GenerateProtocol stores under the same name.
  llvm::Constant *&protocol = ExistingProtocols[PD->getNameAsString()];
  if (!protocol)
    GenerateProtocol(PD);
  llvm::Type *T =
    CGM.getTypes().ConvertType(CGM.getContext().getObjCProtoType());
  return CGF.Builder.CreateBitCast(protocol, llvm::PointerType::getUnqual(T));
}

/// Emits a placeholder protocol object called \p ProtocolName whose protocol
/// and method lists are empty and whose property lists are null.  The result
/// is not recorded in ExistingProtocols by this function.
llvm::Constant *
CGObjCGNU::GenerateEmptyProtocol(StringRef ProtocolName) {
  llvm::Constant *ProtocolList = GenerateProtocolList({});
  llvm::Constant *MethodList = GenerateProtocolMethodList({});
  MethodList = llvm::ConstantExpr::getBitCast(MethodList, PtrToInt8Ty);
  // Protocols are objects containing lists of the methods implemented and
  // protocols adopted.
  ConstantInitBuilder Builder(CGM);
  auto Elements = Builder.beginStruct();

  // The isa pointer must be set to a magic number so the runtime knows it's
  // the correct layout.
  Elements.add(llvm::ConstantExpr::getIntToPtr(
          llvm::ConstantInt::get(Int32Ty, ProtocolVersion), IdTy));

  Elements.add(MakeConstantString(ProtocolName, ".objc_protocol_name"));
  Elements.add(ProtocolList); /* .protocol_list */
  Elements.add(MethodList);   /* .instance_methods */
  Elements.add(MethodList);   /* .class_methods */
  Elements.add(MethodList);   /* .optional_instance_methods */
  Elements.add(MethodList);   /* .optional_class_methods */
  Elements.add(NULLPtr);      /* .properties */
  Elements.add(NULLPtr);      /* .optional_properties */
  return Elements.finishAndCreateGlobal(SymbolForProtocol(ProtocolName),
                                        CGM.getPointerAlign());
}

/// Emits the full protocol object for \p PD as the global ".objc_protocol"
/// and records it (bitcast to IdTy) in ExistingProtocols under the protocol's
/// name.
void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) {
  std::string ProtocolName = PD->getNameAsString();

  // Use the protocol definition, if there is one.
  if (const ObjCProtocolDecl *Def = PD->getDefinition())
    PD = Def;

  // Names of the protocols this protocol adopts.
  SmallVector Protocols;
  for (const auto *PI : PD->protocols())
    Protocols.push_back(PI->getNameAsString());
  // Collect information about instance methods, split into required and
  // optional.
  SmallVector InstanceMethods;
  SmallVector OptionalInstanceMethods;
  for (const auto *I : PD->instance_methods())
    if (I->isOptional())
      OptionalInstanceMethods.push_back(I);
    else
      InstanceMethods.push_back(I);
  // Collect information about class methods:
  SmallVector ClassMethods;
  SmallVector OptionalClassMethods;
  for (const auto *I : PD->class_methods())
    if (I->isOptional())
      OptionalClassMethods.push_back(I);
    else
      ClassMethods.push_back(I);

  llvm::Constant *ProtocolList = GenerateProtocolList(Protocols);
  llvm::Constant *InstanceMethodList =
    GenerateProtocolMethodList(InstanceMethods);
  llvm::Constant *ClassMethodList =
    GenerateProtocolMethodList(ClassMethods);
  llvm::Constant *OptionalInstanceMethodList =
    GenerateProtocolMethodList(OptionalInstanceMethods);
  llvm::Constant *OptionalClassMethodList =
    GenerateProtocolMethodList(OptionalClassMethods);

  // Property metadata: name, attributes, isSynthesized, setter name, setter
  // types, getter name, getter types.
  // The isSynthesized value is always set to 0 in a protocol.  It exists to
  // simplify the runtime library by allowing it to use the same data
  // structures for protocol metadata everywhere.

  llvm::Constant *PropertyList =
    GeneratePropertyList(nullptr, PD, false, false);
  llvm::Constant *OptionalPropertyList =
    GeneratePropertyList(nullptr, PD, false, true);

  // Protocols are objects containing lists of the methods implemented and
  // protocols adopted.
  // The isa pointer must be set to a magic number so the runtime knows it's
  // the correct layout.
  ConstantInitBuilder Builder(CGM);
  auto Elements = Builder.beginStruct();
  Elements.add(
      llvm::ConstantExpr::getIntToPtr(
          llvm::ConstantInt::get(Int32Ty, ProtocolVersion), IdTy));
  Elements.add(MakeConstantString(ProtocolName));
  Elements.add(ProtocolList);
  Elements.add(InstanceMethodList);
  Elements.add(ClassMethodList);
  Elements.add(OptionalInstanceMethodList);
  Elements.add(OptionalClassMethodList);
  Elements.add(PropertyList);
  Elements.add(OptionalPropertyList);
  ExistingProtocols[ProtocolName] =
    llvm::ConstantExpr::getBitCast(
      Elements.finishAndCreateGlobal(".objc_protocol", CGM.getPointerAlign()),
      IdTy);
}

/// Emits a category named "AnotherHack" on the class
/// "__ObjC_Protocol_Holder_Ugly_Hack" with no methods, whose protocol list
/// references every protocol in ExistingProtocols, and appends it to
/// Categories.
void CGObjCGNU::GenerateProtocolHolderCategory() {
  // Collect information about instance methods

  ConstantInitBuilder Builder(CGM);
  auto Elements = Builder.beginStruct();

  const std::string ClassName = "__ObjC_Protocol_Holder_Ugly_Hack";
  const std::string CategoryName = "AnotherHack";
  Elements.add(MakeConstantString(CategoryName));
  Elements.add(MakeConstantString(ClassName));
  // Instance method list
  Elements.addBitCast(GenerateMethodList(
          ClassName, CategoryName, {}, false), PtrTy);
  // Class method list
  Elements.addBitCast(GenerateMethodList(
          ClassName, CategoryName, {}, true), PtrTy);

  // Protocol list
  ConstantInitBuilder ProtocolListBuilder(CGM);
  auto ProtocolList = ProtocolListBuilder.beginStruct();
  ProtocolList.add(NULLPtr);
  ProtocolList.addInt(LongTy, ExistingProtocols.size());
  auto ProtocolElements = ProtocolList.beginArray(PtrTy);
  for (auto iter = ExistingProtocols.begin(), endIter = ExistingProtocols.end();
       iter != endIter ; iter++) {
    ProtocolElements.addBitCast(iter->getValue(), PtrTy);
  }
  ProtocolElements.finishAndAddTo(ProtocolList);
  Elements.addBitCast(
                   ProtocolList.finishAndCreateGlobal(".objc_protocol_list",
                                                      CGM.getPointerAlign()),
                   PtrTy);
  Categories.push_back(llvm::ConstantExpr::getBitCast(
        Elements.finishAndCreateGlobal("", CGM.getPointerAlign()),
        PtrTy));
}

/// Libobjc2 uses a bitfield representation where small(ish) bitfields are
/// stored in a
64-bit value with the low bit set to 1 and the remaining 63 /// bits set to their values, LSB first, while larger ones are stored in a /// structure of this / form: /// /// struct { int32_t length; int32_t values[length]; }; /// /// The values in the array are stored in host-endian format, with the least /// significant bit being assumed to come first in the bitfield. Therefore, a /// bitfield with the 64th bit set will be (int64_t)&{ 2, [0, 1<<31] }, while a /// bitfield / with the 63rd bit set will be 1<<64. llvm::Constant *CGObjCGNU::MakeBitField(ArrayRef bits) { int bitCount = bits.size(); int ptrBits = CGM.getDataLayout().getPointerSizeInBits(); if (bitCount < ptrBits) { uint64_t val = 1; for (int i=0 ; i values; int v=0; while (v < bitCount) { int32_t word = 0; for (int i=0 ; (i<32) && (vgetClassInterface(); std::string ClassName = Class->getNameAsString(); std::string CategoryName = OCD->getNameAsString(); // Collect the names of referenced protocols SmallVector Protocols; const ObjCCategoryDecl *CatDecl = OCD->getCategoryDecl(); const ObjCList &Protos = CatDecl->getReferencedProtocols(); for (ObjCList::iterator I = Protos.begin(), E = Protos.end(); I != E; ++I) Protocols.push_back((*I)->getNameAsString()); ConstantInitBuilder Builder(CGM); auto Elements = Builder.beginStruct(); Elements.add(MakeConstantString(CategoryName)); Elements.add(MakeConstantString(ClassName)); // Instance method list SmallVector InstanceMethods; InstanceMethods.insert(InstanceMethods.begin(), OCD->instmeth_begin(), OCD->instmeth_end()); Elements.addBitCast( GenerateMethodList(ClassName, CategoryName, InstanceMethods, false), PtrTy); // Class method list SmallVector ClassMethods; ClassMethods.insert(ClassMethods.begin(), OCD->classmeth_begin(), OCD->classmeth_end()); Elements.addBitCast( GenerateMethodList(ClassName, CategoryName, ClassMethods, true), PtrTy); // Protocol list Elements.addBitCast(GenerateProtocolList(Protocols), PtrTy); if (isRuntime(ObjCRuntime::GNUstep, 2)) { const 
ObjCCategoryDecl *Category = Class->FindCategoryDeclaration(OCD->getIdentifier()); if (Category) { // Instance properties Elements.addBitCast(GeneratePropertyList(OCD, Category, false), PtrTy); // Class properties Elements.addBitCast(GeneratePropertyList(OCD, Category, true), PtrTy); } else { Elements.addNullPointer(PtrTy); Elements.addNullPointer(PtrTy); } } Categories.push_back(llvm::ConstantExpr::getBitCast( Elements.finishAndCreateGlobal( std::string(".objc_category_")+ClassName+CategoryName, CGM.getPointerAlign()), PtrTy)); } llvm::Constant *CGObjCGNU::GeneratePropertyList(const Decl *Container, const ObjCContainerDecl *OCD, bool isClassProperty, bool protocolOptionalProperties) { SmallVector Properties; llvm::SmallPtrSet PropertySet; bool isProtocol = isa(OCD); ASTContext &Context = CGM.getContext(); std::function collectProtocolProperties = [&](const ObjCProtocolDecl *Proto) { for (const auto *P : Proto->protocols()) collectProtocolProperties(P); for (const auto *PD : Proto->properties()) { if (isClassProperty != PD->isClassProperty()) continue; // Skip any properties that are declared in protocols that this class // conforms to but are not actually implemented by this class. if (!isProtocol && !Context.getObjCPropertyImplDeclForPropertyDecl(PD, Container)) continue; if (!PropertySet.insert(PD->getIdentifier()).second) continue; Properties.push_back(PD); } }; if (const ObjCInterfaceDecl *OID = dyn_cast(OCD)) for (const ObjCCategoryDecl *ClassExt : OID->known_extensions()) for (auto *PD : ClassExt->properties()) { if (isClassProperty != PD->isClassProperty()) continue; PropertySet.insert(PD->getIdentifier()); Properties.push_back(PD); } for (const auto *PD : OCD->properties()) { if (isClassProperty != PD->isClassProperty()) continue; // If we're generating a list for a protocol, skip optional / required ones // when generating the other list. 
if (isProtocol && (protocolOptionalProperties != PD->isOptional())) continue; // Don't emit duplicate metadata for properties that were already in a // class extension. if (!PropertySet.insert(PD->getIdentifier()).second) continue; Properties.push_back(PD); } if (const ObjCInterfaceDecl *OID = dyn_cast(OCD)) for (const auto *P : OID->all_referenced_protocols()) collectProtocolProperties(P); else if (const ObjCCategoryDecl *CD = dyn_cast(OCD)) for (const auto *P : CD->protocols()) collectProtocolProperties(P); auto numProperties = Properties.size(); if (numProperties == 0) return NULLPtr; ConstantInitBuilder builder(CGM); auto propertyList = builder.beginStruct(); auto properties = PushPropertyListHeader(propertyList, numProperties); // Add all of the property methods need adding to the method list and to the // property metadata list. for (auto *property : Properties) { bool isSynthesized = false; bool isDynamic = false; if (!isProtocol) { auto *propertyImpl = Context.getObjCPropertyImplDeclForPropertyDecl(property, Container); if (propertyImpl) { isSynthesized = (propertyImpl->getPropertyImplementation() == ObjCPropertyImplDecl::Synthesize); isDynamic = (propertyImpl->getPropertyImplementation() == ObjCPropertyImplDecl::Dynamic); } } PushProperty(properties, property, Container, isSynthesized, isDynamic); } properties.finishAndAddTo(propertyList); return propertyList.finishAndCreateGlobal(".objc_property_list", CGM.getPointerAlign()); } void CGObjCGNU::RegisterAlias(const ObjCCompatibleAliasDecl *OAD) { // Get the class declaration for which the alias is specified. ObjCInterfaceDecl *ClassDecl = const_cast(OAD->getClassInterface()); ClassAliases.emplace_back(ClassDecl->getNameAsString(), OAD->getNameAsString()); } void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { ASTContext &Context = CGM.getContext(); // Get the superclass name. 
const ObjCInterfaceDecl * SuperClassDecl = OID->getClassInterface()->getSuperClass(); std::string SuperClassName; if (SuperClassDecl) { SuperClassName = SuperClassDecl->getNameAsString(); EmitClassRef(SuperClassName); } // Get the class name ObjCInterfaceDecl *ClassDecl = const_cast(OID->getClassInterface()); std::string ClassName = ClassDecl->getNameAsString(); // Emit the symbol that is used to generate linker errors if this class is // referenced in other modules but not declared. std::string classSymbolName = "__objc_class_name_" + ClassName; if (auto *symbol = TheModule.getGlobalVariable(classSymbolName)) { symbol->setInitializer(llvm::ConstantInt::get(LongTy, 0)); } else { new llvm::GlobalVariable(TheModule, LongTy, false, llvm::GlobalValue::ExternalLinkage, llvm::ConstantInt::get(LongTy, 0), classSymbolName); } // Get the size of instances. int instanceSize = Context.getASTObjCImplementationLayout(OID).getSize().getQuantity(); // Collect information about instance variables. SmallVector IvarNames; SmallVector IvarTypes; SmallVector IvarOffsets; SmallVector IvarAligns; SmallVector IvarOwnership; ConstantInitBuilder IvarOffsetBuilder(CGM); auto IvarOffsetValues = IvarOffsetBuilder.beginArray(PtrToIntTy); SmallVector WeakIvars; SmallVector StrongIvars; int superInstanceSize = !SuperClassDecl ? 0 : Context.getASTObjCInterfaceLayout(SuperClassDecl).getSize().getQuantity(); // For non-fragile ivars, set the instance size to 0 - {the size of just this // class}. The runtime will then set this to the correct value on load. 
if (CGM.getLangOpts().ObjCRuntime.isNonFragile()) { instanceSize = 0 - (instanceSize - superInstanceSize); } for (const ObjCIvarDecl *IVD = ClassDecl->all_declared_ivar_begin(); IVD; IVD = IVD->getNextIvar()) { // Store the name IvarNames.push_back(MakeConstantString(IVD->getNameAsString())); // Get the type encoding for this ivar std::string TypeStr; Context.getObjCEncodingForType(IVD->getType(), TypeStr, IVD); IvarTypes.push_back(MakeConstantString(TypeStr)); IvarAligns.push_back(llvm::ConstantInt::get(IntTy, Context.getTypeSize(IVD->getType()))); // Get the offset uint64_t BaseOffset = ComputeIvarBaseOffset(CGM, OID, IVD); uint64_t Offset = BaseOffset; if (CGM.getLangOpts().ObjCRuntime.isNonFragile()) { Offset = BaseOffset - superInstanceSize; } llvm::Constant *OffsetValue = llvm::ConstantInt::get(IntTy, Offset); // Create the direct offset value std::string OffsetName = "__objc_ivar_offset_value_" + ClassName +"." + IVD->getNameAsString(); llvm::GlobalVariable *OffsetVar = TheModule.getGlobalVariable(OffsetName); if (OffsetVar) { OffsetVar->setInitializer(OffsetValue); // If this is the real definition, change its linkage type so that // different modules will use this one, rather than their private // copy. 
OffsetVar->setLinkage(llvm::GlobalValue::ExternalLinkage); } else OffsetVar = new llvm::GlobalVariable(TheModule, Int32Ty, false, llvm::GlobalValue::ExternalLinkage, OffsetValue, OffsetName); IvarOffsets.push_back(OffsetValue); IvarOffsetValues.add(OffsetVar); Qualifiers::ObjCLifetime lt = IVD->getType().getQualifiers().getObjCLifetime(); IvarOwnership.push_back(lt); switch (lt) { case Qualifiers::OCL_Strong: StrongIvars.push_back(true); WeakIvars.push_back(false); break; case Qualifiers::OCL_Weak: StrongIvars.push_back(false); WeakIvars.push_back(true); break; default: StrongIvars.push_back(false); WeakIvars.push_back(false); } } llvm::Constant *StrongIvarBitmap = MakeBitField(StrongIvars); llvm::Constant *WeakIvarBitmap = MakeBitField(WeakIvars); llvm::GlobalVariable *IvarOffsetArray = IvarOffsetValues.finishAndCreateGlobal(".ivar.offsets", CGM.getPointerAlign()); // Collect information about instance methods SmallVector InstanceMethods; InstanceMethods.insert(InstanceMethods.begin(), OID->instmeth_begin(), OID->instmeth_end()); SmallVector ClassMethods; ClassMethods.insert(ClassMethods.begin(), OID->classmeth_begin(), OID->classmeth_end()); // Collect the same information about synthesized properties, which don't // show up in the instance method lists. for (auto *propertyImpl : OID->property_impls()) if (propertyImpl->getPropertyImplementation() == ObjCPropertyImplDecl::Synthesize) { ObjCPropertyDecl *property = propertyImpl->getPropertyDecl(); auto addPropertyMethod = [&](const ObjCMethodDecl *accessor) { if (accessor) InstanceMethods.push_back(accessor); }; addPropertyMethod(property->getGetterMethodDecl()); addPropertyMethod(property->getSetterMethodDecl()); } llvm::Constant *Properties = GeneratePropertyList(OID, ClassDecl); // Collect the names of referenced protocols SmallVector Protocols; for (const auto *I : ClassDecl->protocols()) Protocols.push_back(I->getNameAsString()); // Get the superclass pointer. 
llvm::Constant *SuperClass; if (!SuperClassName.empty()) { SuperClass = MakeConstantString(SuperClassName, ".super_class_name"); } else { SuperClass = llvm::ConstantPointerNull::get(PtrToInt8Ty); } // Empty vector used to construct empty method lists SmallVector empty; // Generate the method and instance variable lists llvm::Constant *MethodList = GenerateMethodList(ClassName, "", InstanceMethods, false); llvm::Constant *ClassMethodList = GenerateMethodList(ClassName, "", ClassMethods, true); llvm::Constant *IvarList = GenerateIvarList(IvarNames, IvarTypes, IvarOffsets, IvarAligns, IvarOwnership); // Irrespective of whether we are compiling for a fragile or non-fragile ABI, // we emit a symbol containing the offset for each ivar in the class. This // allows code compiled for the non-Fragile ABI to inherit from code compiled // for the legacy ABI, without causing problems. The converse is also // possible, but causes all ivar accesses to be fragile. // Offset pointer for getting at the correct field in the ivar list when // setting up the alias. These are: The base address for the global, the // ivar array (second field), the ivar in this list (set for each ivar), and // the offset (third field in ivar structure) llvm::Type *IndexTy = Int32Ty; llvm::Constant *offsetPointerIndexes[] = {Zeros[0], llvm::ConstantInt::get(IndexTy, ClassABIVersion > 1 ? 2 : 1), nullptr, llvm::ConstantInt::get(IndexTy, ClassABIVersion > 1 ? 3 : 2) }; unsigned ivarIndex = 0; for (const ObjCIvarDecl *IVD = ClassDecl->all_declared_ivar_begin(); IVD; IVD = IVD->getNextIvar()) { const std::string Name = GetIVarOffsetVariableName(ClassDecl, IVD); offsetPointerIndexes[2] = llvm::ConstantInt::get(IndexTy, ivarIndex); // Get the correct ivar field llvm::Constant *offsetValue = llvm::ConstantExpr::getGetElementPtr( cast(IvarList)->getValueType(), IvarList, offsetPointerIndexes); // Get the existing variable, if one exists. 
llvm::GlobalVariable *offset = TheModule.getNamedGlobal(Name); if (offset) { offset->setInitializer(offsetValue); // If this is the real definition, change its linkage type so that // different modules will use this one, rather than their private // copy. offset->setLinkage(llvm::GlobalValue::ExternalLinkage); } else // Add a new alias if there isn't one already. new llvm::GlobalVariable(TheModule, offsetValue->getType(), false, llvm::GlobalValue::ExternalLinkage, offsetValue, Name); ++ivarIndex; } llvm::Constant *ZeroPtr = llvm::ConstantInt::get(IntPtrTy, 0); //Generate metaclass for class methods llvm::Constant *MetaClassStruct = GenerateClassStructure( NULLPtr, NULLPtr, 0x12L, ClassName.c_str(), nullptr, Zeros[0], NULLPtr, ClassMethodList, NULLPtr, NULLPtr, GeneratePropertyList(OID, ClassDecl, true), ZeroPtr, ZeroPtr, true); CGM.setGVProperties(cast(MetaClassStruct), OID->getClassInterface()); // Generate the class structure llvm::Constant *ClassStruct = GenerateClassStructure( MetaClassStruct, SuperClass, 0x11L, ClassName.c_str(), nullptr, llvm::ConstantInt::get(LongTy, instanceSize), IvarList, MethodList, GenerateProtocolList(Protocols), IvarOffsetArray, Properties, StrongIvarBitmap, WeakIvarBitmap); CGM.setGVProperties(cast(ClassStruct), OID->getClassInterface()); // Resolve the class aliases, if they exist. 
if (ClassPtrAlias) { ClassPtrAlias->replaceAllUsesWith( llvm::ConstantExpr::getBitCast(ClassStruct, IdTy)); ClassPtrAlias->eraseFromParent(); ClassPtrAlias = nullptr; } if (MetaClassPtrAlias) { MetaClassPtrAlias->replaceAllUsesWith( llvm::ConstantExpr::getBitCast(MetaClassStruct, IdTy)); MetaClassPtrAlias->eraseFromParent(); MetaClassPtrAlias = nullptr; } // Add class structure to list to be added to the symtab later ClassStruct = llvm::ConstantExpr::getBitCast(ClassStruct, PtrToInt8Ty); Classes.push_back(ClassStruct); } llvm::Function *CGObjCGNU::ModuleInitFunction() { // Only emit an ObjC load function if no Objective-C stuff has been called if (Classes.empty() && Categories.empty() && ConstantStrings.empty() && ExistingProtocols.empty() && SelectorTable.empty()) return nullptr; // Add all referenced protocols to a category. GenerateProtocolHolderCategory(); llvm::StructType *selStructTy = dyn_cast(SelectorTy->getElementType()); llvm::Type *selStructPtrTy = SelectorTy; if (!selStructTy) { selStructTy = llvm::StructType::get(CGM.getLLVMContext(), { PtrToInt8Ty, PtrToInt8Ty }); selStructPtrTy = llvm::PointerType::getUnqual(selStructTy); } // Generate statics list: llvm::Constant *statics = NULLPtr; if (!ConstantStrings.empty()) { llvm::GlobalVariable *fileStatics = [&] { ConstantInitBuilder builder(CGM); auto staticsStruct = builder.beginStruct(); StringRef stringClass = CGM.getLangOpts().ObjCConstantStringClass; if (stringClass.empty()) stringClass = "NXConstantString"; staticsStruct.add(MakeConstantString(stringClass, ".objc_static_class_name")); auto array = staticsStruct.beginArray(); array.addAll(ConstantStrings); array.add(NULLPtr); array.finishAndAddTo(staticsStruct); return staticsStruct.finishAndCreateGlobal(".objc_statics", CGM.getPointerAlign()); }(); ConstantInitBuilder builder(CGM); auto allStaticsArray = builder.beginArray(fileStatics->getType()); allStaticsArray.add(fileStatics); allStaticsArray.addNullPointer(fileStatics->getType()); statics = 
allStaticsArray.finishAndCreateGlobal(".objc_statics_ptr", CGM.getPointerAlign()); statics = llvm::ConstantExpr::getBitCast(statics, PtrTy); } // Array of classes, categories, and constant objects. SmallVector selectorAliases; unsigned selectorCount; // Pointer to an array of selectors used in this module. llvm::GlobalVariable *selectorList = [&] { ConstantInitBuilder builder(CGM); auto selectors = builder.beginArray(selStructTy); auto &table = SelectorTable; // MSVC workaround for (auto &entry : table) { std::string selNameStr = entry.first.getAsString(); llvm::Constant *selName = ExportUniqueString(selNameStr, ".objc_sel_name"); for (TypedSelector &sel : entry.second) { llvm::Constant *selectorTypeEncoding = NULLPtr; if (!sel.first.empty()) selectorTypeEncoding = MakeConstantString(sel.first, ".objc_sel_types"); auto selStruct = selectors.beginStruct(selStructTy); selStruct.add(selName); selStruct.add(selectorTypeEncoding); selStruct.finishAndAddTo(selectors); // Store the selector alias for later replacement selectorAliases.push_back(sel.second); } } // Remember the number of entries in the selector table. selectorCount = selectors.size(); // NULL-terminate the selector list. This should not actually be required, // because the selector list has a length field. Unfortunately, the GCC // runtime decides to ignore the length field and expects a NULL terminator, // and GCC cooperates with this by always setting the length to 0. auto selStruct = selectors.beginStruct(selStructTy); selStruct.add(NULLPtr); selStruct.add(NULLPtr); selStruct.finishAndAddTo(selectors); return selectors.finishAndCreateGlobal(".objc_selector_list", CGM.getPointerAlign()); }(); // Now that all of the static selectors exist, create pointers to them. for (unsigned i = 0; i < selectorCount; ++i) { llvm::Constant *idxs[] = { Zeros[0], llvm::ConstantInt::get(Int32Ty, i) }; // FIXME: We're generating redundant loads and stores here! 
llvm::Constant *selPtr = llvm::ConstantExpr::getGetElementPtr( selectorList->getValueType(), selectorList, idxs); // If selectors are defined as an opaque type, cast the pointer to this // type. selPtr = llvm::ConstantExpr::getBitCast(selPtr, SelectorTy); selectorAliases[i]->replaceAllUsesWith(selPtr); selectorAliases[i]->eraseFromParent(); } llvm::GlobalVariable *symtab = [&] { ConstantInitBuilder builder(CGM); auto symtab = builder.beginStruct(); // Number of static selectors symtab.addInt(LongTy, selectorCount); symtab.addBitCast(selectorList, selStructPtrTy); // Number of classes defined. symtab.addInt(CGM.Int16Ty, Classes.size()); // Number of categories defined symtab.addInt(CGM.Int16Ty, Categories.size()); // Create an array of classes, then categories, then static object instances auto classList = symtab.beginArray(PtrToInt8Ty); classList.addAll(Classes); classList.addAll(Categories); // NULL-terminated list of static object instances (mainly constant strings) classList.add(statics); classList.add(NULLPtr); classList.finishAndAddTo(symtab); // Construct the symbol table. return symtab.finishAndCreateGlobal("", CGM.getPointerAlign()); }(); // The symbol table is contained in a module which has some version-checking // constants llvm::Constant *module = [&] { llvm::Type *moduleEltTys[] = { LongTy, LongTy, PtrToInt8Ty, symtab->getType(), IntTy }; llvm::StructType *moduleTy = llvm::StructType::get(CGM.getLLVMContext(), makeArrayRef(moduleEltTys).drop_back(unsigned(RuntimeVersion < 10))); ConstantInitBuilder builder(CGM); auto module = builder.beginStruct(moduleTy); // Runtime version, used for ABI compatibility checking. 
module.addInt(LongTy, RuntimeVersion); // sizeof(ModuleTy) module.addInt(LongTy, CGM.getDataLayout().getTypeStoreSize(moduleTy)); // The path to the source file where this module was declared SourceManager &SM = CGM.getContext().getSourceManager(); const FileEntry *mainFile = SM.getFileEntryForID(SM.getMainFileID()); std::string path = (Twine(mainFile->getDir()->getName()) + "/" + mainFile->getName()).str(); module.add(MakeConstantString(path, ".objc_source_file_name")); module.add(symtab); if (RuntimeVersion >= 10) { switch (CGM.getLangOpts().getGC()) { case LangOptions::GCOnly: module.addInt(IntTy, 2); break; case LangOptions::NonGC: if (CGM.getLangOpts().ObjCAutoRefCount) module.addInt(IntTy, 1); else module.addInt(IntTy, 0); break; case LangOptions::HybridGC: module.addInt(IntTy, 1); break; } } return module.finishAndCreateGlobal("", CGM.getPointerAlign()); }(); // Create the load function calling the runtime entry point with the module // structure llvm::Function * LoadFunction = llvm::Function::Create( llvm::FunctionType::get(llvm::Type::getVoidTy(VMContext), false), llvm::GlobalValue::InternalLinkage, ".objc_load_function", &TheModule); llvm::BasicBlock *EntryBB = llvm::BasicBlock::Create(VMContext, "entry", LoadFunction); CGBuilderTy Builder(CGM, VMContext); Builder.SetInsertPoint(EntryBB); llvm::FunctionType *FT = llvm::FunctionType::get(Builder.getVoidTy(), module->getType(), true); llvm::Value *Register = CGM.CreateRuntimeFunction(FT, "__objc_exec_class"); Builder.CreateCall(Register, module); if (!ClassAliases.empty()) { llvm::Type *ArgTypes[2] = {PtrTy, PtrToInt8Ty}; llvm::FunctionType *RegisterAliasTy = llvm::FunctionType::get(Builder.getVoidTy(), ArgTypes, false); llvm::Function *RegisterAlias = llvm::Function::Create( RegisterAliasTy, llvm::GlobalValue::ExternalWeakLinkage, "class_registerAlias_np", &TheModule); llvm::BasicBlock *AliasBB = llvm::BasicBlock::Create(VMContext, "alias", LoadFunction); llvm::BasicBlock *NoAliasBB = 
llvm::BasicBlock::Create(VMContext, "no_alias", LoadFunction); // Branch based on whether the runtime provided class_registerAlias_np() llvm::Value *HasRegisterAlias = Builder.CreateICmpNE(RegisterAlias, llvm::Constant::getNullValue(RegisterAlias->getType())); Builder.CreateCondBr(HasRegisterAlias, AliasBB, NoAliasBB); // The true branch (has alias registration function): Builder.SetInsertPoint(AliasBB); // Emit alias registration calls: for (std::vector::iterator iter = ClassAliases.begin(); iter != ClassAliases.end(); ++iter) { llvm::Constant *TheClass = TheModule.getGlobalVariable("_OBJC_CLASS_" + iter->first, true); if (TheClass) { TheClass = llvm::ConstantExpr::getBitCast(TheClass, PtrTy); Builder.CreateCall(RegisterAlias, {TheClass, MakeConstantString(iter->second)}); } } // Jump to end: Builder.CreateBr(NoAliasBB); // Missing alias registration function, just return from the function: Builder.SetInsertPoint(NoAliasBB); } Builder.CreateRetVoid(); return LoadFunction; } llvm::Function *CGObjCGNU::GenerateMethod(const ObjCMethodDecl *OMD, const ObjCContainerDecl *CD) { const ObjCCategoryImplDecl *OCD = dyn_cast(OMD->getDeclContext()); StringRef CategoryName = OCD ? 
OCD->getName() : ""; StringRef ClassName = CD->getName(); Selector MethodName = OMD->getSelector(); bool isClassMethod = !OMD->isInstanceMethod(); CodeGenTypes &Types = CGM.getTypes(); llvm::FunctionType *MethodTy = Types.GetFunctionType(Types.arrangeObjCMethodDeclaration(OMD)); std::string FunctionName = SymbolNameForMethod(ClassName, CategoryName, MethodName, isClassMethod); llvm::Function *Method = llvm::Function::Create(MethodTy, llvm::GlobalValue::InternalLinkage, FunctionName, &TheModule); return Method; } llvm::Constant *CGObjCGNU::GetPropertyGetFunction() { return GetPropertyFn; } llvm::Constant *CGObjCGNU::GetPropertySetFunction() { return SetPropertyFn; } llvm::Constant *CGObjCGNU::GetOptimizedPropertySetFunction(bool atomic, bool copy) { return nullptr; } llvm::Constant *CGObjCGNU::GetGetStructFunction() { return GetStructPropertyFn; } llvm::Constant *CGObjCGNU::GetSetStructFunction() { return SetStructPropertyFn; } llvm::Constant *CGObjCGNU::GetCppAtomicObjectGetFunction() { return nullptr; } llvm::Constant *CGObjCGNU::GetCppAtomicObjectSetFunction() { return nullptr; } llvm::Constant *CGObjCGNU::EnumerationMutationFunction() { return EnumerationMutationFn; } void CGObjCGNU::EmitSynchronizedStmt(CodeGenFunction &CGF, const ObjCAtSynchronizedStmt &S) { EmitAtSynchronizedStmt(CGF, S, SyncEnterFn, SyncExitFn); } void CGObjCGNU::EmitTryStmt(CodeGenFunction &CGF, const ObjCAtTryStmt &S) { // Unlike the Apple non-fragile runtimes, which also uses // unwind-based zero cost exceptions, the GNU Objective C runtime's // EH support isn't a veneer over C++ EH. Instead, exception // objects are created by objc_exception_throw and destroyed by // the personality function; this avoids the need for bracketing // catch handlers with calls to __blah_begin_catch/__blah_end_catch // (or even _Unwind_DeleteException), but probably doesn't // interoperate very well with foreign exceptions. 
// // In Objective-C++ mode, we actually emit something equivalent to the C++ // exception handler. EmitTryCatchStmt(CGF, S, EnterCatchFn, ExitCatchFn, ExceptionReThrowFn); } void CGObjCGNU::EmitThrowStmt(CodeGenFunction &CGF, const ObjCAtThrowStmt &S, bool ClearInsertionPoint) { llvm::Value *ExceptionAsObject; if (const Expr *ThrowExpr = S.getThrowExpr()) { llvm::Value *Exception = CGF.EmitObjCThrowOperand(ThrowExpr); ExceptionAsObject = Exception; } else { assert((!CGF.ObjCEHValueStack.empty() && CGF.ObjCEHValueStack.back()) && "Unexpected rethrow outside @catch block."); ExceptionAsObject = CGF.ObjCEHValueStack.back(); } ExceptionAsObject = CGF.Builder.CreateBitCast(ExceptionAsObject, IdTy); llvm::CallSite Throw = CGF.EmitRuntimeCallOrInvoke(ExceptionThrowFn, ExceptionAsObject); Throw.setDoesNotReturn(); CGF.Builder.CreateUnreachable(); if (ClearInsertionPoint) CGF.Builder.ClearInsertionPoint(); } llvm::Value * CGObjCGNU::EmitObjCWeakRead(CodeGenFunction &CGF, Address AddrWeakObj) { CGBuilderTy &B = CGF.Builder; AddrWeakObj = EnforceType(B, AddrWeakObj, PtrToIdTy); return B.CreateCall(WeakReadFn.getType(), WeakReadFn, AddrWeakObj.getPointer()); } void CGObjCGNU::EmitObjCWeakAssign(CodeGenFunction &CGF, llvm::Value *src, Address dst) { CGBuilderTy &B = CGF.Builder; src = EnforceType(B, src, IdTy); dst = EnforceType(B, dst, PtrToIdTy); B.CreateCall(WeakAssignFn.getType(), WeakAssignFn, {src, dst.getPointer()}); } void CGObjCGNU::EmitObjCGlobalAssign(CodeGenFunction &CGF, llvm::Value *src, Address dst, bool threadlocal) { CGBuilderTy &B = CGF.Builder; src = EnforceType(B, src, IdTy); dst = EnforceType(B, dst, PtrToIdTy); // FIXME. 
Add threadloca assign API assert(!threadlocal && "EmitObjCGlobalAssign - Threal Local API NYI"); B.CreateCall(GlobalAssignFn.getType(), GlobalAssignFn, {src, dst.getPointer()}); } void CGObjCGNU::EmitObjCIvarAssign(CodeGenFunction &CGF, llvm::Value *src, Address dst, llvm::Value *ivarOffset) { CGBuilderTy &B = CGF.Builder; src = EnforceType(B, src, IdTy); dst = EnforceType(B, dst, IdTy); B.CreateCall(IvarAssignFn.getType(), IvarAssignFn, {src, dst.getPointer(), ivarOffset}); } void CGObjCGNU::EmitObjCStrongCastAssign(CodeGenFunction &CGF, llvm::Value *src, Address dst) { CGBuilderTy &B = CGF.Builder; src = EnforceType(B, src, IdTy); dst = EnforceType(B, dst, PtrToIdTy); B.CreateCall(StrongCastAssignFn.getType(), StrongCastAssignFn, {src, dst.getPointer()}); } void CGObjCGNU::EmitGCMemmoveCollectable(CodeGenFunction &CGF, Address DestPtr, Address SrcPtr, llvm::Value *Size) { CGBuilderTy &B = CGF.Builder; DestPtr = EnforceType(B, DestPtr, PtrTy); SrcPtr = EnforceType(B, SrcPtr, PtrTy); B.CreateCall(MemMoveFn.getType(), MemMoveFn, {DestPtr.getPointer(), SrcPtr.getPointer(), Size}); } llvm::GlobalVariable *CGObjCGNU::ObjCIvarOffsetVariable( const ObjCInterfaceDecl *ID, const ObjCIvarDecl *Ivar) { const std::string Name = GetIVarOffsetVariableName(ID, Ivar); // Emit the variable and initialize it with what we think the correct value // is. This allows code compiled with non-fragile ivars to work correctly // when linked against code which isn't (most of the time). llvm::GlobalVariable *IvarOffsetPointer = TheModule.getNamedGlobal(Name); - if (!IvarOffsetPointer) { - // This will cause a run-time crash if we accidentally use it. A value of - // 0 would seem more sensible, but will silently overwrite the isa pointer - // causing a great deal of confusion. 
- uint64_t Offset = -1; - // We can't call ComputeIvarBaseOffset() here if we have the - // implementation, because it will create an invalid ASTRecordLayout object - // that we are then stuck with forever, so we only initialize the ivar - // offset variable with a guess if we only have the interface. The - // initializer will be reset later anyway, when we are generating the class - // description. - if (!CGM.getContext().getObjCImplementation( - const_cast(ID))) - Offset = ComputeIvarBaseOffset(CGM, ID, Ivar); - - llvm::ConstantInt *OffsetGuess = llvm::ConstantInt::get(Int32Ty, Offset, - /*isSigned*/true); - // Don't emit the guess in non-PIC code because the linker will not be able - // to replace it with the real version for a library. In non-PIC code you - // must compile with the fragile ABI if you want to use ivars from a - // GCC-compiled class. - if (CGM.getLangOpts().PICLevel) { - llvm::GlobalVariable *IvarOffsetGV = new llvm::GlobalVariable(TheModule, - Int32Ty, false, - llvm::GlobalValue::PrivateLinkage, OffsetGuess, Name+".guess"); - IvarOffsetPointer = new llvm::GlobalVariable(TheModule, - IvarOffsetGV->getType(), false, llvm::GlobalValue::LinkOnceAnyLinkage, - IvarOffsetGV, Name); - } else { - IvarOffsetPointer = new llvm::GlobalVariable(TheModule, - llvm::Type::getInt32PtrTy(VMContext), false, - llvm::GlobalValue::ExternalLinkage, nullptr, Name); - } - } + if (!IvarOffsetPointer) + IvarOffsetPointer = new llvm::GlobalVariable(TheModule, + llvm::Type::getInt32PtrTy(VMContext), false, + llvm::GlobalValue::ExternalLinkage, nullptr, Name); return IvarOffsetPointer; } LValue CGObjCGNU::EmitObjCValueForIvar(CodeGenFunction &CGF, QualType ObjectTy, llvm::Value *BaseValue, const ObjCIvarDecl *Ivar, unsigned CVRQualifiers) { const ObjCInterfaceDecl *ID = ObjectTy->getAs()->getInterface(); return EmitValueForIvarAtOffset(CGF, ID, BaseValue, Ivar, CVRQualifiers, EmitIvarOffset(CGF, ID, Ivar)); } static const ObjCInterfaceDecl *FindIvarInterface(ASTContext 
&Context, const ObjCInterfaceDecl *OID, const ObjCIvarDecl *OIVD) { for (const ObjCIvarDecl *next = OID->all_declared_ivar_begin(); next; next = next->getNextIvar()) { if (OIVD == next) return OID; } // Otherwise check in the super class. if (const ObjCInterfaceDecl *Super = OID->getSuperClass()) return FindIvarInterface(Context, Super, OIVD); return nullptr; } llvm::Value *CGObjCGNU::EmitIvarOffset(CodeGenFunction &CGF, const ObjCInterfaceDecl *Interface, const ObjCIvarDecl *Ivar) { if (CGM.getLangOpts().ObjCRuntime.isNonFragile()) { Interface = FindIvarInterface(CGM.getContext(), Interface, Ivar); // The MSVC linker cannot have a single global defined as LinkOnceAnyLinkage // and ExternalLinkage, so create a reference to the ivar global and rely on // the definition being created as part of GenerateClass. if (RuntimeVersion < 10 || CGF.CGM.getTarget().getTriple().isKnownWindowsMSVCEnvironment()) return CGF.Builder.CreateZExtOrBitCast( CGF.Builder.CreateAlignedLoad( Int32Ty, CGF.Builder.CreateAlignedLoad( ObjCIvarOffsetVariable(Interface, Ivar), CGF.getPointerAlign(), "ivar"), CharUnits::fromQuantity(4)), PtrDiffTy); std::string name = "__objc_ivar_offset_value_" + Interface->getNameAsString() +"." 
+ Ivar->getNameAsString(); CharUnits Align = CGM.getIntAlign(); llvm::Value *Offset = TheModule.getGlobalVariable(name); if (!Offset) { auto GV = new llvm::GlobalVariable(TheModule, IntTy, false, llvm::GlobalValue::LinkOnceAnyLinkage, llvm::Constant::getNullValue(IntTy), name); GV->setAlignment(Align.getQuantity()); Offset = GV; } Offset = CGF.Builder.CreateAlignedLoad(Offset, Align); if (Offset->getType() != PtrDiffTy) Offset = CGF.Builder.CreateZExtOrBitCast(Offset, PtrDiffTy); return Offset; } uint64_t Offset = ComputeIvarBaseOffset(CGF.CGM, Interface, Ivar); return llvm::ConstantInt::get(PtrDiffTy, Offset, /*isSigned*/true); } CGObjCRuntime * clang::CodeGen::CreateGNUObjCRuntime(CodeGenModule &CGM) { auto Runtime = CGM.getLangOpts().ObjCRuntime; switch (Runtime.getKind()) { case ObjCRuntime::GNUstep: if (Runtime.getVersion() >= VersionTuple(2, 0)) return new CGObjCGNUstep2(CGM); return new CGObjCGNUstep(CGM); case ObjCRuntime::GCC: return new CGObjCGCC(CGM); case ObjCRuntime::ObjFW: return new CGObjCObjFW(CGM); case ObjCRuntime::FragileMacOSX: case ObjCRuntime::MacOSX: case ObjCRuntime::iOS: case ObjCRuntime::WatchOS: llvm_unreachable("these runtimes are not GNU runtimes"); } llvm_unreachable("bad runtime"); } Index: projects/clang700-import/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp =================================================================== --- projects/clang700-import/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp (revision 337644) +++ projects/clang700-import/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp (revision 337645) @@ -1,1070 +1,1071 @@ //===--- CodeGenAction.cpp - LLVM Code Generation Frontend Action ---------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// #include "clang/CodeGen/CodeGenAction.h" #include "CodeGenModule.h" #include "CoverageMappingGen.h" #include "MacroPPCallbacks.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclGroup.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/BackendUtil.h" #include "clang/CodeGen/ModuleBuilder.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Lex/Preprocessor.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IRReader/IRReader.h" #include "llvm/Linker/Linker.h" #include "llvm/Pass.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/Timer.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/YAMLTraits.h" #include "llvm/Transforms/IPO/Internalize.h" #include using namespace clang; using namespace llvm; namespace clang { class BackendConsumer; class ClangDiagnosticHandler final : public DiagnosticHandler { public: ClangDiagnosticHandler(const CodeGenOptions &CGOpts, BackendConsumer *BCon) : CodeGenOpts(CGOpts), BackendCon(BCon) {} bool handleDiagnostics(const DiagnosticInfo &DI) override; bool isAnalysisRemarkEnabled(StringRef PassName) const override { return (CodeGenOpts.OptimizationRemarkAnalysisPattern && CodeGenOpts.OptimizationRemarkAnalysisPattern->match(PassName)); } bool isMissedOptRemarkEnabled(StringRef PassName) const override { return (CodeGenOpts.OptimizationRemarkMissedPattern && 
CodeGenOpts.OptimizationRemarkMissedPattern->match(PassName)); } bool isPassedOptRemarkEnabled(StringRef PassName) const override { return (CodeGenOpts.OptimizationRemarkPattern && CodeGenOpts.OptimizationRemarkPattern->match(PassName)); } bool isAnyRemarkEnabled() const override { return (CodeGenOpts.OptimizationRemarkAnalysisPattern || CodeGenOpts.OptimizationRemarkMissedPattern || CodeGenOpts.OptimizationRemarkPattern); } private: const CodeGenOptions &CodeGenOpts; BackendConsumer *BackendCon; }; class BackendConsumer : public ASTConsumer { using LinkModule = CodeGenAction::LinkModule; virtual void anchor(); DiagnosticsEngine &Diags; BackendAction Action; const HeaderSearchOptions &HeaderSearchOpts; const CodeGenOptions &CodeGenOpts; const TargetOptions &TargetOpts; const LangOptions &LangOpts; std::unique_ptr AsmOutStream; ASTContext *Context; Timer LLVMIRGeneration; unsigned LLVMIRGenerationRefCount; /// True if we've finished generating IR. This prevents us from generating /// additional LLVM IR after emitting output in HandleTranslationUnit. This /// can happen when Clang plugins trigger additional AST deserialization. bool IRGenFinished = false; std::unique_ptr Gen; SmallVector LinkModules; // This is here so that the diagnostic printer knows the module a diagnostic // refers to. 
llvm::Module *CurLinkModule = nullptr; public: BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags, const HeaderSearchOptions &HeaderSearchOpts, const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts, const TargetOptions &TargetOpts, const LangOptions &LangOpts, bool TimePasses, const std::string &InFile, SmallVector LinkModules, std::unique_ptr OS, LLVMContext &C, CoverageSourceInfo *CoverageInfo = nullptr) : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts), CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts), AsmOutStream(std::move(OS)), Context(nullptr), LLVMIRGeneration("irgen", "LLVM IR Generation Time"), LLVMIRGenerationRefCount(0), Gen(CreateLLVMCodeGen(Diags, InFile, HeaderSearchOpts, PPOpts, CodeGenOpts, C, CoverageInfo)), LinkModules(std::move(LinkModules)) { FrontendTimesIsEnabled = TimePasses; + llvm::TimePassesIsEnabled = TimePasses; } llvm::Module *getModule() const { return Gen->GetModule(); } std::unique_ptr takeModule() { return std::unique_ptr(Gen->ReleaseModule()); } CodeGenerator *getCodeGenerator() { return Gen.get(); } void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) override { Gen->HandleCXXStaticMemberVarInstantiation(VD); } void Initialize(ASTContext &Ctx) override { assert(!Context && "initialized multiple times"); Context = &Ctx; if (FrontendTimesIsEnabled) LLVMIRGeneration.startTimer(); Gen->Initialize(Ctx); if (FrontendTimesIsEnabled) LLVMIRGeneration.stopTimer(); } bool HandleTopLevelDecl(DeclGroupRef D) override { PrettyStackTraceDecl CrashInfo(*D.begin(), SourceLocation(), Context->getSourceManager(), "LLVM IR generation of declaration"); // Recurse. 
if (FrontendTimesIsEnabled) { LLVMIRGenerationRefCount += 1; if (LLVMIRGenerationRefCount == 1) LLVMIRGeneration.startTimer(); } Gen->HandleTopLevelDecl(D); if (FrontendTimesIsEnabled) { LLVMIRGenerationRefCount -= 1; if (LLVMIRGenerationRefCount == 0) LLVMIRGeneration.stopTimer(); } return true; } void HandleInlineFunctionDefinition(FunctionDecl *D) override { PrettyStackTraceDecl CrashInfo(D, SourceLocation(), Context->getSourceManager(), "LLVM IR generation of inline function"); if (FrontendTimesIsEnabled) LLVMIRGeneration.startTimer(); Gen->HandleInlineFunctionDefinition(D); if (FrontendTimesIsEnabled) LLVMIRGeneration.stopTimer(); } void HandleInterestingDecl(DeclGroupRef D) override { // Ignore interesting decls from the AST reader after IRGen is finished. if (!IRGenFinished) HandleTopLevelDecl(D); } // Links each entry in LinkModules into our module. Returns true on error. bool LinkInModules() { for (auto &LM : LinkModules) { if (LM.PropagateAttrs) for (Function &F : *LM.Module) Gen->CGM().AddDefaultFnAttrs(F); CurLinkModule = LM.Module.get(); bool Err; if (LM.Internalize) { Err = Linker::linkModules( *getModule(), std::move(LM.Module), LM.LinkFlags, [](llvm::Module &M, const llvm::StringSet<> &GVS) { internalizeModule(M, [&GVS](const llvm::GlobalValue &GV) { return !GV.hasName() || (GVS.count(GV.getName()) == 0); }); }); } else { Err = Linker::linkModules(*getModule(), std::move(LM.Module), LM.LinkFlags); } if (Err) return true; } return false; // success } void HandleTranslationUnit(ASTContext &C) override { { PrettyStackTraceString CrashInfo("Per-file LLVM IR generation"); if (FrontendTimesIsEnabled) { LLVMIRGenerationRefCount += 1; if (LLVMIRGenerationRefCount == 1) LLVMIRGeneration.startTimer(); } Gen->HandleTranslationUnit(C); if (FrontendTimesIsEnabled) { LLVMIRGenerationRefCount -= 1; if (LLVMIRGenerationRefCount == 0) LLVMIRGeneration.stopTimer(); } IRGenFinished = true; } // Silently ignore if we weren't initialized for some reason. 
if (!getModule()) return; // Install an inline asm handler so that diagnostics get printed through // our diagnostics hooks. LLVMContext &Ctx = getModule()->getContext(); LLVMContext::InlineAsmDiagHandlerTy OldHandler = Ctx.getInlineAsmDiagnosticHandler(); void *OldContext = Ctx.getInlineAsmDiagnosticContext(); Ctx.setInlineAsmDiagnosticHandler(InlineAsmDiagHandler, this); std::unique_ptr OldDiagnosticHandler = Ctx.getDiagnosticHandler(); Ctx.setDiagnosticHandler(llvm::make_unique( CodeGenOpts, this)); Ctx.setDiagnosticsHotnessRequested(CodeGenOpts.DiagnosticsWithHotness); if (CodeGenOpts.DiagnosticsHotnessThreshold != 0) Ctx.setDiagnosticsHotnessThreshold( CodeGenOpts.DiagnosticsHotnessThreshold); std::unique_ptr OptRecordFile; if (!CodeGenOpts.OptRecordFile.empty()) { std::error_code EC; OptRecordFile = llvm::make_unique( CodeGenOpts.OptRecordFile, EC, sys::fs::F_None); if (EC) { Diags.Report(diag::err_cannot_open_file) << CodeGenOpts.OptRecordFile << EC.message(); return; } Ctx.setDiagnosticsOutputFile( llvm::make_unique(OptRecordFile->os())); if (CodeGenOpts.getProfileUse() != CodeGenOptions::ProfileNone) Ctx.setDiagnosticsHotnessRequested(true); } // Link each LinkModule into our module. 
if (LinkInModules()) return; EmbedBitcode(getModule(), CodeGenOpts, llvm::MemoryBufferRef()); EmitBackendOutput(Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts, LangOpts, C.getTargetInfo().getDataLayout(), getModule(), Action, std::move(AsmOutStream)); Ctx.setInlineAsmDiagnosticHandler(OldHandler, OldContext); Ctx.setDiagnosticHandler(std::move(OldDiagnosticHandler)); if (OptRecordFile) OptRecordFile->keep(); } void HandleTagDeclDefinition(TagDecl *D) override { PrettyStackTraceDecl CrashInfo(D, SourceLocation(), Context->getSourceManager(), "LLVM IR generation of declaration"); Gen->HandleTagDeclDefinition(D); } void HandleTagDeclRequiredDefinition(const TagDecl *D) override { Gen->HandleTagDeclRequiredDefinition(D); } void CompleteTentativeDefinition(VarDecl *D) override { Gen->CompleteTentativeDefinition(D); } void AssignInheritanceModel(CXXRecordDecl *RD) override { Gen->AssignInheritanceModel(RD); } void HandleVTable(CXXRecordDecl *RD) override { Gen->HandleVTable(RD); } static void InlineAsmDiagHandler(const llvm::SMDiagnostic &SM,void *Context, unsigned LocCookie) { SourceLocation Loc = SourceLocation::getFromRawEncoding(LocCookie); ((BackendConsumer*)Context)->InlineAsmDiagHandler2(SM, Loc); } /// Get the best possible source location to represent a diagnostic that /// may have associated debug info. const FullSourceLoc getBestLocationFromDebugLoc(const llvm::DiagnosticInfoWithLocationBase &D, bool &BadDebugInfo, StringRef &Filename, unsigned &Line, unsigned &Column) const; void InlineAsmDiagHandler2(const llvm::SMDiagnostic &, SourceLocation LocCookie); void DiagnosticHandlerImpl(const llvm::DiagnosticInfo &DI); /// Specialized handler for InlineAsm diagnostic. /// \return True if the diagnostic has been successfully reported, false /// otherwise. bool InlineAsmDiagHandler(const llvm::DiagnosticInfoInlineAsm &D); /// Specialized handler for StackSize diagnostic. /// \return True if the diagnostic has been successfully reported, false /// otherwise. 
bool StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D); /// Specialized handler for unsupported backend feature diagnostic. void UnsupportedDiagHandler(const llvm::DiagnosticInfoUnsupported &D); /// Specialized handlers for optimization remarks. /// Note that these handlers only accept remarks and they always handle /// them. void EmitOptimizationMessage(const llvm::DiagnosticInfoOptimizationBase &D, unsigned DiagID); void OptimizationRemarkHandler(const llvm::DiagnosticInfoOptimizationBase &D); void OptimizationRemarkHandler( const llvm::OptimizationRemarkAnalysisFPCommute &D); void OptimizationRemarkHandler( const llvm::OptimizationRemarkAnalysisAliasing &D); void OptimizationFailureHandler( const llvm::DiagnosticInfoOptimizationFailure &D); }; void BackendConsumer::anchor() {} } bool ClangDiagnosticHandler::handleDiagnostics(const DiagnosticInfo &DI) { BackendCon->DiagnosticHandlerImpl(DI); return true; } /// ConvertBackendLocation - Convert a location in a temporary llvm::SourceMgr /// buffer to be a valid FullSourceLoc. static FullSourceLoc ConvertBackendLocation(const llvm::SMDiagnostic &D, SourceManager &CSM) { // Get both the clang and llvm source managers. The location is relative to // a memory buffer that the LLVM Source Manager is handling, we need to add // a copy to the Clang source manager. const llvm::SourceMgr &LSM = *D.getSourceMgr(); // We need to copy the underlying LLVM memory buffer because llvm::SourceMgr // already owns its one and clang::SourceManager wants to own its one. const MemoryBuffer *LBuf = LSM.getMemoryBuffer(LSM.FindBufferContainingLoc(D.getLoc())); // Create the copy and transfer ownership to clang::SourceManager. // TODO: Avoid copying files into memory. std::unique_ptr CBuf = llvm::MemoryBuffer::getMemBufferCopy(LBuf->getBuffer(), LBuf->getBufferIdentifier()); // FIXME: Keep a file ID map instead of creating new IDs for each location. 
FileID FID = CSM.createFileID(std::move(CBuf)); // Translate the offset into the file. unsigned Offset = D.getLoc().getPointer() - LBuf->getBufferStart(); SourceLocation NewLoc = CSM.getLocForStartOfFile(FID).getLocWithOffset(Offset); return FullSourceLoc(NewLoc, CSM); } /// InlineAsmDiagHandler2 - This function is invoked when the backend hits an /// error parsing inline asm. The SMDiagnostic indicates the error relative to /// the temporary memory buffer that the inline asm parser has set up. void BackendConsumer::InlineAsmDiagHandler2(const llvm::SMDiagnostic &D, SourceLocation LocCookie) { // There are a couple of different kinds of errors we could get here. First, // we re-format the SMDiagnostic in terms of a clang diagnostic. // Strip "error: " off the start of the message string. StringRef Message = D.getMessage(); if (Message.startswith("error: ")) Message = Message.substr(7); // If the SMDiagnostic has an inline asm source location, translate it. FullSourceLoc Loc; if (D.getLoc() != SMLoc()) Loc = ConvertBackendLocation(D, Context->getSourceManager()); unsigned DiagID; switch (D.getKind()) { case llvm::SourceMgr::DK_Error: DiagID = diag::err_fe_inline_asm; break; case llvm::SourceMgr::DK_Warning: DiagID = diag::warn_fe_inline_asm; break; case llvm::SourceMgr::DK_Note: DiagID = diag::note_fe_inline_asm; break; case llvm::SourceMgr::DK_Remark: llvm_unreachable("remarks unexpected"); } // If this problem has clang-level source location information, report the // issue in the source with a note showing the instantiated // code. if (LocCookie.isValid()) { Diags.Report(LocCookie, DiagID).AddString(Message); if (D.getLoc().isValid()) { DiagnosticBuilder B = Diags.Report(Loc, diag::note_fe_inline_asm_here); // Convert the SMDiagnostic ranges into SourceRange and attach them // to the diagnostic. 
for (const std::pair &Range : D.getRanges()) { unsigned Column = D.getColumnNo(); B << SourceRange(Loc.getLocWithOffset(Range.first - Column), Loc.getLocWithOffset(Range.second - Column)); } } return; } // Otherwise, report the backend issue as occurring in the generated .s file. // If Loc is invalid, we still need to report the issue, it just gets no // location info. Diags.Report(Loc, DiagID).AddString(Message); } #define ComputeDiagID(Severity, GroupName, DiagID) \ do { \ switch (Severity) { \ case llvm::DS_Error: \ DiagID = diag::err_fe_##GroupName; \ break; \ case llvm::DS_Warning: \ DiagID = diag::warn_fe_##GroupName; \ break; \ case llvm::DS_Remark: \ llvm_unreachable("'remark' severity not expected"); \ break; \ case llvm::DS_Note: \ DiagID = diag::note_fe_##GroupName; \ break; \ } \ } while (false) #define ComputeDiagRemarkID(Severity, GroupName, DiagID) \ do { \ switch (Severity) { \ case llvm::DS_Error: \ DiagID = diag::err_fe_##GroupName; \ break; \ case llvm::DS_Warning: \ DiagID = diag::warn_fe_##GroupName; \ break; \ case llvm::DS_Remark: \ DiagID = diag::remark_fe_##GroupName; \ break; \ case llvm::DS_Note: \ DiagID = diag::note_fe_##GroupName; \ break; \ } \ } while (false) bool BackendConsumer::InlineAsmDiagHandler(const llvm::DiagnosticInfoInlineAsm &D) { unsigned DiagID; ComputeDiagID(D.getSeverity(), inline_asm, DiagID); std::string Message = D.getMsgStr().str(); // If this problem has clang-level source location information, report the // issue as being a problem in the source with a note showing the instantiated // code. SourceLocation LocCookie = SourceLocation::getFromRawEncoding(D.getLocCookie()); if (LocCookie.isValid()) Diags.Report(LocCookie, DiagID).AddString(Message); else { // Otherwise, report the backend diagnostic as occurring in the generated // .s file. // If Loc is invalid, we still need to report the diagnostic, it just gets // no location info. 
FullSourceLoc Loc; Diags.Report(Loc, DiagID).AddString(Message); } // We handled all the possible severities. return true; } bool BackendConsumer::StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D) { if (D.getSeverity() != llvm::DS_Warning) // For now, the only support we have for StackSize diagnostic is warning. // We do not know how to format other severities. return false; if (const Decl *ND = Gen->GetDeclForMangledName(D.getFunction().getName())) { // FIXME: Shouldn't need to truncate to uint32_t Diags.Report(ND->getASTContext().getFullLoc(ND->getLocation()), diag::warn_fe_frame_larger_than) << static_cast(D.getStackSize()) << Decl::castToDeclContext(ND); return true; } return false; } const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc( const llvm::DiagnosticInfoWithLocationBase &D, bool &BadDebugInfo, StringRef &Filename, unsigned &Line, unsigned &Column) const { SourceManager &SourceMgr = Context->getSourceManager(); FileManager &FileMgr = SourceMgr.getFileManager(); SourceLocation DILoc; if (D.isLocationAvailable()) { D.getLocation(&Filename, &Line, &Column); const FileEntry *FE = FileMgr.getFile(Filename); if (FE && Line > 0) { // If -gcolumn-info was not used, Column will be 0. This upsets the // source manager, so pass 1 if Column is not set. DILoc = SourceMgr.translateFileLineCol(FE, Line, Column ? Column : 1); } BadDebugInfo = DILoc.isInvalid(); } // If a location isn't available, try to approximate it using the associated // function definition. We use the definition's right brace to differentiate // from diagnostics that genuinely relate to the function itself. 
FullSourceLoc Loc(DILoc, SourceMgr); if (Loc.isInvalid()) if (const Decl *FD = Gen->GetDeclForMangledName(D.getFunction().getName())) Loc = FD->getASTContext().getFullLoc(FD->getLocation()); if (DILoc.isInvalid() && D.isLocationAvailable()) // If we were not able to translate the file:line:col information // back to a SourceLocation, at least emit a note stating that // we could not translate this location. This can happen in the // case of #line directives. Diags.Report(Loc, diag::note_fe_backend_invalid_loc) << Filename << Line << Column; return Loc; } void BackendConsumer::UnsupportedDiagHandler( const llvm::DiagnosticInfoUnsupported &D) { // We only support errors. assert(D.getSeverity() == llvm::DS_Error); StringRef Filename; unsigned Line, Column; bool BadDebugInfo = false; FullSourceLoc Loc = getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, Line, Column); Diags.Report(Loc, diag::err_fe_backend_unsupported) << D.getMessage().str(); if (BadDebugInfo) // If we were not able to translate the file:line:col information // back to a SourceLocation, at least emit a note stating that // we could not translate this location. This can happen in the // case of #line directives. Diags.Report(Loc, diag::note_fe_backend_invalid_loc) << Filename << Line << Column; } void BackendConsumer::EmitOptimizationMessage( const llvm::DiagnosticInfoOptimizationBase &D, unsigned DiagID) { // We only support warnings and remarks. 
assert(D.getSeverity() == llvm::DS_Remark || D.getSeverity() == llvm::DS_Warning); StringRef Filename; unsigned Line, Column; bool BadDebugInfo = false; FullSourceLoc Loc = getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, Line, Column); std::string Msg; raw_string_ostream MsgStream(Msg); MsgStream << D.getMsg(); if (D.getHotness()) MsgStream << " (hotness: " << *D.getHotness() << ")"; Diags.Report(Loc, DiagID) << AddFlagValue(D.getPassName()) << MsgStream.str(); if (BadDebugInfo) // If we were not able to translate the file:line:col information // back to a SourceLocation, at least emit a note stating that // we could not translate this location. This can happen in the // case of #line directives. Diags.Report(Loc, diag::note_fe_backend_invalid_loc) << Filename << Line << Column; } void BackendConsumer::OptimizationRemarkHandler( const llvm::DiagnosticInfoOptimizationBase &D) { // Without hotness information, don't show noisy remarks. if (D.isVerbose() && !D.getHotness()) return; if (D.isPassed()) { // Optimization remarks are active only if the -Rpass flag has a regular // expression that matches the name of the pass name in \p D. if (CodeGenOpts.OptimizationRemarkPattern && CodeGenOpts.OptimizationRemarkPattern->match(D.getPassName())) EmitOptimizationMessage(D, diag::remark_fe_backend_optimization_remark); } else if (D.isMissed()) { // Missed optimization remarks are active only if the -Rpass-missed // flag has a regular expression that matches the name of the pass // name in \p D. 
if (CodeGenOpts.OptimizationRemarkMissedPattern && CodeGenOpts.OptimizationRemarkMissedPattern->match(D.getPassName())) EmitOptimizationMessage( D, diag::remark_fe_backend_optimization_remark_missed); } else { assert(D.isAnalysis() && "Unknown remark type"); bool ShouldAlwaysPrint = false; if (auto *ORA = dyn_cast(&D)) ShouldAlwaysPrint = ORA->shouldAlwaysPrint(); if (ShouldAlwaysPrint || (CodeGenOpts.OptimizationRemarkAnalysisPattern && CodeGenOpts.OptimizationRemarkAnalysisPattern->match(D.getPassName()))) EmitOptimizationMessage( D, diag::remark_fe_backend_optimization_remark_analysis); } } void BackendConsumer::OptimizationRemarkHandler( const llvm::OptimizationRemarkAnalysisFPCommute &D) { // Optimization analysis remarks are active if the pass name is set to // llvm::DiagnosticInfo::AlwasyPrint or if the -Rpass-analysis flag has a // regular expression that matches the name of the pass name in \p D. if (D.shouldAlwaysPrint() || (CodeGenOpts.OptimizationRemarkAnalysisPattern && CodeGenOpts.OptimizationRemarkAnalysisPattern->match(D.getPassName()))) EmitOptimizationMessage( D, diag::remark_fe_backend_optimization_remark_analysis_fpcommute); } void BackendConsumer::OptimizationRemarkHandler( const llvm::OptimizationRemarkAnalysisAliasing &D) { // Optimization analysis remarks are active if the pass name is set to // llvm::DiagnosticInfo::AlwasyPrint or if the -Rpass-analysis flag has a // regular expression that matches the name of the pass name in \p D. if (D.shouldAlwaysPrint() || (CodeGenOpts.OptimizationRemarkAnalysisPattern && CodeGenOpts.OptimizationRemarkAnalysisPattern->match(D.getPassName()))) EmitOptimizationMessage( D, diag::remark_fe_backend_optimization_remark_analysis_aliasing); } void BackendConsumer::OptimizationFailureHandler( const llvm::DiagnosticInfoOptimizationFailure &D) { EmitOptimizationMessage(D, diag::warn_fe_backend_optimization_failure); } /// This function is invoked when the backend needs /// to report something to the user. 
void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) { unsigned DiagID = diag::err_fe_inline_asm; llvm::DiagnosticSeverity Severity = DI.getSeverity(); // Get the diagnostic ID based. switch (DI.getKind()) { case llvm::DK_InlineAsm: if (InlineAsmDiagHandler(cast(DI))) return; ComputeDiagID(Severity, inline_asm, DiagID); break; case llvm::DK_StackSize: if (StackSizeDiagHandler(cast(DI))) return; ComputeDiagID(Severity, backend_frame_larger_than, DiagID); break; case DK_Linker: assert(CurLinkModule); // FIXME: stop eating the warnings and notes. if (Severity != DS_Error) return; DiagID = diag::err_fe_cannot_link_module; break; case llvm::DK_OptimizationRemark: // Optimization remarks are always handled completely by this // handler. There is no generic way of emitting them. OptimizationRemarkHandler(cast(DI)); return; case llvm::DK_OptimizationRemarkMissed: // Optimization remarks are always handled completely by this // handler. There is no generic way of emitting them. OptimizationRemarkHandler(cast(DI)); return; case llvm::DK_OptimizationRemarkAnalysis: // Optimization remarks are always handled completely by this // handler. There is no generic way of emitting them. OptimizationRemarkHandler(cast(DI)); return; case llvm::DK_OptimizationRemarkAnalysisFPCommute: // Optimization remarks are always handled completely by this // handler. There is no generic way of emitting them. OptimizationRemarkHandler(cast(DI)); return; case llvm::DK_OptimizationRemarkAnalysisAliasing: // Optimization remarks are always handled completely by this // handler. There is no generic way of emitting them. OptimizationRemarkHandler(cast(DI)); return; case llvm::DK_MachineOptimizationRemark: // Optimization remarks are always handled completely by this // handler. There is no generic way of emitting them. OptimizationRemarkHandler(cast(DI)); return; case llvm::DK_MachineOptimizationRemarkMissed: // Optimization remarks are always handled completely by this // handler. 
There is no generic way of emitting them. OptimizationRemarkHandler(cast(DI)); return; case llvm::DK_MachineOptimizationRemarkAnalysis: // Optimization remarks are always handled completely by this // handler. There is no generic way of emitting them. OptimizationRemarkHandler(cast(DI)); return; case llvm::DK_OptimizationFailure: // Optimization failures are always handled completely by this // handler. OptimizationFailureHandler(cast(DI)); return; case llvm::DK_Unsupported: UnsupportedDiagHandler(cast(DI)); return; default: // Plugin IDs are not bound to any value as they are set dynamically. ComputeDiagRemarkID(Severity, backend_plugin, DiagID); break; } std::string MsgStorage; { raw_string_ostream Stream(MsgStorage); DiagnosticPrinterRawOStream DP(Stream); DI.print(DP); } if (DiagID == diag::err_fe_cannot_link_module) { Diags.Report(diag::err_fe_cannot_link_module) << CurLinkModule->getModuleIdentifier() << MsgStorage; return; } // Report the backend message using the usual diagnostic mechanism. FullSourceLoc Loc; Diags.Report(Loc, DiagID).AddString(MsgStorage); } #undef ComputeDiagID CodeGenAction::CodeGenAction(unsigned _Act, LLVMContext *_VMContext) : Act(_Act), VMContext(_VMContext ? _VMContext : new LLVMContext), OwnsVMContext(!_VMContext) {} CodeGenAction::~CodeGenAction() { TheModule.reset(); if (OwnsVMContext) delete VMContext; } bool CodeGenAction::hasIRSupport() const { return true; } void CodeGenAction::EndSourceFileAction() { // If the consumer creation failed, do nothing. if (!getCompilerInstance().hasASTConsumer()) return; // Steal the module from the consumer. 
TheModule = BEConsumer->takeModule(); } std::unique_ptr CodeGenAction::takeModule() { return std::move(TheModule); } llvm::LLVMContext *CodeGenAction::takeLLVMContext() { OwnsVMContext = false; return VMContext; } static std::unique_ptr GetOutputStream(CompilerInstance &CI, StringRef InFile, BackendAction Action) { switch (Action) { case Backend_EmitAssembly: return CI.createDefaultOutputFile(false, InFile, "s"); case Backend_EmitLL: return CI.createDefaultOutputFile(false, InFile, "ll"); case Backend_EmitBC: return CI.createDefaultOutputFile(true, InFile, "bc"); case Backend_EmitNothing: return nullptr; case Backend_EmitMCNull: return CI.createNullOutputFile(); case Backend_EmitObj: return CI.createDefaultOutputFile(true, InFile, "o"); } llvm_unreachable("Invalid action!"); } std::unique_ptr CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { BackendAction BA = static_cast(Act); std::unique_ptr OS = CI.takeOutputStream(); if (!OS) OS = GetOutputStream(CI, InFile, BA); if (BA != Backend_EmitNothing && !OS) return nullptr; // Load bitcode modules to link with, if we need to. if (LinkModules.empty()) for (const CodeGenOptions::BitcodeFileToLink &F : CI.getCodeGenOpts().LinkBitcodeFiles) { auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename); if (!BCBuf) { CI.getDiagnostics().Report(diag::err_cannot_open_file) << F.Filename << BCBuf.getError().message(); LinkModules.clear(); return nullptr; } Expected> ModuleOrErr = getOwningLazyBitcodeModule(std::move(*BCBuf), *VMContext); if (!ModuleOrErr) { handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) { CI.getDiagnostics().Report(diag::err_cannot_open_file) << F.Filename << EIB.message(); }); LinkModules.clear(); return nullptr; } LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs, F.Internalize, F.LinkFlags}); } CoverageSourceInfo *CoverageInfo = nullptr; // Add the preprocessor callback only when the coverage mapping is generated. 
if (CI.getCodeGenOpts().CoverageMapping) { CoverageInfo = new CoverageSourceInfo; CI.getPreprocessor().addPPCallbacks( std::unique_ptr(CoverageInfo)); } std::unique_ptr Result(new BackendConsumer( BA, CI.getDiagnostics(), CI.getHeaderSearchOpts(), CI.getPreprocessorOpts(), CI.getCodeGenOpts(), CI.getTargetOpts(), CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, InFile, std::move(LinkModules), std::move(OS), *VMContext, CoverageInfo)); BEConsumer = Result.get(); // Enable generating macro debug info only when debug info is not disabled and // also macro debug info is enabled. if (CI.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo && CI.getCodeGenOpts().MacroDebugInfo) { std::unique_ptr Callbacks = llvm::make_unique(BEConsumer->getCodeGenerator(), CI.getPreprocessor()); CI.getPreprocessor().addPPCallbacks(std::move(Callbacks)); } return std::move(Result); } static void BitcodeInlineAsmDiagHandler(const llvm::SMDiagnostic &SM, void *Context, unsigned LocCookie) { SM.print(nullptr, llvm::errs()); auto Diags = static_cast(Context); unsigned DiagID; switch (SM.getKind()) { case llvm::SourceMgr::DK_Error: DiagID = diag::err_fe_inline_asm; break; case llvm::SourceMgr::DK_Warning: DiagID = diag::warn_fe_inline_asm; break; case llvm::SourceMgr::DK_Note: DiagID = diag::note_fe_inline_asm; break; case llvm::SourceMgr::DK_Remark: llvm_unreachable("remarks unexpected"); } Diags->Report(DiagID).AddString("cannot compile inline asm"); } std::unique_ptr CodeGenAction::loadModule(MemoryBufferRef MBRef) { CompilerInstance &CI = getCompilerInstance(); SourceManager &SM = CI.getSourceManager(); // For ThinLTO backend invocations, ensure that the context // merges types based on ODR identifiers. We also need to read // the correct module out of a multi-module bitcode file. 
if (!CI.getCodeGenOpts().ThinLTOIndexFile.empty()) { VMContext->enableDebugTypeODRUniquing(); auto DiagErrors = [&](Error E) -> std::unique_ptr { unsigned DiagID = CI.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, "%0"); handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { CI.getDiagnostics().Report(DiagID) << EIB.message(); }); return {}; }; Expected> BMsOrErr = getBitcodeModuleList(MBRef); if (!BMsOrErr) return DiagErrors(BMsOrErr.takeError()); BitcodeModule *Bm = FindThinLTOModule(*BMsOrErr); // We have nothing to do if the file contains no ThinLTO module. This is // possible if ThinLTO compilation was not able to split module. Content of // the file was already processed by indexing and will be passed to the // linker using merged object file. if (!Bm) { auto M = llvm::make_unique("empty", *VMContext); M->setTargetTriple(CI.getTargetOpts().Triple); return M; } Expected> MOrErr = Bm->parseModule(*VMContext); if (!MOrErr) return DiagErrors(MOrErr.takeError()); return std::move(*MOrErr); } llvm::SMDiagnostic Err; if (std::unique_ptr M = parseIR(MBRef, Err, *VMContext)) return M; // Translate from the diagnostic info to the SourceManager location if // available. // TODO: Unify this with ConvertBackendLocation() SourceLocation Loc; if (Err.getLineNo() > 0) { assert(Err.getColumnNo() >= 0); Loc = SM.translateFileLineCol(SM.getFileEntryForID(SM.getMainFileID()), Err.getLineNo(), Err.getColumnNo() + 1); } // Strip off a leading diagnostic code if there is one. StringRef Msg = Err.getMessage(); if (Msg.startswith("error: ")) Msg = Msg.substr(7); unsigned DiagID = CI.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, "%0"); CI.getDiagnostics().Report(Loc, DiagID) << Msg; return {}; } void CodeGenAction::ExecuteAction() { // If this is an IR file, we have to treat it specially. 
if (getCurrentFileKind().getLanguage() == InputKind::LLVM_IR) { BackendAction BA = static_cast(Act); CompilerInstance &CI = getCompilerInstance(); std::unique_ptr OS = GetOutputStream(CI, getCurrentFile(), BA); if (BA != Backend_EmitNothing && !OS) return; bool Invalid; SourceManager &SM = CI.getSourceManager(); FileID FID = SM.getMainFileID(); llvm::MemoryBuffer *MainFile = SM.getBuffer(FID, &Invalid); if (Invalid) return; TheModule = loadModule(*MainFile); if (!TheModule) return; const TargetOptions &TargetOpts = CI.getTargetOpts(); if (TheModule->getTargetTriple() != TargetOpts.Triple) { CI.getDiagnostics().Report(SourceLocation(), diag::warn_fe_override_module) << TargetOpts.Triple; TheModule->setTargetTriple(TargetOpts.Triple); } EmbedBitcode(TheModule.get(), CI.getCodeGenOpts(), MainFile->getMemBufferRef()); LLVMContext &Ctx = TheModule->getContext(); Ctx.setInlineAsmDiagnosticHandler(BitcodeInlineAsmDiagHandler, &CI.getDiagnostics()); EmitBackendOutput(CI.getDiagnostics(), CI.getHeaderSearchOpts(), CI.getCodeGenOpts(), TargetOpts, CI.getLangOpts(), CI.getTarget().getDataLayout(), TheModule.get(), BA, std::move(OS)); return; } // Otherwise follow the normal AST path. 
this->ASTFrontendAction::ExecuteAction(); } // void EmitAssemblyAction::anchor() { } EmitAssemblyAction::EmitAssemblyAction(llvm::LLVMContext *_VMContext) : CodeGenAction(Backend_EmitAssembly, _VMContext) {} void EmitBCAction::anchor() { } EmitBCAction::EmitBCAction(llvm::LLVMContext *_VMContext) : CodeGenAction(Backend_EmitBC, _VMContext) {} void EmitLLVMAction::anchor() { } EmitLLVMAction::EmitLLVMAction(llvm::LLVMContext *_VMContext) : CodeGenAction(Backend_EmitLL, _VMContext) {} void EmitLLVMOnlyAction::anchor() { } EmitLLVMOnlyAction::EmitLLVMOnlyAction(llvm::LLVMContext *_VMContext) : CodeGenAction(Backend_EmitNothing, _VMContext) {} void EmitCodeGenOnlyAction::anchor() { } EmitCodeGenOnlyAction::EmitCodeGenOnlyAction(llvm::LLVMContext *_VMContext) : CodeGenAction(Backend_EmitMCNull, _VMContext) {} void EmitObjAction::anchor() { } EmitObjAction::EmitObjAction(llvm::LLVMContext *_VMContext) : CodeGenAction(Backend_EmitObj, _VMContext) {} Index: projects/clang700-import/contrib/llvm/tools/clang/lib/Headers/unwind.h =================================================================== --- projects/clang700-import/contrib/llvm/tools/clang/lib/Headers/unwind.h (revision 337644) +++ projects/clang700-import/contrib/llvm/tools/clang/lib/Headers/unwind.h (revision 337645) @@ -1,337 +1,341 @@ /*===---- unwind.h - Stack unwinding ----------------------------------------=== * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * *===-----------------------------------------------------------------------=== */ /* See "Data Definitions for libgcc_s" in the Linux Standard Base.*/ #ifndef __CLANG_UNWIND_H #define __CLANG_UNWIND_H #if defined(__APPLE__) && __has_include_next() /* Darwin (from 11.x on) provide an unwind.h. If that's available, * use it. libunwind wraps some of its definitions in #ifdef _GNU_SOURCE, * so define that around the include.*/ # ifndef _GNU_SOURCE # define _SHOULD_UNDEFINE_GNU_SOURCE # define _GNU_SOURCE # endif // libunwind's unwind.h reflects the current visibility. However, Mozilla // builds with -fvisibility=hidden and relies on gcc's unwind.h to reset the // visibility to default and export its contents. gcc also allows users to // override its override by #defining HIDE_EXPORTS (but note, this only obeys // the user's -fvisibility setting; it doesn't hide any exports on its own). 
We // imitate gcc's header here: # ifdef HIDE_EXPORTS # include_next # else # pragma GCC visibility push(default) # include_next # pragma GCC visibility pop # endif # ifdef _SHOULD_UNDEFINE_GNU_SOURCE # undef _GNU_SOURCE # undef _SHOULD_UNDEFINE_GNU_SOURCE # endif #else #include #ifdef __cplusplus extern "C" { #endif /* It is a bit strange for a header to play with the visibility of the symbols it declares, but this matches gcc's behavior and some programs depend on it */ #ifndef HIDE_EXPORTS #pragma GCC visibility push(default) #endif typedef uintptr_t _Unwind_Word; typedef intptr_t _Unwind_Sword; typedef uintptr_t _Unwind_Ptr; typedef uintptr_t _Unwind_Internal_Ptr; typedef uint64_t _Unwind_Exception_Class; typedef intptr_t _sleb128_t; typedef uintptr_t _uleb128_t; struct _Unwind_Context; #if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__)) struct _Unwind_Control_Block; typedef struct _Unwind_Control_Block _Unwind_Exception; /* Alias */ #else struct _Unwind_Exception; typedef struct _Unwind_Exception _Unwind_Exception; #endif typedef enum { _URC_NO_REASON = 0, #if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \ !defined(__ARM_DWARF_EH__) _URC_OK = 0, /* used by ARM EHABI */ #endif _URC_FOREIGN_EXCEPTION_CAUGHT = 1, _URC_FATAL_PHASE2_ERROR = 2, _URC_FATAL_PHASE1_ERROR = 3, _URC_NORMAL_STOP = 4, _URC_END_OF_STACK = 5, _URC_HANDLER_FOUND = 6, _URC_INSTALL_CONTEXT = 7, _URC_CONTINUE_UNWIND = 8, #if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \ !defined(__ARM_DWARF_EH__) _URC_FAILURE = 9 /* used by ARM EHABI */ #endif } _Unwind_Reason_Code; typedef enum { _UA_SEARCH_PHASE = 1, _UA_CLEANUP_PHASE = 2, _UA_HANDLER_FRAME = 4, _UA_FORCE_UNWIND = 8, _UA_END_OF_STACK = 16 /* gcc extension to C++ ABI */ } _Unwind_Action; typedef void (*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code, _Unwind_Exception *); #if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__)) typedef struct 
_Unwind_Control_Block _Unwind_Control_Block; typedef uint32_t _Unwind_EHT_Header; struct _Unwind_Control_Block { uint64_t exception_class; void (*exception_cleanup)(_Unwind_Reason_Code, _Unwind_Control_Block *); /* unwinder cache (private fields for the unwinder's use) */ struct { uint32_t reserved1; /* forced unwind stop function, 0 if not forced */ uint32_t reserved2; /* personality routine */ uint32_t reserved3; /* callsite */ uint32_t reserved4; /* forced unwind stop argument */ uint32_t reserved5; } unwinder_cache; /* propagation barrier cache (valid after phase 1) */ struct { uint32_t sp; uint32_t bitpattern[5]; } barrier_cache; /* cleanup cache (preserved over cleanup) */ struct { uint32_t bitpattern[4]; } cleanup_cache; /* personality cache (for personality's benefit) */ struct { uint32_t fnstart; /* function start address */ _Unwind_EHT_Header *ehtp; /* pointer to EHT entry header word */ uint32_t additional; /* additional data */ uint32_t reserved1; } pr_cache; long long int : 0; /* force alignment of next item to 8-byte boundary */ } __attribute__((__aligned__(8))); #else struct _Unwind_Exception { _Unwind_Exception_Class exception_class; _Unwind_Exception_Cleanup_Fn exception_cleanup; +#if !defined (__USING_SJLJ_EXCEPTIONS__) && defined (__SEH__) + _Unwind_Word private_[6]; +#else _Unwind_Word private_1; _Unwind_Word private_2; +#endif /* The Itanium ABI requires that _Unwind_Exception objects are "double-word * aligned". GCC has interpreted this to mean "use the maximum useful * alignment for the target"; so do we. 
*/ } __attribute__((__aligned__)); #endif typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(int, _Unwind_Action, _Unwind_Exception_Class, _Unwind_Exception *, struct _Unwind_Context *, void *); typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(int, _Unwind_Action, _Unwind_Exception_Class, _Unwind_Exception *, struct _Unwind_Context *); typedef _Unwind_Personality_Fn __personality_routine; typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *, void *); #if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__)) typedef enum { _UVRSC_CORE = 0, /* integer register */ _UVRSC_VFP = 1, /* vfp */ _UVRSC_WMMXD = 3, /* Intel WMMX data register */ _UVRSC_WMMXC = 4 /* Intel WMMX control register */ } _Unwind_VRS_RegClass; typedef enum { _UVRSD_UINT32 = 0, _UVRSD_VFPX = 1, _UVRSD_UINT64 = 3, _UVRSD_FLOAT = 4, _UVRSD_DOUBLE = 5 } _Unwind_VRS_DataRepresentation; typedef enum { _UVRSR_OK = 0, _UVRSR_NOT_IMPLEMENTED = 1, _UVRSR_FAILED = 2 } _Unwind_VRS_Result; typedef uint32_t _Unwind_State; #define _US_VIRTUAL_UNWIND_FRAME ((_Unwind_State)0) #define _US_UNWIND_FRAME_STARTING ((_Unwind_State)1) #define _US_UNWIND_FRAME_RESUME ((_Unwind_State)2) #define _US_ACTION_MASK ((_Unwind_State)3) #define _US_FORCE_UNWIND ((_Unwind_State)8) _Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *__context, _Unwind_VRS_RegClass __regclass, uint32_t __regno, _Unwind_VRS_DataRepresentation __representation, void *__valuep); _Unwind_VRS_Result _Unwind_VRS_Set(struct _Unwind_Context *__context, _Unwind_VRS_RegClass __regclass, uint32_t __regno, _Unwind_VRS_DataRepresentation __representation, void *__valuep); static __inline__ _Unwind_Word _Unwind_GetGR(struct _Unwind_Context *__context, int __index) { _Unwind_Word __value; _Unwind_VRS_Get(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value); return __value; } static __inline__ void _Unwind_SetGR(struct _Unwind_Context *__context, int __index, _Unwind_Word __value) { 
_Unwind_VRS_Set(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value); } static __inline__ _Unwind_Word _Unwind_GetIP(struct _Unwind_Context *__context) { _Unwind_Word __ip = _Unwind_GetGR(__context, 15); return __ip & ~(_Unwind_Word)(0x1); /* Remove thumb mode bit. */ } static __inline__ void _Unwind_SetIP(struct _Unwind_Context *__context, _Unwind_Word __value) { _Unwind_Word __thumb_mode_bit = _Unwind_GetGR(__context, 15) & 0x1; _Unwind_SetGR(__context, 15, __value | __thumb_mode_bit); } #else _Unwind_Word _Unwind_GetGR(struct _Unwind_Context *, int); void _Unwind_SetGR(struct _Unwind_Context *, int, _Unwind_Word); _Unwind_Word _Unwind_GetIP(struct _Unwind_Context *); void _Unwind_SetIP(struct _Unwind_Context *, _Unwind_Word); #endif _Unwind_Word _Unwind_GetIPInfo(struct _Unwind_Context *, int *); _Unwind_Word _Unwind_GetCFA(struct _Unwind_Context *); _Unwind_Word _Unwind_GetBSP(struct _Unwind_Context *); void *_Unwind_GetLanguageSpecificData(struct _Unwind_Context *); _Unwind_Ptr _Unwind_GetRegionStart(struct _Unwind_Context *); /* DWARF EH functions; currently not available on Darwin/ARM */ #if !defined(__APPLE__) || !defined(__arm__) _Unwind_Reason_Code _Unwind_RaiseException(_Unwind_Exception *); _Unwind_Reason_Code _Unwind_ForcedUnwind(_Unwind_Exception *, _Unwind_Stop_Fn, void *); void _Unwind_DeleteException(_Unwind_Exception *); void _Unwind_Resume(_Unwind_Exception *); _Unwind_Reason_Code _Unwind_Resume_or_Rethrow(_Unwind_Exception *); #endif _Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void *); /* setjmp(3)/longjmp(3) stuff */ typedef struct SjLj_Function_Context *_Unwind_FunctionContext_t; void _Unwind_SjLj_Register(_Unwind_FunctionContext_t); void _Unwind_SjLj_Unregister(_Unwind_FunctionContext_t); _Unwind_Reason_Code _Unwind_SjLj_RaiseException(_Unwind_Exception *); _Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(_Unwind_Exception *, _Unwind_Stop_Fn, void *); void _Unwind_SjLj_Resume(_Unwind_Exception *); _Unwind_Reason_Code 
_Unwind_SjLj_Resume_or_Rethrow(_Unwind_Exception *); void *_Unwind_FindEnclosingFunction(void *); #ifdef __APPLE__ _Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *) __attribute__((__unavailable__)); _Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *) __attribute__((__unavailable__)); /* Darwin-specific functions */ void __register_frame(const void *); void __deregister_frame(const void *); struct dwarf_eh_bases { uintptr_t tbase; uintptr_t dbase; uintptr_t func; }; void *_Unwind_Find_FDE(const void *, struct dwarf_eh_bases *); void __register_frame_info_bases(const void *, void *, void *, void *) __attribute__((__unavailable__)); void __register_frame_info(const void *, void *) __attribute__((__unavailable__)); void __register_frame_info_table_bases(const void *, void*, void *, void *) __attribute__((__unavailable__)); void __register_frame_info_table(const void *, void *) __attribute__((__unavailable__)); void __register_frame_table(const void *) __attribute__((__unavailable__)); void __deregister_frame_info(const void *) __attribute__((__unavailable__)); void __deregister_frame_info_bases(const void *)__attribute__((__unavailable__)); #else _Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *); _Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *); #endif #ifndef HIDE_EXPORTS #pragma GCC visibility pop #endif #ifdef __cplusplus } #endif #endif #endif /* __CLANG_UNWIND_H */ Index: projects/clang700-import/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp =================================================================== --- projects/clang700-import/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp (revision 337644) +++ projects/clang700-import/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp (revision 337645) @@ -1,7916 +1,7922 @@ //===--- SemaExprCXX.cpp - Semantic Analysis for Expressions --------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // 
License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// /// /// \file /// Implements semantic analysis for C++ expressions. /// //===----------------------------------------------------------------------===// #include "clang/Sema/SemaInternal.h" #include "TreeTransform.h" #include "TypeLocBuilder.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTLambda.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/CharUnits.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/TypeLoc.h" #include "clang/Basic/AlignedAllocation.h" #include "clang/Basic/PartialDiagnostic.h" #include "clang/Basic/TargetInfo.h" #include "clang/Lex/Preprocessor.h" #include "clang/Sema/DeclSpec.h" #include "clang/Sema/Initialization.h" #include "clang/Sema/Lookup.h" #include "clang/Sema/ParsedTemplate.h" #include "clang/Sema/Scope.h" #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/SemaLambda.h" #include "clang/Sema/TemplateDeduction.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/ErrorHandling.h" using namespace clang; using namespace sema; /// Handle the result of the special case name lookup for inheriting /// constructor declarations. 'NS::X::X' and 'NS::X<...>::X' are treated as /// constructor names in member using declarations, even if 'X' is not the /// name of the corresponding type. ParsedType Sema::getInheritingConstructorName(CXXScopeSpec &SS, SourceLocation NameLoc, IdentifierInfo &Name) { NestedNameSpecifier *NNS = SS.getScopeRep(); // Convert the nested-name-specifier into a type. 
QualType Type; switch (NNS->getKind()) { case NestedNameSpecifier::TypeSpec: case NestedNameSpecifier::TypeSpecWithTemplate: Type = QualType(NNS->getAsType(), 0); break; case NestedNameSpecifier::Identifier: // Strip off the last layer of the nested-name-specifier and build a // typename type for it. assert(NNS->getAsIdentifier() == &Name && "not a constructor name"); Type = Context.getDependentNameType(ETK_None, NNS->getPrefix(), NNS->getAsIdentifier()); break; case NestedNameSpecifier::Global: case NestedNameSpecifier::Super: case NestedNameSpecifier::Namespace: case NestedNameSpecifier::NamespaceAlias: llvm_unreachable("Nested name specifier is not a type for inheriting ctor"); } // This reference to the type is located entirely at the location of the // final identifier in the qualified-id. return CreateParsedType(Type, Context.getTrivialTypeSourceInfo(Type, NameLoc)); } ParsedType Sema::getConstructorName(IdentifierInfo &II, SourceLocation NameLoc, Scope *S, CXXScopeSpec &SS, bool EnteringContext) { CXXRecordDecl *CurClass = getCurrentClass(S, &SS); assert(CurClass && &II == CurClass->getIdentifier() && "not a constructor name"); // When naming a constructor as a member of a dependent context (eg, in a // friend declaration or an inherited constructor declaration), form an // unresolved "typename" type. if (CurClass->isDependentContext() && !EnteringContext) { QualType T = Context.getDependentNameType(ETK_None, SS.getScopeRep(), &II); return ParsedType::make(T); } if (SS.isNotEmpty() && RequireCompleteDeclContext(SS, CurClass)) return ParsedType(); // Find the injected-class-name declaration. Note that we make no attempt to // diagnose cases where the injected-class-name is shadowed: the only // declaration that can validly shadow the injected-class-name is a // non-static data member, and if the class contains both a non-static data // member and a constructor then it is ill-formed (we check that in // CheckCompletedCXXClass). 
CXXRecordDecl *InjectedClassName = nullptr; for (NamedDecl *ND : CurClass->lookup(&II)) { auto *RD = dyn_cast(ND); if (RD && RD->isInjectedClassName()) { InjectedClassName = RD; break; } } - if (!InjectedClassName && CurClass->isInvalidDecl()) + if (!InjectedClassName) { + if (!CurClass->isInvalidDecl()) { + // FIXME: RequireCompleteDeclContext doesn't check dependent contexts + // properly. Work around it here for now. + Diag(SS.getLastQualifierNameLoc(), + diag::err_incomplete_nested_name_spec) << CurClass << SS.getRange(); + } return ParsedType(); - assert(InjectedClassName && "couldn't find injected class name"); + } QualType T = Context.getTypeDeclType(InjectedClassName); DiagnoseUseOfDecl(InjectedClassName, NameLoc); MarkAnyDeclReferenced(NameLoc, InjectedClassName, /*OdrUse=*/false); return ParsedType::make(T); } ParsedType Sema::getDestructorName(SourceLocation TildeLoc, IdentifierInfo &II, SourceLocation NameLoc, Scope *S, CXXScopeSpec &SS, ParsedType ObjectTypePtr, bool EnteringContext) { // Determine where to perform name lookup. // FIXME: This area of the standard is very messy, and the current // wording is rather unclear about which scopes we search for the // destructor name; see core issues 399 and 555. Issue 399 in // particular shows where the current description of destructor name // lookup is completely out of line with existing practice, e.g., // this appears to be ill-formed: // // namespace N { // template struct S { // ~S(); // }; // } // // void f(N::S* s) { // s->N::S::~S(); // } // // See also PR6358 and PR6359. // For this reason, we're currently only doing the C++03 version of this // code; the C++0x version has to wait until we get a proper spec. 
QualType SearchType; DeclContext *LookupCtx = nullptr; bool isDependent = false; bool LookInScope = false; if (SS.isInvalid()) return nullptr; // If we have an object type, it's because we are in a // pseudo-destructor-expression or a member access expression, and // we know what type we're looking for. if (ObjectTypePtr) SearchType = GetTypeFromParser(ObjectTypePtr); if (SS.isSet()) { NestedNameSpecifier *NNS = SS.getScopeRep(); bool AlreadySearched = false; bool LookAtPrefix = true; // C++11 [basic.lookup.qual]p6: // If a pseudo-destructor-name (5.2.4) contains a nested-name-specifier, // the type-names are looked up as types in the scope designated by the // nested-name-specifier. Similarly, in a qualified-id of the form: // // nested-name-specifier[opt] class-name :: ~ class-name // // the second class-name is looked up in the same scope as the first. // // Here, we determine whether the code below is permitted to look at the // prefix of the nested-name-specifier. DeclContext *DC = computeDeclContext(SS, EnteringContext); if (DC && DC->isFileContext()) { AlreadySearched = true; LookupCtx = DC; isDependent = false; } else if (DC && isa(DC)) { LookAtPrefix = false; LookInScope = true; } // The second case from the C++03 rules quoted further above. NestedNameSpecifier *Prefix = nullptr; if (AlreadySearched) { // Nothing left to do. 
} else if (LookAtPrefix && (Prefix = NNS->getPrefix())) { CXXScopeSpec PrefixSS; PrefixSS.Adopt(NestedNameSpecifierLoc(Prefix, SS.location_data())); LookupCtx = computeDeclContext(PrefixSS, EnteringContext); isDependent = isDependentScopeSpecifier(PrefixSS); } else if (ObjectTypePtr) { LookupCtx = computeDeclContext(SearchType); isDependent = SearchType->isDependentType(); } else { LookupCtx = computeDeclContext(SS, EnteringContext); isDependent = LookupCtx && LookupCtx->isDependentContext(); } } else if (ObjectTypePtr) { // C++ [basic.lookup.classref]p3: // If the unqualified-id is ~type-name, the type-name is looked up // in the context of the entire postfix-expression. If the type T // of the object expression is of a class type C, the type-name is // also looked up in the scope of class C. At least one of the // lookups shall find a name that refers to (possibly // cv-qualified) T. LookupCtx = computeDeclContext(SearchType); isDependent = SearchType->isDependentType(); assert((isDependent || !SearchType->isIncompleteType()) && "Caller should have completed object type"); LookInScope = true; } else { // Perform lookup into the current scope (only). LookInScope = true; } TypeDecl *NonMatchingTypeDecl = nullptr; LookupResult Found(*this, &II, NameLoc, LookupOrdinaryName); for (unsigned Step = 0; Step != 2; ++Step) { // Look for the name first in the computed lookup context (if we // have one) and, if that fails to find a match, in the scope (if // we're allowed to look there). Found.clear(); if (Step == 0 && LookupCtx) { if (RequireCompleteDeclContext(SS, LookupCtx)) return nullptr; LookupQualifiedName(Found, LookupCtx); } else if (Step == 1 && LookInScope && S) { LookupName(Found, S); } else { continue; } // FIXME: Should we be suppressing ambiguities here? 
if (Found.isAmbiguous()) return nullptr; if (TypeDecl *Type = Found.getAsSingle()) { QualType T = Context.getTypeDeclType(Type); MarkAnyDeclReferenced(Type->getLocation(), Type, /*OdrUse=*/false); if (SearchType.isNull() || SearchType->isDependentType() || Context.hasSameUnqualifiedType(T, SearchType)) { // We found our type! return CreateParsedType(T, Context.getTrivialTypeSourceInfo(T, NameLoc)); } if (!SearchType.isNull()) NonMatchingTypeDecl = Type; } // If the name that we found is a class template name, and it is // the same name as the template name in the last part of the // nested-name-specifier (if present) or the object type, then // this is the destructor for that class. // FIXME: This is a workaround until we get real drafting for core // issue 399, for which there isn't even an obvious direction. if (ClassTemplateDecl *Template = Found.getAsSingle()) { QualType MemberOfType; if (SS.isSet()) { if (DeclContext *Ctx = computeDeclContext(SS, EnteringContext)) { // Figure out the type of the context, if it has one. if (CXXRecordDecl *Record = dyn_cast(Ctx)) MemberOfType = Context.getTypeDeclType(Record); } } if (MemberOfType.isNull()) MemberOfType = SearchType; if (MemberOfType.isNull()) continue; // We're referring into a class template specialization. If the // class template we found is the same as the template being // specialized, we found what we are looking for. if (const RecordType *Record = MemberOfType->getAs()) { if (ClassTemplateSpecializationDecl *Spec = dyn_cast(Record->getDecl())) { if (Spec->getSpecializedTemplate()->getCanonicalDecl() == Template->getCanonicalDecl()) return CreateParsedType( MemberOfType, Context.getTrivialTypeSourceInfo(MemberOfType, NameLoc)); } continue; } // We're referring to an unresolved class template // specialization. Determine whether we class template we found // is the same as the template being specialized or, if we don't // know which template is being specialized, that it at least // has the same name. 
if (const TemplateSpecializationType *SpecType = MemberOfType->getAs()) { TemplateName SpecName = SpecType->getTemplateName(); // The class template we found is the same template being // specialized. if (TemplateDecl *SpecTemplate = SpecName.getAsTemplateDecl()) { if (SpecTemplate->getCanonicalDecl() == Template->getCanonicalDecl()) return CreateParsedType( MemberOfType, Context.getTrivialTypeSourceInfo(MemberOfType, NameLoc)); continue; } // The class template we found has the same name as the // (dependent) template name being specialized. if (DependentTemplateName *DepTemplate = SpecName.getAsDependentTemplateName()) { if (DepTemplate->isIdentifier() && DepTemplate->getIdentifier() == Template->getIdentifier()) return CreateParsedType( MemberOfType, Context.getTrivialTypeSourceInfo(MemberOfType, NameLoc)); continue; } } } } if (isDependent) { // We didn't find our type, but that's okay: it's dependent // anyway. // FIXME: What if we have no nested-name-specifier? QualType T = CheckTypenameType(ETK_None, SourceLocation(), SS.getWithLocInContext(Context), II, NameLoc); return ParsedType::make(T); } if (NonMatchingTypeDecl) { QualType T = Context.getTypeDeclType(NonMatchingTypeDecl); Diag(NameLoc, diag::err_destructor_expr_type_mismatch) << T << SearchType; Diag(NonMatchingTypeDecl->getLocation(), diag::note_destructor_type_here) << T; } else if (ObjectTypePtr) Diag(NameLoc, diag::err_ident_in_dtor_not_a_type) << &II; else { SemaDiagnosticBuilder DtorDiag = Diag(NameLoc, diag::err_destructor_class_name); if (S) { const DeclContext *Ctx = S->getEntity(); if (const CXXRecordDecl *Class = dyn_cast_or_null(Ctx)) DtorDiag << FixItHint::CreateReplacement(SourceRange(NameLoc), Class->getNameAsString()); } } return nullptr; } ParsedType Sema::getDestructorTypeForDecltype(const DeclSpec &DS, ParsedType ObjectType) { if (DS.getTypeSpecType() == DeclSpec::TST_error) return nullptr; if (DS.getTypeSpecType() == DeclSpec::TST_decltype_auto) { Diag(DS.getTypeSpecTypeLoc(), 
diag::err_decltype_auto_invalid); return nullptr; } assert(DS.getTypeSpecType() == DeclSpec::TST_decltype && "unexpected type in getDestructorType"); QualType T = BuildDecltypeType(DS.getRepAsExpr(), DS.getTypeSpecTypeLoc()); // If we know the type of the object, check that the correct destructor // type was named now; we can give better diagnostics this way. QualType SearchType = GetTypeFromParser(ObjectType); if (!SearchType.isNull() && !SearchType->isDependentType() && !Context.hasSameUnqualifiedType(T, SearchType)) { Diag(DS.getTypeSpecTypeLoc(), diag::err_destructor_expr_type_mismatch) << T << SearchType; return nullptr; } return ParsedType::make(T); } bool Sema::checkLiteralOperatorId(const CXXScopeSpec &SS, const UnqualifiedId &Name) { assert(Name.getKind() == UnqualifiedIdKind::IK_LiteralOperatorId); if (!SS.isValid()) return false; switch (SS.getScopeRep()->getKind()) { case NestedNameSpecifier::Identifier: case NestedNameSpecifier::TypeSpec: case NestedNameSpecifier::TypeSpecWithTemplate: // Per C++11 [over.literal]p2, literal operators can only be declared at // namespace scope. Therefore, this unqualified-id cannot name anything. // Reject it early, because we have no AST representation for this in the // case where the scope is dependent. Diag(Name.getLocStart(), diag::err_literal_operator_id_outside_namespace) << SS.getScopeRep(); return true; case NestedNameSpecifier::Global: case NestedNameSpecifier::Super: case NestedNameSpecifier::Namespace: case NestedNameSpecifier::NamespaceAlias: return false; } llvm_unreachable("unknown nested name specifier kind"); } /// Build a C++ typeid expression with a type operand. ExprResult Sema::BuildCXXTypeId(QualType TypeInfoType, SourceLocation TypeidLoc, TypeSourceInfo *Operand, SourceLocation RParenLoc) { // C++ [expr.typeid]p4: // The top-level cv-qualifiers of the lvalue expression or the type-id // that is the operand of typeid are always ignored. 
// If the type of the type-id is a class type or a reference to a class // type, the class shall be completely-defined. Qualifiers Quals; QualType T = Context.getUnqualifiedArrayType(Operand->getType().getNonReferenceType(), Quals); if (T->getAs() && RequireCompleteType(TypeidLoc, T, diag::err_incomplete_typeid)) return ExprError(); if (T->isVariablyModifiedType()) return ExprError(Diag(TypeidLoc, diag::err_variably_modified_typeid) << T); return new (Context) CXXTypeidExpr(TypeInfoType.withConst(), Operand, SourceRange(TypeidLoc, RParenLoc)); } /// Build a C++ typeid expression with an expression operand. ExprResult Sema::BuildCXXTypeId(QualType TypeInfoType, SourceLocation TypeidLoc, Expr *E, SourceLocation RParenLoc) { bool WasEvaluated = false; if (E && !E->isTypeDependent()) { if (E->getType()->isPlaceholderType()) { ExprResult result = CheckPlaceholderExpr(E); if (result.isInvalid()) return ExprError(); E = result.get(); } QualType T = E->getType(); if (const RecordType *RecordT = T->getAs()) { CXXRecordDecl *RecordD = cast(RecordT->getDecl()); // C++ [expr.typeid]p3: // [...] If the type of the expression is a class type, the class // shall be completely-defined. if (RequireCompleteType(TypeidLoc, T, diag::err_incomplete_typeid)) return ExprError(); // C++ [expr.typeid]p3: // When typeid is applied to an expression other than an glvalue of a // polymorphic class type [...] [the] expression is an unevaluated // operand. [...] if (RecordD->isPolymorphic() && E->isGLValue()) { // The subexpression is potentially evaluated; switch the context // and recheck the subexpression. ExprResult Result = TransformToPotentiallyEvaluated(E); if (Result.isInvalid()) return ExprError(); E = Result.get(); // We require a vtable to query the type at run time. MarkVTableUsed(TypeidLoc, RecordD); WasEvaluated = true; } } // C++ [expr.typeid]p4: // [...] 
If the type of the type-id is a reference to a possibly // cv-qualified type, the result of the typeid expression refers to a // std::type_info object representing the cv-unqualified referenced // type. Qualifiers Quals; QualType UnqualT = Context.getUnqualifiedArrayType(T, Quals); if (!Context.hasSameType(T, UnqualT)) { T = UnqualT; E = ImpCastExprToType(E, UnqualT, CK_NoOp, E->getValueKind()).get(); } } if (E->getType()->isVariablyModifiedType()) return ExprError(Diag(TypeidLoc, diag::err_variably_modified_typeid) << E->getType()); else if (!inTemplateInstantiation() && E->HasSideEffects(Context, WasEvaluated)) { // The expression operand for typeid is in an unevaluated expression // context, so side effects could result in unintended consequences. Diag(E->getExprLoc(), WasEvaluated ? diag::warn_side_effects_typeid : diag::warn_side_effects_unevaluated_context); } return new (Context) CXXTypeidExpr(TypeInfoType.withConst(), E, SourceRange(TypeidLoc, RParenLoc)); } /// ActOnCXXTypeidOfType - Parse typeid( type-id ) or typeid (expression); ExprResult Sema::ActOnCXXTypeid(SourceLocation OpLoc, SourceLocation LParenLoc, bool isType, void *TyOrExpr, SourceLocation RParenLoc) { // OpenCL C++ 1.0 s2.9: typeid is not supported. if (getLangOpts().OpenCLCPlusPlus) { return ExprError(Diag(OpLoc, diag::err_openclcxx_not_supported) << "typeid"); } // Find the std::type_info type. if (!getStdNamespace()) return ExprError(Diag(OpLoc, diag::err_need_header_before_typeid)); if (!CXXTypeInfoDecl) { IdentifierInfo *TypeInfoII = &PP.getIdentifierTable().get("type_info"); LookupResult R(*this, TypeInfoII, SourceLocation(), LookupTagName); LookupQualifiedName(R, getStdNamespace()); CXXTypeInfoDecl = R.getAsSingle(); // Microsoft's typeinfo doesn't have type_info in std but in the global // namespace if _HAS_EXCEPTIONS is defined to 0. See PR13153. 
if (!CXXTypeInfoDecl && LangOpts.MSVCCompat) { LookupQualifiedName(R, Context.getTranslationUnitDecl()); CXXTypeInfoDecl = R.getAsSingle(); } if (!CXXTypeInfoDecl) return ExprError(Diag(OpLoc, diag::err_need_header_before_typeid)); } if (!getLangOpts().RTTI) { return ExprError(Diag(OpLoc, diag::err_no_typeid_with_fno_rtti)); } QualType TypeInfoType = Context.getTypeDeclType(CXXTypeInfoDecl); if (isType) { // The operand is a type; handle it as such. TypeSourceInfo *TInfo = nullptr; QualType T = GetTypeFromParser(ParsedType::getFromOpaquePtr(TyOrExpr), &TInfo); if (T.isNull()) return ExprError(); if (!TInfo) TInfo = Context.getTrivialTypeSourceInfo(T, OpLoc); return BuildCXXTypeId(TypeInfoType, OpLoc, TInfo, RParenLoc); } // The operand is an expression. return BuildCXXTypeId(TypeInfoType, OpLoc, (Expr*)TyOrExpr, RParenLoc); } /// Grabs __declspec(uuid()) off a type, or returns 0 if we cannot resolve to /// a single GUID. static void getUuidAttrOfType(Sema &SemaRef, QualType QT, llvm::SmallSetVector &UuidAttrs) { // Optionally remove one level of pointer, reference or array indirection. const Type *Ty = QT.getTypePtr(); if (QT->isPointerType() || QT->isReferenceType()) Ty = QT->getPointeeType().getTypePtr(); else if (QT->isArrayType()) Ty = Ty->getBaseElementTypeUnsafe(); const auto *TD = Ty->getAsTagDecl(); if (!TD) return; if (const auto *Uuid = TD->getMostRecentDecl()->getAttr()) { UuidAttrs.insert(Uuid); return; } // __uuidof can grab UUIDs from template arguments. 
if (const auto *CTSD = dyn_cast(TD)) { const TemplateArgumentList &TAL = CTSD->getTemplateArgs(); for (const TemplateArgument &TA : TAL.asArray()) { const UuidAttr *UuidForTA = nullptr; if (TA.getKind() == TemplateArgument::Type) getUuidAttrOfType(SemaRef, TA.getAsType(), UuidAttrs); else if (TA.getKind() == TemplateArgument::Declaration) getUuidAttrOfType(SemaRef, TA.getAsDecl()->getType(), UuidAttrs); if (UuidForTA) UuidAttrs.insert(UuidForTA); } } } /// Build a Microsoft __uuidof expression with a type operand. ExprResult Sema::BuildCXXUuidof(QualType TypeInfoType, SourceLocation TypeidLoc, TypeSourceInfo *Operand, SourceLocation RParenLoc) { StringRef UuidStr; if (!Operand->getType()->isDependentType()) { llvm::SmallSetVector UuidAttrs; getUuidAttrOfType(*this, Operand->getType(), UuidAttrs); if (UuidAttrs.empty()) return ExprError(Diag(TypeidLoc, diag::err_uuidof_without_guid)); if (UuidAttrs.size() > 1) return ExprError(Diag(TypeidLoc, diag::err_uuidof_with_multiple_guids)); UuidStr = UuidAttrs.back()->getGuid(); } return new (Context) CXXUuidofExpr(TypeInfoType.withConst(), Operand, UuidStr, SourceRange(TypeidLoc, RParenLoc)); } /// Build a Microsoft __uuidof expression with an expression operand. 
/// When the operand's type is dependent the GUID string is left empty.
/// Otherwise a null pointer constant yields the all-zero GUID, and any other
/// operand must have a type from which exactly one uuid attribute can be
/// collected; finding none, or more than one, is diagnosed at \p TypeidLoc and
/// an error result is returned.
ExprResult Sema::BuildCXXUuidof(QualType TypeInfoType,
                                SourceLocation TypeidLoc,
                                Expr *E,
                                SourceLocation RParenLoc) {
  StringRef UuidStr;
  if (!E->getType()->isDependentType()) {
    if (E->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) {
      UuidStr = "00000000-0000-0000-0000-000000000000";
    } else {
      // Collect every uuid attribute reachable from the operand's type; the
      // expression is only well-formed if that yields a single GUID.
      llvm::SmallSetVector<const UuidAttr *, 1> UuidAttrs;
      getUuidAttrOfType(*this, E->getType(), UuidAttrs);
      if (UuidAttrs.empty())
        return ExprError(Diag(TypeidLoc, diag::err_uuidof_without_guid));
      if (UuidAttrs.size() > 1)
        return ExprError(Diag(TypeidLoc, diag::err_uuidof_with_multiple_guids));
      UuidStr = UuidAttrs.back()->getGuid();
    }
  }

  return new (Context) CXXUuidofExpr(TypeInfoType.withConst(), E, UuidStr,
                                     SourceRange(TypeidLoc, RParenLoc));
}

/// ActOnCXXUuidof - Parse __uuidof( type-id ) or __uuidof (expression);
///
/// \p TyOrExpr is an opaque parser handle: it is decoded as a ParsedType when
/// \p isType is true and as an Expr* otherwise. The '_GUID' record is looked
/// up in the translation unit on first use and cached in MSVCGuidDecl; if it
/// cannot be found, err_need_header_before_ms_uuidof is diagnosed.
ExprResult
Sema::ActOnCXXUuidof(SourceLocation OpLoc, SourceLocation LParenLoc,
                     bool isType, void *TyOrExpr, SourceLocation RParenLoc) {
  // If MSVCGuidDecl has not been cached, do the lookup.
  if (!MSVCGuidDecl) {
    IdentifierInfo *GuidII = &PP.getIdentifierTable().get("_GUID");
    LookupResult R(*this, GuidII, SourceLocation(), LookupTagName);
    LookupQualifiedName(R, Context.getTranslationUnitDecl());
    MSVCGuidDecl = R.getAsSingle<RecordDecl>();
    if (!MSVCGuidDecl)
      return ExprError(Diag(OpLoc, diag::err_need_header_before_ms_uuidof));
  }

  QualType GuidType = Context.getTypeDeclType(MSVCGuidDecl);

  if (isType) {
    // The operand is a type; handle it as such.
    TypeSourceInfo *TInfo = nullptr;
    QualType T = GetTypeFromParser(ParsedType::getFromOpaquePtr(TyOrExpr),
                                   &TInfo);
    if (T.isNull())
      return ExprError();

    // The parser may not have produced source info; synthesize a trivial one.
    if (!TInfo)
      TInfo = Context.getTrivialTypeSourceInfo(T, OpLoc);

    return BuildCXXUuidof(GuidType, OpLoc, TInfo, RParenLoc);
  }

  // The operand is an expression.
  return BuildCXXUuidof(GuidType, OpLoc, (Expr*)TyOrExpr, RParenLoc);
}

/// ActOnCXXBoolLiteral - Parse {true,false} literals.
ExprResult Sema::ActOnCXXBoolLiteral(SourceLocation OpLoc, tok::TokenKind Kind) { assert((Kind == tok::kw_true || Kind == tok::kw_false) && "Unknown C++ Boolean value!"); return new (Context) CXXBoolLiteralExpr(Kind == tok::kw_true, Context.BoolTy, OpLoc); } /// ActOnCXXNullPtrLiteral - Parse 'nullptr'. ExprResult Sema::ActOnCXXNullPtrLiteral(SourceLocation Loc) { return new (Context) CXXNullPtrLiteralExpr(Context.NullPtrTy, Loc); } /// ActOnCXXThrow - Parse throw expressions. ExprResult Sema::ActOnCXXThrow(Scope *S, SourceLocation OpLoc, Expr *Ex) { bool IsThrownVarInScope = false; if (Ex) { // C++0x [class.copymove]p31: // When certain criteria are met, an implementation is allowed to omit the // copy/move construction of a class object [...] // // - in a throw-expression, when the operand is the name of a // non-volatile automatic object (other than a function or catch- // clause parameter) whose scope does not extend beyond the end of the // innermost enclosing try-block (if there is one), the copy/move // operation from the operand to the exception object (15.1) can be // omitted by constructing the automatic object directly into the // exception object if (DeclRefExpr *DRE = dyn_cast(Ex->IgnoreParens())) if (VarDecl *Var = dyn_cast(DRE->getDecl())) { if (Var->hasLocalStorage() && !Var->getType().isVolatileQualified()) { for( ; S; S = S->getParent()) { if (S->isDeclScope(Var)) { IsThrownVarInScope = true; break; } if (S->getFlags() & (Scope::FnScope | Scope::ClassScope | Scope::BlockScope | Scope::FunctionPrototypeScope | Scope::ObjCMethodScope | Scope::TryScope)) break; } } } } return BuildCXXThrow(OpLoc, Ex, IsThrownVarInScope); } ExprResult Sema::BuildCXXThrow(SourceLocation OpLoc, Expr *Ex, bool IsThrownVarInScope) { // Don't report an error if 'throw' is used in system headers. 
if (!getLangOpts().CXXExceptions && !getSourceManager().isInSystemHeader(OpLoc) && (!getLangOpts().OpenMPIsDevice || !getLangOpts().OpenMPHostCXXExceptions || isInOpenMPTargetExecutionDirective() || isInOpenMPDeclareTargetContext())) Diag(OpLoc, diag::err_exceptions_disabled) << "throw"; // Exceptions aren't allowed in CUDA device code. if (getLangOpts().CUDA) CUDADiagIfDeviceCode(OpLoc, diag::err_cuda_device_exceptions) << "throw" << CurrentCUDATarget(); if (getCurScope() && getCurScope()->isOpenMPSimdDirectiveScope()) Diag(OpLoc, diag::err_omp_simd_region_cannot_use_stmt) << "throw"; if (Ex && !Ex->isTypeDependent()) { QualType ExceptionObjectTy = Context.getExceptionObjectType(Ex->getType()); if (CheckCXXThrowOperand(OpLoc, ExceptionObjectTy, Ex)) return ExprError(); // Initialize the exception result. This implicitly weeds out // abstract types or types with inaccessible copy constructors. // C++0x [class.copymove]p31: // When certain criteria are met, an implementation is allowed to omit the // copy/move construction of a class object [...] 
// // - in a throw-expression, when the operand is the name of a // non-volatile automatic object (other than a function or // catch-clause // parameter) whose scope does not extend beyond the end of the // innermost enclosing try-block (if there is one), the copy/move // operation from the operand to the exception object (15.1) can be // omitted by constructing the automatic object directly into the // exception object const VarDecl *NRVOVariable = nullptr; if (IsThrownVarInScope) NRVOVariable = getCopyElisionCandidate(QualType(), Ex, CES_Strict); InitializedEntity Entity = InitializedEntity::InitializeException( OpLoc, ExceptionObjectTy, /*NRVO=*/NRVOVariable != nullptr); ExprResult Res = PerformMoveOrCopyInitialization( Entity, NRVOVariable, QualType(), Ex, IsThrownVarInScope); if (Res.isInvalid()) return ExprError(); Ex = Res.get(); } return new (Context) CXXThrowExpr(Ex, Context.VoidTy, OpLoc, IsThrownVarInScope); } static void collectPublicBases(CXXRecordDecl *RD, llvm::DenseMap &SubobjectsSeen, llvm::SmallPtrSetImpl &VBases, llvm::SetVector &PublicSubobjectsSeen, bool ParentIsPublic) { for (const CXXBaseSpecifier &BS : RD->bases()) { CXXRecordDecl *BaseDecl = BS.getType()->getAsCXXRecordDecl(); bool NewSubobject; // Virtual bases constitute the same subobject. Non-virtual bases are // always distinct subobjects. if (BS.isVirtual()) NewSubobject = VBases.insert(BaseDecl).second; else NewSubobject = true; if (NewSubobject) ++SubobjectsSeen[BaseDecl]; // Only add subobjects which have public access throughout the entire chain. bool PublicPath = ParentIsPublic && BS.getAccessSpecifier() == AS_public; if (PublicPath) PublicSubobjectsSeen.insert(BaseDecl); // Recurse on to each base subobject. 
collectPublicBases(BaseDecl, SubobjectsSeen, VBases, PublicSubobjectsSeen, PublicPath); } } static void getUnambiguousPublicSubobjects( CXXRecordDecl *RD, llvm::SmallVectorImpl &Objects) { llvm::DenseMap SubobjectsSeen; llvm::SmallSet VBases; llvm::SetVector PublicSubobjectsSeen; SubobjectsSeen[RD] = 1; PublicSubobjectsSeen.insert(RD); collectPublicBases(RD, SubobjectsSeen, VBases, PublicSubobjectsSeen, /*ParentIsPublic=*/true); for (CXXRecordDecl *PublicSubobject : PublicSubobjectsSeen) { // Skip ambiguous objects. if (SubobjectsSeen[PublicSubobject] > 1) continue; Objects.push_back(PublicSubobject); } } /// CheckCXXThrowOperand - Validate the operand of a throw. bool Sema::CheckCXXThrowOperand(SourceLocation ThrowLoc, QualType ExceptionObjectTy, Expr *E) { // If the type of the exception would be an incomplete type or a pointer // to an incomplete type other than (cv) void the program is ill-formed. QualType Ty = ExceptionObjectTy; bool isPointer = false; if (const PointerType* Ptr = Ty->getAs()) { Ty = Ptr->getPointeeType(); isPointer = true; } if (!isPointer || !Ty->isVoidType()) { if (RequireCompleteType(ThrowLoc, Ty, isPointer ? diag::err_throw_incomplete_ptr : diag::err_throw_incomplete, E->getSourceRange())) return true; if (RequireNonAbstractType(ThrowLoc, ExceptionObjectTy, diag::err_throw_abstract_type, E)) return true; } // If the exception has class type, we need additional handling. CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); if (!RD) return false; // If we are throwing a polymorphic class type or pointer thereof, // exception handling will make use of the vtable. MarkVTableUsed(ThrowLoc, RD); // If a pointer is thrown, the referenced object will not be destroyed. if (isPointer) return false; // If the class has a destructor, we must be able to call it. 
if (!RD->hasIrrelevantDestructor()) { if (CXXDestructorDecl *Destructor = LookupDestructor(RD)) { MarkFunctionReferenced(E->getExprLoc(), Destructor); CheckDestructorAccess(E->getExprLoc(), Destructor, PDiag(diag::err_access_dtor_exception) << Ty); if (DiagnoseUseOfDecl(Destructor, E->getExprLoc())) return true; } } // The MSVC ABI creates a list of all types which can catch the exception // object. This list also references the appropriate copy constructor to call // if the object is caught by value and has a non-trivial copy constructor. if (Context.getTargetInfo().getCXXABI().isMicrosoft()) { // We are only interested in the public, unambiguous bases contained within // the exception object. Bases which are ambiguous or otherwise // inaccessible are not catchable types. llvm::SmallVector UnambiguousPublicSubobjects; getUnambiguousPublicSubobjects(RD, UnambiguousPublicSubobjects); for (CXXRecordDecl *Subobject : UnambiguousPublicSubobjects) { // Attempt to lookup the copy constructor. Various pieces of machinery // will spring into action, like template instantiation, which means this // cannot be a simple walk of the class's decls. Instead, we must perform // lookup and overload resolution. CXXConstructorDecl *CD = LookupCopyingConstructor(Subobject, 0); if (!CD) continue; // Mark the constructor referenced as it is used by this throw expression. MarkFunctionReferenced(E->getExprLoc(), CD); // Skip this copy constructor if it is trivial, we don't need to record it // in the catchable type data. if (CD->isTrivial()) continue; // The copy constructor is non-trivial, create a mapping from this class // type to this constructor. // N.B. The selection of copy constructor is not sensitive to this // particular throw-site. Lookup will be performed at the catch-site to // ensure that the copy constructor is, in fact, accessible (via // friendship or any other means). 
Context.addCopyConstructorForExceptionObject(Subobject, CD); // We don't keep the instantiated default argument expressions around so // we must rebuild them here. for (unsigned I = 1, E = CD->getNumParams(); I != E; ++I) { if (CheckCXXDefaultArgExpr(ThrowLoc, CD, CD->getParamDecl(I))) return true; } } } return false; } static QualType adjustCVQualifiersForCXXThisWithinLambda( ArrayRef FunctionScopes, QualType ThisTy, DeclContext *CurSemaContext, ASTContext &ASTCtx) { QualType ClassType = ThisTy->getPointeeType(); LambdaScopeInfo *CurLSI = nullptr; DeclContext *CurDC = CurSemaContext; // Iterate through the stack of lambdas starting from the innermost lambda to // the outermost lambda, checking if '*this' is ever captured by copy - since // that could change the cv-qualifiers of the '*this' object. // The object referred to by '*this' starts out with the cv-qualifiers of its // member function. We then start with the innermost lambda and iterate // outward checking to see if any lambda performs a by-copy capture of '*this' // - and if so, any nested lambda must respect the 'constness' of that // capturing lamdbda's call operator. // // Since the FunctionScopeInfo stack is representative of the lexical // nesting of the lambda expressions during initial parsing (and is the best // place for querying information about captures about lambdas that are // partially processed) and perhaps during instantiation of function templates // that contain lambda expressions that need to be transformed BUT not // necessarily during instantiation of a nested generic lambda's function call // operator (which might even be instantiated at the end of the TU) - at which // time the DeclContext tree is mature enough to query capture information // reliably - we use a two pronged approach to walk through all the lexically // enclosing lambda expressions: // // 1) Climb down the FunctionScopeInfo stack as long as each item represents // a Lambda (i.e. 
LambdaScopeInfo) AND each LSI's 'closure-type' is lexically // enclosed by the call-operator of the LSI below it on the stack (while // tracking the enclosing DC for step 2 if needed). Note the topmost LSI on // the stack represents the innermost lambda. // // 2) If we run out of enclosing LSI's, check if the enclosing DeclContext // represents a lambda's call operator. If it does, we must be instantiating // a generic lambda's call operator (represented by the Current LSI, and // should be the only scenario where an inconsistency between the LSI and the // DeclContext should occur), so climb out the DeclContexts if they // represent lambdas, while querying the corresponding closure types // regarding capture information. // 1) Climb down the function scope info stack. for (int I = FunctionScopes.size(); I-- && isa(FunctionScopes[I]) && (!CurLSI || !CurLSI->Lambda || CurLSI->Lambda->getDeclContext() == cast(FunctionScopes[I])->CallOperator); CurDC = getLambdaAwareParentOfDeclContext(CurDC)) { CurLSI = cast(FunctionScopes[I]); if (!CurLSI->isCXXThisCaptured()) continue; auto C = CurLSI->getCXXThisCapture(); if (C.isCopyCapture()) { ClassType.removeLocalCVRQualifiers(Qualifiers::CVRMask); if (CurLSI->CallOperator->isConst()) ClassType.addConst(); return ASTCtx.getPointerType(ClassType); } } // 2) We've run out of ScopeInfos but check if CurDC is a lambda (which can // happen during instantiation of its nested generic lambda call operator) if (isLambdaCallOperator(CurDC)) { assert(CurLSI && "While computing 'this' capture-type for a generic " "lambda, we must have a corresponding LambdaScopeInfo"); assert(isGenericLambdaCallOperatorSpecialization(CurLSI->CallOperator) && "While computing 'this' capture-type for a generic lambda, when we " "run out of enclosing LSI's, yet the enclosing DC is a " "lambda-call-operator we must be (i.e. 
Current LSI) in a generic " "lambda call oeprator"); assert(CurDC == getLambdaAwareParentOfDeclContext(CurLSI->CallOperator)); auto IsThisCaptured = [](CXXRecordDecl *Closure, bool &IsByCopy, bool &IsConst) { IsConst = false; IsByCopy = false; for (auto &&C : Closure->captures()) { if (C.capturesThis()) { if (C.getCaptureKind() == LCK_StarThis) IsByCopy = true; if (Closure->getLambdaCallOperator()->isConst()) IsConst = true; return true; } } return false; }; bool IsByCopyCapture = false; bool IsConstCapture = false; CXXRecordDecl *Closure = cast(CurDC->getParent()); while (Closure && IsThisCaptured(Closure, IsByCopyCapture, IsConstCapture)) { if (IsByCopyCapture) { ClassType.removeLocalCVRQualifiers(Qualifiers::CVRMask); if (IsConstCapture) ClassType.addConst(); return ASTCtx.getPointerType(ClassType); } Closure = isLambdaCallOperator(Closure->getParent()) ? cast(Closure->getParent()->getParent()) : nullptr; } } return ASTCtx.getPointerType(ClassType); } QualType Sema::getCurrentThisType() { DeclContext *DC = getFunctionLevelDeclContext(); QualType ThisTy = CXXThisTypeOverride; if (CXXMethodDecl *method = dyn_cast(DC)) { if (method && method->isInstance()) ThisTy = method->getThisType(Context); } if (ThisTy.isNull() && isLambdaCallOperator(CurContext) && inTemplateInstantiation()) { assert(isa(DC) && "Trying to get 'this' type from static method?"); // This is a lambda call operator that is being instantiated as a default // initializer. DC must point to the enclosing class type, so we can recover // the 'this' type from it. QualType ClassTy = Context.getTypeDeclType(cast(DC)); // There are no cv-qualifiers for 'this' within default initializers, // per [expr.prim.general]p4. ThisTy = Context.getPointerType(ClassTy); } // If we are within a lambda's call operator, the cv-qualifiers of 'this' // might need to be adjusted if the lambda or any of its enclosing lambda's // captures '*this' by copy. 
if (!ThisTy.isNull() && isLambdaCallOperator(CurContext)) return adjustCVQualifiersForCXXThisWithinLambda(FunctionScopes, ThisTy, CurContext, Context); return ThisTy; } Sema::CXXThisScopeRAII::CXXThisScopeRAII(Sema &S, Decl *ContextDecl, unsigned CXXThisTypeQuals, bool Enabled) : S(S), OldCXXThisTypeOverride(S.CXXThisTypeOverride), Enabled(false) { if (!Enabled || !ContextDecl) return; CXXRecordDecl *Record = nullptr; if (ClassTemplateDecl *Template = dyn_cast(ContextDecl)) Record = Template->getTemplatedDecl(); else Record = cast(ContextDecl); // We care only for CVR qualifiers here, so cut everything else. CXXThisTypeQuals &= Qualifiers::FastMask; S.CXXThisTypeOverride = S.Context.getPointerType( S.Context.getRecordType(Record).withCVRQualifiers(CXXThisTypeQuals)); this->Enabled = true; } Sema::CXXThisScopeRAII::~CXXThisScopeRAII() { if (Enabled) { S.CXXThisTypeOverride = OldCXXThisTypeOverride; } } static Expr *captureThis(Sema &S, ASTContext &Context, RecordDecl *RD, QualType ThisTy, SourceLocation Loc, const bool ByCopy) { QualType AdjustedThisTy = ThisTy; // The type of the corresponding data member (not a 'this' pointer if 'by // copy'). QualType CaptureThisFieldTy = ThisTy; if (ByCopy) { // If we are capturing the object referred to by '*this' by copy, ignore any // cv qualifiers inherited from the type of the member function for the type // of the closure-type's corresponding data member and any use of 'this'. 
CaptureThisFieldTy = ThisTy->getPointeeType(); CaptureThisFieldTy.removeLocalCVRQualifiers(Qualifiers::CVRMask); AdjustedThisTy = Context.getPointerType(CaptureThisFieldTy); } FieldDecl *Field = FieldDecl::Create( Context, RD, Loc, Loc, nullptr, CaptureThisFieldTy, Context.getTrivialTypeSourceInfo(CaptureThisFieldTy, Loc), nullptr, false, ICIS_NoInit); Field->setImplicit(true); Field->setAccess(AS_private); RD->addDecl(Field); Expr *This = new (Context) CXXThisExpr(Loc, ThisTy, /*isImplicit*/ true); if (ByCopy) { Expr *StarThis = S.CreateBuiltinUnaryOp(Loc, UO_Deref, This).get(); InitializedEntity Entity = InitializedEntity::InitializeLambdaCapture( nullptr, CaptureThisFieldTy, Loc); InitializationKind InitKind = InitializationKind::CreateDirect(Loc, Loc, Loc); InitializationSequence Init(S, Entity, InitKind, StarThis); ExprResult ER = Init.Perform(S, Entity, InitKind, StarThis); if (ER.isInvalid()) return nullptr; return ER.get(); } return This; } bool Sema::CheckCXXThisCapture(SourceLocation Loc, const bool Explicit, bool BuildAndDiagnose, const unsigned *const FunctionScopeIndexToStopAt, const bool ByCopy) { // We don't need to capture this in an unevaluated context. if (isUnevaluatedContext() && !Explicit) return true; assert((!ByCopy || Explicit) && "cannot implicitly capture *this by value"); const int MaxFunctionScopesIndex = FunctionScopeIndexToStopAt ? *FunctionScopeIndexToStopAt : FunctionScopes.size() - 1; // Check that we can capture the *enclosing object* (referred to by '*this') // by the capturing-entity/closure (lambda/block/etc) at // MaxFunctionScopesIndex-deep on the FunctionScopes stack. // Note: The *enclosing object* can only be captured by-value by a // closure that is a lambda, using the explicit notation: // [*this] { ... }. // Every other capture of the *enclosing object* results in its by-reference // capture. 
// For a closure 'L' (at MaxFunctionScopesIndex in the FunctionScopes // stack), we can capture the *enclosing object* only if: // - 'L' has an explicit byref or byval capture of the *enclosing object* // - or, 'L' has an implicit capture. // AND // -- there is no enclosing closure // -- or, there is some enclosing closure 'E' that has already captured the // *enclosing object*, and every intervening closure (if any) between 'E' // and 'L' can implicitly capture the *enclosing object*. // -- or, every enclosing closure can implicitly capture the // *enclosing object* unsigned NumCapturingClosures = 0; for (int idx = MaxFunctionScopesIndex; idx >= 0; idx--) { if (CapturingScopeInfo *CSI = dyn_cast(FunctionScopes[idx])) { if (CSI->CXXThisCaptureIndex != 0) { // 'this' is already being captured; there isn't anything more to do. CSI->Captures[CSI->CXXThisCaptureIndex - 1].markUsed(BuildAndDiagnose); break; } LambdaScopeInfo *LSI = dyn_cast(CSI); if (LSI && isGenericLambdaCallOperatorSpecialization(LSI->CallOperator)) { // This context can't implicitly capture 'this'; fail out. if (BuildAndDiagnose) Diag(Loc, diag::err_this_capture) << (Explicit && idx == MaxFunctionScopesIndex); return true; } if (CSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_LambdaByref || CSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_LambdaByval || CSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_Block || CSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_CapturedRegion || (Explicit && idx == MaxFunctionScopesIndex)) { // Regarding (Explicit && idx == MaxFunctionScopesIndex): only the first // iteration through can be an explicit capture, all enclosing closures, // if any, must perform implicit captures. // This closure can capture 'this'; continue looking upwards. NumCapturingClosures++; continue; } // This context can't implicitly capture 'this'; fail out. 
if (BuildAndDiagnose) Diag(Loc, diag::err_this_capture) << (Explicit && idx == MaxFunctionScopesIndex); return true; } break; } if (!BuildAndDiagnose) return false; // If we got here, then the closure at MaxFunctionScopesIndex on the // FunctionScopes stack, can capture the *enclosing object*, so capture it // (including implicit by-reference captures in any enclosing closures). // In the loop below, respect the ByCopy flag only for the closure requesting // the capture (i.e. first iteration through the loop below). Ignore it for // all enclosing closure's up to NumCapturingClosures (since they must be // implicitly capturing the *enclosing object* by reference (see loop // above)). assert((!ByCopy || dyn_cast(FunctionScopes[MaxFunctionScopesIndex])) && "Only a lambda can capture the enclosing object (referred to by " "*this) by copy"); // FIXME: We need to delay this marking in PotentiallyPotentiallyEvaluated // contexts. QualType ThisTy = getCurrentThisType(); for (int idx = MaxFunctionScopesIndex; NumCapturingClosures; --idx, --NumCapturingClosures) { CapturingScopeInfo *CSI = cast(FunctionScopes[idx]); Expr *ThisExpr = nullptr; if (LambdaScopeInfo *LSI = dyn_cast(CSI)) { // For lambda expressions, build a field and an initializing expression, // and capture the *enclosing object* by copy only if this is the first // iteration. ThisExpr = captureThis(*this, Context, LSI->Lambda, ThisTy, Loc, ByCopy && idx == MaxFunctionScopesIndex); } else if (CapturedRegionScopeInfo *RSI = dyn_cast(FunctionScopes[idx])) ThisExpr = captureThis(*this, Context, RSI->TheRecordDecl, ThisTy, Loc, false/*ByCopy*/); bool isNested = NumCapturingClosures > 1; CSI->addThisCapture(isNested, Loc, ThisExpr, ByCopy); } return false; } ExprResult Sema::ActOnCXXThis(SourceLocation Loc) { /// C++ 9.3.2: In the body of a non-static member function, the keyword this /// is a non-lvalue expression whose value is the address of the object for /// which the function is called. 
QualType ThisTy = getCurrentThisType(); if (ThisTy.isNull()) return Diag(Loc, diag::err_invalid_this_use); CheckCXXThisCapture(Loc); return new (Context) CXXThisExpr(Loc, ThisTy, /*isImplicit=*/false); } bool Sema::isThisOutsideMemberFunctionBody(QualType BaseType) { // If we're outside the body of a member function, then we'll have a specified // type for 'this'. if (CXXThisTypeOverride.isNull()) return false; // Determine whether we're looking into a class that's currently being // defined. CXXRecordDecl *Class = BaseType->getAsCXXRecordDecl(); return Class && Class->isBeingDefined(); } /// Parse construction of a specified type. /// Can be interpreted either as function-style casting ("int(x)") /// or class type construction ("ClassType(x,y,z)") /// or creation of a value-initialized type ("int()"). ExprResult Sema::ActOnCXXTypeConstructExpr(ParsedType TypeRep, SourceLocation LParenOrBraceLoc, MultiExprArg exprs, SourceLocation RParenOrBraceLoc, bool ListInitialization) { if (!TypeRep) return ExprError(); TypeSourceInfo *TInfo; QualType Ty = GetTypeFromParser(TypeRep, &TInfo); if (!TInfo) TInfo = Context.getTrivialTypeSourceInfo(Ty, SourceLocation()); auto Result = BuildCXXTypeConstructExpr(TInfo, LParenOrBraceLoc, exprs, RParenOrBraceLoc, ListInitialization); // Avoid creating a non-type-dependent expression that contains typos. // Non-type-dependent expressions are liable to be discarded without // checking for embedded typos. 
if (!Result.isInvalid() && Result.get()->isInstantiationDependent() && !Result.get()->isTypeDependent()) Result = CorrectDelayedTyposInExpr(Result.get()); return Result; } ExprResult Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo, SourceLocation LParenOrBraceLoc, MultiExprArg Exprs, SourceLocation RParenOrBraceLoc, bool ListInitialization) { QualType Ty = TInfo->getType(); SourceLocation TyBeginLoc = TInfo->getTypeLoc().getBeginLoc(); if (Ty->isDependentType() || CallExpr::hasAnyTypeDependentArguments(Exprs)) { // FIXME: CXXUnresolvedConstructExpr does not model list-initialization // directly. We work around this by dropping the locations of the braces. SourceRange Locs = ListInitialization ? SourceRange() : SourceRange(LParenOrBraceLoc, RParenOrBraceLoc); return CXXUnresolvedConstructExpr::Create(Context, TInfo, Locs.getBegin(), Exprs, Locs.getEnd()); } assert((!ListInitialization || (Exprs.size() == 1 && isa(Exprs[0]))) && "List initialization must have initializer list as expression."); SourceRange FullRange = SourceRange(TyBeginLoc, RParenOrBraceLoc); InitializedEntity Entity = InitializedEntity::InitializeTemporary(TInfo); InitializationKind Kind = Exprs.size() ? ListInitialization ? InitializationKind::CreateDirectList( TyBeginLoc, LParenOrBraceLoc, RParenOrBraceLoc) : InitializationKind::CreateDirect(TyBeginLoc, LParenOrBraceLoc, RParenOrBraceLoc) : InitializationKind::CreateValue(TyBeginLoc, LParenOrBraceLoc, RParenOrBraceLoc); // C++1z [expr.type.conv]p1: // If the type is a placeholder for a deduced class type, [...perform class // template argument deduction...] 
DeducedType *Deduced = Ty->getContainedDeducedType(); if (Deduced && isa(Deduced)) { Ty = DeduceTemplateSpecializationFromInitializer(TInfo, Entity, Kind, Exprs); if (Ty.isNull()) return ExprError(); Entity = InitializedEntity::InitializeTemporary(TInfo, Ty); } // C++ [expr.type.conv]p1: // If the expression list is a parenthesized single expression, the type // conversion expression is equivalent (in definedness, and if defined in // meaning) to the corresponding cast expression. if (Exprs.size() == 1 && !ListInitialization && !isa(Exprs[0])) { Expr *Arg = Exprs[0]; return BuildCXXFunctionalCastExpr(TInfo, Ty, LParenOrBraceLoc, Arg, RParenOrBraceLoc); } // For an expression of the form T(), T shall not be an array type. QualType ElemTy = Ty; if (Ty->isArrayType()) { if (!ListInitialization) return ExprError(Diag(TyBeginLoc, diag::err_value_init_for_array_type) << FullRange); ElemTy = Context.getBaseElementType(Ty); } // There doesn't seem to be an explicit rule against this but sanity demands // we only construct objects with object types. if (Ty->isFunctionType()) return ExprError(Diag(TyBeginLoc, diag::err_init_for_function_type) << Ty << FullRange); // C++17 [expr.type.conv]p2: // If the type is cv void and the initializer is (), the expression is a // prvalue of the specified type that performs no initialization. if (!Ty->isVoidType() && RequireCompleteType(TyBeginLoc, ElemTy, diag::err_invalid_incomplete_type_use, FullRange)) return ExprError(); // Otherwise, the expression is a prvalue of the specified type whose // result object is direct-initialized (11.6) with the initializer. 
InitializationSequence InitSeq(*this, Entity, Kind, Exprs); ExprResult Result = InitSeq.Perform(*this, Entity, Kind, Exprs); if (Result.isInvalid()) return Result; Expr *Inner = Result.get(); if (CXXBindTemporaryExpr *BTE = dyn_cast_or_null(Inner)) Inner = BTE->getSubExpr(); if (!isa(Inner) && !isa(Inner)) { // If we created a CXXTemporaryObjectExpr, that node also represents the // functional cast. Otherwise, create an explicit cast to represent // the syntactic form of a functional-style cast that was used here. // // FIXME: Creating a CXXFunctionalCastExpr around a CXXConstructExpr // would give a more consistent AST representation than using a // CXXTemporaryObjectExpr. It's also weird that the functional cast // is sometimes handled by initialization and sometimes not. QualType ResultType = Result.get()->getType(); SourceRange Locs = ListInitialization ? SourceRange() : SourceRange(LParenOrBraceLoc, RParenOrBraceLoc); Result = CXXFunctionalCastExpr::Create( Context, ResultType, Expr::getValueKindForType(Ty), TInfo, CK_NoOp, Result.get(), /*Path=*/nullptr, Locs.getBegin(), Locs.getEnd()); } return Result; } /// Determine whether the given function is a non-placement /// deallocation function. 
static bool isNonPlacementDeallocationFunction(Sema &S, FunctionDecl *FD) { if (CXXMethodDecl *Method = dyn_cast(FD)) return Method->isUsualDeallocationFunction(); if (FD->getOverloadedOperator() != OO_Delete && FD->getOverloadedOperator() != OO_Array_Delete) return false; unsigned UsualParams = 1; if (S.getLangOpts().SizedDeallocation && UsualParams < FD->getNumParams() && S.Context.hasSameUnqualifiedType( FD->getParamDecl(UsualParams)->getType(), S.Context.getSizeType())) ++UsualParams; if (S.getLangOpts().AlignedAllocation && UsualParams < FD->getNumParams() && S.Context.hasSameUnqualifiedType( FD->getParamDecl(UsualParams)->getType(), S.Context.getTypeDeclType(S.getStdAlignValT()))) ++UsualParams; return UsualParams == FD->getNumParams(); } namespace { struct UsualDeallocFnInfo { UsualDeallocFnInfo() : Found(), FD(nullptr) {} UsualDeallocFnInfo(Sema &S, DeclAccessPair Found) : Found(Found), FD(dyn_cast(Found->getUnderlyingDecl())), Destroying(false), HasSizeT(false), HasAlignValT(false), CUDAPref(Sema::CFP_Native) { // A function template declaration is never a usual deallocation function. if (!FD) return; unsigned NumBaseParams = 1; if (FD->isDestroyingOperatorDelete()) { Destroying = true; ++NumBaseParams; } if (FD->getNumParams() == NumBaseParams + 2) HasAlignValT = HasSizeT = true; else if (FD->getNumParams() == NumBaseParams + 1) { HasSizeT = FD->getParamDecl(NumBaseParams)->getType()->isIntegerType(); HasAlignValT = !HasSizeT; } // In CUDA, determine how much we'd like / dislike to call this. if (S.getLangOpts().CUDA) if (auto *Caller = dyn_cast(S.CurContext)) CUDAPref = S.IdentifyCUDAPreference(Caller, FD); } explicit operator bool() const { return FD; } bool isBetterThan(const UsualDeallocFnInfo &Other, bool WantSize, bool WantAlign) const { // C++ P0722: // A destroying operator delete is preferred over a non-destroying // operator delete. 
if (Destroying != Other.Destroying) return Destroying; // C++17 [expr.delete]p10: // If the type has new-extended alignment, a function with a parameter // of type std::align_val_t is preferred; otherwise a function without // such a parameter is preferred if (HasAlignValT != Other.HasAlignValT) return HasAlignValT == WantAlign; if (HasSizeT != Other.HasSizeT) return HasSizeT == WantSize; // Use CUDA call preference as a tiebreaker. return CUDAPref > Other.CUDAPref; } DeclAccessPair Found; FunctionDecl *FD; bool Destroying, HasSizeT, HasAlignValT; Sema::CUDAFunctionPreference CUDAPref; }; } /// Determine whether a type has new-extended alignment. This may be called when /// the type is incomplete (for a delete-expression with an incomplete pointee /// type), in which case it will conservatively return false if the alignment is /// not known. static bool hasNewExtendedAlignment(Sema &S, QualType AllocType) { return S.getLangOpts().AlignedAllocation && S.getASTContext().getTypeAlignIfKnown(AllocType) > S.getASTContext().getTargetInfo().getNewAlign(); } /// Select the correct "usual" deallocation function to use from a selection of /// deallocation functions (either global or class-scope). static UsualDeallocFnInfo resolveDeallocationOverload( Sema &S, LookupResult &R, bool WantSize, bool WantAlign, llvm::SmallVectorImpl *BestFns = nullptr) { UsualDeallocFnInfo Best; for (auto I = R.begin(), E = R.end(); I != E; ++I) { UsualDeallocFnInfo Info(S, I.getPair()); if (!Info || !isNonPlacementDeallocationFunction(S, Info.FD) || Info.CUDAPref == Sema::CFP_Never) continue; if (!Best) { Best = Info; if (BestFns) BestFns->push_back(Info); continue; } if (Best.isBetterThan(Info, WantSize, WantAlign)) continue; // If more than one preferred function is found, all non-preferred // functions are eliminated from further consideration. 
if (BestFns && Info.isBetterThan(Best, WantSize, WantAlign)) BestFns->clear(); Best = Info; if (BestFns) BestFns->push_back(Info); } return Best; } /// Determine whether a given type is a class for which 'delete[]' would call /// a member 'operator delete[]' with a 'size_t' parameter. This implies that /// we need to store the array size (even if the type is /// trivially-destructible). static bool doesUsualArrayDeleteWantSize(Sema &S, SourceLocation loc, QualType allocType) { const RecordType *record = allocType->getBaseElementTypeUnsafe()->getAs(); if (!record) return false; // Try to find an operator delete[] in class scope. DeclarationName deleteName = S.Context.DeclarationNames.getCXXOperatorName(OO_Array_Delete); LookupResult ops(S, deleteName, loc, Sema::LookupOrdinaryName); S.LookupQualifiedName(ops, record->getDecl()); // We're just doing this for information. ops.suppressDiagnostics(); // Very likely: there's no operator delete[]. if (ops.empty()) return false; // If it's ambiguous, it should be illegal to call operator delete[] // on this thing, so it doesn't matter if we allocate extra space or not. if (ops.isAmbiguous()) return false; // C++17 [expr.delete]p10: // If the deallocation functions have class scope, the one without a // parameter of type std::size_t is selected. auto Best = resolveDeallocationOverload( S, ops, /*WantSize*/false, /*WantAlign*/hasNewExtendedAlignment(S, allocType)); return Best && Best.HasSizeT; } /// Parsed a C++ 'new' expression (C++ 5.3.4). /// /// E.g.: /// @code new (memory) int[size][4] @endcode /// or /// @code ::new Foo(23, "hello") @endcode /// /// \param StartLoc The first location of the expression. /// \param UseGlobal True if 'new' was prefixed with '::'. /// \param PlacementLParen Opening paren of the placement arguments. /// \param PlacementArgs Placement new arguments. /// \param PlacementRParen Closing paren of the placement arguments. /// \param TypeIdParens If the type is in parens, the source range. 
/// \param D The type to be allocated, as well as array dimensions. /// \param Initializer The initializing expression or initializer-list, or null /// if there is none. ExprResult Sema::ActOnCXXNew(SourceLocation StartLoc, bool UseGlobal, SourceLocation PlacementLParen, MultiExprArg PlacementArgs, SourceLocation PlacementRParen, SourceRange TypeIdParens, Declarator &D, Expr *Initializer) { Expr *ArraySize = nullptr; // If the specified type is an array, unwrap it and save the expression. if (D.getNumTypeObjects() > 0 && D.getTypeObject(0).Kind == DeclaratorChunk::Array) { DeclaratorChunk &Chunk = D.getTypeObject(0); if (D.getDeclSpec().hasAutoTypeSpec()) return ExprError(Diag(Chunk.Loc, diag::err_new_array_of_auto) << D.getSourceRange()); if (Chunk.Arr.hasStatic) return ExprError(Diag(Chunk.Loc, diag::err_static_illegal_in_new) << D.getSourceRange()); if (!Chunk.Arr.NumElts) return ExprError(Diag(Chunk.Loc, diag::err_array_new_needs_size) << D.getSourceRange()); ArraySize = static_cast(Chunk.Arr.NumElts); D.DropFirstTypeObject(); } // Every dimension shall be of constant size. if (ArraySize) { for (unsigned I = 0, N = D.getNumTypeObjects(); I < N; ++I) { if (D.getTypeObject(I).Kind != DeclaratorChunk::Array) break; DeclaratorChunk::ArrayTypeInfo &Array = D.getTypeObject(I).Arr; if (Expr *NumElts = (Expr *)Array.NumElts) { if (!NumElts->isTypeDependent() && !NumElts->isValueDependent()) { if (getLangOpts().CPlusPlus14) { // C++1y [expr.new]p6: Every constant-expression in a noptr-new-declarator // shall be a converted constant expression (5.19) of type std::size_t // and shall evaluate to a strictly positive value. 
unsigned IntWidth = Context.getTargetInfo().getIntWidth(); assert(IntWidth && "Builtin type of size 0?"); llvm::APSInt Value(IntWidth); Array.NumElts = CheckConvertedConstantExpression(NumElts, Context.getSizeType(), Value, CCEK_NewExpr) .get(); } else { Array.NumElts = VerifyIntegerConstantExpression(NumElts, nullptr, diag::err_new_array_nonconst) .get(); } if (!Array.NumElts) return ExprError(); } } } } TypeSourceInfo *TInfo = GetTypeForDeclarator(D, /*Scope=*/nullptr); QualType AllocType = TInfo->getType(); if (D.isInvalidType()) return ExprError(); SourceRange DirectInitRange; if (ParenListExpr *List = dyn_cast_or_null(Initializer)) DirectInitRange = List->getSourceRange(); return BuildCXXNew(SourceRange(StartLoc, D.getLocEnd()), UseGlobal, PlacementLParen, PlacementArgs, PlacementRParen, TypeIdParens, AllocType, TInfo, ArraySize, DirectInitRange, Initializer); } static bool isLegalArrayNewInitializer(CXXNewExpr::InitializationStyle Style, Expr *Init) { if (!Init) return true; if (ParenListExpr *PLE = dyn_cast(Init)) return PLE->getNumExprs() == 0; if (isa(Init)) return true; else if (CXXConstructExpr *CCE = dyn_cast(Init)) return !CCE->isListInitialization() && CCE->getConstructor()->isDefaultConstructor(); else if (Style == CXXNewExpr::ListInit) { assert(isa(Init) && "Shouldn't create list CXXConstructExprs for arrays."); return true; } return false; } // Emit a diagnostic if an aligned allocation/deallocation function that is not // implemented in the standard library is selected. static void diagnoseUnavailableAlignedAllocation(const FunctionDecl &FD, SourceLocation Loc, bool IsDelete, Sema &S) { if (!S.getLangOpts().AlignedAllocationUnavailable) return; // Return if there is a definition. 
if (FD.isDefined()) return; bool IsAligned = false; if (FD.isReplaceableGlobalAllocationFunction(&IsAligned) && IsAligned) { const llvm::Triple &T = S.getASTContext().getTargetInfo().getTriple(); StringRef OSName = AvailabilityAttr::getPlatformNameSourceSpelling( S.getASTContext().getTargetInfo().getPlatformName()); S.Diag(Loc, diag::warn_aligned_allocation_unavailable) << IsDelete << FD.getType().getAsString() << OSName << alignedAllocMinVersion(T.getOS()).getAsString(); S.Diag(Loc, diag::note_silence_unligned_allocation_unavailable); } } ExprResult Sema::BuildCXXNew(SourceRange Range, bool UseGlobal, SourceLocation PlacementLParen, MultiExprArg PlacementArgs, SourceLocation PlacementRParen, SourceRange TypeIdParens, QualType AllocType, TypeSourceInfo *AllocTypeInfo, Expr *ArraySize, SourceRange DirectInitRange, Expr *Initializer) { SourceRange TypeRange = AllocTypeInfo->getTypeLoc().getSourceRange(); SourceLocation StartLoc = Range.getBegin(); CXXNewExpr::InitializationStyle initStyle; if (DirectInitRange.isValid()) { assert(Initializer && "Have parens but no initializer."); initStyle = CXXNewExpr::CallInit; } else if (Initializer && isa(Initializer)) initStyle = CXXNewExpr::ListInit; else { assert((!Initializer || isa(Initializer) || isa(Initializer)) && "Initializer expression that cannot have been implicitly created."); initStyle = CXXNewExpr::NoInit; } Expr **Inits = &Initializer; unsigned NumInits = Initializer ? 1 : 0; if (ParenListExpr *List = dyn_cast_or_null(Initializer)) { assert(initStyle == CXXNewExpr::CallInit && "paren init for non-call init"); Inits = List->getExprs(); NumInits = List->getNumExprs(); } // C++11 [expr.new]p15: // A new-expression that creates an object of type T initializes that // object as follows: InitializationKind Kind // - If the new-initializer is omitted, the object is default- // initialized (8.5); if no initialization is performed, // the object has indeterminate value = initStyle == CXXNewExpr::NoInit ? 
InitializationKind::CreateDefault(TypeRange.getBegin()) // - Otherwise, the new-initializer is interpreted according to the // initialization rules of 8.5 for direct-initialization. : initStyle == CXXNewExpr::ListInit ? InitializationKind::CreateDirectList(TypeRange.getBegin(), Initializer->getLocStart(), Initializer->getLocEnd()) : InitializationKind::CreateDirect(TypeRange.getBegin(), DirectInitRange.getBegin(), DirectInitRange.getEnd()); // C++11 [dcl.spec.auto]p6. Deduce the type which 'auto' stands in for. auto *Deduced = AllocType->getContainedDeducedType(); if (Deduced && isa(Deduced)) { if (ArraySize) return ExprError(Diag(ArraySize->getExprLoc(), diag::err_deduced_class_template_compound_type) << /*array*/ 2 << ArraySize->getSourceRange()); InitializedEntity Entity = InitializedEntity::InitializeNew(StartLoc, AllocType); AllocType = DeduceTemplateSpecializationFromInitializer( AllocTypeInfo, Entity, Kind, MultiExprArg(Inits, NumInits)); if (AllocType.isNull()) return ExprError(); } else if (Deduced) { bool Braced = (initStyle == CXXNewExpr::ListInit); if (NumInits == 1) { if (auto p = dyn_cast_or_null(Inits[0])) { Inits = p->getInits(); NumInits = p->getNumInits(); Braced = true; } } if (initStyle == CXXNewExpr::NoInit || NumInits == 0) return ExprError(Diag(StartLoc, diag::err_auto_new_requires_ctor_arg) << AllocType << TypeRange); if (NumInits > 1) { Expr *FirstBad = Inits[1]; return ExprError(Diag(FirstBad->getLocStart(), diag::err_auto_new_ctor_multiple_expressions) << AllocType << TypeRange); } if (Braced && !getLangOpts().CPlusPlus17) Diag(Initializer->getLocStart(), diag::ext_auto_new_list_init) << AllocType << TypeRange; Expr *Deduce = Inits[0]; QualType DeducedType; if (DeduceAutoType(AllocTypeInfo, Deduce, DeducedType) == DAR_Failed) return ExprError(Diag(StartLoc, diag::err_auto_new_deduction_failure) << AllocType << Deduce->getType() << TypeRange << Deduce->getSourceRange()); if (DeducedType.isNull()) return ExprError(); AllocType = 
DeducedType; } // Per C++0x [expr.new]p5, the type being constructed may be a // typedef of an array type. if (!ArraySize) { if (const ConstantArrayType *Array = Context.getAsConstantArrayType(AllocType)) { ArraySize = IntegerLiteral::Create(Context, Array->getSize(), Context.getSizeType(), TypeRange.getEnd()); AllocType = Array->getElementType(); } } if (CheckAllocatedType(AllocType, TypeRange.getBegin(), TypeRange)) return ExprError(); // In ARC, infer 'retaining' for the allocated if (getLangOpts().ObjCAutoRefCount && AllocType.getObjCLifetime() == Qualifiers::OCL_None && AllocType->isObjCLifetimeType()) { AllocType = Context.getLifetimeQualifiedType(AllocType, AllocType->getObjCARCImplicitLifetime()); } QualType ResultType = Context.getPointerType(AllocType); if (ArraySize && ArraySize->getType()->isNonOverloadPlaceholderType()) { ExprResult result = CheckPlaceholderExpr(ArraySize); if (result.isInvalid()) return ExprError(); ArraySize = result.get(); } // C++98 5.3.4p6: "The expression in a direct-new-declarator shall have // integral or enumeration type with a non-negative value." // C++11 [expr.new]p6: The expression [...] shall be of integral or unscoped // enumeration type, or a class type for which a single non-explicit // conversion function to integral or unscoped enumeration type exists. // C++1y [expr.new]p6: The expression [...] is implicitly converted to // std::size_t. llvm::Optional KnownArraySize; if (ArraySize && !ArraySize->isTypeDependent()) { ExprResult ConvertedSize; if (getLangOpts().CPlusPlus14) { assert(Context.getTargetInfo().getIntWidth() && "Builtin type of size 0?"); ConvertedSize = PerformImplicitConversion(ArraySize, Context.getSizeType(), AA_Converting); if (!ConvertedSize.isInvalid() && ArraySize->getType()->getAs()) // Diagnose the compatibility of this conversion. 
Diag(StartLoc, diag::warn_cxx98_compat_array_size_conversion) << ArraySize->getType() << 0 << "'size_t'"; } else { class SizeConvertDiagnoser : public ICEConvertDiagnoser { protected: Expr *ArraySize; public: SizeConvertDiagnoser(Expr *ArraySize) : ICEConvertDiagnoser(/*AllowScopedEnumerations*/false, false, false), ArraySize(ArraySize) {} SemaDiagnosticBuilder diagnoseNotInt(Sema &S, SourceLocation Loc, QualType T) override { return S.Diag(Loc, diag::err_array_size_not_integral) << S.getLangOpts().CPlusPlus11 << T; } SemaDiagnosticBuilder diagnoseIncomplete( Sema &S, SourceLocation Loc, QualType T) override { return S.Diag(Loc, diag::err_array_size_incomplete_type) << T << ArraySize->getSourceRange(); } SemaDiagnosticBuilder diagnoseExplicitConv( Sema &S, SourceLocation Loc, QualType T, QualType ConvTy) override { return S.Diag(Loc, diag::err_array_size_explicit_conversion) << T << ConvTy; } SemaDiagnosticBuilder noteExplicitConv( Sema &S, CXXConversionDecl *Conv, QualType ConvTy) override { return S.Diag(Conv->getLocation(), diag::note_array_size_conversion) << ConvTy->isEnumeralType() << ConvTy; } SemaDiagnosticBuilder diagnoseAmbiguous( Sema &S, SourceLocation Loc, QualType T) override { return S.Diag(Loc, diag::err_array_size_ambiguous_conversion) << T; } SemaDiagnosticBuilder noteAmbiguous( Sema &S, CXXConversionDecl *Conv, QualType ConvTy) override { return S.Diag(Conv->getLocation(), diag::note_array_size_conversion) << ConvTy->isEnumeralType() << ConvTy; } SemaDiagnosticBuilder diagnoseConversion(Sema &S, SourceLocation Loc, QualType T, QualType ConvTy) override { return S.Diag(Loc, S.getLangOpts().CPlusPlus11 ? 
diag::warn_cxx98_compat_array_size_conversion : diag::ext_array_size_conversion) << T << ConvTy->isEnumeralType() << ConvTy; } } SizeDiagnoser(ArraySize); ConvertedSize = PerformContextualImplicitConversion(StartLoc, ArraySize, SizeDiagnoser); } if (ConvertedSize.isInvalid()) return ExprError(); ArraySize = ConvertedSize.get(); QualType SizeType = ArraySize->getType(); if (!SizeType->isIntegralOrUnscopedEnumerationType()) return ExprError(); // C++98 [expr.new]p7: // The expression in a direct-new-declarator shall have integral type // with a non-negative value. // // Let's see if this is a constant < 0. If so, we reject it out of hand, // per CWG1464. Otherwise, if it's not a constant, we must have an // unparenthesized array type. if (!ArraySize->isValueDependent()) { llvm::APSInt Value; // We've already performed any required implicit conversion to integer or // unscoped enumeration type. // FIXME: Per CWG1464, we are required to check the value prior to // converting to size_t. This will never find a negative array size in // C++14 onwards, because Value is always unsigned here! if (ArraySize->isIntegerConstantExpr(Value, Context)) { if (Value.isSigned() && Value.isNegative()) { return ExprError(Diag(ArraySize->getLocStart(), diag::err_typecheck_negative_array_size) << ArraySize->getSourceRange()); } if (!AllocType->isDependentType()) { unsigned ActiveSizeBits = ConstantArrayType::getNumAddressingBits(Context, AllocType, Value); if (ActiveSizeBits > ConstantArrayType::getMaxSizeBits(Context)) return ExprError(Diag(ArraySize->getLocStart(), diag::err_array_too_large) << Value.toString(10) << ArraySize->getSourceRange()); } KnownArraySize = Value.getZExtValue(); } else if (TypeIdParens.isValid()) { // Can't have dynamic array size when the type-id is in parentheses. 
Diag(ArraySize->getLocStart(), diag::ext_new_paren_array_nonconst) << ArraySize->getSourceRange() << FixItHint::CreateRemoval(TypeIdParens.getBegin()) << FixItHint::CreateRemoval(TypeIdParens.getEnd()); TypeIdParens = SourceRange(); } } // Note that we do *not* convert the argument in any way. It can // be signed, larger than size_t, whatever. } FunctionDecl *OperatorNew = nullptr; FunctionDecl *OperatorDelete = nullptr; unsigned Alignment = AllocType->isDependentType() ? 0 : Context.getTypeAlign(AllocType); unsigned NewAlignment = Context.getTargetInfo().getNewAlign(); bool PassAlignment = getLangOpts().AlignedAllocation && Alignment > NewAlignment; AllocationFunctionScope Scope = UseGlobal ? AFS_Global : AFS_Both; if (!AllocType->isDependentType() && !Expr::hasAnyTypeDependentArguments(PlacementArgs) && FindAllocationFunctions(StartLoc, SourceRange(PlacementLParen, PlacementRParen), Scope, Scope, AllocType, ArraySize, PassAlignment, PlacementArgs, OperatorNew, OperatorDelete)) return ExprError(); // If this is an array allocation, compute whether the usual array // deallocation function for the type has a size_t parameter. bool UsualArrayDeleteWantsSize = false; if (ArraySize && !AllocType->isDependentType()) UsualArrayDeleteWantsSize = doesUsualArrayDeleteWantSize(*this, StartLoc, AllocType); SmallVector AllPlaceArgs; if (OperatorNew) { const FunctionProtoType *Proto = OperatorNew->getType()->getAs(); VariadicCallType CallType = Proto->isVariadic() ? VariadicFunction : VariadicDoesNotApply; // We've already converted the placement args, just fill in any default // arguments. Skip the first parameter because we don't have a corresponding // argument. Skip the second parameter too if we're passing in the // alignment; we've already filled it in. if (GatherArgumentsForCall(PlacementLParen, OperatorNew, Proto, PassAlignment ? 
2 : 1, PlacementArgs, AllPlaceArgs, CallType)) return ExprError(); if (!AllPlaceArgs.empty()) PlacementArgs = AllPlaceArgs; // FIXME: This is wrong: PlacementArgs misses out the first (size) argument. DiagnoseSentinelCalls(OperatorNew, PlacementLParen, PlacementArgs); // FIXME: Missing call to CheckFunctionCall or equivalent // Warn if the type is over-aligned and is being allocated by (unaligned) // global operator new. if (PlacementArgs.empty() && !PassAlignment && (OperatorNew->isImplicit() || (OperatorNew->getLocStart().isValid() && getSourceManager().isInSystemHeader(OperatorNew->getLocStart())))) { if (Alignment > NewAlignment) Diag(StartLoc, diag::warn_overaligned_type) << AllocType << unsigned(Alignment / Context.getCharWidth()) << unsigned(NewAlignment / Context.getCharWidth()); } } // Array 'new' can't have any initializers except empty parentheses. // Initializer lists are also allowed, in C++11. Rely on the parser for the // dialect distinction. if (ArraySize && !isLegalArrayNewInitializer(initStyle, Initializer)) { SourceRange InitRange(Inits[0]->getLocStart(), Inits[NumInits - 1]->getLocEnd()); Diag(StartLoc, diag::err_new_array_init_args) << InitRange; return ExprError(); } // If we can perform the initialization, and we've not already done so, // do it now. if (!AllocType->isDependentType() && !Expr::hasAnyTypeDependentArguments( llvm::makeArrayRef(Inits, NumInits))) { // The type we initialize is the complete type, including the array bound. 
QualType InitType; if (KnownArraySize) InitType = Context.getConstantArrayType( AllocType, llvm::APInt(Context.getTypeSize(Context.getSizeType()), *KnownArraySize), ArrayType::Normal, 0); else if (ArraySize) InitType = Context.getIncompleteArrayType(AllocType, ArrayType::Normal, 0); else InitType = AllocType; InitializedEntity Entity = InitializedEntity::InitializeNew(StartLoc, InitType); InitializationSequence InitSeq(*this, Entity, Kind, MultiExprArg(Inits, NumInits)); ExprResult FullInit = InitSeq.Perform(*this, Entity, Kind, MultiExprArg(Inits, NumInits)); if (FullInit.isInvalid()) return ExprError(); // FullInit is our initializer; strip off CXXBindTemporaryExprs, because // we don't want the initialized object to be destructed. // FIXME: We should not create these in the first place. if (CXXBindTemporaryExpr *Binder = dyn_cast_or_null(FullInit.get())) FullInit = Binder->getSubExpr(); Initializer = FullInit.get(); } // Mark the new and delete operators as referenced. if (OperatorNew) { if (DiagnoseUseOfDecl(OperatorNew, StartLoc)) return ExprError(); MarkFunctionReferenced(StartLoc, OperatorNew); diagnoseUnavailableAlignedAllocation(*OperatorNew, StartLoc, false, *this); } if (OperatorDelete) { if (DiagnoseUseOfDecl(OperatorDelete, StartLoc)) return ExprError(); MarkFunctionReferenced(StartLoc, OperatorDelete); diagnoseUnavailableAlignedAllocation(*OperatorDelete, StartLoc, true, *this); } // C++0x [expr.new]p17: // If the new expression creates an array of objects of class type, // access and ambiguity control are done for the destructor. 
QualType BaseAllocType = Context.getBaseElementType(AllocType); if (ArraySize && !BaseAllocType->isDependentType()) { if (const RecordType *BaseRecordType = BaseAllocType->getAs()) { if (CXXDestructorDecl *dtor = LookupDestructor( cast(BaseRecordType->getDecl()))) { MarkFunctionReferenced(StartLoc, dtor); CheckDestructorAccess(StartLoc, dtor, PDiag(diag::err_access_dtor) << BaseAllocType); if (DiagnoseUseOfDecl(dtor, StartLoc)) return ExprError(); } } } return new (Context) CXXNewExpr(Context, UseGlobal, OperatorNew, OperatorDelete, PassAlignment, UsualArrayDeleteWantsSize, PlacementArgs, TypeIdParens, ArraySize, initStyle, Initializer, ResultType, AllocTypeInfo, Range, DirectInitRange); } /// Checks that a type is suitable as the allocated type /// in a new-expression. bool Sema::CheckAllocatedType(QualType AllocType, SourceLocation Loc, SourceRange R) { // C++ 5.3.4p1: "[The] type shall be a complete object type, but not an // abstract class type or array thereof. if (AllocType->isFunctionType()) return Diag(Loc, diag::err_bad_new_type) << AllocType << 0 << R; else if (AllocType->isReferenceType()) return Diag(Loc, diag::err_bad_new_type) << AllocType << 1 << R; else if (!AllocType->isDependentType() && RequireCompleteType(Loc, AllocType, diag::err_new_incomplete_type,R)) return true; else if (RequireNonAbstractType(Loc, AllocType, diag::err_allocation_of_abstract_type)) return true; else if (AllocType->isVariablyModifiedType()) return Diag(Loc, diag::err_variably_modified_new_type) << AllocType; else if (AllocType.getAddressSpace() != LangAS::Default && !getLangOpts().OpenCLCPlusPlus) return Diag(Loc, diag::err_address_space_qualified_new) << AllocType.getUnqualifiedType() << AllocType.getQualifiers().getAddressSpaceAttributePrintValue(); else if (getLangOpts().ObjCAutoRefCount) { if (const ArrayType *AT = Context.getAsArrayType(AllocType)) { QualType BaseAllocType = Context.getBaseElementType(AT); if (BaseAllocType.getObjCLifetime() == Qualifiers::OCL_None && 
BaseAllocType->isObjCLifetimeType()) return Diag(Loc, diag::err_arc_new_array_without_ownership) << BaseAllocType; } } return false; } static bool resolveAllocationOverload( Sema &S, LookupResult &R, SourceRange Range, SmallVectorImpl &Args, bool &PassAlignment, FunctionDecl *&Operator, OverloadCandidateSet *AlignedCandidates, Expr *AlignArg, bool Diagnose) { OverloadCandidateSet Candidates(R.getNameLoc(), OverloadCandidateSet::CSK_Normal); for (LookupResult::iterator Alloc = R.begin(), AllocEnd = R.end(); Alloc != AllocEnd; ++Alloc) { // Even member operator new/delete are implicitly treated as // static, so don't use AddMemberCandidate. NamedDecl *D = (*Alloc)->getUnderlyingDecl(); if (FunctionTemplateDecl *FnTemplate = dyn_cast(D)) { S.AddTemplateOverloadCandidate(FnTemplate, Alloc.getPair(), /*ExplicitTemplateArgs=*/nullptr, Args, Candidates, /*SuppressUserConversions=*/false); continue; } FunctionDecl *Fn = cast(D); S.AddOverloadCandidate(Fn, Alloc.getPair(), Args, Candidates, /*SuppressUserConversions=*/false); } // Do the resolution. OverloadCandidateSet::iterator Best; switch (Candidates.BestViableFunction(S, R.getNameLoc(), Best)) { case OR_Success: { // Got one! FunctionDecl *FnDecl = Best->Function; if (S.CheckAllocationAccess(R.getNameLoc(), Range, R.getNamingClass(), Best->FoundDecl) == Sema::AR_inaccessible) return true; Operator = FnDecl; return false; } case OR_No_Viable_Function: // C++17 [expr.new]p13: // If no matching function is found and the allocated object type has // new-extended alignment, the alignment argument is removed from the // argument list, and overload resolution is performed again. if (PassAlignment) { PassAlignment = false; AlignArg = Args[1]; Args.erase(Args.begin() + 1); return resolveAllocationOverload(S, R, Range, Args, PassAlignment, Operator, &Candidates, AlignArg, Diagnose); } // MSVC will fall back on trying to find a matching global operator new // if operator new[] cannot be found. 
Also, MSVC will leak by not // generating a call to operator delete or operator delete[], but we // will not replicate that bug. // FIXME: Find out how this interacts with the std::align_val_t fallback // once MSVC implements it. if (R.getLookupName().getCXXOverloadedOperator() == OO_Array_New && S.Context.getLangOpts().MSVCCompat) { R.clear(); R.setLookupName(S.Context.DeclarationNames.getCXXOperatorName(OO_New)); S.LookupQualifiedName(R, S.Context.getTranslationUnitDecl()); // FIXME: This will give bad diagnostics pointing at the wrong functions. return resolveAllocationOverload(S, R, Range, Args, PassAlignment, Operator, /*Candidates=*/nullptr, /*AlignArg=*/nullptr, Diagnose); } if (Diagnose) { S.Diag(R.getNameLoc(), diag::err_ovl_no_viable_function_in_call) << R.getLookupName() << Range; // If we have aligned candidates, only note the align_val_t candidates // from AlignedCandidates and the non-align_val_t candidates from // Candidates. if (AlignedCandidates) { auto IsAligned = [](OverloadCandidate &C) { return C.Function->getNumParams() > 1 && C.Function->getParamDecl(1)->getType()->isAlignValT(); }; auto IsUnaligned = [&](OverloadCandidate &C) { return !IsAligned(C); }; // This was an overaligned allocation, so list the aligned candidates // first. 
Args.insert(Args.begin() + 1, AlignArg); AlignedCandidates->NoteCandidates(S, OCD_AllCandidates, Args, "", R.getNameLoc(), IsAligned); Args.erase(Args.begin() + 1); Candidates.NoteCandidates(S, OCD_AllCandidates, Args, "", R.getNameLoc(), IsUnaligned); } else { Candidates.NoteCandidates(S, OCD_AllCandidates, Args); } } return true; case OR_Ambiguous: if (Diagnose) { S.Diag(R.getNameLoc(), diag::err_ovl_ambiguous_call) << R.getLookupName() << Range; Candidates.NoteCandidates(S, OCD_ViableCandidates, Args); } return true; case OR_Deleted: { if (Diagnose) { S.Diag(R.getNameLoc(), diag::err_ovl_deleted_call) << Best->Function->isDeleted() << R.getLookupName() << S.getDeletedOrUnavailableSuffix(Best->Function) << Range; Candidates.NoteCandidates(S, OCD_AllCandidates, Args); } return true; } } llvm_unreachable("Unreachable, bad result from BestViableFunction"); } bool Sema::FindAllocationFunctions(SourceLocation StartLoc, SourceRange Range, AllocationFunctionScope NewScope, AllocationFunctionScope DeleteScope, QualType AllocType, bool IsArray, bool &PassAlignment, MultiExprArg PlaceArgs, FunctionDecl *&OperatorNew, FunctionDecl *&OperatorDelete, bool Diagnose) { // --- Choosing an allocation function --- // C++ 5.3.4p8 - 14 & 18 // 1) If looking in AFS_Global scope for allocation functions, only look in // the global scope. Else, if AFS_Class, only look in the scope of the // allocated class. If AFS_Both, look in both. // 2) If an array size is given, look for operator new[], else look for // operator new. // 3) The first argument is always size_t. Append the arguments from the // placement form. SmallVector AllocArgs; AllocArgs.reserve((PassAlignment ? 2 : 1) + PlaceArgs.size()); // We don't care about the actual value of these arguments. // FIXME: Should the Sema create the expression and embed it in the syntax // tree? Or should the consumer just recalculate the value? // FIXME: Using a dummy value will interact poorly with attribute enable_if. 
IntegerLiteral Size(Context, llvm::APInt::getNullValue( Context.getTargetInfo().getPointerWidth(0)), Context.getSizeType(), SourceLocation()); AllocArgs.push_back(&Size); QualType AlignValT = Context.VoidTy; if (PassAlignment) { DeclareGlobalNewDelete(); AlignValT = Context.getTypeDeclType(getStdAlignValT()); } CXXScalarValueInitExpr Align(AlignValT, nullptr, SourceLocation()); if (PassAlignment) AllocArgs.push_back(&Align); AllocArgs.insert(AllocArgs.end(), PlaceArgs.begin(), PlaceArgs.end()); // C++ [expr.new]p8: // If the allocated type is a non-array type, the allocation // function's name is operator new and the deallocation function's // name is operator delete. If the allocated type is an array // type, the allocation function's name is operator new[] and the // deallocation function's name is operator delete[]. DeclarationName NewName = Context.DeclarationNames.getCXXOperatorName( IsArray ? OO_Array_New : OO_New); QualType AllocElemType = Context.getBaseElementType(AllocType); // Find the allocation function. { LookupResult R(*this, NewName, StartLoc, LookupOrdinaryName); // C++1z [expr.new]p9: // If the new-expression begins with a unary :: operator, the allocation // function's name is looked up in the global scope. Otherwise, if the // allocated type is a class type T or array thereof, the allocation // function's name is looked up in the scope of T. if (AllocElemType->isRecordType() && NewScope != AFS_Global) LookupQualifiedName(R, AllocElemType->getAsCXXRecordDecl()); // We can see ambiguity here if the allocation function is found in // multiple base classes. if (R.isAmbiguous()) return true; // If this lookup fails to find the name, or if the allocated type is not // a class type, the allocation function's name is looked up in the // global scope. 
if (R.empty()) { if (NewScope == AFS_Class) return true; LookupQualifiedName(R, Context.getTranslationUnitDecl()); } if (getLangOpts().OpenCLCPlusPlus && R.empty()) { Diag(StartLoc, diag::err_openclcxx_not_supported) << "default new"; return true; } assert(!R.empty() && "implicitly declared allocation functions not found"); assert(!R.isAmbiguous() && "global allocation functions are ambiguous"); // We do our own custom access checks below. R.suppressDiagnostics(); if (resolveAllocationOverload(*this, R, Range, AllocArgs, PassAlignment, OperatorNew, /*Candidates=*/nullptr, /*AlignArg=*/nullptr, Diagnose)) return true; } // We don't need an operator delete if we're running under -fno-exceptions. if (!getLangOpts().Exceptions) { OperatorDelete = nullptr; return false; } // Note, the name of OperatorNew might have been changed from array to // non-array by resolveAllocationOverload. DeclarationName DeleteName = Context.DeclarationNames.getCXXOperatorName( OperatorNew->getDeclName().getCXXOverloadedOperator() == OO_Array_New ? OO_Array_Delete : OO_Delete); // C++ [expr.new]p19: // // If the new-expression begins with a unary :: operator, the // deallocation function's name is looked up in the global // scope. Otherwise, if the allocated type is a class type T or an // array thereof, the deallocation function's name is looked up in // the scope of T. If this lookup fails to find the name, or if // the allocated type is not a class type or array thereof, the // deallocation function's name is looked up in the global scope. LookupResult FoundDelete(*this, DeleteName, StartLoc, LookupOrdinaryName); if (AllocElemType->isRecordType() && DeleteScope != AFS_Global) { CXXRecordDecl *RD = cast(AllocElemType->getAs()->getDecl()); LookupQualifiedName(FoundDelete, RD); } if (FoundDelete.isAmbiguous()) return true; // FIXME: clean up expressions? 
bool FoundGlobalDelete = FoundDelete.empty(); if (FoundDelete.empty()) { if (DeleteScope == AFS_Class) return true; DeclareGlobalNewDelete(); LookupQualifiedName(FoundDelete, Context.getTranslationUnitDecl()); } FoundDelete.suppressDiagnostics(); SmallVector, 2> Matches; // Whether we're looking for a placement operator delete is dictated // by whether we selected a placement operator new, not by whether // we had explicit placement arguments. This matters for things like // struct A { void *operator new(size_t, int = 0); ... }; // A *a = new A() // // We don't have any definition for what a "placement allocation function" // is, but we assume it's any allocation function whose // parameter-declaration-clause is anything other than (size_t). // // FIXME: Should (size_t, std::align_val_t) also be considered non-placement? // This affects whether an exception from the constructor of an overaligned // type uses the sized or non-sized form of aligned operator delete. bool isPlacementNew = !PlaceArgs.empty() || OperatorNew->param_size() != 1 || OperatorNew->isVariadic(); if (isPlacementNew) { // C++ [expr.new]p20: // A declaration of a placement deallocation function matches the // declaration of a placement allocation function if it has the // same number of parameters and, after parameter transformations // (8.3.5), all parameter types except the first are // identical. [...] // // To perform this comparison, we compute the function type that // the deallocation function should have, and use that type both // for template argument deduction and for comparison purposes. QualType ExpectedFunctionType; { const FunctionProtoType *Proto = OperatorNew->getType()->getAs(); SmallVector ArgTypes; ArgTypes.push_back(Context.VoidPtrTy); for (unsigned I = 1, N = Proto->getNumParams(); I < N; ++I) ArgTypes.push_back(Proto->getParamType(I)); FunctionProtoType::ExtProtoInfo EPI; // FIXME: This is not part of the standard's rule. 
EPI.Variadic = Proto->isVariadic(); ExpectedFunctionType = Context.getFunctionType(Context.VoidTy, ArgTypes, EPI); } for (LookupResult::iterator D = FoundDelete.begin(), DEnd = FoundDelete.end(); D != DEnd; ++D) { FunctionDecl *Fn = nullptr; if (FunctionTemplateDecl *FnTmpl = dyn_cast((*D)->getUnderlyingDecl())) { // Perform template argument deduction to try to match the // expected function type. TemplateDeductionInfo Info(StartLoc); if (DeduceTemplateArguments(FnTmpl, nullptr, ExpectedFunctionType, Fn, Info)) continue; } else Fn = cast((*D)->getUnderlyingDecl()); if (Context.hasSameType(adjustCCAndNoReturn(Fn->getType(), ExpectedFunctionType, /*AdjustExcpetionSpec*/true), ExpectedFunctionType)) Matches.push_back(std::make_pair(D.getPair(), Fn)); } if (getLangOpts().CUDA) EraseUnwantedCUDAMatches(dyn_cast(CurContext), Matches); } else { // C++1y [expr.new]p22: // For a non-placement allocation function, the normal deallocation // function lookup is used // // Per [expr.delete]p10, this lookup prefers a member operator delete // without a size_t argument, but prefers a non-member operator delete // with a size_t where possible (which it always is in this case). llvm::SmallVector BestDeallocFns; UsualDeallocFnInfo Selected = resolveDeallocationOverload( *this, FoundDelete, /*WantSize*/ FoundGlobalDelete, /*WantAlign*/ hasNewExtendedAlignment(*this, AllocElemType), &BestDeallocFns); if (Selected) Matches.push_back(std::make_pair(Selected.Found, Selected.FD)); else { // If we failed to select an operator, all remaining functions are viable // but ambiguous. for (auto Fn : BestDeallocFns) Matches.push_back(std::make_pair(Fn.Found, Fn.FD)); } } // C++ [expr.new]p20: // [...] If the lookup finds a single matching deallocation // function, that function will be called; otherwise, no // deallocation function will be called. 
if (Matches.size() == 1) { OperatorDelete = Matches[0].second; // C++1z [expr.new]p23: // If the lookup finds a usual deallocation function (3.7.4.2) // with a parameter of type std::size_t and that function, considered // as a placement deallocation function, would have been // selected as a match for the allocation function, the program // is ill-formed. if (getLangOpts().CPlusPlus11 && isPlacementNew && isNonPlacementDeallocationFunction(*this, OperatorDelete)) { UsualDeallocFnInfo Info(*this, DeclAccessPair::make(OperatorDelete, AS_public)); // Core issue, per mail to core reflector, 2016-10-09: // If this is a member operator delete, and there is a corresponding // non-sized member operator delete, this isn't /really/ a sized // deallocation function, it just happens to have a size_t parameter. bool IsSizedDelete = Info.HasSizeT; if (IsSizedDelete && !FoundGlobalDelete) { auto NonSizedDelete = resolveDeallocationOverload(*this, FoundDelete, /*WantSize*/false, /*WantAlign*/Info.HasAlignValT); if (NonSizedDelete && !NonSizedDelete.HasSizeT && NonSizedDelete.HasAlignValT == Info.HasAlignValT) IsSizedDelete = false; } if (IsSizedDelete) { SourceRange R = PlaceArgs.empty() ? SourceRange() : SourceRange(PlaceArgs.front()->getLocStart(), PlaceArgs.back()->getLocEnd()); Diag(StartLoc, diag::err_placement_new_non_placement_delete) << R; if (!OperatorDelete->isImplicit()) Diag(OperatorDelete->getLocation(), diag::note_previous_decl) << DeleteName; } } CheckAllocationAccess(StartLoc, Range, FoundDelete.getNamingClass(), Matches[0].first); } else if (!Matches.empty()) { // We found multiple suitable operators. Per [expr.new]p20, that means we // call no 'operator delete' function, but we should at least warn the user. // FIXME: Suppress this warning if the construction cannot throw. 
Diag(StartLoc, diag::warn_ambiguous_suitable_delete_function_found) << DeleteName << AllocElemType; for (auto &Match : Matches) Diag(Match.second->getLocation(), diag::note_member_declared_here) << DeleteName; } return false; } /// DeclareGlobalNewDelete - Declare the global forms of operator new and /// delete. These are: /// @code /// // C++03: /// void* operator new(std::size_t) throw(std::bad_alloc); /// void* operator new[](std::size_t) throw(std::bad_alloc); /// void operator delete(void *) throw(); /// void operator delete[](void *) throw(); /// // C++11: /// void* operator new(std::size_t); /// void* operator new[](std::size_t); /// void operator delete(void *) noexcept; /// void operator delete[](void *) noexcept; /// // C++1y: /// void* operator new(std::size_t); /// void* operator new[](std::size_t); /// void operator delete(void *) noexcept; /// void operator delete[](void *) noexcept; /// void operator delete(void *, std::size_t) noexcept; /// void operator delete[](void *, std::size_t) noexcept; /// @endcode /// Note that the placement and nothrow forms of new are *not* implicitly /// declared. Their use requires including \. void Sema::DeclareGlobalNewDelete() { if (GlobalNewDeleteDeclared) return; // OpenCL C++ 1.0 s2.9: the implicitly declared new and delete operators // are not supported. if (getLangOpts().OpenCLCPlusPlus) return; // C++ [basic.std.dynamic]p2: // [...] 
The following allocation and deallocation functions (18.4) are // implicitly declared in global scope in each translation unit of a // program // // C++03: // void* operator new(std::size_t) throw(std::bad_alloc); // void* operator new[](std::size_t) throw(std::bad_alloc); // void operator delete(void*) throw(); // void operator delete[](void*) throw(); // C++11: // void* operator new(std::size_t); // void* operator new[](std::size_t); // void operator delete(void*) noexcept; // void operator delete[](void*) noexcept; // C++1y: // void* operator new(std::size_t); // void* operator new[](std::size_t); // void operator delete(void*) noexcept; // void operator delete[](void*) noexcept; // void operator delete(void*, std::size_t) noexcept; // void operator delete[](void*, std::size_t) noexcept; // // These implicit declarations introduce only the function names operator // new, operator new[], operator delete, operator delete[]. // // Here, we need to refer to std::bad_alloc, so we will implicitly declare // "std" or "bad_alloc" as necessary to form the exception specification. // However, we do not make these implicit declarations visible to name // lookup. if (!StdBadAlloc && !getLangOpts().CPlusPlus11) { // The "std::bad_alloc" class has not yet been declared, so build it // implicitly. StdBadAlloc = CXXRecordDecl::Create(Context, TTK_Class, getOrCreateStdNamespace(), SourceLocation(), SourceLocation(), &PP.getIdentifierTable().get("bad_alloc"), nullptr); getStdBadAlloc()->setImplicit(true); } if (!StdAlignValT && getLangOpts().AlignedAllocation) { // The "std::align_val_t" enum class has not yet been declared, so build it // implicitly. 
auto *AlignValT = EnumDecl::Create( Context, getOrCreateStdNamespace(), SourceLocation(), SourceLocation(), &PP.getIdentifierTable().get("align_val_t"), nullptr, true, true, true); AlignValT->setIntegerType(Context.getSizeType()); AlignValT->setPromotionType(Context.getSizeType()); AlignValT->setImplicit(true); StdAlignValT = AlignValT; } GlobalNewDeleteDeclared = true; QualType VoidPtr = Context.getPointerType(Context.VoidTy); QualType SizeT = Context.getSizeType(); auto DeclareGlobalAllocationFunctions = [&](OverloadedOperatorKind Kind, QualType Return, QualType Param) { llvm::SmallVector Params; Params.push_back(Param); // Create up to four variants of the function (sized/aligned). bool HasSizedVariant = getLangOpts().SizedDeallocation && (Kind == OO_Delete || Kind == OO_Array_Delete); bool HasAlignedVariant = getLangOpts().AlignedAllocation; int NumSizeVariants = (HasSizedVariant ? 2 : 1); int NumAlignVariants = (HasAlignedVariant ? 2 : 1); for (int Sized = 0; Sized < NumSizeVariants; ++Sized) { if (Sized) Params.push_back(SizeT); for (int Aligned = 0; Aligned < NumAlignVariants; ++Aligned) { if (Aligned) Params.push_back(Context.getTypeDeclType(getStdAlignValT())); DeclareGlobalAllocationFunction( Context.DeclarationNames.getCXXOperatorName(Kind), Return, Params); if (Aligned) Params.pop_back(); } } }; DeclareGlobalAllocationFunctions(OO_New, VoidPtr, SizeT); DeclareGlobalAllocationFunctions(OO_Array_New, VoidPtr, SizeT); DeclareGlobalAllocationFunctions(OO_Delete, Context.VoidTy, VoidPtr); DeclareGlobalAllocationFunctions(OO_Array_Delete, Context.VoidTy, VoidPtr); } /// DeclareGlobalAllocationFunction - Declares a single implicit global /// allocation function if it doesn't already exist. void Sema::DeclareGlobalAllocationFunction(DeclarationName Name, QualType Return, ArrayRef Params) { DeclContext *GlobalCtx = Context.getTranslationUnitDecl(); // Check if this function is already declared. 
DeclContext::lookup_result R = GlobalCtx->lookup(Name); for (DeclContext::lookup_iterator Alloc = R.begin(), AllocEnd = R.end(); Alloc != AllocEnd; ++Alloc) { // Only look at non-template functions, as it is the predefined, // non-templated allocation function we are trying to declare here. if (FunctionDecl *Func = dyn_cast(*Alloc)) { if (Func->getNumParams() == Params.size()) { llvm::SmallVector FuncParams; for (auto *P : Func->parameters()) FuncParams.push_back( Context.getCanonicalType(P->getType().getUnqualifiedType())); if (llvm::makeArrayRef(FuncParams) == Params) { // Make the function visible to name lookup, even if we found it in // an unimported module. It either is an implicitly-declared global // allocation function, or is suppressing that function. Func->setVisibleDespiteOwningModule(); return; } } } } FunctionProtoType::ExtProtoInfo EPI; QualType BadAllocType; bool HasBadAllocExceptionSpec = (Name.getCXXOverloadedOperator() == OO_New || Name.getCXXOverloadedOperator() == OO_Array_New); if (HasBadAllocExceptionSpec) { if (!getLangOpts().CPlusPlus11) { BadAllocType = Context.getTypeDeclType(getStdBadAlloc()); assert(StdBadAlloc && "Must have std::bad_alloc declared"); EPI.ExceptionSpec.Type = EST_Dynamic; EPI.ExceptionSpec.Exceptions = llvm::makeArrayRef(BadAllocType); } } else { EPI.ExceptionSpec = getLangOpts().CPlusPlus11 ? EST_BasicNoexcept : EST_DynamicNone; } auto CreateAllocationFunctionDecl = [&](Attr *ExtraAttr) { QualType FnType = Context.getFunctionType(Return, Params, EPI); FunctionDecl *Alloc = FunctionDecl::Create( Context, GlobalCtx, SourceLocation(), SourceLocation(), Name, FnType, /*TInfo=*/nullptr, SC_None, false, true); Alloc->setImplicit(); // Global allocation functions should always be visible. Alloc->setVisibleDespiteOwningModule(); // Implicit sized deallocation functions always have default visibility. 
Alloc->addAttr( VisibilityAttr::CreateImplicit(Context, VisibilityAttr::Default)); llvm::SmallVector ParamDecls; for (QualType T : Params) { ParamDecls.push_back(ParmVarDecl::Create( Context, Alloc, SourceLocation(), SourceLocation(), nullptr, T, /*TInfo=*/nullptr, SC_None, nullptr)); ParamDecls.back()->setImplicit(); } Alloc->setParams(ParamDecls); if (ExtraAttr) Alloc->addAttr(ExtraAttr); Context.getTranslationUnitDecl()->addDecl(Alloc); IdResolver.tryAddTopLevelDecl(Alloc, Name); }; if (!LangOpts.CUDA) CreateAllocationFunctionDecl(nullptr); else { // Host and device get their own declaration so each can be // defined or re-declared independently. CreateAllocationFunctionDecl(CUDAHostAttr::CreateImplicit(Context)); CreateAllocationFunctionDecl(CUDADeviceAttr::CreateImplicit(Context)); } } FunctionDecl *Sema::FindUsualDeallocationFunction(SourceLocation StartLoc, bool CanProvideSize, bool Overaligned, DeclarationName Name) { DeclareGlobalNewDelete(); LookupResult FoundDelete(*this, Name, StartLoc, LookupOrdinaryName); LookupQualifiedName(FoundDelete, Context.getTranslationUnitDecl()); // FIXME: It's possible for this to result in ambiguity, through a // user-declared variadic operator delete or the enable_if attribute. We // should probably not consider those cases to be usual deallocation // functions. But for now we just make an arbitrary choice in that case. auto Result = resolveDeallocationOverload(*this, FoundDelete, CanProvideSize, Overaligned); assert(Result.FD && "operator delete missing from global scope?"); return Result.FD; } FunctionDecl *Sema::FindDeallocationFunctionForDestructor(SourceLocation Loc, CXXRecordDecl *RD) { DeclarationName Name = Context.DeclarationNames.getCXXOperatorName(OO_Delete); FunctionDecl *OperatorDelete = nullptr; if (FindDeallocationFunction(Loc, RD, Name, OperatorDelete)) return nullptr; if (OperatorDelete) return OperatorDelete; // If there's no class-specific operator delete, look up the global // non-array delete. 
return FindUsualDeallocationFunction( Loc, true, hasNewExtendedAlignment(*this, Context.getRecordType(RD)), Name); } bool Sema::FindDeallocationFunction(SourceLocation StartLoc, CXXRecordDecl *RD, DeclarationName Name, FunctionDecl *&Operator, bool Diagnose) { LookupResult Found(*this, Name, StartLoc, LookupOrdinaryName); // Try to find operator delete/operator delete[] in class scope. LookupQualifiedName(Found, RD); if (Found.isAmbiguous()) return true; Found.suppressDiagnostics(); bool Overaligned = hasNewExtendedAlignment(*this, Context.getRecordType(RD)); // C++17 [expr.delete]p10: // If the deallocation functions have class scope, the one without a // parameter of type std::size_t is selected. llvm::SmallVector Matches; resolveDeallocationOverload(*this, Found, /*WantSize*/ false, /*WantAlign*/ Overaligned, &Matches); // If we could find an overload, use it. if (Matches.size() == 1) { Operator = cast(Matches[0].FD); // FIXME: DiagnoseUseOfDecl? if (Operator->isDeleted()) { if (Diagnose) { Diag(StartLoc, diag::err_deleted_function_use); NoteDeletedFunction(Operator); } return true; } if (CheckAllocationAccess(StartLoc, SourceRange(), Found.getNamingClass(), Matches[0].Found, Diagnose) == AR_inaccessible) return true; return false; } // We found multiple suitable operators; complain about the ambiguity. // FIXME: The standard doesn't say to do this; it appears that the intent // is that this should never happen. if (!Matches.empty()) { if (Diagnose) { Diag(StartLoc, diag::err_ambiguous_suitable_delete_member_function_found) << Name << RD; for (auto &Match : Matches) Diag(Match.FD->getLocation(), diag::note_member_declared_here) << Name; } return true; } // We did find operator delete/operator delete[] declarations, but // none of them were suitable. 
if (!Found.empty()) { if (Diagnose) { Diag(StartLoc, diag::err_no_suitable_delete_member_function_found) << Name << RD; for (NamedDecl *D : Found) Diag(D->getUnderlyingDecl()->getLocation(), diag::note_member_declared_here) << Name; } return true; } Operator = nullptr; return false; } namespace { /// Checks whether delete-expression, and new-expression used for /// initializing deletee have the same array form. class MismatchingNewDeleteDetector { public: enum MismatchResult { /// Indicates that there is no mismatch or a mismatch cannot be proven. NoMismatch, /// Indicates that variable is initialized with mismatching form of \a new. VarInitMismatches, /// Indicates that member is initialized with mismatching form of \a new. MemberInitMismatches, /// Indicates that 1 or more constructors' definitions could not been /// analyzed, and they will be checked again at the end of translation unit. AnalyzeLater }; /// \param EndOfTU True, if this is the final analysis at the end of /// translation unit. False, if this is the initial analysis at the point /// delete-expression was encountered. explicit MismatchingNewDeleteDetector(bool EndOfTU) : Field(nullptr), IsArrayForm(false), EndOfTU(EndOfTU), HasUndefinedConstructors(false) {} /// Checks whether pointee of a delete-expression is initialized with /// matching form of new-expression. /// /// If return value is \c VarInitMismatches or \c MemberInitMismatches at the /// point where delete-expression is encountered, then a warning will be /// issued immediately. If return value is \c AnalyzeLater at the point where /// delete-expression is seen, then member will be analyzed at the end of /// translation unit. \c AnalyzeLater is returned iff at least one constructor /// couldn't be analyzed. If at least one constructor initializes the member /// with matching type of new, the return value is \c NoMismatch. MismatchResult analyzeDeleteExpr(const CXXDeleteExpr *DE); /// Analyzes a class member. 
/// \param Field Class member to analyze. /// \param DeleteWasArrayForm Array form-ness of the delete-expression used /// for deleting the \p Field. MismatchResult analyzeField(FieldDecl *Field, bool DeleteWasArrayForm); FieldDecl *Field; /// List of mismatching new-expressions used for initialization of the pointee llvm::SmallVector NewExprs; /// Indicates whether delete-expression was in array form. bool IsArrayForm; private: const bool EndOfTU; /// Indicates that there is at least one constructor without body. bool HasUndefinedConstructors; /// Returns \c CXXNewExpr from given initialization expression. /// \param E Expression used for initializing pointee in delete-expression. /// E can be a single-element \c InitListExpr consisting of new-expression. const CXXNewExpr *getNewExprFromInitListOrExpr(const Expr *E); /// Returns whether member is initialized with mismatching form of /// \c new either by the member initializer or in-class initialization. /// /// If bodies of all constructors are not visible at the end of translation /// unit or at least one constructor initializes member with the matching /// form of \c new, mismatch cannot be proven, and this function will return /// \c NoMismatch. MismatchResult analyzeMemberExpr(const MemberExpr *ME); /// Returns whether variable is initialized with mismatching form of /// \c new. /// /// If variable is initialized with matching form of \c new or variable is not /// initialized with a \c new expression, this function will return true. /// If variable is initialized with mismatching form of \c new, returns false. /// \param D Variable to analyze. bool hasMatchingVarInit(const DeclRefExpr *D); /// Checks whether the constructor initializes pointee with mismatching /// form of \c new. /// /// Returns true, if member is initialized with matching form of \c new in /// member initializer list. 
Returns false, if member is initialized with the /// matching form of \c new in this constructor's initializer or given /// constructor isn't defined at the point where delete-expression is seen, or /// member isn't initialized by the constructor. bool hasMatchingNewInCtor(const CXXConstructorDecl *CD); /// Checks whether member is initialized with matching form of /// \c new in member initializer list. bool hasMatchingNewInCtorInit(const CXXCtorInitializer *CI); /// Checks whether member is initialized with mismatching form of \c new by /// in-class initializer. MismatchResult analyzeInClassInitializer(); }; } MismatchingNewDeleteDetector::MismatchResult MismatchingNewDeleteDetector::analyzeDeleteExpr(const CXXDeleteExpr *DE) { NewExprs.clear(); assert(DE && "Expected delete-expression"); IsArrayForm = DE->isArrayForm(); const Expr *E = DE->getArgument()->IgnoreParenImpCasts(); if (const MemberExpr *ME = dyn_cast(E)) { return analyzeMemberExpr(ME); } else if (const DeclRefExpr *D = dyn_cast(E)) { if (!hasMatchingVarInit(D)) return VarInitMismatches; } return NoMismatch; } const CXXNewExpr * MismatchingNewDeleteDetector::getNewExprFromInitListOrExpr(const Expr *E) { assert(E != nullptr && "Expected a valid initializer expression"); E = E->IgnoreParenImpCasts(); if (const InitListExpr *ILE = dyn_cast(E)) { if (ILE->getNumInits() == 1) E = dyn_cast(ILE->getInit(0)->IgnoreParenImpCasts()); } return dyn_cast_or_null(E); } bool MismatchingNewDeleteDetector::hasMatchingNewInCtorInit( const CXXCtorInitializer *CI) { const CXXNewExpr *NE = nullptr; if (Field == CI->getMember() && (NE = getNewExprFromInitListOrExpr(CI->getInit()))) { if (NE->isArray() == IsArrayForm) return true; else NewExprs.push_back(NE); } return false; } bool MismatchingNewDeleteDetector::hasMatchingNewInCtor( const CXXConstructorDecl *CD) { if (CD->isImplicit()) return false; const FunctionDecl *Definition = CD; if (!CD->isThisDeclarationADefinition() && !CD->isDefined(Definition)) { 
HasUndefinedConstructors = true; return EndOfTU; } for (const auto *CI : cast(Definition)->inits()) { if (hasMatchingNewInCtorInit(CI)) return true; } return false; } MismatchingNewDeleteDetector::MismatchResult MismatchingNewDeleteDetector::analyzeInClassInitializer() { assert(Field != nullptr && "This should be called only for members"); const Expr *InitExpr = Field->getInClassInitializer(); if (!InitExpr) return EndOfTU ? NoMismatch : AnalyzeLater; if (const CXXNewExpr *NE = getNewExprFromInitListOrExpr(InitExpr)) { if (NE->isArray() != IsArrayForm) { NewExprs.push_back(NE); return MemberInitMismatches; } } return NoMismatch; } MismatchingNewDeleteDetector::MismatchResult MismatchingNewDeleteDetector::analyzeField(FieldDecl *Field, bool DeleteWasArrayForm) { assert(Field != nullptr && "Analysis requires a valid class member."); this->Field = Field; IsArrayForm = DeleteWasArrayForm; const CXXRecordDecl *RD = cast(Field->getParent()); for (const auto *CD : RD->ctors()) { if (hasMatchingNewInCtor(CD)) return NoMismatch; } if (HasUndefinedConstructors) return EndOfTU ? NoMismatch : AnalyzeLater; if (!NewExprs.empty()) return MemberInitMismatches; return Field->hasInClassInitializer() ? 
analyzeInClassInitializer() : NoMismatch; } MismatchingNewDeleteDetector::MismatchResult MismatchingNewDeleteDetector::analyzeMemberExpr(const MemberExpr *ME) { assert(ME != nullptr && "Expected a member expression"); if (FieldDecl *F = dyn_cast(ME->getMemberDecl())) return analyzeField(F, IsArrayForm); return NoMismatch; } bool MismatchingNewDeleteDetector::hasMatchingVarInit(const DeclRefExpr *D) { const CXXNewExpr *NE = nullptr; if (const VarDecl *VD = dyn_cast(D->getDecl())) { if (VD->hasInit() && (NE = getNewExprFromInitListOrExpr(VD->getInit())) && NE->isArray() != IsArrayForm) { NewExprs.push_back(NE); } } return NewExprs.empty(); } static void DiagnoseMismatchedNewDelete(Sema &SemaRef, SourceLocation DeleteLoc, const MismatchingNewDeleteDetector &Detector) { SourceLocation EndOfDelete = SemaRef.getLocForEndOfToken(DeleteLoc); FixItHint H; if (!Detector.IsArrayForm) H = FixItHint::CreateInsertion(EndOfDelete, "[]"); else { SourceLocation RSquare = Lexer::findLocationAfterToken( DeleteLoc, tok::l_square, SemaRef.getSourceManager(), SemaRef.getLangOpts(), true); if (RSquare.isValid()) H = FixItHint::CreateRemoval(SourceRange(EndOfDelete, RSquare)); } SemaRef.Diag(DeleteLoc, diag::warn_mismatched_delete_new) << Detector.IsArrayForm << H; for (const auto *NE : Detector.NewExprs) SemaRef.Diag(NE->getExprLoc(), diag::note_allocated_here) << Detector.IsArrayForm; } void Sema::AnalyzeDeleteExprMismatch(const CXXDeleteExpr *DE) { if (Diags.isIgnored(diag::warn_mismatched_delete_new, SourceLocation())) return; MismatchingNewDeleteDetector Detector(/*EndOfTU=*/false); switch (Detector.analyzeDeleteExpr(DE)) { case MismatchingNewDeleteDetector::VarInitMismatches: case MismatchingNewDeleteDetector::MemberInitMismatches: { DiagnoseMismatchedNewDelete(*this, DE->getLocStart(), Detector); break; } case MismatchingNewDeleteDetector::AnalyzeLater: { DeleteExprs[Detector.Field].push_back( std::make_pair(DE->getLocStart(), DE->isArrayForm())); break; } case 
MismatchingNewDeleteDetector::NoMismatch: break; } } void Sema::AnalyzeDeleteExprMismatch(FieldDecl *Field, SourceLocation DeleteLoc, bool DeleteWasArrayForm) { MismatchingNewDeleteDetector Detector(/*EndOfTU=*/true); switch (Detector.analyzeField(Field, DeleteWasArrayForm)) { case MismatchingNewDeleteDetector::VarInitMismatches: llvm_unreachable("This analysis should have been done for class members."); case MismatchingNewDeleteDetector::AnalyzeLater: llvm_unreachable("Analysis cannot be postponed any point beyond end of " "translation unit."); case MismatchingNewDeleteDetector::MemberInitMismatches: DiagnoseMismatchedNewDelete(*this, DeleteLoc, Detector); break; case MismatchingNewDeleteDetector::NoMismatch: break; } } /// ActOnCXXDelete - Parsed a C++ 'delete' expression (C++ 5.3.5), as in: /// @code ::delete ptr; @endcode /// or /// @code delete [] ptr; @endcode ExprResult Sema::ActOnCXXDelete(SourceLocation StartLoc, bool UseGlobal, bool ArrayForm, Expr *ExE) { // C++ [expr.delete]p1: // The operand shall have a pointer type, or a class type having a single // non-explicit conversion function to a pointer type. The result has type // void. // // DR599 amends "pointer type" to "pointer to object type" in both cases. ExprResult Ex = ExE; FunctionDecl *OperatorDelete = nullptr; bool ArrayFormAsWritten = ArrayForm; bool UsualArrayDeleteWantsSize = false; if (!Ex.get()->isTypeDependent()) { // Perform lvalue-to-rvalue cast, if needed. Ex = DefaultLvalueConversion(Ex.get()); if (Ex.isInvalid()) return ExprError(); QualType Type = Ex.get()->getType(); class DeleteConverter : public ContextualImplicitConverter { public: DeleteConverter() : ContextualImplicitConverter(false, true) {} bool match(QualType ConvType) override { // FIXME: If we have an operator T* and an operator void*, we must pick // the operator T*. 
if (const PointerType *ConvPtrType = ConvType->getAs()) if (ConvPtrType->getPointeeType()->isIncompleteOrObjectType()) return true; return false; } SemaDiagnosticBuilder diagnoseNoMatch(Sema &S, SourceLocation Loc, QualType T) override { return S.Diag(Loc, diag::err_delete_operand) << T; } SemaDiagnosticBuilder diagnoseIncomplete(Sema &S, SourceLocation Loc, QualType T) override { return S.Diag(Loc, diag::err_delete_incomplete_class_type) << T; } SemaDiagnosticBuilder diagnoseExplicitConv(Sema &S, SourceLocation Loc, QualType T, QualType ConvTy) override { return S.Diag(Loc, diag::err_delete_explicit_conversion) << T << ConvTy; } SemaDiagnosticBuilder noteExplicitConv(Sema &S, CXXConversionDecl *Conv, QualType ConvTy) override { return S.Diag(Conv->getLocation(), diag::note_delete_conversion) << ConvTy; } SemaDiagnosticBuilder diagnoseAmbiguous(Sema &S, SourceLocation Loc, QualType T) override { return S.Diag(Loc, diag::err_ambiguous_delete_operand) << T; } SemaDiagnosticBuilder noteAmbiguous(Sema &S, CXXConversionDecl *Conv, QualType ConvTy) override { return S.Diag(Conv->getLocation(), diag::note_delete_conversion) << ConvTy; } SemaDiagnosticBuilder diagnoseConversion(Sema &S, SourceLocation Loc, QualType T, QualType ConvTy) override { llvm_unreachable("conversion functions are permitted"); } } Converter; Ex = PerformContextualImplicitConversion(StartLoc, Ex.get(), Converter); if (Ex.isInvalid()) return ExprError(); Type = Ex.get()->getType(); if (!Converter.match(Type)) // FIXME: PerformContextualImplicitConversion should return ExprError // itself in this case. 
return ExprError(); QualType Pointee = Type->getAs()->getPointeeType(); QualType PointeeElem = Context.getBaseElementType(Pointee); if (Pointee.getAddressSpace() != LangAS::Default && !getLangOpts().OpenCLCPlusPlus) return Diag(Ex.get()->getLocStart(), diag::err_address_space_qualified_delete) << Pointee.getUnqualifiedType() << Pointee.getQualifiers().getAddressSpaceAttributePrintValue(); CXXRecordDecl *PointeeRD = nullptr; if (Pointee->isVoidType() && !isSFINAEContext()) { // The C++ standard bans deleting a pointer to a non-object type, which // effectively bans deletion of "void*". However, most compilers support // this, so we treat it as a warning unless we're in a SFINAE context. Diag(StartLoc, diag::ext_delete_void_ptr_operand) << Type << Ex.get()->getSourceRange(); } else if (Pointee->isFunctionType() || Pointee->isVoidType()) { return ExprError(Diag(StartLoc, diag::err_delete_operand) << Type << Ex.get()->getSourceRange()); } else if (!Pointee->isDependentType()) { // FIXME: This can result in errors if the definition was imported from a // module but is hidden. if (!RequireCompleteType(StartLoc, Pointee, diag::warn_delete_incomplete, Ex.get())) { if (const RecordType *RT = PointeeElem->getAs()) PointeeRD = cast(RT->getDecl()); } } if (Pointee->isArrayType() && !ArrayForm) { Diag(StartLoc, diag::warn_delete_array_type) << Type << Ex.get()->getSourceRange() << FixItHint::CreateInsertion(getLocForEndOfToken(StartLoc), "[]"); ArrayForm = true; } DeclarationName DeleteName = Context.DeclarationNames.getCXXOperatorName( ArrayForm ? OO_Array_Delete : OO_Delete); if (PointeeRD) { if (!UseGlobal && FindDeallocationFunction(StartLoc, PointeeRD, DeleteName, OperatorDelete)) return ExprError(); // If we're allocating an array of records, check whether the // usual operator delete[] has a size_t parameter. if (ArrayForm) { // If the user specifically asked to use the global allocator, // we'll need to do the lookup into the class. 
if (UseGlobal) UsualArrayDeleteWantsSize = doesUsualArrayDeleteWantSize(*this, StartLoc, PointeeElem); // Otherwise, the usual operator delete[] should be the // function we just found. else if (OperatorDelete && isa(OperatorDelete)) UsualArrayDeleteWantsSize = UsualDeallocFnInfo(*this, DeclAccessPair::make(OperatorDelete, AS_public)) .HasSizeT; } if (!PointeeRD->hasIrrelevantDestructor()) if (CXXDestructorDecl *Dtor = LookupDestructor(PointeeRD)) { MarkFunctionReferenced(StartLoc, const_cast(Dtor)); if (DiagnoseUseOfDecl(Dtor, StartLoc)) return ExprError(); } CheckVirtualDtorCall(PointeeRD->getDestructor(), StartLoc, /*IsDelete=*/true, /*CallCanBeVirtual=*/true, /*WarnOnNonAbstractTypes=*/!ArrayForm, SourceLocation()); } if (!OperatorDelete) { if (getLangOpts().OpenCLCPlusPlus) { Diag(StartLoc, diag::err_openclcxx_not_supported) << "default delete"; return ExprError(); } bool IsComplete = isCompleteType(StartLoc, Pointee); bool CanProvideSize = IsComplete && (!ArrayForm || UsualArrayDeleteWantsSize || Pointee.isDestructedType()); bool Overaligned = hasNewExtendedAlignment(*this, Pointee); // Look for a global declaration. OperatorDelete = FindUsualDeallocationFunction(StartLoc, CanProvideSize, Overaligned, DeleteName); } MarkFunctionReferenced(StartLoc, OperatorDelete); // Check access and ambiguity of destructor if we're going to call it. // Note that this is required even for a virtual delete. bool IsVirtualDelete = false; if (PointeeRD) { if (CXXDestructorDecl *Dtor = LookupDestructor(PointeeRD)) { CheckDestructorAccess(Ex.get()->getExprLoc(), Dtor, PDiag(diag::err_access_dtor) << PointeeElem); IsVirtualDelete = Dtor->isVirtual(); } } diagnoseUnavailableAlignedAllocation(*OperatorDelete, StartLoc, true, *this); // Convert the operand to the type of the first parameter of operator // delete. 
This is only necessary if we selected a destroying operator // delete that we are going to call (non-virtually); converting to void* // is trivial and left to AST consumers to handle. QualType ParamType = OperatorDelete->getParamDecl(0)->getType(); if (!IsVirtualDelete && !ParamType->getPointeeType()->isVoidType()) { Qualifiers Qs = Pointee.getQualifiers(); if (Qs.hasCVRQualifiers()) { // Qualifiers are irrelevant to this conversion; we're only looking // for access and ambiguity. Qs.removeCVRQualifiers(); QualType Unqual = Context.getPointerType( Context.getQualifiedType(Pointee.getUnqualifiedType(), Qs)); Ex = ImpCastExprToType(Ex.get(), Unqual, CK_NoOp); } Ex = PerformImplicitConversion(Ex.get(), ParamType, AA_Passing); if (Ex.isInvalid()) return ExprError(); } } CXXDeleteExpr *Result = new (Context) CXXDeleteExpr( Context.VoidTy, UseGlobal, ArrayForm, ArrayFormAsWritten, UsualArrayDeleteWantsSize, OperatorDelete, Ex.get(), StartLoc); AnalyzeDeleteExprMismatch(Result); return Result; } static bool resolveBuiltinNewDeleteOverload(Sema &S, CallExpr *TheCall, bool IsDelete, FunctionDecl *&Operator) { DeclarationName NewName = S.Context.DeclarationNames.getCXXOperatorName( IsDelete ? OO_Delete : OO_New); LookupResult R(S, NewName, TheCall->getLocStart(), Sema::LookupOrdinaryName); S.LookupQualifiedName(R, S.Context.getTranslationUnitDecl()); assert(!R.empty() && "implicitly declared allocation functions not found"); assert(!R.isAmbiguous() && "global allocation functions are ambiguous"); // We do our own custom access checks below. R.suppressDiagnostics(); SmallVector Args(TheCall->arg_begin(), TheCall->arg_end()); OverloadCandidateSet Candidates(R.getNameLoc(), OverloadCandidateSet::CSK_Normal); for (LookupResult::iterator FnOvl = R.begin(), FnOvlEnd = R.end(); FnOvl != FnOvlEnd; ++FnOvl) { // Even member operator new/delete are implicitly treated as // static, so don't use AddMemberCandidate. 
NamedDecl *D = (*FnOvl)->getUnderlyingDecl(); if (FunctionTemplateDecl *FnTemplate = dyn_cast(D)) { S.AddTemplateOverloadCandidate(FnTemplate, FnOvl.getPair(), /*ExplicitTemplateArgs=*/nullptr, Args, Candidates, /*SuppressUserConversions=*/false); continue; } FunctionDecl *Fn = cast(D); S.AddOverloadCandidate(Fn, FnOvl.getPair(), Args, Candidates, /*SuppressUserConversions=*/false); } SourceRange Range = TheCall->getSourceRange(); // Do the resolution. OverloadCandidateSet::iterator Best; switch (Candidates.BestViableFunction(S, R.getNameLoc(), Best)) { case OR_Success: { // Got one! FunctionDecl *FnDecl = Best->Function; assert(R.getNamingClass() == nullptr && "class members should not be considered"); if (!FnDecl->isReplaceableGlobalAllocationFunction()) { S.Diag(R.getNameLoc(), diag::err_builtin_operator_new_delete_not_usual) << (IsDelete ? 1 : 0) << Range; S.Diag(FnDecl->getLocation(), diag::note_non_usual_function_declared_here) << R.getLookupName() << FnDecl->getSourceRange(); return true; } Operator = FnDecl; return false; } case OR_No_Viable_Function: S.Diag(R.getNameLoc(), diag::err_ovl_no_viable_function_in_call) << R.getLookupName() << Range; Candidates.NoteCandidates(S, OCD_AllCandidates, Args); return true; case OR_Ambiguous: S.Diag(R.getNameLoc(), diag::err_ovl_ambiguous_call) << R.getLookupName() << Range; Candidates.NoteCandidates(S, OCD_ViableCandidates, Args); return true; case OR_Deleted: { S.Diag(R.getNameLoc(), diag::err_ovl_deleted_call) << Best->Function->isDeleted() << R.getLookupName() << S.getDeletedOrUnavailableSuffix(Best->Function) << Range; Candidates.NoteCandidates(S, OCD_AllCandidates, Args); return true; } } llvm_unreachable("Unreachable, bad result from BestViableFunction"); } ExprResult Sema::SemaBuiltinOperatorNewDeleteOverloaded(ExprResult TheCallResult, bool IsDelete) { CallExpr *TheCall = cast(TheCallResult.get()); if (!getLangOpts().CPlusPlus) { Diag(TheCall->getExprLoc(), diag::err_builtin_requires_language) << (IsDelete ? 
"__builtin_operator_delete" : "__builtin_operator_new") << "C++"; return ExprError(); } // CodeGen assumes it can find the global new and delete to call, // so ensure that they are declared. DeclareGlobalNewDelete(); FunctionDecl *OperatorNewOrDelete = nullptr; if (resolveBuiltinNewDeleteOverload(*this, TheCall, IsDelete, OperatorNewOrDelete)) return ExprError(); assert(OperatorNewOrDelete && "should be found"); TheCall->setType(OperatorNewOrDelete->getReturnType()); for (unsigned i = 0; i != TheCall->getNumArgs(); ++i) { QualType ParamTy = OperatorNewOrDelete->getParamDecl(i)->getType(); InitializedEntity Entity = InitializedEntity::InitializeParameter(Context, ParamTy, false); ExprResult Arg = PerformCopyInitialization( Entity, TheCall->getArg(i)->getLocStart(), TheCall->getArg(i)); if (Arg.isInvalid()) return ExprError(); TheCall->setArg(i, Arg.get()); } auto Callee = dyn_cast(TheCall->getCallee()); assert(Callee && Callee->getCastKind() == CK_BuiltinFnToFnPtr && "Callee expected to be implicit cast to a builtin function pointer"); Callee->setType(OperatorNewOrDelete->getType()); return TheCallResult; } void Sema::CheckVirtualDtorCall(CXXDestructorDecl *dtor, SourceLocation Loc, bool IsDelete, bool CallCanBeVirtual, bool WarnOnNonAbstractTypes, SourceLocation DtorLoc) { if (!dtor || dtor->isVirtual() || !CallCanBeVirtual || isUnevaluatedContext()) return; // C++ [expr.delete]p3: // In the first alternative (delete object), if the static type of the // object to be deleted is different from its dynamic type, the static // type shall be a base class of the dynamic type of the object to be // deleted and the static type shall have a virtual destructor or the // behavior is undefined. // const CXXRecordDecl *PointeeRD = dtor->getParent(); // Note: a final class cannot be derived from, no issue there if (!PointeeRD->isPolymorphic() || PointeeRD->hasAttr()) return; // If the superclass is in a system header, there's nothing that can be done. 
// The `delete` (where we emit the warning) can be in a system header, // what matters for this warning is where the deleted type is defined. if (getSourceManager().isInSystemHeader(PointeeRD->getLocation())) return; QualType ClassType = dtor->getThisType(Context)->getPointeeType(); if (PointeeRD->isAbstract()) { // If the class is abstract, we warn by default, because we're // sure the code has undefined behavior. Diag(Loc, diag::warn_delete_abstract_non_virtual_dtor) << (IsDelete ? 0 : 1) << ClassType; } else if (WarnOnNonAbstractTypes) { // Otherwise, if this is not an array delete, it's a bit suspect, // but not necessarily wrong. Diag(Loc, diag::warn_delete_non_virtual_dtor) << (IsDelete ? 0 : 1) << ClassType; } if (!IsDelete) { std::string TypeStr; ClassType.getAsStringInternal(TypeStr, getPrintingPolicy()); Diag(DtorLoc, diag::note_delete_non_virtual) << FixItHint::CreateInsertion(DtorLoc, TypeStr + "::"); } } Sema::ConditionResult Sema::ActOnConditionVariable(Decl *ConditionVar, SourceLocation StmtLoc, ConditionKind CK) { ExprResult E = CheckConditionVariable(cast(ConditionVar), StmtLoc, CK); if (E.isInvalid()) return ConditionError(); return ConditionResult(*this, ConditionVar, MakeFullExpr(E.get(), StmtLoc), CK == ConditionKind::ConstexprIf); } /// Check the use of the given variable as a C++ condition in an if, /// while, do-while, or switch statement. ExprResult Sema::CheckConditionVariable(VarDecl *ConditionVar, SourceLocation StmtLoc, ConditionKind CK) { if (ConditionVar->isInvalidDecl()) return ExprError(); QualType T = ConditionVar->getType(); // C++ [stmt.select]p2: // The declarator shall not specify a function or an array. 
if (T->isFunctionType()) return ExprError(Diag(ConditionVar->getLocation(), diag::err_invalid_use_of_function_type) << ConditionVar->getSourceRange()); else if (T->isArrayType()) return ExprError(Diag(ConditionVar->getLocation(), diag::err_invalid_use_of_array_type) << ConditionVar->getSourceRange()); ExprResult Condition = DeclRefExpr::Create( Context, NestedNameSpecifierLoc(), SourceLocation(), ConditionVar, /*enclosing*/ false, ConditionVar->getLocation(), ConditionVar->getType().getNonReferenceType(), VK_LValue); MarkDeclRefReferenced(cast(Condition.get())); switch (CK) { case ConditionKind::Boolean: return CheckBooleanCondition(StmtLoc, Condition.get()); case ConditionKind::ConstexprIf: return CheckBooleanCondition(StmtLoc, Condition.get(), true); case ConditionKind::Switch: return CheckSwitchCondition(StmtLoc, Condition.get()); } llvm_unreachable("unexpected condition kind"); } /// CheckCXXBooleanCondition - Returns true if a conversion to bool is invalid. ExprResult Sema::CheckCXXBooleanCondition(Expr *CondExpr, bool IsConstexpr) { // C++ 6.4p4: // The value of a condition that is an initialized declaration in a statement // other than a switch statement is the value of the declared variable // implicitly converted to type bool. If that conversion is ill-formed, the // program is ill-formed. // The value of a condition that is an expression is the value of the // expression, implicitly converted to bool. // // FIXME: Return this value to the caller so they don't need to recompute it. llvm::APSInt Value(/*BitWidth*/1); return (IsConstexpr && !CondExpr->isValueDependent()) ? CheckConvertedConstantExpression(CondExpr, Context.BoolTy, Value, CCEK_ConstexprIf) : PerformContextuallyConvertToBool(CondExpr); } /// Helper function to determine whether this is the (deprecated) C++ /// conversion from a string literal to a pointer to non-const char or /// non-const wchar_t (for narrow and wide string literals, /// respectively). 
bool Sema::IsStringLiteralToNonConstPointerConversion(Expr *From, QualType ToType) { // Look inside the implicit cast, if it exists. if (ImplicitCastExpr *Cast = dyn_cast(From)) From = Cast->getSubExpr(); // A string literal (2.13.4) that is not a wide string literal can // be converted to an rvalue of type "pointer to char"; a wide // string literal can be converted to an rvalue of type "pointer // to wchar_t" (C++ 4.2p2). if (StringLiteral *StrLit = dyn_cast(From->IgnoreParens())) if (const PointerType *ToPtrType = ToType->getAs()) if (const BuiltinType *ToPointeeType = ToPtrType->getPointeeType()->getAs()) { // This conversion is considered only when there is an // explicit appropriate pointer target type (C++ 4.2p2). if (!ToPtrType->getPointeeType().hasQualifiers()) { switch (StrLit->getKind()) { case StringLiteral::UTF8: case StringLiteral::UTF16: case StringLiteral::UTF32: // We don't allow UTF literals to be implicitly converted break; case StringLiteral::Ascii: return (ToPointeeType->getKind() == BuiltinType::Char_U || ToPointeeType->getKind() == BuiltinType::Char_S); case StringLiteral::Wide: return Context.typesAreCompatible(Context.getWideCharType(), QualType(ToPointeeType, 0)); } } } return false; } static ExprResult BuildCXXCastArgument(Sema &S, SourceLocation CastLoc, QualType Ty, CastKind Kind, CXXMethodDecl *Method, DeclAccessPair FoundDecl, bool HadMultipleCandidates, Expr *From) { switch (Kind) { default: llvm_unreachable("Unhandled cast kind!"); case CK_ConstructorConversion: { CXXConstructorDecl *Constructor = cast(Method); SmallVector ConstructorArgs; if (S.RequireNonAbstractType(CastLoc, Ty, diag::err_allocation_of_abstract_type)) return ExprError(); if (S.CompleteConstructorCall(Constructor, From, CastLoc, ConstructorArgs)) return ExprError(); S.CheckConstructorAccess(CastLoc, Constructor, FoundDecl, InitializedEntity::InitializeTemporary(Ty)); if (S.DiagnoseUseOfDecl(Method, CastLoc)) return ExprError(); ExprResult Result = 
S.BuildCXXConstructExpr( CastLoc, Ty, FoundDecl, cast(Method), ConstructorArgs, HadMultipleCandidates, /*ListInit*/ false, /*StdInitListInit*/ false, /*ZeroInit*/ false, CXXConstructExpr::CK_Complete, SourceRange()); if (Result.isInvalid()) return ExprError(); return S.MaybeBindToTemporary(Result.getAs()); } case CK_UserDefinedConversion: { assert(!From->getType()->isPointerType() && "Arg can't have pointer type!"); S.CheckMemberOperatorAccess(CastLoc, From, /*arg*/ nullptr, FoundDecl); if (S.DiagnoseUseOfDecl(Method, CastLoc)) return ExprError(); // Create an implicit call expr that calls it. CXXConversionDecl *Conv = cast(Method); ExprResult Result = S.BuildCXXMemberCallExpr(From, FoundDecl, Conv, HadMultipleCandidates); if (Result.isInvalid()) return ExprError(); // Record usage of conversion in an implicit cast. Result = ImplicitCastExpr::Create(S.Context, Result.get()->getType(), CK_UserDefinedConversion, Result.get(), nullptr, Result.get()->getValueKind()); return S.MaybeBindToTemporary(Result.get()); } } } /// PerformImplicitConversion - Perform an implicit conversion of the /// expression From to the type ToType using the pre-computed implicit /// conversion sequence ICS. Returns the converted /// expression. Action is the kind of conversion we're performing, /// used in the error message. ExprResult Sema::PerformImplicitConversion(Expr *From, QualType ToType, const ImplicitConversionSequence &ICS, AssignmentAction Action, CheckedConversionKind CCK) { // C++ [over.match.oper]p7: [...] operands of class type are converted [...] 
if (CCK == CCK_ForBuiltinOverloadedOp && !From->getType()->isRecordType()) return From; switch (ICS.getKind()) { case ImplicitConversionSequence::StandardConversion: { ExprResult Res = PerformImplicitConversion(From, ToType, ICS.Standard, Action, CCK); if (Res.isInvalid()) return ExprError(); From = Res.get(); break; } case ImplicitConversionSequence::UserDefinedConversion: { FunctionDecl *FD = ICS.UserDefined.ConversionFunction; CastKind CastKind; QualType BeforeToType; assert(FD && "no conversion function for user-defined conversion seq"); if (const CXXConversionDecl *Conv = dyn_cast(FD)) { CastKind = CK_UserDefinedConversion; // If the user-defined conversion is specified by a conversion function, // the initial standard conversion sequence converts the source type to // the implicit object parameter of the conversion function. BeforeToType = Context.getTagDeclType(Conv->getParent()); } else { const CXXConstructorDecl *Ctor = cast(FD); CastKind = CK_ConstructorConversion; // Do no conversion if dealing with ... for the first conversion. if (!ICS.UserDefined.EllipsisConversion) { // If the user-defined conversion is specified by a constructor, the // initial standard conversion sequence converts the source type to // the type required by the argument of the constructor BeforeToType = Ctor->getParamDecl(0)->getType().getNonReferenceType(); } } // Watch out for ellipsis conversion. if (!ICS.UserDefined.EllipsisConversion) { ExprResult Res = PerformImplicitConversion(From, BeforeToType, ICS.UserDefined.Before, AA_Converting, CCK); if (Res.isInvalid()) return ExprError(); From = Res.get(); } ExprResult CastArg = BuildCXXCastArgument(*this, From->getLocStart(), ToType.getNonReferenceType(), CastKind, cast(FD), ICS.UserDefined.FoundConversionFunction, ICS.UserDefined.HadMultipleCandidates, From); if (CastArg.isInvalid()) return ExprError(); From = CastArg.get(); // C++ [over.match.oper]p7: // [...] 
the second standard conversion sequence of a user-defined // conversion sequence is not applied. if (CCK == CCK_ForBuiltinOverloadedOp) return From; return PerformImplicitConversion(From, ToType, ICS.UserDefined.After, AA_Converting, CCK); } case ImplicitConversionSequence::AmbiguousConversion: ICS.DiagnoseAmbiguousConversion(*this, From->getExprLoc(), PDiag(diag::err_typecheck_ambiguous_condition) << From->getSourceRange()); return ExprError(); case ImplicitConversionSequence::EllipsisConversion: llvm_unreachable("Cannot perform an ellipsis conversion"); case ImplicitConversionSequence::BadConversion: bool Diagnosed = DiagnoseAssignmentResult(Incompatible, From->getExprLoc(), ToType, From->getType(), From, Action); assert(Diagnosed && "failed to diagnose bad conversion"); (void)Diagnosed; return ExprError(); } // Everything went well. return From; } /// PerformImplicitConversion - Perform an implicit conversion of the /// expression From to the type ToType by following the standard /// conversion sequence SCS. Returns the converted /// expression. Flavor is the context in which we're performing this /// conversion, for use in error messages. ExprResult Sema::PerformImplicitConversion(Expr *From, QualType ToType, const StandardConversionSequence& SCS, AssignmentAction Action, CheckedConversionKind CCK) { bool CStyle = (CCK == CCK_CStyleCast || CCK == CCK_FunctionalCast); // Overall FIXME: we are recomputing too many types here and doing far too // much extra work. What this means is that we need to keep track of more // information that is computed when we try the implicit conversion initially, // so that we don't need to recompute anything here. QualType FromType = From->getType(); if (SCS.CopyConstructor) { // FIXME: When can ToType be a reference type? 
assert(!ToType->isReferenceType()); if (SCS.Second == ICK_Derived_To_Base) { SmallVector ConstructorArgs; if (CompleteConstructorCall(cast(SCS.CopyConstructor), From, /*FIXME:ConstructLoc*/SourceLocation(), ConstructorArgs)) return ExprError(); return BuildCXXConstructExpr( /*FIXME:ConstructLoc*/ SourceLocation(), ToType, SCS.FoundCopyConstructor, SCS.CopyConstructor, ConstructorArgs, /*HadMultipleCandidates*/ false, /*ListInit*/ false, /*StdInitListInit*/ false, /*ZeroInit*/ false, CXXConstructExpr::CK_Complete, SourceRange()); } return BuildCXXConstructExpr( /*FIXME:ConstructLoc*/ SourceLocation(), ToType, SCS.FoundCopyConstructor, SCS.CopyConstructor, From, /*HadMultipleCandidates*/ false, /*ListInit*/ false, /*StdInitListInit*/ false, /*ZeroInit*/ false, CXXConstructExpr::CK_Complete, SourceRange()); } // Resolve overloaded function references. if (Context.hasSameType(FromType, Context.OverloadTy)) { DeclAccessPair Found; FunctionDecl *Fn = ResolveAddressOfOverloadedFunction(From, ToType, true, Found); if (!Fn) return ExprError(); if (DiagnoseUseOfDecl(Fn, From->getLocStart())) return ExprError(); From = FixOverloadedFunctionReference(From, Found, Fn); FromType = From->getType(); } // If we're converting to an atomic type, first convert to the corresponding // non-atomic type. QualType ToAtomicType; if (const AtomicType *ToAtomic = ToType->getAs()) { ToAtomicType = ToType; ToType = ToAtomic->getValueType(); } QualType InitialFromType = FromType; // Perform the first implicit conversion. 
switch (SCS.First) { case ICK_Identity: if (const AtomicType *FromAtomic = FromType->getAs()) { FromType = FromAtomic->getValueType().getUnqualifiedType(); From = ImplicitCastExpr::Create(Context, FromType, CK_AtomicToNonAtomic, From, /*BasePath=*/nullptr, VK_RValue); } break; case ICK_Lvalue_To_Rvalue: { assert(From->getObjectKind() != OK_ObjCProperty); ExprResult FromRes = DefaultLvalueConversion(From); assert(!FromRes.isInvalid() && "Can't perform deduced conversion?!"); From = FromRes.get(); FromType = From->getType(); break; } case ICK_Array_To_Pointer: FromType = Context.getArrayDecayedType(FromType); From = ImpCastExprToType(From, FromType, CK_ArrayToPointerDecay, VK_RValue, /*BasePath=*/nullptr, CCK).get(); break; case ICK_Function_To_Pointer: FromType = Context.getPointerType(FromType); From = ImpCastExprToType(From, FromType, CK_FunctionToPointerDecay, VK_RValue, /*BasePath=*/nullptr, CCK).get(); break; default: llvm_unreachable("Improper first standard conversion"); } // Perform the second implicit conversion switch (SCS.Second) { case ICK_Identity: // C++ [except.spec]p5: // [For] assignment to and initialization of pointers to functions, // pointers to member functions, and references to functions: the // target entity shall allow at least the exceptions allowed by the // source value in the assignment or initialization. switch (Action) { case AA_Assigning: case AA_Initializing: // Note, function argument passing and returning are initialization. case AA_Passing: case AA_Returning: case AA_Sending: case AA_Passing_CFAudited: if (CheckExceptionSpecCompatibility(From, ToType)) return ExprError(); break; case AA_Casting: case AA_Converting: // Casts and implicit conversions are not initialization, so are not // checked for exception specification mismatches. break; } // Nothing else to do. 
break; case ICK_Integral_Promotion: case ICK_Integral_Conversion: if (ToType->isBooleanType()) { assert(FromType->castAs()->getDecl()->isFixed() && SCS.Second == ICK_Integral_Promotion && "only enums with fixed underlying type can promote to bool"); From = ImpCastExprToType(From, ToType, CK_IntegralToBoolean, VK_RValue, /*BasePath=*/nullptr, CCK).get(); } else { From = ImpCastExprToType(From, ToType, CK_IntegralCast, VK_RValue, /*BasePath=*/nullptr, CCK).get(); } break; case ICK_Floating_Promotion: case ICK_Floating_Conversion: From = ImpCastExprToType(From, ToType, CK_FloatingCast, VK_RValue, /*BasePath=*/nullptr, CCK).get(); break; case ICK_Complex_Promotion: case ICK_Complex_Conversion: { QualType FromEl = From->getType()->getAs()->getElementType(); QualType ToEl = ToType->getAs()->getElementType(); CastKind CK; if (FromEl->isRealFloatingType()) { if (ToEl->isRealFloatingType()) CK = CK_FloatingComplexCast; else CK = CK_FloatingComplexToIntegralComplex; } else if (ToEl->isRealFloatingType()) { CK = CK_IntegralComplexToFloatingComplex; } else { CK = CK_IntegralComplexCast; } From = ImpCastExprToType(From, ToType, CK, VK_RValue, /*BasePath=*/nullptr, CCK).get(); break; } case ICK_Floating_Integral: if (ToType->isRealFloatingType()) From = ImpCastExprToType(From, ToType, CK_IntegralToFloating, VK_RValue, /*BasePath=*/nullptr, CCK).get(); else From = ImpCastExprToType(From, ToType, CK_FloatingToIntegral, VK_RValue, /*BasePath=*/nullptr, CCK).get(); break; case ICK_Compatible_Conversion: From = ImpCastExprToType(From, ToType, CK_NoOp, VK_RValue, /*BasePath=*/nullptr, CCK).get(); break; case ICK_Writeback_Conversion: case ICK_Pointer_Conversion: { if (SCS.IncompatibleObjC && Action != AA_Casting) { // Diagnose incompatible Objective-C conversions if (Action == AA_Initializing || Action == AA_Assigning) Diag(From->getLocStart(), diag::ext_typecheck_convert_incompatible_pointer) << ToType << From->getType() << Action << From->getSourceRange() << 0; else 
Diag(From->getLocStart(), diag::ext_typecheck_convert_incompatible_pointer) << From->getType() << ToType << Action << From->getSourceRange() << 0; if (From->getType()->isObjCObjectPointerType() && ToType->isObjCObjectPointerType()) EmitRelatedResultTypeNote(From); } else if (getLangOpts().allowsNonTrivialObjCLifetimeQualifiers() && !CheckObjCARCUnavailableWeakConversion(ToType, From->getType())) { if (Action == AA_Initializing) Diag(From->getLocStart(), diag::err_arc_weak_unavailable_assign); else Diag(From->getLocStart(), diag::err_arc_convesion_of_weak_unavailable) << (Action == AA_Casting) << From->getType() << ToType << From->getSourceRange(); } CastKind Kind; CXXCastPath BasePath; if (CheckPointerConversion(From, ToType, Kind, BasePath, CStyle)) return ExprError(); // Make sure we extend blocks if necessary. // FIXME: doing this here is really ugly. if (Kind == CK_BlockPointerToObjCPointerCast) { ExprResult E = From; (void) PrepareCastToObjCObjectPointer(E); From = E.get(); } if (getLangOpts().allowsNonTrivialObjCLifetimeQualifiers()) CheckObjCConversion(SourceRange(), ToType, From, CCK); From = ImpCastExprToType(From, ToType, Kind, VK_RValue, &BasePath, CCK) .get(); break; } case ICK_Pointer_Member: { CastKind Kind; CXXCastPath BasePath; if (CheckMemberPointerConversion(From, ToType, Kind, BasePath, CStyle)) return ExprError(); if (CheckExceptionSpecCompatibility(From, ToType)) return ExprError(); // We may not have been able to figure out what this member pointer resolved // to up until this exact point. Attempt to lock-in it's inheritance model. if (Context.getTargetInfo().getCXXABI().isMicrosoft()) { (void)isCompleteType(From->getExprLoc(), From->getType()); (void)isCompleteType(From->getExprLoc(), ToType); } From = ImpCastExprToType(From, ToType, Kind, VK_RValue, &BasePath, CCK) .get(); break; } case ICK_Boolean_Conversion: // Perform half-to-boolean conversion via float. 
if (From->getType()->isHalfType()) { From = ImpCastExprToType(From, Context.FloatTy, CK_FloatingCast).get(); FromType = Context.FloatTy; } From = ImpCastExprToType(From, Context.BoolTy, ScalarTypeToBooleanCastKind(FromType), VK_RValue, /*BasePath=*/nullptr, CCK).get(); break; case ICK_Derived_To_Base: { CXXCastPath BasePath; if (CheckDerivedToBaseConversion(From->getType(), ToType.getNonReferenceType(), From->getLocStart(), From->getSourceRange(), &BasePath, CStyle)) return ExprError(); From = ImpCastExprToType(From, ToType.getNonReferenceType(), CK_DerivedToBase, From->getValueKind(), &BasePath, CCK).get(); break; } case ICK_Vector_Conversion: From = ImpCastExprToType(From, ToType, CK_BitCast, VK_RValue, /*BasePath=*/nullptr, CCK).get(); break; case ICK_Vector_Splat: { // Vector splat from any arithmetic type to a vector. Expr *Elem = prepareVectorSplat(ToType, From).get(); From = ImpCastExprToType(Elem, ToType, CK_VectorSplat, VK_RValue, /*BasePath=*/nullptr, CCK).get(); break; } case ICK_Complex_Real: // Case 1. x -> _Complex y if (const ComplexType *ToComplex = ToType->getAs()) { QualType ElType = ToComplex->getElementType(); bool isFloatingComplex = ElType->isRealFloatingType(); // x -> y if (Context.hasSameUnqualifiedType(ElType, From->getType())) { // do nothing } else if (From->getType()->isRealFloatingType()) { From = ImpCastExprToType(From, ElType, isFloatingComplex ? CK_FloatingCast : CK_FloatingToIntegral).get(); } else { assert(From->getType()->isIntegerType()); From = ImpCastExprToType(From, ElType, isFloatingComplex ? CK_IntegralToFloating : CK_IntegralCast).get(); } // y -> _Complex y From = ImpCastExprToType(From, ToType, isFloatingComplex ? CK_FloatingRealToComplex : CK_IntegralRealToComplex).get(); // Case 2. 
_Complex x -> y } else { const ComplexType *FromComplex = From->getType()->getAs(); assert(FromComplex); QualType ElType = FromComplex->getElementType(); bool isFloatingComplex = ElType->isRealFloatingType(); // _Complex x -> x From = ImpCastExprToType(From, ElType, isFloatingComplex ? CK_FloatingComplexToReal : CK_IntegralComplexToReal, VK_RValue, /*BasePath=*/nullptr, CCK).get(); // x -> y if (Context.hasSameUnqualifiedType(ElType, ToType)) { // do nothing } else if (ToType->isRealFloatingType()) { From = ImpCastExprToType(From, ToType, isFloatingComplex ? CK_FloatingCast : CK_IntegralToFloating, VK_RValue, /*BasePath=*/nullptr, CCK).get(); } else { assert(ToType->isIntegerType()); From = ImpCastExprToType(From, ToType, isFloatingComplex ? CK_FloatingToIntegral : CK_IntegralCast, VK_RValue, /*BasePath=*/nullptr, CCK).get(); } } break; case ICK_Block_Pointer_Conversion: { From = ImpCastExprToType(From, ToType.getUnqualifiedType(), CK_BitCast, VK_RValue, /*BasePath=*/nullptr, CCK).get(); break; } case ICK_TransparentUnionConversion: { ExprResult FromRes = From; Sema::AssignConvertType ConvTy = CheckTransparentUnionArgumentConstraints(ToType, FromRes); if (FromRes.isInvalid()) return ExprError(); From = FromRes.get(); assert ((ConvTy == Sema::Compatible) && "Improper transparent union conversion"); (void)ConvTy; break; } case ICK_Zero_Event_Conversion: From = ImpCastExprToType(From, ToType, CK_ZeroToOCLEvent, From->getValueKind()).get(); break; case ICK_Zero_Queue_Conversion: From = ImpCastExprToType(From, ToType, CK_ZeroToOCLQueue, From->getValueKind()).get(); break; case ICK_Lvalue_To_Rvalue: case ICK_Array_To_Pointer: case ICK_Function_To_Pointer: case ICK_Function_Conversion: case ICK_Qualification: case ICK_Num_Conversion_Kinds: case ICK_C_Only_Conversion: case ICK_Incompatible_Pointer_Conversion: llvm_unreachable("Improper second standard conversion"); } switch (SCS.Third) { case ICK_Identity: // Nothing to do. 
break; case ICK_Function_Conversion: // If both sides are functions (or pointers/references to them), there could // be incompatible exception declarations. if (CheckExceptionSpecCompatibility(From, ToType)) return ExprError(); From = ImpCastExprToType(From, ToType, CK_NoOp, VK_RValue, /*BasePath=*/nullptr, CCK).get(); break; case ICK_Qualification: { // The qualification keeps the category of the inner expression, unless the // target type isn't a reference. ExprValueKind VK = ToType->isReferenceType() ? From->getValueKind() : VK_RValue; From = ImpCastExprToType(From, ToType.getNonLValueExprType(Context), CK_NoOp, VK, /*BasePath=*/nullptr, CCK).get(); if (SCS.DeprecatedStringLiteralToCharPtr && !getLangOpts().WritableStrings) { Diag(From->getLocStart(), getLangOpts().CPlusPlus11 ? diag::ext_deprecated_string_literal_conversion : diag::warn_deprecated_string_literal_conversion) << ToType.getNonReferenceType(); } break; } default: llvm_unreachable("Improper third standard conversion"); } // If this conversion sequence involved a scalar -> atomic conversion, perform // that conversion now. if (!ToAtomicType.isNull()) { assert(Context.hasSameType( ToAtomicType->castAs()->getValueType(), From->getType())); From = ImpCastExprToType(From, ToAtomicType, CK_NonAtomicToAtomic, VK_RValue, nullptr, CCK).get(); } // If this conversion sequence succeeded and involved implicitly converting a // _Nullable type to a _Nonnull one, complain. if (!isCast(CCK)) diagnoseNullableToNonnullConversion(ToType, InitialFromType, From->getLocStart()); return From; } /// Check the completeness of a type in a unary type trait. /// /// If the particular type trait requires a complete type, tries to complete /// it. If completing the type fails, a diagnostic is emitted and false /// returned. If completing the type succeeds or no completion was required, /// returns true. 
static bool CheckUnaryTypeTraitTypeCompleteness(Sema &S, TypeTrait UTT, SourceLocation Loc, QualType ArgTy) { // C++0x [meta.unary.prop]p3: // For all of the class templates X declared in this Clause, instantiating // that template with a template argument that is a class template // specialization may result in the implicit instantiation of the template // argument if and only if the semantics of X require that the argument // must be a complete type. // We apply this rule to all the type trait expressions used to implement // these class templates. We also try to follow any GCC documented behavior // in these expressions to ensure portability of standard libraries. switch (UTT) { default: llvm_unreachable("not a UTT"); // is_complete_type somewhat obviously cannot require a complete type. case UTT_IsCompleteType: // Fall-through // These traits are modeled on the type predicates in C++0x // [meta.unary.cat] and [meta.unary.comp]. They are not specified as // requiring a complete type, as whether or not they return true cannot be // impacted by the completeness of the type. case UTT_IsVoid: case UTT_IsIntegral: case UTT_IsFloatingPoint: case UTT_IsArray: case UTT_IsPointer: case UTT_IsLvalueReference: case UTT_IsRvalueReference: case UTT_IsMemberFunctionPointer: case UTT_IsMemberObjectPointer: case UTT_IsEnum: case UTT_IsUnion: case UTT_IsClass: case UTT_IsFunction: case UTT_IsReference: case UTT_IsArithmetic: case UTT_IsFundamental: case UTT_IsObject: case UTT_IsScalar: case UTT_IsCompound: case UTT_IsMemberPointer: // Fall-through // These traits are modeled on type predicates in C++0x [meta.unary.prop] // which requires some of its traits to have the complete type. However, // the completeness of the type cannot impact these traits' semantics, and // so they don't require it. This matches the comments on these traits in // Table 49. 
case UTT_IsConst: case UTT_IsVolatile: case UTT_IsSigned: case UTT_IsUnsigned: // This type trait always returns false, checking the type is moot. case UTT_IsInterfaceClass: return true; // C++14 [meta.unary.prop]: // If T is a non-union class type, T shall be a complete type. case UTT_IsEmpty: case UTT_IsPolymorphic: case UTT_IsAbstract: if (const auto *RD = ArgTy->getAsCXXRecordDecl()) if (!RD->isUnion()) return !S.RequireCompleteType( Loc, ArgTy, diag::err_incomplete_type_used_in_type_trait_expr); return true; // C++14 [meta.unary.prop]: // If T is a class type, T shall be a complete type. case UTT_IsFinal: case UTT_IsSealed: if (ArgTy->getAsCXXRecordDecl()) return !S.RequireCompleteType( Loc, ArgTy, diag::err_incomplete_type_used_in_type_trait_expr); return true; // C++1z [meta.unary.prop]: // remove_all_extents_t shall be a complete type or cv void. case UTT_IsAggregate: case UTT_IsTrivial: case UTT_IsTriviallyCopyable: case UTT_IsStandardLayout: case UTT_IsPOD: case UTT_IsLiteral: // Per the GCC type traits documentation, T shall be a complete type, cv void, // or an array of unknown bound. But GCC actually imposes the same constraints // as above. case UTT_HasNothrowAssign: case UTT_HasNothrowMoveAssign: case UTT_HasNothrowConstructor: case UTT_HasNothrowCopy: case UTT_HasTrivialAssign: case UTT_HasTrivialMoveAssign: case UTT_HasTrivialDefaultConstructor: case UTT_HasTrivialMoveConstructor: case UTT_HasTrivialCopy: case UTT_HasTrivialDestructor: case UTT_HasVirtualDestructor: ArgTy = QualType(ArgTy->getBaseElementTypeUnsafe(), 0); LLVM_FALLTHROUGH; // C++1z [meta.unary.prop]: // T shall be a complete type, cv void, or an array of unknown bound. 
case UTT_IsDestructible: case UTT_IsNothrowDestructible: case UTT_IsTriviallyDestructible: case UTT_HasUniqueObjectRepresentations: if (ArgTy->isIncompleteArrayType() || ArgTy->isVoidType()) return true; return !S.RequireCompleteType( Loc, ArgTy, diag::err_incomplete_type_used_in_type_trait_expr); } } static bool HasNoThrowOperator(const RecordType *RT, OverloadedOperatorKind Op, Sema &Self, SourceLocation KeyLoc, ASTContext &C, bool (CXXRecordDecl::*HasTrivial)() const, bool (CXXRecordDecl::*HasNonTrivial)() const, bool (CXXMethodDecl::*IsDesiredOp)() const) { CXXRecordDecl *RD = cast(RT->getDecl()); if ((RD->*HasTrivial)() && !(RD->*HasNonTrivial)()) return true; DeclarationName Name = C.DeclarationNames.getCXXOperatorName(Op); DeclarationNameInfo NameInfo(Name, KeyLoc); LookupResult Res(Self, NameInfo, Sema::LookupOrdinaryName); if (Self.LookupQualifiedName(Res, RD)) { bool FoundOperator = false; Res.suppressDiagnostics(); for (LookupResult::iterator Op = Res.begin(), OpEnd = Res.end(); Op != OpEnd; ++Op) { if (isa(*Op)) continue; CXXMethodDecl *Operator = cast(*Op); if((Operator->*IsDesiredOp)()) { FoundOperator = true; const FunctionProtoType *CPT = Operator->getType()->getAs(); CPT = Self.ResolveExceptionSpec(KeyLoc, CPT); if (!CPT || !CPT->isNothrow()) return false; } } return FoundOperator; } return false; } static bool EvaluateUnaryTypeTrait(Sema &Self, TypeTrait UTT, SourceLocation KeyLoc, QualType T) { assert(!T->isDependentType() && "Cannot evaluate traits of dependent type"); ASTContext &C = Self.Context; switch(UTT) { default: llvm_unreachable("not a UTT"); // Type trait expressions corresponding to the primary type category // predicates in C++0x [meta.unary.cat]. 
case UTT_IsVoid: return T->isVoidType(); case UTT_IsIntegral: return T->isIntegralType(C); case UTT_IsFloatingPoint: return T->isFloatingType(); case UTT_IsArray: return T->isArrayType(); case UTT_IsPointer: return T->isPointerType(); case UTT_IsLvalueReference: return T->isLValueReferenceType(); case UTT_IsRvalueReference: return T->isRValueReferenceType(); case UTT_IsMemberFunctionPointer: return T->isMemberFunctionPointerType(); case UTT_IsMemberObjectPointer: return T->isMemberDataPointerType(); case UTT_IsEnum: return T->isEnumeralType(); case UTT_IsUnion: return T->isUnionType(); case UTT_IsClass: return T->isClassType() || T->isStructureType() || T->isInterfaceType(); case UTT_IsFunction: return T->isFunctionType(); // Type trait expressions which correspond to the convenient composition // predicates in C++0x [meta.unary.comp]. case UTT_IsReference: return T->isReferenceType(); case UTT_IsArithmetic: return T->isArithmeticType() && !T->isEnumeralType(); case UTT_IsFundamental: return T->isFundamentalType(); case UTT_IsObject: return T->isObjectType(); case UTT_IsScalar: // Note: semantic analysis depends on Objective-C lifetime types to be // considered scalar types. However, such types do not actually behave // like scalar types at run time (since they may require retain/release // operations), so we report them as non-scalar. if (T->isObjCLifetimeType()) { switch (T.getObjCLifetime()) { case Qualifiers::OCL_None: case Qualifiers::OCL_ExplicitNone: return true; case Qualifiers::OCL_Strong: case Qualifiers::OCL_Weak: case Qualifiers::OCL_Autoreleasing: return false; } } return T->isScalarType(); case UTT_IsCompound: return T->isCompoundType(); case UTT_IsMemberPointer: return T->isMemberPointerType(); // Type trait expressions which correspond to the type property predicates // in C++0x [meta.unary.prop]. 
case UTT_IsConst: return T.isConstQualified(); case UTT_IsVolatile: return T.isVolatileQualified(); case UTT_IsTrivial: return T.isTrivialType(C); case UTT_IsTriviallyCopyable: return T.isTriviallyCopyableType(C); case UTT_IsStandardLayout: return T->isStandardLayoutType(); case UTT_IsPOD: return T.isPODType(C); case UTT_IsLiteral: return T->isLiteralType(C); case UTT_IsEmpty: if (const CXXRecordDecl *RD = T->getAsCXXRecordDecl()) return !RD->isUnion() && RD->isEmpty(); return false; case UTT_IsPolymorphic: if (const CXXRecordDecl *RD = T->getAsCXXRecordDecl()) return !RD->isUnion() && RD->isPolymorphic(); return false; case UTT_IsAbstract: if (const CXXRecordDecl *RD = T->getAsCXXRecordDecl()) return !RD->isUnion() && RD->isAbstract(); return false; case UTT_IsAggregate: // Report vector extensions and complex types as aggregates because they // support aggregate initialization. GCC mirrors this behavior for vectors // but not _Complex. return T->isAggregateType() || T->isVectorType() || T->isExtVectorType() || T->isAnyComplexType(); // __is_interface_class only returns true when CL is invoked in /CLR mode and // even then only when it is used with the 'interface struct ...' syntax // Clang doesn't support /CLR which makes this type trait moot. case UTT_IsInterfaceClass: return false; case UTT_IsFinal: case UTT_IsSealed: if (const CXXRecordDecl *RD = T->getAsCXXRecordDecl()) return RD->hasAttr(); return false; case UTT_IsSigned: return T->isSignedIntegerType(); case UTT_IsUnsigned: return T->isUnsignedIntegerType(); // Type trait expressions which query classes regarding their construction, // destruction, and copying. Rather than being based directly on the // related type predicates in the standard, they are specified by both // GCC[1] and the Embarcadero C++ compiler[2], and Clang implements those // specifications. 
// // 1: http://gcc.gnu/.org/onlinedocs/gcc/Type-Traits.html // 2: http://docwiki.embarcadero.com/RADStudio/XE/en/Type_Trait_Functions_(C%2B%2B0x)_Index // // Note that these builtins do not behave as documented in g++: if a class // has both a trivial and a non-trivial special member of a particular kind, // they return false! For now, we emulate this behavior. // FIXME: This appears to be a g++ bug: more complex cases reveal that it // does not correctly compute triviality in the presence of multiple special // members of the same kind. Revisit this once the g++ bug is fixed. case UTT_HasTrivialDefaultConstructor: // http://gcc.gnu.org/onlinedocs/gcc/Type-Traits.html: // If __is_pod (type) is true then the trait is true, else if type is // a cv class or union type (or array thereof) with a trivial default // constructor ([class.ctor]) then the trait is true, else it is false. if (T.isPODType(C)) return true; if (CXXRecordDecl *RD = C.getBaseElementType(T)->getAsCXXRecordDecl()) return RD->hasTrivialDefaultConstructor() && !RD->hasNonTrivialDefaultConstructor(); return false; case UTT_HasTrivialMoveConstructor: // This trait is implemented by MSVC 2012 and needed to parse the // standard library headers. Specifically this is used as the logic // behind std::is_trivially_move_constructible (20.9.4.3). if (T.isPODType(C)) return true; if (CXXRecordDecl *RD = C.getBaseElementType(T)->getAsCXXRecordDecl()) return RD->hasTrivialMoveConstructor() && !RD->hasNonTrivialMoveConstructor(); return false; case UTT_HasTrivialCopy: // http://gcc.gnu.org/onlinedocs/gcc/Type-Traits.html: // If __is_pod (type) is true or type is a reference type then // the trait is true, else if type is a cv class or union type // with a trivial copy constructor ([class.copy]) then the trait // is true, else it is false. 
if (T.isPODType(C) || T->isReferenceType()) return true; if (CXXRecordDecl *RD = T->getAsCXXRecordDecl()) return RD->hasTrivialCopyConstructor() && !RD->hasNonTrivialCopyConstructor(); return false; case UTT_HasTrivialMoveAssign: // This trait is implemented by MSVC 2012 and needed to parse the // standard library headers. Specifically it is used as the logic // behind std::is_trivially_move_assignable (20.9.4.3) if (T.isPODType(C)) return true; if (CXXRecordDecl *RD = C.getBaseElementType(T)->getAsCXXRecordDecl()) return RD->hasTrivialMoveAssignment() && !RD->hasNonTrivialMoveAssignment(); return false; case UTT_HasTrivialAssign: // http://gcc.gnu.org/onlinedocs/gcc/Type-Traits.html: // If type is const qualified or is a reference type then the // trait is false. Otherwise if __is_pod (type) is true then the // trait is true, else if type is a cv class or union type with // a trivial copy assignment ([class.copy]) then the trait is // true, else it is false. // Note: the const and reference restrictions are interesting, // given that const and reference members don't prevent a class // from having a trivial copy assignment operator (but do cause // errors if the copy assignment operator is actually used, q.v. // [class.copy]p12). if (T.isConstQualified()) return false; if (T.isPODType(C)) return true; if (CXXRecordDecl *RD = T->getAsCXXRecordDecl()) return RD->hasTrivialCopyAssignment() && !RD->hasNonTrivialCopyAssignment(); return false; case UTT_IsDestructible: case UTT_IsTriviallyDestructible: case UTT_IsNothrowDestructible: // C++14 [meta.unary.prop]: // For reference types, is_destructible::value is true. if (T->isReferenceType()) return true; // Objective-C++ ARC: autorelease types don't require destruction. if (T->isObjCLifetimeType() && T.getObjCLifetime() == Qualifiers::OCL_Autoreleasing) return true; // C++14 [meta.unary.prop]: // For incomplete types and function types, is_destructible::value is // false. 
if (T->isIncompleteType() || T->isFunctionType()) return false; // A type that requires destruction (via a non-trivial destructor or ARC // lifetime semantics) is not trivially-destructible. if (UTT == UTT_IsTriviallyDestructible && T.isDestructedType()) return false; // C++14 [meta.unary.prop]: // For object types and given U equal to remove_all_extents_t, if the // expression std::declval().~U() is well-formed when treated as an // unevaluated operand (Clause 5), then is_destructible::value is true if (auto *RD = C.getBaseElementType(T)->getAsCXXRecordDecl()) { CXXDestructorDecl *Destructor = Self.LookupDestructor(RD); if (!Destructor) return false; // C++14 [dcl.fct.def.delete]p2: // A program that refers to a deleted function implicitly or // explicitly, other than to declare it, is ill-formed. if (Destructor->isDeleted()) return false; if (C.getLangOpts().AccessControl && Destructor->getAccess() != AS_public) return false; if (UTT == UTT_IsNothrowDestructible) { const FunctionProtoType *CPT = Destructor->getType()->getAs(); CPT = Self.ResolveExceptionSpec(KeyLoc, CPT); if (!CPT || !CPT->isNothrow()) return false; } } return true; case UTT_HasTrivialDestructor: // http://gcc.gnu.org/onlinedocs/gcc/Type-Traits.html // If __is_pod (type) is true or type is a reference type // then the trait is true, else if type is a cv class or union // type (or array thereof) with a trivial destructor // ([class.dtor]) then the trait is true, else it is // false. if (T.isPODType(C) || T->isReferenceType()) return true; // Objective-C++ ARC: autorelease types don't require destruction. if (T->isObjCLifetimeType() && T.getObjCLifetime() == Qualifiers::OCL_Autoreleasing) return true; if (CXXRecordDecl *RD = C.getBaseElementType(T)->getAsCXXRecordDecl()) return RD->hasTrivialDestructor(); return false; // TODO: Propagate nothrowness for implicitly declared special members. 
case UTT_HasNothrowAssign: // http://gcc.gnu.org/onlinedocs/gcc/Type-Traits.html: // If type is const qualified or is a reference type then the // trait is false. Otherwise if __has_trivial_assign (type) // is true then the trait is true, else if type is a cv class // or union type with copy assignment operators that are known // not to throw an exception then the trait is true, else it is // false. if (C.getBaseElementType(T).isConstQualified()) return false; if (T->isReferenceType()) return false; if (T.isPODType(C) || T->isObjCLifetimeType()) return true; if (const RecordType *RT = T->getAs()) return HasNoThrowOperator(RT, OO_Equal, Self, KeyLoc, C, &CXXRecordDecl::hasTrivialCopyAssignment, &CXXRecordDecl::hasNonTrivialCopyAssignment, &CXXMethodDecl::isCopyAssignmentOperator); return false; case UTT_HasNothrowMoveAssign: // This trait is implemented by MSVC 2012 and needed to parse the // standard library headers. Specifically this is used as the logic // behind std::is_nothrow_move_assignable (20.9.4.3). if (T.isPODType(C)) return true; if (const RecordType *RT = C.getBaseElementType(T)->getAs()) return HasNoThrowOperator(RT, OO_Equal, Self, KeyLoc, C, &CXXRecordDecl::hasTrivialMoveAssignment, &CXXRecordDecl::hasNonTrivialMoveAssignment, &CXXMethodDecl::isMoveAssignmentOperator); return false; case UTT_HasNothrowCopy: // http://gcc.gnu.org/onlinedocs/gcc/Type-Traits.html: // If __has_trivial_copy (type) is true then the trait is true, else // if type is a cv class or union type with copy constructors that are // known not to throw an exception then the trait is true, else it is // false. 
if (T.isPODType(C) || T->isReferenceType() || T->isObjCLifetimeType()) return true; if (CXXRecordDecl *RD = T->getAsCXXRecordDecl()) { if (RD->hasTrivialCopyConstructor() && !RD->hasNonTrivialCopyConstructor()) return true; bool FoundConstructor = false; unsigned FoundTQs; for (const auto *ND : Self.LookupConstructors(RD)) { // A template constructor is never a copy constructor. // FIXME: However, it may actually be selected at the actual overload // resolution point. if (isa(ND->getUnderlyingDecl())) continue; // UsingDecl itself is not a constructor if (isa(ND)) continue; auto *Constructor = cast(ND->getUnderlyingDecl()); if (Constructor->isCopyConstructor(FoundTQs)) { FoundConstructor = true; const FunctionProtoType *CPT = Constructor->getType()->getAs(); CPT = Self.ResolveExceptionSpec(KeyLoc, CPT); if (!CPT) return false; // TODO: check whether evaluating default arguments can throw. // For now, we'll be conservative and assume that they can throw. if (!CPT->isNothrow() || CPT->getNumParams() > 1) return false; } } return FoundConstructor; } return false; case UTT_HasNothrowConstructor: // http://gcc.gnu.org/onlinedocs/gcc/Type-Traits.html // If __has_trivial_constructor (type) is true then the trait is // true, else if type is a cv class or union type (or array // thereof) with a default constructor that is known not to // throw an exception then the trait is true, else it is false. if (T.isPODType(C) || T->isObjCLifetimeType()) return true; if (CXXRecordDecl *RD = C.getBaseElementType(T)->getAsCXXRecordDecl()) { if (RD->hasTrivialDefaultConstructor() && !RD->hasNonTrivialDefaultConstructor()) return true; bool FoundConstructor = false; for (const auto *ND : Self.LookupConstructors(RD)) { // FIXME: In C++0x, a constructor template can be a default constructor. 
if (isa(ND->getUnderlyingDecl())) continue; // UsingDecl itself is not a constructor if (isa(ND)) continue; auto *Constructor = cast(ND->getUnderlyingDecl()); if (Constructor->isDefaultConstructor()) { FoundConstructor = true; const FunctionProtoType *CPT = Constructor->getType()->getAs(); CPT = Self.ResolveExceptionSpec(KeyLoc, CPT); if (!CPT) return false; // FIXME: check whether evaluating default arguments can throw. // For now, we'll be conservative and assume that they can throw. if (!CPT->isNothrow() || CPT->getNumParams() > 0) return false; } } return FoundConstructor; } return false; case UTT_HasVirtualDestructor: // http://gcc.gnu.org/onlinedocs/gcc/Type-Traits.html: // If type is a class type with a virtual destructor ([class.dtor]) // then the trait is true, else it is false. if (CXXRecordDecl *RD = T->getAsCXXRecordDecl()) if (CXXDestructorDecl *Destructor = Self.LookupDestructor(RD)) return Destructor->isVirtual(); return false; // These type trait expressions are modeled on the specifications for the // Embarcadero C++0x type trait functions: // http://docwiki.embarcadero.com/RADStudio/XE/en/Type_Trait_Functions_(C%2B%2B0x)_Index case UTT_IsCompleteType: // http://docwiki.embarcadero.com/RADStudio/XE/en/Is_complete_type_(typename_T_): // Returns True if and only if T is a complete type at the point of the // function call. return !T->isIncompleteType(); case UTT_HasUniqueObjectRepresentations: return C.hasUniqueObjectRepresentations(T); } } static bool EvaluateBinaryTypeTrait(Sema &Self, TypeTrait BTT, QualType LhsT, QualType RhsT, SourceLocation KeyLoc); static bool evaluateTypeTrait(Sema &S, TypeTrait Kind, SourceLocation KWLoc, ArrayRef Args, SourceLocation RParenLoc) { if (Kind <= UTT_Last) return EvaluateUnaryTypeTrait(S, Kind, KWLoc, Args[0]->getType()); // Evaluate BTT_ReferenceBindsToTemporary alongside the IsConstructible // traits to avoid duplication. 
if (Kind <= BTT_Last && Kind != BTT_ReferenceBindsToTemporary) return EvaluateBinaryTypeTrait(S, Kind, Args[0]->getType(), Args[1]->getType(), RParenLoc); switch (Kind) { case clang::BTT_ReferenceBindsToTemporary: case clang::TT_IsConstructible: case clang::TT_IsNothrowConstructible: case clang::TT_IsTriviallyConstructible: { // C++11 [meta.unary.prop]: // is_trivially_constructible is defined as: // // is_constructible::value is true and the variable // definition for is_constructible, as defined below, is known to call // no operation that is not trivial. // // The predicate condition for a template specialization // is_constructible shall be satisfied if and only if the // following variable definition would be well-formed for some invented // variable t: // // T t(create()...); assert(!Args.empty()); // Precondition: T and all types in the parameter pack Args shall be // complete types, (possibly cv-qualified) void, or arrays of // unknown bound. for (const auto *TSI : Args) { QualType ArgTy = TSI->getType(); if (ArgTy->isVoidType() || ArgTy->isIncompleteArrayType()) continue; if (S.RequireCompleteType(KWLoc, ArgTy, diag::err_incomplete_type_used_in_type_trait_expr)) return false; } // Make sure the first argument is not incomplete nor a function type. QualType T = Args[0]->getType(); if (T->isIncompleteType() || T->isFunctionType()) return false; // Make sure the first argument is not an abstract type. 
CXXRecordDecl *RD = T->getAsCXXRecordDecl(); if (RD && RD->isAbstract()) return false; SmallVector OpaqueArgExprs; SmallVector ArgExprs; ArgExprs.reserve(Args.size() - 1); for (unsigned I = 1, N = Args.size(); I != N; ++I) { QualType ArgTy = Args[I]->getType(); if (ArgTy->isObjectType() || ArgTy->isFunctionType()) ArgTy = S.Context.getRValueReferenceType(ArgTy); OpaqueArgExprs.push_back( OpaqueValueExpr(Args[I]->getTypeLoc().getLocStart(), ArgTy.getNonLValueExprType(S.Context), Expr::getValueKindForType(ArgTy))); } for (Expr &E : OpaqueArgExprs) ArgExprs.push_back(&E); // Perform the initialization in an unevaluated context within a SFINAE // trap at translation unit scope. EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::Unevaluated); Sema::SFINAETrap SFINAE(S, /*AccessCheckingSFINAE=*/true); Sema::ContextRAII TUContext(S, S.Context.getTranslationUnitDecl()); InitializedEntity To(InitializedEntity::InitializeTemporary(Args[0])); InitializationKind InitKind(InitializationKind::CreateDirect(KWLoc, KWLoc, RParenLoc)); InitializationSequence Init(S, To, InitKind, ArgExprs); if (Init.Failed()) return false; ExprResult Result = Init.Perform(S, To, InitKind, ArgExprs); if (Result.isInvalid() || SFINAE.hasErrorOccurred()) return false; if (Kind == clang::TT_IsConstructible) return true; if (Kind == clang::BTT_ReferenceBindsToTemporary) { if (!T->isReferenceType()) return false; return !Init.isDirectReferenceBinding(); } if (Kind == clang::TT_IsNothrowConstructible) return S.canThrow(Result.get()) == CT_Cannot; if (Kind == clang::TT_IsTriviallyConstructible) { // Under Objective-C ARC and Weak, if the destination has non-trivial // Objective-C lifetime, this is a non-trivial construction. if (T.getNonReferenceType().hasNonTrivialObjCLifetime()) return false; // The initialization succeeded; now make sure there are no non-trivial // calls. 
return !Result.get()->hasNonTrivialCall(S.Context); } llvm_unreachable("unhandled type trait"); return false; } default: llvm_unreachable("not a TT"); } return false; } ExprResult Sema::BuildTypeTrait(TypeTrait Kind, SourceLocation KWLoc, ArrayRef Args, SourceLocation RParenLoc) { QualType ResultType = Context.getLogicalOperationType(); if (Kind <= UTT_Last && !CheckUnaryTypeTraitTypeCompleteness( *this, Kind, KWLoc, Args[0]->getType())) return ExprError(); bool Dependent = false; for (unsigned I = 0, N = Args.size(); I != N; ++I) { if (Args[I]->getType()->isDependentType()) { Dependent = true; break; } } bool Result = false; if (!Dependent) Result = evaluateTypeTrait(*this, Kind, KWLoc, Args, RParenLoc); return TypeTraitExpr::Create(Context, ResultType, KWLoc, Kind, Args, RParenLoc, Result); } ExprResult Sema::ActOnTypeTrait(TypeTrait Kind, SourceLocation KWLoc, ArrayRef Args, SourceLocation RParenLoc) { SmallVector ConvertedArgs; ConvertedArgs.reserve(Args.size()); for (unsigned I = 0, N = Args.size(); I != N; ++I) { TypeSourceInfo *TInfo; QualType T = GetTypeFromParser(Args[I], &TInfo); if (!TInfo) TInfo = Context.getTrivialTypeSourceInfo(T, KWLoc); ConvertedArgs.push_back(TInfo); } return BuildTypeTrait(Kind, KWLoc, ConvertedArgs, RParenLoc); } static bool EvaluateBinaryTypeTrait(Sema &Self, TypeTrait BTT, QualType LhsT, QualType RhsT, SourceLocation KeyLoc) { assert(!LhsT->isDependentType() && !RhsT->isDependentType() && "Cannot evaluate traits of dependent types"); switch(BTT) { case BTT_IsBaseOf: { // C++0x [meta.rel]p2 // Base is a base class of Derived without regard to cv-qualifiers or // Base and Derived are not unions and name the same class type without // regard to cv-qualifiers. 
const RecordType *lhsRecord = LhsT->getAs(); const RecordType *rhsRecord = RhsT->getAs(); if (!rhsRecord || !lhsRecord) { const ObjCObjectType *LHSObjTy = LhsT->getAs(); const ObjCObjectType *RHSObjTy = RhsT->getAs(); if (!LHSObjTy || !RHSObjTy) return false; ObjCInterfaceDecl *BaseInterface = LHSObjTy->getInterface(); ObjCInterfaceDecl *DerivedInterface = RHSObjTy->getInterface(); if (!BaseInterface || !DerivedInterface) return false; if (Self.RequireCompleteType( KeyLoc, RhsT, diag::err_incomplete_type_used_in_type_trait_expr)) return false; return BaseInterface->isSuperClassOf(DerivedInterface); } assert(Self.Context.hasSameUnqualifiedType(LhsT, RhsT) == (lhsRecord == rhsRecord)); if (lhsRecord == rhsRecord) return !lhsRecord->getDecl()->isUnion(); // C++0x [meta.rel]p2: // If Base and Derived are class types and are different types // (ignoring possible cv-qualifiers) then Derived shall be a // complete type. if (Self.RequireCompleteType(KeyLoc, RhsT, diag::err_incomplete_type_used_in_type_trait_expr)) return false; return cast(rhsRecord->getDecl()) ->isDerivedFrom(cast(lhsRecord->getDecl())); } case BTT_IsSame: return Self.Context.hasSameType(LhsT, RhsT); case BTT_TypeCompatible: { // GCC ignores cv-qualifiers on arrays for this builtin. 
Qualifiers LhsQuals, RhsQuals; QualType Lhs = Self.getASTContext().getUnqualifiedArrayType(LhsT, LhsQuals); QualType Rhs = Self.getASTContext().getUnqualifiedArrayType(RhsT, RhsQuals); return Self.Context.typesAreCompatible(Lhs, Rhs); } case BTT_IsConvertible: case BTT_IsConvertibleTo: { // C++0x [meta.rel]p4: // Given the following function prototype: // // template // typename add_rvalue_reference::type create(); // // the predicate condition for a template specialization // is_convertible shall be satisfied if and only if // the return expression in the following code would be // well-formed, including any implicit conversions to the return // type of the function: // // To test() { // return create(); // } // // Access checking is performed as if in a context unrelated to To and // From. Only the validity of the immediate context of the expression // of the return-statement (including conversions to the return type) // is considered. // // We model the initialization as a copy-initialization of a temporary // of the appropriate type, which for this expression is identical to the // return statement (since NRVO doesn't apply). // Functions aren't allowed to return function or array types. if (RhsT->isFunctionType() || RhsT->isArrayType()) return false; // A return statement in a void function must have void type. if (RhsT->isVoidType()) return LhsT->isVoidType(); // A function definition requires a complete, non-abstract return type. if (!Self.isCompleteType(KeyLoc, RhsT) || Self.isAbstractType(KeyLoc, RhsT)) return false; // Compute the result of add_rvalue_reference. if (LhsT->isObjectType() || LhsT->isFunctionType()) LhsT = Self.Context.getRValueReferenceType(LhsT); // Build a fake source and destination for initialization. 
InitializedEntity To(InitializedEntity::InitializeTemporary(RhsT)); OpaqueValueExpr From(KeyLoc, LhsT.getNonLValueExprType(Self.Context), Expr::getValueKindForType(LhsT)); Expr *FromPtr = &From; InitializationKind Kind(InitializationKind::CreateCopy(KeyLoc, SourceLocation())); // Perform the initialization in an unevaluated context within a SFINAE // trap at translation unit scope. EnterExpressionEvaluationContext Unevaluated( Self, Sema::ExpressionEvaluationContext::Unevaluated); Sema::SFINAETrap SFINAE(Self, /*AccessCheckingSFINAE=*/true); Sema::ContextRAII TUContext(Self, Self.Context.getTranslationUnitDecl()); InitializationSequence Init(Self, To, Kind, FromPtr); if (Init.Failed()) return false; ExprResult Result = Init.Perform(Self, To, Kind, FromPtr); return !Result.isInvalid() && !SFINAE.hasErrorOccurred(); } case BTT_IsAssignable: case BTT_IsNothrowAssignable: case BTT_IsTriviallyAssignable: { // C++11 [meta.unary.prop]p3: // is_trivially_assignable is defined as: // is_assignable::value is true and the assignment, as defined by // is_assignable, is known to call no operation that is not trivial // // is_assignable is defined as: // The expression declval() = declval() is well-formed when // treated as an unevaluated operand (Clause 5). // // For both, T and U shall be complete types, (possibly cv-qualified) // void, or arrays of unknown bound. if (!LhsT->isVoidType() && !LhsT->isIncompleteArrayType() && Self.RequireCompleteType(KeyLoc, LhsT, diag::err_incomplete_type_used_in_type_trait_expr)) return false; if (!RhsT->isVoidType() && !RhsT->isIncompleteArrayType() && Self.RequireCompleteType(KeyLoc, RhsT, diag::err_incomplete_type_used_in_type_trait_expr)) return false; // cv void is never assignable. if (LhsT->isVoidType() || RhsT->isVoidType()) return false; // Build expressions that emulate the effect of declval() and // declval(). 
if (LhsT->isObjectType() || LhsT->isFunctionType()) LhsT = Self.Context.getRValueReferenceType(LhsT); if (RhsT->isObjectType() || RhsT->isFunctionType()) RhsT = Self.Context.getRValueReferenceType(RhsT); OpaqueValueExpr Lhs(KeyLoc, LhsT.getNonLValueExprType(Self.Context), Expr::getValueKindForType(LhsT)); OpaqueValueExpr Rhs(KeyLoc, RhsT.getNonLValueExprType(Self.Context), Expr::getValueKindForType(RhsT)); // Attempt the assignment in an unevaluated context within a SFINAE // trap at translation unit scope. EnterExpressionEvaluationContext Unevaluated( Self, Sema::ExpressionEvaluationContext::Unevaluated); Sema::SFINAETrap SFINAE(Self, /*AccessCheckingSFINAE=*/true); Sema::ContextRAII TUContext(Self, Self.Context.getTranslationUnitDecl()); ExprResult Result = Self.BuildBinOp(/*S=*/nullptr, KeyLoc, BO_Assign, &Lhs, &Rhs); if (Result.isInvalid() || SFINAE.hasErrorOccurred()) return false; if (BTT == BTT_IsAssignable) return true; if (BTT == BTT_IsNothrowAssignable) return Self.canThrow(Result.get()) == CT_Cannot; if (BTT == BTT_IsTriviallyAssignable) { // Under Objective-C ARC and Weak, if the destination has non-trivial // Objective-C lifetime, this is a non-trivial assignment. 
if (LhsT.getNonReferenceType().hasNonTrivialObjCLifetime()) return false; return !Result.get()->hasNonTrivialCall(Self.Context); } llvm_unreachable("unhandled type trait"); return false; } default: llvm_unreachable("not a BTT"); } llvm_unreachable("Unknown type trait or not implemented"); } ExprResult Sema::ActOnArrayTypeTrait(ArrayTypeTrait ATT, SourceLocation KWLoc, ParsedType Ty, Expr* DimExpr, SourceLocation RParen) { TypeSourceInfo *TSInfo; QualType T = GetTypeFromParser(Ty, &TSInfo); if (!TSInfo) TSInfo = Context.getTrivialTypeSourceInfo(T); return BuildArrayTypeTrait(ATT, KWLoc, TSInfo, DimExpr, RParen); } static uint64_t EvaluateArrayTypeTrait(Sema &Self, ArrayTypeTrait ATT, QualType T, Expr *DimExpr, SourceLocation KeyLoc) { assert(!T->isDependentType() && "Cannot evaluate traits of dependent type"); switch(ATT) { case ATT_ArrayRank: if (T->isArrayType()) { unsigned Dim = 0; while (const ArrayType *AT = Self.Context.getAsArrayType(T)) { ++Dim; T = AT->getElementType(); } return Dim; } return 0; case ATT_ArrayExtent: { llvm::APSInt Value; uint64_t Dim; if (Self.VerifyIntegerConstantExpression(DimExpr, &Value, diag::err_dimension_expr_not_constant_integer, false).isInvalid()) return 0; if (Value.isSigned() && Value.isNegative()) { Self.Diag(KeyLoc, diag::err_dimension_expr_not_constant_integer) << DimExpr->getSourceRange(); return 0; } Dim = Value.getLimitedValue(); if (T->isArrayType()) { unsigned D = 0; bool Matched = false; while (const ArrayType *AT = Self.Context.getAsArrayType(T)) { if (Dim == D) { Matched = true; break; } ++D; T = AT->getElementType(); } if (Matched && T->isArrayType()) { if (const ConstantArrayType *CAT = Self.Context.getAsConstantArrayType(T)) return CAT->getSize().getLimitedValue(); } } return 0; } } llvm_unreachable("Unknown type trait or not implemented"); } ExprResult Sema::BuildArrayTypeTrait(ArrayTypeTrait ATT, SourceLocation KWLoc, TypeSourceInfo *TSInfo, Expr* DimExpr, SourceLocation RParen) { QualType T = TSInfo->getType(); 
// FIXME: This should likely be tracked as an APInt to remove any host // assumptions about the width of size_t on the target. uint64_t Value = 0; if (!T->isDependentType()) Value = EvaluateArrayTypeTrait(*this, ATT, T, DimExpr, KWLoc); // While the specification for these traits from the Embarcadero C++ // compiler's documentation says the return type is 'unsigned int', Clang // returns 'size_t'. On Windows, the primary platform for the Embarcadero // compiler, there is no difference. On several other platforms this is an // important distinction. return new (Context) ArrayTypeTraitExpr(KWLoc, ATT, TSInfo, Value, DimExpr, RParen, Context.getSizeType()); } ExprResult Sema::ActOnExpressionTrait(ExpressionTrait ET, SourceLocation KWLoc, Expr *Queried, SourceLocation RParen) { // If error parsing the expression, ignore. if (!Queried) return ExprError(); ExprResult Result = BuildExpressionTrait(ET, KWLoc, Queried, RParen); return Result; } static bool EvaluateExpressionTrait(ExpressionTrait ET, Expr *E) { switch (ET) { case ET_IsLValueExpr: return E->isLValue(); case ET_IsRValueExpr: return E->isRValue(); } llvm_unreachable("Expression trait not covered by switch"); } ExprResult Sema::BuildExpressionTrait(ExpressionTrait ET, SourceLocation KWLoc, Expr *Queried, SourceLocation RParen) { if (Queried->isTypeDependent()) { // Delay type-checking for type-dependent expressions. 
} else if (Queried->getType()->isPlaceholderType()) { ExprResult PE = CheckPlaceholderExpr(Queried); if (PE.isInvalid()) return ExprError(); return BuildExpressionTrait(ET, KWLoc, PE.get(), RParen); } bool Value = EvaluateExpressionTrait(ET, Queried); return new (Context) ExpressionTraitExpr(KWLoc, ET, Queried, Value, RParen, Context.BoolTy); } QualType Sema::CheckPointerToMemberOperands(ExprResult &LHS, ExprResult &RHS, ExprValueKind &VK, SourceLocation Loc, bool isIndirect) { assert(!LHS.get()->getType()->isPlaceholderType() && !RHS.get()->getType()->isPlaceholderType() && "placeholders should have been weeded out by now"); // The LHS undergoes lvalue conversions if this is ->*, and undergoes the // temporary materialization conversion otherwise. if (isIndirect) LHS = DefaultLvalueConversion(LHS.get()); else if (LHS.get()->isRValue()) LHS = TemporaryMaterializationConversion(LHS.get()); if (LHS.isInvalid()) return QualType(); // The RHS always undergoes lvalue conversions. RHS = DefaultLvalueConversion(RHS.get()); if (RHS.isInvalid()) return QualType(); const char *OpSpelling = isIndirect ? "->*" : ".*"; // C++ 5.5p2 // The binary operator .* [p3: ->*] binds its second operand, which shall // be of type "pointer to member of T" (where T is a completely-defined // class type) [...] QualType RHSType = RHS.get()->getType(); const MemberPointerType *MemPtr = RHSType->getAs(); if (!MemPtr) { Diag(Loc, diag::err_bad_memptr_rhs) << OpSpelling << RHSType << RHS.get()->getSourceRange(); return QualType(); } QualType Class(MemPtr->getClass(), 0); // Note: C++ [expr.mptr.oper]p2-3 says that the class type into which the // member pointer points must be completely-defined. However, there is no // reason for this semantic distinction, and the rule is not enforced by // other compilers. Therefore, we do not check this property, as it is // likely to be considered a defect. // C++ 5.5p2 // [...] 
to its first operand, which shall be of class T or of a class of // which T is an unambiguous and accessible base class. [p3: a pointer to // such a class] QualType LHSType = LHS.get()->getType(); if (isIndirect) { if (const PointerType *Ptr = LHSType->getAs()) LHSType = Ptr->getPointeeType(); else { Diag(Loc, diag::err_bad_memptr_lhs) << OpSpelling << 1 << LHSType << FixItHint::CreateReplacement(SourceRange(Loc), ".*"); return QualType(); } } if (!Context.hasSameUnqualifiedType(Class, LHSType)) { // If we want to check the hierarchy, we need a complete type. if (RequireCompleteType(Loc, LHSType, diag::err_bad_memptr_lhs, OpSpelling, (int)isIndirect)) { return QualType(); } if (!IsDerivedFrom(Loc, LHSType, Class)) { Diag(Loc, diag::err_bad_memptr_lhs) << OpSpelling << (int)isIndirect << LHS.get()->getType(); return QualType(); } CXXCastPath BasePath; if (CheckDerivedToBaseConversion(LHSType, Class, Loc, SourceRange(LHS.get()->getLocStart(), RHS.get()->getLocEnd()), &BasePath)) return QualType(); // Cast LHS to type of use. QualType UseType = Context.getQualifiedType(Class, LHSType.getQualifiers()); if (isIndirect) UseType = Context.getPointerType(UseType); ExprValueKind VK = isIndirect ? VK_RValue : LHS.get()->getValueKind(); LHS = ImpCastExprToType(LHS.get(), UseType, CK_DerivedToBase, VK, &BasePath); } if (isa(RHS.get()->IgnoreParens())) { // Diagnose use of pointer-to-member type which when used as // the functional cast in a pointer-to-member expression. Diag(Loc, diag::err_pointer_to_member_type) << isIndirect; return QualType(); } // C++ 5.5p2 // The result is an object or a function of the type specified by the // second operand. // The cv qualifiers are the union of those in the pointer and the left side, // in accordance with 5.5p5 and 5.2.5. 
QualType Result = MemPtr->getPointeeType(); Result = Context.getCVRQualifiedType(Result, LHSType.getCVRQualifiers()); // C++0x [expr.mptr.oper]p6: // In a .* expression whose object expression is an rvalue, the program is // ill-formed if the second operand is a pointer to member function with // ref-qualifier &. In a ->* expression or in a .* expression whose object // expression is an lvalue, the program is ill-formed if the second operand // is a pointer to member function with ref-qualifier &&. if (const FunctionProtoType *Proto = Result->getAs()) { switch (Proto->getRefQualifier()) { case RQ_None: // Do nothing break; case RQ_LValue: if (!isIndirect && !LHS.get()->Classify(Context).isLValue()) { // C++2a allows functions with ref-qualifier & if their cv-qualifier-seq // is (exactly) 'const'. if (Proto->isConst() && !Proto->isVolatile()) Diag(Loc, getLangOpts().CPlusPlus2a ? diag::warn_cxx17_compat_pointer_to_const_ref_member_on_rvalue : diag::ext_pointer_to_const_ref_member_on_rvalue); else Diag(Loc, diag::err_pointer_to_member_oper_value_classify) << RHSType << 1 << LHS.get()->getSourceRange(); } break; case RQ_RValue: if (isIndirect || !LHS.get()->Classify(Context).isRValue()) Diag(Loc, diag::err_pointer_to_member_oper_value_classify) << RHSType << 0 << LHS.get()->getSourceRange(); break; } } // C++ [expr.mptr.oper]p6: // The result of a .* expression whose second operand is a pointer // to a data member is of the same value category as its // first operand. The result of a .* expression whose second // operand is a pointer to a member function is a prvalue. The // result of an ->* expression is an lvalue if its second operand // is a pointer to data member and a prvalue otherwise. if (Result->isFunctionType()) { VK = VK_RValue; return Context.BoundMemberTy; } else if (isIndirect) { VK = VK_LValue; } else { VK = LHS.get()->getValueKind(); } return Result; } /// Try to convert a type to another according to C++11 5.16p3. 
/// /// This is part of the parameter validation for the ? operator. If either /// value operand is a class type, the two operands are attempted to be /// converted to each other. This function does the conversion in one direction. /// It returns true if the program is ill-formed and has already been diagnosed /// as such. static bool TryClassUnification(Sema &Self, Expr *From, Expr *To, SourceLocation QuestionLoc, bool &HaveConversion, QualType &ToType) { HaveConversion = false; ToType = To->getType(); InitializationKind Kind = InitializationKind::CreateCopy(To->getLocStart(), SourceLocation()); // C++11 5.16p3 // The process for determining whether an operand expression E1 of type T1 // can be converted to match an operand expression E2 of type T2 is defined // as follows: // -- If E2 is an lvalue: E1 can be converted to match E2 if E1 can be // implicitly converted to type "lvalue reference to T2", subject to the // constraint that in the conversion the reference must bind directly to // an lvalue. // -- If E2 is an xvalue: E1 can be converted to match E2 if E1 can be // implicitly converted to the type "rvalue reference to R2", subject to // the constraint that the reference must bind directly. if (To->isLValue() || To->isXValue()) { QualType T = To->isLValue() ? 
Self.Context.getLValueReferenceType(ToType) : Self.Context.getRValueReferenceType(ToType); InitializedEntity Entity = InitializedEntity::InitializeTemporary(T); InitializationSequence InitSeq(Self, Entity, Kind, From); if (InitSeq.isDirectReferenceBinding()) { ToType = T; HaveConversion = true; return false; } if (InitSeq.isAmbiguous()) return InitSeq.Diagnose(Self, Entity, Kind, From); } // -- If E2 is an rvalue, or if the conversion above cannot be done: // -- if E1 and E2 have class type, and the underlying class types are // the same or one is a base class of the other: QualType FTy = From->getType(); QualType TTy = To->getType(); const RecordType *FRec = FTy->getAs(); const RecordType *TRec = TTy->getAs(); bool FDerivedFromT = FRec && TRec && FRec != TRec && Self.IsDerivedFrom(QuestionLoc, FTy, TTy); if (FRec && TRec && (FRec == TRec || FDerivedFromT || Self.IsDerivedFrom(QuestionLoc, TTy, FTy))) { // E1 can be converted to match E2 if the class of T2 is the // same type as, or a base class of, the class of T1, and // [cv2 > cv1]. if (FRec == TRec || FDerivedFromT) { if (TTy.isAtLeastAsQualifiedAs(FTy)) { InitializedEntity Entity = InitializedEntity::InitializeTemporary(TTy); InitializationSequence InitSeq(Self, Entity, Kind, From); if (InitSeq) { HaveConversion = true; return false; } if (InitSeq.isAmbiguous()) return InitSeq.Diagnose(Self, Entity, Kind, From); } } return false; } // -- Otherwise: E1 can be converted to match E2 if E1 can be // implicitly converted to the type that expression E2 would have // if E2 were converted to an rvalue (or the type it has, if E2 is // an rvalue). // // This actually refers very narrowly to the lvalue-to-rvalue conversion, not // to the array-to-pointer or function-to-pointer conversions. 
TTy = TTy.getNonLValueExprType(Self.Context); InitializedEntity Entity = InitializedEntity::InitializeTemporary(TTy); InitializationSequence InitSeq(Self, Entity, Kind, From); HaveConversion = !InitSeq.Failed(); ToType = TTy; if (InitSeq.isAmbiguous()) return InitSeq.Diagnose(Self, Entity, Kind, From); return false; } /// Try to find a common type for two according to C++0x 5.16p5. /// /// This is part of the parameter validation for the ? operator. If either /// value operand is a class type, overload resolution is used to find a /// conversion to a common type. static bool FindConditionalOverload(Sema &Self, ExprResult &LHS, ExprResult &RHS, SourceLocation QuestionLoc) { Expr *Args[2] = { LHS.get(), RHS.get() }; OverloadCandidateSet CandidateSet(QuestionLoc, OverloadCandidateSet::CSK_Operator); Self.AddBuiltinOperatorCandidates(OO_Conditional, QuestionLoc, Args, CandidateSet); OverloadCandidateSet::iterator Best; switch (CandidateSet.BestViableFunction(Self, QuestionLoc, Best)) { case OR_Success: { // We found a match. Perform the conversions on the arguments and move on. ExprResult LHSRes = Self.PerformImplicitConversion( LHS.get(), Best->BuiltinParamTypes[0], Best->Conversions[0], Sema::AA_Converting); if (LHSRes.isInvalid()) break; LHS = LHSRes; ExprResult RHSRes = Self.PerformImplicitConversion( RHS.get(), Best->BuiltinParamTypes[1], Best->Conversions[1], Sema::AA_Converting); if (RHSRes.isInvalid()) break; RHS = RHSRes; if (Best->Function) Self.MarkFunctionReferenced(QuestionLoc, Best->Function); return false; } case OR_No_Viable_Function: // Emit a better diagnostic if one of the expressions is a null pointer // constant and the other is a pointer type. In this case, the user most // likely forgot to take the address of the other expression. 
if (Self.DiagnoseConditionalForNull(LHS.get(), RHS.get(), QuestionLoc)) return true; Self.Diag(QuestionLoc, diag::err_typecheck_cond_incompatible_operands) << LHS.get()->getType() << RHS.get()->getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return true; case OR_Ambiguous: Self.Diag(QuestionLoc, diag::err_conditional_ambiguous_ovl) << LHS.get()->getType() << RHS.get()->getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); // FIXME: Print the possible common types by printing the return types of // the viable candidates. break; case OR_Deleted: llvm_unreachable("Conditional operator has only built-in overloads"); } return true; } /// Perform an "extended" implicit conversion as returned by /// TryClassUnification. static bool ConvertForConditional(Sema &Self, ExprResult &E, QualType T) { InitializedEntity Entity = InitializedEntity::InitializeTemporary(T); InitializationKind Kind = InitializationKind::CreateCopy(E.get()->getLocStart(), SourceLocation()); Expr *Arg = E.get(); InitializationSequence InitSeq(Self, Entity, Kind, Arg); ExprResult Result = InitSeq.Perform(Self, Entity, Kind, Arg); if (Result.isInvalid()) return true; E = Result; return false; } /// Check the operands of ?: under C++ semantics. /// /// See C++ [expr.cond]. Note that LHS is never null, even for the GNU x ?: y /// extension. In this case, LHS == Cond. (But they're not aliases.) QualType Sema::CXXCheckConditionalOperands(ExprResult &Cond, ExprResult &LHS, ExprResult &RHS, ExprValueKind &VK, ExprObjectKind &OK, SourceLocation QuestionLoc) { // FIXME: Handle C99's complex types, vector types, block pointers and Obj-C++ // interface pointers. // C++11 [expr.cond]p1 // The first expression is contextually converted to bool. // // FIXME; GCC's vector extension permits the use of a?b:c where the type of // a is that of a integer vector with the same number of elements and // size as the vectors of b and c. 
If one of either b or c is a scalar // it is implicitly converted to match the type of the vector. // Otherwise the expression is ill-formed. If both b and c are scalars, // then b and c are checked and converted to the type of a if possible. // Unlike the OpenCL ?: operator, the expression is evaluated as // (a[0] != 0 ? b[0] : c[0], .. , a[n] != 0 ? b[n] : c[n]). if (!Cond.get()->isTypeDependent()) { ExprResult CondRes = CheckCXXBooleanCondition(Cond.get()); if (CondRes.isInvalid()) return QualType(); Cond = CondRes; } // Assume r-value. VK = VK_RValue; OK = OK_Ordinary; // Either of the arguments dependent? if (LHS.get()->isTypeDependent() || RHS.get()->isTypeDependent()) return Context.DependentTy; // C++11 [expr.cond]p2 // If either the second or the third operand has type (cv) void, ... QualType LTy = LHS.get()->getType(); QualType RTy = RHS.get()->getType(); bool LVoid = LTy->isVoidType(); bool RVoid = RTy->isVoidType(); if (LVoid || RVoid) { // ... one of the following shall hold: // -- The second or the third operand (but not both) is a (possibly // parenthesized) throw-expression; the result is of the type // and value category of the other. bool LThrow = isa(LHS.get()->IgnoreParenImpCasts()); bool RThrow = isa(RHS.get()->IgnoreParenImpCasts()); if (LThrow != RThrow) { Expr *NonThrow = LThrow ? RHS.get() : LHS.get(); VK = NonThrow->getValueKind(); // DR (no number yet): the result is a bit-field if the // non-throw-expression operand is a bit-field. OK = NonThrow->getObjectKind(); return NonThrow->getType(); } // -- Both the second and third operands have type void; the result is of // type void and is a prvalue. if (LVoid && RVoid) return Context.VoidTy; // Neither holds, error. Diag(QuestionLoc, diag::err_conditional_void_nonvoid) << (LVoid ? RTy : LTy) << (LVoid ? 0 : 1) << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } // Neither is void. 
// C++11 [expr.cond]p3 // Otherwise, if the second and third operand have different types, and // either has (cv) class type [...] an attempt is made to convert each of // those operands to the type of the other. if (!Context.hasSameType(LTy, RTy) && (LTy->isRecordType() || RTy->isRecordType())) { // These return true if a single direction is already ambiguous. QualType L2RType, R2LType; bool HaveL2R, HaveR2L; if (TryClassUnification(*this, LHS.get(), RHS.get(), QuestionLoc, HaveL2R, L2RType)) return QualType(); if (TryClassUnification(*this, RHS.get(), LHS.get(), QuestionLoc, HaveR2L, R2LType)) return QualType(); // If both can be converted, [...] the program is ill-formed. if (HaveL2R && HaveR2L) { Diag(QuestionLoc, diag::err_conditional_ambiguous) << LTy << RTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } // If exactly one conversion is possible, that conversion is applied to // the chosen operand and the converted operands are used in place of the // original operands for the remainder of this section. if (HaveL2R) { if (ConvertForConditional(*this, LHS, L2RType) || LHS.isInvalid()) return QualType(); LTy = LHS.get()->getType(); } else if (HaveR2L) { if (ConvertForConditional(*this, RHS, R2LType) || RHS.isInvalid()) return QualType(); RTy = RHS.get()->getType(); } } // C++11 [expr.cond]p3 // if both are glvalues of the same value category and the same type except // for cv-qualification, an attempt is made to convert each of those // operands to the type of the other. // FIXME: // Resolving a defect in P0012R1: we extend this to cover all cases where // one of the operands is reference-compatible with the other, in order // to support conditionals between functions differing in noexcept. ExprValueKind LVK = LHS.get()->getValueKind(); ExprValueKind RVK = RHS.get()->getValueKind(); if (!Context.hasSameType(LTy, RTy) && LVK == RVK && LVK != VK_RValue) { // DerivedToBase was already handled by the class-specific case above. 
// FIXME: Should we allow ObjC conversions here? bool DerivedToBase, ObjCConversion, ObjCLifetimeConversion; if (CompareReferenceRelationship( QuestionLoc, LTy, RTy, DerivedToBase, ObjCConversion, ObjCLifetimeConversion) == Ref_Compatible && !DerivedToBase && !ObjCConversion && !ObjCLifetimeConversion && // [...] subject to the constraint that the reference must bind // directly [...] !RHS.get()->refersToBitField() && !RHS.get()->refersToVectorElement()) { RHS = ImpCastExprToType(RHS.get(), LTy, CK_NoOp, RVK); RTy = RHS.get()->getType(); } else if (CompareReferenceRelationship( QuestionLoc, RTy, LTy, DerivedToBase, ObjCConversion, ObjCLifetimeConversion) == Ref_Compatible && !DerivedToBase && !ObjCConversion && !ObjCLifetimeConversion && !LHS.get()->refersToBitField() && !LHS.get()->refersToVectorElement()) { LHS = ImpCastExprToType(LHS.get(), RTy, CK_NoOp, LVK); LTy = LHS.get()->getType(); } } // C++11 [expr.cond]p4 // If the second and third operands are glvalues of the same value // category and have the same type, the result is of that type and // value category and it is a bit-field if the second or the third // operand is a bit-field, or if both are bit-fields. // We only extend this to bitfields, not to the crazy other kinds of // l-values. bool Same = Context.hasSameType(LTy, RTy); if (Same && LVK == RVK && LVK != VK_RValue && LHS.get()->isOrdinaryOrBitFieldObject() && RHS.get()->isOrdinaryOrBitFieldObject()) { VK = LHS.get()->getValueKind(); if (LHS.get()->getObjectKind() == OK_BitField || RHS.get()->getObjectKind() == OK_BitField) OK = OK_BitField; // If we have function pointer types, unify them anyway to unify their // exception specifications, if any. 
if (LTy->isFunctionPointerType() || LTy->isMemberFunctionPointerType()) { Qualifiers Qs = LTy.getQualifiers(); LTy = FindCompositePointerType(QuestionLoc, LHS, RHS, /*ConvertArgs*/false); LTy = Context.getQualifiedType(LTy, Qs); assert(!LTy.isNull() && "failed to find composite pointer type for " "canonically equivalent function ptr types"); assert(Context.hasSameType(LTy, RTy) && "bad composite pointer type"); } return LTy; } // C++11 [expr.cond]p5 // Otherwise, the result is a prvalue. If the second and third operands // do not have the same type, and either has (cv) class type, ... if (!Same && (LTy->isRecordType() || RTy->isRecordType())) { // ... overload resolution is used to determine the conversions (if any) // to be applied to the operands. If the overload resolution fails, the // program is ill-formed. if (FindConditionalOverload(*this, LHS, RHS, QuestionLoc)) return QualType(); } // C++11 [expr.cond]p6 // Lvalue-to-rvalue, array-to-pointer, and function-to-pointer standard // conversions are performed on the second and third operands. LHS = DefaultFunctionArrayLvalueConversion(LHS.get()); RHS = DefaultFunctionArrayLvalueConversion(RHS.get()); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); LTy = LHS.get()->getType(); RTy = RHS.get()->getType(); // After those conversions, one of the following shall hold: // -- The second and third operands have the same type; the result // is of that type. If the operands have class type, the result // is a prvalue temporary of the result type, which is // copy-initialized from either the second operand or the third // operand depending on the value of the first operand. if (Context.getCanonicalType(LTy) == Context.getCanonicalType(RTy)) { if (LTy->isRecordType()) { // The operands have class type. Make a temporary copy. 
InitializedEntity Entity = InitializedEntity::InitializeTemporary(LTy); ExprResult LHSCopy = PerformCopyInitialization(Entity, SourceLocation(), LHS); if (LHSCopy.isInvalid()) return QualType(); ExprResult RHSCopy = PerformCopyInitialization(Entity, SourceLocation(), RHS); if (RHSCopy.isInvalid()) return QualType(); LHS = LHSCopy; RHS = RHSCopy; } // If we have function pointer types, unify them anyway to unify their // exception specifications, if any. if (LTy->isFunctionPointerType() || LTy->isMemberFunctionPointerType()) { LTy = FindCompositePointerType(QuestionLoc, LHS, RHS); assert(!LTy.isNull() && "failed to find composite pointer type for " "canonically equivalent function ptr types"); } return LTy; } // Extension: conditional operator involving vector types. if (LTy->isVectorType() || RTy->isVectorType()) return CheckVectorOperands(LHS, RHS, QuestionLoc, /*isCompAssign*/false, /*AllowBothBool*/true, /*AllowBoolConversions*/false); // -- The second and third operands have arithmetic or enumeration type; // the usual arithmetic conversions are performed to bring them to a // common type, and the result is of that type. if (LTy->isArithmeticType() && RTy->isArithmeticType()) { QualType ResTy = UsualArithmeticConversions(LHS, RHS); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); if (ResTy.isNull()) { Diag(QuestionLoc, diag::err_typecheck_cond_incompatible_operands) << LTy << RTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } LHS = ImpCastExprToType(LHS.get(), ResTy, PrepareScalarCast(LHS, ResTy)); RHS = ImpCastExprToType(RHS.get(), ResTy, PrepareScalarCast(RHS, ResTy)); return ResTy; } // -- The second and third operands have pointer type, or one has pointer // type and the other is a null pointer constant, or both are null // pointer constants, at least one of which is non-integral; pointer // conversions and qualification conversions are performed to bring them // to their composite pointer type. 
The result is of the composite // pointer type. // -- The second and third operands have pointer to member type, or one has // pointer to member type and the other is a null pointer constant; // pointer to member conversions and qualification conversions are // performed to bring them to a common type, whose cv-qualification // shall match the cv-qualification of either the second or the third // operand. The result is of the common type. QualType Composite = FindCompositePointerType(QuestionLoc, LHS, RHS); if (!Composite.isNull()) return Composite; // Similarly, attempt to find composite type of two objective-c pointers. Composite = FindCompositeObjCPointerType(LHS, RHS, QuestionLoc); if (!Composite.isNull()) return Composite; // Check if we are using a null with a non-pointer type. if (DiagnoseConditionalForNull(LHS.get(), RHS.get(), QuestionLoc)) return QualType(); Diag(QuestionLoc, diag::err_typecheck_cond_incompatible_operands) << LHS.get()->getType() << RHS.get()->getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } static FunctionProtoType::ExceptionSpecInfo mergeExceptionSpecs(Sema &S, FunctionProtoType::ExceptionSpecInfo ESI1, FunctionProtoType::ExceptionSpecInfo ESI2, SmallVectorImpl &ExceptionTypeStorage) { ExceptionSpecificationType EST1 = ESI1.Type; ExceptionSpecificationType EST2 = ESI2.Type; // If either of them can throw anything, that is the result. if (EST1 == EST_None) return ESI1; if (EST2 == EST_None) return ESI2; if (EST1 == EST_MSAny) return ESI1; if (EST2 == EST_MSAny) return ESI2; if (EST1 == EST_NoexceptFalse) return ESI1; if (EST2 == EST_NoexceptFalse) return ESI2; // If either of them is non-throwing, the result is the other. 
if (EST1 == EST_DynamicNone) return ESI2; if (EST2 == EST_DynamicNone) return ESI1; if (EST1 == EST_BasicNoexcept) return ESI2; if (EST2 == EST_BasicNoexcept) return ESI1; if (EST1 == EST_NoexceptTrue) return ESI2; if (EST2 == EST_NoexceptTrue) return ESI1; // If we're left with value-dependent computed noexcept expressions, we're // stuck. Before C++17, we can just drop the exception specification entirely, // since it's not actually part of the canonical type. And this should never // happen in C++17, because it would mean we were computing the composite // pointer type of dependent types, which should never happen. if (EST1 == EST_DependentNoexcept || EST2 == EST_DependentNoexcept) { assert(!S.getLangOpts().CPlusPlus17 && "computing composite pointer type of dependent types"); return FunctionProtoType::ExceptionSpecInfo(); } // Switch over the possibilities so that people adding new values know to // update this function. switch (EST1) { case EST_None: case EST_DynamicNone: case EST_MSAny: case EST_BasicNoexcept: case EST_DependentNoexcept: case EST_NoexceptFalse: case EST_NoexceptTrue: llvm_unreachable("handled above"); case EST_Dynamic: { // This is the fun case: both exception specifications are dynamic. Form // the union of the two lists. assert(EST2 == EST_Dynamic && "other cases should already be handled"); llvm::SmallPtrSet Found; for (auto &Exceptions : {ESI1.Exceptions, ESI2.Exceptions}) for (QualType E : Exceptions) if (Found.insert(S.Context.getCanonicalType(E)).second) ExceptionTypeStorage.push_back(E); FunctionProtoType::ExceptionSpecInfo Result(EST_Dynamic); Result.Exceptions = ExceptionTypeStorage; return Result; } case EST_Unevaluated: case EST_Uninstantiated: case EST_Unparsed: llvm_unreachable("shouldn't see unresolved exception specifications here"); } llvm_unreachable("invalid ExceptionSpecificationType"); } /// Find a merged pointer type and convert the two expressions to it. 
/// /// This finds the composite pointer type (or member pointer type) for @p E1 /// and @p E2 according to C++1z 5p14. It converts both expressions to this /// type and returns it. /// It does not emit diagnostics. /// /// \param Loc The location of the operator requiring these two expressions to /// be converted to the composite pointer type. /// /// \param ConvertArgs If \c false, do not convert E1 and E2 to the target type. QualType Sema::FindCompositePointerType(SourceLocation Loc, Expr *&E1, Expr *&E2, bool ConvertArgs) { assert(getLangOpts().CPlusPlus && "This function assumes C++"); // C++1z [expr]p14: // The composite pointer type of two operands p1 and p2 having types T1 // and T2 QualType T1 = E1->getType(), T2 = E2->getType(); // where at least one is a pointer or pointer to member type or // std::nullptr_t is: bool T1IsPointerLike = T1->isAnyPointerType() || T1->isMemberPointerType() || T1->isNullPtrType(); bool T2IsPointerLike = T2->isAnyPointerType() || T2->isMemberPointerType() || T2->isNullPtrType(); if (!T1IsPointerLike && !T2IsPointerLike) return QualType(); // - if both p1 and p2 are null pointer constants, std::nullptr_t; // This can't actually happen, following the standard, but we also use this // to implement the end of [expr.conv], which hits this case. // // - if either p1 or p2 is a null pointer constant, T2 or T1, respectively; if (T1IsPointerLike && E2->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) { if (ConvertArgs) E2 = ImpCastExprToType(E2, T1, T1->isMemberPointerType() ? CK_NullToMemberPointer : CK_NullToPointer).get(); return T1; } if (T2IsPointerLike && E1->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) { if (ConvertArgs) E1 = ImpCastExprToType(E1, T2, T2->isMemberPointerType() ? CK_NullToMemberPointer : CK_NullToPointer).get(); return T2; } // Now both have to be pointers or member pointers. 
if (!T1IsPointerLike || !T2IsPointerLike) return QualType(); assert(!T1->isNullPtrType() && !T2->isNullPtrType() && "nullptr_t should be a null pointer constant"); // - if T1 or T2 is "pointer to cv1 void" and the other type is // "pointer to cv2 T", "pointer to cv12 void", where cv12 is // the union of cv1 and cv2; // - if T1 or T2 is "pointer to noexcept function" and the other type is // "pointer to function", where the function types are otherwise the same, // "pointer to function"; // FIXME: This rule is defective: it should also permit removing noexcept // from a pointer to member function. As a Clang extension, we also // permit removing 'noreturn', so we generalize this rule to; // - [Clang] If T1 and T2 are both of type "pointer to function" or // "pointer to member function" and the pointee types can be unified // by a function pointer conversion, that conversion is applied // before checking the following rules. // - if T1 is "pointer to cv1 C1" and T2 is "pointer to cv2 C2", where C1 // is reference-related to C2 or C2 is reference-related to C1 (8.6.3), // the cv-combined type of T1 and T2 or the cv-combined type of T2 and T1, // respectively; // - if T1 is "pointer to member of C1 of type cv1 U1" and T2 is "pointer // to member of C2 of type cv2 U2" where C1 is reference-related to C2 or // C2 is reference-related to C1 (8.6.3), the cv-combined type of T2 and // T1 or the cv-combined type of T1 and T2, respectively; // - if T1 and T2 are similar types (4.5), the cv-combined type of T1 and // T2; // // If looked at in the right way, these bullets all do the same thing. // What we do here is, we build the two possible cv-combined types, and try // the conversions in both directions. If only one works, or if the two // composite types are the same, we have succeeded. // FIXME: extended qualifiers? // // Note that this will fail to find a composite pointer type for "pointer // to void" and "pointer to function". 
We can't actually perform the final // conversion in this case, even though a composite pointer type formally // exists. SmallVector QualifierUnion; SmallVector, 4> MemberOfClass; QualType Composite1 = T1; QualType Composite2 = T2; unsigned NeedConstBefore = 0; while (true) { const PointerType *Ptr1, *Ptr2; if ((Ptr1 = Composite1->getAs()) && (Ptr2 = Composite2->getAs())) { Composite1 = Ptr1->getPointeeType(); Composite2 = Ptr2->getPointeeType(); // If we're allowed to create a non-standard composite type, keep track // of where we need to fill in additional 'const' qualifiers. if (Composite1.getCVRQualifiers() != Composite2.getCVRQualifiers()) NeedConstBefore = QualifierUnion.size(); QualifierUnion.push_back( Composite1.getCVRQualifiers() | Composite2.getCVRQualifiers()); MemberOfClass.push_back(std::make_pair(nullptr, nullptr)); continue; } const MemberPointerType *MemPtr1, *MemPtr2; if ((MemPtr1 = Composite1->getAs()) && (MemPtr2 = Composite2->getAs())) { Composite1 = MemPtr1->getPointeeType(); Composite2 = MemPtr2->getPointeeType(); // If we're allowed to create a non-standard composite type, keep track // of where we need to fill in additional 'const' qualifiers. if (Composite1.getCVRQualifiers() != Composite2.getCVRQualifiers()) NeedConstBefore = QualifierUnion.size(); QualifierUnion.push_back( Composite1.getCVRQualifiers() | Composite2.getCVRQualifiers()); MemberOfClass.push_back(std::make_pair(MemPtr1->getClass(), MemPtr2->getClass())); continue; } // FIXME: block pointer types? // Cannot unwrap any more types. break; } // Apply the function pointer conversion to unify the types. We've already // unwrapped down to the function types, and we want to merge rather than // just convert, so do this ourselves rather than calling // IsFunctionConversion. // // FIXME: In order to match the standard wording as closely as possible, we // currently only do this under a single level of pointers. 
Ideally, we would // allow this in general, and set NeedConstBefore to the relevant depth on // the side(s) where we changed anything. if (QualifierUnion.size() == 1) { if (auto *FPT1 = Composite1->getAs()) { if (auto *FPT2 = Composite2->getAs()) { FunctionProtoType::ExtProtoInfo EPI1 = FPT1->getExtProtoInfo(); FunctionProtoType::ExtProtoInfo EPI2 = FPT2->getExtProtoInfo(); // The result is noreturn if both operands are. bool Noreturn = EPI1.ExtInfo.getNoReturn() && EPI2.ExtInfo.getNoReturn(); EPI1.ExtInfo = EPI1.ExtInfo.withNoReturn(Noreturn); EPI2.ExtInfo = EPI2.ExtInfo.withNoReturn(Noreturn); // The result is nothrow if both operands are. SmallVector ExceptionTypeStorage; EPI1.ExceptionSpec = EPI2.ExceptionSpec = mergeExceptionSpecs(*this, EPI1.ExceptionSpec, EPI2.ExceptionSpec, ExceptionTypeStorage); Composite1 = Context.getFunctionType(FPT1->getReturnType(), FPT1->getParamTypes(), EPI1); Composite2 = Context.getFunctionType(FPT2->getReturnType(), FPT2->getParamTypes(), EPI2); } } } if (NeedConstBefore) { // Extension: Add 'const' to qualifiers that come before the first qualifier // mismatch, so that our (non-standard!) composite type meets the // requirements of C++ [conv.qual]p4 bullet 3. for (unsigned I = 0; I != NeedConstBefore; ++I) if ((QualifierUnion[I] & Qualifiers::Const) == 0) QualifierUnion[I] = QualifierUnion[I] | Qualifiers::Const; } // Rewrap the composites as pointers or member pointers with the union CVRs. 
auto MOC = MemberOfClass.rbegin(); for (unsigned CVR : llvm::reverse(QualifierUnion)) { Qualifiers Quals = Qualifiers::fromCVRMask(CVR); auto Classes = *MOC++; if (Classes.first && Classes.second) { // Rebuild member pointer type Composite1 = Context.getMemberPointerType( Context.getQualifiedType(Composite1, Quals), Classes.first); Composite2 = Context.getMemberPointerType( Context.getQualifiedType(Composite2, Quals), Classes.second); } else { // Rebuild pointer type Composite1 = Context.getPointerType(Context.getQualifiedType(Composite1, Quals)); Composite2 = Context.getPointerType(Context.getQualifiedType(Composite2, Quals)); } } struct Conversion { Sema &S; Expr *&E1, *&E2; QualType Composite; InitializedEntity Entity; InitializationKind Kind; InitializationSequence E1ToC, E2ToC; bool Viable; Conversion(Sema &S, SourceLocation Loc, Expr *&E1, Expr *&E2, QualType Composite) : S(S), E1(E1), E2(E2), Composite(Composite), Entity(InitializedEntity::InitializeTemporary(Composite)), Kind(InitializationKind::CreateCopy(Loc, SourceLocation())), E1ToC(S, Entity, Kind, E1), E2ToC(S, Entity, Kind, E2), Viable(E1ToC && E2ToC) {} bool perform() { ExprResult E1Result = E1ToC.Perform(S, Entity, Kind, E1); if (E1Result.isInvalid()) return true; E1 = E1Result.getAs(); ExprResult E2Result = E2ToC.Perform(S, Entity, Kind, E2); if (E2Result.isInvalid()) return true; E2 = E2Result.getAs(); return false; } }; // Try to convert to each composite pointer type. Conversion C1(*this, Loc, E1, E2, Composite1); if (C1.Viable && Context.hasSameType(Composite1, Composite2)) { if (ConvertArgs && C1.perform()) return QualType(); return C1.Composite; } Conversion C2(*this, Loc, E1, E2, Composite2); if (C1.Viable == C2.Viable) { // Either Composite1 and Composite2 are viable and are different, or // neither is viable. // FIXME: How both be viable and different? return QualType(); } // Convert to the chosen type. if (ConvertArgs && (C1.Viable ? 
C1 : C2).perform()) return QualType(); return C1.Viable ? C1.Composite : C2.Composite; } ExprResult Sema::MaybeBindToTemporary(Expr *E) { if (!E) return ExprError(); assert(!isa(E) && "Double-bound temporary?"); // If the result is a glvalue, we shouldn't bind it. if (!E->isRValue()) return E; // In ARC, calls that return a retainable type can return retained, // in which case we have to insert a consuming cast. if (getLangOpts().ObjCAutoRefCount && E->getType()->isObjCRetainableType()) { bool ReturnsRetained; // For actual calls, we compute this by examining the type of the // called value. if (CallExpr *Call = dyn_cast(E)) { Expr *Callee = Call->getCallee()->IgnoreParens(); QualType T = Callee->getType(); if (T == Context.BoundMemberTy) { // Handle pointer-to-members. if (BinaryOperator *BinOp = dyn_cast(Callee)) T = BinOp->getRHS()->getType(); else if (MemberExpr *Mem = dyn_cast(Callee)) T = Mem->getMemberDecl()->getType(); } if (const PointerType *Ptr = T->getAs()) T = Ptr->getPointeeType(); else if (const BlockPointerType *Ptr = T->getAs()) T = Ptr->getPointeeType(); else if (const MemberPointerType *MemPtr = T->getAs()) T = MemPtr->getPointeeType(); const FunctionType *FTy = T->getAs(); assert(FTy && "call to value not of function type?"); ReturnsRetained = FTy->getExtInfo().getProducesResult(); // ActOnStmtExpr arranges things so that StmtExprs of retainable // type always produce a +1 object. } else if (isa(E)) { ReturnsRetained = true; // We hit this case with the lambda conversion-to-block optimization; // we don't want any extra casts here. } else if (isa(E) && isa(cast(E)->getSubExpr())) { return E; // For message sends and property references, we try to find an // actual method. FIXME: we should infer retention by selector in // cases where we don't have an actual method. 
} else { ObjCMethodDecl *D = nullptr; if (ObjCMessageExpr *Send = dyn_cast(E)) { D = Send->getMethodDecl(); } else if (ObjCBoxedExpr *BoxedExpr = dyn_cast(E)) { D = BoxedExpr->getBoxingMethod(); } else if (ObjCArrayLiteral *ArrayLit = dyn_cast(E)) { // Don't do reclaims if we're using the zero-element array // constant. if (ArrayLit->getNumElements() == 0 && Context.getLangOpts().ObjCRuntime.hasEmptyCollections()) return E; D = ArrayLit->getArrayWithObjectsMethod(); } else if (ObjCDictionaryLiteral *DictLit = dyn_cast(E)) { // Don't do reclaims if we're using the zero-element dictionary // constant. if (DictLit->getNumElements() == 0 && Context.getLangOpts().ObjCRuntime.hasEmptyCollections()) return E; D = DictLit->getDictWithObjectsMethod(); } ReturnsRetained = (D && D->hasAttr()); // Don't do reclaims on performSelector calls; despite their // return type, the invoked method doesn't necessarily actually // return an object. if (!ReturnsRetained && D && D->getMethodFamily() == OMF_performSelector) return E; } // Don't reclaim an object of Class type. if (!ReturnsRetained && E->getType()->isObjCARCImplicitlyUnretainedType()) return E; Cleanup.setExprNeedsCleanups(true); CastKind ck = (ReturnsRetained ? CK_ARCConsumeObject : CK_ARCReclaimReturnedObject); return ImplicitCastExpr::Create(Context, E->getType(), ck, E, nullptr, VK_RValue); } if (!getLangOpts().CPlusPlus) return E; // Search for the base element type (cf. ASTContext::getBaseElementType) with // a fast path for the common case that the type is directly a RecordType. 
const Type *T = Context.getCanonicalType(E->getType().getTypePtr()); const RecordType *RT = nullptr; while (!RT) { switch (T->getTypeClass()) { case Type::Record: RT = cast(T); break; case Type::ConstantArray: case Type::IncompleteArray: case Type::VariableArray: case Type::DependentSizedArray: T = cast(T)->getElementType().getTypePtr(); break; default: return E; } } // That should be enough to guarantee that this type is complete, if we're // not processing a decltype expression. CXXRecordDecl *RD = cast(RT->getDecl()); if (RD->isInvalidDecl() || RD->isDependentContext()) return E; bool IsDecltype = ExprEvalContexts.back().ExprContext == ExpressionEvaluationContextRecord::EK_Decltype; CXXDestructorDecl *Destructor = IsDecltype ? nullptr : LookupDestructor(RD); if (Destructor) { MarkFunctionReferenced(E->getExprLoc(), Destructor); CheckDestructorAccess(E->getExprLoc(), Destructor, PDiag(diag::err_access_dtor_temp) << E->getType()); if (DiagnoseUseOfDecl(Destructor, E->getExprLoc())) return ExprError(); // If destructor is trivial, we can avoid the extra copy. if (Destructor->isTrivial()) return E; // We need a cleanup, but we don't need to remember the temporary. 
Cleanup.setExprNeedsCleanups(true); } CXXTemporary *Temp = CXXTemporary::Create(Context, Destructor); CXXBindTemporaryExpr *Bind = CXXBindTemporaryExpr::Create(Context, Temp, E); if (IsDecltype) ExprEvalContexts.back().DelayedDecltypeBinds.push_back(Bind); return Bind; } /* ExprResult overload: an invalid SubExpr is propagated as ExprError(); otherwise the contained expression is forwarded to the Expr-pointer overload below. */ ExprResult Sema::MaybeCreateExprWithCleanups(ExprResult SubExpr) { if (SubExpr.isInvalid()) return ExprError(); return MaybeCreateExprWithCleanups(SubExpr.get()); } /* Returns SubExpr unchanged when Cleanup.exprNeedsCleanups() is false. Otherwise wraps it in an ExprWithCleanups built from the ExprCleanupObjects entries starting at FirstCleanup (the NumCleanupObjects recorded for the current evaluation context), calls DiscardCleanupsInEvaluationContext(), and returns the wrapper. SubExpr must be non-null (asserted). */ Expr *Sema::MaybeCreateExprWithCleanups(Expr *SubExpr) { assert(SubExpr && "subexpression can't be null!"); CleanupVarDeclMarking(); unsigned FirstCleanup = ExprEvalContexts.back().NumCleanupObjects; assert(ExprCleanupObjects.size() >= FirstCleanup); assert(Cleanup.exprNeedsCleanups() || ExprCleanupObjects.size() == FirstCleanup); if (!Cleanup.exprNeedsCleanups()) return SubExpr; auto Cleanups = llvm::makeArrayRef(ExprCleanupObjects.begin() + FirstCleanup, ExprCleanupObjects.size() - FirstCleanup); auto *E = ExprWithCleanups::Create( Context, SubExpr, Cleanup.cleanupsHaveSideEffects(), Cleanups); DiscardCleanupsInEvaluationContext(); return E; } /* Statement counterpart: returns SubStmt unchanged when no cleanups are needed; otherwise wraps it in a CompoundStmt inside a void-typed StmtExpr and defers to MaybeCreateExprWithCleanups. SubStmt must be non-null (asserted). */ Stmt *Sema::MaybeCreateStmtWithCleanups(Stmt *SubStmt) { assert(SubStmt && "sub-statement can't be null!"); CleanupVarDeclMarking(); if (!Cleanup.exprNeedsCleanups()) return SubStmt; // FIXME: In order to attach the temporaries, wrap the statement into // a StmtExpr; currently this is only used for asm statements. // This is hacky, either create a new CXXStmtWithTemporaries statement or // a new AsmStmtWithTemporaries. CompoundStmt *CompStmt = CompoundStmt::Create( Context, SubStmt, SourceLocation(), SourceLocation()); Expr *E = new (Context) StmtExpr(CompStmt, Context.VoidTy, SourceLocation(), SourceLocation()); return MaybeCreateExprWithCleanups(E); } /// Process the expression contained within a decltype. For such expressions, /// certain semantic checks on temporaries are delayed until this point, and /// are omitted for the 'topmost' call in the decltype expression.
If the /// topmost call bound a temporary, strip that temporary off the expression. ExprResult Sema::ActOnDecltypeExpression(Expr *E) { assert(ExprEvalContexts.back().ExprContext == ExpressionEvaluationContextRecord::EK_Decltype && "not in a decltype expression"); // C++11 [expr.call]p11: // If a function call is a prvalue of object type, // -- if the function call is either // -- the operand of a decltype-specifier, or // -- the right operand of a comma operator that is the operand of a // decltype-specifier, // a temporary object is not introduced for the prvalue. // Recursively rebuild ParenExprs and comma expressions to strip out the // outermost CXXBindTemporaryExpr, if any. if (ParenExpr *PE = dyn_cast(E)) { ExprResult SubExpr = ActOnDecltypeExpression(PE->getSubExpr()); if (SubExpr.isInvalid()) return ExprError(); if (SubExpr.get() == PE->getSubExpr()) return E; return ActOnParenExpr(PE->getLParen(), PE->getRParen(), SubExpr.get()); } if (BinaryOperator *BO = dyn_cast(E)) { if (BO->getOpcode() == BO_Comma) { ExprResult RHS = ActOnDecltypeExpression(BO->getRHS()); if (RHS.isInvalid()) return ExprError(); if (RHS.get() == BO->getRHS()) return E; return new (Context) BinaryOperator( BO->getLHS(), RHS.get(), BO_Comma, BO->getType(), BO->getValueKind(), BO->getObjectKind(), BO->getOperatorLoc(), BO->getFPFeatures()); } } CXXBindTemporaryExpr *TopBind = dyn_cast(E); CallExpr *TopCall = TopBind ? dyn_cast(TopBind->getSubExpr()) : nullptr; if (TopCall) E = TopCall; else TopBind = nullptr; // Disable the special decltype handling now. ExprEvalContexts.back().ExprContext = ExpressionEvaluationContextRecord::EK_Other; // In MS mode, don't perform any extra checking of call return types within a // decltype expression. if (getLangOpts().MSVCCompat) return E; // Perform the semantic checks we delayed until this point. 
for (unsigned I = 0, N = ExprEvalContexts.back().DelayedDecltypeCalls.size(); I != N; ++I) { CallExpr *Call = ExprEvalContexts.back().DelayedDecltypeCalls[I]; if (Call == TopCall) continue; if (CheckCallReturnType(Call->getCallReturnType(Context), Call->getLocStart(), Call, Call->getDirectCallee())) return ExprError(); } // Now all relevant types are complete, check the destructors are accessible // and non-deleted, and annotate them on the temporaries. for (unsigned I = 0, N = ExprEvalContexts.back().DelayedDecltypeBinds.size(); I != N; ++I) { CXXBindTemporaryExpr *Bind = ExprEvalContexts.back().DelayedDecltypeBinds[I]; if (Bind == TopBind) continue; CXXTemporary *Temp = Bind->getTemporary(); CXXRecordDecl *RD = Bind->getType()->getBaseElementTypeUnsafe()->getAsCXXRecordDecl(); CXXDestructorDecl *Destructor = LookupDestructor(RD); Temp->setDestructor(Destructor); MarkFunctionReferenced(Bind->getExprLoc(), Destructor); CheckDestructorAccess(Bind->getExprLoc(), Destructor, PDiag(diag::err_access_dtor_temp) << Bind->getType()); if (DiagnoseUseOfDecl(Destructor, Bind->getExprLoc())) return ExprError(); // We need a cleanup, but we don't need to remember the temporary. Cleanup.setExprNeedsCleanups(true); } // Possibly strip off the top CXXBindTemporaryExpr. return E; } /// Note a set of 'operator->' functions that were used for a member access. static void noteOperatorArrows(Sema &S, ArrayRef OperatorArrows) { unsigned SkipStart = OperatorArrows.size(), SkipCount = 0; // FIXME: Make this configurable? unsigned Limit = 9; if (OperatorArrows.size() > Limit) { // Produce Limit-1 normal notes and one 'skipping' note. 
SkipStart = (Limit - 1) / 2 + (Limit - 1) % 2; SkipCount = OperatorArrows.size() - (Limit - 1); } for (unsigned I = 0; I < OperatorArrows.size(); /**/) { if (I == SkipStart) { S.Diag(OperatorArrows[I]->getLocation(), diag::note_operator_arrows_suppressed) << SkipCount; I += SkipCount; } else { S.Diag(OperatorArrows[I]->getLocation(), diag::note_operator_arrow_here) << OperatorArrows[I]->getCallResultType(); ++I; } } } ExprResult Sema::ActOnStartCXXMemberReference(Scope *S, Expr *Base, SourceLocation OpLoc, tok::TokenKind OpKind, ParsedType &ObjectType, bool &MayBePseudoDestructor) { // Since this might be a postfix expression, get rid of ParenListExprs. ExprResult Result = MaybeConvertParenListExprToParenExpr(S, Base); if (Result.isInvalid()) return ExprError(); Base = Result.get(); Result = CheckPlaceholderExpr(Base); if (Result.isInvalid()) return ExprError(); Base = Result.get(); QualType BaseType = Base->getType(); MayBePseudoDestructor = false; if (BaseType->isDependentType()) { // If we have a pointer to a dependent type and are using the -> operator, // the object type is the type that the pointer points to. We might still // have enough information about that type to do something useful. if (OpKind == tok::arrow) if (const PointerType *Ptr = BaseType->getAs()) BaseType = Ptr->getPointeeType(); ObjectType = ParsedType::make(BaseType); MayBePseudoDestructor = true; return Base; } // C++ [over.match.oper]p8: // [...] When operator->returns, the operator-> is applied to the value // returned, with the original second operand. if (OpKind == tok::arrow) { QualType StartingType = BaseType; bool NoArrowOperatorFound = false; bool FirstIteration = true; FunctionDecl *CurFD = dyn_cast(CurContext); // The set of types we've considered so far. 
llvm::SmallPtrSet CTypes; SmallVector OperatorArrows; CTypes.insert(Context.getCanonicalType(BaseType)); while (BaseType->isRecordType()) { if (OperatorArrows.size() >= getLangOpts().ArrowDepth) { Diag(OpLoc, diag::err_operator_arrow_depth_exceeded) << StartingType << getLangOpts().ArrowDepth << Base->getSourceRange(); noteOperatorArrows(*this, OperatorArrows); Diag(OpLoc, diag::note_operator_arrow_depth) << getLangOpts().ArrowDepth; return ExprError(); } Result = BuildOverloadedArrowExpr( S, Base, OpLoc, // When in a template specialization and on the first loop iteration, // potentially give the default diagnostic (with the fixit in a // separate note) instead of having the error reported back to here // and giving a diagnostic with a fixit attached to the error itself. (FirstIteration && CurFD && CurFD->isFunctionTemplateSpecialization()) ? nullptr : &NoArrowOperatorFound); if (Result.isInvalid()) { if (NoArrowOperatorFound) { if (FirstIteration) { Diag(OpLoc, diag::err_typecheck_member_reference_suggestion) << BaseType << 1 << Base->getSourceRange() << FixItHint::CreateReplacement(OpLoc, "."); OpKind = tok::period; break; } Diag(OpLoc, diag::err_typecheck_member_reference_arrow) << BaseType << Base->getSourceRange(); CallExpr *CE = dyn_cast(Base); if (Decl *CD = (CE ? 
CE->getCalleeDecl() : nullptr)) { Diag(CD->getLocStart(), diag::note_member_reference_arrow_from_operator_arrow); } } return ExprError(); } Base = Result.get(); if (CXXOperatorCallExpr *OpCall = dyn_cast(Base)) OperatorArrows.push_back(OpCall->getDirectCallee()); BaseType = Base->getType(); CanQualType CBaseType = Context.getCanonicalType(BaseType); if (!CTypes.insert(CBaseType).second) { Diag(OpLoc, diag::err_operator_arrow_circular) << StartingType; noteOperatorArrows(*this, OperatorArrows); return ExprError(); } FirstIteration = false; } if (OpKind == tok::arrow && (BaseType->isPointerType() || BaseType->isObjCObjectPointerType())) BaseType = BaseType->getPointeeType(); } // Objective-C properties allow "." access on Objective-C pointer types, // so adjust the base type to the object type itself. if (BaseType->isObjCObjectPointerType()) BaseType = BaseType->getPointeeType(); // C++ [basic.lookup.classref]p2: // [...] If the type of the object expression is of pointer to scalar // type, the unqualified-id is looked up in the context of the complete // postfix-expression. // // This also indicates that we could be parsing a pseudo-destructor-name. // Note that Objective-C class and object types can be pseudo-destructor // expressions or normal member (ivar or property) access expressions, and // it's legal for the type to be incomplete if this is a pseudo-destructor // call. We'll do more incomplete-type checks later in the lookup process, // so just skip this check for ObjC types. 
if (BaseType->isObjCObjectOrInterfaceType()) { ObjectType = ParsedType::make(BaseType); MayBePseudoDestructor = true; return Base; } else if (!BaseType->isRecordType()) { ObjectType = nullptr; MayBePseudoDestructor = true; return Base; } // The object type must be complete (or dependent), or // C++11 [expr.prim.general]p3: // Unlike the object expression in other contexts, *this is not required to // be of complete type for purposes of class member access (5.2.5) outside // the member function body. if (!BaseType->isDependentType() && !isThisOutsideMemberFunctionBody(BaseType) && RequireCompleteType(OpLoc, BaseType, diag::err_incomplete_member_access)) return ExprError(); // C++ [basic.lookup.classref]p2: // If the id-expression in a class member access (5.2.5) is an // unqualified-id, and the type of the object expression is of a class // type C (or of pointer to a class type C), the unqualified-id is looked // up in the scope of class C. [...] ObjectType = ParsedType::make(BaseType); return Base; } static bool CheckArrow(Sema& S, QualType& ObjectType, Expr *&Base, tok::TokenKind& OpKind, SourceLocation OpLoc) { if (Base->hasPlaceholderType()) { ExprResult result = S.CheckPlaceholderExpr(Base); if (result.isInvalid()) return true; Base = result.get(); } ObjectType = Base->getType(); // C++ [expr.pseudo]p2: // The left-hand side of the dot operator shall be of scalar type. The // left-hand side of the arrow operator shall be of pointer to scalar type. // This scalar type is the object type. // Note that this is rather different from the normal handling for the // arrow operator. if (OpKind == tok::arrow) { if (const PointerType *Ptr = ObjectType->getAs()) { ObjectType = Ptr->getPointeeType(); } else if (!Base->isTypeDependent()) { // The user wrote "p->" when they probably meant "p."; fix it. 
S.Diag(OpLoc, diag::err_typecheck_member_reference_suggestion) << ObjectType << true << FixItHint::CreateReplacement(OpLoc, "."); if (S.isSFINAEContext()) return true; OpKind = tok::period; } } return false; } /// Check if it's ok to try and recover dot pseudo destructor calls on /// pointer objects. static bool canRecoverDotPseudoDestructorCallsOnPointerObjects(Sema &SemaRef, QualType DestructedType) { // If this is a record type, check if its destructor is callable. if (auto *RD = DestructedType->getAsCXXRecordDecl()) { if (CXXDestructorDecl *D = SemaRef.LookupDestructor(RD)) return SemaRef.CanUseDecl(D, /*TreatUnavailableAsInvalid=*/false); return false; } // Otherwise, check if it's a type for which it's valid to use a pseudo-dtor. return DestructedType->isDependentType() || DestructedType->isScalarType() || DestructedType->isVectorType(); } ExprResult Sema::BuildPseudoDestructorExpr(Expr *Base, SourceLocation OpLoc, tok::TokenKind OpKind, const CXXScopeSpec &SS, TypeSourceInfo *ScopeTypeInfo, SourceLocation CCLoc, SourceLocation TildeLoc, PseudoDestructorTypeStorage Destructed) { TypeSourceInfo *DestructedTypeInfo = Destructed.getTypeSourceInfo(); QualType ObjectType; if (CheckArrow(*this, ObjectType, Base, OpKind, OpLoc)) return ExprError(); if (!ObjectType->isDependentType() && !ObjectType->isScalarType() && !ObjectType->isVectorType()) { if (getLangOpts().MSVCCompat && ObjectType->isVoidType()) Diag(OpLoc, diag::ext_pseudo_dtor_on_void) << Base->getSourceRange(); else { Diag(OpLoc, diag::err_pseudo_dtor_base_not_scalar) << ObjectType << Base->getSourceRange(); return ExprError(); } } // C++ [expr.pseudo]p2: // [...] The cv-unqualified versions of the object type and of the type // designated by the pseudo-destructor-name shall be the same type. 
if (DestructedTypeInfo) { QualType DestructedType = DestructedTypeInfo->getType(); SourceLocation DestructedTypeStart = DestructedTypeInfo->getTypeLoc().getLocalSourceRange().getBegin(); if (!DestructedType->isDependentType() && !ObjectType->isDependentType()) { if (!Context.hasSameUnqualifiedType(DestructedType, ObjectType)) { // Detect dot pseudo destructor calls on pointer objects, e.g.: // Foo *foo; // foo.~Foo(); if (OpKind == tok::period && ObjectType->isPointerType() && Context.hasSameUnqualifiedType(DestructedType, ObjectType->getPointeeType())) { auto Diagnostic = Diag(OpLoc, diag::err_typecheck_member_reference_suggestion) << ObjectType << /*IsArrow=*/0 << Base->getSourceRange(); // Issue a fixit only when the destructor is valid. if (canRecoverDotPseudoDestructorCallsOnPointerObjects( *this, DestructedType)) Diagnostic << FixItHint::CreateReplacement(OpLoc, "->"); // Recover by setting the object type to the destructed type and the // operator to '->'. ObjectType = DestructedType; OpKind = tok::arrow; } else { Diag(DestructedTypeStart, diag::err_pseudo_dtor_type_mismatch) << ObjectType << DestructedType << Base->getSourceRange() << DestructedTypeInfo->getTypeLoc().getLocalSourceRange(); // Recover by setting the destructed type to the object type. DestructedType = ObjectType; DestructedTypeInfo = Context.getTrivialTypeSourceInfo(ObjectType, DestructedTypeStart); Destructed = PseudoDestructorTypeStorage(DestructedTypeInfo); } } else if (DestructedType.getObjCLifetime() != ObjectType.getObjCLifetime()) { if (DestructedType.getObjCLifetime() == Qualifiers::OCL_None) { // Okay: just pretend that the user provided the correctly-qualified // type. } else { Diag(DestructedTypeStart, diag::err_arc_pseudo_dtor_inconstant_quals) << ObjectType << DestructedType << Base->getSourceRange() << DestructedTypeInfo->getTypeLoc().getLocalSourceRange(); } // Recover by setting the destructed type to the object type. 
DestructedType = ObjectType; DestructedTypeInfo = Context.getTrivialTypeSourceInfo(ObjectType, DestructedTypeStart); Destructed = PseudoDestructorTypeStorage(DestructedTypeInfo); } } } // C++ [expr.pseudo]p2: // [...] Furthermore, the two type-names in a pseudo-destructor-name of the // form // // ::[opt] nested-name-specifier[opt] type-name :: ~ type-name // // shall designate the same scalar type. if (ScopeTypeInfo) { QualType ScopeType = ScopeTypeInfo->getType(); if (!ScopeType->isDependentType() && !ObjectType->isDependentType() && !Context.hasSameUnqualifiedType(ScopeType, ObjectType)) { Diag(ScopeTypeInfo->getTypeLoc().getLocalSourceRange().getBegin(), diag::err_pseudo_dtor_type_mismatch) << ObjectType << ScopeType << Base->getSourceRange() << ScopeTypeInfo->getTypeLoc().getLocalSourceRange(); ScopeType = QualType(); ScopeTypeInfo = nullptr; } } Expr *Result = new (Context) CXXPseudoDestructorExpr(Context, Base, OpKind == tok::arrow, OpLoc, SS.getWithLocInContext(Context), ScopeTypeInfo, CCLoc, TildeLoc, Destructed); return Result; } ExprResult Sema::ActOnPseudoDestructorExpr(Scope *S, Expr *Base, SourceLocation OpLoc, tok::TokenKind OpKind, CXXScopeSpec &SS, UnqualifiedId &FirstTypeName, SourceLocation CCLoc, SourceLocation TildeLoc, UnqualifiedId &SecondTypeName) { assert((FirstTypeName.getKind() == UnqualifiedIdKind::IK_TemplateId || FirstTypeName.getKind() == UnqualifiedIdKind::IK_Identifier) && "Invalid first type name in pseudo-destructor"); assert((SecondTypeName.getKind() == UnqualifiedIdKind::IK_TemplateId || SecondTypeName.getKind() == UnqualifiedIdKind::IK_Identifier) && "Invalid second type name in pseudo-destructor"); QualType ObjectType; if (CheckArrow(*this, ObjectType, Base, OpKind, OpLoc)) return ExprError(); // Compute the object type that we should use for name lookup purposes. Only // record types and dependent types matter. 
ParsedType ObjectTypePtrForLookup; if (!SS.isSet()) { if (ObjectType->isRecordType()) ObjectTypePtrForLookup = ParsedType::make(ObjectType); else if (ObjectType->isDependentType()) ObjectTypePtrForLookup = ParsedType::make(Context.DependentTy); } // Convert the name of the type being destructed (following the ~) into a // type (with source-location information). QualType DestructedType; TypeSourceInfo *DestructedTypeInfo = nullptr; PseudoDestructorTypeStorage Destructed; if (SecondTypeName.getKind() == UnqualifiedIdKind::IK_Identifier) { ParsedType T = getTypeName(*SecondTypeName.Identifier, SecondTypeName.StartLocation, S, &SS, true, false, ObjectTypePtrForLookup, /*IsCtorOrDtorName*/true); if (!T && ((SS.isSet() && !computeDeclContext(SS, false)) || (!SS.isSet() && ObjectType->isDependentType()))) { // The name of the type being destroyed is a dependent name, and we // couldn't find anything useful in scope. Just store the identifier and // its location, and we'll perform (qualified) name lookup again at // template instantiation time. Destructed = PseudoDestructorTypeStorage(SecondTypeName.Identifier, SecondTypeName.StartLocation); } else if (!T) { Diag(SecondTypeName.StartLocation, diag::err_pseudo_dtor_destructor_non_type) << SecondTypeName.Identifier << ObjectType; if (isSFINAEContext()) return ExprError(); // Recover by assuming we had the right type all along. DestructedType = ObjectType; } else DestructedType = GetTypeFromParser(T, &DestructedTypeInfo); } else { // Resolve the template-id to a type.
TemplateIdAnnotation *TemplateId = SecondTypeName.TemplateId; ASTTemplateArgsPtr TemplateArgsPtr(TemplateId->getTemplateArgs(), TemplateId->NumArgs); TypeResult T = ActOnTemplateIdType(TemplateId->SS, TemplateId->TemplateKWLoc, TemplateId->Template, TemplateId->Name, TemplateId->TemplateNameLoc, TemplateId->LAngleLoc, TemplateArgsPtr, TemplateId->RAngleLoc, /*IsCtorOrDtorName*/true); if (T.isInvalid() || !T.get()) { // Recover by assuming we had the right type all along. DestructedType = ObjectType; } else DestructedType = GetTypeFromParser(T.get(), &DestructedTypeInfo); } // If we've performed some kind of recovery, (re-)build the type source // information. if (!DestructedType.isNull()) { if (!DestructedTypeInfo) DestructedTypeInfo = Context.getTrivialTypeSourceInfo(DestructedType, SecondTypeName.StartLocation); Destructed = PseudoDestructorTypeStorage(DestructedTypeInfo); } // Convert the name of the scope type (the type prior to '::') into a type. TypeSourceInfo *ScopeTypeInfo = nullptr; QualType ScopeType; if (FirstTypeName.getKind() == UnqualifiedIdKind::IK_TemplateId || FirstTypeName.Identifier) { if (FirstTypeName.getKind() == UnqualifiedIdKind::IK_Identifier) { ParsedType T = getTypeName(*FirstTypeName.Identifier, FirstTypeName.StartLocation, S, &SS, true, false, ObjectTypePtrForLookup, /*IsCtorOrDtorName*/true); if (!T) { Diag(FirstTypeName.StartLocation, diag::err_pseudo_dtor_destructor_non_type) << FirstTypeName.Identifier << ObjectType; if (isSFINAEContext()) return ExprError(); // Just drop this type. It's unnecessary anyway. ScopeType = QualType(); } else ScopeType = GetTypeFromParser(T, &ScopeTypeInfo); } else { // Resolve the template-id to a type. 
TemplateIdAnnotation *TemplateId = FirstTypeName.TemplateId; ASTTemplateArgsPtr TemplateArgsPtr(TemplateId->getTemplateArgs(), TemplateId->NumArgs); TypeResult T = ActOnTemplateIdType(TemplateId->SS, TemplateId->TemplateKWLoc, TemplateId->Template, TemplateId->Name, TemplateId->TemplateNameLoc, TemplateId->LAngleLoc, TemplateArgsPtr, TemplateId->RAngleLoc, /*IsCtorOrDtorName*/true); if (T.isInvalid() || !T.get()) { // Recover by dropping this type. ScopeType = QualType(); } else ScopeType = GetTypeFromParser(T.get(), &ScopeTypeInfo); } } if (!ScopeType.isNull() && !ScopeTypeInfo) ScopeTypeInfo = Context.getTrivialTypeSourceInfo(ScopeType, FirstTypeName.StartLocation); return BuildPseudoDestructorExpr(Base, OpLoc, OpKind, SS, ScopeTypeInfo, CCLoc, TildeLoc, Destructed); } ExprResult Sema::ActOnPseudoDestructorExpr(Scope *S, Expr *Base, SourceLocation OpLoc, tok::TokenKind OpKind, SourceLocation TildeLoc, const DeclSpec& DS) { QualType ObjectType; if (CheckArrow(*this, ObjectType, Base, OpKind, OpLoc)) return ExprError(); QualType T = BuildDecltypeType(DS.getRepAsExpr(), DS.getTypeSpecTypeLoc(), false); TypeLocBuilder TLB; DecltypeTypeLoc DecltypeTL = TLB.push(T); DecltypeTL.setNameLoc(DS.getTypeSpecTypeLoc()); TypeSourceInfo *DestructedTypeInfo = TLB.getTypeSourceInfo(Context, T); PseudoDestructorTypeStorage Destructed(DestructedTypeInfo); return BuildPseudoDestructorExpr(Base, OpLoc, OpKind, CXXScopeSpec(), nullptr, SourceLocation(), TildeLoc, Destructed); } ExprResult Sema::BuildCXXMemberCallExpr(Expr *E, NamedDecl *FoundDecl, CXXConversionDecl *Method, bool HadMultipleCandidates) { // Convert the expression to match the conversion function's implicit object // parameter. 
ExprResult Exp = PerformObjectArgumentInitialization(E, /*Qualifier=*/nullptr, FoundDecl, Method); if (Exp.isInvalid()) return true; if (Method->getParent()->isLambda() && Method->getConversionType()->isBlockPointerType()) { // This is a lambda conversion to block pointer; check if the argument // was a LambdaExpr. Expr *SubE = E; CastExpr *CE = dyn_cast(SubE); if (CE && CE->getCastKind() == CK_NoOp) SubE = CE->getSubExpr(); SubE = SubE->IgnoreParens(); if (CXXBindTemporaryExpr *BE = dyn_cast(SubE)) SubE = BE->getSubExpr(); if (isa(SubE)) { // For the conversion to block pointer on a lambda expression, we // construct a special BlockLiteral instead; this doesn't really make // a difference in ARC, but outside of ARC the resulting block literal // follows the normal lifetime rules for block literals instead of being // autoreleased. DiagnosticErrorTrap Trap(Diags); PushExpressionEvaluationContext( ExpressionEvaluationContext::PotentiallyEvaluated); ExprResult BlockExp = BuildBlockForLambdaConversion( Exp.get()->getExprLoc(), Exp.get()->getExprLoc(), Method, Exp.get()); PopExpressionEvaluationContext(); if (BlockExp.isInvalid()) Diag(Exp.get()->getExprLoc(), diag::note_lambda_to_block_conv); return BlockExp; } } MemberExpr *ME = new (Context) MemberExpr( Exp.get(), /*IsArrow=*/false, SourceLocation(), Method, SourceLocation(), Context.BoundMemberTy, VK_RValue, OK_Ordinary); if (HadMultipleCandidates) ME->setHadMultipleCandidates(true); MarkMemberReferenced(ME); QualType ResultType = Method->getReturnType(); ExprValueKind VK = Expr::getValueKindForType(ResultType); ResultType = ResultType.getNonLValueExprType(Context); CXXMemberCallExpr *CE = new (Context) CXXMemberCallExpr(Context, ME, None, ResultType, VK, Exp.get()->getLocEnd()); if (CheckFunctionCall(Method, CE, Method->getType()->castAs())) return ExprError(); return CE; } ExprResult Sema::BuildCXXNoexceptExpr(SourceLocation KeyLoc, Expr *Operand, SourceLocation RParen) { // If the operand is an unresolved lookup
expression, the expression is ill- // formed per [over.over]p1, because overloaded function names cannot be used // without arguments except in explicit contexts. ExprResult R = CheckPlaceholderExpr(Operand); if (R.isInvalid()) return R; // The operand may have been modified when checking the placeholder type. Operand = R.get(); if (!inTemplateInstantiation() && Operand->HasSideEffects(Context, false)) { // The expression operand for noexcept is in an unevaluated expression // context, so side effects could result in unintended consequences. Diag(Operand->getExprLoc(), diag::warn_side_effects_unevaluated_context); } CanThrowResult CanThrow = canThrow(Operand); return new (Context) CXXNoexceptExpr(Context.BoolTy, Operand, CanThrow, KeyLoc, RParen); } ExprResult Sema::ActOnNoexceptExpr(SourceLocation KeyLoc, SourceLocation, Expr *Operand, SourceLocation RParen) { return BuildCXXNoexceptExpr(KeyLoc, Operand, RParen); } static bool IsSpecialDiscardedValue(Expr *E) { // In C++11, discarded-value expressions of a certain form are special, // according to [expr]p10: // The lvalue-to-rvalue conversion (4.1) is applied only if the // expression is an lvalue of volatile-qualified type and it has // one of the following forms: E = E->IgnoreParens(); // - id-expression (5.1.1), if (isa(E)) return true; // - subscripting (5.2.1), if (isa(E)) return true; // - class member access (5.2.5), if (isa(E)) return true; // - indirection (5.3.1), if (UnaryOperator *UO = dyn_cast(E)) if (UO->getOpcode() == UO_Deref) return true; if (BinaryOperator *BO = dyn_cast(E)) { // - pointer-to-member operation (5.5), if (BO->isPtrMemOp()) return true; // - comma expression (5.18) where the right operand is one of the above. 
if (BO->getOpcode() == BO_Comma) return IsSpecialDiscardedValue(BO->getRHS()); } // - conditional expression (5.16) where both the second and the third // operands are one of the above, or if (ConditionalOperator *CO = dyn_cast(E)) return IsSpecialDiscardedValue(CO->getTrueExpr()) && IsSpecialDiscardedValue(CO->getFalseExpr()); // The related edge case of "*x ?: *x". if (BinaryConditionalOperator *BCO = dyn_cast(E)) { if (OpaqueValueExpr *OVE = dyn_cast(BCO->getTrueExpr())) return IsSpecialDiscardedValue(OVE->getSourceExpr()) && IsSpecialDiscardedValue(BCO->getFalseExpr()); } // Objective-C++ extensions to the rule. if (isa(E) || isa(E)) return true; return false; } /// Perform the conversions required for an expression used in a /// context that ignores the result. ExprResult Sema::IgnoredValueConversions(Expr *E) { if (E->hasPlaceholderType()) { ExprResult result = CheckPlaceholderExpr(E); if (result.isInvalid()) return E; E = result.get(); } // C99 6.3.2.1: // [Except in specific positions,] an lvalue that does not have // array type is converted to the value stored in the // designated object (and is no longer an lvalue). if (E->isRValue()) { // In C, function designators (i.e. expressions of function type) // are r-values, but we still want to do function-to-pointer decay // on them. This is both technically correct and convenient for // some clients. if (!getLangOpts().CPlusPlus && E->getType()->isFunctionType()) return DefaultFunctionArrayConversion(E); return E; } if (getLangOpts().CPlusPlus) { // The C++11 standard defines the notion of a discarded-value expression; // normally, we don't need to do anything to handle it, but if it is a // volatile lvalue with a special form, we perform an lvalue-to-rvalue // conversion. 
if (getLangOpts().CPlusPlus11 && E->isGLValue() && E->getType().isVolatileQualified() && IsSpecialDiscardedValue(E)) { ExprResult Res = DefaultLvalueConversion(E); if (Res.isInvalid()) return E; E = Res.get(); } // C++1z: // If the expression is a prvalue after this optional conversion, the // temporary materialization conversion is applied. // // We skip this step: IR generation is able to synthesize the storage for // itself in the aggregate case, and adding the extra node to the AST is // just clutter. // FIXME: We don't emit lifetime markers for the temporaries due to this. // FIXME: Do any other AST consumers care about this? return E; } // GCC seems to also exclude expressions of incomplete enum type. if (const EnumType *T = E->getType()->getAs()) { if (!T->getDecl()->isComplete()) { // FIXME: stupid workaround for a codegen bug! E = ImpCastExprToType(E, Context.VoidTy, CK_ToVoid).get(); return E; } } ExprResult Res = DefaultFunctionArrayLvalueConversion(E); if (Res.isInvalid()) return E; E = Res.get(); if (!E->getType()->isVoidType()) RequireCompleteType(E->getExprLoc(), E->getType(), diag::err_incomplete_type); return E; } // If we can unambiguously determine whether Var can never be used // in a constant expression, return true. // - if the variable and its initializer are non-dependent, then // we can unambiguously check if the variable is a constant expression. // - if the initializer is not value dependent - we can determine whether // it can be used to initialize a constant expression. If Init can not // be used to initialize a constant expression we conclude that Var can // never be a constant expression. // - FXIME: if the initializer is dependent, we can still do some analysis and // identify certain cases unambiguously as non-const by using a Visitor: // - such as those that involve odr-use of a ParmVarDecl, involve a new // delete, lambda-expr, dynamic-cast, reinterpret-cast etc... 
static inline bool VariableCanNeverBeAConstantExpression(VarDecl *Var, ASTContext &Context) { if (isa(Var)) return true; const VarDecl *DefVD = nullptr; // If there is no initializer - this can not be a constant expression. if (!Var->getAnyInitializer(DefVD)) return true; assert(DefVD); if (DefVD->isWeak()) return false; EvaluatedStmt *Eval = DefVD->ensureEvaluatedStmt(); Expr *Init = cast(Eval->Value); if (Var->getType()->isDependentType() || Init->isValueDependent()) { // FIXME: Teach the constant evaluator to deal with the non-dependent parts // of value-dependent expressions, and use it here to determine whether the // initializer is a potential constant expression. return false; } return !IsVariableAConstantExpression(Var, Context); } /// Check if the current lambda has any potential captures /// that must be captured by any of its enclosing lambdas that are ready to /// capture. If there is a lambda that can capture a nested /// potential-capture, go ahead and do so. Also, check to see if any /// variables are uncaptureable or do not involve an odr-use so do not /// need to be captured. static void CheckIfAnyEnclosingLambdasMustCaptureAnyPotentialCaptures( Expr *const FE, LambdaScopeInfo *const CurrentLSI, Sema &S) { assert(!S.isUnevaluatedContext()); assert(S.CurContext->isDependentContext()); #ifndef NDEBUG DeclContext *DC = S.CurContext; while (DC && isa(DC)) DC = DC->getParent(); assert( CurrentLSI->CallOperator == DC && "The current call operator must be synchronized with Sema's CurContext"); #endif // NDEBUG const bool IsFullExprInstantiationDependent = FE->isInstantiationDependent(); // All the potentially captureable variables in the current nested // lambda (within a generic outer lambda), must be captured by an // outer lambda that is enclosed within a non-dependent context. 
const unsigned NumPotentialCaptures = CurrentLSI->getNumPotentialVariableCaptures(); for (unsigned I = 0; I != NumPotentialCaptures; ++I) { Expr *VarExpr = nullptr; VarDecl *Var = nullptr; CurrentLSI->getPotentialVariableCapture(I, Var, VarExpr); // If the variable is clearly identified as non-odr-used and the full // expression is not instantiation dependent, only then do we not // need to check enclosing lambda's for speculative captures. // For e.g.: // Even though 'x' is not odr-used, it should be captured. // int test() { // const int x = 10; // auto L = [=](auto a) { // (void) +x + a; // }; // } if (CurrentLSI->isVariableExprMarkedAsNonODRUsed(VarExpr) && !IsFullExprInstantiationDependent) continue; // If we have a capture-capable lambda for the variable, go ahead and // capture the variable in that lambda (and all its enclosing lambdas). if (const Optional Index = getStackIndexOfNearestEnclosingCaptureCapableLambda( S.FunctionScopes, Var, S)) { const unsigned FunctionScopeIndexOfCapturableLambda = Index.getValue(); MarkVarDeclODRUsed(Var, VarExpr->getExprLoc(), S, &FunctionScopeIndexOfCapturableLambda); } const bool IsVarNeverAConstantExpression = VariableCanNeverBeAConstantExpression(Var, S.Context); if (!IsFullExprInstantiationDependent || IsVarNeverAConstantExpression) { // This full expression is not instantiation dependent or the variable // can not be used in a constant expression - which means // this variable must be odr-used here, so diagnose a // capture violation early, if the variable is un-captureable. // This is purely for diagnosing errors early. Otherwise, this // error would get diagnosed when the lambda becomes capture ready. 
QualType CaptureType, DeclRefType; SourceLocation ExprLoc = VarExpr->getExprLoc(); if (S.tryCaptureVariable(Var, ExprLoc, S.TryCapture_Implicit, /*EllipsisLoc*/ SourceLocation(), /*BuildAndDiagnose*/false, CaptureType, DeclRefType, nullptr)) { // We will never be able to capture this variable, and we need // to be able to in any and all instantiations, so diagnose it. S.tryCaptureVariable(Var, ExprLoc, S.TryCapture_Implicit, /*EllipsisLoc*/ SourceLocation(), /*BuildAndDiagnose*/true, CaptureType, DeclRefType, nullptr); } } } // Check if 'this' needs to be captured. if (CurrentLSI->hasPotentialThisCapture()) { // If we have a capture-capable lambda for 'this', go ahead and capture // 'this' in that lambda (and all its enclosing lambdas). if (const Optional Index = getStackIndexOfNearestEnclosingCaptureCapableLambda( S.FunctionScopes, /*0 is 'this'*/ nullptr, S)) { const unsigned FunctionScopeIndexOfCapturableLambda = Index.getValue(); S.CheckCXXThisCapture(CurrentLSI->PotentialThisCaptureLocation, /*Explicit*/ false, /*BuildAndDiagnose*/ true, &FunctionScopeIndexOfCapturableLambda); } } // Reset all the potential captures at the end of each full-expression. CurrentLSI->clearPotentialCaptures(); } static ExprResult attemptRecovery(Sema &SemaRef, const TypoCorrectionConsumer &Consumer, const TypoCorrection &TC) { LookupResult R(SemaRef, Consumer.getLookupResult().getLookupNameInfo(), Consumer.getLookupResult().getLookupKind()); const CXXScopeSpec *SS = Consumer.getSS(); CXXScopeSpec NewSS; // Use an approprate CXXScopeSpec for building the expr. if (auto *NNS = TC.getCorrectionSpecifier()) NewSS.MakeTrivial(SemaRef.Context, NNS, TC.getCorrectionRange()); else if (SS && !TC.WillReplaceSpecifier()) NewSS = *SS; if (auto *ND = TC.getFoundDecl()) { R.setLookupName(ND->getDeclName()); R.addDecl(ND); if (ND->isCXXClassMember()) { // Figure out the correct naming class to add to the LookupResult. 
CXXRecordDecl *Record = nullptr; if (auto *NNS = TC.getCorrectionSpecifier()) Record = NNS->getAsType()->getAsCXXRecordDecl(); if (!Record) Record = dyn_cast(ND->getDeclContext()->getRedeclContext()); if (Record) R.setNamingClass(Record); // Detect and handle the case where the decl might be an implicit // member. bool MightBeImplicitMember; if (!Consumer.isAddressOfOperand()) MightBeImplicitMember = true; else if (!NewSS.isEmpty()) MightBeImplicitMember = false; else if (R.isOverloadedResult()) MightBeImplicitMember = false; else if (R.isUnresolvableResult()) MightBeImplicitMember = true; else MightBeImplicitMember = isa(ND) || isa(ND) || isa(ND); if (MightBeImplicitMember) return SemaRef.BuildPossibleImplicitMemberExpr( NewSS, /*TemplateKWLoc*/ SourceLocation(), R, /*TemplateArgs*/ nullptr, /*S*/ nullptr); } else if (auto *Ivar = dyn_cast(ND)) { return SemaRef.LookupInObjCMethod(R, Consumer.getScope(), Ivar->getIdentifier()); } } return SemaRef.BuildDeclarationNameExpr(NewSS, R, /*NeedsADL*/ false, /*AcceptInvalidDecl*/ true); } namespace { class FindTypoExprs : public RecursiveASTVisitor { llvm::SmallSetVector &TypoExprs; public: explicit FindTypoExprs(llvm::SmallSetVector &TypoExprs) : TypoExprs(TypoExprs) {} bool VisitTypoExpr(TypoExpr *TE) { TypoExprs.insert(TE); return true; } }; class TransformTypos : public TreeTransform { typedef TreeTransform BaseTransform; VarDecl *InitDecl; // A decl to avoid as a correction because it is in the // process of being initialized. llvm::function_ref ExprFilter; llvm::SmallSetVector TypoExprs, AmbiguousTypoExprs; llvm::SmallDenseMap TransformCache; llvm::SmallDenseMap OverloadResolution; /// Emit diagnostics for all of the TypoExprs encountered. /// If the TypoExprs were successfully corrected, then the diagnostics should /// suggest the corrections. Otherwise the diagnostics will not suggest /// anything (having been passed an empty TypoCorrection). 
void EmitAllDiagnostics() { for (TypoExpr *TE : TypoExprs) { auto &State = SemaRef.getTypoExprState(TE); if (State.DiagHandler) { TypoCorrection TC = State.Consumer->getCurrentCorrection(); ExprResult Replacement = TransformCache[TE]; // Extract the NamedDecl from the transformed TypoExpr and add it to the // TypoCorrection, replacing the existing decls. This ensures the right // NamedDecl is used in diagnostics e.g. in the case where overload // resolution was used to select one from several possible decls that // had been stored in the TypoCorrection. if (auto *ND = getDeclFromExpr( Replacement.isInvalid() ? nullptr : Replacement.get())) TC.setCorrectionDecl(ND); State.DiagHandler(TC); } SemaRef.clearDelayedTypo(TE); } } /// If corrections for the first TypoExpr have been exhausted for a /// given combination of the other TypoExprs, retry those corrections against /// the next combination of substitutions for the other TypoExprs by advancing /// to the next potential correction of the second TypoExpr. For the second /// and subsequent TypoExprs, if its stream of corrections has been exhausted, /// the stream is reset and the next TypoExpr's stream is advanced by one (a /// TypoExpr's correction stream is advanced by removing the TypoExpr from the /// TransformCache). Returns true if there is still any untried combinations /// of corrections. 
bool CheckAndAdvanceTypoExprCorrectionStreams() { for (auto TE : TypoExprs) { auto &State = SemaRef.getTypoExprState(TE); TransformCache.erase(TE); if (!State.Consumer->finished()) return true; State.Consumer->resetCorrectionStream(); } return false; } NamedDecl *getDeclFromExpr(Expr *E) { if (auto *OE = dyn_cast_or_null(E)) E = OverloadResolution[OE]; if (!E) return nullptr; if (auto *DRE = dyn_cast(E)) return DRE->getFoundDecl(); if (auto *ME = dyn_cast(E)) return ME->getFoundDecl(); // FIXME: Add any other expr types that could be be seen by the delayed typo // correction TreeTransform for which the corresponding TypoCorrection could // contain multiple decls. return nullptr; } ExprResult TryTransform(Expr *E) { Sema::SFINAETrap Trap(SemaRef); ExprResult Res = TransformExpr(E); if (Trap.hasErrorOccurred() || Res.isInvalid()) return ExprError(); return ExprFilter(Res.get()); } public: TransformTypos(Sema &SemaRef, VarDecl *InitDecl, llvm::function_ref Filter) : BaseTransform(SemaRef), InitDecl(InitDecl), ExprFilter(Filter) {} ExprResult RebuildCallExpr(Expr *Callee, SourceLocation LParenLoc, MultiExprArg Args, SourceLocation RParenLoc, Expr *ExecConfig = nullptr) { auto Result = BaseTransform::RebuildCallExpr(Callee, LParenLoc, Args, RParenLoc, ExecConfig); if (auto *OE = dyn_cast(Callee)) { if (Result.isUsable()) { Expr *ResultCall = Result.get(); if (auto *BE = dyn_cast(ResultCall)) ResultCall = BE->getSubExpr(); if (auto *CE = dyn_cast(ResultCall)) OverloadResolution[OE] = CE->getCallee(); } } return Result; } ExprResult TransformLambdaExpr(LambdaExpr *E) { return Owned(E); } ExprResult TransformBlockExpr(BlockExpr *E) { return Owned(E); } ExprResult Transform(Expr *E) { ExprResult Res; while (true) { Res = TryTransform(E); // Exit if either the transform was valid or if there were no TypoExprs // to transform that still have any untried correction candidates.. 
if (!Res.isInvalid() || !CheckAndAdvanceTypoExprCorrectionStreams()) break; } // Ensure none of the TypoExprs have multiple typo correction candidates // with the same edit length that pass all the checks and filters. // TODO: Properly handle various permutations of possible corrections when // there is more than one potentially ambiguous typo correction. // Also, disable typo correction while attempting the transform when // handling potentially ambiguous typo corrections as any new TypoExprs will // have been introduced by the application of one of the correction // candidates and add little to no value if corrected. SemaRef.DisableTypoCorrection = true; while (!AmbiguousTypoExprs.empty()) { auto TE = AmbiguousTypoExprs.back(); auto Cached = TransformCache[TE]; auto &State = SemaRef.getTypoExprState(TE); State.Consumer->saveCurrentPosition(); TransformCache.erase(TE); if (!TryTransform(E).isInvalid()) { State.Consumer->resetCorrectionStream(); TransformCache.erase(TE); Res = ExprError(); break; } AmbiguousTypoExprs.remove(TE); State.Consumer->restoreSavedPosition(); TransformCache[TE] = Cached; } SemaRef.DisableTypoCorrection = false; // Ensure that all of the TypoExprs within the current Expr have been found. if (!Res.isUsable()) FindTypoExprs(TypoExprs).TraverseStmt(E); EmitAllDiagnostics(); return Res; } ExprResult TransformTypoExpr(TypoExpr *E) { // If the TypoExpr hasn't been seen before, record it. Otherwise, return the // cached transformation result if there is one and the TypoExpr isn't the // first one that was encountered. auto &CacheEntry = TransformCache[E]; if (!TypoExprs.insert(E) && !CacheEntry.isUnset()) { return CacheEntry; } auto &State = SemaRef.getTypoExprState(E); assert(State.Consumer && "Cannot transform a cleared TypoExpr"); // For the first TypoExpr and an uncached TypoExpr, find the next likely // typo correction and return it. 
while (TypoCorrection TC = State.Consumer->getNextCorrection()) { if (InitDecl && TC.getFoundDecl() == InitDecl) continue; // FIXME: If we would typo-correct to an invalid declaration, it's // probably best to just suppress all errors from this typo correction. ExprResult NE = State.RecoveryHandler ? State.RecoveryHandler(SemaRef, E, TC) : attemptRecovery(SemaRef, *State.Consumer, TC); if (!NE.isInvalid()) { // Check whether there may be a second viable correction with the same // edit distance; if so, remember this TypoExpr may have an ambiguous // correction so it can be more thoroughly vetted later. TypoCorrection Next; if ((Next = State.Consumer->peekNextCorrection()) && Next.getEditDistance(false) == TC.getEditDistance(false)) { AmbiguousTypoExprs.insert(E); } else { AmbiguousTypoExprs.remove(E); } assert(!NE.isUnset() && "Typo was transformed into a valid-but-null ExprResult"); return CacheEntry = NE; } } return CacheEntry = ExprError(); } }; } ExprResult Sema::CorrectDelayedTyposInExpr(Expr *E, VarDecl *InitDecl, llvm::function_ref Filter) { // If the current evaluation context indicates there are uncorrected typos // and the current expression isn't guaranteed to not have typos, try to // resolve any TypoExpr nodes that might be in the expression. 
if (E && !ExprEvalContexts.empty() && ExprEvalContexts.back().NumTypos && (E->isTypeDependent() || E->isValueDependent() || E->isInstantiationDependent())) { auto TyposResolved = DelayedTypos.size(); auto Result = TransformTypos(*this, InitDecl, Filter).Transform(E); TyposResolved -= DelayedTypos.size(); if (Result.isInvalid() || Result.get() != E) { ExprEvalContexts.back().NumTypos -= TyposResolved; return Result; } assert(TyposResolved == 0 && "Corrected typo but got same Expr back?"); } return E; } ExprResult Sema::ActOnFinishFullExpr(Expr *FE, SourceLocation CC, bool DiscardedValue, bool IsConstexpr, bool IsLambdaInitCaptureInitializer) { ExprResult FullExpr = FE; if (!FullExpr.get()) return ExprError(); // If we are an init-expression in a lambdas init-capture, we should not // diagnose an unexpanded pack now (will be diagnosed once lambda-expr // containing full-expression is done). // template void test(Ts ... t) { // test([&a(t)]() { <-- (t) is an init-expr that shouldn't be diagnosed now. // return a; // }() ...); // } // FIXME: This is a hack. It would be better if we pushed the lambda scope // when we parse the lambda introducer, and teach capturing (but not // unexpanded pack detection) to walk over LambdaScopeInfos which don't have a // corresponding class yet (that is, have LambdaScopeInfo either represent a // lambda where we've entered the introducer but not the body, or represent a // lambda where we've entered the body, depending on where the // parser/instantiation has got to). if (!IsLambdaInitCaptureInitializer && DiagnoseUnexpandedParameterPack(FullExpr.get())) return ExprError(); // Top-level expressions default to 'id' when we're in a debugger. 
if (DiscardedValue && getLangOpts().DebuggerCastResultToId && FullExpr.get()->getType() == Context.UnknownAnyTy) { FullExpr = forceUnknownAnyToType(FullExpr.get(), Context.getObjCIdType()); if (FullExpr.isInvalid()) return ExprError(); } if (DiscardedValue) { FullExpr = CheckPlaceholderExpr(FullExpr.get()); if (FullExpr.isInvalid()) return ExprError(); FullExpr = IgnoredValueConversions(FullExpr.get()); if (FullExpr.isInvalid()) return ExprError(); } FullExpr = CorrectDelayedTyposInExpr(FullExpr.get()); if (FullExpr.isInvalid()) return ExprError(); CheckCompletedExpr(FullExpr.get(), CC, IsConstexpr); // At the end of this full expression (which could be a deeply nested // lambda), if there is a potential capture within the nested lambda, // have the outer capture-able lambda try and capture it. // Consider the following code: // void f(int, int); // void f(const int&, double); // void foo() { // const int x = 10, y = 20; // auto L = [=](auto a) { // auto M = [=](auto b) { // f(x, b); <-- requires x to be captured by L and M // f(y, a); <-- requires y to be captured by L, but not all Ms // }; // }; // } // FIXME: Also consider what happens for something like this that involves // the gnu-extension statement-expressions or even lambda-init-captures: // void f() { // const int n = 0; // auto L = [&](auto a) { // +n + ({ 0; a; }); // }; // } // // Here, we see +n, and then the full-expression 0; ends, so we don't // capture n (and instead remove it from our list of potential captures), // and then the full-expression +n + ({ 0; }); ends, but it's too late // for us to see that we need to capture n after all. LambdaScopeInfo *const CurrentLSI = getCurLambda(/*IgnoreCapturedRegions=*/true); // FIXME: PR 17877 showed that getCurLambda() can return a valid pointer // even if CurContext is not a lambda call operator. Refer to that Bug Report // for an example of the code that might cause this asynchrony. 
// By ensuring we are in the context of a lambda's call operator // we can fix the bug (we only need to check whether we need to capture // if we are within a lambda's body); but per the comments in that // PR, a proper fix would entail : // "Alternative suggestion: // - Add to Sema an integer holding the smallest (outermost) scope // index that we are *lexically* within, and save/restore/set to // FunctionScopes.size() in InstantiatingTemplate's // constructor/destructor. // - Teach the handful of places that iterate over FunctionScopes to // stop at the outermost enclosing lexical scope." DeclContext *DC = CurContext; while (DC && isa(DC)) DC = DC->getParent(); const bool IsInLambdaDeclContext = isLambdaCallOperator(DC); if (IsInLambdaDeclContext && CurrentLSI && CurrentLSI->hasPotentialCaptures() && !FullExpr.isInvalid()) CheckIfAnyEnclosingLambdasMustCaptureAnyPotentialCaptures(FE, CurrentLSI, *this); return MaybeCreateExprWithCleanups(FullExpr); } StmtResult Sema::ActOnFinishFullStmt(Stmt *FullStmt) { if (!FullStmt) return StmtError(); return MaybeCreateStmtWithCleanups(FullStmt); } Sema::IfExistsResult Sema::CheckMicrosoftIfExistsSymbol(Scope *S, CXXScopeSpec &SS, const DeclarationNameInfo &TargetNameInfo) { DeclarationName TargetName = TargetNameInfo.getName(); if (!TargetName) return IER_DoesNotExist; // If the name itself is dependent, then the result is dependent. if (TargetName.isDependentName()) return IER_Dependent; // Do the redeclaration lookup in the current scope. 
LookupResult R(*this, TargetNameInfo, Sema::LookupAnyName, Sema::NotForRedeclaration); LookupParsedName(R, S, &SS); R.suppressDiagnostics(); switch (R.getResultKind()) { case LookupResult::Found: case LookupResult::FoundOverloaded: case LookupResult::FoundUnresolvedValue: case LookupResult::Ambiguous: return IER_Exists; case LookupResult::NotFound: return IER_DoesNotExist; case LookupResult::NotFoundInCurrentInstantiation: return IER_Dependent; } llvm_unreachable("Invalid LookupResult Kind!"); } Sema::IfExistsResult Sema::CheckMicrosoftIfExistsSymbol(Scope *S, SourceLocation KeywordLoc, bool IsIfExists, CXXScopeSpec &SS, UnqualifiedId &Name) { DeclarationNameInfo TargetNameInfo = GetNameFromUnqualifiedId(Name); // Check for an unexpanded parameter pack. auto UPPC = IsIfExists ? UPPC_IfExists : UPPC_IfNotExists; if (DiagnoseUnexpandedParameterPack(SS, UPPC) || DiagnoseUnexpandedParameterPack(TargetNameInfo, UPPC)) return IER_Error; return CheckMicrosoftIfExistsSymbol(S, SS, TargetNameInfo); } Index: projects/clang700-import/contrib/llvm/tools/clang =================================================================== --- projects/clang700-import/contrib/llvm/tools/clang (revision 337644) +++ projects/clang700-import/contrib/llvm/tools/clang (revision 337645) Property changes on: projects/clang700-import/contrib/llvm/tools/clang ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /vendor/clang/dist-release_70:r337310-337642 Index: projects/clang700-import/contrib/llvm/tools/lld/COFF/InputFiles.cpp =================================================================== --- projects/clang700-import/contrib/llvm/tools/lld/COFF/InputFiles.cpp (revision 337644) +++ projects/clang700-import/contrib/llvm/tools/lld/COFF/InputFiles.cpp (revision 337645) @@ -1,519 +1,563 @@ //===- InputFiles.cpp -----------------------------------------------------===// // // The LLVM Linker // // This file is distributed under the 
University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "InputFiles.h" #include "Chunks.h" #include "Config.h" #include "Driver.h" #include "SymbolTable.h" #include "Symbols.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "llvm-c/lto.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/Object/Binary.h" #include "llvm/Object/COFF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Target/TargetOptions.h" #include #include #include using namespace llvm; using namespace llvm::COFF; using namespace llvm::object; using namespace llvm::support::endian; using llvm::Triple; using llvm::support::ulittle32_t; namespace lld { namespace coff { std::vector ObjFile::Instances; std::vector ImportFile::Instances; std::vector BitcodeFile::Instances; /// Checks that Source is compatible with being a weak alias to Target. /// If Source is Undefined and has no weak alias set, makes it a weak /// alias to Target. static void checkAndSetWeakAlias(SymbolTable *Symtab, InputFile *F, Symbol *Source, Symbol *Target) { if (auto *U = dyn_cast(Source)) { if (U->WeakAlias && U->WeakAlias != Target) Symtab->reportDuplicate(Source, F); U->WeakAlias = Target; } } ArchiveFile::ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {} void ArchiveFile::parse() { // Parse a MemoryBufferRef as an archive file. File = CHECK(Archive::create(MB), this); // Read the symbol table to construct Lazy objects. for (const Archive::Symbol &Sym : File->symbols()) Symtab->addLazy(this, Sym); } // Returns a buffer pointing to a member file containing a given symbol. 
void ArchiveFile::addMember(const Archive::Symbol *Sym) { const Archive::Child &C = CHECK(Sym->getMember(), "could not get the member for symbol " + Sym->getName()); // Return an empty buffer if we have already returned the same buffer. if (!Seen.insert(C.getChildOffset()).second) return; Driver->enqueueArchiveMember(C, Sym->getName(), getName()); } std::vector getArchiveMembers(Archive *File) { std::vector V; Error Err = Error::success(); for (const ErrorOr &COrErr : File->children(Err)) { Archive::Child C = CHECK(COrErr, File->getFileName() + ": could not get the child of the archive"); MemoryBufferRef MBRef = CHECK(C.getMemoryBufferRef(), File->getFileName() + ": could not get the buffer for a child of the archive"); V.push_back(MBRef); } if (Err) fatal(File->getFileName() + ": Archive::children failed: " + toString(std::move(Err))); return V; } void ObjFile::parse() { // Parse a memory buffer as a COFF file. std::unique_ptr Bin = CHECK(createBinary(MB), this); if (auto *Obj = dyn_cast(Bin.get())) { Bin.release(); COFFObj.reset(Obj); } else { fatal(toString(this) + " is not a COFF file"); } // Read section and symbol tables. initializeChunks(); initializeSymbols(); } // We set SectionChunk pointers in the SparseChunks vector to this value // temporarily to mark comdat sections as having an unknown resolution. As we // walk the object file's symbol table, once we visit either a leader symbol or // an associative section definition together with the parent comdat's leader, // we set the pointer to either nullptr (to mark the section as discarded) or a // valid SectionChunk for that section. 
static SectionChunk *const PendingComdat = reinterpret_cast(1); void ObjFile::initializeChunks() { uint32_t NumSections = COFFObj->getNumberOfSections(); Chunks.reserve(NumSections); SparseChunks.resize(NumSections + 1); for (uint32_t I = 1; I < NumSections + 1; ++I) { const coff_section *Sec; if (auto EC = COFFObj->getSection(I, Sec)) fatal("getSection failed: #" + Twine(I) + ": " + EC.message()); if (Sec->Characteristics & IMAGE_SCN_LNK_COMDAT) SparseChunks[I] = PendingComdat; else SparseChunks[I] = readSection(I, nullptr, ""); } } SectionChunk *ObjFile::readSection(uint32_t SectionNumber, const coff_aux_section_definition *Def, StringRef LeaderName) { const coff_section *Sec; StringRef Name; if (auto EC = COFFObj->getSection(SectionNumber, Sec)) fatal("getSection failed: #" + Twine(SectionNumber) + ": " + EC.message()); if (auto EC = COFFObj->getSectionName(Sec, Name)) fatal("getSectionName failed: #" + Twine(SectionNumber) + ": " + EC.message()); if (Name == ".drectve") { ArrayRef Data; COFFObj->getSectionContents(Sec, Data); Directives = std::string((const char *)Data.data(), Data.size()); return nullptr; } // Object files may have DWARF debug info or MS CodeView debug info // (or both). // // DWARF sections don't need any special handling from the perspective // of the linker; they are just a data section containing relocations. // We can just link them to complete debug info. // // CodeView needs a linker support. We need to interpret and debug // info, and then write it to a separate .pdb file. // Ignore DWARF debug info unless /debug is given. if (!Config->Debug && Name.startswith(".debug_")) return nullptr; if (Sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE) return nullptr; auto *C = make(this, Sec); if (Def) C->Checksum = Def->CheckSum; // CodeView sections are stored to a different vector because they are not // linked in the regular manner. 
if (C->isCodeView()) DebugChunks.push_back(C); else if (Config->GuardCF != GuardCFLevel::Off && Name == ".gfids$y") GuardFidChunks.push_back(C); else if (Config->GuardCF != GuardCFLevel::Off && Name == ".gljmp$y") GuardLJmpChunks.push_back(C); else if (Name == ".sxdata") SXDataChunks.push_back(C); else if (Config->TailMerge && Sec->NumberOfRelocations == 0 && Name == ".rdata" && LeaderName.startswith("??_C@")) // COFF sections that look like string literal sections (i.e. no // relocations, in .rdata, leader symbol name matches the MSVC name mangling // for string literals) are subject to string tail merging. MergeChunk::addSection(C); else Chunks.push_back(C); return C; } void ObjFile::readAssociativeDefinition( COFFSymbolRef Sym, const coff_aux_section_definition *Def) { - SectionChunk *Parent = SparseChunks[Def->getNumber(Sym.isBigObj())]; + readAssociativeDefinition(Sym, Def, Def->getNumber(Sym.isBigObj())); +} +void ObjFile::readAssociativeDefinition(COFFSymbolRef Sym, + const coff_aux_section_definition *Def, + uint32_t ParentSection) { + SectionChunk *Parent = SparseChunks[ParentSection]; + // If the parent is pending, it probably means that its section definition // appears after us in the symbol table. Leave the associated section as // pending; we will handle it during the second pass in initializeSymbols(). if (Parent == PendingComdat) return; // Check whether the parent is prevailing. If it is, so are we, and we read // the section; otherwise mark it as discarded. 
int32_t SectionNumber = Sym.getSectionNumber(); if (Parent) { SparseChunks[SectionNumber] = readSection(SectionNumber, Def, ""); if (SparseChunks[SectionNumber]) Parent->addAssociative(SparseChunks[SectionNumber]); } else { SparseChunks[SectionNumber] = nullptr; } } +void ObjFile::recordPrevailingSymbolForMingw( + COFFSymbolRef Sym, DenseMap &PrevailingSectionMap) { + // For comdat symbols in executable sections, where this is the copy + // of the section chunk we actually include instead of discarding it, + // add the symbol to a map to allow using it for implicitly + // associating .[px]data$ sections to it. + int32_t SectionNumber = Sym.getSectionNumber(); + SectionChunk *SC = SparseChunks[SectionNumber]; + if (SC && SC->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) { + StringRef Name; + COFFObj->getSymbolName(Sym, Name); + PrevailingSectionMap[Name] = SectionNumber; + } +} + +void ObjFile::maybeAssociateSEHForMingw( + COFFSymbolRef Sym, const coff_aux_section_definition *Def, + const DenseMap &PrevailingSectionMap) { + StringRef Name; + COFFObj->getSymbolName(Sym, Name); + if (Name.consume_front(".pdata$") || Name.consume_front(".xdata$")) { + // For MinGW, treat .[px]data$ as implicitly associative to + // the symbol . 
+ auto ParentSym = PrevailingSectionMap.find(Name); + if (ParentSym != PrevailingSectionMap.end()) + readAssociativeDefinition(Sym, Def, ParentSym->second); + } +} + Symbol *ObjFile::createRegular(COFFSymbolRef Sym) { SectionChunk *SC = SparseChunks[Sym.getSectionNumber()]; if (Sym.isExternal()) { StringRef Name; COFFObj->getSymbolName(Sym, Name); if (SC) return Symtab->addRegular(this, Name, Sym.getGeneric(), SC); return Symtab->addUndefined(Name, this, false); } if (SC) return make(this, /*Name*/ "", false, /*IsExternal*/ false, Sym.getGeneric(), SC); return nullptr; } void ObjFile::initializeSymbols() { uint32_t NumSymbols = COFFObj->getNumberOfSymbols(); Symbols.resize(NumSymbols); SmallVector, 8> WeakAliases; std::vector PendingIndexes; PendingIndexes.reserve(NumSymbols); + DenseMap PrevailingSectionMap; std::vector ComdatDefs( COFFObj->getNumberOfSections() + 1); for (uint32_t I = 0; I < NumSymbols; ++I) { COFFSymbolRef COFFSym = check(COFFObj->getSymbol(I)); + bool PrevailingComdat; if (COFFSym.isUndefined()) { Symbols[I] = createUndefined(COFFSym); } else if (COFFSym.isWeakExternal()) { Symbols[I] = createUndefined(COFFSym); uint32_t TagIndex = COFFSym.getAux()->TagIndex; WeakAliases.emplace_back(Symbols[I], TagIndex); - } else if (Optional OptSym = createDefined(COFFSym, ComdatDefs)) { + } else if (Optional OptSym = + createDefined(COFFSym, ComdatDefs, PrevailingComdat)) { Symbols[I] = *OptSym; + if (Config->MinGW && PrevailingComdat) + recordPrevailingSymbolForMingw(COFFSym, PrevailingSectionMap); } else { // createDefined() returns None if a symbol belongs to a section that // was pending at the point when the symbol was read. This can happen in // two cases: // 1) section definition symbol for a comdat leader; // 2) symbol belongs to a comdat section associated with a section whose // section definition symbol appears later in the symbol table. 
// In both of these cases, we can expect the section to be resolved by // the time we finish visiting the remaining symbols in the symbol // table. So we postpone the handling of this symbol until that time. PendingIndexes.push_back(I); } I += COFFSym.getNumberOfAuxSymbols(); } for (uint32_t I : PendingIndexes) { COFFSymbolRef Sym = check(COFFObj->getSymbol(I)); - if (auto *Def = Sym.getSectionDefinition()) + if (auto *Def = Sym.getSectionDefinition()) { if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) readAssociativeDefinition(Sym, Def); + else if (Config->MinGW) + maybeAssociateSEHForMingw(Sym, Def, PrevailingSectionMap); + } if (SparseChunks[Sym.getSectionNumber()] == PendingComdat) { StringRef Name; COFFObj->getSymbolName(Sym, Name); log("comdat section " + Name + " without leader and unassociated, discarding"); continue; } Symbols[I] = createRegular(Sym); } for (auto &KV : WeakAliases) { Symbol *Sym = KV.first; uint32_t Idx = KV.second; checkAndSetWeakAlias(Symtab, this, Sym, Symbols[Idx]); } } Symbol *ObjFile::createUndefined(COFFSymbolRef Sym) { StringRef Name; COFFObj->getSymbolName(Sym, Name); return Symtab->addUndefined(Name, this, Sym.isWeakExternal()); } Optional ObjFile::createDefined( COFFSymbolRef Sym, - std::vector &ComdatDefs) { + std::vector &ComdatDefs, + bool &Prevailing) { + Prevailing = false; auto GetName = [&]() { StringRef S; COFFObj->getSymbolName(Sym, S); return S; }; if (Sym.isCommon()) { auto *C = make(Sym); Chunks.push_back(C); return Symtab->addCommon(this, GetName(), Sym.getValue(), Sym.getGeneric(), C); } if (Sym.isAbsolute()) { StringRef Name = GetName(); // Skip special symbols. 
if (Name == "@comp.id") return nullptr; if (Name == "@feat.00") { Feat00Flags = Sym.getValue(); return nullptr; } if (Sym.isExternal()) return Symtab->addAbsolute(Name, Sym); return make(Name, Sym); } int32_t SectionNumber = Sym.getSectionNumber(); if (SectionNumber == llvm::COFF::IMAGE_SYM_DEBUG) return nullptr; if (llvm::COFF::isReservedSectionNumber(SectionNumber)) fatal(toString(this) + ": " + GetName() + " should not refer to special section " + Twine(SectionNumber)); if ((uint32_t)SectionNumber >= SparseChunks.size()) fatal(toString(this) + ": " + GetName() + " should not refer to non-existent section " + Twine(SectionNumber)); // Handle comdat leader symbols. if (const coff_aux_section_definition *Def = ComdatDefs[SectionNumber]) { ComdatDefs[SectionNumber] = nullptr; Symbol *Leader; - bool Prevailing; if (Sym.isExternal()) { std::tie(Leader, Prevailing) = Symtab->addComdat(this, GetName(), Sym.getGeneric()); } else { Leader = make(this, /*Name*/ "", false, /*IsExternal*/ false, Sym.getGeneric()); Prevailing = true; } if (Prevailing) { SectionChunk *C = readSection(SectionNumber, Def, GetName()); SparseChunks[SectionNumber] = C; C->Sym = cast(Leader); cast(Leader)->Data = &C->Repl; } else { SparseChunks[SectionNumber] = nullptr; } return Leader; } // Read associative section definitions and prepare to handle the comdat // leader symbol by setting the section's ComdatDefs pointer if we encounter a // non-associative comdat. 
if (SparseChunks[SectionNumber] == PendingComdat) { if (auto *Def = Sym.getSectionDefinition()) { if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) readAssociativeDefinition(Sym, Def); else ComdatDefs[SectionNumber] = Def; } } if (SparseChunks[SectionNumber] == PendingComdat) return None; return createRegular(Sym); } MachineTypes ObjFile::getMachineType() { if (COFFObj) return static_cast(COFFObj->getMachine()); return IMAGE_FILE_MACHINE_UNKNOWN; } StringRef ltrim1(StringRef S, const char *Chars) { if (!S.empty() && strchr(Chars, S[0])) return S.substr(1); return S; } void ImportFile::parse() { const char *Buf = MB.getBufferStart(); const char *End = MB.getBufferEnd(); const auto *Hdr = reinterpret_cast(Buf); // Check if the total size is valid. if ((size_t)(End - Buf) != (sizeof(*Hdr) + Hdr->SizeOfData)) fatal("broken import library"); // Read names and create an __imp_ symbol. StringRef Name = Saver.save(StringRef(Buf + sizeof(*Hdr))); StringRef ImpName = Saver.save("__imp_" + Name); const char *NameStart = Buf + sizeof(coff_import_header) + Name.size() + 1; DLLName = StringRef(NameStart); StringRef ExtName; switch (Hdr->getNameType()) { case IMPORT_ORDINAL: ExtName = ""; break; case IMPORT_NAME: ExtName = Name; break; case IMPORT_NAME_NOPREFIX: ExtName = ltrim1(Name, "?@_"); break; case IMPORT_NAME_UNDECORATE: ExtName = ltrim1(Name, "?@_"); ExtName = ExtName.substr(0, ExtName.find('@')); break; } this->Hdr = Hdr; ExternalName = ExtName; ImpSym = Symtab->addImportData(ImpName, this); if (Hdr->getType() == llvm::COFF::IMPORT_CONST) static_cast(Symtab->addImportData(Name, this)); // If type is function, we need to create a thunk which jump to an // address pointed by the __imp_ symbol. (This allows you to call // DLL functions just like regular non-DLL functions.) 
if (Hdr->getType() == llvm::COFF::IMPORT_CODE) ThunkSym = Symtab->addImportThunk( Name, cast_or_null(ImpSym), Hdr->Machine); } void BitcodeFile::parse() { Obj = check(lto::InputFile::create(MemoryBufferRef( MB.getBuffer(), Saver.save(ParentName + MB.getBufferIdentifier())))); std::vector> Comdat(Obj->getComdatTable().size()); for (size_t I = 0; I != Obj->getComdatTable().size(); ++I) Comdat[I] = Symtab->addComdat(this, Saver.save(Obj->getComdatTable()[I])); for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) { StringRef SymName = Saver.save(ObjSym.getName()); int ComdatIndex = ObjSym.getComdatIndex(); Symbol *Sym; if (ObjSym.isUndefined()) { Sym = Symtab->addUndefined(SymName, this, false); } else if (ObjSym.isCommon()) { Sym = Symtab->addCommon(this, SymName, ObjSym.getCommonSize()); } else if (ObjSym.isWeak() && ObjSym.isIndirect()) { // Weak external. Sym = Symtab->addUndefined(SymName, this, true); std::string Fallback = ObjSym.getCOFFWeakExternalFallback(); Symbol *Alias = Symtab->addUndefined(Saver.save(Fallback)); checkAndSetWeakAlias(Symtab, this, Sym, Alias); } else if (ComdatIndex != -1) { if (SymName == Obj->getComdatTable()[ComdatIndex]) Sym = Comdat[ComdatIndex].first; else if (Comdat[ComdatIndex].second) Sym = Symtab->addRegular(this, SymName); else Sym = Symtab->addUndefined(SymName, this, false); } else { Sym = Symtab->addRegular(this, SymName); } Symbols.push_back(Sym); } Directives = Obj->getCOFFLinkerOpts(); } MachineTypes BitcodeFile::getMachineType() { switch (Triple(Obj->getTargetTriple()).getArch()) { case Triple::x86_64: return AMD64; case Triple::x86: return I386; case Triple::arm: return ARMNT; case Triple::aarch64: return ARM64; default: return IMAGE_FILE_MACHINE_UNKNOWN; } } } // namespace coff } // namespace lld // Returns the last element of a path, which is supposed to be a filename. 
static StringRef getBasename(StringRef Path) { return sys::path::filename(Path, sys::path::Style::windows); } // Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)". std::string lld::toString(const coff::InputFile *File) { if (!File) return ""; if (File->ParentName.empty()) return File->getName(); return (getBasename(File->ParentName) + "(" + getBasename(File->getName()) + ")") .str(); } Index: projects/clang700-import/contrib/llvm/tools/lld/COFF/InputFiles.h =================================================================== --- projects/clang700-import/contrib/llvm/tools/lld/COFF/InputFiles.h (revision 337644) +++ projects/clang700-import/contrib/llvm/tools/lld/COFF/InputFiles.h (revision 337645) @@ -1,254 +1,269 @@ //===- InputFiles.h ---------------------------------------------*- C++ -*-===// // // The LLVM Linker // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #ifndef LLD_COFF_INPUT_FILES_H #define LLD_COFF_INPUT_FILES_H #include "Config.h" #include "lld/Common/LLVM.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/LTO/LTO.h" #include "llvm/Object/Archive.h" #include "llvm/Object/COFF.h" #include "llvm/Support/StringSaver.h" #include #include #include namespace llvm { namespace pdb { class DbiModuleDescriptorBuilder; } } namespace lld { namespace coff { std::vector getArchiveMembers(llvm::object::Archive *File); using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN; using llvm::COFF::MachineTypes; using llvm::object::Archive; using llvm::object::COFFObjectFile; using llvm::object::COFFSymbolRef; using llvm::object::coff_import_header; using llvm::object::coff_section; class Chunk; class Defined; class DefinedImportData; class DefinedImportThunk; class Lazy; class SectionChunk; class Symbol; class Undefined; // The root class of input files. 
class InputFile { public: enum Kind { ArchiveKind, ObjectKind, ImportKind, BitcodeKind }; Kind kind() const { return FileKind; } virtual ~InputFile() {} // Returns the filename. StringRef getName() const { return MB.getBufferIdentifier(); } // Reads a file (the constructor doesn't do that). virtual void parse() = 0; // Returns the CPU type this file was compiled to. virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; } MemoryBufferRef MB; // An archive file name if this file is created from an archive. StringRef ParentName; // Returns .drectve section contents if exist. StringRef getDirectives() { return StringRef(Directives).trim(); } protected: InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {} std::string Directives; private: const Kind FileKind; }; // .lib or .a file. class ArchiveFile : public InputFile { public: explicit ArchiveFile(MemoryBufferRef M); static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; } void parse() override; // Enqueues an archive member load for the given symbol. If we've already // enqueued a load for the same archive member, this function does nothing, // which ensures that we don't load the same member more than once. void addMember(const Archive::Symbol *Sym); private: std::unique_ptr File; std::string Filename; llvm::DenseSet Seen; }; // .obj or .o file. This may be a member of an archive file. 
class ObjFile : public InputFile { public: explicit ObjFile(MemoryBufferRef M) : InputFile(ObjectKind, M) {} static bool classof(const InputFile *F) { return F->kind() == ObjectKind; } void parse() override; MachineTypes getMachineType() override; ArrayRef getChunks() { return Chunks; } ArrayRef getDebugChunks() { return DebugChunks; } ArrayRef getSXDataChunks() { return SXDataChunks; } ArrayRef getGuardFidChunks() { return GuardFidChunks; } ArrayRef getGuardLJmpChunks() { return GuardLJmpChunks; } ArrayRef getSymbols() { return Symbols; } // Returns a Symbol object for the SymbolIndex'th symbol in the // underlying object file. Symbol *getSymbol(uint32_t SymbolIndex) { return Symbols[SymbolIndex]; } // Returns the underying COFF file. COFFObjectFile *getCOFFObj() { return COFFObj.get(); } static std::vector Instances; // Flags in the absolute @feat.00 symbol if it is present. These usually // indicate if an object was compiled with certain security features enabled // like stack guard, safeseh, /guard:cf, or other things. uint32_t Feat00Flags = 0; // True if this object file is compatible with SEH. COFF-specific and // x86-only. COFF spec 5.10.1. The .sxdata section. bool hasSafeSEH() { return Feat00Flags & 0x1; } // True if this file was compiled with /guard:cf. bool hasGuardCF() { return Feat00Flags & 0x800; } // Pointer to the PDB module descriptor builder. Various debug info records // will reference object files by "module index", which is here. Things like // source files and section contributions are also recorded here. Will be null // if we are not producing a PDB. 
llvm::pdb::DbiModuleDescriptorBuilder *ModuleDBI = nullptr; private: void initializeChunks(); void initializeSymbols(); SectionChunk * readSection(uint32_t SectionNumber, const llvm::object::coff_aux_section_definition *Def, StringRef LeaderName); void readAssociativeDefinition( COFFSymbolRef COFFSym, const llvm::object::coff_aux_section_definition *Def); + void readAssociativeDefinition( + COFFSymbolRef COFFSym, + const llvm::object::coff_aux_section_definition *Def, + uint32_t ParentSection); + + void recordPrevailingSymbolForMingw( + COFFSymbolRef COFFSym, + llvm::DenseMap &PrevailingSectionMap); + + void maybeAssociateSEHForMingw( + COFFSymbolRef Sym, const llvm::object::coff_aux_section_definition *Def, + const llvm::DenseMap &PrevailingSectionMap); + llvm::Optional createDefined(COFFSymbolRef Sym, std::vector - &ComdatDefs); + &ComdatDefs, + bool &PrevailingComdat); Symbol *createRegular(COFFSymbolRef Sym); Symbol *createUndefined(COFFSymbolRef Sym); std::unique_ptr COFFObj; // List of all chunks defined by this file. This includes both section // chunks and non-section chunks for common symbols. std::vector Chunks; // CodeView debug info sections. std::vector DebugChunks; // Chunks containing symbol table indices of exception handlers. Only used for // 32-bit x86. std::vector SXDataChunks; // Chunks containing symbol table indices of address taken symbols and longjmp // targets. These are not linked into the final binary when /guard:cf is set. std::vector GuardFidChunks; std::vector GuardLJmpChunks; // This vector contains the same chunks as Chunks, but they are // indexed such that you can get a SectionChunk by section index. // Nonexistent section indices are filled with null pointers. // (Because section number is 1-based, the first slot is always a // null pointer.) std::vector SparseChunks; // This vector contains a list of all symbols defined or referenced by this // file. They are indexed such that you can get a Symbol by symbol // index. 
Nonexistent indices (which are occupied by auxiliary // symbols in the real symbol table) are filled with null pointers. std::vector Symbols; }; // This type represents import library members that contain DLL names // and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7 // for details about the format. class ImportFile : public InputFile { public: explicit ImportFile(MemoryBufferRef M) : InputFile(ImportKind, M) {} static bool classof(const InputFile *F) { return F->kind() == ImportKind; } static std::vector Instances; Symbol *ImpSym = nullptr; Symbol *ThunkSym = nullptr; std::string DLLName; private: void parse() override; public: StringRef ExternalName; const coff_import_header *Hdr; Chunk *Location = nullptr; // We want to eliminate dllimported symbols if no one actually refers them. // These "Live" bits are used to keep track of which import library members // are actually in use. // // If the Live bit is turned off by MarkLive, Writer will ignore dllimported // symbols provided by this import library member. We also track whether the // imported symbol is used separately from whether the thunk is used in order // to avoid creating unnecessary thunks. bool Live = !Config->DoGC; bool ThunkLive = !Config->DoGC; }; // Used for LTO. 
class BitcodeFile : public InputFile { public: explicit BitcodeFile(MemoryBufferRef M) : InputFile(BitcodeKind, M) {} static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; } ArrayRef getSymbols() { return Symbols; } MachineTypes getMachineType() override; static std::vector Instances; std::unique_ptr Obj; private: void parse() override; std::vector Symbols; }; } // namespace coff std::string toString(const coff::InputFile *File); } // namespace lld #endif Index: projects/clang700-import/contrib/llvm/tools/lld/ELF/LinkerScript.cpp =================================================================== --- projects/clang700-import/contrib/llvm/tools/lld/ELF/LinkerScript.cpp (revision 337644) +++ projects/clang700-import/contrib/llvm/tools/lld/ELF/LinkerScript.cpp (revision 337645) @@ -1,1153 +1,1157 @@ //===- LinkerScript.cpp ---------------------------------------------------===// // // The LLVM Linker // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file contains the parser/evaluator of the linker script. 
// //===----------------------------------------------------------------------===// #include "LinkerScript.h" #include "Config.h" #include "InputSection.h" #include "OutputSections.h" #include "SymbolTable.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" #include "Writer.h" #include "lld/Common/Memory.h" #include "lld/Common/Strings.h" #include "lld/Common/Threads.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include #include #include #include #include #include #include #include using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; using namespace llvm::support::endian; using namespace lld; using namespace lld::elf; LinkerScript *elf::Script; static uint64_t getOutputSectionVA(SectionBase *InputSec, StringRef Loc) { if (OutputSection *OS = InputSec->getOutputSection()) return OS->Addr; error(Loc + ": unable to evaluate expression: input section " + InputSec->Name + " has no output section assigned"); return 0; } uint64_t ExprValue::getValue() const { if (Sec) return alignTo(Sec->getOffset(Val) + getOutputSectionVA(Sec, Loc), Alignment); return alignTo(Val, Alignment); } uint64_t ExprValue::getSecAddr() const { if (Sec) return Sec->getOffset(0) + getOutputSectionVA(Sec, Loc); return 0; } uint64_t ExprValue::getSectionOffset() const { // If the alignment is trivial, we don't have to compute the full // value to know the offset. This allows this function to succeed in // cases where the output section is not yet known. 
if (Alignment == 1 && (!Sec || !Sec->getOutputSection())) return Val; return getValue() - getSecAddr(); } OutputSection *LinkerScript::createOutputSection(StringRef Name, StringRef Location) { OutputSection *&SecRef = NameToOutputSection[Name]; OutputSection *Sec; if (SecRef && SecRef->Location.empty()) { // There was a forward reference. Sec = SecRef; } else { Sec = make(Name, SHT_NOBITS, 0); if (!SecRef) SecRef = Sec; } Sec->Location = Location; return Sec; } OutputSection *LinkerScript::getOrCreateOutputSection(StringRef Name) { OutputSection *&CmdRef = NameToOutputSection[Name]; if (!CmdRef) CmdRef = make(Name, SHT_PROGBITS, 0); return CmdRef; } // Expands the memory region by the specified size. static void expandMemoryRegion(MemoryRegion *MemRegion, uint64_t Size, StringRef RegionName, StringRef SecName) { MemRegion->CurPos += Size; uint64_t NewSize = MemRegion->CurPos - MemRegion->Origin; if (NewSize > MemRegion->Length) error("section '" + SecName + "' will not fit in region '" + RegionName + "': overflowed by " + Twine(NewSize - MemRegion->Length) + " bytes"); } void LinkerScript::expandMemoryRegions(uint64_t Size) { if (Ctx->MemRegion) expandMemoryRegion(Ctx->MemRegion, Size, Ctx->MemRegion->Name, Ctx->OutSec->Name); - if (Ctx->LMARegion) + // Only expand the LMARegion if it is different from MemRegion. + if (Ctx->LMARegion && Ctx->MemRegion != Ctx->LMARegion) expandMemoryRegion(Ctx->LMARegion, Size, Ctx->LMARegion->Name, Ctx->OutSec->Name); } void LinkerScript::expandOutputSection(uint64_t Size) { Ctx->OutSec->Size += Size; expandMemoryRegions(Size); } void LinkerScript::setDot(Expr E, const Twine &Loc, bool InSec) { uint64_t Val = E().getValue(); if (Val < Dot && InSec) error(Loc + ": unable to move location counter backward for: " + Ctx->OutSec->Name); // Update to location counter means update to section size. 
if (InSec) expandOutputSection(Val - Dot); else expandMemoryRegions(Val - Dot); Dot = Val; } // Used for handling linker symbol assignments, for both finalizing // their values and doing early declarations. Returns true if symbol // should be defined from linker script. static bool shouldDefineSym(SymbolAssignment *Cmd) { if (Cmd->Name == ".") return false; if (!Cmd->Provide) return true; // If a symbol was in PROVIDE(), we need to define it only // when it is a referenced undefined symbol. Symbol *B = Symtab->find(Cmd->Name); if (B && !B->isDefined()) return true; return false; } // This function is called from processSectionCommands, // while we are fixing the output section layout. void LinkerScript::addSymbol(SymbolAssignment *Cmd) { if (!shouldDefineSym(Cmd)) return; // Define a symbol. Symbol *Sym; uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT; std::tie(Sym, std::ignore) = Symtab->insert(Cmd->Name, /*Type*/ 0, Visibility, /*CanOmitFromDynSym*/ false, /*File*/ nullptr); ExprValue Value = Cmd->Expression(); SectionBase *Sec = Value.isAbsolute() ? nullptr : Value.Sec; // When this function is called, section addresses have not been // fixed yet. So, we may or may not know the value of the RHS // expression. // // For example, if an expression is `x = 42`, we know x is always 42. // However, if an expression is `x = .`, there's no way to know its // value at the moment. // // We want to set symbol values early if we can. This allows us to // use symbols as variables in linker scripts. Doing so allows us to // write expressions like this: `alignment = 16; . = ALIGN(., alignment)`. uint64_t SymValue = Value.Sec ? 0 : Value.getValue(); replaceSymbol(Sym, nullptr, Cmd->Name, STB_GLOBAL, Visibility, STT_NOTYPE, SymValue, 0, Sec); Cmd->Sym = cast(Sym); } // This function is called from LinkerScript::declareSymbols. // It creates a placeholder symbol if needed. 
static void declareSymbol(SymbolAssignment *Cmd) { if (!shouldDefineSym(Cmd)) return; // We can't calculate final value right now. Symbol *Sym; uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT; std::tie(Sym, std::ignore) = Symtab->insert(Cmd->Name, /*Type*/ 0, Visibility, /*CanOmitFromDynSym*/ false, /*File*/ nullptr); replaceSymbol(Sym, nullptr, Cmd->Name, STB_GLOBAL, Visibility, STT_NOTYPE, 0, 0, nullptr); Cmd->Sym = cast(Sym); Cmd->Provide = false; } // This method is used to handle INSERT AFTER statement. Here we rebuild // the list of script commands to mix sections inserted into. void LinkerScript::processInsertCommands() { std::vector V; auto Insert = [&](std::vector &From) { V.insert(V.end(), From.begin(), From.end()); From.clear(); }; for (BaseCommand *Base : SectionCommands) { if (auto *OS = dyn_cast(Base)) { Insert(InsertBeforeCommands[OS->Name]); V.push_back(Base); Insert(InsertAfterCommands[OS->Name]); continue; } V.push_back(Base); } for (auto &Cmds : {InsertBeforeCommands, InsertAfterCommands}) for (const std::pair> &P : Cmds) if (!P.second.empty()) error("unable to INSERT AFTER/BEFORE " + P.first + ": section not defined"); SectionCommands = std::move(V); } // Symbols defined in script should not be inlined by LTO. At the same time // we don't know their final values until late stages of link. Here we scan // over symbol assignment commands and create placeholder symbols if needed. void LinkerScript::declareSymbols() { assert(!Ctx); for (BaseCommand *Base : SectionCommands) { if (auto *Cmd = dyn_cast(Base)) { declareSymbol(Cmd); continue; } // If the output section directive has constraints, // we can't say for sure if it is going to be included or not. // Skip such sections for now. Improve the checks if we ever // need symbols from that sections to be declared early. 
auto *Sec = cast(Base); if (Sec->Constraint != ConstraintKind::NoConstraint) continue; for (BaseCommand *Base2 : Sec->SectionCommands) if (auto *Cmd = dyn_cast(Base2)) declareSymbol(Cmd); } } // This function is called from assignAddresses, while we are // fixing the output section addresses. This function is supposed // to set the final value for a given symbol assignment. void LinkerScript::assignSymbol(SymbolAssignment *Cmd, bool InSec) { if (Cmd->Name == ".") { setDot(Cmd->Expression, Cmd->Location, InSec); return; } if (!Cmd->Sym) return; ExprValue V = Cmd->Expression(); if (V.isAbsolute()) { Cmd->Sym->Section = nullptr; Cmd->Sym->Value = V.getValue(); } else { Cmd->Sym->Section = V.Sec; Cmd->Sym->Value = V.getSectionOffset(); } } static std::string getFilename(InputFile *File) { if (!File) return ""; if (File->ArchiveName.empty()) return File->getName(); return (File->ArchiveName + "(" + File->getName() + ")").str(); } bool LinkerScript::shouldKeep(InputSectionBase *S) { if (KeptSections.empty()) return false; std::string Filename = getFilename(S->File); for (InputSectionDescription *ID : KeptSections) if (ID->FilePat.match(Filename)) for (SectionPattern &P : ID->SectionPatterns) if (P.SectionPat.match(S->Name)) return true; return false; } // A helper function for the SORT() command. static std::function getComparator(SortSectionPolicy K) { switch (K) { case SortSectionPolicy::Alignment: return [](InputSectionBase *A, InputSectionBase *B) { // ">" is not a mistake. Sections with larger alignments are placed // before sections with smaller alignments in order to reduce the // amount of padding necessary. This is compatible with GNU. 
return A->Alignment > B->Alignment; }; case SortSectionPolicy::Name: return [](InputSectionBase *A, InputSectionBase *B) { return A->Name < B->Name; }; case SortSectionPolicy::Priority: return [](InputSectionBase *A, InputSectionBase *B) { return getPriority(A->Name) < getPriority(B->Name); }; default: llvm_unreachable("unknown sort policy"); } } // A helper function for the SORT() command. static bool matchConstraints(ArrayRef Sections, ConstraintKind Kind) { if (Kind == ConstraintKind::NoConstraint) return true; bool IsRW = llvm::any_of( Sections, [](InputSection *Sec) { return Sec->Flags & SHF_WRITE; }); return (IsRW && Kind == ConstraintKind::ReadWrite) || (!IsRW && Kind == ConstraintKind::ReadOnly); } static void sortSections(MutableArrayRef Vec, SortSectionPolicy K) { if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None) std::stable_sort(Vec.begin(), Vec.end(), getComparator(K)); } // Sort sections as instructed by SORT-family commands and --sort-section // option. Because SORT-family commands can be nested at most two depth // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command // line option is respected even if a SORT command is given, the exact // behavior we have here is a bit complicated. Here are the rules. // // 1. If two SORT commands are given, --sort-section is ignored. // 2. If one SORT command is given, and if it is not SORT_NONE, // --sort-section is handled as an inner SORT command. // 3. If one SORT command is given, and if it is SORT_NONE, don't sort. // 4. If no SORT command is given, sort according to --sort-section. static void sortInputSections(MutableArrayRef Vec, const SectionPattern &Pat) { if (Pat.SortOuter == SortSectionPolicy::None) return; if (Pat.SortInner == SortSectionPolicy::Default) sortSections(Vec, Config->SortSection); else sortSections(Vec, Pat.SortInner); sortSections(Vec, Pat.SortOuter); } // Compute and remember which sections the InputSectionDescription matches. 
std::vector LinkerScript::computeInputSections(const InputSectionDescription *Cmd) { std::vector Ret; // Collects all sections that satisfy constraints of Cmd. for (const SectionPattern &Pat : Cmd->SectionPatterns) { size_t SizeBefore = Ret.size(); for (InputSectionBase *Sec : InputSections) { if (!Sec->Live || Sec->Assigned) continue; // For -emit-relocs we have to ignore entries like // .rela.dyn : { *(.rela.data) } // which are common because they are in the default bfd script. // We do not ignore SHT_REL[A] linker-synthesized sections here because // want to support scripts that do custom layout for them. if (auto *IS = dyn_cast(Sec)) if (IS->getRelocatedSection()) continue; std::string Filename = getFilename(Sec->File); if (!Cmd->FilePat.match(Filename) || Pat.ExcludedFilePat.match(Filename) || !Pat.SectionPat.match(Sec->Name)) continue; // It is safe to assume that Sec is an InputSection // because mergeable or EH input sections have already been // handled and eliminated. Ret.push_back(cast(Sec)); Sec->Assigned = true; } sortInputSections(MutableArrayRef(Ret).slice(SizeBefore), Pat); } return Ret; } void LinkerScript::discard(ArrayRef V) { for (InputSection *S : V) { if (S == InX::ShStrTab || S == InX::Dynamic || S == InX::DynSymTab || S == InX::DynStrTab || S == InX::RelaPlt || S == InX::RelaDyn || S == InX::RelrDyn) error("discarding " + S->Name + " section is not allowed"); // You can discard .hash and .gnu.hash sections by linker scripts. Since // they are synthesized sections, we need to handle them differently than // other regular sections. 
if (S == InX::GnuHashTab) InX::GnuHashTab = nullptr; if (S == InX::HashTab) InX::HashTab = nullptr; S->Assigned = false; S->Live = false; discard(S->DependentSections); } } std::vector LinkerScript::createInputSectionList(OutputSection &OutCmd) { std::vector Ret; for (BaseCommand *Base : OutCmd.SectionCommands) { if (auto *Cmd = dyn_cast(Base)) { Cmd->Sections = computeInputSections(Cmd); Ret.insert(Ret.end(), Cmd->Sections.begin(), Cmd->Sections.end()); } } return Ret; } void LinkerScript::processSectionCommands() { // A symbol can be assigned before any section is mentioned in the linker // script. In an DSO, the symbol values are addresses, so the only important // section values are: // * SHN_UNDEF // * SHN_ABS // * Any value meaning a regular section. // To handle that, create a dummy aether section that fills the void before // the linker scripts switches to another section. It has an index of one // which will map to whatever the first actual section is. Aether = make("", 0, SHF_ALLOC); Aether->SectionIndex = 1; // Ctx captures the local AddressState and makes it accessible deliberately. // This is needed as there are some cases where we cannot just // thread the current state through to a lambda function created by the // script parser. auto Deleter = make_unique(); Ctx = Deleter.get(); Ctx->OutSec = Aether; size_t I = 0; // Add input sections to output sections. for (BaseCommand *Base : SectionCommands) { // Handle symbol assignments outside of any output section. if (auto *Cmd = dyn_cast(Base)) { addSymbol(Cmd); continue; } if (auto *Sec = dyn_cast(Base)) { std::vector V = createInputSectionList(*Sec); // The output section name `/DISCARD/' is special. // Any input section assigned to it is discarded. if (Sec->Name == "/DISCARD/") { discard(V); Sec->SectionCommands.clear(); continue; } // This is for ONLY_IF_RO and ONLY_IF_RW. An output section directive // ".foo : ONLY_IF_R[OW] { ... 
}" is handled only if all member input // sections satisfy a given constraint. If not, a directive is handled // as if it wasn't present from the beginning. // // Because we'll iterate over SectionCommands many more times, the easy // way to "make it as if it wasn't present" is to make it empty. if (!matchConstraints(V, Sec->Constraint)) { for (InputSectionBase *S : V) S->Assigned = false; Sec->SectionCommands.clear(); continue; } // A directive may contain symbol definitions like this: // ".foo : { ...; bar = .; }". Handle them. for (BaseCommand *Base : Sec->SectionCommands) if (auto *OutCmd = dyn_cast(Base)) addSymbol(OutCmd); // Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign // is given, input sections are aligned to that value, whether the // given value is larger or smaller than the original section alignment. if (Sec->SubalignExpr) { uint32_t Subalign = Sec->SubalignExpr().getValue(); for (InputSectionBase *S : V) S->Alignment = Subalign; } // Add input sections to an output section. for (InputSection *S : V) Sec->addSection(S); Sec->SectionIndex = I++; if (Sec->Noload) Sec->Type = SHT_NOBITS; if (Sec->NonAlloc) Sec->Flags &= ~(uint64_t)SHF_ALLOC; } } Ctx = nullptr; } static OutputSection *findByName(ArrayRef Vec, StringRef Name) { for (BaseCommand *Base : Vec) if (auto *Sec = dyn_cast(Base)) if (Sec->Name == Name) return Sec; return nullptr; } static OutputSection *createSection(InputSectionBase *IS, StringRef OutsecName) { OutputSection *Sec = Script->createOutputSection(OutsecName, ""); Sec->addSection(cast(IS)); return Sec; } static OutputSection *addInputSec(StringMap &Map, InputSectionBase *IS, StringRef OutsecName) { // Sections with SHT_GROUP or SHF_GROUP attributes reach here only when the -r // option is given. A section with SHT_GROUP defines a "section group", and // its members have SHF_GROUP attribute. Usually these flags have already been // stripped by InputFiles.cpp as section groups are processed and uniquified. 
// However, for the -r option, we want to pass through all section groups // as-is because adding/removing members or merging them with other groups // change their semantics. if (IS->Type == SHT_GROUP || (IS->Flags & SHF_GROUP)) return createSection(IS, OutsecName); // Imagine .zed : { *(.foo) *(.bar) } script. Both foo and bar may have // relocation sections .rela.foo and .rela.bar for example. Most tools do // not allow multiple REL[A] sections for output section. Hence we // should combine these relocation sections into single output. // We skip synthetic sections because it can be .rela.dyn/.rela.plt or any // other REL[A] sections created by linker itself. if (!isa(IS) && (IS->Type == SHT_REL || IS->Type == SHT_RELA)) { auto *Sec = cast(IS); OutputSection *Out = Sec->getRelocatedSection()->getOutputSection(); if (Out->RelocationSection) { Out->RelocationSection->addSection(Sec); return nullptr; } Out->RelocationSection = createSection(IS, OutsecName); return Out->RelocationSection; } // When control reaches here, mergeable sections have already been merged into // synthetic sections. For relocatable case we want to create one output // section per syntetic section so that they have a valid sh_entsize. if (Config->Relocatable && (IS->Flags & SHF_MERGE)) return createSection(IS, OutsecName); // The ELF spec just says // ---------------------------------------------------------------- // In the first phase, input sections that match in name, type and // attribute flags should be concatenated into single sections. // ---------------------------------------------------------------- // // However, it is clear that at least some flags have to be ignored for // section merging. At the very least SHF_GROUP and SHF_COMPRESSED have to be // ignored. We should not have two output .text sections just because one was // in a group and another was not for example. // // It also seems that wording was a late addition and didn't get the // necessary scrutiny. 
// // Merging sections with different flags is expected by some users. One // reason is that if one file has // // int *const bar __attribute__((section(".foo"))) = (int *)0; // // gcc with -fPIC will produce a read only .foo section. But if another // file has // // int zed; // int *const bar __attribute__((section(".foo"))) = (int *)&zed; // // gcc with -fPIC will produce a read write section. // // Last but not least, when using linker script the merge rules are forced by // the script. Unfortunately, linker scripts are name based. This means that // expressions like *(.foo*) can refer to multiple input sections with // different flags. We cannot put them in different output sections or we // would produce wrong results for // // start = .; *(.foo.*) end = .; *(.bar) // // and a mapping of .foo1 and .bar1 to one section and .foo2 and .bar2 to // another. The problem is that there is no way to layout those output // sections such that the .foo sections are the only thing between the start // and end symbols. // // Given the above issues, we instead merge sections by name and error on // incompatible types and flags. OutputSection *&Sec = Map[OutsecName]; if (Sec) { Sec->addSection(cast(IS)); return nullptr; } Sec = createSection(IS, OutsecName); return Sec; } // Add sections that didn't match any sections command. 
void LinkerScript::addOrphanSections() { unsigned End = SectionCommands.size(); StringMap Map; std::vector V; auto Add = [&](InputSectionBase *S) { if (!S->Live || S->Parent) return; StringRef Name = getOutputSectionName(S); if (Config->OrphanHandling == OrphanHandlingPolicy::Error) error(toString(S) + " is being placed in '" + Name + "'"); else if (Config->OrphanHandling == OrphanHandlingPolicy::Warn) warn(toString(S) + " is being placed in '" + Name + "'"); if (OutputSection *Sec = findByName(makeArrayRef(SectionCommands).slice(0, End), Name)) { Sec->addSection(cast(S)); return; } if (OutputSection *OS = addInputSec(Map, S, Name)) V.push_back(OS); assert(S->getOutputSection()->SectionIndex == UINT32_MAX); }; // For futher --emit-reloc handling code we need target output section // to be created before we create relocation output section, so we want // to create target sections first. We do not want priority handling // for synthetic sections because them are special. for (InputSectionBase *IS : InputSections) { if (auto *Sec = dyn_cast(IS)) if (InputSectionBase *Rel = Sec->getRelocatedSection()) if (auto *RelIS = dyn_cast_or_null(Rel->Parent)) Add(RelIS); Add(IS); } // If no SECTIONS command was given, we should insert sections commands // before others, so that we can handle scripts which refers them, // for example: "foo = ABSOLUTE(ADDR(.text)));". // When SECTIONS command is present we just add all orphans to the end. if (HasSectionsCommand) SectionCommands.insert(SectionCommands.end(), V.begin(), V.end()); else SectionCommands.insert(SectionCommands.begin(), V.begin(), V.end()); } uint64_t LinkerScript::advance(uint64_t Size, unsigned Alignment) { bool IsTbss = (Ctx->OutSec->Flags & SHF_TLS) && Ctx->OutSec->Type == SHT_NOBITS; uint64_t Start = IsTbss ? 
Dot + Ctx->ThreadBssOffset : Dot; Start = alignTo(Start, Alignment); uint64_t End = Start + Size; if (IsTbss) Ctx->ThreadBssOffset = End - Dot; else Dot = End; return End; } void LinkerScript::output(InputSection *S) { uint64_t Before = advance(0, 1); uint64_t Pos = advance(S->getSize(), S->Alignment); S->OutSecOff = Pos - S->getSize() - Ctx->OutSec->Addr; // Update output section size after adding each section. This is so that // SIZEOF works correctly in the case below: // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) } expandOutputSection(Pos - Before); } void LinkerScript::switchTo(OutputSection *Sec) { Ctx->OutSec = Sec; uint64_t Before = advance(0, 1); Ctx->OutSec->Addr = advance(0, Ctx->OutSec->Alignment); expandMemoryRegions(Ctx->OutSec->Addr - Before); } // This function searches for a memory region to place the given output // section in. If found, a pointer to the appropriate memory region is // returned. Otherwise, a nullptr is returned. MemoryRegion *LinkerScript::findMemoryRegion(OutputSection *Sec) { // If a memory region name was specified in the output section command, // then try to find that region first. if (!Sec->MemoryRegionName.empty()) { if (MemoryRegion *M = MemoryRegions.lookup(Sec->MemoryRegionName)) return M; error("memory region '" + Sec->MemoryRegionName + "' not declared"); return nullptr; } // If at least one memory region is defined, all sections must // belong to some memory region. Otherwise, we don't need to do // anything for memory regions. if (MemoryRegions.empty()) return nullptr; // See if a region can be found by matching section flags. for (auto &Pair : MemoryRegions) { MemoryRegion *M = Pair.second; if ((M->Flags & Sec->Flags) && (M->NegFlags & Sec->Flags) == 0) return M; } // Otherwise, no suitable region was found. 
if (Sec->Flags & SHF_ALLOC) error("no memory region specified for section '" + Sec->Name + "'"); return nullptr; } +static OutputSection *findFirstSection(PhdrEntry *Load) { + for (OutputSection *Sec : OutputSections) + if (Sec->PtLoad == Load) + return Sec; + return nullptr; +} + // This function assigns offsets to input sections and an output section // for a single sections command (e.g. ".text { *(.text); }"). void LinkerScript::assignOffsets(OutputSection *Sec) { if (!(Sec->Flags & SHF_ALLOC)) Dot = 0; else if (Sec->AddrExpr) setDot(Sec->AddrExpr, Sec->Location, false); Ctx->MemRegion = Sec->MemRegion; Ctx->LMARegion = Sec->LMARegion; if (Ctx->MemRegion) Dot = Ctx->MemRegion->CurPos; switchTo(Sec); if (Sec->LMAExpr) Ctx->LMAOffset = Sec->LMAExpr().getValue() - Dot; if (MemoryRegion *MR = Sec->LMARegion) Ctx->LMAOffset = MR->CurPos - Dot; // If neither AT nor AT> is specified for an allocatable section, the linker // will set the LMA such that the difference between VMA and LMA for the // section is the same as the preceding output section in the same region // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html + // This, however, should only be done by the first "non-header" section + // in the segment. if (PhdrEntry *L = Ctx->OutSec->PtLoad) - L->LMAOffset = Ctx->LMAOffset; + if (Sec == findFirstSection(L)) + L->LMAOffset = Ctx->LMAOffset; // We can call this method multiple times during the creation of // thunks and want to start over calculation each time. Sec->Size = 0; // We visited SectionsCommands from processSectionCommands to // layout sections. Now, we visit SectionsCommands again to fix // section offsets. for (BaseCommand *Base : Sec->SectionCommands) { // This handles the assignments to symbol or to the dot. if (auto *Cmd = dyn_cast(Base)) { Cmd->Addr = Dot; assignSymbol(Cmd, true); Cmd->Size = Dot - Cmd->Addr; continue; } // Handle BYTE(), SHORT(), LONG(), or QUAD(). 
if (auto *Cmd = dyn_cast(Base)) { Cmd->Offset = Dot - Ctx->OutSec->Addr; Dot += Cmd->Size; expandOutputSection(Cmd->Size); continue; } // Handle a single input section description command. // It calculates and assigns the offsets for each section and also // updates the output section size. auto *Cmd = cast(Base); for (InputSection *Sec : Cmd->Sections) { // We tentatively added all synthetic sections at the beginning and // removed empty ones afterwards (because there is no way to know // whether they were going be empty or not other than actually running // linker scripts.) We need to ignore remains of empty sections. if (auto *S = dyn_cast(Sec)) if (S->empty()) continue; if (!Sec->Live) continue; assert(Ctx->OutSec == Sec->getParent()); output(Sec); } } } static bool isDiscardable(OutputSection &Sec) { // We do not remove empty sections that are explicitly // assigned to any segment. if (!Sec.Phdrs.empty()) return false; // We do not want to remove sections that reference symbols in address and // other expressions. We add script symbols as undefined, and want to ensure // all of them are defined in the output, hence have to keep them. if (Sec.ExpressionsUseSymbols) return false; for (BaseCommand *Base : Sec.SectionCommands) { if (auto Cmd = dyn_cast(Base)) // Don't create empty output sections just for unreferenced PROVIDE // symbols. if (Cmd->Name != "." && !Cmd->Sym) continue; if (!isa(*Base)) return false; } return true; } void LinkerScript::adjustSectionsBeforeSorting() { // If the output section contains only symbol assignments, create a // corresponding output section. The issue is what to do with linker script // like ".foo : { symbol = 42; }". One option would be to convert it to // "symbol = 42;". That is, move the symbol out of the empty section // description. That seems to be what bfd does for this simple case. The // problem is that this is not completely general. bfd will give up and // create a dummy section too if there is a ". = . 
+ 1" inside the section // for example. // Given that we want to create the section, we have to worry what impact // it will have on the link. For example, if we just create a section with // 0 for flags, it would change which PT_LOADs are created. // We could remember that particular section is dummy and ignore it in // other parts of the linker, but unfortunately there are quite a few places // that would need to change: // * The program header creation. // * The orphan section placement. // * The address assignment. // The other option is to pick flags that minimize the impact the section // will have on the rest of the linker. That is why we copy the flags from // the previous sections. Only a few flags are needed to keep the impact low. uint64_t Flags = SHF_ALLOC; for (BaseCommand *&Cmd : SectionCommands) { auto *Sec = dyn_cast(Cmd); if (!Sec) continue; // Handle align (e.g. ".foo : ALIGN(16) { ... }"). if (Sec->AlignExpr) Sec->Alignment = std::max(Sec->Alignment, Sec->AlignExpr().getValue()); // A live output section means that some input section was added to it. It // might have been removed (if it was empty synthetic section), but we at // least know the flags. if (Sec->Live) Flags = Sec->Flags; // We do not want to keep any special flags for output section // in case it is empty. bool IsEmpty = getInputSections(Sec).empty(); if (IsEmpty) Sec->Flags = Flags & (SHF_ALLOC | SHF_WRITE | SHF_EXECINSTR); if (IsEmpty && isDiscardable(*Sec)) { Sec->Live = false; Cmd = nullptr; } } // It is common practice to use very generic linker scripts. So for any // given run some of the output sections in the script will be empty. // We could create corresponding empty output sections, but that would // clutter the output. // We instead remove trivially empty sections. The bfd linker seems even // more aggressive at removing them. 
llvm::erase_if(SectionCommands, [&](BaseCommand *Base) { return !Base; }); } void LinkerScript::adjustSectionsAfterSorting() { // Try and find an appropriate memory region to assign offsets in. for (BaseCommand *Base : SectionCommands) { if (auto *Sec = dyn_cast(Base)) { if (!Sec->LMARegionName.empty()) { if (MemoryRegion *M = MemoryRegions.lookup(Sec->LMARegionName)) Sec->LMARegion = M; else error("memory region '" + Sec->LMARegionName + "' not declared"); } Sec->MemRegion = findMemoryRegion(Sec); } } // If output section command doesn't specify any segments, // and we haven't previously assigned any section to segment, // then we simply assign section to the very first load segment. // Below is an example of such linker script: // PHDRS { seg PT_LOAD; } // SECTIONS { .aaa : { *(.aaa) } } std::vector DefPhdrs; auto FirstPtLoad = llvm::find_if(PhdrsCommands, [](const PhdrsCommand &Cmd) { return Cmd.Type == PT_LOAD; }); if (FirstPtLoad != PhdrsCommands.end()) DefPhdrs.push_back(FirstPtLoad->Name); // Walk the commands and propagate the program headers to commands that don't // explicitly specify them. for (BaseCommand *Base : SectionCommands) { auto *Sec = dyn_cast(Base); if (!Sec) continue; if (Sec->Phdrs.empty()) { // To match the bfd linker script behaviour, only propagate program // headers to sections that are allocated. if (Sec->Flags & SHF_ALLOC) Sec->Phdrs = DefPhdrs; } else { DefPhdrs = Sec->Phdrs; } } -} - -static OutputSection *findFirstSection(PhdrEntry *Load) { - for (OutputSection *Sec : OutputSections) - if (Sec->PtLoad == Load) - return Sec; - return nullptr; } static uint64_t computeBase(uint64_t Min, bool AllocateHeaders) { // If there is no SECTIONS or if the linkerscript is explicit about program // headers, do our best to allocate them. if (!Script->HasSectionsCommand || AllocateHeaders) return 0; // Otherwise only allocate program headers if that would not add a page. 
return alignDown(Min, Config->MaxPageSize); } // Try to find an address for the file and program headers output sections, // which were unconditionally added to the first PT_LOAD segment earlier. // // When using the default layout, we check if the headers fit below the first // allocated section. When using a linker script, we also check if the headers // are covered by the output section. This allows omitting the headers by not // leaving enough space for them in the linker script; this pattern is common // in embedded systems. // // If there isn't enough space for these sections, we'll remove them from the // PT_LOAD segment, and we'll also remove the PT_PHDR segment. void LinkerScript::allocateHeaders(std::vector &Phdrs) { uint64_t Min = std::numeric_limits::max(); for (OutputSection *Sec : OutputSections) if (Sec->Flags & SHF_ALLOC) Min = std::min(Min, Sec->Addr); auto It = llvm::find_if( Phdrs, [](const PhdrEntry *E) { return E->p_type == PT_LOAD; }); if (It == Phdrs.end()) return; PhdrEntry *FirstPTLoad = *It; bool HasExplicitHeaders = llvm::any_of(PhdrsCommands, [](const PhdrsCommand &Cmd) { return Cmd.HasPhdrs || Cmd.HasFilehdr; }); uint64_t HeaderSize = getHeaderSize(); if (HeaderSize <= Min - computeBase(Min, HasExplicitHeaders)) { Min = alignDown(Min - HeaderSize, Config->MaxPageSize); Out::ElfHeader->Addr = Min; Out::ProgramHeaders->Addr = Min + Out::ElfHeader->Size; return; } // Error if we were explicitly asked to allocate headers. 
if (HasExplicitHeaders) error("could not allocate headers"); Out::ElfHeader->PtLoad = nullptr; Out::ProgramHeaders->PtLoad = nullptr; FirstPTLoad->FirstSec = findFirstSection(FirstPTLoad); llvm::erase_if(Phdrs, [](const PhdrEntry *E) { return E->p_type == PT_PHDR; }); } LinkerScript::AddressState::AddressState() { for (auto &MRI : Script->MemoryRegions) { MemoryRegion *MR = MRI.second; MR->CurPos = MR->Origin; } } static uint64_t getInitialDot() { // By default linker scripts use an initial value of 0 for '.', // but prefer -image-base if set. if (Script->HasSectionsCommand) return Config->ImageBase ? *Config->ImageBase : 0; uint64_t StartAddr = UINT64_MAX; // The Sections with -T
have been sorted in order of ascending // address. We must lower StartAddr if the lowest -T
as // calls to setDot() must be monotonically increasing. for (auto &KV : Config->SectionStartMap) StartAddr = std::min(StartAddr, KV.second); return std::min(StartAddr, Target->getImageBase() + elf::getHeaderSize()); } // Here we assign addresses as instructed by linker script SECTIONS // sub-commands. Doing that allows us to use final VA values, so here // we also handle rest commands like symbol assignments and ASSERTs. void LinkerScript::assignAddresses() { Dot = getInitialDot(); auto Deleter = make_unique(); Ctx = Deleter.get(); ErrorOnMissingSection = true; switchTo(Aether); for (BaseCommand *Base : SectionCommands) { if (auto *Cmd = dyn_cast(Base)) { Cmd->Addr = Dot; assignSymbol(Cmd, false); Cmd->Size = Dot - Cmd->Addr; continue; } assignOffsets(cast(Base)); } Ctx = nullptr; } // Creates program headers as instructed by PHDRS linker script command. std::vector LinkerScript::createPhdrs() { std::vector Ret; // Process PHDRS and FILEHDR keywords because they are not // real output sections and cannot be added in the following loop. for (const PhdrsCommand &Cmd : PhdrsCommands) { PhdrEntry *Phdr = make(Cmd.Type, Cmd.Flags ? *Cmd.Flags : PF_R); if (Cmd.HasFilehdr) Phdr->add(Out::ElfHeader); if (Cmd.HasPhdrs) Phdr->add(Out::ProgramHeaders); if (Cmd.LMAExpr) { Phdr->p_paddr = Cmd.LMAExpr().getValue(); Phdr->HasLMA = true; } Ret.push_back(Phdr); } // Add output sections to program headers. for (OutputSection *Sec : OutputSections) { // Assign headers specified by linker script for (size_t Id : getPhdrIndices(Sec)) { Ret[Id]->add(Sec); if (!PhdrsCommands[Id].Flags.hasValue()) Ret[Id]->p_flags |= Sec->getPhdrFlags(); } } return Ret; } // Returns true if we should emit an .interp section. // // We usually do. But if PHDRS commands are given, and // no PT_INTERP is there, there's no place to emit an // .interp, so we don't do that in that case. 
bool LinkerScript::needsInterpSection() { if (PhdrsCommands.empty()) return true; for (PhdrsCommand &Cmd : PhdrsCommands) if (Cmd.Type == PT_INTERP) return true; return false; } ExprValue LinkerScript::getSymbolValue(StringRef Name, const Twine &Loc) { if (Name == ".") { if (Ctx) return {Ctx->OutSec, false, Dot - Ctx->OutSec->Addr, Loc}; error(Loc + ": unable to get location counter value"); return 0; } if (Symbol *Sym = Symtab->find(Name)) { if (auto *DS = dyn_cast(Sym)) return {DS->Section, false, DS->Value, Loc}; if (isa(Sym)) if (!ErrorOnMissingSection) return {nullptr, false, 0, Loc}; } error(Loc + ": symbol not found: " + Name); return 0; } // Returns the index of the segment named Name. static Optional getPhdrIndex(ArrayRef Vec, StringRef Name) { for (size_t I = 0; I < Vec.size(); ++I) if (Vec[I].Name == Name) return I; return None; } // Returns indices of ELF headers containing specific section. Each index is a // zero based number of ELF header listed within PHDRS {} script block. std::vector LinkerScript::getPhdrIndices(OutputSection *Cmd) { std::vector Ret; for (StringRef S : Cmd->Phdrs) { if (Optional Idx = getPhdrIndex(PhdrsCommands, S)) Ret.push_back(*Idx); else if (S != "NONE") error(Cmd->Location + ": section header '" + S + "' is not listed in PHDRS"); } return Ret; } Index: projects/clang700-import/contrib/llvm/tools/lld/ELF/Writer.cpp =================================================================== --- projects/clang700-import/contrib/llvm/tools/lld/ELF/Writer.cpp (revision 337644) +++ projects/clang700-import/contrib/llvm/tools/lld/ELF/Writer.cpp (revision 337645) @@ -1,2404 +1,2406 @@ //===- Writer.cpp ---------------------------------------------------------===// // // The LLVM Linker // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// #include "Writer.h" #include "AArch64ErrataFix.h" #include "CallGraphSort.h" #include "Config.h" #include "Filesystem.h" #include "LinkerScript.h" #include "MapFile.h" #include "OutputSections.h" #include "Relocations.h" #include "SymbolTable.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" #include "lld/Common/Memory.h" #include "lld/Common/Strings.h" #include "lld/Common/Threads.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSwitch.h" #include using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; using namespace llvm::support; using namespace llvm::support::endian; using namespace lld; using namespace lld::elf; namespace { // The writer writes a SymbolTable result to a file. template class Writer { public: Writer() : Buffer(errorHandler().OutputBuffer) {} typedef typename ELFT::Shdr Elf_Shdr; typedef typename ELFT::Ehdr Elf_Ehdr; typedef typename ELFT::Phdr Elf_Phdr; void run(); private: void copyLocalSymbols(); void addSectionSymbols(); void forEachRelSec(llvm::function_ref Fn); void sortSections(); void resolveShfLinkOrder(); void sortInputSections(); void finalizeSections(); void setReservedSymbolSections(); std::vector createPhdrs(); void removeEmptyPTLoad(); void addPtArmExid(std::vector &Phdrs); void assignFileOffsets(); void assignFileOffsetsBinary(); void setPhdrs(); void checkSections(); void fixSectionAlignments(); void openFile(); void writeTrapInstr(); void writeHeader(); void writeSections(); void writeSectionsBinary(); void writeBuildId(); std::unique_ptr &Buffer; void addRelIpltSymbols(); void addStartEndSymbols(); void addStartStopSymbols(OutputSection *Sec); uint64_t getEntryAddr(); std::vector Phdrs; uint64_t FileSize; uint64_t SectionHeaderOff; }; } // anonymous namespace static bool isSectionPrefix(StringRef Prefix, StringRef Name) { return Name.startswith(Prefix) || Name == Prefix.drop_back(); } 
StringRef elf::getOutputSectionName(const InputSectionBase *S) { if (Config->Relocatable) return S->Name; // This is for --emit-relocs. If .text.foo is emitted as .text.bar, we want // to emit .rela.text.foo as .rela.text.bar for consistency (this is not // technically required, but not doing it is odd). This code guarantees that. if (auto *IS = dyn_cast(S)) { if (InputSectionBase *Rel = IS->getRelocatedSection()) { OutputSection *Out = Rel->getOutputSection(); if (S->Type == SHT_RELA) return Saver.save(".rela" + Out->Name); return Saver.save(".rel" + Out->Name); } } // This check is for -z keep-text-section-prefix. This option separates text // sections with prefix ".text.hot", ".text.unlikely", ".text.startup" or // ".text.exit". // When enabled, this allows identifying the hot code region (.text.hot) in // the final binary which can be selectively mapped to huge pages or mlocked, // for instance. if (Config->ZKeepTextSectionPrefix) for (StringRef V : {".text.hot.", ".text.unlikely.", ".text.startup.", ".text.exit."}) { if (isSectionPrefix(V, S->Name)) return V.drop_back(); } for (StringRef V : {".text.", ".rodata.", ".data.rel.ro.", ".data.", ".bss.rel.ro.", ".bss.", ".init_array.", ".fini_array.", ".ctors.", ".dtors.", ".tbss.", ".gcc_except_table.", ".tdata.", ".ARM.exidx.", ".ARM.extab."}) { if (isSectionPrefix(V, S->Name)) return V.drop_back(); } // CommonSection is identified as "COMMON" in linker scripts. // By default, it should go to .bss section. 
if (S->Name == "COMMON") return ".bss"; return S->Name; } static bool needsInterpSection() { return !SharedFiles.empty() && !Config->DynamicLinker.empty() && Script->needsInterpSection(); } template void elf::writeResult() { Writer().run(); } template void Writer::removeEmptyPTLoad() { llvm::erase_if(Phdrs, [&](const PhdrEntry *P) { if (P->p_type != PT_LOAD) return false; if (!P->FirstSec) return true; uint64_t Size = P->LastSec->Addr + P->LastSec->Size - P->FirstSec->Addr; return Size == 0; }); } template static void combineEhFrameSections() { for (InputSectionBase *&S : InputSections) { EhInputSection *ES = dyn_cast(S); if (!ES || !ES->Live) continue; InX::EhFrame->addSection(ES); S = nullptr; } std::vector &V = InputSections; V.erase(std::remove(V.begin(), V.end(), nullptr), V.end()); } static Defined *addOptionalRegular(StringRef Name, SectionBase *Sec, uint64_t Val, uint8_t StOther = STV_HIDDEN, uint8_t Binding = STB_GLOBAL) { Symbol *S = Symtab->find(Name); if (!S || S->isDefined()) return nullptr; Symbol *Sym = Symtab->addRegular(Name, StOther, STT_NOTYPE, Val, /*Size=*/0, Binding, Sec, /*File=*/nullptr); return cast(Sym); } // The linker is expected to define some symbols depending on // the linking result. This function defines such symbols. void elf::addReservedSymbols() { if (Config->EMachine == EM_MIPS) { // Define _gp for MIPS. st_value of _gp symbol will be updated by Writer // so that it points to an absolute address which by default is relative // to GOT. Default offset is 0x7ff0. // See "Global Data Symbols" in Chapter 6 in the following document: // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf ElfSym::MipsGp = Symtab->addAbsolute("_gp", STV_HIDDEN, STB_GLOBAL); // On MIPS O32 ABI, _gp_disp is a magic symbol designates offset between // start of function and 'gp' pointer into GOT. 
if (Symtab->find("_gp_disp")) ElfSym::MipsGpDisp = Symtab->addAbsolute("_gp_disp", STV_HIDDEN, STB_GLOBAL); // The __gnu_local_gp is a magic symbol equal to the current value of 'gp' // pointer. This symbol is used in the code generated by .cpload pseudo-op // in case of using -mno-shared option. // https://sourceware.org/ml/binutils/2004-12/msg00094.html if (Symtab->find("__gnu_local_gp")) ElfSym::MipsLocalGp = Symtab->addAbsolute("__gnu_local_gp", STV_HIDDEN, STB_GLOBAL); } // The Power Architecture 64-bit v2 ABI defines a TableOfContents (TOC) which // combines the typical ELF GOT with the small data sections. It commonly // includes .got .toc .sdata .sbss. The .TOC. symbol replaces both // _GLOBAL_OFFSET_TABLE_ and _SDA_BASE_ from the 32-bit ABI. It is used to // represent the TOC base which is offset by 0x8000 bytes from the start of // the .got section. ElfSym::GlobalOffsetTable = addOptionalRegular( (Config->EMachine == EM_PPC64) ? ".TOC." : "_GLOBAL_OFFSET_TABLE_", Out::ElfHeader, Target->GotBaseSymOff); // __ehdr_start is the location of ELF file headers. Note that we define // this symbol unconditionally even when using a linker script, which // differs from the behavior implemented by GNU linker which only define // this symbol if ELF headers are in the memory mapped segment. addOptionalRegular("__ehdr_start", Out::ElfHeader, 0, STV_HIDDEN); // __executable_start is not documented, but the expectation of at // least the Android libc is that it points to the ELF header. addOptionalRegular("__executable_start", Out::ElfHeader, 0, STV_HIDDEN); // __dso_handle symbol is passed to cxa_finalize as a marker to identify // each DSO. The address of the symbol doesn't matter as long as they are // different in different DSOs, so we chose the start address of the DSO. addOptionalRegular("__dso_handle", Out::ElfHeader, 0, STV_HIDDEN); // If linker script do layout we do not need to create any standart symbols. 
if (Script->HasSectionsCommand) return; auto Add = [](StringRef S, int64_t Pos) { return addOptionalRegular(S, Out::ElfHeader, Pos, STV_DEFAULT); }; ElfSym::Bss = Add("__bss_start", 0); ElfSym::End1 = Add("end", -1); ElfSym::End2 = Add("_end", -1); ElfSym::Etext1 = Add("etext", -1); ElfSym::Etext2 = Add("_etext", -1); ElfSym::Edata1 = Add("edata", -1); ElfSym::Edata2 = Add("_edata", -1); } static OutputSection *findSection(StringRef Name) { for (BaseCommand *Base : Script->SectionCommands) if (auto *Sec = dyn_cast(Base)) if (Sec->Name == Name) return Sec; return nullptr; } // Initialize Out members. template static void createSyntheticSections() { // Initialize all pointers with NULL. This is needed because // you can call lld::elf::main more than once as a library. memset(&Out::First, 0, sizeof(Out)); auto Add = [](InputSectionBase *Sec) { InputSections.push_back(Sec); }; InX::DynStrTab = make(".dynstr", true); InX::Dynamic = make>(); if (Config->AndroidPackDynRelocs) { InX::RelaDyn = make>( Config->IsRela ? ".rela.dyn" : ".rel.dyn"); } else { InX::RelaDyn = make>( Config->IsRela ? ".rela.dyn" : ".rel.dyn", Config->ZCombreloc); } InX::ShStrTab = make(".shstrtab", false); Out::ProgramHeaders = make("", 0, SHF_ALLOC); Out::ProgramHeaders->Alignment = Config->Wordsize; if (needsInterpSection()) { InX::Interp = createInterpSection(); Add(InX::Interp); } else { InX::Interp = nullptr; } if (Config->Strip != StripPolicy::All) { InX::StrTab = make(".strtab", false); InX::SymTab = make>(*InX::StrTab); InX::SymTabShndx = make(); } if (Config->BuildId != BuildIdKind::None) { InX::BuildId = make(); Add(InX::BuildId); } InX::Bss = make(".bss", 0, 1); Add(InX::Bss); // If there is a SECTIONS command and a .data.rel.ro section name use name // .data.rel.ro.bss so that we match in the .data.rel.ro output section. // This makes sure our relro is contiguous. bool HasDataRelRo = Script->HasSectionsCommand && findSection(".data.rel.ro"); InX::BssRelRo = make(HasDataRelRo ? 
".data.rel.ro.bss" : ".bss.rel.ro", 0, 1); Add(InX::BssRelRo); // Add MIPS-specific sections. if (Config->EMachine == EM_MIPS) { if (!Config->Shared && Config->HasDynSymTab) { InX::MipsRldMap = make(); Add(InX::MipsRldMap); } if (auto *Sec = MipsAbiFlagsSection::create()) Add(Sec); if (auto *Sec = MipsOptionsSection::create()) Add(Sec); if (auto *Sec = MipsReginfoSection::create()) Add(Sec); } if (Config->HasDynSymTab) { InX::DynSymTab = make>(*InX::DynStrTab); Add(InX::DynSymTab); In::VerSym = make>(); Add(In::VerSym); if (!Config->VersionDefinitions.empty()) { In::VerDef = make>(); Add(In::VerDef); } In::VerNeed = make>(); Add(In::VerNeed); if (Config->GnuHash) { InX::GnuHashTab = make(); Add(InX::GnuHashTab); } if (Config->SysvHash) { InX::HashTab = make(); Add(InX::HashTab); } Add(InX::Dynamic); Add(InX::DynStrTab); Add(InX::RelaDyn); } if (Config->RelrPackDynRelocs) { InX::RelrDyn = make>(); Add(InX::RelrDyn); } // Add .got. MIPS' .got is so different from the other archs, // it has its own class. if (Config->EMachine == EM_MIPS) { InX::MipsGot = make(); Add(InX::MipsGot); } else { InX::Got = make(); Add(InX::Got); } InX::GotPlt = make(); Add(InX::GotPlt); InX::IgotPlt = make(); Add(InX::IgotPlt); if (Config->GdbIndex) { InX::GdbIndex = GdbIndexSection::create(); Add(InX::GdbIndex); } // We always need to add rel[a].plt to output if it has entries. // Even for static linking it can contain R_[*]_IRELATIVE relocations. InX::RelaPlt = make>( Config->IsRela ? ".rela.plt" : ".rel.plt", false /*Sort*/); Add(InX::RelaPlt); // The RelaIplt immediately follows .rel.plt (.rel.dyn for ARM) to ensure // that the IRelative relocations are processed last by the dynamic loader. // We cannot place the iplt section in .rel.dyn when Android relocation // packing is enabled because that would cause a section type mismatch. // However, because the Android dynamic loader reads .rel.plt after .rel.dyn, // we can get the desired behaviour by placing the iplt section in .rel.plt. 
InX::RelaIplt = make>( (Config->EMachine == EM_ARM && !Config->AndroidPackDynRelocs) ? ".rel.dyn" : InX::RelaPlt->Name, false /*Sort*/); Add(InX::RelaIplt); InX::Plt = make(false); Add(InX::Plt); InX::Iplt = make(true); Add(InX::Iplt); if (!Config->Relocatable) { if (Config->EhFrameHdr) { InX::EhFrameHdr = make(); Add(InX::EhFrameHdr); } InX::EhFrame = make(); Add(InX::EhFrame); } if (InX::SymTab) Add(InX::SymTab); if (InX::SymTabShndx) Add(InX::SymTabShndx); Add(InX::ShStrTab); if (InX::StrTab) Add(InX::StrTab); if (Config->EMachine == EM_ARM && !Config->Relocatable) // Add a sentinel to terminate .ARM.exidx. It helps an unwinder // to find the exact address range of the last entry. Add(make()); } // The main function of the writer. template void Writer::run() { // Create linker-synthesized sections such as .got or .plt. // Such sections are of type input section. createSyntheticSections(); if (!Config->Relocatable) combineEhFrameSections(); // We want to process linker script commands. When SECTIONS command // is given we let it create sections. Script->processSectionCommands(); // Linker scripts controls how input sections are assigned to output sections. // Input sections that were not handled by scripts are called "orphans", and // they are assigned to output sections by the default rule. Process that. Script->addOrphanSections(); if (Config->Discard != DiscardPolicy::All) copyLocalSymbols(); if (Config->CopyRelocs) addSectionSymbols(); // Now that we have a complete set of output sections. This function // completes section contents. For example, we need to add strings // to the string table, and add entries to .got and .plt. // finalizeSections does that. finalizeSections(); if (errorCount()) return; Script->assignAddresses(); // If -compressed-debug-sections is specified, we need to compress // .debug_* sections. Do it right now because it changes the size of // output sections. 
for (OutputSection *Sec : OutputSections) Sec->maybeCompress(); Script->allocateHeaders(Phdrs); // Remove empty PT_LOAD to avoid causing the dynamic linker to try to mmap a // 0 sized region. This has to be done late since only after assignAddresses // we know the size of the sections. removeEmptyPTLoad(); if (!Config->OFormatBinary) assignFileOffsets(); else assignFileOffsetsBinary(); setPhdrs(); if (Config->Relocatable) { for (OutputSection *Sec : OutputSections) Sec->Addr = 0; } if (Config->CheckSections) checkSections(); // It does not make sense try to open the file if we have error already. if (errorCount()) return; // Write the result down to a file. openFile(); if (errorCount()) return; if (!Config->OFormatBinary) { writeTrapInstr(); writeHeader(); writeSections(); } else { writeSectionsBinary(); } // Backfill .note.gnu.build-id section content. This is done at last // because the content is usually a hash value of the entire output file. writeBuildId(); if (errorCount()) return; // Handle -Map and -cref options. writeMapFile(); writeCrossReferenceTable(); if (errorCount()) return; if (auto E = Buffer->commit()) error("failed to write to the output file: " + toString(std::move(E))); } static bool shouldKeepInSymtab(SectionBase *Sec, StringRef SymName, const Symbol &B) { if (B.isSection()) return false; if (Config->Discard == DiscardPolicy::None) return true; // In ELF assembly .L symbols are normally discarded by the assembler. // If the assembler fails to do so, the linker discards them if // * --discard-locals is used. // * The symbol is in a SHF_MERGE section, which is normally the reason for // the assembler keeping the .L symbol. 
if (!SymName.startswith(".L") && !SymName.empty()) return true; if (Config->Discard == DiscardPolicy::Locals) return false; return !Sec || !(Sec->Flags & SHF_MERGE); } static bool includeInSymtab(const Symbol &B) { if (!B.isLocal() && !B.IsUsedInRegularObj) return false; if (auto *D = dyn_cast(&B)) { // Always include absolute symbols. SectionBase *Sec = D->Section; if (!Sec) return true; Sec = Sec->Repl; // Exclude symbols pointing to garbage-collected sections. if (isa(Sec) && !Sec->Live) return false; if (auto *S = dyn_cast(Sec)) if (!S->getSectionPiece(D->Value)->Live) return false; return true; } return B.Used; } // Local symbols are not in the linker's symbol table. This function scans // each object file's symbol table to copy local symbols to the output. template void Writer::copyLocalSymbols() { if (!InX::SymTab) return; for (InputFile *File : ObjectFiles) { ObjFile *F = cast>(File); for (Symbol *B : F->getLocalSymbols()) { if (!B->isLocal()) fatal(toString(F) + ": broken object: getLocalSymbols returns a non-local symbol"); auto *DR = dyn_cast(B); // No reason to keep local undefined symbol in symtab. if (!DR) continue; if (!includeInSymtab(*B)) continue; SectionBase *Sec = DR->Section; if (!shouldKeepInSymtab(Sec, B->getName(), *B)) continue; InX::SymTab->addSymbol(B); } } } template void Writer::addSectionSymbols() { // Create a section symbol for each output section so that we can represent // relocations that point to the section. If we know that no relocation is // referring to a section (that happens if the section is a synthetic one), we // don't create a section symbol for that section. 
for (BaseCommand *Base : Script->SectionCommands) { auto *Sec = dyn_cast(Base); if (!Sec) continue; auto I = llvm::find_if(Sec->SectionCommands, [](BaseCommand *Base) { if (auto *ISD = dyn_cast(Base)) return !ISD->Sections.empty(); return false; }); if (I == Sec->SectionCommands.end()) continue; InputSection *IS = cast(*I)->Sections[0]; // Relocations are not using REL[A] section symbols. if (IS->Type == SHT_REL || IS->Type == SHT_RELA) continue; // Unlike other synthetic sections, mergeable output sections contain data // copied from input sections, and there may be a relocation pointing to its // contents if -r or -emit-reloc are given. if (isa(IS) && !(IS->Flags & SHF_MERGE)) continue; auto *Sym = make(IS->File, "", STB_LOCAL, /*StOther=*/0, STT_SECTION, /*Value=*/0, /*Size=*/0, IS); InX::SymTab->addSymbol(Sym); } } // Today's loaders have a feature to make segments read-only after // processing dynamic relocations to enhance security. PT_GNU_RELRO // is defined for that. // // This function returns true if a section needs to be put into a // PT_GNU_RELRO segment. static bool isRelroSection(const OutputSection *Sec) { if (!Config->ZRelro) return false; uint64_t Flags = Sec->Flags; // Non-allocatable or non-writable sections don't need RELRO because // they are not writable or not even mapped to memory in the first place. // RELRO is for sections that are essentially read-only but need to // be writable only at process startup to allow dynamic linker to // apply relocations. if (!(Flags & SHF_ALLOC) || !(Flags & SHF_WRITE)) return false; // Once initialized, TLS data segments are used as data templates // for a thread-local storage. For each new thread, runtime // allocates memory for a TLS and copy templates there. No thread // are supposed to use templates directly. Thus, it can be in RELRO. if (Flags & SHF_TLS) return true; // .init_array, .preinit_array and .fini_array contain pointers to // functions that are executed on process startup or exit. 
These // pointers are set by the static linker, and they are not expected // to change at runtime. But if you are an attacker, you could do // interesting things by manipulating pointers in .fini_array, for // example. So they are put into RELRO. uint32_t Type = Sec->Type; if (Type == SHT_INIT_ARRAY || Type == SHT_FINI_ARRAY || Type == SHT_PREINIT_ARRAY) return true; // .got contains pointers to external symbols. They are resolved by // the dynamic linker when a module is loaded into memory, and after // that they are not expected to change. So, it can be in RELRO. if (InX::Got && Sec == InX::Got->getParent()) return true; if (Sec->Name.equals(".toc")) return true; // .got.plt contains pointers to external function symbols. They are // by default resolved lazily, so we usually cannot put it into RELRO. // However, if "-z now" is given, the lazy symbol resolution is // disabled, which enables us to put it into RELRO. if (Sec == InX::GotPlt->getParent()) return Config->ZNow; // .dynamic section contains data for the dynamic linker, and // there's no need to write to it at runtime, so it's better to put // it into RELRO. if (Sec == InX::Dynamic->getParent()) return true; // Sections with some special names are put into RELRO. This is a // bit unfortunate because section names shouldn't be significant in // ELF in spirit. But in reality many linker features depend on // magic section names. StringRef S = Sec->Name; return S == ".data.rel.ro" || S == ".bss.rel.ro" || S == ".ctors" || S == ".dtors" || S == ".jcr" || S == ".eh_frame" || S == ".openbsd.randomdata"; } // We compute a rank for each section. The rank indicates where the // section should be placed in the file. Instead of using simple // numbers (0,1,2...), we use a series of flags. One for each decision // point when placing the section. // Using flags has two key properties: // * It is easy to check if a give branch was taken. // * It is easy two see how similar two ranks are (see getRankProximity). 
enum RankFlags { RF_NOT_ADDR_SET = 1 << 18, RF_NOT_INTERP = 1 << 17, RF_NOT_ALLOC = 1 << 16, RF_WRITE = 1 << 15, RF_EXEC_WRITE = 1 << 14, RF_EXEC = 1 << 13, RF_RODATA = 1 << 12, RF_NON_TLS_BSS = 1 << 11, RF_NON_TLS_BSS_RO = 1 << 10, RF_NOT_TLS = 1 << 9, RF_BSS = 1 << 8, RF_NOTE = 1 << 7, RF_PPC_NOT_TOCBSS = 1 << 6, RF_PPC_TOCL = 1 << 5, RF_PPC_TOC = 1 << 4, RF_PPC_GOT = 1 << 3, RF_PPC_BRANCH_LT = 1 << 2, RF_MIPS_GPREL = 1 << 1, RF_MIPS_NOT_GOT = 1 << 0 }; static unsigned getSectionRank(const OutputSection *Sec) { unsigned Rank = 0; // We want to put section specified by -T option first, so we // can start assigning VA starting from them later. if (Config->SectionStartMap.count(Sec->Name)) return Rank; Rank |= RF_NOT_ADDR_SET; // Put .interp first because some loaders want to see that section // on the first page of the executable file when loaded into memory. if (Sec->Name == ".interp") return Rank; Rank |= RF_NOT_INTERP; // Allocatable sections go first to reduce the total PT_LOAD size and // so debug info doesn't change addresses in actual code. if (!(Sec->Flags & SHF_ALLOC)) return Rank | RF_NOT_ALLOC; // Sort sections based on their access permission in the following // order: R, RX, RWX, RW. This order is based on the following // considerations: // * Read-only sections come first such that they go in the // PT_LOAD covering the program headers at the start of the file. // * Read-only, executable sections come next. // * Writable, executable sections follow such that .plt on // architectures where it needs to be writable will be placed // between .text and .data. // * Writable sections come last, such that .bss lands at the very // end of the last PT_LOAD. 
bool IsExec = Sec->Flags & SHF_EXECINSTR; bool IsWrite = Sec->Flags & SHF_WRITE; if (IsExec) { if (IsWrite) Rank |= RF_EXEC_WRITE; else Rank |= RF_EXEC; } else if (IsWrite) { Rank |= RF_WRITE; } else if (Sec->Type == SHT_PROGBITS) { // Make non-executable and non-writable PROGBITS sections (e.g .rodata // .eh_frame) closer to .text. They likely contain PC or GOT relative // relocations and there could be relocation overflow if other huge sections // (.dynstr .dynsym) were placed in between. Rank |= RF_RODATA; } // If we got here we know that both A and B are in the same PT_LOAD. bool IsTls = Sec->Flags & SHF_TLS; bool IsNoBits = Sec->Type == SHT_NOBITS; // The first requirement we have is to put (non-TLS) nobits sections last. The // reason is that the only thing the dynamic linker will see about them is a // p_memsz that is larger than p_filesz. Seeing that it zeros the end of the // PT_LOAD, so that has to correspond to the nobits sections. bool IsNonTlsNoBits = IsNoBits && !IsTls; if (IsNonTlsNoBits) Rank |= RF_NON_TLS_BSS; // We place nobits RelRo sections before plain r/w ones, and non-nobits RelRo // sections after r/w ones, so that the RelRo sections are contiguous. bool IsRelRo = isRelroSection(Sec); if (IsNonTlsNoBits && !IsRelRo) Rank |= RF_NON_TLS_BSS_RO; if (!IsNonTlsNoBits && IsRelRo) Rank |= RF_NON_TLS_BSS_RO; // The TLS initialization block needs to be a single contiguous block in a R/W // PT_LOAD, so stick TLS sections directly before the other RelRo R/W // sections. The TLS NOBITS sections are placed here as they don't take up // virtual address space in the PT_LOAD. if (!IsTls) Rank |= RF_NOT_TLS; // Within the TLS initialization block, the non-nobits sections need to appear // first. if (IsNoBits) Rank |= RF_BSS; // We create a NOTE segment for contiguous .note sections, so make // them contigous if there are more than one .note section with the // same attributes. 
if (Sec->Type == SHT_NOTE) Rank |= RF_NOTE; // Some architectures have additional ordering restrictions for sections // within the same PT_LOAD. if (Config->EMachine == EM_PPC64) { // PPC64 has a number of special SHT_PROGBITS+SHF_ALLOC+SHF_WRITE sections // that we would like to make sure appear is a specific order to maximize // their coverage by a single signed 16-bit offset from the TOC base // pointer. Conversely, the special .tocbss section should be first among // all SHT_NOBITS sections. This will put it next to the loaded special // PPC64 sections (and, thus, within reach of the TOC base pointer). StringRef Name = Sec->Name; if (Name != ".tocbss") Rank |= RF_PPC_NOT_TOCBSS; if (Name == ".toc1") Rank |= RF_PPC_TOCL; if (Name == ".toc") Rank |= RF_PPC_TOC; if (Name == ".got") Rank |= RF_PPC_GOT; if (Name == ".branch_lt") Rank |= RF_PPC_BRANCH_LT; } if (Config->EMachine == EM_MIPS) { // All sections with SHF_MIPS_GPREL flag should be grouped together // because data in these sections is addressable with a gp relative address. if (Sec->Flags & SHF_MIPS_GPREL) Rank |= RF_MIPS_GPREL; if (Sec->Name != ".got") Rank |= RF_MIPS_NOT_GOT; } return Rank; } static bool compareSections(const BaseCommand *ACmd, const BaseCommand *BCmd) { const OutputSection *A = cast(ACmd); const OutputSection *B = cast(BCmd); if (A->SortRank != B->SortRank) return A->SortRank < B->SortRank; if (!(A->SortRank & RF_NOT_ADDR_SET)) return Config->SectionStartMap.lookup(A->Name) < Config->SectionStartMap.lookup(B->Name); return false; } void PhdrEntry::add(OutputSection *Sec) { LastSec = Sec; if (!FirstSec) FirstSec = Sec; p_align = std::max(p_align, Sec->Alignment); if (p_type == PT_LOAD) Sec->PtLoad = this; } // The beginning and the ending of .rel[a].plt section are marked // with __rel[a]_iplt_{start,end} symbols if it is a statically linked // executable. The runtime needs these symbols in order to resolve // all IRELATIVE relocs on startup. 
For dynamic executables, we don't // need these symbols, since IRELATIVE relocs are resolved through GOT // and PLT. For details, see http://www.airs.com/blog/archives/403. template void Writer::addRelIpltSymbols() { if (needsInterpSection()) return; StringRef S = Config->IsRela ? "__rela_iplt_start" : "__rel_iplt_start"; addOptionalRegular(S, InX::RelaIplt, 0, STV_HIDDEN, STB_WEAK); S = Config->IsRela ? "__rela_iplt_end" : "__rel_iplt_end"; ElfSym::RelaIpltEnd = addOptionalRegular(S, InX::RelaIplt, 0, STV_HIDDEN, STB_WEAK); } template void Writer::forEachRelSec( llvm::function_ref Fn) { // Scan all relocations. Each relocation goes through a series // of tests to determine if it needs special treatment, such as // creating GOT, PLT, copy relocations, etc. // Note that relocations for non-alloc sections are directly // processed by InputSection::relocateNonAlloc. for (InputSectionBase *IS : InputSections) if (IS->Live && isa(IS) && (IS->Flags & SHF_ALLOC)) Fn(*IS); for (EhInputSection *ES : InX::EhFrame->Sections) Fn(*ES); } // This function generates assignments for predefined symbols (e.g. _end or // _etext) and inserts them into the commands sequence to be processed at the // appropriate time. This ensures that the value is going to be correct by the // time any references to these symbols are processed and is equivalent to // defining these symbols explicitly in the linker script. template void Writer::setReservedSymbolSections() { if (ElfSym::GlobalOffsetTable) { // The _GLOBAL_OFFSET_TABLE_ symbol is defined by target convention usually // to the start of the .got or .got.plt section. InputSection *GotSection = InX::GotPlt; if (!Target->GotBaseSymInGotPlt) GotSection = InX::MipsGot ? 
cast(InX::MipsGot) : cast(InX::Got); ElfSym::GlobalOffsetTable->Section = GotSection; } if (ElfSym::RelaIpltEnd) ElfSym::RelaIpltEnd->Value = InX::RelaIplt->getSize(); PhdrEntry *Last = nullptr; PhdrEntry *LastRO = nullptr; for (PhdrEntry *P : Phdrs) { if (P->p_type != PT_LOAD) continue; Last = P; if (!(P->p_flags & PF_W)) LastRO = P; } if (LastRO) { // _etext is the first location after the last read-only loadable segment. if (ElfSym::Etext1) ElfSym::Etext1->Section = LastRO->LastSec; if (ElfSym::Etext2) ElfSym::Etext2->Section = LastRO->LastSec; } if (Last) { // _edata points to the end of the last mapped initialized section. OutputSection *Edata = nullptr; for (OutputSection *OS : OutputSections) { if (OS->Type != SHT_NOBITS) Edata = OS; if (OS == Last->LastSec) break; } if (ElfSym::Edata1) ElfSym::Edata1->Section = Edata; if (ElfSym::Edata2) ElfSym::Edata2->Section = Edata; // _end is the first location after the uninitialized data region. if (ElfSym::End1) ElfSym::End1->Section = Last->LastSec; if (ElfSym::End2) ElfSym::End2->Section = Last->LastSec; } if (ElfSym::Bss) ElfSym::Bss->Section = findSection(".bss"); // Setup MIPS _gp_disp/__gnu_local_gp symbols which should // be equal to the _gp symbol's value. if (ElfSym::MipsGp) { // Find GP-relative section with the lowest address // and use this address to calculate default _gp value. for (OutputSection *OS : OutputSections) { if (OS->Flags & SHF_MIPS_GPREL) { ElfSym::MipsGp->Section = OS; ElfSym::MipsGp->Value = 0x7ff0; break; } } } } // We want to find how similar two ranks are. // The more branches in getSectionRank that match, the more similar they are. // Since each branch corresponds to a bit flag, we can just use // countLeadingZeros. 
static int getRankProximityAux(OutputSection *A, OutputSection *B) { return countLeadingZeros(A->SortRank ^ B->SortRank); } static int getRankProximity(OutputSection *A, BaseCommand *B) { if (auto *Sec = dyn_cast(B)) return getRankProximityAux(A, Sec); return -1; } // When placing orphan sections, we want to place them after symbol assignments // so that an orphan after // begin_foo = .; // foo : { *(foo) } // end_foo = .; // doesn't break the intended meaning of the begin/end symbols. // We don't want to go over sections since findOrphanPos is the // one in charge of deciding the order of the sections. // We don't want to go over changes to '.', since doing so in // rx_sec : { *(rx_sec) } // . = ALIGN(0x1000); // /* The RW PT_LOAD starts here*/ // rw_sec : { *(rw_sec) } // would mean that the RW PT_LOAD would become unaligned. static bool shouldSkip(BaseCommand *Cmd) { if (auto *Assign = dyn_cast(Cmd)) return Assign->Name != "."; return false; } // We want to place orphan sections so that they share as much // characteristics with their neighbors as possible. For example, if // both are rw, or both are tls. template static std::vector::iterator findOrphanPos(std::vector::iterator B, std::vector::iterator E) { OutputSection *Sec = cast(*E); // Find the first element that has as close a rank as possible. auto I = std::max_element(B, E, [=](BaseCommand *A, BaseCommand *B) { return getRankProximity(Sec, A) < getRankProximity(Sec, B); }); if (I == E) return E; // Consider all existing sections with the same proximity. 
int Proximity = getRankProximity(Sec, *I); for (; I != E; ++I) { auto *CurSec = dyn_cast(*I); if (!CurSec) continue; if (getRankProximity(Sec, CurSec) != Proximity || Sec->SortRank < CurSec->SortRank) break; } auto IsOutputSec = [](BaseCommand *Cmd) { return isa(Cmd); }; auto J = std::find_if(llvm::make_reverse_iterator(I), llvm::make_reverse_iterator(B), IsOutputSec); I = J.base(); // As a special case, if the orphan section is the last section, put // it at the very end, past any other commands. // This matches bfd's behavior and is convenient when the linker script fully // specifies the start of the file, but doesn't care about the end (the non // alloc sections for example). auto NextSec = std::find_if(I, E, IsOutputSec); if (NextSec == E) return E; while (I != E && shouldSkip(*I)) ++I; return I; } // Builds section order for handling --symbol-ordering-file. static DenseMap buildSectionOrder() { DenseMap SectionOrder; // Use the rarely used option -call-graph-ordering-file to sort sections. if (!Config->CallGraphProfile.empty()) return computeCallGraphProfileOrder(); if (Config->SymbolOrderingFile.empty()) return SectionOrder; struct SymbolOrderEntry { int Priority; bool Present; }; // Build a map from symbols to their priorities. Symbols that didn't // appear in the symbol ordering file have the lowest priority 0. // All explicitly mentioned symbols have negative (higher) priorities. DenseMap SymbolOrder; int Priority = -Config->SymbolOrderingFile.size(); for (StringRef S : Config->SymbolOrderingFile) SymbolOrder.insert({S, {Priority++, false}}); // Build a map from sections to their priorities. 
auto AddSym = [&](Symbol &Sym) { auto It = SymbolOrder.find(Sym.getName()); if (It == SymbolOrder.end()) return; SymbolOrderEntry &Ent = It->second; Ent.Present = true; warnUnorderableSymbol(&Sym); if (auto *D = dyn_cast(&Sym)) { if (auto *Sec = dyn_cast_or_null(D->Section)) { int &Priority = SectionOrder[cast(Sec->Repl)]; Priority = std::min(Priority, Ent.Priority); } } }; // We want both global and local symbols. We get the global ones from the // symbol table and iterate the object files for the local ones. for (Symbol *Sym : Symtab->getSymbols()) if (!Sym->isLazy()) AddSym(*Sym); for (InputFile *File : ObjectFiles) for (Symbol *Sym : File->getSymbols()) if (Sym->isLocal()) AddSym(*Sym); if (Config->WarnSymbolOrdering) for (auto OrderEntry : SymbolOrder) if (!OrderEntry.second.Present) warn("symbol ordering file: no such symbol: " + OrderEntry.first); return SectionOrder; } // Sorts the sections in ISD according to the provided section order. static void sortISDBySectionOrder(InputSectionDescription *ISD, const DenseMap &Order) { std::vector UnorderedSections; std::vector> OrderedSections; uint64_t UnorderedSize = 0; for (InputSection *IS : ISD->Sections) { auto I = Order.find(IS); if (I == Order.end()) { UnorderedSections.push_back(IS); UnorderedSize += IS->getSize(); continue; } OrderedSections.push_back({IS, I->second}); } llvm::sort( OrderedSections.begin(), OrderedSections.end(), [&](std::pair A, std::pair B) { return A.second < B.second; }); // Find an insertion point for the ordered section list in the unordered // section list. On targets with limited-range branches, this is the mid-point // of the unordered section list. This decreases the likelihood that a range // extension thunk will be needed to enter or exit the ordered region. 
If the // ordered section list is a list of hot functions, we can generally expect // the ordered functions to be called more often than the unordered functions, // making it more likely that any particular call will be within range, and // therefore reducing the number of thunks required. // // For example, imagine that you have 8MB of hot code and 32MB of cold code. // If the layout is: // // 8MB hot // 32MB cold // // only the first 8-16MB of the cold code (depending on which hot function it // is actually calling) can call the hot code without a range extension thunk. // However, if we use this layout: // // 16MB cold // 8MB hot // 16MB cold // // both the last 8-16MB of the first block of cold code and the first 8-16MB // of the second block of cold code can call the hot code without a thunk. So // we effectively double the amount of code that could potentially call into // the hot code without a thunk. size_t InsPt = 0; if (Target->ThunkSectionSpacing && !OrderedSections.empty()) { uint64_t UnorderedPos = 0; for (; InsPt != UnorderedSections.size(); ++InsPt) { UnorderedPos += UnorderedSections[InsPt]->getSize(); if (UnorderedPos > UnorderedSize / 2) break; } } ISD->Sections.clear(); for (InputSection *IS : makeArrayRef(UnorderedSections).slice(0, InsPt)) ISD->Sections.push_back(IS); for (std::pair P : OrderedSections) ISD->Sections.push_back(P.first); for (InputSection *IS : makeArrayRef(UnorderedSections).slice(InsPt)) ISD->Sections.push_back(IS); } static void sortSection(OutputSection *Sec, const DenseMap &Order) { StringRef Name = Sec->Name; // Sort input sections by section name suffixes for // __attribute__((init_priority(N))). if (Name == ".init_array" || Name == ".fini_array") { if (!Script->HasSectionsCommand) Sec->sortInitFini(); return; } // Sort input sections by the special rule for .ctors and .dtors. if (Name == ".ctors" || Name == ".dtors") { if (!Script->HasSectionsCommand) Sec->sortCtorsDtors(); return; } // Never sort these. 
if (Name == ".init" || Name == ".fini") return; // Sort input sections by priority using the list provided // by --symbol-ordering-file. if (!Order.empty()) for (BaseCommand *B : Sec->SectionCommands) if (auto *ISD = dyn_cast(B)) sortISDBySectionOrder(ISD, Order); } // If no layout was provided by linker script, we want to apply default // sorting for special input sections. This also handles --symbol-ordering-file. template void Writer::sortInputSections() { // Build the order once since it is expensive. DenseMap Order = buildSectionOrder(); for (BaseCommand *Base : Script->SectionCommands) if (auto *Sec = dyn_cast(Base)) sortSection(Sec, Order); } template void Writer::sortSections() { Script->adjustSectionsBeforeSorting(); // Don't sort if using -r. It is not necessary and we want to preserve the // relative order for SHF_LINK_ORDER sections. if (Config->Relocatable) return; sortInputSections(); for (BaseCommand *Base : Script->SectionCommands) { auto *OS = dyn_cast(Base); if (!OS) continue; OS->SortRank = getSectionRank(OS); // We want to assign rude approximation values to OutSecOff fields // to know the relative order of the input sections. We use it for // sorting SHF_LINK_ORDER sections. See resolveShfLinkOrder(). uint64_t I = 0; for (InputSection *Sec : getInputSections(OS)) Sec->OutSecOff = I++; } if (!Script->HasSectionsCommand) { // We know that all the OutputSections are contiguous in this case. auto IsSection = [](BaseCommand *Base) { return isa(Base); }; std::stable_sort( llvm::find_if(Script->SectionCommands, IsSection), llvm::find_if(llvm::reverse(Script->SectionCommands), IsSection).base(), compareSections); return; } // Orphan sections are sections present in the input files which are // not explicitly placed into the output file by the linker script. // // The sections in the linker script are already in the correct // order. We have to figuere out where to insert the orphan // sections. 
// // The order of the sections in the script is arbitrary and may not agree with // compareSections. This means that we cannot easily define a strict weak // ordering. To see why, consider a comparison of a section in the script and // one not in the script. We have a two simple options: // * Make them equivalent (a is not less than b, and b is not less than a). // The problem is then that equivalence has to be transitive and we can // have sections a, b and c with only b in a script and a less than c // which breaks this property. // * Use compareSectionsNonScript. Given that the script order doesn't have // to match, we can end up with sections a, b, c, d where b and c are in the // script and c is compareSectionsNonScript less than b. In which case d // can be equivalent to c, a to b and d < a. As a concrete example: // .a (rx) # not in script // .b (rx) # in script // .c (ro) # in script // .d (ro) # not in script // // The way we define an order then is: // * Sort only the orphan sections. They are in the end right now. // * Move each orphan section to its preferred position. We try // to put each section in the last position where it can share // a PT_LOAD. // // There is some ambiguity as to where exactly a new entry should be // inserted, because Commands contains not only output section // commands but also other types of commands such as symbol assignment // expressions. There's no correct answer here due to the lack of the // formal specification of the linker script. We use heuristics to // determine whether a new output command should be added before or // after another commands. For the details, look at shouldSkip // function. auto I = Script->SectionCommands.begin(); auto E = Script->SectionCommands.end(); auto NonScriptI = std::find_if(I, E, [](BaseCommand *Base) { if (auto *Sec = dyn_cast(Base)) return Sec->SectionIndex == UINT32_MAX; return false; }); // Sort the orphan sections. 
std::stable_sort(NonScriptI, E, compareSections); // As a horrible special case, skip the first . assignment if it is before any // section. We do this because it is common to set a load address by starting // the script with ". = 0xabcd" and the expectation is that every section is // after that. auto FirstSectionOrDotAssignment = std::find_if(I, E, [](BaseCommand *Cmd) { return !shouldSkip(Cmd); }); if (FirstSectionOrDotAssignment != E && isa(**FirstSectionOrDotAssignment)) ++FirstSectionOrDotAssignment; I = FirstSectionOrDotAssignment; while (NonScriptI != E) { auto Pos = findOrphanPos(I, NonScriptI); OutputSection *Orphan = cast(*NonScriptI); // As an optimization, find all sections with the same sort rank // and insert them with one rotate. unsigned Rank = Orphan->SortRank; auto End = std::find_if(NonScriptI + 1, E, [=](BaseCommand *Cmd) { return cast(Cmd)->SortRank != Rank; }); std::rotate(Pos, NonScriptI, End); NonScriptI = End; } Script->adjustSectionsAfterSorting(); } static bool compareByFilePosition(InputSection *A, InputSection *B) { // Synthetic, i. e. a sentinel section, should go last. if (A->kind() == InputSectionBase::Synthetic || B->kind() == InputSectionBase::Synthetic) return A->kind() != InputSectionBase::Synthetic; InputSection *LA = A->getLinkOrderDep(); InputSection *LB = B->getLinkOrderDep(); OutputSection *AOut = LA->getParent(); OutputSection *BOut = LB->getParent(); if (AOut != BOut) return AOut->SectionIndex < BOut->SectionIndex; return LA->OutSecOff < LB->OutSecOff; } // This function is used by the --merge-exidx-entries to detect duplicate // .ARM.exidx sections. It is Arm only. // // The .ARM.exidx section is of the form: // | PREL31 offset to function | Unwind instructions for function | // where the unwind instructions are either a small number of unwind // instructions inlined into the table entry, the special CANT_UNWIND value of // 0x1 or a PREL31 offset into a .ARM.extab Section that contains unwind // instructions. 
// // We return true if all the unwind instructions in the .ARM.exidx entries of // Cur can be merged into the last entry of Prev. static bool isDuplicateArmExidxSec(InputSection *Prev, InputSection *Cur) { // References to .ARM.Extab Sections have bit 31 clear and are not the // special EXIDX_CANTUNWIND bit-pattern. auto IsExtabRef = [](uint32_t Unwind) { return (Unwind & 0x80000000) == 0 && Unwind != 0x1; }; struct ExidxEntry { ulittle32_t Fn; ulittle32_t Unwind; }; // Get the last table Entry from the previous .ARM.exidx section. const ExidxEntry &PrevEntry = Prev->getDataAs().back(); if (IsExtabRef(PrevEntry.Unwind)) return false; // We consider the unwind instructions of an .ARM.exidx table entry // a duplicate if the previous unwind instructions if: // - Both are the special EXIDX_CANTUNWIND. // - Both are the same inline unwind instructions. // We do not attempt to follow and check links into .ARM.extab tables as // consecutive identical entries are rare and the effort to check that they // are identical is high. for (const ExidxEntry Entry : Cur->getDataAs()) if (IsExtabRef(Entry.Unwind) || Entry.Unwind != PrevEntry.Unwind) return false; // All table entries in this .ARM.exidx Section can be merged into the // previous Section. return true; } template void Writer::resolveShfLinkOrder() { for (OutputSection *Sec : OutputSections) { if (!(Sec->Flags & SHF_LINK_ORDER)) continue; // Link order may be distributed across several InputSectionDescriptions // but sort must consider them all at once. 
std::vector ScriptSections; std::vector Sections; for (BaseCommand *Base : Sec->SectionCommands) { if (auto *ISD = dyn_cast(Base)) { for (InputSection *&IS : ISD->Sections) { ScriptSections.push_back(&IS); Sections.push_back(IS); } } } std::stable_sort(Sections.begin(), Sections.end(), compareByFilePosition); if (!Config->Relocatable && Config->EMachine == EM_ARM && Sec->Type == SHT_ARM_EXIDX) { if (auto *Sentinel = dyn_cast(Sections.back())) { assert(Sections.size() >= 2 && "We should create a sentinel section only if there are " "alive regular exidx sections."); // The last executable section is required to fill the sentinel. // Remember it here so that we don't have to find it again. Sentinel->Highest = Sections[Sections.size() - 2]->getLinkOrderDep(); } if (Config->MergeArmExidx) { // The EHABI for the Arm Architecture permits consecutive identical // table entries to be merged. We use a simple implementation that // removes a .ARM.exidx Input Section if it can be merged into the // previous one. This does not require any rewriting of InputSection // contents but misses opportunities for fine grained deduplication // where only a subset of the InputSection contents can be merged. size_t Prev = 0; // The last one is a sentinel entry which should not be removed. for (size_t I = 1; I < Sections.size() - 1; ++I) { if (isDuplicateArmExidxSec(Sections[Prev], Sections[I])) Sections[I] = nullptr; else Prev = I; } } } for (int I = 0, N = Sections.size(); I < N; ++I) *ScriptSections[I] = Sections[I]; // Remove the Sections we marked as duplicate earlier. 
for (BaseCommand *Base : Sec->SectionCommands) if (auto *ISD = dyn_cast(Base)) llvm::erase_if(ISD->Sections, [](InputSection *IS) { return !IS; }); } } static void applySynthetic(const std::vector &Sections, llvm::function_ref Fn) { for (SyntheticSection *SS : Sections) if (SS && SS->getParent() && !SS->empty()) Fn(SS); } // In order to allow users to manipulate linker-synthesized sections, // we had to add synthetic sections to the input section list early, // even before we make decisions whether they are needed. This allows // users to write scripts like this: ".mygot : { .got }". // // Doing it has an unintended side effects. If it turns out that we // don't need a .got (for example) at all because there's no // relocation that needs a .got, we don't want to emit .got. // // To deal with the above problem, this function is called after // scanRelocations is called to remove synthetic sections that turn // out to be empty. static void removeUnusedSyntheticSections() { // All input synthetic sections that can be empty are placed after // all regular ones. We iterate over them all and exit at first // non-synthetic. for (InputSectionBase *S : llvm::reverse(InputSections)) { SyntheticSection *SS = dyn_cast(S); if (!SS) return; OutputSection *OS = SS->getParent(); if (!OS || !SS->empty()) continue; // If we reach here, then SS is an unused synthetic section and we want to // remove it from corresponding input section description of output section. for (BaseCommand *B : OS->SectionCommands) if (auto *ISD = dyn_cast(B)) llvm::erase_if(ISD->Sections, [=](InputSection *IS) { return IS == SS; }); } } // Returns true if a symbol can be replaced at load-time by a symbol // with the same name defined in other ELF executable or DSO. static bool computeIsPreemptible(const Symbol &B) { assert(!B.isLocal()); // Only symbols that appear in dynsym can be preempted. if (!B.includeInDynsym()) return false; // Only default visibility symbols can be preempted. 
if (B.Visibility != STV_DEFAULT) return false; // At this point copy relocations have not been created yet, so any // symbol that is not defined locally is preemptible. if (!B.isDefined()) return true; // If we have a dynamic list it specifies which local symbols are preemptible. if (Config->HasDynamicList) return false; if (!Config->Shared) return false; // -Bsymbolic means that definitions are not preempted. if (Config->Bsymbolic || (Config->BsymbolicFunctions && B.isFunc())) return false; return true; } // Create output section objects and add them to OutputSections. template void Writer::finalizeSections() { Out::DebugInfo = findSection(".debug_info"); Out::PreinitArray = findSection(".preinit_array"); Out::InitArray = findSection(".init_array"); Out::FiniArray = findSection(".fini_array"); // The linker needs to define SECNAME_start, SECNAME_end and SECNAME_stop // symbols for sections, so that the runtime can get the start and end // addresses of each section by section name. Add such symbols. if (!Config->Relocatable) { addStartEndSymbols(); for (BaseCommand *Base : Script->SectionCommands) if (auto *Sec = dyn_cast(Base)) addStartStopSymbols(Sec); } // Add _DYNAMIC symbol. Unlike GNU gold, our _DYNAMIC symbol has no type. // It should be okay as no one seems to care about the type. // Even the author of gold doesn't remember why gold behaves that way. // https://sourceware.org/ml/binutils/2002-03/msg00360.html if (InX::DynSymTab) Symtab->addRegular("_DYNAMIC", STV_HIDDEN, STT_NOTYPE, 0 /*Value*/, /*Size=*/0, STB_WEAK, InX::Dynamic, /*File=*/nullptr); // Define __rel[a]_iplt_{start,end} symbols if needed. addRelIpltSymbols(); // This responsible for splitting up .eh_frame section into // pieces. The relocation scan uses those pieces, so this has to be // earlier. applySynthetic({InX::EhFrame}, [](SyntheticSection *SS) { SS->finalizeContents(); }); for (Symbol *S : Symtab->getSymbols()) S->IsPreemptible |= computeIsPreemptible(*S); // Scan relocations. 
This must be done after every symbol is declared so that // we can correctly decide if a dynamic relocation is needed. if (!Config->Relocatable) forEachRelSec(scanRelocations); if (InX::Plt && !InX::Plt->empty()) InX::Plt->addSymbols(); if (InX::Iplt && !InX::Iplt->empty()) InX::Iplt->addSymbols(); // Now that we have defined all possible global symbols including linker- // synthesized ones. Visit all symbols to give the finishing touches. for (Symbol *Sym : Symtab->getSymbols()) { if (!includeInSymtab(*Sym)) continue; if (InX::SymTab) InX::SymTab->addSymbol(Sym); if (InX::DynSymTab && Sym->includeInDynsym()) { InX::DynSymTab->addSymbol(Sym); if (auto *File = dyn_cast_or_null>(Sym->File)) if (File->IsNeeded && !Sym->isUndefined()) In::VerNeed->addSymbol(Sym); } } // Do not proceed if there was an undefined symbol. if (errorCount()) return; if (InX::MipsGot) InX::MipsGot->build(); removeUnusedSyntheticSections(); sortSections(); // Now that we have the final list, create a list of all the // OutputSections for convenience. for (BaseCommand *Base : Script->SectionCommands) if (auto *Sec = dyn_cast(Base)) OutputSections.push_back(Sec); // Ensure data sections are not mixed with executable sections when // -execute-only is used. if (Config->ExecuteOnly) for (OutputSection *OS : OutputSections) if (OS->Flags & SHF_EXECINSTR) for (InputSection *IS : getInputSections(OS)) if (!(IS->Flags & SHF_EXECINSTR)) error("-execute-only does not support intermingling data and code"); // Prefer command line supplied address over other constraints. for (OutputSection *Sec : OutputSections) { auto I = Config->SectionStartMap.find(Sec->Name); if (I != Config->SectionStartMap.end()) Sec->AddrExpr = [=] { return I->second; }; } // This is a bit of a hack. A value of 0 means undef, so we set it // to 1 to make __ehdr_start defined. The section number is not // particularly relevant. 
Out::ElfHeader->SectionIndex = 1; unsigned I = 1; for (OutputSection *Sec : OutputSections) { Sec->SectionIndex = I++; Sec->ShName = InX::ShStrTab->addString(Sec->Name); } // Binary and relocatable output does not have PHDRS. // The headers have to be created before finalize as that can influence the // image base and the dynamic section on mips includes the image base. if (!Config->Relocatable && !Config->OFormatBinary) { Phdrs = Script->hasPhdrsCommands() ? Script->createPhdrs() : createPhdrs(); addPtArmExid(Phdrs); Out::ProgramHeaders->Size = sizeof(Elf_Phdr) * Phdrs.size(); } // Some symbols are defined in term of program headers. Now that we // have the headers, we can find out which sections they point to. setReservedSymbolSections(); // Dynamic section must be the last one in this list and dynamic // symbol table section (DynSymTab) must be the first one. applySynthetic( {InX::DynSymTab, InX::Bss, InX::BssRelRo, InX::GnuHashTab, InX::HashTab, InX::SymTab, InX::SymTabShndx, InX::ShStrTab, InX::StrTab, In::VerDef, InX::DynStrTab, InX::Got, InX::MipsGot, InX::IgotPlt, InX::GotPlt, InX::RelaDyn, InX::RelrDyn, InX::RelaIplt, InX::RelaPlt, InX::Plt, InX::Iplt, InX::EhFrameHdr, In::VerSym, In::VerNeed, InX::Dynamic}, [](SyntheticSection *SS) { SS->finalizeContents(); }); if (!Script->HasSectionsCommand && !Config->Relocatable) fixSectionAlignments(); // After link order processing .ARM.exidx sections can be deduplicated, which // needs to be resolved before any other address dependent operation. resolveShfLinkOrder(); // Some architectures need to generate content that depends on the address // of InputSections. For example some architectures use small displacements // for jump instructions that is the linker's responsibility for creating // range extension thunks for. As the generation of the content may also // alter InputSection addresses we must converge to a fixed point. 
if (Target->NeedsThunks || Config->AndroidPackDynRelocs || Config->RelrPackDynRelocs) { ThunkCreator TC; AArch64Err843419Patcher A64P; bool Changed; do { Script->assignAddresses(); Changed = false; if (Target->NeedsThunks) Changed |= TC.createThunks(OutputSections); if (Config->FixCortexA53Errata843419) { if (Changed) Script->assignAddresses(); Changed |= A64P.createFixes(); } if (InX::MipsGot) InX::MipsGot->updateAllocSize(); Changed |= InX::RelaDyn->updateAllocSize(); if (InX::RelrDyn) Changed |= InX::RelrDyn->updateAllocSize(); } while (Changed); } // createThunks may have added local symbols to the static symbol table applySynthetic({InX::SymTab}, [](SyntheticSection *SS) { SS->postThunkContents(); }); // Fill other section headers. The dynamic table is finalized // at the end because some tags like RELSZ depend on result // of finalizing other sections. for (OutputSection *Sec : OutputSections) Sec->finalize(); } // The linker is expected to define SECNAME_start and SECNAME_end // symbols for a few sections. This function defines them. template void Writer::addStartEndSymbols() { // If a section does not exist, there's ambiguity as to how we // define _start and _end symbols for an init/fini section. Since // the loader assume that the symbols are always defined, we need to // always define them. But what value? The loader iterates over all // pointers between _start and _end to run global ctors/dtors, so if // the section is empty, their symbol values don't actually matter // as long as _start and _end point to the same location. // // That said, we don't want to set the symbols to 0 (which is // probably the simplest value) because that could cause some // program to fail to link due to relocation overflow, if their // program text is above 2 GiB. We use the address of the .text // section instead to prevent that failure. 
OutputSection *Default = findSection(".text"); if (!Default) Default = Out::ElfHeader; auto Define = [=](StringRef Start, StringRef End, OutputSection *OS) { if (OS) { addOptionalRegular(Start, OS, 0); addOptionalRegular(End, OS, -1); } else { addOptionalRegular(Start, Default, 0); addOptionalRegular(End, Default, 0); } }; Define("__preinit_array_start", "__preinit_array_end", Out::PreinitArray); Define("__init_array_start", "__init_array_end", Out::InitArray); Define("__fini_array_start", "__fini_array_end", Out::FiniArray); if (OutputSection *Sec = findSection(".ARM.exidx")) Define("__exidx_start", "__exidx_end", Sec); } // If a section name is valid as a C identifier (which is rare because of // the leading '.'), linkers are expected to define __start_ and // __stop_ symbols. They are at beginning and end of the section, // respectively. This is not requested by the ELF standard, but GNU ld and // gold provide the feature, and used by many programs. template void Writer::addStartStopSymbols(OutputSection *Sec) { StringRef S = Sec->Name; if (!isValidCIdentifier(S)) return; addOptionalRegular(Saver.save("__start_" + S), Sec, 0, STV_PROTECTED); addOptionalRegular(Saver.save("__stop_" + S), Sec, -1, STV_PROTECTED); } static bool needsPtLoad(OutputSection *Sec) { if (!(Sec->Flags & SHF_ALLOC) || Sec->Noload) return false; // Don't allocate VA space for TLS NOBITS sections. The PT_TLS PHDR is // responsible for allocating space for them, not the PT_LOAD that // contains the TLS initialization image. if (Sec->Flags & SHF_TLS && Sec->Type == SHT_NOBITS) return false; return true; } // Linker scripts are responsible for aligning addresses. Unfortunately, most // linker scripts are designed for creating two PT_LOADs only, one RX and one // RW. This means that there is no alignment in the RO to RX transition and we // cannot create a PT_LOAD there. 
static uint64_t computeFlags(uint64_t Flags) { if (Config->Omagic) return PF_R | PF_W | PF_X; if (Config->ExecuteOnly && (Flags & PF_X)) return Flags & ~PF_R; if (Config->SingleRoRx && !(Flags & PF_W)) return Flags | PF_X; return Flags; } // Decide which program headers to create and which sections to include in each // one. template std::vector Writer::createPhdrs() { std::vector Ret; auto AddHdr = [&](unsigned Type, unsigned Flags) -> PhdrEntry * { Ret.push_back(make(Type, Flags)); return Ret.back(); }; // The first phdr entry is PT_PHDR which describes the program header itself. AddHdr(PT_PHDR, PF_R)->add(Out::ProgramHeaders); // PT_INTERP must be the second entry if exists. if (OutputSection *Cmd = findSection(".interp")) AddHdr(PT_INTERP, Cmd->getPhdrFlags())->add(Cmd); // Add the first PT_LOAD segment for regular output sections. uint64_t Flags = computeFlags(PF_R); PhdrEntry *Load = AddHdr(PT_LOAD, Flags); // Add the headers. We will remove them if they don't fit. Load->add(Out::ElfHeader); Load->add(Out::ProgramHeaders); for (OutputSection *Sec : OutputSections) { if (!(Sec->Flags & SHF_ALLOC)) break; if (!needsPtLoad(Sec)) continue; // Segments are contiguous memory regions that has the same attributes // (e.g. executable or writable). There is one phdr for each segment. // Therefore, we need to create a new phdr when the next section has - // different flags or is loaded at a discontiguous address using AT linker - // script command. At the same time, we don't want to create a separate - // load segment for the headers, even if the first output section has - // an AT attribute. + // different flags or is loaded at a discontiguous address or memory + // region using AT or AT> linker script command, respectively. At the same + // time, we don't want to create a separate load segment for the headers, + // even if the first output section has an AT or AT> attribute. 
uint64_t NewFlags = computeFlags(Sec->getPhdrFlags()); - if ((Sec->LMAExpr && Load->LastSec != Out::ProgramHeaders) || + if (((Sec->LMAExpr || + (Sec->LMARegion && (Sec->LMARegion != Load->FirstSec->LMARegion))) && + Load->LastSec != Out::ProgramHeaders) || Sec->MemRegion != Load->FirstSec->MemRegion || Flags != NewFlags) { Load = AddHdr(PT_LOAD, NewFlags); Flags = NewFlags; } Load->add(Sec); } // Add a TLS segment if any. PhdrEntry *TlsHdr = make(PT_TLS, PF_R); for (OutputSection *Sec : OutputSections) if (Sec->Flags & SHF_TLS) TlsHdr->add(Sec); if (TlsHdr->FirstSec) Ret.push_back(TlsHdr); // Add an entry for .dynamic. if (InX::DynSymTab) AddHdr(PT_DYNAMIC, InX::Dynamic->getParent()->getPhdrFlags()) ->add(InX::Dynamic->getParent()); // PT_GNU_RELRO includes all sections that should be marked as // read-only by dynamic linker after proccessing relocations. // Current dynamic loaders only support one PT_GNU_RELRO PHDR, give // an error message if more than one PT_GNU_RELRO PHDR is required. PhdrEntry *RelRo = make(PT_GNU_RELRO, PF_R); bool InRelroPhdr = false; bool IsRelroFinished = false; for (OutputSection *Sec : OutputSections) { if (!needsPtLoad(Sec)) continue; if (isRelroSection(Sec)) { InRelroPhdr = true; if (!IsRelroFinished) RelRo->add(Sec); else error("section: " + Sec->Name + " is not contiguous with other relro" + " sections"); } else if (InRelroPhdr) { InRelroPhdr = false; IsRelroFinished = true; } } if (RelRo->FirstSec) Ret.push_back(RelRo); // PT_GNU_EH_FRAME is a special section pointing on .eh_frame_hdr. if (!InX::EhFrame->empty() && InX::EhFrameHdr && InX::EhFrame->getParent() && InX::EhFrameHdr->getParent()) AddHdr(PT_GNU_EH_FRAME, InX::EhFrameHdr->getParent()->getPhdrFlags()) ->add(InX::EhFrameHdr->getParent()); // PT_OPENBSD_RANDOMIZE is an OpenBSD-specific feature. That makes // the dynamic linker fill the segment with random data. 
if (OutputSection *Cmd = findSection(".openbsd.randomdata")) AddHdr(PT_OPENBSD_RANDOMIZE, Cmd->getPhdrFlags())->add(Cmd); // PT_GNU_STACK is a special section to tell the loader to make the // pages for the stack non-executable. If you really want an executable // stack, you can pass -z execstack, but that's not recommended for // security reasons. unsigned Perm = PF_R | PF_W; if (Config->ZExecstack) Perm |= PF_X; AddHdr(PT_GNU_STACK, Perm)->p_memsz = Config->ZStackSize; // PT_OPENBSD_WXNEEDED is a OpenBSD-specific header to mark the executable // is expected to perform W^X violations, such as calling mprotect(2) or // mmap(2) with PROT_WRITE | PROT_EXEC, which is prohibited by default on // OpenBSD. if (Config->ZWxneeded) AddHdr(PT_OPENBSD_WXNEEDED, PF_X); // Create one PT_NOTE per a group of contiguous .note sections. PhdrEntry *Note = nullptr; for (OutputSection *Sec : OutputSections) { if (Sec->Type == SHT_NOTE && (Sec->Flags & SHF_ALLOC)) { if (!Note || Sec->LMAExpr) Note = AddHdr(PT_NOTE, PF_R); Note->add(Sec); } else { Note = nullptr; } } return Ret; } template void Writer::addPtArmExid(std::vector &Phdrs) { if (Config->EMachine != EM_ARM) return; auto I = llvm::find_if(OutputSections, [](OutputSection *Cmd) { return Cmd->Type == SHT_ARM_EXIDX; }); if (I == OutputSections.end()) return; // PT_ARM_EXIDX is the ARM EHABI equivalent of PT_GNU_EH_FRAME PhdrEntry *ARMExidx = make(PT_ARM_EXIDX, PF_R); ARMExidx->add(*I); Phdrs.push_back(ARMExidx); } // The first section of each PT_LOAD, the first section in PT_GNU_RELRO and the // first section after PT_GNU_RELRO have to be page aligned so that the dynamic // linker can set the permissions. 
template void Writer::fixSectionAlignments() { auto PageAlign = [](OutputSection *Cmd) { if (Cmd && !Cmd->AddrExpr) Cmd->AddrExpr = [=] { return alignTo(Script->getDot(), Config->MaxPageSize); }; }; for (const PhdrEntry *P : Phdrs) if (P->p_type == PT_LOAD && P->FirstSec) PageAlign(P->FirstSec); for (const PhdrEntry *P : Phdrs) { if (P->p_type != PT_GNU_RELRO) continue; if (P->FirstSec) PageAlign(P->FirstSec); // Find the first section after PT_GNU_RELRO. If it is in a PT_LOAD we // have to align it to a page. auto End = OutputSections.end(); auto I = std::find(OutputSections.begin(), End, P->LastSec); if (I == End || (I + 1) == End) continue; OutputSection *Cmd = (*(I + 1)); if (needsPtLoad(Cmd)) PageAlign(Cmd); } } // Adjusts the file alignment for a given output section and returns // its new file offset. The file offset must be the same with its // virtual address (modulo the page size) so that the loader can load // executables without any address adjustment. static uint64_t getFileAlignment(uint64_t Off, OutputSection *Cmd) { OutputSection *First = Cmd->PtLoad ? Cmd->PtLoad->FirstSec : nullptr; // The first section in a PT_LOAD has to have congruent offset and address // module the page size. if (Cmd == First) return alignTo(Off, std::max(Cmd->Alignment, Config->MaxPageSize), Cmd->Addr); // For SHT_NOBITS we don't want the alignment of the section to impact the // offset of the sections that follow. Since nothing seems to care about the // sh_offset of the SHT_NOBITS section itself, just ignore it. if (Cmd->Type == SHT_NOBITS) return Off; // If the section is not in a PT_LOAD, we just have to align it. if (!Cmd->PtLoad) return alignTo(Off, Cmd->Alignment); // If two sections share the same PT_LOAD the file offset is calculated // using this formula: Off2 = Off1 + (VA2 - VA1). 
return First->Offset + Cmd->Addr - First->Addr; } static uint64_t setOffset(OutputSection *Cmd, uint64_t Off) { Off = getFileAlignment(Off, Cmd); Cmd->Offset = Off; // For SHT_NOBITS we should not count the size. if (Cmd->Type == SHT_NOBITS) return Off; return Off + Cmd->Size; } template void Writer::assignFileOffsetsBinary() { uint64_t Off = 0; for (OutputSection *Sec : OutputSections) if (Sec->Flags & SHF_ALLOC) Off = setOffset(Sec, Off); FileSize = alignTo(Off, Config->Wordsize); } static std::string rangeToString(uint64_t Addr, uint64_t Len) { if (Len == 0) return ""; return "[0x" + utohexstr(Addr) + ", 0x" + utohexstr(Addr + Len - 1) + "]"; } // Assign file offsets to output sections. template void Writer::assignFileOffsets() { uint64_t Off = 0; Off = setOffset(Out::ElfHeader, Off); Off = setOffset(Out::ProgramHeaders, Off); PhdrEntry *LastRX = nullptr; for (PhdrEntry *P : Phdrs) if (P->p_type == PT_LOAD && (P->p_flags & PF_X)) LastRX = P; for (OutputSection *Sec : OutputSections) { Off = setOffset(Sec, Off); if (Script->HasSectionsCommand) continue; // If this is a last section of the last executable segment and that // segment is the last loadable segment, align the offset of the // following section to avoid loading non-segments parts of the file. if (LastRX && LastRX->LastSec == Sec) Off = alignTo(Off, Target->PageSize); } SectionHeaderOff = alignTo(Off, Config->Wordsize); FileSize = SectionHeaderOff + (OutputSections.size() + 1) * sizeof(Elf_Shdr); // Our logic assumes that sections have rising VA within the same segment. // With use of linker scripts it is possible to violate this rule and get file // offset overlaps or overflows. That should never happen with a valid script // which does not move the location counter backwards and usually scripts do // not do that. 
Unfortunately, there are apps in the wild, for example, Linux // kernel, which control segment distribution explicitly and move the counter // backwards, so we have to allow doing that to support linking them. We // perform non-critical checks for overlaps in checkSectionOverlap(), but here // we want to prevent file size overflows because it would crash the linker. for (OutputSection *Sec : OutputSections) { if (Sec->Type == SHT_NOBITS) continue; if ((Sec->Offset > FileSize) || (Sec->Offset + Sec->Size > FileSize)) error("unable to place section " + Sec->Name + " at file offset " + rangeToString(Sec->Offset, Sec->Offset + Sec->Size) + "; check your linker script for overflows"); } } // Finalize the program headers. We call this function after we assign // file offsets and VAs to all sections. template void Writer::setPhdrs() { for (PhdrEntry *P : Phdrs) { OutputSection *First = P->FirstSec; OutputSection *Last = P->LastSec; if (First) { P->p_filesz = Last->Offset - First->Offset; if (Last->Type != SHT_NOBITS) P->p_filesz += Last->Size; P->p_memsz = Last->Addr + Last->Size - First->Addr; P->p_offset = First->Offset; P->p_vaddr = First->Addr; if (!P->HasLMA) P->p_paddr = First->getLMA(); } if (P->p_type == PT_LOAD) P->p_align = std::max(P->p_align, Config->MaxPageSize); else if (P->p_type == PT_GNU_RELRO) { P->p_align = 1; // The glibc dynamic loader rounds the size down, so we need to round up // to protect the last page. This is a no-op on FreeBSD which always // rounds up. P->p_memsz = alignTo(P->p_memsz, Target->PageSize); } // The TLS pointer goes after PT_TLS. At least glibc will align it, // so round up the size to make sure the offsets are correct. if (P->p_type == PT_TLS) { Out::TlsPhdr = P; if (P->p_memsz) P->p_memsz = alignTo(P->p_memsz, P->p_align); } } } // A helper struct for checkSectionOverlap. 
namespace { struct SectionOffset { OutputSection *Sec; uint64_t Offset; }; } // namespace // Check whether sections overlap for a specific address range (file offsets, // load and virtual adresses). static void checkOverlap(StringRef Name, std::vector &Sections, bool IsVirtualAddr) { llvm::sort(Sections.begin(), Sections.end(), [=](const SectionOffset &A, const SectionOffset &B) { return A.Offset < B.Offset; }); // Finding overlap is easy given a vector is sorted by start position. // If an element starts before the end of the previous element, they overlap. for (size_t I = 1, End = Sections.size(); I < End; ++I) { SectionOffset A = Sections[I - 1]; SectionOffset B = Sections[I]; if (B.Offset >= A.Offset + A.Sec->Size) continue; // If both sections are in OVERLAY we allow the overlapping of virtual // addresses, because it is what OVERLAY was designed for. if (IsVirtualAddr && A.Sec->InOverlay && B.Sec->InOverlay) continue; errorOrWarn("section " + A.Sec->Name + " " + Name + " range overlaps with " + B.Sec->Name + "\n>>> " + A.Sec->Name + " range is " + rangeToString(A.Offset, A.Sec->Size) + "\n>>> " + B.Sec->Name + " range is " + rangeToString(B.Offset, B.Sec->Size)); } } // Check for overlapping sections and address overflows. // // In this function we check that none of the output sections have overlapping // file offsets. For SHF_ALLOC sections we also check that the load address // ranges and the virtual address ranges don't overlap template void Writer::checkSections() { // First, check that section's VAs fit in available address space for target. for (OutputSection *OS : OutputSections) if ((OS->Addr + OS->Size < OS->Addr) || (!ELFT::Is64Bits && OS->Addr + OS->Size > UINT32_MAX)) errorOrWarn("section " + OS->Name + " at 0x" + utohexstr(OS->Addr) + " of size 0x" + utohexstr(OS->Size) + " exceeds available address space"); // Check for overlapping file offsets. In this case we need to skip any // section marked as SHT_NOBITS. 
These sections don't actually occupy space in // the file so Sec->Offset + Sec->Size can overlap with others. If --oformat // binary is specified only add SHF_ALLOC sections are added to the output // file so we skip any non-allocated sections in that case. std::vector FileOffs; for (OutputSection *Sec : OutputSections) if (0 < Sec->Size && Sec->Type != SHT_NOBITS && (!Config->OFormatBinary || (Sec->Flags & SHF_ALLOC))) FileOffs.push_back({Sec, Sec->Offset}); checkOverlap("file", FileOffs, false); // When linking with -r there is no need to check for overlapping virtual/load // addresses since those addresses will only be assigned when the final // executable/shared object is created. if (Config->Relocatable) return; // Checking for overlapping virtual and load addresses only needs to take // into account SHF_ALLOC sections since others will not be loaded. // Furthermore, we also need to skip SHF_TLS sections since these will be // mapped to other addresses at runtime and can therefore have overlapping // ranges in the file. std::vector VMAs; for (OutputSection *Sec : OutputSections) if (0 < Sec->Size && (Sec->Flags & SHF_ALLOC) && !(Sec->Flags & SHF_TLS)) VMAs.push_back({Sec, Sec->Addr}); checkOverlap("virtual address", VMAs, true); // Finally, check that the load addresses don't overlap. This will usually be // the same as the virtual addresses but can be different when using a linker // script with AT(). std::vector LMAs; for (OutputSection *Sec : OutputSections) if (0 < Sec->Size && (Sec->Flags & SHF_ALLOC) && !(Sec->Flags & SHF_TLS)) LMAs.push_back({Sec, Sec->getLMA()}); checkOverlap("load address", LMAs, false); } // The entry point address is chosen in the following ways. // // 1. the '-e' entry command-line option; // 2. the ENTRY(symbol) command in a linker control script; // 3. the value of the symbol _start, if present; // 4. the number represented by the entry symbol, if it is a number; // 5. 
the address of the first byte of the .text section, if present; // 6. the address 0. template uint64_t Writer::getEntryAddr() { // Case 1, 2 or 3 if (Symbol *B = Symtab->find(Config->Entry)) return B->getVA(); // Case 4 uint64_t Addr; if (to_integer(Config->Entry, Addr)) return Addr; // Case 5 if (OutputSection *Sec = findSection(".text")) { if (Config->WarnMissingEntry) warn("cannot find entry symbol " + Config->Entry + "; defaulting to 0x" + utohexstr(Sec->Addr)); return Sec->Addr; } // Case 6 if (Config->WarnMissingEntry) warn("cannot find entry symbol " + Config->Entry + "; not setting start address"); return 0; } static uint16_t getELFType() { if (Config->Pic) return ET_DYN; if (Config->Relocatable) return ET_REL; return ET_EXEC; } static uint8_t getAbiVersion() { // MIPS non-PIC executable gets ABI version 1. if (Config->EMachine == EM_MIPS && getELFType() == ET_EXEC && (Config->EFlags & (EF_MIPS_PIC | EF_MIPS_CPIC)) == EF_MIPS_CPIC) return 1; return 0; } template void Writer::writeHeader() { uint8_t *Buf = Buffer->getBufferStart(); // For executable segments, the trap instructions are written before writing // the header. Setting Elf header bytes to zero ensures that any unused bytes // in header are zero-cleared, instead of having trap instructions. memset(Buf, 0, sizeof(Elf_Ehdr)); memcpy(Buf, "\177ELF", 4); // Write the ELF header. auto *EHdr = reinterpret_cast(Buf); EHdr->e_ident[EI_CLASS] = Config->Is64 ? ELFCLASS64 : ELFCLASS32; EHdr->e_ident[EI_DATA] = Config->IsLE ? 
ELFDATA2LSB : ELFDATA2MSB; EHdr->e_ident[EI_VERSION] = EV_CURRENT; EHdr->e_ident[EI_OSABI] = Config->OSABI; EHdr->e_ident[EI_ABIVERSION] = getAbiVersion(); EHdr->e_type = getELFType(); EHdr->e_machine = Config->EMachine; EHdr->e_version = EV_CURRENT; EHdr->e_entry = getEntryAddr(); EHdr->e_shoff = SectionHeaderOff; EHdr->e_flags = Config->EFlags; EHdr->e_ehsize = sizeof(Elf_Ehdr); EHdr->e_phnum = Phdrs.size(); EHdr->e_shentsize = sizeof(Elf_Shdr); if (!Config->Relocatable) { EHdr->e_phoff = sizeof(Elf_Ehdr); EHdr->e_phentsize = sizeof(Elf_Phdr); } // Write the program header table. auto *HBuf = reinterpret_cast(Buf + EHdr->e_phoff); for (PhdrEntry *P : Phdrs) { HBuf->p_type = P->p_type; HBuf->p_flags = P->p_flags; HBuf->p_offset = P->p_offset; HBuf->p_vaddr = P->p_vaddr; HBuf->p_paddr = P->p_paddr; HBuf->p_filesz = P->p_filesz; HBuf->p_memsz = P->p_memsz; HBuf->p_align = P->p_align; ++HBuf; } // Write the section header table. // // The ELF header can only store numbers up to SHN_LORESERVE in the e_shnum // and e_shstrndx fields. When the value of one of these fields exceeds // SHN_LORESERVE ELF requires us to put sentinel values in the ELF header and // use fields in the section header at index 0 to store // the value. The sentinel values and fields are: // e_shnum = 0, SHdrs[0].sh_size = number of sections. // e_shstrndx = SHN_XINDEX, SHdrs[0].sh_link = .shstrtab section index. auto *SHdrs = reinterpret_cast(Buf + EHdr->e_shoff); size_t Num = OutputSections.size() + 1; if (Num >= SHN_LORESERVE) SHdrs->sh_size = Num; else EHdr->e_shnum = Num; uint32_t StrTabIndex = InX::ShStrTab->getParent()->SectionIndex; if (StrTabIndex >= SHN_LORESERVE) { SHdrs->sh_link = StrTabIndex; EHdr->e_shstrndx = SHN_XINDEX; } else { EHdr->e_shstrndx = StrTabIndex; } for (OutputSection *Sec : OutputSections) Sec->writeHeaderTo(++SHdrs); } // Open a result file. 
template void Writer::openFile() { if (!Config->Is64 && FileSize > UINT32_MAX) { error("output file too large: " + Twine(FileSize) + " bytes"); return; } unlinkAsync(Config->OutputFile); unsigned Flags = 0; if (!Config->Relocatable) Flags = FileOutputBuffer::F_executable; Expected> BufferOrErr = FileOutputBuffer::create(Config->OutputFile, FileSize, Flags); if (!BufferOrErr) error("failed to open " + Config->OutputFile + ": " + llvm::toString(BufferOrErr.takeError())); else Buffer = std::move(*BufferOrErr); } template void Writer::writeSectionsBinary() { uint8_t *Buf = Buffer->getBufferStart(); for (OutputSection *Sec : OutputSections) if (Sec->Flags & SHF_ALLOC) Sec->writeTo(Buf + Sec->Offset); } static void fillTrap(uint8_t *I, uint8_t *End) { for (; I + 4 <= End; I += 4) memcpy(I, &Target->TrapInstr, 4); } // Fill the last page of executable segments with trap instructions // instead of leaving them as zero. Even though it is not required by any // standard, it is in general a good thing to do for security reasons. // // We'll leave other pages in segments as-is because the rest will be // overwritten by output sections. template void Writer::writeTrapInstr() { if (Script->HasSectionsCommand) return; // Fill the last page. uint8_t *Buf = Buffer->getBufferStart(); for (PhdrEntry *P : Phdrs) if (P->p_type == PT_LOAD && (P->p_flags & PF_X)) fillTrap(Buf + alignDown(P->p_offset + P->p_filesz, Target->PageSize), Buf + alignTo(P->p_offset + P->p_filesz, Target->PageSize)); // Round up the file size of the last segment to the page boundary iff it is // an executable segment to ensure that other tools don't accidentally // trim the instruction padding (e.g. when stripping the file). PhdrEntry *Last = nullptr; for (PhdrEntry *P : Phdrs) if (P->p_type == PT_LOAD) Last = P; if (Last && (Last->p_flags & PF_X)) Last->p_memsz = Last->p_filesz = alignTo(Last->p_filesz, Target->PageSize); } // Write section contents to a mmap'ed file. 
template void Writer::writeSections() { uint8_t *Buf = Buffer->getBufferStart(); OutputSection *EhFrameHdr = nullptr; if (InX::EhFrameHdr && !InX::EhFrameHdr->empty()) EhFrameHdr = InX::EhFrameHdr->getParent(); // In -r or -emit-relocs mode, write the relocation sections first as in // ELf_Rel targets we might find out that we need to modify the relocated // section while doing it. for (OutputSection *Sec : OutputSections) if (Sec->Type == SHT_REL || Sec->Type == SHT_RELA) Sec->writeTo(Buf + Sec->Offset); for (OutputSection *Sec : OutputSections) if (Sec != EhFrameHdr && Sec->Type != SHT_REL && Sec->Type != SHT_RELA) Sec->writeTo(Buf + Sec->Offset); // The .eh_frame_hdr depends on .eh_frame section contents, therefore // it should be written after .eh_frame is written. if (EhFrameHdr) EhFrameHdr->writeTo(Buf + EhFrameHdr->Offset); } template void Writer::writeBuildId() { if (!InX::BuildId || !InX::BuildId->getParent()) return; // Compute a hash of all sections of the output file. uint8_t *Start = Buffer->getBufferStart(); uint8_t *End = Start + FileSize; InX::BuildId->writeBuildId({Start, End}); } template void elf::writeResult(); template void elf::writeResult(); template void elf::writeResult(); template void elf::writeResult(); Index: projects/clang700-import/contrib/llvm/tools/lld =================================================================== --- projects/clang700-import/contrib/llvm/tools/lld (revision 337644) +++ projects/clang700-import/contrib/llvm/tools/lld (revision 337645) Property changes on: projects/clang700-import/contrib/llvm/tools/lld ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /vendor/lld/dist-release_70:r337311-337643 Index: projects/clang700-import/contrib/llvm/tools/lldb/include/lldb/Utility/VMRange.h =================================================================== --- projects/clang700-import/contrib/llvm/tools/lldb/include/lldb/Utility/VMRange.h (revision 337644) 
+++ projects/clang700-import/contrib/llvm/tools/lldb/include/lldb/Utility/VMRange.h (revision 337645) @@ -1,128 +1,110 @@ //===-- VMRange.h -----------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #ifndef liblldb_VMRange_h_ #define liblldb_VMRange_h_ #include "lldb/lldb-types.h" // for addr_t #include // for size_t #include // for uint32_t #include namespace lldb_private { class Stream; } namespace lldb_private { //---------------------------------------------------------------------- // A vm address range. These can represent offsets ranges or actual // addresses. //---------------------------------------------------------------------- class VMRange { public: typedef std::vector collection; typedef collection::iterator iterator; typedef collection::const_iterator const_iterator; VMRange() : m_base_addr(0), m_byte_size(0) {} VMRange(lldb::addr_t start_addr, lldb::addr_t end_addr) : m_base_addr(start_addr), m_byte_size(end_addr > start_addr ? 
end_addr - start_addr : 0) {} ~VMRange() {} void Clear() { m_base_addr = 0; m_byte_size = 0; } // Set the start and end values void Reset(lldb::addr_t start_addr, lldb::addr_t end_addr) { SetBaseAddress(start_addr); SetEndAddress(end_addr); } // Set the start value for the range, and keep the same size void SetBaseAddress(lldb::addr_t base_addr) { m_base_addr = base_addr; } void SetEndAddress(lldb::addr_t end_addr) { const lldb::addr_t base_addr = GetBaseAddress(); if (end_addr > base_addr) m_byte_size = end_addr - base_addr; else m_byte_size = 0; } lldb::addr_t GetByteSize() const { return m_byte_size; } void SetByteSize(lldb::addr_t byte_size) { m_byte_size = byte_size; } lldb::addr_t GetBaseAddress() const { return m_base_addr; } lldb::addr_t GetEndAddress() const { return GetBaseAddress() + m_byte_size; } bool IsValid() const { return m_byte_size > 0; } bool Contains(lldb::addr_t addr) const { return (GetBaseAddress() <= addr) && (addr < GetEndAddress()); } bool Contains(const VMRange &range) const { if (Contains(range.GetBaseAddress())) { lldb::addr_t range_end = range.GetEndAddress(); return (GetBaseAddress() <= range_end) && (range_end <= GetEndAddress()); } return false; } void Dump(Stream *s, lldb::addr_t base_addr = 0, uint32_t addr_width = 8) const; - class ValueInRangeUnaryPredicate { - public: - ValueInRangeUnaryPredicate(lldb::addr_t value) : _value(value) {} - bool operator()(const VMRange &range) const { - return range.Contains(_value); - } - lldb::addr_t _value; - }; - - class RangeInRangeUnaryPredicate { - public: - RangeInRangeUnaryPredicate(VMRange range) : _range(range) {} - bool operator()(const VMRange &range) const { - return range.Contains(_range); - } - const VMRange &_range; - }; - static bool ContainsValue(const VMRange::collection &coll, lldb::addr_t value); static bool ContainsRange(const VMRange::collection &coll, const VMRange &range); protected: lldb::addr_t m_base_addr; lldb::addr_t m_byte_size; }; bool operator==(const VMRange 
&lhs, const VMRange &rhs); bool operator!=(const VMRange &lhs, const VMRange &rhs); bool operator<(const VMRange &lhs, const VMRange &rhs); bool operator<=(const VMRange &lhs, const VMRange &rhs); bool operator>(const VMRange &lhs, const VMRange &rhs); bool operator>=(const VMRange &lhs, const VMRange &rhs); } // namespace lldb_private #endif // liblldb_VMRange_h_ Index: projects/clang700-import/contrib/llvm/tools/lldb/source/Utility/VMRange.cpp =================================================================== --- projects/clang700-import/contrib/llvm/tools/lldb/source/Utility/VMRange.cpp (revision 337644) +++ projects/clang700-import/contrib/llvm/tools/lldb/source/Utility/VMRange.cpp (revision 337645) @@ -1,69 +1,71 @@ //===-- VMRange.cpp ---------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "lldb/Utility/VMRange.h" #include "lldb/Utility/Stream.h" #include "lldb/lldb-types.h" // for addr_t #include #include // for distance #include // for const_iterator #include // for size_t #include // for UINT32_MAX, uint32_t using namespace lldb; using namespace lldb_private; bool VMRange::ContainsValue(const VMRange::collection &coll, lldb::addr_t value) { - ValueInRangeUnaryPredicate in_range_predicate(value); - return llvm::find_if(coll, in_range_predicate) != coll.end(); + return llvm::find_if(coll, [&](const VMRange &r) { + return r.Contains(value); + }) != coll.end(); } bool VMRange::ContainsRange(const VMRange::collection &coll, const VMRange &range) { - RangeInRangeUnaryPredicate in_range_predicate(range); - return llvm::find_if(coll, in_range_predicate) != coll.end(); + return llvm::find_if(coll, [&](const VMRange &r) { + return r.Contains(range); + }) != coll.end(); } void VMRange::Dump(Stream *s, lldb::addr_t 
offset, uint32_t addr_width) const { s->AddressRange(offset + GetBaseAddress(), offset + GetEndAddress(), addr_width); } bool lldb_private::operator==(const VMRange &lhs, const VMRange &rhs) { return lhs.GetBaseAddress() == rhs.GetBaseAddress() && lhs.GetEndAddress() == rhs.GetEndAddress(); } bool lldb_private::operator!=(const VMRange &lhs, const VMRange &rhs) { return !(lhs == rhs); } bool lldb_private::operator<(const VMRange &lhs, const VMRange &rhs) { if (lhs.GetBaseAddress() < rhs.GetBaseAddress()) return true; else if (lhs.GetBaseAddress() > rhs.GetBaseAddress()) return false; return lhs.GetEndAddress() < rhs.GetEndAddress(); } bool lldb_private::operator<=(const VMRange &lhs, const VMRange &rhs) { return !(lhs > rhs); } bool lldb_private::operator>(const VMRange &lhs, const VMRange &rhs) { return rhs < lhs; } bool lldb_private::operator>=(const VMRange &lhs, const VMRange &rhs) { return !(lhs < rhs); } Index: projects/clang700-import/contrib/llvm/tools/lldb =================================================================== --- projects/clang700-import/contrib/llvm/tools/lldb (revision 337644) +++ projects/clang700-import/contrib/llvm/tools/lldb (revision 337645) Property changes on: projects/clang700-import/contrib/llvm/tools/lldb ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /vendor/lldb/dist-release_70:r337312-337643 Index: projects/clang700-import/contrib/llvm =================================================================== --- projects/clang700-import/contrib/llvm (revision 337644) +++ projects/clang700-import/contrib/llvm (revision 337645) Property changes on: projects/clang700-import/contrib/llvm ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /vendor/llvm/dist-release_70:r337309-337642 Index: projects/clang700-import/lib/clang/freebsd_cc_version.h 
=================================================================== --- projects/clang700-import/lib/clang/freebsd_cc_version.h (revision 337644) +++ projects/clang700-import/lib/clang/freebsd_cc_version.h (revision 337645) @@ -1,3 +1,3 @@ /* $FreeBSD$ */ -#define FREEBSD_CC_VERSION 1200015 +#define FREEBSD_CC_VERSION 1200016 Index: projects/clang700-import/lib/clang/include/clang/Basic/Version.inc =================================================================== --- projects/clang700-import/lib/clang/include/clang/Basic/Version.inc (revision 337644) +++ projects/clang700-import/lib/clang/include/clang/Basic/Version.inc (revision 337645) @@ -1,11 +1,11 @@ /* $FreeBSD$ */ #define CLANG_VERSION 7.0.0 #define CLANG_VERSION_STRING "7.0.0" #define CLANG_VERSION_MAJOR 7 #define CLANG_VERSION_MINOR 0 #define CLANG_VERSION_PATCHLEVEL 0 #define CLANG_VENDOR "FreeBSD " -#define SVN_REVISION "338892" +#define SVN_REVISION "339355" Index: projects/clang700-import/lib/clang/include/lld/Common/Version.inc =================================================================== --- projects/clang700-import/lib/clang/include/lld/Common/Version.inc (revision 337644) +++ projects/clang700-import/lib/clang/include/lld/Common/Version.inc (revision 337645) @@ -1,10 +1,10 @@ // $FreeBSD$ #define LLD_VERSION 7.0.0 #define LLD_VERSION_STRING "7.0.0" #define LLD_VERSION_MAJOR 7 #define LLD_VERSION_MINOR 0 #define LLD_REPOSITORY_STRING "FreeBSD" // - -#define LLD_REVISION_STRING "338892-1200005" +#define LLD_REVISION_STRING "339355-1200005"