Index: projects/clang400-import/contrib/compiler-rt/lib/builtins/arm/subsf3vfp.S =================================================================== --- projects/clang400-import/contrib/compiler-rt/lib/builtins/arm/subsf3vfp.S (revision 313893) +++ projects/clang400-import/contrib/compiler-rt/lib/builtins/arm/subsf3vfp.S (revision 313894) @@ -1,34 +1,34 @@ //===-- subsf3vfp.S - Implement subsf3vfp ---------------------------------===// // // The LLVM Compiler Infrastructure // // This file is dual licensed under the MIT and the University of Illinois Open // Source Licenses. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "../assembly.h" // // extern float __subsf3vfp(float a, float b); // // Returns the difference between two single precision floating point numbers // using the Darwin calling convention where single arguments are passsed // like 32-bit ints. // .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__subsf3vfp) #if defined(COMPILER_RT_ARMHF_TARGET) vsub.f32 s0, s0, s1 -#elsee +#else vmov s14, r0 // move first param from r0 into float register vmov s15, r1 // move second param from r1 into float register vsub.f32 s14, s14, s15 vmov r0, s14 // move result back to r0 #endif bx lr END_COMPILERRT_FUNCTION(__subsf3vfp) NO_EXEC_STACK_DIRECTIVE Index: projects/clang400-import/contrib/compiler-rt =================================================================== --- projects/clang400-import/contrib/compiler-rt (revision 313893) +++ projects/clang400-import/contrib/compiler-rt (revision 313894) Property changes on: projects/clang400-import/contrib/compiler-rt ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /vendor/compiler-rt/dist:r313643-313892 Index: projects/clang400-import/contrib/libc++ =================================================================== --- projects/clang400-import/contrib/libc++ (revision 313893) 
+++ projects/clang400-import/contrib/libc++ (revision 313894) Property changes on: projects/clang400-import/contrib/libc++ ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /vendor/libc++/dist:r313643-313893 Index: projects/clang400-import/contrib/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h =================================================================== --- projects/clang400-import/contrib/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h (revision 313893) +++ projects/clang400-import/contrib/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h (revision 313894) @@ -1,313 +1,330 @@ //===-ThinLTOCodeGenerator.h - LLVM Link Time Optimizer -------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file declares the ThinLTOCodeGenerator class, similar to the // LTOCodeGenerator but for the ThinLTO scheme. It provides an interface for // linker plugin. 
// //===----------------------------------------------------------------------===// #ifndef LLVM_LTO_THINLTOCODEGENERATOR_H #define LLVM_LTO_THINLTOCODEGENERATOR_H #include "llvm-c/lto.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Target/TargetOptions.h" #include namespace llvm { class StringRef; class LLVMContext; class TargetMachine; +/// Wrapper around MemoryBufferRef, owning the identifier +class ThinLTOBuffer { + std::string OwnedIdentifier; + StringRef Buffer; + +public: + ThinLTOBuffer(StringRef Buffer, StringRef Identifier) + : OwnedIdentifier(Identifier), Buffer(Buffer) {} + + MemoryBufferRef getMemBuffer() const { + return MemoryBufferRef(Buffer, + {OwnedIdentifier.c_str(), OwnedIdentifier.size()}); + } + StringRef getBuffer() const { return Buffer; } + StringRef getBufferIdentifier() const { return OwnedIdentifier; } +}; + /// Helper to gather options relevant to the target machine creation struct TargetMachineBuilder { Triple TheTriple; std::string MCpu; std::string MAttr; TargetOptions Options; Optional RelocModel; CodeGenOpt::Level CGOptLevel = CodeGenOpt::Aggressive; std::unique_ptr create() const; }; /// This class define an interface similar to the LTOCodeGenerator, but adapted /// for ThinLTO processing. /// The ThinLTOCodeGenerator is not intended to be reuse for multiple /// compilation: the model is that the client adds modules to the generator and /// ask to perform the ThinLTO optimizations / codegen, and finally destroys the /// codegenerator. class ThinLTOCodeGenerator { public: /// Add given module to the code generator. void addModule(StringRef Identifier, StringRef Data); /** * Adds to a list of all global symbols that must exist in the final generated * code. If a symbol is not listed there, it will be optimized away if it is * inlined into every usage. 
*/ void preserveSymbol(StringRef Name); /** * Adds to a list of all global symbols that are cross-referenced between * ThinLTO files. If the ThinLTO CodeGenerator can ensure that every * references from a ThinLTO module to this symbol is optimized away, then * the symbol can be discarded. */ void crossReferenceSymbol(StringRef Name); /** * Process all the modules that were added to the code generator in parallel. * * Client can access the resulting object files using getProducedBinaries(), * unless setGeneratedObjectsDirectory() has been called, in which case * results are available through getProducedBinaryFiles(). */ void run(); /** * Return the "in memory" binaries produced by the code generator. This is * filled after run() unless setGeneratedObjectsDirectory() has been * called, in which case results are available through * getProducedBinaryFiles(). */ std::vector> &getProducedBinaries() { return ProducedBinaries; } /** * Return the "on-disk" binaries produced by the code generator. This is * filled after run() when setGeneratedObjectsDirectory() has been * called, in which case results are available through getProducedBinaries(). */ std::vector &getProducedBinaryFiles() { return ProducedBinaryFiles; } /** * \defgroup Options setters * @{ */ /** * \defgroup Cache controlling options * * These entry points control the ThinLTO cache. The cache is intended to * support incremental build, and thus needs to be persistent accross build. * The client enabled the cache by supplying a path to an existing directory. * The code generator will use this to store objects files that may be reused * during a subsequent build. * To avoid filling the disk space, a few knobs are provided: * - The pruning interval limit the frequency at which the garbage collector * will try to scan the cache directory to prune it from expired entries. * Setting to -1 disable the pruning (default). 
* - The pruning expiration time indicates to the garbage collector how old * an entry needs to be to be removed. * - Finally, the garbage collector can be instructed to prune the cache till * the occupied space goes below a threshold. * @{ */ struct CachingOptions { std::string Path; // Path to the cache, empty to disable. int PruningInterval = 1200; // seconds, -1 to disable pruning. unsigned int Expiration = 7 * 24 * 3600; // seconds (1w default). unsigned MaxPercentageOfAvailableSpace = 75; // percentage. }; /// Provide a path to a directory where to store the cached files for /// incremental build. void setCacheDir(std::string Path) { CacheOptions.Path = std::move(Path); } /// Cache policy: interval (seconds) between two prune of the cache. Set to a /// negative value (default) to disable pruning. A value of 0 will be ignored. void setCachePruningInterval(int Interval) { if (Interval) CacheOptions.PruningInterval = Interval; } /// Cache policy: expiration (in seconds) for an entry. /// A value of 0 will be ignored. void setCacheEntryExpiration(unsigned Expiration) { if (Expiration) CacheOptions.Expiration = Expiration; } /** * Sets the maximum cache size that can be persistent across build, in terms * of percentage of the available space on the the disk. Set to 100 to * indicate no limit, 50 to indicate that the cache size will not be left over * half the available space. A value over 100 will be reduced to 100, and a * value of 0 will be ignored. * * * The formula looks like: * AvailableSpace = FreeSpace + ExistingCacheSize * NewCacheSize = AvailableSpace * P/100 * */ void setMaxCacheSizeRelativeToAvailableSpace(unsigned Percentage) { if (Percentage) CacheOptions.MaxPercentageOfAvailableSpace = Percentage; } /**@}*/ /// Set the path to a directory where to save temporaries at various stages of /// the processing. void setSaveTempsDir(std::string Path) { SaveTempsDir = std::move(Path); } /// Set the path to a directory where to save generated object files. 
This /// path can be used by a linker to request on-disk files instead of in-memory /// buffers. When set, results are available through getProducedBinaryFiles() /// instead of getProducedBinaries(). void setGeneratedObjectsDirectory(std::string Path) { SavedObjectsDirectoryPath = std::move(Path); } /// CPU to use to initialize the TargetMachine void setCpu(std::string Cpu) { TMBuilder.MCpu = std::move(Cpu); } /// Subtarget attributes void setAttr(std::string MAttr) { TMBuilder.MAttr = std::move(MAttr); } /// TargetMachine options void setTargetOptions(TargetOptions Options) { TMBuilder.Options = std::move(Options); } /// CodeModel void setCodePICModel(Optional Model) { TMBuilder.RelocModel = Model; } /// CodeGen optimization level void setCodeGenOptLevel(CodeGenOpt::Level CGOptLevel) { TMBuilder.CGOptLevel = CGOptLevel; } /// IR optimization level: from 0 to 3. void setOptLevel(unsigned NewOptLevel) { OptLevel = (NewOptLevel > 3) ? 3 : NewOptLevel; } /// Disable CodeGen, only run the stages till codegen and stop. The output /// will be bitcode. void disableCodeGen(bool Disable) { DisableCodeGen = Disable; } /// Perform CodeGen only: disable all other stages. void setCodeGenOnly(bool CGOnly) { CodeGenOnly = CGOnly; } /**@}*/ /** * \defgroup Set of APIs to run individual stages in isolation. * @{ */ /** * Produce the combined summary index from all the bitcode files: * "thin-link". */ std::unique_ptr linkCombinedIndex(); /** * Perform promotion and renaming of exported internal functions, * and additionally resolve weak and linkonce symbols. * Index is updated to reflect linkage changes from weak resolution. */ void promote(Module &Module, ModuleSummaryIndex &Index); /** * Compute and emit the imported files for module at \p ModulePath. */ static void emitImports(StringRef ModulePath, StringRef OutputName, ModuleSummaryIndex &Index); /** * Perform cross-module importing for the module identified by * ModuleIdentifier. 
*/ void crossModuleImport(Module &Module, ModuleSummaryIndex &Index); /** * Compute the list of summaries needed for importing into module. */ static void gatherImportedSummariesForModule( StringRef ModulePath, ModuleSummaryIndex &Index, std::map &ModuleToSummariesForIndex); /** * Perform internalization. Index is updated to reflect linkage changes. */ void internalize(Module &Module, ModuleSummaryIndex &Index); /** * Perform post-importing ThinLTO optimizations. */ void optimize(Module &Module); /** * Perform ThinLTO CodeGen. */ std::unique_ptr codegen(Module &Module); /**@}*/ private: /// Helper factory to build a TargetMachine TargetMachineBuilder TMBuilder; /// Vector holding the in-memory buffer containing the produced binaries, when /// SavedObjectsDirectoryPath isn't set. std::vector> ProducedBinaries; /// Path to generated files in the supplied SavedObjectsDirectoryPath if any. std::vector ProducedBinaryFiles; /// Vector holding the input buffers containing the bitcode modules to /// process. - std::vector Modules; + std::vector Modules; /// Set of symbols that need to be preserved outside of the set of bitcode /// files. StringSet<> PreservedSymbols; /// Set of symbols that are cross-referenced between bitcode files. StringSet<> CrossReferencedSymbols; /// Control the caching behavior. CachingOptions CacheOptions; /// Path to a directory to save the temporary bitcode files. std::string SaveTempsDir; /// Path to a directory to save the generated object files. std::string SavedObjectsDirectoryPath; /// Flag to enable/disable CodeGen. When set to true, the process stops after /// optimizations and a bitcode is produced. bool DisableCodeGen = false; /// Flag to indicate that only the CodeGen will be performed, no cross-module /// importing or optimization. bool CodeGenOnly = false; /// IR Optimization Level [0-3]. 
unsigned OptLevel = 3; }; } #endif Index: projects/clang400-import/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- projects/clang400-import/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (revision 313893) +++ projects/clang400-import/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (revision 313894) @@ -1,15566 +1,15572 @@ //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run // both before and after the DAG is legalized. // // This pass is not a substitute for the LLVM IR instcombine pass. This pass is // primarily intended to handle simplification opportunities that are implicit // in the LLVM IR and exposed by the various codegen lowering phases. 
// //===----------------------------------------------------------------------===// #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; #define DEBUG_TYPE "dagcombine" STATISTIC(NodesCombined , "Number of dag nodes combined"); STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); STATISTIC(SlicedLoads, "Number of load sliced"); namespace { static cl::opt CombinerAA("combiner-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner alias-analysis heuristics")); static cl::opt CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis")); static cl::opt UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA")); #ifndef NDEBUG static cl::opt CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function")); #endif /// Hidden option to stress test load slicing, 
i.e., when this option /// is enabled, load slicing bypasses most of its profitability guards. static cl::opt StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load " "slicing"), cl::init(false)); static cl::opt MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads")); //------------------------------ DAGCombiner ---------------------------------// class DAGCombiner { SelectionDAG &DAG; const TargetLowering &TLI; CombineLevel Level; CodeGenOpt::Level OptLevel; bool LegalOperations; bool LegalTypes; bool ForCodeSize; /// \brief Worklist of all of the nodes that need to be simplified. /// /// This must behave as a stack -- new nodes to process are pushed onto the /// back and when processing we pop off of the back. /// /// The worklist will not contain duplicates but may contain null entries /// due to nodes being deleted from the underlying DAG. SmallVector Worklist; /// \brief Mapping from an SDNode to its position on the worklist. /// /// This is used to find and remove nodes from the worklist (by nulling /// them) when they are deleted from the underlying DAG. It relies on /// stable indices of nodes within the worklist. DenseMap WorklistMap; /// \brief Set of nodes which have been combined (at least once). /// /// This is used to allow us to reliably add any operands of a DAG node /// which have not yet been combined to the worklist. SmallPtrSet CombinedNodes; // AA - Used for DAG load/store alias analysis. AliasAnalysis &AA; /// When an instruction is simplified, add all users of the instruction to /// the work lists because they might get more simplified now. void AddUsersToWorklist(SDNode *N) { for (SDNode *Node : N->uses()) AddToWorklist(Node); } /// Call the node-specific routine that folds each particular type of node. 
SDValue visit(SDNode *N); public: /// Add to the worklist making sure its instance is at the back (next to be /// processed.) void AddToWorklist(SDNode *N) { // Skip handle nodes as they can't usefully be combined and confuse the // zero-use deletion strategy. if (N->getOpcode() == ISD::HANDLENODE) return; if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second) Worklist.push_back(N); } /// Remove all instances of N from the worklist. void removeFromWorklist(SDNode *N) { CombinedNodes.erase(N); auto It = WorklistMap.find(N); if (It == WorklistMap.end()) return; // Not in the worklist. // Null out the entry rather than erasing it to avoid a linear operation. Worklist[It->second] = nullptr; WorklistMap.erase(It); } void deleteAndRecombine(SDNode *N); bool recursivelyDeleteUnusedNodes(SDNode *N); /// Replaces all uses of the results of one DAG node with new values. SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, bool AddTo = true); /// Replaces all uses of the results of one DAG node with new values. SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) { return CombineTo(N, &Res, 1, AddTo); } /// Replaces all uses of the results of one DAG node with new values. SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true) { SDValue To[] = { Res0, Res1 }; return CombineTo(N, To, 2, AddTo); } void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO); private: /// Check the specified integer node value to see if it can be simplified or /// if things it uses can be simplified by bit propagation. /// If so, return true. 
bool SimplifyDemandedBits(SDValue Op) { unsigned BitWidth = Op.getScalarValueSizeInBits(); APInt Demanded = APInt::getAllOnesValue(BitWidth); return SimplifyDemandedBits(Op, Demanded); } bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded); bool CombineToPreIndexedLoadStore(SDNode *N); bool CombineToPostIndexedLoadStore(SDNode *N); SDValue SplitIndexingFromLoad(LoadSDNode *LD); bool SliceUpLoad(SDNode *N); /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed /// load. /// /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced. /// \param InVecVT type of the input vector to EVE with bitcasts resolved. /// \param EltNo index of the vector element to load. /// \param OriginalLoad load that EVE came from to be replaced. /// \returns EVE on success SDValue() on failure. SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad( SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad); void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); SDValue SExtPromoteOperand(SDValue Op, EVT PVT); SDValue ZExtPromoteOperand(SDValue Op, EVT PVT); SDValue PromoteIntBinOp(SDValue Op); SDValue PromoteIntShiftOp(SDValue Op); SDValue PromoteExtend(SDValue Op); bool PromoteLoad(SDValue Op); void ExtendSetCCUses(const SmallVectorImpl &SetCCs, SDValue Trunc, SDValue ExtLoad, const SDLoc &DL, ISD::NodeType ExtType); /// Call the node-specific routine that knows how to fold each /// particular type of node. If that doesn't do anything, try the /// target-specific DAG combines. SDValue combine(SDNode *N); // Visitation implementation - Implement dag node combining for different // node types. The semantics are as follows: // Return Value: // SDValue.getNode() == 0 - No change was made // SDValue.getNode() == N - N was replaced, is dead and has been handled. // otherwise - N should be replaced by the returned Operand. 
// SDValue visitTokenFactor(SDNode *N); SDValue visitMERGE_VALUES(SDNode *N); SDValue visitADD(SDNode *N); SDValue visitSUB(SDNode *N); SDValue visitADDC(SDNode *N); SDValue visitSUBC(SDNode *N); SDValue visitADDE(SDNode *N); SDValue visitSUBE(SDNode *N); SDValue visitMUL(SDNode *N); SDValue useDivRem(SDNode *N); SDValue visitSDIV(SDNode *N); SDValue visitUDIV(SDNode *N); SDValue visitREM(SDNode *N); SDValue visitMULHU(SDNode *N); SDValue visitMULHS(SDNode *N); SDValue visitSMUL_LOHI(SDNode *N); SDValue visitUMUL_LOHI(SDNode *N); SDValue visitSMULO(SDNode *N); SDValue visitUMULO(SDNode *N); SDValue visitIMINMAX(SDNode *N); SDValue visitAND(SDNode *N); SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference); SDValue visitOR(SDNode *N); SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference); SDValue visitXOR(SDNode *N); SDValue SimplifyVBinOp(SDNode *N); SDValue visitSHL(SDNode *N); SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); SDValue visitRotate(SDNode *N); SDValue visitBSWAP(SDNode *N); SDValue visitBITREVERSE(SDNode *N); SDValue visitCTLZ(SDNode *N); SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); SDValue visitCTTZ(SDNode *N); SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); SDValue visitCTPOP(SDNode *N); SDValue visitSELECT(SDNode *N); SDValue visitVSELECT(SDNode *N); SDValue visitSELECT_CC(SDNode *N); SDValue visitSETCC(SDNode *N); SDValue visitSETCCE(SDNode *N); SDValue visitSIGN_EXTEND(SDNode *N); SDValue visitZERO_EXTEND(SDNode *N); SDValue visitANY_EXTEND(SDNode *N); SDValue visitSIGN_EXTEND_INREG(SDNode *N); SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N); SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N); SDValue visitTRUNCATE(SDNode *N); SDValue visitBITCAST(SDNode *N); SDValue visitBUILD_PAIR(SDNode *N); SDValue visitFADD(SDNode *N); SDValue visitFSUB(SDNode *N); SDValue visitFMUL(SDNode *N); SDValue visitFMA(SDNode *N); SDValue visitFDIV(SDNode *N); SDValue visitFREM(SDNode *N); SDValue visitFSQRT(SDNode *N); SDValue 
visitFCOPYSIGN(SDNode *N); SDValue visitSINT_TO_FP(SDNode *N); SDValue visitUINT_TO_FP(SDNode *N); SDValue visitFP_TO_SINT(SDNode *N); SDValue visitFP_TO_UINT(SDNode *N); SDValue visitFP_ROUND(SDNode *N); SDValue visitFP_ROUND_INREG(SDNode *N); SDValue visitFP_EXTEND(SDNode *N); SDValue visitFNEG(SDNode *N); SDValue visitFABS(SDNode *N); SDValue visitFCEIL(SDNode *N); SDValue visitFTRUNC(SDNode *N); SDValue visitFFLOOR(SDNode *N); SDValue visitFMINNUM(SDNode *N); SDValue visitFMAXNUM(SDNode *N); SDValue visitBRCOND(SDNode *N); SDValue visitBR_CC(SDNode *N); SDValue visitLOAD(SDNode *N); SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain); SDValue replaceStoreOfFPConstant(StoreSDNode *ST); SDValue visitSTORE(SDNode *N); SDValue visitINSERT_VECTOR_ELT(SDNode *N); SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); SDValue visitBUILD_VECTOR(SDNode *N); SDValue visitCONCAT_VECTORS(SDNode *N); SDValue visitEXTRACT_SUBVECTOR(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); SDValue visitSCALAR_TO_VECTOR(SDNode *N); SDValue visitINSERT_SUBVECTOR(SDNode *N); SDValue visitMLOAD(SDNode *N); SDValue visitMSTORE(SDNode *N); SDValue visitMGATHER(SDNode *N); SDValue visitMSCATTER(SDNode *N); SDValue visitFP_TO_FP16(SDNode *N); SDValue visitFP16_TO_FP(SDNode *N); SDValue visitFADDForFMACombine(SDNode *N); SDValue visitFSUBForFMACombine(SDNode *N); SDValue visitFMULForFMADistributiveCombine(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS, SDValue RHS); SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); SDValue foldSelectOfConstants(SDNode *N); bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2); SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, bool NotExtCompare = false); SDValue 
foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC); SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &DL, bool foldBooleans = true); bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, SDValue &CC) const; bool isOneUseSetCC(SDValue N) const; SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); SDValue CombineExtLoad(SDNode *N); SDValue combineRepeatedFPDivisors(SDNode *N); SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); SDValue BuildLogBase2(SDValue Op, const SDLoc &DL); SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags); SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags); SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags); SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, bool Recip); SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations, SDNodeFlags *Flags, bool Reciprocal); SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations, SDNodeFlags *Flags, bool Reciprocal); SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL); SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); SDValue splitMergedValStore(StoreSDNode *ST); SDValue TransformFPLoadStorePair(SDNode *N); SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); SDValue reduceBuildVecToShuffle(SDNode *N); SDValue createBuildVecShuffle(SDLoc DL, SDNode *N, 
ArrayRef VectorMask, SDValue VecIn1, SDValue VecIn2, unsigned LeftIdx); SDValue GetDemandedBits(SDValue V, const APInt &Mask); /// Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallVectorImpl &Aliases); /// Return true if there is any possibility that the two addresses overlap. bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const; /// Walk up chain skipping non-aliasing memory nodes, looking for a better /// chain (aliasing node.) SDValue FindBetterChain(SDNode *N, SDValue Chain); /// Try to replace a store and any possibly adjacent stores on /// consecutive chains with better chains. Return true only if St is /// replaced. /// /// Notice that other chains may still be replaced even if the function /// returns false. bool findBetterNeighborChains(StoreSDNode *St); /// Match "(X shl/srl V1) & V2" where V2 may not be present. bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask); /// Holds a pointer to an LSBaseSDNode as well as information on where it /// is located in a sequence of memory operations connected by a chain. struct MemOpLink { MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq): MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { } // Ptr to the mem node. LSBaseSDNode *MemNode; // Offset from the base ptr. int64_t OffsetFromBase; // What is the sequence number of this mem node. // Lowest mem operand in the DAG starts at zero. unsigned SequenceNum; }; /// This is a helper function for visitMUL to check the profitability /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). /// MulNode is the original multiply, AddNode is (add x, c1), /// and ConstNode is c2. bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue &AddNode, SDValue &ConstNode); /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a /// constant build_vector of the stored constant values in Stores. 
SDValue getMergedConstantVectorStore(SelectionDAG &DAG, const SDLoc &SL, ArrayRef Stores, SmallVectorImpl &Chains, EVT Ty) const; /// This is a helper function for visitAND and visitZERO_EXTEND. Returns /// true if the (and (load x) c) pattern matches an extload. ExtVT returns /// the type of the loaded value to be extended. LoadedVT returns the type /// of the original loaded value. NarrowLoad returns whether the load would /// need to be narrowed in order to match. bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT, bool &NarrowLoad); /// This is a helper function for MergeConsecutiveStores. When the source /// elements of the consecutive stores are all constants or all extracted /// vector elements, try to merge them into one larger store. /// \return number of stores that were merged into a merged store (always /// a prefix of \p StoreNode). bool MergeStoresOfConstantsOrVecElts( SmallVectorImpl &StoreNodes, EVT MemVT, unsigned NumStores, bool IsConstantSrc, bool UseVector); /// This is a helper function for MergeConsecutiveStores. /// Stores that may be merged are placed in StoreNodes. /// Loads that may alias with those stores are placed in AliasLoadNodes. void getStoreMergeAndAliasCandidates( StoreSDNode* St, SmallVectorImpl &StoreNodes, SmallVectorImpl &AliasLoadNodes); /// Helper function for MergeConsecutiveStores. Checks if /// Candidate stores have indirect dependency through their /// operands. \return True if safe to merge bool checkMergeStoreCandidatesForDependencies( SmallVectorImpl &StoreNodes); /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. /// \return number of stores that were merged into a merged store (the /// affected nodes are stored as a prefix in \p StoreNodes). 
bool MergeConsecutiveStores(StoreSDNode *N, SmallVectorImpl &StoreNodes); /// \brief Try to transform a truncation where C is a constant: /// (trunc (and X, C)) -> (and (trunc X), (trunc C)) /// /// \p N needs to be a truncation and its first operand an AND. Other /// requirements are checked by the function (e.g. that trunc is /// single-use) and if missed an empty SDValue is returned. SDValue distributeTruncateThroughAnd(SDNode *N); public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize(); } /// Runs the dag combiner on all nodes in the work list void Run(CombineLevel AtLevel); SelectionDAG &getDAG() const { return DAG; } /// Returns a type large enough to hold any valid shift amount - before type /// legalization these can be huge. EVT getShiftAmountTy(EVT LHSTy) { assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); if (LHSTy.isVector()) return LHSTy; auto &DL = DAG.getDataLayout(); return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy) : TLI.getPointerTy(DL); } /// This method returns true if we are running before type legalization or /// if the specified VT is legal. bool isTypeLegal(const EVT &VT) { if (!LegalTypes) return true; return TLI.isTypeLegal(VT); } /// Convenience wrapper around TargetLowering::getSetCCResultType EVT getSetCCResultType(EVT VT) const { return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } }; } namespace { /// This class is a DAGUpdateListener that removes any deleted /// nodes from the worklist. 
class WorklistRemover : public SelectionDAG::DAGUpdateListener { DAGCombiner &DC; public: explicit WorklistRemover(DAGCombiner &dc) : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} void NodeDeleted(SDNode *N, SDNode *E) override { DC.removeFromWorklist(N); } }; } //===----------------------------------------------------------------------===// // TargetLowering::DAGCombinerInfo implementation //===----------------------------------------------------------------------===// void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { ((DAGCombiner*)DC)->AddToWorklist(N); } SDValue TargetLowering::DAGCombinerInfo:: CombineTo(SDNode *N, ArrayRef To, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); } SDValue TargetLowering::DAGCombinerInfo:: CombineTo(SDNode *N, SDValue Res, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); } SDValue TargetLowering::DAGCombinerInfo:: CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); } void TargetLowering::DAGCombinerInfo:: CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); } //===----------------------------------------------------------------------===// // Helper Functions //===----------------------------------------------------------------------===// void DAGCombiner::deleteAndRecombine(SDNode *N) { removeFromWorklist(N); // If the operands of this node are only used by the node, they will now be // dead. Make sure to re-visit them and recursively delete dead nodes. for (const SDValue &Op : N->ops()) // For an operand generating multiple values, one of the values may // become dead allowing further simplification (e.g. split index // arithmetic from an indexed load). 
if (Op->hasOneUse() || Op->getNumValues() > 1) AddToWorklist(Op.getNode()); DAG.DeleteNode(N); } /// Return 1 if we can compute the negated form of the specified expression for /// the same cost as the expression itself, or 2 if we can compute the negated /// form more cheaply than the expression itself. static char isNegatibleForFree(SDValue Op, bool LegalOperations, const TargetLowering &TLI, const TargetOptions *Options, unsigned Depth = 0) { // fneg is removable even if it has multiple uses. if (Op.getOpcode() == ISD::FNEG) return 2; // Don't allow anything with multiple uses. if (!Op.hasOneUse()) return 0; // Don't recurse exponentially. if (Depth > 6) return 0; switch (Op.getOpcode()) { default: return false; case ISD::ConstantFP: // Don't invert constant FP values after legalize. The negated constant // isn't necessarily legal. return LegalOperations ? 0 : 1; case ISD::FADD: // FIXME: determine better conditions for this xform. if (!Options->UnsafeFPMath) return 0; // After operation legalization, it might not be legal to create new FSUBs. if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) return 0; // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, Depth + 1)) return V; // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, Depth + 1); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. 
if (!Options->UnsafeFPMath && !Op.getNode()->getFlags()->hasNoSignedZeros()) return 0; // fold (fneg (fsub A, B)) -> (fsub B, A) return 1; case ISD::FMUL: case ISD::FDIV: if (Options->HonorSignDependentRoundingFPMath()) return 0; // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, Depth + 1)) return V; return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, Depth + 1); case ISD::FP_EXTEND: case ISD::FP_ROUND: case ISD::FSIN: return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, Depth + 1); } } /// If isNegatibleForFree returns true, return the newly negated expression. static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOperations, unsigned Depth = 0) { const TargetOptions &Options = DAG.getTarget().Options; // fneg is removable even if it has multiple uses. if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); // Don't allow anything with multiple uses. assert(Op.hasOneUse() && "Unknown reuse!"); assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); const SDNodeFlags *Flags = Op.getNode()->getFlags(); switch (Op.getOpcode()) { default: llvm_unreachable("Unknown code"); case ISD::ConstantFP: { APFloat V = cast(Op)->getValueAPF(); V.changeSign(); return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); } case ISD::FADD: // FIXME: determine better conditions for this xform. 
assert(Options.UnsafeFPMath); // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) if (isNegatibleForFree(Op.getOperand(0), LegalOperations, DAG.getTargetLoweringInfo(), &Options, Depth+1)) return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), Op.getOperand(1), Flags); // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(1), DAG, LegalOperations, Depth+1), Op.getOperand(0), Flags); case ISD::FSUB: // fold (fneg (fsub 0, B)) -> B if (ConstantFPSDNode *N0CFP = dyn_cast(Op.getOperand(0))) if (N0CFP->isZero()) return Op.getOperand(1); // fold (fneg (fsub A, B)) -> (fsub B, A) return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(0), Flags); case ISD::FMUL: case ISD::FDIV: assert(!Options.HonorSignDependentRoundingFPMath()); // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) if (isNegatibleForFree(Op.getOperand(0), LegalOperations, DAG.getTargetLoweringInfo(), &Options, Depth+1)) return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), Op.getOperand(1), Flags); // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0), GetNegatedExpression(Op.getOperand(1), DAG, LegalOperations, Depth+1), Flags); case ISD::FP_EXTEND: case ISD::FSIN: return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1)); case ISD::FP_ROUND: return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), Op.getOperand(1)); } } // APInts must be the same size for most operations, this helper // function zero extends the shorter of the pair so that they match. 
// We provide an Offset so that we can create bitwidths that won't overflow. static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) { unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth()); LHS = LHS.zextOrSelf(Bits); RHS = RHS.zextOrSelf(Bits); } // Return true if this node is a setcc, or is a select_cc // that selects between the target values used for true and false, making it // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to // the appropriate nodes based on the type of node we are checking. This // simplifies life a bit for the callers. bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, SDValue &CC) const { if (N.getOpcode() == ISD::SETCC) { LHS = N.getOperand(0); RHS = N.getOperand(1); CC = N.getOperand(2); return true; } if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2).getNode()) || !TLI.isConstFalseVal(N.getOperand(3).getNode())) return false; if (TLI.getBooleanContents(N.getValueType()) == TargetLowering::UndefinedBooleanContent) return false; LHS = N.getOperand(0); RHS = N.getOperand(1); CC = N.getOperand(4); return true; } /// Return true if this is a SetCC-equivalent operation with only one use. /// If this is true, it allows the users to invert the operation for free when /// it is profitable to do so. bool DAGCombiner::isOneUseSetCC(SDValue N) const { SDValue N0, N1, N2; if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) return true; return false; } // \brief Returns the SDNode if it is a constant float BuildVector // or constant float. static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { if (isa(N)) return N.getNode(); if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) return N.getNode(); return nullptr; } // Determines if it is a constant integer or a build vector of constant // integers (and undefs). // Do not permit build vector implicit truncation. 
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
  // Scalar constant: accepted unless it is opaque and NoOpaques is set.
  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
    return !(Const->isOpaque() && NoOpaques);
  if (N.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  unsigned BitWidth = N.getScalarValueSizeInBits();
  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    // Every defined element must be a constant whose width equals the scalar
    // size of the vector, and must respect the NoOpaques restriction.
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
        (Const->isOpaque() && NoOpaques))
      return false;
  }
  return true;
}

// Determines if it is a constant null integer or a splatted vector of a
// constant null integer (with no undefs).
// Build vector implicit truncation is not an issue for null values.
static bool isNullConstantOrNullSplatConstant(SDValue N) {
  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
    return Splat->isNullValue();
  return false;
}

// Determines if it is a constant integer of one or a splatted vector of a
// constant integer of one (with no undefs).
// Do not permit build vector implicit truncation.
static bool isOneConstantOrOneSplatConstant(SDValue N) {
  unsigned BitWidth = N.getScalarValueSizeInBits();
  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
    return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
  return false;
}

// Determines if it is a constant integer of all ones or a splatted vector of a
// constant integer of all ones (with no undefs).
// Do not permit build vector implicit truncation.
static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
  unsigned BitWidth = N.getScalarValueSizeInBits();
  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
    return Splat->isAllOnesValue() &&
           Splat->getAPIntValue().getBitWidth() == BitWidth;
  return false;
}

// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
// undef's.
static bool isAnyConstantBuildVector(const SDNode *N) { return ISD::isBuildVectorOfConstantSDNodes(N) || ISD::isBuildVectorOfConstantFPSDNodes(N); } SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1) { EVT VT = N0.getValueType(); if (N0.getOpcode() == Opc) { if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) { // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R)) return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); return SDValue(); } if (N0.hasOneUse()) { // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one // use SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); if (!OpNode.getNode()) return SDValue(); AddToWorklist(OpNode.getNode()); return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); } } } if (N1.getOpcode() == Opc) { if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) { if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) { // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L)) return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); return SDValue(); } if (N1.hasOneUse()) { // reassoc. 
(op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one // use SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0)); if (!OpNode.getNode()) return SDValue(); AddToWorklist(OpNode.getNode()); return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); } } } return SDValue(); } SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, bool AddTo) { assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); ++NodesCombined; DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: "; To[0].getNode()->dump(&DAG); dbgs() << " and " << NumTo-1 << " other values\n"); for (unsigned i = 0, e = NumTo; i != e; ++i) assert((!To[i].getNode() || N->getValueType(i) == To[i].getValueType()) && "Cannot combine value to value of different type!"); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesWith(N, To); if (AddTo) { // Push the new nodes and any users onto the worklist for (unsigned i = 0, e = NumTo; i != e; ++i) { if (To[i].getNode()) { AddToWorklist(To[i].getNode()); AddUsersToWorklist(To[i].getNode()); } } } // Finally, if the node is now dead, remove it from the graph. The node // may not be dead if the replacement process recursively simplified to // something else needing this node. if (N->use_empty()) deleteAndRecombine(N); return SDValue(N, 0); } void DAGCombiner:: CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // Replace all uses. If any nodes become isomorphic to other nodes and // are deleted, make sure to remove them from our worklist. WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); // Push the new node and any (possibly new) users onto the worklist. AddToWorklist(TLO.New.getNode()); AddUsersToWorklist(TLO.New.getNode()); // Finally, if the node is now dead, remove it from the graph. The node // may not be dead if the replacement process recursively simplified to // something else needing this node. 
if (TLO.Old.getNode()->use_empty()) deleteAndRecombine(TLO.Old.getNode()); } /// Check the specified integer node value to see if it can be simplified or if /// things it uses can be simplified by bit propagation. If so, return true. bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); APInt KnownZero, KnownOne; if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) return false; // Revisit the node. AddToWorklist(Op.getNode()); // Replace the old value with the new one. ++NodesCombined; DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG); dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG); dbgs() << '\n'); CommitTargetLoweringOpt(TLO); return true; } void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { SDLoc DL(Load); EVT VT = Load->getValueType(0); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0)); DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: "; Trunc.getNode()->dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); deleteAndRecombine(Load); AddToWorklist(Trunc.getNode()); } SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { Replace = false; SDLoc DL(Op); if (ISD::isUNINDEXEDLoad(Op.getNode())) { LoadSDNode *LD = cast(Op); EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? 
ISD::ZEXTLOAD : ISD::EXTLOAD) : LD->getExtensionType(); Replace = true; return DAG.getExtLoad(ExtType, DL, PVT, LD->getChain(), LD->getBasePtr(), MemVT, LD->getMemOperand()); } unsigned Opc = Op.getOpcode(); switch (Opc) { default: break; case ISD::AssertSext: return DAG.getNode(ISD::AssertSext, DL, PVT, SExtPromoteOperand(Op.getOperand(0), PVT), Op.getOperand(1)); case ISD::AssertZext: return DAG.getNode(ISD::AssertZext, DL, PVT, ZExtPromoteOperand(Op.getOperand(0), PVT), Op.getOperand(1)); case ISD::Constant: { unsigned ExtOpc = Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; return DAG.getNode(ExtOpc, DL, PVT, Op); } } if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) return SDValue(); return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op); } SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) return SDValue(); EVT OldVT = Op.getValueType(); SDLoc DL(Op); bool Replace = false; SDValue NewOp = PromoteOperand(Op, PVT, Replace); if (!NewOp.getNode()) return SDValue(); AddToWorklist(NewOp.getNode()); if (Replace) ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp, DAG.getValueType(OldVT)); } SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { EVT OldVT = Op.getValueType(); SDLoc DL(Op); bool Replace = false; SDValue NewOp = PromoteOperand(Op, PVT, Replace); if (!NewOp.getNode()) return SDValue(); AddToWorklist(NewOp.getNode()); if (Replace) ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); return DAG.getZeroExtendInReg(NewOp, DL, OldVT); } /// Promote the specified integer binary operation if the target indicates it is /// beneficial. e.g. On x86, it's usually better to promote i16 operations to /// i32 since i16 instructions are longer. 
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { if (!LegalOperations) return SDValue(); EVT VT = Op.getValueType(); if (VT.isVector() || !VT.isInteger()) return SDValue(); // If operation type is 'undesirable', e.g. i16 on x86, consider // promoting it. unsigned Opc = Op.getOpcode(); if (TLI.isTypeDesirableForOp(Opc, VT)) return SDValue(); EVT PVT = VT; // Consult target whether it is a good idea to promote this operation and // what's the right type to promote it to. if (TLI.IsDesirableToPromoteOp(Op, PVT)) { assert(PVT != VT && "Don't know what type to promote to!"); bool Replace0 = false; SDValue N0 = Op.getOperand(0); SDValue NN0 = PromoteOperand(N0, PVT, Replace0); if (!NN0.getNode()) return SDValue(); bool Replace1 = false; SDValue N1 = Op.getOperand(1); SDValue NN1; if (N0 == N1) NN1 = NN0; else { NN1 = PromoteOperand(N1, PVT, Replace1); if (!NN1.getNode()) return SDValue(); } AddToWorklist(NN0.getNode()); if (NN1.getNode()) AddToWorklist(NN1.getNode()); if (Replace0) ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); if (Replace1) ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode()); DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); SDLoc DL(Op); return DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1)); } return SDValue(); } /// Promote the specified integer shift operation if the target indicates it is /// beneficial. e.g. On x86, it's usually better to promote i16 operations to /// i32 since i16 instructions are longer. SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { if (!LegalOperations) return SDValue(); EVT VT = Op.getValueType(); if (VT.isVector() || !VT.isInteger()) return SDValue(); // If operation type is 'undesirable', e.g. i16 on x86, consider // promoting it. unsigned Opc = Op.getOpcode(); if (TLI.isTypeDesirableForOp(Opc, VT)) return SDValue(); EVT PVT = VT; // Consult target whether it is a good idea to promote this operation and // what's the right type to promote it to. 
if (TLI.IsDesirableToPromoteOp(Op, PVT)) { assert(PVT != VT && "Don't know what type to promote to!"); bool Replace = false; SDValue N0 = Op.getOperand(0); if (Opc == ISD::SRA) N0 = SExtPromoteOperand(Op.getOperand(0), PVT); else if (Opc == ISD::SRL) N0 = ZExtPromoteOperand(Op.getOperand(0), PVT); else N0 = PromoteOperand(N0, PVT, Replace); if (!N0.getNode()) return SDValue(); AddToWorklist(N0.getNode()); if (Replace) ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); SDLoc DL(Op); return DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, Op.getOperand(1))); } return SDValue(); } SDValue DAGCombiner::PromoteExtend(SDValue Op) { if (!LegalOperations) return SDValue(); EVT VT = Op.getValueType(); if (VT.isVector() || !VT.isInteger()) return SDValue(); // If operation type is 'undesirable', e.g. i16 on x86, consider // promoting it. unsigned Opc = Op.getOpcode(); if (TLI.isTypeDesirableForOp(Opc, VT)) return SDValue(); EVT PVT = VT; // Consult target whether it is a good idea to promote this operation and // what's the right type to promote it to. if (TLI.IsDesirableToPromoteOp(Op, PVT)) { assert(PVT != VT && "Don't know what type to promote to!"); // fold (aext (aext x)) -> (aext x) // fold (aext (zext x)) -> (zext x) // fold (aext (sext x)) -> (sext x) DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0)); } return SDValue(); } bool DAGCombiner::PromoteLoad(SDValue Op) { if (!LegalOperations) return false; if (!ISD::isUNINDEXEDLoad(Op.getNode())) return false; EVT VT = Op.getValueType(); if (VT.isVector() || !VT.isInteger()) return false; // If operation type is 'undesirable', e.g. i16 on x86, consider // promoting it. 
unsigned Opc = Op.getOpcode(); if (TLI.isTypeDesirableForOp(Opc, VT)) return false; EVT PVT = VT; // Consult target whether it is a good idea to promote this operation and // what's the right type to promote it to. if (TLI.IsDesirableToPromoteOp(Op, PVT)) { assert(PVT != VT && "Don't know what type to promote to!"); SDLoc DL(Op); SDNode *N = Op.getNode(); LoadSDNode *LD = cast(N); EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) : LD->getExtensionType(); SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT, LD->getChain(), LD->getBasePtr(), MemVT, LD->getMemOperand()); SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD); DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: "; Result.getNode()->dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); deleteAndRecombine(N); AddToWorklist(Result.getNode()); return true; } return false; } /// \brief Recursively delete a node which has no uses and any operands for /// which it is the only use. /// /// Note that this both deletes the nodes and removes them from the worklist. /// It also adds any nodes who have had a user deleted to the worklist as they /// may now have only one use and subject to other combines. 
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) { if (!N->use_empty()) return false; SmallSetVector Nodes; Nodes.insert(N); do { N = Nodes.pop_back_val(); if (!N) continue; if (N->use_empty()) { for (const SDValue &ChildN : N->op_values()) Nodes.insert(ChildN.getNode()); removeFromWorklist(N); DAG.DeleteNode(N); } else { AddToWorklist(N); } } while (!Nodes.empty()); return true; } //===----------------------------------------------------------------------===// // Main DAG Combiner implementation //===----------------------------------------------------------------------===// void DAGCombiner::Run(CombineLevel AtLevel) { // set the instance variables, so that the various visit routines may use it. Level = AtLevel; LegalOperations = Level >= AfterLegalizeVectorOps; LegalTypes = Level >= AfterLegalizeTypes; // Add all the dag nodes to the worklist. for (SDNode &Node : DAG.allnodes()) AddToWorklist(&Node); // Create a dummy node (which is not added to allnodes), that adds a reference // to the root node, preventing it from being deleted, and tracking any // changes of the root. HandleSDNode Dummy(DAG.getRoot()); // While the worklist isn't empty, find a node and try to combine it. while (!WorklistMap.empty()) { SDNode *N; // The Worklist holds the SDNodes in order, but it may contain null entries. do { N = Worklist.pop_back_val(); } while (!N); bool GoodWorklistEntry = WorklistMap.erase(N); (void)GoodWorklistEntry; assert(GoodWorklistEntry && "Found a worklist entry without a corresponding map entry!"); // If N has no uses, it is dead. Make sure to revisit all N's operands once // N is deleted from the DAG, since they too may now be dead or may have a // reduced number of uses, allowing other xforms. if (recursivelyDeleteUnusedNodes(N)) continue; WorklistRemover DeadNodes(*this); // If this combine is running after legalizing the DAG, re-legalize any // nodes pulled off the worklist. 
if (Level == AfterLegalizeDAG) { SmallSetVector UpdatedNodes; bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); for (SDNode *LN : UpdatedNodes) { AddToWorklist(LN); AddUsersToWorklist(LN); } if (!NIsValid) continue; } DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG)); // Add any operands of the new node which have not yet been combined to the // worklist as well. Because the worklist uniques things already, this // won't repeatedly process the same operand. CombinedNodes.insert(N); for (const SDValue &ChildN : N->op_values()) if (!CombinedNodes.count(ChildN.getNode())) AddToWorklist(ChildN.getNode()); SDValue RV = combine(N); if (!RV.getNode()) continue; ++NodesCombined; // If we get back the same node we passed in, rather than a new node or // zero, we know that the node must have defined multiple values and // CombineTo was used. Since CombineTo takes care of the worklist // mechanics for us, we have no work to do in this case. if (RV.getNode() == N) continue; assert(N->getOpcode() != ISD::DELETED_NODE && RV.getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned new node!"); DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG)); if (N->getNumValues() == RV.getNode()->getNumValues()) DAG.ReplaceAllUsesWith(N, RV.getNode()); else { assert(N->getValueType(0) == RV.getValueType() && N->getNumValues() == 1 && "Type mismatch"); SDValue OpV = RV; DAG.ReplaceAllUsesWith(N, &OpV); } // Push the new node and any users onto the worklist AddToWorklist(RV.getNode()); AddUsersToWorklist(RV.getNode()); // Finally, if the node is now dead, remove it from the graph. The node // may not be dead if the replacement process recursively simplified to // something else needing this node. This will also take care of adding any // operands which have lost a user to the worklist. recursivelyDeleteUnusedNodes(N); } // If the root changed (e.g. it was a dead load, update the root). 
DAG.setRoot(Dummy.getValue()); DAG.RemoveDeadNodes(); } SDValue DAGCombiner::visit(SDNode *N) { switch (N->getOpcode()) { default: break; case ISD::TokenFactor: return visitTokenFactor(N); case ISD::MERGE_VALUES: return visitMERGE_VALUES(N); case ISD::ADD: return visitADD(N); case ISD::SUB: return visitSUB(N); case ISD::ADDC: return visitADDC(N); case ISD::SUBC: return visitSUBC(N); case ISD::ADDE: return visitADDE(N); case ISD::SUBE: return visitSUBE(N); case ISD::MUL: return visitMUL(N); case ISD::SDIV: return visitSDIV(N); case ISD::UDIV: return visitUDIV(N); case ISD::SREM: case ISD::UREM: return visitREM(N); case ISD::MULHU: return visitMULHU(N); case ISD::MULHS: return visitMULHS(N); case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); case ISD::SMULO: return visitSMULO(N); case ISD::UMULO: return visitUMULO(N); case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: case ISD::UMAX: return visitIMINMAX(N); case ISD::AND: return visitAND(N); case ISD::OR: return visitOR(N); case ISD::XOR: return visitXOR(N); case ISD::SHL: return visitSHL(N); case ISD::SRA: return visitSRA(N); case ISD::SRL: return visitSRL(N); case ISD::ROTR: case ISD::ROTL: return visitRotate(N); case ISD::BSWAP: return visitBSWAP(N); case ISD::BITREVERSE: return visitBITREVERSE(N); case ISD::CTLZ: return visitCTLZ(N); case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); case ISD::CTTZ: return visitCTTZ(N); case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); case ISD::CTPOP: return visitCTPOP(N); case ISD::SELECT: return visitSELECT(N); case ISD::VSELECT: return visitVSELECT(N); case ISD::SELECT_CC: return visitSELECT_CC(N); case ISD::SETCC: return visitSETCC(N); case ISD::SETCCE: return visitSETCCE(N); case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); case ISD::ANY_EXTEND: return visitANY_EXTEND(N); case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); case 
ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N); case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N); case ISD::TRUNCATE: return visitTRUNCATE(N); case ISD::BITCAST: return visitBITCAST(N); case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); case ISD::FADD: return visitFADD(N); case ISD::FSUB: return visitFSUB(N); case ISD::FMUL: return visitFMUL(N); case ISD::FMA: return visitFMA(N); case ISD::FDIV: return visitFDIV(N); case ISD::FREM: return visitFREM(N); case ISD::FSQRT: return visitFSQRT(N); case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); case ISD::SINT_TO_FP: return visitSINT_TO_FP(N); case ISD::UINT_TO_FP: return visitUINT_TO_FP(N); case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); case ISD::FP_ROUND: return visitFP_ROUND(N); case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N); case ISD::FP_EXTEND: return visitFP_EXTEND(N); case ISD::FNEG: return visitFNEG(N); case ISD::FABS: return visitFABS(N); case ISD::FFLOOR: return visitFFLOOR(N); case ISD::FMINNUM: return visitFMINNUM(N); case ISD::FMAXNUM: return visitFMAXNUM(N); case ISD::FCEIL: return visitFCEIL(N); case ISD::FTRUNC: return visitFTRUNC(N); case ISD::BRCOND: return visitBRCOND(N); case ISD::BR_CC: return visitBR_CC(N); case ISD::LOAD: return visitLOAD(N); case ISD::STORE: return visitSTORE(N); case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N); case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N); case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); case ISD::MGATHER: return visitMGATHER(N); case ISD::MLOAD: return visitMLOAD(N); case ISD::MSCATTER: return visitMSCATTER(N); case ISD::MSTORE: 
return visitMSTORE(N); case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); case ISD::FP16_TO_FP: return visitFP16_TO_FP(N); } return SDValue(); } SDValue DAGCombiner::combine(SDNode *N) { SDValue RV = visit(N); // If nothing happened, try a target-specific DAG combine. if (!RV.getNode()) { assert(N->getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned NULL!"); if (N->getOpcode() >= ISD::BUILTIN_OP_END || TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) { // Expose the DAG combiner to the target combiner impls. TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, Level, false, this); RV = TLI.PerformDAGCombine(N, DagCombineInfo); } } // If nothing happened still, try promoting the operation. if (!RV.getNode()) { switch (N->getOpcode()) { default: break; case ISD::ADD: case ISD::SUB: case ISD::MUL: case ISD::AND: case ISD::OR: case ISD::XOR: RV = PromoteIntBinOp(SDValue(N, 0)); break; case ISD::SHL: case ISD::SRA: case ISD::SRL: RV = PromoteIntShiftOp(SDValue(N, 0)); break; case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: RV = PromoteExtend(SDValue(N, 0)); break; case ISD::LOAD: if (PromoteLoad(SDValue(N, 0))) RV = SDValue(N, 0); break; } } // If N is a commutative binary node, try commuting it to enable more // sdisel CSE. if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) && N->getNumValues() == 1) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // Constant operands are canonicalized to RHS. if (isa(N0) || !isa(N1)) { SDValue Ops[] = {N1, N0}; SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, N->getFlags()); if (CSENode) return SDValue(CSENode, 0); } } return RV; } /// Given a node, return its input chain if it has one, otherwise return a null /// sd operand. 
static SDValue getInputChainForNode(SDNode *N) { if (unsigned NumOps = N->getNumOperands()) { if (N->getOperand(0).getValueType() == MVT::Other) return N->getOperand(0); if (N->getOperand(NumOps-1).getValueType() == MVT::Other) return N->getOperand(NumOps-1); for (unsigned i = 1; i < NumOps-1; ++i) if (N->getOperand(i).getValueType() == MVT::Other) return N->getOperand(i); } return SDValue(); } SDValue DAGCombiner::visitTokenFactor(SDNode *N) { // If N has two operands, where one has an input chain equal to the other, // the 'other' chain is redundant. if (N->getNumOperands() == 2) { if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1)) return N->getOperand(0); if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0)) return N->getOperand(1); } SmallVector TFs; // List of token factors to visit. SmallVector Ops; // Ops for replacing token factor. SmallPtrSet SeenOps; bool Changed = false; // If we should replace this token factor. // Start out with this token factor. TFs.push_back(N); // Iterate through token factors. The TFs grows when new token factors are // encountered. for (unsigned i = 0; i < TFs.size(); ++i) { SDNode *TF = TFs[i]; // Check each of the operands. for (const SDValue &Op : TF->op_values()) { switch (Op.getOpcode()) { case ISD::EntryToken: // Entry tokens don't need to be added to the list. They are // redundant. Changed = true; break; case ISD::TokenFactor: if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) { // Queue up for processing. TFs.push_back(Op.getNode()); // Clean up in case the token factor is removed. AddToWorklist(Op.getNode()); Changed = true; break; } LLVM_FALLTHROUGH; default: // Only add if it isn't already in the list. if (SeenOps.insert(Op.getNode()).second) Ops.push_back(Op); else Changed = true; break; } } } SDValue Result; // If we've changed things around then replace token factor. if (Changed) { if (Ops.empty()) { // The entry token is the only possible outcome. 
Result = DAG.getEntryNode(); } else { // New and improved token factor. Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops); } // Add users to worklist if AA is enabled, since it may introduce // a lot of new chained token factors while removing memory deps. bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : DAG.getSubtarget().useAA(); return CombineTo(N, Result, UseAA /*add to worklist*/); } return Result; } /// MERGE_VALUES can always be eliminated. SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { WorklistRemover DeadNodes(*this); // Replacing results may cause a different MERGE_VALUES to suddenly // be CSE'd with N, and carry its uses with it. Iterate until no // uses remain, to ensure that the node can be safely deleted. // First add the users of this node to the work list so that they // can be tried again once they have new operands. AddUsersToWorklist(N); do { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i)); } while (!N->use_empty()); deleteAndRecombine(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! } /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a /// ConstantSDNode pointer else nullptr. static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { ConstantSDNode *Const = dyn_cast(N); return Const != nullptr && !Const->isOpaque() ? 
Const : nullptr; } SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); SDLoc DL(N); // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; // fold (add x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N1.getNode())) return N0; if (ISD::isBuildVectorAllZeros(N0.getNode())) return N1; } // fold (add x, undef) -> undef if (N0.isUndef()) return N0; if (N1.isUndef()) return N1; if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) { // canonicalize constant to RHS if (!DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::ADD, DL, VT, N1, N0); // fold (add c1, c2) -> c1+c2 return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(), N1.getNode()); } // fold (add x, 0) -> x if (isNullConstant(N1)) return N0; // fold ((c1-A)+c2) -> (c1+c2)-A if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) { if (N0.getOpcode() == ISD::SUB) if (isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) { return DAG.getNode(ISD::SUB, DL, VT, DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)), N0.getOperand(1)); } } // reassociate add if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1)) return RADD; // fold ((0-A) + B) -> B-A if (N0.getOpcode() == ISD::SUB && isNullConstantOrNullSplatConstant(N0.getOperand(0))) return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1)); // fold (A + (0-B)) -> A-B if (N1.getOpcode() == ISD::SUB && isNullConstantOrNullSplatConstant(N1.getOperand(0))) return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1)); // fold (A+(B-A)) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) return N1.getOperand(0); // fold ((B-A)+A) -> B if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1)) return N0.getOperand(0); // fold (A+(B-(A+C))) to (B-C) if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && N0 == N1.getOperand(1).getOperand(0)) return 
DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0), N1.getOperand(1).getOperand(1)); // fold (A+(B-(C+A))) to (B-C) if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && N0 == N1.getOperand(1).getOperand(1)) return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0), N1.getOperand(1).getOperand(0)); // fold (A+((B-A)+or-C)) to (B+or-C) if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) && N1.getOperand(0).getOpcode() == ISD::SUB && N0 == N1.getOperand(0).getOperand(1)) return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0), N1.getOperand(1)); // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) { SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); SDValue N10 = N1.getOperand(0); SDValue N11 = N1.getOperand(1); if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10)) return DAG.getNode(ISD::SUB, DL, VT, DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10), DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11)); } if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // fold (a+b) -> (a|b) iff a and b share no bits. 
if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && VT.isInteger() && DAG.haveNoCommonBitsSet(N0, N1)) return DAG.getNode(ISD::OR, DL, VT, N0, N1); // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB && isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0))) return DAG.getNode(ISD::SUB, DL, VT, N0, DAG.getNode(ISD::SHL, DL, VT, N1.getOperand(0).getOperand(1), N1.getOperand(1))); if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB && isNullConstantOrNullSplatConstant(N0.getOperand(0).getOperand(0))) return DAG.getNode(ISD::SUB, DL, VT, N1, DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0).getOperand(1), N0.getOperand(1))); if (N1.getOpcode() == ISD::AND) { SDValue AndOp0 = N1.getOperand(0); unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); unsigned DestBits = VT.getScalarSizeInBits(); // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) // and similar xforms where the inner op is either ~0 or 0. 
if (NumSignBits == DestBits && isOneConstantOrOneSplatConstant(N1->getOperand(1))) return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); } // add (sext i1), X -> sub X, (zext i1) if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.getOperand(0).getValueType() == MVT::i1 && !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) { SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); } // add X, (sextinreg Y i1) -> sub X, (and Y 1) if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { VTSDNode *TN = cast(N1.getOperand(1)); if (TN->getVT() == MVT::i1) { SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), DAG.getConstant(1, DL, VT)); return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt); } } return SDValue(); } SDValue DAGCombiner::visitADDC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); // If the flag result is dead, turn this into an ADD. if (!N->hasAnyUseOfValue(1)) return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); // canonicalize constant to RHS. ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); if (N0C && !N1C) return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0); // fold (addc x, 0) -> x + no carry out if (isNullConstant(N1)) return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. APInt LHSZero, LHSOne; APInt RHSZero, RHSOne; DAG.computeKnownBits(N0, LHSZero, LHSOne); if (LHSZero.getBoolValue()) { DAG.computeKnownBits(N1, RHSZero, RHSOne); // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. 
if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); } return SDValue(); } SDValue DAGCombiner::visitADDE(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue CarryIn = N->getOperand(2); // canonicalize constant to RHS ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); if (N0C && !N1C) return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(), N1, N0, CarryIn); // fold (adde x, y, false) -> (addc x, y) if (CarryIn.getOpcode() == ISD::CARRY_FALSE) return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1); return SDValue(); } // Since it may not be valid to emit a fold to zero for vector initializers // check if we can before folding. static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations, bool LegalTypes) { if (!VT.isVector()) return DAG.getConstant(0, DL, VT); if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) return DAG.getConstant(0, DL, VT); return SDValue(); } SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); SDLoc DL(N); // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; // fold (sub x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N1.getNode())) return N0; } // fold (sub x, x) -> 0 // FIXME: Refactor this and xor and other similar operations together. 
if (N0 == N1) return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes); if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && DAG.isConstantIntBuildVectorOrConstantInt(N1)) { // fold (sub c1, c2) -> c1-c2 return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(), N1.getNode()); } ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); // fold (sub x, c) -> (add x, -c) if (N1C) { return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getConstant(-N1C->getAPIntValue(), DL, VT)); } if (isNullConstantOrNullSplatConstant(N0)) { unsigned BitWidth = VT.getScalarSizeInBits(); // Right-shifting everything out but the sign bit followed by negation is // the same as flipping arithmetic/logical shift type without the negation: // -(X >>u 31) -> (X >>s 31) // -(X >>s 31) -> (X >>u 31) if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) { ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1)); if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) { auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA; if (!LegalOperations || TLI.isOperationLegal(NewSh, VT)) return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1)); } } // 0 - X --> 0 if the sub is NUW. if (N->getFlags()->hasNoUnsignedWrap()) return N0; if (DAG.MaskedValueIsZero(N1, ~APInt::getSignBit(BitWidth))) { // N1 is either 0 or the minimum signed value. If the sub is NSW, then // N1 must be 0 because negating the minimum signed value is undefined. if (N->getFlags()->hasNoSignedWrap()) return N0; // 0 - X --> X if X is 0 or the minimum signed value. return N1; } } // Canonicalize (sub -1, x) -> ~x, i.e. 
(xor x, -1) if (isAllOnesConstantOrAllOnesSplatConstant(N0)) return DAG.getNode(ISD::XOR, DL, VT, N1, N0); // fold A-(A-B) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) return N1.getOperand(1); // fold (A+B)-A -> B if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) return N0.getOperand(1); // fold (A+B)-B -> A if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) return N0.getOperand(0); // fold C2-(A+C1) -> (C2-C1)-A if (N1.getOpcode() == ISD::ADD) { SDValue N11 = N1.getOperand(1); if (isConstantOrConstantVector(N0, /* NoOpaques */ true) && isConstantOrConstantVector(N11, /* NoOpaques */ true)) { SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11); return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0)); } } // fold ((A+(B+or-C))-B) -> A+or-C if (N0.getOpcode() == ISD::ADD && (N0.getOperand(1).getOpcode() == ISD::SUB || N0.getOperand(1).getOpcode() == ISD::ADD) && N0.getOperand(1).getOperand(0) == N1) return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0), N0.getOperand(1).getOperand(1)); // fold ((A+(C+B))-B) -> A+C if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD && N0.getOperand(1).getOperand(1) == N1) return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N0.getOperand(1).getOperand(0)); // fold ((A-(B-C))-C) -> A-B if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB && N0.getOperand(1).getOperand(1) == N1) return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N0.getOperand(1).getOperand(0)); // If either operand of a sub is undef, the result is undef if (N0.isUndef()) return N0; if (N1.isUndef()) return N1; // If the relocation model supports it, consider symbol offsets. 
if (GlobalAddressSDNode *GA = dyn_cast(N0)) if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { // fold (sub Sym, c) -> Sym-c if (N1C && GA->getOpcode() == ISD::GlobalAddress) return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, GA->getOffset() - (uint64_t)N1C->getSExtValue()); // fold (sub Sym+c1, Sym+c2) -> c1-c2 if (GlobalAddressSDNode *GB = dyn_cast(N1)) if (GA->getGlobal() == GB->getGlobal()) return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(), DL, VT); } // sub X, (sextinreg Y i1) -> add X, (and Y 1) if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { VTSDNode *TN = cast(N1.getOperand(1)); if (TN->getVT() == MVT::i1) { SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), DAG.getConstant(1, DL, VT)); return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt); } } return SDValue(); } SDValue DAGCombiner::visitSUBC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); SDLoc DL(N); // If the flag result is dead, turn this into an SUB. if (!N->hasAnyUseOfValue(1)) return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); // fold (subc x, x) -> 0 + no borrow if (N0 == N1) return CombineTo(N, DAG.getConstant(0, DL, VT), DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); // fold (subc x, 0) -> x + no borrow if (isNullConstant(N1)) return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); // Canonicalize (sub -1, x) -> ~x, i.e. 
(xor x, -1) + no borrow if (isAllOnesConstant(N0)) return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0), DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); return SDValue(); } SDValue DAGCombiner::visitSUBE(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue CarryIn = N->getOperand(2); // fold (sube x, y, false) -> (subc x, y) if (CarryIn.getOpcode() == ISD::CARRY_FALSE) return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1); return SDValue(); } SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); // fold (mul x, undef) -> 0 if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); bool N0IsConst = false; bool N1IsConst = false; bool N1IsOpaqueConst = false; bool N0IsOpaqueConst = false; APInt ConstValue0, ConstValue1; // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0); N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1); } else { N0IsConst = isa(N0); if (N0IsConst) { ConstValue0 = cast(N0)->getAPIntValue(); N0IsOpaqueConst = cast(N0)->isOpaque(); } N1IsConst = isa(N1); if (N1IsConst) { ConstValue1 = cast(N1)->getAPIntValue(); N1IsOpaqueConst = cast(N1)->isOpaque(); } } // fold (mul c1, c2) -> c1*c2 if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst) return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, N0.getNode(), N1.getNode()); // canonicalize constant to RHS (vector doesn't have to splat) if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); // fold (mul x, 0) -> 0 if (N1IsConst && ConstValue1 == 0) return N1; // We require a splat of the entire scalar bit width for non-contiguous // bit patterns. 
bool IsFullSplat = ConstValue1.getBitWidth() == VT.getScalarSizeInBits(); // fold (mul x, 1) -> x if (N1IsConst && ConstValue1 == 1 && IsFullSplat) return N0; // fold (mul x, -1) -> 0-x if (N1IsConst && ConstValue1.isAllOnesValue()) { SDLoc DL(N); return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); } // fold (mul x, (1 << c)) -> x << c if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() && IsFullSplat) { SDLoc DL(N); return DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ConstValue1.logBase2(), DL, getShiftAmountTy(N0.getValueType()))); } // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() && IsFullSplat) { unsigned Log2Val = (-ConstValue1).logBase2(); SDLoc DL(N); // FIXME: If the input is something that is easily negated (e.g. a // single-use add), we should put the negate there. return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(Log2Val, DL, getShiftAmountTy(N0.getValueType())))); } // (mul (shl X, c1), c2) -> (mul X, c2 << c1) if (N0.getOpcode() == ISD::SHL && isConstantOrConstantVector(N1, /* NoOpaques */ true) && isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) { SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1)); if (isConstantOrConstantVector(C3)) return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3); } // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one // use. { SDValue Sh(nullptr, 0), Y(nullptr, 0); // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). 
if (N0.getOpcode() == ISD::SHL && isConstantOrConstantVector(N0.getOperand(1)) && N0.getNode()->hasOneUse()) { Sh = N0; Y = N1; } else if (N1.getOpcode() == ISD::SHL && isConstantOrConstantVector(N1.getOperand(1)) && N1.getNode()->hasOneUse()) { Sh = N1; Y = N0; } if (Sh.getNode()) { SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y); return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1)); } } // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) if (DAG.isConstantIntBuildVectorOrConstantInt(N1) && N0.getOpcode() == ISD::ADD && DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && isMulAddWithConstProfitable(N, N0, N1)) return DAG.getNode(ISD::ADD, SDLoc(N), VT, DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1), DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1)); // reassociate mul if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1)) return RMUL; return SDValue(); } /// Return true if divmod libcall is available. static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI) { RTLIB::Libcall LC; EVT NodeType = Node->getValueType(0); if (!NodeType.isSimple()) return false; switch (NodeType.getSimpleVT().SimpleTy) { default: return false; // No libcall for vector types. case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; } return TLI.getLibcallName(LC) != nullptr; } /// Issue divrem if both quotient and remainder are needed. SDValue DAGCombiner::useDivRem(SDNode *Node) { if (Node->use_empty()) return SDValue(); // This is a dead node, leave it alone. 
unsigned Opcode = Node->getOpcode(); bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM); unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; // DivMod lib calls can still work on non-legal types if using lib-calls. EVT VT = Node->getValueType(0); if (VT.isVector() || !VT.isInteger()) return SDValue(); if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT)) return SDValue(); // If DIVREM is going to get expanded into a libcall, // but there is no libcall available, then don't combine. if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) && !isDivRemLibcallAvailable(Node, isSigned, TLI)) return SDValue(); // If div is legal, it's better to do the normal expansion unsigned OtherOpcode = 0; if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) { OtherOpcode = isSigned ? ISD::SREM : ISD::UREM; if (TLI.isOperationLegalOrCustom(Opcode, VT)) return SDValue(); } else { OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV; if (TLI.isOperationLegalOrCustom(OtherOpcode, VT)) return SDValue(); } SDValue Op0 = Node->getOperand(0); SDValue Op1 = Node->getOperand(1); SDValue combined; for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), UE = Op0.getNode()->use_end(); UI != UE;) { SDNode *User = *UI++; if (User == Node || User->use_empty()) continue; // Convert the other matching node(s), too; // otherwise, the DIVREM may get target-legalized into something // target-specific that we won't be able to recognize. 
unsigned UserOpc = User->getOpcode(); if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) && User->getOperand(0) == Op0 && User->getOperand(1) == Op1) { if (!combined) { if (UserOpc == OtherOpcode) { SDVTList VTs = DAG.getVTList(VT, VT); combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1); } else if (UserOpc == DivRemOpc) { combined = SDValue(User, 0); } else { assert(UserOpc == Opcode); continue; } } if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV) CombineTo(User, combined); else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM) CombineTo(User, combined.getValue(1)); } } return combined; } SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; SDLoc DL(N); // fold (sdiv c1, c2) -> c1/c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C); // fold (sdiv X, 1) -> X if (N1C && N1C->isOne()) return N0; // fold (sdiv X, -1) -> 0-X if (N1C && N1C->isAllOnesValue()) return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); // If we know the sign bits of both operands are zero, strength reduce to a // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1); // fold (sdiv X, pow2) -> simple ops after legalize // FIXME: We check for the exact bit here because the generic lowering gives // better results in that case. The target-specific lowering should learn how // to handle exact sdivs efficiently. 
if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && !cast(N)->Flags.hasExact() && (N1C->getAPIntValue().isPowerOf2() || (-N1C->getAPIntValue()).isPowerOf2())) { // Target-specific implementation of sdiv x, pow2. if (SDValue Res = BuildSDIVPow2(N)) return Res; unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); // Splat the sign bit into the register SDValue SGN = DAG.getNode(ISD::SRA, DL, VT, N0, DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); AddToWorklist(SGN.getNode()); // Add (N0 < 0) ? abs2 - 1 : 0; SDValue SRL = DAG.getNode(ISD::SRL, DL, VT, SGN, DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL, getShiftAmountTy(SGN.getValueType()))); SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL); AddToWorklist(SRL.getNode()); AddToWorklist(ADD.getNode()); // Divide by pow2 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD, DAG.getConstant(lg2, DL, getShiftAmountTy(ADD.getValueType()))); // If we're dividing by a positive value, we're done. Otherwise, we must // negate the result. if (N1C->getAPIntValue().isNonNegative()) return SRA; AddToWorklist(SRA.getNode()); return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA); } // If integer divide is expensive and we satisfy the requirements, emit an // alternate sequence. Targets may check function attributes for size/speed // trade-offs. AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr)) if (SDValue Op = BuildSDIV(N)) return Op; // sdiv, srem -> sdivrem // If the divisor is constant, then return DIVREM only if isIntDivCheap() is // true. Otherwise, we break the simplification logic in visitREM(). 
if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) if (SDValue DivRem = useDivRem(N)) return DivRem; // undef / X -> 0 if (N0.isUndef()) return DAG.getConstant(0, DL, VT); // X / undef -> undef if (N1.isUndef()) return N1; return SDValue(); } SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; SDLoc DL(N); // fold (udiv c1, c2) -> c1/c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C) if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, N0C, N1C)) return Folded; // fold (udiv x, (1 << c)) -> x >>u c if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && DAG.isKnownToBeAPowerOfTwo(N1)) { SDValue LogBase2 = BuildLogBase2(N1, DL); AddToWorklist(LogBase2.getNode()); EVT ShiftVT = getShiftAmountTy(N0.getValueType()); SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); AddToWorklist(Trunc.getNode()); return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc); } // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 if (N1.getOpcode() == ISD::SHL) { SDValue N10 = N1.getOperand(0); if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) && DAG.isKnownToBeAPowerOfTwo(N10)) { SDValue LogBase2 = BuildLogBase2(N10, DL); AddToWorklist(LogBase2.getNode()); EVT ADDVT = N1.getOperand(1).getValueType(); SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT); AddToWorklist(Trunc.getNode()); SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc); AddToWorklist(Add.getNode()); return DAG.getNode(ISD::SRL, DL, VT, N0, Add); } } // fold (udiv x, c) -> alternate AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr)) if (SDValue Op = BuildUDIV(N)) return Op; // sdiv, srem -> sdivrem // If the divisor is constant, then 
return DIVREM only if isIntDivCheap() is // true. Otherwise, we break the simplification logic in visitREM(). if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) if (SDValue DivRem = useDivRem(N)) return DivRem; // undef / X -> 0 if (N0.isUndef()) return DAG.getConstant(0, DL, VT); // X / undef -> undef if (N1.isUndef()) return N1; return SDValue(); } // handles ISD::SREM and ISD::UREM SDValue DAGCombiner::visitREM(SDNode *N) { unsigned Opcode = N->getOpcode(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); bool isSigned = (Opcode == ISD::SREM); SDLoc DL(N); // fold (rem c1, c2) -> c1%c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C) if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C)) return Folded; if (isSigned) { // If we know the sign bits of both operands are zero, strength reduce to a // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::UREM, DL, VT, N0, N1); } else { // fold (urem x, pow2) -> (and x, pow2-1) if (DAG.isKnownToBeAPowerOfTwo(N1)) { APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits()); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT)); AddToWorklist(Add.getNode()); return DAG.getNode(ISD::AND, DL, VT, N0, Add); } // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) if (N1.getOpcode() == ISD::SHL && DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) { APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits()); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT)); AddToWorklist(Add.getNode()); return DAG.getNode(ISD::AND, DL, VT, N0, Add); } } AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); // If X/C can be simplified by the division-by-constant logic, lower // X%C to the equivalent of X-X/C*C. 
// To avoid mangling nodes, this simplification requires that the combine() // call for the speculative DIV must not cause a DIVREM conversion. We guard // against this by skipping the simplification if isIntDivCheap(). When // div is not cheap, combine will not return a DIVREM. Regardless, // checking cheapness here makes sense since the simplification results in // fatter code. if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) { unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV; SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1); AddToWorklist(Div.getNode()); SDValue OptimizedDiv = combine(Div.getNode()); if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) && (OptimizedDiv.getOpcode() != ISD::SDIVREM)); SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1); SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul); AddToWorklist(Mul.getNode()); return Sub; } } // sdiv, srem -> sdivrem if (SDValue DivRem = useDivRem(N)) return DivRem.getValue(1); // undef % X -> 0 if (N0.isUndef()) return DAG.getConstant(0, DL, VT); // X % undef -> undef if (N1.isUndef()) return N1; return SDValue(); } SDValue DAGCombiner::visitMULHS(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); SDLoc DL(N); // fold (mulhs x, 0) -> 0 if (isNullConstant(N1)) return N1; // fold (mulhs x, 1) -> (sra x, size(x)-1) if (isOneConstant(N1)) { SDLoc DL(N); return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, DAG.getConstant(N0.getValueSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); } // fold (mulhs x, undef) -> 0 if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); // If the type twice as wide is legal, transform the mulhs to a wider multiply // plus a shift. 
if (VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); if (TLI.isOperationLegal(ISD::MUL, NewVT)) { N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0); N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, DAG.getConstant(SimpleSize, DL, getShiftAmountTy(N1.getValueType()))); return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); } } return SDValue(); } SDValue DAGCombiner::visitMULHU(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); SDLoc DL(N); // fold (mulhu x, 0) -> 0 if (isNullConstant(N1)) return N1; // fold (mulhu x, 1) -> 0 if (isOneConstant(N1)) return DAG.getConstant(0, DL, N0.getValueType()); // fold (mulhu x, undef) -> 0 if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, DL, VT); // If the type twice as wide is legal, transform the mulhu to a wider multiply // plus a shift. if (VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); if (TLI.isOperationLegal(ISD::MUL, NewVT)) { N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0); N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, DAG.getConstant(SimpleSize, DL, getShiftAmountTy(N1.getValueType()))); return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); } } return SDValue(); } /// Perform optimizations common to nodes that compute two values. LoOp and HiOp /// give the opcodes for the two computations that are being performed. Return /// true if a simplification was made. SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp) { // If the high half is not needed, just compute the low half. 
bool HiExists = N->hasAnyUseOfValue(1); if (!HiExists && (!LegalOperations || TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) { SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); return CombineTo(N, Res, Res); } // If the low half is not needed, just compute the high half. bool LoExists = N->hasAnyUseOfValue(0); if (!LoExists && (!LegalOperations || TLI.isOperationLegal(HiOp, N->getValueType(1)))) { SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); return CombineTo(N, Res, Res); } // If both halves are used, return as it is. if (LoExists && HiExists) return SDValue(); // If the two computed results can be simplified separately, separate them. if (LoExists) { SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); AddToWorklist(Lo.getNode()); SDValue LoOpt = combine(Lo.getNode()); if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && (!LegalOperations || TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType()))) return CombineTo(N, LoOpt, LoOpt); } if (HiExists) { SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); AddToWorklist(Hi.getNode()); SDValue HiOpt = combine(Hi.getNode()); if (HiOpt.getNode() && HiOpt != Hi && (!LegalOperations || TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType()))) return CombineTo(N, HiOpt, HiOpt); } return SDValue(); } SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS)) return Res; EVT VT = N->getValueType(0); SDLoc DL(N); // If the type is twice as wide is legal, transform the mulhu to a wider // multiply plus a shift. 
if (VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); if (TLI.isOperationLegal(ISD::MUL, NewVT)) { SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0)); SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1)); Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); // Compute the high part as N1. Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, DAG.getConstant(SimpleSize, DL, getShiftAmountTy(Lo.getValueType()))); Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); // Compute the low part as N0. Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); return CombineTo(N, Lo, Hi); } } return SDValue(); } SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU)) return Res; EVT VT = N->getValueType(0); SDLoc DL(N); // If the type is twice as wide is legal, transform the mulhu to a wider // multiply plus a shift. if (VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); if (TLI.isOperationLegal(ISD::MUL, NewVT)) { SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0)); SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1)); Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); // Compute the high part as N1. Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, DAG.getConstant(SimpleSize, DL, getShiftAmountTy(Lo.getValueType()))); Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); // Compute the low part as N0. 
Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); return CombineTo(N, Lo, Hi); } } return SDValue(); } SDValue DAGCombiner::visitSMULO(SDNode *N) { // (smulo x, 2) -> (saddo x, x) if (ConstantSDNode *C2 = dyn_cast(N->getOperand(1))) if (C2->getAPIntValue() == 2) return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(), N->getOperand(0), N->getOperand(0)); return SDValue(); } SDValue DAGCombiner::visitUMULO(SDNode *N) { // (umulo x, 2) -> (uaddo x, x) if (ConstantSDNode *C2 = dyn_cast(N->getOperand(1))) if (C2->getAPIntValue() == 2) return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), N->getOperand(0), N->getOperand(0)); return SDValue(); } SDValue DAGCombiner::visitIMINMAX(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; // fold (add c1, c2) -> c1+c2 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); if (N0C && N1C) return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); return SDValue(); } /// If this is a binary operator with two operands of the same opcode, try to /// simplify it. SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); EVT VT = N0.getValueType(); assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); // Bail early if none of these transforms apply. 
if (N0.getNumOperands() == 0) return SDValue(); // For each of OP in AND/OR/XOR: // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y)) // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) // // do not sink logical op inside of a vector extend, since it may combine // into a vsetcc. EVT Op0VT = N0.getOperand(0).getValueType(); if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::BSWAP || // Avoid infinite looping with PromoteIntBinOp. (N0.getOpcode() == ISD::ANY_EXTEND && (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || (N0.getOpcode() == ISD::TRUNCATE && (!TLI.isZExtFree(VT, Op0VT) || !TLI.isTruncateFree(Op0VT, VT)) && TLI.isTypeLegal(Op0VT))) && !VT.isVector() && Op0VT == N1.getOperand(0).getValueType() && (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), N0.getOperand(0).getValueType(), N0.getOperand(0), N1.getOperand(0)); AddToWorklist(ORNode.getNode()); return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode); } // For each of OP in SHL/SRL/SRA/AND... 
// fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z) // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z) // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z) if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) && N0.getOperand(1) == N1.getOperand(1)) { SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), N0.getOperand(0).getValueType(), N0.getOperand(0), N1.getOperand(0)); AddToWorklist(ORNode.getNode()); return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode, N0.getOperand(1)); } // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) // Only perform this optimization up until type legalization, before // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and // we don't want to undo this promotion. // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper // on scalars. if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR) && Level <= AfterLegalizeTypes) { SDValue In0 = N0.getOperand(0); SDValue In1 = N1.getOperand(0); EVT In0Ty = In0.getValueType(); EVT In1Ty = In1.getValueType(); SDLoc DL(N); // If both incoming values are integers, and the original types are the // same. if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1); SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op); AddToWorklist(Op.getNode()); return BC; } } // Xor/and/or are indifferent to the swizzle operation (shuffle of one value). // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B)) // If both shuffles use the same mask, and both shuffle within a single // vector, then it is worthwhile to move the swizzle after the operation. // The type-legalizer generates this pattern when loading illegal // vector types from memory. In many cases this allows additional shuffle // optimizations. 
// There are other cases where moving the shuffle after the xor/and/or // is profitable even if shuffles don't perform a swizzle. // If both shuffles use the same mask, and both shuffles have the same first // or second operand, then it might still be profitable to move the shuffle // after the xor/and/or operation. if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) { ShuffleVectorSDNode *SVN0 = cast(N0); ShuffleVectorSDNode *SVN1 = cast(N1); assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && "Inputs to shuffles are not the same type"); // Check that both shuffles use the same mask. The masks are known to be of // the same length because the result vector type is the same. // Check also that shuffles have only one use to avoid introducing extra // instructions. if (SVN0->hasOneUse() && SVN1->hasOneUse() && SVN0->getMask().equals(SVN1->getMask())) { SDValue ShOp = N0->getOperand(1); // Don't try to fold this node if it requires introducing a // build vector of all zeros that might be illegal at this stage. if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) { if (!LegalTypes) ShOp = DAG.getConstant(0, SDLoc(N), VT); else ShOp = SDValue(); } // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C) // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C) // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0) if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) { SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0->getOperand(0), N1->getOperand(0)); AddToWorklist(NewNode.getNode()); return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp, SVN0->getMask()); } // Don't try to fold this node if it requires introducing a // build vector of all zeros that might be illegal at this stage. 
ShOp = N0->getOperand(0); if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) { if (!LegalTypes) ShOp = DAG.getConstant(0, SDLoc(N), VT); else ShOp = SDValue(); } // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B)) // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B)) // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B)) if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) { SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0->getOperand(1), N1->getOperand(1)); AddToWorklist(NewNode.getNode()); return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode, SVN0->getMask()); } } } return SDValue(); } /// This contains all DAGCombine rules which reduce two values combined by /// an And operation to a single value. This makes them reusable in the context /// of visitSELECT(). Rules involving constants are not included as /// visitSELECT() already handles those cases. SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference) { EVT VT = N1.getValueType(); // fold (and x, undef) -> 0 if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, SDLoc(LocReference), VT); // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) SDValue LL, LR, RL, RR, CC0, CC1; if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ ISD::CondCode Op0 = cast(CC0)->get(); ISD::CondCode Op1 = cast(CC1)->get(); if (LR == RR && isa(LR) && Op0 == Op1 && LL.getValueType().isInteger()) { // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) if (isNullConstant(LR) && Op1 == ISD::SETEQ) { EVT CCVT = getSetCCResultType(LR.getValueType()); if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) { SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), LR.getValueType(), LL, RL); AddToWorklist(ORNode.getNode()); return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); } } if (isAllOnesConstant(LR)) { // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) if (Op1 == ISD::SETEQ) { EVT CCVT = 
getSetCCResultType(LR.getValueType()); if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) { SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), LR.getValueType(), LL, RL); AddToWorklist(ANDNode.getNode()); return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); } } // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) if (Op1 == ISD::SETGT) { EVT CCVT = getSetCCResultType(LR.getValueType()); if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) { SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), LR.getValueType(), LL, RL); AddToWorklist(ORNode.getNode()); return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); } } } } // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) if (LL == RL && isa(LR) && isa(RR) && Op0 == Op1 && LL.getValueType().isInteger() && Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) || (isAllOnesConstant(LR) && isNullConstant(RR)))) { EVT CCVT = getSetCCResultType(LL.getValueType()); if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) { SDLoc DL(N0); SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(), LL, DAG.getConstant(1, DL, LL.getValueType())); AddToWorklist(ADDNode.getNode()); return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode, DAG.getConstant(2, DL, LL.getValueType()), ISD::SETUGE); } } // canonicalize equivalent to ll == rl if (LL == RR && LR == RL) { Op1 = ISD::getSetCCSwappedOperands(Op1); std::swap(RL, RR); } if (LL == RL && LR == RR) { bool isInteger = LL.getValueType().isInteger(); ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); if (Result != ISD::SETCC_INVALID && (!LegalOperations || (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) { EVT CCVT = getSetCCResultType(LL.getValueType()); if (N0.getValueType() == CCVT || (!LegalOperations && N0.getValueType() == MVT::i1)) return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), LL, LR, Result); } } } if 
(N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && VT.getSizeInBits() <= 64) { if (ConstantSDNode *ADDI = dyn_cast(N0.getOperand(1))) { APInt ADDC = ADDI->getAPIntValue(); if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal // immediate for an add, but it is legal if its top c2 bits are set, // transform the ADD so the immediate doesn't need to be materialized // in a register. if (ConstantSDNode *SRLI = dyn_cast(N1.getOperand(1))) { APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), SRLI->getZExtValue()); if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { ADDC |= Mask; if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { SDLoc DL(N0); SDValue NewAdd = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), DAG.getConstant(ADDC, DL, VT)); CombineTo(N0.getNode(), NewAdd); // Return N so it doesn't get rechecked! return SDValue(LocReference, 0); } } } } } } // Reduce bit extract of low half of an integer to the narrower type. // (and (srl i64:x, K), KMask) -> // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask) if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { if (ConstantSDNode *CAnd = dyn_cast(N1)) { if (ConstantSDNode *CShift = dyn_cast(N0.getOperand(1))) { unsigned Size = VT.getSizeInBits(); const APInt &AndMask = CAnd->getAPIntValue(); unsigned ShiftBits = CShift->getZExtValue(); // Bail out, this node will probably disappear anyway. if (ShiftBits == 0) return SDValue(); unsigned MaskBits = AndMask.countTrailingOnes(); EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2); if (APIntOps::isMask(AndMask) && // Required bits must not span the two halves of the integer and // must fit in the half size type. 
(ShiftBits + MaskBits <= Size / 2) && TLI.isNarrowingProfitable(VT, HalfVT) && TLI.isTypeDesirableForOp(ISD::AND, HalfVT) && TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) && TLI.isTruncateFree(VT, HalfVT) && TLI.isZExtFree(HalfVT, VT)) { // The isNarrowingProfitable is to avoid regressions on PPC and // AArch64 which match a few 64-bit bit insert / bit extract patterns // on downstream users of this. Those patterns could probably be // extended to handle extensions mixed in. SDValue SL(N0); assert(MaskBits <= Size); // Extracting the highest bit of the low half. EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout()); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT, N0.getOperand(0)); SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT); SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT); SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK); SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask); return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And); } } } } return SDValue(); } bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT, bool &NarrowLoad) { uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits(); if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue())) return false; ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); LoadedVT = LoadN->getMemoryVT(); if (ExtVT == LoadedVT && (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) { // ZEXTLOAD will match without needing to change the size of the value being // loaded. NarrowLoad = false; return true; } // Do not change the width of a volatile load. if (LoadN->isVolatile()) return false; // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). 
if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound()) return false; if (LegalOperations && !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT)) return false; if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT)) return false; NarrowLoad = true; return true; } SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N1.getValueType(); // x & x --> x if (N0 == N1) return N0; // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; // fold (and x, 0) -> 0, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) // do not return N0, because undef node may exist in N0 return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()), SDLoc(N), N0.getValueType()); if (ISD::isBuildVectorAllZeros(N1.getNode())) // do not return N1, because undef node may exist in N1 return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()), SDLoc(N), N1.getValueType()); // fold (and x, -1) -> x, vector edition if (ISD::isBuildVectorAllOnes(N0.getNode())) return N1; if (ISD::isBuildVectorAllOnes(N1.getNode())) return N0; } // fold (and c1, c2) -> c1&c2 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); // fold (and x, -1) -> x if (isAllOnesConstant(N1)) return N0; // if (and x, c) is known to be zero, return 0 unsigned BitWidth = VT.getScalarSizeInBits(); if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(BitWidth))) return DAG.getConstant(0, SDLoc(N), VT); // reassociate and if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1)) return RAND; // fold (and (or x, C), D) -> D if (C & D) == D if (N1C && 
N0.getOpcode() == ISD::OR) if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1))) if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) return N1; // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N0Op0 = N0.getOperand(0); APInt Mask = ~N1C->getAPIntValue(); Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits()); if (DAG.MaskedValueIsZero(N0Op0, Mask)) { SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N0.getValueType(), N0Op0); // Replace uses of the AND with uses of the Zero extend node. CombineTo(N, Zext); // We actually want to replace all uses of the any_extend with the // zero_extend, to avoid duplicating things. This will later cause this // AND to be folded. CombineTo(N0.getNode(), Zext); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must // already be zero by virtue of the width of the base type of the load. // // the 'X' node here can either be nothing or an extract_vector_elt to catch // more cases. if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() && N0.getOperand(0).getOpcode() == ISD::LOAD && N0.getOperand(0).getResNo() == 0) || (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) { LoadSDNode *Load = cast( (N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0) ); // Get the constant (if applicable) the zero'th operand is being ANDed with. // This can be a pure constant or a vector splat, in which case we treat the // vector as a scalar and use the splat value. 
APInt Constant = APInt::getNullValue(1); if (const ConstantSDNode *C = dyn_cast(N1)) { Constant = C->getAPIntValue(); } else if (BuildVectorSDNode *Vector = dyn_cast(N1)) { APInt SplatValue, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs); if (IsSplat) { // Undef bits can contribute to a possible optimisation if set, so // set them. SplatValue |= SplatUndef; // The splat value may be something like "0x00FFFFFF", which means 0 for // the first vector value and FF for the rest, repeating. We need a mask // that will apply equally to all members of the vector, so AND all the // lanes of the constant together. EVT VT = Vector->getValueType(0); unsigned BitWidth = VT.getScalarSizeInBits(); // If the splat value has been compressed to a bitlength lower // than the size of the vector lane, we need to re-expand it to // the lane size. if (BitWidth > SplatBitSize) for (SplatValue = SplatValue.zextOrTrunc(BitWidth); SplatBitSize < BitWidth; SplatBitSize = SplatBitSize * 2) SplatValue |= SplatValue.shl(SplatBitSize); // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value. if (SplatBitSize % BitWidth == 0) { Constant = APInt::getAllOnesValue(BitWidth); for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); } } } // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is // actually legal and isn't going to get expanded, else this is a false // optimisation. bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, Load->getValueType(0), Load->getMemoryVT()); // Resize the constant to the same size as the original memory access before // extension. If it is still the AllOnesValue then this AND is completely // unneeded. 
Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits()); bool B; switch (Load->getExtensionType()) { default: B = false; break; case ISD::EXTLOAD: B = CanZextLoadProfitably; break; case ISD::ZEXTLOAD: case ISD::NON_EXTLOAD: B = true; break; } if (B && Constant.isAllOnesValue()) { // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to // preserve semantics once we get rid of the AND. SDValue NewLoad(Load, 0); if (Load->getExtensionType() == ISD::EXTLOAD) { NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, Load->getValueType(0), SDLoc(Load), Load->getChain(), Load->getBasePtr(), Load->getOffset(), Load->getMemoryVT(), Load->getMemOperand()); // Replace uses of the EXTLOAD with the new ZEXTLOAD. if (Load->getNumValues() == 3) { // PRE/POST_INC loads have 3 values. SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1), NewLoad.getValue(2) }; CombineTo(Load, To, 3, true); } else { CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); } } // Fold the AND away, taking care not to fold to the old load node if we // replaced it. CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } // fold (and (load x), 255) -> (zextload x, i8) // fold (and (extload x, i16), 255) -> (zextload x, i8) // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD || (N0.getOpcode() == ISD::ANY_EXTEND && N0.getOperand(0).getOpcode() == ISD::LOAD))) { bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; LoadSDNode *LN0 = HasAnyExt ? cast(N0.getOperand(0)) : cast(N0); if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) { auto NarrowLoad = false; EVT LoadResultTy = HasAnyExt ? 
LN0->getValueType(0) : VT; EVT ExtVT, LoadedVT; if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT, NarrowLoad)) { if (!NarrowLoad) { SDValue NewLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), LN0->getBasePtr(), ExtVT, LN0->getMemOperand()); AddToWorklist(N); CombineTo(LN0, NewLoad, NewLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } else { EVT PtrType = LN0->getOperand(1).getValueType(); unsigned Alignment = LN0->getAlignment(); SDValue NewPtr = LN0->getBasePtr(); // For big endian targets, we need to add an offset to the pointer // to load the correct bytes. For little endian systems, we merely // need to read fewer bytes from the same pointer. if (DAG.getDataLayout().isBigEndian()) { unsigned LVTStoreBytes = LoadedVT.getStoreSize(); unsigned EVTStoreBytes = ExtVT.getStoreSize(); unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; SDLoc DL(LN0); NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, DAG.getConstant(PtrOff, DL, PtrType)); Alignment = MinAlign(Alignment, PtrOff); } AddToWorklist(NewPtr.getNode()); SDValue Load = DAG.getExtLoad( ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr, LN0->getPointerInfo(), ExtVT, Alignment, LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); AddToWorklist(N); CombineTo(LN0, Load, Load.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } } } if (SDValue Combined = visitANDLike(N0, N1, N)) return Combined; // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) if (N0.getOpcode() == N1.getOpcode()) if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) return Tmp; // Masking the negated extension of a boolean is just the zero-extended // boolean: // and (sub 0, zext(bool X)), 1 --> zext(bool X) // and (sub 0, sext(bool X)), 1 --> zext(bool X) // // Note: the SimplifyDemandedBits fold below can make an information-losing // transform, and then we have no way to find this better fold. 
if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) { ConstantSDNode *SubLHS = isConstOrConstSplat(N0.getOperand(0)); SDValue SubRHS = N0.getOperand(1); if (SubLHS && SubLHS->isNullValue()) { if (SubRHS.getOpcode() == ISD::ZERO_EXTEND && SubRHS.getOperand(0).getScalarValueSizeInBits() == 1) return SubRHS; if (SubRHS.getOpcode() == ISD::SIGN_EXTEND && SubRHS.getOperand(0).getScalarValueSizeInBits() == 1) return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0)); } } // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) // fold (and (sra)) -> (and (srl)) when possible. if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // fold (zext_inreg (extload x)) -> (zextload x) if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { LoadSDNode *LN0 = cast(N0); EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. unsigned BitWidth = N1.getScalarValueSizeInBits(); if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, BitWidth - MemVT.getScalarSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); AddToWorklist(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast(N0); EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. 
unsigned BitWidth = N1.getScalarValueSizeInBits(); if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, BitWidth - MemVT.getScalarSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); AddToWorklist(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const) if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) { if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), N0.getOperand(1), false)) return BSwap; } return SDValue(); } /// Match (a >> 8) | (a << 8) as (bswap a) >> (bitwidth - 16); i16 needs no shift. SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits) { if (!LegalOperations) return SDValue(); EVT VT = N->getValueType(0); if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16) return SDValue(); if (!TLI.isOperationLegal(ISD::BSWAP, VT)) return SDValue(); // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff) bool LookPassAnd0 = false; bool LookPassAnd1 = false; if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL) std::swap(N0, N1); if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL) std::swap(N0, N1); if (N0.getOpcode() == ISD::AND) { if (!N0.getNode()->hasOneUse()) return SDValue(); ConstantSDNode *N01C = dyn_cast(N0.getOperand(1)); if (!N01C || N01C->getZExtValue() != 0xFF00) return SDValue(); N0 = N0.getOperand(0); LookPassAnd0 = true; } if (N1.getOpcode() == ISD::AND) { if (!N1.getNode()->hasOneUse()) return SDValue(); ConstantSDNode *N11C = dyn_cast(N1.getOperand(1)); if (!N11C || N11C->getZExtValue() != 0xFF) return SDValue(); N1 = N1.getOperand(0); LookPassAnd1 = true; } if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == 
ISD::SHL) std::swap(N0, N1); if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) return SDValue(); if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse()) return SDValue(); ConstantSDNode *N01C = dyn_cast(N0.getOperand(1)); ConstantSDNode *N11C = dyn_cast(N1.getOperand(1)); if (!N01C || !N11C) return SDValue(); if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8) return SDValue(); // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8) SDValue N00 = N0->getOperand(0); if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) { if (!N00.getNode()->hasOneUse()) return SDValue(); ConstantSDNode *N001C = dyn_cast(N00.getOperand(1)); if (!N001C || N001C->getZExtValue() != 0xFF) return SDValue(); N00 = N00.getOperand(0); LookPassAnd0 = true; } SDValue N10 = N1->getOperand(0); if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) { if (!N10.getNode()->hasOneUse()) return SDValue(); ConstantSDNode *N101C = dyn_cast(N10.getOperand(1)); if (!N101C || N101C->getZExtValue() != 0xFF00) return SDValue(); N10 = N10.getOperand(0); LookPassAnd1 = true; } if (N00 != N10) return SDValue(); // Make sure everything beyond the low halfword gets set to zero since the SRL // 16 will clear the top bits. unsigned OpSizeInBits = VT.getSizeInBits(); if (DemandHighBits && OpSizeInBits > 16) { // If the left-shift isn't masked out then the only way this is a bswap is // if all bits beyond the low 8 are 0. In that case the entire pattern // reduces to a left shift anyway: leave it for other parts of the combiner. if (!LookPassAnd0) return SDValue(); // However, if the right shift isn't masked out then it might be because // it's not needed. See if we can spot that too. 
if (!LookPassAnd1 && !DAG.MaskedValueIsZero( N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16))) return SDValue(); } SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00); if (OpSizeInBits > 16) { SDLoc DL(N); Res = DAG.getNode(ISD::SRL, DL, VT, Res, DAG.getConstant(OpSizeInBits - 16, DL, getShiftAmountTy(VT))); } return Res; } /// Return true if the specified node is an element that makes up a 32-bit /// packed halfword byteswap. /// ((x & 0x000000ff) << 8) | /// ((x & 0x0000ff00) >> 8) | /// ((x & 0x00ff0000) << 8) | /// ((x & 0xff000000) >> 8) static bool isBSwapHWordElement(SDValue N, MutableArrayRef Parts) { if (!N.getNode()->hasOneUse()) return false; unsigned Opc = N.getOpcode(); if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL) return false; ConstantSDNode *N1C = dyn_cast(N.getOperand(1)); if (!N1C) return false; unsigned Num; switch (N1C->getZExtValue()) { default: return false; case 0xFF: Num = 0; break; case 0xFF00: Num = 1; break; case 0xFF0000: Num = 2; break; case 0xFF000000: Num = 3; break; } // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00). 
SDValue N0 = N.getOperand(0); if (Opc == ISD::AND) { if (Num == 0 || Num == 2) { // (x >> 8) & 0xff // (x >> 8) & 0xff0000 if (N0.getOpcode() != ISD::SRL) return false; ConstantSDNode *C = dyn_cast(N0.getOperand(1)); if (!C || C->getZExtValue() != 8) return false; } else { // (x << 8) & 0xff00 // (x << 8) & 0xff000000 if (N0.getOpcode() != ISD::SHL) return false; ConstantSDNode *C = dyn_cast(N0.getOperand(1)); if (!C || C->getZExtValue() != 8) return false; } } else if (Opc == ISD::SHL) { // NOTE(review): N1C (and therefore Num) was read from N.getOperand(1), which for SHL/SRL is the shift amount rather than an AND mask, yet that same operand must equal 8 below; the SHL and SRL branches look unreachable as written -- confirm before relying on them. // (x & 0xff) << 8 // (x & 0xff0000) << 8 if (Num != 0 && Num != 2) return false; ConstantSDNode *C = dyn_cast(N.getOperand(1)); if (!C || C->getZExtValue() != 8) return false; } else { // Opc == ISD::SRL // (x & 0xff00) >> 8 // (x & 0xff000000) >> 8 if (Num != 1 && Num != 3) return false; ConstantSDNode *C = dyn_cast(N.getOperand(1)); if (!C || C->getZExtValue() != 8) return false; } if (Parts[Num]) return false; Parts[Num] = N0.getOperand(0).getNode(); return true; } /// Match a 32-bit packed halfword bswap. 
That is /// ((x & 0x000000ff) << 8) | /// ((x & 0x0000ff00) >> 8) | /// ((x & 0x00ff0000) << 8) | /// ((x & 0xff000000) >> 8) /// => (rotl (bswap x), 16) SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (!LegalOperations) return SDValue(); EVT VT = N->getValueType(0); if (VT != MVT::i32) return SDValue(); if (!TLI.isOperationLegal(ISD::BSWAP, VT)) return SDValue(); // Look for either // (or (or (and), (and)), (or (and), (and))) // (or (or (or (and), (and)), (and)), (and)) if (N0.getOpcode() != ISD::OR) return SDValue(); SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); SDNode *Parts[4] = {}; if (N1.getOpcode() == ISD::OR && N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { // (or (or (and), (and)), (or (and), (and))) SDValue N000 = N00.getOperand(0); if (!isBSwapHWordElement(N000, Parts)) return SDValue(); SDValue N001 = N00.getOperand(1); if (!isBSwapHWordElement(N001, Parts)) return SDValue(); SDValue N010 = N01.getOperand(0); if (!isBSwapHWordElement(N010, Parts)) return SDValue(); SDValue N011 = N01.getOperand(1); if (!isBSwapHWordElement(N011, Parts)) return SDValue(); } else { // (or (or (or (and), (and)), (and)), (and)) if (!isBSwapHWordElement(N1, Parts)) return SDValue(); if (!isBSwapHWordElement(N01, Parts)) return SDValue(); if (N00.getOpcode() != ISD::OR) return SDValue(); SDValue N000 = N00.getOperand(0); if (!isBSwapHWordElement(N000, Parts)) return SDValue(); SDValue N001 = N00.getOperand(1); if (!isBSwapHWordElement(N001, Parts)) return SDValue(); } // Make sure the parts are all coming from the same node. if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) return SDValue(); SDLoc DL(N); SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, SDValue(Parts[0], 0)); // Result of the bswap should be rotated by 16. If it's not legal, then // do (x << 16) | (x >> 16). 
SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT)); if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt); if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt); return DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt), DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt)); } /// This contains all DAGCombine rules which reduce two values combined by /// an Or operation to a single value \see visitANDLike(). SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { EVT VT = N1.getValueType(); // fold (or x, undef) -> -1 if (!LegalOperations && (N0.isUndef() || N1.isUndef())) { EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), SDLoc(LocReference), VT); } // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) SDValue LL, LR, RL, RR, CC0, CC1; if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ ISD::CondCode Op0 = cast(CC0)->get(); ISD::CondCode Op1 = cast(CC1)->get(); if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) { // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) // fold (or (setlt X, 0), (setlt Y, 0)) -> (setlt (or X, Y), 0) if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { EVT CCVT = getSetCCResultType(LR.getValueType()); if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) { SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), LR.getValueType(), LL, RL); AddToWorklist(ORNode.getNode()); return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); } } // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) // fold (or (setgt X, -1), (setgt Y, -1)) -> (setgt (and X, Y), -1) if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { EVT CCVT = getSetCCResultType(LR.getValueType()); if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) { SDValue 
ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), LR.getValueType(), LL, RL); AddToWorklist(ANDNode.getNode()); return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); } } } // canonicalize equivalent to ll == rl if (LL == RR && LR == RL) { Op1 = ISD::getSetCCSwappedOperands(Op1); std::swap(RL, RR); } if (LL == RL && LR == RR) { bool isInteger = LL.getValueType().isInteger(); ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); if (Result != ISD::SETCC_INVALID && (!LegalOperations || (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) { EVT CCVT = getSetCCResultType(LL.getValueType()); if (N0.getValueType() == CCVT || (!LegalOperations && N0.getValueType() == MVT::i1)) return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), LL, LR, Result); } } } // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && // Don't increase # computations. (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { // We can only do this xform if we know that bits from X that are set in C2 // but not in C1 are already zero. Likewise for Y. if (const ConstantSDNode *N0O1C = getAsNonOpaqueConstant(N0.getOperand(1))) { if (const ConstantSDNode *N1O1C = getAsNonOpaqueConstant(N1.getOperand(1))) { // We can only do this xform if we know that bits from X that are set in // C2 but not in C1 are already zero. Likewise for Y. 
const APInt &LHSMask = N0O1C->getAPIntValue(); const APInt &RHSMask = N1O1C->getAPIntValue(); if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1.getOperand(0)); SDLoc DL(LocReference); return DAG.getNode(ISD::AND, DL, VT, X, DAG.getConstant(LHSMask | RHSMask, DL, VT)); } } } } // (or (and X, M), (and X, N)) -> (and X, (or M, N)) if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && N0.getOperand(0) == N1.getOperand(0) && // Don't increase # computations. (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(1), N1.getOperand(1)); return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X); } return SDValue(); } SDValue DAGCombiner::visitOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N1.getValueType(); // x | x --> x if (N0 == N1) return N0; // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; // fold (or x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) return N1; if (ISD::isBuildVectorAllZeros(N1.getNode())) return N0; // fold (or x, -1) -> -1, vector edition if (ISD::isBuildVectorAllOnes(N0.getNode())) // do not return N0, because undef node may exist in N0 return DAG.getConstant( APInt::getAllOnesValue(N0.getScalarValueSizeInBits()), SDLoc(N), N0.getValueType()); if (ISD::isBuildVectorAllOnes(N1.getNode())) // do not return N1, because undef node may exist in N1 return DAG.getConstant( APInt::getAllOnesValue(N1.getScalarValueSizeInBits()), SDLoc(N), N1.getValueType()); // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask) // Do this only if the resulting shuffle is legal. if (isa(N0) && isa(N1) && // Avoid folding a node with illegal type. 
TLI.isTypeLegal(VT)) { bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode()); bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode()); bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode()); // Ensure both shuffles have a zero input. if ((ZeroN00 || ZeroN01) && (ZeroN10 || ZeroN11)) { assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!"); assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!"); const ShuffleVectorSDNode *SV0 = cast(N0); const ShuffleVectorSDNode *SV1 = cast(N1); bool CanFold = true; int NumElts = VT.getVectorNumElements(); SmallVector Mask(NumElts); for (int i = 0; i != NumElts; ++i) { int M0 = SV0->getMaskElt(i); int M1 = SV1->getMaskElt(i); // Determine if either index is pointing to a zero vector. bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts)); bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts)); // If one element is zero and the otherside is undef, keep undef. // This also handles the case that both are undef. if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) { Mask[i] = -1; continue; } // Make sure only one of the elements is zero. if (M0Zero == M1Zero) { CanFold = false; break; } assert((M0 >= 0 || M1 >= 0) && "Undef index!"); // We have a zero and non-zero element. If the non-zero came from // SV0 make the index a LHS index. If it came from SV1, make it // a RHS index. We need to mod by NumElts because we don't care // which operand it came from in the original shuffles. Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts; } if (CanFold) { SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0); SDValue NewRHS = ZeroN10 ? 
N1.getOperand(1) : N1.getOperand(0); bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT); if (!LegalMask) { std::swap(NewLHS, NewRHS); ShuffleVectorSDNode::commuteMask(Mask); LegalMask = TLI.isShuffleMaskLegal(Mask, VT); } if (LegalMask) return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask); } } } } // fold (or c1, c2) -> c1|c2 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); ConstantSDNode *N1C = dyn_cast(N1); if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); // fold (or x, 0) -> x if (isNullConstant(N1)) return N0; // fold (or x, -1) -> -1 if (isAllOnesConstant(N1)) return N1; // fold (or x, c) -> c iff (x & ~c) == 0 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) return N1; if (SDValue Combined = visitORLike(N0, N1, N)) return Combined; // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + srl) if (SDValue BSwap = MatchBSwapHWord(N, N0, N1)) return BSwap; if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1)) return BSwap; // reassociate or if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1)) return ROR; // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) // iff (c1 & c2) != 0. if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && isa(N0.getOperand(1))) { ConstantSDNode *C1 = cast(N0.getOperand(1)); if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, N1C, C1)) return DAG.getNode( ISD::AND, SDLoc(N), VT, DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); return SDValue(); } } // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) if (N0.getOpcode() == N1.getOpcode()) if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) return Tmp; // See if this is some rotate idiom. 
if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) return SDValue(Rot, 0); // Simplify the operands using demanded-bits information. if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); return SDValue(); } /// Match "(X shl/srl V1) & V2" where V2 may not be present. bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { if (Op.getOpcode() == ISD::AND) { if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) { Mask = Op.getOperand(1); Op = Op.getOperand(0); } else { return false; } } if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) { Shift = Op; return true; } return false; } // Return true if we can prove that, whenever Neg and Pos are both in the // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that // for two opposing shifts shift1 and shift2 and a value X with OpBits bits: // // (or (shift1 X, Neg), (shift2 X, Pos)) // // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate // in direction shift1 by Neg. The range [0, EltSize) means that we only need // to consider shift amounts with defined behavior. static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) { // If EltSize is a power of 2 then: // // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1) // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize). // // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check // for the stronger condition: // // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A] // // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1) // we can just replace Neg with Neg' for the rest of the function. // // In other cases we check for the even stronger condition: // // Neg == EltSize - Pos [B] // // for all Neg and Pos. Note that the (or ...) then invokes undefined // behavior if Pos == 0 (and consequently Neg == EltSize). 
// // We could actually use [A] whenever EltSize is a power of 2, but the // only extra cases that it would match are those uninteresting ones // where Neg and Pos are never in range at the same time. E.g. for // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) // as well as (sub 32, Pos), but: // // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos)) // // always invokes undefined behavior for 32-bit X. // // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise. unsigned MaskLoBits = 0; if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) { if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) { if (NegC->getAPIntValue() == EltSize - 1) { Neg = Neg.getOperand(0); MaskLoBits = Log2_64(EltSize); } } } // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. if (Neg.getOpcode() != ISD::SUB) return false; ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0)); if (!NegC) return false; SDValue NegOp1 = Neg.getOperand(1); // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with // Pos'. The truncation is redundant for the purpose of the equality. if (MaskLoBits && Pos.getOpcode() == ISD::AND) if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) if (PosC->getAPIntValue() == EltSize - 1) Pos = Pos.getOperand(0); // The condition we need is now: // // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask // // If NegOp1 == Pos then we need: // // EltSize & Mask == NegC & Mask // // (because "x & Mask" is a truncation and distributes through subtraction). APInt Width; if (Pos == NegOp1) Width = NegC->getAPIntValue(); // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC. 
// Then the condition we want to prove becomes: // // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask // // which, again because "x & Mask" is a truncation, becomes: // // NegC & Mask == (EltSize - PosC) & Mask // EltSize & Mask == (NegC + PosC) & Mask else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) { if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) Width = PosC->getAPIntValue() + NegC->getAPIntValue(); else return false; } else return false; // Now we just need to check that EltSize & Mask == Width & Mask. if (MaskLoBits) // EltSize & Mask is 0 since Mask is EltSize - 1. return Width.getLoBits(MaskLoBits) == 0; return Width == EltSize; } // A subroutine of MatchRotate used once we have found an OR of two opposite // shifts of Shifted. If Neg == (operand size) - Pos then the OR reduces // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the // former being preferred if supported. InnerPos and InnerNeg are Pos and // Neg with outer conversions stripped away. SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL) { // fold (or (shl x, (*ext y)), // (srl x, (*ext (sub 32, y)))) -> // (rotl x, y) or (rotr x, (sub 32, y)) // // fold (or (shl x, (*ext (sub 32, y))), // (srl x, (*ext y))) -> // (rotr x, y) or (rotl x, (sub 32, y)) EVT VT = Shifted.getValueType(); if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) { bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, HasPos ? Pos : Neg).getNode(); } return nullptr; } // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate // a rot[lr]. SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // Must be a legal type. 
Expanded 'n promoted things won't work with rotates. EVT VT = LHS.getValueType(); if (!TLI.isTypeLegal(VT)) return nullptr; // The target must have at least one rotate flavor. bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT); bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); if (!HasROTL && !HasROTR) return nullptr; // Match "(X shl/srl V1) & V2" where V2 may not be present. SDValue LHSShift; // The shift. SDValue LHSMask; // AND value if any. if (!MatchRotateHalf(LHS, LHSShift, LHSMask)) return nullptr; // Not part of a rotate. SDValue RHSShift; // The shift. SDValue RHSMask; // AND value if any. if (!MatchRotateHalf(RHS, RHSShift, RHSMask)) return nullptr; // Not part of a rotate. if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) return nullptr; // Not shifting the same value. if (LHSShift.getOpcode() == RHSShift.getOpcode()) return nullptr; // Shifts must disagree. // Canonicalize shl to left side in a shl/srl pair. if (RHSShift.getOpcode() == ISD::SHL) { std::swap(LHS, RHS); std::swap(LHSShift, RHSShift); std::swap(LHSMask, RHSMask); } unsigned EltSizeInBits = VT.getScalarSizeInBits(); SDValue LHSShiftArg = LHSShift.getOperand(0); SDValue LHSShiftAmt = LHSShift.getOperand(1); SDValue RHSShiftArg = RHSShift.getOperand(0); SDValue RHSShiftAmt = RHSShift.getOperand(1); // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) { uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue(); uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue(); if ((LShVal + RShVal) != EltSizeInBits) return nullptr; SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); // If there is an AND of either shifted operand, apply it to the result. 
if (LHSMask.getNode() || RHSMask.getNode()) { APInt AllBits = APInt::getAllOnesValue(EltSizeInBits); SDValue Mask = DAG.getConstant(AllBits, DL, VT); if (LHSMask.getNode()) { APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal); Mask = DAG.getNode(ISD::AND, DL, VT, Mask, DAG.getNode(ISD::OR, DL, VT, LHSMask, DAG.getConstant(RHSBits, DL, VT))); } if (RHSMask.getNode()) { APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal); Mask = DAG.getNode(ISD::AND, DL, VT, Mask, DAG.getNode(ISD::OR, DL, VT, RHSMask, DAG.getConstant(LHSBits, DL, VT))); } Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask); } return Rot.getNode(); } // If there is a mask here, and we have a variable shift, we can't be sure // that we're masking out the right stuff. if (LHSMask.getNode() || RHSMask.getNode()) return nullptr; // If the shift amount is sign/zext/any-extended just peel it off. SDValue LExtOp0 = LHSShiftAmt; SDValue RExtOp0 = RHSShiftAmt; if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) && (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { LExtOp0 = LHSShiftAmt.getOperand(0); RExtOp0 = RHSShiftAmt.getOperand(0); } SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL); if (TryL) return TryL; SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL); if (TryR) return TryR; return nullptr; } namespace { /// Helper struct to parse and store a memory address as base + index + offset. /// We ignore sign extensions when it is safe to do so. /// The following two expressions are not equivalent. 
To differentiate we need /// to store whether there was a sign extension involved in the index /// computation. /// (load (i64 add (i64 copyfromreg %c) /// (i64 signextend (add (i8 load %index) /// (i8 1)))) /// vs /// /// (load (i64 add (i64 copyfromreg %c) /// (i64 signextend (i32 add (i32 signextend (i8 load %index)) /// (i32 1))))) struct BaseIndexOffset { SDValue Base; SDValue Index; int64_t Offset; bool IsIndexSignExt; BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {} BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset, bool IsIndexSignExt) : Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {} bool equalBaseIndex(const BaseIndexOffset &Other) { return Other.Base == Base && Other.Index == Index && Other.IsIndexSignExt == IsIndexSignExt; } /// Parses tree in Ptr for base, index, offset addresses. static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG, int64_t PartialOffset = 0) { bool IsIndexSignExt = false; // Split up a folded GlobalAddress+Offset into its component parts. if (GlobalAddressSDNode *GA = dyn_cast(Ptr)) if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) { return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(), SDLoc(GA), GA->getValueType(0), /*Offset=*/PartialOffset, /*isTargetGA=*/false, GA->getTargetFlags()), SDValue(), GA->getOffset(), IsIndexSignExt); } // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD // instruction, then it could be just the BASE or everything else we don't // know how to handle. Just use Ptr as BASE and give up. if (Ptr->getOpcode() != ISD::ADD) return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); // We know that we have at least an ADD instruction. Try to pattern match // the simple case of BASE + OFFSET. 
if (isa(Ptr->getOperand(1))) { int64_t Offset = cast(Ptr->getOperand(1))->getSExtValue(); return match(Ptr->getOperand(0), DAG, Offset + PartialOffset); } // Inside a loop the current BASE pointer is calculated using an ADD and a // MUL instruction. In this case Ptr is the actual BASE pointer. // (i64 add (i64 %array_ptr) // (i64 mul (i64 %induction_var) // (i64 %element_size))) if (Ptr->getOperand(1)->getOpcode() == ISD::MUL) return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); // Look at Base + Index + Offset cases. SDValue Base = Ptr->getOperand(0); SDValue IndexOffset = Ptr->getOperand(1); // Skip signextends. if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) { IndexOffset = IndexOffset->getOperand(0); IsIndexSignExt = true; } // Either the case of Base + Index (no offset) or something else. if (IndexOffset->getOpcode() != ISD::ADD) return BaseIndexOffset(Base, IndexOffset, PartialOffset, IsIndexSignExt); // Now we have the case of Base + Index + offset. SDValue Index = IndexOffset->getOperand(0); SDValue Offset = IndexOffset->getOperand(1); if (!isa(Offset)) return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); // Ignore signextends. if (Index->getOpcode() == ISD::SIGN_EXTEND) { Index = Index->getOperand(0); IsIndexSignExt = true; } else IsIndexSignExt = false; int64_t Off = cast(Offset)->getSExtValue(); return BaseIndexOffset(Base, Index, Off + PartialOffset, IsIndexSignExt); } }; } // namespace SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; // fold (xor x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) return N1; if (ISD::isBuildVectorAllZeros(N1.getNode())) return N0; } // fold (xor undef, undef) -> 0. This is a common idiom (misuse). 
if (N0.isUndef() && N1.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); // fold (xor x, undef) -> undef if (N0.isUndef()) return N0; if (N1.isUndef()) return N1; // fold (xor c1, c2) -> c1^c2 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); // fold (xor x, 0) -> x if (isNullConstant(N1)) return N0; // reassociate xor if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1)) return RXOR; // fold !(x cc y) -> (x !cc y) SDValue LHS, RHS, CC; if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) { bool isInt = LHS.getValueType().isInteger(); ISD::CondCode NotCC = ISD::getSetCCInverse(cast(CC)->get(), isInt); if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) { switch (N0.getOpcode()) { default: llvm_unreachable("Unhandled SetCC Equivalent!"); case ISD::SETCC: return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC); case ISD::SELECT_CC: return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2), N0.getOperand(3), NotCC); } } } // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y))) if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND && N0.getNode()->hasOneUse() && isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ SDValue V = N0.getOperand(0); SDLoc DL(N0); V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V, DAG.getConstant(1, DL, V.getValueType())); AddToWorklist(V.getNode()); return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V); } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc if (isOneConstant(N1) && VT == MVT::i1 && (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); if (isOneUseSetCC(RHS) || 
isOneUseSetCC(LHS)) { unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); } } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants if (isAllOnesConstant(N1) && (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); if (isa(RHS) || isa(LHS)) { unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); } } // fold (xor (and x, y), y) -> (and (not x), y) if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && N0->getOperand(1) == N1) { SDValue X = N0->getOperand(0); SDValue NotX = DAG.getNOT(SDLoc(X), X, VT); AddToWorklist(NotX.getNode()); return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1); } // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)) if (N1C && N0.getOpcode() == ISD::XOR) { if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) { SDLoc DL(N); return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), DAG.getConstant(N1C->getAPIntValue() ^ N00C->getAPIntValue(), DL, VT)); } if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) { SDLoc DL(N); return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), DAG.getConstant(N1C->getAPIntValue() ^ N01C->getAPIntValue(), DL, VT)); } } // fold (xor x, x) -> 0 if (N0 == N1) return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); // fold (xor (shl 1, x), -1) -> (rotl ~1, x) // Here is a concrete example of this equivalence: // i16 x == 14 // i16 shl == 1 << 
14 == 16384 == 0b0100000000000000 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111 // // => // // i16 ~1 == 0b1111111111111110 // i16 rol(~1, 14) == 0b1011111111111111 // // Some additional tips to help conceptualize this transform: // - Try to see the operation as placing a single zero in a value of all ones. // - There exists no value for x which would allow the result to contain zero. // - Values of x larger than the bitwidth are undefined and do not require a // consistent result. // - Pushing the zero left requires shifting one bits in from the right. // A rotate left of ~1 is a nice way of achieving the desired result. if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) { SDLoc DL(N); return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT), N0.getOperand(1)); } // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) if (N0.getOpcode() == N1.getOpcode()) if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) return Tmp; // Simplify the expression using non-local knowledge. if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); return SDValue(); } /// Handle transforms common to the three shifts, when the shift amount is a /// constant. SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { SDNode *LHS = N->getOperand(0).getNode(); if (!LHS->hasOneUse()) return SDValue(); // We want to pull some binops through shifts, so that we have (and (shift)) // instead of (shift (and)), likewise for add, or, xor, etc. This sort of // thing happens with address calculations, so it's important to canonicalize // it. bool HighBitSet = false; // Can we transform this if the high bit is set? switch (LHS->getOpcode()) { default: return SDValue(); case ISD::OR: case ISD::XOR: HighBitSet = false; // We can only transform sra if the high bit is clear. 
break; case ISD::AND: HighBitSet = true; // We can only transform sra if the high bit is set. break; case ISD::ADD: if (N->getOpcode() != ISD::SHL) return SDValue(); // only shl(add) not sr[al](add). HighBitSet = false; // We can only transform sra if the high bit is clear. break; } // We require the RHS of the binop to be a constant and not opaque as well. ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1)); if (!BinOpCst) return SDValue(); // FIXME: disable this unless the input to the binop is a shift by a constant // or is copy/select.Enable this in other cases when figure out it's exactly profitable. SDNode *BinOpLHSVal = LHS->getOperand(0).getNode(); bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL || BinOpLHSVal->getOpcode() == ISD::SRA || BinOpLHSVal->getOpcode() == ISD::SRL; bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg || BinOpLHSVal->getOpcode() == ISD::SELECT; if ((!isShift || !isa(BinOpLHSVal->getOperand(1))) && !isCopyOrSelect) return SDValue(); if (isCopyOrSelect && N->hasOneUse()) return SDValue(); EVT VT = N->getValueType(0); // If this is a signed shift right, and the high bit is modified by the // logical operation, do not perform the transformation. The highBitSet // boolean indicates the value of the high bit of the constant which would // cause it to be modified for this operation. if (N->getOpcode() == ISD::SRA) { bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative(); if (BinOpRHSSignSet != HighBitSet) return SDValue(); } if (!TLI.isDesirableToCommuteWithShift(LHS)) return SDValue(); // Fold the constants, shifting the binop RHS by the shift amount. SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)), N->getValueType(0), LHS->getOperand(1), N->getOperand(1)); assert(isa(NewRHS) && "Folding was not successful!"); // Create the new shift. 
SDValue NewShift = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(0)), VT, LHS->getOperand(0), N->getOperand(1)); // Create the new binop. return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS); } SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { assert(N->getOpcode() == ISD::TRUNCATE); assert(N->getOperand(0).getOpcode() == ISD::AND); // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC) if (N->hasOneUse() && N->getOperand(0).hasOneUse()) { SDValue N01 = N->getOperand(0).getOperand(1); if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) { SDLoc DL(N); EVT TruncVT = N->getValueType(0); SDValue N00 = N->getOperand(0).getOperand(0); SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00); SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01); AddToWorklist(Trunc00.getNode()); AddToWorklist(Trunc01.getNode()); return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01); } } return SDValue(); } SDValue DAGCombiner::visitRotate(SDNode *N) { // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). 
if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
      N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 =
            distributeTruncateThroughAnd(N->getOperand(1).getNode()))
      return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
                         N->getOperand(0), NewOp1);
  }
  return SDValue();
}

/// Try the shift-left folds listed below on \p N.
///
/// \returns The replacement value (SDValue(N, 0) when N was simplified in
///          place), or an empty SDValue when no fold applies.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
                                                     N01CV, N1CV))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      }
    }
  }

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (shl c1, c2) -> c1<<c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
  // fold (shl 0, x) -> 0
  if (isNullConstant(N0))
    return N0;
  // fold (shl x, c >= size(x)) -> undef
  if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
    return DAG.getUNDEF(VT);
  // fold (shl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (shl undef, x) -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);
  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      SDLoc DL(N);
      APInt c1 = N0C1->getAPIntValue();
      APInt c2 = N1C->getAPIntValue();
      // Widen both amounts to a common width plus one bit so the sum below
      // cannot wrap.
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);

      APInt Sum = c1 + c2;
      if (Sum.uge(OpSizeInBits))
        return DAG.getConstant(0, DL, VT);

      return DAG.getNode(
          ISD::SHL, DL, VT, N0.getOperand(0),
          DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form. This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      APInt c1 = N0Op0C1->getAPIntValue();
      APInt c2 = N1C->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);

      EVT InnerShiftVT = N0Op0.getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      if (c2.uge(OpSizeInBits - InnerShiftSize)) {
        SDLoc DL(N0);
        APInt Sum = c1 + c2;
        if (Sum.uge(OpSizeInBits))
          return DAG.getConstant(0, DL, VT);

        return DAG.getNode(
            ISD::SHL, DL, VT,
            DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
            DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
      }
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
        uint64_t c1 = N0Op0C1->getZExtValue();
        uint64_t c2 = N1C->getZExtValue();
        if (c1 == c2) {
          SDValue NewOp0 = N0.getOperand(0);
          EVT CountVT = NewOp0.getOperand(1).getValueType();
          SDLoc DL(N);
          SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
                                       NewOp0,
                                       DAG.getConstant(c2, DL, CountVT));
          AddToWorklist(NewSHL.getNode());
          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
        }
      }
    }
  }

  // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
  // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
      cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t C1 = N0C1->getZExtValue();
      uint64_t C2 = N1C->getZExtValue();
      SDLoc DL(N);
      if (C1 <= C2)
        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                           DAG.getConstant(C2 - C1, DL, N1.getValueType()));
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
                         DAG.getConstant(C1 - C2, DL, N1.getValueType()));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
  //                               (and (srl x, (sub c1, c2)), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      if (c1 < OpSizeInBits) {
        uint64_t c2 = N1C->getZExtValue();
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask = Mask.shl(c2 - c1);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, DL, N1.getValueType()));
        } else {
          Mask = Mask.lshr(c1 - c2);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, DL, N1.getValueType()));
        }
        SDLoc DL(N0);
        return DAG.getNode(ISD::AND, DL, VT, Shift,
                           DAG.getConstant(Mask, DL, VT));
      }
    }
  }

  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
      isConstantOrConstantVector(N1, /* No Opaques */ true)) {
    unsigned BitSize = VT.getScalarSizeInBits();
    SDLoc DL(N);
    SDValue AllBits = DAG.getConstant(APInt::getAllOnesValue(BitSize), DL, VT);
    SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
  if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    AddToWorklist(Shl0.getNode());
    AddToWorklist(Shl1.getNode());
    return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
  }

  // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
  if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    // Only commit if the shifted multiplier folded to a constant.
    if (isConstantOrConstantVector(Shl))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
  }

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSHL = visitShiftByConstant(N, N1C))
      return NewSHL;

  return SDValue();
}

/// Try the arithmetic-shift-right folds listed below on \p N.
///
/// \returns The replacement value (SDValue(N, 0) when N was simplified in
///          place), or an empty SDValue when no fold applies.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // Arithmetic shifting an all-sign-bit value is a no-op.
if (DAG.ComputeNumSignBits(N0) == OpSizeInBits) return N0; // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; ConstantSDNode *N1C = isConstOrConstSplat(N1); // fold (sra c1, c2) -> (sra c1, c2) ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C); // fold (sra 0, x) -> 0 if (isNullConstant(N0)) return N0; // fold (sra -1, x) -> -1 if (isAllOnesConstant(N0)) return N0; // fold (sra x, c >= size(x)) -> undef if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) return DAG.getUNDEF(VT); // fold (sra x, 0) -> x if (N1C && N1C->isNullValue()) return N0; // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports // sext_inreg. if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) { unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue(); EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits); if (VT.isVector()) ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, VT.getVectorNumElements()); if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0), DAG.getValueType(ExtVT)); } // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) if (N1C && N0.getOpcode() == ISD::SRA) { if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { SDLoc DL(N); APInt c1 = N0C1->getAPIntValue(); APInt c2 = N1C->getAPIntValue(); zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); APInt Sum = c1 + c2; if (Sum.uge(OpSizeInBits)) Sum = APInt(OpSizeInBits, OpSizeInBits - 1); return DAG.getNode( ISD::SRA, DL, VT, N0.getOperand(0), DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType())); } } // fold (sra (shl X, m), (sub result_size, n)) // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for // result_size - n != m. // If truncate is free for the target sext(shl) is likely to result in better // code. 
if (N0.getOpcode() == ISD::SHL && N1C) { // Get the two constanst of the shifts, CN0 = m, CN = n. const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1)); if (N01C) { LLVMContext &Ctx = *DAG.getContext(); // Determine what the truncate's result bitsize and type would be. EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue()); if (VT.isVector()) TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements()); // Determine the residual right-shift amount. int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); // If the shift is not a no-op (in which case this should be just a sign // extend already), the truncated to type is legal, sign_extend is legal // on that type, and the truncate to that type is both legal and free, // perform the transform. if ((ShiftAmt > 0) && TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) && TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) && TLI.isTruncateFree(VT, TruncVT)) { SDLoc DL(N); SDValue Amt = DAG.getConstant(ShiftAmt, DL, getShiftAmountTy(N0.getOperand(0).getValueType())); SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Amt); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Shift); return DAG.getNode(ISD::SIGN_EXTEND, DL, N->getValueType(0), Trunc); } } } // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))). 
if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1); } // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2)) // if c1 is equal to the number of bits the trunc removes if (N0.getOpcode() == ISD::TRUNCATE && (N0.getOperand(0).getOpcode() == ISD::SRL || N0.getOperand(0).getOpcode() == ISD::SRA) && N0.getOperand(0).hasOneUse() && N0.getOperand(0).getOperand(1).hasOneUse() && N1C) { SDValue N0Op0 = N0.getOperand(0); if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) { unsigned LargeShiftVal = LargeShift->getZExtValue(); EVT LargeVT = N0Op0.getValueType(); if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) { SDLoc DL(N); SDValue Amt = DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL, getShiftAmountTy(N0Op0.getOperand(0).getValueType())); SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt); return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA); } } } // Simplify, based on bits shifted out of the LHS. if (N1C && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // If the sign bit is known to be zero, switch this to a SRL. 
if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); if (N1C && !N1C->isOpaque()) if (SDValue NewSRA = visitShiftByConstant(N, N1C)) return NewSRA; return SDValue(); } SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; ConstantSDNode *N1C = isConstOrConstSplat(N1); // fold (srl c1, c2) -> c1 >>u c2 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C); // fold (srl 0, x) -> 0 if (isNullConstant(N0)) return N0; // fold (srl x, c >= size(x)) -> undef if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) return DAG.getUNDEF(VT); // fold (srl x, 0) -> x if (N1C && N1C->isNullValue()) return N0; // if (srl x, c) is known to be zero, return 0 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(OpSizeInBits))) return DAG.getConstant(0, SDLoc(N), VT); // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) if (N1C && N0.getOpcode() == ISD::SRL) { if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { SDLoc DL(N); APInt c1 = N0C1->getAPIntValue(); APInt c2 = N1C->getAPIntValue(); zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); APInt Sum = c1 + c2; if (Sum.uge(OpSizeInBits)) return DAG.getConstant(0, DL, VT); return DAG.getNode( ISD::SRL, DL, VT, N0.getOperand(0), DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType())); } } // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2))) if (N1C && N0.getOpcode() == ISD::TRUNCATE && N0.getOperand(0).getOpcode() == ISD::SRL && isa(N0.getOperand(0)->getOperand(1))) { uint64_t c1 = cast(N0.getOperand(0)->getOperand(1))->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); EVT InnerShiftVT = N0.getOperand(0).getValueType(); EVT 
ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType(); uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); // This is only valid if the OpSizeInBits + c1 = size of inner shift. if (c1 + OpSizeInBits == InnerShiftSize) { SDLoc DL(N0); if (c1 + c2 >= InnerShiftSize) return DAG.getConstant(0, DL, VT); return DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(ISD::SRL, DL, InnerShiftVT, N0.getOperand(0)->getOperand(0), DAG.getConstant(c1 + c2, DL, ShiftCountVT))); } } // fold (srl (shl x, c), c) -> (and x, cst2) if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && isConstantOrConstantVector(N1, /* NoOpaques */ true)) { SDLoc DL(N); APInt AllBits = APInt::getAllOnesValue(N0.getScalarValueSizeInBits()); SDValue Mask = DAG.getNode(ISD::SRL, DL, VT, DAG.getConstant(AllBits, DL, VT), N1); AddToWorklist(Mask.getNode()); return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask); } // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask) if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { // Shifting in all undef bits? EVT SmallVT = N0.getOperand(0).getValueType(); unsigned BitSize = SmallVT.getScalarSizeInBits(); if (N1C->getZExtValue() >= BitSize) return DAG.getUNDEF(VT); if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { uint64_t ShiftAmt = N1C->getZExtValue(); SDLoc DL0(N0); SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT, N0.getOperand(0), DAG.getConstant(ShiftAmt, DL0, getShiftAmountTy(SmallVT))); AddToWorklist(SmallShift.getNode()); APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt); SDLoc DL(N); return DAG.getNode(ISD::AND, DL, VT, DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift), DAG.getConstant(Mask, DL, VT)); } } // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign // bit, which is unmodified by sra. 
if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) { if (N0.getOpcode() == ISD::SRA) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1); } // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit). if (N1C && N0.getOpcode() == ISD::CTLZ && N1C->getAPIntValue() == Log2_32(OpSizeInBits)) { APInt KnownZero, KnownOne; DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne); // If any of the input bits are KnownOne, then the input couldn't be all // zeros, thus the result of the srl will always be zero. if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT); // If all of the bits input the to ctlz node are known to be zero, then // the result of the ctlz is "32" and the result of the shift is one. APInt UnknownBits = ~KnownZero; if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT); // Otherwise, check to see if there is exactly one bit input to the ctlz. if ((UnknownBits & (UnknownBits - 1)) == 0) { // Okay, we know that only that the single bit specified by UnknownBits // could be set on input to the CTLZ node. If this bit is set, the SRL // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair // to an SRL/XOR pair, which is likely to simplify more. unsigned ShAmt = UnknownBits.countTrailingZeros(); SDValue Op = N0.getOperand(0); if (ShAmt) { SDLoc DL(N0); Op = DAG.getNode(ISD::SRL, DL, VT, Op, DAG.getConstant(ShAmt, DL, getShiftAmountTy(Op.getValueType()))); AddToWorklist(Op.getNode()); } SDLoc DL(N); return DAG.getNode(ISD::XOR, DL, VT, Op, DAG.getConstant(1, DL, VT)); } } // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1); } // fold operands of srl based on knowledge that the low bits are not // demanded. 
if (N1C && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); if (N1C && !N1C->isOpaque()) if (SDValue NewSRL = visitShiftByConstant(N, N1C)) return NewSRL; // Attempt to convert a srl of a load into a narrower zero-extending load. if (SDValue NarrowLoad = ReduceLoadWidth(N)) return NarrowLoad; // Here is a common situation. We want to optimize: // // %a = ... // %b = and i32 %a, 2 // %c = srl i32 %b, 1 // brcond i32 %c ... // // into // // %a = ... // %b = and %a, 2 // %c = setcc eq %b, 0 // brcond %c ... // // However when after the source operand of SRL is optimized into AND, the SRL // itself may not be optimized further. Look for it and add the BRCOND into // the worklist. if (N->hasOneUse()) { SDNode *Use = *N->use_begin(); if (Use->getOpcode() == ISD::BRCOND) AddToWorklist(Use); else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) { // Also look pass the truncate. Use = *Use->use_begin(); if (Use->getOpcode() == ISD::BRCOND) AddToWorklist(Use); } } return SDValue(); } SDValue DAGCombiner::visitBSWAP(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (bswap c1) -> c2 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0); // fold (bswap (bswap x)) -> x if (N0.getOpcode() == ISD::BSWAP) return N0->getOperand(0); return SDValue(); } SDValue DAGCombiner::visitBITREVERSE(SDNode *N) { SDValue N0 = N->getOperand(0); // fold (bitreverse (bitreverse x)) -> x if (N0.getOpcode() == ISD::BITREVERSE) return N0.getOperand(0); return SDValue(); } SDValue DAGCombiner::visitCTLZ(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (ctlz c1) -> c2 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0); return SDValue(); } SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (ctlz_zero_undef c1) -> c2 if 
(DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } SDValue DAGCombiner::visitCTTZ(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (cttz c1) -> c2 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); return SDValue(); } SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (cttz_zero_undef c1) -> c2 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } SDValue DAGCombiner::visitCTPOP(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (ctpop c1) -> c2 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0); return SDValue(); } /// \brief Generate Min/Max node static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG) { if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) return SDValue(); switch (CC) { case ISD::SETOLT: case ISD::SETOLE: case ISD::SETLT: case ISD::SETLE: case ISD::SETULT: case ISD::SETULE: { unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM; if (TLI.isOperationLegal(Opcode, VT)) return DAG.getNode(Opcode, DL, VT, LHS, RHS); return SDValue(); } case ISD::SETOGT: case ISD::SETOGE: case ISD::SETGT: case ISD::SETGE: case ISD::SETUGT: case ISD::SETUGE: { unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM; if (TLI.isOperationLegal(Opcode, VT)) return DAG.getNode(Opcode, DL, VT, LHS, RHS); return SDValue(); } default: return SDValue(); } } // TODO: We should handle other cases of selecting between {-1,0,1} here. 
/// Fold a select between the constants 0 and 1 into an xor of the condition.
///
/// Handles only (select Cond, 0, 1) with an integer result type, and only when
/// the condition is i1 or both boolean-content queries below report
/// ZeroOrOneBooleanContent.
///
/// \returns (xor Cond, 1), zero-extended or truncated to the result type when
///          the widths differ; an empty SDValue when the pattern does not
///          match.
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT CondVT = Cond.getValueType();
  SDLoc DL(N);

  // fold (select Cond, 0, 1) -> (xor Cond, 1)
  // We can't do this reliably if integer based booleans have different contents
  // to floating point based booleans. This is because we can't tell whether we
  // have an integer-based boolean or a floating-point-based boolean unless we
  // can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if C is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  // expression.
  if (VT.isInteger() &&
      (CondVT == MVT::i1 || (CondVT.isInteger() &&
                             TLI.getBooleanContents(false, true) ==
                                 TargetLowering::ZeroOrOneBooleanContent &&
                             TLI.getBooleanContents(false, false) ==
                                 TargetLowering::ZeroOrOneBooleanContent)) &&
      isNullConstant(N1) && isOneConstant(N2)) {
    SDValue NotCond = DAG.getNode(ISD::XOR, DL, CondVT, Cond,
                                  DAG.getConstant(1, DL, CondVT));
    if (VT.bitsEq(CondVT))
      return NotCond;
    return DAG.getZExtOrTrunc(NotCond, DL, VT);
  }

  return SDValue();
}

SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();

  // fold (select C, X, X) -> X
  if (N1 == N2)
    return N1;
  if (const ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0)) {
    // fold (select true, X, Y) -> X
    // fold (select false, X, Y) -> Y
    return !N0C->isNullValue() ?
N1 : N2; } // fold (select X, X, Y) -> (or X, Y) // fold (select X, 1, Y) -> (or C, Y) if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1))) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); if (SDValue V = foldSelectOfConstants(N)) return V; // fold (select C, 0, X) -> (and (not C), X) if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); AddToWorklist(NOTNode.getNode()); return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2); } // fold (select C, X, 1) -> (or (not C), X) if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); AddToWorklist(NOTNode.getNode()); return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1); } // fold (select X, Y, X) -> (and X, Y) // fold (select X, Y, 0) -> (and X, Y) if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2))) return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); // If we can fold this based on the true/false value, do so. if (SimplifySelectOps(N, N1, N2)) return SDValue(N, 0); // Don't revisit N. if (VT0 == MVT::i1) { // The code in this block deals with the following 2 equivalences: // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y)) // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y) // The target can specify its preferred form with the // shouldNormalizeToSelectSequence() callback. However we always transform // to the right anyway if we find the inner select exists in the DAG anyway // and we always transform to the left side if we know that we can further // optimize the combination of the conditions. 
bool normalizeToSequence = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT); // select (and Cond0, Cond1), X, Y // -> select Cond0, (select Cond1, X, Y), Y if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { SDValue Cond0 = N0->getOperand(0); SDValue Cond1 = N0->getOperand(1); SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond1, N1, N2); if (normalizeToSequence || !InnerSelect.use_empty()) return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, InnerSelect, N2); } // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { SDValue Cond0 = N0->getOperand(0); SDValue Cond1 = N0->getOperand(1); SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond1, N1, N2); if (normalizeToSequence || !InnerSelect.use_empty()) return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1, InnerSelect); } // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) { SDValue N1_0 = N1->getOperand(0); SDValue N1_1 = N1->getOperand(1); SDValue N1_2 = N1->getOperand(2); if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { // Create the actual and node if we can generate good code for it. if (!normalizeToSequence) { SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), N0, N1_0); return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And, N1_1, N2); } // Otherwise see if we can optimize the "and" to a better pattern. 
if (SDValue Combined = visitANDLike(N0, N1_0, N)) return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, N1_1, N2); } } // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) { SDValue N2_0 = N2->getOperand(0); SDValue N2_1 = N2->getOperand(1); SDValue N2_2 = N2->getOperand(2); if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { // Create the actual or node if we can generate good code for it. if (!normalizeToSequence) { SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), N0, N2_0); return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or, N1, N2_2); } // Otherwise see if we can optimize to a better pattern. if (SDValue Combined = visitORLike(N0, N2_0, N)) return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, N1, N2_2); } } } // select (xor Cond, 1), X, Y -> select Cond, Y, X if (VT0 == MVT::i1) { if (N0->getOpcode() == ISD::XOR) { if (auto *C = dyn_cast(N0->getOperand(1))) { SDValue Cond0 = N0->getOperand(0); if (C->isOne()) return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N2, N1); } } } // fold selects based on a setcc into other things, such as min/max/abs if (N0.getOpcode() == ISD::SETCC) { // select x, y (fcmp lt x, y) -> fminnum x, y // select x, y (fcmp gt x, y) -> fmaxnum x, y // // This is OK if we don't care about what happens if either operand is a // NaN. // // FIXME: Instead of testing for UnsafeFPMath, this should be checking for // no signed zeros as well as no nans. 
const TargetOptions &Options = DAG.getTarget().Options; if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() && DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) { ISD::CondCode CC = cast(N0.getOperand(2))->get(); if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG)) return FMinMax; } if ((!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || TLI.isOperationLegal(ISD::SELECT_CC, VT)) return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), N1, N2, N0.getOperand(2)); return SimplifySelect(SDLoc(N), N0, N1, N2); } return SDValue(); } static std::pair SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { SDLoc DL(N); EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); // Split the inputs. SDValue Lo, Hi, LL, LH, RL, RH; std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); return std::make_pair(Lo, Hi); } // This function assumes all the vselect's arguments are CONCAT_VECTOR // nodes and that the condition is a BV of ConstantSDNodes (or undefs). static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { SDLoc DL(N); SDValue Cond = N->getOperand(0); SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); EVT VT = N->getValueType(0); int NumElems = VT.getVectorNumElements(); assert(LHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getOpcode() == ISD::CONCAT_VECTORS && Cond.getOpcode() == ISD::BUILD_VECTOR); // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about // binary ones here. if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2) return SDValue(); // We're sure we have an even number of elements due to the // concat_vectors we have as arguments to vselect. 
// Skip BV elements until we find one that's not an UNDEF // After we find an UNDEF element, keep looping until we get to half the // length of the BV and see if all the non-undef nodes are the same. ConstantSDNode *BottomHalf = nullptr; for (int i = 0; i < NumElems / 2; ++i) { if (Cond->getOperand(i)->isUndef()) continue; if (BottomHalf == nullptr) BottomHalf = cast(Cond.getOperand(i)); else if (Cond->getOperand(i).getNode() != BottomHalf) return SDValue(); } // Do the same for the second half of the BuildVector ConstantSDNode *TopHalf = nullptr; for (int i = NumElems / 2; i < NumElems; ++i) { if (Cond->getOperand(i)->isUndef()) continue; if (TopHalf == nullptr) TopHalf = cast(Cond.getOperand(i)); else if (Cond->getOperand(i).getNode() != TopHalf) return SDValue(); } assert(TopHalf && BottomHalf && "One half of the selector was all UNDEFs and the other was all the " "same value. This should have been addressed before this function."); return DAG.getNode( ISD::CONCAT_VECTORS, DL, VT, BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0), TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); } SDValue DAGCombiner::visitMSCATTER(SDNode *N) { if (Level >= AfterLegalizeTypes) return SDValue(); MaskedScatterSDNode *MSC = cast(N); SDValue Mask = MSC->getMask(); SDValue Data = MSC->getValue(); SDLoc DL(N); // If the MSCATTER data type requires splitting and the mask is provided by a // SETCC, then split both nodes and its operands before legalization. This // prevents the type legalizer from unrolling SETCC into scalar comparisons // and enables future optimizations (e.g. min/max pattern matching on X86). if (Mask.getOpcode() != ISD::SETCC) return SDValue(); // Check if any splitting is required. 
if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != TargetLowering::TypeSplitVector) return SDValue(); SDValue MaskLo, MaskHi, Lo, Hi; std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0)); SDValue Chain = MSC->getChain(); EVT MemoryVT = MSC->getMemoryVT(); unsigned Alignment = MSC->getOriginalAlignment(); EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue DataLo, DataHi; std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); SDValue BasePtr = MSC->getBasePtr(); SDValue IndexLo, IndexHi; std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL); MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MSC->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, MSC->getAAInfo(), MSC->getRanges()); SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo }; Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), DL, OpsLo, MMO); SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi}; Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), DL, OpsHi, MMO); AddToWorklist(Lo.getNode()); AddToWorklist(Hi.getNode()); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); } SDValue DAGCombiner::visitMSTORE(SDNode *N) { if (Level >= AfterLegalizeTypes) return SDValue(); MaskedStoreSDNode *MST = dyn_cast(N); SDValue Mask = MST->getMask(); SDValue Data = MST->getValue(); EVT VT = Data.getValueType(); SDLoc DL(N); // If the MSTORE data type requires splitting and the mask is provided by a // SETCC, then split both nodes and its operands before legalization. This // prevents the type legalizer from unrolling SETCC into scalar comparisons // and enables future optimizations (e.g. min/max pattern matching on X86). if (Mask.getOpcode() == ISD::SETCC) { // Check if any splitting is required. 
if (TLI.getTypeAction(*DAG.getContext(), VT) != TargetLowering::TypeSplitVector) return SDValue(); SDValue MaskLo, MaskHi, Lo, Hi; std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); SDValue Chain = MST->getChain(); SDValue Ptr = MST->getBasePtr(); EVT MemoryVT = MST->getMemoryVT(); unsigned Alignment = MST->getOriginalAlignment(); // if Alignment is equal to the vector size, // take the half of it for the second part unsigned SecondHalfAlignment = (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment; EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue DataLo, DataHi; std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MST->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, MST->getAAInfo(), MST->getRanges()); Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, MST->isTruncatingStore(), MST->isCompressingStore()); Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, MST->isCompressingStore()); MMO = DAG.getMachineFunction(). getMachineMemOperand(MST->getPointerInfo(), MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment, MST->getAAInfo(), MST->getRanges()); Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, MST->isTruncatingStore(), MST->isCompressingStore()); AddToWorklist(Lo.getNode()); AddToWorklist(Hi.getNode()); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); } return SDValue(); } SDValue DAGCombiner::visitMGATHER(SDNode *N) { if (Level >= AfterLegalizeTypes) return SDValue(); MaskedGatherSDNode *MGT = dyn_cast(N); SDValue Mask = MGT->getMask(); SDLoc DL(N); // If the MGATHER result requires splitting and the mask is provided by a // SETCC, then split both nodes and its operands before legalization. This // prevents the type legalizer from unrolling SETCC into scalar comparisons // and enables future optimizations (e.g. 
min/max pattern matching on X86). if (Mask.getOpcode() != ISD::SETCC) return SDValue(); EVT VT = N->getValueType(0); // Check if any splitting is required. if (TLI.getTypeAction(*DAG.getContext(), VT) != TargetLowering::TypeSplitVector) return SDValue(); SDValue MaskLo, MaskHi, Lo, Hi; std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); SDValue Src0 = MGT->getValue(); SDValue Src0Lo, Src0Hi; std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); SDValue Chain = MGT->getChain(); EVT MemoryVT = MGT->getMemoryVT(); unsigned Alignment = MGT->getOriginalAlignment(); EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue BasePtr = MGT->getBasePtr(); SDValue Index = MGT->getIndex(); SDValue IndexLo, IndexHi; std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MGT->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MGT->getAAInfo(), MGT->getRanges()); SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo }; Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo, MMO); SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi}; Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi, MMO); AddToWorklist(Lo.getNode()); AddToWorklist(Hi.getNode()); // Build a factor node to remember that this load is independent of the // other one. Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), Hi.getValue(1)); // Legalized the chain result - switch anything that used the old chain to // use the new one. 
DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain); SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); SDValue RetOps[] = { GatherRes, Chain }; return DAG.getMergeValues(RetOps, DL); } SDValue DAGCombiner::visitMLOAD(SDNode *N) { if (Level >= AfterLegalizeTypes) return SDValue(); MaskedLoadSDNode *MLD = dyn_cast(N); SDValue Mask = MLD->getMask(); SDLoc DL(N); // If the MLOAD result requires splitting and the mask is provided by a // SETCC, then split both nodes and its operands before legalization. This // prevents the type legalizer from unrolling SETCC into scalar comparisons // and enables future optimizations (e.g. min/max pattern matching on X86). if (Mask.getOpcode() == ISD::SETCC) { EVT VT = N->getValueType(0); // Check if any splitting is required. if (TLI.getTypeAction(*DAG.getContext(), VT) != TargetLowering::TypeSplitVector) return SDValue(); SDValue MaskLo, MaskHi, Lo, Hi; std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); SDValue Src0 = MLD->getSrc0(); SDValue Src0Lo, Src0Hi; std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); SDValue Chain = MLD->getChain(); SDValue Ptr = MLD->getBasePtr(); EVT MemoryVT = MLD->getMemoryVT(); unsigned Alignment = MLD->getOriginalAlignment(); // if Alignment is equal to the vector size, // take the half of it for the second part unsigned SecondHalfAlignment = (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? Alignment/2 : Alignment; EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); MachineMemOperand *MMO = DAG.getMachineFunction(). 
getMachineMemOperand(MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad()); Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, MLD->isExpandingLoad()); MMO = DAG.getMachineFunction(). getMachineMemOperand(MLD->getPointerInfo(), MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad()); AddToWorklist(Lo.getNode()); AddToWorklist(Hi.getNode()); // Build a factor node to remember that this load is independent of the // other one. Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), Hi.getValue(1)); // Legalized the chain result - switch anything that used the old chain to // use the new one. DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain); SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); SDValue RetOps[] = { LoadRes, Chain }; return DAG.getMergeValues(RetOps, DL); } return SDValue(); } SDValue DAGCombiner::visitVSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); SDLoc DL(N); // fold (vselect C, X, X) -> X if (N1 == N2) return N1; // Canonicalize integer abs. 
// vselect (setg[te] X, 0), X, -X -> // vselect (setgt X, -1), X, -X -> // vselect (setl[te] X, 0), -X, X -> // Y = sra (X, size(X)-1); xor (add (X, Y), Y) if (N0.getOpcode() == ISD::SETCC) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); ISD::CondCode CC = cast(N0.getOperand(2))->get(); bool isAbs = false; bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode()); if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) || (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) && N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1)) isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode()); else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) && N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1)) isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); if (isAbs) { EVT VT = LHS.getValueType(); SDValue Shift = DAG.getNode( ISD::SRA, DL, VT, LHS, DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT)); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); AddToWorklist(Shift.getNode()); AddToWorklist(Add.getNode()); return DAG.getNode(ISD::XOR, DL, VT, Add, Shift); } } if (SimplifySelectOps(N, N1, N2)) return SDValue(N, 0); // Don't revisit N. // If the VSELECT result requires splitting and the mask is provided by a // SETCC, then split both nodes and its operands before legalization. This // prevents the type legalizer from unrolling SETCC into scalar comparisons // and enables future optimizations (e.g. min/max pattern matching on X86). if (N0.getOpcode() == ISD::SETCC) { EVT VT = N->getValueType(0); // Check if any splitting is required. 
if (TLI.getTypeAction(*DAG.getContext(), VT) != TargetLowering::TypeSplitVector) return SDValue(); SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH; std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG); std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1); std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2); Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL); Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH); // Add the new VSELECT nodes to the work list in case they need to be split // again. AddToWorklist(Lo.getNode()); AddToWorklist(Hi.getNode()); return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); } // Fold (vselect (build_vector all_ones), N1, N2) -> N1 if (ISD::isBuildVectorAllOnes(N0.getNode())) return N1; // Fold (vselect (build_vector all_zeros), N1, N2) -> N2 if (ISD::isBuildVectorAllZeros(N0.getNode())) return N2; // The ConvertSelectToConcatVector function is assuming both the above // checks for (vselect (build_vector all{ones,zeros) ...) have been made // and addressed. 
if (N1.getOpcode() == ISD::CONCAT_VECTORS && N2.getOpcode() == ISD::CONCAT_VECTORS && ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { if (SDValue CV = ConvertSelectToConcatVector(N, DAG)) return CV; } return SDValue(); } SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); SDValue N3 = N->getOperand(3); SDValue N4 = N->getOperand(4); ISD::CondCode CC = cast(N4)->get(); // fold select_cc lhs, rhs, x, x, cc -> x if (N2 == N3) return N2; // Determine if the condition we're dealing with is constant if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1, CC, SDLoc(N), false)) { AddToWorklist(SCC.getNode()); if (ConstantSDNode *SCCC = dyn_cast(SCC.getNode())) { if (!SCCC->isNullValue()) return N2; // cond always true -> true val else return N3; // cond always false -> false val } else if (SCC->isUndef()) { // When the condition is UNDEF, just return the first operand. This is // coherent the DAG creation, no setcc node is created in this case return N2; } else if (SCC.getOpcode() == ISD::SETCC) { // Fold to a simpler select_cc return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0), SCC.getOperand(1), N2, N3, SCC.getOperand(2)); } } // If we can fold this based on the true/false value, do so. if (SimplifySelectOps(N, N2, N3)) return SDValue(N, 0); // Don't revisit N. // fold select_cc into other things, such as min/max/abs return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC); } SDValue DAGCombiner::visitSETCC(SDNode *N) { return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1), cast(N->getOperand(2))->get(), SDLoc(N)); } SDValue DAGCombiner::visitSETCCE(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); SDValue Carry = N->getOperand(2); SDValue Cond = N->getOperand(3); // If Carry is false, fold to a regular SETCC. 
if (Carry.getOpcode() == ISD::CARRY_FALSE) return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond); return SDValue(); } /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or /// a build_vector of constants. /// This function is called by the DAGCombiner when visiting sext/zext/aext /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). /// Vector extends are not folded if operations are legal; this is to /// avoid introducing illegal build_vector dag nodes. static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes, bool LegalOperations) { unsigned Opcode = N->getOpcode(); SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) && "Expected EXTEND dag node in input!"); // fold (sext c1) -> c1 // fold (zext c1) -> c1 // fold (aext c1) -> c1 if (isa(N0)) return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode(); // fold (sext (build_vector AllConstants) -> (build_vector AllConstants) // fold (zext (build_vector AllConstants) -> (build_vector AllConstants) // fold (aext (build_vector AllConstants) -> (build_vector AllConstants) EVT SVT = VT.getScalarType(); if (!(VT.isVector() && (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) && ISD::isBuildVectorOfConstantSDNodes(N0.getNode()))) return nullptr; // We can fold this node into a build_vector. unsigned VTBits = SVT.getSizeInBits(); unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits(); SmallVector Elts; unsigned NumElts = VT.getVectorNumElements(); SDLoc DL(N); for (unsigned i=0; i != NumElts; ++i) { SDValue Op = N0->getOperand(i); if (Op->isUndef()) { Elts.push_back(DAG.getUNDEF(SVT)); continue; } SDLoc DL(Op); // Get the constant value and if needed trunc it to the size of the type. 
// Nodes like build_vector might have constants wider than the scalar type. APInt C = cast(Op)->getAPIntValue().zextOrTrunc(EVTBits); if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT)); else Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT)); } return DAG.getBuildVector(VT, DL, Elts).getNode(); } // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" // transformation. Returns true if extension are possible and the above // mentioned transformation is profitable. static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl &ExtendNodes, const TargetLowering &TLI) { bool HasCopyToRegUses = false; bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); for (SDNode::use_iterator UI = N0.getNode()->use_begin(), UE = N0.getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User == N) continue; if (UI.getUse().getResNo() != N0.getResNo()) continue; // FIXME: Only extend SETCC N, N and SETCC N, c for now. if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) { ISD::CondCode CC = cast(User->getOperand(2))->get(); if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC)) // Sign bits will be lost after a zext. return false; bool Add = false; for (unsigned i = 0; i != 2; ++i) { SDValue UseOp = User->getOperand(i); if (UseOp == N0) continue; if (!isa(UseOp)) return false; Add = true; } if (Add) ExtendNodes.push_back(User); continue; } // If truncates aren't free and there are users we can't // extend, it isn't worthwhile. if (!isTruncFree) return false; // Remember if this value is live-out. 
if (User->getOpcode() == ISD::CopyToReg) HasCopyToRegUses = true; } if (HasCopyToRegUses) { bool BothLiveOut = false; for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE; ++UI) { SDUse &Use = UI.getUse(); if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) { BothLiveOut = true; break; } } if (BothLiveOut) // Both unextended and extended values are live out. There had better be // a good reason for the transformation. return ExtendNodes.size(); } return true; } void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl &SetCCs, SDValue Trunc, SDValue ExtLoad, const SDLoc &DL, ISD::NodeType ExtType) { // Extend SetCC uses if necessary. for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { SDNode *SetCC = SetCCs[i]; SmallVector Ops; for (unsigned j = 0; j != 2; ++j) { SDValue SOp = SetCC->getOperand(j); if (SOp == Trunc) Ops.push_back(ExtLoad); else Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp)); } Ops.push_back(SetCC->getOperand(2)); CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops)); } } // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?). SDValue DAGCombiner::CombineExtLoad(SDNode *N) { SDValue N0 = N->getOperand(0); EVT DstVT = N->getValueType(0); EVT SrcVT = N0.getValueType(); assert((N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) && "Unexpected node type (not an extend)!"); // fold (sext (load x)) to multiple smaller sextloads; same for zext. // For example, on a target with legal v4i32, but illegal v8i32, turn: // (v8i32 (sext (v8i16 (load x)))) // into: // (v8i32 (concat_vectors (v4i32 (sextload x)), // (v4i32 (sextload (x + 16))))) // Where uses of the original load, i.e.: // (v8i16 (load x)) // are replaced with: // (v8i16 (truncate // (v8i32 (concat_vectors (v4i32 (sextload x)), // (v4i32 (sextload (x + 16))))))) // // This combine is only applicable to illegal, but splittable, vectors. 
// All legal types, and illegal non-vector types, are handled elsewhere. // This combine is controlled by TargetLowering::isVectorLoadExtDesirable. // if (N0->getOpcode() != ISD::LOAD) return SDValue(); LoadSDNode *LN0 = cast(N0); if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) || !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() || !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0))) return SDValue(); SmallVector SetCCs; if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI)) return SDValue(); ISD::LoadExtType ExtType = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD; // Try to split the vector types to get down to legal types. EVT SplitSrcVT = SrcVT; EVT SplitDstVT = DstVT; while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) && SplitSrcVT.getVectorNumElements() > 1) { SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first; SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first; } if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT)) return SDValue(); SDLoc DL(N); const unsigned NumSplits = DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements(); const unsigned Stride = SplitSrcVT.getStoreSize(); SmallVector Loads; SmallVector Chains; SDValue BasePtr = LN0->getBasePtr(); for (unsigned Idx = 0; Idx < NumSplits; Idx++) { const unsigned Offset = Idx * Stride; const unsigned Align = MinAlign(LN0->getAlignment(), Offset); SDValue SplitLoad = DAG.getExtLoad( ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr, LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align, LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, DAG.getConstant(Stride, DL, BasePtr.getValueType())); Loads.push_back(SplitLoad.getValue(0)); Chains.push_back(SplitLoad.getValue(1)); } SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads); 
CombineTo(N, NewValue); // Replace uses of the original load (before extension) // with a truncate of the concatenated sextloaded vectors. SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue); CombineTo(N0.getNode(), Trunc, NewChain); ExtendSetCCUses(SetCCs, Trunc, NewValue, DL, (ISD::NodeType)N->getOpcode()); return SDValue(N, 0); // Return N so it doesn't get rechecked! } SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, LegalOperations)) return SDValue(Res, 0); // fold (sext (sext x)) -> (sext x) // fold (sext (aext x)) -> (sext x) if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N0.getOperand(0)); if (N0.getOpcode() == ISD::TRUNCATE) { // fold (sext (truncate (load x))) -> (sext (smaller load x)) // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { SDNode *oye = N0.getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. AddToWorklist(oye); } return SDValue(N, 0); // Return N so it doesn't get rechecked! } // See if the value being truncated is already sign extended. If so, just // eliminate the trunc/sext pair. SDValue Op = N0.getOperand(0); unsigned OpBits = Op.getScalarValueSizeInBits(); unsigned MidBits = N0.getScalarValueSizeInBits(); unsigned DestBits = VT.getScalarSizeInBits(); unsigned NumSignBits = DAG.ComputeNumSignBits(Op); if (OpBits == DestBits) { // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign // bits, it is already ready. if (NumSignBits > DestBits-MidBits) return Op; } else if (OpBits < DestBits) { // Op is i32, Mid is i8, and Dest is i64. 
If Op has more than 24 sign // bits, just sext from i32. if (NumSignBits > OpBits-MidBits) return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op); } else { // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign // bits, just truncate to i32. if (NumSignBits > OpBits-MidBits) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); } // fold (sext (truncate x)) -> (sextinreg x). if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, N0.getValueType())) { if (OpBits < DestBits) Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op); else if (OpBits > DestBits) Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op); return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op, DAG.getValueType(N0.getValueType())); } } // fold (sext (load x)) -> (sext (truncate (sextload x))) // Only generate vector extloads when 1) they're legal, and 2) they are // deemed desirable by the target. if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && ((!LegalOperations && !VT.isVector() && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) { bool DoXform = true; SmallVector SetCCs; if (!N0.hasOneUse()) DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); if (VT.isVector()) DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); if (DoXform) { LoadSDNode *LN0 = cast(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), N0.getValueType(), LN0->getMemOperand()); CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::SIGN_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } // fold (sext (load x)) to multiple smaller sextloads. // Only on illegal but splittable vectors. 
if (SDValue ExtLoad = CombineExtLoad(N)) return ExtLoad; // fold (sext (sextload x)) -> (sext (truncate (sextload x))) // fold (sext ( extload x)) -> (sext (truncate (sextload x))) if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast(N0); EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } // fold (sext (and/or/xor (load x), cst)) -> // (and/or/xor (sextload x), (sext cst)) if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::XOR) && isa(N0.getOperand(0)) && N0.getOperand(1).getOpcode() == ISD::Constant && TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) && (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { LoadSDNode *LN0 = cast(N0.getOperand(0)); if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) { bool DoXform = true; SmallVector SetCCs; if (!N0.hasOneUse()) DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND, SetCCs, TLI); if (DoXform) { SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand()); APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); Mask = Mask.sext(VT.getSizeInBits()); SDLoc DL(N); SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, ExtLoad, DAG.getConstant(Mask, DL, VT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); CombineTo(N, And); CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); 
ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } } if (N0.getOpcode() == ISD::SETCC) { EVT N0VT = N0.getOperand(0).getValueType(); // sext(setcc) -> sext_in_reg(vsetcc) for vectors. // Only do this before legalize for now. if (VT.isVector() && !LegalOperations && TLI.getBooleanContents(N0VT) == TargetLowering::ZeroOrNegativeOneBooleanContent) { // On some architectures (such as SSE/NEON/etc) the SETCC result type is // of the same size as the compared operands. Only optimize sext(setcc()) // if this is the case. EVT SVT = getSetCCResultType(N0VT); // We know that the # elements of the results is the same as the // # elements of the compare (and the # elements of the compare result // for that matter). Check to see that they are the same size. If so, // we know that the element size of the sext'd result matches the // element size of the compare operands. if (VT.getSizeInBits() == SVT.getSizeInBits()) return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), cast(N0.getOperand(2))->get()); // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then // truncate/sign extend EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger(); if (SVT == MatchingVectorType) { SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast(N0.getOperand(2))->get()); return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT); } } // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0) // Here, T can be 1 or -1, depending on the type of the setcc and // getBooleanContents(). unsigned SetCCWidth = N0.getScalarValueSizeInBits(); SDLoc DL(N); // To determine the "true" side of the select, we need to know the high bit // of the value returned by the setcc if it evaluates to true. // If the type of the setcc is i1, then the true case of the select is just // sext(i1 1), that is, -1. 
// If the type of the setcc is larger (say, i8) then the value of the high // bit depends on getBooleanContents(). So, ask TLI for a real "true" value // of the appropriate width. SDValue ExtTrueVal = (SetCCWidth == 1) ? DAG.getConstant(APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT) : TLI.getConstTrueVal(DAG, VT, DL); if (SDValue SCC = SimplifySelectCC( DL, N0.getOperand(0), N0.getOperand(1), ExtTrueVal, DAG.getConstant(0, DL, VT), cast(N0.getOperand(2))->get(), true)) return SCC; if (!VT.isVector()) { EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType()); if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) { SDLoc DL(N); ISD::CondCode CC = cast(N0.getOperand(2))->get(); SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N0.getOperand(0), N0.getOperand(1), CC); return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, DAG.getConstant(0, DL, VT)); } } } // fold (sext x) -> (zext x) if the sign bit is known zero. if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0); return SDValue(); } // isTruncateOf - If N is a truncate of some other value, return true, record // the value being truncated in Op and which of Op's bits are zero in KnownZero. // This function computes KnownZero to avoid a duplicated call to // computeKnownBits in the caller. 
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, APInt &KnownZero) { APInt KnownOne; if (N->getOpcode() == ISD::TRUNCATE) { Op = N->getOperand(0); DAG.computeKnownBits(Op, KnownZero, KnownOne); return true; } if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 || cast(N->getOperand(2))->get() != ISD::SETNE) return false; SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); assert(Op0.getValueType() == Op1.getValueType()); if (isNullConstant(Op0)) Op = Op1; else if (isNullConstant(Op1)) Op = Op0; else return false; DAG.computeKnownBits(Op, KnownZero, KnownOne); if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue()) return false; return true; } SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, LegalOperations)) return SDValue(Res, 0); // fold (zext (zext x)) -> (zext x) // fold (zext (aext x)) -> (zext x) if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0)); // fold (zext (truncate x)) -> (zext x) or // (zext (truncate x)) -> (truncate x) // This is valid when the truncated bits of x are already zero. // FIXME: We should extend this to work for vectors too. SDValue Op; APInt KnownZero; if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) { APInt TruncatedBits = (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ? 
APInt(Op.getValueSizeInBits(), 0) : APInt::getBitsSet(Op.getValueSizeInBits(), N0.getValueSizeInBits(), std::min(Op.getValueSizeInBits(), VT.getSizeInBits())); if (TruncatedBits == (KnownZero & TruncatedBits)) { if (VT.bitsGT(Op.getValueType())) return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op); if (VT.bitsLT(Op.getValueType())) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); return Op; } } // fold (zext (truncate (load x))) -> (zext (smaller load x)) // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { SDNode *oye = N0.getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. AddToWorklist(oye); } return SDValue(N, 0); // Return N so it doesn't get rechecked! } } // fold (zext (truncate x)) -> (and x, mask) if (N0.getOpcode() == ISD::TRUNCATE) { // fold (zext (truncate (load x))) -> (zext (smaller load x)) // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n))) if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { SDNode *oye = N0.getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. AddToWorklist(oye); } return SDValue(N, 0); // Return N so it doesn't get rechecked! } EVT SrcVT = N0.getOperand(0).getValueType(); EVT MinVT = N0.getValueType(); // Try to mask before the extension to avoid having to generate a larger mask, // possibly over several sub-vectors. 
if (SrcVT.bitsLT(VT)) { if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) && TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) { SDValue Op = N0.getOperand(0); Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); AddToWorklist(Op.getNode()); return DAG.getZExtOrTrunc(Op, SDLoc(N), VT); } } if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) { SDValue Op = N0.getOperand(0); if (SrcVT.bitsLT(VT)) { Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op); AddToWorklist(Op.getNode()); } else if (SrcVT.bitsGT(VT)) { Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); AddToWorklist(Op.getNode()); } return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); } } // Fold (zext (and (trunc x), cst)) -> (and x, cst), // if either of the casts is not free. if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::TRUNCATE && N0.getOperand(1).getOpcode() == ISD::Constant && (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), N0.getValueType()) || !TLI.isZExtFree(N0.getValueType(), VT))) { SDValue X = N0.getOperand(0).getOperand(0); if (X.getValueType().bitsLT(VT)) { X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X); } else if (X.getValueType().bitsGT(VT)) { X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); } APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); SDLoc DL(N); return DAG.getNode(ISD::AND, DL, VT, X, DAG.getConstant(Mask, DL, VT)); } // fold (zext (load x)) -> (zext (truncate (zextload x))) // Only generate vector extloads when 1) they're legal, and 2) they are // deemed desirable by the target. 
if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && ((!LegalOperations && !VT.isVector() && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) { bool DoXform = true; SmallVector SetCCs; if (!N0.hasOneUse()) DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); if (VT.isVector()) DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); if (DoXform) { LoadSDNode *LN0 = cast(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), N0.getValueType(), LN0->getMemOperand()); CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } // fold (zext (load x)) to multiple smaller zextloads. // Only on illegal but splittable vectors. if (SDValue ExtLoad = CombineExtLoad(N)) return ExtLoad; // fold (zext (and/or/xor (load x), cst)) -> // (and/or/xor (zextload x), (zext cst)) // Unless (and (load x) cst) will match as a zextload already and has // additional users. 
if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::XOR) && isa(N0.getOperand(0)) && N0.getOperand(1).getOpcode() == ISD::Constant && TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) && (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { LoadSDNode *LN0 = cast(N0.getOperand(0)); if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) { bool DoXform = true; SmallVector SetCCs; if (!N0.hasOneUse()) { if (N0.getOpcode() == ISD::AND) { auto *AndC = cast(N0.getOperand(1)); auto NarrowLoad = false; EVT LoadResultTy = AndC->getValueType(0); EVT ExtVT, LoadedVT; if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT, NarrowLoad)) DoXform = false; } if (DoXform) DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND, SetCCs, TLI); } if (DoXform) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand()); APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); SDLoc DL(N); SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, ExtLoad, DAG.getConstant(Mask, DL, VT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); CombineTo(N, And); CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} } } // fold (zext (zextload x)) -> (zext (truncate (zextload x))) // fold (zext ( extload x)) -> (zext (truncate (zextload x))) if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast(N0); EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } if (N0.getOpcode() == ISD::SETCC) { // Only do this before legalize for now. if (!LegalOperations && VT.isVector() && N0.getValueType().getVectorElementType() == MVT::i1) { EVT N00VT = N0.getOperand(0).getValueType(); if (getSetCCResultType(N00VT) == N0.getValueType()) return SDValue(); // We know that the # elements of the results is the same as the # // elements of the compare (and the # elements of the compare result for // that matter). Check to see that they are the same size. If so, we know // that the element size of the sext'd result matches the element size of // the compare operands. SDLoc DL(N); SDValue VecOnes = DAG.getConstant(1, DL, VT); if (VT.getSizeInBits() == N00VT.getSizeInBits()) { // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0), N0.getOperand(1), N0.getOperand(2)); return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes); } // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then // truncate/sign extend. 
EVT MatchingElementType = EVT::getIntegerVT( *DAG.getContext(), N00VT.getScalarSizeInBits()); EVT MatchingVectorType = EVT::getVectorVT( *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements()); SDValue VsetCC = DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1), N0.getOperand(2)); return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT), VecOnes); } // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc SDLoc DL(N); if (SDValue SCC = SimplifySelectCC( DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), cast(N0.getOperand(2))->get(), true)) return SCC; } // (zext (shl (zext x), cst)) -> (shl (zext x), cst) if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) && isa(N0.getOperand(1)) && N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) { SDValue ShAmt = N0.getOperand(1); unsigned ShAmtVal = cast(ShAmt)->getZExtValue(); if (N0.getOpcode() == ISD::SHL) { SDValue InnerZExt = N0.getOperand(0); // If the original shl may be shifting out bits, do not perform this // transformation. unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() - InnerZExt.getOperand(0).getValueSizeInBits(); if (ShAmtVal > KnownZeroBits) return SDValue(); } SDLoc DL(N); // Ensure that the shift amount is wide enough for the shifted value. 
if (VT.getSizeInBits() >= 256) ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt); return DAG.getNode(N0.getOpcode(), DL, VT, DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)), ShAmt); } return SDValue(); } SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, LegalOperations)) return SDValue(Res, 0); // fold (aext (aext x)) -> (aext x) // fold (aext (zext x)) -> (zext x) // fold (aext (sext x)) -> (sext x) if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND) return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0)); // fold (aext (truncate (load x))) -> (aext (smaller load x)) // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { SDNode *oye = N0.getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. AddToWorklist(oye); } return SDValue(N, 0); // Return N so it doesn't get rechecked! } } // fold (aext (truncate x)) if (N0.getOpcode() == ISD::TRUNCATE) { SDValue TruncOp = N0.getOperand(0); if (TruncOp.getValueType() == VT) return TruncOp; // x iff x size == zext size. if (TruncOp.getValueType().bitsGT(VT)) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp); return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp); } // Fold (aext (and (trunc x), cst)) -> (and x, cst) // if the trunc is not free. 
if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::TRUNCATE && N0.getOperand(1).getOpcode() == ISD::Constant && !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), N0.getValueType())) { SDLoc DL(N); SDValue X = N0.getOperand(0).getOperand(0); if (X.getValueType().bitsLT(VT)) { X = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X); } else if (X.getValueType().bitsGT(VT)) { X = DAG.getNode(ISD::TRUNCATE, DL, VT, X); } APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); return DAG.getNode(ISD::AND, DL, VT, X, DAG.getConstant(Mask, DL, VT)); } // fold (aext (load x)) -> (aext (truncate (extload x))) // None of the supported targets knows how to perform load and any_ext // on vectors in one instruction. We only perform this transformation on // scalars. if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ISD::isUNINDEXEDLoad(N0.getNode()) && TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) { bool DoXform = true; SmallVector SetCCs; if (!N0.hasOneUse()) DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), N0.getValueType(), LN0->getMemOperand()); CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ANY_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} } // fold (aext (zextload x)) -> (aext (truncate (zextload x))) // fold (aext (sextload x)) -> (aext (truncate (sextload x))) // fold (aext ( extload x)) -> (aext (truncate (extload x))) if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast(N0); ISD::LoadExtType ExtType = LN0->getExtensionType(); EVT MemVT = LN0->getMemoryVT(); if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } if (N0.getOpcode() == ISD::SETCC) { // For vectors: // aext(setcc) -> vsetcc // aext(setcc) -> truncate(vsetcc) // aext(setcc) -> aext(vsetcc) // Only do this before legalize for now. if (VT.isVector() && !LegalOperations) { EVT N0VT = N0.getOperand(0).getValueType(); // We know that the # elements of the results is the same as the // # elements of the compare (and the # elements of the compare result // for that matter). Check to see that they are the same size. If so, // we know that the element size of the sext'd result matches the // element size of the compare operands. 
if (VT.getSizeInBits() == N0VT.getSizeInBits()) return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), cast(N0.getOperand(2))->get()); // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then // truncate/any extend else { EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger(); SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast(N0.getOperand(2))->get()); return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT); } } // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc SDLoc DL(N); if (SDValue SCC = SimplifySelectCC( DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), cast(N0.getOperand(2))->get(), true)) return SCC; } return SDValue(); } /// See if the specified operand can be simplified with the knowledge that only /// the bits specified by Mask are used. If so, return the simpler operand, /// otherwise return a null SDValue. SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { switch (V.getOpcode()) { default: break; case ISD::Constant: { const ConstantSDNode *CV = cast(V.getNode()); assert(CV && "Const value should be ConstSDNode."); const APInt &CVal = CV->getAPIntValue(); APInt NewVal = CVal & Mask; if (NewVal != CVal) return DAG.getConstant(NewVal, SDLoc(V), V.getValueType()); break; } case ISD::OR: case ISD::XOR: // If the LHS or RHS don't contribute bits to the or, drop them. if (DAG.MaskedValueIsZero(V.getOperand(0), Mask)) return V.getOperand(1); if (DAG.MaskedValueIsZero(V.getOperand(1), Mask)) return V.getOperand(0); break; case ISD::SRL: // Only look at single-use SRLs. if (!V.getNode()->hasOneUse()) break; if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) { // See if we can recursively simplify the LHS. unsigned Amt = RHSC->getZExtValue(); // Watch out for shift count overflow though. 
if (Amt >= Mask.getBitWidth()) break; APInt NewMask = Mask << Amt; if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask)) return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS, V.getOperand(1)); } } return SDValue(); } /// If the result of a wider load is shifted to right of N bits and then /// truncated to a narrower type and where N is a multiple of number of bits of /// the narrower type, transform it to a narrower load from address + N / num of /// bits of new type. If the result is to be extended, also fold the extension /// to form a extending load. SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { unsigned Opc = N->getOpcode(); ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); EVT ExtVT = VT; // This transformation isn't valid for vector loads. if (VT.isVector()) return SDValue(); // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then // extended to VT. if (Opc == ISD::SIGN_EXTEND_INREG) { ExtType = ISD::SEXTLOAD; ExtVT = cast(N->getOperand(1))->getVT(); } else if (Opc == ISD::SRL) { // Another special-case: SRL is basically zero-extending a narrower value. ExtType = ISD::ZEXTLOAD; N0 = SDValue(N, 0); ConstantSDNode *N01 = dyn_cast(N0.getOperand(1)); if (!N01) return SDValue(); ExtVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() - N01->getZExtValue()); } if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT)) return SDValue(); unsigned EVTBits = ExtVT.getSizeInBits(); // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). if (!ExtVT.isRound()) return SDValue(); unsigned ShAmt = 0; if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { if (ConstantSDNode *N01 = dyn_cast(N0.getOperand(1))) { ShAmt = N01->getZExtValue(); // Is the shift amount a multiple of size of VT? 
if ((ShAmt & (EVTBits-1)) == 0) { N0 = N0.getOperand(0); // Is the load width a multiple of size of VT? if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0) return SDValue(); } // At this point, we must have a load or else we can't do the transform. if (!isa(N0)) return SDValue(); // Because a SRL must be assumed to *need* to zero-extend the high bits // (as opposed to anyext the high bits), we can't combine the zextload // lowering of SRL and an sextload. if (cast(N0)->getExtensionType() == ISD::SEXTLOAD) return SDValue(); // If the shift amount is larger than the input type then we're not // accessing any of the loaded bytes. If the load was a zextload/extload // then the result of the shift+trunc is zero/undef (handled elsewhere). if (ShAmt >= cast(N0)->getMemoryVT().getSizeInBits()) return SDValue(); } } // If the load is shifted left (and the result isn't shifted back right), // we can fold the truncate through the shift. unsigned ShLeftAmt = 0; if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() && ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) { if (ConstantSDNode *N01 = dyn_cast(N0.getOperand(1))) { ShLeftAmt = N01->getZExtValue(); N0 = N0.getOperand(0); } } // If we haven't found a load, we can't narrow it. Don't transform one with // multiple uses, this would require adding a new load. if (!isa(N0) || !N0.hasOneUse()) return SDValue(); // Don't change the width of a volatile load. LoadSDNode *LN0 = cast(N0); if (LN0->isVolatile()) return SDValue(); // Verify that we are actually reducing a load width here. if (LN0->getMemoryVT().getSizeInBits() < EVTBits) return SDValue(); // For the transform to be legal, the load must produce only two values // (the value loaded and the chain). Don't transform a pre-increment // load, for example, which produces an extra value. Otherwise the // transformation is not equivalent, and the downstream logic to replace // uses gets things wrong. 
if (LN0->getNumValues() > 2) return SDValue(); // If the load that we're shrinking is an extload and we're not just // discarding the extension we can't simply shrink the load. Bail. // TODO: It would be possible to merge the extensions in some cases. if (LN0->getExtensionType() != ISD::NON_EXTLOAD && LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt) return SDValue(); if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT)) return SDValue(); EVT PtrType = N0.getOperand(1).getValueType(); if (PtrType == MVT::Untyped || PtrType.isExtended()) // It's not possible to generate a constant of extended or untyped type. return SDValue(); // For big endian targets, we need to adjust the offset to the pointer to // load the correct bytes. if (DAG.getDataLayout().isBigEndian()) { unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; } uint64_t PtrOff = ShAmt / 8; unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); SDLoc DL(LN0); // The original load itself didn't wrap, so an offset within it doesn't. SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); SDValue NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, LN0->getBasePtr(), DAG.getConstant(PtrOff, DL, PtrType), &Flags); AddToWorklist(NewPtr.getNode()); SDValue Load; if (ExtType == ISD::NON_EXTLOAD) Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign, LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); else Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT, NewAlign, LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); // Replace the old load's chain with the new load's chain. WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); // Shift the result left, if we've swallowed a left shift. 
SDValue Result = Load; if (ShLeftAmt != 0) { EVT ShImmTy = getShiftAmountTy(Result.getValueType()); if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt)) ShImmTy = VT; // If the shift amount is as large as the result size (but, presumably, // no larger than the source) then the useful bits of the result are // zero; we can't simply return the shortened shift, because the result // of that operation is undefined. SDLoc DL(N0); if (ShLeftAmt >= VT.getSizeInBits()) Result = DAG.getConstant(0, DL, VT); else Result = DAG.getNode(ISD::SHL, DL, VT, Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy)); } // Return the new loaded value. return Result; } SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); EVT EVT = cast(N1)->getVT(); unsigned VTBits = VT.getScalarSizeInBits(); unsigned EVTBits = EVT.getScalarSizeInBits(); if (N0.isUndef()) return DAG.getUNDEF(VT); // fold (sext_in_reg c1) -> c1 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1); // If the input is already sign extended, just drop the extension. if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1) return N0; // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && EVT.bitsLT(cast(N0.getOperand(1))->getVT())) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0), N1); // fold (sext_in_reg (sext x)) -> (sext x) // fold (sext_in_reg (aext x)) -> (sext x) // if x is small enough. if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N00 = N0.getOperand(0); if (N00.getScalarValueSizeInBits() <= EVTBits && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1); } // fold (sext_in_reg (zext x)) -> (sext x) // iff we are extending the source sign bit. 
if (N0.getOpcode() == ISD::ZERO_EXTEND) { SDValue N00 = N0.getOperand(0); if (N00.getScalarValueSizeInBits() == EVTBits && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1); } // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero. if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits))) return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType()); // fold operands of sext_in_reg based on knowledge that the top bits are not // demanded. if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // fold (sext_in_reg (load x)) -> (smaller sextload x) // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits)) if (SDValue NarrowLoad = ReduceLoadWidth(N)) return NarrowLoad; // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24) // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible. // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above. if (N0.getOpcode() == ISD::SRL) { if (ConstantSDNode *ShAmt = dyn_cast(N0.getOperand(1))) if (ShAmt->getZExtValue()+EVTBits <= VTBits) { // We can turn this into an SRA iff the input to the SRL is already sign // extended enough. 
unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits) return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1)); } } // fold (sext_inreg (extload x)) -> (sextload x) if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && EVT == cast(N0)->getMemoryVT() && ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { LoadSDNode *LN0 = cast(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), EVT, LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); AddToWorklist(ExtLoad.getNode()); return SDValue(N, 0); // Return N so it doesn't get rechecked! } // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse() && EVT == cast(N0)->getMemoryVT() && ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { LoadSDNode *LN0 = cast(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), EVT, LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16)) if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) { if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), N0.getOperand(1), false)) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1); } return SDValue(); } SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); if (N0.isUndef()) return DAG.getUNDEF(VT); if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, LegalOperations)) return SDValue(Res, 0); return SDValue(); } SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); if (N0.isUndef()) return DAG.getUNDEF(VT); if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, LegalOperations)) return SDValue(Res, 0); return SDValue(); } SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); bool isLE = DAG.getDataLayout().isLittleEndian(); // noop truncate if (N0.getValueType() == N->getValueType(0)) return N0; // fold (truncate c1) -> c1 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); // fold (truncate (truncate x)) -> (truncate x) if (N0.getOpcode() == ISD::TRUNCATE) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); // fold (truncate (ext x)) -> (ext x) or (truncate x) or x if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { // if the source is smaller than the dest, we still need an extend. if (N0.getOperand(0).getValueType().bitsLT(VT)) return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0)); // if the source is larger than the dest, than we just need the truncate. 
if (N0.getOperand(0).getValueType().bitsGT(VT)) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); // if the source and dest are the same type, we can drop both the extend // and the truncate. return N0.getOperand(0); } // If this is anyext(trunc), don't fold it, allow ourselves to be folded. if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND)) return SDValue(); // Fold extract-and-trunc into a narrow extract. For example: // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1) // i32 y = TRUNCATE(i64 x) // -- becomes -- // v16i8 b = BITCAST (v2i64 val) // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8) // // Note: We only run this optimization after type legalization (which often // creates this pattern) and before operation legalization after which // we need to be more careful about the vector instructions that we generate. if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) { EVT VecTy = N0.getOperand(0).getValueType(); EVT ExTy = N0.getValueType(); EVT TrTy = N->getValueType(0); unsigned NumElem = VecTy.getVectorNumElements(); unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits(); EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem); assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size"); SDValue EltNo = N0->getOperand(1); if (isa(EltNo) && isTypeLegal(NVT)) { int Elt = cast(EltNo)->getZExtValue(); EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); int Index = isLE ? 
(Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); SDLoc DL(N); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy, DAG.getBitcast(NVT, N0.getOperand(0)), DAG.getConstant(Index, DL, IndexTy)); } } // trunc (select c, a, b) -> select c, (trunc a), (trunc b) if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) { EVT SrcVT = N0.getValueType(); if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) && TLI.isTruncateFree(SrcVT, VT)) { SDLoc SL(N0); SDValue Cond = N0.getOperand(0); SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2)); return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1); } } // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits() if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) && TLI.isTypeDesirableForOp(ISD::SHL, VT)) { if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) { uint64_t Amt = CAmt->getZExtValue(); unsigned Size = VT.getScalarSizeInBits(); if (Amt < Size) { SDLoc SL(N); EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); return DAG.getNode(ISD::SHL, SL, VT, Trunc, DAG.getConstant(Amt, SL, AmtVT)); } } } // Fold a series of buildvector, bitcast, and truncate if possible. // For example fold // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to // (2xi32 (buildvector x, y)). if (Level == AfterLegalizeVectorOps && VT.isVector() && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR && N0.getOperand(0).hasOneUse()) { SDValue BuildVect = N0.getOperand(0); EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType(); EVT TruncVecEltTy = VT.getVectorElementType(); // Check that the element types match. 
if (BuildVectEltTy == TruncVecEltTy) { // Now we only need to compute the offset of the truncated elements. unsigned BuildVecNumElts = BuildVect.getNumOperands(); unsigned TruncVecNumElts = VT.getVectorNumElements(); unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts; assert((BuildVecNumElts % TruncVecNumElts) == 0 && "Invalid number of elements"); SmallVector Opnds; for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) Opnds.push_back(BuildVect.getOperand(i)); return DAG.getBuildVector(VT, SDLoc(N), Opnds); } } // See if we can simplify the input to this truncate through knowledge that // only the low bits are being used. // For example "trunc (or (shl x, 8), y)" // -> trunc y // Currently we only perform this optimization on scalars because vectors // may have different active low bits. if (!VT.isVector()) { if (SDValue Shorter = GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits()))) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); } // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { if (SDValue Reduced = ReduceLoadWidth(N)) return Reduced; // Handle the case where the load remains an extending load even // after truncation. if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { LoadSDNode *LN0 = cast(N0); if (!LN0->isVolatile() && LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) { SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand()); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1)); return NewLoad; } } } // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)), // where ... are all 'undef'. 
if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) { SmallVector VTs; SDValue V; unsigned Idx = 0; unsigned NumDefs = 0; for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { SDValue X = N0.getOperand(i); if (!X.isUndef()) { V = X; Idx = i; NumDefs++; } // Stop if more than one members are non-undef. if (NumDefs > 1) break; VTs.push_back(EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), X.getValueType().getVectorNumElements())); } if (NumDefs == 0) return DAG.getUNDEF(VT); if (NumDefs == 1) { assert(V.getNode() && "The single defined operand is empty!"); SmallVector Opnds; for (unsigned i = 0, e = VTs.size(); i != e; ++i) { if (i != Idx) { Opnds.push_back(DAG.getUNDEF(VTs[i])); continue; } SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V); AddToWorklist(NV.getNode()); Opnds.push_back(NV); } return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds); } } // Fold truncate of a bitcast of a vector to an extract of the low vector // element. // // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, 0 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) { SDValue VecSrc = N0.getOperand(0); EVT SrcVT = VecSrc.getValueType(); if (SrcVT.isVector() && SrcVT.getScalarType() == VT && (!LegalOperations || TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) { SDLoc SL(N); EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc, DAG.getConstant(0, SL, IdxVT)); } } // Simplify the operands using demanded-bits information. if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); return SDValue(); } static SDNode *getBuildPairElt(SDNode *N, unsigned i) { SDValue Elt = N->getOperand(i); if (Elt.getOpcode() != ISD::MERGE_VALUES) return Elt.getNode(); return Elt.getOperand(Elt.getResNo()).getNode(); } /// build_pair (load, load) -> load /// if load locations are consecutive. 
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { assert(N->getOpcode() == ISD::BUILD_PAIR); LoadSDNode *LD1 = dyn_cast(getBuildPairElt(N, 0)); LoadSDNode *LD2 = dyn_cast(getBuildPairElt(N, 1)); if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || LD1->getAddressSpace() != LD2->getAddressSpace()) return SDValue(); EVT LD1VT = LD1->getValueType(0); unsigned LD1Bytes = LD1VT.getSizeInBits() / 8; if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() && DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) { unsigned Align = LD1->getAlignment(); unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( VT.getTypeForEVT(*DAG.getContext())); if (NewAlign <= Align && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), LD1->getPointerInfo(), Align); } return SDValue(); } static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) { // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi // and Lo parts; on big-endian machines it doesn't. return DAG.getDataLayout().isBigEndian() ? 1 : 0; } static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI) { // If this is not a bitcast to an FP type or if the target doesn't have // IEEE754-compliant FP logic, we're done. EVT VT = N->getValueType(0); if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT)) return SDValue(); // TODO: Use splat values for the constant-checking below and remove this // restriction. SDValue N0 = N->getOperand(0); EVT SourceVT = N0.getValueType(); if (SourceVT.isVector()) return SDValue(); unsigned FPOpcode; APInt SignMask; switch (N0.getOpcode()) { case ISD::AND: FPOpcode = ISD::FABS; SignMask = ~APInt::getSignBit(SourceVT.getSizeInBits()); break; case ISD::XOR: FPOpcode = ISD::FNEG; SignMask = APInt::getSignBit(SourceVT.getSizeInBits()); break; // TODO: ISD::OR --> ISD::FNABS? 
default: return SDValue(); } // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X SDValue LogicOp0 = N0.getOperand(0); ConstantSDNode *LogicOp1 = dyn_cast(N0.getOperand(1)); if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask && LogicOp0.getOpcode() == ISD::BITCAST && LogicOp0->getOperand(0).getValueType() == VT) return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0)); return SDValue(); } SDValue DAGCombiner::visitBITCAST(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // If the input is a BUILD_VECTOR with all constant elements, fold this now. // Only do this before legalize, since afterward the target may be depending // on the bitconvert. // First check to see if this is all constant. if (!LegalTypes && N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() && VT.isVector()) { bool isSimple = cast(N0)->isConstant(); EVT DestEltVT = N->getValueType(0).getVectorElementType(); assert(!DestEltVT.isVector() && "Element type of vector ValueType must not be vector!"); if (isSimple) return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT); } // If the input is a constant, let getNode fold it. if (isa(N0) || isa(N0)) { // If we can't allow illegal operations, we need to check that this is just // a fp -> int or int -> conversion and that the resulting operation will // be legal. if (!LegalOperations || (isa(N0) && VT.isFloatingPoint() && !VT.isVector() && TLI.isOperationLegal(ISD::ConstantFP, VT)) || (isa(N0) && VT.isInteger() && !VT.isVector() && TLI.isOperationLegal(ISD::Constant, VT))) return DAG.getBitcast(VT, N0); } // (conv (conv x, t1), t2) -> (conv x, t2) if (N0.getOpcode() == ISD::BITCAST) return DAG.getBitcast(VT, N0.getOperand(0)); // fold (conv (load x)) -> (load (conv*)x) // If the resultant load doesn't need a higher alignment than the original! 
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && // Do not change the width of a volatile load. !cast(N0)->isVolatile() && // Do not remove the cast if the types differ in endian layout. TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) == TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { LoadSDNode *LN0 = cast(N0); unsigned OrigAlign = LN0->getAlignment(); bool Fast = false; if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, LN0->getAddressSpace(), OrigAlign, &Fast) && Fast) { SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), LN0->getPointerInfo(), OrigAlign, LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); return Load; } } if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI)) return V; // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) // // For ppc_fp128: // fold (bitcast (fneg x)) -> // flipbit = signbit // (xor (bitcast x) (build_pair flipbit, flipbit)) // // fold (bitcast (fabs x)) -> // flipbit = (and (extract_element (bitcast x), 0), signbit) // (xor (bitcast x) (build_pair flipbit, flipbit)) // This often reduces constant pool loads. 
if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) || (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector() && !N0.getValueType().isVector()) { SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0)); AddToWorklist(NewConv.getNode()); SDLoc DL(N); if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { assert(VT.getSizeInBits() == 128); SDValue SignBit = DAG.getConstant( APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64); SDValue FlipBit; if (N0.getOpcode() == ISD::FNEG) { FlipBit = SignBit; AddToWorklist(FlipBit.getNode()); } else { assert(N0.getOpcode() == ISD::FABS); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv, DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), SDLoc(NewConv))); AddToWorklist(Hi.getNode()); FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit); AddToWorklist(FlipBit.getNode()); } SDValue FlipBits = DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); AddToWorklist(FlipBits.getNode()); return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits); } APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); if (N0.getOpcode() == ISD::FNEG) return DAG.getNode(ISD::XOR, DL, VT, NewConv, DAG.getConstant(SignBit, DL, VT)); assert(N0.getOpcode() == ISD::FABS); return DAG.getNode(ISD::AND, DL, VT, NewConv, DAG.getConstant(~SignBit, DL, VT)); } // fold (bitconvert (fcopysign cst, x)) -> // (or (and (bitconvert x), sign), (and cst, (not sign))) // Note that we don't handle (copysign x, cst) because this can always be // folded to an fneg or fabs. 
// // For ppc_fp128: // fold (bitcast (fcopysign cst, x)) -> // flipbit = (and (extract_element // (xor (bitcast cst), (bitcast x)), 0), // signbit) // (xor (bitcast cst) (build_pair flipbit, flipbit)) if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && isa(N0.getOperand(0)) && VT.isInteger() && !VT.isVector()) { unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits(); EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); if (isTypeLegal(IntXVT)) { SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1)); AddToWorklist(X.getNode()); // If X has a different width than the result/lhs, sext it or truncate it. unsigned VTWidth = VT.getSizeInBits(); if (OrigXWidth < VTWidth) { X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X); AddToWorklist(X.getNode()); } else if (OrigXWidth > VTWidth) { // To get the sign bit in the right place, we have to shift it right // before truncating. SDLoc DL(X); X = DAG.getNode(ISD::SRL, DL, X.getValueType(), X, DAG.getConstant(OrigXWidth-VTWidth, DL, X.getValueType())); AddToWorklist(X.getNode()); X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); AddToWorklist(X.getNode()); } if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2); SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0)); AddToWorklist(Cst.getNode()); SDValue X = DAG.getBitcast(VT, N0.getOperand(1)); AddToWorklist(X.getNode()); SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X); AddToWorklist(XorResult.getNode()); SDValue XorResult64 = DAG.getNode( ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult, DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), SDLoc(XorResult))); AddToWorklist(XorResult64.getNode()); SDValue FlipBit = DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64, DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64)); AddToWorklist(FlipBit.getNode()); SDValue FlipBits = DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); 
AddToWorklist(FlipBits.getNode()); return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits); } APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); X = DAG.getNode(ISD::AND, SDLoc(X), VT, X, DAG.getConstant(SignBit, SDLoc(X), VT)); AddToWorklist(X.getNode()); SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0)); Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT)); AddToWorklist(Cst.getNode()); return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst); } } // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. if (N0.getOpcode() == ISD::BUILD_PAIR) if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT)) return CombineLD; // Remove double bitcasts from shuffles - this is often a legacy of // XformToShuffleWithZero being used to combine bitmaskings (of // float vectors bitcast to integer vectors) into shuffles. // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1) if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() && N0->getOpcode() == ISD::VECTOR_SHUFFLE && VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() && !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) { ShuffleVectorSDNode *SVN = cast(N0); // If operands are a bitcast, peek through if it casts the original VT. // If operands are a constant, just bitcast back to original VT. 
auto PeekThroughBitcast = [&](SDValue Op) { if (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).getValueType() == VT) return SDValue(Op.getOperand(0)); if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) return DAG.getBitcast(VT, Op); return SDValue(); }; SDValue SV0 = PeekThroughBitcast(N0->getOperand(0)); SDValue SV1 = PeekThroughBitcast(N0->getOperand(1)); if (!(SV0 && SV1)) return SDValue(); int MaskScale = VT.getVectorNumElements() / N0.getValueType().getVectorNumElements(); SmallVector NewMask; for (int M : SVN->getMask()) for (int i = 0; i != MaskScale; ++i) NewMask.push_back(M < 0 ? -1 : M * MaskScale + i); bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); if (!LegalMask) { std::swap(SV0, SV1); ShuffleVectorSDNode::commuteMask(NewMask); LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); } if (LegalMask) return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask); } return SDValue(); } SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { EVT VT = N->getValueType(0); return CombineConsecutiveLoads(N, VT); } /// We know that BV is a build_vector node with Constant, ConstantFP or Undef /// operands. DstEltVT indicates the destination element value type. SDValue DAGCombiner:: ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { EVT SrcEltVT = BV->getValueType(0).getVectorElementType(); // If this is already the right type, we're done. if (SrcEltVT == DstEltVT) return SDValue(BV, 0); unsigned SrcBitSize = SrcEltVT.getSizeInBits(); unsigned DstBitSize = DstEltVT.getSizeInBits(); // If this is a conversion of N elements of one type to N elements of another // type, convert each element. This handles FP<->INT cases. if (SrcBitSize == DstBitSize) { EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, BV->getValueType(0).getVectorNumElements()); // Due to the FP element handling below calling this routine recursively, // we can end up with a scalar-to-vector node here. 
if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR) return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, DAG.getBitcast(DstEltVT, BV->getOperand(0))); SmallVector Ops; for (SDValue Op : BV->op_values()) { // If the vector element type is not legal, the BUILD_VECTOR operands // are promoted and implicitly truncated. Make that explicit here. if (Op.getValueType() != SrcEltVT) Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op); Ops.push_back(DAG.getBitcast(DstEltVT, Op)); AddToWorklist(Ops.back().getNode()); } return DAG.getBuildVector(VT, SDLoc(BV), Ops); } // Otherwise, we're growing or shrinking the elements. To avoid having to // handle annoying details of growing/shrinking FP values, we convert them to // int first. if (SrcEltVT.isFloatingPoint()) { // Convert the input float vector to a int vector where the elements are the // same sizes. EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits()); BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode(); SrcEltVT = IntVT; } // Now we know the input is an integer vector. If the output is a FP type, // convert to integer first, then to FP of the right size. if (DstEltVT.isFloatingPoint()) { EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits()); SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode(); // Next, convert to FP elements of the same size. return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT); } SDLoc DL(BV); // Okay, we know the src/dst types are both integers of differing types. // Handling growing first. assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); if (SrcBitSize < DstBitSize) { unsigned NumInputsPerOutput = DstBitSize/SrcBitSize; SmallVector Ops; for (unsigned i = 0, e = BV->getNumOperands(); i != e; i += NumInputsPerOutput) { bool isLE = DAG.getDataLayout().isLittleEndian(); APInt NewBits = APInt(DstBitSize, 0); bool EltIsUndef = true; for (unsigned j = 0; j != NumInputsPerOutput; ++j) { // Shift the previously computed bits over. 
NewBits <<= SrcBitSize; SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j)); if (Op.isUndef()) continue; EltIsUndef = false; NewBits |= cast(Op)->getAPIntValue(). zextOrTrunc(SrcBitSize).zext(DstBitSize); } if (EltIsUndef) Ops.push_back(DAG.getUNDEF(DstEltVT)); else Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT)); } EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); return DAG.getBuildVector(VT, DL, Ops); } // Finally, this must be the case where we are shrinking elements: each input // turns into multiple outputs. unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, NumOutputsPerInput*BV->getNumOperands()); SmallVector Ops; for (const SDValue &Op : BV->op_values()) { if (Op.isUndef()) { Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT)); continue; } APInt OpVal = cast(Op)-> getAPIntValue().zextOrTrunc(SrcBitSize); for (unsigned j = 0; j != NumOutputsPerInput; ++j) { APInt ThisVal = OpVal.trunc(DstBitSize); Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT)); OpVal = OpVal.lshr(DstBitSize); } // For big endian targets, swap the order of the pieces of each element. if (DAG.getDataLayout().isBigEndian()) std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); } return DAG.getBuildVector(VT, DL, Ops); } /// Try to perform FMA combining on a given FADD node. SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); SDLoc SL(N); const TargetOptions &Options = DAG.getTarget().Options; bool AllowFusion = (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); // Floating-point multiply-add with intermediate rounding. bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); // Floating-point multiply-add without intermediate rounding. 
bool HasFMA = AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); // No valid opcode, do not combine. if (!HasFMAD && !HasFMA) return SDValue(); const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo(); ; if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel)) return SDValue(); // Always prefer FMAD to FMA for precision. unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); bool LookThroughFPExt = TLI.isFPExtFree(VT); // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), // prefer to fold the multiply with fewer uses. if (Aggressive && N0.getOpcode() == ISD::FMUL && N1.getOpcode() == ISD::FMUL) { if (N0.getNode()->use_size() > N1.getNode()->use_size()) std::swap(N0, N1); } // fold (fadd (fmul x, y), z) -> (fma x, y, z) if (N0.getOpcode() == ISD::FMUL && (Aggressive || N0->hasOneUse())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), N1); } // fold (fadd x, (fmul y, z)) -> (fma y, z, x) // Note: Commutes FADD operands. if (N1.getOpcode() == ISD::FMUL && (Aggressive || N1->hasOneUse())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0), N1.getOperand(1), N0); } // Look through FP_EXTEND nodes to do more combining. if (AllowFusion && LookThroughFPExt) { // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); if (N00.getOpcode() == ISD::FMUL) return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)), DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1); } // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x) // Note: Commutes FADD operands. 
if (N1.getOpcode() == ISD::FP_EXTEND) { SDValue N10 = N1.getOperand(0); if (N10.getOpcode() == ISD::FMUL) return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)), DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0); } } // More folding opportunities when target permits. if (Aggressive) { // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF // are currently only supported on binary nodes. if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode && N0.getOperand(2).getOpcode() == ISD::FMUL && N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(2).getOperand(0), N0.getOperand(2).getOperand(1), N1)); } // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF // are currently only supported on binary nodes. 
if (Options.UnsafeFPMath && N1->getOpcode() == PreferredFusedOpcode && N1.getOperand(2).getOpcode() == ISD::FMUL && N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0), N1.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(2).getOperand(0), N1.getOperand(2).getOperand(1), N0)); } if (AllowFusion && LookThroughFPExt) { // fold (fadd (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y, (fma (fpext u), (fpext v), z)) auto FoldFAddFMAFPExtFMul = [&] ( SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y, DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, U), DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z)); }; if (N0.getOpcode() == PreferredFusedOpcode) { SDValue N02 = N0.getOperand(2); if (N02.getOpcode() == ISD::FP_EXTEND) { SDValue N020 = N02.getOperand(0); if (N020.getOpcode() == ISD::FMUL) return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1), N020.getOperand(0), N020.getOperand(1), N1); } } // fold (fadd (fpext (fma x, y, (fmul u, v))), z) // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z)) // FIXME: This turns two single-precision and one double-precision // operation into two double-precision operations, which might not be // interesting for all targets, especially GPUs. 
auto FoldFAddFPExtFMAFMul = [&] ( SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X), DAG.getNode(ISD::FP_EXTEND, SL, VT, Y), DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, U), DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z)); }; if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); if (N00.getOpcode() == PreferredFusedOpcode) { SDValue N002 = N00.getOperand(2); if (N002.getOpcode() == ISD::FMUL) return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1), N002.getOperand(0), N002.getOperand(1), N1); } } // fold (fadd x, (fma y, z, (fpext (fmul u, v))) // -> (fma y, z, (fma (fpext u), (fpext v), x)) if (N1.getOpcode() == PreferredFusedOpcode) { SDValue N12 = N1.getOperand(2); if (N12.getOpcode() == ISD::FP_EXTEND) { SDValue N120 = N12.getOperand(0); if (N120.getOpcode() == ISD::FMUL) return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1), N120.getOperand(0), N120.getOperand(1), N0); } } // fold (fadd x, (fpext (fma y, z, (fmul u, v))) // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x)) // FIXME: This turns two single-precision and one double-precision // operation into two double-precision operations, which might not be // interesting for all targets, especially GPUs. if (N1.getOpcode() == ISD::FP_EXTEND) { SDValue N10 = N1.getOperand(0); if (N10.getOpcode() == PreferredFusedOpcode) { SDValue N102 = N10.getOperand(2); if (N102.getOpcode() == ISD::FMUL) return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1), N102.getOperand(0), N102.getOperand(1), N0); } } } } return SDValue(); } /// Try to perform FMA combining on a given FSUB node. 
SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
  // Rewrites an FSUB whose operands contain an FMUL (possibly hidden behind
  // FNEG, FP_EXTEND or another fused node) into FMAD/FMA nodes. An empty
  // SDValue is returned when fusion is not permitted or no pattern matches.
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;
  bool AllowFusion =
      Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath;

  // FMAD: multiply-add with intermediate rounding.
  bool HasFMAD = LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT);

  // FMA: multiply-add without intermediate rounding.
  bool HasFMA =
      AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // Neither fused opcode is usable: nothing to do.
  if (!(HasFMAD || HasFMA))
    return SDValue();

  // Leave the work to the machine combiner when the target asks for it.
  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // FMAD is preferred over FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
  bool LookThroughFPExt = TLI.isFPExtFree(VT);

  // Small node builders shared by every pattern below.
  auto Fuse = [&](SDValue A, SDValue B, SDValue C) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, C);
  };
  auto Neg = [&](SDValue V) { return DAG.getNode(ISD::FNEG, SL, VT, V); };
  auto Ext = [&](SDValue V) {
    return DAG.getNode(ISD::FP_EXTEND, SL, VT, V);
  };

  // (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
  if (LHS.getOpcode() == ISD::FMUL && (Aggressive || LHS->hasOneUse()))
    return Fuse(LHS.getOperand(0), LHS.getOperand(1), Neg(RHS));

  // (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
  // Note: Commutes FSUB operands.
  if (RHS.getOpcode() == ISD::FMUL && (Aggressive || RHS->hasOneUse()))
    return Fuse(Neg(RHS.getOperand(0)), RHS.getOperand(1), LHS);

  // (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
  if (LHS.getOpcode() == ISD::FNEG &&
      LHS.getOperand(0).getOpcode() == ISD::FMUL &&
      (Aggressive || (LHS->hasOneUse() && LHS.getOperand(0).hasOneUse()))) {
    SDValue Mul = LHS.getOperand(0);
    return Fuse(Neg(Mul.getOperand(0)), Mul.getOperand(1), Neg(RHS));
  }

  // Patterns that look through FP_EXTEND nodes.
  if (AllowFusion && LookThroughFPExt) {
    // (fsub (fpext (fmul x, y)), z)
    //   -> (fma (fpext x), (fpext y), (fneg z))
    if (LHS.getOpcode() == ISD::FP_EXTEND &&
        LHS.getOperand(0).getOpcode() == ISD::FMUL) {
      SDValue Mul = LHS.getOperand(0);
      return Fuse(Ext(Mul.getOperand(0)), Ext(Mul.getOperand(1)), Neg(RHS));
    }

    // (fsub x, (fpext (fmul y, z)))
    //   -> (fma (fneg (fpext y)), (fpext z), x)
    // Note: Commutes FSUB operands.
    if (RHS.getOpcode() == ISD::FP_EXTEND &&
        RHS.getOperand(0).getOpcode() == ISD::FMUL) {
      SDValue Mul = RHS.getOperand(0);
      return Fuse(Neg(Ext(Mul.getOperand(0))), Ext(Mul.getOperand(1)), LHS);
    }

    // (fsub (fpext (fneg (fmul x, y))), z)
    // (fsub (fneg (fpext (fmul x, y))), z)
    //   -> (fneg (fma (fpext x), (fpext y), z))
    // Note: These could be removed with appropriate canonicalization of the
    // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
    // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
    // from implementing the canonicalization in visitFSUB.
    unsigned OuterOpc = LHS.getOpcode();
    if (OuterOpc == ISD::FP_EXTEND || OuterOpc == ISD::FNEG) {
      // The inner node must be the "other" one of the FP_EXTEND/FNEG pair.
      unsigned InnerOpc =
          OuterOpc == ISD::FP_EXTEND ? ISD::FNEG : ISD::FP_EXTEND;
      SDValue Inner = LHS.getOperand(0);
      if (Inner.getOpcode() == InnerOpc) {
        SDValue Mul = Inner.getOperand(0);
        if (Mul.getOpcode() == ISD::FMUL)
          return Neg(
              Fuse(Ext(Mul.getOperand(0)), Ext(Mul.getOperand(1)), RHS));
      }
    }
  }

  // Everything below is only attempted when the target permits aggressive
  // fusion.
  if (!Aggressive)
    return SDValue();

  // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
  // are currently only supported on binary nodes.
  if (Options.UnsafeFPMath) {
    // (fsub (fma x, y, (fmul u, v)), z)
    //   -> (fma x, y (fma u, v, (fneg z)))
    if (LHS.getOpcode() == PreferredFusedOpcode &&
        LHS.getOperand(2).getOpcode() == ISD::FMUL && LHS->hasOneUse() &&
        LHS.getOperand(2)->hasOneUse()) {
      SDValue Mul = LHS.getOperand(2);
      return Fuse(LHS.getOperand(0), LHS.getOperand(1),
                  Fuse(Mul.getOperand(0), Mul.getOperand(1), Neg(RHS)));
    }

    // (fsub x, (fma y, z, (fmul u, v)))
    //   -> (fma (fneg y), z, (fma (fneg u), v, x))
    if (RHS.getOpcode() == PreferredFusedOpcode &&
        RHS.getOperand(2).getOpcode() == ISD::FMUL) {
      SDValue Mul = RHS.getOperand(2);
      return Fuse(Neg(RHS.getOperand(0)), RHS.getOperand(1),
                  Fuse(Neg(Mul.getOperand(0)), Mul.getOperand(1), LHS));
    }
  }

  if (!(AllowFusion && LookThroughFPExt))
    return SDValue();

  // (fsub (fma x, y, (fpext (fmul u, v))), z)
  //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
  if (LHS.getOpcode() == PreferredFusedOpcode &&
      LHS.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
    SDValue Mul = LHS.getOperand(2).getOperand(0);
    if (Mul.getOpcode() == ISD::FMUL)
      return Fuse(LHS.getOperand(0), LHS.getOperand(1),
                  Fuse(Ext(Mul.getOperand(0)), Ext(Mul.getOperand(1)),
                       Neg(RHS)));
  }

  // (fsub (fpext (fma x, y, (fmul u, v))), z)
  //   -> (fma (fpext x), (fpext y),
  //           (fma (fpext u), (fpext v), (fneg z)))
  // FIXME: This turns two single-precision and one double-precision
  // operation into two double-precision operations, which might not be
  // interesting for all targets, especially GPUs.
  if (LHS.getOpcode() == ISD::FP_EXTEND &&
      LHS.getOperand(0).getOpcode() == PreferredFusedOpcode) {
    SDValue InnerFused = LHS.getOperand(0);
    SDValue Mul = InnerFused.getOperand(2);
    if (Mul.getOpcode() == ISD::FMUL)
      return Fuse(Ext(InnerFused.getOperand(0)), Ext(InnerFused.getOperand(1)),
                  Fuse(Ext(Mul.getOperand(0)), Ext(Mul.getOperand(1)),
                       Neg(RHS)));
  }

  // (fsub x, (fma y, z, (fpext (fmul u, v))))
  //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
  if (RHS.getOpcode() == PreferredFusedOpcode &&
      RHS.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
    SDValue Mul = RHS.getOperand(2).getOperand(0);
    if (Mul.getOpcode() == ISD::FMUL)
      return Fuse(Neg(RHS.getOperand(0)), RHS.getOperand(1),
                  Fuse(Neg(Ext(Mul.getOperand(0))), Ext(Mul.getOperand(1)),
                       LHS));
  }

  // (fsub x, (fpext (fma y, z, (fmul u, v))))
  //   -> (fma (fneg (fpext y)), (fpext z),
  //           (fma (fneg (fpext u)), (fpext v), x))
  // FIXME: This turns two single-precision and one double-precision
  // operation into two double-precision operations, which might not be
  // interesting for all targets, especially GPUs.
  if (RHS.getOpcode() == ISD::FP_EXTEND &&
      RHS.getOperand(0).getOpcode() == PreferredFusedOpcode) {
    SDValue InnerFused = RHS.getOperand(0);
    SDValue Mul = InnerFused.getOperand(2);
    if (Mul.getOpcode() == ISD::FMUL)
      return Fuse(Neg(Ext(InnerFused.getOperand(0))),
                  Ext(InnerFused.getOperand(1)),
                  Fuse(Neg(Ext(Mul.getOperand(0))), Ext(Mul.getOperand(1)),
                       LHS));
  }

  return SDValue();
}

/// Try to perform FMA combining on a given FMUL node based on the distributive
/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
/// subtraction instead of addition).
SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");

  const TargetOptions &Options = DAG.getTarget().Options;

  // The transforms below are incorrect when x == 0 and y == inf, because the
  // intermediate multiplication produces a nan.
  if (!Options.NoInfsFPMath)
    return SDValue();

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // Floating-point multiply-add with intermediate rounding. This can result
  // in a less precise result due to the changed rounding order.
  bool HasFMAD = Options.UnsafeFPMath &&
                 (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Always prefer FMAD to FMA for precision.
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y) // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y)) auto FuseFADD = [&](SDValue X, SDValue Y) { if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) { auto XC1 = isConstOrConstSplatFP(X.getOperand(1)); if (XC1 && XC1->isExactlyValue(+1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y); if (XC1 && XC1->isExactlyValue(-1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, DAG.getNode(ISD::FNEG, SL, VT, Y)); } return SDValue(); }; if (SDValue FMA = FuseFADD(N0, N1)) return FMA; if (SDValue FMA = FuseFADD(N1, N0)) return FMA; // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y) // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y)) // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y)) // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y) auto FuseFSUB = [&](SDValue X, SDValue Y) { if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) { auto XC0 = isConstOrConstSplatFP(X.getOperand(0)); if (XC0 && XC0->isExactlyValue(+1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, Y); if (XC0 && XC0->isExactlyValue(-1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, DAG.getNode(ISD::FNEG, SL, VT, Y)); auto XC1 = isConstOrConstSplatFP(X.getOperand(1)); if (XC1 && XC1->isExactlyValue(+1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, DAG.getNode(ISD::FNEG, SL, VT, Y)); if (XC1 && XC1->isExactlyValue(-1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y); } return SDValue(); }; if (SDValue FMA = FuseFSUB(N0, N1)) return FMA; if (SDValue FMA = FuseFSUB(N1, N0)) return FMA; return SDValue(); } SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 
= N->getOperand(1); bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0); bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1); EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; const SDNodeFlags *Flags = &cast(N)->Flags; // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; // fold (fadd c1, c2) -> c1 + c2 if (N0CFP && N1CFP) return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags); // canonicalize constant to RHS if (N0CFP && !N1CFP) return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags); // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2) return DAG.getNode(ISD::FSUB, DL, VT, N0, GetNegatedExpression(N1, DAG, LegalOperations), Flags); // fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2) return DAG.getNode(ISD::FSUB, DL, VT, N1, GetNegatedExpression(N0, DAG, LegalOperations), Flags); // FIXME: Auto-upgrade the target/function-level option. if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) { // fold (fadd A, 0) -> A if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1)) if (N1C->isZero()) return N0; } // If 'unsafe math' is enabled, fold lots of things. if (Options.UnsafeFPMath) { // No FP constant should be created after legalization as Instruction // Selection pass has a hard time dealing with FP constants. 
bool AllowNewConst = (Level < AfterLegalizeDAG); // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags), Flags); // If allowed, fold (fadd (fneg x), x) -> 0.0 if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) return DAG.getConstantFP(0.0, DL, VT); // If allowed, fold (fadd x, (fneg x)) -> 0.0 if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) return DAG.getConstantFP(0.0, DL, VT); // We can fold chains of FADD's of the same value into multiplications. // This transform is not safe in general because we are reducing the number // of rounding steps. if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) { if (N0.getOpcode() == ISD::FMUL) { bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)); // (fadd (fmul x, c), x) -> (fmul x, c+1) if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), DAG.getConstantFP(1.0, DL, VT), Flags); return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags); } // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), DAG.getConstantFP(2.0, DL, VT), Flags); return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags); } } if (N1.getOpcode() == ISD::FMUL) { bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1)); // (fadd x, (fmul x, c)) -> (fmul x, c+1) if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { SDValue NewCFP = 
DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), DAG.getConstantFP(1.0, DL, VT), Flags); return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags); } // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N0.getOperand(0)) { SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), DAG.getConstantFP(2.0, DL, VT), Flags); return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags); } } if (N0.getOpcode() == ISD::FADD && AllowNewConst) { bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); // (fadd (fadd x, x), x) -> (fmul x, 3.0) if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) && (N0.getOperand(0) == N1)) { return DAG.getNode(ISD::FMUL, DL, VT, N1, DAG.getConstantFP(3.0, DL, VT), Flags); } } if (N1.getOpcode() == ISD::FADD && AllowNewConst) { bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); // (fadd x, (fadd x, x)) -> (fmul x, 3.0) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && N1.getOperand(0) == N0) { return DAG.getNode(ISD::FMUL, DL, VT, N0, DAG.getConstantFP(3.0, DL, VT), Flags); } } // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) if (AllowNewConst && N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT), Flags); } } } // enable-unsafe-fp-math // FADD -> FMA combines: if (SDValue Fused = visitFADDForFMACombine(N)) { AddToWorklist(Fused.getNode()); return Fused; } return SDValue(); } SDValue DAGCombiner::visitFSUB(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = 
DAG.getTarget().Options; const SDNodeFlags *Flags = &cast(N)->Flags; // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; // fold (fsub c1, c2) -> c1-c2 if (N0CFP && N1CFP) return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags); // fold (fsub A, (fneg B)) -> (fadd A, B) if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) return DAG.getNode(ISD::FADD, DL, VT, N0, GetNegatedExpression(N1, DAG, LegalOperations), Flags); // FIXME: Auto-upgrade the target/function-level option. if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) { // (fsub 0, B) -> -B if (N0CFP && N0CFP->isZero()) { if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags); } } // If 'unsafe math' is enabled, fold lots of things. if (Options.UnsafeFPMath) { // (fsub A, 0) -> A if (N1CFP && N1CFP->isZero()) return N0; // (fsub x, x) -> 0.0 if (N0 == N1) return DAG.getConstantFP(0.0f, DL, VT); // (fsub x, (fadd x, y)) -> (fneg y) // (fsub x, (fadd y, x)) -> (fneg y) if (N1.getOpcode() == ISD::FADD) { SDValue N10 = N1->getOperand(0); SDValue N11 = N1->getOperand(1); if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options)) return GetNegatedExpression(N11, DAG, LegalOperations); if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options)) return GetNegatedExpression(N10, DAG, LegalOperations); } } // FSUB -> FMA combines: if (SDValue Fused = visitFSUBForFMACombine(N)) { AddToWorklist(Fused.getNode()); return Fused; } return SDValue(); } SDValue DAGCombiner::visitFMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; const 
SDNodeFlags *Flags = &cast(N)->Flags; // fold vector ops if (VT.isVector()) { // This just handles C1 * C2 for vectors. Other vector folds are below. if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; } // fold (fmul c1, c2) -> c1*c2 if (N0CFP && N1CFP) return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags); // canonicalize constant to RHS if (isConstantFPBuildVectorOrConstantFP(N0) && !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags); // fold (fmul A, 1.0) -> A if (N1CFP && N1CFP->isExactlyValue(1.0)) return N0; if (Options.UnsafeFPMath) { // fold (fmul A, 0) -> 0 if (N1CFP && N1CFP->isZero()) return N1; // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) if (N0.getOpcode() == ISD::FMUL) { // Fold scalars or any vector constants (not just splats). // This fold is done in general by InstCombine, but extra fmul insts // may have been generated during lowering. SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); auto *BV1 = dyn_cast(N1); auto *BV00 = dyn_cast(N00); auto *BV01 = dyn_cast(N01); // Check 1: Make sure that the first operand of the inner multiply is NOT // a constant. Otherwise, we may induce infinite looping. if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) { // Check 2: Make sure that the second operand of the inner multiply and // the second operand of the outer multiply are constants. if ((N1CFP && isConstOrConstSplatFP(N01)) || (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags); return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags); } } } // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs // during an early run of DAGCombiner can prevent folding with fmuls // inserted during lowering. 
if (N0.getOpcode() == ISD::FADD && (N0.getOperand(0) == N0.getOperand(1)) && N0.hasOneUse()) { const SDValue Two = DAG.getConstantFP(2.0, DL, VT); SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags); return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags); } } // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags); // fold (fmul X, -1.0) -> (fneg X) if (N1CFP && N1CFP->isExactlyValue(-1.0)) if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, DL, VT, N0); // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) { if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. if (LHSNeg == 2 || RHSNeg == 2) return DAG.getNode(ISD::FMUL, DL, VT, GetNegatedExpression(N0, DAG, LegalOperations), GetNegatedExpression(N1, DAG, LegalOperations), Flags); } } // FMUL -> FMA combines: if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) { AddToWorklist(Fused.getNode()); return Fused; } return SDValue(); } SDValue DAGCombiner::visitFMA(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; // Constant fold FMA. if (isa(N0) && isa(N1) && isa(N2)) { return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2); } if (Options.UnsafeFPMath) { if (N0CFP && N0CFP->isZero()) return N2; if (N1CFP && N1CFP->isZero()) return N2; } // TODO: The FMA node should have flags that propagate to these nodes. 
if (N0CFP && N0CFP->isExactlyValue(1.0)) return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2); if (N1CFP && N1CFP->isExactlyValue(1.0)) return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2); // Canonicalize (fma c, x, y) -> (fma x, c, y) if (isConstantFPBuildVectorOrConstantFP(N0) && !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); // TODO: FMA nodes should have flags that propagate to the created nodes. // For now, create a Flags object for use with all unsafe math transforms. SDNodeFlags Flags; Flags.setUnsafeAlgebra(true); if (Options.UnsafeFPMath) { // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) && isConstantFPBuildVectorOrConstantFP(N1) && isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) { return DAG.getNode(ISD::FMUL, DL, VT, N0, DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1), &Flags), &Flags); } // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) if (N0.getOpcode() == ISD::FMUL && isConstantFPBuildVectorOrConstantFP(N1) && isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) { return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0), DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1), &Flags), N2); } } // (fma x, 1, y) -> (fadd x, y) // (fma x, -1, y) -> (fadd (fneg x), y) if (N1CFP) { if (N1CFP->isExactlyValue(1.0)) // TODO: The FMA node should have flags that propagate to this node. return DAG.getNode(ISD::FADD, DL, VT, N0, N2); if (N1CFP->isExactlyValue(-1.0) && (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0); AddToWorklist(RHSNeg.getNode()); // TODO: The FMA node should have flags that propagate to this node. 
return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg); } } if (Options.UnsafeFPMath) { // (fma x, c, x) -> (fmul x, (c+1)) if (N1CFP && N0 == N2) { return DAG.getNode(ISD::FMUL, DL, VT, N0, DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT), &Flags), &Flags); } // (fma x, c, (fneg x)) -> (fmul x, (c-1)) if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { return DAG.getNode(ISD::FMUL, DL, VT, N0, DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT), &Flags), &Flags); } } return SDValue(); } // Combine multiple FDIVs with the same divisor into multiple FMULs by the // reciprocal. // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) // Notice that this is not always beneficial. One reason is different targets // may have different costs for FDIV and FMUL, so sometimes the cost of two // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath; const SDNodeFlags *Flags = N->getFlags(); if (!UnsafeMath && !Flags->hasAllowReciprocal()) return SDValue(); // Skip if current node is a reciprocal. SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast(N0); if (N0CFP && N0CFP->isExactlyValue(1.0)) return SDValue(); // Exit early if the target does not want this transform or if there can't // possibly be enough uses of the divisor to make the transform worthwhile. SDValue N1 = N->getOperand(1); unsigned MinUses = TLI.combineRepeatedFPDivisors(); if (!MinUses || N1->use_size() < MinUses) return SDValue(); // Find all FDIV users of the same divisor. // Use a set because duplicates may be present in the user list. 
SetVector Users; for (auto *U : N1->uses()) { if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) { // This division is eligible for optimization only if global unsafe math // is enabled or if this division allows reciprocal formation. if (UnsafeMath || U->getFlags()->hasAllowReciprocal()) Users.insert(U); } } // Now that we have the actual number of divisor uses, make sure it meets // the minimum threshold specified by the target. if (Users.size() < MinUses) return SDValue(); EVT VT = N->getValueType(0); SDLoc DL(N); SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags); // Dividend / Divisor -> Dividend * Reciprocal for (auto *U : Users) { SDValue Dividend = U->getOperand(0); if (Dividend != FPOne) { SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend, Reciprocal, Flags); CombineTo(U, NewNode); } else if (U != Reciprocal.getNode()) { // In the absence of fast-math-flags, this user node is always the // same node as Reciprocal, but with FMF they may be different nodes. CombineTo(U, Reciprocal); } } return SDValue(N, 0); // N was replaced. } SDValue DAGCombiner::visitFDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; SDNodeFlags *Flags = &cast(N)->Flags; // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; // fold (fdiv c1, c2) -> c1/c2 if (N0CFP && N1CFP) return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags); if (Options.UnsafeFPMath) { // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. if (N1CFP) { // Compute the reciprocal 1.0 / c2. 
const APFloat &N1APF = N1CFP->getValueAPF(); APFloat Recip(N1APF.getSemantics(), 1); // 1.0 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); // Only do the transform if the reciprocal is a legal fp immediate that // isn't too nasty (eg NaN, denormal, ...). if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty (!LegalOperations || // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM // backend)... we should handle this gracefully after Legalize. // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || TLI.isFPImmLegal(Recip, VT))) return DAG.getNode(ISD::FMUL, DL, VT, N0, DAG.getConstantFP(Recip, DL, VT), Flags); } // If this FDIV is part of a reciprocal square root, it may be folded // into a target-specific square root estimate instruction. if (N1.getOpcode() == ISD::FSQRT) { if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) { return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } else if (N1.getOpcode() == ISD::FP_EXTEND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } else if (N1.getOpcode() == ISD::FP_ROUND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } else if (N1.getOpcode() == ISD::FMUL) { // Look through an FMUL. Even though this won't remove the FDIV directly, // it's still worthwhile to get rid of the FSQRT if possible. 
SDValue SqrtOp; SDValue OtherOp; if (N1.getOperand(0).getOpcode() == ISD::FSQRT) { SqrtOp = N1.getOperand(0); OtherOp = N1.getOperand(1); } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) { SqrtOp = N1.getOperand(1); OtherOp = N1.getOperand(0); } if (SqrtOp.getNode()) { // We found a FSQRT, so try to make this fold: // x / (y * sqrt(z)) -> x * (rsqrt(z) / y) if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) { RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags); AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } } // Fold into a reciprocal estimate and multiply instead of a real divide. if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) { AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) { if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. 
if (LHSNeg == 2 || RHSNeg == 2) return DAG.getNode(ISD::FDIV, SDLoc(N), VT, GetNegatedExpression(N0, DAG, LegalOperations), GetNegatedExpression(N1, DAG, LegalOperations), Flags); } } if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N)) return CombineRepeatedDivisors; return SDValue(); } SDValue DAGCombiner::visitFREM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); EVT VT = N->getValueType(0); // fold (frem c1, c2) -> fmod(c1,c2) if (N0CFP && N1CFP) return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, &cast(N)->Flags); return SDValue(); } SDValue DAGCombiner::visitFSQRT(SDNode *N) { if (!DAG.getTarget().Options.UnsafeFPMath) return SDValue(); SDValue N0 = N->getOperand(0); if (TLI.isFsqrtCheap(N0, DAG)) return SDValue(); // TODO: FSQRT nodes should have flags that propagate to the created nodes. // For now, create a Flags object for use with all unsafe math transforms. SDNodeFlags Flags; Flags.setUnsafeAlgebra(true); return buildSqrtEstimate(N0, &Flags); } /// copysign(x, fp_extend(y)) -> copysign(x, y) /// copysign(x, fp_round(y)) -> copysign(x, y) static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { SDValue N1 = N->getOperand(1); if ((N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)) { // Do not optimize out type conversion of f128 type yet. // For some targets like x86_64, configuration is changed to keep one f128 // value in one SSE register, but instruction selection cannot handle // FCOPYSIGN on SSE registers yet. 
EVT N1VT = N1->getValueType(0); EVT N1Op0VT = N1->getOperand(0)->getValueType(0); return (N1VT == N1Op0VT || N1Op0VT != MVT::f128); } return false; } SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); EVT VT = N->getValueType(0); if (N0CFP && N1CFP) // Constant fold return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1); if (N1CFP) { const APFloat &V = N1CFP->getValueAPF(); // copysign(x, c1) -> fabs(x) iff ispos(c1) // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) if (!V.isNegative()) { if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT)) return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); } else { if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, SDLoc(N), VT, DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0)); } } // copysign(fabs(x), y) -> copysign(x, y) // copysign(fneg(x), y) -> copysign(x, y) // copysign(copysign(x,z), y) -> copysign(x, y) if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1); // copysign(x, abs(y)) -> abs(x) if (N1.getOpcode() == ISD::FABS) return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); // copysign(x, copysign(y,z)) -> copysign(x, z) if (N1.getOpcode() == ISD::FCOPYSIGN) return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1)); // copysign(x, fp_extend(y)) -> copysign(x, y) // copysign(x, fp_round(y)) -> copysign(x, y) if (CanCombineFCOPYSIGN_EXTEND_ROUND(N)) return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0)); return SDValue(); } SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); EVT OpVT = N0.getValueType(); // fold (sint_to_fp c1) -> c1fp if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point 
values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); // If the input is a legal type, and SINT_TO_FP is not legal on this target, // but UINT_TO_FP is legal on this target, try to convert. if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) && TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) { // If the sign bit is known to be zero, we can change this to UINT_TO_FP. if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); } // The next optimizations are desirable only if SELECT_CC can be lowered. if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 && !VT.isVector() && (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { SDLoc DL(N); SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), N0.getOperand(2) }; return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); } // fold (sint_to_fp (zext (setcc x, y, cc))) -> // (select_cc x, y, 1.0, 0.0,, cc) if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() && (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { SDLoc DL(N); SDValue Ops[] = { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), N0.getOperand(0).getOperand(2) }; return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); } } return SDValue(); } SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); EVT OpVT = N0.getValueType(); // fold (uint_to_fp c1) -> c1fp if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || 
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); // If the input is a legal type, and UINT_TO_FP is not legal on this target, // but SINT_TO_FP is legal on this target, try to convert. if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) && TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) { // If the sign bit is known to be zero, we can change this to SINT_TO_FP. if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); } // The next optimizations are desirable only if SELECT_CC can be lowered. if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) if (N0.getOpcode() == ISD::SETCC && !VT.isVector() && (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { SDLoc DL(N); SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), N0.getOperand(2) }; return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); } } return SDValue(); } // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP) return SDValue(); SDValue Src = N0.getOperand(0); EVT SrcVT = Src.getValueType(); bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP; bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT; // We can safely assume the conversion won't overflow the output range, // because (for example) (uint8_t)18293.f is undefined behavior. // Since we can assume the conversion won't overflow, our decision as to // whether the input will fit in the float should depend on the minimum // of the input range and output range. 
// This means this is also safe for a signed input and unsigned output, since // a negative input would lead to undefined behavior. unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned; unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned; unsigned ActualSize = std::min(InputSize, OutputSize); const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType()); // We can only fold away the float conversion if the input range can be // represented exactly in the float range. if (APFloat::semanticsPrecision(sem) >= ActualSize) { if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) { unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; return DAG.getNode(ExtOp, SDLoc(N), VT, Src); } if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits()) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src); return DAG.getBitcast(VT, Src); } return SDValue(); } SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (fp_to_sint c1fp) -> c1 if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0); return FoldIntToFPToInt(N, DAG); } SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (fp_to_uint c1fp) -> c1 if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0); return FoldIntToFPToInt(N, DAG); } SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); EVT VT = N->getValueType(0); // fold (fp_round c1fp) -> c1fp if (N0CFP) return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1); // fold (fp_round (fp_extend x)) -> x if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType()) return N0.getOperand(0); // fold (fp_round (fp_round x)) -> (fp_round x) if (N0.getOpcode() == ISD::FP_ROUND) { 
const bool NIsTrunc = N->getConstantOperandVal(1) == 1; const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1; // Skip this folding if it results in an fp_round from f80 to f16. // // f80 to f16 always generates an expensive (and as yet, unimplemented) // libcall to __truncxfhf2 instead of selecting native f16 conversion // instructions from f32 or f64. Moreover, the first (value-preserving) // fp_round from f80 to either f32 or f64 may become a NOP in platforms like // x86. if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16) return SDValue(); // If the first fp_round isn't a value preserving truncation, it might // introduce a tie in the second fp_round, that wouldn't occur in the // single-step fp_round we want to fold to. // In other words, double rounding isn't the same as rounding. // Also, this is a value preserving truncation iff both fp_round's are. if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) { SDLoc DL(N); return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0), DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL)); } } // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) { SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT, N0.getOperand(0), N1); AddToWorklist(Tmp.getNode()); return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, Tmp, N0.getOperand(1)); } return SDValue(); } SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); EVT EVT = cast(N->getOperand(1))->getVT(); ConstantFPSDNode *N0CFP = dyn_cast(N0); // fold (fp_round_inreg c1fp) -> c1fp if (N0CFP && isTypeLegal(EVT)) { SDLoc DL(N); SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT); return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round); } return SDValue(); } SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // If this is fp_round(fpextend), don't fold it, 
allow ourselves to be folded. if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::FP_ROUND) return SDValue(); // fold (fp_extend c1fp) -> c1fp if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0); // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op) if (N0.getOpcode() == ISD::FP16_TO_FP && TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal) return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0)); // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the // value of X. if (N0.getOpcode() == ISD::FP_ROUND && N0.getConstantOperandVal(1) == 1) { SDValue In = N0.getOperand(0); if (In.getValueType() == VT) return In; if (VT.bitsLT(In.getValueType())) return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, In, N0.getOperand(1)); return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In); } // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) { LoadSDNode *LN0 = cast(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), N0.getValueType(), LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1, SDLoc(N0))), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} return SDValue(); } SDValue DAGCombiner::visitFCEIL(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (fceil c1) -> fceil(c1) if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0); return SDValue(); } SDValue DAGCombiner::visitFTRUNC(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (ftrunc c1) -> ftrunc(c1) if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); return SDValue(); } SDValue DAGCombiner::visitFFLOOR(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (ffloor c1) -> ffloor(c1) if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0); return SDValue(); } // FIXME: FNEG and FABS have a lot in common; refactor. SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // Constant fold FNEG. if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), &DAG.getTarget().Options)) return GetNegatedExpression(N0, DAG, LegalOperations); // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading // constant pool values. if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse()) { SDValue Int = N0.getOperand(0); EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { APInt SignMask; if (N0.getValueType().isVector()) { // For a vector, get a mask such as 0x80... per scalar element // and splat it. SignMask = APInt::getSignBit(N0.getScalarValueSizeInBits()); SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); } else { // For a scalar, just generate 0x80... 
SignMask = APInt::getSignBit(IntVT.getSizeInBits()); } SDLoc DL0(N0); Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int, DAG.getConstant(SignMask, DL0, IntVT)); AddToWorklist(Int.getNode()); return DAG.getBitcast(VT, Int); } } // (fneg (fmul c, x)) -> (fmul -c, x) if (N0.getOpcode() == ISD::FMUL && (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) { ConstantFPSDNode *CFP1 = dyn_cast(N0.getOperand(1)); if (CFP1) { APFloat CVal = CFP1->getValueAPF(); CVal.changeSign(); if (Level >= AfterLegalizeDAG && (TLI.isFPImmLegal(CVal, VT) || TLI.isOperationLegal(ISD::ConstantFP, VT))) return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)), &cast(N0)->Flags); } } return SDValue(); } SDValue DAGCombiner::visitFMINNUM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); if (N0CFP && N1CFP) { const APFloat &C0 = N0CFP->getValueAPF(); const APFloat &C1 = N1CFP->getValueAPF(); return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT); } // Canonicalize to constant on RHS. if (isConstantFPBuildVectorOrConstantFP(N0) && !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0); return SDValue(); } SDValue DAGCombiner::visitFMAXNUM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); if (N0CFP && N1CFP) { const APFloat &C0 = N0CFP->getValueAPF(); const APFloat &C1 = N1CFP->getValueAPF(); return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT); } // Canonicalize to constant on RHS. 
if (isConstantFPBuildVectorOrConstantFP(N0) && !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0); return SDValue(); } SDValue DAGCombiner::visitFABS(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (fabs c1) -> fabs(c1) if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); // fold (fabs (fabs x)) -> (fabs x) if (N0.getOpcode() == ISD::FABS) return N->getOperand(0); // fold (fabs (fneg x)) -> (fabs x) // fold (fabs (fcopysign x, y)) -> (fabs x) if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0)); // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading // constant pool values. if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse()) { SDValue Int = N0.getOperand(0); EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { APInt SignMask; if (N0.getValueType().isVector()) { // For a vector, get a mask such as 0x7f... per scalar element // and splat it. SignMask = ~APInt::getSignBit(N0.getScalarValueSizeInBits()); SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); } else { // For a scalar, just generate 0x7f... SignMask = ~APInt::getSignBit(IntVT.getSizeInBits()); } SDLoc DL(N0); Int = DAG.getNode(ISD::AND, DL, IntVT, Int, DAG.getConstant(SignMask, DL, IntVT)); AddToWorklist(Int.getNode()); return DAG.getBitcast(N->getValueType(0), Int); } } return SDValue(); } SDValue DAGCombiner::visitBRCOND(SDNode *N) { SDValue Chain = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); // If N is a constant we could fold this into a fallthrough or unconditional // branch. However that doesn't happen very often in normal code, because // Instcombine/SimplifyCFG should have handled the available opportunities. 
// If we did this folding here, it would be necessary to update the // MachineBasicBlock CFG, which is awkward. // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal // on the target. if (N1.getOpcode() == ISD::SETCC && TLI.isOperationLegalOrCustom(ISD::BR_CC, N1.getOperand(0).getValueType())) { return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, Chain, N1.getOperand(2), N1.getOperand(0), N1.getOperand(1), N2); } if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) || ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) && (N1.getOperand(0).hasOneUse() && N1.getOperand(0).getOpcode() == ISD::SRL))) { SDNode *Trunc = nullptr; if (N1.getOpcode() == ISD::TRUNCATE) { // Look pass the truncate. Trunc = N1.getNode(); N1 = N1.getOperand(0); } // Match this pattern so that we can generate simpler code: // // %a = ... // %b = and i32 %a, 2 // %c = srl i32 %b, 1 // brcond i32 %c ... // // into // // %a = ... // %b = and i32 %a, 2 // %c = setcc eq %b, 0 // brcond %c ... // // This applies only when the AND constant value has one bit set and the // SRL constant is equal to the log2 of the AND constant. The back-end is // smart enough to convert the result into a TEST/JMP sequence. SDValue Op0 = N1.getOperand(0); SDValue Op1 = N1.getOperand(1); if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) { SDValue AndOp1 = Op0.getOperand(1); if (AndOp1.getOpcode() == ISD::Constant) { const APInt &AndConst = cast(AndOp1)->getAPIntValue(); if (AndConst.isPowerOf2() && cast(Op1)->getAPIntValue()==AndConst.logBase2()) { SDLoc DL(N); SDValue SetCC = DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()), Op0, DAG.getConstant(0, DL, Op0.getValueType()), ISD::SETNE); SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, Chain, SetCC, N2); // Don't add the new BRCond into the worklist or else SimplifySelectCC // will convert it back to (X & C1) >> C2. CombineTo(N, NewBRCond, false); // Truncate is dead. 
if (Trunc) deleteAndRecombine(Trunc); // Replace the uses of SRL with SETCC WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N1, SetCC); deleteAndRecombine(N1.getNode()); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } } if (Trunc) // Restore N1 if the above transformation doesn't match. N1 = N->getOperand(1); } // Transform br(xor(x, y)) -> br(x != y) // Transform br(xor(xor(x,y), 1)) -> br (x == y) if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) { SDNode *TheXor = N1.getNode(); SDValue Op0 = TheXor->getOperand(0); SDValue Op1 = TheXor->getOperand(1); if (Op0.getOpcode() == Op1.getOpcode()) { // Avoid missing important xor optimizations. if (SDValue Tmp = visitXOR(TheXor)) { if (Tmp.getNode() != TheXor) { DEBUG(dbgs() << "\nReplacing.8 "; TheXor->dump(&DAG); dbgs() << "\nWith: "; Tmp.getNode()->dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N1, Tmp); deleteAndRecombine(TheXor); return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, Tmp, N2); } // visitXOR has changed XOR's operands or replaced the XOR completely, // bail out. return SDValue(N, 0); } } if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { bool Equal = false; if (isOneConstant(Op0) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR) { TheXor = Op0.getNode(); Equal = true; } EVT SetCCVT = N1.getValueType(); if (LegalTypes) SetCCVT = getSetCCResultType(SetCCVT); SDValue SetCC = DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1, Equal ? ISD::SETEQ : ISD::SETNE); // Replace the uses of XOR with SETCC WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N1, SetCC); deleteAndRecombine(N1.getNode()); return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, SetCC, N2); } } return SDValue(); } // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB. 
// SDValue DAGCombiner::visitBR_CC(SDNode *N) { CondCodeSDNode *CC = cast(N->getOperand(1)); SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3); // If N is a constant we could fold this into a fallthrough or unconditional // branch. However that doesn't happen very often in normal code, because // Instcombine/SimplifyCFG should have handled the available opportunities. // If we did this folding here, it would be necessary to update the // MachineBasicBlock CFG, which is awkward. // Use SimplifySetCC to simplify SETCC's. SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()), CondLHS, CondRHS, CC->get(), SDLoc(N), false); if (Simp.getNode()) AddToWorklist(Simp.getNode()); // fold to a simpler setcc if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, N->getOperand(0), Simp.getOperand(2), Simp.getOperand(0), Simp.getOperand(1), N->getOperand(4)); return SDValue(); } /// Return true if 'Use' is a load or a store that uses N as its base pointer /// and that N may be folded in the load / store addressing mode. 
static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG, const TargetLowering &TLI) { EVT VT; unsigned AS; if (LoadSDNode *LD = dyn_cast(Use)) { if (LD->isIndexed() || LD->getBasePtr().getNode() != N) return false; VT = LD->getMemoryVT(); AS = LD->getAddressSpace(); } else if (StoreSDNode *ST = dyn_cast(Use)) { if (ST->isIndexed() || ST->getBasePtr().getNode() != N) return false; VT = ST->getMemoryVT(); AS = ST->getAddressSpace(); } else return false; TargetLowering::AddrMode AM; if (N->getOpcode() == ISD::ADD) { ConstantSDNode *Offset = dyn_cast(N->getOperand(1)); if (Offset) // [reg +/- imm] AM.BaseOffs = Offset->getSExtValue(); else // [reg +/- reg] AM.Scale = 1; } else if (N->getOpcode() == ISD::SUB) { ConstantSDNode *Offset = dyn_cast(N->getOperand(1)); if (Offset) // [reg +/- imm] AM.BaseOffs = -Offset->getSExtValue(); else // [reg +/- reg] AM.Scale = 1; } else return false; return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, VT.getTypeForEVT(*DAG.getContext()), AS); } /// Try turning a load/store into a pre-indexed load/store when the base /// pointer is an add or subtract and it has other uses besides the load/store. /// After the transformation, the new indexed load/store has effectively folded /// the add/subtract in and all of its other uses are redirected to the /// new load/store. 
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // Nothing is done before the AfterLegalizeDAG combine level.
  if (Level < AfterLegalizeDAG)
    return false;

  // N must be an unindexed load or store whose memory type supports at least
  // one of the pre-inc / pre-dec forms on this target.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create an indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Caches for hasPredecessorHelper.
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;
  Worklist.push_back(N);

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
        continue;

      // Any use that is not "BasePtr +/- constant of the offset's type"
      // abandons the whole rewrite of other uses.
      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      // (OperandNo + 1) & 1 selects the operand that is not this use.
      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.4 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        Result.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // For a load, old results 0 and 1 map to new results 0 and 2; for a store,
  // old result 0 maps to new result 1. The remaining new result (1 for loads,
  // 0 for stores) is the updated pointer used further down.
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    const APInt &Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());

  return true;
}

/// Try to combine a load/store with a add/sub of the base pointer node into a
/// post-indexed load/store. The transformation folded the add/subtract into the
/// new indexed load/store effectively and all of its uses are redirected to the
/// new load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // Nothing is done before the AfterLegalizeDAG combine level.
  if (Level < AfterLegalizeDAG)
    return false;

  // N must be an unindexed load or store whose memory type supports at least
  // one of the post-inc / post-dec forms on this target.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // A pointer used only by N has no add/sub user to fold.
  if (Ptr.getNode()->hasOneUse())
    return false;

  // Look at every ADD / SUB user of the pointer as a candidate increment.
  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
      if (isNullConstant(Offset))
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        DEBUG(dbgs() << "\nReplacing.5 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Result.getNode()->dump(&DAG);
              dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        // Same result mapping as in the pre-indexed case: loads map results
        // 0 and 1 to new results 0 and 2, stores map result 0 to result 1.
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}

/// \brief Return the base-pointer arithmetic from an indexed \p LD.
SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) { ISD::MemIndexedMode AM = LD->getAddressingMode(); assert(AM != ISD::UNINDEXED); SDValue BP = LD->getOperand(1); SDValue Inc = LD->getOperand(2); // Some backends use TargetConstants for load offsets, but don't expect // TargetConstants in general ADD nodes. We can convert these constants into // regular Constants (if the constant is not opaque). assert((Inc.getOpcode() != ISD::TargetConstant || !cast(Inc)->isOpaque()) && "Cannot split out indexing using opaque target constants"); if (Inc.getOpcode() == ISD::TargetConstant) { ConstantSDNode *ConstInc = cast(Inc); Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc), ConstInc->getValueType(0)); } unsigned Opc = (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB); return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc); } SDValue DAGCombiner::visitLOAD(SDNode *N) { LoadSDNode *LD = cast(N); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); // If load is not volatile and there are no uses of the loaded value (and // the updated indexed value in case of indexed loads), change uses of the // chain value into uses of the chain input (i.e. delete the dead load). if (!LD->isVolatile()) { if (N->getValueType(1) == MVT::Other) { // Unindexed loads. if (!N->hasAnyUseOfValue(0)) { // It's not safe to use the two value CombineTo variant here. e.g. // v1, chain2 = load chain1, loc // v2, chain3 = load chain2, loc // v3 = add v2, c // Now we replace use of chain2 with chain1. This makes the second load // isomorphic to the one we are deleting, and thus makes this load live. DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG); dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG); dbgs() << "\n"); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); if (N->use_empty()) deleteAndRecombine(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } else { // Indexed loads. 
assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); // If this load has an opaque TargetConstant offset, then we cannot split // the indexing into an add/sub directly (that TargetConstant may not be // valid for a different type of node, and we cannot convert an opaque // target constant into a regular constant). bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant && cast(LD->getOperand(2))->isOpaque(); if (!N->hasAnyUseOfValue(0) && ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); SDValue Index; if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) { Index = SplitIndexingFromLoad(LD); // Try to fold the base pointer arithmetic into subsequent loads and // stores. AddUsersToWorklist(N); } else Index = DAG.getUNDEF(N->getValueType(1)); DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG); dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG); dbgs() << " and 2 other values\n"); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain); deleteAndRecombine(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } } // If this load is directly stored, replace the load value with the stored // value. // TODO: Handle store large -> read small portion. // TODO: Handle TRUNCSTORE/LOADEXT if (OptLevel != CodeGenOpt::None && ISD::isNormalLoad(N) && !LD->isVolatile()) { if (ISD::isNON_TRUNCStore(Chain.getNode())) { StoreSDNode *PrevST = cast(Chain); if (PrevST->getBasePtr() == Ptr && PrevST->getValue().getValueType() == N->getValueType(0)) return CombineTo(N, Chain.getOperand(1), Chain); } } // Try to infer better alignment information than the load already has. 
if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > LD->getMemOperand()->getBaseAlignment()) { SDValue NewLoad = DAG.getExtLoad( LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr, LD->getPointerInfo(), LD->getMemoryVT(), Align, LD->getMemOperand()->getFlags(), LD->getAAInfo()); if (NewLoad.getNode() != N) return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); } } } bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : DAG.getSubtarget().useAA(); #ifndef NDEBUG if (CombinerAAOnlyFunc.getNumOccurrences() && CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) UseAA = false; #endif if (UseAA && LD->isUnindexed()) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); // If there is a better chain. if (Chain != BetterChain) { SDValue ReplLoad; // Replace the chain to void dependency. if (LD->getExtensionType() == ISD::NON_EXTLOAD) { ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD), BetterChain, Ptr, LD->getMemOperand()); } else { ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), LD->getValueType(0), BetterChain, Ptr, LD->getMemoryVT(), LD->getMemOperand()); } // Create token factor to keep old chain connected. SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Chain, ReplLoad.getValue(1)); // Make sure the new and old chains are cleaned up. AddToWorklist(Token.getNode()); // Replace uses with load result and token factor. Don't add users // to work list. return CombineTo(N, ReplLoad.getValue(0), Token, false); } } // Try transforming N to an indexed load. if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) return SDValue(N, 0); // Try to slice up N to more direct loads if the slices are mapped to // different register banks or pairing can take place. 
if (SliceUpLoad(N)) return SDValue(N, 0); return SDValue(); } namespace { /// \brief Helper structure used to slice a load in smaller loads. /// Basically a slice is obtained from the following sequence: /// Origin = load Ty1, Base /// Shift = srl Ty1 Origin, CstTy Amount /// Inst = trunc Shift to Ty2 /// /// Then, it will be rewriten into: /// Slice = load SliceTy, Base + SliceOffset /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2 /// /// SliceTy is deduced from the number of bits that are actually used to /// build Inst. struct LoadedSlice { /// \brief Helper structure used to compute the cost of a slice. struct Cost { /// Are we optimizing for code size. bool ForCodeSize; /// Various cost. unsigned Loads; unsigned Truncates; unsigned CrossRegisterBanksCopies; unsigned ZExts; unsigned Shift; Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize), Loads(0), Truncates(0), CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {} /// \brief Get the cost of one isolated slice. Cost(const LoadedSlice &LS, bool ForCodeSize = false) : ForCodeSize(ForCodeSize), Loads(1), Truncates(0), CrossRegisterBanksCopies(0), ZExts(0), Shift(0) { EVT TruncType = LS.Inst->getValueType(0); EVT LoadedType = LS.getLoadedType(); if (TruncType != LoadedType && !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType)) ZExts = 1; } /// \brief Account for slicing gain in the current cost. /// Slicing provide a few gains like removing a shift or a /// truncate. This method allows to grow the cost of the original /// load with the gain from this slice. void addSliceGain(const LoadedSlice &LS) { // Each slice saves a truncate. const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo(); if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(), LS.Inst->getValueType(0))) ++Truncates; // If there is a shift amount, this slice gets rid of it. if (LS.Shift) ++Shift; // If this slice can merge a cross register bank copy, account for it. 
if (LS.canMergeExpensiveCrossRegisterBankCopy()) ++CrossRegisterBanksCopies; } Cost &operator+=(const Cost &RHS) { Loads += RHS.Loads; Truncates += RHS.Truncates; CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies; ZExts += RHS.ZExts; Shift += RHS.Shift; return *this; } bool operator==(const Cost &RHS) const { return Loads == RHS.Loads && Truncates == RHS.Truncates && CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies && ZExts == RHS.ZExts && Shift == RHS.Shift; } bool operator!=(const Cost &RHS) const { return !(*this == RHS); } bool operator<(const Cost &RHS) const { // Assume cross register banks copies are as expensive as loads. // FIXME: Do we want some more target hooks? unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies; unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies; // Unless we are optimizing for code size, consider the // expensive operation first. if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS) return ExpensiveOpsLHS < ExpensiveOpsRHS; return (Truncates + ZExts + Shift + ExpensiveOpsLHS) < (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS); } bool operator>(const Cost &RHS) const { return RHS < *this; } bool operator<=(const Cost &RHS) const { return !(RHS < *this); } bool operator>=(const Cost &RHS) const { return !(*this < RHS); } }; // The last instruction that represent the slice. This should be a // truncate instruction. SDNode *Inst; // The original load instruction. LoadSDNode *Origin; // The right shift amount in bits from the original load. unsigned Shift; // The DAG from which Origin came from. // This is used to get some contextual information about legal types, etc. SelectionDAG *DAG; LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr, unsigned Shift = 0, SelectionDAG *DAG = nullptr) : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {} /// \brief Get the bits used in a chunk of bits \p BitWidth large. 
/// \return Result is \p BitWidth and has used bits set to 1 and /// not used bits set to 0. APInt getUsedBits() const { // Reproduce the trunc(lshr) sequence: // - Start from the truncated value. // - Zero extend to the desired bit width. // - Shift left. assert(Origin && "No original load to compare against."); unsigned BitWidth = Origin->getValueSizeInBits(0); assert(Inst && "This slice is not bound to an instruction"); assert(Inst->getValueSizeInBits(0) <= BitWidth && "Extracted slice is bigger than the whole type!"); APInt UsedBits(Inst->getValueSizeInBits(0), 0); UsedBits.setAllBits(); UsedBits = UsedBits.zext(BitWidth); UsedBits <<= Shift; return UsedBits; } /// \brief Get the size of the slice to be loaded in bytes. unsigned getLoadedSize() const { unsigned SliceSize = getUsedBits().countPopulation(); assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte."); return SliceSize / 8; } /// \brief Get the type that will be loaded for this slice. /// Note: This may not be the final type for the slice. EVT getLoadedType() const { assert(DAG && "Missing context"); LLVMContext &Ctxt = *DAG->getContext(); return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8); } /// \brief Get the alignment of the load used for this slice. unsigned getAlignment() const { unsigned Alignment = Origin->getAlignment(); unsigned Offset = getOffsetFromBase(); if (Offset != 0) Alignment = MinAlign(Alignment, Alignment + Offset); return Alignment; } /// \brief Check if this slice can be rewritten with legal operations. bool isLegal() const { // An invalid slice is not legal. if (!Origin || !Inst || !DAG) return false; // Offsets are for indexed load only, we do not handle that. if (!Origin->getOffset().isUndef()) return false; const TargetLowering &TLI = DAG->getTargetLoweringInfo(); // Check that the type is legal. EVT SliceType = getLoadedType(); if (!TLI.isTypeLegal(SliceType)) return false; // Check that the load is legal for this type. 
if (!TLI.isOperationLegal(ISD::LOAD, SliceType)) return false; // Check that the offset can be computed. // 1. Check its type. EVT PtrType = Origin->getBasePtr().getValueType(); if (PtrType == MVT::Untyped || PtrType.isExtended()) return false; // 2. Check that it fits in the immediate. if (!TLI.isLegalAddImmediate(getOffsetFromBase())) return false; // 3. Check that the computation is legal. if (!TLI.isOperationLegal(ISD::ADD, PtrType)) return false; // Check that the zext is legal if it needs one. EVT TruncateType = Inst->getValueType(0); if (TruncateType != SliceType && !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType)) return false; return true; } /// \brief Get the offset in bytes of this slice in the original chunk of /// bits. /// \pre DAG != nullptr. uint64_t getOffsetFromBase() const { assert(DAG && "Missing context."); bool IsBigEndian = DAG->getDataLayout().isBigEndian(); assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported."); uint64_t Offset = Shift / 8; unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8; assert(!(Origin->getValueSizeInBits(0) & 0x7) && "The size of the original loaded type is not a multiple of a" " byte."); // If Offset is bigger than TySizeInBytes, it means we are loading all // zeros. This should have been optimized before in the process. assert(TySizeInBytes > Offset && "Invalid shift amount for given loaded size"); if (IsBigEndian) Offset = TySizeInBytes - Offset - getLoadedSize(); return Offset; } /// \brief Generate the sequence of instructions to load the slice /// represented by this object and redirect the uses of this slice to /// this new sequence of instructions. /// \pre this->Inst && this->Origin are valid Instructions and this /// object passed the legal check: LoadedSlice::isLegal returned true. /// \return The last instruction of the sequence used to load the slice. 
SDValue loadSlice() const { assert(Inst && Origin && "Unable to replace a non-existing slice."); const SDValue &OldBaseAddr = Origin->getBasePtr(); SDValue BaseAddr = OldBaseAddr; // Get the offset in that chunk of bytes w.r.t. the endianness. int64_t Offset = static_cast(getOffsetFromBase()); assert(Offset >= 0 && "Offset too big to fit in int64_t!"); if (Offset) { // BaseAddr = BaseAddr + Offset. EVT ArithType = BaseAddr.getValueType(); SDLoc DL(Origin); BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr, DAG->getConstant(Offset, DL, ArithType)); } // Create the type of the loaded slice according to its size. EVT SliceType = getLoadedType(); // Create the load for the slice. SDValue LastInst = DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr, Origin->getPointerInfo().getWithOffset(Offset), getAlignment(), Origin->getMemOperand()->getFlags()); // If the final type is not the same as the loaded type, this means that // we have to pad with zero. Create a zero extend for that. EVT FinalType = Inst->getValueType(0); if (SliceType != FinalType) LastInst = DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst); return LastInst; } /// \brief Check if this slice can be merged with an expensive cross register /// bank copy. E.g., /// i = load i32 /// f = bitcast i32 i to float bool canMergeExpensiveCrossRegisterBankCopy() const { if (!Inst || !Inst->hasOneUse()) return false; SDNode *Use = *Inst->use_begin(); if (Use->getOpcode() != ISD::BITCAST) return false; assert(DAG && "Missing context"); const TargetLowering &TLI = DAG->getTargetLoweringInfo(); EVT ResVT = Use->getValueType(0); const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT()); const TargetRegisterClass *ArgRC = TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT()); if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT)) return false; // At this point, we know that we perform a cross-register-bank copy. // Check if it is expensive. 
const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo(); // Assume bitcasts are cheap, unless both register classes do not // explicitly share a common sub class. if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC)) return false; // Check if it will be merged with the load. // 1. Check the alignment constraint. unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment( ResVT.getTypeForEVT(*DAG->getContext())); if (RequiredAlignment > getAlignment()) return false; // 2. Check that the load is a legal operation for that type. if (!TLI.isOperationLegal(ISD::LOAD, ResVT)) return false; // 3. Check that we do not have a zext in the way. if (Inst->getValueType(0) != getLoadedType()) return false; return true; } }; } /// \brief Check that all bits set in \p UsedBits form a dense region, i.e., /// \p UsedBits looks like 0..0 1..1 0..0. static bool areUsedBitsDense(const APInt &UsedBits) { // If all the bits are one, this is dense! if (UsedBits.isAllOnesValue()) return true; // Get rid of the unused bits on the right. APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros()); // Get rid of the unused bits on the left. if (NarrowedUsedBits.countLeadingZeros()) NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits()); // Check that the chunk of bits is completely used. return NarrowedUsedBits.isAllOnesValue(); } /// \brief Check whether or not \p First and \p Second are next to each other /// in memory. This means that there is no hole between the bits loaded /// by \p First and the bits loaded by \p Second. 
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second) { assert(First.Origin == Second.Origin && First.Origin && "Unable to match different memory origins."); APInt UsedBits = First.getUsedBits(); assert((UsedBits & Second.getUsedBits()) == 0 && "Slices are not supposed to overlap."); UsedBits |= Second.getUsedBits(); return areUsedBitsDense(UsedBits); } /// \brief Adjust the \p GlobalLSCost according to the target /// paring capabilities and the layout of the slices. /// \pre \p GlobalLSCost should account for at least as many loads as /// there is in the slices in \p LoadedSlices. static void adjustCostForPairing(SmallVectorImpl &LoadedSlices, LoadedSlice::Cost &GlobalLSCost) { unsigned NumberOfSlices = LoadedSlices.size(); // If there is less than 2 elements, no pairing is possible. if (NumberOfSlices < 2) return; // Sort the slices so that elements that are likely to be next to each // other in memory are next to each other in the list. std::sort(LoadedSlices.begin(), LoadedSlices.end(), [](const LoadedSlice &LHS, const LoadedSlice &RHS) { assert(LHS.Origin == RHS.Origin && "Different bases not implemented."); return LHS.getOffsetFromBase() < RHS.getOffsetFromBase(); }); const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo(); // First (resp. Second) is the first (resp. Second) potentially candidate // to be placed in a paired load. const LoadedSlice *First = nullptr; const LoadedSlice *Second = nullptr; for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice, // Set the beginning of the pair. First = Second) { Second = &LoadedSlices[CurrSlice]; // If First is NULL, it means we start a new pair. // Get to the next slice. if (!First) continue; EVT LoadedType = First->getLoadedType(); // If the types of the slices are different, we cannot pair them. if (LoadedType != Second->getLoadedType()) continue; // Check if the target supplies paired loads for this type. 
unsigned RequiredAlignment = 0; if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) { // move to the next pair, this type is hopeless. Second = nullptr; continue; } // Check if we meet the alignment requirement. if (RequiredAlignment > First->getAlignment()) continue; // Check that both loads are next to each other in memory. if (!areSlicesNextToEachOther(*First, *Second)) continue; assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!"); --GlobalLSCost.Loads; // Move to the next pair. Second = nullptr; } } /// \brief Check the profitability of all involved LoadedSlice. /// Currently, it is considered profitable if there is exactly two /// involved slices (1) which are (2) next to each other in memory, and /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3). /// /// Note: The order of the elements in \p LoadedSlices may be modified, but not /// the elements themselves. /// /// FIXME: When the cost model will be mature enough, we can relax /// constraints (1) and (2). static bool isSlicingProfitable(SmallVectorImpl &LoadedSlices, const APInt &UsedBits, bool ForCodeSize) { unsigned NumberOfSlices = LoadedSlices.size(); if (StressLoadSlicing) return NumberOfSlices > 1; // Check (1). if (NumberOfSlices != 2) return false; // Check (2). if (!areUsedBitsDense(UsedBits)) return false; // Check (3). LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize); // The original code has one big load. OrigCost.Loads = 1; for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) { const LoadedSlice &LS = LoadedSlices[CurrSlice]; // Accumulate the cost of all the slices. LoadedSlice::Cost SliceCost(LS, ForCodeSize); GlobalSlicingCost += SliceCost; // Account as cost in the original configuration the gain obtained // with the current slices. OrigCost.addSliceGain(LS); } // If the target supports paired load, adjust the cost accordingly. 
adjustCostForPairing(LoadedSlices, GlobalSlicingCost); return OrigCost > GlobalSlicingCost; } /// \brief If the given load, \p LI, is used only by trunc or trunc(lshr) /// operations, split it in the various pieces being extracted. /// /// This sort of thing is introduced by SROA. /// This slicing takes care not to insert overlapping loads. /// \pre LI is a simple load (i.e., not an atomic or volatile load). bool DAGCombiner::SliceUpLoad(SDNode *N) { if (Level < AfterLegalizeDAG) return false; LoadSDNode *LD = cast(N); if (LD->isVolatile() || !ISD::isNormalLoad(LD) || !LD->getValueType(0).isInteger()) return false; // Keep track of already used bits to detect overlapping values. // In that case, we will just abort the transformation. APInt UsedBits(LD->getValueSizeInBits(0), 0); SmallVector LoadedSlices; // Check if this load is used as several smaller chunks of bits. // Basically, look for uses in trunc or trunc(lshr) and record a new chain // of computation for each trunc. for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); UI != UIEnd; ++UI) { // Skip the uses of the chain. if (UI.getUse().getResNo() != 0) continue; SDNode *User = *UI; unsigned Shift = 0; // Check if this is a trunc(lshr). if (User->getOpcode() == ISD::SRL && User->hasOneUse() && isa(User->getOperand(1))) { Shift = cast(User->getOperand(1))->getZExtValue(); User = *User->use_begin(); } // At this point, User is a Truncate, iff we encountered, trunc or // trunc(lshr). if (User->getOpcode() != ISD::TRUNCATE) return false; // The width of the type must be a power of 2 and greater than 8-bits. // Otherwise the load cannot be represented in LLVM IR. // Moreover, if we shifted with a non-8-bits multiple, the slice // will be across several bytes. We do not support that. unsigned Width = User->getValueSizeInBits(0); if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7)) return 0; // Build the slice for this chain of computations. 
LoadedSlice LS(User, LD, Shift, &DAG); APInt CurrentUsedBits = LS.getUsedBits(); // Check if this slice overlaps with another. if ((CurrentUsedBits & UsedBits) != 0) return false; // Update the bits used globally. UsedBits |= CurrentUsedBits; // Check if the new slice would be legal. if (!LS.isLegal()) return false; // Record the slice. LoadedSlices.push_back(LS); } // Abort slicing if it does not seem to be profitable. if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize)) return false; ++SlicedLoads; // Rewrite each chain to use an independent load. // By construction, each chain can be represented by a unique load. // Prepare the argument for the new token factor for all the slices. SmallVector ArgChains; for (SmallVectorImpl::const_iterator LSIt = LoadedSlices.begin(), LSItEnd = LoadedSlices.end(); LSIt != LSItEnd; ++LSIt) { SDValue SliceInst = LSIt->loadSlice(); CombineTo(LSIt->Inst, SliceInst, true); if (SliceInst.getOpcode() != ISD::LOAD) SliceInst = SliceInst.getOperand(0); assert(SliceInst->getOpcode() == ISD::LOAD && "It takes more than a zext to get to the loaded slice!!"); ArgChains.push_back(SliceInst.getValue(1)); } SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, ArgChains); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); return true; } /// Check to see if V is (and load (ptr), imm), where the load is having /// specific bytes cleared out. If so, return the byte size being masked out /// and the shift amount. static std::pair CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { std::pair Result(0, 0); // Check for the structure we're looking for. if (V->getOpcode() != ISD::AND || !isa(V->getOperand(1)) || !ISD::isNormalLoad(V->getOperand(0).getNode())) return Result; // Check the chain and pointer. LoadSDNode *LD = cast(V->getOperand(0)); if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer. // The store should be chained directly to the load or be an operand of a // tokenfactor. 
if (LD == Chain.getNode()) ; // ok. else if (Chain->getOpcode() != ISD::TokenFactor) return Result; // Fail. else { bool isOk = false; for (const SDValue &ChainOp : Chain->op_values()) if (ChainOp.getNode() == LD) { isOk = true; break; } if (!isOk) return Result; } // This only handles simple types. if (V.getValueType() != MVT::i16 && V.getValueType() != MVT::i32 && V.getValueType() != MVT::i64) return Result; // Check the constant mask. Invert it so that the bits being masked out are // 0 and the bits being kept are 1. Use getSExtValue so that leading bits // follow the sign bit for uniformity. uint64_t NotMask = ~cast(V->getOperand(1))->getSExtValue(); unsigned NotMaskLZ = countLeadingZeros(NotMask); if (NotMaskLZ & 7) return Result; // Must be multiple of a byte. unsigned NotMaskTZ = countTrailingZeros(NotMask); if (NotMaskTZ & 7) return Result; // Must be multiple of a byte. if (NotMaskLZ == 64) return Result; // All zero mask. // See if we have a continuous run of bits. If so, we have 0*1+0* if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64) return Result; // Adjust NotMaskLZ down to be from the actual size of the int instead of i64. if (V.getValueType() != MVT::i64 && NotMaskLZ) NotMaskLZ -= 64-V.getValueSizeInBits(); unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8; switch (MaskedBytes) { case 1: case 2: case 4: break; default: return Result; // All one mask, or 5-byte mask. } // Verify that the first bit starts at a multiple of mask so that the access // is aligned the same as the access width. if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result; Result.first = MaskedBytes; Result.second = NotMaskTZ/8; return Result; } /// Check to see if IVal is something that provides a value as specified by /// MaskInfo. If so, replace the specified store with a narrower store of /// truncated IVal. 
static SDNode *
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  // MaskInfo.first is the number of bytes being replaced and MaskInfo.second
  // is the byte offset of those bytes within the stored value.
  unsigned NumBytes = MaskInfo.first;
  unsigned ByteShift = MaskInfo.second;
  SelectionDAG &DAG = DC->getDAG();

  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this.  If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift * 8, (ByteShift + NumBytes) * 8);
  if (!DAG.MaskedValueIsZero(IVal, Mask))
    return nullptr;

  // Check that it is legal on the target to do this.  It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization.
  MVT VT = MVT::getIntegerVT(NumBytes * 8);
  if (!DC->isTypeLegal(VT))
    return nullptr;

  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift) {
    SDLoc DL(IVal);
    IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift * 8, DL,
                                    DC->getShiftAmountTy(IVal.getValueType())));
  }

  // Figure out the offset for the store and the alignment of the access.
  unsigned StOffset;
  unsigned NewAlign = St->getAlignment();

  if (DAG.getDataLayout().isLittleEndian())
    StOffset = ByteShift;
  else
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;

  SDValue Ptr = St->getBasePtr();
  if (StOffset) {
    SDLoc DL(IVal);
    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
                      Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
    // The narrowed store can only assume the alignment implied by the
    // original alignment and the byte offset.
    NewAlign = MinAlign(NewAlign, StOffset);
  }

  // Truncate down to the new size.
  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);

  ++OpsNarrowed;
  return DAG
      .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
                St->getPointerInfo().getWithOffset(StOffset), NewAlign)
      .getNode();
}

/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates.
If 'op' is only touching some of the loaded bits, try /// narrowing the load and store if it would end up being a win for performance /// or code size. SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { StoreSDNode *ST = cast(N); if (ST->isVolatile()) return SDValue(); SDValue Chain = ST->getChain(); SDValue Value = ST->getValue(); SDValue Ptr = ST->getBasePtr(); EVT VT = Value.getValueType(); if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse()) return SDValue(); unsigned Opc = Value.getOpcode(); // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst // is a byte mask indicating a consecutive number of bytes, check to see if // Y is known to provide just those bytes. If so, we try to replace the // load + replace + store sequence with a single (narrower) store, which makes // the load dead. if (Opc == ISD::OR) { std::pair MaskedLoad; MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain); if (MaskedLoad.first) if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, Value.getOperand(1), ST,this)) return SDValue(NewST, 0); // Or is commutative, so try swapping X and Y. MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain); if (MaskedLoad.first) if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, Value.getOperand(0), ST,this)) return SDValue(NewST, 0); } if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || Value.getOperand(1).getOpcode() != ISD::Constant) return SDValue(); SDValue N0 = Value.getOperand(0); if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && Chain == SDValue(N0.getNode(), 1)) { LoadSDNode *LD = cast(N0); if (LD->getBasePtr() != Ptr || LD->getPointerInfo().getAddrSpace() != ST->getPointerInfo().getAddrSpace()) return SDValue(); // Find the type to narrow it the load / op / store to. 
SDValue N1 = Value.getOperand(1); unsigned BitWidth = N1.getValueSizeInBits(); APInt Imm = cast(N1)->getAPIntValue(); if (Opc == ISD::AND) Imm ^= APInt::getAllOnesValue(BitWidth); if (Imm == 0 || Imm.isAllOnesValue()) return SDValue(); unsigned ShAmt = Imm.countTrailingZeros(); unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; unsigned NewBW = NextPowerOf2(MSB - ShAmt); EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); // The narrowing should be profitable, the load/store operation should be // legal (or custom) and the store size should be equal to the NewVT width. while (NewBW < BitWidth && (NewVT.getStoreSizeInBits() != NewBW || !TLI.isOperationLegalOrCustom(Opc, NewVT) || !TLI.isNarrowingProfitable(VT, NewVT))) { NewBW = NextPowerOf2(NewBW); NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); } if (NewBW >= BitWidth) return SDValue(); // If the lsb changed does not start at the type bitwidth boundary, // start at the previous one. if (ShAmt % NewBW) ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW; APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, std::min(BitWidth, ShAmt + NewBW)); if ((Imm & Mask) == Imm) { APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW); if (Opc == ISD::AND) NewImm ^= APInt::getAllOnesValue(NewBW); uint64_t PtrOff = ShAmt / 8; // For big endian targets, we need to adjust the offset to the pointer to // load the correct bytes. 
if (DAG.getDataLayout().isBigEndian()) PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy)) return SDValue(); SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD), Ptr.getValueType(), Ptr, DAG.getConstant(PtrOff, SDLoc(LD), Ptr.getValueType())); SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr, LD->getPointerInfo().getWithOffset(PtrOff), NewAlign, LD->getMemOperand()->getFlags(), LD->getAAInfo()); SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, DAG.getConstant(NewImm, SDLoc(Value), NewVT)); SDValue NewST = DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr, ST->getPointerInfo().getWithOffset(PtrOff), NewAlign); AddToWorklist(NewPtr.getNode()); AddToWorklist(NewLD.getNode()); AddToWorklist(NewVal.getNode()); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1)); ++OpsNarrowed; return NewST; } } return SDValue(); } /// For a given floating point load / store pair, if the load value isn't used /// by any other operations, then consider transforming the pair to integer /// load / store operations if the target deems the transformation profitable. 
SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  // Only handle a normal store whose value is a normal load with a single
  // use, and which is chained directly to that load.
  if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
      Value.hasOneUse() &&
      Chain == SDValue(Value.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(Value);
    EVT VT = LD->getMemoryVT();
    if (!VT.isFloatingPoint() ||
        VT != ST->getMemoryVT() ||
        LD->isNonTemporal() ||
        ST->isNonTemporal() ||
        LD->getPointerInfo().getAddrSpace() != 0 ||
        ST->getPointerInfo().getAddrSpace() != 0)
      return SDValue();

    // The integer type with the same bit width as the floating point type.
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
        !TLI.isOperationLegal(ISD::STORE, IntVT) ||
        !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
        !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
      return SDValue();

    // Both accesses must satisfy the ABI alignment of the integer type.
    unsigned LDAlign = LD->getAlignment();
    unsigned STAlign = ST->getAlignment();
    Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
    unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
    if (LDAlign < ABIAlign || STAlign < ABIAlign)
      return SDValue();

    SDValue NewLD =
        DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
                    LD->getPointerInfo(), LDAlign);

    SDValue NewST =
        DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
                     ST->getPointerInfo(), STAlign);

    AddToWorklist(NewLD.getNode());
    AddToWorklist(NewST.getNode());
    WorklistRemover DeadNodes(*this);
    // Redirect users of the old load's chain to the new integer load's chain.
    DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
    ++LdStFP2Int;
    return NewST;
  }

  return SDValue();
}

// This is a helper function for visitMUL to check the profitability
// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
// MulNode is the original multiply, AddNode is (add x, c1),
// and ConstNode is c2.
//
// If the (add x, c1) has multiple uses, we could increase
// the number of adds if we make this transformation.
// It would only be worth doing this if we can remove a
// multiply in the process. Check for that here.
// To illustrate: // (A + c1) * c3 // (A + c2) * c3 // We're checking for cases where we have common "c3 * A" expressions. bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue &AddNode, SDValue &ConstNode) { APInt Val; // If the add only has one use, this would be OK to do. if (AddNode.getNode()->hasOneUse()) return true; // Walk all the users of the constant with which we're multiplying. for (SDNode *Use : ConstNode->uses()) { if (Use == MulNode) // This use is the one we're on right now. Skip it. continue; if (Use->getOpcode() == ISD::MUL) { // We have another multiply use. SDNode *OtherOp; SDNode *MulVar = AddNode.getOperand(0).getNode(); // OtherOp is what we're multiplying against the constant. if (Use->getOperand(0) == ConstNode) OtherOp = Use->getOperand(1).getNode(); else OtherOp = Use->getOperand(0).getNode(); // Check to see if multiply is with the same operand of our "add". // // ConstNode = CONST // Use = ConstNode * A <-- visiting Use. OtherOp is A. // ... // AddNode = (A + c1) <-- MulVar is A. // = AddNode * ConstNode <-- current visiting instruction. // // If we make this transformation, we will have a common // multiply (ConstNode * A) that we can save. if (OtherOp == MulVar) return true; // Now check to see if a future expansion will give us a common // multiply. // // ConstNode = CONST // AddNode = (A + c1) // ... = AddNode * ConstNode <-- current visiting instruction. // ... // OtherOp = (A + c2) // Use = OtherOp * ConstNode <-- visiting Use. // // If we make this transformation, we will have a common // multiply (CONST * A) after we also do the same transformation // to the "t2" instruction. if (OtherOp->getOpcode() == ISD::ADD && DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) && OtherOp->getOperand(0).getNode() == MulVar) return true; } } // Didn't find a case where this would be profitable. 
return false; } SDValue DAGCombiner::getMergedConstantVectorStore( SelectionDAG &DAG, const SDLoc &SL, ArrayRef Stores, SmallVectorImpl &Chains, EVT Ty) const { SmallVector BuildVector; for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) { StoreSDNode *St = cast(Stores[I].MemNode); Chains.push_back(St->getChain()); BuildVector.push_back(St->getValue()); } return DAG.getBuildVector(Ty, SL, BuildVector); } bool DAGCombiner::MergeStoresOfConstantsOrVecElts( SmallVectorImpl &StoreNodes, EVT MemVT, unsigned NumStores, bool IsConstantSrc, bool UseVector) { // Make sure we have something to merge. if (NumStores < 2) return false; int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; unsigned LatestNodeUsed = 0; for (unsigned i=0; i < NumStores; ++i) { // Find a chain for the new wide-store operand. Notice that some // of the store nodes that we found may not be selected for inclusion // in the wide store. The chain we use needs to be the chain of the // latest store node which is *used* and replaced by the wide store. if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) LatestNodeUsed = i; } SmallVector Chains; // The latest Node in the DAG. LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; SDLoc DL(StoreNodes[0].MemNode); SDValue StoredVal; if (UseVector) { bool IsVec = MemVT.isVector(); unsigned Elts = NumStores; if (IsVec) { // When merging vector stores, get the total number of elements. Elts *= MemVT.getVectorNumElements(); } // Get the type for the merged vector store. 
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); if (IsConstantSrc) { StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty); } else { SmallVector Ops; for (unsigned i = 0; i < NumStores; ++i) { StoreSDNode *St = cast(StoreNodes[i].MemNode); SDValue Val = St->getValue(); // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type. if (Val.getValueType() != MemVT) return false; Ops.push_back(Val); Chains.push_back(St->getChain()); } // Build the extracted vector elements back into a vector. StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL, Ty, Ops); } } else { // We should always use a vector store when merging extracted vector // elements, so this path implies a store of constants. assert(IsConstantSrc && "Merged vector elements should use vector store"); unsigned SizeInBits = NumStores * ElementSizeBytes * 8; APInt StoreInt(SizeInBits, 0); // Construct a single integer constant which is made of the smaller // constant inputs. bool IsLE = DAG.getDataLayout().isLittleEndian(); for (unsigned i = 0; i < NumStores; ++i) { unsigned Idx = IsLE ? (NumStores - 1 - i) : i; StoreSDNode *St = cast(StoreNodes[Idx].MemNode); Chains.push_back(St->getChain()); SDValue Val = St->getValue(); StoreInt <<= ElementSizeBytes * 8; if (ConstantSDNode *C = dyn_cast(Val)) { StoreInt |= C->getAPIntValue().zext(SizeInBits); } else if (ConstantFPSDNode *C = dyn_cast(Val)) { StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits); } else { llvm_unreachable("Invalid constant element type"); } } // Create the new Load and Store operations. 
EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); StoredVal = DAG.getConstant(StoreInt, DL, StoreTy); } assert(!Chains.empty()); SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), FirstInChain->getAlignment()); bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : DAG.getSubtarget().useAA(); if (UseAA) { // Replace all merged stores with the new store. for (unsigned i = 0; i < NumStores; ++i) CombineTo(StoreNodes[i].MemNode, NewStore); } else { // Replace the last store with the new store. CombineTo(LatestOp, NewStore); // Erase all other stores. for (unsigned i = 0; i < NumStores; ++i) { if (StoreNodes[i].MemNode == LatestOp) continue; StoreSDNode *St = cast(StoreNodes[i].MemNode); // ReplaceAllUsesWith will replace all uses that existed when it was // called, but graph optimizations may cause new ones to appear. For // example, the case in pr14333 looks like // // St's chain -> St -> another store -> X // // And the only difference from St to the other store is the chain. // When we change it's chain to be St's chain they become identical, // get CSEed and the net result is that X is now a use of St. // Since we know that St is redundant, just iterate. while (!St->use_empty()) DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); deleteAndRecombine(St); } } StoreNodes.erase(StoreNodes.begin() + NumStores, StoreNodes.end()); return true; } void DAGCombiner::getStoreMergeAndAliasCandidates( StoreSDNode* St, SmallVectorImpl &StoreNodes, SmallVectorImpl &AliasLoadNodes) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); // We must have a base and an offset. if (!BasePtr.Base.getNode()) return; // Do not handle stores to undef base pointers. 
if (BasePtr.Base.isUndef()) return; // Walk up the chain and look for nodes with offsets from the same // base pointer. Stop when reaching an instruction with a different kind // or instruction which has a different base pointer. EVT MemVT = St->getMemoryVT(); unsigned Seq = 0; StoreSDNode *Index = St; bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : DAG.getSubtarget().useAA(); if (UseAA) { // Look at other users of the same chain. Stores on the same chain do not // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized // to be on the same chain, so don't bother looking at adjacent chains. SDValue Chain = St->getChain(); for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) { if (StoreSDNode *OtherST = dyn_cast(*I)) { if (I.getOperandNo() != 0) continue; if (OtherST->isVolatile() || OtherST->isIndexed()) continue; if (OtherST->getMemoryVT() != MemVT) continue; BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr(), DAG); if (Ptr.equalBaseIndex(BasePtr)) StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++)); } } return; } while (Index) { // If the chain has more than one use, then we can't reorder the mem ops. if (Index != St && !SDValue(Index, 0)->hasOneUse()) break; // Find the base pointer and offset for this memory node. BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG); // Check that the base pointer is the same as the original one. if (!Ptr.equalBaseIndex(BasePtr)) break; // The memory operands must not be volatile. if (Index->isVolatile() || Index->isIndexed()) break; // No truncation. if (Index->isTruncatingStore()) break; // The stored memory type must be the same. if (Index->getMemoryVT() != MemVT) break; // We do not allow under-aligned stores in order to prevent // overriding stores. NOTE: this is a bad hack. Alignment SHOULD // be irrelevant here; what MATTERS is that we not move memory // operations that potentially overlap past each-other. 
if (Index->getAlignment() < MemVT.getStoreSize()) break; // We found a potential memory operand to merge. StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++)); // Find the next memory operand in the chain. If the next operand in the // chain is a store then move up and continue the scan with the next // memory operand. If the next operand is a load save it and use alias // information to check if it interferes with anything. SDNode *NextInChain = Index->getChain().getNode(); while (1) { if (StoreSDNode *STn = dyn_cast(NextInChain)) { // We found a store node. Use it for the next iteration. Index = STn; break; } else if (LoadSDNode *Ldn = dyn_cast(NextInChain)) { if (Ldn->isVolatile()) { Index = nullptr; break; } // Save the load node for later. Continue the scan. AliasLoadNodes.push_back(Ldn); NextInChain = Ldn->getChain().getNode(); continue; } else { Index = nullptr; break; } } } } // We need to check that merging these stores does not cause a loop // in the DAG. Any store candidate may depend on another candidate // indirectly through its operand (we already consider dependencies // through the chain). Check in parallel by searching up from // non-chain operands of candidates. bool DAGCombiner::checkMergeStoreCandidatesForDependencies( SmallVectorImpl &StoreNodes) { SmallPtrSet Visited; SmallVector Worklist; // search ops of store candidates for (unsigned i = 0; i < StoreNodes.size(); ++i) { SDNode *n = StoreNodes[i].MemNode; // Potential loops may happen only through non-chain operands for (unsigned j = 1; j < n->getNumOperands(); ++j) Worklist.push_back(n->getOperand(j).getNode()); } // search through DAG. 
We can stop early if we find a storenode for (unsigned i = 0; i < StoreNodes.size(); ++i) { if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist)) return false; } return true; } bool DAGCombiner::MergeConsecutiveStores( StoreSDNode* St, SmallVectorImpl &StoreNodes) { if (OptLevel == CodeGenOpt::None) return false; EVT MemVT = St->getMemoryVT(); int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute( Attribute::NoImplicitFloat); // This function cannot currently deal with non-byte-sized memory sizes. if (ElementSizeBytes * 8 != MemVT.getSizeInBits()) return false; if (!MemVT.isSimple()) return false; // Perform an early exit check. Do not bother looking at stored values that // are not constants, loads, or extracted vector elements. SDValue StoredVal = St->getValue(); bool IsLoadSrc = isa(StoredVal); bool IsConstantSrc = isa(StoredVal) || isa(StoredVal); bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT || StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR); if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc) return false; // Don't merge vectors into wider vectors if the source data comes from loads. // TODO: This restriction can be lifted by using logic similar to the // ExtractVecSrc case. if (MemVT.isVector() && IsLoadSrc) return false; // Only look at ends of store sequences. SDValue Chain = SDValue(St, 0); if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) return false; // Save the LoadSDNodes that we find in the chain. // We need to make sure that these nodes do not interfere with // any of the store nodes. SmallVector AliasLoadNodes; getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes); // Check if there is anything to merge. if (StoreNodes.size() < 2) return false; // only do dependence check in AA case bool UseAA = CombinerAA.getNumOccurrences() > 0 ? 
CombinerAA : DAG.getSubtarget().useAA(); if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes)) return false; // Sort the memory operands according to their distance from the // base pointer. As a secondary criteria: make sure stores coming // later in the code come first in the list. This is important for // the non-UseAA case, because we're merging stores into the FINAL // store along a chain which potentially contains aliasing stores. // Thus, if there are multiple stores to the same address, the last // one can be considered for merging but not the others. std::sort(StoreNodes.begin(), StoreNodes.end(), [](MemOpLink LHS, MemOpLink RHS) { return LHS.OffsetFromBase < RHS.OffsetFromBase || (LHS.OffsetFromBase == RHS.OffsetFromBase && LHS.SequenceNum < RHS.SequenceNum); }); // Scan the memory operations on the chain and find the first non-consecutive // store memory address. unsigned LastConsecutiveStore = 0; int64_t StartAddress = StoreNodes[0].OffsetFromBase; for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) { // Check that the addresses are consecutive starting from the second // element in the list of stores. if (i > 0) { int64_t CurrAddress = StoreNodes[i].OffsetFromBase; if (CurrAddress - StartAddress != (ElementSizeBytes * i)) break; } // Check if this store interferes with any of the loads that we found. // If we find a load that alias with this store. Stop the sequence. if (any_of(AliasLoadNodes, [&](LSBaseSDNode *Ldn) { return isAlias(Ldn, StoreNodes[i].MemNode); })) break; // Mark this node as useful. LastConsecutiveStore = i; } // The node with the lowest store address. LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; unsigned FirstStoreAS = FirstInChain->getAddressSpace(); unsigned FirstStoreAlign = FirstInChain->getAlignment(); LLVMContext &Context = *DAG.getContext(); const DataLayout &DL = DAG.getDataLayout(); // Store the constants into memory as one consecutive store. 
if (IsConstantSrc) { unsigned LastLegalType = 0; unsigned LastLegalVectorType = 0; bool NonZero = false; for (unsigned i=0; i(StoreNodes[i].MemNode); SDValue StoredVal = St->getValue(); if (ConstantSDNode *C = dyn_cast(StoredVal)) { NonZero |= !C->isNullValue(); } else if (ConstantFPSDNode *C = dyn_cast(StoredVal)) { NonZero |= !C->getConstantFPValue()->isNullValue(); } else { // Non-constant. break; } // Find a legal type for the constant store. unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); bool IsFast; if (TLI.isTypeLegal(StoreTy) && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) { LastLegalType = i+1; // Or check whether a truncstore is legal. } else if (TLI.getTypeAction(Context, StoreTy) == TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) { LastLegalType = i + 1; } } // We only use vectors if the constant is known to be zero or the target // allows it and the function is not marked with the noimplicitfloat // attribute. if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1, FirstStoreAS)) && !NoVectors) { // Find a legal type for the vector store. EVT Ty = EVT::getVectorVT(Context, MemVT, i+1); if (TLI.isTypeLegal(Ty) && TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) LastLegalVectorType = i + 1; } } // Check if we found a legal integer type to store. if (LastLegalType == 0 && LastLegalVectorType == 0) return false; bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; unsigned NumElem = UseVector ? 
LastLegalVectorType : LastLegalType; return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true, UseVector); } // When extracting multiple vector elements, try to store them // in one vector store rather than a sequence of scalar stores. if (IsExtractVecSrc) { unsigned NumStoresToMerge = 0; bool IsVec = MemVT.isVector(); for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) { StoreSDNode *St = cast(StoreNodes[i].MemNode); unsigned StoreValOpcode = St->getValue().getOpcode(); // This restriction could be loosened. // Bail out if any stored values are not elements extracted from a vector. // It should be possible to handle mixed sources, but load sources need // more careful handling (see the block of code below that handles // consecutive loads). if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT && StoreValOpcode != ISD::EXTRACT_SUBVECTOR) return false; // Find a legal type for the vector store. unsigned Elts = i + 1; if (IsVec) { // When merging vector stores, get the total number of elements. Elts *= MemVT.getVectorNumElements(); } EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); bool IsFast; if (TLI.isTypeLegal(Ty) && TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) NumStoresToMerge = i + 1; } return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge, false, true); } // Below we handle the case of multiple consecutive stores that // come from multiple consecutive loads. We merge them into a single // wide load and a single wide store. // Look for load nodes which are used by the stored values. SmallVector LoadNodes; // Find acceptable loads. Loads need to have the same chain (token factor), // must not be zext, volatile, indexed, and they must be consecutive. BaseIndexOffset LdBasePtr; for (unsigned i=0; i(StoreNodes[i].MemNode); LoadSDNode *Ld = dyn_cast(St->getValue()); if (!Ld) break; // Loads must only have one use. 
if (!Ld->hasNUsesOfValue(1, 0)) break; // The memory operands must not be volatile. if (Ld->isVolatile() || Ld->isIndexed()) break; // We do not accept ext loads. if (Ld->getExtensionType() != ISD::NON_EXTLOAD) break; // The stored memory type must be the same. if (Ld->getMemoryVT() != MemVT) break; BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG); // If this is not the first ptr that we check. if (LdBasePtr.Base.getNode()) { // The base ptr must be the same. if (!LdPtr.equalBaseIndex(LdBasePtr)) break; } else { // Check that all other base pointers are the same as this one. LdBasePtr = LdPtr; } // We found a potential memory operand to merge. LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0)); } if (LoadNodes.size() < 2) return false; // If we have load/store pair instructions and we only have two values, // don't bother. unsigned RequiredAlignment; if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) && St->getAlignment() >= RequiredAlignment) return false; LoadSDNode *FirstLoad = cast(LoadNodes[0].MemNode); unsigned FirstLoadAS = FirstLoad->getAddressSpace(); unsigned FirstLoadAlign = FirstLoad->getAlignment(); // Scan the memory operations on the chain and find the first non-consecutive // load memory address. These variables hold the index in the store node // array. unsigned LastConsecutiveLoad = 0; // This variable refers to the size and not index in the array. unsigned LastLegalVectorType = 0; unsigned LastLegalIntegerType = 0; StartAddress = LoadNodes[0].OffsetFromBase; SDValue FirstChain = FirstLoad->getChain(); for (unsigned i = 1; i < LoadNodes.size(); ++i) { // All loads must share the same chain. if (LoadNodes[i].MemNode->getChain() != FirstChain) break; int64_t CurrAddress = LoadNodes[i].OffsetFromBase; if (CurrAddress - StartAddress != (ElementSizeBytes * i)) break; LastConsecutiveLoad = i; // Find a legal type for the vector store. 
EVT StoreTy = EVT::getVectorVT(Context, MemVT, i+1); bool IsFastSt, IsFastLd; if (TLI.isTypeLegal(StoreTy) && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, FirstStoreAlign, &IsFastSt) && IsFastSt && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, FirstLoadAlign, &IsFastLd) && IsFastLd) { LastLegalVectorType = i + 1; } // Find a legal type for the integer store. unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; StoreTy = EVT::getIntegerVT(Context, SizeInBits); if (TLI.isTypeLegal(StoreTy) && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, FirstStoreAlign, &IsFastSt) && IsFastSt && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, FirstLoadAlign, &IsFastLd) && IsFastLd) LastLegalIntegerType = i + 1; // Or check whether a truncstore and extload is legal. else if (TLI.getTypeAction(Context, StoreTy) == TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) && TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, FirstStoreAS, FirstStoreAlign, &IsFastSt) && IsFastSt && TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, FirstLoadAS, FirstLoadAlign, &IsFastLd) && IsFastLd) LastLegalIntegerType = i+1; } } // Only use vector types if the vector type is larger than the integer type. // If they are the same, use integers. bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors; unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType); // We add +1 here because the LastXXX variables refer to location while // the NumElem refers to array/index size. 
unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1; NumElem = std::min(LastLegalType, NumElem); if (NumElem < 2) return false; // Collect the chains from all merged stores. SmallVector MergeStoreChains; MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain()); // The latest Node in the DAG. unsigned LatestNodeUsed = 0; for (unsigned i=1; igetChain()); } LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; // Find if it is better to use vectors or integers to load and store // to memory. EVT JointMemOpVT; if (UseVectorTy) { JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem); } else { unsigned SizeInBits = NumElem * ElementSizeBytes * 8; JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); } SDLoc LoadDL(LoadNodes[0].MemNode); SDLoc StoreDL(StoreNodes[0].MemNode); // The merged loads are required to have the same incoming chain, so // using the first's chain is acceptable. SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), FirstLoadAlign); SDValue NewStoreChain = DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains); SDValue NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), FirstStoreAlign); // Transfer chain users from old loads to the new load. for (unsigned i = 0; i < NumElem; ++i) { LoadSDNode *Ld = cast(LoadNodes[i].MemNode); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1)); } if (UseAA) { // Replace the all stores with the new store. for (unsigned i = 0; i < NumElem; ++i) CombineTo(StoreNodes[i].MemNode, NewStore); } else { // Replace the last store with the new store. CombineTo(LatestOp, NewStore); // Erase all other stores. for (unsigned i = 0; i < NumElem; ++i) { // Remove all Store nodes. 
if (StoreNodes[i].MemNode == LatestOp) continue; StoreSDNode *St = cast(StoreNodes[i].MemNode); DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); deleteAndRecombine(St); } } StoreNodes.erase(StoreNodes.begin() + NumElem, StoreNodes.end()); return true; } SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) { SDLoc SL(ST); SDValue ReplStore; // Replace the chain to avoid dependency. if (ST->isTruncatingStore()) { ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(), ST->getMemoryVT(), ST->getMemOperand()); } else { ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(), ST->getMemOperand()); } // Create token to keep both nodes around. SDValue Token = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, ST->getChain(), ReplStore); // Make sure the new and old chains are cleaned up. AddToWorklist(Token.getNode()); // Don't add users to work list. return CombineTo(ST, Token, false); } SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { SDValue Value = ST->getValue(); if (Value.getOpcode() == ISD::TargetConstantFP) return SDValue(); SDLoc DL(ST); SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); const ConstantFPSDNode *CFP = cast(Value); // NOTE: If the original store is volatile, this transform must not increase // the number of stores. For example, on x86-32 an f64 can be stored in one // processor operation but an i64 (which is not legal) requires two. So the // transform should not be done in this case. SDValue Tmp; switch (CFP->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unknown FP type"); case MVT::f16: // We don't do this for these yet. case MVT::f80: case MVT::f128: case MVT::ppcf128: return SDValue(); case MVT::f32: if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { ; Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). 
bitcastToAPInt().getZExtValue(), SDLoc(CFP), MVT::i32); return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand()); } return SDValue(); case MVT::f64: if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { ; Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). getZExtValue(), SDLoc(CFP), MVT::i64); return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand()); } if (!ST->isVolatile() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { // Many FP stores are not made apparent until after legalize, e.g. for // argument passing. Since this is so common, custom legalize the // 64-bit integer store into two 32-bit stores. uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32); SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32); if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); unsigned Alignment = ST->getAlignment(); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), ST->getAlignment(), MMOFlags, AAInfo); Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, DAG.getConstant(4, DL, Ptr.getValueType())); Alignment = MinAlign(Alignment, 4U); SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), Alignment, MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, St0, St1); } return SDValue(); } } SDValue DAGCombiner::visitSTORE(SDNode *N) { StoreSDNode *ST = cast(N); SDValue Chain = ST->getChain(); SDValue Value = ST->getValue(); SDValue Ptr = ST->getBasePtr(); // If this is a store of a bit convert, store the input value if the // resultant store does not need a higher alignment than the original. 
if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() && ST->isUnindexed()) { EVT SVT = Value.getOperand(0).getValueType(); if (((!LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) && TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) { unsigned OrigAlign = ST->getAlignment(); bool Fast = false; if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT, ST->getAddressSpace(), OrigAlign, &Fast) && Fast) { return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, ST->getPointerInfo(), OrigAlign, ST->getMemOperand()->getFlags(), ST->getAAInfo()); } } } // Turn 'store undef, Ptr' -> nothing. if (Value.isUndef() && ST->isUnindexed()) return Chain; // Try to infer better alignment information than the store already has. if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > ST->getAlignment()) { SDValue NewStore = DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), ST->getMemoryVT(), Align, ST->getMemOperand()->getFlags(), ST->getAAInfo()); if (NewStore.getNode() != N) return CombineTo(ST, NewStore, true); } } } // Try transforming a pair floating point load / store ops to integer // load / store ops. if (SDValue NewST = TransformFPLoadStorePair(N)) return NewST; bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : DAG.getSubtarget().useAA(); #ifndef NDEBUG if (CombinerAAOnlyFunc.getNumOccurrences() && CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) UseAA = false; #endif if (UseAA && ST->isUnindexed()) { // FIXME: We should do this even without AA enabled. AA will just allow // FindBetterChain to work in more situations. The problem with this is that // any combine that expects memory operations to be on consecutive chains // first needs to be updated to look for users of the same chain. // Walk up chain skipping non-aliasing memory nodes, on this store and any // adjacent stores. 
if (findBetterNeighborChains(ST)) { // replaceStoreChain uses CombineTo, which handled all of the worklist // manipulation. Return the original node to not do anything else. return SDValue(ST, 0); } Chain = ST->getChain(); } // Try transforming N to an indexed store. if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) return SDValue(N, 0); // FIXME: is there such a thing as a truncating indexed store? if (ST->isTruncatingStore() && ST->isUnindexed() && Value.getValueType().isInteger()) { // See if we can simplify the input to this truncstore with knowledge that // only the low bits are being used. For example: // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" SDValue Shorter = GetDemandedBits( Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(), ST->getMemoryVT().getScalarSizeInBits())); AddToWorklist(Value.getNode()); if (Shorter.getNode()) return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(), ST->getMemOperand()); // Otherwise, see if we can simplify the operation with // SimplifyDemandedBits, which only works if the value has a single use. if (SimplifyDemandedBits( Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(), ST->getMemoryVT().getScalarSizeInBits()))) return SDValue(N, 0); } // If this is a load followed by a store to the same location, then the store // is dead/noop. if (LoadSDNode *Ld = dyn_cast(Value)) { if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() && ST->isUnindexed() && !ST->isVolatile() && // There can't be any side effects between the load and store, such as // a call or store. Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) { // The store is dead, remove it. return Chain; } } // If this is a store followed by a store with the same value to the same // location, then the store is dead/noop. 
if (StoreSDNode *ST1 = dyn_cast(Chain)) { if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() && ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() && !ST1->isVolatile()) { // The store is dead, remove it. return Chain; } } // If this is an FP_ROUND or TRUNC followed by a store, fold this into a // truncating store. We can do this even if this is already a truncstore. if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE) && Value.getNode()->hasOneUse() && ST->isUnindexed() && TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), ST->getMemoryVT())) { return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, ST->getMemoryVT(), ST->getMemOperand()); } // Only perform this optimization before the types are legal, because we // don't want to perform this optimization on every DAGCombine invocation. if (!LegalTypes) { for (;;) { // There can be multiple store sequences on the same chain. // Keep trying to merge store sequences until we are unable to do so // or until we merge the last store on the chain. SmallVector StoreNodes; bool Changed = MergeConsecutiveStores(ST, StoreNodes); if (!Changed) break; if (any_of(StoreNodes, [ST](const MemOpLink &Link) { return Link.MemNode == ST; })) { // ST has been merged and no longer exists. return SDValue(N, 0); } } } // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' // // Make sure to do this only after attempting to merge stores in order to // avoid changing the types of some subset of stores due to visit order, // preventing their merging. if (isa(Value)) { if (SDValue NewSt = replaceStoreOfFPConstant(ST)) return NewSt; } if (SDValue NewSt = splitMergedValStore(ST)) return NewSt; return ReduceLoadOpStoreWidth(N); } /// For the instruction sequence of store below, F and I values /// are bundled together as an i64 value before being stored into memory. 
/// Sometimes it is more efficent to generate separate stores for F and I, /// which can remove the bitwise instructions or sink them to colder places. /// /// (store (or (zext (bitcast F to i32) to i64), /// (shl (zext I to i64), 32)), addr) --> /// (store F, addr) and (store I, addr+4) /// /// Similarly, splitting for other merged store can also be beneficial, like: /// For pair of {i32, i32}, i64 store --> two i32 stores. /// For pair of {i32, i16}, i64 store --> two i32 stores. /// For pair of {i16, i16}, i32 store --> two i16 stores. /// For pair of {i16, i8}, i32 store --> two i16 stores. /// For pair of {i8, i8}, i16 store --> two i8 stores. /// /// We allow each target to determine specifically which kind of splitting is /// supported. /// /// The store patterns are commonly seen from the simple code snippet below /// if only std::make_pair(...) is sroa transformed before inlined into hoo. /// void goo(const std::pair &); /// hoo() { /// ... /// goo(std::make_pair(tmp, ftmp)); /// ... /// } /// SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { if (OptLevel == CodeGenOpt::None) return SDValue(); SDValue Val = ST->getValue(); SDLoc DL(ST); // Match OR operand. if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR) return SDValue(); // Match SHL operand and get Lower and Higher parts of Val. SDValue Op1 = Val.getOperand(0); SDValue Op2 = Val.getOperand(1); SDValue Lo, Hi; if (Op1.getOpcode() != ISD::SHL) { std::swap(Op1, Op2); if (Op1.getOpcode() != ISD::SHL) return SDValue(); } Lo = Op2; Hi = Op1.getOperand(0); if (!Op1.hasOneUse()) return SDValue(); // Match shift amount to HalfValBitSize. unsigned HalfValBitSize = Val.getValueSizeInBits() / 2; ConstantSDNode *ShAmt = dyn_cast(Op1.getOperand(1)); if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize) return SDValue(); // Lo and Hi are zero-extended from int with size less equal than 32 // to i64. 
if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() || !Lo.getOperand(0).getValueType().isScalarInteger() || Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize || Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() || !Hi.getOperand(0).getValueType().isScalarInteger() || Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize) return SDValue(); // Use the EVT of low and high parts before bitcast as the input // of target query. EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST) ? Lo.getOperand(0).getValueType() : Lo.getValueType(); EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST) ? Hi.getOperand(0).getValueType() : Hi.getValueType(); if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy)) return SDValue(); // Start to split store. unsigned Alignment = ST->getAlignment(); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); // Change the sizes of Lo and Hi's value types to HalfValBitSize. EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize); Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0)); Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0)); SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); // Lower value store. SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), ST->getAlignment(), MMOFlags, AAInfo); Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType())); // Higher value store. SDValue St1 = DAG.getStore(St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8), Alignment / 2, MMOFlags, AAInfo); return St1; } SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue InVec = N->getOperand(0); SDValue InVal = N->getOperand(1); SDValue EltNo = N->getOperand(2); SDLoc DL(N); // If the inserted element is an UNDEF, just use the input vector. 
if (InVal.isUndef()) return InVec; EVT VT = InVec.getValueType(); // If we can't generate a legal BUILD_VECTOR, exit if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) return SDValue(); // Check that we know which element is being inserted if (!isa(EltNo)) return SDValue(); unsigned Elt = cast(EltNo)->getZExtValue(); // Canonicalize insert_vector_elt dag nodes. // Example: // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1) // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0) // // Do this only if the child insert_vector node has one use; also // do this only if indices are both constants and Idx1 < Idx0. if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse() && isa(InVec.getOperand(2))) { unsigned OtherElt = cast(InVec.getOperand(2))->getZExtValue(); if (Elt < OtherElt) { // Swap nodes. SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, InVec.getOperand(0), InVal, EltNo); AddToWorklist(NewOp.getNode()); return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()), VT, NewOp, InVec.getOperand(1), InVec.getOperand(2)); } } // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially // be converted to a BUILD_VECTOR). Fill in the Ops vector with the // vector elements. SmallVector Ops; // Do not combine these two vectors if the output vector will not replace // the input vector. if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) { Ops.append(InVec.getNode()->op_begin(), InVec.getNode()->op_end()); } else if (InVec.isUndef()) { unsigned NElts = VT.getVectorNumElements(); Ops.append(NElts, DAG.getUNDEF(InVal.getValueType())); } else { return SDValue(); } // Insert the element if (Elt < Ops.size()) { // All the operands of BUILD_VECTOR must have the same type; // we enforce that here. EVT OpVT = Ops[0].getValueType(); if (InVal.getValueType() != OpVT) InVal = OpVT.bitsGT(InVal.getValueType()) ? 
DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) : DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal); Ops[Elt] = InVal; } // Return the new vector return DAG.getBuildVector(VT, DL, Ops); } SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) { assert(!OriginalLoad->isVolatile()); EVT ResultVT = EVE->getValueType(0); EVT VecEltVT = InVecVT.getVectorElementType(); unsigned Align = OriginalLoad->getAlignment(); unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( VecEltVT.getTypeForEVT(*DAG.getContext())); if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)) return SDValue(); Align = NewAlign; SDValue NewPtr = OriginalLoad->getBasePtr(); SDValue Offset; EVT PtrType = NewPtr.getValueType(); MachinePointerInfo MPI; SDLoc DL(EVE); if (auto *ConstEltNo = dyn_cast(EltNo)) { int Elt = ConstEltNo->getZExtValue(); unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8; Offset = DAG.getConstant(PtrOff, DL, PtrType); MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff); } else { Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType); Offset = DAG.getNode( ISD::MUL, DL, PtrType, Offset, DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType)); MPI = OriginalLoad->getPointerInfo(); } NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset); // The replacement we need to do here is a little tricky: we need to // replace an extractelement of a load with a load. // Use ReplaceAllUsesOfValuesWith to do the replacement. // Note that this replacement assumes that the extractvalue is the only // use of the load; that's okay because we don't want to perform this // transformation in other cases anyway. SDValue Load; SDValue Chain; if (ResultVT.bitsGT(VecEltVT)) { // If the result type of vextract is wider than the load, then issue an // extending load instead. ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? 
ISD::ZEXTLOAD : ISD::EXTLOAD; Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), NewPtr, MPI, VecEltVT, Align, OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo()); Chain = Load.getValue(1); } else { Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Align, OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo()); Chain = Load.getValue(1); if (ResultVT.bitsLT(VecEltVT)) Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load); else Load = DAG.getBitcast(ResultVT, Load); } WorklistRemover DeadNodes(*this); SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) }; SDValue To[] = { Load, Chain }; DAG.ReplaceAllUsesOfValuesWith(From, To, 2); // Since we're explicitly calling ReplaceAllUses, add the new node to the // worklist explicitly as well. AddToWorklist(Load.getNode()); AddUsersToWorklist(Load.getNode()); // Add users too // Make sure to revisit this node to clean it up; it will usually be dead. AddToWorklist(EVE); ++OpsNarrowed; return SDValue(EVE, 0); } SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (scalar_to_vector val, 0) -> val SDValue InVec = N->getOperand(0); EVT VT = InVec.getValueType(); EVT NVT = N->getValueType(0); if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { // Check if the result type doesn't match the inserted element type. A // SCALAR_TO_VECTOR may truncate the inserted element and the // EXTRACT_VECTOR_ELT may widen the extracted vector. 
SDValue InOp = InVec.getOperand(0); if (InOp.getValueType() != NVT) { assert(InOp.getValueType().isInteger() && NVT.isInteger()); return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT); } return InOp; } SDValue EltNo = N->getOperand(1); ConstantSDNode *ConstEltNo = dyn_cast(EltNo); // extract_vector_elt (build_vector x, y), 1 -> y if (ConstEltNo && InVec.getOpcode() == ISD::BUILD_VECTOR && TLI.isTypeLegal(VT) && (InVec.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VT))) { SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue()); EVT InEltVT = Elt.getValueType(); // Sometimes build_vector's scalar input types do not match result type. if (NVT == InEltVT) return Elt; // TODO: It may be useful to truncate if free if the build_vector implicitly // converts. } // extract_vector_elt (v2i32 (bitcast i64:x)), 0 -> i32 (trunc i64:x) if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() && ConstEltNo->isNullValue() && VT.isInteger()) { SDValue BCSrc = InVec.getOperand(0); if (BCSrc.getValueType().isScalarInteger()) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc); } // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val // // This only really matters if the index is non-constant since other combines // on the constant elements already work. if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && EltNo == InVec.getOperand(2)) { SDValue Elt = InVec.getOperand(1); return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt; } // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. // We only perform this optimization before the op legalization phase because // we may introduce new vector instructions which are not backed by TD // patterns. For example on AVX, extracting elements from a wide vector // without using extract_subvector. However, if we can find an underlying // scalar value, then we can always use that. 
if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) { int NumElem = VT.getVectorNumElements(); ShuffleVectorSDNode *SVOp = cast(InVec); // Find the new index to extract from. int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue()); // Extracting an undef index is undef. if (OrigElt == -1) return DAG.getUNDEF(NVT); // Select the right vector half to extract from. SDValue SVInVec; if (OrigElt < NumElem) { SVInVec = InVec->getOperand(0); } else { SVInVec = InVec->getOperand(1); OrigElt -= NumElem; } if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) { SDValue InOp = SVInVec.getOperand(OrigElt); if (InOp.getValueType() != NVT) { assert(InOp.getValueType().isInteger() && NVT.isInteger()); InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT); } return InOp; } // FIXME: We should handle recursing on other vector shuffles and // scalar_to_vector here as well. if (!LegalOperations) { EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec, DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy)); } } bool BCNumEltsChanged = false; EVT ExtVT = VT.getVectorElementType(); EVT LVT = ExtVT; // If the result of load has to be truncated, then it's not necessarily // profitable. if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) return SDValue(); if (InVec.getOpcode() == ISD::BITCAST) { // Don't duplicate a load with other uses. 
if (!InVec.hasOneUse()) return SDValue(); EVT BCVT = InVec.getOperand(0).getValueType(); if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) return SDValue(); if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) BCNumEltsChanged = true; InVec = InVec.getOperand(0); ExtVT = BCVT.getVectorElementType(); } // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size) if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() && ISD::isNormalLoad(InVec.getNode()) && !N->getOperand(1)->hasPredecessor(InVec.getNode())) { SDValue Index = N->getOperand(1); if (LoadSDNode *OrigLoad = dyn_cast(InVec)) { if (!OrigLoad->isVolatile()) { return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index, OrigLoad); } } } // Perform only after legalization to ensure build_vector / vector_shuffle // optimizations have already been done. if (!LegalOperations) return SDValue(); // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) if (ConstEltNo) { int Elt = cast(EltNo)->getZExtValue(); LoadSDNode *LN0 = nullptr; const ShuffleVectorSDNode *SVN = nullptr; if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast(InVec); } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && InVec.getOperand(0).getValueType() == ExtVT && ISD::isNormalLoad(InVec.getOperand(0).getNode())) { // Don't duplicate a load with other uses. if (!InVec.hasOneUse()) return SDValue(); LN0 = cast(InVec.getOperand(0)); } else if ((SVN = dyn_cast(InVec))) { // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) // => // (load $addr+1*size) // Don't duplicate a load with other uses. if (!InVec.hasOneUse()) return SDValue(); // If the bit convert changed the number of elements, it is unsafe // to examine the mask. if (BCNumEltsChanged) return SDValue(); // Select the input vector, guarding against out of range extract vector. 
unsigned NumElems = VT.getVectorNumElements(); int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt); InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); if (InVec.getOpcode() == ISD::BITCAST) { // Don't duplicate a load with other uses. if (!InVec.hasOneUse()) return SDValue(); InVec = InVec.getOperand(0); } if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast(InVec); Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType()); } } // Make sure we found a non-volatile load and the extractelement is // the only use. if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) return SDValue(); // If Idx was -1 above, Elt is going to be -1, so just return undef. if (Elt == -1) return DAG.getUNDEF(LVT); return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0); } return SDValue(); } // Simplify (build_vec (ext )) to (bitcast (build_vec )) SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { // We perform this optimization post type-legalization because // the type-legalizer often scalarizes integer-promoted vectors. // Performing this optimization before may create bit-casts which // will be type-legalized to complex code sequences. // We perform this optimization only before the operation legalizer because we // may introduce illegal operations. if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes) return SDValue(); unsigned NumInScalars = N->getNumOperands(); SDLoc DL(N); EVT VT = N->getValueType(0); // Check to see if this is a BUILD_VECTOR of a bunch of values // which come from any_extend or zero_extend nodes. If so, we can create // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR // optimizations. We do not handle sign-extend because we can't fill the sign // using shuffles. 
EVT SourceType = MVT::Other; bool AllAnyExt = true; for (unsigned i = 0; i != NumInScalars; ++i) { SDValue In = N->getOperand(i); // Ignore undef inputs. if (In.isUndef()) continue; bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND; bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND; // Abort if the element is not an extension. if (!ZeroExt && !AnyExt) { SourceType = MVT::Other; break; } // The input is a ZeroExt or AnyExt. Check the original type. EVT InTy = In.getOperand(0).getValueType(); // Check that all of the widened source types are the same. if (SourceType == MVT::Other) // First time. SourceType = InTy; else if (InTy != SourceType) { // Multiple income types. Abort. SourceType = MVT::Other; break; } // Check if all of the extends are ANY_EXTENDs. AllAnyExt &= AnyExt; } // In order to have valid types, all of the inputs must be extended from the // same source type and all of the inputs must be any or zero extend. // Scalar sizes must be a power of two. EVT OutScalarTy = VT.getScalarType(); bool ValidTypes = SourceType != MVT::Other && isPowerOf2_32(OutScalarTy.getSizeInBits()) && isPowerOf2_32(SourceType.getSizeInBits()); // Create a new simpler BUILD_VECTOR sequence which other optimizations can // turn into a single shuffle instruction. if (!ValidTypes) return SDValue(); bool isLE = DAG.getDataLayout().isLittleEndian(); unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); assert(ElemRatio > 1 && "Invalid element size ratio"); SDValue Filler = AllAnyExt ? 
DAG.getUNDEF(SourceType): DAG.getConstant(0, DL, SourceType); unsigned NewBVElems = ElemRatio * VT.getVectorNumElements(); SmallVector Ops(NewBVElems, Filler); // Populate the new build_vector for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDValue Cast = N->getOperand(i); assert((Cast.getOpcode() == ISD::ANY_EXTEND || Cast.getOpcode() == ISD::ZERO_EXTEND || Cast.isUndef()) && "Invalid cast opcode"); SDValue In; if (Cast.isUndef()) In = DAG.getUNDEF(SourceType); else In = Cast->getOperand(0); unsigned Index = isLE ? (i * ElemRatio) : (i * ElemRatio + (ElemRatio - 1)); assert(Index < Ops.size() && "Invalid index"); Ops[Index] = In; } // The type of the new BUILD_VECTOR node. EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems); assert(VecVT.getSizeInBits() == VT.getSizeInBits() && "Invalid vector size"); // Check if the new vector type is legal. if (!isTypeLegal(VecVT)) return SDValue(); // Make the new BUILD_VECTOR. SDValue BV = DAG.getBuildVector(VecVT, DL, Ops); // The new BUILD_VECTOR node has the potential to be further optimized. AddToWorklist(BV.getNode()); // Bitcast to the desired type. return DAG.getBitcast(VT, BV); } SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { EVT VT = N->getValueType(0); unsigned NumInScalars = N->getNumOperands(); SDLoc DL(N); EVT SrcVT = MVT::Other; unsigned Opcode = ISD::DELETED_NODE; unsigned NumDefs = 0; for (unsigned i = 0; i != NumInScalars; ++i) { SDValue In = N->getOperand(i); unsigned Opc = In.getOpcode(); if (Opc == ISD::UNDEF) continue; // If all scalar values are floats and converted from integers. if (Opcode == ISD::DELETED_NODE && (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) { Opcode = Opc; } if (Opc != Opcode) return SDValue(); EVT InVT = In.getOperand(0).getValueType(); // If all scalar values are typed differently, bail out. It's chosen to // simplify BUILD_VECTOR of integer types. 
if (SrcVT == MVT::Other) SrcVT = InVT; if (SrcVT != InVT) return SDValue(); NumDefs++; } // If the vector has just one element defined, it's not worth to fold it into // a vectorized one. if (NumDefs < 2) return SDValue(); assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP) && "Should only handle conversion from integer to float."); assert(SrcVT != MVT::Other && "Cannot determine source type!"); EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars); if (!TLI.isOperationLegalOrCustom(Opcode, NVT)) return SDValue(); // Just because the floating-point vector type is legal does not necessarily // mean that the corresponding integer vector type is. if (!isTypeLegal(NVT)) return SDValue(); SmallVector Opnds; for (unsigned i = 0; i != NumInScalars; ++i) { SDValue In = N->getOperand(i); if (In.isUndef()) Opnds.push_back(DAG.getUNDEF(SrcVT)); else Opnds.push_back(In.getOperand(0)); } SDValue BV = DAG.getBuildVector(NVT, DL, Opnds); AddToWorklist(BV.getNode()); return DAG.getNode(Opcode, DL, VT, BV); } SDValue DAGCombiner::createBuildVecShuffle(SDLoc DL, SDNode *N, ArrayRef VectorMask, SDValue VecIn1, SDValue VecIn2, unsigned LeftIdx) { MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy); EVT VT = N->getValueType(0); EVT InVT1 = VecIn1.getValueType(); EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1; unsigned Vec2Offset = InVT1.getVectorNumElements(); unsigned NumElems = VT.getVectorNumElements(); unsigned ShuffleNumElems = NumElems; // We can't generate a shuffle node with mismatched input and output types. // Try to make the types match the type of the output. if (InVT1 != VT || InVT2 != VT) { if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) { // If the output vector length is a multiple of both input lengths, // we can concatenate them and pad the rest with undefs. 
unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits(); assert(NumConcats >= 2 && "Concat needs at least two inputs!"); SmallVector ConcatOps(NumConcats, DAG.getUNDEF(InVT1)); ConcatOps[0] = VecIn1; ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1); VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); VecIn2 = SDValue(); } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) { if (!TLI.isExtractSubvectorCheap(VT, NumElems)) return SDValue(); if (!VecIn2.getNode()) { // If we only have one input vector, and it's twice the size of the // output, split it in two. VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, DAG.getConstant(NumElems, DL, IdxTy)); VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx); // Since we now have shorter input vectors, adjust the offset of the // second vector's start. Vec2Offset = NumElems; } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) { // VecIn1 is wider than the output, and we have another, possibly // smaller input. Pad the smaller input with undefs, shuffle at the // input vector width, and extract the output. // The shuffle type is different than VT, so check legality again. if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1)) return SDValue(); - if (InVT1 != InVT2) + // Legalizing INSERT_SUBVECTOR is tricky - you basically have to + // lower it back into a BUILD_VECTOR. So if the inserted type is + // illegal, don't even try. + if (InVT1 != InVT2) { + if (!TLI.isTypeLegal(InVT2)) + return SDValue(); VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1, DAG.getUNDEF(InVT1), VecIn2, ZeroIdx); + } ShuffleNumElems = NumElems * 2; } else { // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider // than VecIn1. We can't handle this for now - this case will disappear // when we start sorting the vectors by type. return SDValue(); } } else { // TODO: Support cases where the length mismatch isn't exactly by a // factor of 2. 
// TODO: Move this check upwards, so that if we have bad type // mismatches, we don't create any DAG nodes. return SDValue(); } } // Initialize mask to undef. SmallVector Mask(ShuffleNumElems, -1); // Only need to run up to the number of elements actually used, not the // total number of elements in the shuffle - if we are shuffling a wider // vector, the high lanes should be set to undef. for (unsigned i = 0; i != NumElems; ++i) { if (VectorMask[i] <= 0) continue; unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1); if (VectorMask[i] == (int)LeftIdx) { Mask[i] = ExtIndex; } else if (VectorMask[i] == (int)LeftIdx + 1) { Mask[i] = Vec2Offset + ExtIndex; } } // The type the input vectors may have changed above. InVT1 = VecIn1.getValueType(); // If we already have a VecIn2, it should have the same type as VecIn1. // If we don't, get an undef/zero vector of the appropriate type. VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1); assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type."); SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask); if (ShuffleNumElems > NumElems) Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx); return Shuffle; } // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT // operations. If the types of the vectors we're extracting from allow it, // turn this into a vector_shuffle node. SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { SDLoc DL(N); EVT VT = N->getValueType(0); // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. if (!isTypeLegal(VT)) return SDValue(); // May only combine to shuffle after legalize if shuffle is legal. if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT)) return SDValue(); bool UsesZeroVector = false; unsigned NumElems = N->getNumOperands(); // Record, for each element of the newly built vector, which input vector // that element comes from. 
-1 stands for undef, 0 for the zero vector, // and positive values for the input vectors. // VectorMask maps each element to its vector number, and VecIn maps vector // numbers to their initial SDValues. SmallVector VectorMask(NumElems, -1); SmallVector VecIn; VecIn.push_back(SDValue()); for (unsigned i = 0; i != NumElems; ++i) { SDValue Op = N->getOperand(i); if (Op.isUndef()) continue; // See if we can use a blend with a zero vector. // TODO: Should we generalize this to a blend with an arbitrary constant // vector? if (isNullConstant(Op) || isNullFPConstant(Op)) { UsesZeroVector = true; VectorMask[i] = 0; continue; } // Not an undef or zero. If the input is something other than an // EXTRACT_VECTOR_ELT with a constant index, bail out. if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || !isa(Op.getOperand(1))) return SDValue(); SDValue ExtractedFromVec = Op.getOperand(0); // All inputs must have the same element type as the output. if (VT.getVectorElementType() != ExtractedFromVec.getValueType().getVectorElementType()) return SDValue(); // Have we seen this input vector before? // The vectors are expected to be tiny (usually 1 or 2 elements), so using // a map back from SDValues to numbers isn't worth it. unsigned Idx = std::distance( VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec)); if (Idx == VecIn.size()) VecIn.push_back(ExtractedFromVec); VectorMask[i] = Idx; } // If we didn't find at least one input vector, bail out. if (VecIn.size() < 2) return SDValue(); // TODO: We want to sort the vectors by descending length, so that adjacent // pairs have similar length, and the longer vector is always first in the // pair. // TODO: Should this fire if some of the input vectors has illegal type (like // it does now), or should we let legalization run its course first? // Shuffle phase: // Take pairs of vectors, and shuffle them so that the result has elements // from these vectors in the correct places. 
// For example, given: // t10: i32 = extract_vector_elt t1, Constant:i64<0> // t11: i32 = extract_vector_elt t2, Constant:i64<0> // t12: i32 = extract_vector_elt t3, Constant:i64<0> // t13: i32 = extract_vector_elt t1, Constant:i64<1> // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13 // We will generate: // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2 // t21: v4i32 = vector_shuffle t3, undef SmallVector Shuffles; for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) { unsigned LeftIdx = 2 * In + 1; SDValue VecLeft = VecIn[LeftIdx]; SDValue VecRight = (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue(); if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft, VecRight, LeftIdx)) Shuffles.push_back(Shuffle); else return SDValue(); } // If we need the zero vector as an "ingredient" in the blend tree, add it // to the list of shuffles. if (UsesZeroVector) Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT) : DAG.getConstantFP(0.0, DL, VT)); // If we only have one shuffle, we're done. if (Shuffles.size() == 1) return Shuffles[0]; // Update the vector mask to point to the post-shuffle vectors. for (int &Vec : VectorMask) if (Vec == 0) Vec = Shuffles.size() - 1; else Vec = (Vec - 1) / 2; // More than one shuffle. Generate a binary tree of blends, e.g. if from // the previous step we got the set of shuffles t10, t11, t12, t13, we will // generate: // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2 // t11: v8i32 = vector_shuffle t3, t4 // t12: v8i32 = vector_shuffle t5, t6 // t13: v8i32 = vector_shuffle t7, t8 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11 // t21: v8i32 = vector_shuffle t12, t13 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21 // Make sure the initial size of the shuffle list is even. 
if (Shuffles.size() % 2) Shuffles.push_back(DAG.getUNDEF(VT)); for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) { if (CurSize % 2) { Shuffles[CurSize] = DAG.getUNDEF(VT); CurSize++; } for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) { int Left = 2 * In; int Right = 2 * In + 1; SmallVector Mask(NumElems, -1); for (unsigned i = 0; i != NumElems; ++i) { if (VectorMask[i] == Left) { Mask[i] = i; VectorMask[i] = In; } else if (VectorMask[i] == Right) { Mask[i] = i + NumElems; VectorMask[i] = In; } } Shuffles[In] = DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask); } } return Shuffles[0]; } SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { EVT VT = N->getValueType(0); // A vector built entirely of undefs is undef. if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(VT); if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) return V; if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N)) return V; if (SDValue V = reduceBuildVecToShuffle(N)) return V; return SDValue(); } static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT OpVT = N->getOperand(0).getValueType(); // If the operands are legal vectors, leave them alone. if (TLI.isTypeLegal(OpVT)) return SDValue(); SDLoc DL(N); EVT VT = N->getValueType(0); SmallVector Ops; EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); // Keep track of what we encounter. bool AnyInteger = false; bool AnyFP = false; for (const SDValue &Op : N->ops()) { if (ISD::BITCAST == Op.getOpcode() && !Op.getOperand(0).getValueType().isVector()) Ops.push_back(Op.getOperand(0)); else if (ISD::UNDEF == Op.getOpcode()) Ops.push_back(ScalarUndef); else return SDValue(); // Note whether we encounter an integer or floating point scalar. // If it's neither, bail out, it could be something weird like x86mmx. 
EVT LastOpVT = Ops.back().getValueType(); if (LastOpVT.isFloatingPoint()) AnyFP = true; else if (LastOpVT.isInteger()) AnyInteger = true; else return SDValue(); } // If any of the operands is a floating point scalar bitcast to a vector, // use floating point types throughout, and bitcast everything. // Replace UNDEFs by another scalar UNDEF node, of the final desired type. if (AnyFP) { SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits()); ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); if (AnyInteger) { for (SDValue &Op : Ops) { if (Op.getValueType() == SVT) continue; if (Op.isUndef()) Op = ScalarUndef; else Op = DAG.getBitcast(SVT, Op); } } } EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT, VT.getSizeInBits() / SVT.getSizeInBits()); return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops)); } // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at // most two distinct vectors the same size as the result, attempt to turn this // into a legal shuffle. static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); EVT OpVT = N->getOperand(0).getValueType(); int NumElts = VT.getVectorNumElements(); int NumOpElts = OpVT.getVectorNumElements(); SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT); SmallVector Mask; for (SDValue Op : N->ops()) { // Peek through any bitcast. while (Op.getOpcode() == ISD::BITCAST) Op = Op.getOperand(0); // UNDEF nodes convert to UNDEF shuffle mask values. if (Op.isUndef()) { Mask.append((unsigned)NumOpElts, -1); continue; } if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR) return SDValue(); // What vector are we extracting the subvector from and at what index? SDValue ExtVec = Op.getOperand(0); // We want the EVT of the original extraction to correctly scale the // extraction index. EVT ExtVT = ExtVec.getValueType(); // Peek through any bitcast. 
while (ExtVec.getOpcode() == ISD::BITCAST) ExtVec = ExtVec.getOperand(0); // UNDEF nodes convert to UNDEF shuffle mask values. if (ExtVec.isUndef()) { Mask.append((unsigned)NumOpElts, -1); continue; } if (!isa(Op.getOperand(1))) return SDValue(); int ExtIdx = cast(Op.getOperand(1))->getZExtValue(); // Ensure that we are extracting a subvector from a vector the same // size as the result. if (ExtVT.getSizeInBits() != VT.getSizeInBits()) return SDValue(); // Scale the subvector index to account for any bitcast. int NumExtElts = ExtVT.getVectorNumElements(); if (0 == (NumExtElts % NumElts)) ExtIdx /= (NumExtElts / NumElts); else if (0 == (NumElts % NumExtElts)) ExtIdx *= (NumElts / NumExtElts); else return SDValue(); // At most we can reference 2 inputs in the final shuffle. if (SV0.isUndef() || SV0 == ExtVec) { SV0 = ExtVec; for (int i = 0; i != NumOpElts; ++i) Mask.push_back(i + ExtIdx); } else if (SV1.isUndef() || SV1 == ExtVec) { SV1 = ExtVec; for (int i = 0; i != NumOpElts; ++i) Mask.push_back(i + ExtIdx + NumElts); } else { return SDValue(); } } if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT)) return SDValue(); return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0), DAG.getBitcast(VT, SV1), Mask); } SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // If we only have one input vector, we don't need to do any concatenation. if (N->getNumOperands() == 1) return N->getOperand(0); // Check if all of the operands are undefs. EVT VT = N->getValueType(0); if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(VT); // Optimize concat_vectors where all but the first of the vectors are undef. if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) { return Op.isUndef(); })) { SDValue In = N->getOperand(0); assert(In.getValueType().isVector() && "Must concat vectors"); // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr). 
if (In->getOpcode() == ISD::BITCAST && !In->getOperand(0)->getValueType(0).isVector()) { SDValue Scalar = In->getOperand(0); // If the bitcast type isn't legal, it might be a trunc of a legal type; // look through the trunc so we can still do the transform: // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar) if (Scalar->getOpcode() == ISD::TRUNCATE && !TLI.isTypeLegal(Scalar.getValueType()) && TLI.isTypeLegal(Scalar->getOperand(0).getValueType())) Scalar = Scalar->getOperand(0); EVT SclTy = Scalar->getValueType(0); if (!SclTy.isFloatingPoint() && !SclTy.isInteger()) return SDValue(); EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VT.getSizeInBits() / SclTy.getSizeInBits()); if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType())) return SDValue(); SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar); return DAG.getBitcast(VT, Res); } } // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR. // We have already tested above for an UNDEF only concatenation. // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...)) // -> (BUILD_VECTOR A, B, ..., C, D, ...) auto IsBuildVectorOrUndef = [](const SDValue &Op) { return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode(); }; if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) { SmallVector Opnds; EVT SVT = VT.getScalarType(); EVT MinVT = SVT; if (!SVT.isFloatingPoint()) { // If BUILD_VECTOR are from built from integer, they may have different // operand types. Get the smallest type and truncate all operands to it. bool FoundMinVT = false; for (const SDValue &Op : N->ops()) if (ISD::BUILD_VECTOR == Op.getOpcode()) { EVT OpSVT = Op.getOperand(0)->getValueType(0); MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? 
OpSVT : MinVT; FoundMinVT = true; } assert(FoundMinVT && "Concat vector type mismatch"); } for (const SDValue &Op : N->ops()) { EVT OpVT = Op.getValueType(); unsigned NumElts = OpVT.getVectorNumElements(); if (ISD::UNDEF == Op.getOpcode()) Opnds.append(NumElts, DAG.getUNDEF(MinVT)); if (ISD::BUILD_VECTOR == Op.getOpcode()) { if (SVT.isFloatingPoint()) { assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch"); Opnds.append(Op->op_begin(), Op->op_begin() + NumElts); } else { for (unsigned i = 0; i != NumElts; ++i) Opnds.push_back( DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i))); } } } assert(VT.getVectorNumElements() == Opnds.size() && "Concat vector type mismatch"); return DAG.getBuildVector(VT, SDLoc(N), Opnds); } // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. if (SDValue V = combineConcatVectorOfScalars(N, DAG)) return V; // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE. if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) if (SDValue V = combineConcatVectorOfExtracts(N, DAG)) return V; // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that // place the incoming vectors at the exact same location. SDValue SingleSource = SDValue(); unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements(); for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDValue Op = N->getOperand(i); if (Op.isUndef()) continue; // Check if this is the identity extract: if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR) return SDValue(); // Find the single incoming vector for the extract_subvector. if (SingleSource.getNode()) { if (Op.getOperand(0) != SingleSource) return SDValue(); } else { SingleSource = Op.getOperand(0); // Check the source type is the same as the type of the result. 
// If not, this concat may extend the vector, so we can not // optimize it away. if (SingleSource.getValueType() != N->getValueType(0)) return SDValue(); } unsigned IdentityIndex = i * PartNumElem; ConstantSDNode *CS = dyn_cast(Op.getOperand(1)); // The extract index must be constant. if (!CS) return SDValue(); // Check that we are reading from the identity index. if (CS->getZExtValue() != IdentityIndex) return SDValue(); } if (SingleSource.getNode()) return SingleSource; return SDValue(); } SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { EVT NVT = N->getValueType(0); SDValue V = N->getOperand(0); if (V->getOpcode() == ISD::CONCAT_VECTORS) { // Combine: // (extract_subvec (concat V1, V2, ...), i) // Into: // Vi if possible // Only operand 0 is checked as 'concat' assumes all inputs of the same // type. if (V->getOperand(0).getValueType() != NVT) return SDValue(); unsigned Idx = N->getConstantOperandVal(1); unsigned NumElems = NVT.getVectorNumElements(); assert((Idx % NumElems) == 0 && "IDX in concat is not a multiple of the result vector length."); return V->getOperand(Idx / NumElems); } // Skip bitcasting if (V->getOpcode() == ISD::BITCAST) V = V.getOperand(0); if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { // Handle only simple case where vector being inserted and vector // being extracted are of same type, and are half size of larger vectors. EVT BigVT = V->getOperand(0).getValueType(); EVT SmallVT = V->getOperand(1).getValueType(); if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits()) return SDValue(); // Only handle cases where both indexes are constants with the same type. 
ConstantSDNode *ExtIdx = dyn_cast(N->getOperand(1)); ConstantSDNode *InsIdx = dyn_cast(V->getOperand(2)); if (InsIdx && ExtIdx && InsIdx->getValueType(0).getSizeInBits() <= 64 && ExtIdx->getValueType(0).getSizeInBits() <= 64) { // Combine: // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) // Into: // indices are equal or bit offsets are equal => V1 // otherwise => (extract_subvec V1, ExtIdx) if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() == ExtIdx->getZExtValue() * NVT.getScalarSizeInBits()) return DAG.getBitcast(NVT, V->getOperand(1)); return DAG.getNode( ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)), N->getOperand(1)); } } return SDValue(); } static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements, SDValue V, SelectionDAG &DAG) { SDLoc DL(V); EVT VT = V.getValueType(); switch (V.getOpcode()) { default: return V; case ISD::CONCAT_VECTORS: { EVT OpVT = V->getOperand(0).getValueType(); int OpSize = OpVT.getVectorNumElements(); SmallBitVector OpUsedElements(OpSize, false); bool FoundSimplification = false; SmallVector NewOps; NewOps.reserve(V->getNumOperands()); for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) { SDValue Op = V->getOperand(i); bool OpUsed = false; for (int j = 0; j < OpSize; ++j) if (UsedElements[i * OpSize + j]) { OpUsedElements[j] = true; OpUsed = true; } NewOps.push_back( OpUsed ? 
simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG) : DAG.getUNDEF(OpVT)); FoundSimplification |= Op == NewOps.back(); OpUsedElements.reset(); } if (FoundSimplification) V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps); return V; } case ISD::INSERT_SUBVECTOR: { SDValue BaseV = V->getOperand(0); SDValue SubV = V->getOperand(1); auto *IdxN = dyn_cast(V->getOperand(2)); if (!IdxN) return V; int SubSize = SubV.getValueType().getVectorNumElements(); int Idx = IdxN->getZExtValue(); bool SubVectorUsed = false; SmallBitVector SubUsedElements(SubSize, false); for (int i = 0; i < SubSize; ++i) if (UsedElements[i + Idx]) { SubVectorUsed = true; SubUsedElements[i] = true; UsedElements[i + Idx] = false; } // Now recurse on both the base and sub vectors. SDValue SimplifiedSubV = SubVectorUsed ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG) : DAG.getUNDEF(SubV.getValueType()); SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG); if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV) V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, SimplifiedBaseV, SimplifiedSubV, V->getOperand(2)); return V; } } } static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0, SDValue N1, SelectionDAG &DAG) { EVT VT = SVN->getValueType(0); int NumElts = VT.getVectorNumElements(); SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false); for (int M : SVN->getMask()) if (M >= 0 && M < NumElts) N0UsedElements[M] = true; else if (M >= NumElts) N1UsedElements[M - NumElts] = true; SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG); SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG); if (S0 == N0 && S1 == N1) return SDValue(); return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask()); } // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, // or turn a shuffle of a single concat into simpler shuffle then concat. 
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
  // Parameters:
  //   N   - the shuffle node to simplify; it is cast to ShuffleVectorSDNode.
  //   DAG - the SelectionDAG used to create replacement nodes.
  // Returns a CONCAT_VECTORS node on success, or an empty SDValue when the
  // shuffle mask is not built from whole-subvector copies.
  //
  // NOTE(review): N0.getOperand(0) is read unconditionally, and N1's operands
  // are read whenever the mask selects from the second input, so the caller
  // presumably guarantees that N0 is a CONCAT_VECTORS and that N1 is either
  // undef (and unreferenced by the mask) or a CONCAT_VECTORS with the same
  // operand type - confirm at the call site.
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  // Operands of the CONCAT_VECTORS node built by the general path below.
  SmallVector<SDValue, 4> Ops;
  EVT ConcatVT = N0.getOperand(0).getValueType();
  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
  unsigned NumConcats = NumElts / NumElemsPerConcat;

  // Special case: shuffle(concat(A,B)) can be more efficiently represented
  // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
  // half vector elements.
  if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
      std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
                  SVN->getMask().end(), [](int i) { return i == -1; })) {
    // Only the low half of the mask is live, so shuffle A and B directly at
    // the narrower subvector type using that low half of the mask.
    N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
                              N0.getOperand(1),
                              makeArrayRef(SVN->getMask().begin(),
                                           NumElemsPerConcat));
    N1 = DAG.getUNDEF(ConcatVT);
    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
  }

  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector
  for (unsigned I = 0; I != NumConcats; ++I) {
    // Make sure we're dealing with a copy.
    unsigned Begin = I * NumElemsPerConcat;

    // Classify this subvector-sized slice of the mask: entirely undef,
    // entirely defined, or a mixture of both.
    bool AllUndef = true, NoUndef = true;
    for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
      if (SVN->getMaskElt(J) >= 0)
        AllUndef = false;
      else
        NoUndef = false;
    }

    if (NoUndef) {
      // The slice must start on a subvector boundary of the inputs...
      if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
        return SDValue();

      // ...and must select consecutive elements, i.e. copy exactly one
      // whole input subvector.
      for (unsigned J = 1; J != NumElemsPerConcat; ++J)
        if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
          return SDValue();

      // FirstElt indexes the copied subvector within the combined operand
      // list: N0's operands first, followed by N1's operands.
      unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
      if (FirstElt < N0.getNumOperands())
        Ops.push_back(N0.getOperand(FirstElt));
      else
        Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));

    } else if (AllUndef) {
      // A fully undef slice becomes an undef subvector operand.
      Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
    } else { // Mixed with general masks and undefs, can't do optimization.
      return SDValue();
    }
  }

  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}

// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
//
// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
// a simplification in some sense, but it isn't appropriate in general: some
// BUILD_VECTORs are substantially cheaper than others. The general case
// of a BUILD_VECTOR requires inserting each element individually (or
// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
// all constants is a single constant pool load. A BUILD_VECTOR where each
// element is identical is a splat. A BUILD_VECTOR where most of the operands
// are undef lowers to a small number of element insertions.
//
// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
// We don't fold shuffles where one side is a non-zero constant, and we don't
// fold shuffles if the resulting BUILD_VECTOR would have duplicate
// non-constant operands. This seems to work out reasonably well in practice.
/// Fold a shuffle whose inputs are BUILD_VECTOR / SCALAR_TO_VECTOR nodes into
/// a single BUILD_VECTOR, subject to the cost heuristics described in the
/// comment preceding this function.
///
/// \returns the new BUILD_VECTOR, or an empty SDValue when an input has more
/// than one use, is of another opcode, or a heuristic rejects the fold.
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
                                       SelectionDAG &DAG,
                                       const TargetLowering &TLI) {
  EVT VT = SVN->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  SDValue N0 = SVN->getOperand(0);
  SDValue N1 = SVN->getOperand(1);

  if (!N0->hasOneUse() || !N1->hasOneUse())
    return SDValue();

  // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
  // discussed above.
  if (!N1.isUndef()) {
    bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
    bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
    if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
      return SDValue();
    if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
      return SDValue();
  }

  SmallVector<SDValue, 8> Ops;
  SmallSet<SDValue, 16> DuplicateOps;
  for (int M : SVN->getMask()) {
    // Undef mask elements (and SCALAR_TO_VECTOR lanes other than lane 0)
    // become undef scalars.
    SDValue Op = DAG.getUNDEF(VT.getScalarType());
    if (M >= 0) {
      int Idx = M < (int)NumElts ? M : M - NumElts;
      SDValue &S = (M < (int)NumElts ? N0 : N1);
      if (S.getOpcode() == ISD::BUILD_VECTOR) {
        Op = S.getOperand(Idx);
      } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
        if (Idx == 0)
          Op = S.getOperand(0);
      } else {
        // Operand can't be combined - bail out.
        return SDValue();
      }
    }

    // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is
    // fine, but it's likely to generate low-quality code if the target can't
    // reconstruct an appropriate shuffle.
    if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
      if (!DuplicateOps.insert(Op).second)
        return SDValue();

    Ops.push_back(Op);
  }

  // BUILD_VECTOR requires all inputs to be of the same type, find the
  // maximum type and extend them all.
  EVT SVT = VT.getScalarType();
  if (SVT.isInteger())
    for (SDValue &Op : Ops)
      SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
  if (SVT != VT.getScalarType())
    for (SDValue &Op : Ops)
      Op = TLI.isZExtFree(Op.getValueType(), SVT)
               ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
               : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
  return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}

/// Combine a VECTOR_SHUFFLE node: canonicalize its operands and mask, then
/// try a sequence of folds (splat of build_vector, shuffle of concats, shuffle
/// of scalars, merging with a bitcasted or nested shuffle).
///
/// \returns the replacement value, or an empty SDValue if nothing applied.
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");

  // Canonicalize shuffle undef, undef -> undef
  if (N0.isUndef() && N1.isUndef())
    return DAG.getUNDEF(VT);

  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  // Canonicalize shuffle v, v -> v, undef
  if (N0 == N1) {
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts)
        Idx -= NumElts;
      NewMask.push_back(Idx);
    }
    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
  }

  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
  if (N0.isUndef())
    return DAG.getCommutedVectorShuffle(*SVN);

  // Remove references to rhs if it is undef
  if (N1.isUndef()) {
    bool Changed = false;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) {
        Idx = -1;
        Changed = true;
      }
      NewMask.push_back(Idx);
    }
    if (Changed)
      return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
  }

  // If it is a splat, check if the argument vector is another splat or a
  // build_vector.
  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
    SDNode *V = N0.getNode();

    // If this is a bit convert that changes the element type of the vector but
    // not the number of vector elements, look through it.  Be careful not to
    // look though conversions that change things like v4f32 to v2f64.
    if (V->getOpcode() == ISD::BITCAST) {
      SDValue ConvInput = V->getOperand(0);
      if (ConvInput.getValueType().isVector() &&
          ConvInput.getValueType().getVectorNumElements() == NumElts)
        V = ConvInput.getNode();
    }

    if (V->getOpcode() == ISD::BUILD_VECTOR) {
      assert(V->getNumOperands() == NumElts &&
             "BUILD_VECTOR has wrong number of operands");
      SDValue Base;
      bool AllSame = true;
      // Find the first non-undef operand, if any.
      for (unsigned i = 0; i != NumElts; ++i) {
        if (!V->getOperand(i).isUndef()) {
          Base = V->getOperand(i);
          break;
        }
      }
      // Splat of <u, u, u, u>, return <u, u, u, u>
      if (!Base.getNode())
        return N0;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i) != Base) {
          AllSame = false;
          break;
        }
      }
      // Splat of <x, x, x, x>, return <x, x, x, x>
      if (AllSame)
        return N0;

      // Canonicalize any other splat as a build_vector.
      const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
      SmallVector<SDValue, 8> Ops(NumElts, Splatted);
      SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);

      // We may have jumped through bitcasts, so the type of the
      // BUILD_VECTOR may not match the type of the shuffle.
      if (V->getValueType(0) != VT)
        NewBV = DAG.getBitcast(VT, NewBV);
      return NewBV;
    }
  }

  // There are various patterns used to build up a vector from smaller vectors,
  // subvectors, or elements. Scan chains of these and replace unused insertions
  // or components with undef.
  if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
    return S;

  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
      Level < AfterLegalizeVectorOps &&
      (N1.isUndef() ||
       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
    if (SDValue V = partitionShuffleOfConcats(N, DAG))
      return V;
  }

  // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
  // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
      return Res;

  // If this shuffle only has a single input that is a bitcasted shuffle,
  // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
  // back to their original types.
  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && N1.isUndef() &&
      Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {

    // Peek through the bitcast only if there is one user.
    SDValue BC0 = N0;
    while (BC0.getOpcode() == ISD::BITCAST) {
      if (!BC0.hasOneUse())
        break;
      BC0 = BC0.getOperand(0);
    }

    // Expand each mask element M into Scale consecutive elements
    // Scale*M .. Scale*M + Scale-1 (undef stays undef).
    auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
      if (Scale == 1)
        return SmallVector<int, 8>(Mask.begin(), Mask.end());

      SmallVector<int, 8> NewMask;
      for (int M : Mask)
        for (int s = 0; s != Scale; ++s)
          NewMask.push_back(M < 0 ? -1 : Scale * M + s);
      return NewMask;
    };

    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
      EVT SVT = VT.getScalarType();
      EVT InnerVT = BC0->getValueType(0);
      EVT InnerSVT = InnerVT.getScalarType();

      // Determine which shuffle works with the smaller scalar type.
      EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
      EVT ScaleSVT = ScaleVT.getScalarType();

      if (TLI.isTypeLegal(ScaleVT) &&
          0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
          0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {

        int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
        int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();

        // Scale the shuffle masks to the smaller scalar type.
        ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
        SmallVector<int, 8> InnerMask =
            ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
        SmallVector<int, 8> OuterMask =
            ScaleShuffleMask(SVN->getMask(), OuterScale);

        // Merge the shuffle masks.
        SmallVector<int, 8> NewMask;
        for (int M : OuterMask)
          NewMask.push_back(M < 0 ? -1 : InnerMask[M]);

        // Test for shuffle mask legality over both commutations.
        SDValue SV0 = BC0->getOperand(0);
        SDValue SV1 = BC0->getOperand(1);
        bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        if (!LegalMask) {
          std::swap(SV0, SV1);
          ShuffleVectorSDNode::commuteMask(NewMask);
          LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        }

        if (LegalMask) {
          SV0 = DAG.getBitcast(ScaleVT, SV0);
          SV1 = DAG.getBitcast(ScaleVT, SV1);
          return DAG.getBitcast(
              VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
        }
      }
    }
  }

  // Canonicalize shuffles according to rules:
  //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
  //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
  //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
      N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
      TLI.isTypeLegal(VT)) {
    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(N1->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0 = N1->getOperand(0);
    SDValue SV1 = N1->getOperand(1);
    bool HasSameOp0 = N0 == SV0;
    bool IsSV1Undef = SV1.isUndef();
    if (HasSameOp0 || IsSV1Undef || N0 == SV1)
      // Commute the operands of this shuffle so that next rule
      // will trigger.
      return DAG.getCommutedVectorShuffle(*SVN);
  }

  // Try to fold according to rules:
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
  // Don't try to fold shuffles with illegal type.
  // Only fold if this shuffle is the only user of the other shuffle.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
      Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);

    // Don't try to fold splats; they're likely to simplify somehow, or they
    // might be free.
    if (OtherSV->isSplat())
      return SDValue();

    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(OtherSV->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0, SV1;
    SmallVector<int, 4> Mask;
    // Compute the combined shuffle mask for a shuffle with SV0 as the first
    // operand, and SV1 as the second operand.
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx < 0) {
        // Propagate Undef.
        Mask.push_back(Idx);
        continue;
      }

      SDValue CurrentVec;
      if (Idx < (int)NumElts) {
        // This shuffle index refers to the inner shuffle N0. Lookup the inner
        // shuffle mask to identify which vector is actually referenced.
        Idx = OtherSV->getMaskElt(Idx);
        if (Idx < 0) {
          // Propagate Undef.
          Mask.push_back(Idx);
          continue;
        }

        CurrentVec = (Idx < (int)NumElts) ? OtherSV->getOperand(0)
                                          : OtherSV->getOperand(1);
      } else {
        // This shuffle index references an element within N1.
        CurrentVec = N1;
      }

      // Simple case where 'CurrentVec' is UNDEF.
      if (CurrentVec.isUndef()) {
        Mask.push_back(-1);
        continue;
      }

      // Canonicalize the shuffle index. We don't know yet if CurrentVec
      // will be the first or second operand of the combined shuffle.
      Idx = Idx % NumElts;
      if (!SV0.getNode() || SV0 == CurrentVec) {
        // Ok. CurrentVec is the left hand side.
        // Update the mask accordingly.
        SV0 = CurrentVec;
        Mask.push_back(Idx);
        continue;
      }

      // Bail out if we cannot convert the shuffle pair into a single shuffle.
      if (SV1.getNode() && SV1 != CurrentVec)
        return SDValue();

      // Ok. CurrentVec is the right hand side.
      // Update the mask accordingly.
      SV1 = CurrentVec;
      Mask.push_back(Idx + NumElts);
    }

    // Check if all indices in Mask are Undef. In case, propagate Undef.
    bool isUndefMask = true;
    for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
      isUndefMask &= Mask[i] < 0;

    if (isUndefMask)
      return DAG.getUNDEF(VT);

    if (!SV0.getNode())
      SV0 = DAG.getUNDEF(VT);
    if (!SV1.getNode())
      SV1 = DAG.getUNDEF(VT);

    // Avoid introducing shuffles with illegal mask.
    if (!TLI.isShuffleMaskLegal(Mask, VT)) {
      ShuffleVectorSDNode::commuteMask(Mask);

      if (!TLI.isShuffleMaskLegal(Mask, VT))
        return SDValue();

      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
      std::swap(SV0, SV1);
    }

    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
    return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
  }

  return SDValue();
}

/// Combine a SCALAR_TO_VECTOR node whose scalar comes from a constant-index
/// EXTRACT_VECTOR_ELT of a same-typed vector into a shuffle, when the target
/// reports the resulting mask as legal.
SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
  SDValue InVal = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
  // with a VECTOR_SHUFFLE.
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    SDValue InVec = InVal->getOperand(0);
    SDValue EltNo = InVal->getOperand(1);

    // FIXME: We could support implicit truncation if the shuffle can be
    // scaled to a smaller vector scalar type.
    ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
    if (C0 && VT == InVec.getValueType() &&
        VT.getScalarType() == InVal.getValueType()) {
      // Only lane 0 is defined; every other lane is undef.
      SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
      int Elt = C0->getZExtValue();
      NewMask[0] = Elt;

      if (TLI.isShuffleMaskLegal(NewMask, VT))
        return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
                                    NewMask);
    }
  }

  return SDValue();
}

/// Combine an INSERT_SUBVECTOR node (operands: vector, subvector, index).
///
/// Handles two patterns: a chain of inserts at the same index, and an insert
/// that replaces one half of a two-operand CONCAT_VECTORS.
SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);

  // Combine INSERT_SUBVECTORs where we are inserting to the same index.
  // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
  // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
      N0.getOperand(1).getValueType() == N1.getValueType() &&
      N0.getOperand(2) == N2)
    return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
                       N1, N2);

  // If the input vector is a concatenation, and the insert replaces
  // one of the halves, we can optimize into a single concat_vectors.
  // The inserted value must have the type of a concat operand (one half),
  // not the type of the whole vector: otherwise the rebuilt CONCAT_VECTORS
  // would not have VT's element count.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 &&
      N0.getOperand(0).getValueType() == N1.getValueType() &&
      N2.getOpcode() == ISD::Constant) {
    APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue();

    // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
    // (concat_vectors Z, Y)
    if (InsIdx == 0)
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N1,
                         N0.getOperand(1));

    // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) ->
    // (concat_vectors X, Z)
    if (InsIdx == VT.getVectorNumElements() / 2)
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0.getOperand(0),
                         N1);
  }

  return SDValue();
}

/// Combine an FP_TO_FP16 node: a round trip through FP16_TO_FP is dropped.
SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
  SDValue N0 = N->getOperand(0);

  // fold (fp_to_fp16 (fp16_to_fp op)) -> op
  if (N0->getOpcode() == ISD::FP16_TO_FP)
    return N0->getOperand(0);

  return SDValue();
}

/// Combine an FP16_TO_FP node: a 0xffff mask on the input is dropped.
SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);

  // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
  if (N0->getOpcode() == ISD::AND) {
    ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
    if (AndConst && AndConst->getAPIntValue() == 0xffff) {
      return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
                         N0.getOperand(0));
    }
  }

  return SDValue();
}

/// Returns a vector_shuffle if it is able to transform an AND to a
/// vector_shuffle with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>.
///      ==> vector_shuffle V, Zero, <0, 4, 2, 4>
///
/// Only applies to ISD::AND nodes whose second operand is a BUILD_VECTOR
/// (optionally behind one BITCAST) before operation legalization. Returns an
/// empty SDValue otherwise, or when no split of the mask elements yields
/// pieces that are each all-ones, all-zeros or undef, or when the target
/// rejects the resulting clear mask.
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDLoc DL(N);

  // Make sure we're not running after operation legalization where it
  // may have custom lowered the vector shuffles.
  if (LegalOperations)
    return SDValue();

  if (N->getOpcode() != ISD::AND)
    return SDValue();

  if (RHS.getOpcode() == ISD::BITCAST)
    RHS = RHS.getOperand(0);

  if (RHS.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  EVT RVT = RHS.getValueType();
  unsigned NumElts = RHS.getNumOperands();

  // Attempt to create a valid clear mask, splitting the mask into
  // sub elements and checking to see if each is
  // all zeros or all ones - suitable for shuffle masking.
  auto BuildClearMask = [&](int Split) {
    int NumSubElts = NumElts * Split;
    int NumSubBits = RVT.getScalarSizeInBits() / Split;

    SmallVector<int, 8> Indices;
    for (int i = 0; i != NumSubElts; ++i) {
      int EltIdx = i / Split;
      int SubIdx = i % Split;
      SDValue Elt = RHS.getOperand(EltIdx);
      if (Elt.isUndef()) {
        Indices.push_back(-1);
        continue;
      }

      APInt Bits;
      if (isa<ConstantSDNode>(Elt))
        Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
      else if (isa<ConstantFPSDNode>(Elt))
        Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
      else
        return SDValue();

      // Extract the sub element from the constant bit mask.
      if (DAG.getDataLayout().isBigEndian()) {
        Bits = Bits.lshr((Split - SubIdx - 1) * NumSubBits);
      } else {
        Bits = Bits.lshr(SubIdx * NumSubBits);
      }

      if (Split > 1)
        Bits = Bits.trunc(NumSubBits);

      // All-ones keeps the lane from LHS (index i); all-zeros takes the lane
      // from the zero vector (index i + NumSubElts).
      if (Bits.isAllOnesValue())
        Indices.push_back(i);
      else if (Bits == 0)
        Indices.push_back(i + NumSubElts);
      else
        return SDValue();
    }

    // Let's see if the target supports this vector_shuffle.
    EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
    EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
    if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(0, DL, ClearVT);
    return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
                                                   DAG.getBitcast(ClearVT, LHS),
                                                   Zero, Indices));
  };

  // Determine maximum split level (byte level masking).
  int MaxSplit = 1;
  if (RVT.getScalarSizeInBits() % 8 == 0)
    MaxSplit = RVT.getScalarSizeInBits() / 8;

  for (int Split = 1; Split <= MaxSplit; ++Split)
    if (RVT.getScalarSizeInBits() % Split == 0)
      if (SDValue S = BuildClearMask(Split))
        return S;

  return SDValue();
}

/// Visit a binary vector operation, like ADD.
///
/// Tries, in order: constant folding, converting a constant-mask AND into a
/// shuffle with zero, and (after type legalization) hoisting the operation
/// above two single-use shuffles that share the same mask.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
  assert(N->getValueType(0).isVector() &&
         "SimplifyVBinOp only works on vectors!");

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Ops[] = {LHS, RHS};

  // See if we can constant fold the vector operation.
  if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
          N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
    return Fold;

  // Try to convert a constant mask AND into a shuffle clear mask.
  if (SDValue Shuffle = XformToShuffleWithZero(N))
    return Shuffle;

  // Type legalization might introduce new shuffles in the DAG.
  // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
  //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
  if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
      isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
      LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef()) {
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);

    if (SVN0->getMask().equals(SVN1->getMask())) {
      EVT VT = N->getValueType(0);
      SDValue UndefVector = LHS.getOperand(1);
      SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                     LHS.getOperand(0), RHS.getOperand(0),
                                     N->getFlags());
      AddUsersToWorklist(N);
      return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
                                  SVN0->getMask());
    }
  }

  return SDValue();
}

/// Simplify a select whose condition \p N0 is a SETCC node, by delegating to
/// SimplifySelectCC on the SETCC's operands and condition code.
///
/// \returns the simplified value (a SELECT rebuilt from a SELECT_CC result, or
/// whatever other node SimplifySelectCC produced), or an empty SDValue.
SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
                                    SDValue N2) {
  assert(N0.getOpcode() == ISD::SETCC &&
         "First argument must be a SetCC node!");

  SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
                                 cast<CondCodeSDNode>(N0.getOperand(2))->get());

  // If we got a simplified select_cc node back from SimplifySelectCC, then
  // break it down into a new SETCC node, and a new SELECT node, and then return
  // the SELECT node, since we were called with a SELECT node.
  if (SCC.getNode()) {
    // Check to see if we got a select_cc back (to turn into setcc/select).
    // Otherwise, just return whatever node we got back, like fabs.
    if (SCC.getOpcode() == ISD::SELECT_CC) {
      SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0), N0.getValueType(),
                                  SCC.getOperand(0), SCC.getOperand(1),
                                  SCC.getOperand(4));
      AddToWorklist(SETCC.getNode());
      return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
                           SCC.getOperand(2), SCC.getOperand(3));
    }

    return SCC;
  }
  return SDValue();
}

/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
/// being selected between, see if we can simplify the select.  Callers of this
/// should assume that TheSelect is deleted if this returns true.  As such, they
/// should return the appropriate thing (e.g. the node) back to the top-level of
/// the DAG combiner loop to avoid it being looked at.
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, SDValue RHS) { // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x)) // The select + setcc is redundant, because fsqrt returns NaN for X < 0. if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) { if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) { // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?)) SDValue Sqrt = RHS; ISD::CondCode CC; SDValue CmpLHS; const ConstantFPSDNode *Zero = nullptr; if (TheSelect->getOpcode() == ISD::SELECT_CC) { CC = dyn_cast(TheSelect->getOperand(4))->get(); CmpLHS = TheSelect->getOperand(0); Zero = isConstOrConstSplatFP(TheSelect->getOperand(1)); } else { // SELECT or VSELECT SDValue Cmp = TheSelect->getOperand(0); if (Cmp.getOpcode() == ISD::SETCC) { CC = dyn_cast(Cmp.getOperand(2))->get(); CmpLHS = Cmp.getOperand(0); Zero = isConstOrConstSplatFP(Cmp.getOperand(1)); } } if (Zero && Zero->isZero() && Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT || CC == ISD::SETULT || CC == ISD::SETLT)) { // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x)) CombineTo(TheSelect, Sqrt); return true; } } } // Cannot simplify select with vector condition if (TheSelect->getOperand(0).getValueType().isVector()) return false; // If this is a select from two identical things, try to pull the operation // through the select. if (LHS.getOpcode() != RHS.getOpcode() || !LHS.hasOneUse() || !RHS.hasOneUse()) return false; // If this is a load and the token chain is identical, replace the select // of two loads with a load through a select of the address to load from. // This triggers in things like "select bool X, 10.0, 123.0" after the FP // constants have been dropped into the constant pool. if (LHS.getOpcode() == ISD::LOAD) { LoadSDNode *LLD = cast(LHS); LoadSDNode *RLD = cast(RHS); // Token chains must be identical. if (LHS.getOperand(0) != RHS.getOperand(0) || // Do not let this transformation reduce the number of volatile loads. 
LLD->isVolatile() || RLD->isVolatile() || // FIXME: If either is a pre/post inc/dec load, // we'd need to split out the address adjustment. LLD->isIndexed() || RLD->isIndexed() || // If this is an EXTLOAD, the VT's must match. LLD->getMemoryVT() != RLD->getMemoryVT() || // If this is an EXTLOAD, the kind of extension must match. (LLD->getExtensionType() != RLD->getExtensionType() && // The only exception is if one of the extensions is anyext. LLD->getExtensionType() != ISD::EXTLOAD && RLD->getExtensionType() != ISD::EXTLOAD) || // FIXME: this discards src value information. This is // over-conservative. It would be beneficial to be able to remember // both potential memory locations. Since we are discarding // src value info, don't do the transformation if the memory // locations are not in the default address space. LLD->getPointerInfo().getAddrSpace() != 0 || RLD->getPointerInfo().getAddrSpace() != 0 || !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(), LLD->getBasePtr().getValueType())) return false; // Check that the select condition doesn't reach either load. If so, // folding this will induce a cycle into the DAG. If not, this is safe to // xform, so create a select of the addresses. SDValue Addr; if (TheSelect->getOpcode() == ISD::SELECT) { SDNode *CondNode = TheSelect->getOperand(0).getNode(); if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) || (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode))) return false; // The loads must not depend on one another. 
if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD)) return false; Addr = DAG.getSelect(SDLoc(TheSelect), LLD->getBasePtr().getValueType(), TheSelect->getOperand(0), LLD->getBasePtr(), RLD->getBasePtr()); } else { // Otherwise SELECT_CC SDNode *CondLHS = TheSelect->getOperand(0).getNode(); SDNode *CondRHS = TheSelect->getOperand(1).getNode(); if ((LLD->hasAnyUseOfValue(1) && (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) || (RLD->hasAnyUseOfValue(1) && (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS)))) return false; Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect), LLD->getBasePtr().getValueType(), TheSelect->getOperand(0), TheSelect->getOperand(1), LLD->getBasePtr(), RLD->getBasePtr(), TheSelect->getOperand(4)); } SDValue Load; // It is safe to replace the two loads if they have different alignments, // but the new load must be the minimum (most restrictive) alignment of the // inputs. unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment()); MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags(); if (!RLD->isInvariant()) MMOFlags &= ~MachineMemOperand::MOInvariant; if (!RLD->isDereferenceable()) MMOFlags &= ~MachineMemOperand::MODereferenceable; if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { // FIXME: Discards pointer and AA info. Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect), LLD->getChain(), Addr, MachinePointerInfo(), Alignment, MMOFlags); } else { // FIXME: Discards pointer and AA info. Load = DAG.getExtLoad( LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType() : LLD->getExtensionType(), SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr, MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags); } // Users of the select now use the result of the load. CombineTo(TheSelect, Load); // Users of the old loads now use the new load's chain. We know the // old-load value is dead now. 
CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1)); CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1)); return true; } return false; } /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and /// bitwise 'and'. SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC) { // If this is a select where the false operand is zero and the compare is a // check of the sign bit, see if we can perform the "gzip trick": // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A EVT XType = N0.getValueType(); EVT AType = N2.getValueType(); if (!isNullConstant(N3) || !XType.bitsGE(AType)) return SDValue(); // If the comparison is testing for a positive value, we have to invert // the sign bit mask, so only do that transform if the target has a bitwise // 'and not' instruction (the invert is free). if (CC == ISD::SETGT && TLI.hasAndNot(N2)) { // (X > -1) ? A : 0 // (X > 0) ? X : 0 <-- This is canonical signed max. if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2))) return SDValue(); } else if (CC == ISD::SETLT) { // (X < 0) ? A : 0 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min. if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2))) return SDValue(); } else { return SDValue(); } // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit // constant. 
EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType()); auto *N2C = dyn_cast(N2.getNode()); if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) { unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1; SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy); SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt); AddToWorklist(Shift.getNode()); if (XType.bitsGT(AType)) { Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); AddToWorklist(Shift.getNode()); } if (CC == ISD::SETGT) Shift = DAG.getNOT(DL, Shift, AType); return DAG.getNode(ISD::AND, DL, AType, Shift, N2); } SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy); SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt); AddToWorklist(Shift.getNode()); if (XType.bitsGT(AType)) { Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); AddToWorklist(Shift.getNode()); } if (CC == ISD::SETGT) Shift = DAG.getNOT(DL, Shift, AType); return DAG.getNode(ISD::AND, DL, AType, Shift, N2); } /// Simplify an expression of the form (N0 cond N1) ? N2 : N3 /// where 'cond' is the comparison specified by CC. SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, bool NotExtCompare) { // (x ? y : y) -> y. if (N2 == N3) return N2; EVT VT = N2.getValueType(); ConstantSDNode *N1C = dyn_cast(N1.getNode()); ConstantSDNode *N2C = dyn_cast(N2.getNode()); // Determine if the condition we're dealing with is constant SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1, CC, DL, false); if (SCC.getNode()) AddToWorklist(SCC.getNode()); if (ConstantSDNode *SCCC = dyn_cast_or_null(SCC.getNode())) { // fold select_cc true, x, y -> x // fold select_cc false, x, y -> y return !SCCC->isNullValue() ? 
N2 : N3; } // Check to see if we can simplify the select into an fabs node if (ConstantFPSDNode *CFP = dyn_cast(N1)) { // Allow either -0.0 or 0.0 if (CFP->isZero()) { // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs if ((CC == ISD::SETGE || CC == ISD::SETGT) && N0 == N2 && N3.getOpcode() == ISD::FNEG && N2 == N3.getOperand(0)) return DAG.getNode(ISD::FABS, DL, VT, N0); // select (setl[te] X, +/-0.0), fneg(X), X -> fabs if ((CC == ISD::SETLT || CC == ISD::SETLE) && N0 == N3 && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N3) return DAG.getNode(ISD::FABS, DL, VT, N3); } } // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)" // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0 // in it. This is a win when the constant is not otherwise available because // it replaces two constant pool loads with one. We only do this if the FP // type is known to be legal, because if it isn't, then we are before legalize // types an we want the other legalization to happen first (e.g. to avoid // messing with soft float) and if the ConstantFP is not legal, because if // it is legal, we may not need to store the FP constant in a constant pool. if (ConstantFPSDNode *TV = dyn_cast(N2)) if (ConstantFPSDNode *FV = dyn_cast(N3)) { if (TLI.isTypeLegal(N2.getValueType()) && (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) != TargetLowering::Legal && !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) && !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) && // If both constants have multiple uses, then we won't need to do an // extra load, they are likely around in registers for other users. (TV->hasOneUse() || FV->hasOneUse())) { Constant *Elts[] = { const_cast(FV->getConstantFPValue()), const_cast(TV->getConstantFPValue()) }; Type *FPTy = Elts[0]->getType(); const DataLayout &TD = DAG.getDataLayout(); // Create a ConstantArray of the two constants. 
Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()), TD.getPrefTypeAlignment(FPTy)); unsigned Alignment = cast(CPIdx)->getAlignment(); // Get the offsets to the 0 and 1 element of the array so that we can // select between them. SDValue Zero = DAG.getIntPtrConstant(0, DL); unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV)); SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC); AddToWorklist(Cond.getNode()); SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero); AddToWorklist(CstOffset.getNode()); CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset); AddToWorklist(CPIdx.getNode()); return DAG.getLoad( TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment); } } if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC)) return V; // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A) // where y is has a single bit set. // A plaintext description would be, we can turn the SELECT_CC into an AND // when the condition can be materialized as an all-ones register. Any // single bit-test can be materialized as an all-ones register with // shift-left and shift-right-arith. if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) { SDValue AndLHS = N0->getOperand(0); ConstantSDNode *ConstAndRHS = dyn_cast(N0->getOperand(1)); if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { // Shift the tested bit over the sign bit. 
const APInt &AndMask = ConstAndRHS->getAPIntValue(); SDValue ShlAmt = DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS), getShiftAmountTy(AndLHS.getValueType())); SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt); // Now arithmetic right shift it all the way over, so the result is either // all-ones, or zero. SDValue ShrAmt = DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl), getShiftAmountTy(Shl.getValueType())); SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); return DAG.getNode(ISD::AND, DL, VT, Shr, N3); } } // fold select C, 16, 0 -> shl C, 4 if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() && TLI.getBooleanContents(N0.getValueType()) == TargetLowering::ZeroOrOneBooleanContent) { // If the caller doesn't want us to simplify this into a zext of a compare, // don't do it. if (NotExtCompare && N2C->isOne()) return SDValue(); // Get a SetCC of the condition // NOTE: Don't create a SETCC if it's not legal on this target. if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) { SDValue Temp, SCC; // cast from setcc result type to select result type if (LegalTypes) { SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC); if (N2.getValueType().bitsLT(SCC.getValueType())) Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), N2.getValueType()); else Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), N2.getValueType(), SCC); } else { SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC); Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), N2.getValueType(), SCC); } AddToWorklist(SCC.getNode()); AddToWorklist(Temp.getNode()); if (N2C->isOne()) return Temp; // shl setcc result by log2 n2c return DAG.getNode( ISD::SHL, DL, N2.getValueType(), Temp, DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp), getShiftAmountTy(Temp.getValueType()))); } } // Check to see if this is an integer abs. 
// select_cc setg[te] X, 0, X, -X -> // select_cc setgt X, -1, X, -X -> // select_cc setl[te] X, 0, -X, X -> // select_cc setlt X, 1, -X, X -> // Y = sra (X, size(X)-1); xor (add (X, Y), Y) if (N1C) { ConstantSDNode *SubC = nullptr; if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) || (N1C->isAllOnesValue() && CC == ISD::SETGT)) && N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) SubC = dyn_cast(N3.getOperand(0)); else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) || (N1C->isOne() && CC == ISD::SETLT)) && N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1)) SubC = dyn_cast(N2.getOperand(0)); EVT XType = N0.getValueType(); if (SubC && SubC->isNullValue() && XType.isInteger()) { SDLoc DL(N0); SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, DAG.getConstant(XType.getSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); SDValue Add = DAG.getNode(ISD::ADD, DL, XType, N0, Shift); AddToWorklist(Shift.getNode()); AddToWorklist(Add.getNode()); return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); } } // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X) // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X) // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X) // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X) // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X) // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X) // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X) // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X) if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { SDValue ValueOnZero = N2; SDValue Count = N3; // If the condition is NE instead of E, swap the operands. if (CC == ISD::SETNE) std::swap(ValueOnZero, Count); // Check if the value on zero is a constant equal to the bits in the type. 
if (auto *ValueOnZeroC = dyn_cast(ValueOnZero)) { if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) { // If the other operand is cttz/cttz_zero_undef of N0, and cttz is // legal, combine to just cttz. if ((Count.getOpcode() == ISD::CTTZ || Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) && N0 == Count.getOperand(0) && (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT))) return DAG.getNode(ISD::CTTZ, DL, VT, N0); // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is // legal, combine to just ctlz. if ((Count.getOpcode() == ISD::CTLZ || Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) && N0 == Count.getOperand(0) && (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT))) return DAG.getNode(ISD::CTLZ, DL, VT, N0); } } } return SDValue(); } /// This is a stub for TargetLowering::SimplifySetCC. SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &DL, bool foldBooleans) { TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, Level, false, this); return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); } /// Given an ISD::SDIV node expressing a divide by constant, return /// a DAG expression to select that will generate the same value by multiplying /// by a magic number. /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". SDValue DAGCombiner::BuildSDIV(SDNode *N) { // when optimising for minimum size, we don't want to expand a div to a mul // and a shift. if (DAG.getMachineFunction().getFunction()->optForMinSize()) return SDValue(); ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); if (!C) return SDValue(); // Avoid division by zero. 
if (C->isNullValue()) return SDValue(); std::vector Built; SDValue S = TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); for (SDNode *N : Built) AddToWorklist(N); return S; } /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a /// DAG expression that will generate the same value by right shifting. SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); if (!C) return SDValue(); // Avoid division by zero. if (C->isNullValue()) return SDValue(); std::vector Built; SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built); for (SDNode *N : Built) AddToWorklist(N); return S; } /// Given an ISD::UDIV node expressing a divide by constant, return a DAG /// expression that will generate the same value by multiplying by a magic /// number. /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". SDValue DAGCombiner::BuildUDIV(SDNode *N) { // when optimising for minimum size, we don't want to expand a div to a mul // and a shift. if (DAG.getMachineFunction().getFunction()->optForMinSize()) return SDValue(); ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); if (!C) return SDValue(); // Avoid division by zero. if (C->isNullValue()) return SDValue(); std::vector Built; SDValue S = TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); for (SDNode *N : Built) AddToWorklist(N); return S; } /// Determines the LogBase2 value for a non-null input value using the /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V). 
SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { EVT VT = V.getValueType(); unsigned EltBits = VT.getScalarSizeInBits(); SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V); SDValue Base = DAG.getConstant(EltBits - 1, DL, VT); SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz); return LogBase2; } /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) /// For the reciprocal, we need to find the zero of the function: /// F(X) = A X - 1 [which has a zero at X = 1/A] /// => /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form /// does not require additional intermediate precision] SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) { if (Level >= AfterLegalizeDAG) return SDValue(); // TODO: Handle half and/or extended types? EVT VT = Op.getValueType(); if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) return SDValue(); // If estimates are explicitly disabled for this function, we're done. MachineFunction &MF = DAG.getMachineFunction(); int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF); if (Enabled == TLI.ReciprocalEstimate::Disabled) return SDValue(); // Estimates may be explicitly enabled for this type with a custom number of // refinement steps. 
int Iterations = TLI.getDivRefinementSteps(VT, MF); if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) { AddToWorklist(Est.getNode()); if (Iterations) { EVT VT = Op.getValueType(); SDLoc DL(Op); SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); // Newton iterations: Est = Est + Est (1 - Arg * Est) for (int i = 0; i < Iterations; ++i) { SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags); AddToWorklist(NewEst.getNode()); NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags); AddToWorklist(NewEst.getNode()); NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); AddToWorklist(NewEst.getNode()); Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags); AddToWorklist(Est.getNode()); } } return Est; } return SDValue(); } /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) /// For the reciprocal sqrt, we need to find the zero of the function: /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] /// => /// X_{i+1} = X_i (1.5 - A X_i^2 / 2) /// As a result, we precompute A/2 prior to the iteration loop. SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations, SDNodeFlags *Flags, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. 
SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); AddToWorklist(HalfArg.getNode()); HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); AddToWorklist(HalfArg.getNode()); // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); AddToWorklist(NewEst.getNode()); NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); AddToWorklist(NewEst.getNode()); NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); AddToWorklist(NewEst.getNode()); Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); AddToWorklist(Est.getNode()); } // If non-reciprocal square root is requested, multiply the result by Arg. if (!Reciprocal) { Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); AddToWorklist(Est.getNode()); } return Est; } /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) /// For the reciprocal sqrt, we need to find the zero of the function: /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] /// => /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations, SDNodeFlags *Flags, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT); // This routine must enter the loop below to work correctly // when (Reciprocal == false). 
assert(Iterations > 0); // Newton iterations for reciprocal square root: // E = (E * -0.5) * ((A * E) * E + -3.0) for (unsigned i = 0; i < Iterations; ++i) { SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags); AddToWorklist(AE.getNode()); SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags); AddToWorklist(AEE.getNode()); SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags); AddToWorklist(RHS.getNode()); // When calculating a square root at the last iteration build: // S = ((A * E) * -0.5) * ((A * E) * E + -3.0) // (notice a common subexpression) SDValue LHS; if (Reciprocal || (i + 1) < Iterations) { // RSQRT: LHS = (E * -0.5) LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags); } else { // SQRT: LHS = (A * E) * -0.5 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags); } AddToWorklist(LHS.getNode()); Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags); AddToWorklist(Est.getNode()); } return Est; } /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if /// Op can be zero. SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, bool Reciprocal) { if (Level >= AfterLegalizeDAG) return SDValue(); // TODO: Handle half and/or extended types? EVT VT = Op.getValueType(); if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) return SDValue(); // If estimates are explicitly disabled for this function, we're done. MachineFunction &MF = DAG.getMachineFunction(); int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF); if (Enabled == TLI.ReciprocalEstimate::Disabled) return SDValue(); // Estimates may be explicitly enabled for this type with a custom number of // refinement steps. 
int Iterations = TLI.getSqrtRefinementSteps(VT, MF); bool UseOneConstNR = false; if (SDValue Est = TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR, Reciprocal)) { AddToWorklist(Est.getNode()); if (Iterations) { Est = UseOneConstNR ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal) : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal); if (!Reciprocal) { // Unfortunately, Est is now NaN if the input was exactly 0.0. // Select out this case and force the answer to 0.0. EVT VT = Op.getValueType(); SDLoc DL(Op); SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); EVT CCVT = getSetCCResultType(VT); SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); AddToWorklist(ZeroCmp.getNode()); Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, ZeroCmp, FPZero, Est); AddToWorklist(Est.getNode()); } } return Est; } return SDValue(); } SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { return buildSqrtEstimateImpl(Op, Flags, true); } SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) { return buildSqrtEstimateImpl(Op, Flags, false); } /// Return true if base is a frame index, which is known not to alias with /// anything but itself. Provides base object and offset as results. static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, const GlobalValue *&GV, const void *&CV) { // Assume it is a primitive operation. Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr; // If it's an adding a simple constant then integrate the offset. if (Base.getOpcode() == ISD::ADD) { if (ConstantSDNode *C = dyn_cast(Base.getOperand(1))) { Base = Base.getOperand(0); Offset += C->getZExtValue(); } } // Return the underlying GlobalValue, and update the Offset. Return false // for GlobalAddressSDNode since the same GlobalAddress may be represented // by multiple nodes with different offsets. 
if (GlobalAddressSDNode *G = dyn_cast(Base)) { GV = G->getGlobal(); Offset += G->getOffset(); return false; } // Return the underlying Constant value, and update the Offset. Return false // for ConstantSDNodes since the same constant pool entry may be represented // by multiple nodes with different offsets. if (ConstantPoolSDNode *C = dyn_cast(Base)) { CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal() : (const void *)C->getConstVal(); Offset += C->getOffset(); return false; } // If it's any of the following then it can't alias with anything but itself. return isa(Base); } /// Return true if there is any possibility that the two addresses overlap. bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { // If they are the same then they must be aliases. if (Op0->getBasePtr() == Op1->getBasePtr()) return true; // If they are both volatile then they cannot be reordered. if (Op0->isVolatile() && Op1->isVolatile()) return true; // If one operation reads from invariant memory, and the other may store, they // cannot alias. These should really be checking the equivalent of mayWrite, // but it only matters for memory nodes other than load /store. if (Op0->isInvariant() && Op1->writeMem()) return false; if (Op1->isInvariant() && Op0->writeMem()) return false; // Gather base node and offset information. SDValue Base1, Base2; int64_t Offset1, Offset2; const GlobalValue *GV1, *GV2; const void *CV1, *CV2; bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(), Base1, Offset1, GV1, CV1); bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(), Base2, Offset2, GV2, CV2); // If they have a same base address then check to see if they overlap. 
if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2))) return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); // It is possible for different frame indices to alias each other, mostly // when tail call optimization reuses return address slots for arguments. // To catch this case, look up the actual index of frame indices to compute // the real alias relationship. if (isFrameIndex1 && isFrameIndex2) { MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); Offset1 += MFI.getObjectOffset(cast(Base1)->getIndex()); Offset2 += MFI.getObjectOffset(cast(Base2)->getIndex()); return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); } // Otherwise, if we know what the bases are, and they aren't identical, then // we know they cannot alias. if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2)) return false; // If we know required SrcValue1 and SrcValue2 have relatively large alignment // compared to the size and offset of the access, we may be able to prove they // do not alias. This check is conservative for now to catch cases created by // splitting vector types. if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) && (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) && (Op0->getMemoryVT().getSizeInBits() >> 3 == Op1->getMemoryVT().getSizeInBits() >> 3) && (Op0->getOriginalAlignment() > (Op0->getMemoryVT().getSizeInBits() >> 3))) { int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment(); int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment(); // There is no overlap between these relatively aligned accesses of similar // size, return no alias. 
if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 || (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1) return false; } bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA : DAG.getSubtarget().useAA(); #ifndef NDEBUG if (CombinerAAOnlyFunc.getNumOccurrences() && CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) UseAA = false; #endif if (UseAA && Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) { // Use alias analysis information. int64_t MinOffset = std::min(Op0->getSrcValueOffset(), Op1->getSrcValueOffset()); int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) + Op0->getSrcValueOffset() - MinOffset; int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) + Op1->getSrcValueOffset() - MinOffset; AliasResult AAResult = AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1, UseTBAA ? Op0->getAAInfo() : AAMDNodes()), MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2, UseTBAA ? Op1->getAAInfo() : AAMDNodes())); if (AAResult == NoAlias) return false; } // Otherwise we have to assume they alias. return true; } /// Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallVectorImpl &Aliases) { SmallVector Chains; // List of chains to visit. SmallPtrSet Visited; // Visited node set. // Get alias information for node. bool IsLoad = isa(N) && !cast(N)->isVolatile(); // Starting off. Chains.push_back(OriginalChain); unsigned Depth = 0; // Look at each chain and determine if it is an alias. If so, add it to the // aliases list. If not, then continue up the chain looking for the next // candidate. while (!Chains.empty()) { SDValue Chain = Chains.pop_back_val(); // For TokenFactor nodes, look at each operand and only continue up the // chain until we reach the depth limit. 
// // FIXME: The depth check could be made to return the last non-aliasing // chain we found before we hit a tokenfactor rather than the original // chain. if (Depth > TLI.getGatherAllAliasesMaxDepth()) { Aliases.clear(); Aliases.push_back(OriginalChain); return; } // Don't bother if we've been before. if (!Visited.insert(Chain.getNode()).second) continue; switch (Chain.getOpcode()) { case ISD::EntryToken: // Entry token is ideal chain operand, but handled in FindBetterChain. break; case ISD::LOAD: case ISD::STORE: { // Get alias information for Chain. bool IsOpLoad = isa(Chain.getNode()) && !cast(Chain.getNode())->isVolatile(); // If chain is alias then stop here. if (!(IsLoad && IsOpLoad) && isAlias(cast(N), cast(Chain.getNode()))) { Aliases.push_back(Chain); } else { // Look further up the chain. Chains.push_back(Chain.getOperand(0)); ++Depth; } break; } case ISD::TokenFactor: // We have to check each of the operands of the token factor for "small" // token factors, so we queue them up. Adding the operands to the queue // (stack) in reverse order maintains the original order and increases the // likelihood that getNode will find a matching token factor (CSE.) if (Chain.getNumOperands() > 16) { Aliases.push_back(Chain); break; } for (unsigned n = Chain.getNumOperands(); n;) Chains.push_back(Chain.getOperand(--n)); ++Depth; break; default: // For all other instructions we will just have to take what we can get. Aliases.push_back(Chain); break; } } } /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain /// (aliasing node.) SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { SmallVector Aliases; // Ops for replacing token factor. // Accumulate all the aliases to this node. GatherAllAliases(N, OldChain, Aliases); // If no operands then chain to entry token. if (Aliases.size() == 0) return DAG.getEntryNode(); // If a single operand then chain to it. We don't need to revisit it. 
if (Aliases.size() == 1) return Aliases[0]; // Construct a custom tailored token factor. return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); } bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); // We must have a base and an offset. if (!BasePtr.Base.getNode()) return false; // Do not handle stores to undef base pointers. if (BasePtr.Base.isUndef()) return false; SmallVector ChainedStores; ChainedStores.push_back(St); // Walk up the chain and look for nodes with offsets from the same // base pointer. Stop when reaching an instruction with a different kind // or instruction which has a different base pointer. StoreSDNode *Index = St; while (Index) { // If the chain has more than one use, then we can't reorder the mem ops. if (Index != St && !SDValue(Index, 0)->hasOneUse()) break; if (Index->isVolatile() || Index->isIndexed()) break; // Find the base pointer and offset for this memory node. BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG); // Check that the base pointer is the same as the original one. if (!Ptr.equalBaseIndex(BasePtr)) break; // Find the next memory operand in the chain. If the next operand in the // chain is a store then move up and continue the scan with the next // memory operand. If the next operand is a load save it and use alias // information to check if it interferes with anything. SDNode *NextInChain = Index->getChain().getNode(); while (true) { if (StoreSDNode *STn = dyn_cast(NextInChain)) { // We found a store node. Use it for the next iteration. 
if (STn->isVolatile() || STn->isIndexed()) { Index = nullptr; break; } ChainedStores.push_back(STn); Index = STn; break; } else if (LoadSDNode *Ldn = dyn_cast(NextInChain)) { NextInChain = Ldn->getChain().getNode(); continue; } else { Index = nullptr; break; } } } bool MadeChangeToSt = false; SmallVector, 8> BetterChains; for (StoreSDNode *ChainedStore : ChainedStores) { SDValue Chain = ChainedStore->getChain(); SDValue BetterChain = FindBetterChain(ChainedStore, Chain); if (Chain != BetterChain) { if (ChainedStore == St) MadeChangeToSt = true; BetterChains.push_back(std::make_pair(ChainedStore, BetterChain)); } } // Do all replacements after finding the replacements to make to avoid making // the chains more complicated by introducing new TokenFactors. for (auto Replacement : BetterChains) replaceStoreChain(Replacement.first, Replacement.second); return MadeChangeToSt; } /// This is the entry point for the file. void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA, CodeGenOpt::Level OptLevel) { /// This is the main entry point to this class. DAGCombiner(*this, AA, OptLevel).Run(Level); } Index: projects/clang400-import/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp =================================================================== --- projects/clang400-import/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp (revision 313893) +++ projects/clang400-import/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp (revision 313894) @@ -1,1058 +1,1073 @@ //==--- InstrEmitter.cpp - Emit MachineInstrs for the SelectionDAG class ---==// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This implements the Emit routines for the SelectionDAG class, which creates // MachineInstrs based on the decisions of the SelectionDAG instruction // selection. 
// //===----------------------------------------------------------------------===// #include "InstrEmitter.h" #include "SDNodeDbgValue.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "instr-emitter" /// MinRCSize - Smallest register class we allow when constraining virtual /// registers. If satisfying all register class constraints would require /// using a smaller register class, emit a COPY to a new virtual register /// instead. const unsigned MinRCSize = 4; /// CountResults - The results of target nodes have register or immediate /// operands first, then an optional chain, and optional glue operands (which do /// not go into the resulting MachineInstr). unsigned InstrEmitter::CountResults(SDNode *Node) { unsigned N = Node->getNumValues(); while (N && Node->getValueType(N - 1) == MVT::Glue) --N; if (N && Node->getValueType(N - 1) == MVT::Other) --N; // Skip over chain result. return N; } /// countOperands - The inputs to target nodes have any actual inputs first, /// followed by an optional chain operand, then an optional glue operand. /// Compute the number of actual operands that will go into the resulting /// MachineInstr. /// /// Also count physreg RegisterSDNode and RegisterMaskSDNode operands preceding /// the chain and glue. These operands may be implicit on the machine instr. 
static unsigned countOperands(SDNode *Node, unsigned NumExpUses, unsigned &NumImpUses) { unsigned N = Node->getNumOperands(); while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue) --N; if (N && Node->getOperand(N - 1).getValueType() == MVT::Other) --N; // Ignore chain if it exists. // Count RegisterSDNode and RegisterMaskSDNode operands for NumImpUses. NumImpUses = N - NumExpUses; for (unsigned I = N; I > NumExpUses; --I) { if (isa(Node->getOperand(I - 1))) continue; if (RegisterSDNode *RN = dyn_cast(Node->getOperand(I - 1))) if (TargetRegisterInfo::isPhysicalRegister(RN->getReg())) continue; NumImpUses = N - I; break; } return N; } /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an /// implicit physical register output. void InstrEmitter:: EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, unsigned SrcReg, DenseMap &VRBaseMap) { unsigned VRBase = 0; if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { // Just use the input register directly! SDValue Op(Node, ResNo); if (IsClone) VRBaseMap.erase(Op); bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second; (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); return; } // If the node is only used by a CopyToReg and the dest reg is a vreg, use // the CopyToReg'd destination register instead of creating a new vreg. bool MatchReg = true; const TargetRegisterClass *UseRC = nullptr; MVT VT = Node->getSimpleValueType(ResNo); // Stick to the preferred register classes for legal types. 
if (TLI->isTypeLegal(VT)) UseRC = TLI->getRegClassFor(VT); if (!IsClone && !IsCloned) for (SDNode *User : Node->uses()) { bool Match = true; if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == ResNo) { unsigned DestReg = cast(User->getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(DestReg)) { VRBase = DestReg; Match = false; } else if (DestReg != SrcReg) Match = false; } else { for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { SDValue Op = User->getOperand(i); if (Op.getNode() != Node || Op.getResNo() != ResNo) continue; MVT VT = Node->getSimpleValueType(Op.getResNo()); if (VT == MVT::Other || VT == MVT::Glue) continue; Match = false; if (User->isMachineOpcode()) { const MCInstrDesc &II = TII->get(User->getMachineOpcode()); const TargetRegisterClass *RC = nullptr; if (i+II.getNumDefs() < II.getNumOperands()) { RC = TRI->getAllocatableClass( TII->getRegClass(II, i+II.getNumDefs(), TRI, *MF)); } if (!UseRC) UseRC = RC; else if (RC) { const TargetRegisterClass *ComRC = TRI->getCommonSubClass(UseRC, RC, VT.SimpleTy); // If multiple uses expect disjoint register classes, we emit // copies in AddRegisterOperand. if (ComRC) UseRC = ComRC; } } } } MatchReg &= Match; if (VRBase) break; } const TargetRegisterClass *SrcRC = nullptr, *DstRC = nullptr; SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT); // Figure out the register class to create for the destreg. if (VRBase) { DstRC = MRI->getRegClass(VRBase); } else if (UseRC) { assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!"); DstRC = UseRC; } else { DstRC = TLI->getRegClassFor(VT); } // If all uses are reading from the src physical register and copying the // register is either impossible or very expensive, then don't create a copy. if (MatchReg && SrcRC->getCopyCost() < 0) { VRBase = SrcReg; } else { // Create the reg, emit the copy. 
VRBase = MRI->createVirtualRegister(DstRC); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg); } SDValue Op(Node, ResNo); if (IsClone) VRBaseMap.erase(Op); bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); } /// getDstOfCopyToRegUse - If the only use of the specified result number of /// node is a CopyToReg, return its destination register. Return 0 otherwise. unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node, unsigned ResNo) const { if (!Node->hasOneUse()) return 0; SDNode *User = *Node->use_begin(); if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == ResNo) { unsigned Reg = cast(User->getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) return Reg; } return 0; } void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstrBuilder &MIB, const MCInstrDesc &II, bool IsClone, bool IsCloned, DenseMap &VRBaseMap) { assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF && "IMPLICIT_DEF should have been handled as a special case elsewhere!"); unsigned NumResults = CountResults(Node); for (unsigned i = 0; i < II.getNumDefs(); ++i) { // If the specific node value is only used by a CopyToReg and the dest reg // is a vreg in the same register class, use the CopyToReg'd destination // register instead of creating a new vreg. unsigned VRBase = 0; const TargetRegisterClass *RC = TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF)); // Always let the value type influence the used register class. The // constraints on the instruction may be too lax to represent the value // type correctly. For example, a 64-bit float (X86::FR64) can't live in // the 32-bit float super-class (X86::FR32). 
if (i < NumResults && TLI->isTypeLegal(Node->getSimpleValueType(i))) { const TargetRegisterClass *VTRC = TLI->getRegClassFor(Node->getSimpleValueType(i)); if (RC) VTRC = TRI->getCommonSubClass(RC, VTRC); if (VTRC) RC = VTRC; } if (II.OpInfo[i].isOptionalDef()) { // Optional def must be a physical register. unsigned NumResults = CountResults(Node); VRBase = cast(Node->getOperand(i-NumResults))->getReg(); assert(TargetRegisterInfo::isPhysicalRegister(VRBase)); MIB.addReg(VRBase, RegState::Define); } if (!VRBase && !IsClone && !IsCloned) for (SDNode *User : Node->uses()) { if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == i) { unsigned Reg = cast(User->getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { const TargetRegisterClass *RegRC = MRI->getRegClass(Reg); if (RegRC == RC) { VRBase = Reg; MIB.addReg(VRBase, RegState::Define); break; } } } } // Create the result registers for this node and add the result regs to // the machine instruction. if (VRBase == 0) { assert(RC && "Isn't a register operand!"); VRBase = MRI->createVirtualRegister(RC); MIB.addReg(VRBase, RegState::Define); } // If this def corresponds to a result of the SDNode insert the VRBase into // the lookup map. if (i < NumResults) { SDValue Op(Node, i); if (IsClone) VRBaseMap.erase(Op); bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); } } } /// getVR - Return the virtual register corresponding to the specified result /// of the specified node. unsigned InstrEmitter::getVR(SDValue Op, DenseMap &VRBaseMap) { if (Op.isMachineOpcode() && Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { // Add an IMPLICIT_DEF instruction before every use. 
unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo()); // IMPLICIT_DEF can produce any type of result so its MCInstrDesc // does not include operand register class info. if (!VReg) { const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getSimpleValueType()); VReg = MRI->createVirtualRegister(RC); } BuildMI(*MBB, InsertPos, Op.getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), VReg); return VReg; } DenseMap::iterator I = VRBaseMap.find(Op); assert(I != VRBaseMap.end() && "Node emitted out of order - late"); return I->second; } /// AddRegisterOperand - Add the specified register as an operand to the /// specified machine instr. Insert register copies if the register is /// not in the required register class. void InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, DenseMap &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned) { assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Glue && "Chain and glue operands should occur at end of operand list!"); // Get/emit the operand. unsigned VReg = getVR(Op, VRBaseMap); const MCInstrDesc &MCID = MIB->getDesc(); bool isOptDef = IIOpNum < MCID.getNumOperands() && MCID.OpInfo[IIOpNum].isOptionalDef(); // If the instruction requires a register in a different class, create // a new virtual register and copy the value into it, but first attempt to // shrink VReg's register class within reason. For example, if VReg == GR32 // and II requires a GR32_NOSP, just constrain VReg to GR32_NOSP. 
if (II) { const TargetRegisterClass *OpRC = nullptr; if (IIOpNum < II->getNumOperands()) OpRC = TII->getRegClass(*II, IIOpNum, TRI, *MF); if (OpRC) { const TargetRegisterClass *ConstrainedRC = MRI->constrainRegClass(VReg, OpRC, MinRCSize); if (!ConstrainedRC) { OpRC = TRI->getAllocatableClass(OpRC); assert(OpRC && "Constraints cannot be fulfilled for allocation"); unsigned NewVReg = MRI->createVirtualRegister(OpRC); BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); VReg = NewVReg; } else { assert(ConstrainedRC->isAllocatable() && "Constraining an allocatable VReg produced an unallocatable class?"); } } } // If this value has only one use, that use is a kill. This is a // conservative approximation. InstrEmitter does trivial coalescing // with CopyFromReg nodes, so don't emit kill flags for them. // Avoid kill flags on Schedule cloned nodes, since there will be // multiple uses. // Tied operands are never killed, so we need to check that. And that // means we need to determine the index of the operand. bool isKill = Op.hasOneUse() && Op.getNode()->getOpcode() != ISD::CopyFromReg && !IsDebug && !(IsClone || IsCloned); if (isKill) { unsigned Idx = MIB->getNumOperands(); while (Idx > 0 && MIB->getOperand(Idx-1).isReg() && MIB->getOperand(Idx-1).isImplicit()) --Idx; bool isTied = MCID.getOperandConstraint(Idx, MCOI::TIED_TO) != -1; if (isTied) isKill = false; } MIB.addReg(VReg, getDefRegState(isOptDef) | getKillRegState(isKill) | getDebugRegState(IsDebug)); } /// AddOperand - Add the specified operand to the specified machine instr. II /// specifies the instruction information for the node, and IIOpNum is the /// operand number (in the II) that we are adding. 
void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, DenseMap &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned) { if (Op.isMachineOpcode()) { AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap, IsDebug, IsClone, IsCloned); } else if (ConstantSDNode *C = dyn_cast(Op)) { MIB.addImm(C->getSExtValue()); } else if (ConstantFPSDNode *F = dyn_cast(Op)) { MIB.addFPImm(F->getConstantFPValue()); } else if (RegisterSDNode *R = dyn_cast(Op)) { // Turn additional physreg operands into implicit uses on non-variadic // instructions. This is used by call and return instructions passing // arguments in registers. bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic()); MIB.addReg(R->getReg(), getImplRegState(Imp)); } else if (RegisterMaskSDNode *RM = dyn_cast(Op)) { MIB.addRegMask(RM->getRegMask()); } else if (GlobalAddressSDNode *TGA = dyn_cast(Op)) { MIB.addGlobalAddress(TGA->getGlobal(), TGA->getOffset(), TGA->getTargetFlags()); } else if (BasicBlockSDNode *BBNode = dyn_cast(Op)) { MIB.addMBB(BBNode->getBasicBlock()); } else if (FrameIndexSDNode *FI = dyn_cast(Op)) { MIB.addFrameIndex(FI->getIndex()); } else if (JumpTableSDNode *JT = dyn_cast(Op)) { MIB.addJumpTableIndex(JT->getIndex(), JT->getTargetFlags()); } else if (ConstantPoolSDNode *CP = dyn_cast(Op)) { int Offset = CP->getOffset(); unsigned Align = CP->getAlignment(); Type *Type = CP->getType(); // MachineConstantPool wants an explicit alignment. if (Align == 0) { Align = MF->getDataLayout().getPrefTypeAlignment(Type); if (Align == 0) { // Alignment of vector types. FIXME! 
Align = MF->getDataLayout().getTypeAllocSize(Type); } } unsigned Idx; MachineConstantPool *MCP = MF->getConstantPool(); if (CP->isMachineConstantPoolEntry()) Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align); else Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align); MIB.addConstantPoolIndex(Idx, Offset, CP->getTargetFlags()); } else if (ExternalSymbolSDNode *ES = dyn_cast(Op)) { MIB.addExternalSymbol(ES->getSymbol(), ES->getTargetFlags()); } else if (auto *SymNode = dyn_cast(Op)) { MIB.addSym(SymNode->getMCSymbol()); } else if (BlockAddressSDNode *BA = dyn_cast(Op)) { MIB.addBlockAddress(BA->getBlockAddress(), BA->getOffset(), BA->getTargetFlags()); } else if (TargetIndexSDNode *TI = dyn_cast(Op)) { MIB.addTargetIndex(TI->getIndex(), TI->getOffset(), TI->getTargetFlags()); } else { assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Glue && "Chain and glue operands should occur at end of operand list!"); AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap, IsDebug, IsClone, IsCloned); } } unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx, MVT VT, const DebugLoc &DL) { const TargetRegisterClass *VRC = MRI->getRegClass(VReg); const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx); // RC is a sub-class of VRC that supports SubIdx. Try to constrain VReg // within reason. if (RC && RC != VRC) RC = MRI->constrainRegClass(VReg, RC, MinRCSize); // VReg has been adjusted. It can be used with SubIdx operands now. if (RC) return VReg; // VReg couldn't be reasonably constrained. Emit a COPY to a new virtual // register instead. RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT), SubIdx); assert(RC && "No legal register class for VT supports that SubIdx"); unsigned NewReg = MRI->createVirtualRegister(RC); BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg) .addReg(VReg); return NewReg; } /// EmitSubregNode - Generate machine code for subreg nodes. 
/// void InstrEmitter::EmitSubregNode(SDNode *Node, DenseMap &VRBaseMap, bool IsClone, bool IsCloned) { unsigned VRBase = 0; unsigned Opc = Node->getMachineOpcode(); // If the node is only used by a CopyToReg and the dest reg is a vreg, use // the CopyToReg'd destination register instead of creating a new vreg. for (SDNode *User : Node->uses()) { if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node) { unsigned DestReg = cast(User->getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(DestReg)) { VRBase = DestReg; break; } } } if (Opc == TargetOpcode::EXTRACT_SUBREG) { // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub. There are no // constraints on the %dst register, COPY can target all legal register // classes. unsigned SubIdx = cast(Node->getOperand(1))->getZExtValue(); const TargetRegisterClass *TRC = TLI->getRegClassFor(Node->getSimpleValueType(0)); - unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); - MachineInstr *DefMI = MRI->getVRegDef(VReg); + unsigned Reg; + MachineInstr *DefMI; + RegisterSDNode *R = dyn_cast(Node->getOperand(0)); + if (R && TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + Reg = R->getReg(); + DefMI = nullptr; + } else { + Reg = getVR(Node->getOperand(0), VRBaseMap); + DefMI = MRI->getVRegDef(Reg); + } + unsigned SrcReg, DstReg, DefSubIdx; if (DefMI && TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) && SubIdx == DefSubIdx && TRC == MRI->getRegClass(SrcReg)) { // Optimize these: // r1025 = s/zext r1024, 4 // r1026 = extract_subreg r1025, 4 // to a copy // r1026 = copy r1024 VRBase = MRI->createVirtualRegister(TRC); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg); MRI->clearKillFlags(SrcReg); } else { - // VReg may not support a SubIdx sub-register, and we may need to + // Reg may not support a SubIdx sub-register, and we may need to // constrain its register class or issue a COPY to a compatible register // class. 
- VReg = ConstrainForSubReg(VReg, SubIdx, - Node->getOperand(0).getSimpleValueType(), - Node->getDebugLoc()); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + Reg = ConstrainForSubReg(Reg, SubIdx, + Node->getOperand(0).getSimpleValueType(), + Node->getDebugLoc()); // Create the destreg if it is missing. if (VRBase == 0) VRBase = MRI->createVirtualRegister(TRC); // Create the extract_subreg machine instruction. - BuildMI(*MBB, InsertPos, Node->getDebugLoc(), - TII->get(TargetOpcode::COPY), VRBase).addReg(VReg, 0, SubIdx); + MachineInstrBuilder CopyMI = + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), + TII->get(TargetOpcode::COPY), VRBase); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + CopyMI.addReg(Reg, 0, SubIdx); + else + CopyMI.addReg(TRI->getSubReg(Reg, SubIdx)); } } else if (Opc == TargetOpcode::INSERT_SUBREG || Opc == TargetOpcode::SUBREG_TO_REG) { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); SDValue N2 = Node->getOperand(2); unsigned SubIdx = cast(N2)->getZExtValue(); // Figure out the register class to create for the destreg. It should be // the largest legal register class supporting SubIdx sub-registers. // RegisterCoalescer will constrain it further if it decides to eliminate // the INSERT_SUBREG instruction. // // %dst = INSERT_SUBREG %src, %sub, SubIdx // // is lowered by TwoAddressInstructionPass to: // // %dst = COPY %src // %dst:SubIdx = COPY %sub // // There is no constraint on the %src register class. // const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getSimpleValueType(0)); SRC = TRI->getSubClassWithSubReg(SRC, SubIdx); assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG"); if (VRBase == 0 || !SRC->hasSubClassEq(MRI->getRegClass(VRBase))) VRBase = MRI->createVirtualRegister(SRC); // Create the insert_subreg or subreg_to_reg machine instruction. 
MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc), VRBase); // If creating a subreg_to_reg, then the first input operand // is an implicit value immediate, otherwise it's a register if (Opc == TargetOpcode::SUBREG_TO_REG) { const ConstantSDNode *SD = cast(N0); MIB.addImm(SD->getZExtValue()); } else AddOperand(MIB, N0, 0, nullptr, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); // Add the subregster being inserted AddOperand(MIB, N1, 0, nullptr, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); MIB.addImm(SubIdx); MBB->insert(InsertPos, MIB); } else llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg"); SDValue Op(Node, 0); bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); } /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes. /// COPY_TO_REGCLASS is just a normal copy, except that the destination /// register is constrained to be in a particular register class. /// void InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, DenseMap &VRBaseMap) { unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); // Create the new VReg in the destination class and emit a copy. unsigned DstRCIdx = cast(Node->getOperand(1))->getZExtValue(); const TargetRegisterClass *DstRC = TRI->getAllocatableClass(TRI->getRegClass(DstRCIdx)); unsigned NewVReg = MRI->createVirtualRegister(DstRC); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); SDValue Op(Node, 0); bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); } /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. 
/// void InstrEmitter::EmitRegSequence(SDNode *Node, DenseMap &VRBaseMap, bool IsClone, bool IsCloned) { unsigned DstRCIdx = cast(Node->getOperand(0))->getZExtValue(); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); unsigned NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC)); const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II, NewVReg); unsigned NumOps = Node->getNumOperands(); assert((NumOps & 1) == 1 && "REG_SEQUENCE must have an odd number of operands!"); for (unsigned i = 1; i != NumOps; ++i) { SDValue Op = Node->getOperand(i); if ((i & 1) == 0) { RegisterSDNode *R = dyn_cast(Node->getOperand(i-1)); // Skip physical registers as they don't have a vreg to get and we'll // insert copies for them in TwoAddressInstructionPass anyway. if (!R || !TargetRegisterInfo::isPhysicalRegister(R->getReg())) { unsigned SubIdx = cast(Op)->getZExtValue(); unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); const TargetRegisterClass *SRC = TRI->getMatchingSuperRegClass(RC, TRC, SubIdx); if (SRC && SRC != RC) { MRI->setRegClass(NewVReg, SRC); RC = SRC; } } } AddOperand(MIB, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); } MBB->insert(InsertPos, MIB); SDValue Op(Node, 0); bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); } /// EmitDbgValue - Generate machine instruction for a dbg_value node. 
/// MachineInstr * InstrEmitter::EmitDbgValue(SDDbgValue *SD, DenseMap &VRBaseMap) { uint64_t Offset = SD->getOffset(); MDNode *Var = SD->getVariable(); MDNode *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); assert(cast(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); if (SD->getKind() == SDDbgValue::FRAMEIX) { // Stack address; this needs to be lowered in target-dependent fashion. // EmitTargetCodeForFrameDebugValue is responsible for allocation. return BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) .addFrameIndex(SD->getFrameIx()) .addImm(Offset) .addMetadata(Var) .addMetadata(Expr); } // Otherwise, we're going to create an instruction here. const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); MachineInstrBuilder MIB = BuildMI(*MF, DL, II); if (SD->getKind() == SDDbgValue::SDNODE) { SDNode *Node = SD->getSDNode(); SDValue Op = SDValue(Node, SD->getResNo()); // It's possible we replaced this SDNode with other(s) and therefore // didn't generate code for it. It's better to catch these cases where // they happen and transfer the debug info, but trying to guarantee that // in all cases would be very fragile; this is a safeguard for any // that were missed. DenseMap::iterator I = VRBaseMap.find(Op); if (I==VRBaseMap.end()) MIB.addReg(0U); // undef else AddOperand(MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap, /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false); } else if (SD->getKind() == SDDbgValue::CONST) { const Value *V = SD->getConst(); if (const ConstantInt *CI = dyn_cast(V)) { if (CI->getBitWidth() > 64) MIB.addCImm(CI); else MIB.addImm(CI->getSExtValue()); } else if (const ConstantFP *CF = dyn_cast(V)) { MIB.addFPImm(CF); } else { // Could be an Undef. In any case insert an Undef so we can see what we // dropped. MIB.addReg(0U); } } else { // Insert an Undef so we can see what we dropped. MIB.addReg(0U); } // Indirect addressing is indicated by an Imm as the second parameter. 
if (SD->isIndirect()) MIB.addImm(Offset); else { assert(Offset == 0 && "direct value cannot have an offset"); MIB.addReg(0U, RegState::Debug); } MIB.addMetadata(Var); MIB.addMetadata(Expr); return &*MIB; } /// EmitMachineNode - Generate machine code for a target-specific node and /// needed dependencies. /// void InstrEmitter:: EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, DenseMap &VRBaseMap) { unsigned Opc = Node->getMachineOpcode(); // Handle subreg insert/extract specially if (Opc == TargetOpcode::EXTRACT_SUBREG || Opc == TargetOpcode::INSERT_SUBREG || Opc == TargetOpcode::SUBREG_TO_REG) { EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned); return; } // Handle COPY_TO_REGCLASS specially. if (Opc == TargetOpcode::COPY_TO_REGCLASS) { EmitCopyToRegClassNode(Node, VRBaseMap); return; } // Handle REG_SEQUENCE specially. if (Opc == TargetOpcode::REG_SEQUENCE) { EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned); return; } if (Opc == TargetOpcode::IMPLICIT_DEF) // We want a unique VR for each IMPLICIT_DEF use. return; const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); unsigned NumDefs = II.getNumDefs(); const MCPhysReg *ScratchRegs = nullptr; // Handle STACKMAP and PATCHPOINT specially and then use the generic code. if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) { // Stackmaps do not have arguments and do not preserve their calling // convention. However, to simplify runtime support, they clobber the same // scratch registers as AnyRegCC. 
unsigned CC = CallingConv::AnyReg; if (Opc == TargetOpcode::PATCHPOINT) { CC = Node->getConstantOperandVal(PatchPointOpers::CCPos); NumDefs = NumResults; } ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC); } unsigned NumImpUses = 0; unsigned NodeOperands = countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses); bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=nullptr; #ifndef NDEBUG unsigned NumMIOperands = NodeOperands + NumResults; if (II.isVariadic()) assert(NumMIOperands >= II.getNumOperands() && "Too few operands for a variadic node!"); else assert(NumMIOperands >= II.getNumOperands() && NumMIOperands <= II.getNumOperands() + II.getNumImplicitDefs() + NumImpUses && "#operands for dag node doesn't match .td file!"); #endif // Create the new machine instruction. MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II); // Add result register values for things that are defined by this // instruction. if (NumResults) CreateVirtualRegisters(Node, MIB, II, IsClone, IsCloned, VRBaseMap); // Emit all of the actual operands of this instruction, adding them to the // instruction as appropriate. bool HasOptPRefs = NumDefs > NumResults; assert((!HasOptPRefs || !HasPhysRegOuts) && "Unable to cope with optional defs and phys regs defs!"); unsigned NumSkip = HasOptPRefs ? NumDefs - NumResults : 0; for (unsigned i = NumSkip; i != NodeOperands; ++i) AddOperand(MIB, Node->getOperand(i), i-NumSkip+NumDefs, &II, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); // Add scratch registers as implicit def and early clobber if (ScratchRegs) for (unsigned i = 0; ScratchRegs[i]; ++i) MIB.addReg(ScratchRegs[i], RegState::ImplicitDefine | RegState::EarlyClobber); // Transfer all of the memory reference descriptions of this instruction. MIB.setMemRefs(cast(Node)->memoperands_begin(), cast(Node)->memoperands_end()); // Insert the instruction into position in the block. 
This needs to // happen before any custom inserter hook is called so that the // hook knows where in the block to insert the replacement code. MBB->insert(InsertPos, MIB); // The MachineInstr may also define physregs instead of virtregs. These // physreg values can reach other instructions in different ways: // // 1. When there is a use of a Node value beyond the explicitly defined // virtual registers, we emit a CopyFromReg for one of the implicitly // defined physregs. This only happens when HasPhysRegOuts is true. // // 2. A CopyFromReg reading a physreg may be glued to this instruction. // // 3. A glued instruction may implicitly use a physreg. // // 4. A glued instruction may use a RegisterSDNode operand. // // Collect all the used physreg defs, and make sure that any unused physreg // defs are marked as dead. SmallVector UsedRegs; // Additional results must be physical register defs. if (HasPhysRegOuts) { for (unsigned i = NumDefs; i < NumResults; ++i) { unsigned Reg = II.getImplicitDefs()[i - NumDefs]; if (!Node->hasAnyUseOfValue(i)) continue; // This implicitly defined physreg has a use. UsedRegs.push_back(Reg); EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); } } // Scan the glue chain for any used physregs. if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) { for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) { if (F->getOpcode() == ISD::CopyFromReg) { UsedRegs.push_back(cast(F->getOperand(1))->getReg()); continue; } else if (F->getOpcode() == ISD::CopyToReg) { // Skip CopyToReg nodes that are internal to the glue chain. continue; } // Collect declared implicit uses. const MCInstrDesc &MCID = TII->get(F->getMachineOpcode()); UsedRegs.append(MCID.getImplicitUses(), MCID.getImplicitUses() + MCID.getNumImplicitUses()); // In addition to declared implicit uses, we must also check for // direct RegisterSDNode operands. 
for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) if (RegisterSDNode *R = dyn_cast(F->getOperand(i))) { unsigned Reg = R->getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) UsedRegs.push_back(Reg); } } } // Finally mark unused registers as dead. if (!UsedRegs.empty() || II.getImplicitDefs()) MIB->setPhysRegsDeadExcept(UsedRegs, *TRI); // Run post-isel target hook to adjust this instruction if needed. if (II.hasPostISelHook()) TLI->AdjustInstrPostInstrSelection(*MIB, Node); } /// EmitSpecialNode - Generate machine code for a target-independent node and /// needed dependencies. void InstrEmitter:: EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, DenseMap &VRBaseMap) { switch (Node->getOpcode()) { default: #ifndef NDEBUG Node->dump(); #endif llvm_unreachable("This target-independent node should have been selected!"); case ISD::EntryToken: llvm_unreachable("EntryToken should have been excluded from the schedule!"); case ISD::MERGE_VALUES: case ISD::TokenFactor: // fall thru break; case ISD::CopyToReg: { unsigned SrcReg; SDValue SrcVal = Node->getOperand(2); if (RegisterSDNode *R = dyn_cast(SrcVal)) SrcReg = R->getReg(); else SrcReg = getVR(SrcVal, VRBaseMap); unsigned DestReg = cast(Node->getOperand(1))->getReg(); if (SrcReg == DestReg) // Coalesced away the copy? Ignore. break; BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), DestReg).addReg(SrcReg); break; } case ISD::CopyFromReg: { unsigned SrcReg = cast(Node->getOperand(1))->getReg(); EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap); break; } case ISD::EH_LABEL: { MCSymbol *S = cast(Node)->getLabel(); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::EH_LABEL)).addSym(S); break; } case ISD::LIFETIME_START: case ISD::LIFETIME_END: { unsigned TarOp = (Node->getOpcode() == ISD::LIFETIME_START) ? 
TargetOpcode::LIFETIME_START : TargetOpcode::LIFETIME_END; FrameIndexSDNode *FI = dyn_cast(Node->getOperand(1)); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp)) .addFrameIndex(FI->getIndex()); break; } case ISD::INLINEASM: { unsigned NumOps = Node->getNumOperands(); if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) --NumOps; // Ignore the glue operand. // Create the inline asm machine instruction. MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), TII->get(TargetOpcode::INLINEASM)); // Add the asm string as an external symbol operand. SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString); const char *AsmStr = cast(AsmStrV)->getSymbol(); MIB.addExternalSymbol(AsmStr); // Add the HasSideEffect, isAlignStack, AsmDialect, MayLoad and MayStore // bits. int64_t ExtraInfo = cast(Node->getOperand(InlineAsm::Op_ExtraInfo))-> getZExtValue(); MIB.addImm(ExtraInfo); // Remember to operand index of the group flags. SmallVector GroupIdx; // Remember registers that are part of early-clobber defs. SmallVector ECRegs; // Add all of the operand registers to the instruction. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = cast(Node->getOperand(i))->getZExtValue(); const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); GroupIdx.push_back(MIB->getNumOperands()); MIB.addImm(Flags); ++i; // Skip the ID value. switch (InlineAsm::getKind(Flags)) { default: llvm_unreachable("Bad flags!"); case InlineAsm::Kind_RegDef: for (unsigned j = 0; j != NumVals; ++j, ++i) { unsigned Reg = cast(Node->getOperand(i))->getReg(); // FIXME: Add dead flags for physical and virtual registers defined. // For now, mark physical register defs as implicit to help fast // regalloc. This makes inline asm look a lot like calls. 
MIB.addReg(Reg, RegState::Define | getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg))); } break; case InlineAsm::Kind_RegDefEarlyClobber: case InlineAsm::Kind_Clobber: for (unsigned j = 0; j != NumVals; ++j, ++i) { unsigned Reg = cast(Node->getOperand(i))->getReg(); MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber | getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg))); ECRegs.push_back(Reg); } break; case InlineAsm::Kind_RegUse: // Use of register. case InlineAsm::Kind_Imm: // Immediate. case InlineAsm::Kind_Mem: // Addressing mode. // The addressing mode has been selected, just add all of the // operands to the machine instruction. for (unsigned j = 0; j != NumVals; ++j, ++i) AddOperand(MIB, Node->getOperand(i), 0, nullptr, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); // Manually set isTied bits. if (InlineAsm::getKind(Flags) == InlineAsm::Kind_RegUse) { unsigned DefGroup = 0; if (InlineAsm::isUseOperandTiedToDef(Flags, DefGroup)) { unsigned DefIdx = GroupIdx[DefGroup] + 1; unsigned UseIdx = GroupIdx.back() + 1; for (unsigned j = 0; j != NumVals; ++j) MIB->tieOperands(DefIdx + j, UseIdx + j); } } break; } } // GCC inline assembly allows input operands to also be early-clobber // output operands (so long as the operand is written only after it's // used), but this does not match the semantics of our early-clobber flag. // If an early-clobber operand register is also an input operand register, // then remove the early-clobber flag. for (unsigned Reg : ECRegs) { if (MIB->readsRegister(Reg, TRI)) { MachineOperand *MO = MIB->findRegisterDefOperand(Reg, false, TRI); assert(MO && "No def operand for clobbered register?"); MO->setIsEarlyClobber(false); } } // Get the mdnode from the asm if it exists and add it to the instruction. 
SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode); const MDNode *MD = cast(MDV)->getMD(); if (MD) MIB.addMetadata(MD); MBB->insert(InsertPos, MIB); break; } } } /// InstrEmitter - Construct an InstrEmitter and set it to start inserting /// at the given position in the given block. InstrEmitter::InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos) : MF(mbb->getParent()), MRI(&MF->getRegInfo()), TII(MF->getSubtarget().getInstrInfo()), TRI(MF->getSubtarget().getRegisterInfo()), TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb), InsertPos(insertpos) {} Index: projects/clang400-import/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- projects/clang400-import/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (revision 313893) +++ projects/clang400-import/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (revision 313894) @@ -1,9347 +1,9356 @@ //===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This implements routines for translating from LLVM IR into SelectionDAG IR. 
// //===----------------------------------------------------------------------===// #include "SelectionDAGBuilder.h" #include "SDNodeDbgValue.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Statepoint.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include 
"llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetSubtargetInfo.h" #include #include using namespace llvm; #define DEBUG_TYPE "isel" /// LimitFloatPrecision - Generate low-precision inline sequences for /// some float libcalls (6, 8 or 12 bits). static unsigned LimitFloatPrecision; static cl::opt LimitFPPrecision("limit-float-precision", cl::desc("Generate low-precision inline sequences " "for some float libcalls"), cl::location(LimitFloatPrecision), cl::init(0)); static cl::opt EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden, cl::desc("Enable fast-math-flags for DAG nodes")); /// Minimum jump table density for normal functions. static cl::opt JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden, cl::desc("Minimum density for building a jump table in " "a normal function")); /// Minimum jump table density for -Os or -Oz functions. static cl::opt OptsizeJumpTableDensity("optsize-jump-table-density", cl::init(40), cl::Hidden, cl::desc("Minimum density for building a jump table in " "an optsize function")); // Limit the width of DAG chains. This is important in general to prevent // DAG-based analysis from blowing up. For example, alias analysis and // load clustering may not complete in reasonable time. It is difficult to // recognize and avoid this situation within each individual analysis, and // future analyses are likely to have the same behavior. Limiting DAG width is // the safe approach and will be especially important with global DAGs. // // MaxParallelChains default is arbitrarily high to avoid affecting // optimization, but could be lowered to improve compile time. Any ld-ld-st-st // sequence over this should have been converted to llvm.memcpy by the // frontend. 
It is easy to induce this behavior with .ll code such as: // %buffer = alloca [4096 x i8] // %data = load [4096 x i8]* %argPtr // store [4096 x i8] %data, [4096 x i8]* %buffer static const unsigned MaxParallelChains = 64; static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V); /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. If the parts combine to a type /// larger than ValueVT then AssertOp can be used to specify whether the extra /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT /// (ISD::AssertSext). static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, Optional AssertOp = None) { if (ValueVT.isVector()) return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Val = Parts[0]; if (NumParts > 1) { // Assemble the value from multiple parts. if (ValueVT.isInteger()) { unsigned PartBits = PartVT.getSizeInBits(); unsigned ValueBits = ValueVT.getSizeInBits(); // Assemble the power of 2 part. unsigned RoundParts = NumParts & (NumParts - 1) ? 1 << Log2_32(NumParts) : NumParts; unsigned RoundBits = PartBits * RoundParts; EVT RoundVT = RoundBits == ValueBits ? 
ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits); SDValue Lo, Hi; EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2); if (RoundParts > 2) { Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2, PartVT, HalfVT, V); Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2, RoundParts / 2, PartVT, HalfVT, V); } else { Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]); Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]); } if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi); if (RoundParts < NumParts) { // Assemble the trailing non-power-of-2 part. unsigned OddParts = NumParts - RoundParts; EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits); Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT, OddVT, V); // Combine the round and odd parts. Lo = Val; if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi, DAG.getConstant(Lo.getValueSizeInBits(), DL, TLI.getPointerTy(DAG.getDataLayout()))); Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); } } else if (PartVT.isFloatingPoint()) { // FP split into multiple FP parts (for ppcf128) assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 && "Unexpected split"); SDValue Lo, Hi; Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]); if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout())) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); } else { // FP split into integer parts (soft fp) assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && !PartVT.isVector() && "Unexpected split"); EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); Val = getCopyFromParts(DAG, DL, 
Parts, NumParts, PartVT, IntVT, V); } } // There is now one part, held in Val. Correct it to match ValueVT. // PartEVT is the type of the register class that holds the value. // ValueVT is the type of the inline asm operation. EVT PartEVT = Val.getValueType(); if (PartEVT == ValueVT) return Val; if (PartEVT.isInteger() && ValueVT.isFloatingPoint() && ValueVT.bitsLT(PartEVT)) { // For an FP value in an integer part, we need to truncate to the right // width first. PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val); } // Handle types that have the same size. if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits()) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); // Handle types with different sizes. if (PartEVT.isInteger() && ValueVT.isInteger()) { if (ValueVT.bitsLT(PartEVT)) { // For a truncate, see if we have any information to // indicate whether the truncated bits will always be // zero or sign-extension. if (AssertOp.hasValue()) Val = DAG.getNode(*AssertOp, DL, PartEVT, Val, DAG.getValueType(ValueVT)); return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); } return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val); } if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { // FP_ROUND's are always exact here. 
if (ValueVT.bitsLT(Val.getValueType())) return DAG.getNode( ISD::FP_ROUND, DL, ValueVT, Val, DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()))); return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); } llvm_unreachable("Unknown mismatch!"); } static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V, const Twine &ErrMsg) { const Instruction *I = dyn_cast_or_null(V); if (!V) return Ctx.emitError(ErrMsg); const char *AsmError = ", possible invalid constraint for vector type"; if (const CallInst *CI = dyn_cast(I)) if (isa(CI->getCalledValue())) return Ctx.emitError(I, ErrMsg + AsmError); return Ctx.emitError(I, ErrMsg); } /// getCopyFromPartsVector - Create a value that contains the specified legal /// parts combined into the value they represent. If the parts combine to a /// type larger than ValueVT then AssertOp can be used to specify whether the /// extra bits are known to be zero (ISD::AssertZext) or sign extended from /// ValueVT (ISD::AssertSext). static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V) { assert(ValueVT.isVector() && "Not a vector value"); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Val = Parts[0]; // Handle a multi-element vector. if (NumParts > 1) { EVT IntermediateVT; MVT RegisterVT; unsigned NumIntermediates; unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates, RegisterVT); assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); assert(RegisterVT.getSizeInBits() == Parts[0].getSimpleValueType().getSizeInBits() && "Part type sizes don't match!"); // Assemble the parts into intermediate operands. 
SmallVector Ops(NumIntermediates); if (NumIntermediates == NumParts) { // If the register was not expanded, truncate or copy the value, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1, PartVT, IntermediateVT, V); } else if (NumParts > 0) { // If the intermediate type was expanded, build the intermediate // operands from the parts. assert(NumParts % NumIntermediates == 0 && "Must expand into a divisible number of parts!"); unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, PartVT, IntermediateVT, V); } // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the // intermediate operands. Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL, ValueVT, Ops); } // There is now one part, held in Val. Correct it to match ValueVT. EVT PartEVT = Val.getValueType(); if (PartEVT == ValueVT) return Val; if (PartEVT.isVector()) { // If the element type of the source/dest vectors are the same, but the // parts vector has more elements than the value vector, then we have a // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the // elements we want. if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) { assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && "Cannot narrow, it would be a lossy transformation"); return DAG.getNode( ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); } // Vector/Vector bitcast. 
if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() && "Cannot handle this kind of promotion"); // Promoted vector extract return DAG.getAnyExtOrTrunc(Val, DL, ValueVT); } // Trivial bitcast if the types are the same size and the destination // vector type is legal. if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() && TLI.isTypeLegal(ValueVT)) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); // Handle cases such as i8 -> <1 x i1> if (ValueVT.getVectorNumElements() != 1) { diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, "non-trivial scalar-to-vector conversion"); return DAG.getUNDEF(ValueVT); } if (ValueVT.getVectorNumElements() == 1 && ValueVT.getVectorElementType() != PartEVT) Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType()); return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); } static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V); /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { EVT ValueVT = Val.getValueType(); // Handle the vector case separately. 
if (ValueVT.isVector()) return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V); unsigned PartBits = PartVT.getSizeInBits(); unsigned OrigNumParts = NumParts; assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) && "Copying to an illegal type!"); if (NumParts == 0) return; assert(!ValueVT.isVector() && "Vector case handled elsewhere"); EVT PartEVT = PartVT; if (PartEVT == ValueVT) { assert(NumParts == 1 && "No-op copy with multiple parts!"); Parts[0] = Val; return; } if (NumParts * PartBits > ValueVT.getSizeInBits()) { // If the parts cover more bits than the value has, promote the value. if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { assert(NumParts == 1 && "Do not know what to promote to!"); Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); } else { if (ValueVT.isFloatingPoint()) { // FP values need to be bitcast, then extended if they are being put // into a larger container. ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); } assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && ValueVT.isInteger() && "Unknown mismatch!"); ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ExtendKind, DL, ValueVT, Val); if (PartVT == MVT::x86mmx) Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } } else if (PartBits == ValueVT.getSizeInBits()) { // Different types of the same size. assert(NumParts == 1 && PartEVT != ValueVT); Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { // If the parts cover less bits than value has, truncate the value. 
assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && ValueVT.isInteger() && "Unknown mismatch!"); ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); if (PartVT == MVT::x86mmx) Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } // The value may have changed - recompute ValueVT. ValueVT = Val.getValueType(); assert(NumParts * PartBits == ValueVT.getSizeInBits() && "Failed to tile the value with PartVT!"); if (NumParts == 1) { if (PartEVT != ValueVT) { diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, "scalar-to-vector conversion failed"); Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } Parts[0] = Val; return; } // Expand the value into multiple parts. if (NumParts & (NumParts - 1)) { // The number of parts is not a power of 2. Split off and copy the tail. assert(PartVT.isInteger() && ValueVT.isInteger() && "Do not know what to expand to!"); unsigned RoundParts = 1 << Log2_32(NumParts); unsigned RoundBits = RoundParts * PartBits; unsigned OddParts = NumParts - RoundParts; SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, DAG.getIntPtrConstant(RoundBits, DL)); getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); if (DAG.getDataLayout().isBigEndian()) // The odd parts were reversed by getCopyToParts - unreverse them. std::reverse(Parts + RoundParts, Parts + NumParts); NumParts = RoundParts; ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); } // The number of parts is a power of 2. Repeatedly bisect the value using // EXTRACT_ELEMENT. 
Parts[0] = DAG.getNode(ISD::BITCAST, DL, EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()), Val); for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { for (unsigned i = 0; i < NumParts; i += StepSize) { unsigned ThisBits = StepSize * PartBits / 2; EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits); SDValue &Part0 = Parts[i]; SDValue &Part1 = Parts[i+StepSize/2]; Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, ThisVT, Part0, DAG.getIntPtrConstant(1, DL)); Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, ThisVT, Part0, DAG.getIntPtrConstant(0, DL)); if (ThisBits == PartBits && ThisVT != PartVT) { Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0); Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1); } } } if (DAG.getDataLayout().isBigEndian()) std::reverse(Parts, Parts + OrigNumParts); } /// getCopyToPartsVector - Create a series of nodes that contain the specified /// value split into legal parts. static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V) { EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (NumParts == 1) { EVT PartEVT = PartVT; if (PartEVT == ValueVT) { // Nothing to do. } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { // Bitconvert vector->vector case. Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (PartVT.isVector() && PartEVT.getVectorElementType() == ValueVT.getVectorElementType() && PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { EVT ElementVT = PartVT.getVectorElementType(); // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in // undef elements. 
SmallVector Ops; for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getNode( ISD::EXTRACT_VECTOR_ELT, DL, ElementVT, Val, DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())))); for (unsigned i = ValueVT.getVectorNumElements(), e = PartVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getUNDEF(ElementVT)); Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, Ops); // FIXME: Use CONCAT for 2x -> 4x. //SDValue UndefElts = DAG.getUNDEF(VectorTy); //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); } else if (PartVT.isVector() && PartEVT.getVectorElementType().bitsGE( ValueVT.getVectorElementType()) && PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { // Promoted vector extract Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } else{ // Vector -> scalar conversion. assert(ValueVT.getVectorNumElements() == 1 && "Only trivial vector-to-scalar conversions should get here!"); Val = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } Parts[0] = Val; return; } // Handle a multi-element vector. EVT IntermediateVT; MVT RegisterVT; unsigned NumIntermediates; unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates, RegisterVT); unsigned NumElements = ValueVT.getVectorNumElements(); assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); // Split the vector into intermediate operands. 
SmallVector Ops(NumIntermediates); for (unsigned i = 0; i != NumIntermediates; ++i) { if (IntermediateVT.isVector()) Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, DAG.getConstant(i * (NumElements / NumIntermediates), DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); else Ops[i] = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val, DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); } // Split the intermediate operands into legal parts. if (NumParts == NumIntermediates) { // If the register was not expanded, promote or copy the value, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V); } else if (NumParts > 0) { // If the intermediate type was expanded, split each the value into // legal parts. assert(NumIntermediates != 0 && "division by zero"); assert(NumParts % NumIntermediates == 0 && "Must expand into a divisible number of parts!"); unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V); } } RegsForValue::RegsForValue() {} RegsForValue::RegsForValue(const SmallVector ®s, MVT regvt, EVT valuevt) : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, const DataLayout &DL, unsigned Reg, Type *Ty) { ComputeValueVTs(TLI, DL, Ty, ValueVTs); for (EVT ValueVT : ValueVTs) { unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT); MVT RegisterVT = TLI.getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(Reg + i); RegVTs.push_back(RegisterVT); Reg += NumRegs; } } /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from /// this value and returns the result as a ValueVT value. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. 
SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, const SDLoc &dl, SDValue &Chain, SDValue *Flag, const Value *V) const { // A Value with type {} or [0 x %t] needs no registers. if (ValueVTs.empty()) return SDValue(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Assemble the legal parts into the final values. SmallVector Values(ValueVTs.size()); SmallVector Parts; for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { // Copy the legal parts from the registers. EVT ValueVT = ValueVTs[Value]; unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT); MVT RegisterVT = RegVTs[Value]; Parts.resize(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { SDValue P; if (!Flag) { P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); } else { P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); *Flag = P.getValue(2); } Chain = P.getValue(1); Parts[i] = P; // If the source register was virtual and if we know something about it, // add an assert node. if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) || !RegisterVT.isInteger() || RegisterVT.isVector()) continue; const FunctionLoweringInfo::LiveOutInfo *LOI = FuncInfo.GetLiveOutRegInfo(Regs[Part+i]); if (!LOI) continue; unsigned RegSize = RegisterVT.getSizeInBits(); unsigned NumSignBits = LOI->NumSignBits; unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes(); if (NumZeroBits == RegSize) { // The current value is a zero. // Explicitly express that as it would be easier for // optimizations to kick in. Parts[i] = DAG.getConstant(0, dl, RegisterVT); continue; } // FIXME: We capture more information than the dag can represent. For // now, just use the tightest assertzext/assertsext possible. 
bool isSExt = true; EVT FromVT(MVT::Other); if (NumSignBits == RegSize) { isSExt = true; // ASSERT SEXT 1 FromVT = MVT::i1; } else if (NumZeroBits >= RegSize - 1) { isSExt = false; // ASSERT ZEXT 1 FromVT = MVT::i1; } else if (NumSignBits > RegSize - 8) { isSExt = true; // ASSERT SEXT 8 FromVT = MVT::i8; } else if (NumZeroBits >= RegSize - 8) { isSExt = false; // ASSERT ZEXT 8 FromVT = MVT::i8; } else if (NumSignBits > RegSize - 16) { isSExt = true; // ASSERT SEXT 16 FromVT = MVT::i16; } else if (NumZeroBits >= RegSize - 16) { isSExt = false; // ASSERT ZEXT 16 FromVT = MVT::i16; } else if (NumSignBits > RegSize - 32) { isSExt = true; // ASSERT SEXT 32 FromVT = MVT::i32; } else if (NumZeroBits >= RegSize - 32) { isSExt = false; // ASSERT ZEXT 32 FromVT = MVT::i32; } else { continue; } // Add an assertion node. assert(FromVT != MVT::Other); Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, RegisterVT, P, DAG.getValueType(FromVT)); } Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs, RegisterVT, ValueVT, V); Part += NumRegs; Parts.clear(); } return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values); } /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the /// specified value into the registers specified by this object. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, const SDLoc &dl, SDValue &Chain, SDValue *Flag, const Value *V, ISD::NodeType PreferredExtendType) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); ISD::NodeType ExtendKind = PreferredExtendType; // Get the list of the values's legal parts. 
unsigned NumRegs = Regs.size(); SmallVector Parts(NumRegs); for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { EVT ValueVT = ValueVTs[Value]; unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); MVT RegisterVT = RegVTs[Value]; if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT)) ExtendKind = ISD::ZERO_EXTEND; getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part], NumParts, RegisterVT, V, ExtendKind); Part += NumParts; } // Copy the parts into the registers. SmallVector Chains(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { SDValue Part; if (!Flag) { Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); } else { Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag); *Flag = Part.getValue(1); } Chains[i] = Part.getValue(0); } if (NumRegs == 1 || Flag) // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is // flagged to it. That is the CopyToReg nodes and the user are considered // a single scheduling unit. If we create a TokenFactor and return it as // chain, then the TokenFactor is both a predecessor (operand) of the // user as well as a successor (the TF operands are flagged to the user). // c1, f1 = CopyToReg // c2, f2 = CopyToReg // c3 = TokenFactor c1, c2 // ... // = op c3, ..., f2 Chain = Chains[NumRegs-1]; else Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); } /// AddInlineAsmOperands - Add this value to the specified inlineasm node /// operand list. This adds the code marker and includes the number of /// values added into it. 
void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, unsigned MatchingIdx, const SDLoc &dl, SelectionDAG &DAG, std::vector &Ops) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); if (HasMatching) Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); else if (!Regs.empty() && TargetRegisterInfo::isVirtualRegister(Regs.front())) { // Put the register class of the virtual registers in the flag word. That // way, later passes can recompute register class constraints for inline // assembly as well as normal instructions. // Don't do this for tied operands that can use the regclass information // from the def. const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); const TargetRegisterClass *RC = MRI.getRegClass(Regs.front()); Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); } SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32); Ops.push_back(Res); unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); MVT RegisterVT = RegVTs[Value]; for (unsigned i = 0; i != NumRegs; ++i) { assert(Reg < Regs.size() && "Mismatch in # registers expected"); unsigned TheReg = Regs[Reg++]; Ops.push_back(DAG.getRegister(TheReg, RegisterVT)); if (TheReg == SP && Code == InlineAsm::Kind_Clobber) { // If we clobbered the stack pointer, MFI should know about it. assert(DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()); } } } } void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, const TargetLibraryInfo *li) { AA = &aa; GFI = gfi; LibInfo = li; DL = &DAG.getDataLayout(); Context = DAG.getContext(); LPadToCallSiteMap.clear(); } /// clear - Clear out the current SelectionDAG and the associated /// state and prepare this SelectionDAGBuilder object to be used /// for a new block. 
This doesn't clear out information about /// additional blocks that are needed to complete switch lowering /// or PHI node updating; that information is cleared out as it is /// consumed. void SelectionDAGBuilder::clear() { NodeMap.clear(); UnusedArgNodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); CurInst = nullptr; HasTailCall = false; SDNodeOrder = LowestSDNodeOrder; StatepointLowering.clear(); } /// clearDanglingDebugInfo - Clear the dangling debug information /// map. This function is separated from the clear so that debug /// information that is dangling in a basic block can be properly /// resolved in a different basic block. This allows the /// SelectionDAG to resolve dangling debug information attached /// to PHI nodes. void SelectionDAGBuilder::clearDanglingDebugInfo() { DanglingDebugInfoMap.clear(); } /// getRoot - Return the current virtual root of the Selection DAG, /// flushing any PendingLoad items. This must be done before emitting /// a store or any other node that may need to be ordered after any /// prior load instructions. /// SDValue SelectionDAGBuilder::getRoot() { if (PendingLoads.empty()) return DAG.getRoot(); if (PendingLoads.size() == 1) { SDValue Root = PendingLoads[0]; DAG.setRoot(Root); PendingLoads.clear(); return Root; } // Otherwise, we have to make a token factor node. SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, PendingLoads); PendingLoads.clear(); DAG.setRoot(Root); return Root; } /// getControlRoot - Similar to getRoot, but instead of flushing all the /// PendingLoad items, flush all the PendingExports items. It is necessary /// to do this before emitting a terminator instruction. /// SDValue SelectionDAGBuilder::getControlRoot() { SDValue Root = DAG.getRoot(); if (PendingExports.empty()) return Root; // Turn all of the CopyToReg chains into one factored node. 
if (Root.getOpcode() != ISD::EntryToken) { unsigned i = 0, e = PendingExports.size(); for (; i != e; ++i) { assert(PendingExports[i].getNode()->getNumOperands() > 1); if (PendingExports[i].getNode()->getOperand(0) == Root) break; // Don't add the root if we already indirectly depend on it. } if (i == e) PendingExports.push_back(Root); } Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, PendingExports); PendingExports.clear(); DAG.setRoot(Root); return Root; } void SelectionDAGBuilder::visit(const Instruction &I) { // Set up outgoing PHI node register values before emitting the terminator. if (isa(&I)) { HandlePHINodesInSuccessorBlocks(I.getParent()); } ++SDNodeOrder; CurInst = &I; visit(I.getOpcode(), I); if (!isa(&I) && !HasTailCall && !isStatepoint(&I)) // statepoints handle their exports internally CopyToExportRegsIfNeeded(&I); CurInst = nullptr; } void SelectionDAGBuilder::visitPHI(const PHINode &) { llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!"); } void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { // Note: this doesn't use InstVisitor, because it has to work with // ConstantExpr's in addition to instructions. switch (Opcode) { default: llvm_unreachable("Unknown instruction type encountered!"); // Build the switch statement using the Instruction.def file. #define HANDLE_INST(NUM, OPCODE, CLASS) \ case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break; #include "llvm/IR/Instruction.def" } } // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, // generate the debug data structures now that we've seen its definition. 
void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, SDValue Val) { DanglingDebugInfo &DDI = DanglingDebugInfoMap[V]; if (DDI.getDI()) { const DbgValueInst *DI = DDI.getDI(); DebugLoc dl = DDI.getdl(); unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); DILocalVariable *Variable = DI->getVariable(); DIExpression *Expr = DI->getExpression(); assert(Variable->isValidLocationForIntrinsic(dl) && "Expected inlined-at fields to agree"); uint64_t Offset = DI->getOffset(); SDDbgValue *SDV; if (Val.getNode()) { if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, false, Val)) { SDV = getDbgValue(Val, Variable, Expr, Offset, dl, DbgSDNodeOrder); DAG.AddDbgValue(SDV, Val.getNode(), false); } } else DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); DanglingDebugInfoMap[V] = DanglingDebugInfo(); } } /// getCopyFromRegs - If there was virtual register allocated for the value V /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise. SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { DenseMap::iterator It = FuncInfo.ValueMap.find(V); SDValue Result; if (It != FuncInfo.ValueMap.end()) { unsigned InReg = It->second; RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), DAG.getDataLayout(), InReg, Ty); SDValue Chain = DAG.getEntryNode(); Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); resolveDanglingDebugInfo(V, Result); } return Result; } /// getValue - Return an SDValue for the given Value. SDValue SelectionDAGBuilder::getValue(const Value *V) { // If we already have an SDValue for this value, use it. It's important // to do this first, so that we don't create a CopyFromReg if we already // have a regular SDValue. SDValue &N = NodeMap[V]; if (N.getNode()) return N; // If there's a virtual register allocated and initialized for this // value, use it. 
if (SDValue copyFromReg = getCopyFromRegs(V, V->getType())) return copyFromReg; // Otherwise create a new SDValue and remember it. SDValue Val = getValueImpl(V); NodeMap[V] = Val; resolveDanglingDebugInfo(V, Val); return Val; } // Return true if SDValue exists for the given Value bool SelectionDAGBuilder::findValue(const Value *V) const { return (NodeMap.find(V) != NodeMap.end()) || (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end()); } /// getNonRegisterValue - Return an SDValue for the given Value, but /// don't look in FuncInfo.ValueMap for a virtual register. SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { // If we already have an SDValue for this value, use it. SDValue &N = NodeMap[V]; if (N.getNode()) { if (isa(N) || isa(N)) { // Remove the debug location from the node as the node is about to be used // in a location which may differ from the original debug location. This // is relevant to Constant and ConstantFP nodes because they can appear // as constant expressions inside PHI nodes. N->setDebugLoc(DebugLoc()); } return N; } // Otherwise create a new SDValue and remember it. SDValue Val = getValueImpl(V); NodeMap[V] = Val; resolveDanglingDebugInfo(V, Val); return Val; } /// getValueImpl - Helper function for getValue and getNonRegisterValue. /// Create an SDValue for the given value. 
SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (const Constant *C = dyn_cast(V)) { EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true); if (const ConstantInt *CI = dyn_cast(C)) return DAG.getConstant(*CI, getCurSDLoc(), VT); if (const GlobalValue *GV = dyn_cast(C)) return DAG.getGlobalAddress(GV, getCurSDLoc(), VT); if (isa(C)) { unsigned AS = V->getType()->getPointerAddressSpace(); return DAG.getConstant(0, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout(), AS)); } if (const ConstantFP *CFP = dyn_cast(C)) return DAG.getConstantFP(*CFP, getCurSDLoc(), VT); if (isa(C) && !V->getType()->isAggregateType()) return DAG.getUNDEF(VT); if (const ConstantExpr *CE = dyn_cast(C)) { visit(CE->getOpcode(), *CE); SDValue N1 = NodeMap[V]; assert(N1.getNode() && "visit didn't populate the NodeMap!"); return N1; } if (isa(C) || isa(C)) { SmallVector Constants; for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end(); OI != OE; ++OI) { SDNode *Val = getValue(*OI).getNode(); // If the operand is an empty aggregate, there are no values. if (!Val) continue; // Add each leaf value from the operand to the Constants list // to form a flattened list of all the values. for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) Constants.push_back(SDValue(Val, i)); } return DAG.getMergeValues(Constants, getCurSDLoc()); } if (const ConstantDataSequential *CDS = dyn_cast(C)) { SmallVector Ops; for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode(); // Add each leaf value from the operand to the Constants list // to form a flattened list of all the values. 
for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) Ops.push_back(SDValue(Val, i)); } if (isa(CDS->getType())) return DAG.getMergeValues(Ops, getCurSDLoc()); return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops); } if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { assert((isa(C) || isa(C)) && "Unknown struct or array constant!"); SmallVector ValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs); unsigned NumElts = ValueVTs.size(); if (NumElts == 0) return SDValue(); // empty struct SmallVector Constants(NumElts); for (unsigned i = 0; i != NumElts; ++i) { EVT EltVT = ValueVTs[i]; if (isa(C)) Constants[i] = DAG.getUNDEF(EltVT); else if (EltVT.isFloatingPoint()) Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT); else Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT); } return DAG.getMergeValues(Constants, getCurSDLoc()); } if (const BlockAddress *BA = dyn_cast(C)) return DAG.getBlockAddress(BA, VT); VectorType *VecTy = cast(V->getType()); unsigned NumElements = VecTy->getNumElements(); // Now that we know the number and type of the elements, get that number of // elements into the Ops array based on what kind of constant it is. SmallVector Ops; if (const ConstantVector *CV = dyn_cast(C)) { for (unsigned i = 0; i != NumElements; ++i) Ops.push_back(getValue(CV->getOperand(i))); } else { assert(isa(C) && "Unknown vector constant!"); EVT EltVT = TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType()); SDValue Op; if (EltVT.isFloatingPoint()) Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT); else Op = DAG.getConstant(0, getCurSDLoc(), EltVT); Ops.assign(NumElements, Op); } // Create a BUILD_VECTOR node. return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops); } // If this is a static alloca, generate it as the frameindex instead of // computation. 
if (const AllocaInst *AI = dyn_cast(V)) { DenseMap::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) return DAG.getFrameIndex(SI->second, TLI.getPointerTy(DAG.getDataLayout())); } // If this is an instruction which fast-isel has deferred, select it now. if (const Instruction *Inst = dyn_cast(V)) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, Inst->getType()); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } llvm_unreachable("Can't get register for value!"); } void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) { auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX; bool IsCoreCLR = Pers == EHPersonality::CoreCLR; MachineBasicBlock *CatchPadMBB = FuncInfo.MBB; // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues. if (IsMSVCCXX || IsCoreCLR) CatchPadMBB->setIsEHFuncletEntry(); DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other, getControlRoot())); } void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) { // Update machine-CFG edge. MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()]; FuncInfo.MBB->addSuccessor(TargetMBB); auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); bool IsSEH = isAsynchronousEHPersonality(Pers); if (IsSEH) { // If this is not a fall-through branch or optimizations are switched off, // emit the branch. if (TargetMBB != NextBlock(FuncInfo.MBB) || TM.getOptLevel() == CodeGenOpt::None) DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(TargetMBB))); return; } // Figure out the funclet membership for the catchret's successor. // This will be used by the FuncletLayout pass to determine how to order the // BB's. // A 'catchret' returns to the outer scope's color. 
Value *ParentPad = I.getCatchSwitchParentPad(); const BasicBlock *SuccessorColor; if (isa(ParentPad)) SuccessorColor = &FuncInfo.Fn->getEntryBlock(); else SuccessorColor = cast(ParentPad)->getParent(); assert(SuccessorColor && "No parent funclet for catchret!"); MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor]; assert(SuccessorColorMBB && "No MBB for SuccessorColor!"); // Create the terminator node. SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(TargetMBB), DAG.getBasicBlock(SuccessorColorMBB)); DAG.setRoot(Ret); } void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) { // Don't emit any special code for the cleanuppad instruction. It just marks // the start of a funclet. FuncInfo.MBB->setIsEHFuncletEntry(); FuncInfo.MBB->setIsCleanupFuncletEntry(); } /// When an invoke or a cleanupret unwinds to the next EH pad, there are /// many places it could ultimately go. In the IR, we have a single unwind /// destination, but in the machine CFG, we enumerate all the possible blocks. /// This function skips over imaginary basic blocks that hold catchswitch /// instructions, and finds all the "real" machine /// basic block destinations. As those destinations may not be successors of /// EHPadBB, here we also calculate the edge probability to those destinations. /// The passed-in Prob is the edge probability to EHPadBB. static void findUnwindDestinations( FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB, BranchProbability Prob, SmallVectorImpl> &UnwindDests) { EHPersonality Personality = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX; bool IsCoreCLR = Personality == EHPersonality::CoreCLR; while (EHPadBB) { const Instruction *Pad = EHPadBB->getFirstNonPHI(); BasicBlock *NewEHPadBB = nullptr; if (isa(Pad)) { // Stop on landingpads. They are not funclets. 
UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob); break; } else if (isa(Pad)) { // Stop on cleanup pads. Cleanups are always funclet entries for all known // personalities. UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob); UnwindDests.back().first->setIsEHFuncletEntry(); break; } else if (auto *CatchSwitch = dyn_cast(Pad)) { // Add the catchpad handlers to the possible destinations. for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob); // For MSVC++ and the CLR, catchblocks are funclets and need prologues. if (IsMSVCCXX || IsCoreCLR) UnwindDests.back().first->setIsEHFuncletEntry(); } NewEHPadBB = CatchSwitch->getUnwindDest(); } else { continue; } BranchProbabilityInfo *BPI = FuncInfo.BPI; if (BPI && NewEHPadBB) Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB); EHPadBB = NewEHPadBB; } } void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) { // Update successor info. SmallVector, 1> UnwindDests; auto UnwindDest = I.getUnwindDest(); BranchProbabilityInfo *BPI = FuncInfo.BPI; BranchProbability UnwindDestProb = (BPI && UnwindDest) ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest) : BranchProbability::getZero(); findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests); for (auto &UnwindDest : UnwindDests) { UnwindDest.first->setIsEHPad(); addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second); } FuncInfo.MBB->normalizeSuccProbs(); // Create the terminator node. 
SDValue Ret = DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot()); DAG.setRoot(Ret); } void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) { report_fatal_error("visitCatchSwitch not yet implemented!"); } void SelectionDAGBuilder::visitRet(const ReturnInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); auto &DL = DAG.getDataLayout(); SDValue Chain = getControlRoot(); SmallVector Outs; SmallVector OutVals; // Calls to @llvm.experimental.deoptimize don't generate a return value, so // lower // // %val = call @llvm.experimental.deoptimize() // ret %val // // differently. if (I.getParent()->getTerminatingDeoptimizeCall()) { LowerDeoptimizingReturn(); return; } if (!FuncInfo.CanLowerReturn) { unsigned DemoteReg = FuncInfo.DemoteRegister; const Function *F = I.getParent()->getParent(); // Emit a store of the return value through the virtual register. // Leave Outs empty so that LowerReturn won't try to load return // registers the usual way. SmallVector PtrValueVTs; ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()), PtrValueVTs); SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), DemoteReg, PtrValueVTs[0]); SDValue RetOp = getValue(I.getOperand(0)); SmallVector ValueVTs; SmallVector Offsets; ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); // An aggregate return value cannot wrap around the address space, so // offsets to its parts don't wrap either. SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); SmallVector Chains(NumValues); for (unsigned i = 0; i != NumValues; ++i) { SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), RetPtr.getValueType(), RetPtr, DAG.getIntPtrConstant(Offsets[i], getCurSDLoc()), &Flags); Chains[i] = DAG.getStore(Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), // FIXME: better loc info would be nice. 
Add, MachinePointerInfo()); } Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, Chains); } else if (I.getNumOperands() != 0) { SmallVector ValueVTs; ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues) { SDValue RetOp = getValue(I.getOperand(0)); const Function *F = I.getParent()->getParent(); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) ExtendKind = ISD::SIGN_EXTEND; else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt)) ExtendKind = ISD::ZERO_EXTEND; LLVMContext &Context = F->getContext(); bool RetInReg = F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, Attribute::InReg); for (unsigned j = 0; j != NumValues; ++j) { EVT VT = ValueVTs[j]; if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind); unsigned NumParts = TLI.getNumRegisters(Context, VT); MVT PartVT = TLI.getRegisterType(Context, VT); SmallVector Parts(NumParts); getCopyToParts(DAG, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), &Parts[0], NumParts, PartVT, &I, ExtendKind); // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); if (RetInReg) Flags.setInReg(); // Propagate extension type if any if (ExtendKind == ISD::SIGN_EXTEND) Flags.setSExt(); else if (ExtendKind == ISD::ZERO_EXTEND) Flags.setZExt(); for (unsigned i = 0; i < NumParts; ++i) { Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), VT, /*isfixed=*/true, 0, 0)); OutVals.push_back(Parts[i]); } } } } // Push in swifterror virtual register as the last element of Outs. This makes // sure swifterror virtual register will be returned in the swifterror // physical register. 
const Function *F = I.getParent()->getParent(); if (TLI.supportSwiftError() && F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) { assert(FuncInfo.SwiftErrorArg && "Need a swift error argument"); ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); Flags.setSwiftError(); Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/, EVT(TLI.getPointerTy(DL)) /*argvt*/, true /*isfixed*/, 1 /*origidx*/, 0 /*partOffs*/)); // Create SDNode for the swifterror virtual register. OutVals.push_back(DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVReg( FuncInfo.MBB, FuncInfo.SwiftErrorArg), EVT(TLI.getPointerTy(DL)))); } bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); CallingConv::ID CallConv = DAG.getMachineFunction().getFunction()->getCallingConv(); Chain = DAG.getTargetLoweringInfo().LowerReturn( Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG); // Verify that the target's LowerReturn behaved as expected. assert(Chain.getNode() && Chain.getValueType() == MVT::Other && "LowerReturn didn't return a valid chain!"); // Update the DAG with the new chain value resulting from return lowering. DAG.setRoot(Chain); } /// CopyToExportRegsIfNeeded - If the given value has virtual registers /// created for it, emit nodes to copy the value into the virtual /// registers. void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) { // Skip empty types if (V->getType()->isEmptyTy()) return; DenseMap::iterator VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) { assert(!V->use_empty() && "Unused value assigned virtual registers!"); CopyValueToVirtualRegister(V, VMI->second); } } /// ExportFromCurrentBlock - If this condition isn't known to be exported from /// the current basic block, add it to ValueMap now so that we'll get a /// CopyTo/FromReg. void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) { // No need to export constants. if (!isa(V) && !isa(V)) return; // Already exported? 
if (FuncInfo.isExportedInst(V)) return; unsigned Reg = FuncInfo.InitializeRegForValue(V); CopyValueToVirtualRegister(V, Reg); } bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB) { // The operands of the setcc have to be in this block. We don't know // how to export them from some other block. if (const Instruction *VI = dyn_cast(V)) { // Can export from current BB. if (VI->getParent() == FromBB) return true; // Is already exported, noop. return FuncInfo.isExportedInst(V); } // If this is an argument, we can export it if the BB is the entry block or // if it is already exported. if (isa(V)) { if (FromBB == &FromBB->getParent()->getEntryBlock()) return true; // Otherwise, can only export this if it is already exported. return FuncInfo.isExportedInst(V); } // Otherwise, constants can always be exported. return true; } /// Return branch probability calculated by BranchProbabilityInfo for IR blocks. BranchProbability SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const { BranchProbabilityInfo *BPI = FuncInfo.BPI; const BasicBlock *SrcBB = Src->getBasicBlock(); const BasicBlock *DstBB = Dst->getBasicBlock(); if (!BPI) { // If BPI is not available, set the default probability as 1 / N, where N is // the number of successors. 
auto SuccSize = std::max( std::distance(succ_begin(SrcBB), succ_end(SrcBB)), 1); return BranchProbability(1, SuccSize); } return BPI->getEdgeProbability(SrcBB, DstBB); } void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src, MachineBasicBlock *Dst, BranchProbability Prob) { if (!FuncInfo.BPI) Src->addSuccessorWithoutProb(Dst); else { if (Prob.isUnknown()) Prob = getEdgeProbability(Src, Dst); Src->addSuccessor(Dst, Prob); } } static bool InBlock(const Value *V, const BasicBlock *BB) { if (const Instruction *I = dyn_cast(V)) return I->getParent() == BB; return true; } /// EmitBranchForMergedCondition - Helper method for FindMergedConditions. /// This function emits a branch and is used at the leaves of an OR or an /// AND operator tree. /// void SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, BranchProbability TProb, BranchProbability FProb) { const BasicBlock *BB = CurBB->getBasicBlock(); // If the leaf of the tree is a comparison, merge the condition into // the caseblock. if (const CmpInst *BOp = dyn_cast(Cond)) { // The operands of the cmp have to be in this block. We don't know // how to export them from some other block. If this is the first block // of the sequence, no exporting is needed. if (CurBB == SwitchBB || (isExportableFromCurrentBlock(BOp->getOperand(0), BB) && isExportableFromCurrentBlock(BOp->getOperand(1), BB))) { ISD::CondCode Condition; if (const ICmpInst *IC = dyn_cast(Cond)) { Condition = getICmpCondCode(IC->getPredicate()); } else { const FCmpInst *FC = cast(Cond); Condition = getFCmpCondCode(FC->getPredicate()); if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); } CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr, TBB, FBB, CurBB, TProb, FProb); SwitchCases.push_back(CB); return; } } // Create a CaseBlock record representing this branch. 
CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), nullptr, TBB, FBB, CurBB, TProb, FProb); SwitchCases.push_back(CB); } /// FindMergedConditions - If Cond is an expression like void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, Instruction::BinaryOps Opc, BranchProbability TProb, BranchProbability FProb) { // If this node is not part of the or/and tree, emit it as a branch. const Instruction *BOp = dyn_cast(Cond); if (!BOp || !(isa(BOp) || isa(BOp)) || (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() || BOp->getParent() != CurBB->getBasicBlock() || !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb); return; } // Create TmpBB after CurBB. MachineFunction::iterator BBI(CurBB); MachineFunction &MF = DAG.getMachineFunction(); MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock()); CurBB->getParent()->insert(++BBI, TmpBB); if (Opc == Instruction::Or) { // Codegen X | Y as: // BB1: // jmp_if_X TBB // jmp TmpBB // TmpBB: // jmp_if_Y TBB // jmp FBB // // We have flexibility in setting Prob for BB1 and Prob for TmpBB. // The requirement is that // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) // = TrueProb for original BB. // Assuming the original probabilities are A and B, one choice is to set // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to // A/(1+B) and 2B/(1+B). This choice assumes that // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. // Another choice is to assume TrueProb for BB1 equals to TrueProb for // TmpBB, but the math is more complicated. auto NewTrueProb = TProb / 2; auto NewFalseProb = TProb / 2 + FProb; // Emit the LHS condition. 
FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb, NewFalseProb); // Normalize A/2 and B to get A/(1+B) and 2B/(1+B). SmallVector Probs{TProb / 2, FProb}; BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); // Emit the RHS condition into TmpBB. FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0], Probs[1]); } else { assert(Opc == Instruction::And && "Unknown merge op!"); // Codegen X & Y as: // BB1: // jmp_if_X TmpBB // jmp FBB // TmpBB: // jmp_if_Y TBB // jmp FBB // // This requires creation of TmpBB after CurBB. // We have flexibility in setting Prob for BB1 and Prob for TmpBB. // The requirement is that // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) // = FalseProb for original BB. // Assuming the original probabilities are A and B, one choice is to set // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 == // TrueProb for BB1 * FalseProb for TmpBB. auto NewTrueProb = TProb + FProb / 2; auto NewFalseProb = FProb / 2; // Emit the LHS condition. FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb, NewFalseProb); // Normalize A and B/2 to get 2A/(1+A) and B/(1+A). SmallVector Probs{TProb, FProb / 2}; BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); // Emit the RHS condition into TmpBB. FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0], Probs[1]); } } /// If the set of cases should be emitted as a series of branches, return true. /// If we should emit this as a bunch of and/or'd together conditions, return /// false. bool SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector &Cases) { if (Cases.size() != 2) return true; // If this is two comparisons of the same values or'd or and'd together, they // will get folded into a single comparison, so don't emit two blocks. 
if ((Cases[0].CmpLHS == Cases[1].CmpLHS && Cases[0].CmpRHS == Cases[1].CmpRHS) || (Cases[0].CmpRHS == Cases[1].CmpLHS && Cases[0].CmpLHS == Cases[1].CmpRHS)) { return false; } // Handle: (X != null) | (Y != null) --> (X|Y) != 0 // Handle: (X == null) & (Y == null) --> (X|Y) == 0 if (Cases[0].CmpRHS == Cases[1].CmpRHS && Cases[0].CC == Cases[1].CC && isa(Cases[0].CmpRHS) && cast(Cases[0].CmpRHS)->isNullValue()) { if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB) return false; if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB) return false; } return true; } void SelectionDAGBuilder::visitBr(const BranchInst &I) { MachineBasicBlock *BrMBB = FuncInfo.MBB; // Update machine-CFG edges. MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; if (I.isUnconditional()) { // Update machine-CFG edges. BrMBB->addSuccessor(Succ0MBB); // If this is not a fall-through branch or optimizations are switched off, // emit the branch. if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None) DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Succ0MBB))); return; } // If this condition is one of the special cases we handle, do special stuff // now. const Value *CondVal = I.getCondition(); MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)]; // If this is a series of conditions that are or'd or and'd together, emit // this as a sequence of branches instead of setcc's with and/or operations. // As long as jumps are not expensive, this should improve performance. 
// For example, instead of something like: // cmp A, B // C = seteq // cmp D, E // F = setle // or C, F // jnz foo // Emit: // cmp A, B // je foo // cmp D, E // jle foo // if (const BinaryOperator *BOp = dyn_cast(CondVal)) { Instruction::BinaryOps Opcode = BOp->getOpcode(); if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() && !I.getMetadata(LLVMContext::MD_unpredictable) && (Opcode == Instruction::And || Opcode == Instruction::Or)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode, getEdgeProbability(BrMBB, Succ0MBB), getEdgeProbability(BrMBB, Succ1MBB)); // If the compares in later blocks need to use values not currently // exported from this block, export them now. This block should always // be the first entry. assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!"); // Allow some cases to be rejected. if (ShouldEmitAsBranches(SwitchCases)) { for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) { ExportFromCurrentBlock(SwitchCases[i].CmpLHS); ExportFromCurrentBlock(SwitchCases[i].CmpRHS); } // Emit the branch for this block. visitSwitchCase(SwitchCases[0], BrMBB); SwitchCases.erase(SwitchCases.begin()); return; } // Okay, we decided not to do this, remove any inserted MBB's and clear // SwitchCases. for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) FuncInfo.MF->erase(SwitchCases[i].ThisBB); SwitchCases.clear(); } } // Create a CaseBlock record representing this branch. CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), nullptr, Succ0MBB, Succ1MBB, BrMBB); // Use visitSwitchCase to actually insert the fast branch sequence for this // cond branch. visitSwitchCase(CB, BrMBB); } /// visitSwitchCase - Emits the necessary code to represent a single node in /// the binary search tree resulting from lowering a switch instruction. 
/// visitSwitchCase - Emit the compare-and-branch for a single CaseBlock.
/// Builds the branch condition for CB, registers CB.TrueBB / CB.FalseBB as
/// successors of SwitchBB with their probabilities, and sets the DAG root to
/// a BRCOND to the true block chained into a BR to the false block.
/// CB.TrueBB and CB.FalseBB may be swapped (with the condition inverted) when
/// the true block is the one returned by NextBlock(SwitchBB).
// NOTE(review): the cast(...) calls below carry no template argument; it was
// presumably lost when this text was extracted -- confirm against upstream.
void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
                                          MachineBasicBlock *SwitchBB) {
  SDValue Cond;
  SDValue CondLHS = getValue(CB.CmpLHS);
  SDLoc dl = getCurSDLoc();

  // Build the setcc now.
  if (!CB.CmpMHS) {
    // Fold "(X == true)" to X and "(X == false)" to !X to
    // handle common cases produced by branch lowering.
    if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
        CB.CC == ISD::SETEQ)
      Cond = CondLHS;
    else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
             CB.CC == ISD::SETEQ) {
      // !X is emitted as X xor 1.
      SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
      Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
    } else
      Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
  } else {
    // Range case: CmpLHS <= CmpMHS <= CmpRHS.
    assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");

    const APInt& Low = cast(CB.CmpLHS)->getValue();
    const APInt& High = cast(CB.CmpRHS)->getValue();

    SDValue CmpOp = getValue(CB.CmpMHS);
    EVT VT = CmpOp.getValueType();

    if (cast(CB.CmpLHS)->isMinValue(true)) {
      // The lower bound is the signed minimum, so only the upper bound needs
      // to be tested.
      Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT),
                          ISD::SETLE);
    } else {
      // Otherwise test (X - Low) <=u (High - Low) with a single unsigned
      // comparison.
      SDValue SUB = DAG.getNode(ISD::SUB, dl, VT, CmpOp,
                                DAG.getConstant(Low, dl, VT));
      Cond = DAG.getSetCC(dl, MVT::i1, SUB,
                          DAG.getConstant(High-Low, dl, VT), ISD::SETULE);
    }
  }

  // Update successor info
  addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
  // TrueBB and FalseBB are always different unless the incoming IR is
  // degenerate. This only happens when running llc on weird IR.
  if (CB.TrueBB != CB.FalseBB)
    addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb);
  SwitchBB->normalizeSuccProbs();

  // If the lhs block is the next block, invert the condition so that we can
  // fall through to the lhs instead of the rhs block.
  if (CB.TrueBB == NextBlock(SwitchBB)) {
    std::swap(CB.TrueBB, CB.FalseBB);
    SDValue True = DAG.getConstant(1, dl, Cond.getValueType());
    Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
  }

  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
                               MVT::Other, getControlRoot(), Cond,
                               DAG.getBasicBlock(CB.TrueBB));

  // Insert the false branch. Do this even if it's a fall through branch,
  // this makes it easier to do DAG optimizations which require inverting
  // the branch condition.
  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
                       DAG.getBasicBlock(CB.FalseBB));

  DAG.setRoot(BrCond);
}

/// visitJumpTable - Emit JumpTable node in the current MBB
/// Reads the index from the virtual register JT.Reg (which must already have
/// been assigned) and sets the DAG root to a BR_JT node.
void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
  // Emit the code for the jump table
  assert(JT.Reg != -1U && "Should lower JT Header first!");
  EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
  SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
                                     JT.Reg, PTy);
  SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
  // Index.getValue(1) is the chain result of the CopyFromReg.
  SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
                                    MVT::Other, Index.getValue(1),
                                    Table, Index);
  DAG.setRoot(BrJumpTable);
}

/// visitJumpTableHeader - This function emits necessary code to produce index
/// in the JumpTable from switch case.
/// Stores the register holding that index in JT.Reg and sets the DAG root to
/// the range-check branch.
void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
                                               JumpTableHeader &JTH,
                                               MachineBasicBlock *SwitchBB) {
  SDLoc dl = getCurSDLoc();

  // Subtract the lowest switch case value from the value being switched on and
  // conditional branch to default mbb if the result is greater than the
  // difference between smallest and largest cases.
  SDValue SwitchOp = getValue(JTH.SValue);
  EVT VT = SwitchOp.getValueType();
  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
                            DAG.getConstant(JTH.First, dl, VT));

  // The SDNode we just created, which holds the value being switched on minus
  // the smallest case value, needs to be copied to a virtual register so it
  // can be used as an index into the jump table in a subsequent basic block.
  // This value may be smaller or larger than the target's pointer type, and
  // therefore require extension or truncating.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));

  unsigned JumpTableReg =
      FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
                                    JumpTableReg, SwitchOp);
  JT.Reg = JumpTableReg;

  // Emit the range check for the jump table, and branch to the default block
  // for the switch statement if the value being switched on exceeds the largest
  // case in the switch.
  SDValue CMP = DAG.getSetCC(
      dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                 Sub.getValueType()),
      Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);

  // The branch is chained on CopyTo, i.e. on the register copy above.
  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
                               MVT::Other, CopyTo, CMP,
                               DAG.getBasicBlock(JT.Default));

  // Avoid emitting unnecessary branches to the next block.
  if (JT.MBB != NextBlock(SwitchBB))
    BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
                         DAG.getBasicBlock(JT.MBB));

  DAG.setRoot(BrCond);
}

/// Create a LOAD_STACK_GUARD node, and let it carry the target specific global
/// variable if there exists one.
static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
                                 SDValue &Chain) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
  MachineFunction &MF = DAG.getMachineFunction();
  Value *Global = TLI.getSDagStackGuard(*MF.getFunction()->getParent());
  MachineSDNode *Node =
      DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
  if (Global) {
    // Attach one memory operand describing a pointer-sized load from Global.
    MachinePointerInfo MPInfo(Global);
    MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1);
    auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
                 MachineMemOperand::MODereferenceable;
    *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, PtrTy.getSizeInBits() / 8,
                                       DAG.getEVTAlignment(PtrTy));
    Node->setMemRefs(MemRefs, MemRefs + 1);
  }
  return SDValue(Node, 0);
}

/// Codegen a new tail for a stack protector check ParentMBB which has had its
/// tail spliced into a stack protector check success bb.
///
/// For a high level explanation of how this fits into the stack protector
/// generation see the comment on the declaration of class
/// StackProtectorDescriptor.
// NOTE(review): `cast(GuardCheck)` and `std::pair Result` below carry no
// template arguments; presumably lost in extraction -- confirm against
// upstream.
void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
                                                  MachineBasicBlock *ParentBB) {

  // First create the loads to the guard/stack slot for the comparison.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());

  MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
  int FI = MFI.getStackProtectorIndex();

  SDValue Guard;
  SDLoc dl = getCurSDLoc();
  SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
  const Module &M = *ParentBB->getParent()->getFunction()->getParent();
  unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext()));

  // Generate code to load the content of the guard slot.
  SDValue StackSlot = DAG.getLoad(
      PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
      MachineMemOperand::MOVolatile);

  // Retrieve guard check function, nullptr if instrumentation is inlined.
  if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) {
    // The target provides a guard check function to validate the guard value.
    // Generate a call to that function with the content of the guard slot as
    // argument.
    auto *Fn = cast(GuardCheck);
    FunctionType *FnTy = Fn->getFunctionType();
    assert(FnTy->getNumParams() == 1 && "Invalid function signature");

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    Entry.Node = StackSlot;
    Entry.Ty = FnTy->getParamType(0);
    if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
      Entry.isInReg = true;
    Args.push_back(Entry);

    TargetLowering::CallLoweringInfo CLI(DAG);
    CLI.setDebugLoc(getCurSDLoc())
      .setChain(DAG.getEntryNode())
      .setCallee(Fn->getCallingConv(), FnTy->getReturnType(),
                 getValue(GuardCheck), std::move(Args));

    // Result.second is used as the new DAG root; no explicit compare/branch
    // is emitted on this path.
    std::pair Result = TLI.LowerCallTo(CLI);
    DAG.setRoot(Result.second);
    return;
  }

  // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
  // Otherwise, emit a volatile load to retrieve the stack guard value.
  SDValue Chain = DAG.getEntryNode();
  if (TLI.useLoadStackGuardNode()) {
    Guard = getLoadStackGuard(DAG, dl, Chain);
  } else {
    const Value *IRGuard = TLI.getSDagStackGuard(M);
    SDValue GuardPtr = getValue(IRGuard);

    Guard =
        DAG.getLoad(PtrTy, dl, Chain, GuardPtr, MachinePointerInfo(IRGuard, 0),
                    Align, MachineMemOperand::MOVolatile);
  }

  // Perform the comparison via a subtract/getsetcc.
  EVT VT = Guard.getValueType();
  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot);

  SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
                                                        *DAG.getContext(),
                                                        Sub.getValueType()),
                             Sub, DAG.getConstant(0, dl, VT), ISD::SETNE);

  // If the sub is not 0, then we know the guard/stackslot do not equal, so
  // branch to failure MBB.
  // The chain operand is StackSlot.getOperand(0), i.e. operand 0 of the
  // guard-slot load created above.
  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
                               MVT::Other, StackSlot.getOperand(0),
                               Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
  // Otherwise branch to success MBB.
  SDValue Br = DAG.getNode(ISD::BR, dl,
                           MVT::Other, BrCond,
                           DAG.getBasicBlock(SPD.getSuccessMBB()));

  DAG.setRoot(Br);
}

/// Codegen the failure basic block for a stack protector check.
///
/// A failure stack protector machine basic block consists simply of a call to
/// __stack_chk_fail().
///
/// For a high level explanation of how this fits into the stack protector
/// generation see the comment on the declaration of class
/// StackProtectorDescriptor.
void
SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // makeLibCall returns a pair; its .second member becomes the new root.
  SDValue Chain =
      TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
                      None, false, getCurSDLoc(), false, false).second;
  DAG.setRoot(Chain);
}

/// visitBitTestHeader - This function emits necessary code to produce value
/// suitable for "bit tests"
void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
                                             MachineBasicBlock *SwitchBB) {
  SDLoc dl = getCurSDLoc();

  // Subtract the minimum value
  SDValue SwitchOp = getValue(B.SValue);
  EVT VT = SwitchOp.getValueType();
  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
                            DAG.getConstant(B.First, dl, VT));

  // Check range
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue RangeCmp = DAG.getSetCC(
      dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                 Sub.getValueType()),
      Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT);

  // Determine the type of the test operands.
  bool UsePtrType = false;
  if (!TLI.isTypeLegal(VT))
    UsePtrType = true;
  else {
    for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
      if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
        // Switch table case range are encoded into series of masks.
        // Just use pointer type, it's guaranteed to fit.
// Tail of SelectionDAGBuilder::visitBitTestHeader, whose head is on the
// preceding source line: body of the mask-width check started there.
        UsePtrType = true;
        break;
      }
  }
  if (UsePtrType) {
    VT = TLI.getPointerTy(DAG.getDataLayout());
    Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
  }

  // Record the register (and its type) that carries the adjusted switch value.
  B.RegVT = VT.getSimpleVT();
  B.Reg = FuncInfo.CreateReg(B.RegVT);
  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub);

  MachineBasicBlock* MBB = B.Cases[0].ThisBB;

  addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
  addSuccessorWithProb(SwitchBB, MBB, B.Prob);
  SwitchBB->normalizeSuccProbs();

  SDValue BrRange = DAG.getNode(ISD::BRCOND, dl,
                                MVT::Other, CopyTo, RangeCmp,
                                DAG.getBasicBlock(B.Default));

  // Avoid emitting unnecessary branches to the next block.
  if (MBB != NextBlock(SwitchBB))
    BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange,
                          DAG.getBasicBlock(MBB));

  DAG.setRoot(BrRange);
}

/// visitBitTestCase - this function produces one "bit test"
/// Reads the value from Reg (typed BB.RegVT), branches to B.TargetBB when the
/// test succeeds and otherwise continues to NextMBB.
void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
                                           MachineBasicBlock* NextMBB,
                                           BranchProbability BranchProbToNext,
                                           unsigned Reg,
                                           BitTestCase &B,
                                           MachineBasicBlock *SwitchBB) {
  SDLoc dl = getCurSDLoc();
  MVT VT = BB.RegVT;
  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT);
  SDValue Cmp;
  unsigned PopCount = countPopulation(B.Mask);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (PopCount == 1) {
    // Testing for a single bit; just compare the shift count with what it
    // would need to be to shift a 1 bit in that position.
    Cmp = DAG.getSetCC(
        dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
        ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT),
        ISD::SETEQ);
  } else if (PopCount == BB.Range) {
    // There is only one zero bit in the range, test for it directly.
    Cmp = DAG.getSetCC(
        dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
        ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT),
        ISD::SETNE);
  } else {
    // Make desired shift
    SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
                                    DAG.getConstant(1, dl, VT), ShiftOp);

    // Emit bit tests and jumps
    SDValue AndOp = DAG.getNode(ISD::AND, dl,
                                VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT));
    Cmp = DAG.getSetCC(
        dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
        AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
  addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
  // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
  addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
  // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
  // one as they are relative probabilities (and thus work more like weights),
  // and hence we need to normalize them to let the sum of them become one.
  SwitchBB->normalizeSuccProbs();

  SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
                              MVT::Other, getControlRoot(),
                              Cmp, DAG.getBasicBlock(B.TargetBB));

  // Avoid emitting unnecessary branches to the next block.
  if (NextMBB != NextBlock(SwitchBB))
    BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd,
                        DAG.getBasicBlock(NextMBB));

  DAG.setRoot(BrAnd);
}

/// Lower an invoke: dispatch on the kind of callee, then register the normal
/// destination and the unwind destinations as successors of the invoke's
/// block and branch to the normal destination.
// NOTE(review): `dyn_cast(Callee)`, `isa(Callee)` and `SmallVector, 1>` below
// are missing template arguments; presumably lost in extraction -- confirm
// against upstream.
void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
  MachineBasicBlock *InvokeMBB = FuncInfo.MBB;

  // Retrieve successors. Look through artificial IR level blocks like
  // catchswitch for successors.
  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
  const BasicBlock *EHPadBB = I.getSuccessor(1);

  // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
  // have to do anything here to lower funclet bundles.
  assert(!I.hasOperandBundlesOtherThan(
             {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
         "Cannot lower invokes with arbitrary operand bundles yet!");

  const Value *Callee(I.getCalledValue());
  const Function *Fn = dyn_cast(Callee);
  if (isa(Callee))
    visitInlineAsm(&I);
  else if (Fn && Fn->isIntrinsic()) {
    switch (Fn->getIntrinsicID()) {
    default:
      llvm_unreachable("Cannot invoke this intrinsic");
    case Intrinsic::donothing:
      // Ignore invokes to @llvm.donothing: jump directly to the next BB.
      break;
    case Intrinsic::experimental_patchpoint_void:
    case Intrinsic::experimental_patchpoint_i64:
      visitPatchpoint(&I, EHPadBB);
      break;
    case Intrinsic::experimental_gc_statepoint:
      LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
      break;
    }
  } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
    // Currently we do not lower any intrinsic calls with deopt operand bundles.
    // Eventually we will support lowering the @llvm.experimental.deoptimize
    // intrinsic, and right now there are no plans to support other intrinsics
    // with deopt state.
    LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
  } else {
    LowerCallTo(&I, getValue(Callee), false, EHPadBB);
  }

  // If the value of the invoke is used outside of its defining block, make it
  // available as a virtual register.
  // We already took care of the exported value for the statepoint instruction
  // during call to the LowerStatepoint.
  if (!isStatepoint(I)) {
    CopyToExportRegsIfNeeded(&I);
  }

  SmallVector, 1> UnwindDests;
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
  // Without branch probability info the EH edge gets probability zero.
  BranchProbability EHPadBBProb =
      BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
          : BranchProbability::getZero();
  findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests);

  // Update successor info.
  addSuccessorWithProb(InvokeMBB, Return);
  for (auto &UnwindDest : UnwindDests) {
    UnwindDest.first->setIsEHPad();
    addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
  }
  InvokeMBB->normalizeSuccProbs();

  // Drop into normal successor.
  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
                          MVT::Other, getControlRoot(),
                          DAG.getBasicBlock(Return)));
}

/// Resume instructions must never reach the builder; this always hits
/// llvm_unreachable.
void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
  llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
}

/// Lower a landingpad: record landing-pad info for the current block and,
/// unless one of the early exits below applies, bind the landingpad to a
/// two-element value read from the exception pointer / selector registers.
// NOTE(review): `SmallVector ValueVTs` below is missing its template
// arguments; presumably lost in extraction -- confirm against upstream.
void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
  assert(FuncInfo.MBB->isEHPad() &&
         "Call to landingpad not in landing pad!");

  MachineBasicBlock *MBB = FuncInfo.MBB;
  addLandingPadInfo(LP, *MBB);

  // If there aren't registers to copy the values into (e.g., during SjLj
  // exceptions), then don't bother to create these DAG nodes.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
  if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
      TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
    return;

  // If landingpad's return type is token type, we don't create DAG nodes
  // for its exception pointer and selector value. The extraction of exception
  // pointer or selector value from token type landingpads is not currently
  // supported.
  if (LP.getType()->isTokenTy())
    return;

  SmallVector ValueVTs;
  SDLoc dl = getCurSDLoc();
  ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs);
  assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");

  // Get the two live-in registers as SDValues. The physregs have already been
  // copied into virtual registers.
  SDValue Ops[2];
  if (FuncInfo.ExceptionPointerVirtReg) {
    Ops[0] = DAG.getZExtOrTrunc(
        DAG.getCopyFromReg(DAG.getEntryNode(), dl,
                           FuncInfo.ExceptionPointerVirtReg,
                           TLI.getPointerTy(DAG.getDataLayout())),
        dl, ValueVTs[0]);
  } else {
    // No exception-pointer virtual register: use a zero constant instead.
    Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()));
  }
  Ops[1] = DAG.getZExtOrTrunc(
      DAG.getCopyFromReg(DAG.getEntryNode(), dl,
                         FuncInfo.ExceptionSelectorVirtReg,
                         TLI.getPointerTy(DAG.getDataLayout())),
      dl, ValueVTs[1]);

  // Merge into one.
// Tail of SelectionDAGBuilder::visitLandingPad, whose head is on the preceding
// source line: merge the two operands and bind the result to the landingpad.
  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
                            DAG.getVTList(ValueVTs), Ops);
  setValue(&LP, Res);
}

/// Sort single-case clusters by signed case value, then merge in place every
/// run of clusters that share a successor block and whose case values differ
/// by exactly one, accumulating their probabilities. Clusters is shrunk to the
/// merged length.
void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
#ifndef NDEBUG
  for (const CaseCluster &CC : Clusters)
    assert(CC.Low == CC.High && "Input clusters must be single-case");
#endif

  std::sort(Clusters.begin(), Clusters.end(),
            [](const CaseCluster &a, const CaseCluster &b) {
    return a.Low->getValue().slt(b.Low->getValue());
  });

  // Merge adjacent clusters with the same destination.
  // DstIndex is the write cursor; entries before it are already final.
  const unsigned N = Clusters.size();
  unsigned DstIndex = 0;
  for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) {
    CaseCluster &CC = Clusters[SrcIndex];
    const ConstantInt *CaseVal = CC.Low;
    MachineBasicBlock *Succ = CC.MBB;

    if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ &&
        (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) {
      // If this case has the same successor and is a neighbour, merge it into
      // the previous cluster.
      Clusters[DstIndex - 1].High = CaseVal;
      Clusters[DstIndex - 1].Prob += CC.Prob;
    } else {
      // Keep this cluster: copy it down to the write cursor.
      std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex],
                   sizeof(Clusters[SrcIndex]));
    }
  }
  Clusters.resize(DstIndex);
}

/// Retarget bookkeeping from block First to block Last: every jump-table
/// header whose HeaderBB is First and every bit-test block whose Parent is
/// First is updated to point at Last.
void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
                                           MachineBasicBlock *Last) {
  // Update JTCases.
  for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
    if (JTCases[i].first.HeaderBB == First)
      JTCases[i].first.HeaderBB = Last;

  // Update BitTestCases.
  for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
    if (BitTestCases[i].Parent == First)
      BitTestCases[i].Parent = Last;
}

/// Lower an indirectbr: add each distinct IR successor once as a successor of
/// the current block, then set the DAG root to a BRIND on the branch address.
// NOTE(review): `SmallSet Done` is missing its template arguments; presumably
// lost in extraction -- confirm against upstream.
void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
  MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;

  // Update machine-CFG edges with unique successors.
  SmallSet Done;
  for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
    BasicBlock *BB = I.getSuccessor(i);
    bool Inserted = Done.insert(BB).second;
    if (!Inserted)
        continue;

    MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
    addSuccessorWithProb(IndirectBrMBB, Succ);
  }
  IndirectBrMBB->normalizeSuccProbs();

  DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
                          MVT::Other, getControlRoot(),
                          getValue(I.getAddress())));
}

/// Emit a TRAP node for unreachable, but only when the target option
/// TrapUnreachable is set; otherwise nothing is emitted.
void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
  if (DAG.getTarget().Options.TrapUnreachable)
    DAG.setRoot(
        DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
}

/// Lower fsub, special-casing the negation idiom as an FNEG node; every other
/// fsub goes through visitBinary with ISD::FSUB.
// NOTE(review): `isa(I.getOperand(0))` is missing its template argument;
// presumably lost in extraction -- confirm against upstream.
void SelectionDAGBuilder::visitFSub(const User &I) {
  // -0.0 - X --> fneg
  Type *Ty = I.getType();
  if (isa(I.getOperand(0)) &&
      I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
    SDValue Op2 = getValue(I.getOperand(1));
    setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(),
                             Op2.getValueType(), Op2));
    return;
  }

  visitBinary(I, ISD::FSUB);
}

/// Checks if the given instruction performs a vector reduction, in which case
/// we have the freedom to alter the elements in the result as long as the
/// reduction of them stays unchanged.
// NOTE(review): the dyn_cast(...)/isa(...) calls and the SmallVector /
// SmallPtrSet declarations in this function are missing template arguments;
// presumably lost in extraction -- confirm against upstream.
static bool isVectorReductionOp(const User *I) {
  const Instruction *Inst = dyn_cast(I);
  if (!Inst || !Inst->getType()->isVectorTy())
    return false;

  // Only these opcodes are candidates; the FP ones additionally require the
  // unsafeAlgebra fast-math flag.
  auto OpCode = Inst->getOpcode();
  switch (OpCode) {
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    break;
  case Instruction::FAdd:
  case Instruction::FMul:
    if (const FPMathOperator *FPOp = dyn_cast(Inst))
      if (FPOp->getFastMathFlags().unsafeAlgebra())
        break;
    LLVM_FALLTHROUGH;
  default:
    return false;
  }

  unsigned ElemNum = Inst->getType()->getVectorNumElements();
  unsigned ElemNumToReduce = ElemNum;

  // Do DFS search on the def-use chain from the given instruction. We only
  // allow four kinds of operations during the search until we reach the
  // instruction that extracts the first element from the vector:
  //
  //   1. The reduction operation of the same opcode as the given instruction.
  //
  //   2. PHI node.
  //
  //   3. ShuffleVector instruction together with a reduction operation that
  //      does a partial reduction.
  //
  //   4. ExtractElement that extracts the first element from the vector, and we
  //      stop searching the def-use chain here.
  //
  // 3 & 4 above perform a reduction on all elements of the vector. We push defs
  // from 1-3 to the stack to continue the DFS. The given instruction is not
  // a reduction operation if we meet any other instructions other than those
  // listed above.

  SmallVector UsersToVisit{Inst};
  SmallPtrSet Visited;
  bool ReduxExtracted = false;

  while (!UsersToVisit.empty()) {
    auto User = UsersToVisit.back();
    UsersToVisit.pop_back();
    if (!Visited.insert(User).second)
      continue;

    for (const auto &U : User->users()) {
      // Note: this Inst shadows the function-level Inst declared above.
      auto Inst = dyn_cast(U);
      if (!Inst)
        return false;

      if (Inst->getOpcode() == OpCode || isa(U)) {
        if (const FPMathOperator *FPOp = dyn_cast(Inst))
          if (!isa(FPOp) && !FPOp->getFastMathFlags().unsafeAlgebra())
            return false;
        UsersToVisit.push_back(U);
      } else if (const ShuffleVectorInst *ShufInst =
                     dyn_cast(U)) {
        // Detect the following pattern: A ShuffleVector instruction together
        // with a reduction that do partial reduction on the first and second
        // ElemNumToReduce / 2 elements, and store the result in
        // ElemNumToReduce / 2 elements in another vector.

        unsigned ResultElements = ShufInst->getType()->getVectorNumElements();
        if (ResultElements < ElemNum)
          return false;

        if (ElemNumToReduce == 1)
          return false;
        if (!isa(U->getOperand(1)))
          return false;
        // The low half of the mask must select the upper half of the elements
        // still to be reduced; every remaining mask entry must be -1.
        for (unsigned i = 0; i < ElemNumToReduce / 2; ++i)
          if (ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2))
            return false;
        for (unsigned i = ElemNumToReduce / 2; i < ElemNum; ++i)
          if (ShufInst->getMaskValue(i) != -1)
            return false;

        // There is only one user of this ShuffleVector instruction, which
        // must be a reduction operation.
        if (!U->hasOneUse())
          return false;

        auto U2 = dyn_cast(*U->user_begin());
        if (!U2 || U2->getOpcode() != OpCode)
          return false;

        // Check operands of the reduction operation.
        if ((U2->getOperand(0) == U->getOperand(0) && U2->getOperand(1) == U) ||
            (U2->getOperand(1) == U->getOperand(0) && U2->getOperand(0) == U)) {
          UsersToVisit.push_back(U2);
          ElemNumToReduce /= 2;
        } else
          return false;
      } else if (isa(U)) {
        // At this moment we should have reduced all elements in the vector.
        if (ElemNumToReduce != 1)
          return false;

        const ConstantInt *Val = dyn_cast(U->getOperand(1));
        if (!Val || Val->getZExtValue() != 0)
          return false;

        ReduxExtracted = true;
      } else
        return false;
    }
  }
  return ReduxExtracted;
}

/// Lower a two-operand instruction to a DAG node with opcode OpCode, carrying
/// over nuw/nsw/exact, the vector-reduction marker and (only when
/// EnableFMFInDAG is set) the fast-math flags as SDNodeFlags.
// NOTE(review): the dyn_cast(&I) calls below are missing template arguments;
// presumably lost in extraction -- confirm against upstream.
void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
  SDValue Op1 = getValue(I.getOperand(0));
  SDValue Op2 = getValue(I.getOperand(1));

  bool nuw = false;
  bool nsw = false;
  bool exact = false;
  bool vec_redux = false;
  FastMathFlags FMF;

  if (const OverflowingBinaryOperator *OFBinOp =
          dyn_cast(&I)) {
    nuw = OFBinOp->hasNoUnsignedWrap();
    nsw = OFBinOp->hasNoSignedWrap();
  }
  if (const PossiblyExactOperator *ExactOp =
          dyn_cast(&I))
    exact = ExactOp->isExact();
  if (const FPMathOperator *FPOp = dyn_cast(&I))
    FMF = FPOp->getFastMathFlags();

  if (isVectorReductionOp(&I)) {
    vec_redux = true;
    DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
  }

  SDNodeFlags Flags;
  Flags.setExact(exact);
  Flags.setNoSignedWrap(nsw);
  Flags.setNoUnsignedWrap(nuw);
  Flags.setVectorReduction(vec_redux);
  if (EnableFMFInDAG) {
    Flags.setAllowReciprocal(FMF.allowReciprocal());
    Flags.setNoInfs(FMF.noInfs());
    Flags.setNoNaNs(FMF.noNaNs());
    Flags.setNoSignedZeros(FMF.noSignedZeros());
    Flags.setUnsafeAlgebra(FMF.unsafeAlgebra());
  }
  SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
                                     Op1, Op2, &Flags);
  setValue(&I, BinNodeValue);
}

/// Lower a shift to a DAG node with the given Opcode. For non-vector shifts
/// the shift amount is first coerced toward the target's shift-amount type.
// NOTE(review): the dyn_cast(&I) calls below are missing template arguments;
// presumably lost in extraction -- confirm against upstream.
void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
  SDValue Op1 = getValue(I.getOperand(0));
  SDValue Op2 = getValue(I.getOperand(1));

  EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
      Op2.getValueType(), DAG.getDataLayout());

  // Coerce the shift amount to the right type if we can.
  if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
    unsigned ShiftSize = ShiftTy.getSizeInBits();
    unsigned Op2Size = Op2.getValueSizeInBits();
    SDLoc DL = getCurSDLoc();

    // If the operand is smaller than the shift count type, promote it.
    if (ShiftSize > Op2Size)
      Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);

    // If the operand is larger than the shift count type but the shift
    // count type has enough bits to represent any shift value, truncate
    // it now. This is a common case and it exposes the truncate to
    // optimization early.
    else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits()))
      Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
    // Otherwise we'll need to temporarily settle for some other convenient
    // type.  Type legalization will make adjustments once the shiftee is split.
    else
      Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
  }

  bool nuw = false;
  bool nsw = false;
  bool exact = false;

  if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {

    if (const OverflowingBinaryOperator *OFBinOp =
            dyn_cast(&I)) {
      nuw = OFBinOp->hasNoUnsignedWrap();
      nsw = OFBinOp->hasNoSignedWrap();
    }
    if (const PossiblyExactOperator *ExactOp =
            dyn_cast(&I))
      exact = ExactOp->isExact();
  }
  SDNodeFlags Flags;
  Flags.setExact(exact);
  Flags.setNoSignedWrap(nsw);
  Flags.setNoUnsignedWrap(nuw);
  SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
                            &Flags);
  setValue(&I, Res);
}

/// Lower sdiv to ISD::SDIV, propagating the instruction's exact flag.
// NOTE(review): `isa(&I)` / `cast(&I)` are missing template arguments;
// presumably lost in extraction -- confirm against upstream.
void SelectionDAGBuilder::visitSDiv(const User &I) {
  SDValue Op1 = getValue(I.getOperand(0));
  SDValue Op2 = getValue(I.getOperand(1));

  SDNodeFlags Flags;
  Flags.setExact(isa(&I) &&
                 cast(&I)->isExact());
  setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
                           Op2, &Flags));
}

/// Lower an integer compare (instruction or constant expression) to a SETCC
/// whose condition code is derived from the IR predicate.
// NOTE(review): the dyn_cast(&I) calls below are missing template arguments;
// presumably lost in extraction -- confirm against upstream.
void SelectionDAGBuilder::visitICmp(const User &I) {
  ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
  if (const ICmpInst *IC = dyn_cast(&I))
    predicate = IC->getPredicate();
  else if (const ConstantExpr *IC = dyn_cast(&I))
    predicate = ICmpInst::Predicate(IC->getPredicate());
  SDValue Op1 = getValue(I.getOperand(0));
  SDValue Op2 = getValue(I.getOperand(1));
  ISD::CondCode Opcode = getICmpCondCode(predicate);

  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                        I.getType());
  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
}

/// Lower a floating-point compare (instruction or constant expression) to a
/// SETCC whose condition code is derived from the IR predicate.
// NOTE(review): the dyn_cast(&I) calls below are missing template arguments;
// presumably lost in extraction -- confirm against upstream.
void SelectionDAGBuilder::visitFCmp(const User &I) {
  FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
  if (const FCmpInst *FC = dyn_cast(&I))
    predicate = FC->getPredicate();
  else if (const ConstantExpr *FC = dyn_cast(&I))
    predicate = FCmpInst::Predicate(FC->getPredicate());
  SDValue Op1 = getValue(I.getOperand(0));
  SDValue Op2 = getValue(I.getOperand(1));
  ISD::CondCode Condition = getFCmpCondCode(predicate);

  // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them.
// FIXME: We should propagate the fast-math-flags to the DAG node itself for // further optimization, but currently FMF is only applicable to binary nodes. if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); } // Check if the condition of the select has one use or two users that are both // selects with the same condition. static bool hasOnlySelectUsers(const Value *Cond) { return all_of(Cond->users(), [](const Value *V) { return isa(V); }); } void SelectionDAGBuilder::visitSelect(const User &I) { SmallVector ValueVTs; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; SmallVector Values(NumValues); SDValue Cond = getValue(I.getOperand(0)); SDValue LHSVal = getValue(I.getOperand(1)); SDValue RHSVal = getValue(I.getOperand(2)); auto BaseOps = {Cond}; ISD::NodeType OpCode = Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT; // Min/max matching is only viable if all output VTs are the same. if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) { EVT VT = ValueVTs[0]; LLVMContext &Ctx = *DAG.getContext(); auto &TLI = DAG.getTargetLoweringInfo(); // We care about the legality of the operation after it has been type // legalized. while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal && VT != TLI.getTypeToTransformTo(Ctx, VT)) VT = TLI.getTypeToTransformTo(Ctx, VT); // If the vselect is legal, assume we want to leave this as a vector setcc + // vselect. Otherwise, if this is going to be scalarized, we want to see if // min/max is legal on the scalar type. 
bool UseScalarMinMax = VT.isVector() && !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT); Value *LHS, *RHS; auto SPR = matchSelectPattern(const_cast(&I), LHS, RHS); ISD::NodeType Opc = ISD::DELETED_NODE; switch (SPR.Flavor) { case SPF_UMAX: Opc = ISD::UMAX; break; case SPF_UMIN: Opc = ISD::UMIN; break; case SPF_SMAX: Opc = ISD::SMAX; break; case SPF_SMIN: Opc = ISD::SMIN; break; case SPF_FMINNUM: switch (SPR.NaNBehavior) { case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break; case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break; case SPNB_RETURNS_ANY: { if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT)) Opc = ISD::FMINNUM; else if (TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT)) Opc = ISD::FMINNAN; else if (UseScalarMinMax) Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ? ISD::FMINNUM : ISD::FMINNAN; break; } } break; case SPF_FMAXNUM: switch (SPR.NaNBehavior) { case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break; case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break; case SPNB_RETURNS_ANY: if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT)) Opc = ISD::FMAXNUM; else if (TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT)) Opc = ISD::FMAXNAN; else if (UseScalarMinMax) Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ? ISD::FMAXNUM : ISD::FMAXNAN; break; } break; default: break; } if (Opc != ISD::DELETED_NODE && (TLI.isOperationLegalOrCustom(Opc, VT) || (UseScalarMinMax && TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) && // If the underlying comparison instruction is used by any other // instruction, the consumed instructions won't be destroyed, so it is // not profitable to convert to a min/max. 
hasOnlySelectUsers(cast(I).getCondition())) { OpCode = Opc; LHSVal = getValue(LHS); RHSVal = getValue(RHS); BaseOps = {}; } } for (unsigned i = 0; i != NumValues; ++i) { SmallVector Ops(BaseOps.begin(), BaseOps.end()); Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i)); Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i)); Values[i] = DAG.getNode(OpCode, getCurSDLoc(), LHSVal.getNode()->getValueType(LHSVal.getResNo()+i), Ops); } setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(ValueVTs), Values)); } void SelectionDAGBuilder::visitTrunc(const User &I) { // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). SDValue N = getValue(I.getOperand(0)); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitZExt(const User &I) { // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // ZExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitSExt(const User &I) { // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // SExt also can't be a cast to bool for same reason. 
So, nothing much to do SDValue N = getValue(I.getOperand(0)); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPTrunc(const User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); SDLoc dl = getCurSDLoc(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N, DAG.getTargetConstant( 0, dl, TLI.getPointerTy(DAG.getDataLayout())))); } void SelectionDAGBuilder::visitFPExt(const User &I) { // FPExt is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToUI(const User &I) { // FPToUI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToSI(const User &I) { // FPToSI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitUIToFP(const User &I) { // UIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitSIToFP(const User &I) { // SIToFP is never a no-op cast, no need to check SDValue N = 
getValue(I.getOperand(0)); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitPtrToInt(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); } void SelectionDAGBuilder::visitIntToPtr(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); } void SelectionDAGBuilder::visitBitCast(const User &I) { SDValue N = getValue(I.getOperand(0)); SDLoc dl = getCurSDLoc(); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); // BitCast assures us that source and destination are the same size so this is // either a BITCAST or a no-op. if (DestVT != N.getValueType()) setValue(&I, DAG.getNode(ISD::BITCAST, dl, DestVT, N)); // convert types. // Check if the original LLVM IR Operand was a ConstantInt, because getValue() // might fold any kind of constant expression to an integer constant and that // is not what we are looking for. Only regcognize a bitcast of a genuine // constant integer as an opaque constant. else if(ConstantInt *C = dyn_cast(I.getOperand(0))) setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false, /*isOpaque*/true)); else setValue(&I, N); // noop cast. 
} void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const Value *SV = I.getOperand(0); SDValue N = getValue(SV); EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); unsigned SrcAS = SV->getType()->getPointerAddressSpace(); unsigned DestAS = I.getType()->getPointerAddressSpace(); if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS); setValue(&I, N); } void SelectionDAGBuilder::visitInsertElement(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); SDValue InVal = getValue(I.getOperand(1)); SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(), TLI.getVectorIdxTy(DAG.getDataLayout())); setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()), InVec, InVal, InIdx)); } void SelectionDAGBuilder::visitExtractElement(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(), TLI.getVectorIdxTy(DAG.getDataLayout())); setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()), InVec, InIdx)); } void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); SDLoc DL = getCurSDLoc(); SmallVector Mask; ShuffleVectorInst::getShuffleMask(cast(I.getOperand(2)), Mask); unsigned MaskNumElts = Mask.size(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); if (SrcNumElts == MaskNumElts) { setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask)); return; } // Normalize the shuffle 
vector since mask and vector length don't match. if (SrcNumElts < MaskNumElts) { // Mask is longer than the source vectors. We can use concatenate vector to // make the mask and vectors lengths match. if (MaskNumElts % SrcNumElts == 0) { // Mask length is a multiple of the source vector length. // Check if the shuffle is some kind of concatenation of the input // vectors. unsigned NumConcat = MaskNumElts / SrcNumElts; bool IsConcat = true; SmallVector ConcatSrcs(NumConcat, -1); for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; if (Idx < 0) continue; // Ensure the indices in each SrcVT sized piece are sequential and that // the same source is used for the whole piece. if ((Idx % SrcNumElts != (i % SrcNumElts)) || (ConcatSrcs[i / SrcNumElts] >= 0 && ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) { IsConcat = false; break; } // Remember which source this index came from. ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts; } // The shuffle is concatenating multiple vectors together. Just emit // a CONCAT_VECTORS operation. if (IsConcat) { SmallVector ConcatOps; for (auto Src : ConcatSrcs) { if (Src < 0) ConcatOps.push_back(DAG.getUNDEF(SrcVT)); else if (Src == 0) ConcatOps.push_back(Src1); else ConcatOps.push_back(Src2); } setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps)); return; } } unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts); unsigned NumConcat = PaddedMaskNumElts / SrcNumElts; EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), PaddedMaskNumElts); // Pad both vectors with undefs to make them the same length as the mask. SDValue UndefVal = DAG.getUNDEF(SrcVT); SmallVector MOps1(NumConcat, UndefVal); SmallVector MOps2(NumConcat, UndefVal); MOps1[0] = Src1; MOps2[0] = Src2; Src1 = Src1.isUndef() ? DAG.getUNDEF(PaddedVT) : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1); Src2 = Src2.isUndef() ? 
DAG.getUNDEF(PaddedVT) : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2); // Readjust mask for new input vector length. SmallVector MappedOps(PaddedMaskNumElts, -1); for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts - PaddedMaskNumElts; MappedOps[i] = Idx; } SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps); // If the concatenated vector was padded, extract a subvector with the // correct number of elements. if (MaskNumElts != PaddedMaskNumElts) Result = DAG.getNode( ISD::EXTRACT_SUBVECTOR, DL, VT, Result, DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); setValue(&I, Result); return; } if (SrcNumElts > MaskNumElts) { // Analyze the access pattern of the vector to see if we can extract // two subvectors and do the shuffle. The analysis is done by calculating // the range of elements the mask access on both vectors. int MinRange[2] = { static_cast(SrcNumElts), static_cast(SrcNumElts)}; int MaxRange[2] = {-1, -1}; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; unsigned Input = 0; if (Idx < 0) continue; if (Idx >= (int)SrcNumElts) { Input = 1; Idx -= SrcNumElts; } if (Idx > MaxRange[Input]) MaxRange[Input] = Idx; if (Idx < MinRange[Input]) MinRange[Input] = Idx; } // Check if the access is smaller than the vector size and can we find // a reasonable extract index. int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Can not // Extract. int StartIdx[2]; // StartIdx to extract from for (unsigned Input = 0; Input < 2; ++Input) { if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) { RangeUse[Input] = 0; // Unused StartIdx[Input] = 0; continue; } // Find a good start index that is a multiple of the mask length. Then // see if the rest of the elements are in range. 
StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && StartIdx[Input] + MaskNumElts <= SrcNumElts) RangeUse[Input] = 1; // Extract from a multiple of the mask length. } if (RangeUse[0] == 0 && RangeUse[1] == 0) { setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. return; } if (RangeUse[0] >= 0 && RangeUse[1] >= 0) { // Extract appropriate subvector and generate a vector shuffle for (unsigned Input = 0; Input < 2; ++Input) { SDValue &Src = Input == 0 ? Src1 : Src2; if (RangeUse[Input] == 0) Src = DAG.getUNDEF(VT); else { Src = DAG.getNode( ISD::EXTRACT_SUBVECTOR, DL, VT, Src, DAG.getConstant(StartIdx[Input], DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); } } // Calculate new mask. SmallVector MappedOps; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; if (Idx >= 0) { if (Idx < (int)SrcNumElts) Idx -= StartIdx[0]; else Idx -= SrcNumElts + StartIdx[1] - MaskNumElts; } MappedOps.push_back(Idx); } setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps)); return; } } // We can't use either concat vectors or extract subvectors so fall back to // replacing the shuffle with extract and build vector. // to insert and build vector. EVT EltVT = VT.getVectorElementType(); EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); SmallVector Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; SDValue Res; if (Idx < 0) { Res = DAG.getUNDEF(EltVT); } else { SDValue &Src = Idx < (int)SrcNumElts ? 
Src1 : Src2; if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src, DAG.getConstant(Idx, DL, IdxVT)); } Ops.push_back(Res); } setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops)); } void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { const Value *Op0 = I.getOperand(0); const Value *Op1 = I.getOperand(1); Type *AggTy = I.getType(); Type *ValTy = Op1->getType(); bool IntoUndef = isa(Op0); bool FromUndef = isa(Op1); unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector AggValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs); SmallVector ValValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs); unsigned NumAggValues = AggValueVTs.size(); unsigned NumValValues = ValValueVTs.size(); SmallVector Values(NumAggValues); // Ignore an insertvalue that produces an empty object if (!NumAggValues) { setValue(&I, DAG.getUNDEF(MVT(MVT::Other))); return; } SDValue Agg = getValue(Op0); unsigned i = 0; // Copy the beginning value(s) from the original aggregate. for (; i != LinearIndex; ++i) Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : SDValue(Agg.getNode(), Agg.getResNo() + i); // Copy values from the inserted value(s). if (NumValValues) { SDValue Val = getValue(Op1); for (; i != LinearIndex + NumValValues; ++i) Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) : SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex); } // Copy remaining value(s) from the original aggregate. for (; i != NumAggValues; ++i) Values[i] = IntoUndef ? 
DAG.getUNDEF(AggValueVTs[i]) : SDValue(Agg.getNode(), Agg.getResNo() + i); setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(AggValueVTs), Values)); } void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { const Value *Op0 = I.getOperand(0); Type *AggTy = Op0->getType(); Type *ValTy = I.getType(); bool OutOfUndef = isa(Op0); unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector ValValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs); unsigned NumValValues = ValValueVTs.size(); // Ignore a extractvalue that produces an empty object if (!NumValValues) { setValue(&I, DAG.getUNDEF(MVT(MVT::Other))); return; } SmallVector Values(NumValValues); SDValue Agg = getValue(Op0); // Copy out the selected value(s). for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i) Values[i - LinearIndex] = OutOfUndef ? DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) : SDValue(Agg.getNode(), Agg.getResNo() + i); setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(ValValueVTs), Values)); } void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Value *Op0 = I.getOperand(0); // Note that the pointer operand may be a vector of pointers. Take the scalar // element which holds a pointer. unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace(); SDValue N = getValue(Op0); SDLoc dl = getCurSDLoc(); // Normalize Vector GEP - all scalar operands should be converted to the // splat vector. unsigned VectorWidth = I.getType()->isVectorTy() ? 
cast(I.getType())->getVectorNumElements() : 0; if (VectorWidth && !N.getValueType().isVector()) { LLVMContext &Context = *DAG.getContext(); EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth); N = DAG.getSplatBuildVector(VT, dl, N); } for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I); GTI != E; ++GTI) { const Value *Idx = GTI.getOperand(); if (StructType *StTy = GTI.getStructTypeOrNull()) { unsigned Field = cast(Idx)->getUniqueInteger().getZExtValue(); if (Field) { // N = N + Offset uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); // In an inbouds GEP with an offset that is nonnegative even when // interpreted as signed, assume there is no unsigned overflow. SDNodeFlags Flags; if (int64_t(Offset) >= 0 && cast(I).isInBounds()) Flags.setNoUnsignedWrap(true); N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, DAG.getConstant(Offset, dl, N.getValueType()), &Flags); } } else { MVT PtrTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout(), AS); unsigned PtrSize = PtrTy.getSizeInBits(); APInt ElementSize(PtrSize, DL->getTypeAllocSize(GTI.getIndexedType())); // If this is a scalar constant or a splat vector of constants, // handle it quickly. const auto *CI = dyn_cast(Idx); if (!CI && isa(Idx) && cast(Idx)->getSplatValue()) CI = cast(cast(Idx)->getSplatValue()); if (CI) { if (CI->isZero()) continue; APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); LLVMContext &Context = *DAG.getContext(); SDValue OffsVal = VectorWidth ? DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, PtrTy, VectorWidth)) : DAG.getConstant(Offs, dl, PtrTy); // In an inbouds GEP with an offset that is nonnegative even when // interpreted as signed, assume there is no unsigned overflow. 
SDNodeFlags Flags; if (Offs.isNonNegative() && cast(I).isInBounds()) Flags.setNoUnsignedWrap(true); N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, &Flags); continue; } // N = N + Idx * ElementSize; SDValue IdxN = getValue(Idx); if (!IdxN.getValueType().isVector() && VectorWidth) { MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth); IdxN = DAG.getSplatBuildVector(VT, dl, IdxN); } // If the index is smaller or larger than intptr_t, truncate or extend // it. IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType()); // If this is a multiply by a power of two, turn it into a shl // immediately. This is a very common case. if (ElementSize != 1) { if (ElementSize.isPowerOf2()) { unsigned Amt = ElementSize.logBase2(); IdxN = DAG.getNode(ISD::SHL, dl, N.getValueType(), IdxN, DAG.getConstant(Amt, dl, IdxN.getValueType())); } else { SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType()); IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, Scale); } } N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, IdxN); } } setValue(&I, N); } void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // If this is a fixed sized alloca in the entry block of the function, // allocate it statically on the stack. if (FuncInfo.StaticAllocaMap.count(&I)) return; // getValue will auto-populate this. SDLoc dl = getCurSDLoc(); Type *Ty = I.getAllocatedType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); auto &DL = DAG.getDataLayout(); uint64_t TySize = DL.getTypeAllocSize(Ty); unsigned Align = std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment()); SDValue AllocSize = getValue(I.getArraySize()); EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); if (AllocSize.getValueType() != IntPtr) AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr); AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize, DAG.getConstant(TySize, dl, IntPtr)); // Handle alignment. 
If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. unsigned StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlignment(); if (Align <= StackAlign) Align = 0; // Round the size of the allocation up to the stack alignment size // by add SA-1 to the size. This doesn't overflow because we're computing // an address inside an alloca. SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize, DAG.getIntPtrConstant(StackAlign - 1, dl), &Flags); // Mask out the low bits for alignment purposes. AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize, DAG.getIntPtrConstant(~(uint64_t)(StackAlign - 1), dl)); SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align, dl) }; SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops); setValue(&I, DSA); DAG.setRoot(DSA.getValue(1)); assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects()); } void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (I.isAtomic()) return visitAtomicLoad(I); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const Value *SV = I.getOperand(0); if (TLI.supportSwiftError()) { // Swifterror values can come from either a function parameter with // swifterror attribute or an alloca with swifterror attribute. 
if (const Argument *Arg = dyn_cast(SV)) { if (Arg->hasSwiftErrorAttr()) return visitLoadFromSwiftError(I); } if (const AllocaInst *Alloca = dyn_cast(SV)) { if (Alloca->isSwiftError()) return visitLoadFromSwiftError(I); } } SDValue Ptr = getValue(SV); Type *Ty = I.getType(); bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr; bool isDereferenceable = isDereferenceablePointer(SV, DAG.getDataLayout()); unsigned Alignment = I.getAlignment(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); SmallVector ValueVTs; SmallVector Offsets; ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; SDValue Root; bool ConstantMemory = false; if (isVolatile || NumValues > MaxParallelChains) // Serialize volatile loads with other side effects. Root = getRoot(); else if (AA->pointsToConstantMemory(MemoryLocation( SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; } else { // Do not serialize non-volatile loads against each other. Root = DAG.getRoot(); } SDLoc dl = getCurSDLoc(); if (isVolatile) Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG); // An aggregate load cannot wrap around the address space, so offsets to its // parts don't wrap either. SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); SmallVector Values(NumValues); SmallVector Chains(std::min(MaxParallelChains, NumValues)); EVT PtrVT = Ptr.getValueType(); unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // Serializing loads here may result in excessive register pressure, and // TokenFactor places arbitrary choke points on the scheduler. 
SD scheduling // could recover a bit by hoisting nodes upward in the chain by recognizing // they are side-effect free or do not alias. The optimizer should really // avoid this case by converting large object/array copies to llvm.memcpy // (MaxParallelChains should always remain as failsafe). if (ChainI == MaxParallelChains) { assert(PendingLoads.empty() && "PendingLoads must be serialized first"); SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } SDValue A = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, DAG.getConstant(Offsets[i], dl, PtrVT), &Flags); auto MMOFlags = MachineMemOperand::MONone; if (isVolatile) MMOFlags |= MachineMemOperand::MOVolatile; if (isNonTemporal) MMOFlags |= MachineMemOperand::MONonTemporal; if (isInvariant) MMOFlags |= MachineMemOperand::MOInvariant; if (isDereferenceable) MMOFlags |= MachineMemOperand::MODereferenceable; SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A, MachinePointerInfo(SV, Offsets[i]), Alignment, MMOFlags, AAInfo, Ranges); Values[i] = L; Chains[ChainI] = L.getValue(1); } if (!ConstantMemory) { SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); if (isVolatile) DAG.setRoot(Chain); else PendingLoads.push_back(Chain); } setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values)); } void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); assert(TLI.supportSwiftError() && "call visitStoreToSwiftError when backend supports swifterror"); SmallVector ValueVTs; SmallVector Offsets; const Value *SrcV = I.getOperand(0); ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), SrcV->getType(), ValueVTs, &Offsets); assert(ValueVTs.size() == 1 && Offsets[0] == 0 && "expect a single EVT for swifterror"); SDValue Src = getValue(SrcV); // Create a virtual register, then update the virtual register. 
auto &DL = DAG.getDataLayout(); const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL)); unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue // Chain can be getRoot or getControlRoot. SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg, SDValue(Src.getNode(), Src.getResNo())); DAG.setRoot(CopyNode); FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg); } void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { assert(DAG.getTargetLoweringInfo().supportSwiftError() && "call visitLoadFromSwiftError when backend supports swifterror"); assert(!I.isVolatile() && I.getMetadata(LLVMContext::MD_nontemporal) == nullptr && I.getMetadata(LLVMContext::MD_invariant_load) == nullptr && "Support volatile, non temporal, invariant for load_from_swift_error"); const Value *SV = I.getOperand(0); Type *Ty = I.getType(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); assert(!AA->pointsToConstantMemory(MemoryLocation( SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo)) && "load_from_swift_error should not be constant memory"); SmallVector ValueVTs; SmallVector Offsets; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty, ValueVTs, &Offsets); assert(ValueVTs.size() == 1 && Offsets[0] == 0 && "expect a single EVT for swifterror"); // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT SDValue L = DAG.getCopyFromReg( getRoot(), getCurSDLoc(), FuncInfo.getOrCreateSwiftErrorVReg(FuncInfo.MBB, SV), ValueVTs[0]); setValue(&I, L); } void SelectionDAGBuilder::visitStore(const StoreInst &I) { if (I.isAtomic()) return visitAtomicStore(I); const Value *SrcV = I.getOperand(0); const Value *PtrV = I.getOperand(1); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.supportSwiftError()) { // Swifterror values can come from either a function parameter with // swifterror attribute or an alloca with swifterror attribute. 
if (const Argument *Arg = dyn_cast(PtrV)) { if (Arg->hasSwiftErrorAttr()) return visitStoreToSwiftError(I); } if (const AllocaInst *Alloca = dyn_cast(PtrV)) { if (Alloca->isSwiftError()) return visitStoreToSwiftError(I); } } SmallVector ValueVTs; SmallVector Offsets; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), SrcV->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; // Get the lowered operands. Note that we do this after // checking if NumResults is zero, because with zero results // the operands won't have values in the map. SDValue Src = getValue(SrcV); SDValue Ptr = getValue(PtrV); SDValue Root = getRoot(); SmallVector Chains(std::min(MaxParallelChains, NumValues)); SDLoc dl = getCurSDLoc(); EVT PtrVT = Ptr.getValueType(); unsigned Alignment = I.getAlignment(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); auto MMOFlags = MachineMemOperand::MONone; if (I.isVolatile()) MMOFlags |= MachineMemOperand::MOVolatile; if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr) MMOFlags |= MachineMemOperand::MONonTemporal; // An aggregate load cannot wrap around the address space, so offsets to its // parts don't wrap either. SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // See visitLoad comments. 
if (ChainI == MaxParallelChains) { SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, DAG.getConstant(Offsets[i], dl, PtrVT), &Flags); SDValue St = DAG.getStore( Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add, MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo); Chains[ChainI] = St; } SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); DAG.setRoot(StoreNode); } void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, bool IsCompressing) { SDLoc sdl = getCurSDLoc(); auto getMaskedStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, unsigned& Alignment) { // llvm.masked.store.*(Src0, Ptr, alignment, Mask) Src0 = I.getArgOperand(0); Ptr = I.getArgOperand(1); Alignment = cast(I.getArgOperand(2))->getZExtValue(); Mask = I.getArgOperand(3); }; auto getCompressingStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, unsigned& Alignment) { // llvm.masked.compressstore.*(Src0, Ptr, Mask) Src0 = I.getArgOperand(0); Ptr = I.getArgOperand(1); Mask = I.getArgOperand(2); Alignment = 0; }; Value *PtrOperand, *MaskOperand, *Src0Operand; unsigned Alignment; if (IsCompressing) getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment); else getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment); SDValue Ptr = getValue(PtrOperand); SDValue Src0 = getValue(Src0Operand); SDValue Mask = getValue(MaskOperand); EVT VT = Src0.getValueType(); if (!Alignment) Alignment = DAG.getEVTAlignment(VT); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); MachineMemOperand *MMO = DAG.getMachineFunction(). 
getMachineMemOperand(MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, VT.getStoreSize(), Alignment, AAInfo); SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT, MMO, false /* Truncating */, IsCompressing); DAG.setRoot(StoreNode); setValue(&I, StoreNode); } // Get a uniform base for the Gather/Scatter intrinsic. // The first argument of the Gather/Scatter intrinsic is a vector of pointers. // We try to represent it as a base pointer + vector of indices. // Usually, the vector of pointers comes from a 'getelementptr' instruction. // The first operand of the GEP may be a single pointer or a vector of pointers // Example: // %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind // or // %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind // %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, .. // // When the first GEP operand is a single pointer - it is the uniform base we // are looking for. If first operand of the GEP is a splat vector - we // extract the spalt value and use it as a uniform base. // In all other cases the function returns 'false'. // static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, SelectionDAGBuilder* SDB) { SelectionDAG& DAG = SDB->DAG; LLVMContext &Context = *DAG.getContext(); assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type"); const GetElementPtrInst *GEP = dyn_cast(Ptr); if (!GEP || GEP->getNumOperands() > 2) return false; const Value *GEPPtr = GEP->getPointerOperand(); if (!GEPPtr->getType()->isVectorTy()) Ptr = GEPPtr; else if (!(Ptr = getSplatValue(GEPPtr))) return false; Value *IndexVal = GEP->getOperand(1); // The operands of the GEP may be defined in another basic block. // In this case we'll not find nodes for the operands. if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal)) return false; Base = SDB->getValue(Ptr); Index = SDB->getValue(IndexVal); // Suppress sign extension. 
if (SExtInst* Sext = dyn_cast(IndexVal)) { if (SDB->findValue(Sext->getOperand(0))) { IndexVal = Sext->getOperand(0); Index = SDB->getValue(IndexVal); } } if (!Index.getValueType().isVector()) { unsigned GEPWidth = GEP->getType()->getVectorNumElements(); EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth); Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index); } return true; } void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDLoc sdl = getCurSDLoc(); // llvm.masked.scatter.*(Src0, Ptrs, alignemt, Mask) const Value *Ptr = I.getArgOperand(1); SDValue Src0 = getValue(I.getArgOperand(0)); SDValue Mask = getValue(I.getArgOperand(3)); EVT VT = Src0.getValueType(); unsigned Alignment = (cast(I.getArgOperand(2)))->getZExtValue(); if (!Alignment) Alignment = DAG.getEVTAlignment(VT); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); SDValue Base; SDValue Index; const Value *BasePtr = Ptr; bool UniformBase = getUniformBase(BasePtr, Base, Index, this); const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; MachineMemOperand *MMO = DAG.getMachineFunction(). 
getMachineMemOperand(MachinePointerInfo(MemOpBasePtr), MachineMemOperand::MOStore, VT.getStoreSize(), Alignment, AAInfo); if (!UniformBase) { Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); } SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index }; SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, Ops, MMO); DAG.setRoot(Scatter); setValue(&I, Scatter); } void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { SDLoc sdl = getCurSDLoc(); auto getMaskedLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, unsigned& Alignment) { // @llvm.masked.load.*(Ptr, alignment, Mask, Src0) Ptr = I.getArgOperand(0); Alignment = cast(I.getArgOperand(1))->getZExtValue(); Mask = I.getArgOperand(2); Src0 = I.getArgOperand(3); }; auto getExpandingLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, unsigned& Alignment) { // @llvm.masked.expandload.*(Ptr, Mask, Src0) Ptr = I.getArgOperand(0); Alignment = 0; Mask = I.getArgOperand(1); Src0 = I.getArgOperand(2); }; Value *PtrOperand, *MaskOperand, *Src0Operand; unsigned Alignment; if (IsExpanding) getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment); else getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment); SDValue Ptr = getValue(PtrOperand); SDValue Src0 = getValue(Src0Operand); SDValue Mask = getValue(MaskOperand); EVT VT = Src0.getValueType(); if (!Alignment) Alignment = DAG.getEVTAlignment(VT); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); // Do not serialize masked loads of constant memory with anything. bool AddToChain = !AA->pointsToConstantMemory(MemoryLocation( PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo)); SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); MachineMemOperand *MMO = DAG.getMachineFunction(). 
getMachineMemOperand(MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, VT.getStoreSize(), Alignment, AAInfo, Ranges); SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO, ISD::NON_EXTLOAD, IsExpanding); if (AddToChain) { SDValue OutChain = Load.getValue(1); DAG.setRoot(OutChain); } setValue(&I, Load); } void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDLoc sdl = getCurSDLoc(); // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0) const Value *Ptr = I.getArgOperand(0); SDValue Src0 = getValue(I.getArgOperand(3)); SDValue Mask = getValue(I.getArgOperand(2)); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); unsigned Alignment = (cast(I.getArgOperand(1)))->getZExtValue(); if (!Alignment) Alignment = DAG.getEVTAlignment(VT); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); SDValue Root = DAG.getRoot(); SDValue Base; SDValue Index; const Value *BasePtr = Ptr; bool UniformBase = getUniformBase(BasePtr, Base, Index, this); bool ConstantMemory = false; if (UniformBase && AA->pointsToConstantMemory(MemoryLocation( BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; } MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(UniformBase ? 
BasePtr : nullptr), MachineMemOperand::MOLoad, VT.getStoreSize(), Alignment, AAInfo, Ranges); if (!UniformBase) { Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); } SDValue Ops[] = { Root, Src0, Mask, Base, Index }; SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, Ops, MMO); SDValue OutChain = Gather.getValue(1); if (!ConstantMemory) PendingLoads.push_back(OutChain); setValue(&I, Gather); } void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering SuccessOrder = I.getSuccessOrdering(); AtomicOrdering FailureOrder = I.getFailureOrdering(); SynchronizationScope Scope = I.getSynchScope(); SDValue InChain = getRoot(); MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType(); SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other); SDValue L = DAG.getAtomicCmpSwap( ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain, getValue(I.getPointerOperand()), getValue(I.getCompareOperand()), getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), /*Alignment=*/ 0, SuccessOrder, FailureOrder, Scope); SDValue OutChain = L.getValue(2); setValue(&I, L); DAG.setRoot(OutChain); } void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { SDLoc dl = getCurSDLoc(); ISD::NodeType NT; switch (I.getOperation()) { default: llvm_unreachable("Unknown atomicrmw operation"); case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break; case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break; case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break; case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break; case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break; case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break; case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break; case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break; case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break; case AtomicRMWInst::UMax: 
NT = ISD::ATOMIC_LOAD_UMAX; break; case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break; } AtomicOrdering Order = I.getOrdering(); SynchronizationScope Scope = I.getSynchScope(); SDValue InChain = getRoot(); SDValue L = DAG.getAtomic(NT, dl, getValue(I.getValOperand()).getSimpleValueType(), InChain, getValue(I.getPointerOperand()), getValue(I.getValOperand()), I.getPointerOperand(), /* Alignment=*/ 0, Order, Scope); SDValue OutChain = L.getValue(1); setValue(&I, L); DAG.setRoot(OutChain); } void SelectionDAGBuilder::visitFence(const FenceInst &I) { SDLoc dl = getCurSDLoc(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Ops[3]; Ops[0] = getRoot(); Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl, TLI.getPointerTy(DAG.getDataLayout())); Ops[2] = DAG.getConstant(I.getSynchScope(), dl, TLI.getPointerTy(DAG.getDataLayout())); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering Order = I.getOrdering(); SynchronizationScope Scope = I.getSynchScope(); SDValue InChain = getRoot(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, VT.getStoreSize(), I.getAlignment() ? 
I.getAlignment() : DAG.getEVTAlignment(VT), AAMDNodes(), nullptr, Scope, Order); InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, getValue(I.getPointerOperand()), MMO); SDValue OutChain = L.getValue(1); setValue(&I, L); DAG.setRoot(OutChain); } void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering Order = I.getOrdering(); SynchronizationScope Scope = I.getSynchScope(); SDValue InChain = getRoot(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType()); if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic store"); SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT, InChain, getValue(I.getPointerOperand()), getValue(I.getValueOperand()), I.getPointerOperand(), I.getAlignment(), Order, Scope); DAG.setRoot(OutChain); } /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC /// node. void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic) { // Ignore the callsite's attributes. A specific call site may be marked with // readnone, but the lowering code will expect the chain based on the // definition. const Function *F = I.getCalledFunction(); bool HasChain = !F->doesNotAccessMemory(); bool OnlyLoad = HasChain && F->onlyReadsMemory(); // Build the operand list. SmallVector Ops; if (HasChain) { // If this intrinsic has side-effects, chainify it. if (OnlyLoad) { // We don't need to serialize loads against other loads. Ops.push_back(DAG.getRoot()); } else { Ops.push_back(getRoot()); } } // Info is set by getTgtMemInstrinsic TargetLowering::IntrinsicInfo Info; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); // Add the intrinsic ID as an integer operand if it's not a target intrinsic. 
if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || Info.opc == ISD::INTRINSIC_W_CHAIN) Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); // Add all operands of the call to the operand list. for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { SDValue Op = getValue(I.getArgOperand(i)); Ops.push_back(Op); } SmallVector ValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs); if (HasChain) ValueVTs.push_back(MVT::Other); SDVTList VTs = DAG.getVTList(ValueVTs); // Create the node. SDValue Result; if (IsTgtIntrinsic) { // This is target intrinsic that touches memory Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, MachinePointerInfo(Info.ptrVal, Info.offset), Info.align, Info.vol, Info.readMem, Info.writeMem, Info.size); } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops); } else { Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops); } if (HasChain) { SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1); if (OnlyLoad) PendingLoads.push_back(Chain); else DAG.setRoot(Chain); } if (!I.getType()->isVoidTy()) { if (VectorType *PTy = dyn_cast(I.getType())) { EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy); Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result); } else Result = lowerRangeToAssertZExt(DAG, I, Result); setValue(&I, Result); } } /// GetSignificand - Get the significand and build it into a floating-point /// number with exponent of 1: /// /// Op = (Op & 0x007fffff) | 0x3f800000; /// /// where Op is the hexadecimal representation of floating point value. 
static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) {
  // Keep the low 23 bits of the i32 value ...
  SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
                           DAG.getConstant(0x007fffff, dl, MVT::i32));
  // ... OR in 0x3f800000 and reinterpret the bits as f32.
  SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
                           DAG.getConstant(0x3f800000, dl, MVT::i32));
  return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
}

/// GetExponent - Get the exponent:
///
///   (float)(int)(((Op & 0x7f800000) >> 23) - 127);
///
/// where Op is the hexadecimal representation of floating point value.
static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
                           const TargetLowering &TLI, const SDLoc &dl) {
  SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
                           DAG.getConstant(0x7f800000, dl, MVT::i32));
  // The shift amount is emitted as a pointer-typed constant.
  SDValue t1 = DAG.getNode(
      ISD::SRL, dl, MVT::i32, t0,
      DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout())));
  SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
                           DAG.getConstant(127, dl, MVT::i32));
  return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
}

/// getF32Constant - Get 32-bit floating point constant.
///
/// \p Flt is the 32-bit pattern of the constant; it is wrapped in an
/// IEEE-single APFloat and emitted as an f32 constant node.
static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
                              const SDLoc &dl) {
  return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl,
                           MVT::f32);
}

/// getLimitedPrecisionExp2 - Emit the limited-precision expansion of 2^t0.
///
/// \p t0 is split into an integer part and a fractional part. A polynomial in
/// the fractional part, whose degree is selected by LimitFloatPrecision, is
/// evaluated, and the integer part, shifted left by 23, is then added to the
/// bit pattern of the polynomial result in the integer domain.
static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
                                       SelectionDAG &DAG) {
  // TODO: What fast-math-flags should be set on the floating-point nodes?

  //   IntegerPartOfX = (int32_t)t0;
  SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);

  //   FractionalPartOfX = t0 - (float)IntegerPartOfX;
  SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
  SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);

  //   IntegerPartOfX <<= 23;
  IntegerPartOfX = DAG.getNode(
      ISD::SHL, dl, MVT::i32, IntegerPartOfX,
      DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy(
                                  DAG.getDataLayout())));

  SDValue TwoToFractionalPartOfX;
  if (LimitFloatPrecision <= 6) {
    // For floating-point precision of 6:
    //
    //   TwoToFractionalPartOfX =
    //     0.997535578f +
    //       (0.735607626f + 0.252464424f * x) * x;
    //
    // error 0.0144103317, which is 6 bits
    SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                             getF32Constant(DAG, 0x3e814304, dl));
    SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                             getF32Constant(DAG, 0x3f3c50c8, dl));
    SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
    TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                                         getF32Constant(DAG, 0x3f7f5e7e, dl));
  } else if (LimitFloatPrecision <= 12) {
    // For floating-point precision of 12:
    //
    //   TwoToFractionalPartOfX =
    //     0.999892986f +
    //       (0.696457318f +
    //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
    //
    // error 0.000107046256, which is 13 to 14 bits
    SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                             getF32Constant(DAG, 0x3da235e3, dl));
    SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                             getF32Constant(DAG, 0x3e65b8f3, dl));
    SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
    SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                             getF32Constant(DAG, 0x3f324b07, dl));
    SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
    TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
                                         getF32Constant(DAG, 0x3f7ff8fd, dl));
  } else { // LimitFloatPrecision <= 18
    // For floating-point precision of 18:
    //
    //   TwoToFractionalPartOfX =
    //     0.999999982f +
    //       (0.693148872f +
    //         (0.240227044f +
    //           (0.554906021e-1f +
    //             (0.961591928e-2f +
    //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
    //
    // error 2.47208000*10^(-7), which is better than 18 bits
    SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                             getF32Constant(DAG, 0x3924b03e, dl));
    SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                             getF32Constant(DAG, 0x3ab24b87, dl));
    SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
    SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                             getF32Constant(DAG, 0x3c1d8c17, dl));
    SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
    SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
                             getF32Constant(DAG, 0x3d634a1d, dl));
    SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
    SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
                             getF32Constant(DAG, 0x3e75fe14, dl));
    SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
    SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
                              getF32Constant(DAG, 0x3f317234, dl));
    SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
    TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
                                         getF32Constant(DAG, 0x3f800000, dl));
  }

  // Add the exponent into the result in integer domain.
  SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
  return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
                     DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
}

/// expandExp - Lower an exp intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                         const TargetLowering &TLI) {
  if (Op.getValueType() == MVT::f32 &&
      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {

    // Put the exponent in the right bit position for later addition to the
    // final result:
    //
    //   #define LOG2OFe 1.4426950f
    //   t0 = Op * LOG2OFe

    // TODO: What fast-math-flags should be set here?
    SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
                             getF32Constant(DAG, 0x3fb8aa3b, dl));
    return getLimitedPrecisionExp2(t0, dl, DAG);
  }

  // No special expansion.
return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op);
}

/// expandLog - Lower a log intrinsic. Handles the special sequences for
/// limited-precision mode.
///
/// In limited-precision mode (f32 operand, 0 < LimitFloatPrecision <= 18) the
/// result is the exponent scaled by log(2) plus a polynomial in the
/// significand; otherwise a plain FLOG node is emitted.
static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                         const TargetLowering &TLI) {

  // TODO: What fast-math-flags should be set on the floating-point nodes?

  if (Op.getValueType() == MVT::f32 &&
      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);

    // Scale the exponent by log(2) [0.69314718f].
    SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
    SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
                                        getF32Constant(DAG, 0x3f317218, dl));

    // Get the significand and build it into a floating-point number with
    // exponent of 1.
    SDValue X = GetSignificand(DAG, Op1, dl);

    SDValue LogOfMantissa;
    if (LimitFloatPrecision <= 6) {
      // For floating-point precision of 6:
      //
      //   LogOfMantissa =
      //     -1.1609546f +
      //       (1.4034025f - 0.23903021f * x) * x;
      //
      // error 0.0034276066, which is better than 8 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0xbe74c456, dl));
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3fb3a2b1, dl));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
                                  getF32Constant(DAG, 0x3f949a29, dl));
    } else if (LimitFloatPrecision <= 12) {
      // For floating-point precision of 12:
      //
      //   LogOfMantissa =
      //     -1.7417939f +
      //       (2.8212026f +
      //         (-1.4699568f +
      //           (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
      //
      // error 0.000061011436, which is 14 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0xbd67b6d6, dl));
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3ee4f4b8, dl));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3fbc278b, dl));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                               getF32Constant(DAG, 0x40348e95, dl));
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
      LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
                                  getF32Constant(DAG, 0x3fdef31a, dl));
    } else { // LimitFloatPrecision <= 18
      // For floating-point precision of 18:
      //
      //   LogOfMantissa =
      //     -2.1072184f +
      //       (4.2372794f +
      //         (-3.7029485f +
      //           (2.2781945f +
      //             (-0.87823314f +
      //               (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
      //
      // error 0.0000023660568, which is better than 18 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0xbc91e5ac, dl));
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3e4350aa, dl));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3f60d3e3, dl));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                               getF32Constant(DAG, 0x4011cdf0, dl));
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
      SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
                               getF32Constant(DAG, 0x406cfd1c, dl));
      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
                               getF32Constant(DAG, 0x408797cb, dl));
      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
      LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
                                  getF32Constant(DAG, 0x4006dcab, dl));
    }

    return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
  }

  // No special expansion.
  return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op);
}

/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                          const TargetLowering &TLI) {

  // TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);

    // Get the exponent. No scaling is applied to it here; it is added
    // directly to the polynomial result below.
    SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);

    // Get the significand and build it into a floating-point number with
    // exponent of 1.
    SDValue X = GetSignificand(DAG, Op1, dl);

    // Different possible minimax approximations of significand in
    // floating-point for various degrees of accuracy over [1,2].
    SDValue Log2ofMantissa;
    if (LimitFloatPrecision <= 6) {
      // For floating-point precision of 6:
      //
      //   Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
      //
      // error 0.0049451742, which is more than 7 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0xbeb08fe0, dl));
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x40019463, dl));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
                                   getF32Constant(DAG, 0x3fd6633d, dl));
    } else if (LimitFloatPrecision <= 12) {
      // For floating-point precision of 12:
      //
      //   Log2ofMantissa =
      //     -2.51285454f +
      //       (4.07009056f +
      //         (-2.12067489f +
      //           (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
      //
      // error 0.0000876136000, which is better than 13 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0xbda7262e, dl));
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3f25280b, dl));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x4007b923, dl));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                               getF32Constant(DAG, 0x40823e2f, dl));
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
      Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
                                   getF32Constant(DAG, 0x4020d29c, dl));
    } else { // LimitFloatPrecision <= 18
      // For floating-point precision of 18:
      //
      //   Log2ofMantissa =
      //     -3.0400495f +
      //       (6.1129976f +
      //         (-5.3420409f +
      //           (3.2865683f +
      //             (-1.2669343f +
      //               (0.27515199f -
      //                 0.25691327e-1f * x) * x) * x) * x) * x) * x;
      //
      // error 0.0000018516, which is better than 18 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0xbcd2769e, dl));
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3e8ce0b9, dl));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3fa22ae7, dl));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                               getF32Constant(DAG, 0x40525723, dl));
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
      SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
                               getF32Constant(DAG, 0x40aaf200, dl));
      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
                               getF32Constant(DAG, 0x40c39dad, dl));
      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
      Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
                                   getF32Constant(DAG, 0x4042902c, dl));
    }

    return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
  }

  // No special expansion.
  return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op);
}

/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                           const TargetLowering &TLI) {

  // TODO: What fast-math-flags should be set on the floating-point nodes?

  if (Op.getValueType() == MVT::f32 &&
      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);

    // Scale the exponent by log10(2) [0.30102999f].
SDValue Exp = GetExponent(DAG, Op1, TLI, dl); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, getF32Constant(DAG, 0x3e9a209a, dl)); // Get the significand and build it into a floating-point number with // exponent of 1. SDValue X = GetSignificand(DAG, Op1, dl); SDValue Log10ofMantissa; if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // // Log10ofMantissa = // -0.50419619f + // (0.60948995f - 0.10380950f * x) * x; // // error 0.0014886165, which is 6 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0xbdd49a13, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x3f1c0789, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f011300, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // Log10ofMantissa = // -0.64831180f + // (0.91751397f + // (-0.31664806f + 0.47637168e-1f * x) * x) * x; // // error 0.00019228036, which is better than 12 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0x3d431f31, dl)); SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, getF32Constant(DAG, 0x3ea21fb2, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f6ae232, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f25f7c3, dl)); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // // Log10ofMantissa = // -0.84299375f + // (1.5327582f + // (-1.0688956f + // (0.49102474f + // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x; // // error 0.0000037995730, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, getF32Constant(DAG, 0x3c5d51ce, dl)); SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, 
t0, getF32Constant(DAG, 0x3e00685a, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3efb6798, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f88d192, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, getF32Constant(DAG, 0x3fc4316c, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, getF32Constant(DAG, 0x3f57ce70, dl)); } return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); } // No special expansion. return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op); } /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) return getLimitedPrecisionExp2(Op, dl, DAG); // No special expansion. return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op); } /// visitPow - Lower a pow intrinsic. Handles the special sequences for /// limited-precision mode with x == 10.0f. static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const TargetLowering &TLI) { bool IsExp10 = false; if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { if (ConstantFPSDNode *LHSC = dyn_cast(LHS)) { APFloat Ten(10.0f); IsExp10 = LHSC->isExactlyValue(Ten); } } // TODO: What fast-math-flags should be set on the FMUL node? 
if (IsExp10) { // Put the exponent in the right bit position for later addition to the // final result: // // #define LOG2OF10 3.3219281f // t0 = Op * LOG2OF10; SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS, getF32Constant(DAG, 0x40549a78, dl)); return getLimitedPrecisionExp2(t0, dl, DAG); } // No special expansion. return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS); } /// ExpandPowI - Expand a llvm.powi intrinsic. static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, SelectionDAG &DAG) { // If RHS is a constant, we can expand this out to a multiplication tree, // otherwise we end up lowering to a call to __powidf2 (for example). When // optimizing for size, we only want to do this if the expansion would produce // a small number of multiplies, otherwise we do the full expansion. if (ConstantSDNode *RHSC = dyn_cast(RHS)) { // Get the exponent as a positive value. unsigned Val = RHSC->getSExtValue(); if ((int)Val < 0) Val = -Val; // powi(x, 0) -> 1.0 if (Val == 0) return DAG.getConstantFP(1.0, DL, LHS.getValueType()); const Function *F = DAG.getMachineFunction().getFunction(); if (!F->optForSize() || // If optimizing for size, don't insert too many multiplies. // This inserts up to 5 multiplies. countPopulation(Val) + Log2_32(Val) < 7) { // We use the simple binary decomposition method to generate the multiply // sequence. There are more optimal ways to do this (for example, // powi(x,15) generates one more multiply than it should), but this has // the benefit of being both really simple and much better than a libcall. SDValue Res; // Logically starts equal to 1.0 SDValue CurSquare = LHS; // TODO: Intrinsics should have fast-math-flags that propagate to these // nodes. while (Val) { if (Val & 1) { if (Res.getNode()) Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare); else Res = CurSquare; // 1.0*CurSquare. 
} CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(), CurSquare, CurSquare); Val >>= 1; } // If the original was negative, invert the result, producing 1/(x*x*x). if (RHSC->getSExtValue() < 0) Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(), DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res); return Res; } } // Otherwise, expand to a libcall. return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); } // getUnderlyingArgReg - Find underlying register used for a truncated or // bitcasted argument. static unsigned getUnderlyingArgReg(const SDValue &N) { switch (N.getOpcode()) { case ISD::CopyFromReg: return cast(N.getOperand(1))->getReg(); case ISD::BITCAST: case ISD::AssertZext: case ISD::AssertSext: case ISD::TRUNCATE: return getUnderlyingArgReg(N.getOperand(0)); default: return 0; } } /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function /// argument, create the corresponding DBG_VALUE machine instruction for it now. /// At the end of instruction selection, they will be inserted to the entry BB. bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( const Value *V, DILocalVariable *Variable, DIExpression *Expr, DILocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) { const Argument *Arg = dyn_cast(V); if (!Arg) return false; MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); // Ignore inlined function arguments here. // // FIXME: Should we be checking DL->inlinedAt() to determine this? if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction())) return false; Optional Op; // Some arguments' frame index is recorded during argument lowering. 
if (int FI = FuncInfo.getArgumentFrameIndex(Arg)) Op = MachineOperand::CreateFI(FI); if (!Op && N.getNode()) { unsigned Reg = getUnderlyingArgReg(N); if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { MachineRegisterInfo &RegInfo = MF.getRegInfo(); unsigned PR = RegInfo.getLiveInPhysReg(Reg); if (PR) Reg = PR; } if (Reg) Op = MachineOperand::CreateReg(Reg, false); } if (!Op) { // Check if ValueMap has reg number. DenseMap::iterator VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) Op = MachineOperand::CreateReg(VMI->second, false); } if (!Op && N.getNode()) // Check if frame index is available. if (LoadSDNode *LNode = dyn_cast(N.getNode())) if (FrameIndexSDNode *FINode = dyn_cast(LNode->getBasePtr().getNode())) Op = MachineOperand::CreateFI(FINode->getIndex()); if (!Op) return false; assert(Variable->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); if (Op->isReg()) FuncInfo.ArgDbgValues.push_back( BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, Op->getReg(), Offset, Variable, Expr)); else FuncInfo.ArgDbgValues.push_back( BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE)) .addOperand(*Op) .addImm(Offset) .addMetadata(Variable) .addMetadata(Expr)); return true; } /// Return the appropriate SDDbgValue based on N. SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N, DILocalVariable *Variable, DIExpression *Expr, int64_t Offset, DebugLoc dl, unsigned DbgSDNodeOrder) { SDDbgValue *SDV; auto *FISDN = dyn_cast(N.getNode()); if (FISDN && Expr->startsWithDeref()) { // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe // stack slot locations as such instead of as indirectly addressed // locations. 
ArrayRef TrailingElements(Expr->elements_begin() + 1, Expr->elements_end()); DIExpression *DerefedDIExpr = DIExpression::get(*DAG.getContext(), TrailingElements); int FI = FISDN->getIndex(); SDV = DAG.getFrameIndexDbgValue(Variable, DerefedDIExpr, FI, 0, dl, DbgSDNodeOrder); } else { SDV = DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, Offset, dl, DbgSDNodeOrder); } return SDV; } // VisualStudio defines setjmp as _setjmp #if defined(_MSC_VER) && defined(setjmp) && \ !defined(setjmp_undefined_for_msvc) # pragma push_macro("setjmp") # undef setjmp # define setjmp_undefined_for_msvc #endif /// visitIntrinsicCall - Lower the call to the specified intrinsic function. If /// we want to emit this as a call to a named external function, return the name /// otherwise lower it and return null. const char * SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc sdl = getCurSDLoc(); DebugLoc dl = getCurDebugLoc(); SDValue Res; switch (Intrinsic) { default: // By default, turn this into a target intrinsic node. 
visitTargetIntrinsic(I, Intrinsic); return nullptr; case Intrinsic::vastart: visitVAStart(I); return nullptr; case Intrinsic::vaend: visitVAEnd(I); return nullptr; case Intrinsic::vacopy: visitVACopy(I); return nullptr; case Intrinsic::returnaddress: setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return nullptr; case Intrinsic::addressofreturnaddress: setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl, TLI.getPointerTy(DAG.getDataLayout()))); return nullptr; case Intrinsic::frameaddress: setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return nullptr; case Intrinsic::read_register: { Value *Reg = I.getArgOperand(0); SDValue Chain = getRoot(); SDValue RegName = DAG.getMDNode(cast(cast(Reg)->getMetadata())); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); Res = DAG.getNode(ISD::READ_REGISTER, sdl, DAG.getVTList(VT, MVT::Other), Chain, RegName); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return nullptr; } case Intrinsic::write_register: { Value *Reg = I.getArgOperand(0); Value *RegValue = I.getArgOperand(1); SDValue Chain = getRoot(); SDValue RegName = DAG.getMDNode(cast(cast(Reg)->getMetadata())); DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain, RegName, getValue(RegValue))); return nullptr; } case Intrinsic::setjmp: return &"_setjmp"[!TLI.usesUnderscoreSetJmp()]; case Intrinsic::longjmp: return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); if (!Align) Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. 
bool isVol = cast(I.getArgOperand(4))->getZExtValue(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); updateDAGForMaybeTailCall(MC); return nullptr; } case Intrinsic::memset: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); if (!Align) Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. bool isVol = cast(I.getArgOperand(4))->getZExtValue(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, isTC, MachinePointerInfo(I.getArgOperand(0))); updateDAGForMaybeTailCall(MS); return nullptr; } case Intrinsic::memmove: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); if (!Align) Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. bool isVol = cast(I.getArgOperand(4))->getZExtValue(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); updateDAGForMaybeTailCall(MM); return nullptr; } case Intrinsic::memcpy_element_atomic: { SDValue Dst = getValue(I.getArgOperand(0)); SDValue Src = getValue(I.getArgOperand(1)); SDValue NumElements = getValue(I.getArgOperand(2)); SDValue ElementSize = getValue(I.getArgOperand(3)); // Emit a library call. 
TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Ty = I.getArgOperand(2)->getType(); Entry.Node = NumElements; Args.push_back(Entry); Entry.Ty = Type::getInt32Ty(*DAG.getContext()); Entry.Node = ElementSize; Args.push_back(Entry); uint64_t ElementSizeConstant = cast(I.getArgOperand(3))->getZExtValue(); RTLIB::Libcall LibraryCall = RTLIB::getMEMCPY_ELEMENT_ATOMIC(ElementSizeConstant); if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) report_fatal_error("Unsupported element size"); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(sdl) .setChain(getRoot()) .setCallee(TLI.getLibcallCallingConv(LibraryCall), Type::getVoidTy(*DAG.getContext()), DAG.getExternalSymbol( TLI.getLibcallName(LibraryCall), TLI.getPointerTy(DAG.getDataLayout())), std::move(Args)); std::pair CallResult = TLI.LowerCallTo(CLI); DAG.setRoot(CallResult.second); return nullptr; } case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast(I); DILocalVariable *Variable = DI.getVariable(); DIExpression *Expression = DI.getExpression(); const Value *Address = DI.getAddress(); assert(Variable && "Missing variable"); if (!Address) { DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return nullptr; } // Check if address has undef value. if (isa(Address) || (Address->use_empty() && !isa(Address))) { DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return nullptr; } SDValue &N = NodeMap[Address]; if (!N.getNode() && isa(Address)) // Check unused arguments map. N = UnusedArgNodeMap[Address]; SDDbgValue *SDV; if (N.getNode()) { if (const BitCastInst *BCI = dyn_cast(Address)) Address = BCI->getOperand(0); // Parameters are handled specially. bool isParameter = Variable->isParameter() || isa(Address); auto FINode = dyn_cast(N.getNode()); if (isParameter && FINode) { // Byval parameter. 
We have a frame index at this point. SDV = DAG.getFrameIndexDbgValue(Variable, Expression, FINode->getIndex(), 0, dl, SDNodeOrder); } else if (isa(Address)) { // Address is an argument, so try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, N); return nullptr; } else { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), true, 0, dl, SDNodeOrder); } DAG.AddDbgValue(SDV, N.getNode(), isParameter); } else { // If Address is an argument then try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, N)) { // If variable is pinned by a alloca in dominating bb then // use StaticAllocaMap. if (const AllocaInst *AI = dyn_cast(Address)) { if (AI->getParent() != DI.getParent()) { DenseMap::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) { SDV = DAG.getFrameIndexDbgValue(Variable, Expression, SI->second, 0, dl, SDNodeOrder); DAG.AddDbgValue(SDV, nullptr, false); return nullptr; } } } DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } return nullptr; } case Intrinsic::dbg_value: { const DbgValueInst &DI = cast(I); assert(DI.getVariable() && "Missing variable"); DILocalVariable *Variable = DI.getVariable(); DIExpression *Expression = DI.getExpression(); uint64_t Offset = DI.getOffset(); const Value *V = DI.getValue(); if (!V) return nullptr; SDDbgValue *SDV; if (isa(V) || isa(V) || isa(V)) { SDV = DAG.getConstantDbgValue(Variable, Expression, V, Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, nullptr, false); } else { // Do not use getValue() in here; we don't want to generate code at // this point if it hasn't been done yet. SDValue N = NodeMap[V]; if (!N.getNode() && isa(V)) // Check unused arguments map. 
N = UnusedArgNodeMap[V]; if (N.getNode()) { if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, false, N)) { SDV = getDbgValue(N, Variable, Expression, Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, N.getNode(), false); } } else if (!V->use_empty() ) { // Do not call getValue(V) yet, as we don't want to generate code. // Remember it for later. DanglingDebugInfo DDI(&DI, dl, SDNodeOrder); DanglingDebugInfoMap[V] = DDI; } else { // We may expand this to cover more cases. One case where we have no // data available is an unreferenced parameter. DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } // Build a debug info table entry. if (const BitCastInst *BCI = dyn_cast(V)) V = BCI->getOperand(0); const AllocaInst *AI = dyn_cast(V); // Don't handle byval struct arguments or VLAs, for example. if (!AI) { DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); return nullptr; } DenseMap::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI == FuncInfo.StaticAllocaMap.end()) return nullptr; // VLAs. return nullptr; } case Intrinsic::eh_typeid_for: { // Find the type id for the given typeinfo. 
GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0)); unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(GV); Res = DAG.getConstant(TypeID, sdl, MVT::i32); setValue(&I, Res); return nullptr; } case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64: DAG.getMachineFunction().setCallsEHReturn(true); DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl, MVT::Other, getControlRoot(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); return nullptr; case Intrinsic::eh_unwind_init: DAG.getMachineFunction().setCallsUnwindInit(true); return nullptr; case Intrinsic::eh_dwarf_cfa: { setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl, TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return nullptr; } case Intrinsic::eh_sjlj_callsite: { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); ConstantInt *CI = dyn_cast(I.getArgOperand(0)); assert(CI && "Non-constant call site value in eh.sjlj.callsite!"); assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!"); MMI.setCurrentCallSite(CI->getZExtValue()); return nullptr; } case Intrinsic::eh_sjlj_functioncontext: { // Get and store the index of the function context. 
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); AllocaInst *FnCtx = cast(I.getArgOperand(0)->stripPointerCasts()); int FI = FuncInfo.StaticAllocaMap[FnCtx]; MFI.setFunctionContextIndex(FI); return nullptr; } case Intrinsic::eh_sjlj_setjmp: { SDValue Ops[2]; Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl, DAG.getVTList(MVT::i32, MVT::Other), Ops); setValue(&I, Op.getValue(0)); DAG.setRoot(Op.getValue(1)); return nullptr; } case Intrinsic::eh_sjlj_longjmp: { DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other, getRoot(), getValue(I.getArgOperand(0)))); return nullptr; } case Intrinsic::eh_sjlj_setup_dispatch: { DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other, getRoot())); return nullptr; } case Intrinsic::masked_gather: visitMaskedGather(I); return nullptr; case Intrinsic::masked_load: visitMaskedLoad(I); return nullptr; case Intrinsic::masked_scatter: visitMaskedScatter(I); return nullptr; case Intrinsic::masked_store: visitMaskedStore(I); return nullptr; case Intrinsic::masked_expandload: visitMaskedLoad(I, true /* IsExpanding */); return nullptr; case Intrinsic::masked_compressstore: visitMaskedStore(I, true /* IsCompressing */); return nullptr; case Intrinsic::x86_mmx_pslli_w: case Intrinsic::x86_mmx_pslli_d: case Intrinsic::x86_mmx_pslli_q: case Intrinsic::x86_mmx_psrli_w: case Intrinsic::x86_mmx_psrli_d: case Intrinsic::x86_mmx_psrli_q: case Intrinsic::x86_mmx_psrai_w: case Intrinsic::x86_mmx_psrai_d: { SDValue ShAmt = getValue(I.getArgOperand(1)); if (isa(ShAmt)) { visitTargetIntrinsic(I, Intrinsic); return nullptr; } unsigned NewIntrinsic = 0; EVT ShAmtVT = MVT::v2i32; switch (Intrinsic) { case Intrinsic::x86_mmx_pslli_w: NewIntrinsic = Intrinsic::x86_mmx_psll_w; break; case Intrinsic::x86_mmx_pslli_d: NewIntrinsic = Intrinsic::x86_mmx_psll_d; break; case Intrinsic::x86_mmx_pslli_q: NewIntrinsic = Intrinsic::x86_mmx_psll_q; break; case 
Intrinsic::x86_mmx_psrli_w: NewIntrinsic = Intrinsic::x86_mmx_psrl_w; break; case Intrinsic::x86_mmx_psrli_d: NewIntrinsic = Intrinsic::x86_mmx_psrl_d; break; case Intrinsic::x86_mmx_psrli_q: NewIntrinsic = Intrinsic::x86_mmx_psrl_q; break; case Intrinsic::x86_mmx_psrai_w: NewIntrinsic = Intrinsic::x86_mmx_psra_w; break; case Intrinsic::x86_mmx_psrai_d: NewIntrinsic = Intrinsic::x86_mmx_psra_d; break; default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. } // The vector shift intrinsics with scalars uses 32b shift amounts but // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits // to be zero. // We must do this early because v2i32 is not a legal type. SDValue ShOps[2]; ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, sdl, MVT::i32); ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, DAG.getConstant(NewIntrinsic, sdl, MVT::i32), getValue(I.getArgOperand(0)), ShAmt); setValue(&I, Res); return nullptr; } case Intrinsic::powi: setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG)); return nullptr; case Intrinsic::log: setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; case Intrinsic::log2: setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; case Intrinsic::log10: setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; case Intrinsic::exp: setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; case Intrinsic::exp2: setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; case Intrinsic::pow: setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG, TLI)); return nullptr; case Intrinsic::sqrt: case Intrinsic::fabs: case 
Intrinsic::sin: case Intrinsic::cos: case Intrinsic::floor: case Intrinsic::ceil: case Intrinsic::trunc: case Intrinsic::rint: case Intrinsic::nearbyint: case Intrinsic::round: case Intrinsic::canonicalize: { unsigned Opcode; switch (Intrinsic) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; case Intrinsic::fabs: Opcode = ISD::FABS; break; case Intrinsic::sin: Opcode = ISD::FSIN; break; case Intrinsic::cos: Opcode = ISD::FCOS; break; case Intrinsic::floor: Opcode = ISD::FFLOOR; break; case Intrinsic::ceil: Opcode = ISD::FCEIL; break; case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; case Intrinsic::rint: Opcode = ISD::FRINT; break; case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; case Intrinsic::round: Opcode = ISD::FROUND; break; case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break; } setValue(&I, DAG.getNode(Opcode, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); return nullptr; } case Intrinsic::minnum: { auto VT = getValue(I.getArgOperand(0)).getValueType(); unsigned Opc = I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT) ? ISD::FMINNAN : ISD::FMINNUM; setValue(&I, DAG.getNode(Opc, sdl, VT, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); return nullptr; } case Intrinsic::maxnum: { auto VT = getValue(I.getArgOperand(0)).getValueType(); unsigned Opc = I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT) ? 
ISD::FMAXNAN : ISD::FMAXNUM; setValue(&I, DAG.getNode(Opc, sdl, VT, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); return nullptr; } case Intrinsic::copysign: setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); return nullptr; case Intrinsic::fma: setValue(&I, DAG.getNode(ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); return nullptr; case Intrinsic::fmuladd: { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && TLI.isFMAFasterThanFMulAndFAdd(VT)) { setValue(&I, DAG.getNode(ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); } else { // TODO: Intrinsic calls should have fast-math-flags. SDValue Mul = DAG.getNode(ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1))); SDValue Add = DAG.getNode(ISD::FADD, sdl, getValue(I.getArgOperand(0)).getValueType(), Mul, getValue(I.getArgOperand(2))); setValue(&I, Add); } return nullptr; } case Intrinsic::convert_to_fp16: setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16, DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16, getValue(I.getArgOperand(0)), DAG.getTargetConstant(0, sdl, MVT::i32)))); return nullptr; case Intrinsic::convert_from_fp16: setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl, TLI.getValueType(DAG.getDataLayout(), I.getType()), DAG.getNode(ISD::BITCAST, sdl, MVT::f16, getValue(I.getArgOperand(0))))); return nullptr; case Intrinsic::pcmarker: { SDValue Tmp = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp)); return nullptr; } case Intrinsic::readcyclecounter: { SDValue Op = getRoot(); Res = DAG.getNode(ISD::READCYCLECOUNTER, 
sdl, DAG.getVTList(MVT::i64, MVT::Other), Op); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return nullptr; } case Intrinsic::bitreverse: setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); return nullptr; case Intrinsic::bswap: setValue(&I, DAG.getNode(ISD::BSWAP, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); return nullptr; case Intrinsic::cttz: { SDValue Arg = getValue(I.getArgOperand(0)); ConstantInt *CI = cast(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF, sdl, Ty, Arg)); return nullptr; } case Intrinsic::ctlz: { SDValue Arg = getValue(I.getArgOperand(0)); ConstantInt *CI = cast(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF, sdl, Ty, Arg)); return nullptr; } case Intrinsic::ctpop: { SDValue Arg = getValue(I.getArgOperand(0)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg)); return nullptr; } case Intrinsic::stacksave: { SDValue Op = getRoot(); Res = DAG.getNode( ISD::STACKSAVE, sdl, DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Op); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return nullptr; } case Intrinsic::stackrestore: { Res = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res)); return nullptr; } case Intrinsic::get_dynamic_area_offset: { SDValue Op = getRoot(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); // Result type for @llvm.get.dynamic.area.offset should match PtrTy for // target. 
if (PtrTy != ResTy) report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset" " intrinsic!"); Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy), Op); DAG.setRoot(Op); setValue(&I, Res); return nullptr; } case Intrinsic::stackguard: { EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); MachineFunction &MF = DAG.getMachineFunction(); const Module &M = *MF.getFunction()->getParent(); SDValue Chain = getRoot(); if (TLI.useLoadStackGuardNode()) { Res = getLoadStackGuard(DAG, sdl, Chain); } else { const Value *Global = TLI.getSDagStackGuard(M); unsigned Align = DL->getPrefTypeAlignment(Global->getType()); Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global), MachinePointerInfo(Global, 0), Align, MachineMemOperand::MOVolatile); } DAG.setRoot(Chain); setValue(&I, Res); return nullptr; } case Intrinsic::stackprotector: { // Emit code into the DAG to store the stack guard onto the stack. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); SDValue Src, Chain = getRoot(); if (TLI.useLoadStackGuardNode()) Src = getLoadStackGuard(DAG, sdl, Chain); else Src = getValue(I.getArgOperand(0)); // The guard's value. AllocaInst *Slot = cast(I.getArgOperand(1)); int FI = FuncInfo.StaticAllocaMap[Slot]; MFI.setStackProtectorIndex(FI); SDValue FIN = DAG.getFrameIndex(FI, PtrTy); // Store the stack protector onto the stack. Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), FI), /* Alignment = */ 0, MachineMemOperand::MOVolatile); setValue(&I, Res); DAG.setRoot(Res); return nullptr; } case Intrinsic::objectsize: { // If we don't know by now, we're never going to know. 
ConstantInt *CI = dyn_cast(I.getArgOperand(1)); assert(CI && "Non-constant type in __builtin_object_size?"); SDValue Arg = getValue(I.getCalledValue()); EVT Ty = Arg.getValueType(); if (CI->isZero()) Res = DAG.getConstant(-1ULL, sdl, Ty); else Res = DAG.getConstant(0, sdl, Ty); setValue(&I, Res); return nullptr; } case Intrinsic::annotation: case Intrinsic::ptr_annotation: case Intrinsic::invariant_group_barrier: // Drop the intrinsic, but forward the value setValue(&I, getValue(I.getOperand(0))); return nullptr; case Intrinsic::assume: case Intrinsic::var_annotation: // Discard annotate attributes and assumptions return nullptr; case Intrinsic::init_trampoline: { const Function *F = cast(I.getArgOperand(1)->stripPointerCasts()); SDValue Ops[6]; Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); Ops[5] = DAG.getSrcValue(F); Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops); DAG.setRoot(Res); return nullptr; } case Intrinsic::adjust_trampoline: { setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return nullptr; } case Intrinsic::gcroot: { MachineFunction &MF = DAG.getMachineFunction(); const Function *F = MF.getFunction(); (void)F; assert(F->hasGC() && "only valid in functions with gc specified, enforced by Verifier"); assert(GFI && "implied by previous"); const Value *Alloca = I.getArgOperand(0)->stripPointerCasts(); const Constant *TypeMap = cast(I.getArgOperand(1)); FrameIndexSDNode *FI = cast(getValue(Alloca).getNode()); GFI->addStackRoot(FI->getIndex(), TypeMap); return nullptr; } case Intrinsic::gcread: case Intrinsic::gcwrite: llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); case Intrinsic::flt_rounds: setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32)); return nullptr; case Intrinsic::expect: { // Just replace 
__builtin_expect(exp, c) with EXP. setValue(&I, getValue(I.getArgOperand(0))); return nullptr; } case Intrinsic::debugtrap: case Intrinsic::trap: { StringRef TrapFuncName = I.getAttributes() .getAttribute(AttributeSet::FunctionIndex, "trap-func-name") .getValueAsString(); if (TrapFuncName.empty()) { ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? ISD::TRAP : ISD::DEBUGTRAP; DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot())); return nullptr; } TargetLowering::ArgListTy Args; TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(sdl).setChain(getRoot()).setCallee( CallingConv::C, I.getType(), DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy(DAG.getDataLayout())), std::move(Args)); std::pair Result = TLI.LowerCallTo(CLI); DAG.setRoot(Result.second); return nullptr; } case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: { ISD::NodeType Op; switch (Intrinsic) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break; case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break; case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break; case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break; case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break; case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break; } SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2)); return nullptr; } case Intrinsic::prefetch: { SDValue Ops[5]; unsigned rw = cast(I.getArgOperand(1))->getZExtValue(); Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); Ops[4] = getValue(I.getArgOperand(3)); DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), Ops, EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)), 0, /* align */ false, /* volatile */ rw==0, /* read */ rw==1)); /* write */ return nullptr; } case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: { bool IsStart = (Intrinsic == Intrinsic::lifetime_start); // Stack coloring is not enabled in O0, discard region information. if (TM.getOptLevel() == CodeGenOpt::None) return nullptr; SmallVector Allocas; GetUnderlyingObjects(I.getArgOperand(1), Allocas, *DL); for (SmallVectorImpl::iterator Object = Allocas.begin(), E = Allocas.end(); Object != E; ++Object) { AllocaInst *LifetimeObject = dyn_cast_or_null(*Object); // Could not find an Alloca. if (!LifetimeObject) continue; // First check that the Alloca is static, otherwise it won't have a // valid frame index. 
auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject); if (SI == FuncInfo.StaticAllocaMap.end()) return nullptr; int FI = SI->second; SDValue Ops[2]; Ops[0] = getRoot(); Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()), true); unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); DAG.setRoot(Res); } return nullptr; } case Intrinsic::invariant_start: // Discard region information. setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); return nullptr; case Intrinsic::invariant_end: // Discard region information. return nullptr; case Intrinsic::clear_cache: return TLI.getClearCacheBuiltinName(); case Intrinsic::donothing: // ignore return nullptr; case Intrinsic::experimental_stackmap: { visitStackmap(I); return nullptr; } case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: { visitPatchpoint(&I); return nullptr; } case Intrinsic::experimental_gc_statepoint: { LowerStatepoint(ImmutableStatepoint(&I)); return nullptr; } case Intrinsic::experimental_gc_result: { visitGCResult(cast(I)); return nullptr; } case Intrinsic::experimental_gc_relocate: { visitGCRelocate(cast(I)); return nullptr; } case Intrinsic::instrprof_increment: llvm_unreachable("instrprof failed to lower an increment"); case Intrinsic::instrprof_value_profile: llvm_unreachable("instrprof failed to lower a value profiling call"); case Intrinsic::localescape: { MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission // is the same on all targets. for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) { Value *Arg = I.getArgOperand(Idx)->stripPointerCasts(); if (isa(Arg)) continue; // Skip null pointers. They represent a hole in index space. 
AllocaInst *Slot = cast(Arg); assert(FuncInfo.StaticAllocaMap.count(Slot) && "can only escape static allocas"); int FI = FuncInfo.StaticAllocaMap[Slot]; MCSymbol *FrameAllocSym = MF.getMMI().getContext().getOrCreateFrameAllocSymbol( GlobalValue::getRealLinkageName(MF.getName()), Idx); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, TII->get(TargetOpcode::LOCAL_ESCAPE)) .addSym(FrameAllocSym) .addFrameIndex(FI); } return nullptr; } case Intrinsic::localrecover: { // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx) MachineFunction &MF = DAG.getMachineFunction(); MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout(), 0); // Get the symbol that defines the frame offset. auto *Fn = cast(I.getArgOperand(0)->stripPointerCasts()); auto *Idx = cast(I.getArgOperand(2)); unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX)); MCSymbol *FrameAllocSym = MF.getMMI().getContext().getOrCreateFrameAllocSymbol( GlobalValue::getRealLinkageName(Fn->getName()), IdxVal); // Create a MCSymbol for the label to avoid any target lowering // that would make this PC relative. SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT); SDValue OffsetVal = DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym); // Add the offset to the FP. Value *FP = I.getArgOperand(1); SDValue FPVal = getValue(FP); SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal); setValue(&I, Add); return nullptr; } case Intrinsic::eh_exceptionpointer: case Intrinsic::eh_exceptioncode: { // Get the exception pointer vreg, copy from it, and resize it to fit. 
const auto *CPI = cast(I.getArgOperand(0)); MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT); unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC); SDValue N = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT); if (Intrinsic == Intrinsic::eh_exceptioncode) N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32); setValue(&I, N); return nullptr; } case Intrinsic::experimental_deoptimize: LowerDeoptimizeCall(&I); return nullptr; } } std::pair SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, const BasicBlock *EHPadBB) { MachineFunction &MF = DAG.getMachineFunction(); MachineModuleInfo &MMI = MF.getMMI(); MCSymbol *BeginLabel = nullptr; if (EHPadBB) { // Insert a label before the invoke call to mark the try range. This can be // used to detect deletion of the invoke via the MachineModuleInfo. BeginLabel = MMI.getContext().createTempSymbol(); // For SjLj, keep track of which landing pads go with which invokes // so as to maintain the ordering of pads in the LSDA. unsigned CallSiteIndex = MMI.getCurrentCallSite(); if (CallSiteIndex) { MF.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex); // Now that the call site is handled, stop tracking it. MMI.setCurrentCallSite(0); } // Both PendingLoads and PendingExports must be flushed here; // this call might not return. 
(void)getRoot(); DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getControlRoot(), BeginLabel)); CLI.setChain(getRoot()); } const TargetLowering &TLI = DAG.getTargetLoweringInfo(); std::pair Result = TLI.LowerCallTo(CLI); assert((CLI.IsTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); assert((Result.second.getNode() || !Result.first.getNode()) && "Null value expected with tail call!"); if (!Result.second.getNode()) { // As a special case, a null chain means that a tail call has been emitted // and the DAG root is already updated. HasTailCall = true; // Since there's no actual continuation from this block, nothing can be // relying on us setting vregs for them. PendingExports.clear(); } else { DAG.setRoot(Result.second); } if (EHPadBB) { // Insert a label at the end of the invoke call to mark the try range. This // can be used to detect deletion of the invoke via the MachineModuleInfo. MCSymbol *EndLabel = MMI.getContext().createTempSymbol(); DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); // Inform MachineModuleInfo of range. if (MF.hasEHFunclets()) { assert(CLI.CS); WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); EHInfo->addIPToStateRange(cast(CLI.CS->getInstruction()), BeginLabel, EndLabel); } else { MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); } } return Result; } void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool isTailCall, const BasicBlock *EHPadBB) { auto &DL = DAG.getDataLayout(); FunctionType *FTy = CS.getFunctionType(); Type *RetTy = CS.getType(); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Args.reserve(CS.arg_size()); const Value *SwiftErrorVal = nullptr; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // We can't tail call inside a function with a swifterror argument. Lowering + // does not support this yet. It would have to move into the swifterror + // register before the call. 
+ auto *Caller = CS.getInstruction()->getParent()->getParent(); + if (TLI.supportSwiftError() && + Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) + isTailCall = false; + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { const Value *V = *i; // Skip empty types if (V->getType()->isEmptyTy()) continue; SDValue ArgNode = getValue(V); Entry.Node = ArgNode; Entry.Ty = V->getType(); // Skip the first return-type Attribute to get to params. Entry.setAttributes(&CS, i - CS.arg_begin() + 1); // Use swifterror virtual register as input to the call. if (Entry.isSwiftError && TLI.supportSwiftError()) { SwiftErrorVal = V; // We find the virtual register for the actual swifterror argument. // Instead of using the Value, we use the virtual register instead. Entry.Node = DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVReg(FuncInfo.MBB, V), EVT(TLI.getPointerTy(DL))); } Args.push_back(Entry); // If we have an explicit sret argument that is an Instruction, (i.e., it // might point to function-local memory), we can't meaningfully tail-call. if (Entry.isSRet && isa(V)) isTailCall = false; } // Check if target-independent constraints permit a tail call here. // Target-dependent constraints are checked within TLI->LowerCallTo. if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget())) isTailCall = false; // Disable tail calls if there is an swifterror argument. Targets have not // been updated to support tail calls. 
if (TLI.supportSwiftError() && SwiftErrorVal) isTailCall = false; TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()) .setChain(getRoot()) .setCallee(RetTy, FTy, Callee, std::move(Args), CS) .setTailCall(isTailCall) .setConvergent(CS.isConvergent()); std::pair Result = lowerInvokable(CLI, EHPadBB); if (Result.first.getNode()) { const Instruction *Inst = CS.getInstruction(); Result.first = lowerRangeToAssertZExt(DAG, *Inst, Result.first); setValue(Inst, Result.first); } // The last element of CLI.InVals has the SDValue for swifterror return. // Here we copy it to a virtual register and update SwiftErrorMap for // book-keeping. if (SwiftErrorVal && TLI.supportSwiftError()) { // Get the last element of InVals. SDValue Src = CLI.InVals.back(); const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL)); unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src); // We update the virtual register for the actual swifterror argument. FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg); DAG.setRoot(CopyNode); } } /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the /// value is equal or not-equal to zero. static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { for (const User *U : V->users()) { if (const ICmpInst *IC = dyn_cast(U)) if (IC->isEquality()) if (const Constant *C = dyn_cast(IC->getOperand(1))) if (C->isNullValue()) continue; // Unknown instruction. return false; } return true; } static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, Type *LoadTy, SelectionDAGBuilder &Builder) { // Check to see if this load can be trivially constant folded, e.g. if the // input is from a string literal. if (const Constant *LoadInput = dyn_cast(PtrVal)) { // Cast pointer to the type we really want to load. 
LoadInput = ConstantExpr::getBitCast(const_cast(LoadInput), PointerType::getUnqual(LoadTy)); if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr( const_cast(LoadInput), LoadTy, *Builder.DL)) return Builder.getValue(LoadCst); } // Otherwise, we have to emit the load. If the pointer is to unfoldable but // still constant memory, the input chain can be the entry node. SDValue Root; bool ConstantMemory = false; // Do not serialize (non-volatile) loads of constant memory with anything. if (Builder.AA->pointsToConstantMemory(PtrVal)) { Root = Builder.DAG.getEntryNode(); ConstantMemory = true; } else { // Do not serialize non-volatile loads against each other. Root = Builder.DAG.getRoot(); } SDValue Ptr = Builder.getValue(PtrVal); SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, Ptr, MachinePointerInfo(PtrVal), /* Alignment = */ 1); if (!ConstantMemory) Builder.PendingLoads.push_back(LoadVal.getValue(1)); return LoadVal; } /// processIntegerCallValue - Record the value for an instruction that /// produces an integer result, converting the type where necessary. void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, SDValue Value, bool IsSigned) { EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType(), true); if (IsSigned) Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT); else Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT); setValue(&I, Value); } /// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form. /// If so, return true and lower it, otherwise return false and it will be /// lowered like a normal call. bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // Verify that the prototype makes sense. 
int memcmp(void*,void*,size_t) if (I.getNumArgOperands() != 3) return false; const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1); if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() || !I.getArgOperand(2)->getType()->isIntegerTy() || !I.getType()->isIntegerTy()) return false; const Value *Size = I.getArgOperand(2); const ConstantInt *CSize = dyn_cast(Size); if (CSize && CSize->getZExtValue() == 0) { EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType(), true); setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT)); return true; } const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair Res = TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS), getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS)); if (Res.first.getNode()) { processIntegerCallValue(I, Res.first, true); PendingLoads.push_back(Res.second); return true; } // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 if (CSize && IsOnlyUsedInZeroEqualityComparison(&I)) { bool ActuallyDoIt = true; MVT LoadVT; Type *LoadTy; switch (CSize->getZExtValue()) { default: LoadVT = MVT::Other; LoadTy = nullptr; ActuallyDoIt = false; break; case 2: LoadVT = MVT::i16; LoadTy = Type::getInt16Ty(CSize->getContext()); break; case 4: LoadVT = MVT::i32; LoadTy = Type::getInt32Ty(CSize->getContext()); break; case 8: LoadVT = MVT::i64; LoadTy = Type::getInt64Ty(CSize->getContext()); break; /* case 16: LoadVT = MVT::v4i32; LoadTy = Type::getInt32Ty(CSize->getContext()); LoadTy = VectorType::get(LoadTy, 4); break; */ } // This turns into unaligned loads. We only do this if the target natively // supports the MVT we'll be loading or if it is small enough (<= 4) that // we'll only produce a small number of byte loads. // Require that we can find a legal MVT, and only do this if the target // supports unaligned loads of that type. 
Expanding into byte loads would // bloat the code. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (ActuallyDoIt && CSize->getZExtValue() > 4) { unsigned DstAS = LHS->getType()->getPointerAddressSpace(); unsigned SrcAS = RHS->getType()->getPointerAddressSpace(); // TODO: Handle 5 byte compare as 4-byte + 1 byte. // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. // TODO: Check alignment of src and dest ptrs. if (!TLI.isTypeLegal(LoadVT) || !TLI.allowsMisalignedMemoryAccesses(LoadVT, SrcAS) || !TLI.allowsMisalignedMemoryAccesses(LoadVT, DstAS)) ActuallyDoIt = false; } if (ActuallyDoIt) { SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this); SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this); SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal, ISD::SETNE); processIntegerCallValue(I, Res, false); return true; } } return false; } /// visitMemChrCall -- See if we can lower a memchr call into an optimized /// form. If so, return true and lower it, otherwise return false and it /// will be lowered like a normal call. bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) { // Verify that the prototype makes sense. 
void *memchr(void *, int, size_t) if (I.getNumArgOperands() != 3) return false; const Value *Src = I.getArgOperand(0); const Value *Char = I.getArgOperand(1); const Value *Length = I.getArgOperand(2); if (!Src->getType()->isPointerTy() || !Char->getType()->isIntegerTy() || !Length->getType()->isIntegerTy() || !I.getType()->isPointerTy()) return false; const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair Res = TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(), getValue(Src), getValue(Char), getValue(Length), MachinePointerInfo(Src)); if (Res.first.getNode()) { setValue(&I, Res.first); PendingLoads.push_back(Res.second); return true; } return false; } /// /// visitMemPCpyCall -- lower a mempcpy call as a memcpy followed by code to /// to adjust the dst pointer by the size of the copied memory. bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) { // Verify argument count: void *mempcpy(void *, const void *, size_t) if (I.getNumArgOperands() != 3) return false; SDValue Dst = getValue(I.getArgOperand(0)); SDValue Src = getValue(I.getArgOperand(1)); SDValue Size = getValue(I.getArgOperand(2)); unsigned DstAlign = DAG.InferPtrAlignment(Dst); unsigned SrcAlign = DAG.InferPtrAlignment(Src); unsigned Align = std::min(DstAlign, SrcAlign); if (Align == 0) // Alignment of one or both could not be inferred. Align = 1; // 0 and 1 both specify no alignment, but 0 is reserved. bool isVol = false; SDLoc sdl = getCurSDLoc(); // In the mempcpy context we need to pass in a false value for isTailCall // because the return pointer needs to be adjusted by the size of // the copied memory. SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Align, isVol, false, /*isTailCall=*/false, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); assert(MC.getNode() != nullptr && "** memcpy should not be lowered as TailCall in mempcpy context **"); DAG.setRoot(MC); // Check if Size needs to be truncated or extended. 
Size = DAG.getSExtOrTrunc(Size, sdl, Dst.getValueType()); // Adjust return pointer to point just past the last dst byte. SDValue DstPlusSize = DAG.getNode(ISD::ADD, sdl, Dst.getValueType(), Dst, Size); setValue(&I, DstPlusSize); return true; } /// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an /// optimized form. If so, return true and lower it, otherwise return false /// and it will be lowered like a normal call. bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) { // Verify that the prototype makes sense. char *strcpy(char *, char *) if (I.getNumArgOperands() != 2) return false; const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); if (!Arg0->getType()->isPointerTy() || !Arg1->getType()->isPointerTy() || !I.getType()->isPointerTy()) return false; const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair Res = TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(), getValue(Arg0), getValue(Arg1), MachinePointerInfo(Arg0), MachinePointerInfo(Arg1), isStpcpy); if (Res.first.getNode()) { setValue(&I, Res.first); DAG.setRoot(Res.second); return true; } return false; } /// visitStrCmpCall - See if we can lower a call to strcmp in an optimized form. /// If so, return true and lower it, otherwise return false and it will be /// lowered like a normal call. bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) { // Verify that the prototype makes sense. 
int strcmp(void*,void*) if (I.getNumArgOperands() != 2) return false; const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); if (!Arg0->getType()->isPointerTy() || !Arg1->getType()->isPointerTy() || !I.getType()->isIntegerTy()) return false; const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair Res = TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(), getValue(Arg0), getValue(Arg1), MachinePointerInfo(Arg0), MachinePointerInfo(Arg1)); if (Res.first.getNode()) { processIntegerCallValue(I, Res.first, true); PendingLoads.push_back(Res.second); return true; } return false; } /// visitStrLenCall -- See if we can lower a strlen call into an optimized /// form. If so, return true and lower it, otherwise return false and it /// will be lowered like a normal call. bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) { // Verify that the prototype makes sense. size_t strlen(char *) if (I.getNumArgOperands() != 1) return false; const Value *Arg0 = I.getArgOperand(0); if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy()) return false; const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair Res = TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(), getValue(Arg0), MachinePointerInfo(Arg0)); if (Res.first.getNode()) { processIntegerCallValue(I, Res.first, false); PendingLoads.push_back(Res.second); return true; } return false; } /// visitStrNLenCall -- See if we can lower a strnlen call into an optimized /// form. If so, return true and lower it, otherwise return false and it /// will be lowered like a normal call. bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) { // Verify that the prototype makes sense. 
size_t strnlen(char *, size_t) if (I.getNumArgOperands() != 2) return false; const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); if (!Arg0->getType()->isPointerTy() || !Arg1->getType()->isIntegerTy() || !I.getType()->isIntegerTy()) return false; const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair Res = TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(), getValue(Arg0), getValue(Arg1), MachinePointerInfo(Arg0)); if (Res.first.getNode()) { processIntegerCallValue(I, Res.first, false); PendingLoads.push_back(Res.second); return true; } return false; } /// visitUnaryFloatCall - If a call instruction is a unary floating-point /// operation (as expected), translate it to an SDNode with the specified opcode /// and return true. bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, unsigned Opcode) { // Sanity check that it really is a unary floating-point call. if (I.getNumArgOperands() != 1 || !I.getArgOperand(0)->getType()->isFloatingPointTy() || I.getType() != I.getArgOperand(0)->getType() || !I.onlyReadsMemory()) return false; SDValue Tmp = getValue(I.getArgOperand(0)); setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp)); return true; } /// visitBinaryFloatCall - If a call instruction is a binary floating-point /// operation (as expected), translate it to an SDNode with the specified opcode /// and return true. bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I, unsigned Opcode) { // Sanity check that it really is a binary floating-point call. 
if (I.getNumArgOperands() != 2 || !I.getArgOperand(0)->getType()->isFloatingPointTy() || I.getType() != I.getArgOperand(0)->getType() || I.getType() != I.getArgOperand(1)->getType() || !I.onlyReadsMemory()) return false; SDValue Tmp0 = getValue(I.getArgOperand(0)); SDValue Tmp1 = getValue(I.getArgOperand(1)); EVT VT = Tmp0.getValueType(); setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1)); return true; } void SelectionDAGBuilder::visitCall(const CallInst &I) { // Handle inline assembly differently. if (isa(I.getCalledValue())) { visitInlineAsm(&I); return; } MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); computeUsesVAFloatArgument(I, MMI); const char *RenameFn = nullptr; if (Function *F = I.getCalledFunction()) { if (F->isDeclaration()) { if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) { if (unsigned IID = II->getIntrinsicID(F)) { RenameFn = visitIntrinsicCall(I, IID); if (!RenameFn) return; } } if (Intrinsic::ID IID = F->getIntrinsicID()) { RenameFn = visitIntrinsicCall(I, IID); if (!RenameFn) return; } } // Check for well-known libc/libm calls. If the function is internal, it // can't be a library call. Don't do the check if marked as nobuiltin for // some reason. LibFunc::Func Func; if (!I.isNoBuiltin() && !F->hasLocalLinkage() && F->hasName() && LibInfo->getLibFunc(F->getName(), Func) && LibInfo->hasOptimizedCodeGen(Func)) { switch (Func) { default: break; case LibFunc::copysign: case LibFunc::copysignf: case LibFunc::copysignl: if (I.getNumArgOperands() == 2 && // Basic sanity checks. 
I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && I.getType() == I.getArgOperand(1)->getType() && I.onlyReadsMemory()) { SDValue LHS = getValue(I.getArgOperand(0)); SDValue RHS = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(), LHS.getValueType(), LHS, RHS)); return; } break; case LibFunc::fabs: case LibFunc::fabsf: case LibFunc::fabsl: if (visitUnaryFloatCall(I, ISD::FABS)) return; break; case LibFunc::fmin: case LibFunc::fminf: case LibFunc::fminl: if (visitBinaryFloatCall(I, ISD::FMINNUM)) return; break; case LibFunc::fmax: case LibFunc::fmaxf: case LibFunc::fmaxl: if (visitBinaryFloatCall(I, ISD::FMAXNUM)) return; break; case LibFunc::sin: case LibFunc::sinf: case LibFunc::sinl: if (visitUnaryFloatCall(I, ISD::FSIN)) return; break; case LibFunc::cos: case LibFunc::cosf: case LibFunc::cosl: if (visitUnaryFloatCall(I, ISD::FCOS)) return; break; case LibFunc::sqrt: case LibFunc::sqrtf: case LibFunc::sqrtl: case LibFunc::sqrt_finite: case LibFunc::sqrtf_finite: case LibFunc::sqrtl_finite: if (visitUnaryFloatCall(I, ISD::FSQRT)) return; break; case LibFunc::floor: case LibFunc::floorf: case LibFunc::floorl: if (visitUnaryFloatCall(I, ISD::FFLOOR)) return; break; case LibFunc::nearbyint: case LibFunc::nearbyintf: case LibFunc::nearbyintl: if (visitUnaryFloatCall(I, ISD::FNEARBYINT)) return; break; case LibFunc::ceil: case LibFunc::ceilf: case LibFunc::ceill: if (visitUnaryFloatCall(I, ISD::FCEIL)) return; break; case LibFunc::rint: case LibFunc::rintf: case LibFunc::rintl: if (visitUnaryFloatCall(I, ISD::FRINT)) return; break; case LibFunc::round: case LibFunc::roundf: case LibFunc::roundl: if (visitUnaryFloatCall(I, ISD::FROUND)) return; break; case LibFunc::trunc: case LibFunc::truncf: case LibFunc::truncl: if (visitUnaryFloatCall(I, ISD::FTRUNC)) return; break; case LibFunc::log2: case LibFunc::log2f: case LibFunc::log2l: if (visitUnaryFloatCall(I, ISD::FLOG2)) return; break; 
case LibFunc::exp2: case LibFunc::exp2f: case LibFunc::exp2l: if (visitUnaryFloatCall(I, ISD::FEXP2)) return; break; case LibFunc::memcmp: if (visitMemCmpCall(I)) return; break; case LibFunc::mempcpy: if (visitMemPCpyCall(I)) return; break; case LibFunc::memchr: if (visitMemChrCall(I)) return; break; case LibFunc::strcpy: if (visitStrCpyCall(I, false)) return; break; case LibFunc::stpcpy: if (visitStrCpyCall(I, true)) return; break; case LibFunc::strcmp: if (visitStrCmpCall(I)) return; break; case LibFunc::strlen: if (visitStrLenCall(I)) return; break; case LibFunc::strnlen: if (visitStrNLenCall(I)) return; break; } } } SDValue Callee; if (!RenameFn) Callee = getValue(I.getCalledValue()); else Callee = DAG.getExternalSymbol( RenameFn, DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't // have to do anything here to lower funclet bundles. assert(!I.hasOperandBundlesOtherThan( {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) && "Cannot lower calls with arbitrary operand bundles!"); if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) LowerCallSiteWithDeoptBundle(&I, Callee, nullptr); else // Check if we can potentially perform a tail call. More detailed checking // is be done within LowerCallTo, after more information about the call is // known. LowerCallTo(&I, Callee, I.isTailCall()); } namespace { /// AsmOperandInfo - This contains information for each constraint that we are /// lowering. class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo { public: /// CallOperand - If this is the result output operand or a clobber /// this is null, otherwise it is the incoming operand to the CallInst. /// This gets modified as the asm is processed. SDValue CallOperand; /// AssignedRegs - If this is a register or register class operand, this /// contains the set of register corresponding to the operand. 
RegsForValue AssignedRegs; explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info) : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr,0) { } /// Whether or not this operand accesses memory bool hasMemory(const TargetLowering &TLI) const { // Indirect operand accesses access memory. if (isIndirect) return true; for (const auto &Code : Codes) if (TLI.getConstraintType(Code) == TargetLowering::C_Memory) return true; return false; } /// getCallOperandValEVT - Return the EVT of the Value* that this operand /// corresponds to. If there is no Value* for this operand, it returns /// MVT::Other. EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI, const DataLayout &DL) const { if (!CallOperandVal) return MVT::Other; if (isa(CallOperandVal)) return TLI.getPointerTy(DL); llvm::Type *OpTy = CallOperandVal->getType(); // FIXME: code duplicated from TargetLowering::ParseConstraints(). // If this is an indirect operand, the operand is a pointer to the // accessed type. if (isIndirect) { llvm::PointerType *PtrTy = dyn_cast(OpTy); if (!PtrTy) report_fatal_error("Indirect operand for inline asm not a pointer!"); OpTy = PtrTy->getElementType(); } // Look for vector wrapped in a struct. e.g. { <16 x i8> }. if (StructType *STy = dyn_cast(OpTy)) if (STy->getNumElements() == 1) OpTy = STy->getElementType(0); // If OpTy is not a single value, it may be a struct/union that we // can tile with integers. if (!OpTy->isSingleValueType() && OpTy->isSized()) { unsigned BitSize = DL.getTypeSizeInBits(OpTy); switch (BitSize) { default: break; case 1: case 8: case 16: case 32: case 64: case 128: OpTy = IntegerType::get(Context, BitSize); break; } } return TLI.getValueType(DL, OpTy, true); } }; typedef SmallVector SDISelAsmOperandInfoVector; } // end anonymous namespace /// Make sure that the output operand \p OpInfo and its corresponding input /// operand \p MatchingOpInfo have compatible constraint types (otherwise error /// out). 
static void patchMatchingInput(const SDISelAsmOperandInfo &OpInfo, SDISelAsmOperandInfo &MatchingOpInfo, SelectionDAG &DAG) { if (OpInfo.ConstraintVT == MatchingOpInfo.ConstraintVT) return; const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); const auto &TLI = DAG.getTargetLoweringInfo(); std::pair MatchRC = TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, OpInfo.ConstraintVT); std::pair InputRC = TLI.getRegForInlineAsmConstraint(TRI, MatchingOpInfo.ConstraintCode, MatchingOpInfo.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != MatchingOpInfo.ConstraintVT.isInteger()) || (MatchRC.second != InputRC.second)) { // FIXME: error out in a more elegant fashion report_fatal_error("Unsupported asm: input constraint" " with a matching output constraint of" " incompatible type!"); } MatchingOpInfo.ConstraintVT = OpInfo.ConstraintVT; } /// Get a direct memory input to behave well as an indirect operand. /// This may introduce stores, hence the need for a \p Chain. /// \return The (possibly updated) chain. static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location, SDISelAsmOperandInfo &OpInfo, SelectionDAG &DAG) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // If we don't have an indirect input, put it in the constpool if we can, // otherwise spill it to a stack slot. // TODO: This isn't quite right. We need to handle these according to // the addressing mode that the constraint wants. Also, this may take // an additional register for the computation and we don't want that // either. // If the operand is a float, integer, or vector constant, spill to a // constant pool entry to get its address. const Value *OpVal = OpInfo.CallOperandVal; if (isa(OpVal) || isa(OpVal) || isa(OpVal) || isa(OpVal)) { OpInfo.CallOperand = DAG.getConstantPool( cast(OpVal), TLI.getPointerTy(DAG.getDataLayout())); return Chain; } // Otherwise, create a stack slot and emit a store to it before the asm. 
Type *Ty = OpVal->getType(); auto &DL = DAG.getDataLayout(); uint64_t TySize = DL.getTypeAllocSize(Ty); unsigned Align = DL.getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy(DL)); Chain = DAG.getStore(Chain, Location, OpInfo.CallOperand, StackSlot, MachinePointerInfo::getFixedStack(MF, SSFI)); OpInfo.CallOperand = StackSlot; return Chain; } /// GetRegistersForValue - Assign registers (virtual or physical) for the /// specified operand. We prefer to assign virtual registers, to allow the /// register allocator to handle the assignment process. However, if the asm /// uses features that we can't model on machineinstrs, we have SDISel do the /// allocation. This produces generally horrible, but correct, code. /// /// OpInfo describes the operand. /// static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, const SDLoc &DL, SDISelAsmOperandInfo &OpInfo) { LLVMContext &Context = *DAG.getContext(); MachineFunction &MF = DAG.getMachineFunction(); SmallVector Regs; // If this is a constraint for a single physreg, or a constraint for a // register class, find it. std::pair PhysReg = TLI.getRegForInlineAsmConstraint(MF.getSubtarget().getRegisterInfo(), OpInfo.ConstraintCode, OpInfo.ConstraintVT); unsigned NumRegs = 1; if (OpInfo.ConstraintVT != MVT::Other) { // If this is a FP input in an integer register (or visa versa) insert a bit // cast of the input value. More generally, handle any case where the input // value disagrees with the register class we plan to stick this in. if (OpInfo.Type == InlineAsm::isInput && PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) { // Try to convert to the first EVT that the reg class contains. If the // types are identical size, use a bitcast to convert (e.g. two differing // vector types). 
MVT RegVT = *PhysReg.second->vt_begin(); if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) { OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) { // If the input is a FP value and we want it in FP registers, do a // bitcast to the corresponding integer type. This turns an f64 value // into i64, which can be passed with two i32 values on a 32-bit // machine. RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits()); OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } } NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT); } MVT RegVT; EVT ValueVT = OpInfo.ConstraintVT; // If this is a constraint for a specific physical register, like {r17}, // assign it now. if (unsigned AssignedReg = PhysReg.first) { const TargetRegisterClass *RC = PhysReg.second; if (OpInfo.ConstraintVT == MVT::Other) ValueVT = *RC->vt_begin(); // Get the actual register value type. This is important, because the user // may have asked for (e.g.) the AX register in i32 type. We need to // remember that AX is actually i16 to get the right extension. RegVT = *RC->vt_begin(); // This is a explicit reference to a physical register. Regs.push_back(AssignedReg); // If this is an expanded reference, add the rest of the regs to Regs. if (NumRegs != 1) { TargetRegisterClass::iterator I = RC->begin(); for (; *I != AssignedReg; ++I) assert(I != RC->end() && "Didn't find reg!"); // Already added the first reg. --NumRegs; ++I; for (; NumRegs; --NumRegs, ++I) { assert(I != RC->end() && "Ran out of registers to allocate!"); Regs.push_back(*I); } } OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); return; } // Otherwise, if this was a reference to an LLVM register class, create vregs // for this reference. 
if (const TargetRegisterClass *RC = PhysReg.second) { RegVT = *RC->vt_begin(); if (OpInfo.ConstraintVT == MVT::Other) ValueVT = RegVT; // Create the appropriate number of virtual registers. MachineRegisterInfo &RegInfo = MF.getRegInfo(); for (; NumRegs; --NumRegs) Regs.push_back(RegInfo.createVirtualRegister(RC)); OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); return; } // Otherwise, we couldn't allocate enough registers for this. } static unsigned findMatchingInlineAsmOperand(unsigned OperandNo, const std::vector &AsmNodeOperands) { // Scan until we find the definition we already emitted of this operand. unsigned CurOp = InlineAsm::Op_FirstOperand; for (; OperandNo; --OperandNo) { // Advance to the next operand. unsigned OpFlag = cast(AsmNodeOperands[CurOp])->getZExtValue(); assert((InlineAsm::isRegDefKind(OpFlag) || InlineAsm::isRegDefEarlyClobberKind(OpFlag) || InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?"); CurOp += InlineAsm::getNumOperandRegisters(OpFlag) + 1; } return CurOp; } /// Fill \p Regs with \p NumRegs new virtual registers of type \p RegVT /// \return true if it has succeeded, false otherwise static bool createVirtualRegs(SmallVector &Regs, unsigned NumRegs, MVT RegVT, SelectionDAG &DAG) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); for (unsigned i = 0, e = NumRegs; i != e; ++i) { if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) Regs.push_back(RegInfo.createVirtualRegister(RC)); else return false; } return true; } class ExtraFlags { unsigned Flags = 0; public: explicit ExtraFlags(ImmutableCallSite CS) { const InlineAsm *IA = cast(CS.getCalledValue()); if (IA->hasSideEffects()) Flags |= InlineAsm::Extra_HasSideEffects; if (IA->isAlignStack()) Flags |= InlineAsm::Extra_IsAlignStack; if (CS.isConvergent()) Flags |= InlineAsm::Extra_IsConvergent; Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect; } void update(const 
llvm::TargetLowering::AsmOperandInfo &OpInfo) { // Ideally, we would only check against memory constraints. However, the // meaning of an Other constraint can be target-specific and we can't easily // reason about it. Therefore, be conservative and set MayLoad/MayStore // for Other constraints as well. if (OpInfo.ConstraintType == TargetLowering::C_Memory || OpInfo.ConstraintType == TargetLowering::C_Other) { if (OpInfo.Type == InlineAsm::isInput) Flags |= InlineAsm::Extra_MayLoad; else if (OpInfo.Type == InlineAsm::isOutput) Flags |= InlineAsm::Extra_MayStore; else if (OpInfo.Type == InlineAsm::isClobber) Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); } } unsigned get() const { return Flags; } }; /// visitInlineAsm - Handle a call to an InlineAsm object. /// void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const InlineAsm *IA = cast(CS.getCalledValue()); /// ConstraintOperands - Information about all of the constraints. SDISelAsmOperandInfoVector ConstraintOperands; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints( DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS); bool hasMemory = false; // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore ExtraFlags ExtraInfo(CS); unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. unsigned ResNo = 0; // ResNo - The result number of the next output. for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i])); SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); MVT OpVT = MVT::Other; // Compute the value type for each operand. if (OpInfo.Type == InlineAsm::isInput || (OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) { OpInfo.CallOperandVal = const_cast(CS.getArgument(ArgNo++)); // Process the call argument. BasicBlocks are labels, currently appearing // only in asm's. 
if (const BasicBlock *BB = dyn_cast(OpInfo.CallOperandVal)) { OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); } else { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } OpVT = OpInfo .getCallOperandValEVT(*DAG.getContext(), TLI, DAG.getDataLayout()) .getSimpleVT(); } if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) { // The return value of the call is this value. As such, there is no // corresponding argument. assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); if (StructType *STy = dyn_cast(CS.getType())) { OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType()); } ++ResNo; } OpInfo.ConstraintVT = OpVT; if (!hasMemory) hasMemory = OpInfo.hasMemory(TLI); // Determine if this InlineAsm MayLoad or MayStore based on the constraints. // FIXME: Could we compute this on OpInfo rather than TargetConstraints[i]? auto TargetConstraint = TargetConstraints[i]; // Compute the constraint code and ConstraintType to use. TLI.ComputeConstraintToUse(TargetConstraint, SDValue()); ExtraInfo.update(TargetConstraint); } SDValue Chain, Flag; // We won't need to flush pending loads if this asm doesn't touch // memory and is nonvolatile. if (hasMemory || IA->hasSideEffects()) Chain = getRoot(); else Chain = DAG.getRoot(); // Second pass over the constraints: compute which constraint option to use // and assign registers to constraints that want a specific physreg. for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; // If this is an output operand with a matching input operand, look up the // matching input. If their types mismatch, e.g. one is an integer, the // other is floating point, or their sizes are different, flag it as an // error. 
if (OpInfo.hasMatchingInput()) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; patchMatchingInput(OpInfo, Input, DAG); } // Compute the constraint code and ConstraintType to use. TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); if (OpInfo.ConstraintType == TargetLowering::C_Memory && OpInfo.Type == InlineAsm::isClobber) continue; // If this is a memory input, and if the operand is not indirect, do what we // need to to provide an address for the memory input. if (OpInfo.ConstraintType == TargetLowering::C_Memory && !OpInfo.isIndirect) { assert((OpInfo.isMultipleAlternative || (OpInfo.Type == InlineAsm::isInput)) && "Can only indirectify direct input operands!"); // Memory operands really want the address of the value. Chain = getAddressForMemoryInput(Chain, getCurSDLoc(), OpInfo, DAG); // There is no longer a Value* corresponding to this operand. OpInfo.CallOperandVal = nullptr; // It is now an indirect operand. OpInfo.isIndirect = true; } // If this constraint is for a specific register, allocate it before // anything else. if (OpInfo.ConstraintType == TargetLowering::C_Register) GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo); } // Third pass - Loop over all of the operands, assigning virtual or physregs // to register class operands. for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; // C_Register operands have already been allocated, Other/Memory don't need // to be. if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. 
std::vector AsmNodeOperands; AsmNodeOperands.push_back(SDValue()); // reserve space for input chain AsmNodeOperands.push_back(DAG.getTargetExternalSymbol( IA->getAsmString().c_str(), TLI.getPointerTy(DAG.getDataLayout()))); // If we have a !srcloc metadata node associated with it, we want to attach // this to the ultimately generated inline asm machineinstr. To do this, we // pass in the third operand as this (potentially null) inline asm MDNode. const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore // bits as operand 3. AsmNodeOperands.push_back(DAG.getTargetConstant( ExtraInfo.get(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. RegsForValue RetValRegs; // IndirectStoresToEmit - The set of stores to emit after the inline asm node. std::vector > IndirectStoresToEmit; for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; switch (OpInfo.Type) { case InlineAsm::isOutput: { if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass && OpInfo.ConstraintType != TargetLowering::C_Register) { // Memory output, or 'other' output (e.g. 'X' constraint). assert(OpInfo.isIndirect && "Memory output must be indirect operand"); unsigned ConstraintID = TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); assert(ConstraintID != InlineAsm::Constraint_Unknown && "Failed to convert memory constraint code to constraint id."); // Add information to the INLINEASM node to know about this output. 
unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID); AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(), MVT::i32)); AsmNodeOperands.push_back(OpInfo.CallOperand); break; } // Otherwise, this is a register or register class output. // Copy the output from the appropriate register. Find a register that // we can use. if (OpInfo.AssignedRegs.Regs.empty()) { emitInlineAsmError( CS, "couldn't allocate output register for constraint '" + Twine(OpInfo.ConstraintCode) + "'"); return; } // If this is an indirect operand, store through the pointer after the // asm. if (OpInfo.isIndirect) { IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs, OpInfo.CallOperandVal)); } else { // This is the result value of the call. assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); // Concatenate this output onto the outputs list. RetValRegs.append(OpInfo.AssignedRegs); } // Add information to the INLINEASM node to know that this register is // set. OpInfo.AssignedRegs .AddInlineAsmOperands(OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber : InlineAsm::Kind_RegDef, false, 0, getCurSDLoc(), DAG, AsmNodeOperands); break; } case InlineAsm::isInput: { SDValue InOperandVal = OpInfo.CallOperand; if (OpInfo.isMatchingInputConstraint()) { // If this is required to match an output register we have already set, // just use its register. auto CurOp = findMatchingInlineAsmOperand(OpInfo.getMatchedOperand(), AsmNodeOperands); unsigned OpFlag = cast(AsmNodeOperands[CurOp])->getZExtValue(); if (InlineAsm::isRegDefKind(OpFlag) || InlineAsm::isRegDefEarlyClobberKind(OpFlag)) { // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. 
if (OpInfo.isIndirect) { // This happens on gcc/testsuite/gcc.dg/pr8788-1.c emitInlineAsmError(CS, "inline asm not supported yet:" " don't know how to handle tied " "indirect register inputs"); return; } MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType(); SmallVector Regs; if (!createVirtualRegs(Regs, InlineAsm::getNumOperandRegisters(OpFlag), RegVT, DAG)) { emitInlineAsmError(CS, "inline asm error: This value type register " "class is not natively supported!"); return; } RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType()); SDLoc dl = getCurSDLoc(); // Use the produced MatchedRegs object to MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, CS.getInstruction()); MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, true, OpInfo.getMatchedOperand(), dl, DAG, AsmNodeOperands); break; } assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!"); assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 && "Unexpected number of operands"); // Add information to the INLINEASM node to know about this input. // See InlineAsm.h isUseOperandTiedToDef. OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag); OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, OpInfo.getMatchedOperand()); AsmNodeOperands.push_back(DAG.getTargetConstant( OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); break; } // Treat indirect 'X' constraint as memory. if (OpInfo.ConstraintType == TargetLowering::C_Other && OpInfo.isIndirect) OpInfo.ConstraintType = TargetLowering::C_Memory; if (OpInfo.ConstraintType == TargetLowering::C_Other) { std::vector Ops; TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, Ops, DAG); if (Ops.empty()) { emitInlineAsmError(CS, "invalid operand for inline asm constraint '" + Twine(OpInfo.ConstraintCode) + "'"); return; } // Add information to the INLINEASM node to know about this input. 
unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); AsmNodeOperands.push_back(DAG.getTargetConstant( ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; } if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); assert(InOperandVal.getValueType() == TLI.getPointerTy(DAG.getDataLayout()) && "Memory operands expect pointer values"); unsigned ConstraintID = TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); assert(ConstraintID != InlineAsm::Constraint_Unknown && "Failed to convert memory constraint code to constraint id."); // Add information to the INLINEASM node to know about this input. unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, getCurSDLoc(), MVT::i32)); AsmNodeOperands.push_back(InOperandVal); break; } assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || OpInfo.ConstraintType == TargetLowering::C_Register) && "Unknown constraint type!"); // TODO: Support this. if (OpInfo.isIndirect) { emitInlineAsmError( CS, "Don't know how to handle indirect register inputs yet " "for constraint '" + Twine(OpInfo.ConstraintCode) + "'"); return; } // Copy the input into the appropriate registers. if (OpInfo.AssignedRegs.Regs.empty()) { emitInlineAsmError(CS, "couldn't allocate input reg for constraint '" + Twine(OpInfo.ConstraintCode) + "'"); return; } SDLoc dl = getCurSDLoc(); OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, CS.getInstruction()); OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, dl, DAG, AsmNodeOperands); break; } case InlineAsm::isClobber: { // Add the clobbered value to the operand list, so that the register // allocator is aware that the physreg got clobbered. 
if (!OpInfo.AssignedRegs.Regs.empty()) OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber, false, 0, getCurSDLoc(), DAG, AsmNodeOperands); break; } } } // Finish up input operands. Set the input chain and add the flag last. AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; if (Flag.getNode()) AsmNodeOperands.push_back(Flag); Chain = DAG.getNode(ISD::INLINEASM, getCurSDLoc(), DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); Flag = Chain.getValue(1); // If this asm returns a register value, copy the result from that register // and set it as the value of the call. if (!RetValRegs.Regs.empty()) { SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction()); // FIXME: Why don't we do this for inline asms with MRVs? if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { EVT ResultType = TLI.getValueType(DAG.getDataLayout(), CS.getType()); // If any of the results of the inline asm is a vector, it may have the // wrong width/num elts. This can happen for register classes that can // contain multiple different value types. The preg or vreg allocated may // not have the same VT as was expected. Convert it to the right type // with bit_convert. if (ResultType != Val.getValueType() && Val.getValueType().isVector()) { Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultType, Val); } else if (ResultType != Val.getValueType() && ResultType.isInteger() && Val.getValueType().isInteger()) { // If a result value was tied to an input value, the computed result may // have a wider width than the expected result. Extract the relevant // portion. Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultType, Val); } assert(ResultType == Val.getValueType() && "Asm result value mismatch!"); } setValue(CS.getInstruction(), Val); // Don't need to use this as a chain in this case. 
if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty()) return; } std::vector > StoresToEmit; // Process indirect outputs, first output all of the flagged copies out of // physregs. for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) { RegsForValue &OutRegs = IndirectStoresToEmit[i].first; const Value *Ptr = IndirectStoresToEmit[i].second; SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, IA); StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); } // Emit the non-flagged stores from the physregs. SmallVector OutChains; for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) { SDValue Val = DAG.getStore(Chain, getCurSDLoc(), StoresToEmit[i].first, getValue(StoresToEmit[i].second), MachinePointerInfo(StoresToEmit[i].second)); OutChains.push_back(Val); } if (!OutChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains); DAG.setRoot(Chain); } void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS, const Twine &Message) { LLVMContext &Ctx = *DAG.getContext(); Ctx.emitError(CS.getInstruction(), Message); // Make sure we leave the DAG in a valid state const TargetLowering &TLI = DAG.getTargetLoweringInfo(); auto VT = TLI.getValueType(DAG.getDataLayout(), CS.getType()); setValue(CS.getInstruction(), DAG.getUNDEF(VT)); } void SelectionDAGBuilder::visitVAStart(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(), MVT::Other, getRoot(), getValue(I.getArgOperand(0)), DAG.getSrcValue(I.getArgOperand(0)))); } void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const DataLayout &DL = DAG.getDataLayout(); SDValue V = DAG.getVAArg(TLI.getValueType(DAG.getDataLayout(), I.getType()), getCurSDLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), DL.getABITypeAlignment(I.getType())); setValue(&I, V); DAG.setRoot(V.getValue(1)); } void 
SelectionDAGBuilder::visitVAEnd(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(), MVT::Other, getRoot(), getValue(I.getArgOperand(0)), DAG.getSrcValue(I.getArgOperand(0)))); } void SelectionDAGBuilder::visitVACopy(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(), MVT::Other, getRoot(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG.getSrcValue(I.getArgOperand(0)), DAG.getSrcValue(I.getArgOperand(1)))); } SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I, SDValue Op) { const MDNode *Range = I.getMetadata(LLVMContext::MD_range); if (!Range) return Op; ConstantRange CR = getConstantRangeFromMetadata(*Range); if (CR.isFullSet() || CR.isEmptySet() || CR.isWrappedSet()) return Op; APInt Lo = CR.getUnsignedMin(); if (!Lo.isMinValue()) return Op; APInt Hi = CR.getUnsignedMax(); unsigned Bits = Hi.getActiveBits(); EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits); SDLoc SL = getCurSDLoc(); SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), Op, DAG.getValueType(SmallVT)); unsigned NumVals = Op.getNode()->getNumValues(); if (NumVals == 1) return ZExt; SmallVector Ops; Ops.push_back(ZExt); for (unsigned I = 1; I != NumVals; ++I) Ops.push_back(Op.getValue(I)); return DAG.getMergeValues(Ops, SL); } /// \brief Populate a CallLowerinInfo (into \p CLI) based on the properties of /// the call being lowered. /// /// This is a helper for lowering intrinsics that follow a target calling /// convention or require stack pointer adjustment. Only a subset of the /// intrinsic's operands need to participate in the calling convention. void SelectionDAGBuilder::populateCallLoweringInfo( TargetLowering::CallLoweringInfo &CLI, ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy, bool IsPatchPoint) { TargetLowering::ArgListTy Args; Args.reserve(NumArgs); // Populate the argument list. 
// Attributes for args start at offset 1, after the return attribute. for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1; ArgI != ArgE; ++ArgI) { const Value *V = CS->getOperand(ArgI); assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic."); TargetLowering::ArgListEntry Entry; Entry.Node = getValue(V); Entry.Ty = V->getType(); Entry.setAttributes(&CS, AttrI); Args.push_back(Entry); } CLI.setDebugLoc(getCurSDLoc()) .setChain(getRoot()) .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args)) .setDiscardResult(CS->use_empty()) .setIsPatchPoint(IsPatchPoint); } /// \brief Add a stack map intrinsic call's live variable operands to a stackmap /// or patchpoint target node's operand list. /// /// Constants are converted to TargetConstants purely as an optimization to /// avoid constant materialization and register allocation. /// /// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not /// generate addess computation nodes, and so ExpandISelPseudo can convert the /// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids /// address materialization and register allocation, but may also be required /// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an /// alloca in the entry block, then the runtime may assume that the alloca's /// StackMap location can be read immediately after compilation and that the /// location is valid at any point during execution (this is similar to the /// assumption made by the llvm.gcroot intrinsic). If the alloca's location were /// only available in a register, then the runtime would need to trap when /// execution reaches the StackMap in order to read the alloca's location. 
static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, const SDLoc &DL, SmallVectorImpl &Ops, SelectionDAGBuilder &Builder) { for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) { SDValue OpVal = Builder.getValue(CS.getArgument(i)); if (ConstantSDNode *C = dyn_cast(OpVal)) { Ops.push_back( Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); Ops.push_back( Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64)); } else if (FrameIndexSDNode *FI = dyn_cast(OpVal)) { const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); Ops.push_back(Builder.DAG.getTargetFrameIndex( FI->getIndex(), TLI.getPointerTy(Builder.DAG.getDataLayout()))); } else Ops.push_back(OpVal); } } /// \brief Lower llvm.experimental.stackmap directly to its target opcode. void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { // void @llvm.experimental.stackmap(i32 , i32 , // [live variables...]) assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value."); SDValue Chain, InFlag, Callee, NullPtr; SmallVector Ops; SDLoc DL = getCurSDLoc(); Callee = getValue(CI.getCalledValue()); NullPtr = DAG.getIntPtrConstant(0, DL, true); // The stackmap intrinsic only records the live variables (the arguemnts // passed to it) and emits NOPS (if requested). Unlike the patchpoint // intrinsic, this won't be lowered to a function call. This means we don't // have to worry about calling conventions and target specific lowering code. // Instead we perform the call lowering right here. // // chain, flag = CALLSEQ_START(chain, 0) // chain, flag = STACKMAP(id, nbytes, ..., chain, flag) // chain, flag = CALLSEQ_END(chain, 0, 0, flag) // Chain = DAG.getCALLSEQ_START(getRoot(), NullPtr, DL); InFlag = Chain.getValue(1); // Add the and constants. 
SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); Ops.push_back(DAG.getTargetConstant( cast(IDVal)->getZExtValue(), DL, MVT::i64)); SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); Ops.push_back(DAG.getTargetConstant( cast(NBytesVal)->getZExtValue(), DL, MVT::i32)); // Push live variables for the stack map. addStackMapLiveVars(&CI, 2, DL, Ops, *this); // We are not pushing any register mask info here on the operands list, // because the stackmap doesn't clobber anything. // Push the chain and the glue flag. Ops.push_back(Chain); Ops.push_back(InFlag); // Create the STACKMAP node. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops); Chain = SDValue(SM, 0); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL); // Stackmaps don't generate values, so nothing goes into the NodeMap. // Set the root to the target-lowered call chain. DAG.setRoot(Chain); // Inform the Frame Information that we have a stackmap in this function. FuncInfo.MF->getFrameInfo().setHasStackMap(); } /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, const BasicBlock *EHPadBB) { // void|i64 @llvm.experimental.patchpoint.void|i64(i64 , // i32 , // i8* , // i32 , // [Args...], // [live variables...]) CallingConv::ID CC = CS.getCallingConv(); bool IsAnyRegCC = CC == CallingConv::AnyReg; bool HasDef = !CS->getType()->isVoidTy(); SDLoc dl = getCurSDLoc(); SDValue Callee = getValue(CS->getOperand(PatchPointOpers::TargetPos)); // Handle immediate and symbolic callees. 
if (auto* ConstCallee = dyn_cast(Callee)) Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl, /*isTarget=*/true); else if (auto* SymbolicCallee = dyn_cast(Callee)) Callee = DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(), SDLoc(SymbolicCallee), SymbolicCallee->getValueType(0)); // Get the real number of arguments participating in the call SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos)); unsigned NumArgs = cast(NArgVal)->getZExtValue(); // Skip the four meta args: , , , // Intrinsics include all meta-operands up to but not including CC. unsigned NumMetaOpers = PatchPointOpers::CCPos; assert(CS.arg_size() >= NumMetaOpers + NumArgs && "Not enough arguments provided to the patchpoint intrinsic"); // For AnyRegCC the arguments are lowered later on manually. unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; Type *ReturnTy = IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); TargetLowering::CallLoweringInfo CLI(DAG); populateCallLoweringInfo(CLI, CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, true); std::pair Result = lowerInvokable(CLI, EHPadBB); SDNode *CallEnd = Result.second.getNode(); if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) CallEnd = CallEnd->getOperand(0).getNode(); /// Get a call instruction from the call sequence chain. /// Tail calls are not allowed. assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "Expected a callseq node."); SDNode *Call = CallEnd->getOperand(0).getNode(); bool HasGlue = Call->getGluedNode(); // Replace the target specific call node with the patchable intrinsic. SmallVector Ops; // Add the and constants. SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos)); Ops.push_back(DAG.getTargetConstant( cast(IDVal)->getZExtValue(), dl, MVT::i64)); SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos)); Ops.push_back(DAG.getTargetConstant( cast(NBytesVal)->getZExtValue(), dl, MVT::i32)); // Add the callee. 
Ops.push_back(Callee); // Adjust to account for any arguments that have been passed on the // stack instead. // Call Node: Chain, Target, {Args}, RegMask, [Glue] unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3); NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs; Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32)); // Add the calling convention Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32)); // Add the arguments we omitted previously. The register allocator should // place these in any free register. if (IsAnyRegCC) for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) Ops.push_back(getValue(CS.getArgument(i))); // Push the arguments from the call instruction up to the register mask. SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1; Ops.append(Call->op_begin() + 2, e); // Push live variables for the stack map. addStackMapLiveVars(CS, NumMetaOpers + NumArgs, dl, Ops, *this); // Push the register mask info. if (HasGlue) Ops.push_back(*(Call->op_end()-2)); else Ops.push_back(*(Call->op_end()-1)); // Push the chain (this is originally the first operand of the call, but // becomes now the last or second to last operand). Ops.push_back(*(Call->op_begin())); // Push the glue flag (last operand). if (HasGlue) Ops.push_back(*(Call->op_end()-1)); SDVTList NodeTys; if (IsAnyRegCC && HasDef) { // Create the return types based on the intrinsic definition const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector ValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs); assert(ValueVTs.size() == 1 && "Expected only one return value type."); // There is always a chain and a glue type at the end ValueVTs.push_back(MVT::Other); ValueVTs.push_back(MVT::Glue); NodeTys = DAG.getVTList(ValueVTs); } else NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); // Replace the target specific call node with a PATCHPOINT node. 
MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT, dl, NodeTys, Ops); // Update the NodeMap. if (HasDef) { if (IsAnyRegCC) setValue(CS.getInstruction(), SDValue(MN, 0)); else setValue(CS.getInstruction(), Result.first); } // Fixup the consumers of the intrinsic. The chain and glue may be used in the // call sequence. Furthermore the location of the chain and glue can change // when the AnyReg calling convention is used and the intrinsic returns a // value. if (IsAnyRegCC && HasDef) { SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)}; SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)}; DAG.ReplaceAllUsesOfValuesWith(From, To, 2); } else DAG.ReplaceAllUsesWith(Call, MN); DAG.DeleteNode(Call); // Inform the Frame Information that we have a patchpoint in this function. FuncInfo.MF->getFrameInfo().setHasPatchPoint(); } /// Returns an AttributeSet representing the attributes applied to the return /// value of the given call. static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) { SmallVector Attrs; if (CLI.RetSExt) Attrs.push_back(Attribute::SExt); if (CLI.RetZExt) Attrs.push_back(Attribute::ZExt); if (CLI.IsInReg) Attrs.push_back(Attribute::InReg); return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex, Attrs); } /// TargetLowering::LowerCallTo - This is the default LowerCallTo /// implementation, which just calls LowerCall. /// FIXME: When all targets are /// migrated to using LowerCall, this hook should be integrated into SDISel. std::pair TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // Handle the incoming return values from the call. 
CLI.Ins.clear(); Type *OrigRetTy = CLI.RetTy; SmallVector RetTys; SmallVector Offsets; auto &DL = CLI.DAG.getDataLayout(); ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets); SmallVector Outs; GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL); bool CanLowerReturn = this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(), CLI.IsVarArg, Outs, CLI.RetTy->getContext()); SDValue DemoteStackSlot; int DemoteStackIdx = -100; if (!CanLowerReturn) { // FIXME: equivalent assert? // assert(!CS.hasInAllocaArgument() && // "sret demotion is incompatible with inalloca"); uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy); unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy); MachineFunction &MF = CLI.DAG.getMachineFunction(); DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false); Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy(DL)); ArgListEntry Entry; Entry.Node = DemoteStackSlot; Entry.Ty = StackSlotPtrType; Entry.isSExt = false; Entry.isZExt = false; Entry.isInReg = false; Entry.isSRet = true; Entry.isNest = false; Entry.isByVal = false; Entry.isReturned = false; Entry.isSwiftSelf = false; Entry.isSwiftError = false; Entry.Alignment = Align; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); // sret demotion isn't compatible with tail-calls, since the sret argument // points into the callers stack frame. 
CLI.IsTailCall = false; } else { for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags; MyFlags.VT = RegisterVT; MyFlags.ArgVT = VT; MyFlags.Used = CLI.IsReturnValueUsed; if (CLI.RetSExt) MyFlags.Flags.setSExt(); if (CLI.RetZExt) MyFlags.Flags.setZExt(); if (CLI.IsInReg) MyFlags.Flags.setInReg(); CLI.Ins.push_back(MyFlags); } } } // We push in swifterror return as the last element of CLI.Ins. ArgListTy &Args = CLI.getArgs(); if (supportSwiftError()) { for (unsigned i = 0, e = Args.size(); i != e; ++i) { if (Args[i].isSwiftError) { ISD::InputArg MyFlags; MyFlags.VT = getPointerTy(DL); MyFlags.ArgVT = EVT(getPointerTy(DL)); MyFlags.Flags.setSwiftError(); CLI.Ins.push_back(MyFlags); } } } // Handle all of the outgoing arguments. CLI.Outs.clear(); CLI.OutVals.clear(); for (unsigned i = 0, e = Args.size(); i != e; ++i) { SmallVector ValueVTs; ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs); Type *FinalType = Args[i].Ty; if (Args[i].isByVal) FinalType = cast(Args[i].Ty)->getElementType(); bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( FinalType, CLI.CallConv, CLI.IsVarArg); for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext()); SDValue Op = SDValue(Args[i].Node.getNode(), Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); if (Args[i].isZExt) Flags.setZExt(); if (Args[i].isSExt) Flags.setSExt(); if (Args[i].isInReg) { // If we are using vectorcall calling convention, a structure that is // passed InReg - is surely an HVA if (CLI.CallConv == CallingConv::X86_VectorCall && isa(FinalType)) { // The first value of a structure is marked if (0 == Value) Flags.setHvaStart(); 
Flags.setHva(); } // Set InReg Flag Flags.setInReg(); } if (Args[i].isSRet) Flags.setSRet(); if (Args[i].isSwiftSelf) Flags.setSwiftSelf(); if (Args[i].isSwiftError) Flags.setSwiftError(); if (Args[i].isByVal) Flags.setByVal(); if (Args[i].isInAlloca) { Flags.setInAlloca(); // Set the byval flag for CCAssignFn callbacks that don't know about // inalloca. This way we can know how many bytes we should've allocated // and how many bytes a callee cleanup function will pop. If we port // inalloca to more targets, we'll have to add custom inalloca handling // in the various CC lowering callbacks. Flags.setByVal(); } if (Args[i].isByVal || Args[i].isInAlloca) { PointerType *Ty = cast(Args[i].Ty); Type *ElementTy = Ty->getElementType(); Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); // For ByVal, alignment should come from FE. BE will guess if this // info is not there but there are cases it cannot get right. unsigned FrameAlign; if (Args[i].Alignment) FrameAlign = Args[i].Alignment; else FrameAlign = getByValTypeAlignment(ElementTy, DL); Flags.setByValAlign(FrameAlign); } if (Args[i].isNest) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); Flags.setOrigAlign(OriginalAlignment); MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT); SmallVector Parts(NumParts); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; if (Args[i].isSExt) ExtendKind = ISD::SIGN_EXTEND; else if (Args[i].isZExt) ExtendKind = ISD::ZERO_EXTEND; // Conservatively only handle 'returned' on non-vectors for now if (Args[i].isReturned && !Op.getValueType().isVector()) { assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues && "unexpected use of 'returned'"); // Before passing 'returned' to the target lowering code, ensure that // either the register MVT and the actual EVT are the same size or that // the return value and argument are extended in the same way; in these // cases it's safe to pass the argument register 
value unchanged as the // return register value (although it's at the target's option whether // to do so) // TODO: allow code generation to take advantage of partially preserved // registers rather than clobbering the entire register when the // parameter extension method is not compatible with the return // extension method if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) || (ExtendKind != ISD::ANY_EXTEND && CLI.RetSExt == Args[i].isSExt && CLI.RetZExt == Args[i].isZExt)) Flags.setReturned(); } getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind); for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT, i < CLI.NumFixedArgs, i, j*Parts[j].getValueType().getStoreSize()); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) { MyFlags.Flags.setOrigAlign(1); if (j == NumParts - 1) MyFlags.Flags.setSplitEnd(); } CLI.Outs.push_back(MyFlags); CLI.OutVals.push_back(Parts[j]); } if (NeedsRegBlock && Value == NumValues - 1) CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast(); } } SmallVector InVals; CLI.Chain = LowerCall(CLI, InVals); // Update CLI.InVals to use outside of this function. CLI.InVals = InVals; // Verify that the target's LowerCall behaved as expected. assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other && "LowerCall didn't return a valid chain!"); assert((!CLI.IsTailCall || InVals.empty()) && "LowerCall emitted a return value for a tail call!"); assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) && "LowerCall didn't emit the correct number of values!"); // For a tail call, the return value is merely live-out and there aren't // any nodes in the DAG representing it. Return a special value to // indicate that a tail call has been emitted and no more Instructions // should be processed in the current block. 
if (CLI.IsTailCall) { CLI.DAG.setRoot(CLI.Chain); return std::make_pair(SDValue(), SDValue()); } #ifndef NDEBUG for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) { assert(InVals[i].getNode() && "LowerCall emitted a null value!"); assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() && "LowerCall emitted a value with the wrong type!"); } #endif SmallVector ReturnValues; if (!CanLowerReturn) { // The instruction result is the result of loading from the // hidden sret parameter. SmallVector PVTs; Type *PtrRetTy = PointerType::getUnqual(OrigRetTy); ComputeValueVTs(*this, DL, PtrRetTy, PVTs); assert(PVTs.size() == 1 && "Pointers should fit in one register"); EVT PtrVT = PVTs[0]; unsigned NumValues = RetTys.size(); ReturnValues.resize(NumValues); SmallVector Chains(NumValues); // An aggregate return value cannot wrap around the address space, so // offsets to its parts don't wrap either. SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); for (unsigned i = 0; i < NumValues; ++i) { SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, CLI.DAG.getConstant(Offsets[i], CLI.DL, PtrVT), &Flags); SDValue L = CLI.DAG.getLoad( RetTys[i], CLI.DL, CLI.Chain, Add, MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), DemoteStackIdx, Offsets[i]), /* Alignment = */ 1); ReturnValues[i] = L; Chains[i] = L.getValue(1); } CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains); } else { // Collect the legal value parts into potentially illegal values // that correspond to the original function's return values. 
Optional AssertOp; if (CLI.RetSExt) AssertOp = ISD::AssertSext; else if (CLI.RetZExt) AssertOp = ISD::AssertZext; unsigned CurReg = 0; for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], NumRegs, RegisterVT, VT, nullptr, AssertOp)); CurReg += NumRegs; } // For a function returning void, there is no return value. We can't create // such a node, so we just return a null return value in that case. In // that case, nothing will actually look at the value. if (ReturnValues.empty()) return std::make_pair(SDValue(), CLI.Chain); } SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL, CLI.DAG.getVTList(RetTys), ReturnValues); return std::make_pair(Res, CLI.Chain); } void TargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { if (SDValue Res = LowerOperation(SDValue(N, 0), DAG)) Results.push_back(Res); } SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("LowerOperation not implemented for this target!"); } void SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { SDValue Op = getNonRegisterValue(V); assert((Op.getOpcode() != ISD::CopyFromReg || cast(Op.getOperand(1))->getReg() != Reg) && "Copy from a reg to the same reg!"); assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType()); SDValue Chain = DAG.getEntryNode(); ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == FuncInfo.PreferredExtendType.end()) ? 
ISD::ANY_EXTEND : FuncInfo.PreferredExtendType[V]; RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType); PendingExports.push_back(Chain); } #include "llvm/CodeGen/SelectionDAGISel.h" /// isOnlyUsedInEntryBlock - If the specified argument is only used in the /// entry block, return true. This includes arguments used by switches, since /// the switch may expand into multiple basic blocks. static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { // With FastISel active, we may be splitting blocks, so force creation // of virtual registers for all non-dead arguments. if (FastISel) return A->use_empty(); const BasicBlock &Entry = A->getParent()->front(); for (const User *U : A->users()) if (cast(U)->getParent() != &Entry || isa(U)) return false; // Use not in entry block. return true; } void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; SDLoc dl = SDB->getCurSDLoc(); const DataLayout &DL = DAG.getDataLayout(); SmallVector Ins; if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. SmallVector ValueVTs; ComputeValueVTs(*TLI, DAG.getDataLayout(), PointerType::getUnqual(F.getReturnType()), ValueVTs); // NOTE: Assuming that a pointer will never break down to more than one VT // or one register. ISD::ArgFlagsTy Flags; Flags.setSRet(); MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]); ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, ISD::InputArg::NoArgIndex, 0); Ins.push_back(RetArg); } // Set up the incoming argument description vector. 
unsigned Idx = 1; for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I, ++Idx) { SmallVector ValueVTs; ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs); bool isArgValueUsed = !I->use_empty(); unsigned PartBase = 0; Type *FinalType = I->getType(); if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) FinalType = cast(FinalType)->getElementType(); bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters( FinalType, F.getCallingConv(), F.isVarArg()); for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); ISD::ArgFlagsTy Flags; unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) Flags.setZExt(); if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) Flags.setSExt(); if (F.getAttributes().hasAttribute(Idx, Attribute::InReg)) { // If we are using vectorcall calling convention, a structure that is // passed InReg - is surely an HVA if (F.getCallingConv() == CallingConv::X86_VectorCall && isa(I->getType())) { // The first value of a structure is marked if (0 == Value) Flags.setHvaStart(); Flags.setHva(); } // Set InReg Flag Flags.setInReg(); } if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet)) Flags.setSRet(); if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftSelf)) Flags.setSwiftSelf(); if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftError)) Flags.setSwiftError(); if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) Flags.setByVal(); if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) { Flags.setInAlloca(); // Set the byval flag for CCAssignFn callbacks that don't know about // inalloca. This way we can know how many bytes we should've allocated // and how many bytes a callee cleanup function will pop. 
If we port // inalloca to more targets, we'll have to add custom inalloca handling // in the various CC lowering callbacks. Flags.setByVal(); } if (F.getCallingConv() == CallingConv::X86_INTR) { // IA Interrupt passes frame (1st parameter) by value in the stack. if (Idx == 1) Flags.setByVal(); } if (Flags.isByVal() || Flags.isInAlloca()) { PointerType *Ty = cast(I->getType()); Type *ElementTy = Ty->getElementType(); Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. unsigned FrameAlign; if (F.getParamAlignment(Idx)) FrameAlign = F.getParamAlignment(Idx); else FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL); Flags.setByValAlign(FrameAlign); } if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); Flags.setOrigAlign(OriginalAlignment); MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, Idx-1, PartBase+i*RegisterVT.getStoreSize()); if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 else if (i > 0) { MyFlags.Flags.setOrigAlign(1); if (i == NumRegs - 1) MyFlags.Flags.setSplitEnd(); } Ins.push_back(MyFlags); } if (NeedsRegBlock && Value == NumValues - 1) Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast(); PartBase += VT.getStoreSize(); } } // Call the target to set up the argument values. SmallVector InVals; SDValue NewRoot = TLI->LowerFormalArguments( DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals); // Verify that the target's LowerFormalArguments behaved as expected. 
assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other && "LowerFormalArguments didn't return a valid chain!"); assert(InVals.size() == Ins.size() && "LowerFormalArguments didn't emit the correct number of values!"); DEBUG({ for (unsigned i = 0, e = Ins.size(); i != e; ++i) { assert(InVals[i].getNode() && "LowerFormalArguments emitted a null value!"); assert(EVT(Ins[i].VT) == InVals[i].getValueType() && "LowerFormalArguments emitted a value with the wrong type!"); } }); // Update the DAG with the new chain value resulting from argument lowering. DAG.setRoot(NewRoot); // Set up the argument values. unsigned i = 0; Idx = 1; if (!FuncInfo->CanLowerReturn) { // Create a virtual register for the sret pointer, and put in a copy // from the sret argument into it. SmallVector ValueVTs; ComputeValueVTs(*TLI, DAG.getDataLayout(), PointerType::getUnqual(F.getReturnType()), ValueVTs); MVT VT = ValueVTs[0].getSimpleVT(); MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); Optional AssertOp = None; SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT, nullptr, AssertOp); MachineFunction& MF = SDB->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT)); FuncInfo->DemoteRegister = SRetReg; NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue); DAG.setRoot(NewRoot); // i indexes lowered arguments. Bump it past the hidden sret argument. // Idx indexes LLVM arguments. Don't touch it. ++i; } for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I, ++Idx) { SmallVector ArgValues; SmallVector ValueVTs; ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); // If this argument is unused then remember its value. It is used to generate // debugging information. 
bool isSwiftErrorArg = TLI->supportSwiftError() && F.getAttributes().hasAttribute(Idx, Attribute::SwiftError); if (I->use_empty() && NumValues && !isSwiftErrorArg) { SDB->setUnusedArgValue(&*I, InVals[i]); // Also remember any frame index for use in FastISel. if (FrameIndexSDNode *FI = dyn_cast(InVals[i].getNode())) FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); } for (unsigned Val = 0; Val != NumValues; ++Val) { EVT VT = ValueVTs[Val]; MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT); unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT); // Even an apparant 'unused' swifterror argument needs to be returned. So // we do generate a copy for it that can be used on return from the // function. if (!I->use_empty() || isSwiftErrorArg) { Optional AssertOp; if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) AssertOp = ISD::AssertSext; else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) AssertOp = ISD::AssertZext; ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, PartVT, VT, nullptr, AssertOp)); } i += NumParts; } // We don't need to do anything else for unused arguments. if (ArgValues.empty()) continue; // Note down frame index. if (FrameIndexSDNode *FI = dyn_cast(ArgValues[0].getNode())) FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues), SDB->getCurSDLoc()); SDB->setValue(&*I, Res); if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { if (LoadSDNode *LNode = dyn_cast(Res.getOperand(0).getNode())) if (FrameIndexSDNode *FI = dyn_cast(LNode->getBasePtr().getNode())) FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); } // Update the SwiftErrorVRegDefMap. 
if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) { unsigned Reg = cast(Res.getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, FuncInfo->SwiftErrorArg, Reg); } // If this argument is live outside of the entry block, insert a copy from // wherever we got it to the vreg that other BB's will reference it as. if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) { // If we can, though, try to skip creating an unnecessary vreg. // FIXME: This isn't very clean... it would be nice to make this more // general. It's also subtly incompatible with the hacks FastISel // uses with vregs. unsigned Reg = cast(Res.getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { FuncInfo->ValueMap[&*I] = Reg; continue; } } if (!isOnlyUsedInEntryBlock(&*I, TM.Options.EnableFastISel)) { FuncInfo->InitializeRegForValue(&*I); SDB->CopyToExportRegsIfNeeded(&*I); } } assert(i == InVals.size() && "Argument register count mismatch!"); // Finally, if the target has anything special to do, allow it to do so. EmitFunctionEntryCode(); } /// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to /// ensure constants are generated when needed. Remember the virtual registers /// that need to be added to the Machine PHI nodes as input. We cannot just /// directly add them, because expansion might result in multiple MBB's for one /// BB. As such, the start of the BB might correspond to a different MBB than /// the end. /// void SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { const TerminatorInst *TI = LLVMBB->getTerminator(); SmallPtrSet SuccsHandled; // Check PHI nodes in successors that expect a value to be available from this // block. 
for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { const BasicBlock *SuccBB = TI->getSuccessor(succ); if (!isa(SuccBB->begin())) continue; MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; // If this terminator has multiple identical successors (common for // switches), only handle each succ once. if (!SuccsHandled.insert(SuccMBB).second) continue; MachineBasicBlock::iterator MBBI = SuccMBB->begin(); // At this point we know that there is a 1-1 correspondence between LLVM PHI // nodes and Machine PHI nodes, but the incoming operands have not been // emitted yet. for (BasicBlock::const_iterator I = SuccBB->begin(); const PHINode *PN = dyn_cast(I); ++I) { // Ignore dead phi's. if (PN->use_empty()) continue; // Skip empty types if (PN->getType()->isEmptyTy()) continue; unsigned Reg; const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); if (const Constant *C = dyn_cast(PHIOp)) { unsigned &RegOut = ConstantsOut[C]; if (RegOut == 0) { RegOut = FuncInfo.CreateRegs(C->getType()); CopyValueToVirtualRegister(C, RegOut); } Reg = RegOut; } else { DenseMap::iterator I = FuncInfo.ValueMap.find(PHIOp); if (I != FuncInfo.ValueMap.end()) Reg = I->second; else { assert(isa(PHIOp) && FuncInfo.StaticAllocaMap.count(cast(PHIOp)) && "Didn't codegen value into a register!??"); Reg = FuncInfo.CreateRegs(PHIOp->getType()); CopyValueToVirtualRegister(PHIOp, Reg); } } // Remember that this register needs to added to the machine PHI node as // the input for this MBB. 
SmallVector ValueVTs; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); ComputeValueVTs(TLI, DAG.getDataLayout(), PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); for (unsigned i = 0, e = NumRegisters; i != e; ++i) FuncInfo.PHINodesToUpdate.push_back( std::make_pair(&*MBBI++, Reg + i)); Reg += NumRegisters; } } } ConstantsOut.clear(); } /// Add a successor MBB to ParentMBB< creating a new MachineBB for BB if SuccMBB /// is 0. MachineBasicBlock * SelectionDAGBuilder::StackProtectorDescriptor:: AddSuccessorMBB(const BasicBlock *BB, MachineBasicBlock *ParentMBB, bool IsLikely, MachineBasicBlock *SuccMBB) { // If SuccBB has not been created yet, create it. if (!SuccMBB) { MachineFunction *MF = ParentMBB->getParent(); MachineFunction::iterator BBI(ParentMBB); SuccMBB = MF->CreateMachineBasicBlock(BB); MF->insert(++BBI, SuccMBB); } // Add it as a successor of ParentMBB. ParentMBB->addSuccessor( SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely)); return SuccMBB; } MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) { MachineFunction::iterator I(MBB); if (++I == FuncInfo.MF->end()) return nullptr; return &*I; } /// During lowering new call nodes can be created (such as memset, etc.). /// Those will become new roots of the current DAG, but complications arise /// when they are tail calls. In such cases, the call lowering will update /// the root, but the builder still needs to know that a tail call has been /// lowered in order to avoid generating an additional return. void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { // If the node is null, we do have a tail call. 
if (MaybeTC.getNode() != nullptr) DAG.setRoot(MaybeTC); else HasTailCall = true; } bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters, const SmallVectorImpl &TotalCases, unsigned First, unsigned Last, unsigned Density) const { assert(Last >= First); assert(TotalCases[Last] >= TotalCases[First]); const APInt &LowCase = Clusters[First].Low->getValue(); const APInt &HighCase = Clusters[Last].High->getValue(); assert(LowCase.getBitWidth() == HighCase.getBitWidth()); // FIXME: A range of consecutive cases has 100% density, but only requires one // comparison to lower. We should discriminate against such consecutive ranges // in jump tables. uint64_t Diff = (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100); uint64_t Range = Diff + 1; uint64_t NumCases = TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]); assert(NumCases < UINT64_MAX / 100); assert(Range >= NumCases); return NumCases * 100 >= Range * Density; } static inline bool areJTsAllowed(const TargetLowering &TLI, const SwitchInst *SI) { const Function *Fn = SI->getParent()->getParent(); if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true") return false; return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); } bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters, unsigned First, unsigned Last, const SwitchInst *SI, MachineBasicBlock *DefaultMBB, CaseCluster &JTCluster) { assert(First <= Last); auto Prob = BranchProbability::getZero(); unsigned NumCmps = 0; std::vector Table; DenseMap JTProbs; // Initialize probabilities in JTProbs. for (unsigned I = First; I <= Last; ++I) JTProbs[Clusters[I].MBB] = BranchProbability::getZero(); for (unsigned I = First; I <= Last; ++I) { assert(Clusters[I].Kind == CC_Range); Prob += Clusters[I].Prob; const APInt &Low = Clusters[I].Low->getValue(); const APInt &High = Clusters[I].High->getValue(); NumCmps += (Low == High) ? 
1 : 2; if (I != First) { // Fill the gap between this and the previous cluster. const APInt &PreviousHigh = Clusters[I - 1].High->getValue(); assert(PreviousHigh.slt(Low)); uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1; for (uint64_t J = 0; J < Gap; J++) Table.push_back(DefaultMBB); } uint64_t ClusterSize = (High - Low).getLimitedValue() + 1; for (uint64_t J = 0; J < ClusterSize; ++J) Table.push_back(Clusters[I].MBB); JTProbs[Clusters[I].MBB] += Clusters[I].Prob; } unsigned NumDests = JTProbs.size(); if (isSuitableForBitTests(NumDests, NumCmps, Clusters[First].Low->getValue(), Clusters[Last].High->getValue())) { // Clusters[First..Last] should be lowered as bit tests instead. return false; } // Create the MBB that will load from and jump through the table. // Note: We create it here, but it's not inserted into the function yet. MachineFunction *CurMF = FuncInfo.MF; MachineBasicBlock *JumpTableMBB = CurMF->CreateMachineBasicBlock(SI->getParent()); // Add successors. Note: use table order for determinism. SmallPtrSet Done; for (MachineBasicBlock *Succ : Table) { if (Done.count(Succ)) continue; addSuccessorWithProb(JumpTableMBB, Succ, JTProbs[Succ]); Done.insert(Succ); } JumpTableMBB->normalizeSuccProbs(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding()) ->createJumpTableIndex(Table); // Set up the jump table info. JumpTable JT(-1U, JTI, JumpTableMBB, nullptr); JumpTableHeader JTH(Clusters[First].Low->getValue(), Clusters[Last].High->getValue(), SI->getCondition(), nullptr, false); JTCases.emplace_back(std::move(JTH), std::move(JT)); JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High, JTCases.size() - 1, Prob); return true; } void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI, MachineBasicBlock *DefaultMBB) { #ifndef NDEBUG // Clusters must be non-empty, sorted, and only contain Range clusters. 
assert(!Clusters.empty()); for (CaseCluster &C : Clusters) assert(C.Kind == CC_Range); for (unsigned i = 1, e = Clusters.size(); i < e; ++i) assert(Clusters[i - 1].High->getValue().slt(Clusters[i].Low->getValue())); #endif const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!areJTsAllowed(TLI, SI)) return; const bool OptForSize = DefaultMBB->getParent()->getFunction()->optForSize(); const int64_t N = Clusters.size(); const unsigned MinJumpTableEntries = TLI.getMinimumJumpTableEntries(); const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2; const unsigned MaxJumpTableSize = OptForSize || TLI.getMaximumJumpTableSize() == 0 ? UINT_MAX : TLI.getMaximumJumpTableSize(); if (N < 2 || N < MinJumpTableEntries) return; // TotalCases[i]: Total nbr of cases in Clusters[0..i]. SmallVector TotalCases(N); for (unsigned i = 0; i < N; ++i) { const APInt &Hi = Clusters[i].High->getValue(); const APInt &Lo = Clusters[i].Low->getValue(); TotalCases[i] = (Hi - Lo).getLimitedValue() + 1; if (i != 0) TotalCases[i] += TotalCases[i - 1]; } const unsigned MinDensity = OptForSize ? OptsizeJumpTableDensity : JumpTableDensity; // Cheap case: the whole range may be suitable for jump table. unsigned JumpTableSize = (Clusters[N - 1].High->getValue() - Clusters[0].Low->getValue()) .getLimitedValue(UINT_MAX - 1) + 1; if (JumpTableSize <= MaxJumpTableSize && isDense(Clusters, TotalCases, 0, N - 1, MinDensity)) { CaseCluster JTCluster; if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) { Clusters[0] = JTCluster; Clusters.resize(1); return; } } // The algorithm below is not suitable for -O0. if (TM.getOptLevel() == CodeGenOpt::None) return; // Split Clusters into minimum number of dense partitions. The algorithm uses // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code // for the Case Statement'" (1994), but builds the MinPartitions array in // reverse order to make it easier to reconstruct the partitions in ascending // order. 
In the choice between two optimal partitionings, it picks the one // which yields more jump tables. // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1]. SmallVector MinPartitions(N); // LastElement[i] is the last element of the partition starting at i. SmallVector LastElement(N); // PartitionsScore[i] is used to break ties when choosing between two // partitionings resulting in the same number of partitions. SmallVector PartitionsScore(N); // For PartitionsScore, a small number of comparisons is considered as good as // a jump table and a single comparison is considered better than a jump // table. enum PartitionScores : unsigned { NoTable = 0, Table = 1, FewCases = 1, SingleCase = 2 }; // Base case: There is only one way to partition Clusters[N-1]. MinPartitions[N - 1] = 1; LastElement[N - 1] = N - 1; PartitionsScore[N - 1] = PartitionScores::SingleCase; // Note: loop indexes are signed to avoid underflow. for (int64_t i = N - 2; i >= 0; i--) { // Find optimal partitioning of Clusters[i..N-1]. // Baseline: Put Clusters[i] into a partition on its own. MinPartitions[i] = MinPartitions[i + 1] + 1; LastElement[i] = i; PartitionsScore[i] = PartitionsScore[i + 1] + PartitionScores::SingleCase; // Search for a solution that results in fewer partitions. for (int64_t j = N - 1; j > i; j--) { // Try building a partition from Clusters[i..j]. JumpTableSize = (Clusters[j].High->getValue() - Clusters[i].Low->getValue()) .getLimitedValue(UINT_MAX - 1) + 1; if (JumpTableSize <= MaxJumpTableSize && isDense(Clusters, TotalCases, i, j, MinDensity)) { unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); unsigned Score = j == N - 1 ? 
0 : PartitionsScore[j + 1]; int64_t NumEntries = j - i + 1; if (NumEntries == 1) Score += PartitionScores::SingleCase; else if (NumEntries <= SmallNumberOfEntries) Score += PartitionScores::FewCases; else if (NumEntries >= MinJumpTableEntries) Score += PartitionScores::Table; // If this leads to fewer partitions, or to the same number of // partitions with better score, it is a better partitioning. if (NumPartitions < MinPartitions[i] || (NumPartitions == MinPartitions[i] && Score > PartitionsScore[i])) { MinPartitions[i] = NumPartitions; LastElement[i] = j; PartitionsScore[i] = Score; } } } } // Iterate over the partitions, replacing some with jump tables in-place. unsigned DstIndex = 0; for (unsigned First = 0, Last; First < N; First = Last + 1) { Last = LastElement[First]; assert(Last >= First); assert(DstIndex <= First); unsigned NumClusters = Last - First + 1; CaseCluster JTCluster; if (NumClusters >= MinJumpTableEntries && buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) { Clusters[DstIndex++] = JTCluster; } else { for (unsigned I = First; I <= Last; ++I) std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I])); } } Clusters.resize(DstIndex); } bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) { // FIXME: Using the pointer type doesn't seem ideal. uint64_t BW = DAG.getDataLayout().getPointerSizeInBits(); uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; return Range <= BW; } bool SelectionDAGBuilder::isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, const APInt &Low, const APInt &High) { // FIXME: I don't think NumCmps is the correct metric: a single case and a // range of cases both require only one branch to lower. Just looking at the // number of clusters and destinations should be enough to decide whether to // build bit tests. // To lower a range with bit tests, the range must fit the bitwidth of a // machine word. 
if (!rangeFitsInWord(Low, High)) return false; // Decide whether it's profitable to lower this range with bit tests. Each // destination requires a bit test and branch, and there is an overall range // check branch. For a small number of clusters, separate comparisons might be // cheaper, and for many destinations, splitting the range might be better. return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) || (NumDests == 3 && NumCmps >= 6); } bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last, const SwitchInst *SI, CaseCluster &BTCluster) { assert(First <= Last); if (First == Last) return false; BitVector Dests(FuncInfo.MF->getNumBlockIDs()); unsigned NumCmps = 0; for (int64_t I = First; I <= Last; ++I) { assert(Clusters[I].Kind == CC_Range); Dests.set(Clusters[I].MBB->getNumber()); NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2; } unsigned NumDests = Dests.count(); APInt Low = Clusters[First].Low->getValue(); APInt High = Clusters[Last].High->getValue(); assert(Low.slt(High)); if (!isSuitableForBitTests(NumDests, NumCmps, Low, High)) return false; APInt LowBound; APInt CmpRange; const int BitWidth = DAG.getTargetLoweringInfo() .getPointerTy(DAG.getDataLayout()) .getSizeInBits(); assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!"); // Check if the clusters cover a contiguous range such that no value in the // range will jump to the default statement. bool ContiguousRange = true; for (int64_t I = First + 1; I <= Last; ++I) { if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) { ContiguousRange = false; break; } } if (Low.isStrictlyPositive() && High.slt(BitWidth)) { // Optimize the case where all the case values fit in a word without having // to subtract minValue. In this case, we can optimize away the subtraction. 
LowBound = APInt::getNullValue(Low.getBitWidth()); CmpRange = High; ContiguousRange = false; } else { LowBound = Low; CmpRange = High - Low; } CaseBitsVector CBV; auto TotalProb = BranchProbability::getZero(); for (unsigned i = First; i <= Last; ++i) { // Find the CaseBits for this destination. unsigned j; for (j = 0; j < CBV.size(); ++j) if (CBV[j].BB == Clusters[i].MBB) break; if (j == CBV.size()) CBV.push_back( CaseBits(0, Clusters[i].MBB, 0, BranchProbability::getZero())); CaseBits *CB = &CBV[j]; // Update Mask, Bits and ExtraProb. uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue(); uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue(); assert(Hi >= Lo && Hi < 64 && "Invalid bit case!"); CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo; CB->Bits += Hi - Lo + 1; CB->ExtraProb += Clusters[i].Prob; TotalProb += Clusters[i].Prob; } BitTestInfo BTI; std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) { // Sort by probability first, number of bits second. if (a.ExtraProb != b.ExtraProb) return a.ExtraProb > b.ExtraProb; return a.Bits > b.Bits; }); for (auto &CB : CBV) { MachineBasicBlock *BitTestBB = FuncInfo.MF->CreateMachineBasicBlock(SI->getParent()); BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraProb)); } BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange), SI->getCondition(), -1U, MVT::Other, false, ContiguousRange, nullptr, nullptr, std::move(BTI), TotalProb); BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High, BitTestCases.size() - 1, TotalProb); return true; } void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, const SwitchInst *SI) { // Partition Clusters into as few subsets as possible, where each subset has a // range that fits in a machine word and has <= 3 unique destinations. #ifndef NDEBUG // Clusters must be sorted and contain Range or JumpTable clusters. 
assert(!Clusters.empty()); assert(Clusters[0].Kind == CC_Range || Clusters[0].Kind == CC_JumpTable); for (const CaseCluster &C : Clusters) assert(C.Kind == CC_Range || C.Kind == CC_JumpTable); for (unsigned i = 1; i < Clusters.size(); ++i) assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue())); #endif // The algorithm below is not suitable for -O0. if (TM.getOptLevel() == CodeGenOpt::None) return; // If target does not have legal shift left, do not emit bit tests at all. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT PTy = TLI.getPointerTy(DAG.getDataLayout()); if (!TLI.isOperationLegal(ISD::SHL, PTy)) return; int BitWidth = PTy.getSizeInBits(); const int64_t N = Clusters.size(); // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1]. SmallVector MinPartitions(N); // LastElement[i] is the last element of the partition starting at i. SmallVector LastElement(N); // FIXME: This might not be the best algorithm for finding bit test clusters. // Base case: There is only one way to partition Clusters[N-1]. MinPartitions[N - 1] = 1; LastElement[N - 1] = N - 1; // Note: loop indexes are signed to avoid underflow. for (int64_t i = N - 2; i >= 0; --i) { // Find optimal partitioning of Clusters[i..N-1]. // Baseline: Put Clusters[i] into a partition on its own. MinPartitions[i] = MinPartitions[i + 1] + 1; LastElement[i] = i; // Search for a solution that results in fewer partitions. // Note: the search is limited by BitWidth, reducing time complexity. for (int64_t j = std::min(N - 1, i + BitWidth - 1); j > i; --j) { // Try building a partition from Clusters[i..j]. // Check the range. if (!rangeFitsInWord(Clusters[i].Low->getValue(), Clusters[j].High->getValue())) continue; // Check nbr of destinations and cluster types. // FIXME: This works, but doesn't seem very efficient. 
bool RangesOnly = true; BitVector Dests(FuncInfo.MF->getNumBlockIDs()); for (int64_t k = i; k <= j; k++) { if (Clusters[k].Kind != CC_Range) { RangesOnly = false; break; } Dests.set(Clusters[k].MBB->getNumber()); } if (!RangesOnly || Dests.count() > 3) break; // Check if it's a better partition. unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); if (NumPartitions < MinPartitions[i]) { // Found a better partition. MinPartitions[i] = NumPartitions; LastElement[i] = j; } } } // Iterate over the partitions, replacing with bit-test clusters in-place. unsigned DstIndex = 0; for (unsigned First = 0, Last; First < N; First = Last + 1) { Last = LastElement[First]; assert(First <= Last); assert(DstIndex <= First); CaseCluster BitTestCluster; if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) { Clusters[DstIndex++] = BitTestCluster; } else { size_t NumClusters = Last - First + 1; std::memmove(&Clusters[DstIndex], &Clusters[First], sizeof(Clusters[0]) * NumClusters); DstIndex += NumClusters; } } Clusters.resize(DstIndex); } void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, MachineBasicBlock *SwitchMBB, MachineBasicBlock *DefaultMBB) { MachineFunction *CurMF = FuncInfo.MF; MachineBasicBlock *NextMBB = nullptr; MachineFunction::iterator BBI(W.MBB); if (++BBI != FuncInfo.MF->end()) NextMBB = &*BBI; unsigned Size = W.LastCluster - W.FirstCluster + 1; BranchProbabilityInfo *BPI = FuncInfo.BPI; if (Size == 2 && W.MBB == SwitchMBB) { // If any two of the cases has the same destination, and if one value // is the same as the other, but has one bit unset that the other has set, // use bit manipulation to do two compares at once. For example: // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" // TODO: This could be extended to merge any 2 cases in switches with 3 // cases. // TODO: Handle cases where W.CaseBB != SwitchBB. 
CaseCluster &Small = *W.FirstCluster; CaseCluster &Big = *W.LastCluster; if (Small.Low == Small.High && Big.Low == Big.High && Small.MBB == Big.MBB) { const APInt &SmallValue = Small.Low->getValue(); const APInt &BigValue = Big.Low->getValue(); // Check that there is only one bit different. APInt CommonBit = BigValue ^ SmallValue; if (CommonBit.isPowerOf2()) { SDValue CondLHS = getValue(Cond); EVT VT = CondLHS.getValueType(); SDLoc DL = getCurSDLoc(); SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, DAG.getConstant(CommonBit, DL, VT)); SDValue Cond = DAG.getSetCC( DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT), ISD::SETEQ); // Update successor info. // Both Small and Big will jump to Small.BB, so we sum up the // probabilities. addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob); if (BPI) addSuccessorWithProb( SwitchMBB, DefaultMBB, // The default destination is the first successor in IR. BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0)); else addSuccessorWithProb(SwitchMBB, DefaultMBB); // Insert the true branch. SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond, DAG.getBasicBlock(Small.MBB)); // Insert the false branch. BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond, DAG.getBasicBlock(DefaultMBB)); DAG.setRoot(BrCond); return; } } } if (TM.getOptLevel() != CodeGenOpt::None) { // Order cases by probability so the most likely case will be checked first. std::sort(W.FirstCluster, W.LastCluster + 1, [](const CaseCluster &a, const CaseCluster &b) { return a.Prob > b.Prob; }); // Rearrange the case blocks so that the last one falls through if possible // without without changing the order of probabilities. for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) { --I; if (I->Prob > W.LastCluster->Prob) break; if (I->Kind == CC_Range && I->MBB == NextMBB) { std::swap(*I, *W.LastCluster); break; } } } // Compute total probability. 
BranchProbability DefaultProb = W.DefaultProb; BranchProbability UnhandledProbs = DefaultProb; for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) UnhandledProbs += I->Prob; MachineBasicBlock *CurMBB = W.MBB; for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) { MachineBasicBlock *Fallthrough; if (I == W.LastCluster) { // For the last cluster, fall through to the default destination. Fallthrough = DefaultMBB; } else { Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock()); CurMF->insert(BBI, Fallthrough); // Put Cond in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(Cond); } UnhandledProbs -= I->Prob; switch (I->Kind) { case CC_JumpTable: { // FIXME: Optimize away range check based on pivot comparisons. JumpTableHeader *JTH = &JTCases[I->JTCasesIndex].first; JumpTable *JT = &JTCases[I->JTCasesIndex].second; // The jump block hasn't been inserted yet; insert it here. MachineBasicBlock *JumpMBB = JT->MBB; CurMF->insert(BBI, JumpMBB); auto JumpProb = I->Prob; auto FallthroughProb = UnhandledProbs; // If the default statement is a target of the jump table, we evenly // distribute the default probability to successors of CurMBB. Also // update the probability on the edge from JumpMBB to Fallthrough. for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(), SE = JumpMBB->succ_end(); SI != SE; ++SI) { if (*SI == DefaultMBB) { JumpProb += DefaultProb / 2; FallthroughProb -= DefaultProb / 2; JumpMBB->setSuccProbability(SI, DefaultProb / 2); JumpMBB->normalizeSuccProbs(); break; } } addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb); addSuccessorWithProb(CurMBB, JumpMBB, JumpProb); CurMBB->normalizeSuccProbs(); // The jump table header will be inserted in our current block, do the // range check, and fall through to our fallthrough block. JTH->HeaderBB = CurMBB; JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader. 
// If we're in the right place, emit the jump table header right now. if (CurMBB == SwitchMBB) { visitJumpTableHeader(*JT, *JTH, SwitchMBB); JTH->Emitted = true; } break; } case CC_BitTests: { // FIXME: Optimize away range check based on pivot comparisons. BitTestBlock *BTB = &BitTestCases[I->BTCasesIndex]; // The bit test blocks haven't been inserted yet; insert them here. for (BitTestCase &BTC : BTB->Cases) CurMF->insert(BBI, BTC.ThisBB); // Fill in fields of the BitTestBlock. BTB->Parent = CurMBB; BTB->Default = Fallthrough; BTB->DefaultProb = UnhandledProbs; // If the cases in bit test don't form a contiguous range, we evenly // distribute the probability on the edge to Fallthrough to two // successors of CurMBB. if (!BTB->ContiguousRange) { BTB->Prob += DefaultProb / 2; BTB->DefaultProb -= DefaultProb / 2; } // If we're in the right place, emit the bit test header right now. if (CurMBB == SwitchMBB) { visitBitTestHeader(*BTB, SwitchMBB); BTB->Emitted = true; } break; } case CC_Range: { const Value *RHS, *LHS, *MHS; ISD::CondCode CC; if (I->Low == I->High) { // Check Cond == I->Low. CC = ISD::SETEQ; LHS = Cond; RHS=I->Low; MHS = nullptr; } else { // Check I->Low <= Cond <= I->High. CC = ISD::SETLE; LHS = I->Low; MHS = Cond; RHS = I->High; } // The false probability is the sum of all unhandled cases. CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Prob, UnhandledProbs); if (CurMBB == SwitchMBB) visitSwitchCase(CB, SwitchMBB); else SwitchCases.push_back(CB); break; } } CurMBB = Fallthrough; } } unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC, CaseClusterIt First, CaseClusterIt Last) { return std::count_if(First, Last + 1, [&](const CaseCluster &X) { if (X.Prob != CC.Prob) return X.Prob > CC.Prob; // Ties are broken by comparing the case value. 
return X.Low->getValue().slt(CC.Low->getValue()); }); } void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, const SwitchWorkListItem &W, Value *Cond, MachineBasicBlock *SwitchMBB) { assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) && "Clusters not sorted?"); assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!"); // Balance the tree based on branch probabilities to create a near-optimal (in // terms of search time given key frequency) binary search tree. See e.g. Kurt // Mehlhorn "Nearly Optimal Binary Search Trees" (1975). CaseClusterIt LastLeft = W.FirstCluster; CaseClusterIt FirstRight = W.LastCluster; auto LeftProb = LastLeft->Prob + W.DefaultProb / 2; auto RightProb = FirstRight->Prob + W.DefaultProb / 2; // Move LastLeft and FirstRight towards each other from opposite directions to // find a partitioning of the clusters which balances the probability on both // sides. If LeftProb and RightProb are equal, alternate which side is // taken to ensure 0-probability nodes are distributed evenly. unsigned I = 0; while (LastLeft + 1 < FirstRight) { if (LeftProb < RightProb || (LeftProb == RightProb && (I & 1))) LeftProb += (++LastLeft)->Prob; else RightProb += (--FirstRight)->Prob; I++; } for (;;) { // Our binary search tree differs from a typical BST in that ours can have up // to three values in each leaf. The pivot selection above doesn't take that // into account, which means the tree might require more nodes and be less // efficient. We compensate for this here. unsigned NumLeft = LastLeft - W.FirstCluster + 1; unsigned NumRight = W.LastCluster - FirstRight + 1; if (std::min(NumLeft, NumRight) < 3 && std::max(NumLeft, NumRight) > 3) { // If one side has less than 3 clusters, and the other has more than 3, // consider taking a cluster from the other side. if (NumLeft < NumRight) { // Consider moving the first cluster on the right to the left side. 
CaseCluster &CC = *FirstRight; unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster); unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft); if (LeftSideRank <= RightSideRank) { // Moving the cluster to the left does not demote it. ++LastLeft; ++FirstRight; continue; } } else { assert(NumRight < NumLeft); // Consider moving the last element on the left to the right side. CaseCluster &CC = *LastLeft; unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft); unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster); if (RightSideRank <= LeftSideRank) { // Moving the cluster to the right does not demot it. --LastLeft; --FirstRight; continue; } } } break; } assert(LastLeft + 1 == FirstRight); assert(LastLeft >= W.FirstCluster); assert(FirstRight <= W.LastCluster); // Use the first element on the right as pivot since we will make less-than // comparisons against it. CaseClusterIt PivotCluster = FirstRight; assert(PivotCluster > W.FirstCluster); assert(PivotCluster <= W.LastCluster); CaseClusterIt FirstLeft = W.FirstCluster; CaseClusterIt LastRight = W.LastCluster; const ConstantInt *Pivot = PivotCluster->Low; // New blocks will be inserted immediately after the current one. MachineFunction::iterator BBI(W.MBB); ++BBI; // We will branch to the LHS if Value < Pivot. If LHS is a single cluster, // we can branch to its destination directly if it's squeezed exactly in // between the known lower bound and Pivot - 1. MachineBasicBlock *LeftMBB; if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range && FirstLeft->Low == W.GE && (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) { LeftMBB = FirstLeft->MBB; } else { LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); FuncInfo.MF->insert(BBI, LeftMBB); WorkList.push_back( {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2}); // Put Cond in a virtual register to make it available from the new blocks. 
ExportFromCurrentBlock(Cond); } // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a // single cluster, RHS.Low == Pivot, and we can branch to its destination // directly if RHS.High equals the current upper bound. MachineBasicBlock *RightMBB; if (FirstRight == LastRight && FirstRight->Kind == CC_Range && W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) { RightMBB = FirstRight->MBB; } else { RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); FuncInfo.MF->insert(BBI, RightMBB); WorkList.push_back( {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2}); // Put Cond in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(Cond); } // Create the CaseBlock record that will be used to lower the branch. CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB, LeftProb, RightProb); if (W.MBB == SwitchMBB) visitSwitchCase(CB, SwitchMBB); else SwitchCases.push_back(CB); } void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // Extract cases from the switch. BranchProbabilityInfo *BPI = FuncInfo.BPI; CaseClusterVector Clusters; Clusters.reserve(SI.getNumCases()); for (auto I : SI.cases()) { MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()]; const ConstantInt *CaseVal = I.getCaseValue(); BranchProbability Prob = BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex()) : BranchProbability(1, SI.getNumCases() + 1); Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob)); } MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()]; // Cluster adjacent cases with the same destination. We do this at all // optimization levels because it's cheap to do and will make codegen faster // if there are many clusters. sortAndRangeify(Clusters); if (TM.getOptLevel() != CodeGenOpt::None) { // Replace an unreachable default with the most popular destination. // FIXME: Exploit unreachable default more aggressively. 
bool UnreachableDefault = isa(SI.getDefaultDest()->getFirstNonPHIOrDbg()); if (UnreachableDefault && !Clusters.empty()) { DenseMap Popularity; unsigned MaxPop = 0; const BasicBlock *MaxBB = nullptr; for (auto I : SI.cases()) { const BasicBlock *BB = I.getCaseSuccessor(); if (++Popularity[BB] > MaxPop) { MaxPop = Popularity[BB]; MaxBB = BB; } } // Set new default. assert(MaxPop > 0 && MaxBB); DefaultMBB = FuncInfo.MBBMap[MaxBB]; // Remove cases that were pointing to the destination that is now the // default. CaseClusterVector New; New.reserve(Clusters.size()); for (CaseCluster &CC : Clusters) { if (CC.MBB != DefaultMBB) New.push_back(CC); } Clusters = std::move(New); } } // If there is only the default destination, jump there directly. MachineBasicBlock *SwitchMBB = FuncInfo.MBB; if (Clusters.empty()) { SwitchMBB->addSuccessor(DefaultMBB); if (DefaultMBB != NextBlock(SwitchMBB)) { DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(DefaultMBB))); } return; } findJumpTables(Clusters, &SI, DefaultMBB); findBitTestClusters(Clusters, &SI); DEBUG({ dbgs() << "Case clusters: "; for (const CaseCluster &C : Clusters) { if (C.Kind == CC_JumpTable) dbgs() << "JT:"; if (C.Kind == CC_BitTests) dbgs() << "BT:"; C.Low->getValue().print(dbgs(), true); if (C.Low != C.High) { dbgs() << '-'; C.High->getValue().print(dbgs(), true); } dbgs() << ' '; } dbgs() << '\n'; }); assert(!Clusters.empty()); SwitchWorkList WorkList; CaseClusterIt First = Clusters.begin(); CaseClusterIt Last = Clusters.end() - 1; auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB); WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb}); while (!WorkList.empty()) { SwitchWorkListItem W = WorkList.back(); WorkList.pop_back(); unsigned NumClusters = W.LastCluster - W.FirstCluster + 1; if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None && !DefaultMBB->getParent()->getFunction()->optForMinSize()) { // For optimized builds, lower large 
range as a balanced binary tree. splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB); continue; } lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB); } } Index: projects/clang400-import/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp =================================================================== --- projects/clang400-import/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp (revision 313893) +++ projects/clang400-import/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp (revision 313894) @@ -1,3679 +1,3683 @@ //===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This implements the SelectionDAGISel class. // //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SelectionDAG.h" #include "ScheduleDAGSDNodes.h" #include "SelectionDAGBuilder.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/WinEHFuncInfo.h" 
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
using namespace llvm;

#define DEBUG_TYPE "isel"

// Counters describing how often fast isel and DAG isel were used.
STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected");
STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
STATISTIC(NumEntryBlocks, "Number of entry blocks encountered");
STATISTIC(NumFastIselFailLowerArguments,
          "Number of entry blocks where fast isel failed to lower arguments");

#ifndef NDEBUG
static cl::opt<bool>
EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden,
          cl::desc("Enable extra verbose messages in the \"fast\" "
                   "instruction selector"));

  // Terminators
STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret");
STATISTIC(NumFastIselFailBr,"Fast isel fails on Br");
STATISTIC(NumFastIselFailSwitch,"Fast isel fails on Switch");
STATISTIC(NumFastIselFailIndirectBr,"Fast isel fails on IndirectBr");
STATISTIC(NumFastIselFailInvoke,"Fast isel fails on Invoke");
STATISTIC(NumFastIselFailResume,"Fast isel fails on Resume");
STATISTIC(NumFastIselFailUnreachable,"Fast isel fails on Unreachable");

  // Standard binary operators...
STATISTIC(NumFastIselFailAdd,"Fast isel fails on Add");
STATISTIC(NumFastIselFailFAdd,"Fast isel fails on FAdd");
STATISTIC(NumFastIselFailSub,"Fast isel fails on Sub");
STATISTIC(NumFastIselFailFSub,"Fast isel fails on FSub");
STATISTIC(NumFastIselFailMul,"Fast isel fails on Mul");
STATISTIC(NumFastIselFailFMul,"Fast isel fails on FMul");
STATISTIC(NumFastIselFailUDiv,"Fast isel fails on UDiv");
STATISTIC(NumFastIselFailSDiv,"Fast isel fails on SDiv");
STATISTIC(NumFastIselFailFDiv,"Fast isel fails on FDiv");
STATISTIC(NumFastIselFailURem,"Fast isel fails on URem");
STATISTIC(NumFastIselFailSRem,"Fast isel fails on SRem");
STATISTIC(NumFastIselFailFRem,"Fast isel fails on FRem");

  // Logical operators...
STATISTIC(NumFastIselFailAnd,"Fast isel fails on And");
STATISTIC(NumFastIselFailOr,"Fast isel fails on Or");
STATISTIC(NumFastIselFailXor,"Fast isel fails on Xor");

  // Memory instructions...
STATISTIC(NumFastIselFailAlloca,"Fast isel fails on Alloca");
STATISTIC(NumFastIselFailLoad,"Fast isel fails on Load");
STATISTIC(NumFastIselFailStore,"Fast isel fails on Store");
STATISTIC(NumFastIselFailAtomicCmpXchg,"Fast isel fails on AtomicCmpXchg");
STATISTIC(NumFastIselFailAtomicRMW,"Fast isel fails on AtomicRWM");
STATISTIC(NumFastIselFailFence,"Fast isel fails on Frence");
STATISTIC(NumFastIselFailGetElementPtr,"Fast isel fails on GetElementPtr");

  // Convert instructions...
STATISTIC(NumFastIselFailTrunc,"Fast isel fails on Trunc");
STATISTIC(NumFastIselFailZExt,"Fast isel fails on ZExt");
STATISTIC(NumFastIselFailSExt,"Fast isel fails on SExt");
STATISTIC(NumFastIselFailFPTrunc,"Fast isel fails on FPTrunc");
STATISTIC(NumFastIselFailFPExt,"Fast isel fails on FPExt");
STATISTIC(NumFastIselFailFPToUI,"Fast isel fails on FPToUI");
STATISTIC(NumFastIselFailFPToSI,"Fast isel fails on FPToSI");
STATISTIC(NumFastIselFailUIToFP,"Fast isel fails on UIToFP");
STATISTIC(NumFastIselFailSIToFP,"Fast isel fails on SIToFP");
STATISTIC(NumFastIselFailIntToPtr,"Fast isel fails on IntToPtr");
STATISTIC(NumFastIselFailPtrToInt,"Fast isel fails on PtrToInt");
STATISTIC(NumFastIselFailBitCast,"Fast isel fails on BitCast");

  // Other instructions...
STATISTIC(NumFastIselFailICmp,"Fast isel fails on ICmp");
STATISTIC(NumFastIselFailFCmp,"Fast isel fails on FCmp");
STATISTIC(NumFastIselFailPHI,"Fast isel fails on PHI");
STATISTIC(NumFastIselFailSelect,"Fast isel fails on Select");
STATISTIC(NumFastIselFailCall,"Fast isel fails on Call");
STATISTIC(NumFastIselFailShl,"Fast isel fails on Shl");
STATISTIC(NumFastIselFailLShr,"Fast isel fails on LShr");
STATISTIC(NumFastIselFailAShr,"Fast isel fails on AShr");
STATISTIC(NumFastIselFailVAArg,"Fast isel fails on VAArg");
STATISTIC(NumFastIselFailExtractElement,"Fast isel fails on ExtractElement");
STATISTIC(NumFastIselFailInsertElement,"Fast isel fails on InsertElement");
STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector");
STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue");
STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue");
STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad");

// Intrinsic instructions...
STATISTIC(NumFastIselFailIntrinsicCall, "Fast isel fails on Intrinsic call");
STATISTIC(NumFastIselFailSAddWithOverflow,
          "Fast isel fails on sadd.with.overflow");
STATISTIC(NumFastIselFailUAddWithOverflow,
          "Fast isel fails on uadd.with.overflow");
STATISTIC(NumFastIselFailSSubWithOverflow,
          "Fast isel fails on ssub.with.overflow");
STATISTIC(NumFastIselFailUSubWithOverflow,
          "Fast isel fails on usub.with.overflow");
STATISTIC(NumFastIselFailSMulWithOverflow,
          "Fast isel fails on smul.with.overflow");
STATISTIC(NumFastIselFailUMulWithOverflow,
          "Fast isel fails on umul.with.overflow");
STATISTIC(NumFastIselFailFrameaddress, "Fast isel fails on Frameaddress");
STATISTIC(NumFastIselFailSqrt, "Fast isel fails on sqrt call");
STATISTIC(NumFastIselFailStackMap, "Fast isel fails on StackMap call");
STATISTIC(NumFastIselFailPatchPoint, "Fast isel fails on PatchPoint call");
#endif

static cl::opt<bool>
EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
          cl::desc("Enable verbose messages in the \"fast\" "
                   "instruction selector"));
// Integer level; the description string below spells out levels 0 through 3.
static cl::opt<int> EnableFastISelAbort(
    "fast-isel-abort", cl::Hidden,
    cl::desc("Enable abort calls when \"fast\" instruction selection "
             "fails to lower an instruction: 0 disable the abort, 1 will "
             "abort but for args, calls and terminators, 2 will also "
             "abort for argument lowering, and 3 will never fallback "
             "to SelectionDAG."));

static cl::opt<bool>
UseMBPI("use-mbpi",
        cl::desc("use Machine Branch Probability Info"),
        cl::init(true), cl::Hidden);

#ifndef NDEBUG
static cl::opt<std::string>
FilterDAGBasicBlockName("filter-view-dags", cl::Hidden,
                        cl::desc("Only display the basic block whose name "
                                 "matches this for all view-*-dags options"));
static cl::opt<bool>
ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden,
          cl::desc("Pop up a window to show dags before the first "
                   "dag combine pass"));
static cl::opt<bool>
ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden,
          cl::desc("Pop up a window to show dags before legalize types"));
static cl::opt<bool>
ViewLegalizeDAGs("view-legalize-dags", cl::Hidden,
          cl::desc("Pop up a window to show dags before legalize"));
static cl::opt<bool>
ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden,
          cl::desc("Pop up a window to show dags before the second "
                   "dag combine pass"));
static cl::opt<bool>
ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden,
          cl::desc("Pop up a window to show dags before the post legalize types"
                   " dag combine pass"));
static cl::opt<bool>
ViewISelDAGs("view-isel-dags", cl::Hidden,
          cl::desc("Pop up a window to show isel dags as they are selected"));
static cl::opt<bool>
ViewSchedDAGs("view-sched-dags", cl::Hidden,
          cl::desc("Pop up a window to show sched dags as they are processed"));
static cl::opt<bool>
ViewSUnitDAGs("view-sunit-dags", cl::Hidden,
      cl::desc("Pop up a window to show SUnit dags after they are processed"));
#else
// Release builds: the view-*-dags switches are compile-time constants.
static const bool ViewDAGCombine1 = false,
                  ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false,
                  ViewDAGCombine2 = false,
                  ViewDAGCombineLT = false,
                  ViewISelDAGs = false, ViewSchedDAGs = false,
                  ViewSUnitDAGs = false;
#endif

//===---------------------------------------------------------------------===//
///
/// RegisterScheduler class - Track the registration of instruction schedulers.
///
//===---------------------------------------------------------------------===//
MachinePassRegistry RegisterScheduler::Registry;

//===---------------------------------------------------------------------===//
///
/// ISHeuristic command line option for instruction schedulers.
/// //===---------------------------------------------------------------------===// static cl::opt > ISHeuristic("pre-RA-sched", cl::init(&createDefaultScheduler), cl::Hidden, cl::desc("Instruction schedulers available (before register" " allocation):")); static RegisterScheduler defaultListDAGScheduler("default", "Best scheduler for the target", createDefaultScheduler); namespace llvm { //===--------------------------------------------------------------------===// /// \brief This class is used by SelectionDAGISel to temporarily override /// the optimization level on a per-function basis. class OptLevelChanger { SelectionDAGISel &IS; CodeGenOpt::Level SavedOptLevel; bool SavedFastISel; public: OptLevelChanger(SelectionDAGISel &ISel, CodeGenOpt::Level NewOptLevel) : IS(ISel) { SavedOptLevel = IS.OptLevel; if (NewOptLevel == SavedOptLevel) return; IS.OptLevel = NewOptLevel; IS.TM.setOptLevel(NewOptLevel); DEBUG(dbgs() << "\nChanging optimization level for Function " << IS.MF->getFunction()->getName() << "\n"); DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel << " ; After: -O" << NewOptLevel << "\n"); SavedFastISel = IS.TM.Options.EnableFastISel; if (NewOptLevel == CodeGenOpt::None) { IS.TM.setFastISel(IS.TM.getO0WantsFastISel()); DEBUG(dbgs() << "\tFastISel is " << (IS.TM.Options.EnableFastISel ? "enabled" : "disabled") << "\n"); } } ~OptLevelChanger() { if (IS.OptLevel == SavedOptLevel) return; DEBUG(dbgs() << "\nRestoring optimization level for Function " << IS.MF->getFunction()->getName() << "\n"); DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel << " ; After: -O" << SavedOptLevel << "\n"); IS.OptLevel = SavedOptLevel; IS.TM.setOptLevel(SavedOptLevel); IS.TM.setFastISel(SavedFastISel); } }; //===--------------------------------------------------------------------===// /// createDefaultScheduler - This creates an instruction scheduler appropriate /// for the target. 
ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { const TargetLowering *TLI = IS->TLI; const TargetSubtargetInfo &ST = IS->MF->getSubtarget(); // Try first to see if the Target has its own way of selecting a scheduler if (auto *SchedulerCtor = ST.getDAGScheduler(OptLevel)) { return SchedulerCtor(IS, OptLevel); } if (OptLevel == CodeGenOpt::None || (ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) || TLI->getSchedulingPreference() == Sched::Source) return createSourceListDAGScheduler(IS, OptLevel); if (TLI->getSchedulingPreference() == Sched::RegPressure) return createBURRListDAGScheduler(IS, OptLevel); if (TLI->getSchedulingPreference() == Sched::Hybrid) return createHybridListDAGScheduler(IS, OptLevel); if (TLI->getSchedulingPreference() == Sched::VLIW) return createVLIWDAGScheduler(IS, OptLevel); assert(TLI->getSchedulingPreference() == Sched::ILP && "Unknown sched type!"); return createILPListDAGScheduler(IS, OptLevel); } } // end namespace llvm // EmitInstrWithCustomInserter - This method should be implemented by targets // that mark instructions with the 'usesCustomInserter' flag. These // instructions are special in various ways, which require special support to // insert. The specified MachineInstr is created but not inserted into any // basic blocks, and this method is called to expand it into a sequence of // instructions, potentially also creating new basic blocks and control flow. // When new basic blocks are inserted and the edges from MBB to its successors // are modified, the method should insert pairs of into the // DenseMap. 
MachineBasicBlock * TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const { #ifndef NDEBUG dbgs() << "If a target marks an instruction with " "'usesCustomInserter', it must implement " "TargetLowering::EmitInstrWithCustomInserter!"; #endif llvm_unreachable(nullptr); } void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const { assert(!MI.hasPostISelHook() && "If a target marks an instruction with 'hasPostISelHook', " "it must implement TargetLowering::AdjustInstrPostInstrSelection!"); } //===----------------------------------------------------------------------===// // SelectionDAGISel code //===----------------------------------------------------------------------===// SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) : MachineFunctionPass(ID), TM(tm), FuncInfo(new FunctionLoweringInfo()), CurDAG(new SelectionDAG(tm, OL)), SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), GFI(), OptLevel(OL), DAGSize(0) { initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); initializeBranchProbabilityInfoWrapperPassPass( *PassRegistry::getPassRegistry()); initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry()); initializeTargetLibraryInfoWrapperPassPass( *PassRegistry::getPassRegistry()); } SelectionDAGISel::~SelectionDAGISel() { delete SDB; delete CurDAG; delete FuncInfo; } void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addPreserved(); AU.addRequired(); if (UseMBPI && OptLevel != CodeGenOpt::None) AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } /// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that /// may trap on it. In this case we have to split the edge so that the path /// through the predecessor block that doesn't go to the phi block doesn't /// execute the possibly trapping instruction. 
/// /// This is required for correctness, so it must be done at -O0. /// static void SplitCriticalSideEffectEdges(Function &Fn) { // Loop for blocks with phi nodes. for (BasicBlock &BB : Fn) { PHINode *PN = dyn_cast(BB.begin()); if (!PN) continue; ReprocessBlock: // For each block with a PHI node, check to see if any of the input values // are potentially trapping constant expressions. Constant expressions are // the only potentially trapping value that can occur as the argument to a // PHI. for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast(I)); ++I) for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantExpr *CE = dyn_cast(PN->getIncomingValue(i)); if (!CE || !CE->canTrap()) continue; // The only case we have to worry about is when the edge is critical. // Since this block has a PHI Node, we assume it has multiple input // edges: check to see if the pred has multiple successors. BasicBlock *Pred = PN->getIncomingBlock(i); if (Pred->getTerminator()->getNumSuccessors() == 1) continue; // Okay, we have to split this edge. SplitCriticalEdge( Pred->getTerminator(), GetSuccessorNumber(Pred, &BB), CriticalEdgeSplittingOptions().setMergeIdenticalEdges()); goto ReprocessBlock; } } } bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // If we already selected that function, we do not need to run SDISel. if (mf.getProperties().hasProperty( MachineFunctionProperties::Property::Selected)) return false; // Do some sanity-checking on the command-line options. assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) && "-fast-isel-verbose requires -fast-isel"); assert((!EnableFastISelAbort || TM.Options.EnableFastISel) && "-fast-isel-abort > 0 requires -fast-isel"); const Function &Fn = *mf.getFunction(); MF = &mf; // Reset the target options before resetting the optimization // level below. 
// FIXME: This is a horrible hack and should be processed via // codegen looking at the optimization level explicitly when // it wants to look at it. TM.resetTargetOptions(Fn); // Reset OptLevel to None for optnone functions. CodeGenOpt::Level NewOptLevel = OptLevel; if (OptLevel != CodeGenOpt::None && skipFunction(Fn)) NewOptLevel = CodeGenOpt::None; OptLevelChanger OLC(*this, NewOptLevel); TII = MF->getSubtarget().getInstrInfo(); TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); AA = &getAnalysis().getAAResults(); LibInfo = &getAnalysis().getTLI(); GFI = Fn.hasGC() ? &getAnalysis().getFunctionInfo(Fn) : nullptr; DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); SplitCriticalSideEffectEdges(const_cast(Fn)); CurDAG->init(*MF); FuncInfo->set(Fn, *MF, CurDAG); if (UseMBPI && OptLevel != CodeGenOpt::None) FuncInfo->BPI = &getAnalysis().getBPI(); else FuncInfo->BPI = nullptr; SDB->init(GFI, *AA, LibInfo); MF->setHasInlineAsm(false); FuncInfo->SplitCSR = false; // We split CSR if the target supports it for the given function // and the function has only return exits. if (OptLevel != CodeGenOpt::None && TLI->supportSplitCSR(MF)) { FuncInfo->SplitCSR = true; // Collect all the return blocks. for (const BasicBlock &BB : Fn) { if (!succ_empty(&BB)) continue; const TerminatorInst *Term = BB.getTerminator(); if (isa(Term) || isa(Term)) continue; // Bail out if the exit block is not Return nor Unreachable. FuncInfo->SplitCSR = false; break; } } MachineBasicBlock *EntryMBB = &MF->front(); if (FuncInfo->SplitCSR) // This performs initialization so lowering for SplitCSR will be correct. TLI->initializeSplitCSR(EntryMBB); SelectAllBasicBlocks(Fn); // If the first basic block in the function has live ins that need to be // copied into vregs, emit the copies into the top of the block before // emitting the code for the block. 
const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); RegInfo->EmitLiveInCopies(EntryMBB, TRI, *TII); // Insert copies in the entry block and the return blocks. if (FuncInfo->SplitCSR) { SmallVector Returns; // Collect all the return blocks. for (MachineBasicBlock &MBB : mf) { if (!MBB.succ_empty()) continue; MachineBasicBlock::iterator Term = MBB.getFirstTerminator(); if (Term != MBB.end() && Term->isReturn()) { Returns.push_back(&MBB); continue; } } TLI->insertCopiesSplitCSR(EntryMBB, Returns); } DenseMap LiveInMap; if (!FuncInfo->ArgDbgValues.empty()) for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(), E = RegInfo->livein_end(); LI != E; ++LI) if (LI->second) LiveInMap.insert(std::make_pair(LI->first, LI->second)); // Insert DBG_VALUE instructions for function arguments to the entry block. for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1]; bool hasFI = MI->getOperand(0).isFI(); unsigned Reg = hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) EntryMBB->insert(EntryMBB->begin(), MI); else { MachineInstr *Def = RegInfo->getVRegDef(Reg); if (Def) { MachineBasicBlock::iterator InsertPos = Def; // FIXME: VR def may not be in entry block. Def->getParent()->insert(std::next(InsertPos), MI); } else DEBUG(dbgs() << "Dropping debug info for dead vreg" << TargetRegisterInfo::virtReg2Index(Reg) << "\n"); } // If Reg is live-in then update debug info to track its copy in a vreg. 
DenseMap::iterator LDI = LiveInMap.find(Reg); if (LDI != LiveInMap.end()) { assert(!hasFI && "There's no handling of frame pointer updating here yet " "- add if needed"); MachineInstr *Def = RegInfo->getVRegDef(LDI->second); MachineBasicBlock::iterator InsertPos = Def; const MDNode *Variable = MI->getDebugVariable(); const MDNode *Expr = MI->getDebugExpression(); DebugLoc DL = MI->getDebugLoc(); bool IsIndirect = MI->isIndirectDebugValue(); unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; assert(cast(Variable)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); // Def is never a terminator here, so it is ok to increment InsertPos. BuildMI(*EntryMBB, ++InsertPos, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, LDI->second, Offset, Variable, Expr); // If this vreg is directly copied into an exported register then // that COPY instructions also need DBG_VALUE, if it is the only // user of LDI->second. MachineInstr *CopyUseMI = nullptr; for (MachineRegisterInfo::use_instr_iterator UI = RegInfo->use_instr_begin(LDI->second), E = RegInfo->use_instr_end(); UI != E; ) { MachineInstr *UseMI = &*(UI++); if (UseMI->isDebugValue()) continue; if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) { CopyUseMI = UseMI; continue; } // Otherwise this is another use or second copy use. CopyUseMI = nullptr; break; } if (CopyUseMI) { // Use MI's debug location, which describes where Variable was // declared, rather than whatever is attached to CopyUseMI. MachineInstr *NewMI = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, CopyUseMI->getOperand(0).getReg(), Offset, Variable, Expr); MachineBasicBlock::iterator Pos = CopyUseMI; EntryMBB->insertAfter(Pos, NewMI); } } } // Determine if there are any calls in this machine function. 
MachineFrameInfo &MFI = MF->getFrameInfo(); for (const auto &MBB : *MF) { if (MFI.hasCalls() && MF->hasInlineAsm()) break; for (const auto &MI : MBB) { const MCInstrDesc &MCID = TII->get(MI.getOpcode()); if ((MCID.isCall() && !MCID.isReturn()) || MI.isStackAligningInlineAsm()) { MFI.setHasCalls(true); } if (MI.isInlineAsm()) { MF->setHasInlineAsm(true); } } } // Determine if there is a call to setjmp in the machine function. MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice()); // Replace forward-declared registers with the registers containing // the desired value. MachineRegisterInfo &MRI = MF->getRegInfo(); for (DenseMap::iterator I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end(); I != E; ++I) { unsigned From = I->first; unsigned To = I->second; // If To is also scheduled to be replaced, find what its ultimate // replacement is. for (;;) { DenseMap::iterator J = FuncInfo->RegFixups.find(To); if (J == E) break; To = J->second; } // Make sure the new register has a sufficiently constrained register class. if (TargetRegisterInfo::isVirtualRegister(From) && TargetRegisterInfo::isVirtualRegister(To)) MRI.constrainRegClass(To, MRI.getRegClass(From)); // Replace it. // Replacing one register with another won't touch the kill flags. // We need to conservatively clear the kill flags as a kill on the old // register might dominate existing uses of the new register. if (!MRI.use_empty(To)) MRI.clearKillFlags(From); MRI.replaceRegWith(From, To); } if (TLI->hasCopyImplyingStackAdjustment(MF)) MFI.setHasCopyImplyingStackAdjustment(true); // Freeze the set of reserved registers now that MachineFrameInfo has been // set up. All the information required by getReservedRegs() should be // available now. MRI.freezeReservedRegs(*MF); // Release function-specific state. SDB and CurDAG are already cleared // at this point. 
FuncInfo->clear(); DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n"); DEBUG(MF->print(dbgs())); return true; } void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, BasicBlock::const_iterator End, bool &HadTailCall) { // Lower the instructions. If a call is emitted as a tail call, cease emitting // nodes for this block. for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) SDB->visit(*I); // Make sure the root of the DAG is up-to-date. CurDAG->setRoot(SDB->getControlRoot()); HadTailCall = SDB->HasTailCall; SDB->clear(); // Final step, emit the lowered DAG as machine code. CodeGenAndEmitDAG(); } void SelectionDAGISel::ComputeLiveOutVRegInfo() { SmallPtrSet VisitedNodes; SmallVector Worklist; Worklist.push_back(CurDAG->getRoot().getNode()); APInt KnownZero; APInt KnownOne; do { SDNode *N = Worklist.pop_back_val(); // If we've already seen this node, ignore it. if (!VisitedNodes.insert(N).second) continue; // Otherwise, add all chain operands to the worklist. for (const SDValue &Op : N->op_values()) if (Op.getValueType() == MVT::Other) Worklist.push_back(Op.getNode()); // If this is a CopyToReg with a vreg dest, process it. if (N->getOpcode() != ISD::CopyToReg) continue; unsigned DestReg = cast(N->getOperand(1))->getReg(); if (!TargetRegisterInfo::isVirtualRegister(DestReg)) continue; // Ignore non-scalar or non-integer values. 
SDValue Src = N->getOperand(2); EVT SrcVT = Src.getValueType(); if (!SrcVT.isInteger() || SrcVT.isVector()) continue; unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); CurDAG->computeKnownBits(Src, KnownZero, KnownOne); FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne); } while (!Worklist.empty()); } void SelectionDAGISel::CodeGenAndEmitDAG() { StringRef GroupName = "sdag"; StringRef GroupDescription = "Instruction Selection and Scheduling"; std::string BlockName; int BlockNumber = -1; (void)BlockNumber; bool MatchFilterBB = false; (void)MatchFilterBB; #ifndef NDEBUG MatchFilterBB = (FilterDAGBasicBlockName.empty() || FilterDAGBasicBlockName == FuncInfo->MBB->getBasicBlock()->getName().str()); #endif #ifdef NDEBUG if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs || ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs || ViewSUnitDAGs) #endif { BlockNumber = FuncInfo->MBB->getNumber(); BlockName = (MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str(); } DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); if (ViewDAGCombine1 && MatchFilterBB) CurDAG->viewGraph("dag-combine1 input for " + BlockName); // Run the DAG combiner in pre-legalize mode. { NamedRegionTimer T("combine1", "DAG Combining 1", GroupName, GroupDescription, TimePassesIsEnabled); CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel); } DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); // Second step, hack on the DAG until it only uses operations and types that // the target supports. 
if (ViewLegalizeTypesDAGs && MatchFilterBB) CurDAG->viewGraph("legalize-types input for " + BlockName); bool Changed; { NamedRegionTimer T("legalize_types", "Type Legalization", GroupName, GroupDescription, TimePassesIsEnabled); Changed = CurDAG->LegalizeTypes(); } DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); CurDAG->NewNodesMustHaveLegalTypes = true; if (Changed) { if (ViewDAGCombineLT && MatchFilterBB) CurDAG->viewGraph("dag-combine-lt input for " + BlockName); // Run the DAG combiner in post-type-legalize mode. { NamedRegionTimer T("combine_lt", "DAG Combining after legalize types", GroupName, GroupDescription, TimePassesIsEnabled); CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel); } DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); } { NamedRegionTimer T("legalize_vec", "Vector Legalization", GroupName, GroupDescription, TimePassesIsEnabled); Changed = CurDAG->LegalizeVectors(); } if (Changed) { { NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName, GroupDescription, TimePassesIsEnabled); CurDAG->LegalizeTypes(); } if (ViewDAGCombineLT && MatchFilterBB) CurDAG->viewGraph("dag-combine-lv input for " + BlockName); // Run the DAG combiner in post-type-legalize mode. 
{ NamedRegionTimer T("combine_lv", "DAG Combining after legalize vectors", GroupName, GroupDescription, TimePassesIsEnabled); CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel); } DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); } if (ViewLegalizeDAGs && MatchFilterBB) CurDAG->viewGraph("legalize input for " + BlockName); { NamedRegionTimer T("legalize", "DAG Legalization", GroupName, GroupDescription, TimePassesIsEnabled); CurDAG->Legalize(); } DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); if (ViewDAGCombine2 && MatchFilterBB) CurDAG->viewGraph("dag-combine2 input for " + BlockName); // Run the DAG combiner in post-legalize mode. { NamedRegionTimer T("combine2", "DAG Combining 2", GroupName, GroupDescription, TimePassesIsEnabled); CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel); } DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); if (OptLevel != CodeGenOpt::None) ComputeLiveOutVRegInfo(); if (ViewISelDAGs && MatchFilterBB) CurDAG->viewGraph("isel input for " + BlockName); // Third, instruction select all of the operations to machine code, adding the // code to the MachineBasicBlock. { NamedRegionTimer T("isel", "Instruction Selection", GroupName, GroupDescription, TimePassesIsEnabled); DoInstructionSelection(); } DEBUG(dbgs() << "Selected selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); if (ViewSchedDAGs && MatchFilterBB) CurDAG->viewGraph("scheduler input for " + BlockName); // Schedule machine code. ScheduleDAGSDNodes *Scheduler = CreateScheduler(); { NamedRegionTimer T("sched", "Instruction Scheduling", GroupName, GroupDescription, TimePassesIsEnabled); Scheduler->Run(CurDAG, FuncInfo->MBB); } if (ViewSUnitDAGs && MatchFilterBB) Scheduler->viewGraph(); // Emit machine code to BB. 
This can change 'BB' to the last block being // inserted into. MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB; { NamedRegionTimer T("emit", "Instruction Creation", GroupName, GroupDescription, TimePassesIsEnabled); // FuncInfo->InsertPt is passed by reference and set to the end of the // scheduled instructions. LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(FuncInfo->InsertPt); } // If the block was split, make sure we update any references that are used to // update PHI nodes later on. if (FirstMBB != LastMBB) SDB->UpdateSplitBlock(FirstMBB, LastMBB); // Free the scheduler state. { NamedRegionTimer T("cleanup", "Instruction Scheduling Cleanup", GroupName, GroupDescription, TimePassesIsEnabled); delete Scheduler; } // Free the SelectionDAG state, now that we're finished with it. CurDAG->clear(); } namespace { /// ISelUpdater - helper class to handle updates of the instruction selection /// graph. class ISelUpdater : public SelectionDAG::DAGUpdateListener { SelectionDAG::allnodes_iterator &ISelPosition; public: ISelUpdater(SelectionDAG &DAG, SelectionDAG::allnodes_iterator &isp) : SelectionDAG::DAGUpdateListener(DAG), ISelPosition(isp) {} /// NodeDeleted - Handle nodes deleted from the graph. If the node being /// deleted is the current ISelPosition node, update ISelPosition. /// void NodeDeleted(SDNode *N, SDNode *E) override { if (ISelPosition == SelectionDAG::allnodes_iterator(N)) ++ISelPosition; } }; } // end anonymous namespace void SelectionDAGISel::DoInstructionSelection() { DEBUG(dbgs() << "===== Instruction selection begins: BB#" << FuncInfo->MBB->getNumber() << " '" << FuncInfo->MBB->getName() << "'\n"); PreprocessISelDAG(); // Select target instructions for the DAG. { // Number all nodes with a topological order and set DAGSize. DAGSize = CurDAG->AssignTopologicalOrder(); // Create a dummy node (which is not added to allnodes), that adds // a reference to the root node, preventing it from being deleted, // and tracking any changes of the root. 
HandleSDNode Dummy(CurDAG->getRoot()); SelectionDAG::allnodes_iterator ISelPosition (CurDAG->getRoot().getNode()); ++ISelPosition; // Make sure that ISelPosition gets properly updated when nodes are deleted // in calls made from this function. ISelUpdater ISU(*CurDAG, ISelPosition); // The AllNodes list is now topological-sorted. Visit the // nodes by starting at the end of the list (the root of the // graph) and preceding back toward the beginning (the entry // node). while (ISelPosition != CurDAG->allnodes_begin()) { SDNode *Node = &*--ISelPosition; // Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes, // but there are currently some corner cases that it misses. Also, this // makes it theoretically possible to disable the DAGCombiner. if (Node->use_empty()) continue; Select(Node); } CurDAG->setRoot(Dummy.getValue()); } DEBUG(dbgs() << "===== Instruction selection ends:\n"); PostprocessISelDAG(); } static bool hasExceptionPointerOrCodeUser(const CatchPadInst *CPI) { for (const User *U : CPI->users()) { if (const IntrinsicInst *EHPtrCall = dyn_cast(U)) { Intrinsic::ID IID = EHPtrCall->getIntrinsicID(); if (IID == Intrinsic::eh_exceptionpointer || IID == Intrinsic::eh_exceptioncode) return true; } } return false; } /// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and /// do other setup for EH landing-pad blocks. bool SelectionDAGISel::PrepareEHLandingPad() { MachineBasicBlock *MBB = FuncInfo->MBB; const Constant *PersonalityFn = FuncInfo->Fn->getPersonalityFn(); const BasicBlock *LLVMBB = MBB->getBasicBlock(); const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy(CurDAG->getDataLayout())); // Catchpads have one live-in register, which typically holds the exception // pointer or code. if (const auto *CPI = dyn_cast(LLVMBB->getFirstNonPHI())) { if (hasExceptionPointerOrCodeUser(CPI)) { // Get or create the virtual register to hold the pointer or code. Mark // the live in physreg and copy into the vreg. 
MCPhysReg EHPhysReg = TLI->getExceptionPointerRegister(PersonalityFn); assert(EHPhysReg && "target lacks exception pointer register"); MBB->addLiveIn(EHPhysReg); unsigned VReg = FuncInfo->getCatchPadExceptionPointerVReg(CPI, PtrRC); BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), TII->get(TargetOpcode::COPY), VReg) .addReg(EHPhysReg, RegState::Kill); } return true; } if (!LLVMBB->isLandingPad()) return true; // Add a label to mark the beginning of the landing pad. Deletion of the // landing pad can thus be detected via the MachineModuleInfo. MCSymbol *Label = MF->addLandingPad(MBB); // Assign the call site to the landing pad's begin label. MF->setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]); const MCInstrDesc &II = TII->get(TargetOpcode::EH_LABEL); BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); // Mark exception register as live in. if (unsigned Reg = TLI->getExceptionPointerRegister(PersonalityFn)) FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC); // Mark exception selector register as live in. if (unsigned Reg = TLI->getExceptionSelectorRegister(PersonalityFn)) FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC); return true; } /// isFoldedOrDeadInstruction - Return true if the specified instruction is /// side-effect free and is either dead or folded into a generated instruction. /// Return false if it needs to be emitted. static bool isFoldedOrDeadInstruction(const Instruction *I, FunctionLoweringInfo *FuncInfo) { return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded. !isa(I) && // Terminators aren't folded. !isa(I) && // Debug instructions aren't folded. !I->isEHPad() && // EH pad instructions aren't folded. !FuncInfo->isExportedInst(I); // Exported instrs must be computed. } #ifndef NDEBUG // Collect per Instruction statistics for fast-isel misses. Only those // instructions that cause the bail are accounted for. 
It does not account for // instructions higher in the block. Thus, summing the per instructions stats // will not add up to what is reported by NumFastIselFailures. static void collectFailStats(const Instruction *I) { switch (I->getOpcode()) { default: assert (0 && " "); // Terminators case Instruction::Ret: NumFastIselFailRet++; return; case Instruction::Br: NumFastIselFailBr++; return; case Instruction::Switch: NumFastIselFailSwitch++; return; case Instruction::IndirectBr: NumFastIselFailIndirectBr++; return; case Instruction::Invoke: NumFastIselFailInvoke++; return; case Instruction::Resume: NumFastIselFailResume++; return; case Instruction::Unreachable: NumFastIselFailUnreachable++; return; // Standard binary operators... case Instruction::Add: NumFastIselFailAdd++; return; case Instruction::FAdd: NumFastIselFailFAdd++; return; case Instruction::Sub: NumFastIselFailSub++; return; case Instruction::FSub: NumFastIselFailFSub++; return; case Instruction::Mul: NumFastIselFailMul++; return; case Instruction::FMul: NumFastIselFailFMul++; return; case Instruction::UDiv: NumFastIselFailUDiv++; return; case Instruction::SDiv: NumFastIselFailSDiv++; return; case Instruction::FDiv: NumFastIselFailFDiv++; return; case Instruction::URem: NumFastIselFailURem++; return; case Instruction::SRem: NumFastIselFailSRem++; return; case Instruction::FRem: NumFastIselFailFRem++; return; // Logical operators... case Instruction::And: NumFastIselFailAnd++; return; case Instruction::Or: NumFastIselFailOr++; return; case Instruction::Xor: NumFastIselFailXor++; return; // Memory instructions... 
case Instruction::Alloca: NumFastIselFailAlloca++; return; case Instruction::Load: NumFastIselFailLoad++; return; case Instruction::Store: NumFastIselFailStore++; return; case Instruction::AtomicCmpXchg: NumFastIselFailAtomicCmpXchg++; return; case Instruction::AtomicRMW: NumFastIselFailAtomicRMW++; return; case Instruction::Fence: NumFastIselFailFence++; return; case Instruction::GetElementPtr: NumFastIselFailGetElementPtr++; return; // Convert instructions... case Instruction::Trunc: NumFastIselFailTrunc++; return; case Instruction::ZExt: NumFastIselFailZExt++; return; case Instruction::SExt: NumFastIselFailSExt++; return; case Instruction::FPTrunc: NumFastIselFailFPTrunc++; return; case Instruction::FPExt: NumFastIselFailFPExt++; return; case Instruction::FPToUI: NumFastIselFailFPToUI++; return; case Instruction::FPToSI: NumFastIselFailFPToSI++; return; case Instruction::UIToFP: NumFastIselFailUIToFP++; return; case Instruction::SIToFP: NumFastIselFailSIToFP++; return; case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return; case Instruction::PtrToInt: NumFastIselFailPtrToInt++; return; case Instruction::BitCast: NumFastIselFailBitCast++; return; // Other instructions... 
case Instruction::ICmp: NumFastIselFailICmp++; return; case Instruction::FCmp: NumFastIselFailFCmp++; return; case Instruction::PHI: NumFastIselFailPHI++; return; case Instruction::Select: NumFastIselFailSelect++; return; case Instruction::Call: { if (auto const *Intrinsic = dyn_cast(I)) { switch (Intrinsic->getIntrinsicID()) { default: NumFastIselFailIntrinsicCall++; return; case Intrinsic::sadd_with_overflow: NumFastIselFailSAddWithOverflow++; return; case Intrinsic::uadd_with_overflow: NumFastIselFailUAddWithOverflow++; return; case Intrinsic::ssub_with_overflow: NumFastIselFailSSubWithOverflow++; return; case Intrinsic::usub_with_overflow: NumFastIselFailUSubWithOverflow++; return; case Intrinsic::smul_with_overflow: NumFastIselFailSMulWithOverflow++; return; case Intrinsic::umul_with_overflow: NumFastIselFailUMulWithOverflow++; return; case Intrinsic::frameaddress: NumFastIselFailFrameaddress++; return; case Intrinsic::sqrt: NumFastIselFailSqrt++; return; case Intrinsic::experimental_stackmap: NumFastIselFailStackMap++; return; case Intrinsic::experimental_patchpoint_void: // fall-through case Intrinsic::experimental_patchpoint_i64: NumFastIselFailPatchPoint++; return; } } NumFastIselFailCall++; return; } case Instruction::Shl: NumFastIselFailShl++; return; case Instruction::LShr: NumFastIselFailLShr++; return; case Instruction::AShr: NumFastIselFailAShr++; return; case Instruction::VAArg: NumFastIselFailVAArg++; return; case Instruction::ExtractElement: NumFastIselFailExtractElement++; return; case Instruction::InsertElement: NumFastIselFailInsertElement++; return; case Instruction::ShuffleVector: NumFastIselFailShuffleVector++; return; case Instruction::ExtractValue: NumFastIselFailExtractValue++; return; case Instruction::InsertValue: NumFastIselFailInsertValue++; return; case Instruction::LandingPad: NumFastIselFailLandingPad++; return; } } #endif // NDEBUG /// Set up SwiftErrorVals by going through the function. 
If the function has /// swifterror argument, it will be the first entry. static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI, FunctionLoweringInfo *FuncInfo) { if (!TLI->supportSwiftError()) return; FuncInfo->SwiftErrorVals.clear(); FuncInfo->SwiftErrorVRegDefMap.clear(); FuncInfo->SwiftErrorVRegUpwardsUse.clear(); FuncInfo->SwiftErrorArg = nullptr; // Check if function has a swifterror argument. bool HaveSeenSwiftErrorArg = false; for (Function::const_arg_iterator AI = Fn.arg_begin(), AE = Fn.arg_end(); AI != AE; ++AI) if (AI->hasSwiftErrorAttr()) { assert(!HaveSeenSwiftErrorArg && "Must have only one swifterror parameter"); (void)HaveSeenSwiftErrorArg; // silence warning. HaveSeenSwiftErrorArg = true; FuncInfo->SwiftErrorArg = &*AI; FuncInfo->SwiftErrorVals.push_back(&*AI); } for (const auto &LLVMBB : Fn) for (const auto &Inst : LLVMBB) { if (const AllocaInst *Alloca = dyn_cast(&Inst)) if (Alloca->isSwiftError()) FuncInfo->SwiftErrorVals.push_back(Alloca); } } static void createSwiftErrorEntriesInEntryBlock(FunctionLoweringInfo *FuncInfo, const TargetLowering *TLI, const TargetInstrInfo *TII, const BasicBlock *LLVMBB, SelectionDAGBuilder *SDB) { if (!TLI->supportSwiftError()) return; // We only need to do this when we have swifterror parameter or swifterror // alloc. if (FuncInfo->SwiftErrorVals.empty()) return; if (pred_begin(LLVMBB) == pred_end(LLVMBB)) { auto &DL = FuncInfo->MF->getDataLayout(); auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL)); for (const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) { // We will always generate a copy from the argument. It is always used at // least by the 'return' of the swifterror. if (FuncInfo->SwiftErrorArg && FuncInfo->SwiftErrorArg == SwiftErrorVal) continue; unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC); // Assign Undef to Vreg. We construct MI directly to make sure it works // with FastISel. 
BuildMI(*FuncInfo->MBB, FuncInfo->MBB->getFirstNonPHI(), SDB->getCurDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), VReg); FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorVal, VReg); } } } /// Propagate swifterror values through the machine function CFG. static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) { auto *TLI = FuncInfo->TLI; if (!TLI->supportSwiftError()) return; // We only need to do this when we have swifterror parameter or swifterror // alloc. if (FuncInfo->SwiftErrorVals.empty()) return; // For each machine basic block in reverse post order. ReversePostOrderTraversal RPOT(FuncInfo->MF); for (ReversePostOrderTraversal::rpo_iterator It = RPOT.begin(), E = RPOT.end(); It != E; ++It) { MachineBasicBlock *MBB = *It; // For each swifterror value in the function. for(const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) { auto Key = std::make_pair(MBB, SwiftErrorVal); auto UUseIt = FuncInfo->SwiftErrorVRegUpwardsUse.find(Key); auto VRegDefIt = FuncInfo->SwiftErrorVRegDefMap.find(Key); bool UpwardsUse = UUseIt != FuncInfo->SwiftErrorVRegUpwardsUse.end(); unsigned UUseVReg = UpwardsUse ? UUseIt->second : 0; bool DownwardDef = VRegDefIt != FuncInfo->SwiftErrorVRegDefMap.end(); assert(!(UpwardsUse && !DownwardDef) && "We can't have an upwards use but no downwards def"); // If there is no upwards exposed use and an entry for the swifterror in // the def map for this value we don't need to do anything: We already // have a downward def for this basic block. if (!UpwardsUse && DownwardDef) continue; // Otherwise we either have an upwards exposed use vreg that we need to // materialize or need to forward the downward def from predecessors. // Check whether we have a single vreg def from all predecessors. // Otherwise we need a phi. 
SmallVector, 4> VRegs; SmallSet Visited; for (auto *Pred : MBB->predecessors()) { if (!Visited.insert(Pred).second) continue; VRegs.push_back(std::make_pair( Pred, FuncInfo->getOrCreateSwiftErrorVReg(Pred, SwiftErrorVal))); if (Pred != MBB) continue; // We have a self-edge. // If there was no upwards use in this basic block there is now one: the // phi needs to use it self. if (!UpwardsUse) { UpwardsUse = true; UUseIt = FuncInfo->SwiftErrorVRegUpwardsUse.find(Key); assert(UUseIt != FuncInfo->SwiftErrorVRegUpwardsUse.end()); UUseVReg = UUseIt->second; } } // We need a phi node if we have more than one predecessor with different // downward defs. bool needPHI = VRegs.size() >= 1 && std::find_if( VRegs.begin(), VRegs.end(), [&](const std::pair &V) -> bool { return V.second != VRegs[0].second; }) != VRegs.end(); // If there is no upwards exposed used and we don't need a phi just // forward the swifterror vreg from the predecessor(s). if (!UpwardsUse && !needPHI) { assert(!VRegs.empty() && "No predecessors? The entry block should bail out earlier"); // Just forward the swifterror vreg from the predecessor(s). FuncInfo->setCurrentSwiftErrorVReg(MBB, SwiftErrorVal, VRegs[0].second); continue; } auto DLoc = isa(SwiftErrorVal) ? dyn_cast(SwiftErrorVal)->getDebugLoc() : DebugLoc(); const auto *TII = FuncInfo->MF->getSubtarget().getInstrInfo(); // If we don't need a phi create a copy to the upward exposed vreg. if (!needPHI) { assert(UpwardsUse); unsigned DestReg = UUseVReg; BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, TII->get(TargetOpcode::COPY), DestReg) .addReg(VRegs[0].second); continue; } // We need a phi: if there is an upwards exposed use we already have a // destination virtual register number otherwise we generate a new one. auto &DL = FuncInfo->MF->getDataLayout(); auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL)); unsigned PHIVReg = UpwardsUse ? 
UUseVReg : FuncInfo->MF->getRegInfo().createVirtualRegister(RC); MachineInstrBuilder SwiftErrorPHI = BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, TII->get(TargetOpcode::PHI), PHIVReg); for (auto BBRegPair : VRegs) { SwiftErrorPHI.addReg(BBRegPair.second).addMBB(BBRegPair.first); } // We did not have a definition in this block before: store the phi's vreg // as this block downward exposed def. if (!UpwardsUse) FuncInfo->setCurrentSwiftErrorVReg(MBB, SwiftErrorVal, PHIVReg); } } } void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = nullptr; if (TM.Options.EnableFastISel) FastIS = TLI->createFastISel(*FuncInfo, LibInfo); setupSwiftErrorVals(Fn, TLI, FuncInfo); // Iterate over all basic blocks in the function. ReversePostOrderTraversal RPOT(&Fn); for (ReversePostOrderTraversal::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { const BasicBlock *LLVMBB = *I; if (OptLevel != CodeGenOpt::None) { bool AllPredsVisited = true; for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB); PI != PE; ++PI) { if (!FuncInfo->VisitedBBs.count(*PI)) { AllPredsVisited = false; break; } } if (AllPredsVisited) { for (BasicBlock::const_iterator I = LLVMBB->begin(); const PHINode *PN = dyn_cast(I); ++I) FuncInfo->ComputePHILiveOutRegInfo(PN); } else { for (BasicBlock::const_iterator I = LLVMBB->begin(); const PHINode *PN = dyn_cast(I); ++I) FuncInfo->InvalidatePHILiveOutRegInfo(PN); } FuncInfo->VisitedBBs.insert(LLVMBB); } BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI()->getIterator(); BasicBlock::const_iterator const End = LLVMBB->end(); BasicBlock::const_iterator BI = End; FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB]; if (!FuncInfo->MBB) continue; // Some blocks like catchpads have no code or MBB. FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); createSwiftErrorEntriesInEntryBlock(FuncInfo, TLI, TII, LLVMBB, SDB); // Setup an EH landing-pad block. 
FuncInfo->ExceptionPointerVirtReg = 0; FuncInfo->ExceptionSelectorVirtReg = 0; if (LLVMBB->isEHPad()) if (!PrepareEHLandingPad()) continue; // Before doing SelectionDAG ISel, see if FastISel has been requested. if (FastIS) { FastIS->startNewBlock(); // Emit code for any incoming arguments. This must happen before // beginning FastISel on the entry block. if (LLVMBB == &Fn.getEntryBlock()) { ++NumEntryBlocks; // Lower any arguments needed in this block if this is the entry block. if (!FastIS->lowerArguments()) { // Fast isel failed to lower these arguments ++NumFastIselFailLowerArguments; if (EnableFastISelAbort > 1) report_fatal_error("FastISel didn't lower all arguments"); // Use SelectionDAG argument lowering LowerArguments(Fn); CurDAG->setRoot(SDB->getControlRoot()); SDB->clear(); CodeGenAndEmitDAG(); } // If we inserted any instructions at the beginning, make a note of // where they are, so we can be sure to emit subsequent instructions // after them. if (FuncInfo->InsertPt != FuncInfo->MBB->begin()) FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt)); else FastIS->setLastLocalValue(nullptr); } unsigned NumFastIselRemaining = std::distance(Begin, End); // Do FastISel on as many instructions as possible. for (; BI != Begin; --BI) { const Instruction *Inst = &*std::prev(BI); // If we no longer require this instruction, skip it. if (isFoldedOrDeadInstruction(Inst, FuncInfo)) { --NumFastIselRemaining; continue; } // Bottom-up: reset the insert pos at the top, after any local-value // instructions. FastIS->recomputeInsertPt(); // Try to select the instruction with FastISel. if (FastIS->selectInstruction(Inst)) { --NumFastIselRemaining; ++NumFastIselSuccess; // If fast isel succeeded, skip over all the folded instructions, and // then see if there is a load right before the selected instructions. // Try to fold the load if so. 
const Instruction *BeforeInst = Inst; while (BeforeInst != &*Begin) { BeforeInst = &*std::prev(BasicBlock::const_iterator(BeforeInst)); if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo)) break; } if (BeforeInst != Inst && isa(BeforeInst) && BeforeInst->hasOneUse() && FastIS->tryToFoldLoad(cast(BeforeInst), Inst)) { // If we succeeded, don't re-select the load. BI = std::next(BasicBlock::const_iterator(BeforeInst)); --NumFastIselRemaining; ++NumFastIselSuccess; } continue; } #ifndef NDEBUG if (EnableFastISelVerbose2) collectFailStats(Inst); #endif // Then handle certain instructions as single-LLVM-Instruction blocks. if (isa(Inst)) { if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel missed call: "; Inst->dump(); } if (EnableFastISelAbort > 2) // FastISel selector couldn't handle something and bailed. // For the purpose of debugging, just abort. report_fatal_error("FastISel didn't select the entire block"); if (!Inst->getType()->isVoidTy() && !Inst->getType()->isTokenTy() && !Inst->use_empty()) { unsigned &R = FuncInfo->ValueMap[Inst]; if (!R) R = FuncInfo->CreateRegs(Inst->getType()); } bool HadTailCall = false; MachineBasicBlock::iterator SavedInsertPt = FuncInfo->InsertPt; SelectBasicBlock(Inst->getIterator(), BI, HadTailCall); // If the call was emitted as a tail call, we're done with the block. // We also need to delete any previously emitted instructions. if (HadTailCall) { FastIS->removeDeadCode(SavedInsertPt, FuncInfo->MBB->end()); --BI; break; } // Recompute NumFastIselRemaining as Selection DAG instruction // selection may have handled the call, input args, etc. unsigned RemainingNow = std::distance(Begin, BI); NumFastIselFailures += NumFastIselRemaining - RemainingNow; NumFastIselRemaining = RemainingNow; continue; } bool ShouldAbort = EnableFastISelAbort; if (EnableFastISelVerbose || EnableFastISelAbort) { if (isa(Inst)) { // Use a different message for terminator misses. 
dbgs() << "FastISel missed terminator: "; // Don't abort unless for terminator unless the level is really high ShouldAbort = (EnableFastISelAbort > 2); } else { dbgs() << "FastISel miss: "; } Inst->dump(); } if (ShouldAbort) // FastISel selector couldn't handle something and bailed. // For the purpose of debugging, just abort. report_fatal_error("FastISel didn't select the entire block"); NumFastIselFailures += NumFastIselRemaining; break; } FastIS->recomputeInsertPt(); } else { // Lower any arguments needed in this block if this is the entry block. if (LLVMBB == &Fn.getEntryBlock()) { ++NumEntryBlocks; LowerArguments(Fn); } } if (getAnalysis().shouldEmitSDCheck(*LLVMBB)) { bool FunctionBasedInstrumentation = TLI->getSSPStackGuardCheck(*Fn.getParent()); SDB->SPDescriptor.initialize(LLVMBB, FuncInfo->MBBMap[LLVMBB], FunctionBasedInstrumentation); } if (Begin != BI) ++NumDAGBlocks; else ++NumFastIselBlocks; if (Begin != BI) { // Run SelectionDAG instruction selection on the remainder of the block // not handled by FastISel. If FastISel is not run, this is the entire // block. bool HadTailCall; SelectBasicBlock(Begin, BI, HadTailCall); } FinishBasicBlock(); FuncInfo->PHINodesToUpdate.clear(); } propagateSwiftErrorVRegs(FuncInfo); delete FastIS; SDB->clearDanglingDebugInfo(); SDB->SPDescriptor.resetPerFunctionState(); } /// Given that the input MI is before a partial terminator sequence TSeq, return /// true if M + TSeq also a partial terminator sequence. /// /// A Terminator sequence is a sequence of MachineInstrs which at this point in /// lowering copy vregs into physical registers, which are then passed into /// terminator instructors so we can satisfy ABI constraints. A partial /// terminator sequence is an improper subset of a terminator sequence (i.e. it /// may be the whole terminator sequence). static bool MIIsInTerminatorSequence(const MachineInstr &MI) { // If we do not have a copy or an implicit def, we return true if and only if // MI is a debug value. 
if (!MI.isCopy() && !MI.isImplicitDef()) // Sometimes DBG_VALUE MI sneak in between the copies from the vregs to the // physical registers if there is debug info associated with the terminator // of our mbb. We want to include said debug info in our terminator // sequence, so we return true in that case. return MI.isDebugValue(); // We have left the terminator sequence if we are not doing one of the // following: // // 1. Copying a vreg into a physical register. // 2. Copying a vreg into a vreg. // 3. Defining a register via an implicit def. // OPI should always be a register definition... MachineInstr::const_mop_iterator OPI = MI.operands_begin(); if (!OPI->isReg() || !OPI->isDef()) return false; // Defining any register via an implicit def is always ok. if (MI.isImplicitDef()) return true; // Grab the copy source... MachineInstr::const_mop_iterator OPI2 = OPI; ++OPI2; assert(OPI2 != MI.operands_end() && "Should have a copy implying we should have 2 arguments."); // Make sure that the copy dest is not a vreg when the copy source is a // physical register. if (!OPI2->isReg() || (!TargetRegisterInfo::isPhysicalRegister(OPI->getReg()) && TargetRegisterInfo::isPhysicalRegister(OPI2->getReg()))) return false; return true; } /// Find the split point at which to splice the end of BB into its success stack /// protector check machine basic block. /// /// On many platforms, due to ABI constraints, terminators, even before register /// allocation, use physical registers. This creates an issue for us since /// physical registers at this point can not travel across basic /// blocks. Luckily, selectiondag always moves physical registers into vregs /// when they enter functions and moves them through a sequence of copies back /// into the physical registers right before the terminator creating a /// ``Terminator Sequence''. 
This function is searching for the beginning of the /// terminator sequence so that we can ensure that we splice off not just the /// terminator, but additionally the copies that move the vregs into the /// physical registers. static MachineBasicBlock::iterator FindSplitPointForStackProtector(MachineBasicBlock *BB) { MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator(); // if (SplitPoint == BB->begin()) return SplitPoint; MachineBasicBlock::iterator Start = BB->begin(); MachineBasicBlock::iterator Previous = SplitPoint; --Previous; while (MIIsInTerminatorSequence(*Previous)) { SplitPoint = Previous; if (Previous == Start) break; --Previous; } return SplitPoint; } void SelectionDAGISel::FinishBasicBlock() { DEBUG(dbgs() << "Total amount of phi nodes to update: " << FuncInfo->PHINodesToUpdate.size() << "\n"; for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) dbgs() << "Node " << i << " : (" << FuncInfo->PHINodesToUpdate[i].first << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n"); // Next, now that we know what the last MBB the LLVM BB expanded is, update // PHI nodes in successors. for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first); assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); if (!FuncInfo->MBB->isSuccessor(PHI->getParent())) continue; PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB); } // Handle stack protector. if (SDB->SPDescriptor.shouldEmitFunctionBasedCheckStackProtector()) { // The target provides a guard check function. There is no need to // generate error handling code or to split current basic block. MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB(); // Add load and check to the basicblock. 
FuncInfo->MBB = ParentMBB; FuncInfo->InsertPt = FindSplitPointForStackProtector(ParentMBB); SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); CodeGenAndEmitDAG(); // Clear the Per-BB State. SDB->SPDescriptor.resetPerBBState(); } else if (SDB->SPDescriptor.shouldEmitStackProtector()) { MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB(); MachineBasicBlock *SuccessMBB = SDB->SPDescriptor.getSuccessMBB(); // Find the split point to split the parent mbb. At the same time copy all // physical registers used in the tail of parent mbb into virtual registers // before the split point and back into physical registers after the split // point. This prevents us needing to deal with Live-ins and many other // register allocation issues caused by us splitting the parent mbb. The // register allocator will clean up said virtual copies later on. MachineBasicBlock::iterator SplitPoint = FindSplitPointForStackProtector(ParentMBB); // Splice the terminator of ParentMBB into SuccessMBB. SuccessMBB->splice(SuccessMBB->end(), ParentMBB, SplitPoint, ParentMBB->end()); // Add compare/jump on neq/jump to the parent BB. FuncInfo->MBB = ParentMBB; FuncInfo->InsertPt = ParentMBB->end(); SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); CodeGenAndEmitDAG(); // CodeGen Failure MBB if we have not codegened it yet. MachineBasicBlock *FailureMBB = SDB->SPDescriptor.getFailureMBB(); if (FailureMBB->empty()) { FuncInfo->MBB = FailureMBB; FuncInfo->InsertPt = FailureMBB->end(); SDB->visitSPDescriptorFailure(SDB->SPDescriptor); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); CodeGenAndEmitDAG(); } // Clear the Per-BB State. SDB->SPDescriptor.resetPerBBState(); } // Lower each BitTestBlock. 
for (auto &BTB : SDB->BitTestCases) { // Lower header first, if it wasn't already lowered if (!BTB.Emitted) { // Set the current basic block to the mbb we wish to insert the code into FuncInfo->MBB = BTB.Parent; FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code SDB->visitBitTestHeader(BTB, FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); CodeGenAndEmitDAG(); } BranchProbability UnhandledProb = BTB.Prob; for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) { UnhandledProb -= BTB.Cases[j].ExtraProb; // Set the current basic block to the mbb we wish to insert the code into FuncInfo->MBB = BTB.Cases[j].ThisBB; FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code // If all cases cover a contiguous range, it is not necessary to jump to // the default block after the last bit test fails. This is because the // range check during bit test header creation has guaranteed that every // case here doesn't go outside the range. In this case, there is no need // to perform the last bit test, as it will always be true. Instead, make // the second-to-last bit-test fall through to the target of the last bit // test, and delete the last bit test. MachineBasicBlock *NextMBB; if (BTB.ContiguousRange && j + 2 == ej) { // Second-to-last bit-test with contiguous range: fall through to the // target of the final bit test. NextMBB = BTB.Cases[j + 1].TargetBB; } else if (j + 1 == ej) { // For the last bit test, fall through to Default. NextMBB = BTB.Default; } else { // Otherwise, fall through to the next bit test. NextMBB = BTB.Cases[j + 1].ThisBB; } SDB->visitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); CodeGenAndEmitDAG(); if (BTB.ContiguousRange && j + 2 == ej) { // Since we're not going to use the final bit test, remove it. 
BTB.Cases.pop_back(); break; } } // Update PHI Nodes for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size(); pi != pe; ++pi) { MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first); MachineBasicBlock *PHIBB = PHI->getParent(); assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); // This is "default" BB. We have two jumps to it. From "header" BB and // from last "case" BB, unless the latter was skipped. if (PHIBB == BTB.Default) { PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(BTB.Parent); if (!BTB.ContiguousRange) { PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second) .addMBB(BTB.Cases.back().ThisBB); } } // One of "cases" BB. for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) { MachineBasicBlock* cBB = BTB.Cases[j].ThisBB; if (cBB->isSuccessor(PHIBB)) PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(cBB); } } } SDB->BitTestCases.clear(); // If the JumpTable record is filled in, then we need to emit a jump table. // Updating the PHI nodes is tricky in this case, since we need to determine // whether the PHI is a successor of the range check MBB or the jump table MBB for (unsigned i = 0, e = SDB->JTCases.size(); i != e; ++i) { // Lower header first, if it wasn't already lowered if (!SDB->JTCases[i].first.Emitted) { // Set the current basic block to the mbb we wish to insert the code into FuncInfo->MBB = SDB->JTCases[i].first.HeaderBB; FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first, FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); CodeGenAndEmitDAG(); } // Set the current basic block to the mbb we wish to insert the code into FuncInfo->MBB = SDB->JTCases[i].second.MBB; FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code SDB->visitJumpTable(SDB->JTCases[i].second); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); CodeGenAndEmitDAG(); // Update PHI Nodes for (unsigned pi = 0, pe = 
FuncInfo->PHINodesToUpdate.size(); pi != pe; ++pi) { MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first); MachineBasicBlock *PHIBB = PHI->getParent(); assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); // "default" BB. We can go there only from header BB. if (PHIBB == SDB->JTCases[i].second.Default) PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second) .addMBB(SDB->JTCases[i].first.HeaderBB); // JT BB. Just iterate over successors here if (FuncInfo->MBB->isSuccessor(PHIBB)) PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(FuncInfo->MBB); } } SDB->JTCases.clear(); // If we generated any switch lowering information, build and codegen any // additional DAGs necessary. for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) { // Set the current basic block to the mbb we wish to insert the code into FuncInfo->MBB = SDB->SwitchCases[i].ThisBB; FuncInfo->InsertPt = FuncInfo->MBB->end(); // Determine the unique successors. SmallVector Succs; Succs.push_back(SDB->SwitchCases[i].TrueBB); if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB) Succs.push_back(SDB->SwitchCases[i].FalseBB); // Emit the code. Note that this could result in FuncInfo->MBB being split. SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); CodeGenAndEmitDAG(); // Remember the last block, now that any splitting is done, for use in // populating PHI nodes in successors. MachineBasicBlock *ThisBB = FuncInfo->MBB; // Handle any PHI nodes in successors of this chunk, as if we were coming // from the original BB before switch expansion. Note that PHI nodes can // occur multiple times in PHINodesToUpdate. We have to be very careful to // handle them the right number of times. for (unsigned i = 0, e = Succs.size(); i != e; ++i) { FuncInfo->MBB = Succs[i]; FuncInfo->InsertPt = FuncInfo->MBB->end(); // FuncInfo->MBB may have been removed from the CFG if a branch was // constant folded. 
if (ThisBB->isSuccessor(FuncInfo->MBB)) { for (MachineBasicBlock::iterator MBBI = FuncInfo->MBB->begin(), MBBE = FuncInfo->MBB->end(); MBBI != MBBE && MBBI->isPHI(); ++MBBI) { MachineInstrBuilder PHI(*MF, MBBI); // This value for this PHI node is recorded in PHINodesToUpdate. for (unsigned pn = 0; ; ++pn) { assert(pn != FuncInfo->PHINodesToUpdate.size() && "Didn't find PHI entry!"); if (FuncInfo->PHINodesToUpdate[pn].first == PHI) { PHI.addReg(FuncInfo->PHINodesToUpdate[pn].second).addMBB(ThisBB); break; } } } } } } SDB->SwitchCases.clear(); } /// Create the scheduler. If a specific scheduler was specified /// via the SchedulerRegistry, use it, otherwise select the /// one preferred by the target. /// ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() { return ISHeuristic(this, OptLevel); } //===----------------------------------------------------------------------===// // Helper functions used by the generated instruction selector. //===----------------------------------------------------------------------===// // Calls to these methods are generated by tblgen. /// CheckAndMask - The isel is trying to match something like (and X, 255). If /// the dag combiner simplified the 255, we still want to match. RHS is the /// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value /// specified in the .td file (e.g. 255). bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS, int64_t DesiredMaskS) const { const APInt &ActualMask = RHS->getAPIntValue(); const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS); // If the actual mask exactly matches, success! if (ActualMask == DesiredMask) return true; // If the actual AND mask is allowing unallowed bits, this doesn't match. if (ActualMask.intersects(~DesiredMask)) return false; // Otherwise, the DAG Combiner may have proven that the value coming in is // either already zero or is not demanded. Check for known zero input bits. 
APInt NeededMask = DesiredMask & ~ActualMask; if (CurDAG->MaskedValueIsZero(LHS, NeededMask)) return true; // TODO: check to see if missing bits are just not demanded. // Otherwise, this pattern doesn't match. return false; } /// CheckOrMask - The isel is trying to match something like (or X, 255). If /// the dag combiner simplified the 255, we still want to match. RHS is the /// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value /// specified in the .td file (e.g. 255). bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, int64_t DesiredMaskS) const { const APInt &ActualMask = RHS->getAPIntValue(); const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS); // If the actual mask exactly matches, success! if (ActualMask == DesiredMask) return true; // If the actual AND mask is allowing unallowed bits, this doesn't match. if (ActualMask.intersects(~DesiredMask)) return false; // Otherwise, the DAG Combiner may have proven that the value coming in is // either already zero or is not demanded. Check for known zero input bits. APInt NeededMask = DesiredMask & ~ActualMask; APInt KnownZero, KnownOne; CurDAG->computeKnownBits(LHS, KnownZero, KnownOne); // If all the missing bits in the or are already known to be set, match! if ((NeededMask & KnownOne) == NeededMask) return true; // TODO: check to see if missing bits are just not demanded. // Otherwise, this pattern doesn't match. return false; } /// SelectInlineAsmMemoryOperands - Calls to this are automatically generated /// by tblgen. Others should not call it. 
void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector &Ops, const SDLoc &DL) { std::vector InOps; std::swap(InOps, Ops); Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0 Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1 Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc Ops.push_back(InOps[InlineAsm::Op_ExtraInfo]); // 3 (SideEffect, AlignStack) unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size(); if (InOps[e-1].getValueType() == MVT::Glue) --e; // Don't process a glue operand if it is here. while (i != e) { unsigned Flags = cast(InOps[i])->getZExtValue(); if (!InlineAsm::isMemKind(Flags)) { // Just skip over this operand, copying the operands verbatim. Ops.insert(Ops.end(), InOps.begin()+i, InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1); i += InlineAsm::getNumOperandRegisters(Flags) + 1; } else { assert(InlineAsm::getNumOperandRegisters(Flags) == 1 && "Memory operand with multiple values?"); unsigned TiedToOperand; if (InlineAsm::isUseOperandTiedToDef(Flags, TiedToOperand)) { // We need the constraint ID from the operand this is tied to. unsigned CurOp = InlineAsm::Op_FirstOperand; Flags = cast(InOps[CurOp])->getZExtValue(); for (; TiedToOperand; --TiedToOperand) { CurOp += InlineAsm::getNumOperandRegisters(Flags)+1; Flags = cast(InOps[CurOp])->getZExtValue(); } } // Otherwise, this is a memory operand. Ask the target to select it. std::vector SelOps; unsigned ConstraintID = InlineAsm::getMemoryConstraintID(Flags); if (SelectInlineAsmMemoryOperand(InOps[i+1], ConstraintID, SelOps)) report_fatal_error("Could not match memory address. Inline asm" " failure!"); // Add this to the output node. unsigned NewFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size()); NewFlags = InlineAsm::getFlagWordForMem(NewFlags, ConstraintID); Ops.push_back(CurDAG->getTargetConstant(NewFlags, DL, MVT::i32)); Ops.insert(Ops.end(), SelOps.begin(), SelOps.end()); i += 2; } } // Add the glue input back if present. 
if (e != InOps.size()) Ops.push_back(InOps.back()); } /// findGlueUse - Return use of MVT::Glue value produced by the specified /// SDNode. /// static SDNode *findGlueUse(SDNode *N) { unsigned FlagResNo = N->getNumValues()-1; for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { SDUse &Use = I.getUse(); if (Use.getResNo() == FlagResNo) return Use.getUser(); } return nullptr; } /// findNonImmUse - Return true if "Use" is a non-immediate use of "Def". /// This function recursively traverses up the operand chain, ignoring /// certain nodes. static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, SDNode *Root, SmallPtrSetImpl &Visited, bool IgnoreChains) { // The NodeID's are given uniques ID's where a node ID is guaranteed to be // greater than all of its (recursive) operands. If we scan to a point where // 'use' is smaller than the node we're scanning for, then we know we will // never find it. // // The Use may be -1 (unassigned) if it is a newly allocated node. This can // happen because we scan down to newly selected nodes in the case of glue // uses. if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1)) return false; // Don't revisit nodes if we already scanned it and didn't fail, we know we // won't fail if we scan it again. if (!Visited.insert(Use).second) return false; for (const SDValue &Op : Use->op_values()) { // Ignore chain uses, they are validated by HandleMergeInputChains. if (Op.getValueType() == MVT::Other && IgnoreChains) continue; SDNode *N = Op.getNode(); if (N == Def) { if (Use == ImmedUse || Use == Root) continue; // We are not looking for immediate use. assert(N != Root); return true; } // Traverse up the operand chain. if (findNonImmUse(N, Def, ImmedUse, Root, Visited, IgnoreChains)) return true; } return false; } /// IsProfitableToFold - Returns true if it's profitable to fold the specific /// operand node N of U during instruction selection that starts at Root. 
bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { if (OptLevel == CodeGenOpt::None) return false; return N.hasOneUse(); } /// IsLegalToFold - Returns true if the specific operand node N of /// U can be folded during instruction selection that starts at Root. bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, CodeGenOpt::Level OptLevel, bool IgnoreChains) { if (OptLevel == CodeGenOpt::None) return false; // If Root use can somehow reach N through a path that that doesn't contain // U then folding N would create a cycle. e.g. In the following // diagram, Root can reach N through X. If N is folded into into Root, then // X is both a predecessor and a successor of U. // // [N*] // // ^ ^ // // / \ // // [U*] [X]? // // ^ ^ // // \ / // // \ / // // [Root*] // // // * indicates nodes to be folded together. // // If Root produces glue, then it gets (even more) interesting. Since it // will be "glued" together with its glue use in the scheduler, we need to // check if it might reach N. // // [N*] // // ^ ^ // // / \ // // [U*] [X]? // // ^ ^ // // \ \ // // \ | // // [Root*] | // // ^ | // // f | // // | / // // [Y] / // // ^ / // // f / // // | / // // [GU] // // // If GU (glue use) indirectly reaches N (the load), and Root folds N // (call it Fold), then X is a predecessor of GU and a successor of // Fold. But since Fold and GU are glued together, this will create // a cycle in the scheduling graph. // If the node has glue, walk down the graph to the "lowest" node in the // glueged set. EVT VT = Root->getValueType(Root->getNumValues()-1); while (VT == MVT::Glue) { SDNode *GU = findGlueUse(Root); if (!GU) break; Root = GU; VT = Root->getValueType(Root->getNumValues()-1); // If our query node has a glue result with a use, we've walked up it. If // the user (which has already been selected) has a chain or indirectly uses // the chain, our WalkChainUsers predicate will not consider it. 
Because of // this, we cannot ignore chains in this predicate. IgnoreChains = false; } SmallPtrSet Visited; return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains); } void SelectionDAGISel::Select_INLINEASM(SDNode *N) { SDLoc DL(N); std::vector Ops(N->op_begin(), N->op_end()); SelectInlineAsmMemoryOperands(Ops, DL); const EVT VTs[] = {MVT::Other, MVT::Glue}; SDValue New = CurDAG->getNode(ISD::INLINEASM, DL, VTs, Ops); New->setNodeId(-1); ReplaceUses(N, New.getNode()); CurDAG->RemoveDeadNode(N); } void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { SDLoc dl(Op); MDNodeSDNode *MD = dyn_cast(Op->getOperand(1)); const MDString *RegStr = dyn_cast(MD->getMD()->getOperand(0)); unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0), *CurDAG); SDValue New = CurDAG->getCopyFromReg( Op->getOperand(0), dl, Reg, Op->getValueType(0)); New->setNodeId(-1); ReplaceUses(Op, New.getNode()); CurDAG->RemoveDeadNode(Op); } void SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) { SDLoc dl(Op); MDNodeSDNode *MD = dyn_cast(Op->getOperand(1)); const MDString *RegStr = dyn_cast(MD->getMD()->getOperand(0)); unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(), Op->getOperand(2).getValueType(), *CurDAG); SDValue New = CurDAG->getCopyToReg( Op->getOperand(0), dl, Reg, Op->getOperand(2)); New->setNodeId(-1); ReplaceUses(Op, New.getNode()); CurDAG->RemoveDeadNode(Op); } void SelectionDAGISel::Select_UNDEF(SDNode *N) { CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0)); } /// GetVBR - decode a vbr encoding whose top bit is set. LLVM_ATTRIBUTE_ALWAYS_INLINE static inline uint64_t GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { assert(Val >= 128 && "Not a VBR"); Val &= 127; // Remove first vbr bit. 
unsigned Shift = 7; uint64_t NextBits; do { NextBits = MatcherTable[Idx++]; Val |= (NextBits&127) << Shift; Shift += 7; } while (NextBits & 128); return Val; } /// When a match is complete, this method updates uses of interior chain results /// to use the new results. void SelectionDAGISel::UpdateChains( SDNode *NodeToMatch, SDValue InputChain, SmallVectorImpl &ChainNodesMatched, bool isMorphNodeTo) { SmallVector NowDeadNodes; // Now that all the normal results are replaced, we replace the chain and // glue results if present. if (!ChainNodesMatched.empty()) { assert(InputChain.getNode() && "Matched input chains but didn't produce a chain"); // Loop over all of the nodes we matched that produced a chain result. // Replace all the chain results with the final chain we ended up with. for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { SDNode *ChainNode = ChainNodesMatched[i]; // If ChainNode is null, it's because we replaced it on a previous // iteration and we cleared it out of the map. Just skip it. if (!ChainNode) continue; assert(ChainNode->getOpcode() != ISD::DELETED_NODE && "Deleted node left in chain"); // Don't replace the results of the root node if we're doing a // MorphNodeTo. if (ChainNode == NodeToMatch && isMorphNodeTo) continue; SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1); if (ChainVal.getValueType() == MVT::Glue) ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2); assert(ChainVal.getValueType() == MVT::Other && "Not a chain?"); SelectionDAG::DAGNodeDeletedListener NDL( *CurDAG, [&](SDNode *N, SDNode *E) { std::replace(ChainNodesMatched.begin(), ChainNodesMatched.end(), N, static_cast(nullptr)); }); CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain); // If the node became dead and we haven't already seen it, delete it. 
if (ChainNode != NodeToMatch && ChainNode->use_empty() && !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode)) NowDeadNodes.push_back(ChainNode); } } if (!NowDeadNodes.empty()) CurDAG->RemoveDeadNodes(NowDeadNodes); DEBUG(dbgs() << "ISEL: Match complete!\n"); } enum ChainResult { CR_Simple, CR_InducesCycle, CR_LeadsToInteriorNode }; /// WalkChainUsers - Walk down the users of the specified chained node that is /// part of the pattern we're matching, looking at all of the users we find. /// This determines whether something is an interior node, whether we have a /// non-pattern node in between two pattern nodes (which prevent folding because /// it would induce a cycle) and whether we have a TokenFactor node sandwiched /// between pattern nodes (in which case the TF becomes part of the pattern). /// /// The walk we do here is guaranteed to be small because we quickly get down to /// already selected nodes "below" us. static ChainResult WalkChainUsers(const SDNode *ChainedNode, SmallVectorImpl &ChainedNodesInPattern, DenseMap &TokenFactorResult, SmallVectorImpl &InteriorChainedNodes) { ChainResult Result = CR_Simple; for (SDNode::use_iterator UI = ChainedNode->use_begin(), E = ChainedNode->use_end(); UI != E; ++UI) { // Make sure the use is of the chain, not some other value we produce. if (UI.getUse().getValueType() != MVT::Other) continue; SDNode *User = *UI; if (User->getOpcode() == ISD::HANDLENODE) // Root of the graph. continue; // If we see an already-selected machine node, then we've gone beyond the // pattern that we're selecting down into the already selected chunk of the // DAG. unsigned UserOpcode = User->getOpcode(); if (User->isMachineOpcode() || UserOpcode == ISD::CopyToReg || UserOpcode == ISD::CopyFromReg || UserOpcode == ISD::INLINEASM || UserOpcode == ISD::EH_LABEL || UserOpcode == ISD::LIFETIME_START || UserOpcode == ISD::LIFETIME_END) { // If their node ID got reset to -1 then they've already been selected. 
// Treat them like a MachineOpcode. if (User->getNodeId() == -1) continue; } // If we have a TokenFactor, we handle it specially. if (User->getOpcode() != ISD::TokenFactor) { // If the node isn't a token factor and isn't part of our pattern, then it // must be a random chained node in between two nodes we're selecting. // This happens when we have something like: // x = load ptr // call // y = x+4 // store y -> ptr // Because we structurally match the load/store as a read/modify/write, // but the call is chained between them. We cannot fold in this case // because it would induce a cycle in the graph. if (!std::count(ChainedNodesInPattern.begin(), ChainedNodesInPattern.end(), User)) return CR_InducesCycle; // Otherwise we found a node that is part of our pattern. For example in: // x = load ptr // y = x+4 // store y -> ptr // This would happen when we're scanning down from the load and see the // store as a user. Record that there is a use of ChainedNode that is // part of the pattern and keep scanning uses. Result = CR_LeadsToInteriorNode; InteriorChainedNodes.push_back(User); continue; } // If we found a TokenFactor, there are two cases to consider: first if the // TokenFactor is just hanging "below" the pattern we're matching (i.e. no // uses of the TF are in our pattern) we just want to ignore it. Second, // the TokenFactor can be sandwiched in between two chained nodes, like so: // [Load chain] // ^ // | // [Load] // ^ ^ // | \ DAG's like cheese // / \ do you? // / | // [TokenFactor] [Op] // ^ ^ // | | // \ / // \ / // [Store] // // In this case, the TokenFactor becomes part of our match and we rewrite it // as a new TokenFactor. // // To distinguish these two cases, do a recursive walk down the uses. auto MemoizeResult = TokenFactorResult.find(User); bool Visited = MemoizeResult != TokenFactorResult.end(); // Recursively walk chain users only if the result is not memoized. 
if (!Visited) { auto Res = WalkChainUsers(User, ChainedNodesInPattern, TokenFactorResult, InteriorChainedNodes); MemoizeResult = TokenFactorResult.insert(std::make_pair(User, Res)).first; } switch (MemoizeResult->second) { case CR_Simple: // If the uses of the TokenFactor are just already-selected nodes, ignore // it, it is "below" our pattern. continue; case CR_InducesCycle: // If the uses of the TokenFactor lead to nodes that are not part of our // pattern that are not selected, folding would turn this into a cycle, // bail out now. return CR_InducesCycle; case CR_LeadsToInteriorNode: break; // Otherwise, keep processing. } // Okay, we know we're in the interesting interior case. The TokenFactor // is now going to be considered part of the pattern so that we rewrite its // uses (it may have uses that are not part of the pattern) with the // ultimate chain result of the generated code. We will also add its chain // inputs as inputs to the ultimate TokenFactor we create. Result = CR_LeadsToInteriorNode; if (!Visited) { ChainedNodesInPattern.push_back(User); InteriorChainedNodes.push_back(User); } } return Result; } /// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains /// operation for when the pattern matched at least one node with a chains. The /// input vector contains a list of all of the chained nodes that we match. We /// must determine if this is a valid thing to cover (i.e. matching it won't /// induce cycles in the DAG) and if so, creating a TokenFactor node. that will /// be used as the input node chain for the generated nodes. static SDValue HandleMergeInputChains(SmallVectorImpl &ChainNodesMatched, SelectionDAG *CurDAG) { // Used for memoization. Without it WalkChainUsers could take exponential // time to run. DenseMap TokenFactorResult; // Walk all of the chained nodes we've matched, recursively scanning down the // users of the chain result. 
This adds any TokenFactor nodes that are caught // in between chained nodes to the chained and interior nodes list. SmallVector InteriorChainedNodes; for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched, TokenFactorResult, InteriorChainedNodes) == CR_InducesCycle) return SDValue(); // Would induce a cycle. } // Okay, we have walked all the matched nodes and collected TokenFactor nodes // that we are interested in. Form our input TokenFactor node. SmallVector InputChains; for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { // Add the input chain of this node to the InputChains list (which will be // the operands of the generated TokenFactor) if it's not an interior node. SDNode *N = ChainNodesMatched[i]; if (N->getOpcode() != ISD::TokenFactor) { if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N)) continue; // Otherwise, add the input chain. SDValue InChain = ChainNodesMatched[i]->getOperand(0); assert(InChain.getValueType() == MVT::Other && "Not a chain"); InputChains.push_back(InChain); continue; } // If we have a token factor, we want to add all inputs of the token factor // that are not part of the pattern we're matching. for (const SDValue &Op : N->op_values()) { if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(), Op.getNode())) InputChains.push_back(Op); } } if (InputChains.size() == 1) return InputChains[0]; return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]), MVT::Other, InputChains); } /// MorphNode - Handle morphing a node in place for the selector. SDNode *SelectionDAGISel:: MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, ArrayRef Ops, unsigned EmitNodeInfo) { // It is possible we're using MorphNodeTo to replace a node with no // normal results with one that has a normal result (or we could be // adding a chain) and the input could have glue and chains as well. 
// In this case we need to shift the operands down. // FIXME: This is a horrible hack and broken in obscure cases, no worse // than the old isel though. int OldGlueResultNo = -1, OldChainResultNo = -1; unsigned NTMNumResults = Node->getNumValues(); if (Node->getValueType(NTMNumResults-1) == MVT::Glue) { OldGlueResultNo = NTMNumResults-1; if (NTMNumResults != 1 && Node->getValueType(NTMNumResults-2) == MVT::Other) OldChainResultNo = NTMNumResults-2; } else if (Node->getValueType(NTMNumResults-1) == MVT::Other) OldChainResultNo = NTMNumResults-1; // Call the underlying SelectionDAG routine to do the transmogrification. Note // that this deletes operands of the old node that become dead. SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops); // MorphNodeTo can operate in two ways: if an existing node with the // specified operands exists, it can just return it. Otherwise, it // updates the node in place to have the requested operands. if (Res == Node) { // If we updated the node in place, reset the node ID. To the isel, // this should be just like a newly allocated machine node. Res->setNodeId(-1); } unsigned ResNumResults = Res->getNumValues(); // Move the glue if needed. if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 && (unsigned)OldGlueResultNo != ResNumResults-1) CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo), SDValue(Res, ResNumResults-1)); if ((EmitNodeInfo & OPFL_GlueOutput) != 0) --ResNumResults; // Move the chain reference if needed. if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 && (unsigned)OldChainResultNo != ResNumResults-1) CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo), SDValue(Res, ResNumResults-1)); // Otherwise, no replacement happened because the node already exists. Replace // Uses of the old node with the new one. if (Res != Node) { CurDAG->ReplaceAllUsesWith(Node, Res); CurDAG->RemoveDeadNode(Node); } return Res; } /// CheckSame - Implements OP_CheckSame. 
LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
          SDValue N,
          const SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes) {
  // Consumes one table byte (the record number) and advances MatcherIndex.
  // Accept if it is exactly the same as a previously recorded node.
  unsigned RecNo = MatcherTable[MatcherIndex++];
  assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
  return N == RecordedNodes[RecNo].first;
}

/// CheckChildSame - Implements OP_CheckChildXSame.
///
/// Applies CheckSame to operand \p ChildNo of \p N. Returns false without
/// reading the table (MatcherIndex is left untouched) when \p N has no such
/// operand.
LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
               SDValue N,
               const SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes,
               unsigned ChildNo) {
  if (ChildNo >= N.getNumOperands())
    return false;  // Match fails if out of range child #.
  return ::CheckSame(MatcherTable, MatcherIndex, N.getOperand(ChildNo),
                     RecordedNodes);
}

/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
///
/// Consumes one table byte (the predicate number) and forwards it to
/// SelectionDAGISel::CheckPatternPredicate.
LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckPatternPredicate(const unsigned char *MatcherTable,
                      unsigned &MatcherIndex, const SelectionDAGISel &SDISel) {
  return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
}

/// CheckNodePredicate - Implements OP_CheckNodePredicate.
LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
                   const SelectionDAGISel &SDISel, SDNode *N) {
  // Consumes one table byte (the predicate number) and forwards it, together
  // with the node, to SelectionDAGISel::CheckNodePredicate.
  return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]);
}

/// CheckOpcode - Consumes a two-byte opcode from the table (low byte first)
/// and returns true if it equals the opcode of \p N.
LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
            SDNode *N) {
  uint16_t Opc = MatcherTable[MatcherIndex++];
  Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
  return N->getOpcode() == Opc;
}

/// CheckType - Consumes a one-byte value type from the table and returns true
/// if \p N has that type. A table entry of MVT::iPTR matches the type returned
/// by TLI->getPointerTy(DL).
LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
          const TargetLowering *TLI, const DataLayout &DL) {
  MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
  if (N.getValueType() == VT)
    return true;

  // Handle the case when VT is iPTR.
  return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(DL);
}

/// CheckChildType - Applies CheckType to operand \p ChildNo of \p N. Returns
/// false without reading the table when \p N has no such operand.
LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
               SDValue N, const TargetLowering *TLI, const DataLayout &DL,
               unsigned ChildNo) {
  if (ChildNo >= N.getNumOperands())
    return false;  // Match fails if out of range child #.
  return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI,
                     DL);
}

/// CheckCondCode - Consumes a one-byte condition code from the table and
/// compares it with the one held by \p N, which must be a CondCodeSDNode.
LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
              SDValue N) {
  return cast<CondCodeSDNode>(N)->get() ==
      (ISD::CondCode)MatcherTable[MatcherIndex++];
}

/// CheckValueType - Consumes a one-byte value type from the table and compares
/// it with the type held by \p N, which must be a VTSDNode. A table entry of
/// MVT::iPTR matches the type returned by TLI->getPointerTy(DL).
LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
               SDValue N, const TargetLowering *TLI, const DataLayout &DL) {
  MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
  if (cast<VTSDNode>(N)->getVT() == VT)
    return true;

  // Handle the case when VT is iPTR.
  return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy(DL);
}

/// CheckInteger - Consumes an integer from the table (one byte, continued
/// through GetVBR when bit 7 is set) and returns true if \p N is a
/// ConstantSDNode whose sign-extended value equals it.
LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
             SDValue N) {
  int64_t Val = MatcherTable[MatcherIndex++];
  if (Val & 128)
    Val = GetVBR(Val, MatcherTable, MatcherIndex);

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
  return C && C->getSExtValue() == Val;
}

/// CheckChildInteger - Applies CheckInteger to operand \p ChildNo of \p N.
/// Returns false without reading the table when \p N has no such operand.
LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
                  SDValue N, unsigned ChildNo) {
  if (ChildNo >= N.getNumOperands())
    return false;  // Match fails if out of range child #.
  return ::CheckInteger(MatcherTable, MatcherIndex, N.getOperand(ChildNo));
}

/// CheckAndImm - Consumes an integer from the table, then returns true if
/// \p N is an ISD::AND whose second operand is a ConstantSDNode accepted by
/// SelectionDAGISel::CheckAndMask. The table integer is consumed even when the
/// check fails.
LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
            SDValue N, const SelectionDAGISel &SDISel) {
  int64_t Val = MatcherTable[MatcherIndex++];
  if (Val & 128)
    Val = GetVBR(Val, MatcherTable, MatcherIndex);

  if (N->getOpcode() != ISD::AND) return false;

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
  return C && SDISel.CheckAndMask(N.getOperand(0), C, Val);
}

/// CheckOrImm - Consumes an integer from the table, then returns true if
/// \p N is an ISD::OR whose second operand is a ConstantSDNode accepted by
/// SelectionDAGISel::CheckOrMask. The table integer is consumed even when the
/// check fails.
LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
           SDValue N, const SelectionDAGISel &SDISel) {
  int64_t Val = MatcherTable[MatcherIndex++];
  if (Val & 128)
    Val = GetVBR(Val, MatcherTable, MatcherIndex);

  if (N->getOpcode() != ISD::OR) return false;

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
  return C && SDISel.CheckOrMask(N.getOperand(0), C, Val);
}

/// IsPredicateKnownToFail - If we know how and can do so without pushing a
/// scope, evaluate the current node.  If the current predicate is known to
/// fail, set Result=true and return anything.  If the current predicate is
/// known to pass, set Result=false and return the MatcherIndex to continue
/// with.  If the current predicate is unknown, set Result=false and return the
/// MatcherIndex to continue with.
static unsigned IsPredicateKnownToFail(const unsigned char *Table, unsigned Index, SDValue N, bool &Result, const SelectionDAGISel &SDISel, SmallVectorImpl > &RecordedNodes) { switch (Table[Index++]) { default: Result = false; return Index-1; // Could not evaluate this predicate. case SelectionDAGISel::OPC_CheckSame: Result = !::CheckSame(Table, Index, N, RecordedNodes); return Index; case SelectionDAGISel::OPC_CheckChild0Same: case SelectionDAGISel::OPC_CheckChild1Same: case SelectionDAGISel::OPC_CheckChild2Same: case SelectionDAGISel::OPC_CheckChild3Same: Result = !::CheckChildSame(Table, Index, N, RecordedNodes, Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Same); return Index; case SelectionDAGISel::OPC_CheckPatternPredicate: Result = !::CheckPatternPredicate(Table, Index, SDISel); return Index; case SelectionDAGISel::OPC_CheckPredicate: Result = !::CheckNodePredicate(Table, Index, SDISel, N.getNode()); return Index; case SelectionDAGISel::OPC_CheckOpcode: Result = !::CheckOpcode(Table, Index, N.getNode()); return Index; case SelectionDAGISel::OPC_CheckType: Result = !::CheckType(Table, Index, N, SDISel.TLI, SDISel.CurDAG->getDataLayout()); return Index; case SelectionDAGISel::OPC_CheckChild0Type: case SelectionDAGISel::OPC_CheckChild1Type: case SelectionDAGISel::OPC_CheckChild2Type: case SelectionDAGISel::OPC_CheckChild3Type: case SelectionDAGISel::OPC_CheckChild4Type: case SelectionDAGISel::OPC_CheckChild5Type: case SelectionDAGISel::OPC_CheckChild6Type: case SelectionDAGISel::OPC_CheckChild7Type: Result = !::CheckChildType( Table, Index, N, SDISel.TLI, SDISel.CurDAG->getDataLayout(), Table[Index - 1] - SelectionDAGISel::OPC_CheckChild0Type); return Index; case SelectionDAGISel::OPC_CheckCondCode: Result = !::CheckCondCode(Table, Index, N); return Index; case SelectionDAGISel::OPC_CheckValueType: Result = !::CheckValueType(Table, Index, N, SDISel.TLI, SDISel.CurDAG->getDataLayout()); return Index; case SelectionDAGISel::OPC_CheckInteger: Result = 
!::CheckInteger(Table, Index, N); return Index; case SelectionDAGISel::OPC_CheckChild0Integer: case SelectionDAGISel::OPC_CheckChild1Integer: case SelectionDAGISel::OPC_CheckChild2Integer: case SelectionDAGISel::OPC_CheckChild3Integer: case SelectionDAGISel::OPC_CheckChild4Integer: Result = !::CheckChildInteger(Table, Index, N, Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Integer); return Index; case SelectionDAGISel::OPC_CheckAndImm: Result = !::CheckAndImm(Table, Index, N, SDISel); return Index; case SelectionDAGISel::OPC_CheckOrImm: Result = !::CheckOrImm(Table, Index, N, SDISel); return Index; } } namespace { struct MatchScope { /// FailIndex - If this match fails, this is the index to continue with. unsigned FailIndex; /// NodeStack - The node stack when the scope was formed. SmallVector NodeStack; /// NumRecordedNodes - The number of recorded nodes when the scope was formed. unsigned NumRecordedNodes; /// NumMatchedMemRefs - The number of matched memref entries. unsigned NumMatchedMemRefs; /// InputChain/InputGlue - The current chain/glue SDValue InputChain, InputGlue; /// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty. bool HasChainNodesMatched; }; /// \\brief A DAG update listener to keep the matching state /// (i.e. RecordedNodes and MatchScope) uptodate if the target is allowed to /// change the DAG while matching. X86 addressing mode matcher is an example /// for this. 
class MatchStateUpdater : public SelectionDAG::DAGUpdateListener { - SmallVectorImpl > &RecordedNodes; - SmallVectorImpl &MatchScopes; + SDNode **NodeToMatch; + SmallVectorImpl> &RecordedNodes; + SmallVectorImpl &MatchScopes; public: - MatchStateUpdater(SelectionDAG &DAG, - SmallVectorImpl > &RN, - SmallVectorImpl &MS) : - SelectionDAG::DAGUpdateListener(DAG), - RecordedNodes(RN), MatchScopes(MS) { } + MatchStateUpdater(SelectionDAG &DAG, SDNode **NodeToMatch, + SmallVectorImpl> &RN, + SmallVectorImpl &MS) + : SelectionDAG::DAGUpdateListener(DAG), NodeToMatch(NodeToMatch), + RecordedNodes(RN), MatchScopes(MS) {} void NodeDeleted(SDNode *N, SDNode *E) override { // Some early-returns here to avoid the search if we deleted the node or // if the update comes from MorphNodeTo (MorphNodeTo is the last thing we // do, so it's unnecessary to update matching state at that point). // Neither of these can occur currently because we only install this // update listener during matching a complex patterns. if (!E || E->isMachineOpcode()) return; + // Check if NodeToMatch was updated. + if (N == *NodeToMatch) + *NodeToMatch = E; // Performing linear search here does not matter because we almost never // run this code. You'd have to have a CSE during complex pattern // matching. for (auto &I : RecordedNodes) if (I.first.getNode() == N) I.first.setNode(E); for (auto &I : MatchScopes) for (auto &J : I.NodeStack) if (J.getNode() == N) J.setNode(E); } }; } // end anonymous namespace void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, unsigned TableSize) { // FIXME: Should these even be selected? Handle these cases in the caller? switch (NodeToMatch->getOpcode()) { default: break; case ISD::EntryToken: // These nodes remain the same. 
case ISD::BasicBlock: case ISD::Register: case ISD::RegisterMask: case ISD::HANDLENODE: case ISD::MDNODE_SDNODE: case ISD::TargetConstant: case ISD::TargetConstantFP: case ISD::TargetConstantPool: case ISD::TargetFrameIndex: case ISD::TargetExternalSymbol: case ISD::MCSymbol: case ISD::TargetBlockAddress: case ISD::TargetJumpTable: case ISD::TargetGlobalTLSAddress: case ISD::TargetGlobalAddress: case ISD::TokenFactor: case ISD::CopyFromReg: case ISD::CopyToReg: case ISD::EH_LABEL: case ISD::LIFETIME_START: case ISD::LIFETIME_END: NodeToMatch->setNodeId(-1); // Mark selected. return; case ISD::AssertSext: case ISD::AssertZext: CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0), NodeToMatch->getOperand(0)); CurDAG->RemoveDeadNode(NodeToMatch); return; case ISD::INLINEASM: Select_INLINEASM(NodeToMatch); return; case ISD::READ_REGISTER: Select_READ_REGISTER(NodeToMatch); return; case ISD::WRITE_REGISTER: Select_WRITE_REGISTER(NodeToMatch); return; case ISD::UNDEF: Select_UNDEF(NodeToMatch); return; } assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); // Set up the node stack with NodeToMatch as the only node on the stack. SmallVector NodeStack; SDValue N = SDValue(NodeToMatch, 0); NodeStack.push_back(N); // MatchScopes - Scopes used when matching, if a match failure happens, this // indicates where to continue checking. SmallVector MatchScopes; // RecordedNodes - This is the set of nodes that have been recorded by the // state machine. The second value is the parent of the node, or null if the // root is recorded. SmallVector, 8> RecordedNodes; // MatchedMemRefs - This is the set of MemRef's we've seen in the input // pattern. SmallVector MatchedMemRefs; // These are the current input chain and glue for use when generating nodes. // Various Emit operations change these. For example, emitting a copytoreg // uses and updates these. 
SDValue InputChain, InputGlue; // ChainNodesMatched - If a pattern matches nodes that have input/output // chains, the OPC_EmitMergeInputChains operation is emitted which indicates // which ones they are. The result is captured into this list so that we can // update the chain results when the pattern is complete. SmallVector ChainNodesMatched; DEBUG(dbgs() << "ISEL: Starting pattern match on root node: "; NodeToMatch->dump(CurDAG); dbgs() << '\n'); // Determine where to start the interpreter. Normally we start at opcode #0, // but if the state machine starts with an OPC_SwitchOpcode, then we // accelerate the first lookup (which is guaranteed to be hot) with the // OpcodeOffset table. unsigned MatcherIndex = 0; if (!OpcodeOffset.empty()) { // Already computed the OpcodeOffset table, just index into it. if (N.getOpcode() < OpcodeOffset.size()) MatcherIndex = OpcodeOffset[N.getOpcode()]; DEBUG(dbgs() << " Initial Opcode index to " << MatcherIndex << "\n"); } else if (MatcherTable[0] == OPC_SwitchOpcode) { // Otherwise, the table isn't computed, but the state machine does start // with an OPC_SwitchOpcode instruction. Populate the table now, since this // is the first time we're selecting an instruction. unsigned Idx = 1; while (1) { // Get the size of this case. unsigned CaseSize = MatcherTable[Idx++]; if (CaseSize & 128) CaseSize = GetVBR(CaseSize, MatcherTable, Idx); if (CaseSize == 0) break; // Get the opcode, add the index to the table. uint16_t Opc = MatcherTable[Idx++]; Opc |= (unsigned short)MatcherTable[Idx++] << 8; if (Opc >= OpcodeOffset.size()) OpcodeOffset.resize((Opc+1)*2); OpcodeOffset[Opc] = Idx; Idx += CaseSize; } // Okay, do the lookup for the first opcode. 
if (N.getOpcode() < OpcodeOffset.size()) MatcherIndex = OpcodeOffset[N.getOpcode()]; } while (1) { assert(MatcherIndex < TableSize && "Invalid index"); #ifndef NDEBUG unsigned CurrentOpcodeIndex = MatcherIndex; #endif BuiltinOpcodes Opcode = (BuiltinOpcodes)MatcherTable[MatcherIndex++]; switch (Opcode) { case OPC_Scope: { // Okay, the semantics of this operation are that we should push a scope // then evaluate the first child. However, pushing a scope only to have // the first check fail (which then pops it) is inefficient. If we can // determine immediately that the first check (or first several) will // immediately fail, don't even bother pushing a scope for them. unsigned FailIndex; while (1) { unsigned NumToSkip = MatcherTable[MatcherIndex++]; if (NumToSkip & 128) NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex); // Found the end of the scope with no match. if (NumToSkip == 0) { FailIndex = 0; break; } FailIndex = MatcherIndex+NumToSkip; unsigned MatcherIndexOfPredicate = MatcherIndex; (void)MatcherIndexOfPredicate; // silence warning. // If we can't evaluate this predicate without pushing a scope (e.g. if // it is a 'MoveParent') or if the predicate succeeds on this node, we // push the scope and evaluate the full predicate chain. bool Result; MatcherIndex = IsPredicateKnownToFail(MatcherTable, MatcherIndex, N, Result, *this, RecordedNodes); if (!Result) break; DEBUG(dbgs() << " Skipped scope entry (due to false predicate) at " << "index " << MatcherIndexOfPredicate << ", continuing at " << FailIndex << "\n"); ++NumDAGIselRetries; // Otherwise, we know that this case of the Scope is guaranteed to fail, // move to the next case. MatcherIndex = FailIndex; } // If the whole scope failed to match, bail. if (FailIndex == 0) break; // Push a MatchScope which indicates where to go if the first child fails // to match. 
MatchScope NewEntry; NewEntry.FailIndex = FailIndex; NewEntry.NodeStack.append(NodeStack.begin(), NodeStack.end()); NewEntry.NumRecordedNodes = RecordedNodes.size(); NewEntry.NumMatchedMemRefs = MatchedMemRefs.size(); NewEntry.InputChain = InputChain; NewEntry.InputGlue = InputGlue; NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty(); MatchScopes.push_back(NewEntry); continue; } case OPC_RecordNode: { // Remember this node, it may end up being an operand in the pattern. SDNode *Parent = nullptr; if (NodeStack.size() > 1) Parent = NodeStack[NodeStack.size()-2].getNode(); RecordedNodes.push_back(std::make_pair(N, Parent)); continue; } case OPC_RecordChild0: case OPC_RecordChild1: case OPC_RecordChild2: case OPC_RecordChild3: case OPC_RecordChild4: case OPC_RecordChild5: case OPC_RecordChild6: case OPC_RecordChild7: { unsigned ChildNo = Opcode-OPC_RecordChild0; if (ChildNo >= N.getNumOperands()) break; // Match fails if out of range child #. RecordedNodes.push_back(std::make_pair(N->getOperand(ChildNo), N.getNode())); continue; } case OPC_RecordMemRef: MatchedMemRefs.push_back(cast(N)->getMemOperand()); continue; case OPC_CaptureGlueInput: // If the current node has an input glue, capture it in InputGlue. if (N->getNumOperands() != 0 && N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) InputGlue = N->getOperand(N->getNumOperands()-1); continue; case OPC_MoveChild: { unsigned ChildNo = MatcherTable[MatcherIndex++]; if (ChildNo >= N.getNumOperands()) break; // Match fails if out of range child #. N = N.getOperand(ChildNo); NodeStack.push_back(N); continue; } case OPC_MoveChild0: case OPC_MoveChild1: case OPC_MoveChild2: case OPC_MoveChild3: case OPC_MoveChild4: case OPC_MoveChild5: case OPC_MoveChild6: case OPC_MoveChild7: { unsigned ChildNo = Opcode-OPC_MoveChild0; if (ChildNo >= N.getNumOperands()) break; // Match fails if out of range child #. 
N = N.getOperand(ChildNo); NodeStack.push_back(N); continue; } case OPC_MoveParent: // Pop the current node off the NodeStack. NodeStack.pop_back(); assert(!NodeStack.empty() && "Node stack imbalance!"); N = NodeStack.back(); continue; case OPC_CheckSame: if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break; continue; case OPC_CheckChild0Same: case OPC_CheckChild1Same: case OPC_CheckChild2Same: case OPC_CheckChild3Same: if (!::CheckChildSame(MatcherTable, MatcherIndex, N, RecordedNodes, Opcode-OPC_CheckChild0Same)) break; continue; case OPC_CheckPatternPredicate: if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break; continue; case OPC_CheckPredicate: if (!::CheckNodePredicate(MatcherTable, MatcherIndex, *this, N.getNode())) break; continue; case OPC_CheckComplexPat: { unsigned CPNum = MatcherTable[MatcherIndex++]; unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat"); // If target can modify DAG during matching, keep the matching state // consistent. std::unique_ptr MSU; if (ComplexPatternFuncMutatesDAG()) - MSU.reset(new MatchStateUpdater(*CurDAG, RecordedNodes, + MSU.reset(new MatchStateUpdater(*CurDAG, &NodeToMatch, RecordedNodes, MatchScopes)); if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second, RecordedNodes[RecNo].first, CPNum, RecordedNodes)) break; continue; } case OPC_CheckOpcode: if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break; continue; case OPC_CheckType: if (!::CheckType(MatcherTable, MatcherIndex, N, TLI, CurDAG->getDataLayout())) break; continue; case OPC_SwitchOpcode: { unsigned CurNodeOpcode = N.getOpcode(); unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; unsigned CaseSize; while (1) { // Get the size of this case. 
CaseSize = MatcherTable[MatcherIndex++]; if (CaseSize & 128) CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex); if (CaseSize == 0) break; uint16_t Opc = MatcherTable[MatcherIndex++]; Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; // If the opcode matches, then we will execute this case. if (CurNodeOpcode == Opc) break; // Otherwise, skip over this case. MatcherIndex += CaseSize; } // If no cases matched, bail out. if (CaseSize == 0) break; // Otherwise, execute the case we found. DEBUG(dbgs() << " OpcodeSwitch from " << SwitchStart << " to " << MatcherIndex << "\n"); continue; } case OPC_SwitchType: { MVT CurNodeVT = N.getSimpleValueType(); unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; unsigned CaseSize; while (1) { // Get the size of this case. CaseSize = MatcherTable[MatcherIndex++]; if (CaseSize & 128) CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex); if (CaseSize == 0) break; MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (CaseVT == MVT::iPTR) CaseVT = TLI->getPointerTy(CurDAG->getDataLayout()); // If the VT matches, then we will execute this case. if (CurNodeVT == CaseVT) break; // Otherwise, skip over this case. MatcherIndex += CaseSize; } // If no cases matched, bail out. if (CaseSize == 0) break; // Otherwise, execute the case we found. 
DEBUG(dbgs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString() << "] from " << SwitchStart << " to " << MatcherIndex<<'\n'); continue; } case OPC_CheckChild0Type: case OPC_CheckChild1Type: case OPC_CheckChild2Type: case OPC_CheckChild3Type: case OPC_CheckChild4Type: case OPC_CheckChild5Type: case OPC_CheckChild6Type: case OPC_CheckChild7Type: if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI, CurDAG->getDataLayout(), Opcode - OPC_CheckChild0Type)) break; continue; case OPC_CheckCondCode: if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break; continue; case OPC_CheckValueType: if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI, CurDAG->getDataLayout())) break; continue; case OPC_CheckInteger: if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break; continue; case OPC_CheckChild0Integer: case OPC_CheckChild1Integer: case OPC_CheckChild2Integer: case OPC_CheckChild3Integer: case OPC_CheckChild4Integer: if (!::CheckChildInteger(MatcherTable, MatcherIndex, N, Opcode-OPC_CheckChild0Integer)) break; continue; case OPC_CheckAndImm: if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break; continue; case OPC_CheckOrImm: if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break; continue; case OPC_CheckFoldableChainNode: { assert(NodeStack.size() != 1 && "No parent node"); // Verify that all intermediate nodes between the root and this one have // a single use. bool HasMultipleUses = false; for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i) if (!NodeStack[i].hasOneUse()) { HasMultipleUses = true; break; } if (HasMultipleUses) break; // Check to see that the target thinks this is profitable to fold and that // we can fold it without inducing cycles in the graph. 
if (!IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(), NodeToMatch) || !IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(), NodeToMatch, OptLevel, true/*We validate our own chains*/)) break; continue; } case OPC_EmitInteger: { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; int64_t Val = MatcherTable[MatcherIndex++]; if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); RecordedNodes.push_back(std::pair( CurDAG->getTargetConstant(Val, SDLoc(NodeToMatch), VT), nullptr)); continue; } case OPC_EmitRegister: { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; unsigned RegNo = MatcherTable[MatcherIndex++]; RecordedNodes.push_back(std::pair( CurDAG->getRegister(RegNo, VT), nullptr)); continue; } case OPC_EmitRegister2: { // For targets w/ more than 256 register names, the register enum // values are stored in two bytes in the matcher table (just like // opcodes). MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; unsigned RegNo = MatcherTable[MatcherIndex++]; RegNo |= MatcherTable[MatcherIndex++] << 8; RecordedNodes.push_back(std::pair( CurDAG->getRegister(RegNo, VT), nullptr)); continue; } case OPC_EmitConvertToTarget: { // Convert from IMM/FPIMM to target version. 
unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid EmitConvertToTarget"); SDValue Imm = RecordedNodes[RecNo].first; if (Imm->getOpcode() == ISD::Constant) { const ConstantInt *Val=cast(Imm)->getConstantIntValue(); Imm = CurDAG->getTargetConstant(*Val, SDLoc(NodeToMatch), Imm.getValueType()); } else if (Imm->getOpcode() == ISD::ConstantFP) { const ConstantFP *Val=cast(Imm)->getConstantFPValue(); Imm = CurDAG->getTargetConstantFP(*Val, SDLoc(NodeToMatch), Imm.getValueType()); } RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second)); continue; } case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0 case OPC_EmitMergeInputChains1_1: // OPC_EmitMergeInputChains, 1, 1 case OPC_EmitMergeInputChains1_2: { // OPC_EmitMergeInputChains, 1, 2 // These are space-optimized forms of OPC_EmitMergeInputChains. assert(!InputChain.getNode() && "EmitMergeInputChains should be the first chain producing node"); assert(ChainNodesMatched.empty() && "Should only have one EmitMergeInputChains per match"); // Read all of the chained nodes. unsigned RecNo = Opcode - OPC_EmitMergeInputChains1_0; assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains"); ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); // FIXME: What if other value results of the node have uses not matched // by this pattern? if (ChainNodesMatched.back() != NodeToMatch && !RecordedNodes[RecNo].first.hasOneUse()) { ChainNodesMatched.clear(); break; } // Merge the input chains if they are not intra-pattern references. InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); if (!InputChain.getNode()) break; // Failed to merge. continue; } case OPC_EmitMergeInputChains: { assert(!InputChain.getNode() && "EmitMergeInputChains should be the first chain producing node"); // This node gets a list of nodes we matched in the input that have // chains. 
We want to token factor all of the input chains to these nodes // together. However, if any of the input chains is actually one of the // nodes matched in this pattern, then we have an intra-match reference. // Ignore these because the newly token factored chain should not refer to // the old nodes. unsigned NumChains = MatcherTable[MatcherIndex++]; assert(NumChains != 0 && "Can't TF zero chains"); assert(ChainNodesMatched.empty() && "Should only have one EmitMergeInputChains per match"); // Read all of the chained nodes. for (unsigned i = 0; i != NumChains; ++i) { unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains"); ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); // FIXME: What if other value results of the node have uses not matched // by this pattern? if (ChainNodesMatched.back() != NodeToMatch && !RecordedNodes[RecNo].first.hasOneUse()) { ChainNodesMatched.clear(); break; } } // If the inner loop broke out, the match fails. if (ChainNodesMatched.empty()) break; // Merge the input chains if they are not intra-pattern references. InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); if (!InputChain.getNode()) break; // Failed to merge. 
continue; } case OPC_EmitCopyToReg: { unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg"); unsigned DestPhysReg = MatcherTable[MatcherIndex++]; if (!InputChain.getNode()) InputChain = CurDAG->getEntryNode(); InputChain = CurDAG->getCopyToReg(InputChain, SDLoc(NodeToMatch), DestPhysReg, RecordedNodes[RecNo].first, InputGlue); InputGlue = InputChain.getValue(1); continue; } case OPC_EmitNodeXForm: { unsigned XFormNo = MatcherTable[MatcherIndex++]; unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid EmitNodeXForm"); SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo); RecordedNodes.push_back(std::pair(Res, nullptr)); continue; } case OPC_EmitNode: case OPC_MorphNodeTo: case OPC_EmitNode0: case OPC_EmitNode1: case OPC_EmitNode2: case OPC_MorphNodeTo0: case OPC_MorphNodeTo1: case OPC_MorphNodeTo2: { uint16_t TargetOpc = MatcherTable[MatcherIndex++]; TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; unsigned EmitNodeInfo = MatcherTable[MatcherIndex++]; // Get the result VT list. unsigned NumVTs; // If this is one of the compressed forms, get the number of VTs based // on the Opcode. Otherwise read the next byte from the table. if (Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2) NumVTs = Opcode - OPC_MorphNodeTo0; else if (Opcode >= OPC_EmitNode0 && Opcode <= OPC_EmitNode2) NumVTs = Opcode - OPC_EmitNode0; else NumVTs = MatcherTable[MatcherIndex++]; SmallVector VTs; for (unsigned i = 0; i != NumVTs; ++i) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (VT == MVT::iPTR) VT = TLI->getPointerTy(CurDAG->getDataLayout()).SimpleTy; VTs.push_back(VT); } if (EmitNodeInfo & OPFL_Chain) VTs.push_back(MVT::Other); if (EmitNodeInfo & OPFL_GlueOutput) VTs.push_back(MVT::Glue); // This is hot code, so optimize the two most common cases of 1 and 2 // results. 
SDVTList VTList; if (VTs.size() == 1) VTList = CurDAG->getVTList(VTs[0]); else if (VTs.size() == 2) VTList = CurDAG->getVTList(VTs[0], VTs[1]); else VTList = CurDAG->getVTList(VTs); // Get the operand list. unsigned NumOps = MatcherTable[MatcherIndex++]; SmallVector Ops; for (unsigned i = 0; i != NumOps; ++i) { unsigned RecNo = MatcherTable[MatcherIndex++]; if (RecNo & 128) RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex); assert(RecNo < RecordedNodes.size() && "Invalid EmitNode"); Ops.push_back(RecordedNodes[RecNo].first); } // If there are variadic operands to add, handle them now. if (EmitNodeInfo & OPFL_VariadicInfo) { // Determine the start index to copy from. unsigned FirstOpToCopy = getNumFixedFromVariadicInfo(EmitNodeInfo); FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0; assert(NodeToMatch->getNumOperands() >= FirstOpToCopy && "Invalid variadic node"); // Copy all of the variadic operands, not including a potential glue // input. for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands(); i != e; ++i) { SDValue V = NodeToMatch->getOperand(i); if (V.getValueType() == MVT::Glue) break; Ops.push_back(V); } } // If this has chain/glue inputs, add them. if (EmitNodeInfo & OPFL_Chain) Ops.push_back(InputChain); if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != nullptr) Ops.push_back(InputGlue); // Create the node. SDNode *Res = nullptr; bool IsMorphNodeTo = Opcode == OPC_MorphNodeTo || (Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2); if (!IsMorphNodeTo) { // If this is a normal EmitNode command, just create the new node and // add the results to the RecordedNodes list. Res = CurDAG->getMachineNode(TargetOpc, SDLoc(NodeToMatch), VTList, Ops); // Add all the non-glue/non-chain results to the RecordedNodes list. 
for (unsigned i = 0, e = VTs.size(); i != e; ++i) { if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break; RecordedNodes.push_back(std::pair(SDValue(Res, i), nullptr)); } } else { assert(NodeToMatch->getOpcode() != ISD::DELETED_NODE && "NodeToMatch was removed partway through selection"); SelectionDAG::DAGNodeDeletedListener NDL(*CurDAG, [&](SDNode *N, SDNode *E) { auto &Chain = ChainNodesMatched; assert((!E || !is_contained(Chain, N)) && "Chain node replaced during MorphNode"); Chain.erase(std::remove(Chain.begin(), Chain.end(), N), Chain.end()); }); Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops, EmitNodeInfo); } // If the node had chain/glue results, update our notion of the current // chain and glue. if (EmitNodeInfo & OPFL_GlueOutput) { InputGlue = SDValue(Res, VTs.size()-1); if (EmitNodeInfo & OPFL_Chain) InputChain = SDValue(Res, VTs.size()-2); } else if (EmitNodeInfo & OPFL_Chain) InputChain = SDValue(Res, VTs.size()-1); // If the OPFL_MemRefs glue is set on this node, slap all of the // accumulated memrefs onto it. // // FIXME: This is vastly incorrect for patterns with multiple outputs // instructions that access memory and for ComplexPatterns that match // loads. if (EmitNodeInfo & OPFL_MemRefs) { // Only attach load or store memory operands if the generated // instruction may load or store. 
const MCInstrDesc &MCID = TII->get(TargetOpc); bool mayLoad = MCID.mayLoad(); bool mayStore = MCID.mayStore(); unsigned NumMemRefs = 0; for (SmallVectorImpl::const_iterator I = MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { if ((*I)->isLoad()) { if (mayLoad) ++NumMemRefs; } else if ((*I)->isStore()) { if (mayStore) ++NumMemRefs; } else { ++NumMemRefs; } } MachineSDNode::mmo_iterator MemRefs = MF->allocateMemRefsArray(NumMemRefs); MachineSDNode::mmo_iterator MemRefsPos = MemRefs; for (SmallVectorImpl::const_iterator I = MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { if ((*I)->isLoad()) { if (mayLoad) *MemRefsPos++ = *I; } else if ((*I)->isStore()) { if (mayStore) *MemRefsPos++ = *I; } else { *MemRefsPos++ = *I; } } cast(Res) ->setMemRefs(MemRefs, MemRefs + NumMemRefs); } DEBUG(dbgs() << " " << (IsMorphNodeTo ? "Morphed" : "Created") << " node: "; Res->dump(CurDAG); dbgs() << "\n"); // If this was a MorphNodeTo then we're completely done! if (IsMorphNodeTo) { // Update chain uses. UpdateChains(Res, InputChain, ChainNodesMatched, true); return; } continue; } case OPC_CompleteMatch: { // The match has been completed, and any new nodes (if any) have been // created. Patch up references to the matched dag to use the newly // created nodes. 
unsigned NumResults = MatcherTable[MatcherIndex++]; for (unsigned i = 0; i != NumResults; ++i) { unsigned ResSlot = MatcherTable[MatcherIndex++]; if (ResSlot & 128) ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex); assert(ResSlot < RecordedNodes.size() && "Invalid CompleteMatch"); SDValue Res = RecordedNodes[ResSlot].first; assert(i < NodeToMatch->getNumValues() && NodeToMatch->getValueType(i) != MVT::Other && NodeToMatch->getValueType(i) != MVT::Glue && "Invalid number of results to complete!"); assert((NodeToMatch->getValueType(i) == Res.getValueType() || NodeToMatch->getValueType(i) == MVT::iPTR || Res.getValueType() == MVT::iPTR || NodeToMatch->getValueType(i).getSizeInBits() == Res.getValueSizeInBits()) && "invalid replacement"); CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res); } // Update chain uses. UpdateChains(NodeToMatch, InputChain, ChainNodesMatched, false); // If the root node defines glue, we need to update it to the glue result. // TODO: This never happens in our tests and I think it can be removed / // replaced with an assert, but if we do it this the way the change is // NFC. if (NodeToMatch->getValueType(NodeToMatch->getNumValues() - 1) == MVT::Glue && InputGlue.getNode()) CurDAG->ReplaceAllUsesOfValueWith( SDValue(NodeToMatch, NodeToMatch->getNumValues() - 1), InputGlue); assert(NodeToMatch->use_empty() && "Didn't replace all uses of the node?"); CurDAG->RemoveDeadNode(NodeToMatch); return; } } // If the code reached this point, then the match failed. See if there is // another child to try in the current 'Scope', otherwise pop it until we // find a case to check. DEBUG(dbgs() << " Match failed at index " << CurrentOpcodeIndex << "\n"); ++NumDAGIselRetries; while (1) { if (MatchScopes.empty()) { CannotYetSelect(NodeToMatch); return; } // Restore the interpreter state back to the point where the scope was // formed. 
MatchScope &LastScope = MatchScopes.back(); RecordedNodes.resize(LastScope.NumRecordedNodes); NodeStack.clear(); NodeStack.append(LastScope.NodeStack.begin(), LastScope.NodeStack.end()); N = NodeStack.back(); if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size()) MatchedMemRefs.resize(LastScope.NumMatchedMemRefs); MatcherIndex = LastScope.FailIndex; DEBUG(dbgs() << " Continuing at " << MatcherIndex << "\n"); InputChain = LastScope.InputChain; InputGlue = LastScope.InputGlue; if (!LastScope.HasChainNodesMatched) ChainNodesMatched.clear(); // Check to see what the offset is at the new MatcherIndex. If it is zero // we have reached the end of this scope, otherwise we have another child // in the current scope to try. unsigned NumToSkip = MatcherTable[MatcherIndex++]; if (NumToSkip & 128) NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex); // If we have another child in this scope to match, update FailIndex and // try it. if (NumToSkip != 0) { LastScope.FailIndex = MatcherIndex+NumToSkip; break; } // End of this scope, pop it and try the next child in the containing // scope. 
MatchScopes.pop_back(); } } } void SelectionDAGISel::CannotYetSelect(SDNode *N) { std::string msg; raw_string_ostream Msg(msg); Msg << "Cannot select: "; if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN && N->getOpcode() != ISD::INTRINSIC_WO_CHAIN && N->getOpcode() != ISD::INTRINSIC_VOID) { N->printrFull(Msg, CurDAG); Msg << "\nIn function: " << MF->getName(); } else { bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other; unsigned iid = cast(N->getOperand(HasInputChain))->getZExtValue(); if (iid < Intrinsic::num_intrinsics) Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid, None); else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo()) Msg << "target intrinsic %" << TII->getName(iid); else Msg << "unknown intrinsic #" << iid; } report_fatal_error(Msg.str()); } char SelectionDAGISel::ID = 0; Index: projects/clang400-import/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp =================================================================== --- projects/clang400-import/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp (revision 313893) +++ projects/clang400-import/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp (revision 313894) @@ -1,1052 +1,1055 @@ //===-ThinLTOCodeGenerator.cpp - LLVM Link Time Optimizer -----------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the Thin Link Time Optimization library. This library is // intended to be used by linker to optimize code at link time. 
// //===----------------------------------------------------------------------===// #include "llvm/LTO/legacy/ThinLTOCodeGenerator.h" #ifdef HAVE_LLVM_REVISION #include "LLVMLTORevision.h" #endif #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/Bitcode/BitcodeWriterPass.h" #include "llvm/ExecutionEngine/ObjectMemoryBuffer.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Mangler.h" #include "llvm/IRReader/IRReader.h" #include "llvm/LTO/LTO.h" #include "llvm/Linker/Linker.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/Support/CachePruning.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" #include "llvm/Support/Path.h" #include "llvm/Support/SHA1.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/Threading.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/FunctionImport.h" #include "llvm/Transforms/IPO/Internalize.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" #include using namespace llvm; #define DEBUG_TYPE "thinlto" namespace llvm { // Flags -discard-value-names, defined in LTOCodeGenerator.cpp extern cl::opt LTODiscardValueNames; extern cl::opt LTORemarksFilename; extern cl::opt LTOPassRemarksWithHotness; } namespace { static cl::opt ThreadCount("threads", cl::init(llvm::heavyweight_hardware_concurrency())); Expected> 
setupOptimizationRemarks(LLVMContext &Ctx, int Count) { if (LTOPassRemarksWithHotness) Ctx.setDiagnosticHotnessRequested(true); if (LTORemarksFilename.empty()) return nullptr; std::string FileName = LTORemarksFilename + ".thin." + llvm::utostr(Count) + ".yaml"; std::error_code EC; auto DiagnosticOutputFile = llvm::make_unique(FileName, EC, sys::fs::F_None); if (EC) return errorCodeToError(EC); Ctx.setDiagnosticsOutputFile( llvm::make_unique(DiagnosticOutputFile->os())); DiagnosticOutputFile->keep(); return std::move(DiagnosticOutputFile); } // Simple helper to save temporary files for debug. static void saveTempBitcode(const Module &TheModule, StringRef TempDir, unsigned count, StringRef Suffix) { if (TempDir.empty()) return; // User asked to save temps, let dump the bitcode file after import. std::string SaveTempPath = (TempDir + llvm::utostr(count) + Suffix).str(); std::error_code EC; raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None); if (EC) report_fatal_error(Twine("Failed to open ") + SaveTempPath + " to save optimized bitcode\n"); WriteBitcodeToFile(&TheModule, OS, /* ShouldPreserveUseListOrder */ true); } static const GlobalValueSummary * getFirstDefinitionForLinker(const GlobalValueSummaryList &GVSummaryList) { // If there is any strong definition anywhere, get it. auto StrongDefForLinker = llvm::find_if( GVSummaryList, [](const std::unique_ptr &Summary) { auto Linkage = Summary->linkage(); return !GlobalValue::isAvailableExternallyLinkage(Linkage) && !GlobalValue::isWeakForLinker(Linkage); }); if (StrongDefForLinker != GVSummaryList.end()) return StrongDefForLinker->get(); // Get the first *linker visible* definition for this global in the summary // list. auto FirstDefForLinker = llvm::find_if( GVSummaryList, [](const std::unique_ptr &Summary) { auto Linkage = Summary->linkage(); return !GlobalValue::isAvailableExternallyLinkage(Linkage); }); // Extern templates can be emitted as available_externally. 
if (FirstDefForLinker == GVSummaryList.end()) return nullptr; return FirstDefForLinker->get(); } // Populate map of GUID to the prevailing copy for any multiply defined // symbols. Currently assume first copy is prevailing, or any strong // definition. Can be refined with Linker information in the future. static void computePrevailingCopies( const ModuleSummaryIndex &Index, DenseMap &PrevailingCopy) { auto HasMultipleCopies = [&](const GlobalValueSummaryList &GVSummaryList) { return GVSummaryList.size() > 1; }; for (auto &I : Index) { if (HasMultipleCopies(I.second)) PrevailingCopy[I.first] = getFirstDefinitionForLinker(I.second); } } static StringMap -generateModuleMap(const std::vector &Modules) { +generateModuleMap(const std::vector &Modules) { StringMap ModuleMap; for (auto &ModuleBuffer : Modules) { assert(ModuleMap.find(ModuleBuffer.getBufferIdentifier()) == ModuleMap.end() && "Expect unique Buffer Identifier"); - ModuleMap[ModuleBuffer.getBufferIdentifier()] = ModuleBuffer; + ModuleMap[ModuleBuffer.getBufferIdentifier()] = ModuleBuffer.getMemBuffer(); } return ModuleMap; } static void promoteModule(Module &TheModule, const ModuleSummaryIndex &Index) { if (renameModuleForThinLTO(TheModule, Index)) report_fatal_error("renameModuleForThinLTO failed"); } static std::unique_ptr loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context, bool Lazy, bool IsImporting) { SMDiagnostic Err; Expected> ModuleOrErr = Lazy ? 
getLazyBitcodeModule(Buffer, Context, /* ShouldLazyLoadMetadata */ true, IsImporting) : parseBitcodeFile(Buffer, Context); if (!ModuleOrErr) { handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) { SMDiagnostic Err = SMDiagnostic(Buffer.getBufferIdentifier(), SourceMgr::DK_Error, EIB.message()); Err.print("ThinLTO", errs()); }); report_fatal_error("Can't load module, abort."); } return std::move(ModuleOrErr.get()); } static void crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index, StringMap &ModuleMap, const FunctionImporter::ImportMapTy &ImportList) { auto Loader = [&](StringRef Identifier) { return loadModuleFromBuffer(ModuleMap[Identifier], TheModule.getContext(), /*Lazy=*/true, /*IsImporting*/ true); }; FunctionImporter Importer(Index, Loader); Expected Result = Importer.importFunctions(TheModule, ImportList); if (!Result) { handleAllErrors(Result.takeError(), [&](ErrorInfoBase &EIB) { SMDiagnostic Err = SMDiagnostic(TheModule.getModuleIdentifier(), SourceMgr::DK_Error, EIB.message()); Err.print("ThinLTO", errs()); }); report_fatal_error("importFunctions failed"); } } static void optimizeModule(Module &TheModule, TargetMachine &TM, unsigned OptLevel) { // Populate the PassManager PassManagerBuilder PMB; PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple()); PMB.Inliner = createFunctionInliningPass(); // FIXME: should get it from the bitcode? PMB.OptLevel = OptLevel; PMB.LoopVectorize = true; PMB.SLPVectorize = true; PMB.VerifyInput = true; PMB.VerifyOutput = false; legacy::PassManager PM; // Add the TTI (required to inform the vectorizer about register size for // instance) PM.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis())); // Add optimizations PMB.populateThinLTOPassManager(PM); PM.run(TheModule); } // Convert the PreservedSymbols map from "Name" based to "GUID" based. 
static DenseSet computeGUIDPreservedSymbols(const StringSet<> &PreservedSymbols, const Triple &TheTriple) { DenseSet GUIDPreservedSymbols(PreservedSymbols.size()); for (auto &Entry : PreservedSymbols) { StringRef Name = Entry.first(); if (TheTriple.isOSBinFormatMachO() && Name.size() > 0 && Name[0] == '_') Name = Name.drop_front(); GUIDPreservedSymbols.insert(GlobalValue::getGUID(Name)); } return GUIDPreservedSymbols; } std::unique_ptr codegenModule(Module &TheModule, TargetMachine &TM) { SmallVector OutputBuffer; // CodeGen { raw_svector_ostream OS(OutputBuffer); legacy::PassManager PM; // If the bitcode files contain ARC code and were compiled with optimization, // the ObjCARCContractPass must be run, so do it unconditionally here. PM.add(createObjCARCContractPass()); // Setup the codegen now. if (TM.addPassesToEmitFile(PM, OS, TargetMachine::CGFT_ObjectFile, /* DisableVerify */ true)) report_fatal_error("Failed to setup codegen"); // Run codegen now. resulting binary is in OutputBuffer. PM.run(TheModule); } return make_unique(std::move(OutputBuffer)); } /// Manage caching for a single Module. class ModuleCacheEntry { SmallString<128> EntryPath; public: // Create a cache entry. This compute a unique hash for the Module considering // the current list of export/import, and offer an interface to query to // access the content in the cache. 
ModuleCacheEntry( StringRef CachePath, const ModuleSummaryIndex &Index, StringRef ModuleID, const FunctionImporter::ImportMapTy &ImportList, const FunctionImporter::ExportSetTy &ExportList, const std::map &ResolvedODR, const GVSummaryMapTy &DefinedFunctions, const DenseSet &PreservedSymbols, unsigned OptLevel, const TargetMachineBuilder &TMBuilder) { if (CachePath.empty()) return; if (!Index.modulePaths().count(ModuleID)) // The module does not have an entry, it can't have a hash at all return; // Compute the unique hash for this entry // This is based on the current compiler version, the module itself, the // export list, the hash for every single module in the import list, the // list of ResolvedODR for the module, and the list of preserved symbols. // Include the hash for the current module auto ModHash = Index.getModuleHash(ModuleID); if (all_of(ModHash, [](uint32_t V) { return V == 0; })) // No hash entry, no caching! return; SHA1 Hasher; // Include the parts of the LTO configuration that affect code generation. auto AddString = [&](StringRef Str) { Hasher.update(Str); Hasher.update(ArrayRef{0}); }; auto AddUnsigned = [&](unsigned I) { uint8_t Data[4]; Data[0] = I; Data[1] = I >> 8; Data[2] = I >> 16; Data[3] = I >> 24; Hasher.update(ArrayRef{Data, 4}); }; // Start with the compiler revision Hasher.update(LLVM_VERSION_STRING); #ifdef HAVE_LLVM_REVISION Hasher.update(LLVM_REVISION); #endif // Hash the optimization level and the target machine settings. AddString(TMBuilder.MCpu); // FIXME: Hash more of Options. For now all clients initialize Options from // command-line flags (which is unsupported in production), but may set // RelaxELFRelocations. The clang driver can also pass FunctionSections, // DataSections and DebuggerTuning via command line flags. 
AddUnsigned(TMBuilder.Options.RelaxELFRelocations); AddUnsigned(TMBuilder.Options.FunctionSections); AddUnsigned(TMBuilder.Options.DataSections); AddUnsigned((unsigned)TMBuilder.Options.DebuggerTuning); AddString(TMBuilder.MAttr); if (TMBuilder.RelocModel) AddUnsigned(*TMBuilder.RelocModel); AddUnsigned(TMBuilder.CGOptLevel); AddUnsigned(OptLevel); Hasher.update(ArrayRef((uint8_t *)&ModHash[0], sizeof(ModHash))); for (auto F : ExportList) // The export list can impact the internalization, be conservative here Hasher.update(ArrayRef((uint8_t *)&F, sizeof(F))); // Include the hash for every module we import functions from for (auto &Entry : ImportList) { auto ModHash = Index.getModuleHash(Entry.first()); Hasher.update(ArrayRef((uint8_t *)&ModHash[0], sizeof(ModHash))); } // Include the hash for the resolved ODR. for (auto &Entry : ResolvedODR) { Hasher.update(ArrayRef((const uint8_t *)&Entry.first, sizeof(GlobalValue::GUID))); Hasher.update(ArrayRef((const uint8_t *)&Entry.second, sizeof(GlobalValue::LinkageTypes))); } // Include the hash for the preserved symbols. for (auto &Entry : PreservedSymbols) { if (DefinedFunctions.count(Entry)) Hasher.update( ArrayRef((const uint8_t *)&Entry, sizeof(GlobalValue::GUID))); } sys::path::append(EntryPath, CachePath, toHex(Hasher.result())); } // Access the path to this entry in the cache. StringRef getEntryPath() { return EntryPath; } // Try loading the buffer for this cache entry. 
ErrorOr> tryLoadingBuffer() { if (EntryPath.empty()) return std::error_code(); return MemoryBuffer::getFile(EntryPath); } // Cache the Produced object file void write(const MemoryBuffer &OutputBuffer) { if (EntryPath.empty()) return; // Write to a temporary to avoid race condition SmallString<128> TempFilename; int TempFD; std::error_code EC = sys::fs::createTemporaryFile("Thin", "tmp.o", TempFD, TempFilename); if (EC) { errs() << "Error: " << EC.message() << "\n"; report_fatal_error("ThinLTO: Can't get a temporary file"); } { raw_fd_ostream OS(TempFD, /* ShouldClose */ true); OS << OutputBuffer.getBuffer(); } // Rename to final destination (hopefully race condition won't matter here) EC = sys::fs::rename(TempFilename, EntryPath); if (EC) { sys::fs::remove(TempFilename); raw_fd_ostream OS(EntryPath, EC, sys::fs::F_None); if (EC) report_fatal_error(Twine("Failed to open ") + EntryPath + " to save cached entry\n"); OS << OutputBuffer.getBuffer(); } } }; static std::unique_ptr ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index, StringMap &ModuleMap, TargetMachine &TM, const FunctionImporter::ImportMapTy &ImportList, const FunctionImporter::ExportSetTy &ExportList, const DenseSet &GUIDPreservedSymbols, const GVSummaryMapTy &DefinedGlobals, const ThinLTOCodeGenerator::CachingOptions &CacheOptions, bool DisableCodeGen, StringRef SaveTempsDir, unsigned OptLevel, unsigned count) { // "Benchmark"-like optimization: single-source case bool SingleModule = (ModuleMap.size() == 1); if (!SingleModule) { promoteModule(TheModule, Index); // Apply summary-based LinkOnce/Weak resolution decisions. thinLTOResolveWeakForLinkerModule(TheModule, DefinedGlobals); // Save temps: after promotion. saveTempBitcode(TheModule, SaveTempsDir, count, ".1.promoted.bc"); } // Be friendly and don't nuke totally the module when the client didn't // supply anything to preserve. if (!ExportList.empty() || !GUIDPreservedSymbols.empty()) { // Apply summary-based internalization decisions. 
thinLTOInternalizeModule(TheModule, DefinedGlobals); } // Save internalized bitcode saveTempBitcode(TheModule, SaveTempsDir, count, ".2.internalized.bc"); if (!SingleModule) { crossImportIntoModule(TheModule, Index, ModuleMap, ImportList); // Save temps: after cross-module import. saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc"); } optimizeModule(TheModule, TM, OptLevel); saveTempBitcode(TheModule, SaveTempsDir, count, ".4.opt.bc"); if (DisableCodeGen) { // Configured to stop before CodeGen, serialize the bitcode and return. SmallVector OutputBuffer; { raw_svector_ostream OS(OutputBuffer); ProfileSummaryInfo PSI(TheModule); auto Index = buildModuleSummaryIndex(TheModule, nullptr, nullptr); WriteBitcodeToFile(&TheModule, OS, true, &Index); } return make_unique(std::move(OutputBuffer)); } return codegenModule(TheModule, TM); } /// Resolve LinkOnce/Weak symbols. Record resolutions in the \p ResolvedODR map /// for caching, and in the \p Index for application during the ThinLTO /// backends. This is needed for correctness for exported symbols (ensure /// at least one copy kept) and a compile-time optimization (to drop duplicate /// copies when possible). static void resolveWeakForLinkerInIndex( ModuleSummaryIndex &Index, StringMap> &ResolvedODR) { DenseMap PrevailingCopy; computePrevailingCopies(Index, PrevailingCopy); auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) { const auto &Prevailing = PrevailingCopy.find(GUID); // Not in map means that there was only one copy, which must be prevailing. 
if (Prevailing == PrevailingCopy.end()) return true; return Prevailing->second == S; }; auto recordNewLinkage = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID, GlobalValue::LinkageTypes NewLinkage) { ResolvedODR[ModuleIdentifier][GUID] = NewLinkage; }; thinLTOResolveWeakForLinkerInIndex(Index, isPrevailing, recordNewLinkage); } // Initialize the TargetMachine builder for a given Triple static void initTMBuilder(TargetMachineBuilder &TMBuilder, const Triple &TheTriple) { // Set a default CPU for Darwin triples (copied from LTOCodeGenerator). // FIXME this looks pretty terrible... if (TMBuilder.MCpu.empty() && TheTriple.isOSDarwin()) { if (TheTriple.getArch() == llvm::Triple::x86_64) TMBuilder.MCpu = "core2"; else if (TheTriple.getArch() == llvm::Triple::x86) TMBuilder.MCpu = "yonah"; else if (TheTriple.getArch() == llvm::Triple::aarch64) TMBuilder.MCpu = "cyclone"; } TMBuilder.TheTriple = std::move(TheTriple); } } // end anonymous namespace void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) { - MemoryBufferRef Buffer(Data, Identifier); + ThinLTOBuffer Buffer(Data, Identifier); if (Modules.empty()) { // First module added, so initialize the triple and some options LLVMContext Context; StringRef TripleStr; - ErrorOr TripleOrErr = - expectedToErrorOrAndEmitErrors(Context, getBitcodeTargetTriple(Buffer)); + ErrorOr TripleOrErr = expectedToErrorOrAndEmitErrors( + Context, getBitcodeTargetTriple(Buffer.getMemBuffer())); if (TripleOrErr) TripleStr = *TripleOrErr; Triple TheTriple(TripleStr); initTMBuilder(TMBuilder, Triple(TheTriple)); } #ifndef NDEBUG else { LLVMContext Context; StringRef TripleStr; - ErrorOr TripleOrErr = - expectedToErrorOrAndEmitErrors(Context, getBitcodeTargetTriple(Buffer)); + ErrorOr TripleOrErr = expectedToErrorOrAndEmitErrors( + Context, getBitcodeTargetTriple(Buffer.getMemBuffer())); if (TripleOrErr) TripleStr = *TripleOrErr; assert(TMBuilder.TheTriple.str() == TripleStr && "ThinLTO modules with different triple 
not supported"); } #endif Modules.push_back(Buffer); } void ThinLTOCodeGenerator::preserveSymbol(StringRef Name) { PreservedSymbols.insert(Name); } void ThinLTOCodeGenerator::crossReferenceSymbol(StringRef Name) { // FIXME: At the moment, we don't take advantage of this extra information, // we're conservatively considering cross-references as preserved. // CrossReferencedSymbols.insert(Name); PreservedSymbols.insert(Name); } // TargetMachine factory std::unique_ptr TargetMachineBuilder::create() const { std::string ErrMsg; const Target *TheTarget = TargetRegistry::lookupTarget(TheTriple.str(), ErrMsg); if (!TheTarget) { report_fatal_error("Can't load target for this Triple: " + ErrMsg); } // Use MAttr as the default set of features. SubtargetFeatures Features(MAttr); Features.getDefaultSubtargetFeatures(TheTriple); std::string FeatureStr = Features.getString(); return std::unique_ptr(TheTarget->createTargetMachine( TheTriple.str(), MCpu, FeatureStr, Options, RelocModel, CodeModel::Default, CGOptLevel)); } /** * Produce the combined summary index from all the bitcode files: * "thin-link". */ std::unique_ptr ThinLTOCodeGenerator::linkCombinedIndex() { std::unique_ptr CombinedIndex; uint64_t NextModuleId = 0; for (auto &ModuleBuffer : Modules) { Expected> ObjOrErr = - object::ModuleSummaryIndexObjectFile::create(ModuleBuffer); + object::ModuleSummaryIndexObjectFile::create( + ModuleBuffer.getMemBuffer()); if (!ObjOrErr) { // FIXME diagnose logAllUnhandledErrors( ObjOrErr.takeError(), errs(), "error: can't create ModuleSummaryIndexObjectFile for buffer: "); return nullptr; } auto Index = (*ObjOrErr)->takeIndex(); if (CombinedIndex) { CombinedIndex->mergeFrom(std::move(Index), ++NextModuleId); } else { CombinedIndex = std::move(Index); } } return CombinedIndex; } /** * Perform promotion and renaming of exported internal functions. * Index is updated to reflect linkage changes from weak resolution. 
*/ void ThinLTOCodeGenerator::promote(Module &TheModule, ModuleSummaryIndex &Index) { auto ModuleCount = Index.modulePaths().size(); auto ModuleIdentifier = TheModule.getModuleIdentifier(); // Collect for each module the list of function it defines (GUID -> Summary). StringMap ModuleToDefinedGVSummaries; Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); // Convert the preserved symbols set from string to GUID auto GUIDPreservedSymbols = computeGUIDPreservedSymbols( PreservedSymbols, Triple(TheModule.getTargetTriple())); // Compute "dead" symbols, we don't want to import/export these! auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols); // Generate import/export list StringMap ImportLists(ModuleCount); StringMap ExportLists(ModuleCount); ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, ExportLists, &DeadSymbols); // Resolve LinkOnce/Weak symbols. StringMap> ResolvedODR; resolveWeakForLinkerInIndex(Index, ResolvedODR); thinLTOResolveWeakForLinkerModule( TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]); // Promote the exported values in the index, so that they are promoted // in the module. auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) { const auto &ExportList = ExportLists.find(ModuleIdentifier); return (ExportList != ExportLists.end() && ExportList->second.count(GUID)) || GUIDPreservedSymbols.count(GUID); }; thinLTOInternalizeAndPromoteInIndex(Index, isExported); promoteModule(TheModule, Index); } /** * Perform cross-module importing for the module identified by ModuleIdentifier. */ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule, ModuleSummaryIndex &Index) { auto ModuleMap = generateModuleMap(Modules); auto ModuleCount = Index.modulePaths().size(); // Collect for each module the list of function it defines (GUID -> Summary). 
StringMap ModuleToDefinedGVSummaries(ModuleCount); Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); // Convert the preserved symbols set from string to GUID auto GUIDPreservedSymbols = computeGUIDPreservedSymbols( PreservedSymbols, Triple(TheModule.getTargetTriple())); // Compute "dead" symbols, we don't want to import/export these! auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols); // Generate import/export list StringMap ImportLists(ModuleCount); StringMap ExportLists(ModuleCount); ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, ExportLists, &DeadSymbols); auto &ImportList = ImportLists[TheModule.getModuleIdentifier()]; crossImportIntoModule(TheModule, Index, ModuleMap, ImportList); } /** * Compute the list of summaries needed for importing into module. */ void ThinLTOCodeGenerator::gatherImportedSummariesForModule( StringRef ModulePath, ModuleSummaryIndex &Index, std::map &ModuleToSummariesForIndex) { auto ModuleCount = Index.modulePaths().size(); // Collect for each module the list of function it defines (GUID -> Summary). StringMap ModuleToDefinedGVSummaries(ModuleCount); Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); // Generate import/export list StringMap ImportLists(ModuleCount); StringMap ExportLists(ModuleCount); ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, ExportLists); llvm::gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries, ImportLists[ModulePath], ModuleToSummariesForIndex); } /** * Emit the list of files needed for importing into module. */ void ThinLTOCodeGenerator::emitImports(StringRef ModulePath, StringRef OutputName, ModuleSummaryIndex &Index) { auto ModuleCount = Index.modulePaths().size(); // Collect for each module the list of function it defines (GUID -> Summary). 
StringMap ModuleToDefinedGVSummaries(ModuleCount); Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); // Generate import/export list StringMap ImportLists(ModuleCount); StringMap ExportLists(ModuleCount); ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, ExportLists); std::error_code EC; if ((EC = EmitImportsFiles(ModulePath, OutputName, ImportLists[ModulePath]))) report_fatal_error(Twine("Failed to open ") + OutputName + " to save imports lists\n"); } /** * Perform internalization. Index is updated to reflect linkage changes. */ void ThinLTOCodeGenerator::internalize(Module &TheModule, ModuleSummaryIndex &Index) { initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple())); auto ModuleCount = Index.modulePaths().size(); auto ModuleIdentifier = TheModule.getModuleIdentifier(); // Convert the preserved symbols set from string to GUID auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple); // Collect for each module the list of function it defines (GUID -> Summary). StringMap ModuleToDefinedGVSummaries(ModuleCount); Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); // Compute "dead" symbols, we don't want to import/export these! auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols); // Generate import/export list StringMap ImportLists(ModuleCount); StringMap ExportLists(ModuleCount); ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, ExportLists, &DeadSymbols); auto &ExportList = ExportLists[ModuleIdentifier]; // Be friendly and don't nuke totally the module when the client didn't // supply anything to preserve. 
if (ExportList.empty() && GUIDPreservedSymbols.empty()) return; // Internalization auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) { const auto &ExportList = ExportLists.find(ModuleIdentifier); return (ExportList != ExportLists.end() && ExportList->second.count(GUID)) || GUIDPreservedSymbols.count(GUID); }; thinLTOInternalizeAndPromoteInIndex(Index, isExported); thinLTOInternalizeModule(TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]); } /** * Perform post-importing ThinLTO optimizations. */ void ThinLTOCodeGenerator::optimize(Module &TheModule) { initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple())); // Optimize now optimizeModule(TheModule, *TMBuilder.create(), OptLevel); } /** * Perform ThinLTO CodeGen. */ std::unique_ptr ThinLTOCodeGenerator::codegen(Module &TheModule) { initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple())); return codegenModule(TheModule, *TMBuilder.create()); } /// Write out the generated object file, either from CacheEntryPath or from /// OutputBuffer, preferring hard-link when possible. /// Returns the path to the generated file in SavedObjectsDirectoryPath. static std::string writeGeneratedObject(int count, StringRef CacheEntryPath, StringRef SavedObjectsDirectoryPath, const MemoryBuffer &OutputBuffer) { SmallString<128> OutputPath(SavedObjectsDirectoryPath); llvm::sys::path::append(OutputPath, Twine(count) + ".thinlto.o"); OutputPath.c_str(); // Ensure the string is null terminated. if (sys::fs::exists(OutputPath)) sys::fs::remove(OutputPath); // We don't return a memory buffer to the linker, just a list of files. if (!CacheEntryPath.empty()) { // Cache is enabled, hard-link the entry (or copy if hard-link fails). auto Err = sys::fs::create_hard_link(CacheEntryPath, OutputPath); if (!Err) return OutputPath.str(); // Hard linking failed, try to copy. 
Err = sys::fs::copy_file(CacheEntryPath, OutputPath); if (!Err) return OutputPath.str(); // Copy failed (could be because the CacheEntry was removed from the cache // in the meantime by another process), fall back and try to write down the // buffer to the output. errs() << "error: can't link or copy from cached entry '" << CacheEntryPath << "' to '" << OutputPath << "'\n"; } // No cache entry, just write out the buffer. std::error_code Err; raw_fd_ostream OS(OutputPath, Err, sys::fs::F_None); if (Err) report_fatal_error("Can't open output '" + OutputPath + "'\n"); OS << OutputBuffer.getBuffer(); return OutputPath.str(); } // Main entry point for the ThinLTO processing void ThinLTOCodeGenerator::run() { // Prepare the resulting object vector assert(ProducedBinaries.empty() && "The generator should not be reused"); if (SavedObjectsDirectoryPath.empty()) ProducedBinaries.resize(Modules.size()); else { sys::fs::create_directories(SavedObjectsDirectoryPath); bool IsDir; sys::fs::is_directory(SavedObjectsDirectoryPath, IsDir); if (!IsDir) report_fatal_error("Unexistent dir: '" + SavedObjectsDirectoryPath + "'"); ProducedBinaryFiles.resize(Modules.size()); } if (CodeGenOnly) { // Perform only parallel codegen and return. ThreadPool Pool; int count = 0; for (auto &ModuleBuffer : Modules) { Pool.async([&](int count) { LLVMContext Context; Context.setDiscardValueNames(LTODiscardValueNames); // Parse module now - auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false, - /*IsImporting*/ false); + auto TheModule = + loadModuleFromBuffer(ModuleBuffer.getMemBuffer(), Context, false, + /*IsImporting*/ false); // CodeGen auto OutputBuffer = codegen(*TheModule); if (SavedObjectsDirectoryPath.empty()) ProducedBinaries[count] = std::move(OutputBuffer); else ProducedBinaryFiles[count] = writeGeneratedObject( count, "", SavedObjectsDirectoryPath, *OutputBuffer); }, count++); } return; } // Sequential linking phase auto Index = linkCombinedIndex(); // Save temps: index. 
if (!SaveTempsDir.empty()) { auto SaveTempPath = SaveTempsDir + "index.bc"; std::error_code EC; raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None); if (EC) report_fatal_error(Twine("Failed to open ") + SaveTempPath + " to save optimized bitcode\n"); WriteIndexToFile(*Index, OS); } // Prepare the module map. auto ModuleMap = generateModuleMap(Modules); auto ModuleCount = Modules.size(); // Collect for each module the list of function it defines (GUID -> Summary). StringMap ModuleToDefinedGVSummaries(ModuleCount); Index->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); // Convert the preserved symbols set from string to GUID, this is needed for // computing the caching hash and the internalization. auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple); // Compute "dead" symbols, we don't want to import/export these! auto DeadSymbols = computeDeadSymbols(*Index, GUIDPreservedSymbols); // Collect the import/export lists for all modules from the call-graph in the // combined index. StringMap ImportLists(ModuleCount); StringMap ExportLists(ModuleCount); ComputeCrossModuleImport(*Index, ModuleToDefinedGVSummaries, ImportLists, ExportLists, &DeadSymbols); // We use a std::map here to be able to have a defined ordering when // producing a hash for the cache entry. // FIXME: we should be able to compute the caching hash for the entry based // on the index, and nuke this map. StringMap> ResolvedODR; // Resolve LinkOnce/Weak symbols, this has to be computed early because it // impacts the caching. 
resolveWeakForLinkerInIndex(*Index, ResolvedODR); auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) { const auto &ExportList = ExportLists.find(ModuleIdentifier); return (ExportList != ExportLists.end() && ExportList->second.count(GUID)) || GUIDPreservedSymbols.count(GUID); }; // Use global summary-based analysis to identify symbols that can be // internalized (because they aren't exported or preserved as per callback). // Changes are made in the index, consumed in the ThinLTO backends. thinLTOInternalizeAndPromoteInIndex(*Index, isExported); // Make sure that every module has an entry in the ExportLists and // ResolvedODR maps to enable threaded access to these maps below. for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) { ExportLists[DefinedGVSummaries.first()]; ResolvedODR[DefinedGVSummaries.first()]; } // Compute the ordering we will process the inputs: the rough heuristic here // is to sort them per size so that the largest module get schedule as soon as // possible. This is purely a compile-time optimization. std::vector ModulesOrdering; ModulesOrdering.resize(Modules.size()); std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0); std::sort(ModulesOrdering.begin(), ModulesOrdering.end(), [&](int LeftIndex, int RightIndex) { - auto LSize = Modules[LeftIndex].getBufferSize(); - auto RSize = Modules[RightIndex].getBufferSize(); + auto LSize = Modules[LeftIndex].getBuffer().size(); + auto RSize = Modules[RightIndex].getBuffer().size(); return LSize > RSize; }); // Parallel optimizer + codegen { ThreadPool Pool(ThreadCount); for (auto IndexCount : ModulesOrdering) { auto &ModuleBuffer = Modules[IndexCount]; Pool.async([&](int count) { auto ModuleIdentifier = ModuleBuffer.getBufferIdentifier(); auto &ExportList = ExportLists[ModuleIdentifier]; auto &DefinedFunctions = ModuleToDefinedGVSummaries[ModuleIdentifier]; // The module may be cached, this helps handling it. 
ModuleCacheEntry CacheEntry(CacheOptions.Path, *Index, ModuleIdentifier, ImportLists[ModuleIdentifier], ExportList, ResolvedODR[ModuleIdentifier], DefinedFunctions, GUIDPreservedSymbols, OptLevel, TMBuilder); auto CacheEntryPath = CacheEntry.getEntryPath(); { auto ErrOrBuffer = CacheEntry.tryLoadingBuffer(); DEBUG(dbgs() << "Cache " << (ErrOrBuffer ? "hit" : "miss") << " '" << CacheEntryPath << "' for buffer " << count << " " << ModuleIdentifier << "\n"); if (ErrOrBuffer) { // Cache Hit! if (SavedObjectsDirectoryPath.empty()) ProducedBinaries[count] = std::move(ErrOrBuffer.get()); else ProducedBinaryFiles[count] = writeGeneratedObject( count, CacheEntryPath, SavedObjectsDirectoryPath, *ErrOrBuffer.get()); return; } } LLVMContext Context; Context.setDiscardValueNames(LTODiscardValueNames); Context.enableDebugTypeODRUniquing(); auto DiagFileOrErr = setupOptimizationRemarks(Context, count); if (!DiagFileOrErr) { errs() << "Error: " << toString(DiagFileOrErr.takeError()) << "\n"; report_fatal_error("ThinLTO: Can't get an output file for the " "remarks"); } // Parse module now - auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false, - /*IsImporting*/ false); + auto TheModule = + loadModuleFromBuffer(ModuleBuffer.getMemBuffer(), Context, false, + /*IsImporting*/ false); // Save temps: original file. saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc"); auto &ImportList = ImportLists[ModuleIdentifier]; // Run the main process now, and generates a binary auto OutputBuffer = ProcessThinLTOModule( *TheModule, *Index, ModuleMap, *TMBuilder.create(), ImportList, ExportList, GUIDPreservedSymbols, ModuleToDefinedGVSummaries[ModuleIdentifier], CacheOptions, DisableCodeGen, SaveTempsDir, OptLevel, count); // Commit to the cache (if enabled) CacheEntry.write(*OutputBuffer); if (SavedObjectsDirectoryPath.empty()) { // We need to generated a memory buffer for the linker. 
if (!CacheEntryPath.empty()) { // Cache is enabled, reload from the cache // We do this to lower memory pressuree: the buffer is on the heap // and releasing it frees memory that can be used for the next input // file. The final binary link will read from the VFS cache // (hopefully!) or from disk if the memory pressure wasn't too high. auto ReloadedBufferOrErr = CacheEntry.tryLoadingBuffer(); if (auto EC = ReloadedBufferOrErr.getError()) { // On error, keeping the preexisting buffer and printing a // diagnostic is more friendly than just crashing. errs() << "error: can't reload cached file '" << CacheEntryPath << "': " << EC.message() << "\n"; } else { OutputBuffer = std::move(*ReloadedBufferOrErr); } } ProducedBinaries[count] = std::move(OutputBuffer); return; } ProducedBinaryFiles[count] = writeGeneratedObject( count, CacheEntryPath, SavedObjectsDirectoryPath, *OutputBuffer); }, IndexCount); } } CachePruning(CacheOptions.Path) .setPruningInterval(std::chrono::seconds(CacheOptions.PruningInterval)) .setEntryExpiration(std::chrono::seconds(CacheOptions.Expiration)) .setMaxSize(CacheOptions.MaxPercentageOfAvailableSpace) .prune(); // If statistics were requested, print them out now. if (llvm::AreStatisticsEnabled()) llvm::PrintStatistics(); } Index: projects/clang400-import/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td =================================================================== --- projects/clang400-import/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td (revision 313893) +++ projects/clang400-import/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td (revision 313894) @@ -1,337 +1,337 @@ //=- AArch64CallingConv.td - Calling Conventions for AArch64 -*- tablegen -*-=// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// // // This describes the calling conventions for AArch64 architecture. // //===----------------------------------------------------------------------===// /// CCIfAlign - Match of the original alignment of the arg class CCIfAlign : CCIf; /// CCIfBigEndian - Match only if we're in big endian mode. class CCIfBigEndian : CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>; //===----------------------------------------------------------------------===// // ARM AAPCS64 Calling Convention //===----------------------------------------------------------------------===// def CC_AArch64_AAPCS : CallingConv<[ CCIfType<[iPTR], CCBitConvertToType>, CCIfType<[v2f32], CCBitConvertToType>, CCIfType<[v2f64, v4f32], CCBitConvertToType>, // Big endian vectors must be passed as if they were 1-element vectors so that // their lanes are in a consistent order. CCIfBigEndian>>, CCIfBigEndian>>, // An SRet is passed in X8, not X0 like a normal pointer parameter. CCIfSRet>>, // Put ByVal arguments directly on the stack. Minimum size and alignment of a // slot is 64-bit. CCIfByVal>, // The 'nest' parameter, if any, is passed in X18. // Darwin uses X18 as the platform register and hence 'nest' isn't currently // supported there. CCIfNest>, // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf>>, CCIfConsecutiveRegs>, // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, // up to eight each of GPR and FPR. CCIfType<[i1, i8, i16], CCPromoteToType>, CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], [X0, X1, X2, X3, X4, X5, X6, X7]>>, // i128 is split to two i64s, we can't fit half to register X7. CCIfType<[i64], CCIfSplit>>, // i128 is split to two i64s, and its stack alignment is 16 bytes. 
CCIfType<[i64], CCIfSplit>>, CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], [W0, W1, W2, W3, W4, W5, W6, W7]>>, CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7], [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, // If more than will fit in registers, pass them on the stack instead. CCIfType<[i1, i8, i16, f16], CCAssignToStack<8, 8>>, CCIfType<[i32, f32], CCAssignToStack<8, 8>>, CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16], CCAssignToStack<8, 8>>, CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], CCAssignToStack<16, 16>> ]>; def RetCC_AArch64_AAPCS : CallingConv<[ CCIfType<[iPTR], CCBitConvertToType>, CCIfType<[v2f32], CCBitConvertToType>, CCIfType<[v2f64, v4f32], CCBitConvertToType>, - CCIfSwiftError>>, + CCIfSwiftError>>, // Big endian vectors must be passed as if they were 1-element vectors so that // their lanes are in a consistent order. 
CCIfBigEndian>>, CCIfBigEndian>>, CCIfType<[i1, i8, i16], CCPromoteToType>, CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], [X0, X1, X2, X3, X4, X5, X6, X7]>>, CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], [W0, W1, W2, W3, W4, W5, W6, W7]>>, CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7], [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>> ]>; // Darwin uses a calling convention which differs in only two ways // from the standard one at this level: // + i128s (i.e. split i64s) don't need even registers. // + Stack slots are sized as needed rather than being at least 64-bit. def CC_AArch64_DarwinPCS : CallingConv<[ CCIfType<[iPTR], CCBitConvertToType>, CCIfType<[v2f32], CCBitConvertToType>, CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, // An SRet is passed in X8, not X0 like a normal pointer parameter. CCIfSRet>>, // Put ByVal arguments directly on the stack. Minimum size and alignment of a // slot is 64-bit. CCIfByVal>, // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf>>, - // A SwiftError is passed in X19. - CCIfSwiftError>>, + // A SwiftError is passed in X21. + CCIfSwiftError>>, CCIfConsecutiveRegs>, // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, // up to eight each of GPR and FPR. 
CCIfType<[i1, i8, i16], CCPromoteToType>, CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], [X0, X1, X2, X3, X4, X5, X6, X7]>>, // i128 is split to two i64s, we can't fit half to register X7. CCIfType<[i64], CCIfSplit>>, // i128 is split to two i64s, and its stack alignment is 16 bytes. CCIfType<[i64], CCIfSplit>>, CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], [W0, W1, W2, W3, W4, W5, W6, W7]>>, CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7], [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, // If more than will fit in registers, pass them on the stack instead. CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>, CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16", CCAssignToStack<2, 2>>, CCIfType<[i32, f32], CCAssignToStack<4, 4>>, CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16], CCAssignToStack<8, 8>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], CCAssignToStack<16, 16>> ]>; def CC_AArch64_DarwinPCS_VarArg : CallingConv<[ CCIfType<[iPTR], CCBitConvertToType>, CCIfType<[v2f32], CCBitConvertToType>, CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, CCIfConsecutiveRegs>, // Handle all scalar types as either i64 or f64. CCIfType<[i8, i16, i32], CCPromoteToType>, CCIfType<[f16, f32], CCPromoteToType>, // Everything is on the stack. // i128 is split to two i64s, and its stack alignment is 16 bytes. 
CCIfType<[i64], CCIfSplit>>, CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16], CCAssignToStack<8, 8>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], CCAssignToStack<16, 16>> ]>; // The WebKit_JS calling convention only passes the first argument (the callee) // in register and the remaining arguments on stack. We allow 32bit stack slots, // so that WebKit can write partial values in the stack and define the other // 32bit quantity as undef. def CC_AArch64_WebKit_JS : CallingConv<[ // Handle i1, i8, i16, i32, and i64 passing in register X0 (W0). CCIfType<[i1, i8, i16], CCPromoteToType>, CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>, CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>, // Pass the remaining arguments on the stack instead. CCIfType<[i32, f32], CCAssignToStack<4, 4>>, CCIfType<[i64, f64], CCAssignToStack<8, 8>> ]>; def RetCC_AArch64_WebKit_JS : CallingConv<[ CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], [X0, X1, X2, X3, X4, X5, X6, X7]>>, CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], [W0, W1, W2, W3, W4, W5, W6, W7]>>, CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>> ]>; //===----------------------------------------------------------------------===// // ARM64 Calling Convention for GHC //===----------------------------------------------------------------------===// // This calling convention is specific to the Glasgow Haskell Compiler. // The only documentation is the GHC source code, specifically the C header // file: // // https://github.com/ghc/ghc/blob/master/includes/stg/MachRegs.h // // which defines the registers for the Spineless Tagless G-Machine (STG) that // GHC uses to implement lazy evaluation. 
The generic STG machine has a set of // registers which are mapped to appropriate set of architecture specific // registers for each CPU architecture. // // The STG Machine is documented here: // // https://ghc.haskell.org/trac/ghc/wiki/Commentary/Compiler/GeneratedCode // // The AArch64 register mapping is under the heading "The ARMv8/AArch64 ABI // register mapping". def CC_AArch64_GHC : CallingConv<[ CCIfType<[iPTR], CCBitConvertToType>, // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, f128], CCBitConvertToType>, CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>, CCIfType<[f32], CCAssignToReg<[S8, S9, S10, S11]>>, CCIfType<[f64], CCAssignToReg<[D12, D13, D14, D15]>>, // Promote i8/i16/i32 arguments to i64. CCIfType<[i8, i16, i32], CCPromoteToType>, // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>> ]>; // FIXME: LR is only callee-saved in the sense that *we* preserve it and are // presumably a callee to someone. External functions may not do so, but this // is currently safe since BL has LR as an implicit-def and what happens after a // tail call doesn't matter. // // It would be better to model its preservation semantics properly (create a // vreg on entry, use it in RET & tail call generation; make that vreg def if we // end up saving LR as part of a call frame). Watch this space... 
def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, X23, X24, X25, X26, X27, X28, D8, D9, D10, D11, D12, D13, D14, D15)>; // Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since // 'this' and the pointer return value are both passed in X0 in these cases, // this can be partially modelled by treating X0 as a callee-saved register; // only the resulting RegMask is used; the SaveList is ignored // // (For generic ARM 64-bit ABI code, clang will not generate constructors or // destructors with 'this' returns, so this RegMask will not be used in that // case) def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>; def CSR_AArch64_AAPCS_SwiftError - : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X19)>; + : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>; // The function used by Darwin to obtain the address of a thread-local variable // guarantees more than a normal AAPCS function. x16 and x17 are used on the // fast path for calculation, but other registers except X0 (argument/return) // and LR (it is a call, after all) are preserved. def CSR_AArch64_TLS_Darwin : CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17), FP, (sequence "Q%u", 0, 31))>; // We can only handle a register pair with adjacent registers, the register pair // should belong to the same class as well. Since the access function on the // fast path calls a function that follows CSR_AArch64_TLS_Darwin, // CSR_AArch64_CXX_TLS_Darwin should be a subset of CSR_AArch64_TLS_Darwin. def CSR_AArch64_CXX_TLS_Darwin : CalleeSavedRegs<(add CSR_AArch64_AAPCS, (sub (sequence "X%u", 1, 28), X15, X16, X17, X18), (sequence "D%u", 0, 31))>; // CSRs that are handled by prologue, epilogue. def CSR_AArch64_CXX_TLS_Darwin_PE : CalleeSavedRegs<(add LR, FP)>; // CSRs that are handled explicitly via copies. 
def CSR_AArch64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(sub CSR_AArch64_CXX_TLS_Darwin, LR, FP)>; // The ELF stub used for TLS-descriptor access saves every feasible // register. Only X0 and LR are clobbered. def CSR_AArch64_TLS_ELF : CalleeSavedRegs<(add (sequence "X%u", 1, 28), FP, (sequence "Q%u", 0, 31))>; def CSR_AArch64_AllRegs : CalleeSavedRegs<(add (sequence "W%u", 0, 30), WSP, (sequence "X%u", 0, 28), FP, LR, SP, (sequence "B%u", 0, 31), (sequence "H%u", 0, 31), (sequence "S%u", 0, 31), (sequence "D%u", 0, 31), (sequence "Q%u", 0, 31))>; def CSR_AArch64_NoRegs : CalleeSavedRegs<(add)>; def CSR_AArch64_RT_MostRegs : CalleeSavedRegs<(add CSR_AArch64_AAPCS, (sequence "X%u", 9, 15))>; Index: projects/clang400-import/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- projects/clang400-import/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (revision 313893) +++ projects/clang400-import/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (revision 313894) @@ -1,10715 +1,10716 @@ //===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the AArch64TargetLowering class. 
// //===----------------------------------------------------------------------===// #include "AArch64CallingConvention.h" #include "AArch64MachineFunctionInfo.h" #include "AArch64ISelLowering.h" #include "AArch64PerfectShuffle.h" #include "AArch64RegisterInfo.h" #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "Utils/AArch64BaseInfo.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/OperandTraits.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/Value.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include 
"llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetCallingConv.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include #include #include #include #include #include #include #include #include #include #include using namespace llvm; #define DEBUG_TYPE "aarch64-lower" STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumShiftInserts, "Number of vector shift inserts"); static cl::opt EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden, cl::desc("Allow AArch64 SLI/SRI formation"), cl::init(false)); // FIXME: The necessary dtprel relocations don't seem to be supported // well in the GNU bfd and gold linkers at the moment. Therefore, by // default, for now, fall back to GeneralDynamic code generation. cl::opt EnableAArch64ELFLocalDynamicTLSGeneration( "aarch64-elf-ldtls-generation", cl::Hidden, cl::desc("Allow AArch64 Local Dynamic TLS code generation"), cl::init(false)); /// Value type used for condition codes. static const MVT MVT_CC = MVT::i32; AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, const AArch64Subtarget &STI) : TargetLowering(TM), Subtarget(&STI) { // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so // we have to make something up. Arbitrarily, choose ZeroOrOne. setBooleanContents(ZeroOrOneBooleanContent); // When comparing vectors the result sets the different elements in the // vector to all-one or all-zero. setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // Set up the register classes. 
addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass); addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass); if (Subtarget->hasFPARMv8()) { addRegisterClass(MVT::f16, &AArch64::FPR16RegClass); addRegisterClass(MVT::f32, &AArch64::FPR32RegClass); addRegisterClass(MVT::f64, &AArch64::FPR64RegClass); addRegisterClass(MVT::f128, &AArch64::FPR128RegClass); } if (Subtarget->hasNEON()) { addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass); addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass); // Someone set us up the NEON. addDRTypeForNEON(MVT::v2f32); addDRTypeForNEON(MVT::v8i8); addDRTypeForNEON(MVT::v4i16); addDRTypeForNEON(MVT::v2i32); addDRTypeForNEON(MVT::v1i64); addDRTypeForNEON(MVT::v1f64); addDRTypeForNEON(MVT::v4f16); addQRTypeForNEON(MVT::v4f32); addQRTypeForNEON(MVT::v2f64); addQRTypeForNEON(MVT::v16i8); addQRTypeForNEON(MVT::v8i16); addQRTypeForNEON(MVT::v4i32); addQRTypeForNEON(MVT::v2i64); addQRTypeForNEON(MVT::v8f16); } // Compute derived properties from the register classes computeRegisterProperties(Subtarget->getRegisterInfo()); // Provide all sorts of operation actions setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); setOperationAction(ISD::SETCC, MVT::i32, Custom); setOperationAction(ISD::SETCC, MVT::i64, Custom); setOperationAction(ISD::SETCC, MVT::f32, Custom); setOperationAction(ISD::SETCC, MVT::f64, Custom); setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); setOperationAction(ISD::BRCOND, MVT::Other, Expand); setOperationAction(ISD::BR_CC, MVT::i32, Custom); setOperationAction(ISD::BR_CC, MVT::i64, Custom); setOperationAction(ISD::BR_CC, MVT::f32, Custom); setOperationAction(ISD::BR_CC, MVT::f64, Custom); setOperationAction(ISD::SELECT, MVT::i32, Custom); setOperationAction(ISD::SELECT, MVT::i64, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); setOperationAction(ISD::SELECT, MVT::f64, Custom); 
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); setOperationAction(ISD::BR_JT, MVT::Other, Expand); setOperationAction(ISD::JumpTable, MVT::i64, Custom); setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); setOperationAction(ISD::FREM, MVT::f32, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f80, Expand); // Custom lowering hooks are needed for XOR // to fold it into CSINC/CSINV. setOperationAction(ISD::XOR, MVT::i32, Custom); setOperationAction(ISD::XOR, MVT::i64, Custom); // Virtually no operation on f128 is legal, but LLVM can't expand them when // there's a valid register class, so we need custom operations in most cases. setOperationAction(ISD::FABS, MVT::f128, Expand); setOperationAction(ISD::FADD, MVT::f128, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); setOperationAction(ISD::FCOS, MVT::f128, Expand); setOperationAction(ISD::FDIV, MVT::f128, Custom); setOperationAction(ISD::FMA, MVT::f128, Expand); setOperationAction(ISD::FMUL, MVT::f128, Custom); setOperationAction(ISD::FNEG, MVT::f128, Expand); setOperationAction(ISD::FPOW, MVT::f128, Expand); setOperationAction(ISD::FREM, MVT::f128, Expand); setOperationAction(ISD::FRINT, MVT::f128, Expand); setOperationAction(ISD::FSIN, MVT::f128, Expand); setOperationAction(ISD::FSINCOS, MVT::f128, Expand); setOperationAction(ISD::FSQRT, MVT::f128, Expand); setOperationAction(ISD::FSUB, MVT::f128, Custom); setOperationAction(ISD::FTRUNC, MVT::f128, Expand); setOperationAction(ISD::SETCC, MVT::f128, Custom); setOperationAction(ISD::BR_CC, MVT::f128, Custom); setOperationAction(ISD::SELECT, MVT::f128, Custom); setOperationAction(ISD::SELECT_CC, MVT::f128, Custom); 
setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); // Lowering for many of the conversions is actually specified by the non-f128 // type. The LowerXXX function will be trivial when f128 isn't involved. setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom); setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); // Variable arguments. setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::VAARG, MVT::Other, Custom); setOperationAction(ISD::VACOPY, MVT::Other, Custom); setOperationAction(ISD::VAEND, MVT::Other, Expand); // Variable-sized objects. setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); // Constant pool entries setOperationAction(ISD::ConstantPool, MVT::i64, Custom); // BlockAddress setOperationAction(ISD::BlockAddress, MVT::i64, Custom); // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences. 
setOperationAction(ISD::ADDC, MVT::i32, Custom); setOperationAction(ISD::ADDE, MVT::i32, Custom); setOperationAction(ISD::SUBC, MVT::i32, Custom); setOperationAction(ISD::SUBE, MVT::i32, Custom); setOperationAction(ISD::ADDC, MVT::i64, Custom); setOperationAction(ISD::ADDE, MVT::i64, Custom); setOperationAction(ISD::SUBC, MVT::i64, Custom); setOperationAction(ISD::SUBE, MVT::i64, Custom); // AArch64 lacks both left-rotate and popcount instructions. setOperationAction(ISD::ROTL, MVT::i32, Expand); setOperationAction(ISD::ROTL, MVT::i64, Expand); for (MVT VT : MVT::vector_valuetypes()) { setOperationAction(ISD::ROTL, VT, Expand); setOperationAction(ISD::ROTR, VT, Expand); } // AArch64 doesn't have {U|S}MUL_LOHI. setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::CTPOP, MVT::i32, Custom); setOperationAction(ISD::CTPOP, MVT::i64, Custom); setOperationAction(ISD::SDIVREM, MVT::i32, Expand); setOperationAction(ISD::SDIVREM, MVT::i64, Expand); for (MVT VT : MVT::vector_valuetypes()) { setOperationAction(ISD::SDIVREM, VT, Expand); setOperationAction(ISD::UDIVREM, VT, Expand); } setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::SREM, MVT::i64, Expand); setOperationAction(ISD::UDIVREM, MVT::i32, Expand); setOperationAction(ISD::UDIVREM, MVT::i64, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i64, Expand); // Custom lower Add/Sub/Mul with overflow. 
setOperationAction(ISD::SADDO, MVT::i32, Custom); setOperationAction(ISD::SADDO, MVT::i64, Custom); setOperationAction(ISD::UADDO, MVT::i32, Custom); setOperationAction(ISD::UADDO, MVT::i64, Custom); setOperationAction(ISD::SSUBO, MVT::i32, Custom); setOperationAction(ISD::SSUBO, MVT::i64, Custom); setOperationAction(ISD::USUBO, MVT::i32, Custom); setOperationAction(ISD::USUBO, MVT::i64, Custom); setOperationAction(ISD::SMULO, MVT::i32, Custom); setOperationAction(ISD::SMULO, MVT::i64, Custom); setOperationAction(ISD::UMULO, MVT::i32, Custom); setOperationAction(ISD::UMULO, MVT::i64, Custom); setOperationAction(ISD::FSIN, MVT::f32, Expand); setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FCOS, MVT::f32, Expand); setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); // f16 is a storage-only type, always promote it to f32. 
setOperationAction(ISD::SETCC, MVT::f16, Promote); setOperationAction(ISD::BR_CC, MVT::f16, Promote); setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); setOperationAction(ISD::SELECT, MVT::f16, Promote); setOperationAction(ISD::FADD, MVT::f16, Promote); setOperationAction(ISD::FSUB, MVT::f16, Promote); setOperationAction(ISD::FMUL, MVT::f16, Promote); setOperationAction(ISD::FDIV, MVT::f16, Promote); setOperationAction(ISD::FREM, MVT::f16, Promote); setOperationAction(ISD::FMA, MVT::f16, Promote); setOperationAction(ISD::FNEG, MVT::f16, Promote); setOperationAction(ISD::FABS, MVT::f16, Promote); setOperationAction(ISD::FCEIL, MVT::f16, Promote); setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); setOperationAction(ISD::FCOS, MVT::f16, Promote); setOperationAction(ISD::FFLOOR, MVT::f16, Promote); setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); setOperationAction(ISD::FPOW, MVT::f16, Promote); setOperationAction(ISD::FPOWI, MVT::f16, Promote); setOperationAction(ISD::FRINT, MVT::f16, Promote); setOperationAction(ISD::FSIN, MVT::f16, Promote); setOperationAction(ISD::FSINCOS, MVT::f16, Promote); setOperationAction(ISD::FSQRT, MVT::f16, Promote); setOperationAction(ISD::FEXP, MVT::f16, Promote); setOperationAction(ISD::FEXP2, MVT::f16, Promote); setOperationAction(ISD::FLOG, MVT::f16, Promote); setOperationAction(ISD::FLOG2, MVT::f16, Promote); setOperationAction(ISD::FLOG10, MVT::f16, Promote); setOperationAction(ISD::FROUND, MVT::f16, Promote); setOperationAction(ISD::FTRUNC, MVT::f16, Promote); setOperationAction(ISD::FMINNUM, MVT::f16, Promote); setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); setOperationAction(ISD::FMINNAN, MVT::f16, Promote); setOperationAction(ISD::FMAXNAN, MVT::f16, Promote); // v4f16 is also a storage-only type, so promote it to v4f32 when that is // known to be safe. 
setOperationAction(ISD::FADD, MVT::v4f16, Promote); setOperationAction(ISD::FSUB, MVT::v4f16, Promote); setOperationAction(ISD::FMUL, MVT::v4f16, Promote); setOperationAction(ISD::FDIV, MVT::v4f16, Promote); setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Promote); setOperationAction(ISD::FP_ROUND, MVT::v4f16, Promote); AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32); AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32); AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32); AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32); AddPromotedToType(ISD::FP_EXTEND, MVT::v4f16, MVT::v4f32); AddPromotedToType(ISD::FP_ROUND, MVT::v4f16, MVT::v4f32); // Expand all other v4f16 operations. // FIXME: We could generate better code by promoting some operations to // a pair of v4f32s setOperationAction(ISD::FABS, MVT::v4f16, Expand); setOperationAction(ISD::FCEIL, MVT::v4f16, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand); setOperationAction(ISD::FCOS, MVT::v4f16, Expand); setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand); setOperationAction(ISD::FMA, MVT::v4f16, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand); setOperationAction(ISD::FNEG, MVT::v4f16, Expand); setOperationAction(ISD::FPOW, MVT::v4f16, Expand); setOperationAction(ISD::FPOWI, MVT::v4f16, Expand); setOperationAction(ISD::FREM, MVT::v4f16, Expand); setOperationAction(ISD::FROUND, MVT::v4f16, Expand); setOperationAction(ISD::FRINT, MVT::v4f16, Expand); setOperationAction(ISD::FSIN, MVT::v4f16, Expand); setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand); setOperationAction(ISD::FSQRT, MVT::v4f16, Expand); setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand); setOperationAction(ISD::SETCC, MVT::v4f16, Expand); setOperationAction(ISD::BR_CC, MVT::v4f16, Expand); setOperationAction(ISD::SELECT, MVT::v4f16, Expand); setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand); setOperationAction(ISD::FEXP, MVT::v4f16, Expand); setOperationAction(ISD::FEXP2, MVT::v4f16, Expand); 
setOperationAction(ISD::FLOG, MVT::v4f16, Expand); setOperationAction(ISD::FLOG2, MVT::v4f16, Expand); setOperationAction(ISD::FLOG10, MVT::v4f16, Expand); // v8f16 is also a storage-only type, so expand it. setOperationAction(ISD::FABS, MVT::v8f16, Expand); setOperationAction(ISD::FADD, MVT::v8f16, Expand); setOperationAction(ISD::FCEIL, MVT::v8f16, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand); setOperationAction(ISD::FCOS, MVT::v8f16, Expand); setOperationAction(ISD::FDIV, MVT::v8f16, Expand); setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand); setOperationAction(ISD::FMA, MVT::v8f16, Expand); setOperationAction(ISD::FMUL, MVT::v8f16, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand); setOperationAction(ISD::FNEG, MVT::v8f16, Expand); setOperationAction(ISD::FPOW, MVT::v8f16, Expand); setOperationAction(ISD::FPOWI, MVT::v8f16, Expand); setOperationAction(ISD::FREM, MVT::v8f16, Expand); setOperationAction(ISD::FROUND, MVT::v8f16, Expand); setOperationAction(ISD::FRINT, MVT::v8f16, Expand); setOperationAction(ISD::FSIN, MVT::v8f16, Expand); setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand); setOperationAction(ISD::FSQRT, MVT::v8f16, Expand); setOperationAction(ISD::FSUB, MVT::v8f16, Expand); setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand); setOperationAction(ISD::SETCC, MVT::v8f16, Expand); setOperationAction(ISD::BR_CC, MVT::v8f16, Expand); setOperationAction(ISD::SELECT, MVT::v8f16, Expand); setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand); setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand); setOperationAction(ISD::FEXP, MVT::v8f16, Expand); setOperationAction(ISD::FEXP2, MVT::v8f16, Expand); setOperationAction(ISD::FLOG, MVT::v8f16, Expand); setOperationAction(ISD::FLOG2, MVT::v8f16, Expand); setOperationAction(ISD::FLOG10, MVT::v8f16, Expand); // AArch64 has implementations of a lot of rounding-like FP operations. 
for (MVT Ty : {MVT::f32, MVT::f64}) { setOperationAction(ISD::FFLOOR, Ty, Legal); setOperationAction(ISD::FNEARBYINT, Ty, Legal); setOperationAction(ISD::FCEIL, Ty, Legal); setOperationAction(ISD::FRINT, Ty, Legal); setOperationAction(ISD::FTRUNC, Ty, Legal); setOperationAction(ISD::FROUND, Ty, Legal); setOperationAction(ISD::FMINNUM, Ty, Legal); setOperationAction(ISD::FMAXNUM, Ty, Legal); setOperationAction(ISD::FMINNAN, Ty, Legal); setOperationAction(ISD::FMAXNAN, Ty, Legal); } setOperationAction(ISD::PREFETCH, MVT::Other, Custom); setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom); // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0. // This requires the Performance Monitors extension. if (Subtarget->hasPerfMon()) setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); if (Subtarget->isTargetMachO()) { // For iOS, we don't want to the normal expansion of a libcall to // sincos. We want to issue a libcall to __sincos_stret to avoid memory // traffic. setOperationAction(ISD::FSINCOS, MVT::f64, Custom); setOperationAction(ISD::FSINCOS, MVT::f32, Custom); } else { setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); } // Make floating-point constants legal for the large code model, so they don't // become loads from the constant pool. if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) { setOperationAction(ISD::ConstantFP, MVT::f32, Legal); setOperationAction(ISD::ConstantFP, MVT::f64, Legal); } // AArch64 does not have floating-point extending loads, i1 sign-extending // load, floating-point truncating stores, or v2i32->v2i16 truncating store. 
for (MVT VT : MVT::fp_valuetypes()) { setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand); setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand); } for (MVT VT : MVT::integer_valuetypes()) setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand); setTruncStoreAction(MVT::f32, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f128, MVT::f80, Expand); setTruncStoreAction(MVT::f128, MVT::f64, Expand); setTruncStoreAction(MVT::f128, MVT::f32, Expand); setTruncStoreAction(MVT::f128, MVT::f16, Expand); setOperationAction(ISD::BITCAST, MVT::i16, Custom); setOperationAction(ISD::BITCAST, MVT::f16, Custom); // Indexed loads and stores are supported. for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { setIndexedLoadAction(im, MVT::i8, Legal); setIndexedLoadAction(im, MVT::i16, Legal); setIndexedLoadAction(im, MVT::i32, Legal); setIndexedLoadAction(im, MVT::i64, Legal); setIndexedLoadAction(im, MVT::f64, Legal); setIndexedLoadAction(im, MVT::f32, Legal); setIndexedLoadAction(im, MVT::f16, Legal); setIndexedStoreAction(im, MVT::i8, Legal); setIndexedStoreAction(im, MVT::i16, Legal); setIndexedStoreAction(im, MVT::i32, Legal); setIndexedStoreAction(im, MVT::i64, Legal); setIndexedStoreAction(im, MVT::f64, Legal); setIndexedStoreAction(im, MVT::f32, Legal); setIndexedStoreAction(im, MVT::f16, Legal); } // Trap. setOperationAction(ISD::TRAP, MVT::Other, Legal); // We combine OR nodes for bitfield operations. setTargetDAGCombine(ISD::OR); // Vector add and sub nodes may conceal a high-half opportunity. // Also, try to fold ADD into CSINC/CSINV.. 
setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::XOR); setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::UINT_TO_FP); setTargetDAGCombine(ISD::FP_TO_SINT); setTargetDAGCombine(ISD::FP_TO_UINT); setTargetDAGCombine(ISD::FDIV); setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); setTargetDAGCombine(ISD::ANY_EXTEND); setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::BITCAST); setTargetDAGCombine(ISD::CONCAT_VECTORS); setTargetDAGCombine(ISD::STORE); if (Subtarget->supportsAddressTopByteIgnored()) setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::VSELECT); setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8; MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4; MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4; setStackPointerRegisterToSaveRestore(AArch64::SP); setSchedulingPreference(Sched::Hybrid); // Enable TBZ/TBNZ MaskAndBranchFoldingIsLegal = true; EnableExtLdPromotion = true; // Set required alignment. setMinFunctionAlignment(2); // Set preferred alignments. setPrefFunctionAlignment(STI.getPrefFunctionAlignment()); setPrefLoopAlignment(STI.getPrefLoopAlignment()); // Only change the limit for entries in a jump table if specified by // the subtarget, but not at the command line. 
unsigned MaxJT = STI.getMaximumJumpTableSize(); if (MaxJT && getMaximumJumpTableSize() == 0) setMaximumJumpTableSize(MaxJT); setHasExtractBitsInsn(true); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); if (Subtarget->hasNEON()) { // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to // silliness like this: setOperationAction(ISD::FABS, MVT::v1f64, Expand); setOperationAction(ISD::FADD, MVT::v1f64, Expand); setOperationAction(ISD::FCEIL, MVT::v1f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand); setOperationAction(ISD::FCOS, MVT::v1f64, Expand); setOperationAction(ISD::FDIV, MVT::v1f64, Expand); setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand); setOperationAction(ISD::FMA, MVT::v1f64, Expand); setOperationAction(ISD::FMUL, MVT::v1f64, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand); setOperationAction(ISD::FNEG, MVT::v1f64, Expand); setOperationAction(ISD::FPOW, MVT::v1f64, Expand); setOperationAction(ISD::FREM, MVT::v1f64, Expand); setOperationAction(ISD::FROUND, MVT::v1f64, Expand); setOperationAction(ISD::FRINT, MVT::v1f64, Expand); setOperationAction(ISD::FSIN, MVT::v1f64, Expand); setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand); setOperationAction(ISD::FSQRT, MVT::v1f64, Expand); setOperationAction(ISD::FSUB, MVT::v1f64, Expand); setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand); setOperationAction(ISD::SETCC, MVT::v1f64, Expand); setOperationAction(ISD::BR_CC, MVT::v1f64, Expand); setOperationAction(ISD::SELECT, MVT::v1f64, Expand); setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand); setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand); setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand); setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand); setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand); setOperationAction(ISD::MUL, MVT::v1i64, Expand); // 
AArch64 doesn't have a direct vector ->f32 conversion instructions for // elements smaller than i32, so promote the input to i32 first. setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Promote); setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Promote); setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Promote); setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Promote); // i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16 // -> v8f16 conversions. setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Promote); setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Promote); setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote); setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Promote); // Similarly, there is no direct i32 -> f64 vector conversion instruction. setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom); // Or, direct i32 -> f16 vector conversion. Set it so custom, so the // conversion happens in two steps: v4i32 -> v4f32 -> v4f16 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); setOperationAction(ISD::CTLZ, MVT::v1i64, Expand); setOperationAction(ISD::CTLZ, MVT::v2i64, Expand); setOperationAction(ISD::CTTZ, MVT::v2i8, Expand); setOperationAction(ISD::CTTZ, MVT::v4i16, Expand); setOperationAction(ISD::CTTZ, MVT::v2i32, Expand); setOperationAction(ISD::CTTZ, MVT::v1i64, Expand); setOperationAction(ISD::CTTZ, MVT::v16i8, Expand); setOperationAction(ISD::CTTZ, MVT::v8i16, Expand); setOperationAction(ISD::CTTZ, MVT::v4i32, Expand); setOperationAction(ISD::CTTZ, MVT::v2i64, Expand); // AArch64 doesn't have MUL.2d: setOperationAction(ISD::MUL, MVT::v2i64, Expand); // Custom handling for some quad-vector types to detect MULL. 
setOperationAction(ISD::MUL, MVT::v8i16, Custom); setOperationAction(ISD::MUL, MVT::v4i32, Custom); setOperationAction(ISD::MUL, MVT::v2i64, Custom); setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal); setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand); // Likewise, narrowing and extending vector loads/stores aren't handled // directly. for (MVT VT : MVT::vector_valuetypes()) { setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::SMUL_LOHI, VT, Expand); setOperationAction(ISD::MULHU, VT, Expand); setOperationAction(ISD::UMUL_LOHI, VT, Expand); setOperationAction(ISD::BSWAP, VT, Expand); for (MVT InnerVT : MVT::vector_valuetypes()) { setTruncStoreAction(VT, InnerVT, Expand); setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); } } // AArch64 has implementations of a lot of rounding-like FP operations. for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) { setOperationAction(ISD::FFLOOR, Ty, Legal); setOperationAction(ISD::FNEARBYINT, Ty, Legal); setOperationAction(ISD::FCEIL, Ty, Legal); setOperationAction(ISD::FRINT, Ty, Legal); setOperationAction(ISD::FTRUNC, Ty, Legal); setOperationAction(ISD::FROUND, Ty, Legal); } } PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive(); } void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) { if (VT == MVT::v2f32 || VT == MVT::v4f16) { setOperationAction(ISD::LOAD, VT, Promote); AddPromotedToType(ISD::LOAD, VT, MVT::v2i32); setOperationAction(ISD::STORE, VT, Promote); AddPromotedToType(ISD::STORE, VT, MVT::v2i32); } else if (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16) { setOperationAction(ISD::LOAD, VT, Promote); AddPromotedToType(ISD::LOAD, VT, MVT::v2i64); setOperationAction(ISD::STORE, VT, Promote); AddPromotedToType(ISD::STORE, VT, MVT::v2i64); } // Mark vector float intrinsics as expand. 
if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) { setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FPOWI, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::FLOG, VT, Expand); setOperationAction(ISD::FLOG2, VT, Expand); setOperationAction(ISD::FLOG10, VT, Expand); setOperationAction(ISD::FEXP, VT, Expand); setOperationAction(ISD::FEXP2, VT, Expand); // But we do support custom-lowering for FCOPYSIGN. setOperationAction(ISD::FCOPYSIGN, VT, Custom); } setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::AND, VT, Custom); setOperationAction(ISD::OR, VT, Custom); setOperationAction(ISD::SETCC, VT, Custom); setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); setOperationAction(ISD::SELECT, VT, Expand); setOperationAction(ISD::SELECT_CC, VT, Expand); setOperationAction(ISD::VSELECT, VT, Expand); for (MVT InnerVT : MVT::all_valuetypes()) setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand); // CNT supports only B element sizes. if (VT != MVT::v8i8 && VT != MVT::v16i8) setOperationAction(ISD::CTPOP, VT, Expand); setOperationAction(ISD::UDIV, VT, Expand); setOperationAction(ISD::SDIV, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::SREM, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FP_TO_SINT, VT, Custom); setOperationAction(ISD::FP_TO_UINT, VT, Custom); // [SU][MIN|MAX] are available for all NEON types apart from i64. 
if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64) for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) setOperationAction(Opcode, VT, Legal); // F[MIN|MAX][NUM|NAN] are available for all FP NEON types (not f16 though!). if (VT.isFloatingPoint() && VT.getVectorElementType() != MVT::f16) for (unsigned Opcode : {ISD::FMINNAN, ISD::FMAXNAN, ISD::FMINNUM, ISD::FMAXNUM}) setOperationAction(Opcode, VT, Legal); if (Subtarget->isLittleEndian()) { for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { setIndexedLoadAction(im, VT, Legal); setIndexedStoreAction(im, VT, Legal); } } } void AArch64TargetLowering::addDRTypeForNEON(MVT VT) { addRegisterClass(VT, &AArch64::FPR64RegClass); addTypeForNEON(VT, MVT::v2i32); } void AArch64TargetLowering::addQRTypeForNEON(MVT VT) { addRegisterClass(VT, &AArch64::FPR128RegClass); addTypeForNEON(VT, MVT::v4i32); } EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, EVT VT) const { if (!VT.isVector()) return MVT::i32; return VT.changeVectorElementTypeToInteger(); } /// computeKnownBitsForTargetNode - Determine which of the bits specified in /// Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. 
void AArch64TargetLowering::computeKnownBitsForTargetNode( const SDValue Op, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { switch (Op.getOpcode()) { default: break; case AArch64ISD::CSEL: { APInt KnownZero2, KnownOne2; DAG.computeKnownBits(Op->getOperand(0), KnownZero, KnownOne, Depth + 1); DAG.computeKnownBits(Op->getOperand(1), KnownZero2, KnownOne2, Depth + 1); KnownZero &= KnownZero2; KnownOne &= KnownOne2; break; } case ISD::INTRINSIC_W_CHAIN: { ConstantSDNode *CN = cast(Op->getOperand(1)); Intrinsic::ID IntID = static_cast(CN->getZExtValue()); switch (IntID) { default: return; case Intrinsic::aarch64_ldaxr: case Intrinsic::aarch64_ldxr: { unsigned BitWidth = KnownOne.getBitWidth(); EVT VT = cast(Op)->getMemoryVT(); unsigned MemBits = VT.getScalarSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); return; } } break; } case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: { unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); switch (IntNo) { default: break; case Intrinsic::aarch64_neon_umaxv: case Intrinsic::aarch64_neon_uminv: { // Figure out the datatype of the vector operand. The UMINV instruction // will zero extend the result, so we can mark as known zero all the // bits larger than the element datatype. 32-bit or larget doesn't need // this as those are legal types and will be handled by isel directly. 
MVT VT = Op.getOperand(1).getValueType().getSimpleVT(); unsigned BitWidth = KnownZero.getBitWidth(); if (VT == MVT::v8i8 || VT == MVT::v16i8) { assert(BitWidth >= 8 && "Unexpected width!"); APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8); KnownZero |= Mask; } else if (VT == MVT::v4i16 || VT == MVT::v8i16) { assert(BitWidth >= 16 && "Unexpected width!"); APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16); KnownZero |= Mask; } break; } break; } } } } MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL, EVT) const { return MVT::i64; } bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align, bool *Fast) const { if (Subtarget->requiresStrictAlign()) return false; if (Fast) { // Some CPUs are fine with unaligned stores except for 128-bit ones. *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 || // See comments in performSTORECombine() for more details about // these conditions. // Code that uses clang vector extensions can mark that it // wants unaligned accesses to be treated as fast by // underspecifying alignment to be 1 or 2. Align <= 2 || // Disregard v2i64. Memcpy lowering produces those and splitting // them regresses performance on micro-benchmarks and olden/bh. 
VT == MVT::v2i64; } return true; } FastISel * AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const { return AArch64::createFastISel(funcInfo, libInfo); } const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((AArch64ISD::NodeType)Opcode) { case AArch64ISD::FIRST_NUMBER: break; case AArch64ISD::CALL: return "AArch64ISD::CALL"; case AArch64ISD::ADRP: return "AArch64ISD::ADRP"; case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow"; case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot"; case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG"; case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND"; case AArch64ISD::CSEL: return "AArch64ISD::CSEL"; case AArch64ISD::FCSEL: return "AArch64ISD::FCSEL"; case AArch64ISD::CSINV: return "AArch64ISD::CSINV"; case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG"; case AArch64ISD::CSINC: return "AArch64ISD::CSINC"; case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER"; case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ"; case AArch64ISD::ADC: return "AArch64ISD::ADC"; case AArch64ISD::SBC: return "AArch64ISD::SBC"; case AArch64ISD::ADDS: return "AArch64ISD::ADDS"; case AArch64ISD::SUBS: return "AArch64ISD::SUBS"; case AArch64ISD::ADCS: return "AArch64ISD::ADCS"; case AArch64ISD::SBCS: return "AArch64ISD::SBCS"; case AArch64ISD::ANDS: return "AArch64ISD::ANDS"; case AArch64ISD::CCMP: return "AArch64ISD::CCMP"; case AArch64ISD::CCMN: return "AArch64ISD::CCMN"; case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP"; case AArch64ISD::FCMP: return "AArch64ISD::FCMP"; case AArch64ISD::DUP: return "AArch64ISD::DUP"; case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8"; case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16"; case AArch64ISD::DUPLANE32: return "AArch64ISD::DUPLANE32"; case AArch64ISD::DUPLANE64: return "AArch64ISD::DUPLANE64"; case AArch64ISD::MOVI: return "AArch64ISD::MOVI"; case 
AArch64ISD::MOVIshift: return "AArch64ISD::MOVIshift"; case AArch64ISD::MOVIedit: return "AArch64ISD::MOVIedit"; case AArch64ISD::MOVImsl: return "AArch64ISD::MOVImsl"; case AArch64ISD::FMOV: return "AArch64ISD::FMOV"; case AArch64ISD::MVNIshift: return "AArch64ISD::MVNIshift"; case AArch64ISD::MVNImsl: return "AArch64ISD::MVNImsl"; case AArch64ISD::BICi: return "AArch64ISD::BICi"; case AArch64ISD::ORRi: return "AArch64ISD::ORRi"; case AArch64ISD::BSL: return "AArch64ISD::BSL"; case AArch64ISD::NEG: return "AArch64ISD::NEG"; case AArch64ISD::EXTR: return "AArch64ISD::EXTR"; case AArch64ISD::ZIP1: return "AArch64ISD::ZIP1"; case AArch64ISD::ZIP2: return "AArch64ISD::ZIP2"; case AArch64ISD::UZP1: return "AArch64ISD::UZP1"; case AArch64ISD::UZP2: return "AArch64ISD::UZP2"; case AArch64ISD::TRN1: return "AArch64ISD::TRN1"; case AArch64ISD::TRN2: return "AArch64ISD::TRN2"; case AArch64ISD::REV16: return "AArch64ISD::REV16"; case AArch64ISD::REV32: return "AArch64ISD::REV32"; case AArch64ISD::REV64: return "AArch64ISD::REV64"; case AArch64ISD::EXT: return "AArch64ISD::EXT"; case AArch64ISD::VSHL: return "AArch64ISD::VSHL"; case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR"; case AArch64ISD::VASHR: return "AArch64ISD::VASHR"; case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ"; case AArch64ISD::CMGE: return "AArch64ISD::CMGE"; case AArch64ISD::CMGT: return "AArch64ISD::CMGT"; case AArch64ISD::CMHI: return "AArch64ISD::CMHI"; case AArch64ISD::CMHS: return "AArch64ISD::CMHS"; case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ"; case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE"; case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT"; case AArch64ISD::CMEQz: return "AArch64ISD::CMEQz"; case AArch64ISD::CMGEz: return "AArch64ISD::CMGEz"; case AArch64ISD::CMGTz: return "AArch64ISD::CMGTz"; case AArch64ISD::CMLEz: return "AArch64ISD::CMLEz"; case AArch64ISD::CMLTz: return "AArch64ISD::CMLTz"; case AArch64ISD::FCMEQz: return "AArch64ISD::FCMEQz"; case AArch64ISD::FCMGEz: return 
"AArch64ISD::FCMGEz"; case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz"; case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz"; case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz"; case AArch64ISD::SADDV: return "AArch64ISD::SADDV"; case AArch64ISD::UADDV: return "AArch64ISD::UADDV"; case AArch64ISD::SMINV: return "AArch64ISD::SMINV"; case AArch64ISD::UMINV: return "AArch64ISD::UMINV"; case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV"; case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV"; case AArch64ISD::NOT: return "AArch64ISD::NOT"; case AArch64ISD::BIT: return "AArch64ISD::BIT"; case AArch64ISD::CBZ: return "AArch64ISD::CBZ"; case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ"; case AArch64ISD::TBZ: return "AArch64ISD::TBZ"; case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ"; case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN"; case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH"; case AArch64ISD::SITOF: return "AArch64ISD::SITOF"; case AArch64ISD::UITOF: return "AArch64ISD::UITOF"; case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST"; case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I"; case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I"; case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I"; case AArch64ISD::URSHR_I: return "AArch64ISD::URSHR_I"; case AArch64ISD::SQSHLU_I: return "AArch64ISD::SQSHLU_I"; case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge"; case AArch64ISD::LD2post: return "AArch64ISD::LD2post"; case AArch64ISD::LD3post: return "AArch64ISD::LD3post"; case AArch64ISD::LD4post: return "AArch64ISD::LD4post"; case AArch64ISD::ST2post: return "AArch64ISD::ST2post"; case AArch64ISD::ST3post: return "AArch64ISD::ST3post"; case AArch64ISD::ST4post: return "AArch64ISD::ST4post"; case AArch64ISD::LD1x2post: return "AArch64ISD::LD1x2post"; case AArch64ISD::LD1x3post: return "AArch64ISD::LD1x3post"; case AArch64ISD::LD1x4post: return "AArch64ISD::LD1x4post"; case AArch64ISD::ST1x2post: return "AArch64ISD::ST1x2post"; case 
AArch64ISD::ST1x3post: return "AArch64ISD::ST1x3post"; case AArch64ISD::ST1x4post: return "AArch64ISD::ST1x4post"; case AArch64ISD::LD1DUPpost: return "AArch64ISD::LD1DUPpost"; case AArch64ISD::LD2DUPpost: return "AArch64ISD::LD2DUPpost"; case AArch64ISD::LD3DUPpost: return "AArch64ISD::LD3DUPpost"; case AArch64ISD::LD4DUPpost: return "AArch64ISD::LD4DUPpost"; case AArch64ISD::LD1LANEpost: return "AArch64ISD::LD1LANEpost"; case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost"; case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost"; case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost"; case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost"; case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost"; case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost"; case AArch64ISD::SMULL: return "AArch64ISD::SMULL"; case AArch64ISD::UMULL: return "AArch64ISD::UMULL"; case AArch64ISD::FRECPE: return "AArch64ISD::FRECPE"; case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS"; case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE"; case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS"; } return nullptr; } MachineBasicBlock * AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI, MachineBasicBlock *MBB) const { // We materialise the F128CSEL pseudo-instruction as some control flow and a // phi node: // OrigBB: // [... previous instrs leading to comparison ...] 
// b.ne TrueBB // b EndBB // TrueBB: // ; Fallthrough // EndBB: // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB] MachineFunction *MF = MBB->getParent(); const TargetInstrInfo *TII = Subtarget->getInstrInfo(); const BasicBlock *LLVM_BB = MBB->getBasicBlock(); DebugLoc DL = MI.getDebugLoc(); MachineFunction::iterator It = ++MBB->getIterator(); unsigned DestReg = MI.getOperand(0).getReg(); unsigned IfTrueReg = MI.getOperand(1).getReg(); unsigned IfFalseReg = MI.getOperand(2).getReg(); unsigned CondCode = MI.getOperand(3).getImm(); bool NZCVKilled = MI.getOperand(4).isKill(); MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB); MF->insert(It, TrueBB); MF->insert(It, EndBB); // Transfer rest of current basic-block to EndBB EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)), MBB->end()); EndBB->transferSuccessorsAndUpdatePHIs(MBB); BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB); BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB); MBB->addSuccessor(TrueBB); MBB->addSuccessor(EndBB); // TrueBB falls through to the end. TrueBB->addSuccessor(EndBB); if (!NZCVKilled) { TrueBB->addLiveIn(AArch64::NZCV); EndBB->addLiveIn(AArch64::NZCV); } BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg) .addReg(IfTrueReg) .addMBB(TrueBB) .addReg(IfFalseReg) .addMBB(MBB); MI.eraseFromParent(); return EndBB; } MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *BB) const { switch (MI.getOpcode()) { default: #ifndef NDEBUG MI.dump(); #endif llvm_unreachable("Unexpected instruction for custom inserter!"); case AArch64::F128CSEL: return EmitF128CSEL(MI, BB); case TargetOpcode::STACKMAP: case TargetOpcode::PATCHPOINT: return emitPatchPoint(MI, BB); } } //===----------------------------------------------------------------------===// // AArch64 Lowering private implementation. 
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // Lowering Code //===----------------------------------------------------------------------===// /// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64 /// CC static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) { switch (CC) { default: llvm_unreachable("Unknown condition code!"); case ISD::SETNE: return AArch64CC::NE; case ISD::SETEQ: return AArch64CC::EQ; case ISD::SETGT: return AArch64CC::GT; case ISD::SETGE: return AArch64CC::GE; case ISD::SETLT: return AArch64CC::LT; case ISD::SETLE: return AArch64CC::LE; case ISD::SETUGT: return AArch64CC::HI; case ISD::SETUGE: return AArch64CC::HS; case ISD::SETULT: return AArch64CC::LO; case ISD::SETULE: return AArch64CC::LS; } } /// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC. static void changeFPCCToAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2) { CondCode2 = AArch64CC::AL; switch (CC) { default: llvm_unreachable("Unknown FP condition!"); case ISD::SETEQ: case ISD::SETOEQ: CondCode = AArch64CC::EQ; break; case ISD::SETGT: case ISD::SETOGT: CondCode = AArch64CC::GT; break; case ISD::SETGE: case ISD::SETOGE: CondCode = AArch64CC::GE; break; case ISD::SETOLT: CondCode = AArch64CC::MI; break; case ISD::SETOLE: CondCode = AArch64CC::LS; break; case ISD::SETONE: CondCode = AArch64CC::MI; CondCode2 = AArch64CC::GT; break; case ISD::SETO: CondCode = AArch64CC::VC; break; case ISD::SETUO: CondCode = AArch64CC::VS; break; case ISD::SETUEQ: CondCode = AArch64CC::EQ; CondCode2 = AArch64CC::VS; break; case ISD::SETUGT: CondCode = AArch64CC::HI; break; case ISD::SETUGE: CondCode = AArch64CC::PL; break; case ISD::SETLT: case ISD::SETULT: CondCode = AArch64CC::LT; break; case ISD::SETLE: case ISD::SETULE: CondCode = AArch64CC::LE; break; case ISD::SETNE: case 
ISD::SETUNE: CondCode = AArch64CC::NE; break; } } /// Convert a DAG fp condition code to an AArch64 CC. /// This differs from changeFPCCToAArch64CC in that it returns cond codes that /// should be AND'ed instead of OR'ed. static void changeFPCCToANDAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2) { CondCode2 = AArch64CC::AL; switch (CC) { default: changeFPCCToAArch64CC(CC, CondCode, CondCode2); assert(CondCode2 == AArch64CC::AL); break; case ISD::SETONE: // (a one b) // == ((a olt b) || (a ogt b)) // == ((a ord b) && (a une b)) CondCode = AArch64CC::VC; CondCode2 = AArch64CC::NE; break; case ISD::SETUEQ: // (a ueq b) // == ((a uno b) || (a oeq b)) // == ((a ule b) && (a uge b)) CondCode = AArch64CC::PL; CondCode2 = AArch64CC::LE; break; } } /// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 /// CC usable with the vector instructions. Fewer operations are available /// without a real NZCV register, so we have to use less efficient combinations /// to get the same effect. static void changeVectorFPCCToAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2, bool &Invert) { Invert = false; switch (CC) { default: // Mostly the scalar mappings work fine. changeFPCCToAArch64CC(CC, CondCode, CondCode2); break; case ISD::SETUO: Invert = true; LLVM_FALLTHROUGH; case ISD::SETO: CondCode = AArch64CC::MI; CondCode2 = AArch64CC::GE; break; case ISD::SETUEQ: case ISD::SETULT: case ISD::SETULE: case ISD::SETUGT: case ISD::SETUGE: // All of the compare-mask comparisons are ordered, but we can switch // between the two by a double inversion. E.g. ULE == !OGT. Invert = true; changeFPCCToAArch64CC(getSetCCInverse(CC, false), CondCode, CondCode2); break; } } static bool isLegalArithImmed(uint64_t C) { // Matches AArch64DAGToDAGISel::SelectArithImmed(). 
return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0); } static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, const SDLoc &dl, SelectionDAG &DAG) { EVT VT = LHS.getValueType(); if (VT.isFloatingPoint()) { assert(VT != MVT::f128); if (VT == MVT::f16) { LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS); RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS); VT = MVT::f32; } return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS); } // The CMP instruction is just an alias for SUBS, and representing it as // SUBS means that it's possible to get CSE with subtract operations. // A later phase can perform the optimization of setting the destination // register to WZR/XZR if it ends up being unused. unsigned Opcode = AArch64ISD::SUBS; if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { // We'd like to combine a (CMP op1, (sub 0, op2) into a CMN instruction on // the grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags // can be set differently by this operation. It comes down to whether // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then // everything is fine. If not then the optimization is wrong. Thus general // comparisons are only valid if op2 != 0. // So, finally, the only LLVM-native comparisons that don't mention C and V // are SETEQ and SETNE. They're the only ones we can safely use CMN for in // the absence of information about op2. Opcode = AArch64ISD::ADDS; RHS = RHS.getOperand(1); } else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) { // Similarly, (CMP (and X, Y), 0) can be implemented with a TST // (a.k.a. ANDS) except that the flags are only guaranteed to work for one // of the signed comparisons. 
Opcode = AArch64ISD::ANDS; RHS = LHS.getOperand(1); LHS = LHS.getOperand(0); } return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS) .getValue(1); } /// \defgroup AArch64CCMP CMP;CCMP matching /// /// These functions deal with the formation of CMP;CCMP;... sequences. /// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of /// a comparison. They set the NZCV flags to a predefined value if their /// predicate is false. This allows to express arbitrary conjunctions, for /// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B))))" /// expressed as: /// cmp A /// ccmp B, inv(CB), CA /// check for CB flags /// /// In general we can create code for arbitrary "... (and (and A B) C)" /// sequences. We can also implement some "or" expressions, because "(or A B)" /// is equivalent to "not (and (not A) (not B))" and we can implement some /// negation operations: /// We can negate the results of a single comparison by inverting the flags /// used when the predicate fails and inverting the flags tested in the next /// instruction; We can also negate the results of the whole previous /// conditional compare sequence by inverting the flags tested in the next /// instruction. However there is no way to negate the result of a partial /// sequence. /// /// Therefore on encountering an "or" expression we can negate the subtree on /// one side and have to be able to push the negate to the leafs of the subtree /// on the other side (see also the comments in code). As complete example: /// "or (or (setCA (cmp A)) (setCB (cmp B))) /// (and (setCC (cmp C)) (setCD (cmp D)))" /// is transformed to /// "not (and (not (and (setCC (cmp C)) (setCC (cmp D)))) /// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))" /// and implemented as: /// cmp C /// ccmp D, inv(CD), CC /// ccmp A, CA, inv(CD) /// ccmp B, CB, inv(CA) /// check for CB flags /// A counterexample is "or (and A B) (and C D)" which cannot be implemented /// by conditional compare sequences. 
/// @{ /// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate. static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG) { unsigned Opcode = 0; if (LHS.getValueType().isFloatingPoint()) { assert(LHS.getValueType() != MVT::f128); if (LHS.getValueType() == MVT::f16) { LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS); RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS); } Opcode = AArch64ISD::FCCMP; } else if (RHS.getOpcode() == ISD::SUB) { SDValue SubOp0 = RHS.getOperand(0); if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { // See emitComparison() on why we can only do this for SETEQ and SETNE. Opcode = AArch64ISD::CCMN; RHS = RHS.getOperand(1); } } if (Opcode == 0) Opcode = AArch64ISD::CCMP; SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC); AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC); unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC); SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32); return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp); } /// Returns true if @p Val is a tree of AND/OR/SETCC operations. /// CanPushNegate is set to true if we can push a negate operation through /// the tree in a was that we are left with AND operations and negate operations /// at the leafs only. i.e. "not (or (or x y) z)" can be changed to /// "and (and (not x) (not y)) (not z)"; "not (or (and x y) z)" cannot be /// brought into such a form. static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanNegate, unsigned Depth = 0) { if (!Val.hasOneUse()) return false; unsigned Opcode = Val->getOpcode(); if (Opcode == ISD::SETCC) { if (Val->getOperand(0).getValueType() == MVT::f128) return false; CanNegate = true; return true; } // Protect against exponential runtime and stack overflow. 
if (Depth > 6) return false; if (Opcode == ISD::AND || Opcode == ISD::OR) { SDValue O0 = Val->getOperand(0); SDValue O1 = Val->getOperand(1); bool CanNegateL; if (!isConjunctionDisjunctionTree(O0, CanNegateL, Depth+1)) return false; bool CanNegateR; if (!isConjunctionDisjunctionTree(O1, CanNegateR, Depth+1)) return false; if (Opcode == ISD::OR) { // For an OR expression we need to be able to negate at least one side or // we cannot do the transformation at all. if (!CanNegateL && !CanNegateR) return false; // We can however change a (not (or x y)) to (and (not x) (not y)) if we // can negate the x and y subtrees. CanNegate = CanNegateL && CanNegateR; } else { // If the operands are OR expressions then we finally need to negate their // outputs, we can only do that for the operand with emitted last by // negating OutCC, not for both operands. bool NeedsNegOutL = O0->getOpcode() == ISD::OR; bool NeedsNegOutR = O1->getOpcode() == ISD::OR; if (NeedsNegOutL && NeedsNegOutR) return false; // We cannot negate an AND operation (it would become an OR), CanNegate = false; } return true; } return false; } /// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain /// of CCMP/CFCMP ops. See @ref AArch64CCMP. /// Tries to transform the given i1 producing node @p Val to a series compare /// and conditional compare operations. @returns an NZCV flags producing node /// and sets @p OutCC to the flags that should be tested or returns SDValue() if /// transformation was not possible. /// On recursive invocations @p PushNegate may be set to true to have negation /// effects pushed to the tree leafs; @p Predicate is an NZCV flag predicate /// for the comparisons in the current subtree; @p Depth limits the search /// depth to avoid stack overflow. 
static SDValue emitConjunctionDisjunctionTreeRec(SelectionDAG &DAG, SDValue Val,
                                                 AArch64CC::CondCode &OutCC,
                                                 bool Negate, SDValue CCOp,
                                                 AArch64CC::CondCode Predicate) {
  // We're at a tree leaf, produce a conditional comparison operation.
  unsigned Opcode = Val->getOpcode();
  if (Opcode == ISD::SETCC) {
    SDValue LHS = Val->getOperand(0);
    SDValue RHS = Val->getOperand(1);
    // Operand 2 of a SETCC node holds its condition code.
    ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
    bool isInteger = LHS.getValueType().isInteger();
    if (Negate)
      CC = getSetCCInverse(CC, isInteger);
    SDLoc DL(Val);
    // Determine OutCC and handle FP special case.
    if (isInteger) {
      OutCC = changeIntCCToAArch64CC(CC);
    } else {
      assert(LHS.getValueType().isFloatingPoint());
      AArch64CC::CondCode ExtraCC;
      changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
      // Some floating point conditions can't be tested with a single condition
      // code. Construct an additional comparison in this case.
      if (ExtraCC != AArch64CC::AL) {
        SDValue ExtraCmp;
        if (!CCOp.getNode())
          ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
        else
          ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
                                               ExtraCC, DL, DAG);
        // The comparison emitted below is then chained onto the extra one.
        CCOp = ExtraCmp;
        Predicate = ExtraCC;
      }
    }

    // Produce a normal comparison if we are first in the chain
    if (!CCOp)
      return emitComparison(LHS, RHS, CC, DL, DAG);
    // Otherwise produce a ccmp.
    return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
                                     DAG);
  }
  assert((Opcode == ISD::AND || (Opcode == ISD::OR && Val->hasOneUse())) &&
         "Valid conjunction/disjunction tree");

  // Check if both sides can be transformed.
  SDValue LHS = Val->getOperand(0);
  SDValue RHS = Val->getOperand(1);

  // In case of an OR we need to negate our operands and the result.
  // (A or B) <=> not(not(A) and not(B))
  bool NegateOpsAndResult = Opcode == ISD::OR;
  // We can negate the results of all previous operations by inverting the
  // predicate flags giving us a free negation for one side. The other side
  // must be negatable by itself.
  if (NegateOpsAndResult) {
    // See which side we can negate.
    bool CanNegateL;
    bool isValidL = isConjunctionDisjunctionTree(LHS, CanNegateL);
    assert(isValidL && "Valid conjunction/disjunction tree");
    (void)isValidL;

#ifndef NDEBUG
    bool CanNegateR;
    bool isValidR = isConjunctionDisjunctionTree(RHS, CanNegateR);
    assert(isValidR && "Valid conjunction/disjunction tree");
    assert((CanNegateL || CanNegateR) && "Valid conjunction/disjunction tree");
#endif

    // Order the side which we cannot negate to RHS so we can emit it first.
    if (!CanNegateL)
      std::swap(LHS, RHS);
  } else {
    bool NeedsNegOutL = LHS->getOpcode() == ISD::OR;
    assert((!NeedsNegOutL || RHS->getOpcode() != ISD::OR) &&
           "Valid conjunction/disjunction tree");
    // Order the side where we need to negate the output flags to RHS so it
    // gets emitted first.
    if (NeedsNegOutL)
      std::swap(LHS, RHS);
  }

  // Emit RHS. If we want to negate the tree we only need to push a negate
  // through if we are already in a PushNegate case, otherwise we can negate
  // the "flags to test" afterwards.
  AArch64CC::CondCode RHSCC;
  SDValue CmpR = emitConjunctionDisjunctionTreeRec(DAG, RHS, RHSCC, Negate,
                                                   CCOp, Predicate);
  if (NegateOpsAndResult && !Negate)
    RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
  // Emit LHS. We may need to negate it.
  SDValue CmpL = emitConjunctionDisjunctionTreeRec(DAG, LHS, OutCC,
                                                   NegateOpsAndResult, CmpR,
                                                   RHSCC);
  // If we transformed an OR to an AND then we have to negate the result
  // (or absorb the Negate parameter).
  if (NegateOpsAndResult && !Negate)
    OutCC = AArch64CC::getInvertedCondCode(OutCC);
  return CmpL;
}

/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
/// of CCMP/FCCMP ops. See @ref AArch64CCMP.
/// \see emitConjunctionDisjunctionTreeRec().
static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC) { bool CanNegate; if (!isConjunctionDisjunctionTree(Val, CanNegate)) return SDValue(); return emitConjunctionDisjunctionTreeRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL); } /// @} static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AArch64cc, SelectionDAG &DAG, const SDLoc &dl) { if (ConstantSDNode *RHSC = dyn_cast(RHS.getNode())) { EVT VT = RHS.getValueType(); uint64_t C = RHSC->getZExtValue(); if (!isLegalArithImmed(C)) { // Constant does not fit, try adjusting it by one? switch (CC) { default: break; case ISD::SETLT: case ISD::SETGE: if ((VT == MVT::i32 && C != 0x80000000 && isLegalArithImmed((uint32_t)(C - 1))) || (VT == MVT::i64 && C != 0x80000000ULL && isLegalArithImmed(C - 1ULL))) { CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1; RHS = DAG.getConstant(C, dl, VT); } break; case ISD::SETULT: case ISD::SETUGE: if ((VT == MVT::i32 && C != 0 && isLegalArithImmed((uint32_t)(C - 1))) || (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) { CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1; RHS = DAG.getConstant(C, dl, VT); } break; case ISD::SETLE: case ISD::SETGT: if ((VT == MVT::i32 && C != INT32_MAX && isLegalArithImmed((uint32_t)(C + 1))) || (VT == MVT::i64 && C != INT64_MAX && isLegalArithImmed(C + 1ULL))) { CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1; RHS = DAG.getConstant(C, dl, VT); } break; case ISD::SETULE: case ISD::SETUGT: if ((VT == MVT::i32 && C != UINT32_MAX && isLegalArithImmed((uint32_t)(C + 1))) || (VT == MVT::i64 && C != UINT64_MAX && isLegalArithImmed(C + 1ULL))) { CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; C = (VT == MVT::i32) ? 
(uint32_t)(C + 1) : C + 1; RHS = DAG.getConstant(C, dl, VT); } break; } } } SDValue Cmp; AArch64CC::CondCode AArch64CC; if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa(RHS)) { const ConstantSDNode *RHSC = cast(RHS); // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095. // For the i8 operand, the largest immediate is 255, so this can be easily // encoded in the compare instruction. For the i16 operand, however, the // largest immediate cannot be encoded in the compare. // Therefore, use a sign extending load and cmn to avoid materializing the // -1 constant. For example, // movz w1, #65535 // ldrh w0, [x0, #0] // cmp w0, w1 // > // ldrsh w0, [x0, #0] // cmn w0, #1 // Fundamental, we're relying on the property that (zext LHS) == (zext RHS) // if and only if (sext LHS) == (sext RHS). The checks are in place to // ensure both the LHS and RHS are truly zero extended and to make sure the // transformation is profitable. if ((RHSC->getZExtValue() >> 16 == 0) && isa(LHS) && cast(LHS)->getExtensionType() == ISD::ZEXTLOAD && cast(LHS)->getMemoryVT() == MVT::i16 && LHS.getNode()->hasNUsesOfValue(1, 0)) { int16_t ValueofRHS = cast(RHS)->getZExtValue(); if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) { SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS, DAG.getValueType(MVT::i16)); Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl, RHS.getValueType()), CC, dl, DAG); AArch64CC = changeIntCCToAArch64CC(CC); } } if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) { if ((Cmp = emitConjunctionDisjunctionTree(DAG, LHS, AArch64CC))) { if ((CC == ISD::SETNE) ^ RHSC->isNullValue()) AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC); } } } if (!Cmp) { Cmp = emitComparison(LHS, RHS, CC, dl, DAG); AArch64CC = changeIntCCToAArch64CC(CC); } AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC); return Cmp; } static std::pair getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { assert((Op.getValueType() 
== MVT::i32 || Op.getValueType() == MVT::i64) && "Unsupported value type"); SDValue Value, Overflow; SDLoc DL(Op); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); unsigned Opc = 0; switch (Op.getOpcode()) { default: llvm_unreachable("Unknown overflow instruction!"); case ISD::SADDO: Opc = AArch64ISD::ADDS; CC = AArch64CC::VS; break; case ISD::UADDO: Opc = AArch64ISD::ADDS; CC = AArch64CC::HS; break; case ISD::SSUBO: Opc = AArch64ISD::SUBS; CC = AArch64CC::VS; break; case ISD::USUBO: Opc = AArch64ISD::SUBS; CC = AArch64CC::LO; break; // Multiply needs a little bit extra work. case ISD::SMULO: case ISD::UMULO: { CC = AArch64CC::NE; bool IsSigned = Op.getOpcode() == ISD::SMULO; if (Op.getValueType() == MVT::i32) { unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; // For a 32 bit multiply with overflow check we want the instruction // selector to generate a widening multiply (SMADDL/UMADDL). For that we // need to generate the following pattern: // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b)) LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS); RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS); SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS); SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul, DAG.getConstant(0, DL, MVT::i64)); // On AArch64 the upper 32 bits are always zero extended for a 32 bit // operation. We need to clear out the upper 32 bits, because we used a // widening multiply that wrote all 64 bits. In the end this should be a // noop. Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add); if (IsSigned) { // The signed overflow check requires more than just a simple check for // any bit set in the upper 32 bits of the result. These bits could be // just the sign bits of a negative number. To perform the overflow // check we have to arithmetic shift right the 32nd bit of the result by // 31 bits. Then we compare the result to the upper 32 bits. 
SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add, DAG.getConstant(32, DL, MVT::i64)); UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits); SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value, DAG.getConstant(31, DL, MVT::i64)); // It is important that LowerBits is last, otherwise the arithmetic // shift will not be folded into the compare (SUBS). SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32); Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits) .getValue(1); } else { // The overflow check for unsigned multiply is easy. We only need to // check if any of the upper 32 bits are set. This can be done with a // CMP (shifted register). For that we need to generate the following // pattern: // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32) SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, DAG.getConstant(32, DL, MVT::i64)); SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, DAG.getConstant(0, DL, MVT::i64), UpperBits).getValue(1); } break; } assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type"); // For the 64 bit multiply Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS); if (IsSigned) { SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS); SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value, DAG.getConstant(63, DL, MVT::i64)); // It is important that LowerBits is last, otherwise the arithmetic // shift will not be folded into the compare (SUBS). SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits) .getValue(1); } else { SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS); SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, DAG.getConstant(0, DL, MVT::i64), UpperBits).getValue(1); } break; } } // switch (...) 
if (Opc) { SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32); // Emit the AArch64 operation with overflow check. Value = DAG.getNode(Opc, DL, VTs, LHS, RHS); Overflow = Value.getValue(1); } return std::make_pair(Value, Overflow); } SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG, RTLIB::Libcall Call) const { SmallVector Ops(Op->op_begin(), Op->op_end()); return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first; } static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) { SDValue Sel = Op.getOperand(0); SDValue Other = Op.getOperand(1); // If neither operand is a SELECT_CC, give up. if (Sel.getOpcode() != ISD::SELECT_CC) std::swap(Sel, Other); if (Sel.getOpcode() != ISD::SELECT_CC) return Op; // The folding we want to perform is: // (xor x, (select_cc a, b, cc, 0, -1) ) // --> // (csel x, (xor x, -1), cc ...) // // The latter will get matched to a CSINV instruction. ISD::CondCode CC = cast(Sel.getOperand(4))->get(); SDValue LHS = Sel.getOperand(0); SDValue RHS = Sel.getOperand(1); SDValue TVal = Sel.getOperand(2); SDValue FVal = Sel.getOperand(3); SDLoc dl(Sel); // FIXME: This could be generalized to non-integer comparisons. if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64) return Op; ConstantSDNode *CFVal = dyn_cast(FVal); ConstantSDNode *CTVal = dyn_cast(TVal); // The values aren't constants, this isn't the pattern we're looking for. if (!CFVal || !CTVal) return Op; // We can commute the SELECT_CC by inverting the condition. This // might be needed to make this fit into a CSINV pattern. if (CTVal->isAllOnesValue() && CFVal->isNullValue()) { std::swap(TVal, FVal); std::swap(CTVal, CFVal); CC = ISD::getSetCCInverse(CC, true); } // If the constants line up, perform the transform! 
if (CTVal->isNullValue() && CFVal->isAllOnesValue()) { SDValue CCVal; SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl); FVal = Other; TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other, DAG.getConstant(-1ULL, dl, Other.getValueType())); return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal, CCVal, Cmp); } return Op; } static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); // Let legalize expand this if it isn't a legal type yet. if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); SDVTList VTs = DAG.getVTList(VT, MVT::i32); unsigned Opc; bool ExtraOp = false; switch (Op.getOpcode()) { default: llvm_unreachable("Invalid code"); case ISD::ADDC: Opc = AArch64ISD::ADDS; break; case ISD::SUBC: Opc = AArch64ISD::SUBS; break; case ISD::ADDE: Opc = AArch64ISD::ADCS; ExtraOp = true; break; case ISD::SUBE: Opc = AArch64ISD::SBCS; ExtraOp = true; break; } if (!ExtraOp) return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1)); return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1), Op.getOperand(2)); } static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { // Let legalize expand this if it isn't a legal type yet. if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) return SDValue(); SDLoc dl(Op); AArch64CC::CondCode CC; // The actual operation that sets the overflow or carry flag. SDValue Value, Overflow; std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG); // We use 0 and 1 as false and true values. SDValue TVal = DAG.getConstant(1, dl, MVT::i32); SDValue FVal = DAG.getConstant(0, dl, MVT::i32); // We use an inverted condition, because the conditional select is inverted // too. This will allow it to be selected to a single instruction: // CSINC Wd, WZR, WZR, invert(cond). 
SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32); Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal, CCVal, Overflow); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); } // Prefetch operands are: // 1: Address to prefetch // 2: bool isWrite // 3: int locality (0 = no locality ... 3 = extreme locality) // 4: bool isDataCache static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) { SDLoc DL(Op); unsigned IsWrite = cast(Op.getOperand(2))->getZExtValue(); unsigned Locality = cast(Op.getOperand(3))->getZExtValue(); unsigned IsData = cast(Op.getOperand(4))->getZExtValue(); bool IsStream = !Locality; // When the locality number is set if (Locality) { // The front-end should have filtered out the out-of-range values assert(Locality <= 3 && "Prefetch locality out-of-range"); // The locality degree is the opposite of the cache speed. // Put the number the other way around. // The encoding starts at 0 for level 1 Locality = 3 - Locality; } // built the mask value encoding the expected behavior. 
unsigned PrfOp = (IsWrite << 4) | // Load/Store bit (!IsData << 3) | // IsDataCache bit (Locality << 1) | // Cache level bits (unsigned)IsStream; // Stream bit return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0), DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1)); } SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType() == MVT::f128 && "Unexpected lowering"); RTLIB::Libcall LC; LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType()); return LowerF128Call(Op, DAG, LC); } SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { if (Op.getOperand(0).getValueType() != MVT::f128) { // It's legal except when f128 is involved return Op; } RTLIB::Libcall LC; LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType()); // FP_ROUND node has a second operand indicating whether it is known to be // precise. That doesn't take part in the LibCall so we can't directly use // LowerF128Call. SDValue SrcVal = Op.getOperand(0); return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false, SDLoc(Op)).first; } static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp. // Any additional optimization in this function should be recorded // in the cost tables. EVT InVT = Op.getOperand(0).getValueType(); EVT VT = Op.getValueType(); unsigned NumElts = InVT.getVectorNumElements(); // f16 vectors are promoted to f32 before a conversion. 
if (InVT.getVectorElementType() == MVT::f16) { MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts); SDLoc dl(Op); return DAG.getNode( Op.getOpcode(), dl, Op.getValueType(), DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0))); } if (VT.getSizeInBits() < InVT.getSizeInBits()) { SDLoc dl(Op); SDValue Cv = DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(), Op.getOperand(0)); return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv); } if (VT.getSizeInBits() > InVT.getSizeInBits()) { SDLoc dl(Op); MVT ExtVT = MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()), VT.getVectorNumElements()); SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0)); return DAG.getNode(Op.getOpcode(), dl, VT, Ext); } // Type changing conversions are illegal. return Op; } SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { if (Op.getOperand(0).getValueType().isVector()) return LowerVectorFP_TO_INT(Op, DAG); // f16 conversions are promoted to f32. if (Op.getOperand(0).getValueType() == MVT::f16) { SDLoc dl(Op); return DAG.getNode( Op.getOpcode(), dl, Op.getValueType(), DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0))); } if (Op.getOperand(0).getValueType() != MVT::f128) { // It's legal except when f128 is involved return Op; } RTLIB::Libcall LC; if (Op.getOpcode() == ISD::FP_TO_SINT) LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType()); else LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType()); SmallVector Ops(Op->op_begin(), Op->op_end()); return makeLibCall(DAG, LC, Op.getValueType(), Ops, false, SDLoc(Op)).first; } static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp. // Any additional optimization in this function should be recorded // in the cost tables. 
EVT VT = Op.getValueType(); SDLoc dl(Op); SDValue In = Op.getOperand(0); EVT InVT = In.getValueType(); if (VT.getSizeInBits() < InVT.getSizeInBits()) { MVT CastVT = MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()), InVT.getVectorNumElements()); In = DAG.getNode(Op.getOpcode(), dl, CastVT, In); return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl)); } if (VT.getSizeInBits() > InVT.getSizeInBits()) { unsigned CastOpc = Op.getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; EVT CastVT = VT.changeVectorElementTypeToInteger(); In = DAG.getNode(CastOpc, dl, CastVT, In); return DAG.getNode(Op.getOpcode(), dl, VT, In); } return Op; } SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { if (Op.getValueType().isVector()) return LowerVectorINT_TO_FP(Op, DAG); // f16 conversions are promoted to f32. if (Op.getValueType() == MVT::f16) { SDLoc dl(Op); return DAG.getNode( ISD::FP_ROUND, dl, MVT::f16, DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)), DAG.getIntPtrConstant(0, dl)); } // i128 conversions are libcalls. if (Op.getOperand(0).getValueType() == MVT::i128) return SDValue(); // Other conversions are legal, unless it's to the completely software-based // fp128. if (Op.getValueType() != MVT::f128) return Op; RTLIB::Libcall LC; if (Op.getOpcode() == ISD::SINT_TO_FP) LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); else LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); return LowerF128Call(Op, DAG, LC); } SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { // For iOS, we want to call an alternative entry point: __sincos_stret, // which returns the values in two S / D registers. 
SDLoc dl(Op); SDValue Arg = Op.getOperand(0); EVT ArgVT = Arg.getValueType(); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); ArgListTy Args; ArgListEntry Entry; Entry.Node = Arg; Entry.Ty = ArgTy; Entry.isSExt = false; Entry.isZExt = false; Args.push_back(Entry); const char *LibcallName = (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret"; SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout())); StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) .setCallee(CallingConv::Fast, RetTy, Callee, std::move(Args)); std::pair CallResult = LowerCallTo(CLI); return CallResult.first; } static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) { if (Op.getValueType() != MVT::f16) return SDValue(); assert(Op.getOperand(0).getValueType() == MVT::i16); SDLoc DL(Op); Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0)); Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op); return SDValue( DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op, DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)), 0); } static EVT getExtensionTo64Bits(const EVT &OrigVT) { if (OrigVT.getSizeInBits() >= 64) return OrigVT; assert(OrigVT.isSimple() && "Expecting a simple value type"); MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy; switch (OrigSimpleTy) { default: llvm_unreachable("Unexpected Vector Type"); case MVT::v2i8: case MVT::v2i16: return MVT::v2i32; case MVT::v4i8: return MVT::v4i16; } } static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG, const EVT &OrigTy, const EVT &ExtTy, unsigned ExtOpcode) { // The vector originally had a size of OrigTy. It was then extended to ExtTy. // We expect the ExtTy to be 128-bits total. If the OrigTy is less than // 64-bits we need to insert a new extension so that it will be 64-bits. 
assert(ExtTy.is128BitVector() && "Unexpected extension size"); if (OrigTy.getSizeInBits() >= 64) return N; // Must extend size to at least 64 bits to be used as an operand for VMULL. EVT NewVT = getExtensionTo64Bits(OrigTy); return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N); } static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, bool isSigned) { EVT VT = N->getValueType(0); if (N->getOpcode() != ISD::BUILD_VECTOR) return false; for (const SDValue &Elt : N->op_values()) { if (ConstantSDNode *C = dyn_cast(Elt)) { unsigned EltSize = VT.getScalarSizeInBits(); unsigned HalfSize = EltSize / 2; if (isSigned) { if (!isIntN(HalfSize, C->getSExtValue())) return false; } else { if (!isUIntN(HalfSize, C->getZExtValue())) return false; } continue; } return false; } return true; } static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) { if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG, N->getOperand(0)->getValueType(0), N->getValueType(0), N->getOpcode()); assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR"); EVT VT = N->getValueType(0); SDLoc dl(N); unsigned EltSize = VT.getScalarSizeInBits() / 2; unsigned NumElts = VT.getVectorNumElements(); MVT TruncVT = MVT::getIntegerVT(EltSize); SmallVector Ops; for (unsigned i = 0; i != NumElts; ++i) { ConstantSDNode *C = cast(N->getOperand(i)); const APInt &CInt = C->getAPIntValue(); // Element types smaller than 32 bits are not legal, so use i32 elements. // The values are implicitly truncated so sext vs. zext doesn't matter. 
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32)); } return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops); } static bool isSignExtended(SDNode *N, SelectionDAG &DAG) { if (N->getOpcode() == ISD::SIGN_EXTEND) return true; if (isExtendedBUILD_VECTOR(N, DAG, true)) return true; return false; } static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { if (N->getOpcode() == ISD::ZERO_EXTEND) return true; if (isExtendedBUILD_VECTOR(N, DAG, false)) return true; return false; } static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { unsigned Opcode = N->getOpcode(); if (Opcode == ISD::ADD || Opcode == ISD::SUB) { SDNode *N0 = N->getOperand(0).getNode(); SDNode *N1 = N->getOperand(1).getNode(); return N0->hasOneUse() && N1->hasOneUse() && isSignExtended(N0, DAG) && isSignExtended(N1, DAG); } return false; } static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) { unsigned Opcode = N->getOpcode(); if (Opcode == ISD::ADD || Opcode == ISD::SUB) { SDNode *N0 = N->getOperand(0).getNode(); SDNode *N1 = N->getOperand(1).getNode(); return N0->hasOneUse() && N1->hasOneUse() && isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG); } return false; } static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { // Multiplications are only custom-lowered for 128-bit vectors so that // VMULL can be detected. Otherwise v2i64 multiplications are not legal. EVT VT = Op.getValueType(); assert(VT.is128BitVector() && VT.isInteger() && "unexpected type for custom-lowering ISD::MUL"); SDNode *N0 = Op.getOperand(0).getNode(); SDNode *N1 = Op.getOperand(1).getNode(); unsigned NewOpc = 0; bool isMLA = false; bool isN0SExt = isSignExtended(N0, DAG); bool isN1SExt = isSignExtended(N1, DAG); if (isN0SExt && isN1SExt) NewOpc = AArch64ISD::SMULL; else { bool isN0ZExt = isZeroExtended(N0, DAG); bool isN1ZExt = isZeroExtended(N1, DAG); if (isN0ZExt && isN1ZExt) NewOpc = AArch64ISD::UMULL; else if (isN1SExt || isN1ZExt) { // Look for (s/zext A + s/zext B) * (s/zext C). 
We want to turn these // into (s/zext A * s/zext C) + (s/zext B * s/zext C) if (isN1SExt && isAddSubSExt(N0, DAG)) { NewOpc = AArch64ISD::SMULL; isMLA = true; } else if (isN1ZExt && isAddSubZExt(N0, DAG)) { NewOpc = AArch64ISD::UMULL; isMLA = true; } else if (isN0ZExt && isAddSubZExt(N1, DAG)) { std::swap(N0, N1); NewOpc = AArch64ISD::UMULL; isMLA = true; } } if (!NewOpc) { if (VT == MVT::v2i64) // Fall through to expand this. It is not legal. return SDValue(); else // Other vector multiplications are legal. return Op; } } // Legalize to a S/UMULL instruction SDLoc DL(Op); SDValue Op0; SDValue Op1 = skipExtensionForVectorMULL(N1, DAG); if (!isMLA) { Op0 = skipExtensionForVectorMULL(N0, DAG); assert(Op0.getValueType().is64BitVector() && Op1.getValueType().is64BitVector() && "unexpected types for extended operands to VMULL"); return DAG.getNode(NewOpc, DL, VT, Op0, Op1); } // Optimizing (zext A + zext B) * C, to (S/UMULL A, C) + (S/UMULL B, C) during // isel lowering to take advantage of no-stall back to back s/umul + s/umla. // This is true for CPUs with accumulate forwarding such as Cortex-A53/A57 SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG); SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG); EVT Op1VT = Op1.getValueType(); return DAG.getNode(N0->getOpcode(), DL, VT, DAG.getNode(NewOpc, DL, VT, DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1), DAG.getNode(NewOpc, DL, VT, DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); } SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); SDLoc dl(Op); switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. 
case Intrinsic::thread_pointer: { EVT PtrVT = getPointerTy(DAG.getDataLayout()); return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT); } case Intrinsic::aarch64_neon_smax: return DAG.getNode(ISD::SMAX, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); case Intrinsic::aarch64_neon_umax: return DAG.getNode(ISD::UMAX, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); case Intrinsic::aarch64_neon_smin: return DAG.getNode(ISD::SMIN, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); case Intrinsic::aarch64_neon_umin: return DAG.getNode(ISD::UMIN, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } } SDValue AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("unimplemented operand"); return SDValue(); case ISD::BITCAST: return LowerBITCAST(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::VACOPY: return LowerVACOPY(Op, DAG); case ISD::VAARG: return LowerVAARG(Op, DAG); case ISD::ADDC: case ISD::ADDE: case ISD::SUBC: case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); case ISD::SADDO: case ISD::UADDO: case ISD::SSUBO: case ISD::USUBO: case ISD::SMULO: case ISD::UMULO: return LowerXALUO(Op, DAG); case ISD::FADD: return LowerF128Call(Op, DAG, RTLIB::ADD_F128); case ISD::FSUB: return LowerF128Call(Op, DAG, RTLIB::SUB_F128); case ISD::FMUL: return LowerF128Call(Op, DAG, RTLIB::MUL_F128); case ISD::FDIV: return LowerF128Call(Op, DAG, RTLIB::DIV_F128); 
case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); case ISD::SRA: case ISD::SRL: case ISD::SHL: return LowerVectorSRA_SRL_SHL(Op, DAG); case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); case ISD::SRL_PARTS: case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); case ISD::CTPOP: return LowerCTPOP(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::AND: return LowerVectorAND(Op, DAG); case ISD::OR: return LowerVectorOR(Op, DAG); case ISD::XOR: return LowerXOR(Op, DAG); case ISD::PREFETCH: return LowerPREFETCH(Op, DAG); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); } } //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// #include "AArch64GenCallingConv.inc" /// Selects the correct CCAssignFn for a given CallingConvention value. 
CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const { switch (CC) { default: llvm_unreachable("Unsupported calling convention."); case CallingConv::WebKit_JS: return CC_AArch64_WebKit_JS; case CallingConv::GHC: return CC_AArch64_GHC; case CallingConv::C: case CallingConv::Fast: case CallingConv::PreserveMost: case CallingConv::CXX_FAST_TLS: case CallingConv::Swift: if (!Subtarget->isTargetDarwin()) return CC_AArch64_AAPCS; return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS; } } CCAssignFn * AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const { return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS : RetCC_AArch64_AAPCS; } SDValue AArch64TargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); // Assign locations to all of the incoming arguments. SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); // At this point, Ins[].VT may already be promoted to i32. To correctly // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT. // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here // we use a special version of AnalyzeFormalArguments to pass in ValVT and // LocVT. unsigned NumArgs = Ins.size(); Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin(); unsigned CurArgIdx = 0; for (unsigned i = 0; i != NumArgs; ++i) { MVT ValVT = Ins[i].VT; if (Ins[i].isOrigArg()) { std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx); CurArgIdx = Ins[i].getOrigArgIndex(); // Get type of the original argument. 
EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(), /*AllowUnknown*/ true); MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other; // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16. if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8) ValVT = MVT::i8; else if (ActualMVT == MVT::i16) ValVT = MVT::i16; } CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false); bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo); assert(!Res && "Call operand has unhandled type"); (void)Res; } assert(ArgLocs.size() == Ins.size()); SmallVector ArgValues; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; if (Ins[i].Flags.isByVal()) { // Byval is used for HFAs in the PCS, but the system should work in a // non-compliant manner for larger structs. EVT PtrVT = getPointerTy(DAG.getDataLayout()); int Size = Ins[i].Flags.getByValSize(); unsigned NumRegs = (Size + 7) / 8; // FIXME: This works on big-endian for composite byvals, which are the common // case. It should also work for fundamental types too. unsigned FrameIdx = MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false); SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT); InVals.push_back(FrameIdxN); continue; } if (VA.isRegLoc()) { // Arguments stored in registers. EVT RegVT = VA.getLocVT(); SDValue ArgValue; const TargetRegisterClass *RC; if (RegVT == MVT::i32) RC = &AArch64::GPR32RegClass; else if (RegVT == MVT::i64) RC = &AArch64::GPR64RegClass; else if (RegVT == MVT::f16) RC = &AArch64::FPR16RegClass; else if (RegVT == MVT::f32) RC = &AArch64::FPR32RegClass; else if (RegVT == MVT::f64 || RegVT.is64BitVector()) RC = &AArch64::FPR64RegClass; else if (RegVT == MVT::f128 || RegVT.is128BitVector()) RC = &AArch64::FPR128RegClass; else llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); // Transform the arguments in physical registers into virtual ones. 
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT); // If this is an 8, 16 or 32-bit value, it is really passed promoted // to 64 bits. Insert an assert[sz]ext to capture this, then // truncate to the right size. switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::BCvt: ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue); break; case CCValAssign::AExt: case CCValAssign::SExt: case CCValAssign::ZExt: // SelectionDAGBuilder will insert appropriate AssertZExt & AssertSExt // nodes after our lowering. assert(RegVT == Ins[i].VT && "incorrect register location selected"); break; } InVals.push_back(ArgValue); } else { // VA.isRegLoc() assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem"); unsigned ArgOffset = VA.getLocMemOffset(); unsigned ArgSize = VA.getValVT().getSizeInBits() / 8; uint32_t BEAlign = 0; if (!Subtarget->isLittleEndian() && ArgSize < 8 && !Ins[i].Flags.isInConsecutiveRegs()) BEAlign = 8 - ArgSize; int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true); // Create load nodes to retrieve arguments from the stack. 
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue ArgValue; // For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT) ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; MVT MemVT = VA.getValVT(); switch (VA.getLocInfo()) { default: break; case CCValAssign::BCvt: MemVT = VA.getLocVT(); break; case CCValAssign::SExt: ExtType = ISD::SEXTLOAD; break; case CCValAssign::ZExt: ExtType = ISD::ZEXTLOAD; break; case CCValAssign::AExt: ExtType = ISD::EXTLOAD; break; } ArgValue = DAG.getExtLoad( ExtType, DL, VA.getLocVT(), Chain, FIN, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), MemVT); InVals.push_back(ArgValue); } } // varargs AArch64FunctionInfo *FuncInfo = MF.getInfo(); if (isVarArg) { if (!Subtarget->isTargetDarwin()) { // The AAPCS variadic function ABI is identical to the non-variadic // one. As a result there may be more arguments in registers and we should // save them for future reference. saveVarArgRegisters(CCInfo, DAG, DL, Chain); } // This will point to the next argument passed via stack. unsigned StackOffset = CCInfo.getNextStackOffset(); // We currently pass all varargs at 8-byte alignment. StackOffset = ((StackOffset + 7) & ~7); FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true)); } unsigned StackArgSize = CCInfo.getNextStackOffset(); bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) { // This is a non-standard ABI so by fiat I say we're allowed to make full // use of the stack area to be popped, which must be aligned to 16 bytes in // any case: StackArgSize = alignTo(StackArgSize, 16); // If we're expected to restore the stack (e.g. fastcc) then we'll be adding // a multiple of 16. FuncInfo->setArgumentStackToRestore(StackArgSize); // This realignment carries over to the available bytes below. Our own // callers will guarantee the space is free by giving an aligned value to // CALLSEQ_START. 
} // Even if we're not expected to free up the space, it's useful to know how // much is there while considering tail calls (because we can reuse it). FuncInfo->setBytesInStackArgArea(StackArgSize); return Chain; } void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL, SDValue &Chain) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); AArch64FunctionInfo *FuncInfo = MF.getInfo(); auto PtrVT = getPointerTy(DAG.getDataLayout()); SmallVector MemOps; static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7 }; static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs); unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs); unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR); int GPRIdx = 0; if (GPRSaveSize != 0) { GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false); SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT); for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) { unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass); SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); SDValue Store = DAG.getStore( Val.getValue(1), DL, Val, FIN, MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8)); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT)); } } FuncInfo->setVarArgsGPRIndex(GPRIdx); FuncInfo->setVarArgsGPRSize(GPRSaveSize); if (Subtarget->hasFPARMv8()) { static const MCPhysReg FPRArgRegs[] = { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7}; static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs); unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs); unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR); int FPRIdx = 0; if (FPRSaveSize != 0) { FPRIdx = MFI.CreateStackObject(FPRSaveSize, 16, 
false); SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT); for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) { unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass); SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128); SDValue Store = DAG.getStore( Val.getValue(1), DL, Val, FIN, MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16)); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(16, DL, PtrVT)); } } FuncInfo->setVarArgsFPRIndex(FPRIdx); FuncInfo->setVarArgsFPRSize(FPRSaveSize); } if (!MemOps.empty()) { Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); } } /// LowerCallResult - Lower the result values of a call into the /// appropriate copies out of appropriate physical registers. SDValue AArch64TargetLowering::LowerCallResult( SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl &InVals, bool isThisReturn, SDValue ThisVal) const { CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS : RetCC_AArch64_AAPCS; // Assign locations to each value returned by this call. SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC); // Copy all of the result registers out of their specified physreg. 
for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign VA = RVLocs[i]; // Pass 'this' value directly from the argument to return value, to avoid // reg unit interference if (i == 0 && isThisReturn) { assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 && "unexpected return calling convention register assignment"); InVals.push_back(ThisVal); continue; } SDValue Val = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag); Chain = Val.getValue(1); InFlag = Val.getValue(2); switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::BCvt: Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); break; } InVals.push_back(Val); } return Chain; } /// Return true if the calling convention is one that we can guarantee TCO for. static bool canGuaranteeTCO(CallingConv::ID CC) { return CC == CallingConv::Fast; } /// Return true if we might ever do TCO for calls with this calling convention. static bool mayTailCallThisCC(CallingConv::ID CC) { switch (CC) { case CallingConv::C: case CallingConv::PreserveMost: case CallingConv::Swift: return true; default: return canGuaranteeTCO(CC); } } bool AArch64TargetLowering::isEligibleForTailCallOptimization( SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, SelectionDAG &DAG) const { if (!mayTailCallThisCC(CalleeCC)) return false; MachineFunction &MF = DAG.getMachineFunction(); const Function *CallerF = MF.getFunction(); CallingConv::ID CallerCC = CallerF->getCallingConv(); bool CCMatch = CallerCC == CalleeCC; // Byval parameters hand the function a pointer directly into the stack area // we want to reuse during a tail call. Working around this *is* possible (see // X86) but less efficient and uglier in LowerCall. 
for (Function::const_arg_iterator i = CallerF->arg_begin(), e = CallerF->arg_end(); i != e; ++i) if (i->hasByValAttr()) return false; if (getTargetMachine().Options.GuaranteedTailCallOpt) return canGuaranteeTCO(CalleeCC) && CCMatch; // Externally-defined functions with weak linkage should not be // tail-called on AArch64 when the OS does not support dynamic // pre-emption of symbols, as the AAELF spec requires normal calls // to undefined weak functions to be replaced with a NOP or jump to the // next instruction. The behaviour of branch instructions in this // situation (as used for tail calls) is implementation-defined, so we // cannot rely on the linker replacing the tail call with a return. if (GlobalAddressSDNode *G = dyn_cast(Callee)) { const GlobalValue *GV = G->getGlobal(); const Triple &TT = getTargetMachine().getTargetTriple(); if (GV->hasExternalWeakLinkage() && (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO())) return false; } // Now we search for cases where we can use a tail call without changing the // ABI. Sibcall is used in some places (particularly gcc) to refer to this // concept. // I want anyone implementing a new calling convention to think long and hard // about this assert. assert((!isVarArg || CalleeCC == CallingConv::C) && "Unexpected variadic calling convention"); LLVMContext &C = *DAG.getContext(); if (isVarArg && !Outs.empty()) { // At least two cases here: if caller is fastcc then we can't have any // memory arguments (we'd be expected to clean up the stack afterwards). If // caller is C then we could potentially use its argument area. // FIXME: for now we take the most conservative of these in both cases: // disallow all variadic memory operands. 
SmallVector ArgLocs; CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C); CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true)); for (const CCValAssign &ArgLoc : ArgLocs) if (!ArgLoc.isRegLoc()) return false; } // Check that the call results are passed in the same way. if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, CCAssignFnForCall(CalleeCC, isVarArg), CCAssignFnForCall(CallerCC, isVarArg))) return false; // The callee has to preserve all registers the caller needs to preserve. const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); if (!CCMatch) { const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) return false; } // Nothing more to check if the callee is taking no arguments if (Outs.empty()) return true; SmallVector ArgLocs; CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C); CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg)); const AArch64FunctionInfo *FuncInfo = MF.getInfo(); // If the stack arguments for this call do not fit into our own save area then // the call cannot be made tail. if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) return false; const MachineRegisterInfo &MRI = MF.getRegInfo(); if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) return false; return true; } SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain, SelectionDAG &DAG, MachineFrameInfo &MFI, int ClobberedFI) const { SmallVector ArgChains; int64_t FirstByte = MFI.getObjectOffset(ClobberedFI); int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1; // Include the original chain at the beginning of the list. When this is // used by target LowerCall hooks, this helps legalize find the // CALLSEQ_BEGIN node. 
ArgChains.push_back(Chain); // Add a chain value for each stack argument corresponding for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(), UE = DAG.getEntryNode().getNode()->use_end(); U != UE; ++U) if (LoadSDNode *L = dyn_cast(*U)) if (FrameIndexSDNode *FI = dyn_cast(L->getBasePtr())) if (FI->getIndex() < 0) { int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex()); int64_t InLastByte = InFirstByte; InLastByte += MFI.getObjectSize(FI->getIndex()) - 1; if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) || (FirstByte <= InFirstByte && InFirstByte <= LastByte)) ArgChains.push_back(SDValue(L, 1)); } // Build a tokenfactor for all the chains. return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains); } bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const { return CallCC == CallingConv::Fast && TailCallOpt; } /// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain, /// and add input and output parameter nodes. SDValue AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc &DL = CLI.DL; SmallVector &Outs = CLI.Outs; SmallVector &OutVals = CLI.OutVals; SmallVector &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &IsTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool IsVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); bool IsThisReturn = false; AArch64FunctionInfo *FuncInfo = MF.getInfo(); bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; bool IsSibCall = false; if (IsTailCall) { // Check if it's really possible to do a tail call. 
IsTailCall = isEligibleForTailCallOptimization( Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG); if (!IsTailCall && CLI.CS && CLI.CS->isMustTailCall()) report_fatal_error("failed to perform tail call elimination on a call " "site marked musttail"); // A sibling call is one where we're under the usual C ABI and not planning // to change that but can still do a tail call: if (!TailCallOpt && IsTailCall) IsSibCall = true; if (IsTailCall) ++NumTailCalls; } // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); if (IsVarArg) { // Handle fixed and variable vector arguments differently. // Variable vector arguments always go into memory. unsigned NumArgs = Outs.size(); for (unsigned i = 0; i != NumArgs; ++i) { MVT ArgVT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/ !Outs[i].IsFixed); bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); assert(!Res && "Call operand has unhandled type"); (void)Res; } } else { // At this point, Outs[].VT may already be promoted to i32. To correctly // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT. // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here // we use a special version of AnalyzeCallOperands to pass in ValVT and // LocVT. unsigned NumArgs = Outs.size(); for (unsigned i = 0; i != NumArgs; ++i) { MVT ValVT = Outs[i].VT; // Get type of the original argument. EVT ActualVT = getValueType(DAG.getDataLayout(), CLI.getArgs()[Outs[i].OrigArgIndex].Ty, /*AllowUnknown*/ true); MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16. 
if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8) ValVT = MVT::i8; else if (ActualMVT == MVT::i16) ValVT = MVT::i16; CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false); bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo); assert(!Res && "Call operand has unhandled type"); (void)Res; } } // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); if (IsSibCall) { // Since we're not changing the ABI to make this a tail call, the memory // operands are already available in the caller's incoming argument space. NumBytes = 0; } // FPDiff is the byte offset of the call's argument area from the callee's. // Stores to callee stack arguments will be placed in FixedStackSlots offset // by this amount for a tail call. In a sibling call it must be 0 because the // caller will deallocate the entire stack and the callee still expects its // arguments to begin at SP+0. Completely unused for non-tail calls. int FPDiff = 0; if (IsTailCall && !IsSibCall) { unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea(); // Since callee will pop argument stack as a tail call, we must keep the // popped size 16-byte aligned. NumBytes = alignTo(NumBytes, 16); // FPDiff will be negative if this tail call requires more space than we // would automatically have in our incoming argument space. Positive if we // can actually shrink the stack. FPDiff = NumReusableBytes - NumBytes; // The stack pointer must be 16-byte aligned at all times it's used for a // memory operation, which in practice means at *all* times and in // particular across call boundaries. Therefore our own arguments started at // a 16-byte aligned SP and the delta applied for the tail call should // satisfy the same constraint. assert(FPDiff % 16 == 0 && "unaligned stack on tail call"); } // Adjust the stack pointer for the new arguments... 
// These operations are automatically eliminated by the prolog/epilog pass if (!IsSibCall) Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, DL, true), DL); SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP, getPointerTy(DAG.getDataLayout())); SmallVector, 8> RegsToPass; SmallVector MemOpChains; auto PtrVT = getPointerTy(DAG.getDataLayout()); // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e; ++i, ++realArgIdx) { CCValAssign &VA = ArgLocs[i]; SDValue Arg = OutVals[realArgIdx]; ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; // Promote the value if needed. switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::SExt: Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg); break; case CCValAssign::ZExt: Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg); break; case CCValAssign::AExt: if (Outs[realArgIdx].ArgVT == MVT::i1) { // AAPCS requires i1 to be zero-extended to 8-bits by the caller. 
Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg); Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg); } Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); break; case CCValAssign::BCvt: Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); break; case CCValAssign::FPExt: Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg); break; } if (VA.isRegLoc()) { - if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i64) { + if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() && + Outs[0].VT == MVT::i64) { assert(VA.getLocVT() == MVT::i64 && "unexpected calling convention register assignment"); assert(!Ins.empty() && Ins[0].VT == MVT::i64 && "unexpected use of 'returned'"); IsThisReturn = true; } RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else { assert(VA.isMemLoc()); SDValue DstAddr; MachinePointerInfo DstInfo; // FIXME: This works on big-endian for composite byvals, which are the // common case. It should also work for fundamental types too. uint32_t BEAlign = 0; unsigned OpSize = Flags.isByVal() ? Flags.getByValSize() * 8 : VA.getValVT().getSizeInBits(); OpSize = (OpSize + 7) / 8; if (!Subtarget->isLittleEndian() && !Flags.isByVal() && !Flags.isInConsecutiveRegs()) { if (OpSize < 8) BEAlign = 8 - OpSize; } unsigned LocMemOffset = VA.getLocMemOffset(); int32_t Offset = LocMemOffset + BEAlign; SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL); PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff); if (IsTailCall) { Offset = Offset + FPDiff; int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true); DstAddr = DAG.getFrameIndex(FI, PtrVT); DstInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); // Make sure any stack arguments overlapping with where we're storing // are loaded before this eventual operation. Otherwise they'll be // clobbered. 
Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI); } else { SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL); DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff); DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset); } if (Outs[i].Flags.isByVal()) { SDValue SizeNode = DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64); SDValue Cpy = DAG.getMemcpy( Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(), /*isVol = */ false, /*AlwaysInline = */ false, /*isTailCall = */ false, DstInfo, MachinePointerInfo()); MemOpChains.push_back(Cpy); } else { // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already // promoted to a legal register type i32, we should truncate Arg back to // i1/i8/i16. if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg); SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo); MemOpChains.push_back(Store); } } } if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; for (auto &RegToPass : RegsToPass) { Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first, RegToPass.second, InFlag); InFlag = Chain.getValue(1); } // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. 
if (getTargetMachine().getCodeModel() == CodeModel::Large && Subtarget->isTargetMachO()) { if (GlobalAddressSDNode *G = dyn_cast(Callee)) { const GlobalValue *GV = G->getGlobal(); bool InternalLinkage = GV->hasInternalLinkage(); if (InternalLinkage) Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0); else { Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT); Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee); } } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { const char *Sym = S->getSymbol(); Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT); Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee); } } else if (GlobalAddressSDNode *G = dyn_cast(Callee)) { const GlobalValue *GV = G->getGlobal(); Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0); } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { const char *Sym = S->getSymbol(); Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0); } // We don't usually want to end the call-sequence here because we would tidy // the frame up *after* the call, however in the ABI-changing tail-call case // we've carefully laid out the parameters so that when sp is reset they'll be // in the correct location. if (IsTailCall && !IsSibCall) { Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true), DAG.getIntPtrConstant(0, DL, true), InFlag, DL); InFlag = Chain.getValue(1); } std::vector Ops; Ops.push_back(Chain); Ops.push_back(Callee); if (IsTailCall) { // Each tail call may have to adjust the stack by a different amount, so // this information must travel along with the operation for eventual // consumption by emitEpilogue. Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32)); } // Add argument registers to the end of the list so that they are known live // into the call. 
for (auto &RegToPass : RegsToPass) Ops.push_back(DAG.getRegister(RegToPass.first, RegToPass.second.getValueType())); // Add a register mask operand representing the call-preserved registers. const uint32_t *Mask; const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); if (IsThisReturn) { // For 'this' returns, use the X0-preserving mask if applicable Mask = TRI->getThisReturnPreservedMask(MF, CallConv); if (!Mask) { IsThisReturn = false; Mask = TRI->getCallPreservedMask(MF, CallConv); } } else Mask = TRI->getCallPreservedMask(MF, CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); if (InFlag.getNode()) Ops.push_back(InFlag); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); // If we're doing a tall call, use a TC_RETURN here rather than an // actual call instruction. if (IsTailCall) { MF.getFrameInfo().setHasTailCall(); return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops); } // Returns a chain and a flag for retval copy to use. Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops); InFlag = Chain.getValue(1); uint64_t CalleePopBytes = DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0; Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true), DAG.getIntPtrConstant(CalleePopBytes, DL, true), InFlag, DL); if (!Ins.empty()) InFlag = Chain.getValue(1); // Handle result values, copying them out of physregs into vregs that we // return. return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG, InVals, IsThisReturn, IsThisReturn ? OutVals[0] : SDValue()); } bool AArch64TargetLowering::CanLowerReturn( CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const { CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS ? 
RetCC_AArch64_WebKit_JS : RetCC_AArch64_AAPCS; SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); return CCInfo.CheckReturn(Outs, RetCC); } SDValue AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const { CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS : RetCC_AArch64_AAPCS; SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeReturn(Outs, RetCC); // Copy the result values into the output registers. SDValue Flag; SmallVector RetOps(1, Chain); for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size(); ++i, ++realRVLocIdx) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); SDValue Arg = OutVals[realRVLocIdx]; switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: if (Outs[i].ArgVT == MVT::i1) { // AAPCS requires i1 to be zero-extended to i8 by the producer of the // value. This is strictly redundant on Darwin (which uses "zeroext // i1"), but will be optimised out before ISel. 
Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg); Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg); } break; case CCValAssign::BCvt: Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); break; } Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); if (I) { for (; *I; ++I) { if (AArch64::GPR64RegClass.contains(*I)) RetOps.push_back(DAG.getRegister(*I, MVT::i64)); else if (AArch64::FPR64RegClass.contains(*I)) RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64))); else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); } } RetOps[0] = Chain; // Update chain. // Add the flag if we have it. if (Flag.getNode()) RetOps.push_back(Flag); return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps); } //===----------------------------------------------------------------------===// // Other Lowering Code //===----------------------------------------------------------------------===// SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc DL(Op); const GlobalAddressSDNode *GN = cast(Op); const GlobalValue *GV = GN->getGlobal(); unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine()); assert(cast(Op)->getOffset() == 0 && "unexpected offset in global node"); // This also catched the large code model case for Darwin. if ((OpFlags & AArch64II::MO_GOT) != 0) { SDValue GotAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); // FIXME: Once remat is capable of dealing with instructions with register // operands, expand this into two nodes instead of using a wrapper node. 
return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr); } if (getTargetMachine().getCodeModel() == CodeModel::Large) { const unsigned char MO_NC = AArch64II::MO_NC; return DAG.getNode( AArch64ISD::WrapperLarge, DL, PtrVT, DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G3), DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G2 | MO_NC), DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G1 | MO_NC), DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G0 | MO_NC)); } else { // Use ADRP/ADD or ADRP/LDR for everything else: the small model on ELF and // the only correct model on Darwin. SDValue Hi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags | AArch64II::MO_PAGE); unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC; SDValue Lo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, LoFlags); SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); } } /// \brief Convert a TLS address reference into the correct sequence of loads /// and calls to compute the variable's address (for Darwin, currently) and /// return an SDValue containing the final node. /// Darwin only has one TLS scheme which must be capable of dealing with the /// fully general situation, in the worst case. This means: /// + "extern __thread" declaration. /// + Defined in a possibly unknown dynamic library. /// /// The general system is that each __thread variable has a [3 x i64] descriptor /// which contains information used by the runtime to calculate the address. The /// only part of this the compiler needs to know about is the first xword, which /// contains a function pointer that must be called with the address of the /// entire descriptor in "x0". /// /// Since this descriptor may be in a different unit, in general even the /// descriptor must be accessed via an indirect load. 
The "ideal" code sequence /// is: /// adrp x0, _var@TLVPPAGE /// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor /// ldr x1, [x0] ; x1 contains 1st entry of descriptor, /// ; the function pointer /// blr x1 ; Uses descriptor address in x0 /// ; Address of _var is now in x0. /// /// If the address of _var's descriptor *is* known to the linker, then it can /// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for /// a slight efficiency gain. SDValue AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin"); SDLoc DL(Op); MVT PtrVT = getPointerTy(DAG.getDataLayout()); const GlobalValue *GV = cast(Op)->getGlobal(); SDValue TLVPAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS); SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr); // The first entry in the descriptor is a function pointer that we must call // to obtain the address of the variable. SDValue Chain = DAG.getEntryNode(); SDValue FuncTLVGet = DAG.getLoad( MVT::i64, DL, Chain, DescAddr, MachinePointerInfo::getGOT(DAG.getMachineFunction()), /* Alignment = */ 8, MachineMemOperand::MONonTemporal | MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); Chain = FuncTLVGet.getValue(1); MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); MFI.setAdjustsStack(true); // TLS calls preserve all registers except those that absolutely must be // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be // silly). const uint32_t *Mask = Subtarget->getRegisterInfo()->getTLSCallPreservedMask(); // Finally, we can make the call. This is just a degenerate version of a // normal AArch64 call node: x0 takes the address of the descriptor, and // returns the address of the variable in this thread. 
Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue()); Chain = DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue), Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64), DAG.getRegisterMask(Mask), Chain.getValue(1)); return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1)); } /// When accessing thread-local variables under either the general-dynamic or /// local-dynamic system, we make a "TLS-descriptor" call. The variable will /// have a descriptor, accessible via a PC-relative ADRP, and whose first entry /// is a function pointer to carry out the resolution. /// /// The sequence is: /// adrp x0, :tlsdesc:var /// ldr x1, [x0, #:tlsdesc_lo12:var] /// add x0, x0, #:tlsdesc_lo12:var /// .tlsdesccall var /// blr x1 /// (TPIDR_EL0 offset now in x0) /// /// The above sequence must be produced unscheduled, to enable the linker to /// optimize/relax this sequence. /// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the /// above sequence, and expanded really late in the compilation flow, to ensure /// the sequence is produced as per above. SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL, SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Chain = DAG.getEntryNode(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); Chain = DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr}); SDValue Glue = Chain.getValue(1); return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue); } SDValue AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetELF() && "This function expects an ELF target"); assert(getTargetMachine().getCodeModel() == CodeModel::Small && "ELF TLS only supported in small memory model"); // Different choices can be made for the maximum size of the TLS area for a // module. 
For the small address model, the default TLS size is 16MiB and the // maximum TLS size is 4GiB. // FIXME: add -mtls-size command line option and make it control the 16MiB // vs. 4GiB code sequence generation. const GlobalAddressSDNode *GA = cast(Op); TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal()); if (DAG.getTarget().Options.EmulatedTLS) return LowerToTLSEmulatedModel(GA, DAG); if (!EnableAArch64ELFLocalDynamicTLSGeneration) { if (Model == TLSModel::LocalDynamic) Model = TLSModel::GeneralDynamic; } SDValue TPOff; EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc DL(Op); const GlobalValue *GV = GA->getGlobal(); SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT); if (Model == TLSModel::LocalExec) { SDValue HiVar = DAG.getTargetGlobalAddress( GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12); SDValue LoVar = DAG.getTargetGlobalAddress( GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); SDValue TPWithOff_lo = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase, HiVar, DAG.getTargetConstant(0, DL, MVT::i32)), 0); SDValue TPWithOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo, LoVar, DAG.getTargetConstant(0, DL, MVT::i32)), 0); return TPWithOff; } else if (Model == TLSModel::InitialExec) { TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS); TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff); } else if (Model == TLSModel::LocalDynamic) { // Local-dynamic accesses proceed in two phases. A general-dynamic TLS // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate // the beginning of the module's TLS region, followed by a DTPREL offset // calculation. // These accesses will need deduplicating if there's more than one. AArch64FunctionInfo *MFI = DAG.getMachineFunction().getInfo(); MFI->incNumLocalDynamicTLSAccesses(); // The call needs a relocation too for linker relaxation. 
It doesn't make // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of // the address. SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, AArch64II::MO_TLS); // Now we can calculate the offset from TPIDR_EL0 to this module's // thread-local area. TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG); // Now use :dtprel_whatever: operations to calculate this variable's offset // in its thread-storage area. SDValue HiVar = DAG.getTargetGlobalAddress( GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12); SDValue LoVar = DAG.getTargetGlobalAddress( GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar, DAG.getTargetConstant(0, DL, MVT::i32)), 0); TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar, DAG.getTargetConstant(0, DL, MVT::i32)), 0); } else if (Model == TLSModel::GeneralDynamic) { // The call needs a relocation too for linker relaxation. It doesn't make // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of // the address. SDValue SymAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS); // Finally we can make a call to calculate the offset from tpidr_el0. 
TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG); } else llvm_unreachable("Unsupported ELF TLS access model"); return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff); } SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { if (Subtarget->isTargetDarwin()) return LowerDarwinGlobalTLSAddress(Op, DAG); else if (Subtarget->isTargetELF()) return LowerELFGlobalTLSAddress(Op, DAG); llvm_unreachable("Unexpected platform trying to use TLS"); } SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); SDValue RHS = Op.getOperand(3); SDValue Dest = Op.getOperand(4); SDLoc dl(Op); // Handle f128 first, since lowering it will result in comparing the return // value of a libcall against zero, which is just what the rest of LowerBR_CC // is expecting to deal with. if (LHS.getValueType() == MVT::f128) { softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. if (!RHS.getNode()) { RHS = DAG.getConstant(0, dl, LHS.getValueType()); CC = ISD::SETNE; } } // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch // instruction. unsigned Opc = LHS.getOpcode(); if (LHS.getResNo() == 1 && isOneConstant(RHS) && (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) { assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Unexpected condition code."); // Only lower legal XALUO ops. if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0))) return SDValue(); // The actual operation with overflow check. 
AArch64CC::CondCode OFCC; SDValue Value, Overflow; std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG); if (CC == ISD::SETNE) OFCC = getInvertedCondCode(OFCC); SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32); return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal, Overflow); } if (LHS.getValueType().isInteger()) { assert((LHS.getValueType() == RHS.getValueType()) && (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64)); // If the RHS of the comparison is zero, we can potentially fold this // to a specialized branch. const ConstantSDNode *RHSC = dyn_cast(RHS); if (RHSC && RHSC->getZExtValue() == 0) { if (CC == ISD::SETEQ) { // See if we can use a TBZ to fold in an AND as well. // TBZ has a smaller branch displacement than CBZ. If the offset is // out of bounds, a late MI-layer pass rewrites branches. // 403.gcc is an example that hits this case. if (LHS.getOpcode() == ISD::AND && isa(LHS.getOperand(1)) && isPowerOf2_64(LHS.getConstantOperandVal(1))) { SDValue Test = LHS.getOperand(0); uint64_t Mask = LHS.getConstantOperandVal(1); return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test, DAG.getConstant(Log2_64(Mask), dl, MVT::i64), Dest); } return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest); } else if (CC == ISD::SETNE) { // See if we can use a TBZ to fold in an AND as well. // TBZ has a smaller branch displacement than CBZ. If the offset is // out of bounds, a late MI-layer pass rewrites branches. // 403.gcc is an example that hits this case. 
if (LHS.getOpcode() == ISD::AND && isa(LHS.getOperand(1)) && isPowerOf2_64(LHS.getConstantOperandVal(1))) { SDValue Test = LHS.getOperand(0); uint64_t Mask = LHS.getConstantOperandVal(1); return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test, DAG.getConstant(Log2_64(Mask), dl, MVT::i64), Dest); } return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest); } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) { // Don't combine AND since emitComparison converts the AND to an ANDS // (a.k.a. TST) and the test in the test bit and branch instruction // becomes redundant. This would also increase register pressure. uint64_t Mask = LHS.getValueSizeInBits() - 1; return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS, DAG.getConstant(Mask, dl, MVT::i64), Dest); } } if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT && LHS.getOpcode() != ISD::AND) { // Don't combine AND since emitComparison converts the AND to an ANDS // (a.k.a. TST) and the test in the test bit and branch instruction // becomes redundant. This would also increase register pressure. uint64_t Mask = LHS.getValueSizeInBits() - 1; return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS, DAG.getConstant(Mask, dl, MVT::i64), Dest); } SDValue CCVal; SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl); return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal, Cmp); } assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally // clean. Some of them require two branches to implement. 
SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG); AArch64CC::CondCode CC1, CC2; changeFPCCToAArch64CC(CC, CC1, CC2); SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32); SDValue BR1 = DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp); if (CC2 != AArch64CC::AL) { SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32); return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val, Cmp); } return BR1; } SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc DL(Op); SDValue In1 = Op.getOperand(0); SDValue In2 = Op.getOperand(1); EVT SrcVT = In2.getValueType(); if (SrcVT.bitsLT(VT)) In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2); else if (SrcVT.bitsGT(VT)) In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL)); EVT VecVT; EVT EltVT; uint64_t EltMask; SDValue VecVal1, VecVal2; if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) { EltVT = MVT::i32; VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32); EltMask = 0x80000000ULL; if (!VT.isVector()) { VecVal1 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT, DAG.getUNDEF(VecVT), In1); VecVal2 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT, DAG.getUNDEF(VecVT), In2); } else { VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1); VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2); } } else if (VT == MVT::f64 || VT == MVT::v2f64) { EltVT = MVT::i64; VecVT = MVT::v2i64; // We want to materialize a mask with the high bit set, but the AdvSIMD // immediate moves cannot materialize that in a single instruction for // 64-bit elements. Instead, materialize zero and then negate it. 
EltMask = 0; if (!VT.isVector()) { VecVal1 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT, DAG.getUNDEF(VecVT), In1); VecVal2 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT, DAG.getUNDEF(VecVT), In2); } else { VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1); VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2); } } else { llvm_unreachable("Invalid type for copysign!"); } SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT); // If we couldn't materialize the mask above, then the mask vector will be // the zero vector, and we need to negate it here. if (VT == MVT::f64 || VT == MVT::v2f64) { BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec); BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec); BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec); } SDValue Sel = DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec); if (VT == MVT::f32) return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel); else if (VT == MVT::f64) return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel); else return DAG.getNode(ISD::BITCAST, DL, VT, Sel); } SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { if (DAG.getMachineFunction().getFunction()->hasFnAttribute( Attribute::NoImplicitFloat)) return SDValue(); if (!Subtarget->hasNEON()) return SDValue(); // While there is no integer popcount instruction, it can // be more efficiently lowered to the following sequence that uses // AdvSIMD registers/instructions as long as the copies to/from // the AdvSIMD registers are cheap. 
// FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd // CNT V0.8B, V0.8B // 8xbyte pop-counts // ADDV B0, V0.8B // sum 8xbyte pop-counts // UMOV X0, V0.B[0] // copy byte result back to integer reg SDValue Val = Op.getOperand(0); SDLoc DL(Op); EVT VT = Op.getValueType(); if (VT == MVT::i32) Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val); Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val); SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val); SDValue UaddLV = DAG.getNode( ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop); if (VT == MVT::i64) UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV); return UaddLV; } SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (Op.getValueType().isVector()) return LowerVSETCC(Op, DAG); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); ISD::CondCode CC = cast(Op.getOperand(2))->get(); SDLoc dl(Op); // We chose ZeroOrOneBooleanContents, so use zero and one. EVT VT = Op.getValueType(); SDValue TVal = DAG.getConstant(1, dl, VT); SDValue FVal = DAG.getConstant(0, dl, VT); // Handle f128 first, since one possible outcome is a normal integer // comparison which gets picked up by the next if statement. if (LHS.getValueType() == MVT::f128) { softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); // If softenSetCCOperands returned a scalar, use it. if (!RHS.getNode()) { assert(LHS.getValueType() == Op.getValueType() && "Unexpected setcc expansion!"); return LHS; } } if (LHS.getValueType().isInteger()) { SDValue CCVal; SDValue Cmp = getAArch64Cmp(LHS, RHS, ISD::getSetCCInverse(CC, true), CCVal, DAG, dl); // Note that we inverted the condition above, so we reverse the order of // the true and false operands here. This will allow the setcc to be // matched to a single CSINC instruction. return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp); } // Now we know we're dealing with FP values. 
assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead // and do the comparison. SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG); AArch64CC::CondCode CC1, CC2; changeFPCCToAArch64CC(CC, CC1, CC2); if (CC2 == AArch64CC::AL) { changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, false), CC1, CC2); SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32); // Note that we inverted the condition above, so we reverse the order of // the true and false operands here. This will allow the setcc to be // matched to a single CSINC instruction. return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp); } else { // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't // totally clean. Some of them require two CSELs to implement. As is in // this case, we emit the first CSEL and then emit a second using the output // of the first as the RHS. We're effectively OR'ing the two CC's together. // FIXME: It would be nice if we could match the two CSELs to two CSINCs. SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32); SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp); SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32); return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp); } } SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS, SDValue TVal, SDValue FVal, const SDLoc &dl, SelectionDAG &DAG) const { // Handle f128 first, because it will result in a comparison of some RTLIB // call result against zero. if (LHS.getValueType() == MVT::f128) { softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. if (!RHS.getNode()) { RHS = DAG.getConstant(0, dl, LHS.getValueType()); CC = ISD::SETNE; } } // Also handle f16, for which we need to do a f32 comparison. 
if (LHS.getValueType() == MVT::f16) { LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS); RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS); } // Next, handle integers. if (LHS.getValueType().isInteger()) { assert((LHS.getValueType() == RHS.getValueType()) && (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64)); unsigned Opcode = AArch64ISD::CSEL; // If both the TVal and the FVal are constants, see if we can swap them in // order to for a CSINV or CSINC out of them. ConstantSDNode *CFVal = dyn_cast(FVal); ConstantSDNode *CTVal = dyn_cast(TVal); if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) { std::swap(TVal, FVal); std::swap(CTVal, CFVal); CC = ISD::getSetCCInverse(CC, true); } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) { std::swap(TVal, FVal); std::swap(CTVal, CFVal); CC = ISD::getSetCCInverse(CC, true); } else if (TVal.getOpcode() == ISD::XOR) { // If TVal is a NOT we want to swap TVal and FVal so that we can match // with a CSINV rather than a CSEL. if (isAllOnesConstant(TVal.getOperand(1))) { std::swap(TVal, FVal); std::swap(CTVal, CFVal); CC = ISD::getSetCCInverse(CC, true); } } else if (TVal.getOpcode() == ISD::SUB) { // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so // that we can match with a CSNEG rather than a CSEL. if (isNullConstant(TVal.getOperand(0))) { std::swap(TVal, FVal); std::swap(CTVal, CFVal); CC = ISD::getSetCCInverse(CC, true); } } else if (CTVal && CFVal) { const int64_t TrueVal = CTVal->getSExtValue(); const int64_t FalseVal = CFVal->getSExtValue(); bool Swap = false; // If both TVal and FVal are constants, see if FVal is the // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC // instead of a CSEL in that case. 
if (TrueVal == ~FalseVal) { Opcode = AArch64ISD::CSINV; } else if (TrueVal == -FalseVal) { Opcode = AArch64ISD::CSNEG; } else if (TVal.getValueType() == MVT::i32) { // If our operands are only 32-bit wide, make sure we use 32-bit // arithmetic for the check whether we can use CSINC. This ensures that // the addition in the check will wrap around properly in case there is // an overflow (which would not be the case if we do the check with // 64-bit arithmetic). const uint32_t TrueVal32 = CTVal->getZExtValue(); const uint32_t FalseVal32 = CFVal->getZExtValue(); if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) { Opcode = AArch64ISD::CSINC; if (TrueVal32 > FalseVal32) { Swap = true; } } // 64-bit check whether we can use CSINC. } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) { Opcode = AArch64ISD::CSINC; if (TrueVal > FalseVal) { Swap = true; } } // Swap TVal and FVal if necessary. if (Swap) { std::swap(TVal, FVal); std::swap(CTVal, CFVal); CC = ISD::getSetCCInverse(CC, true); } if (Opcode != AArch64ISD::CSEL) { // Drop FVal since we can get its value by simply inverting/negating // TVal. FVal = TVal; } } // Avoid materializing a constant when possible by reusing a known value in // a register. However, don't perform this optimization if the known value // is one, zero or negative one in the case of a CSEL. We can always // materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the // FVal, respectively. ConstantSDNode *RHSVal = dyn_cast(RHS); if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() && !RHSVal->isNullValue() && !RHSVal->isAllOnesValue()) { AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC); // Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to // "a != C ? x : a" to avoid materializing C. 
if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ) TVal = LHS; else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE) FVal = LHS; } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) { assert (CTVal && CFVal && "Expected constant operands for CSNEG."); // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to // avoid materializing C. AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC); if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) { Opcode = AArch64ISD::CSINV; TVal = LHS; FVal = DAG.getConstant(0, dl, FVal.getValueType()); } } SDValue CCVal; SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl); EVT VT = TVal.getValueType(); return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp); } // Now we know we're dealing with FP values. assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); assert(LHS.getValueType() == RHS.getValueType()); EVT VT = TVal.getValueType(); SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG); // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally // clean. Some of them require two CSELs to implement. AArch64CC::CondCode CC1, CC2; changeFPCCToAArch64CC(CC, CC1, CC2); if (DAG.getTarget().Options.UnsafeFPMath) { // Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and // "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0. ConstantFPSDNode *RHSVal = dyn_cast(RHS); if (RHSVal && RHSVal->isZero()) { ConstantFPSDNode *CFVal = dyn_cast(FVal); ConstantFPSDNode *CTVal = dyn_cast(TVal); if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) && CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType()) TVal = LHS; else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) && CFVal && CFVal->isZero() && FVal.getValueType() == LHS.getValueType()) FVal = LHS; } } // Emit first, and possibly only, CSEL. 
SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32); SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp); // If we need a second CSEL, emit it, using the output of the first as the // RHS. We're effectively OR'ing the two CC's together. if (CC2 != AArch64CC::AL) { SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32); return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp); } // Otherwise, return the output of the first CSEL. return CS1; } SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast(Op.getOperand(4))->get(); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDValue TVal = Op.getOperand(2); SDValue FVal = Op.getOperand(3); SDLoc DL(Op); return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG); } SDValue AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue CCVal = Op->getOperand(0); SDValue TVal = Op->getOperand(1); SDValue FVal = Op->getOperand(2); SDLoc DL(Op); unsigned Opc = CCVal.getOpcode(); // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select // instruction. if (CCVal.getResNo() == 1 && (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) { // Only lower legal XALUO ops. if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0))) return SDValue(); AArch64CC::CondCode OFCC; SDValue Value, Overflow; std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG); SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32); return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal, CCVal, Overflow); } // Lower it the same way as we would lower a SELECT_CC node. 
ISD::CondCode CC; SDValue LHS, RHS; if (CCVal.getOpcode() == ISD::SETCC) { LHS = CCVal.getOperand(0); RHS = CCVal.getOperand(1); CC = cast(CCVal->getOperand(2))->get(); } else { LHS = CCVal; RHS = DAG.getConstant(0, DL, CCVal.getValueType()); CC = ISD::SETNE; } return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG); } SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { // Jump table entries as PC relative offsets. No additional tweaking // is necessary here. Just get the address of the jump table. JumpTableSDNode *JT = cast(Op); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc DL(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large && !Subtarget->isTargetMachO()) { const unsigned char MO_NC = AArch64II::MO_NC; return DAG.getNode( AArch64ISD::WrapperLarge, DL, PtrVT, DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G3), DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G2 | MO_NC), DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G1 | MO_NC), DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G0 | MO_NC)); } SDValue Hi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_PAGE); SDValue Lo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); } SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { ConstantPoolSDNode *CP = cast(Op); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc DL(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large) { // Use the GOT for the large code model on iOS. 
if (Subtarget->isTargetMachO()) { SDValue GotAddr = DAG.getTargetConstantPool( CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), AArch64II::MO_GOT); return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr); } const unsigned char MO_NC = AArch64II::MO_NC; return DAG.getNode( AArch64ISD::WrapperLarge, DL, PtrVT, DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), AArch64II::MO_G3), DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), AArch64II::MO_G2 | MO_NC), DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), AArch64II::MO_G1 | MO_NC), DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), AArch64II::MO_G0 | MO_NC)); } else { // Use ADRP/ADD or ADRP/LDR for everything else: the small memory model on // ELF, the only valid one on Darwin. SDValue Hi = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), AArch64II::MO_PAGE); SDValue Lo = DAG.getTargetConstantPool( CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC); SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); } } SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { const BlockAddress *BA = cast(Op)->getBlockAddress(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc DL(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large && !Subtarget->isTargetMachO()) { const unsigned char MO_NC = AArch64II::MO_NC; return DAG.getNode( AArch64ISD::WrapperLarge, DL, PtrVT, DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G3), DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G2 | MO_NC), DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G1 | MO_NC), DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G0 | MO_NC)); } else { SDValue Hi = 
DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGE); SDValue Lo = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); } } SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const { AArch64FunctionInfo *FuncInfo = DAG.getMachineFunction().getInfo(); SDLoc DL(Op); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy(DAG.getDataLayout())); const Value *SV = cast(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), MachinePointerInfo(SV)); } SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const { // The layout of the va_list struct is specified in the AArch64 Procedure Call // Standard, section B.3. MachineFunction &MF = DAG.getMachineFunction(); AArch64FunctionInfo *FuncInfo = MF.getInfo(); auto PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc DL(Op); SDValue Chain = Op.getOperand(0); SDValue VAList = Op.getOperand(1); const Value *SV = cast(Op.getOperand(2))->getValue(); SmallVector MemOps; // void *__stack at offset 0 SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT); MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList, MachinePointerInfo(SV), /* Alignment = */ 8)); // void *__gr_top at offset 8 int GPRSize = FuncInfo->getVarArgsGPRSize(); if (GPRSize > 0) { SDValue GRTop, GRTopAddr; GRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(8, DL, PtrVT)); GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT); GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop, DAG.getConstant(GPRSize, DL, PtrVT)); MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr, MachinePointerInfo(SV, 8), /* Alignment = */ 8)); } // void *__vr_top at offset 16 int FPRSize = FuncInfo->getVarArgsFPRSize(); if (FPRSize > 0) { SDValue VRTop, VRTopAddr; 
VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(16, DL, PtrVT)); VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT); VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop, DAG.getConstant(FPRSize, DL, PtrVT)); MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr, MachinePointerInfo(SV, 16), /* Alignment = */ 8)); } // int __gr_offs at offset 24 SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(24, DL, PtrVT)); MemOps.push_back(DAG.getStore( Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32), GROffsAddr, MachinePointerInfo(SV, 24), /* Alignment = */ 4)); // int __vr_offs at offset 28 SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(28, DL, PtrVT)); MemOps.push_back(DAG.getStore( Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32), VROffsAddr, MachinePointerInfo(SV, 28), /* Alignment = */ 4)); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); } SDValue AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { return Subtarget->isTargetDarwin() ? LowerDarwin_VASTART(Op, DAG) : LowerAAPCS_VASTART(Op, DAG); } SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { // AAPCS has three pointers and two ints (= 32 bytes), Darwin has single // pointer. SDLoc DL(Op); unsigned VaListSize = Subtarget->isTargetDarwin() ? 
8 : 32; const Value *DestSV = cast(Op.getOperand(3))->getValue(); const Value *SrcSV = cast(Op.getOperand(4))->getValue(); return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2), DAG.getConstant(VaListSize, DL, MVT::i32), 8, false, false, false, MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV)); } SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetDarwin() && "automatic va_arg instruction only works on Darwin"); const Value *V = cast(Op.getOperand(2))->getValue(); EVT VT = Op.getValueType(); SDLoc DL(Op); SDValue Chain = Op.getOperand(0); SDValue Addr = Op.getOperand(1); unsigned Align = Op.getConstantOperandVal(3); auto PtrVT = getPointerTy(DAG.getDataLayout()); SDValue VAList = DAG.getLoad(PtrVT, DL, Chain, Addr, MachinePointerInfo(V)); Chain = VAList.getValue(1); if (Align > 8) { assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2"); VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(Align - 1, DL, PtrVT)); VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList, DAG.getConstant(-(int64_t)Align, DL, PtrVT)); } Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); uint64_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy); // Scalar integer and FP values smaller than 64 bits are implicitly extended // up to 64 bits. At the very least, we have to increase the striding of the // vaargs list to match this, and for FP values we need to introduce // FP_ROUND nodes as well. 
if (VT.isInteger() && !VT.isVector()) ArgSize = 8; bool NeedFPTrunc = false; if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) { ArgSize = 8; NeedFPTrunc = true; } // Increment the pointer, VAList, to the next vaarg SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(ArgSize, DL, PtrVT)); // Store the incremented VAList to the legalized pointer SDValue APStore = DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V)); // Load the actual argument out of the pointer VAList if (NeedFPTrunc) { // Load the value as an f64. SDValue WideFP = DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo()); // Round the value down to an f32. SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0), DAG.getIntPtrConstant(1, DL)); SDValue Ops[] = { NarrowFP, WideFP.getValue(1) }; // Merge the rounded value with the chain output of the load. return DAG.getMergeValues(Ops, DL); } return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo()); } SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); MFI.setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); SDLoc DL(Op); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT); while (Depth--) FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr, MachinePointerInfo()); return FrameAddr; } // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. 
unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const { unsigned Reg = StringSwitch(RegName) .Case("sp", AArch64::SP) .Default(0); if (Reg) return Reg; report_fatal_error(Twine("Invalid register name \"" + StringRef(RegName) + "\".")); } SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MFI.setReturnAddressIsTaken(true); EVT VT = Op.getValueType(); SDLoc DL(Op); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); if (Depth) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout())); return DAG.getLoad(VT, DL, DAG.getEntryNode(), DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), MachinePointerInfo()); } // Return LR, which contains the return address. Mark it an implicit live-in. unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass); return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT); } /// LowerShiftRightParts - Lower SRA_PARTS, which returns two /// i64 values and take a 2 x i64 value to shift plus a shift amount. SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); SDLoc dl(Op); SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, DAG.getConstant(VTBits, dl, MVT::i64), ShAmt); SDValue HiBitsForLo = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); // Unfortunately, if ShAmt == 0, we just calculated "(SHL ShOpHi, 64)" which // is "undef". We wanted 0, so CSEL it directly. 
SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETEQ, dl, DAG); SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32); HiBitsForLo = DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64), HiBitsForLo, CCVal, Cmp); SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt, DAG.getConstant(VTBits, dl, MVT::i64)); SDValue LoBitsForLo = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); SDValue LoForNormalShift = DAG.getNode(ISD::OR, dl, VT, LoBitsForLo, HiBitsForLo); Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE, dl, DAG); CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32); SDValue LoForBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift, LoForNormalShift, CCVal, Cmp); // AArch64 shifts larger than the register width are wrapped rather than // clamped, so we can't just emit "hi >> x". SDValue HiForNormalShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); SDValue HiForBigShift = Opc == ISD::SRA ? DAG.getNode(Opc, dl, VT, ShOpHi, DAG.getConstant(VTBits - 1, dl, MVT::i64)) : DAG.getConstant(0, dl, VT); SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift, HiForNormalShift, CCVal, Cmp); SDValue Ops[2] = { Lo, Hi }; return DAG.getMergeValues(Ops, dl); } /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two /// i64 values and take a 2 x i64 value to shift plus a shift amount. 
SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); SDLoc dl(Op); SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); assert(Op.getOpcode() == ISD::SHL_PARTS); SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, DAG.getConstant(VTBits, dl, MVT::i64), ShAmt); SDValue LoBitsForHi = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); // Unfortunately, if ShAmt == 0, we just calculated "(SRL ShOpLo, 64)" which // is "undef". We wanted 0, so CSEL it directly. SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETEQ, dl, DAG); SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32); LoBitsForHi = DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64), LoBitsForHi, CCVal, Cmp); SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt, DAG.getConstant(VTBits, dl, MVT::i64)); SDValue HiBitsForHi = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); SDValue HiForNormalShift = DAG.getNode(ISD::OR, dl, VT, LoBitsForHi, HiBitsForHi); SDValue HiForBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE, dl, DAG); CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32); SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift, HiForNormalShift, CCVal, Cmp); // AArch64 shifts of larger than register sizes are wrapped rather than // clamped, so we can't just emit "lo << a" if a is too big. 
SDValue LoForBigShift = DAG.getConstant(0, dl, VT); SDValue LoForNormalShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift, LoForNormalShift, CCVal, Cmp); SDValue Ops[2] = { Lo, Hi }; return DAG.getMergeValues(Ops, dl); } bool AArch64TargetLowering::isOffsetFoldingLegal( const GlobalAddressSDNode *GA) const { // The AArch64 target doesn't support folding offsets into global addresses. return false; } bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases. // FIXME: We should be able to handle f128 as well with a clever lowering. if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32)) return true; if (VT == MVT::f64) return AArch64_AM::getFP64Imm(Imm) != -1; else if (VT == MVT::f32) return AArch64_AM::getFP32Imm(Imm) != -1; return false; } //===----------------------------------------------------------------------===// // AArch64 Optimization Hooks //===----------------------------------------------------------------------===// static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode, SDValue Operand, SelectionDAG &DAG, int &ExtraSteps) { EVT VT = Operand.getValueType(); if (ST->hasNEON() && (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 || VT == MVT::f32 || VT == MVT::v1f32 || VT == MVT::v2f32 || VT == MVT::v4f32)) { if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified) // For the reciprocal estimates, convergence is quadratic, so the number // of digits is doubled after each iteration. In ARMv8, the accuracy of // the initial estimate is 2^-8. Thus the number of extra steps to refine // the result for float (23 mantissa bits) is 2 and for double (52 // mantissa bits) is 3. ExtraSteps = VT == MVT::f64 ? 
3 : 2; return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand); } return SDValue(); } SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &ExtraSteps, bool &UseOneConst, bool Reciprocal) const { if (Enabled == ReciprocalEstimate::Enabled || (Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt())) if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand, DAG, ExtraSteps)) { SDLoc DL(Operand); EVT VT = Operand.getValueType(); SDNodeFlags Flags; Flags.setUnsafeAlgebra(true); // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2) // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N) for (int i = ExtraSteps; i > 0; --i) { SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate, &Flags); Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, &Flags); Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, &Flags); } if (!Reciprocal) { EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); SDValue Eq = DAG.getSetCC(DL, CCVT, Operand, FPZero, ISD::SETEQ); Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, &Flags); // Correct the result if the operand is 0.0. Estimate = DAG.getNode(VT.isVector() ? 
ISD::VSELECT : ISD::SELECT, DL, VT, Eq, Operand, Estimate); } ExtraSteps = 0; return Estimate; } return SDValue(); } SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &ExtraSteps) const { if (Enabled == ReciprocalEstimate::Enabled) if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand, DAG, ExtraSteps)) { SDLoc DL(Operand); EVT VT = Operand.getValueType(); SDNodeFlags Flags; Flags.setUnsafeAlgebra(true); // Newton reciprocal iteration: E * (2 - X * E) // AArch64 reciprocal iteration instruction: (2 - M * N) for (int i = ExtraSteps; i > 0; --i) { SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand, Estimate, &Flags); Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, &Flags); } ExtraSteps = 0; return Estimate; } return SDValue(); } //===----------------------------------------------------------------------===// // AArch64 Inline Assembly Support //===----------------------------------------------------------------------===// // Table of Constraints // TODO: This is the current set of constraints supported by ARM for the // compiler, not all of them may make sense, e.g. S may be difficult to support. 
// // r - A general register // w - An FP/SIMD register of some size in the range v0-v31 // x - An FP/SIMD register of some size in the range v0-v15 // I - Constant that can be used with an ADD instruction // J - Constant that can be used with a SUB instruction // K - Constant that can be used with a 32-bit logical instruction // L - Constant that can be used with a 64-bit logical instruction // M - Constant that can be used as a 32-bit MOV immediate // N - Constant that can be used as a 64-bit MOV immediate // Q - A memory reference with base register and no offset // S - A symbolic address // Y - Floating point constant zero // Z - Integer constant zero // // Note that general register operands will be output using their 64-bit x // register name, whatever the size of the variable, unless the asm operand // is prefixed by the %w modifier. Floating-point and SIMD register operands // will be output with the v prefix unless prefixed by the %b, %h, %s, %d or // %q modifier. const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const { // At this point, we have to lower this constraint to something else, so we // lower it to an "r" or "w". However, by doing this we will force the result // to be in register, while the X constraint is much more permissive. // // Although we are correct (we are free to emit anything, without // constraints), we might break use cases that would expect us to be more // efficient and emit something else. if (!Subtarget->hasFPARMv8()) return "r"; if (ConstraintVT.isFloatingPoint()) return "w"; if (ConstraintVT.isVector() && (ConstraintVT.getSizeInBits() == 64 || ConstraintVT.getSizeInBits() == 128)) return "w"; return "r"; } /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. 
AArch64TargetLowering::ConstraintType AArch64TargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default: break; case 'z': return C_Other; case 'x': case 'w': return C_RegisterClass; // An address with a single base register. Due to the way we // currently handle addresses it is the same as 'r'. case 'Q': return C_Memory; } } return TargetLowering::getConstraintType(Constraint); } /// Examine constraint type and operand type and determine a weight value. /// This object must already have been set up with the operand type /// and the current alternative constraint selected. TargetLowering::ConstraintWeight AArch64TargetLowering::getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const { ConstraintWeight weight = CW_Invalid; Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); // Look at the constraint type. 
switch (*constraint) { default: weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); break; case 'x': case 'w': if (type->isFloatingPointTy() || type->isVectorTy()) weight = CW_Register; break; case 'z': weight = CW_Constant; break; } return weight; } std::pair AArch64TargetLowering::getRegForInlineAsmConstraint( const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': if (VT.getSizeInBits() == 64) return std::make_pair(0U, &AArch64::GPR64commonRegClass); return std::make_pair(0U, &AArch64::GPR32commonRegClass); case 'w': if (VT.getSizeInBits() == 16) return std::make_pair(0U, &AArch64::FPR16RegClass); if (VT.getSizeInBits() == 32) return std::make_pair(0U, &AArch64::FPR32RegClass); if (VT.getSizeInBits() == 64) return std::make_pair(0U, &AArch64::FPR64RegClass); if (VT.getSizeInBits() == 128) return std::make_pair(0U, &AArch64::FPR128RegClass); break; // The instructions that this constraint is designed for can // only take 128-bit registers so just use that regclass. case 'x': if (VT.getSizeInBits() == 128) return std::make_pair(0U, &AArch64::FPR128_loRegClass); break; } } if (StringRef("{cc}").equals_lower(Constraint)) return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass); // Use the default implementation in TargetLowering to convert the register // constraint into a member of a register class. std::pair Res; Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); // Not found as a standard register? if (!Res.second) { unsigned Size = Constraint.size(); if ((Size == 4 || Size == 5) && Constraint[0] == '{' && tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') { int RegNo; bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo); if (!Failed && RegNo >= 0 && RegNo <= 31) { // v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size. 
// By default we'll emit v0-v31 for this unless there's a modifier where // we'll emit the correct register as well. if (VT != MVT::Other && VT.getSizeInBits() == 64) { Res.first = AArch64::FPR64RegClass.getRegister(RegNo); Res.second = &AArch64::FPR64RegClass; } else { Res.first = AArch64::FPR128RegClass.getRegister(RegNo); Res.second = &AArch64::FPR128RegClass; } } } } return Res; } /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. void AArch64TargetLowering::LowerAsmOperandForConstraint( SDValue Op, std::string &Constraint, std::vector &Ops, SelectionDAG &DAG) const { SDValue Result; // Currently only support length 1 constraints. if (Constraint.length() != 1) return; char ConstraintLetter = Constraint[0]; switch (ConstraintLetter) { default: break; // This set of constraints deal with valid constants for various instructions. // Validate and return a target constant for them if we can. case 'z': { // 'z' maps to xzr or wzr so it needs an input of 0. if (!isNullConstant(Op)) return; if (Op.getValueType() == MVT::i64) Result = DAG.getRegister(AArch64::XZR, MVT::i64); else Result = DAG.getRegister(AArch64::WZR, MVT::i32); break; } case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': ConstantSDNode *C = dyn_cast(Op); if (!C) return; // Grab the value and do some validation. uint64_t CVal = C->getZExtValue(); switch (ConstraintLetter) { // The I constraint applies only to simple ADD or SUB immediate operands: // i.e. 0 to 4095 with optional shift by 12 // The J constraint applies only to ADD or SUB immediates that would be // valid when negated, i.e. if [an add pattern] were to be output as a SUB // instruction [or vice versa], in other words -1 to -4095 with optional // left shift by 12. 
case 'I': if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal)) break; return; case 'J': { uint64_t NVal = -C->getSExtValue(); if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) { CVal = C->getSExtValue(); break; } return; } // The K and L constraints apply *only* to logical immediates, including // what used to be the MOVI alias for ORR (though the MOVI alias has now // been removed and MOV should be used). So these constraints have to // distinguish between bit patterns that are valid 32-bit or 64-bit // "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but // not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice // versa. case 'K': if (AArch64_AM::isLogicalImmediate(CVal, 32)) break; return; case 'L': if (AArch64_AM::isLogicalImmediate(CVal, 64)) break; return; // The M and N constraints are a superset of K and L respectively, for use // with the MOV (immediate) alias. As well as the logical immediates they // also match 32 or 64-bit immediates that can be loaded either using a // *single* MOVZ or MOVN , such as 32-bit 0x12340000, 0x00001234, 0xffffedca // (M) or 64-bit 0x1234000000000000 (N) etc. // As a note some of this code is liberally stolen from the asm parser. 
case 'M': { if (!isUInt<32>(CVal)) return; if (AArch64_AM::isLogicalImmediate(CVal, 32)) break; if ((CVal & 0xFFFF) == CVal) break; if ((CVal & 0xFFFF0000ULL) == CVal) break; uint64_t NCVal = ~(uint32_t)CVal; if ((NCVal & 0xFFFFULL) == NCVal) break; if ((NCVal & 0xFFFF0000ULL) == NCVal) break; return; } case 'N': { if (AArch64_AM::isLogicalImmediate(CVal, 64)) break; if ((CVal & 0xFFFFULL) == CVal) break; if ((CVal & 0xFFFF0000ULL) == CVal) break; if ((CVal & 0xFFFF00000000ULL) == CVal) break; if ((CVal & 0xFFFF000000000000ULL) == CVal) break; uint64_t NCVal = ~CVal; if ((NCVal & 0xFFFFULL) == NCVal) break; if ((NCVal & 0xFFFF0000ULL) == NCVal) break; if ((NCVal & 0xFFFF00000000ULL) == NCVal) break; if ((NCVal & 0xFFFF000000000000ULL) == NCVal) break; return; } default: return; } // All assembler immediates are 64-bit integers. Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64); break; } if (Result.getNode()) { Ops.push_back(Result); return; } return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } //===----------------------------------------------------------------------===// // AArch64 Advanced SIMD Support //===----------------------------------------------------------------------===// /// WidenVector - Given a value in the V64 register class, produce the /// equivalent value in the V128 register class. static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) { EVT VT = V64Reg.getValueType(); unsigned NarrowSize = VT.getVectorNumElements(); MVT EltTy = VT.getVectorElementType().getSimpleVT(); MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); SDLoc DL(V64Reg); return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy), V64Reg, DAG.getConstant(0, DL, MVT::i32)); } /// getExtFactor - Determine the adjustment factor for the position when /// generating an "extract from vector registers" instruction. 
static unsigned getExtFactor(SDValue &V) { EVT EltType = V.getValueType().getVectorElementType(); return EltType.getSizeInBits() / 8; } /// NarrowVector - Given a value in the V128 register class, produce the /// equivalent value in the V64 register class. static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { EVT VT = V128Reg.getValueType(); unsigned WideSize = VT.getVectorNumElements(); MVT EltTy = VT.getVectorElementType().getSimpleVT(); MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); SDLoc DL(V128Reg); return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg); } // Gather data to see if the operation can be modelled as a // shuffle in combination with VEXTs. SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!"); SDLoc dl(Op); EVT VT = Op.getValueType(); unsigned NumElts = VT.getVectorNumElements(); struct ShuffleSourceInfo { SDValue Vec; unsigned MinElt; unsigned MaxElt; // We may insert some combination of BITCASTs and VEXT nodes to force Vec to // be compatible with the shuffle we intend to construct. As a result // ShuffleVec will be some sliding window into the original Vec. SDValue ShuffleVec; // Code should guarantee that element i in Vec starts at element "WindowBase // + i * WindowScale in ShuffleVec". int WindowBase; int WindowScale; ShuffleSourceInfo(SDValue Vec) : Vec(Vec), MinElt(std::numeric_limits::max()), MaxElt(0), ShuffleVec(Vec), WindowBase(0), WindowScale(1) {} bool operator ==(SDValue OtherVec) { return Vec == OtherVec; } }; // First gather all vectors used as an immediate source for this BUILD_VECTOR // node. 
SmallVector Sources; for (unsigned i = 0; i < NumElts; ++i) { SDValue V = Op.getOperand(i); if (V.isUndef()) continue; else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT || !isa(V.getOperand(1))) { // A shuffle can only come from building a vector from various // elements of other vectors, provided their indices are constant. return SDValue(); } // Add this element source to the list if it's not already there. SDValue SourceVec = V.getOperand(0); auto Source = find(Sources, SourceVec); if (Source == Sources.end()) Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec)); // Update the minimum and maximum lane number seen. unsigned EltNo = cast(V.getOperand(1))->getZExtValue(); Source->MinElt = std::min(Source->MinElt, EltNo); Source->MaxElt = std::max(Source->MaxElt, EltNo); } // Currently only do something sane when at most two source vectors // are involved. if (Sources.size() > 2) return SDValue(); // Find out the smallest element size among result and two sources, and use // it as element size to build the shuffle_vector. EVT SmallestEltTy = VT.getVectorElementType(); for (auto &Source : Sources) { EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType(); if (SrcEltTy.bitsLT(SmallestEltTy)) { SmallestEltTy = SrcEltTy; } } unsigned ResMultiplier = VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits(); NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits(); EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts); // If the source vector is too wide or too narrow, we may nevertheless be able // to construct a compatible shuffle either by concatenating it with UNDEF or // extracting a suitable range of elements. for (auto &Src : Sources) { EVT SrcVT = Src.ShuffleVec.getValueType(); if (SrcVT.getSizeInBits() == VT.getSizeInBits()) continue; // This stage of the search produces a source with the same element type as // the original, but with a total width matching the BUILD_VECTOR output. 
EVT EltVT = SrcVT.getVectorElementType(); unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits(); EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts); if (SrcVT.getSizeInBits() < VT.getSizeInBits()) { assert(2 * SrcVT.getSizeInBits() == VT.getSizeInBits()); // We can pad out the smaller vector for free, so if it's part of a // shuffle... Src.ShuffleVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec, DAG.getUNDEF(Src.ShuffleVec.getValueType())); continue; } assert(SrcVT.getSizeInBits() == 2 * VT.getSizeInBits()); if (Src.MaxElt - Src.MinElt >= NumSrcElts) { // Span too large for a VEXT to cope return SDValue(); } if (Src.MinElt >= NumSrcElts) { // The extraction can just take the second half Src.ShuffleVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, DAG.getConstant(NumSrcElts, dl, MVT::i64)); Src.WindowBase = -NumSrcElts; } else if (Src.MaxElt < NumSrcElts) { // The extraction can just take the first half Src.ShuffleVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, DAG.getConstant(0, dl, MVT::i64)); } else { // An actual VEXT is needed SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, DAG.getConstant(0, dl, MVT::i64)); SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, DAG.getConstant(NumSrcElts, dl, MVT::i64)); unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1); Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1, VEXTSrc2, DAG.getConstant(Imm, dl, MVT::i32)); Src.WindowBase = -Src.MinElt; } } // Another possible incompatibility occurs from the vector element types. We // can fix this by bitcasting the source vectors to the same type we intend // for the shuffle. 
for (auto &Src : Sources) { EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType(); if (SrcEltTy == SmallestEltTy) continue; assert(ShuffleVT.getVectorElementType() == SmallestEltTy); Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec); Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits(); Src.WindowBase *= Src.WindowScale; } // Final sanity check before we try to actually produce a shuffle. DEBUG( for (auto Src : Sources) assert(Src.ShuffleVec.getValueType() == ShuffleVT); ); // The stars all align, our next step is to produce the mask for the shuffle. SmallVector Mask(ShuffleVT.getVectorNumElements(), -1); int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits(); for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) { SDValue Entry = Op.getOperand(i); if (Entry.isUndef()) continue; auto Src = find(Sources, Entry.getOperand(0)); int EltNo = cast(Entry.getOperand(1))->getSExtValue(); // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit // trunc. So only std::min(SrcBits, DestBits) actually get defined in this // segment. EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType(); int BitsDefined = std::min(OrigEltTy.getSizeInBits(), VT.getScalarSizeInBits()); int LanesDefined = BitsDefined / BitsPerShuffleLane; // This source is expected to fill ResMultiplier lanes of the final shuffle, // starting at the appropriate offset. int *LaneMask = &Mask[i * ResMultiplier]; int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase; ExtractBase += NumElts * (Src - Sources.begin()); for (int j = 0; j < LanesDefined; ++j) LaneMask[j] = ExtractBase + j; } // Final check before we try to produce nonsense... 
if (!isShuffleMaskLegal(Mask, ShuffleVT)) return SDValue(); SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) }; for (unsigned i = 0; i < Sources.size(); ++i) ShuffleOps[i] = Sources[i].ShuffleVec; SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0], ShuffleOps[1], Mask); return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle); } // check if an EXT instruction can handle the shuffle mask when the // vector sources of the shuffle are the same. static bool isSingletonEXTMask(ArrayRef M, EVT VT, unsigned &Imm) { unsigned NumElts = VT.getVectorNumElements(); // Assume that the first shuffle index is not UNDEF. Fail if it is. if (M[0] < 0) return false; Imm = M[0]; // If this is a VEXT shuffle, the immediate value is the index of the first // element. The other shuffle indices must be the successive elements after // the first one. unsigned ExpectedElt = Imm; for (unsigned i = 1; i < NumElts; ++i) { // Increment the expected index. If it wraps around, just follow it // back to index zero and keep going. ++ExpectedElt; if (ExpectedElt == NumElts) ExpectedElt = 0; if (M[i] < 0) continue; // ignore UNDEF indices if (ExpectedElt != static_cast(M[i])) return false; } return true; } // check if an EXT instruction can handle the shuffle mask when the // vector sources of the shuffle are different. static bool isEXTMask(ArrayRef M, EVT VT, bool &ReverseEXT, unsigned &Imm) { // Look for the first non-undef element. const int *FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; }); // Benefit form APInt to handle overflow when calculating expected element. unsigned NumElts = VT.getVectorNumElements(); unsigned MaskBits = APInt(32, NumElts * 2).logBase2(); APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1); // The following shuffle indices must be the successive elements after the // first real element. 
const int *FirstWrongElt = std::find_if(FirstRealElt + 1, M.end(), [&](int Elt) {return Elt != ExpectedElt++ && Elt != -1;}); if (FirstWrongElt != M.end()) return false; // The index of an EXT is the first element if it is not UNDEF. // Watch out for the beginning UNDEFs. The EXT index should be the expected // value of the first element. E.g. // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>. // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>. // ExpectedElt is the last mask index plus 1. Imm = ExpectedElt.getZExtValue(); // There are two difference cases requiring to reverse input vectors. // For example, for vector <4 x i32> we have the following cases, // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>) // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>) // For both cases, we finally use mask <5, 6, 7, 0>, which requires // to reverse two input vectors. if (Imm < NumElts) ReverseEXT = true; else Imm -= NumElts; return true; } /// isREVMask - Check if a vector shuffle corresponds to a REV /// instruction with the specified blocksize. (The order of the elements /// within each block of the vector is reversed.) static bool isREVMask(ArrayRef M, EVT VT, unsigned BlockSize) { assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) && "Only possible block sizes for REV are: 16, 32, 64"); unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); unsigned BlockElts = M[0] + 1; // If the first shuffle index is UNDEF, be optimistic. 
if (M[0] < 0) BlockElts = BlockSize / EltSz; if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) return false; for (unsigned i = 0; i < NumElts; ++i) { if (M[i] < 0) continue; // ignore UNDEF indices if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts)) return false; } return true; } static bool isZIPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { unsigned NumElts = VT.getVectorNumElements(); WhichResult = (M[0] == 0 ? 0 : 1); unsigned Idx = WhichResult * NumElts / 2; for (unsigned i = 0; i != NumElts; i += 2) { if ((M[i] >= 0 && (unsigned)M[i] != Idx) || (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts)) return false; Idx += 1; } return true; } static bool isUZPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { unsigned NumElts = VT.getVectorNumElements(); WhichResult = (M[0] == 0 ? 0 : 1); for (unsigned i = 0; i != NumElts; ++i) { if (M[i] < 0) continue; // ignore UNDEF indices if ((unsigned)M[i] != 2 * i + WhichResult) return false; } return true; } static bool isTRNMask(ArrayRef M, EVT VT, unsigned &WhichResult) { unsigned NumElts = VT.getVectorNumElements(); WhichResult = (M[0] == 0 ? 0 : 1); for (unsigned i = 0; i < NumElts; i += 2) { if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) || (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult)) return false; } return true; } /// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. static bool isZIP_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult) { unsigned NumElts = VT.getVectorNumElements(); WhichResult = (M[0] == 0 ? 
0 : 1); unsigned Idx = WhichResult * NumElts / 2; for (unsigned i = 0; i != NumElts; i += 2) { if ((M[i] >= 0 && (unsigned)M[i] != Idx) || (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx)) return false; Idx += 1; } return true; } /// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, static bool isUZP_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult) { unsigned Half = VT.getVectorNumElements() / 2; WhichResult = (M[0] == 0 ? 0 : 1); for (unsigned j = 0; j != 2; ++j) { unsigned Idx = WhichResult; for (unsigned i = 0; i != Half; ++i) { int MIdx = M[i + j * Half]; if (MIdx >= 0 && (unsigned)MIdx != Idx) return false; Idx += 2; } } return true; } /// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. static bool isTRN_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult) { unsigned NumElts = VT.getVectorNumElements(); WhichResult = (M[0] == 0 ? 
0 : 1); for (unsigned i = 0; i < NumElts; i += 2) { if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) || (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult)) return false; } return true; } static bool isINSMask(ArrayRef M, int NumInputElements, bool &DstIsLeft, int &Anomaly) { if (M.size() != static_cast(NumInputElements)) return false; int NumLHSMatch = 0, NumRHSMatch = 0; int LastLHSMismatch = -1, LastRHSMismatch = -1; for (int i = 0; i < NumInputElements; ++i) { if (M[i] == -1) { ++NumLHSMatch; ++NumRHSMatch; continue; } if (M[i] == i) ++NumLHSMatch; else LastLHSMismatch = i; if (M[i] == i + NumInputElements) ++NumRHSMatch; else LastRHSMismatch = i; } if (NumLHSMatch == NumInputElements - 1) { DstIsLeft = true; Anomaly = LastLHSMismatch; return true; } else if (NumRHSMatch == NumInputElements - 1) { DstIsLeft = false; Anomaly = LastRHSMismatch; return true; } return false; } static bool isConcatMask(ArrayRef Mask, EVT VT, bool SplitLHS) { if (VT.getSizeInBits() != 128) return false; unsigned NumElts = VT.getVectorNumElements(); for (int I = 0, E = NumElts / 2; I != E; I++) { if (Mask[I] != I) return false; } int Offset = NumElts / 2; for (int I = NumElts / 2, E = NumElts; I != E; I++) { if (Mask[I] != I + SplitLHS * Offset) return false; } return true; } static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) { SDLoc DL(Op); EVT VT = Op.getValueType(); SDValue V0 = Op.getOperand(0); SDValue V1 = Op.getOperand(1); ArrayRef Mask = cast(Op)->getMask(); if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() || VT.getVectorElementType() != V1.getValueType().getVectorElementType()) return SDValue(); bool SplitV0 = V0.getValueSizeInBits() == 128; if (!isConcatMask(Mask, VT, SplitV0)) return SDValue(); EVT CastVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), VT.getVectorNumElements() / 2); if (SplitV0) { V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0, DAG.getConstant(0, DL, MVT::i64)); } if 
(V1.getValueSizeInBits() == 128) { V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1, DAG.getConstant(0, DL, MVT::i64)); } return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1); } /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit /// the specified operations to build the shuffle. static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl) { unsigned OpNum = (PFEntry >> 26) & 0x0F; unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1); unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1); enum { OP_COPY = 0, // Copy, used for things like to say it is <0,1,2,3> OP_VREV, OP_VDUP0, OP_VDUP1, OP_VDUP2, OP_VDUP3, OP_VEXT1, OP_VEXT2, OP_VEXT3, OP_VUZPL, // VUZP, left result OP_VUZPR, // VUZP, right result OP_VZIPL, // VZIP, left result OP_VZIPR, // VZIP, right result OP_VTRNL, // VTRN, left result OP_VTRNR // VTRN, right result }; if (OpNum == OP_COPY) { if (LHSID == (1 * 9 + 2) * 9 + 3) return LHS; assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!"); return RHS; } SDValue OpLHS, OpRHS; OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); EVT VT = OpLHS.getValueType(); switch (OpNum) { default: llvm_unreachable("Unknown shuffle opcode!"); case OP_VREV: // VREV divides the vector in half and swaps within the half. 
if (VT.getVectorElementType() == MVT::i32 || VT.getVectorElementType() == MVT::f32) return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS); // vrev <4 x i16> -> REV32 if (VT.getVectorElementType() == MVT::i16 || VT.getVectorElementType() == MVT::f16) return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS); // vrev <4 x i8> -> REV16 assert(VT.getVectorElementType() == MVT::i8); return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS); case OP_VDUP0: case OP_VDUP1: case OP_VDUP2: case OP_VDUP3: { EVT EltTy = VT.getVectorElementType(); unsigned Opcode; if (EltTy == MVT::i8) Opcode = AArch64ISD::DUPLANE8; else if (EltTy == MVT::i16 || EltTy == MVT::f16) Opcode = AArch64ISD::DUPLANE16; else if (EltTy == MVT::i32 || EltTy == MVT::f32) Opcode = AArch64ISD::DUPLANE32; else if (EltTy == MVT::i64 || EltTy == MVT::f64) Opcode = AArch64ISD::DUPLANE64; else llvm_unreachable("Invalid vector element type?"); if (VT.getSizeInBits() == 64) OpLHS = WidenVector(OpLHS, DAG); SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64); return DAG.getNode(Opcode, dl, VT, OpLHS, Lane); } case OP_VEXT1: case OP_VEXT2: case OP_VEXT3: { unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS); return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS, DAG.getConstant(Imm, dl, MVT::i32)); } case OP_VUZPL: return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); case OP_VUZPR: return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); case OP_VZIPL: return DAG.getNode(AArch64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); case OP_VZIPR: return DAG.getNode(AArch64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); case OP_VTRNL: return DAG.getNode(AArch64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); case OP_VTRNR: return DAG.getNode(AArch64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); } } static SDValue GenerateTBL(SDValue Op, ArrayRef ShuffleMask, SelectionDAG &DAG) { // Check to see if we can use the TBL instruction. 
SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); SDLoc DL(Op); EVT EltVT = Op.getValueType().getVectorElementType(); unsigned BytesPerElt = EltVT.getSizeInBits() / 8; SmallVector TBLMask; for (int Val : ShuffleMask) { for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) { unsigned Offset = Byte + Val * BytesPerElt; TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32)); } } MVT IndexVT = MVT::v8i8; unsigned IndexLen = 8; if (Op.getValueSizeInBits() == 128) { IndexVT = MVT::v16i8; IndexLen = 16; } SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1); SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2); SDValue Shuffle; if (V2.getNode()->isUndef()) { if (IndexLen == 8) V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst); Shuffle = DAG.getNode( ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst, DAG.getBuildVector(IndexVT, DL, makeArrayRef(TBLMask.data(), IndexLen))); } else { if (IndexLen == 8) { V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst); Shuffle = DAG.getNode( ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst, DAG.getBuildVector(IndexVT, DL, makeArrayRef(TBLMask.data(), IndexLen))); } else { // FIXME: We cannot, for the moment, emit a TBL2 instruction because we // cannot currently represent the register constraints on the input // table registers. 
// Shuffle = DAG.getNode(AArch64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst, // DAG.getBuildVector(IndexVT, DL, &TBLMask[0], // IndexLen)); Shuffle = DAG.getNode( ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32), V1Cst, V2Cst, DAG.getBuildVector(IndexVT, DL, makeArrayRef(TBLMask.data(), IndexLen))); } } return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle); } static unsigned getDUPLANEOp(EVT EltType) { if (EltType == MVT::i8) return AArch64ISD::DUPLANE8; if (EltType == MVT::i16 || EltType == MVT::f16) return AArch64ISD::DUPLANE16; if (EltType == MVT::i32 || EltType == MVT::f32) return AArch64ISD::DUPLANE32; if (EltType == MVT::i64 || EltType == MVT::f64) return AArch64ISD::DUPLANE64; llvm_unreachable("Invalid vector element type?"); } SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); EVT VT = Op.getValueType(); ShuffleVectorSDNode *SVN = cast(Op.getNode()); // Convert shuffles that are directly supported on NEON to target-specific // DAG nodes, instead of keeping them as shuffles and matching them again // during code selection. This is more efficient and avoids the possibility // of inconsistencies between legalization and selection. ArrayRef ShuffleMask = SVN->getMask(); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); if (SVN->isSplat()) { int Lane = SVN->getSplatIndex(); // If this is undef splat, generate it via "just" vdup, if possible. if (Lane == -1) Lane = 0; if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) return DAG.getNode(AArch64ISD::DUP, dl, V1.getValueType(), V1.getOperand(0)); // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non- // constant. If so, we can just reference the lane's definition directly. if (V1.getOpcode() == ISD::BUILD_VECTOR && !isa(V1.getOperand(Lane))) return DAG.getNode(AArch64ISD::DUP, dl, VT, V1.getOperand(Lane)); // Otherwise, duplicate from the lane of the input vector. 
unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType()); // SelectionDAGBuilder may have "helpfully" already extracted or conatenated // to make a vector of the same size as this SHUFFLE. We can ignore the // extract entirely, and canonicalise the concat using WidenVector. if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) { Lane += cast(V1.getOperand(1))->getZExtValue(); V1 = V1.getOperand(0); } else if (V1.getOpcode() == ISD::CONCAT_VECTORS) { unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2; Lane -= Idx * VT.getVectorNumElements() / 2; V1 = WidenVector(V1.getOperand(Idx), DAG); } else if (VT.getSizeInBits() == 64) V1 = WidenVector(V1, DAG); return DAG.getNode(Opcode, dl, VT, V1, DAG.getConstant(Lane, dl, MVT::i64)); } if (isREVMask(ShuffleMask, VT, 64)) return DAG.getNode(AArch64ISD::REV64, dl, V1.getValueType(), V1, V2); if (isREVMask(ShuffleMask, VT, 32)) return DAG.getNode(AArch64ISD::REV32, dl, V1.getValueType(), V1, V2); if (isREVMask(ShuffleMask, VT, 16)) return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1, V2); bool ReverseEXT = false; unsigned Imm; if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) { if (ReverseEXT) std::swap(V1, V2); Imm *= getExtFactor(V1); return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2, DAG.getConstant(Imm, dl, MVT::i32)); } else if (V2->isUndef() && isSingletonEXTMask(ShuffleMask, VT, Imm)) { Imm *= getExtFactor(V1); return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1, DAG.getConstant(Imm, dl, MVT::i32)); } unsigned WhichResult; if (isZIPMask(ShuffleMask, VT, WhichResult)) { unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2; return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2); } if (isUZPMask(ShuffleMask, VT, WhichResult)) { unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2; return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2); } if (isTRNMask(ShuffleMask, VT, WhichResult)) { unsigned Opc = (WhichResult == 0) ? 
AArch64ISD::TRN1 : AArch64ISD::TRN2; return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2); } if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) { unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2; return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1); } if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) { unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2; return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1); } if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) { unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2; return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1); } if (SDValue Concat = tryFormConcatFromShuffle(Op, DAG)) return Concat; bool DstIsLeft; int Anomaly; int NumInputElements = V1.getValueType().getVectorNumElements(); if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) { SDValue DstVec = DstIsLeft ? V1 : V2; SDValue DstLaneV = DAG.getConstant(Anomaly, dl, MVT::i64); SDValue SrcVec = V1; int SrcLane = ShuffleMask[Anomaly]; if (SrcLane >= NumInputElements) { SrcVec = V2; SrcLane -= VT.getVectorNumElements(); } SDValue SrcLaneV = DAG.getConstant(SrcLane, dl, MVT::i64); EVT ScalarVT = VT.getVectorElementType(); if (ScalarVT.getSizeInBits() < 32 && ScalarVT.isInteger()) ScalarVT = MVT::i32; return DAG.getNode( ISD::INSERT_VECTOR_ELT, dl, VT, DstVec, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, SrcVec, SrcLaneV), DstLaneV); } // If the shuffle is not directly supported and it has 4 elements, use // the PerfectShuffle-generated table to synthesize it from other shuffles. unsigned NumElts = VT.getVectorNumElements(); if (NumElts == 4) { unsigned PFIndexes[4]; for (unsigned i = 0; i != 4; ++i) { if (ShuffleMask[i] < 0) PFIndexes[i] = 8; else PFIndexes[i] = ShuffleMask[i]; } // Compute the index in the perfect shuffle table. 
unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 + PFIndexes[2] * 9 + PFIndexes[3]; unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; unsigned Cost = (PFEntry >> 30); if (Cost <= 4) return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); } return GenerateTBL(Op, ShuffleMask, DAG); } static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits, APInt &UndefBits) { EVT VT = BVN->getValueType(0); APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { unsigned NumSplats = VT.getSizeInBits() / SplatBitSize; for (unsigned i = 0; i < NumSplats; ++i) { CnstBits <<= SplatBitSize; UndefBits <<= SplatBitSize; CnstBits |= SplatBits.zextOrTrunc(VT.getSizeInBits()); UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits()); } return true; } return false; } SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op, SelectionDAG &DAG) const { BuildVectorSDNode *BVN = dyn_cast(Op.getOperand(1).getNode()); SDValue LHS = Op.getOperand(0); SDLoc dl(Op); EVT VT = Op.getValueType(); if (!BVN) return Op; APInt CnstBits(VT.getSizeInBits(), 0); APInt UndefBits(VT.getSizeInBits(), 0); if (resolveBuildVector(BVN, CnstBits, UndefBits)) { // We only have BIC vector immediate instruction, which is and-not. CnstBits = ~CnstBits; // We make use of a little bit of goto ickiness in order to avoid having to // duplicate the immediate matching logic for the undef toggled case. bool SecondTry = false; AttemptModImm: if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) { CnstBits = CnstBits.zextOrTrunc(64); uint64_t CnstVal = CnstBits.getZExtValue(); if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) { CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, DAG.getConstant(CnstVal, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) { CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, DAG.getConstant(CnstVal, dl, MVT::i32), DAG.getConstant(8, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) { CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, DAG.getConstant(CnstVal, dl, MVT::i32), DAG.getConstant(16, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) { CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, DAG.getConstant(CnstVal, dl, MVT::i32), DAG.getConstant(24, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) { CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, DAG.getConstant(CnstVal, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) { CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v8i16 : MVT::v4i16; SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, DAG.getConstant(CnstVal, dl, MVT::i32), DAG.getConstant(8, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } } if (SecondTry) goto FailedModImm; SecondTry = true; CnstBits = ~UndefBits; goto AttemptModImm; } // We can always fall back to a non-immediate AND. FailedModImm: return Op; } // Specialized code to quickly find if PotentialBVec is a BuildVector that // consists of only the same constant int value, returned in reference arg // ConstVal static bool isAllConstantBuildVector(const SDValue &PotentialBVec, uint64_t &ConstVal) { BuildVectorSDNode *Bvec = dyn_cast(PotentialBVec); if (!Bvec) return false; ConstantSDNode *FirstElt = dyn_cast(Bvec->getOperand(0)); if (!FirstElt) return false; EVT VT = Bvec->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); for (unsigned i = 1; i < NumElts; ++i) if (dyn_cast(Bvec->getOperand(i)) != FirstElt) return false; ConstVal = FirstElt->getZExtValue(); return true; } static unsigned getIntrinsicID(const SDNode *N) { unsigned Opcode = N->getOpcode(); switch (Opcode) { default: return Intrinsic::not_intrinsic; case ISD::INTRINSIC_WO_CHAIN: { unsigned IID = cast(N->getOperand(0))->getZExtValue(); if (IID < Intrinsic::num_intrinsics) return IID; return Intrinsic::not_intrinsic; } } } // Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)), // to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a // BUILD_VECTORs with constant element C1, C2 is a constant, and C1 == ~C2. // Also, logical shift right -> sri, with the same structure. static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); if (!VT.isVector()) return SDValue(); SDLoc DL(N); // Is the first op an AND? const SDValue And = N->getOperand(0); if (And.getOpcode() != ISD::AND) return SDValue(); // Is the second op an shl or lshr? 
SDValue Shift = N->getOperand(1); // This will have been turned into: AArch64ISD::VSHL vector, #shift // or AArch64ISD::VLSHR vector, #shift unsigned ShiftOpc = Shift.getOpcode(); if ((ShiftOpc != AArch64ISD::VSHL && ShiftOpc != AArch64ISD::VLSHR)) return SDValue(); bool IsShiftRight = ShiftOpc == AArch64ISD::VLSHR; // Is the shift amount constant? ConstantSDNode *C2node = dyn_cast(Shift.getOperand(1)); if (!C2node) return SDValue(); // Is the and mask vector all constant? uint64_t C1; if (!isAllConstantBuildVector(And.getOperand(1), C1)) return SDValue(); // Is C1 == ~C2, taking into account how much one can shift elements of a // particular size? uint64_t C2 = C2node->getZExtValue(); unsigned ElemSizeInBits = VT.getScalarSizeInBits(); if (C2 > ElemSizeInBits) return SDValue(); unsigned ElemMask = (1 << ElemSizeInBits) - 1; if ((C1 & ElemMask) != (~C2 & ElemMask)) return SDValue(); SDValue X = And.getOperand(0); SDValue Y = Shift.getOperand(0); unsigned Intrin = IsShiftRight ? Intrinsic::aarch64_neon_vsri : Intrinsic::aarch64_neon_vsli; SDValue ResultSLI = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, DAG.getConstant(Intrin, DL, MVT::i32), X, Y, Shift.getOperand(1)); DEBUG(dbgs() << "aarch64-lower: transformed: \n"); DEBUG(N->dump(&DAG)); DEBUG(dbgs() << "into: \n"); DEBUG(ResultSLI->dump(&DAG)); ++NumShiftInserts; return ResultSLI; } SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op, SelectionDAG &DAG) const { // Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2)) if (EnableAArch64SlrGeneration) { if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG)) return Res; } BuildVectorSDNode *BVN = dyn_cast(Op.getOperand(0).getNode()); SDValue LHS = Op.getOperand(1); SDLoc dl(Op); EVT VT = Op.getValueType(); // OR commutes, so try swapping the operands. 
if (!BVN) { LHS = Op.getOperand(0); BVN = dyn_cast(Op.getOperand(1).getNode()); } if (!BVN) return Op; APInt CnstBits(VT.getSizeInBits(), 0); APInt UndefBits(VT.getSizeInBits(), 0); if (resolveBuildVector(BVN, CnstBits, UndefBits)) { // We make use of a little bit of goto ickiness in order to avoid having to // duplicate the immediate matching logic for the undef toggled case. bool SecondTry = false; AttemptModImm: if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) { CnstBits = CnstBits.zextOrTrunc(64); uint64_t CnstVal = CnstBits.getZExtValue(); if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) { CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, DAG.getConstant(CnstVal, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) { CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, DAG.getConstant(CnstVal, dl, MVT::i32), DAG.getConstant(8, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) { CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, DAG.getConstant(CnstVal, dl, MVT::i32), DAG.getConstant(16, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) { CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v4i32 : MVT::v2i32; SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, DAG.getConstant(CnstVal, dl, MVT::i32), DAG.getConstant(24, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) { CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, DAG.getConstant(CnstVal, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) { CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal); MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, DAG.getConstant(CnstVal, dl, MVT::i32), DAG.getConstant(8, dl, MVT::i32)); return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); } } if (SecondTry) goto FailedModImm; SecondTry = true; CnstBits = UndefBits; goto AttemptModImm; } // We can always fall back to a non-immediate OR. FailedModImm: return Op; } // Normalize the operands of BUILD_VECTOR. The value of constant operands will // be truncated to fit element width. 
static SDValue NormalizeBuildVector(SDValue Op, SelectionDAG &DAG) {
  // Returns Op itself when the element type is floating point or wider than
  // 16 bits; otherwise returns a rebuilt BUILD_VECTOR of the same type whose
  // constant lanes have been re-created as i32 constants.
  // NOTE(review): SmallVector and dyn_cast appear below without template
  // arguments; they may have been lost in this copy of the file - confirm
  // against upstream.
  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  EVT EltTy= VT.getVectorElementType();

  if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16)
    return Op;

  SmallVector Ops;
  for (SDValue Lane : Op->ops()) {
    if (auto *CstLane = dyn_cast(Lane)) {
      // LowBits is an element-width APInt built from the lane's value; the
      // lane is replaced by an i32 constant holding LowBits zero-extended.
      APInt LowBits(EltTy.getSizeInBits(), CstLane->getZExtValue());
      Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
    }
    Ops.push_back(Lane);
  }
  return DAG.getBuildVector(VT, dl, Ops);
}

/// Custom lowering for BUILD_VECTOR.
///
/// The operands are first normalized with NormalizeBuildVector. If
/// resolveBuildVector succeeds and the high and low 64 bits of the constant
/// agree, the function tries to match the 64-bit value against the AdvSIMD
/// modified-immediate encodings (MOVI, FMOV, then MVNI on the inverted value),
/// first with CnstBits and then once more with UndefBits. If no immediate
/// form matches, it falls through to the DUP / insert / shuffle strategies
/// described at FailedModImm. An empty SDValue is returned to request the
/// default expansion.
SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  Op = NormalizeBuildVector(Op, DAG);
  BuildVectorSDNode *BVN = cast(Op.getNode());
  APInt CnstBits(VT.getSizeInBits(), 0);
  APInt UndefBits(VT.getSizeInBits(), 0);
  if (resolveBuildVector(BVN, CnstBits, UndefBits)) {
    // We make use of a little bit of goto ickiness in order to avoid having to
    // duplicate the immediate matching logic for the undef toggled case.
    bool SecondTry = false;
  AttemptModImm:

    if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) {
      CnstBits = CnstBits.zextOrTrunc(64);
      uint64_t CnstVal = CnstBits.getZExtValue();

      // Certain magic vector constants (used to express things like NOT
      // and NEG) are passed through unmodified. This allows codegen patterns
      // for these operations to match. Special-purpose patterns will lower
      // these immediates to MOVIs if it proves necessary.
      if (VT.isInteger() && (CnstVal == 0 || CnstVal == ~0ULL))
        return Op;

      // The many faces of MOVI...
      if (AArch64_AM::isAdvSIMDModImmType10(CnstVal)) {
        CnstVal = AArch64_AM::encodeAdvSIMDModImmType10(CnstVal);
        if (VT.getSizeInBits() == 128) {
          SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::v2i64,
                                    DAG.getConstant(CnstVal, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }

        // Support the V64 version via subregister insertion.
        SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::f64,
                                  DAG.getConstant(CnstVal, dl, MVT::i32));
        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
      }

      // Types 1-4 use 32-bit lanes; the second constant operand passed to
      // MOVIshift is 0, 8, 16 and 24 respectively.
      if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
        CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
        SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
                                  DAG.getConstant(CnstVal, dl, MVT::i32),
                                  DAG.getConstant(0, dl, MVT::i32));
        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
      }

      if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
        CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
        SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
                                  DAG.getConstant(CnstVal, dl, MVT::i32),
                                  DAG.getConstant(8, dl, MVT::i32));
        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
      }

      if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
        CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
        SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
                                  DAG.getConstant(CnstVal, dl, MVT::i32),
                                  DAG.getConstant(16, dl, MVT::i32));
        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
      }

      if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
        CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
        SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
                                  DAG.getConstant(CnstVal, dl, MVT::i32),
                                  DAG.getConstant(24, dl, MVT::i32));
        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
      }

      // Types 5-6 use 16-bit lanes.
      if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
        CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
        SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
                                  DAG.getConstant(CnstVal, dl, MVT::i32),
                                  DAG.getConstant(0, dl, MVT::i32));
        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
      }

      if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
        CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
        SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
                                  DAG.getConstant(CnstVal, dl, MVT::i32),
                                  DAG.getConstant(8, dl, MVT::i32));
        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
      }

      // Types 7-8 are emitted as MOVImsl nodes.
      if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) {
        CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal);
        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
        SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
                                  DAG.getConstant(CnstVal, dl, MVT::i32),
                                  DAG.getConstant(264, dl, MVT::i32));
        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
      }

      if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) {
        CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal);
        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
        SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
                                  DAG.getConstant(CnstVal, dl, MVT::i32),
                                  DAG.getConstant(272, dl, MVT::i32));
        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
      }

      // Type 9 uses 8-bit lanes and takes only the encoded immediate.
      if (AArch64_AM::isAdvSIMDModImmType9(CnstVal)) {
        CnstVal = AArch64_AM::encodeAdvSIMDModImmType9(CnstVal);
        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
        SDValue Mov = DAG.getNode(AArch64ISD::MOVI, dl, MovTy,
                                  DAG.getConstant(CnstVal, dl, MVT::i32));
        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
      }

      // The few faces of FMOV...
      if (AArch64_AM::isAdvSIMDModImmType11(CnstVal)) {
        CnstVal = AArch64_AM::encodeAdvSIMDModImmType11(CnstVal);
        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4f32 : MVT::v2f32;
        SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MovTy,
                                  DAG.getConstant(CnstVal, dl, MVT::i32));
        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
      }

      // Type 12 is only attempted for 128-bit vectors (v2f64).
      if (AArch64_AM::isAdvSIMDModImmType12(CnstVal) &&
          VT.getSizeInBits() == 128) {
        CnstVal = AArch64_AM::encodeAdvSIMDModImmType12(CnstVal);
        SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MVT::v2f64,
                                  DAG.getConstant(CnstVal, dl, MVT::i32));
        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
      }

      // The many faces of MVNI...
      // From here on the bitwise complement of the constant is matched.
      CnstVal = ~CnstVal;
      if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
        CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
        SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
                                  DAG.getConstant(CnstVal, dl, MVT::i32),
                                  DAG.getConstant(0, dl, MVT::i32));
        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
      }

      if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
        CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
        SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
                                  DAG.getConstant(CnstVal, dl, MVT::i32),
                                  DAG.getConstant(8, dl, MVT::i32));
        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
      }

      if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
        CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
        SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
                                  DAG.getConstant(CnstVal, dl, MVT::i32),
                                  DAG.getConstant(16, dl, MVT::i32));
        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
      }

      if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
        CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
        SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
                                  DAG.getConstant(CnstVal, dl, MVT::i32),
                                  DAG.getConstant(24, dl, MVT::i32));
        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
      }

      if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
        CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
        SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
                                  DAG.getConstant(CnstVal, dl, MVT::i32),
                                  DAG.getConstant(0, dl, MVT::i32));
        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
      }

      if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
        CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
        SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
                                  DAG.getConstant(CnstVal, dl, MVT::i32),
                                  DAG.getConstant(8, dl, MVT::i32));
        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
      }

      if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) {
        CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal);
        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
        SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
                                  DAG.getConstant(CnstVal, dl, MVT::i32),
                                  DAG.getConstant(264, dl, MVT::i32));
        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
      }

      if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) {
        CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal);
        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
        SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
                                  DAG.getConstant(CnstVal, dl, MVT::i32),
                                  DAG.getConstant(272, dl, MVT::i32));
        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
      }
    }

    // Nothing matched: retry exactly once with UndefBits in place of
    // CnstBits, then give up on the immediate forms.
    if (SecondTry)
      goto FailedModImm;
    SecondTry = true;
    CnstBits = UndefBits;
    goto AttemptModImm;
  }
FailedModImm:

  // Scan through the operands to find some interesting properties we can
  // exploit:
  //   1) If only one value is used, we can use a DUP, or
  //   2) if only the low element is not undef, we can just insert that, or
  //   3) if only one constant value is used (w/ some non-constant lanes),
  //      we can splat the constant value into the whole vector then fill
  //      in the non-constant lanes.
  //   4) FIXME: If different constant values are used, but we can intelligently
  //             select the values we'll be overwriting for the non-constant
  //             lanes such that we can directly materialize the vector
  //             some other way (MOVI, e.g.), we can be sneaky.
  unsigned NumElts = VT.getVectorNumElements();
  bool isOnlyLowElement = true;
  bool usesOnlyOneValue = true;
  bool usesOnlyOneConstantValue = true;
  bool isConstant = true;
  unsigned NumConstantLanes = 0;
  SDValue Value;
  SDValue ConstantValue;
  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue V = Op.getOperand(i);
    // Undef lanes do not influence any of the properties gathered here.
    if (V.isUndef())
      continue;
    if (i > 0)
      isOnlyLowElement = false;
    if (!isa(V) && !isa(V))
      isConstant = false;

    if (isa(V) || isa(V)) {
      ++NumConstantLanes;
      if (!ConstantValue.getNode())
        ConstantValue = V;
      else if (ConstantValue != V)
        usesOnlyOneConstantValue = false;
    }

    if (!Value.getNode())
      Value = V;
    else if (V != Value)
      usesOnlyOneValue = false;
  }

  // Every lane was undef.
  if (!Value.getNode())
    return DAG.getUNDEF(VT);

  if (isOnlyLowElement)
    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);

  // Use DUP for non-constant splats. For f32 constant splats, reduce to
  // i32 and try again.
  if (usesOnlyOneValue) {
    if (!isConstant) {
      if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
          Value.getValueType() != VT)
        return DAG.getNode(AArch64ISD::DUP, dl, VT, Value);

      // This is actually a DUPLANExx operation, which keeps everything vectory.

      // DUPLANE works on 128-bit vectors, widen it if necessary.
      SDValue Lane = Value.getOperand(1);
      Value = Value.getOperand(0);
      if (Value.getValueSizeInBits() == 64)
        Value = WidenVector(Value, DAG);

      unsigned Opcode = getDUPLANEOp(VT.getVectorElementType());
      return DAG.getNode(Opcode, dl, VT, Value, Lane);
    }

    if (VT.getVectorElementType().isFloatingPoint()) {
      // Bitcast each lane to the same-width integer type, lower the resulting
      // integer BUILD_VECTOR recursively, and bitcast the result back to VT.
      SmallVector Ops;
      EVT EltTy = VT.getVectorElementType();
      assert ((EltTy == MVT::f16 || EltTy == MVT::f32 || EltTy == MVT::f64) &&
              "Unsupported floating-point vector type");
      MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
      for (unsigned i = 0; i < NumElts; ++i)
        Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
      EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
      SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
      Val = LowerBUILD_VECTOR(Val, DAG);
      if (Val.getNode())
        return DAG.getNode(ISD::BITCAST, dl, VT, Val);
    }
  }

  // If there was only one constant value used and for more than one lane,
  // start by splatting that value, then replace the non-constant lanes. This
  // is better than the default, which will perform a separate initialization
  // for each lane.
  if (NumConstantLanes > 0 && usesOnlyOneConstantValue) {
    SDValue Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
    // Now insert the non-constant lanes.
    for (unsigned i = 0; i < NumElts; ++i) {
      SDValue V = Op.getOperand(i);
      SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
      if (!isa(V) && !isa(V)) {
        // Note that type legalization likely mucked about with the VT of the
        // source operand, so we may have to convert it here before inserting.
        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
      }
    }
    return Val;
  }

  // If all elements are constants and the case above didn't get hit, fall back
  // to the default expansion, which will generate a load from the constant
  // pool.
  if (isConstant)
    return SDValue();

  // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
  if (NumElts >= 4) {
    if (SDValue shuffle = ReconstructShuffle(Op, DAG))
      return shuffle;
  }

  // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
  // know the default expansion would otherwise fall back on something even
  // worse. For a vector with one or two non-undef values, that's
  // scalar_to_vector for the elements followed by a shuffle (provided the
  // shuffle is valid for the target) and materialization element by element
  // on the stack followed by a load for everything else.
  if (!isConstant && !usesOnlyOneValue) {
    SDValue Vec = DAG.getUNDEF(VT);
    SDValue Op0 = Op.getOperand(0);
    unsigned ElemSize = VT.getScalarSizeInBits();
    unsigned i = 0;
    // For 32 and 64 bit types, use INSERT_SUBREG for lane zero to
    // a) Avoid a RMW dependency on the full vector register, and
    // b) Allow the register coalescer to fold away the copy if the
    //    value is already in an S or D register.
    // Do not do this for UNDEF/LOAD nodes because we have better patterns
    // for those avoiding the SCALAR_TO_VECTOR/BUILD_VECTOR.
    if (!Op0.isUndef() && Op0.getOpcode() != ISD::LOAD &&
        (ElemSize == 32 || ElemSize == 64)) {
      unsigned SubIdx = ElemSize == 32 ? AArch64::ssub : AArch64::dsub;
      MachineSDNode *N =
          DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Vec, Op0,
                             DAG.getTargetConstant(SubIdx, dl, MVT::i32));
      Vec = SDValue(N, 0);
      // Lane zero is handled; the loop below starts at lane one.
      ++i;
    }
    for (; i < NumElts; ++i) {
      SDValue V = Op.getOperand(i);
      if (V.isUndef())
        continue;
      SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
      Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
    }
    return Vec;
  }

  // Just use the default expansion. We failed to find a better alternative.
return SDValue(); }

/// Custom lowering for INSERT_VECTOR_ELT.
///
/// Returns an empty SDValue when the lane operand (operand 2) is not matched
/// by the dyn_cast below or is out of range, or when the vector type is not
/// one of the listed V64/V128 types. Returns Op unchanged for the listed V128
/// types. For the listed V64 types the vector is widened, the insertion is
/// performed on the wide type, and the result is narrowed again.
SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                      SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");

  // Check for non-constant or out of range lane.
  // NOTE(review): dyn_cast appears here without a template argument; it may
  // have been lost in this copy of the file - confirm against upstream.
  EVT VT = Op.getOperand(0).getValueType();
  ConstantSDNode *CI = dyn_cast(Op.getOperand(2));
  if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
    return SDValue();

  // Insertion/extraction are legal for V128 types.
  if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
      VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
      VT == MVT::v8f16)
    return Op;

  if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
      VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16)
    return SDValue();

  // For V64 types, we perform insertion by expanding the value
  // to a V128 type and perform the insertion on that.
  SDLoc DL(Op);
  SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
  EVT WideTy = WideVec.getValueType();

  SDValue Node = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideTy, WideVec,
                             Op.getOperand(1), Op.getOperand(2));
  // Re-narrow the resultant vector.
  return NarrowVector(Node, DAG);
}

/// Custom lowering for EXTRACT_VECTOR_ELT.
///
/// Mirrors LowerINSERT_VECTOR_ELT: the lane is operand 1, the listed V128
/// types are returned unchanged, and the listed V64 types are widened before
/// extracting. When the widened element type is i8 or i16 the extraction is
/// emitted with an i32 result type.
SDValue
AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                               SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");

  // Check for non-constant or out of range lane.
  EVT VT = Op.getOperand(0).getValueType();
  ConstantSDNode *CI = dyn_cast(Op.getOperand(1));
  if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
    return SDValue();

  // Insertion/extraction are legal for V128 types.
  if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
      VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
      VT == MVT::v8f16)
    return Op;

  if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
      VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16)
    return SDValue();

  // For V64 types, we perform extraction by expanding the value
  // to a V128 type and perform the extraction on that.
  SDLoc DL(Op);
  SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
  EVT WideTy = WideVec.getValueType();

  EVT ExtrTy = WideTy.getVectorElementType();
  if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
    ExtrTy = MVT::i32;

  // For extractions, we just return the result directly.
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec,
                     Op.getOperand(1));
}

/// Custom lowering for EXTRACT_SUBVECTOR.
///
/// Returns Op unchanged when the start index is 0, or when the result is 64
/// bits wide and the start index times the source element size equals 64.
/// Every other case returns an empty SDValue.
SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
                                                      SelectionDAG &DAG) const {
  EVT VT = Op.getOperand(0).getValueType();
  SDLoc dl(Op);
  // Just in case...
  if (!VT.isVector())
    return SDValue();

  ConstantSDNode *Cst = dyn_cast(Op.getOperand(1));
  if (!Cst)
    return SDValue();
  unsigned Val = Cst->getZExtValue();

  unsigned Size = Op.getValueSizeInBits();

  // This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
  if (Val == 0)
    return Op;

  // If this is extracting the upper 64-bits of a 128-bit vector, we match
  // that directly.
  if (Size == 64 && Val * VT.getScalarSizeInBits() == 64)
    return Op;

  return SDValue();
}

/// Reports whether the shuffle mask M is considered legal for type VT.
///
/// For 4-element 64- or 128-bit vectors the perfect shuffle table is consulted
/// first: each mask entry becomes a base-9 digit (negative entries map to 8),
/// and the mask is accepted when the top two bits of the table entry, read as
/// a cost, are <= 4. Otherwise the mask is accepted if any of the mask
/// predicates in the return expression matches.
bool AArch64TargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M,
                                               EVT VT) const {
  if (VT.getVectorNumElements() == 4 &&
      (VT.is128BitVector() || VT.is64BitVector())) {
    unsigned PFIndexes[4];
    for (unsigned i = 0; i != 4; ++i) {
      if (M[i] < 0)
        PFIndexes[i] = 8;
      else
        PFIndexes[i] = M[i];
    }

    // Compute the index in the perfect shuffle table.
    unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
                            PFIndexes[2] * 9 + PFIndexes[3];
    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
    unsigned Cost = (PFEntry >> 30);

    if (Cost <= 4)
      return true;
  }

  // Out-parameters required by the predicates below; their values are unused.
  bool DummyBool;
  int DummyInt;
  unsigned DummyUnsigned;

  return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) ||
          isREVMask(M, VT, 32) || isREVMask(M, VT, 16) ||
          isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
          // isTBLMask(M, VT) || // FIXME: Port TBL support from ARM.
isTRNMask(M, VT, DummyUnsigned) || isUZPMask(M, VT, DummyUnsigned) ||
          isZIPMask(M, VT, DummyUnsigned) ||
          isTRN_v_undef_Mask(M, VT, DummyUnsigned) ||
          isUZP_v_undef_Mask(M, VT, DummyUnsigned) ||
          isZIP_v_undef_Mask(M, VT, DummyUnsigned) ||
          isINSMask(M, VT.getVectorNumElements(), DummyBool, DummyInt) ||
          isConcatMask(M, VT, VT.getSizeInBits() == 128));
}

/// getVShiftImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift operation, where all the elements of the
/// build_vector must have the same constant integer value.
///
/// On success the sign-extended splat value is stored in Cnt and true is
/// returned; on failure Cnt is left untouched and false is returned.
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
  // Ignore bit_converts.
  while (Op.getOpcode() == ISD::BITCAST)
    Op = Op.getOperand(0);
  // NOTE(review): dyn_cast appears here without a template argument; it may
  // have been lost in this copy of the file - confirm against upstream.
  BuildVectorSDNode *BVN = dyn_cast(Op.getNode());
  APInt SplatBits, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  // Reject non-BUILD_VECTOR nodes, non-splats, and splats whose reported bit
  // size exceeds the element size.
  if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
                                    HasAnyUndefs, ElementBits) ||
      SplatBitSize > ElementBits)
    return false;
  Cnt = SplatBits.getSExtValue();
  return true;
}

/// isVShiftLImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift left operation. That value must be in the range:
///   0 <= Value < ElementBits for a left shift; or
///   0 <= Value <= ElementBits for a long left shift.
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
  assert(VT.isVector() && "vector shift count is not a vector type");
  int64_t ElementBits = VT.getScalarSizeInBits();
  if (!getVShiftImm(Op, ElementBits, Cnt))
    return false;
  // Subtracting one in the long case turns the strict '<' into '<='.
  return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
}

/// isVShiftRImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift right operation. The value must be in the range:
///   1 <= Value <= ElementBits for a right shift; or
///   1 <= Value <= ElementBits/2 for a narrow right shift.
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) {
  assert(VT.isVector() && "vector shift count is not a vector type");
  int64_t ElementBits = VT.getScalarSizeInBits();
  if (!getVShiftImm(Op, ElementBits, Cnt))
    return false;
  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
}

/// Custom lowering for vector SHL, SRA and SRL.
///
/// Returns Op unchanged when the shift amount operand is not a vector.
/// A splat-constant amount accepted by isVShiftLImm / isVShiftRImm and smaller
/// than the element size becomes VSHL / VASHR / VLSHR with an i32 immediate.
/// Otherwise SHL becomes the aarch64_neon_ushl intrinsic, and SRA / SRL become
/// aarch64_neon_sshl / aarch64_neon_ushl applied to the negated amount.
SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
                                                      SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  int64_t Cnt;

  if (!Op.getOperand(1).getValueType().isVector())
    return Op;
  unsigned EltSize = VT.getScalarSizeInBits();

  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("unexpected shift opcode");

  case ISD::SHL:
    if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
      return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
                         DAG.getConstant(Cnt, DL, MVT::i32));
    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
                       DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
                                       MVT::i32),
                       Op.getOperand(0), Op.getOperand(1));
  case ISD::SRA:
  case ISD::SRL:
    // Right shift immediate
    if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) {
      unsigned Opc =
          (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
      return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
                         DAG.getConstant(Cnt, DL, MVT::i32));
    }

    // Right shift register.  Note, there is not a shift right register
    // instruction, but the shift left register instruction takes a signed
    // value, where negative numbers specify a right shift.
    unsigned Opc = (Op.getOpcode() == ISD::SRA) ?
Intrinsic::aarch64_neon_sshl : Intrinsic::aarch64_neon_ushl;
    // negate the shift amount
    SDValue NegShift = DAG.getNode(AArch64ISD::NEG, DL, VT, Op.getOperand(1));
    SDValue NegShiftLeft =
        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
                    DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0),
                    NegShift);
    return NegShiftLeft;
  }

  return SDValue();
}

/// Emits a single AArch64 vector comparison node for LHS <CC> RHS.
///
/// VT is the result type and must have the same total size as the type of
/// LHS. When RHS is a BUILD_VECTOR that resolveBuildVector resolves to
/// all-zero bits, the compare-against-zero node variants (suffix 'z') are
/// used where one exists. Conditions without a direct node are implemented by
/// swapping the operands or, for NE, by negating an equality compare.
/// Returns an empty SDValue for condition codes that are not handled. NoNans
/// is only consulted for the floating-point LT case.
static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
                                    AArch64CC::CondCode CC, bool NoNans, EVT VT,
                                    const SDLoc &dl, SelectionDAG &DAG) {
  EVT SrcVT = LHS.getValueType();
  assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
         "function only supposed to emit natural comparisons");

  // NOTE(review): dyn_cast appears here without a template argument; it may
  // have been lost in this copy of the file - confirm against upstream.
  BuildVectorSDNode *BVN = dyn_cast(RHS.getNode());
  APInt CnstBits(VT.getSizeInBits(), 0);
  APInt UndefBits(VT.getSizeInBits(), 0);
  bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits);
  bool IsZero = IsCnst && (CnstBits == 0);

  // Floating-point element types.
  if (SrcVT.getVectorElementType().isFloatingPoint()) {
    switch (CC) {
    default:
      return SDValue();
    case AArch64CC::NE: {
      SDValue Fcmeq;
      if (IsZero)
        Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
      else
        Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
      return DAG.getNode(AArch64ISD::NOT, dl, VT, Fcmeq);
    }
    case AArch64CC::EQ:
      if (IsZero)
        return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
      return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
    case AArch64CC::GE:
      if (IsZero)
        return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
      return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
    case AArch64CC::GT:
      if (IsZero)
        return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
      return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
    case AArch64CC::LS:
      if (IsZero)
        return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
      return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
    case AArch64CC::LT:
      if (!NoNans)
        return SDValue();
      // If we ignore NaNs then we can use the MI implementation.
      LLVM_FALLTHROUGH;
    case AArch64CC::MI:
      if (IsZero)
        return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
      return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
    }
  }

  // Non-floating-point element types.
  switch (CC) {
  default:
    return SDValue();
  case AArch64CC::NE: {
    SDValue Cmeq;
    if (IsZero)
      Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
    else
      Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
    return DAG.getNode(AArch64ISD::NOT, dl, VT, Cmeq);
  }
  case AArch64CC::EQ:
    if (IsZero)
      return DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
    return DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
  case AArch64CC::GE:
    if (IsZero)
      return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS);
    return DAG.getNode(AArch64ISD::CMGE, dl, VT, LHS, RHS);
  case AArch64CC::GT:
    if (IsZero)
      return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS);
    return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS);
  case AArch64CC::LE:
    if (IsZero)
      return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS);
    return DAG.getNode(AArch64ISD::CMGE, dl, VT, RHS, LHS);
  case AArch64CC::LS:
    return DAG.getNode(AArch64ISD::CMHS, dl, VT, RHS, LHS);
  case AArch64CC::LO:
    return DAG.getNode(AArch64ISD::CMHI, dl, VT, RHS, LHS);
  case AArch64CC::LT:
    if (IsZero)
      return DAG.getNode(AArch64ISD::CMLTz, dl, VT, LHS);
    return DAG.getNode(AArch64ISD::CMGT, dl, VT, RHS, LHS);
  case AArch64CC::HI:
    return DAG.getNode(AArch64ISD::CMHI, dl, VT, LHS, RHS);
  case AArch64CC::HS:
    return DAG.getNode(AArch64ISD::CMHS, dl, VT, LHS, RHS);
  }
}

/// Custom lowering for a vector SETCC.
///
/// Integer element types are translated with changeIntCCToAArch64CC and
/// emitted as a single comparison, then sign-extended or truncated to the
/// type of Op.
SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
                                           SelectionDAG &DAG) const {
  ISD::CondCode CC = cast(Op.getOperand(2))->get();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // The comparison is emitted on the integer vector type matching LHS.
  EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
  SDLoc dl(Op);

  if (LHS.getValueType().getVectorElementType().isInteger()) {
    assert(LHS.getValueType() == RHS.getValueType());
    AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
    SDValue Cmp =
        EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG);
    return DAG.getSExtOrTrunc(Cmp, dl,
Op.getValueType());
  }

  // f16 element vectors are not handled here.
  if (LHS.getValueType().getVectorElementType() == MVT::f16)
    return SDValue();

  assert(LHS.getValueType().getVectorElementType() == MVT::f32 ||
         LHS.getValueType().getVectorElementType() == MVT::f64);

  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
  // clean.  Some of them require two branches to implement.
  AArch64CC::CondCode CC1, CC2;
  bool ShouldInvert;
  changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);

  bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
  SDValue Cmp =
      EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
  if (!Cmp.getNode())
    return SDValue();

  // A second condition (anything other than AL) is OR'ed into the result.
  if (CC2 != AArch64CC::AL) {
    SDValue Cmp2 =
        EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
    if (!Cmp2.getNode())
      return SDValue();

    Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);
  }

  Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());

  if (ShouldInvert)
    return Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());

  return Cmp;
}

/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
/// MemIntrinsicNodes.  The associated MachineMemOperands record the alignment
/// specified in the intrinsic calls.
///
/// Fills in Info and returns true for the intrinsics listed in the switch
/// (NEON structured loads/stores and the exclusive load/store intrinsics);
/// returns false, leaving Info untouched, for every other intrinsic.
bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                               const CallInst &I,
                                               unsigned Intrinsic) const {
  auto &DL = I.getModule()->getDataLayout();
  switch (Intrinsic) {
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
  case Intrinsic::aarch64_neon_ld1x2:
  case Intrinsic::aarch64_neon_ld1x3:
  case Intrinsic::aarch64_neon_ld1x4:
  case Intrinsic::aarch64_neon_ld2lane:
  case Intrinsic::aarch64_neon_ld3lane:
  case Intrinsic::aarch64_neon_ld4lane:
  case Intrinsic::aarch64_neon_ld2r:
  case Intrinsic::aarch64_neon_ld3r:
  case Intrinsic::aarch64_neon_ld4r: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    // Conservatively set memVT to the entire set of vectors loaded.
    uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
    // The pointer is the last call argument.
    Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
    Info.offset = 0;
    Info.align = 0;
    Info.vol = false; // volatile loads with NEON intrinsics not supported
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4:
  case Intrinsic::aarch64_neon_st1x2:
  case Intrinsic::aarch64_neon_st1x3:
  case Intrinsic::aarch64_neon_st1x4:
  case Intrinsic::aarch64_neon_st2lane:
  case Intrinsic::aarch64_neon_st3lane:
  case Intrinsic::aarch64_neon_st4lane: {
    Info.opc = ISD::INTRINSIC_VOID;
    // Conservatively set memVT to the entire set of vectors stored.
    // Arguments are scanned from index 1 until the first non-vector one.
    unsigned NumElts = 0;
    for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
      Type *ArgTy = I.getArgOperand(ArgI)->getType();
      if (!ArgTy->isVectorTy())
        break;
      NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
    }
    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
    Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
    Info.offset = 0;
    Info.align = 0;
    Info.vol = false; // volatile stores with NEON intrinsics not supported
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  case Intrinsic::aarch64_ldaxr:
  case Intrinsic::aarch64_ldxr: {
    // The memory type and alignment come from the pointee type of argument 0.
    PointerType *PtrTy = cast(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
    Info.vol = true;
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  case Intrinsic::aarch64_stlxr:
  case Intrinsic::aarch64_stxr: {
    // For the exclusive stores the pointer is argument 1.
    PointerType *PtrTy = cast(I.getArgOperand(1)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = 0;
    Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
    Info.vol = true;
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  case Intrinsic::aarch64_ldaxp:
  case Intrinsic::aarch64_ldxp:
    // Pair forms are described as a single i128 access with alignment 16.
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i128;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = 16;
    Info.vol = true;
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  case Intrinsic::aarch64_stlxp:
  case Intrinsic::aarch64_stxp:
    // For the pair stores the pointer is argument 2.
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i128;
    Info.ptrVal = I.getArgOperand(2);
    Info.offset = 0;
    Info.align = 16;
    Info.vol = true;
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  default:
    break;
  }

  return false;
}

// Truncations from 64-bit GPR to 32-bit GPR are free.
// As written, any truncation between two integer types (Ty1 wider than Ty2)
// is reported as free; non-integer types are never free.
bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 > NumBits2;
}
// EVT overload: same rule as above, additionally rejecting vector types.
bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 > NumBits2;
}

/// Check if it is profitable to hoist instruction in then/else to if.
/// Not profitable if I and its user can form a FMA instruction
/// because we prefer FMSUB/FMADD.
bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const { if (I->getOpcode() != Instruction::FMul) return true; if (I->getNumUses() != 1) return true; Instruction *User = I->user_back(); if (User && !(User->getOpcode() == Instruction::FSub || User->getOpcode() == Instruction::FAdd)) return true; const TargetOptions &Options = getTargetMachine().Options; const DataLayout &DL = I->getModule()->getDataLayout(); EVT VT = getValueType(DL, User->getOperand(0)->getType()); return !(isFMAFasterThanFMulAndFAdd(VT) && isOperationLegalOrCustom(ISD::FMA, VT) && (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath)); } // All 32-bit GPR operations implicitly zero the high-half of the corresponding // 64-bit GPR. bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); return NumBits1 == 32 && NumBits2 == 64; } bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger()) return false; unsigned NumBits1 = VT1.getSizeInBits(); unsigned NumBits2 = VT2.getSizeInBits(); return NumBits1 == 32 && NumBits2 == 64; } bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { EVT VT1 = Val.getValueType(); if (isZExtFree(VT1, VT2)) { return true; } if (Val.getOpcode() != ISD::LOAD) return false; // 8-, 16-, and 32-bit integer loads all implicitly zero-extend. return (VT1.isSimple() && !VT1.isVector() && VT1.isInteger() && VT2.isSimple() && !VT2.isVector() && VT2.isInteger() && VT1.getSizeInBits() <= 32); } bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const { if (isa(Ext)) return false; // Vector types are next free. 
if (Ext->getType()->isVectorTy()) return false; for (const Use &U : Ext->uses()) { // The extension is free if we can fold it with a left shift in an // addressing mode or an arithmetic operation: add, sub, and cmp. // Is there a shift? const Instruction *Instr = cast(U.getUser()); // Is this a constant shift? switch (Instr->getOpcode()) { case Instruction::Shl: if (!isa(Instr->getOperand(1))) return false; break; case Instruction::GetElementPtr: { gep_type_iterator GTI = gep_type_begin(Instr); auto &DL = Ext->getModule()->getDataLayout(); std::advance(GTI, U.getOperandNo()-1); Type *IdxTy = GTI.getIndexedType(); // This extension will end up with a shift because of the scaling factor. // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0. // Get the shift amount based on the scaling factor: // log2(sizeof(IdxTy)) - log2(8). uint64_t ShiftAmt = countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy)) - 3; // Is the constant foldable in the shift of the addressing mode? // I.e., shift amount is between 1 and 4 inclusive. if (ShiftAmt == 0 || ShiftAmt > 4) return false; break; } case Instruction::Trunc: // Check if this is a noop. // trunc(sext ty1 to ty2) to ty1. if (Instr->getType() == Ext->getOperand(0)->getType()) continue; LLVM_FALLTHROUGH; default: return false; } // At this point we can use the bfm family, so this extension is free // for that use. } return true; } bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType, unsigned &RequiredAligment) const { if (!LoadedType.isSimple() || (!LoadedType.isInteger() && !LoadedType.isFloatingPoint())) return false; // Cyclone supports unaligned accesses. RequiredAligment = 0; unsigned NumBits = LoadedType.getSizeInBits(); return NumBits == 32 || NumBits == 64; } /// \brief Lower an interleaved load into a ldN intrinsic. /// /// E.g. 
Lower an interleaved load (Factor = 2): /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements /// /// Into: /// %ld2 = { <4 x i32>, <4 x i32> } call llvm.aarch64.neon.ld2(%ptr) /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1 bool AArch64TargetLowering::lowerInterleavedLoad( LoadInst *LI, ArrayRef Shuffles, ArrayRef Indices, unsigned Factor) const { assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && "Invalid interleave factor"); assert(!Shuffles.empty() && "Empty shufflevector input"); assert(Shuffles.size() == Indices.size() && "Unmatched number of shufflevectors and indices"); const DataLayout &DL = LI->getModule()->getDataLayout(); VectorType *VecTy = Shuffles[0]->getType(); unsigned VecSize = DL.getTypeSizeInBits(VecTy); // Skip if we do not have NEON and skip illegal vector types. if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128)) return false; // A pointer vector can not be the return type of the ldN intrinsics. Need to // load integer vectors first and then convert to pointer vectors. Type *EltTy = VecTy->getVectorElementType(); if (EltTy->isPointerTy()) VecTy = VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements()); Type *PtrTy = VecTy->getPointerTo(LI->getPointerAddressSpace()); Type *Tys[2] = {VecTy, PtrTy}; static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2, Intrinsic::aarch64_neon_ld3, Intrinsic::aarch64_neon_ld4}; Function *LdNFunc = Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys); IRBuilder<> Builder(LI); Value *Ptr = Builder.CreateBitCast(LI->getPointerOperand(), PtrTy); CallInst *LdN = Builder.CreateCall(LdNFunc, Ptr, "ldN"); // Replace uses of each shufflevector with the corresponding vector loaded // by ldN. 
for (unsigned i = 0; i < Shuffles.size(); i++) { ShuffleVectorInst *SVI = Shuffles[i]; unsigned Index = Indices[i]; Value *SubVec = Builder.CreateExtractValue(LdN, Index); // Convert the integer vector to pointer vector if the element is pointer. if (EltTy->isPointerTy()) SubVec = Builder.CreateIntToPtr(SubVec, SVI->getType()); SVI->replaceAllUsesWith(SubVec); } return true; } /// \brief Get a mask consisting of sequential integers starting from \p Start. /// /// I.e. static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start, unsigned NumElts) { SmallVector Mask; for (unsigned i = 0; i < NumElts; i++) Mask.push_back(Builder.getInt32(Start + i)); return ConstantVector::get(Mask); } /// \brief Lower an interleaved store into a stN intrinsic. /// /// E.g. Lower an interleaved store (Factor = 3): /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1, /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> /// store <12 x i32> %i.vec, <12 x i32>* %ptr /// /// Into: /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3> /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7> /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11> /// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr) /// /// Note that the new shufflevectors will be removed and we'll only generate one /// st3 instruction in CodeGen. /// /// Example for a more general valid mask (Factor 3). 
Lower: /// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1, /// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19> /// store <12 x i32> %i.vec, <12 x i32>* %ptr /// /// Into: /// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7> /// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35> /// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19> /// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr) bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const { assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && "Invalid interleave factor"); VectorType *VecTy = SVI->getType(); assert(VecTy->getVectorNumElements() % Factor == 0 && "Invalid interleaved store"); unsigned LaneLen = VecTy->getVectorNumElements() / Factor; Type *EltTy = VecTy->getVectorElementType(); VectorType *SubVecTy = VectorType::get(EltTy, LaneLen); const DataLayout &DL = SI->getModule()->getDataLayout(); unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); // Skip if we do not have NEON and skip illegal vector types. if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128)) return false; Value *Op0 = SVI->getOperand(0); Value *Op1 = SVI->getOperand(1); IRBuilder<> Builder(SI); // StN intrinsics don't support pointer vectors as arguments. Convert pointer // vectors to integer vectors. if (EltTy->isPointerTy()) { Type *IntTy = DL.getIntPtrType(EltTy); unsigned NumOpElts = dyn_cast(Op0->getType())->getVectorNumElements(); // Convert to the corresponding integer vector. 
Type *IntVecTy = VectorType::get(IntTy, NumOpElts); Op0 = Builder.CreatePtrToInt(Op0, IntVecTy); Op1 = Builder.CreatePtrToInt(Op1, IntVecTy); SubVecTy = VectorType::get(IntTy, LaneLen); } Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace()); Type *Tys[2] = {SubVecTy, PtrTy}; static const Intrinsic::ID StoreInts[3] = {Intrinsic::aarch64_neon_st2, Intrinsic::aarch64_neon_st3, Intrinsic::aarch64_neon_st4}; Function *StNFunc = Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys); SmallVector Ops; // Split the shufflevector operands into sub vectors for the new stN call. auto Mask = SVI->getShuffleMask(); for (unsigned i = 0; i < Factor; i++) { if (Mask[i] >= 0) { Ops.push_back(Builder.CreateShuffleVector( Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen))); } else { unsigned StartMask = 0; for (unsigned j = 1; j < LaneLen; j++) { if (Mask[j*Factor + i] >= 0) { StartMask = Mask[j*Factor + i] - j; break; } } // Note: If all elements in a chunk are undefs, StartMask=0! // Note: Filling undef gaps with random elements is ok, since // those elements were being written anyway (with undefs). // In the case of all undefs we're defaulting to using elems from 0 // Note: StartMask cannot be negative, it's checked in isReInterleaveMask Ops.push_back(Builder.CreateShuffleVector( Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen))); } } Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), PtrTy)); Builder.CreateCall(StNFunc, Ops); return true; } static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, unsigned AlignCheck) { return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) && (DstAlign == 0 || DstAlign % AlignCheck == 0)); } EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { // Don't use AdvSIMD to implement 16-byte memset. 
It would have taken one // instruction to materialize the v2i64 zero and one store (with restrictive // addressing mode). Just do two i64 store of zero-registers. bool Fast; const Function *F = MF.getFunction(); if (Subtarget->hasFPARMv8() && !IsMemset && Size >= 16 && !F->hasFnAttribute(Attribute::NoImplicitFloat) && (memOpAlign(SrcAlign, DstAlign, 16) || (allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast))) return MVT::f128; if (Size >= 8 && (memOpAlign(SrcAlign, DstAlign, 8) || (allowsMisalignedMemoryAccesses(MVT::i64, 0, 1, &Fast) && Fast))) return MVT::i64; if (Size >= 4 && (memOpAlign(SrcAlign, DstAlign, 4) || (allowsMisalignedMemoryAccesses(MVT::i32, 0, 1, &Fast) && Fast))) return MVT::i32; return MVT::Other; } // 12-bit optionally shifted immediates are legal for adds. bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const { // Avoid UB for INT64_MIN. if (Immed == std::numeric_limits::min()) return false; // Same encoding for add/sub, just flip the sign. Immed = std::abs(Immed); return ((Immed >> 12) == 0 || ((Immed & 0xfff) == 0 && Immed >> 24 == 0)); } // Integer comparisons are implemented with ADDS/SUBS, so the range of valid // immediates is the same as for an add or a sub. bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const { return isLegalAddImmediate(Immed); } /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const { // AArch64 has five basic addressing modes: // reg // reg + 9-bit signed offset // reg + SIZE_IN_BYTES * 12-bit unsigned offset // reg1 + reg2 // reg + SIZE_IN_BYTES * reg // No global is ever allowed as a base. if (AM.BaseGV) return false; // No reg+reg+imm addressing. 
if (AM.HasBaseReg && AM.BaseOffs && AM.Scale) return false; // check reg + imm case: // i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12 uint64_t NumBytes = 0; if (Ty->isSized()) { uint64_t NumBits = DL.getTypeSizeInBits(Ty); NumBytes = NumBits / 8; if (!isPowerOf2_64(NumBits)) NumBytes = 0; } if (!AM.Scale) { int64_t Offset = AM.BaseOffs; // 9-bit signed offset if (isInt<9>(Offset)) return true; // 12-bit unsigned offset unsigned shift = Log2_64(NumBytes); if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 && // Must be a multiple of NumBytes (NumBytes is a power of 2) (Offset >> shift) << shift == Offset) return true; return false; } // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2 return AM.Scale == 1 || (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes); } int AArch64TargetLowering::getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const { // Scaling factors are not free at all. // Operands | Rt Latency // ------------------------------------------- // Rt, [Xn, Xm] | 4 // ------------------------------------------- // Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5 // Rt, [Xn, Wm, #imm] | if (isLegalAddressingMode(DL, AM, Ty, AS)) // Scale represents reg2 * scale, thus account for 1 if // it is not equal to 0 or 1. return AM.Scale != 0 && AM.Scale != 1; return -1; } bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { VT = VT.getScalarType(); if (!VT.isSimple()) return false; switch (VT.getSimpleVT().SimpleTy) { case MVT::f32: case MVT::f64: return true; default: break; } return false; } const MCPhysReg * AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const { // LR is a callee-save register, but we must treat it as clobbered by any call // site. Hence we include LR in the scratch registers, which are in turn added // as implicit-defs for stackmaps and patchpoints. 
static const MCPhysReg ScratchRegs[] = { AArch64::X16, AArch64::X17, AArch64::LR, 0 }; return ScratchRegs; } bool AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N) const { EVT VT = N->getValueType(0); // If N is unsigned bit extraction: ((x >> C) & mask), then do not combine // it with shift to let it be lowered to UBFX. if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) && isa(N->getOperand(1))) { uint64_t TruncMask = N->getConstantOperandVal(1); if (isMask_64(TruncMask) && N->getOperand(0).getOpcode() == ISD::SRL && isa(N->getOperand(0)->getOperand(1))) return false; } return true; } bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); if (BitSize == 0) return false; int64_t Val = Imm.getSExtValue(); if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, BitSize)) return true; if ((int64_t)Val < 0) Val = ~Val; if (BitSize == 32) Val &= (1LL << 32) - 1; unsigned LZ = countLeadingZeros((uint64_t)Val); unsigned Shift = (63 - LZ) / 16; // MOVZ is free so return true for one or fewer MOVK. return Shift < 3; } /// Turn vector tests of the signbit in the form of: /// xor (sra X, elt_size(X)-1), -1 /// into: /// cmge X, X, #0 static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget) { EVT VT = N->getValueType(0); if (!Subtarget->hasNEON() || !VT.isVector()) return SDValue(); // There must be a shift right algebraic before the xor, and the xor must be a // 'not' operation. SDValue Shift = N->getOperand(0); SDValue Ones = N->getOperand(1); if (Shift.getOpcode() != AArch64ISD::VASHR || !Shift.hasOneUse() || !ISD::isBuildVectorAllOnes(Ones.getNode())) return SDValue(); // The shift should be smearing the sign bit across each vector element. 
auto *ShiftAmt = dyn_cast(Shift.getOperand(1)); EVT ShiftEltTy = Shift.getValueType().getVectorElementType(); if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1) return SDValue(); return DAG.getNode(AArch64ISD::CMGEz, SDLoc(N), VT, Shift.getOperand(0)); } // Generate SUBS and CSEL for integer abs. static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDLoc DL(N); // Check pattern of XOR(ADD(X,Y), Y) where Y is SRA(X, size(X)-1) // and change it to SUB and CSEL. if (VT.isInteger() && N->getOpcode() == ISD::XOR && N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 && N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0)) if (ConstantSDNode *Y1C = dyn_cast(N1.getOperand(1))) if (Y1C->getAPIntValue() == VT.getSizeInBits() - 1) { SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0.getOperand(0)); // Generate SUBS & CSEL. SDValue Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32), N0.getOperand(0), DAG.getConstant(0, DL, VT)); return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0.getOperand(0), Neg, DAG.getConstant(AArch64CC::PL, DL, MVT::i32), SDValue(Cmp.getNode(), 1)); } return SDValue(); } static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); if (SDValue Cmp = foldVectorXorShiftIntoCmp(N, DAG, Subtarget)) return Cmp; return performIntegerAbsCombine(N, DAG); } SDValue AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, std::vector *Created) const { AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); if (isIntDivCheap(N->getValueType(0), Attr)) return SDValue(N,0); // Lower SDIV as SDIV // fold (sdiv X, pow2) EVT VT = N->getValueType(0); if ((VT != MVT::i32 && VT != MVT::i64) || 
!(Divisor.isPowerOf2() || (-Divisor).isPowerOf2())) return SDValue(); SDLoc DL(N); SDValue N0 = N->getOperand(0); unsigned Lg2 = Divisor.countTrailingZeros(); SDValue Zero = DAG.getConstant(0, DL, VT); SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT); // Add (N0 < 0) ? Pow2 - 1 : 0; SDValue CCVal; SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne); SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp); if (Created) { Created->push_back(Cmp.getNode()); Created->push_back(Add.getNode()); Created->push_back(CSel.getNode()); } // Divide by pow2. SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64)); // If we're dividing by a positive value, we're done. Otherwise, we must // negate the result. if (Divisor.isNonNegative()) return SRA; if (Created) Created->push_back(SRA.getNode()); return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA); } static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); // The below optimizations require a constant RHS. if (!isa(N->getOperand(1))) return SDValue(); ConstantSDNode *C = cast(N->getOperand(1)); const APInt &ConstValue = C->getAPIntValue(); // Multiplication of a power of two plus/minus one can be done more // cheaply as as shift+add/sub. For now, this is true unilaterally. If // future CPUs have a cheaper MADD instruction, this may need to be // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and // 64-bit is 5 cycles, so this is always a win. // More aggressively, some multiplications N0 * C can be lowered to // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M, // e.g. 6=3*2=(2+1)*2. // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45 // which equals to (1+2)*16-(1+2). 
SDValue N0 = N->getOperand(0); // TrailingZeroes is used to test if the mul can be lowered to // shift+add+shift. unsigned TrailingZeroes = ConstValue.countTrailingZeros(); if (TrailingZeroes) { // Conservatively do not lower to shift+add+shift if the mul might be // folded into smul or umul. if (N0->hasOneUse() && (isSignExtended(N0.getNode(), DAG) || isZeroExtended(N0.getNode(), DAG))) return SDValue(); // Conservatively do not lower to shift+add+shift if the mul might be // folded into madd or msub. if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ADD || N->use_begin()->getOpcode() == ISD::SUB)) return SDValue(); } // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub // and shift+add+shift. APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes); unsigned ShiftAmt, AddSubOpc; // Is the shifted value the LHS operand of the add/sub? bool ShiftValUseIsN0 = true; // Do we need to negate the result? bool NegateResult = false; if (ConstValue.isNonNegative()) { // (mul x, 2^N + 1) => (add (shl x, N), x) // (mul x, 2^N - 1) => (sub (shl x, N), x) // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M) APInt SCVMinus1 = ShiftedConstValue - 1; APInt CVPlus1 = ConstValue + 1; if (SCVMinus1.isPowerOf2()) { ShiftAmt = SCVMinus1.logBase2(); AddSubOpc = ISD::ADD; } else if (CVPlus1.isPowerOf2()) { ShiftAmt = CVPlus1.logBase2(); AddSubOpc = ISD::SUB; } else return SDValue(); } else { // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) // (mul x, -(2^N + 1)) => - (add (shl x, N), x) APInt CVNegPlus1 = -ConstValue + 1; APInt CVNegMinus1 = -ConstValue - 1; if (CVNegPlus1.isPowerOf2()) { ShiftAmt = CVNegPlus1.logBase2(); AddSubOpc = ISD::SUB; ShiftValUseIsN0 = false; } else if (CVNegMinus1.isPowerOf2()) { ShiftAmt = CVNegMinus1.logBase2(); AddSubOpc = ISD::ADD; NegateResult = true; } else return SDValue(); } SDLoc DL(N); EVT VT = N->getValueType(0); SDValue ShiftedVal = DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShiftAmt, DL, MVT::i64)); 
SDValue AddSubN0 = ShiftValUseIsN0 ? ShiftedVal : N0; SDValue AddSubN1 = ShiftValUseIsN0 ? N0 : ShiftedVal; SDValue Res = DAG.getNode(AddSubOpc, DL, VT, AddSubN0, AddSubN1); assert(!(NegateResult && TrailingZeroes) && "NegateResult and TrailingZeroes cannot both be true for now."); // Negate the result. if (NegateResult) return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res); // Shift the result. if (TrailingZeroes) return DAG.getNode(ISD::SHL, DL, VT, Res, DAG.getConstant(TrailingZeroes, DL, MVT::i64)); return Res; } static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N, SelectionDAG &DAG) { // Take advantage of vector comparisons producing 0 or -1 in each lane to // optimize away operation when it's from a constant. // // The general transformation is: // UNARYOP(AND(VECTOR_CMP(x,y), constant)) --> // AND(VECTOR_CMP(x,y), constant2) // constant2 = UNARYOP(constant) // Early exit if this isn't a vector operation, the operand of the // unary operation isn't a bitwise AND, or if the sizes of the operations // aren't the same. EVT VT = N->getValueType(0); if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND || N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC || VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits()) return SDValue(); // Now check that the other operand of the AND is a constant. We could // make the transformation for non-constant splats as well, but it's unclear // that would be a benefit as it would not eliminate any operations, just // perform one more step in scalar code before moving to the vector unit. if (BuildVectorSDNode *BV = dyn_cast(N->getOperand(0)->getOperand(1))) { // Bail out if the vector isn't a constant. if (!BV->isConstant()) return SDValue(); // Everything checks out. Build up the new and improved node. SDLoc DL(N); EVT IntVT = BV->getValueType(0); // Create a new constant of the appropriate type for the transformed // DAG. 
SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0)); // The AND node needs bitcasts to/from an integer vector type around it. SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst); SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT, N->getOperand(0)->getOperand(0), MaskConst); SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd); return Res; } return SDValue(); } static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget) { // First try to optimize away the conversion when it's conditionally from // a constant. Vectors only. if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG)) return Res; EVT VT = N->getValueType(0); if (VT != MVT::f32 && VT != MVT::f64) return SDValue(); // Only optimize when the source and destination types have the same width. if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits()) return SDValue(); // If the result of an integer load is only used by an integer-to-float // conversion, use a fp load instead and a AdvSIMD scalar {S|U}CVTF instead. // This eliminates an "integer-to-vector-move" UOP and improves throughput. SDValue N0 = N->getOperand(0); if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && // Do not change the width of a volatile load. !cast(N0)->isVolatile()) { LoadSDNode *LN0 = cast(N0); SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), LN0->getPointerInfo(), LN0->getAlignment(), LN0->getMemOperand()->getFlags()); // Make sure successors of the original load stay after it by updating them // to use the new Chain. DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1)); unsigned Opcode = (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF; return DAG.getNode(Opcode, SDLoc(N), VT, Load); } return SDValue(); } /// Fold a floating-point multiply by power of two into floating-point to /// fixed-point conversion. 
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { if (!Subtarget->hasNEON()) return SDValue(); SDValue Op = N->getOperand(0); if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() || Op.getOpcode() != ISD::FMUL) return SDValue(); SDValue ConstVec = Op->getOperand(1); if (!isa(ConstVec)) return SDValue(); MVT FloatTy = Op.getSimpleValueType().getVectorElementType(); uint32_t FloatBits = FloatTy.getSizeInBits(); if (FloatBits != 32 && FloatBits != 64) return SDValue(); MVT IntTy = N->getSimpleValueType(0).getVectorElementType(); uint32_t IntBits = IntTy.getSizeInBits(); if (IntBits != 16 && IntBits != 32 && IntBits != 64) return SDValue(); // Avoid conversions where iN is larger than the float (e.g., float -> i64). if (IntBits > FloatBits) return SDValue(); BitVector UndefElements; BuildVectorSDNode *BV = cast(ConstVec); int32_t Bits = IntBits == 64 ? 64 : 32; int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, Bits + 1); if (C == -1 || C == 0 || C > Bits) return SDValue(); MVT ResTy; unsigned NumLanes = Op.getValueType().getVectorNumElements(); switch (NumLanes) { default: return SDValue(); case 2: ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64; break; case 4: ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64; break; } if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps()) return SDValue(); assert((ResTy != MVT::v4i64 || DCI.isBeforeLegalizeOps()) && "Illegal vector type after legalization"); SDLoc DL(N); bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT; unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs : Intrinsic::aarch64_neon_vcvtfp2fxu; SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy, DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32)); // We can handle smaller integers by generating an extra trunc. 
if (IntBits < FloatBits) FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv); return FixConv; } /// Fold a floating-point divide by power of two into fixed-point to /// floating-point conversion. static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { if (!Subtarget->hasNEON()) return SDValue(); SDValue Op = N->getOperand(0); unsigned Opc = Op->getOpcode(); if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() || !Op.getOperand(0).getValueType().isSimple() || (Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP)) return SDValue(); SDValue ConstVec = N->getOperand(1); if (!isa(ConstVec)) return SDValue(); MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType(); int32_t IntBits = IntTy.getSizeInBits(); if (IntBits != 16 && IntBits != 32 && IntBits != 64) return SDValue(); MVT FloatTy = N->getSimpleValueType(0).getVectorElementType(); int32_t FloatBits = FloatTy.getSizeInBits(); if (FloatBits != 32 && FloatBits != 64) return SDValue(); // Avoid conversions where iN is larger than the float (e.g., i64 -> float). if (IntBits > FloatBits) return SDValue(); BitVector UndefElements; BuildVectorSDNode *BV = cast(ConstVec); int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1); if (C == -1 || C == 0 || C > FloatBits) return SDValue(); MVT ResTy; unsigned NumLanes = Op.getValueType().getVectorNumElements(); switch (NumLanes) { default: return SDValue(); case 2: ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64; break; case 4: ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64; break; } if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps()) return SDValue(); SDLoc DL(N); SDValue ConvInput = Op.getOperand(0); bool IsSigned = Opc == ISD::SINT_TO_FP; if (IntBits < FloatBits) ConvInput = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, ResTy, ConvInput); unsigned IntrinsicOpcode = IsSigned ? 
Intrinsic::aarch64_neon_vcvtfxs2fp : Intrinsic::aarch64_neon_vcvtfxu2fp; return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(), DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput, DAG.getConstant(C, DL, MVT::i32)); } /// An EXTR instruction is made up of two shifts, ORed together. This helper /// searches for and classifies those shifts. static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount, bool &FromHi) { if (N.getOpcode() == ISD::SHL) FromHi = false; else if (N.getOpcode() == ISD::SRL) FromHi = true; else return false; if (!isa(N.getOperand(1))) return false; ShiftAmount = N->getConstantOperandVal(1); Src = N->getOperand(0); return true; } /// EXTR instruction extracts a contiguous chunk of bits from two existing /// registers viewed as a high/low pair. This function looks for the pattern: /// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an /// EXTR. Can't quite be done in TableGen because the two immediates aren't /// independent. static SDValue tryCombineToEXTR(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SelectionDAG &DAG = DCI.DAG; SDLoc DL(N); EVT VT = N->getValueType(0); assert(N->getOpcode() == ISD::OR && "Unexpected root"); if (VT != MVT::i32 && VT != MVT::i64) return SDValue(); SDValue LHS; uint32_t ShiftLHS = 0; bool LHSFromHi = false; if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi)) return SDValue(); SDValue RHS; uint32_t ShiftRHS = 0; bool RHSFromHi = false; if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi)) return SDValue(); // If they're both trying to come from the high part of the register, they're // not really an EXTR. 
if (LHSFromHi == RHSFromHi) return SDValue(); if (ShiftLHS + ShiftRHS != VT.getSizeInBits()) return SDValue(); if (LHSFromHi) { std::swap(LHS, RHS); std::swap(ShiftLHS, ShiftRHS); } return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS, DAG.getConstant(ShiftRHS, DL, MVT::i64)); } static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { EVT VT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; SDLoc DL(N); if (!VT.isVector()) return SDValue(); SDValue N0 = N->getOperand(0); if (N0.getOpcode() != ISD::AND) return SDValue(); SDValue N1 = N->getOperand(1); if (N1.getOpcode() != ISD::AND) return SDValue(); // We only have to look for constant vectors here since the general, variable // case can be handled in TableGen. unsigned Bits = VT.getScalarSizeInBits(); uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1); for (int i = 1; i >= 0; --i) for (int j = 1; j >= 0; --j) { BuildVectorSDNode *BVN0 = dyn_cast(N0->getOperand(i)); BuildVectorSDNode *BVN1 = dyn_cast(N1->getOperand(j)); if (!BVN0 || !BVN1) continue; bool FoundMatch = true; for (unsigned k = 0; k < VT.getVectorNumElements(); ++k) { ConstantSDNode *CN0 = dyn_cast(BVN0->getOperand(k)); ConstantSDNode *CN1 = dyn_cast(BVN1->getOperand(k)); if (!CN0 || !CN1 || CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) { FoundMatch = false; break; } } if (FoundMatch) return DAG.getNode(AArch64ISD::BSL, DL, VT, SDValue(BVN0, 0), N0->getOperand(1 - i), N1->getOperand(1 - j)); } return SDValue(); } static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) SelectionDAG &DAG = DCI.DAG; EVT VT = N->getValueType(0); if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); if (SDValue Res = tryCombineToEXTR(N, DCI)) return Res; if (SDValue Res = tryCombineToBSL(N, DCI)) return Res; return SDValue(); } static SDValue performSRLCombine(SDNode *N, 
TargetLowering::DAGCombinerInfo &DCI) { SelectionDAG &DAG = DCI.DAG; EVT VT = N->getValueType(0); if (VT != MVT::i32 && VT != MVT::i64) return SDValue(); // Canonicalize (srl (bswap i32 x), 16) to (rotr (bswap i32 x), 16), if the // high 16-bits of x are zero. Similarly, canonicalize (srl (bswap i64 x), 32) // to (rotr (bswap i64 x), 32), if the high 32-bits of x are zero. SDValue N0 = N->getOperand(0); if (N0.getOpcode() == ISD::BSWAP) { SDLoc DL(N); SDValue N1 = N->getOperand(1); SDValue N00 = N0.getOperand(0); if (ConstantSDNode *C = dyn_cast(N1)) { uint64_t ShiftAmt = C->getZExtValue(); if (VT == MVT::i32 && ShiftAmt == 16 && DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(32, 16))) return DAG.getNode(ISD::ROTR, DL, VT, N0, N1); if (VT == MVT::i64 && ShiftAmt == 32 && DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(64, 32))) return DAG.getNode(ISD::ROTR, DL, VT, N0, N1); } } return SDValue(); } static SDValue performBitcastCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { // Wait 'til after everything is legalized to try this. That way we have // legal vector types and such. if (DCI.isBeforeLegalizeOps()) return SDValue(); // Remove extraneous bitcasts around an extract_subvector. // For example, // (v4i16 (bitconvert // (extract_subvector (v2i64 (bitconvert (v8i16 ...)), (i64 1))))) // becomes // (extract_subvector ((v8i16 ...), (i64 4))) // Only interested in 64-bit vectors as the ultimate result. EVT VT = N->getValueType(0); if (!VT.isVector()) return SDValue(); if (VT.getSimpleVT().getSizeInBits() != 64) return SDValue(); // Is the operand an extract_subvector starting at the beginning or halfway // point of the vector? A low half may also come through as an // EXTRACT_SUBREG, so look for that, too. 
SDValue Op0 = N->getOperand(0); if (Op0->getOpcode() != ISD::EXTRACT_SUBVECTOR && !(Op0->isMachineOpcode() && Op0->getMachineOpcode() == AArch64::EXTRACT_SUBREG)) return SDValue(); uint64_t idx = cast(Op0->getOperand(1))->getZExtValue(); if (Op0->getOpcode() == ISD::EXTRACT_SUBVECTOR) { if (Op0->getValueType(0).getVectorNumElements() != idx && idx != 0) return SDValue(); } else if (Op0->getMachineOpcode() == AArch64::EXTRACT_SUBREG) { if (idx != AArch64::dsub) return SDValue(); // The dsub reference is equivalent to a lane zero subvector reference. idx = 0; } // Look through the bitcast of the input to the extract. if (Op0->getOperand(0)->getOpcode() != ISD::BITCAST) return SDValue(); SDValue Source = Op0->getOperand(0)->getOperand(0); // If the source type has twice the number of elements as our destination // type, we know this is an extract of the high or low half of the vector. EVT SVT = Source->getValueType(0); if (SVT.getVectorNumElements() != VT.getVectorNumElements() * 2) return SDValue(); DEBUG(dbgs() << "aarch64-lower: bitcast extract_subvector simplification\n"); // Create the simplified form to just extract the low or high half of the // vector directly rather than bothering with the bitcasts. 
SDLoc dl(N); unsigned NumElements = VT.getVectorNumElements(); if (idx) { SDValue HalfIdx = DAG.getConstant(NumElements, dl, MVT::i64); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Source, HalfIdx); } else { SDValue SubReg = DAG.getTargetConstant(AArch64::dsub, dl, MVT::i32); return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, VT, Source, SubReg), 0); } } static SDValue performConcatVectorsCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { SDLoc dl(N); EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); // Optimize concat_vectors of truncated vectors, where the intermediate // type is illegal, to avoid said illegality, e.g., // (v4i16 (concat_vectors (v2i16 (truncate (v2i64))), // (v2i16 (truncate (v2i64))))) // -> // (v4i16 (truncate (vector_shuffle (v4i32 (bitcast (v2i64))), // (v4i32 (bitcast (v2i64))), // <0, 2, 4, 6>))) // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed // on both input and result type, so we might generate worse code. // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8. if (N->getNumOperands() == 2 && N0->getOpcode() == ISD::TRUNCATE && N1->getOpcode() == ISD::TRUNCATE) { SDValue N00 = N0->getOperand(0); SDValue N10 = N1->getOperand(0); EVT N00VT = N00.getValueType(); if (N00VT == N10.getValueType() && (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) && N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) { MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16); SmallVector Mask(MidVT.getVectorNumElements()); for (size_t i = 0; i < Mask.size(); ++i) Mask[i] = i * 2; return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getVectorShuffle( MidVT, dl, DAG.getNode(ISD::BITCAST, dl, MidVT, N00), DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask)); } } // Wait 'til after everything is legalized to try this. That way we have // legal vector types and such. 
if (DCI.isBeforeLegalizeOps()) return SDValue(); // If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector // splat. The indexed instructions are going to be expecting a DUPLANE64, so // canonicalise to that. if (N0 == N1 && VT.getVectorNumElements() == 2) { assert(VT.getScalarSizeInBits() == 64); return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG), DAG.getConstant(0, dl, MVT::i64)); } // Canonicalise concat_vectors so that the right-hand vector has as few // bit-casts as possible before its real operation. The primary matching // destination for these operations will be the narrowing "2" instructions, // which depend on the operation being performed on this right-hand vector. // For example, // (concat_vectors LHS, (v1i64 (bitconvert (v4i16 RHS)))) // becomes // (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS)) if (N1->getOpcode() != ISD::BITCAST) return SDValue(); SDValue RHS = N1->getOperand(0); MVT RHSTy = RHS.getValueType().getSimpleVT(); // If the RHS is not a vector, this is not the pattern we're looking for. if (!RHSTy.isVector()) return SDValue(); DEBUG(dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n"); MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(), RHSTy.getVectorNumElements() * 2); return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy, DAG.getNode(ISD::BITCAST, dl, RHSTy, N0), RHS)); } static SDValue tryCombineFixedPointConvert(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { // Wait 'til after everything is legalized to try this. That way we have // legal vector types and such. if (DCI.isBeforeLegalizeOps()) return SDValue(); // Transform a scalar conversion of a value from a lane extract into a // lane extract of a vector conversion. 
E.g., from foo1 to foo2: // double foo1(int64x2_t a) { return vcvtd_n_f64_s64(a[1], 9); } // double foo2(int64x2_t a) { return vcvtq_n_f64_s64(a, 9)[1]; } // // The second form interacts better with instruction selection and the // register allocator to avoid cross-class register copies that aren't // coalescable due to a lane reference. // Check the operand and see if it originates from a lane extract. SDValue Op1 = N->getOperand(1); if (Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { // Yep, no additional predication needed. Perform the transform. SDValue IID = N->getOperand(0); SDValue Shift = N->getOperand(2); SDValue Vec = Op1.getOperand(0); SDValue Lane = Op1.getOperand(1); EVT ResTy = N->getValueType(0); EVT VecResTy; SDLoc DL(N); // The vector width should be 128 bits by the time we get here, even // if it started as 64 bits (the extract_vector handling will have // done so). assert(Vec.getValueSizeInBits() == 128 && "unexpected vector size on extract_vector_elt!"); if (Vec.getValueType() == MVT::v4i32) VecResTy = MVT::v4f32; else if (Vec.getValueType() == MVT::v2i64) VecResTy = MVT::v2f64; else llvm_unreachable("unexpected vector type!"); SDValue Convert = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane); } return SDValue(); } // AArch64 high-vector "long" operations are formed by performing the non-high // version on an extract_subvector of each operand which gets the high half: // // (longop2 LHS, RHS) == (longop (extract_high LHS), (extract_high RHS)) // // However, there are cases which don't have an extract_high explicitly, but // have another operation that can be made compatible with one for free. For // example: // // (dupv64 scalar) --> (extract_high (dup128 scalar)) // // This routine does the actual conversion of such DUPs, once outer routines // have determined that everything else is in order. 
// It also supports immediate DUP-like nodes (MOVI/MVNi), which we can fold // similarly here. static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) { switch (N.getOpcode()) { case AArch64ISD::DUP: case AArch64ISD::DUPLANE8: case AArch64ISD::DUPLANE16: case AArch64ISD::DUPLANE32: case AArch64ISD::DUPLANE64: case AArch64ISD::MOVI: case AArch64ISD::MOVIshift: case AArch64ISD::MOVIedit: case AArch64ISD::MOVImsl: case AArch64ISD::MVNIshift: case AArch64ISD::MVNImsl: break; default: // FMOV could be supported, but isn't very useful, as it would only occur // if you passed a bitcast' floating point immediate to an eligible long // integer op (addl, smull, ...). return SDValue(); } MVT NarrowTy = N.getSimpleValueType(); if (!NarrowTy.is64BitVector()) return SDValue(); MVT ElementTy = NarrowTy.getVectorElementType(); unsigned NumElems = NarrowTy.getVectorNumElements(); MVT NewVT = MVT::getVectorVT(ElementTy, NumElems * 2); SDLoc dl(N); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowTy, DAG.getNode(N->getOpcode(), dl, NewVT, N->ops()), DAG.getConstant(NumElems, dl, MVT::i64)); } static bool isEssentiallyExtractSubvector(SDValue N) { if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR) return true; return N.getOpcode() == ISD::BITCAST && N.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR; } /// \brief Helper structure to keep track of ISD::SET_CC operands. struct GenericSetCCInfo { const SDValue *Opnd0; const SDValue *Opnd1; ISD::CondCode CC; }; /// \brief Helper structure to keep track of a SET_CC lowered into AArch64 code. struct AArch64SetCCInfo { const SDValue *Cmp; AArch64CC::CondCode CC; }; /// \brief Helper structure to keep track of SetCC information. union SetCCInfo { GenericSetCCInfo Generic; AArch64SetCCInfo AArch64; }; /// \brief Helper structure to be able to read SetCC information. If set to /// true, IsAArch64 field, Info is a AArch64SetCCInfo, otherwise Info is a /// GenericSetCCInfo. 
struct SetCCInfoAndKind { SetCCInfo Info; bool IsAArch64; }; /// \brief Check whether or not \p Op is a SET_CC operation, either a generic or /// an /// AArch64 lowered one. /// \p SetCCInfo is filled accordingly. /// \post SetCCInfo is meanginfull only when this function returns true. /// \return True when Op is a kind of SET_CC operation. static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) { // If this is a setcc, this is straight forward. if (Op.getOpcode() == ISD::SETCC) { SetCCInfo.Info.Generic.Opnd0 = &Op.getOperand(0); SetCCInfo.Info.Generic.Opnd1 = &Op.getOperand(1); SetCCInfo.Info.Generic.CC = cast(Op.getOperand(2))->get(); SetCCInfo.IsAArch64 = false; return true; } // Otherwise, check if this is a matching csel instruction. // In other words: // - csel 1, 0, cc // - csel 0, 1, !cc if (Op.getOpcode() != AArch64ISD::CSEL) return false; // Set the information about the operands. // TODO: we want the operands of the Cmp not the csel SetCCInfo.Info.AArch64.Cmp = &Op.getOperand(3); SetCCInfo.IsAArch64 = true; SetCCInfo.Info.AArch64.CC = static_cast( cast(Op.getOperand(2))->getZExtValue()); // Check that the operands matches the constraints: // (1) Both operands must be constants. // (2) One must be 1 and the other must be 0. ConstantSDNode *TValue = dyn_cast(Op.getOperand(0)); ConstantSDNode *FValue = dyn_cast(Op.getOperand(1)); // Check (1). if (!TValue || !FValue) return false; // Check (2). if (!TValue->isOne()) { // Update the comparison when we are interested in !cc. std::swap(TValue, FValue); SetCCInfo.Info.AArch64.CC = AArch64CC::getInvertedCondCode(SetCCInfo.Info.AArch64.CC); } return TValue->isOne() && FValue->isNullValue(); } // Returns true if Op is setcc or zext of setcc. static bool isSetCCOrZExtSetCC(const SDValue& Op, SetCCInfoAndKind &Info) { if (isSetCC(Op, Info)) return true; return ((Op.getOpcode() == ISD::ZERO_EXTEND) && isSetCC(Op->getOperand(0), Info)); } // The folding we want to perform is: // (add x, [zext] (setcc cc ...) 
) // --> // (csel x, (add x, 1), !cc ...) // // The latter will get matched to a CSINC instruction. static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) { assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!"); SDValue LHS = Op->getOperand(0); SDValue RHS = Op->getOperand(1); SetCCInfoAndKind InfoAndKind; // If neither operand is a SET_CC, give up. if (!isSetCCOrZExtSetCC(LHS, InfoAndKind)) { std::swap(LHS, RHS); if (!isSetCCOrZExtSetCC(LHS, InfoAndKind)) return SDValue(); } // FIXME: This could be generatized to work for FP comparisons. EVT CmpVT = InfoAndKind.IsAArch64 ? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType() : InfoAndKind.Info.Generic.Opnd0->getValueType(); if (CmpVT != MVT::i32 && CmpVT != MVT::i64) return SDValue(); SDValue CCVal; SDValue Cmp; SDLoc dl(Op); if (InfoAndKind.IsAArch64) { CCVal = DAG.getConstant( AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), dl, MVT::i32); Cmp = *InfoAndKind.Info.AArch64.Cmp; } else Cmp = getAArch64Cmp(*InfoAndKind.Info.Generic.Opnd0, *InfoAndKind.Info.Generic.Opnd1, ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, true), CCVal, DAG, dl); EVT VT = Op->getValueType(0); LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT)); return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp); } // The basic add/sub long vector instructions have variants with "2" on the end // which act on the high-half of their inputs. They are normally matched by // patterns like: // // (add (zeroext (extract_high LHS)), // (zeroext (extract_high RHS))) // -> uaddl2 vD, vN, vM // // However, if one of the extracts is something like a duplicate, this // instruction can still be used profitably. This function puts the DAG into a // more appropriate form for those patterns to trigger. 
static SDValue performAddSubLongCombine(SDNode *N,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        SelectionDAG &DAG) {
  // Only run once operations have been legalized.
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  // Anything that is not a 128-bit vector is outside the "long" patterns; the
  // only thing still worth trying there is the setcc folding for ADD.
  MVT VT = N->getSimpleValueType(0);
  if (!VT.is128BitVector()) {
    if (N->getOpcode() == ISD::ADD)
      return performSetccAddFolding(N, DAG);
    return SDValue();
  }

  // Make sure both branches are extended in the same way.
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  if ((LHS.getOpcode() != ISD::ZERO_EXTEND &&
       LHS.getOpcode() != ISD::SIGN_EXTEND) ||
      LHS.getOpcode() != RHS.getOpcode())
    return SDValue();

  unsigned ExtType = LHS.getOpcode();

  // It's not worth doing if at least one of the inputs isn't already an
  // extract, but we don't know which it'll be so we have to try both.
  if (isEssentiallyExtractSubvector(LHS.getOperand(0))) {
    RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG);
    if (!RHS.getNode())
      return SDValue();

    // Re-apply the original extension on top of the new extract_high.
    RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS);
  } else if (isEssentiallyExtractSubvector(RHS.getOperand(0))) {
    LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG);
    if (!LHS.getNode())
      return SDValue();

    // Re-apply the original extension on top of the new extract_high.
    LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS);
  }

  // Note: when neither input is an extract, this rebuilds the node with its
  // original operands.
  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
}

// Massage DAGs which we can use the high-half "long" operations on into
// something isel will recognize better. E.g.
// // (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) --> // (aarch64_neon_umull (extract_high (v2i64 vec))) // (extract_high (v2i64 (dup128 scalar))))) // static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { if (DCI.isBeforeLegalizeOps()) return SDValue(); SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); assert(LHS.getValueType().is64BitVector() && RHS.getValueType().is64BitVector() && "unexpected shape for long operation"); // Either node could be a DUP, but it's not worth doing both of them (you'd // just as well use the non-high version) so look for a corresponding extract // operation on the other "wing". if (isEssentiallyExtractSubvector(LHS)) { RHS = tryExtendDUPToExtractHigh(RHS, DAG); if (!RHS.getNode()) return SDValue(); } else if (isEssentiallyExtractSubvector(RHS)) { LHS = tryExtendDUPToExtractHigh(LHS, DAG); if (!LHS.getNode()) return SDValue(); } return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0), N->getOperand(0), LHS, RHS); } static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) { MVT ElemTy = N->getSimpleValueType(0).getScalarType(); unsigned ElemBits = ElemTy.getSizeInBits(); int64_t ShiftAmount; if (BuildVectorSDNode *BVN = dyn_cast(N->getOperand(2))) { APInt SplatValue, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, ElemBits) || SplatBitSize != ElemBits) return SDValue(); ShiftAmount = SplatValue.getSExtValue(); } else if (ConstantSDNode *CVN = dyn_cast(N->getOperand(2))) { ShiftAmount = CVN->getSExtValue(); } else return SDValue(); unsigned Opcode; bool IsRightShift; switch (IID) { default: llvm_unreachable("Unknown shift intrinsic"); case Intrinsic::aarch64_neon_sqshl: Opcode = AArch64ISD::SQSHL_I; IsRightShift = false; break; case Intrinsic::aarch64_neon_uqshl: Opcode = AArch64ISD::UQSHL_I; IsRightShift = false; break; 
case Intrinsic::aarch64_neon_srshl: Opcode = AArch64ISD::SRSHR_I; IsRightShift = true; break; case Intrinsic::aarch64_neon_urshl: Opcode = AArch64ISD::URSHR_I; IsRightShift = true; break; case Intrinsic::aarch64_neon_sqshlu: Opcode = AArch64ISD::SQSHLU_I; IsRightShift = false; break; } if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) { SDLoc dl(N); return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1), DAG.getConstant(-ShiftAmount, dl, MVT::i32)); } else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) { SDLoc dl(N); return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1), DAG.getConstant(ShiftAmount, dl, MVT::i32)); } return SDValue(); } // The CRC32[BH] instructions ignore the high bits of their data operand. Since // the intrinsics must be legal and take an i32, this means there's almost // certainly going to be a zext in the DAG which we can eliminate. static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) { SDValue AndN = N->getOperand(2); if (AndN.getOpcode() != ISD::AND) return SDValue(); ConstantSDNode *CMask = dyn_cast(AndN.getOperand(1)); if (!CMask || CMask->getZExtValue() != Mask) return SDValue(); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), MVT::i32, N->getOperand(0), N->getOperand(1), AndN.getOperand(0)); } static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N, SelectionDAG &DAG) { SDLoc dl(N); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), DAG.getNode(Opc, dl, N->getOperand(1).getSimpleValueType(), N->getOperand(1)), DAG.getConstant(0, dl, MVT::i64)); } static SDValue performIntrinsicCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { SelectionDAG &DAG = DCI.DAG; unsigned IID = getIntrinsicID(N); switch (IID) { default: break; case Intrinsic::aarch64_neon_vcvtfxs2fp: case Intrinsic::aarch64_neon_vcvtfxu2fp: return tryCombineFixedPointConvert(N, DCI, DAG); case 
Intrinsic::aarch64_neon_saddv: return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG); case Intrinsic::aarch64_neon_uaddv: return combineAcrossLanesIntrinsic(AArch64ISD::UADDV, N, DAG); case Intrinsic::aarch64_neon_sminv: return combineAcrossLanesIntrinsic(AArch64ISD::SMINV, N, DAG); case Intrinsic::aarch64_neon_uminv: return combineAcrossLanesIntrinsic(AArch64ISD::UMINV, N, DAG); case Intrinsic::aarch64_neon_smaxv: return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG); case Intrinsic::aarch64_neon_umaxv: return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG); case Intrinsic::aarch64_neon_fmax: return DAG.getNode(ISD::FMAXNAN, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2)); case Intrinsic::aarch64_neon_fmin: return DAG.getNode(ISD::FMINNAN, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2)); case Intrinsic::aarch64_neon_fmaxnm: return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2)); case Intrinsic::aarch64_neon_fminnm: return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2)); case Intrinsic::aarch64_neon_smull: case Intrinsic::aarch64_neon_umull: case Intrinsic::aarch64_neon_pmull: case Intrinsic::aarch64_neon_sqdmull: return tryCombineLongOpWithDup(IID, N, DCI, DAG); case Intrinsic::aarch64_neon_sqshl: case Intrinsic::aarch64_neon_uqshl: case Intrinsic::aarch64_neon_sqshlu: case Intrinsic::aarch64_neon_srshl: case Intrinsic::aarch64_neon_urshl: return tryCombineShiftImm(IID, N, DAG); case Intrinsic::aarch64_crc32b: case Intrinsic::aarch64_crc32cb: return tryCombineCRC32(0xff, N, DAG); case Intrinsic::aarch64_crc32h: case Intrinsic::aarch64_crc32ch: return tryCombineCRC32(0xffff, N, DAG); } return SDValue(); } static SDValue performExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { // If we see something like (zext (sabd (extract_high ...), (DUP ...))) then // we can convert that DUP into 
another extract_high (of a bigger DUP), which // helps the backend to decide that an sabdl2 would be useful, saving a real // extract_high operation. if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND && N->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) { SDNode *ABDNode = N->getOperand(0).getNode(); unsigned IID = getIntrinsicID(ABDNode); if (IID == Intrinsic::aarch64_neon_sabd || IID == Intrinsic::aarch64_neon_uabd) { SDValue NewABD = tryCombineLongOpWithDup(IID, ABDNode, DCI, DAG); if (!NewABD.getNode()) return SDValue(); return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD); } } // This is effectively a custom type legalization for AArch64. // // Type legalization will split an extend of a small, legal, type to a larger // illegal type by first splitting the destination type, often creating // illegal source types, which then get legalized in isel-confusing ways, // leading to really terrible codegen. E.g., // %result = v8i32 sext v8i8 %value // becomes // %losrc = extract_subreg %value, ... // %hisrc = extract_subreg %value, ... // %lo = v4i32 sext v4i8 %losrc // %hi = v4i32 sext v4i8 %hisrc // Things go rapidly downhill from there. // // For AArch64, the [sz]ext vector instructions can only go up one element // size, so we can, e.g., extend from i8 to i16, but to go from i8 to i32 // take two instructions. // // This implies that the most efficient way to do the extend from v8i8 // to two v4i32 values is to first extend the v8i8 to v8i16, then do // the normal splitting to happen for the v8i16->v8i32. // This is pre-legalization to catch some cases where the default // type legalization will create ill-tempered code. if (!DCI.isBeforeLegalizeOps()) return SDValue(); // We're only interested in cleaning things up for non-legal vector types // here. If both the source and destination are legal, things will just // work naturally without any fiddling. 
const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT ResVT = N->getValueType(0); if (!ResVT.isVector() || TLI.isTypeLegal(ResVT)) return SDValue(); // If the vector type isn't a simple VT, it's beyond the scope of what // we're worried about here. Let legalization do its thing and hope for // the best. SDValue Src = N->getOperand(0); EVT SrcVT = Src->getValueType(0); if (!ResVT.isSimple() || !SrcVT.isSimple()) return SDValue(); // If the source VT is a 64-bit vector, we can play games and get the // better results we want. if (SrcVT.getSizeInBits() != 64) return SDValue(); unsigned SrcEltSize = SrcVT.getScalarSizeInBits(); unsigned ElementCount = SrcVT.getVectorNumElements(); SrcVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize * 2), ElementCount); SDLoc DL(N); Src = DAG.getNode(N->getOpcode(), DL, SrcVT, Src); // Now split the rest of the operation into two halves, each with a 64 // bit source. EVT LoVT, HiVT; SDValue Lo, Hi; unsigned NumElements = ResVT.getVectorNumElements(); assert(!(NumElements & 1) && "Splitting vector, but not in half!"); LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), NumElements / 2); EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(), LoVT.getVectorNumElements()); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src, DAG.getConstant(0, DL, MVT::i64)); Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src, DAG.getConstant(InNVT.getVectorNumElements(), DL, MVT::i64)); Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo); Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi); // Now combine the parts back together so we still have a single result // like the combiner expects. return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); } static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St, SDValue SplatVal, unsigned NumVecElts) { unsigned OrigAlignment = St.getAlignment(); unsigned EltOffset = SplatVal.getValueType().getSizeInBits() / 8; // Create scalar stores. 
This is at least as good as the code sequence for a // split unaligned store which is a dup.s, ext.b, and two stores. // Most of the time the three stores should be replaced by store pair // instructions (stp). SDLoc DL(&St); SDValue BasePtr = St.getBasePtr(); const MachinePointerInfo &PtrInfo = St.getPointerInfo(); SDValue NewST1 = DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo, OrigAlignment, St.getMemOperand()->getFlags()); unsigned Offset = EltOffset; while (--NumVecElts) { unsigned Alignment = MinAlign(OrigAlignment, Offset); SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, DAG.getConstant(Offset, DL, MVT::i64)); NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr, PtrInfo.getWithOffset(Offset), Alignment, St.getMemOperand()->getFlags()); Offset += EltOffset; } return NewST1; } /// Replace a splat of zeros to a vector store by scalar stores of WZR/XZR. The /// load store optimizer pass will merge them to store pair stores. This should /// be better than a movi to create the vector zero followed by a vector store /// if the zero constant is not re-used, since one instructions and one register /// live range will be removed. /// /// For example, the final generated code should be: /// /// stp xzr, xzr, [x0] /// /// instead of: /// /// movi v0.2d, #0 /// str q0, [x0] /// static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St) { SDValue StVal = St.getValue(); EVT VT = StVal.getValueType(); // It is beneficial to scalarize a zero splat store for 2 or 3 i64 elements or // 2, 3 or 4 i32 elements. 
int NumVecElts = VT.getVectorNumElements(); if (!(((NumVecElts == 2 || NumVecElts == 3) && VT.getVectorElementType().getSizeInBits() == 64) || ((NumVecElts == 2 || NumVecElts == 3 || NumVecElts == 4) && VT.getVectorElementType().getSizeInBits() == 32))) return SDValue(); if (StVal.getOpcode() != ISD::BUILD_VECTOR) return SDValue(); // If the zero constant has more than one use then the vector store could be // better since the constant mov will be amortized and stp q instructions // should be able to be formed. if (!StVal.hasOneUse()) return SDValue(); // If the immediate offset of the address operand is too large for the stp // instruction, then bail out. if (DAG.isBaseWithConstantOffset(St.getBasePtr())) { int64_t Offset = St.getBasePtr()->getConstantOperandVal(1); if (Offset < -512 || Offset > 504) return SDValue(); } for (int I = 0; I < NumVecElts; ++I) { SDValue EltVal = StVal.getOperand(I); if (!isNullConstant(EltVal) && !isNullFPConstant(EltVal)) return SDValue(); } // Use WZR/XZR here to prevent DAGCombiner::MergeConsecutiveStores from // undoing this transformation. SDValue SplatVal = VT.getVectorElementType().getSizeInBits() == 32 ? DAG.getRegister(AArch64::WZR, MVT::i32) : DAG.getRegister(AArch64::XZR, MVT::i64); return splitStoreSplat(DAG, St, SplatVal, NumVecElts); } /// Replace a splat of a scalar to a vector store by scalar stores of the scalar /// value. The load store optimizer pass will merge them to store pair stores. /// This has better performance than a splat of the scalar followed by a split /// vector store. Even if the stores are not merged it is four stores vs a dup, /// followed by an ext.b and two stores. static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St) { SDValue StVal = St.getValue(); EVT VT = StVal.getValueType(); // Don't replace floating point stores, they possibly won't be transformed to // stp because of the store pair suppress pass. 
if (VT.isFloatingPoint()) return SDValue(); // We can express a splat as store pair(s) for 2 or 4 elements. unsigned NumVecElts = VT.getVectorNumElements(); if (NumVecElts != 4 && NumVecElts != 2) return SDValue(); // Check that this is a splat. // Make sure that each of the relevant vector element locations are inserted // to, i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32. std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1); SDValue SplatVal; for (unsigned I = 0; I < NumVecElts; ++I) { // Check for insert vector elements. if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT) return SDValue(); // Check that same value is inserted at each vector element. if (I == 0) SplatVal = StVal.getOperand(1); else if (StVal.getOperand(1) != SplatVal) return SDValue(); // Check insert element index. ConstantSDNode *CIndex = dyn_cast(StVal.getOperand(2)); if (!CIndex) return SDValue(); uint64_t IndexVal = CIndex->getZExtValue(); if (IndexVal >= NumVecElts) return SDValue(); IndexNotInserted.reset(IndexVal); StVal = StVal.getOperand(0); } // Check that all vector element locations were inserted to. if (IndexNotInserted.any()) return SDValue(); return splitStoreSplat(DAG, St, SplatVal, NumVecElts); } static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget) { if (!DCI.isBeforeLegalize()) return SDValue(); StoreSDNode *S = cast(N); if (S->isVolatile()) return SDValue(); SDValue StVal = S->getValue(); EVT VT = StVal.getValueType(); if (!VT.isVector()) return SDValue(); // If we get a splat of zeros, convert this vector store to a store of // scalars. They will be merged into store pairs of xzr thereby removing one // instruction and one register. 
if (SDValue ReplacedZeroSplat = replaceZeroVectorStore(DAG, *S)) return ReplacedZeroSplat; // FIXME: The logic for deciding if an unaligned store should be split should // be included in TLI.allowsMisalignedMemoryAccesses(), and there should be // a call to that function here. if (!Subtarget->isMisaligned128StoreSlow()) return SDValue(); // Don't split at -Oz. if (DAG.getMachineFunction().getFunction()->optForMinSize()) return SDValue(); // Don't split v2i64 vectors. Memcpy lowering produces those and splitting // those up regresses performance on micro-benchmarks and olden/bh. if (VT.getVectorNumElements() < 2 || VT == MVT::v2i64) return SDValue(); // Split unaligned 16B stores. They are terrible for performance. // Don't split stores with alignment of 1 or 2. Code that uses clang vector // extensions can use this to mark that it does not want splitting to happen // (by underspecifying alignment to be 1 or 2). Furthermore, the chance of // eliminating alignment hazards is only 1 in 8 for alignment of 2. if (VT.getSizeInBits() != 128 || S->getAlignment() >= 16 || S->getAlignment() <= 2) return SDValue(); // If we get a splat of a scalar convert this vector store to a store of // scalars. They will be merged into store pairs thereby removing two // instructions. if (SDValue ReplacedSplat = replaceSplatVectorStore(DAG, *S)) return ReplacedSplat; SDLoc DL(S); unsigned NumElts = VT.getVectorNumElements() / 2; // Split VT into two. 
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumElts); SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal, DAG.getConstant(0, DL, MVT::i64)); SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal, DAG.getConstant(NumElts, DL, MVT::i64)); SDValue BasePtr = S->getBasePtr(); SDValue NewST1 = DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(), S->getAlignment(), S->getMemOperand()->getFlags()); SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, DAG.getConstant(8, DL, MVT::i64)); return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr, S->getPointerInfo(), S->getAlignment(), S->getMemOperand()->getFlags()); } /// Target-specific DAG combine function for post-increment LD1 (lane) and /// post-increment LD1R. static SDValue performPostLD1Combine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, bool IsLaneOp) { if (DCI.isBeforeLegalizeOps()) return SDValue(); SelectionDAG &DAG = DCI.DAG; EVT VT = N->getValueType(0); unsigned LoadIdx = IsLaneOp ? 1 : 0; SDNode *LD = N->getOperand(LoadIdx).getNode(); // If it is not LOAD, can not do such combine. if (LD->getOpcode() != ISD::LOAD) return SDValue(); LoadSDNode *LoadSDN = cast(LD); EVT MemVT = LoadSDN->getMemoryVT(); // Check if memory operand is the same type as the vector element. if (MemVT != VT.getVectorElementType()) return SDValue(); // Check if there are other uses. If so, do not combine as it will introduce // an extra load. for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); UI != UE; ++UI) { if (UI.getUse().getResNo() == 1) // Ignore uses of the chain result. continue; if (*UI != N) return SDValue(); } SDValue Addr = LD->getOperand(1); SDValue Vector = N->getOperand(0); // Search for a use of the address operand that is an increment. 
for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE = Addr.getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User->getOpcode() != ISD::ADD || UI.getUse().getResNo() != Addr.getResNo()) continue; // Check that the add is independent of the load. Otherwise, folding it // would create a cycle. if (User->isPredecessorOf(LD) || LD->isPredecessorOf(User)) continue; // Also check that add is not used in the vector operand. This would also // create a cycle. if (User->isPredecessorOf(Vector.getNode())) continue; // If the increment is a constant, it must match the memory ref size. SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); if (ConstantSDNode *CInc = dyn_cast(Inc.getNode())) { uint32_t IncVal = CInc->getZExtValue(); unsigned NumBytes = VT.getScalarSizeInBits() / 8; if (IncVal != NumBytes) continue; Inc = DAG.getRegister(AArch64::XZR, MVT::i64); } // Finally, check that the vector doesn't depend on the load. // Again, this would create a cycle. // The load depending on the vector is fine, as that's the case for the // LD1*post we'll eventually generate anyway. if (LoadSDN->isPredecessorOf(Vector.getNode())) continue; SmallVector Ops; Ops.push_back(LD->getOperand(0)); // Chain if (IsLaneOp) { Ops.push_back(Vector); // The vector to be inserted Ops.push_back(N->getOperand(2)); // The lane to be inserted in the vector } Ops.push_back(Addr); Ops.push_back(Inc); EVT Tys[3] = { VT, MVT::i64, MVT::Other }; SDVTList SDTys = DAG.getVTList(Tys); unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost; SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops, MemVT, LoadSDN->getMemOperand()); // Update the uses. 
SDValue NewResults[] = { SDValue(LD, 0), // The result of load SDValue(UpdN.getNode(), 2) // Chain }; DCI.CombineTo(LD, NewResults); DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register break; } return SDValue(); } /// Simplify \Addr given that the top byte of it is ignored by HW during /// address translation. static bool performTBISimplification(SDValue Addr, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { APInt DemandedMask = APInt::getLowBitsSet(64, 56); APInt KnownZero, KnownOne; TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(), DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.SimplifyDemandedBits(Addr, DemandedMask, KnownZero, KnownOne, TLO)) { DCI.CommitTargetLoweringOpt(TLO); return true; } return false; } static SDValue performSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget) { if (SDValue Split = splitStores(N, DCI, DAG, Subtarget)) return Split; if (Subtarget->supportsAddressTopByteIgnored() && performTBISimplification(N->getOperand(2), DCI, DAG)) return SDValue(N, 0); return SDValue(); } /// This function handles the log2-shuffle pattern produced by the /// LoopVectorizer for the across vector reduction. It consists of /// log2(NumVectorElements) steps and, in each step, 2^(s) elements /// are reduced, where s is an induction variable from 0 to /// log2(NumVectorElements). 
static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
                                                     unsigned Op,
                                                     SelectionDAG &DAG) {
  // N is the node being combined (only its result type is used), OpV is the
  // last node of the reduction chain and Op is the reduction operator. An
  // empty SDValue is returned whenever the pattern does not match.
  EVT VTy = OpV->getOperand(0).getValueType();
  if (!VTy.isVector())
    return SDValue();

  int NumVecElts = VTy.getVectorNumElements();
  if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
    if (NumVecElts != 4)
      return SDValue();
  } else {
    if (NumVecElts != 4 && NumVecElts != 8 && NumVecElts != 16)
      return SDValue();
  }

  int NumExpectedSteps = APInt(8, NumVecElts).logBase2();
  SDValue PreOp = OpV;
  // Iterate over each step of the across vector reduction.
  for (int CurStep = 0; CurStep != NumExpectedSteps; ++CurStep) {
    SDValue CurOp = PreOp.getOperand(0);
    SDValue Shuffle = PreOp.getOperand(1);
    if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE) {
      // Try to swap the 1st and 2nd operand as add and min/max instructions
      // are commutative.
      CurOp = PreOp.getOperand(1);
      Shuffle = PreOp.getOperand(0);
      if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE)
        return SDValue();
    }

    // Check if the input vector is fed by the operator we want to handle,
    // except the last step; the very first input vector is not necessarily
    // the same operator we are handling.
    if (CurOp.getOpcode() != Op && (CurStep != (NumExpectedSteps - 1)))
      return SDValue();

    // Check if it forms one step of the across vector reduction.
    // E.g.,
    //   %cur = add %1, %0
    //   %shuffle = vector_shuffle %cur, <2, 3, u, u>
    //   %pre = add %cur, %shuffle
    if (Shuffle.getOperand(0) != CurOp)
      return SDValue();

    int NumMaskElts = 1 << CurStep;
    ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Shuffle)->getMask();
    // Check mask values in each step.
    // We expect the shuffle mask in each step follows a specific pattern
    // denoted here by the <M, U> form, where M is a sequence of integers
    // starting from NumMaskElts, increasing by 1, and the number integers
    // in M should be NumMaskElts. U is a sequence of UNDEFs and the number
    // of undef in U should be NumVecElts - NumMaskElts.
    // E.g., for <8 x i16>, mask values in each step should be :
    //   step 0 : <1,u,u,u,u,u,u,u>
    //   step 1 : <2,3,u,u,u,u,u,u>
    //   step 2 : <4,5,6,7,u,u,u,u>
    for (int i = 0; i < NumVecElts; ++i)
      if ((i < NumMaskElts && Mask[i] != (NumMaskElts + i)) ||
          (i >= NumMaskElts && !(Mask[i] < 0)))
        return SDValue();

    PreOp = CurOp;
  }

  // Map the reduction operator to the across-lanes node or intrinsic.
  unsigned Opcode;
  bool IsIntrinsic = false;

  switch (Op) {
  default:
    llvm_unreachable("Unexpected operator for across vector reduction");
  case ISD::ADD:
    Opcode = AArch64ISD::UADDV;
    break;
  case ISD::SMAX:
    Opcode = AArch64ISD::SMAXV;
    break;
  case ISD::UMAX:
    Opcode = AArch64ISD::UMAXV;
    break;
  case ISD::SMIN:
    Opcode = AArch64ISD::SMINV;
    break;
  case ISD::UMIN:
    Opcode = AArch64ISD::UMINV;
    break;
  case ISD::FMAXNUM:
    Opcode = Intrinsic::aarch64_neon_fmaxnmv;
    IsIntrinsic = true;
    break;
  case ISD::FMINNUM:
    Opcode = Intrinsic::aarch64_neon_fminnmv;
    IsIntrinsic = true;
    break;
  }
  SDLoc DL(N);

  // PreOp is now the original, unreduced input vector. The intrinsic form
  // yields the result directly; the AArch64ISD nodes yield a vector from
  // which lane 0 is extracted.
  return IsIntrinsic
             ? DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, N->getValueType(0),
                           DAG.getConstant(Opcode, DL, MVT::i32), PreOp)
             : DAG.getNode(
                   ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0),
                   DAG.getNode(Opcode, DL, PreOp.getSimpleValueType(), PreOp),
                   DAG.getConstant(0, DL, MVT::i64));
}

/// Target-specific DAG combine for the across vector min/max reductions.
/// This function specifically handles the final clean-up step of the vector
/// min/max reductions produced by the LoopVectorizer. It is the log2-shuffle
/// pattern, which narrows down and finds the final min/max value from all
/// elements of the vector.
/// For example, for a <16 x i8> vector : /// svn0 = vector_shuffle %0, undef<8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u> /// %smax0 = smax %arr, svn0 /// %svn1 = vector_shuffle %smax0, undef<4,5,6,7,u,u,u,u,u,u,u,u,u,u,u,u> /// %smax1 = smax %smax0, %svn1 /// %svn2 = vector_shuffle %smax1, undef<2,3,u,u,u,u,u,u,u,u,u,u,u,u,u,u> /// %smax2 = smax %smax1, svn2 /// %svn3 = vector_shuffle %smax2, undef<1,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u> /// %sc = setcc %smax2, %svn3, gt /// %n0 = extract_vector_elt %sc, #0 /// %n1 = extract_vector_elt %smax2, #0 /// %n2 = extract_vector_elt $smax2, #1 /// %result = select %n0, %n1, n2 /// becomes : /// %1 = smaxv %0 /// %result = extract_vector_elt %1, 0 static SDValue performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget) { if (!Subtarget->hasNEON()) return SDValue(); SDValue N0 = N->getOperand(0); SDValue IfTrue = N->getOperand(1); SDValue IfFalse = N->getOperand(2); // Check if the SELECT merges up the final result of the min/max // from a vector. if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT || IfTrue.getOpcode() != ISD::EXTRACT_VECTOR_ELT || IfFalse.getOpcode() != ISD::EXTRACT_VECTOR_ELT) return SDValue(); // Expect N0 is fed by SETCC. SDValue SetCC = N0.getOperand(0); EVT SetCCVT = SetCC.getValueType(); if (SetCC.getOpcode() != ISD::SETCC || !SetCCVT.isVector() || SetCCVT.getVectorElementType() != MVT::i1) return SDValue(); SDValue VectorOp = SetCC.getOperand(0); unsigned Op = VectorOp->getOpcode(); // Check if the input vector is fed by the operator we want to handle. 
if (Op != ISD::SMAX && Op != ISD::UMAX && Op != ISD::SMIN && Op != ISD::UMIN && Op != ISD::FMAXNUM && Op != ISD::FMINNUM) return SDValue(); EVT VTy = VectorOp.getValueType(); if (!VTy.isVector()) return SDValue(); if (VTy.getSizeInBits() < 64) return SDValue(); EVT EltTy = VTy.getVectorElementType(); if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) { if (EltTy != MVT::f32) return SDValue(); } else { if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8) return SDValue(); } // Check if extracting from the same vector. // For example, // %sc = setcc %vector, %svn1, gt // %n0 = extract_vector_elt %sc, #0 // %n1 = extract_vector_elt %vector, #0 // %n2 = extract_vector_elt $vector, #1 if (!(VectorOp == IfTrue->getOperand(0) && VectorOp == IfFalse->getOperand(0))) return SDValue(); // Check if the condition code is matched with the operator type. ISD::CondCode CC = cast(SetCC->getOperand(2))->get(); if ((Op == ISD::SMAX && CC != ISD::SETGT && CC != ISD::SETGE) || (Op == ISD::UMAX && CC != ISD::SETUGT && CC != ISD::SETUGE) || (Op == ISD::SMIN && CC != ISD::SETLT && CC != ISD::SETLE) || (Op == ISD::UMIN && CC != ISD::SETULT && CC != ISD::SETULE) || (Op == ISD::FMAXNUM && CC != ISD::SETOGT && CC != ISD::SETOGE && CC != ISD::SETUGT && CC != ISD::SETUGE && CC != ISD::SETGT && CC != ISD::SETGE) || (Op == ISD::FMINNUM && CC != ISD::SETOLT && CC != ISD::SETOLE && CC != ISD::SETULT && CC != ISD::SETULE && CC != ISD::SETLT && CC != ISD::SETLE)) return SDValue(); // Expect to check only lane 0 from the vector SETCC. if (!isNullConstant(N0.getOperand(1))) return SDValue(); // Expect to extract the true value from lane 0. if (!isNullConstant(IfTrue.getOperand(1))) return SDValue(); // Expect to extract the false value from lane 1. if (!isOneConstant(IfFalse.getOperand(1))) return SDValue(); return tryMatchAcrossLaneShuffleForReduction(N, SetCC, Op, DAG); } /// Target-specific DAG combine for the across vector add reduction. 
/// This function specifically handles the final clean-up step of the vector /// add reduction produced by the LoopVectorizer. It is the log2-shuffle /// pattern, which adds all elements of a vector together. /// For example, for a <4 x i32> vector : /// %1 = vector_shuffle %0, <2,3,u,u> /// %2 = add %0, %1 /// %3 = vector_shuffle %2, <1,u,u,u> /// %4 = add %2, %3 /// %result = extract_vector_elt %4, 0 /// becomes : /// %0 = uaddv %0 /// %result = extract_vector_elt %0, 0 static SDValue performAcrossLaneAddReductionCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget) { if (!Subtarget->hasNEON()) return SDValue(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // Check if the input vector is fed by the ADD. if (N0->getOpcode() != ISD::ADD) return SDValue(); // The vector extract idx must constant zero because we only expect the final // result of the reduction is placed in lane 0. if (!isNullConstant(N1)) return SDValue(); EVT VTy = N0.getValueType(); if (!VTy.isVector()) return SDValue(); EVT EltTy = VTy.getVectorElementType(); if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8) return SDValue(); if (VTy.getSizeInBits() < 64) return SDValue(); return tryMatchAcrossLaneShuffleForReduction(N, N0, ISD::ADD, DAG); } /// Target-specific DAG combine function for NEON load/store intrinsics /// to merge base address updates. static SDValue performNEONPostLDSTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) return SDValue(); unsigned AddrOpIdx = N->getNumOperands() - 1; SDValue Addr = N->getOperand(AddrOpIdx); // Search for a use of the address operand that is an increment. 
for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE = Addr.getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User->getOpcode() != ISD::ADD || UI.getUse().getResNo() != Addr.getResNo()) continue; // Check that the add is independent of the load/store. Otherwise, folding // it would create a cycle. if (User->isPredecessorOf(N) || N->isPredecessorOf(User)) continue; // Find the new opcode for the updating load/store. bool IsStore = false; bool IsLaneOp = false; bool IsDupOp = false; unsigned NewOpc = 0; unsigned NumVecs = 0; unsigned IntNo = cast(N->getOperand(1))->getZExtValue(); switch (IntNo) { default: llvm_unreachable("unexpected intrinsic for Neon base update"); case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post; NumVecs = 2; break; case Intrinsic::aarch64_neon_ld3: NewOpc = AArch64ISD::LD3post; NumVecs = 3; break; case Intrinsic::aarch64_neon_ld4: NewOpc = AArch64ISD::LD4post; NumVecs = 4; break; case Intrinsic::aarch64_neon_st2: NewOpc = AArch64ISD::ST2post; NumVecs = 2; IsStore = true; break; case Intrinsic::aarch64_neon_st3: NewOpc = AArch64ISD::ST3post; NumVecs = 3; IsStore = true; break; case Intrinsic::aarch64_neon_st4: NewOpc = AArch64ISD::ST4post; NumVecs = 4; IsStore = true; break; case Intrinsic::aarch64_neon_ld1x2: NewOpc = AArch64ISD::LD1x2post; NumVecs = 2; break; case Intrinsic::aarch64_neon_ld1x3: NewOpc = AArch64ISD::LD1x3post; NumVecs = 3; break; case Intrinsic::aarch64_neon_ld1x4: NewOpc = AArch64ISD::LD1x4post; NumVecs = 4; break; case Intrinsic::aarch64_neon_st1x2: NewOpc = AArch64ISD::ST1x2post; NumVecs = 2; IsStore = true; break; case Intrinsic::aarch64_neon_st1x3: NewOpc = AArch64ISD::ST1x3post; NumVecs = 3; IsStore = true; break; case Intrinsic::aarch64_neon_st1x4: NewOpc = AArch64ISD::ST1x4post; NumVecs = 4; IsStore = true; break; case Intrinsic::aarch64_neon_ld2r: NewOpc = AArch64ISD::LD2DUPpost; NumVecs = 2; IsDupOp = true; break; case Intrinsic::aarch64_neon_ld3r: NewOpc = AArch64ISD::LD3DUPpost; 
NumVecs = 3; IsDupOp = true; break; case Intrinsic::aarch64_neon_ld4r: NewOpc = AArch64ISD::LD4DUPpost; NumVecs = 4; IsDupOp = true; break; case Intrinsic::aarch64_neon_ld2lane: NewOpc = AArch64ISD::LD2LANEpost; NumVecs = 2; IsLaneOp = true; break; case Intrinsic::aarch64_neon_ld3lane: NewOpc = AArch64ISD::LD3LANEpost; NumVecs = 3; IsLaneOp = true; break; case Intrinsic::aarch64_neon_ld4lane: NewOpc = AArch64ISD::LD4LANEpost; NumVecs = 4; IsLaneOp = true; break; case Intrinsic::aarch64_neon_st2lane: NewOpc = AArch64ISD::ST2LANEpost; NumVecs = 2; IsStore = true; IsLaneOp = true; break; case Intrinsic::aarch64_neon_st3lane: NewOpc = AArch64ISD::ST3LANEpost; NumVecs = 3; IsStore = true; IsLaneOp = true; break; case Intrinsic::aarch64_neon_st4lane: NewOpc = AArch64ISD::ST4LANEpost; NumVecs = 4; IsStore = true; IsLaneOp = true; break; } EVT VecTy; if (IsStore) VecTy = N->getOperand(2).getValueType(); else VecTy = N->getValueType(0); // If the increment is a constant, it must match the memory ref size. SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); if (ConstantSDNode *CInc = dyn_cast(Inc.getNode())) { uint32_t IncVal = CInc->getZExtValue(); unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8; if (IsLaneOp || IsDupOp) NumBytes /= VecTy.getVectorNumElements(); if (IncVal != NumBytes) continue; Inc = DAG.getRegister(AArch64::XZR, MVT::i64); } SmallVector Ops; Ops.push_back(N->getOperand(0)); // Incoming chain // Load lane and store have vector list as input. if (IsLaneOp || IsStore) for (unsigned i = 2; i < AddrOpIdx; ++i) Ops.push_back(N->getOperand(i)); Ops.push_back(Addr); // Base register Ops.push_back(Inc); // Return Types. EVT Tys[6]; unsigned NumResultVecs = (IsStore ? 
0 : NumVecs); unsigned n; for (n = 0; n < NumResultVecs; ++n) Tys[n] = VecTy; Tys[n++] = MVT::i64; // Type of write back register Tys[n] = MVT::Other; // Type of the chain SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs + 2)); MemIntrinsicSDNode *MemInt = cast(N); SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops, MemInt->getMemoryVT(), MemInt->getMemOperand()); // Update the uses. std::vector NewResults; for (unsigned i = 0; i < NumResultVecs; ++i) { NewResults.push_back(SDValue(UpdN.getNode(), i)); } NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); DCI.CombineTo(N, NewResults); DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); break; } return SDValue(); } // Checks to see if the value is the prescribed width and returns information // about its extension mode. static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) { ExtType = ISD::NON_EXTLOAD; switch(V.getNode()->getOpcode()) { default: return false; case ISD::LOAD: { LoadSDNode *LoadNode = cast(V.getNode()); if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8) || (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) { ExtType = LoadNode->getExtensionType(); return true; } return false; } case ISD::AssertSext: { VTSDNode *TypeNode = cast(V.getNode()->getOperand(1)); if ((TypeNode->getVT() == MVT::i8 && width == 8) || (TypeNode->getVT() == MVT::i16 && width == 16)) { ExtType = ISD::SEXTLOAD; return true; } return false; } case ISD::AssertZext: { VTSDNode *TypeNode = cast(V.getNode()->getOperand(1)); if ((TypeNode->getVT() == MVT::i8 && width == 8) || (TypeNode->getVT() == MVT::i16 && width == 16)) { ExtType = ISD::ZEXTLOAD; return true; } return false; } case ISD::Constant: case ISD::TargetConstant: { return std::abs(cast(V.getNode())->getSExtValue()) < 1LL << (width - 1); } } return true; } // This function does a whole lot of voodoo to determine if the tests are // equivalent without and with a mask. 
Essentially what happens is that given a // DAG resembling: // // +-------------+ +-------------+ +-------------+ +-------------+ // | Input | | AddConstant | | CompConstant| | CC | // +-------------+ +-------------+ +-------------+ +-------------+ // | | | | // V V | +----------+ // +-------------+ +----+ | | // | ADD | |0xff| | | // +-------------+ +----+ | | // | | | | // V V | | // +-------------+ | | // | AND | | | // +-------------+ | | // | | | // +-----+ | | // | | | // V V V // +-------------+ // | CMP | // +-------------+ // // The AND node may be safely removed for some combinations of inputs. In // particular we need to take into account the extension type of the Input, // the exact values of AddConstant, CompConstant, and CC, along with the nominal // width of the input (this can work for any width inputs, the above graph is // specific to 8 bits. // // The specific equations were worked out by generating output tables for each // AArch64CC value in terms of and AddConstant (w1), CompConstant(w2). The // problem was simplified by working with 4 bit inputs, which means we only // needed to reason about 24 distinct bit patterns: 8 patterns unique to zero // extension (8,15), 8 patterns unique to sign extensions (-8,-1), and 8 // patterns present in both extensions (0,7). For every distinct set of // AddConstant and CompConstants bit patterns we can consider the masked and // unmasked versions to be equivalent if the result of this function is true for // all 16 distinct bit patterns of for the current extension type of Input (w0). // // sub w8, w0, w1 // and w10, w8, #0x0f // cmp w8, w2 // cset w9, AArch64CC // cmp w10, w2 // cset w11, AArch64CC // cmp w9, w11 // cset w0, eq // ret // // Since the above function shows when the outputs are equivalent it defines // when it is safe to remove the AND. Unfortunately it only runs on AArch64 and // would be expensive to run during compiles. 
The equations below were written // in a test harness that confirmed they gave equivalent outputs to the above // for all inputs function, so they can be used determine if the removal is // legal instead. // // isEquivalentMaskless() is the code for testing if the AND can be removed // factored out of the DAG recognition as the DAG can take several forms. static bool isEquivalentMaskless(unsigned CC, unsigned width, ISD::LoadExtType ExtType, int AddConstant, int CompConstant) { // By being careful about our equations and only writing the in term // symbolic values and well known constants (0, 1, -1, MaxUInt) we can // make them generally applicable to all bit widths. int MaxUInt = (1 << width); // For the purposes of these comparisons sign extending the type is // equivalent to zero extending the add and displacing it by half the integer // width. Provided we are careful and make sure our equations are valid over // the whole range we can just adjust the input and avoid writing equations // for sign extended inputs. 
if (ExtType == ISD::SEXTLOAD) AddConstant -= (1 << (width-1)); switch(CC) { case AArch64CC::LE: case AArch64CC::GT: if ((AddConstant == 0) || (CompConstant == MaxUInt - 1 && AddConstant < 0) || (AddConstant >= 0 && CompConstant < 0) || (AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant)) return true; break; case AArch64CC::LT: case AArch64CC::GE: if ((AddConstant == 0) || (AddConstant >= 0 && CompConstant <= 0) || (AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant)) return true; break; case AArch64CC::HI: case AArch64CC::LS: if ((AddConstant >= 0 && CompConstant < 0) || (AddConstant <= 0 && CompConstant >= -1 && CompConstant < AddConstant + MaxUInt)) return true; break; case AArch64CC::PL: case AArch64CC::MI: if ((AddConstant == 0) || (AddConstant > 0 && CompConstant <= 0) || (AddConstant < 0 && CompConstant <= AddConstant)) return true; break; case AArch64CC::LO: case AArch64CC::HS: if ((AddConstant >= 0 && CompConstant <= 0) || (AddConstant <= 0 && CompConstant >= 0 && CompConstant <= AddConstant + MaxUInt)) return true; break; case AArch64CC::EQ: case AArch64CC::NE: if ((AddConstant > 0 && CompConstant < 0) || (AddConstant < 0 && CompConstant >= 0 && CompConstant < AddConstant + MaxUInt) || (AddConstant >= 0 && CompConstant >= 0 && CompConstant >= AddConstant) || (AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant)) return true; break; case AArch64CC::VS: case AArch64CC::VC: case AArch64CC::AL: case AArch64CC::NV: return true; case AArch64CC::Invalid: break; } return false; } static SDValue performCONDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, unsigned CCIndex, unsigned CmpIndex) { unsigned CC = cast(N->getOperand(CCIndex))->getSExtValue(); SDNode *SubsNode = N->getOperand(CmpIndex).getNode(); unsigned CondOpcode = SubsNode->getOpcode(); if (CondOpcode != AArch64ISD::SUBS) return SDValue(); // There is a SUBS feeding this condition. Is it fed by a mask we can // use? 
SDNode *AndNode = SubsNode->getOperand(0).getNode(); unsigned MaskBits = 0; if (AndNode->getOpcode() != ISD::AND) return SDValue(); if (ConstantSDNode *CN = dyn_cast(AndNode->getOperand(1))) { uint32_t CNV = CN->getZExtValue(); if (CNV == 255) MaskBits = 8; else if (CNV == 65535) MaskBits = 16; } if (!MaskBits) return SDValue(); SDValue AddValue = AndNode->getOperand(0); if (AddValue.getOpcode() != ISD::ADD) return SDValue(); // The basic dag structure is correct, grab the inputs and validate them. SDValue AddInputValue1 = AddValue.getNode()->getOperand(0); SDValue AddInputValue2 = AddValue.getNode()->getOperand(1); SDValue SubsInputValue = SubsNode->getOperand(1); // The mask is present and the provenance of all the values is a smaller type, // lets see if the mask is superfluous. if (!isa(AddInputValue2.getNode()) || !isa(SubsInputValue.getNode())) return SDValue(); ISD::LoadExtType ExtType; if (!checkValueWidth(SubsInputValue, MaskBits, ExtType) || !checkValueWidth(AddInputValue2, MaskBits, ExtType) || !checkValueWidth(AddInputValue1, MaskBits, ExtType) ) return SDValue(); if(!isEquivalentMaskless(CC, MaskBits, ExtType, cast(AddInputValue2.getNode())->getSExtValue(), cast(SubsInputValue.getNode())->getSExtValue())) return SDValue(); // The AND is not necessary, remove it. SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0), SubsNode->getValueType(1)); SDValue Ops[] = { AddValue, SubsNode->getOperand(1) }; SDValue NewValue = DAG.getNode(CondOpcode, SDLoc(SubsNode), VTs, Ops); DAG.ReplaceAllUsesWith(SubsNode, NewValue.getNode()); return SDValue(N, 0); } // Optimize compare with zero and branch. 
static SDValue performBRCONDCombine(SDNode *N,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    SelectionDAG &DAG) {
  // First give performCONDCombine a chance to simplify the compare feeding
  // this branch (condition code is operand 2, compare is operand 3).
  if (SDValue NV = performCONDCombine(N, DCI, DAG, 2, 3))
    N = NV.getNode();
  SDValue Chain = N->getOperand(0);
  SDValue Dest = N->getOperand(1);
  SDValue CCVal = N->getOperand(2);
  SDValue Cmp = N->getOperand(3);

  assert(isa<ConstantSDNode>(CCVal) && "Expected a ConstantSDNode here!");
  unsigned CC = cast<ConstantSDNode>(CCVal)->getZExtValue();
  if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
    return SDValue();

  unsigned CmpOpc = Cmp.getOpcode();
  if (CmpOpc != AArch64ISD::ADDS && CmpOpc != AArch64ISD::SUBS)
    return SDValue();

  // Only attempt folding if there is only one use of the flag and no use of the
  // value.
  if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
    return SDValue();

  SDValue LHS = Cmp.getOperand(0);
  SDValue RHS = Cmp.getOperand(1);

  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected the value type to be the same for both operands!");
  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
    return SDValue();

  // Canonicalize so that a zero operand, if any, ends up on the right.
  if (isNullConstant(LHS))
    std::swap(LHS, RHS);

  if (!isNullConstant(RHS))
    return SDValue();

  // Shifted values are not folded into a compare-and-branch here.
  if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA ||
      LHS.getOpcode() == ISD::SRL)
    return SDValue();

  // Fold the compare into the branch instruction.
  SDValue BR;
  if (CC == AArch64CC::EQ)
    BR = DAG.getNode(AArch64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
  else
    BR = DAG.getNode(AArch64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);

  // Do not add new nodes to DAG combiner worklist.
  DCI.CombineTo(N, BR, false);

  return SDValue();
}

// Optimize some simple tbz/tbnz cases. Returns the new operand and bit to test
// as well as whether the test should be inverted. This code is required to
// catch these cases (as opposed to standard dag combines) because
// AArch64ISD::TBZ is matched during legalization.
// Look through Op for a simpler value on which the same single-bit test can be
// performed.
//
//   Op     - value currently being bit-tested.
//   Bit    - in/out: index of the tested bit, rewritten so that it refers to
//            the returned operand.
//   Invert - in/out: toggled when the node that was looked through flips the
//            tested bit.
//   DAG    - only forwarded to the recursive calls.
//
// Returns the operand to test; this is Op itself when no rule applies.
static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert,
                                 SelectionDAG &DAG) {
  if (!Op->hasOneUse())
    return Op;

  // We don't handle undef/constant-fold cases below, as they should have
  // already been taken care of (e.g. and of 0, test of undefined shifted bits,
  // etc.)

  // (tbz (trunc x), b) -> (tbz x, b)
  // This case is just here to enable more of the below cases to be caught.
  if (Op->getOpcode() == ISD::TRUNCATE &&
      Bit < Op->getValueType(0).getSizeInBits()) {
    return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
  }

  // Everything below is a binary operation with a constant right-hand side.
  if (Op->getNumOperands() != 2)
    return Op;

  auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
  if (!C)
    return Op;

  switch (Op->getOpcode()) {
  default:
    return Op;

  // (tbz (and x, m), b) -> (tbz x, b)
  case ISD::AND:
    // Only valid when the mask keeps the tested bit.
    if ((C->getZExtValue() >> Bit) & 1)
      return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
    return Op;

  // (tbz (shl x, c), b) -> (tbz x, b-c)
  case ISD::SHL:
    if (C->getZExtValue() <= Bit &&
        (Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
      Bit = Bit - C->getZExtValue();
      return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
    }
    return Op;

  // (tbz (sra x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits in x
  case ISD::SRA:
    Bit = Bit + C->getZExtValue();
    if (Bit >= Op->getValueType(0).getSizeInBits())
      Bit = Op->getValueType(0).getSizeInBits() - 1;
    return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);

  // (tbz (srl x, c), b) -> (tbz x, b+c)
  case ISD::SRL:
    if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
      Bit = Bit + C->getZExtValue();
      return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
    }
    return Op;

  // (tbz (xor x, -1), b) -> (tbnz x, b)
  case ISD::XOR:
    // The test is inverted only if the constant actually flips the tested bit.
    if ((C->getZExtValue() >> Bit) & 1)
      Invert = !Invert;
    return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
  }
}

// Optimize test single bit zero/non-zero and branch.
// Rewrites a TBZ/TBNZ node so that it tests the simplified operand found by
// getTestBitOperand(). Operand 1 of N is the tested value and operand 2 the
// constant bit number; operands 0 and 3 are forwarded unchanged to the new
// node. Returns an empty SDValue when nothing could be simplified.
static SDValue performTBZCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 SelectionDAG &DAG) {
  unsigned Bit = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
  bool Invert = false;
  SDValue TestSrc = N->getOperand(1);
  SDValue NewTestSrc = getTestBitOperand(TestSrc, Bit, Invert, DAG);

  if (TestSrc == NewTestSrc)
    return SDValue();

  // An inverted test swaps TBZ and TBNZ.
  unsigned NewOpc = N->getOpcode();
  if (Invert) {
    if (NewOpc == AArch64ISD::TBZ)
      NewOpc = AArch64ISD::TBNZ;
    else {
      assert(NewOpc == AArch64ISD::TBNZ);
      NewOpc = AArch64ISD::TBZ;
    }
  }

  SDLoc DL(N);
  return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc,
                     DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3));
}

// vselect (v1i1 setcc) ->
//     vselect (v1iXX setcc)  (XX is the size of the compared operand type)
// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
// condition. If it can legalize "VSELECT v1i1" correctly, no need to combine
// such VSELECT.
//
// Returns the rebuilt VSELECT, or an empty SDValue when the condition is not a
// single-element i1 SETCC or the result and compared types differ in size.
static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  EVT CCVT = N0.getValueType();

  if (N0.getOpcode() != ISD::SETCC || CCVT.getVectorNumElements() != 1 ||
      CCVT.getVectorElementType() != MVT::i1)
    return SDValue();

  EVT ResVT = N->getValueType(0);
  EVT CmpVT = N0.getOperand(0).getValueType();
  // Only combine when the result type is of the same size as the compared
  // operands.
  if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
    return SDValue();

  SDValue IfTrue = N->getOperand(1);
  SDValue IfFalse = N->getOperand(2);
  // Re-issue the comparison with an integer result as wide as its operands.
  SDValue SetCC =
      DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
                   N0.getOperand(0), N0.getOperand(1),
                   cast<CondCodeSDNode>(N0.getOperand(2))->get());
  return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
                     IfTrue, IfFalse);
}

/// A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with
/// the compare-mask instructions rather than going via NZCV, even if LHS and
/// RHS are really scalar.
/// This replaces any scalar setcc in the above pattern
/// with a vector one followed by a DUP shuffle on the result.
///
/// Returns the new select, or an empty SDValue when the condition is not a
/// SETCC, the result is not a vector, or no suitably sized mask type exists.
static SDValue performSelectCombine(SDNode *N,
                                    TargetLowering::DAGCombinerInfo &DCI) {
  SelectionDAG &DAG = DCI.DAG;
  SDValue N0 = N->getOperand(0);
  EVT ResVT = N->getValueType(0);

  if (N0.getOpcode() != ISD::SETCC)
    return SDValue();

  // Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered
  // scalar SetCCResultType. We also don't expect vectors, because we assume
  // that selects fed by vector SETCCs are canonicalized to VSELECT.
  assert((N0.getValueType() == MVT::i1 || N0.getValueType() == MVT::i32) &&
         "Scalar-SETCC feeding SELECT has unexpected result type!");

  // If NumMaskElts == 0, the comparison is larger than select result. The
  // largest real NEON comparison is 64-bits per lane, which means the result is
  // at most 32-bits and an illegal vector. Just bail out for now.
  EVT SrcVT = N0.getOperand(0).getValueType();

  // Don't try to do this optimization when the setcc itself has i1 operands.
  // There are no legal vectors of i1, so this would be pointless.
  if (SrcVT == MVT::i1)
    return SDValue();

  int NumMaskElts = ResVT.getSizeInBits() / SrcVT.getSizeInBits();
  if (!ResVT.isVector() || NumMaskElts == 0)
    return SDValue();

  SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts);
  EVT CCVT = SrcVT.changeVectorElementTypeToInteger();

  // Also bail out if the vector CCVT isn't the same size as ResVT.
  // This can happen if the SETCC operand size doesn't divide the ResVT size
  // (e.g., f64 vs v3f32).
  if (CCVT.getSizeInBits() != ResVT.getSizeInBits())
    return SDValue();

  // Make sure we didn't create illegal types, if we're not supposed to.
  assert(DCI.isBeforeLegalize() ||
         DAG.getTargetLoweringInfo().isTypeLegal(SrcVT));

  // First perform a vector comparison, where lane 0 is the one we're interested
  // in.
  SDLoc DL(N0);
  SDValue LHS =
      DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0));
  SDValue RHS =
      DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1));
  SDValue SetCC = DAG.getNode(ISD::SETCC, DL, CCVT, LHS, RHS, N0.getOperand(2));

  // Now duplicate the comparison mask we want across all other lanes.
  SmallVector<int, 8> DUPMask(CCVT.getVectorNumElements(), 0);
  SDValue Mask = DAG.getVectorShuffle(CCVT, DL, SetCC, SetCC, DUPMask);
  Mask = DAG.getNode(ISD::BITCAST, DL,
                     ResVT.changeVectorElementTypeToInteger(), Mask);

  return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
}

/// Get rid of unnecessary NVCASTs (that don't change the type).
static SDValue performNVCASTCombine(SDNode *N) {
  if (N->getValueType(0) == N->getOperand(0).getValueType())
    return N->getOperand(0);

  return SDValue();
}

/// Dispatch N to the combine routine registered for its opcode. Opcodes with
/// no entry, and intrinsics other than the listed NEON load/store ones, yield
/// an empty SDValue.
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default:
    break;
  case ISD::ADD:
  case ISD::SUB:
    return performAddSubLongCombine(N, DCI, DAG);
  case ISD::XOR:
    return performXorCombine(N, DAG, DCI, Subtarget);
  case ISD::MUL:
    return performMulCombine(N, DAG, DCI, Subtarget);
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    return performIntToFpCombine(N, DAG, Subtarget);
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    return performFpToIntCombine(N, DAG, DCI, Subtarget);
  case ISD::FDIV:
    return performFDivCombine(N, DAG, DCI, Subtarget);
  case ISD::OR:
    return performORCombine(N, DCI, Subtarget);
  case ISD::SRL:
    return performSRLCombine(N, DCI);
  case ISD::INTRINSIC_WO_CHAIN:
    return performIntrinsicCombine(N, DCI, Subtarget);
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
    return performExtendCombine(N, DCI, DAG);
  case ISD::BITCAST:
    return performBitcastCombine(N, DCI, DAG);
  case ISD::CONCAT_VECTORS:
    return performConcatVectorsCombine(N, DCI, DAG);
  case ISD::SELECT: {
    // Fall back to the min/max reduction combine when the select combine
    // produced nothing.
    SDValue RV = performSelectCombine(N, DCI);
    if (!RV.getNode())
      RV = performAcrossLaneMinMaxReductionCombine(N, DAG, Subtarget);
    return RV;
  }
  case ISD::VSELECT:
    return performVSelectCombine(N, DCI.DAG);
  case ISD::LOAD:
    if (performTBISimplification(N->getOperand(1), DCI, DAG))
      return SDValue(N, 0);
    break;
  case ISD::STORE:
    return performSTORECombine(N, DCI, DAG, Subtarget);
  case AArch64ISD::BRCOND:
    return performBRCONDCombine(N, DCI, DAG);
  case AArch64ISD::TBNZ:
  case AArch64ISD::TBZ:
    return performTBZCombine(N, DCI, DAG);
  case AArch64ISD::CSEL:
    return performCONDCombine(N, DCI, DAG, 2, 3);
  case AArch64ISD::DUP:
    return performPostLD1Combine(N, DCI, false);
  case AArch64ISD::NVCAST:
    return performNVCASTCombine(N);
  case ISD::INSERT_VECTOR_ELT:
    return performPostLD1Combine(N, DCI, true);
  case ISD::EXTRACT_VECTOR_ELT:
    return performAcrossLaneAddReductionCombine(N, DAG, Subtarget);
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN:
    // Operand 1 carries the intrinsic ID.
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    case Intrinsic::aarch64_neon_ld2:
    case Intrinsic::aarch64_neon_ld3:
    case Intrinsic::aarch64_neon_ld4:
    case Intrinsic::aarch64_neon_ld1x2:
    case Intrinsic::aarch64_neon_ld1x3:
    case Intrinsic::aarch64_neon_ld1x4:
    case Intrinsic::aarch64_neon_ld2lane:
    case Intrinsic::aarch64_neon_ld3lane:
    case Intrinsic::aarch64_neon_ld4lane:
    case Intrinsic::aarch64_neon_ld2r:
    case Intrinsic::aarch64_neon_ld3r:
    case Intrinsic::aarch64_neon_ld4r:
    case Intrinsic::aarch64_neon_st2:
    case Intrinsic::aarch64_neon_st3:
    case Intrinsic::aarch64_neon_st4:
    case Intrinsic::aarch64_neon_st1x2:
    case Intrinsic::aarch64_neon_st1x3:
    case Intrinsic::aarch64_neon_st1x4:
    case Intrinsic::aarch64_neon_st2lane:
    case Intrinsic::aarch64_neon_st3lane:
    case Intrinsic::aarch64_neon_st4lane:
      return performNEONPostLDSTCombine(N, DCI, DAG);
    default:
      break;
    }
  }
  return SDValue();
}

// Check if the return value is used as only a return value, as otherwise
// we can't perform a tail-call.
In particular, we need to check for // target ISD nodes that are returns and any other "odd" constructs // that the generic analysis code won't necessarily catch. bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { if (N->getNumValues() != 1) return false; if (!N->hasNUsesOfValue(1, 0)) return false; SDValue TCChain = Chain; SDNode *Copy = *N->use_begin(); if (Copy->getOpcode() == ISD::CopyToReg) { // If the copy has a glue operand, we conservatively assume it isn't safe to // perform a tail call. if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue) return false; TCChain = Copy->getOperand(0); } else if (Copy->getOpcode() != ISD::FP_EXTEND) return false; bool HasRet = false; for (SDNode *Node : Copy->uses()) { if (Node->getOpcode() != AArch64ISD::RET_FLAG) return false; HasRet = true; } if (!HasRet) return false; Chain = TCChain; return true; } // Return whether the an instruction can potentially be optimized to a tail // call. This will cause the optimizers to attempt to move, or duplicate, // return instructions to help enable tail call optimizations for this // instruction. bool AArch64TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { return CI->isTailCall(); } bool AArch64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, bool &IsInc, SelectionDAG &DAG) const { if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) return false; Base = Op->getOperand(0); // All of the indexed addressing mode instructions take a signed // 9 bit immediate offset. 
if (ConstantSDNode *RHS = dyn_cast(Op->getOperand(1))) { int64_t RHSC = RHS->getSExtValue(); if (Op->getOpcode() == ISD::SUB) RHSC = -(uint64_t)RHSC; if (!isInt<9>(RHSC)) return false; IsInc = (Op->getOpcode() == ISD::ADD); Offset = Op->getOperand(1); return true; } return false; } bool AArch64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const { EVT VT; SDValue Ptr; if (LoadSDNode *LD = dyn_cast(N)) { VT = LD->getMemoryVT(); Ptr = LD->getBasePtr(); } else if (StoreSDNode *ST = dyn_cast(N)) { VT = ST->getMemoryVT(); Ptr = ST->getBasePtr(); } else return false; bool IsInc; if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG)) return false; AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC; return true; } bool AArch64TargetLowering::getPostIndexedAddressParts( SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const { EVT VT; SDValue Ptr; if (LoadSDNode *LD = dyn_cast(N)) { VT = LD->getMemoryVT(); Ptr = LD->getBasePtr(); } else if (StoreSDNode *ST = dyn_cast(N)) { VT = ST->getMemoryVT(); Ptr = ST->getBasePtr(); } else return false; bool IsInc; if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG)) return false; // Post-indexing updates the base, so it's not a valid transform // if that's not the same as the load's pointer. if (Ptr != Base) return false; AM = IsInc ? 
ISD::POST_INC : ISD::POST_DEC; return true; } static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) { SDLoc DL(N); SDValue Op = N->getOperand(0); if (N->getValueType(0) != MVT::i16 || Op.getValueType() != MVT::f16) return; Op = SDValue( DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32, DAG.getUNDEF(MVT::i32), Op, DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)), 0); Op = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op)); } static void ReplaceReductionResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG, unsigned InterOp, unsigned AcrossOp) { EVT LoVT, HiVT; SDValue Lo, Hi; SDLoc dl(N); std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); SDValue InterVal = DAG.getNode(InterOp, dl, LoVT, Lo, Hi); SDValue SplitVal = DAG.getNode(AcrossOp, dl, LoVT, InterVal); Results.push_back(SplitVal); } static std::pair splitInt128(SDValue N, SelectionDAG &DAG) { SDLoc DL(N); SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, N); SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, DAG.getNode(ISD::SRL, DL, MVT::i128, N, DAG.getConstant(64, DL, MVT::i64))); return std::make_pair(Lo, Hi); } static void ReplaceCMP_SWAP_128Results(SDNode *N, SmallVectorImpl & Results, SelectionDAG &DAG) { assert(N->getValueType(0) == MVT::i128 && "AtomicCmpSwap on types less than 128 should be legal"); auto Desired = splitInt128(N->getOperand(2), DAG); auto New = splitInt128(N->getOperand(3), DAG); SDValue Ops[] = {N->getOperand(1), Desired.first, Desired.second, New.first, New.second, N->getOperand(0)}; SDNode *CmpSwap = DAG.getMachineNode( AArch64::CMP_SWAP_128, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i32, MVT::Other), Ops); MachineFunction &MF = DAG.getMachineFunction(); MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1); MemOp[0] = cast(N)->getMemOperand(); cast(CmpSwap)->setMemRefs(MemOp, MemOp 
+ 1); Results.push_back(SDValue(CmpSwap, 0)); Results.push_back(SDValue(CmpSwap, 1)); Results.push_back(SDValue(CmpSwap, 3)); } void AArch64TargetLowering::ReplaceNodeResults( SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to custom expand this"); case ISD::BITCAST: ReplaceBITCASTResults(N, Results, DAG); return; case AArch64ISD::SADDV: ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::SADDV); return; case AArch64ISD::UADDV: ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::UADDV); return; case AArch64ISD::SMINV: ReplaceReductionResults(N, Results, DAG, ISD::SMIN, AArch64ISD::SMINV); return; case AArch64ISD::UMINV: ReplaceReductionResults(N, Results, DAG, ISD::UMIN, AArch64ISD::UMINV); return; case AArch64ISD::SMAXV: ReplaceReductionResults(N, Results, DAG, ISD::SMAX, AArch64ISD::SMAXV); return; case AArch64ISD::UMAXV: ReplaceReductionResults(N, Results, DAG, ISD::UMAX, AArch64ISD::UMAXV); return; case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion"); // Let normal code take care of it by not adding anything to Results. return; case ISD::ATOMIC_CMP_SWAP: ReplaceCMP_SWAP_128Results(N, Results, DAG); return; } } bool AArch64TargetLowering::useLoadStackGuardNode() const { if (!Subtarget->isTargetAndroid()) return true; return TargetLowering::useLoadStackGuardNode(); } unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const { // Combine multiple FDIVs with the same divisor into multiple FMULs by the // reciprocal if there are three or more FDIVs. return 3; } TargetLoweringBase::LegalizeTypeAction AArch64TargetLowering::getPreferredVectorAction(EVT VT) const { MVT SVT = VT.getSimpleVT(); // During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8, // v4i16, v2i32 instead of to promote. 
if (SVT == MVT::v1i8 || SVT == MVT::v1i16 || SVT == MVT::v1i32 || SVT == MVT::v1f32) return TypeWidenVector; return TargetLoweringBase::getPreferredVectorAction(VT); } // Loads and stores less than 128-bits are already atomic; ones above that // are doomed anyway, so defer to the default libcall and blame the OS when // things go wrong. bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); return Size == 128; } // Loads and stores less than 128-bits are already atomic; ones above that // are doomed anyway, so defer to the default libcall and blame the OS when // things go wrong. TargetLowering::AtomicExpansionKind AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { unsigned Size = LI->getType()->getPrimitiveSizeInBits(); return Size == 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None; } // For the real atomic operations, we have ldxr/stxr up to 128 bits, TargetLowering::AtomicExpansionKind AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned Size = AI->getType()->getPrimitiveSizeInBits(); return Size <= 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None; } bool AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR( AtomicCmpXchgInst *AI) const { // At -O0, fast-regalloc cannot cope with the live vregs necessary to // implement cmpxchg without spilling. If the address being exchanged is also // on the stack and close enough to the spill slot, this can lead to a // situation where the monitor always gets cleared and the atomic operation // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead. 
return getTargetMachine().getOptLevel() != 0; } Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); Type *ValTy = cast(Addr->getType())->getElementType(); bool IsAcquire = isAcquireOrStronger(Ord); // Since i128 isn't legal and intrinsics don't get type-lowered, the ldrexd // intrinsic must return {i64, i64} and we have to recombine them into a // single i128 here. if (ValTy->getPrimitiveSizeInBits() == 128) { Intrinsic::ID Int = IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp; Function *Ldxr = Intrinsic::getDeclaration(M, Int); Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi"); Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo"); Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi"); Lo = Builder.CreateZExt(Lo, ValTy, "lo64"); Hi = Builder.CreateZExt(Hi, ValTy, "hi64"); return Builder.CreateOr( Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64"); } Type *Tys[] = { Addr->getType() }; Intrinsic::ID Int = IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr; Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys); return Builder.CreateTruncOrBitCast( Builder.CreateCall(Ldxr, Addr), cast(Addr->getType())->getElementType()); } void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance( IRBuilder<> &Builder) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex)); } Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); bool IsRelease = isReleaseOrStronger(Ord); // Since the intrinsics must have legal type, the i128 intrinsics take two // parameters: "i64, i64". 
We must marshal Val into the appropriate form // before the call. if (Val->getType()->getPrimitiveSizeInBits() == 128) { Intrinsic::ID Int = IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp; Function *Stxr = Intrinsic::getDeclaration(M, Int); Type *Int64Ty = Type::getInt64Ty(M->getContext()); Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo"); Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi"); Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); return Builder.CreateCall(Stxr, {Lo, Hi, Addr}); } Intrinsic::ID Int = IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr; Type *Tys[] = { Addr->getType() }; Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys); return Builder.CreateCall(Stxr, {Builder.CreateZExtOrBitCast( Val, Stxr->getFunctionType()->getParamType(0)), Addr}); } bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters( Type *Ty, CallingConv::ID CallConv, bool isVarArg) const { return Ty->isArrayTy(); } bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &, EVT) const { return false; } Value *AArch64TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const { if (!Subtarget->isTargetAndroid()) return TargetLowering::getIRStackGuard(IRB); // Android provides a fixed TLS slot for the stack cookie. 
See the definition // of TLS_SLOT_STACK_GUARD in // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h const unsigned TlsOffset = 0x28; Module *M = IRB.GetInsertBlock()->getParent()->getParent(); Function *ThreadPointerFunc = Intrinsic::getDeclaration(M, Intrinsic::thread_pointer); return IRB.CreatePointerCast( IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), TlsOffset), Type::getInt8PtrTy(IRB.getContext())->getPointerTo(0)); } Value *AArch64TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const { if (!Subtarget->isTargetAndroid()) return TargetLowering::getSafeStackPointerLocation(IRB); // Android provides a fixed TLS slot for the SafeStack pointer. See the // definition of TLS_SLOT_SAFESTACK in // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h const unsigned TlsOffset = 0x48; Module *M = IRB.GetInsertBlock()->getParent()->getParent(); Function *ThreadPointerFunc = Intrinsic::getDeclaration(M, Intrinsic::thread_pointer); return IRB.CreatePointerCast( IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), TlsOffset), Type::getInt8PtrTy(IRB.getContext())->getPointerTo(0)); } void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { // Update IsSplitCSR in AArch64unctionInfo. 
AArch64FunctionInfo *AFI = Entry->getParent()->getInfo(); AFI->setIsSplitCSR(true); } void AArch64TargetLowering::insertCopiesSplitCSR( MachineBasicBlock *Entry, const SmallVectorImpl &Exits) const { const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); if (!IStart) return; const TargetInstrInfo *TII = Subtarget->getInstrInfo(); MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); MachineBasicBlock::iterator MBBI = Entry->begin(); for (const MCPhysReg *I = IStart; *I; ++I) { const TargetRegisterClass *RC = nullptr; if (AArch64::GPR64RegClass.contains(*I)) RC = &AArch64::GPR64RegClass; else if (AArch64::FPR64RegClass.contains(*I)) RC = &AArch64::FPR64RegClass; else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); unsigned NewVR = MRI->createVirtualRegister(RC); // Create copy from CSR to a virtual register. // FIXME: this currently does not emit CFI pseudo-instructions, it works // fine for CXX_FAST_TLS since the C++-style TLS access functions should be // nounwind. If we want to generalize this later, we may need to emit // CFI pseudo-instructions. assert(Entry->getParent()->getFunction()->hasFnAttribute( Attribute::NoUnwind) && "Function should be nounwind in insertCopiesSplitCSR!"); Entry->addLiveIn(*I); BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(*I); // Insert the copy-back instructions right before the terminator. for (auto *Exit : Exits) BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), TII->get(TargetOpcode::COPY), *I) .addReg(NewVR); } } bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const { // Integer division on AArch64 is expensive. However, when aggressively // optimizing for code size, we prefer to use a div instruction, as it is // usually smaller than the alternative sequence. // The exception to this is vector division. 
Since AArch64 doesn't have vector // integer division, leaving the division as-is is a loss even in terms of // size, because it will have to be scalarized, while the alternative code // sequence can be performed in vector form. bool OptSize = Attr.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); return OptSize && !VT.isVector(); } Index: projects/clang400-import/contrib/llvm/lib/Target/ARM/ARMCallingConv.td =================================================================== --- projects/clang400-import/contrib/llvm/lib/Target/ARM/ARMCallingConv.td (revision 313893) +++ projects/clang400-import/contrib/llvm/lib/Target/ARM/ARMCallingConv.td (revision 313894) @@ -1,310 +1,310 @@ //===-- ARMCallingConv.td - Calling Conventions for ARM ----*- tablegen -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // This describes the calling conventions for ARM architecture. //===----------------------------------------------------------------------===// /// CCIfAlign - Match of the original alignment of the arg class CCIfAlign: CCIf; //===----------------------------------------------------------------------===// // ARM APCS Calling Convention //===----------------------------------------------------------------------===// def CC_ARM_APCS : CallingConv<[ // Handles byval parameters. CCIfByVal>, CCIfType<[i1, i8, i16], CCPromoteToType>, // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf>>, - // A SwiftError is passed in R6. - CCIfSwiftError>>, + // A SwiftError is passed in R8. + CCIfSwiftError>>, // Handle all vector types as either f64 or v2f64. 
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, // f64 and v2f64 are passed in adjacent GPRs, possibly split onto the stack CCIfType<[f64, v2f64], CCCustom<"CC_ARM_APCS_Custom_f64">>, CCIfType<[f32], CCBitConvertToType>, CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, CCIfType<[i32], CCAssignToStack<4, 4>>, CCIfType<[f64], CCAssignToStack<8, 4>>, CCIfType<[v2f64], CCAssignToStack<16, 4>> ]>; def RetCC_ARM_APCS : CallingConv<[ CCIfType<[i1, i8, i16], CCPromoteToType>, CCIfType<[f32], CCBitConvertToType>, // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf>>, - // A SwiftError is returned in R6. - CCIfSwiftError>>, + // A SwiftError is returned in R8. + CCIfSwiftError>>, // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>, CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>> ]>; //===----------------------------------------------------------------------===// // ARM APCS Calling Convention for FastCC (when VFP2 or later is available) //===----------------------------------------------------------------------===// def FastCC_ARM_APCS : CallingConv<[ // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>, CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15]>>, // CPRCs may be allocated to co-processor registers or the stack - they // may never be allocated to core registers. 
CCIfType<[f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>, CCIfType<[f64], CCAssignToStackWithShadow<8, 4, [Q0, Q1, Q2, Q3]>>, CCIfType<[v2f64], CCAssignToStackWithShadow<16, 4, [Q0, Q1, Q2, Q3]>>, CCDelegateTo ]>; def RetFastCC_ARM_APCS : CallingConv<[ // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>, CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15]>>, CCDelegateTo ]>; //===----------------------------------------------------------------------===// // ARM APCS Calling Convention for GHC //===----------------------------------------------------------------------===// def CC_ARM_APCS_GHC : CallingConv<[ // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>, CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>, CCIfType<[f32], CCAssignToReg<[S16, S17, S18, S19, S20, S21, S22, S23]>>, // Promote i8/i16 arguments to i32. 
CCIfType<[i8, i16], CCPromoteToType>, // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, SpLim CCIfType<[i32], CCAssignToReg<[R4, R5, R6, R7, R8, R9, R10, R11]>> ]>; //===----------------------------------------------------------------------===// // ARM AAPCS (EABI) Calling Convention, common parts //===----------------------------------------------------------------------===// def CC_ARM_AAPCS_Common : CallingConv<[ CCIfType<[i1, i8, i16], CCPromoteToType>, // i64/f64 is passed in even pairs of GPRs // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register // (and the same is true for f64 if VFP is not enabled) CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>, CCIfType<[i32], CCIf<"ArgFlags.getOrigAlign() != 8", CCAssignToReg<[R0, R1, R2, R3]>>>, CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, [R0, R1, R2, R3]>>>, CCIfType<[i32], CCAssignToStackWithShadow<4, 4, [R0, R1, R2, R3]>>, CCIfType<[f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>, CCIfType<[f64], CCAssignToStackWithShadow<8, 8, [Q0, Q1, Q2, Q3]>>, CCIfType<[v2f64], CCIfAlign<"16", CCAssignToStackWithShadow<16, 16, [Q0, Q1, Q2, Q3]>>>, CCIfType<[v2f64], CCAssignToStackWithShadow<16, 8, [Q0, Q1, Q2, Q3]>> ]>; def RetCC_ARM_AAPCS_Common : CallingConv<[ CCIfType<[i1, i8, i16], CCPromoteToType>, CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>> ]>; //===----------------------------------------------------------------------===// // ARM AAPCS (EABI) Calling Convention //===----------------------------------------------------------------------===// def CC_ARM_AAPCS : CallingConv<[ // Handles byval parameters. CCIfByVal>, // The 'nest' parameter, if any, is passed in R12. CCIfNest>, // Handle all vector types as either f64 or v2f64. 
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf>>, - // A SwiftError is passed in R6. - CCIfSwiftError>>, + // A SwiftError is passed in R8. + CCIfSwiftError>>, CCIfType<[f64, v2f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>, CCIfType<[f32], CCBitConvertToType>, CCDelegateTo ]>; def RetCC_ARM_AAPCS : CallingConv<[ // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf>>, - // A SwiftError is returned in R6. - CCIfSwiftError>>, + // A SwiftError is returned in R8. + CCIfSwiftError>>, CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>, CCIfType<[f32], CCBitConvertToType>, CCDelegateTo ]>; //===----------------------------------------------------------------------===// // ARM AAPCS-VFP (EABI) Calling Convention // Also used for FastCC (when VFP2 or later is available) //===----------------------------------------------------------------------===// def CC_ARM_AAPCS_VFP : CallingConv<[ // Handles byval parameters. CCIfByVal>, // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf>>, - // A SwiftError is passed in R6. - CCIfSwiftError>>, + // A SwiftError is passed in R8. 
+ CCIfSwiftError>>, // HFAs are passed in a contiguous block of registers, or on the stack CCIfConsecutiveRegs>, CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>, CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15]>>, CCDelegateTo ]>; def RetCC_ARM_AAPCS_VFP : CallingConv<[ // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf>>, - // A SwiftError is returned in R6. - CCIfSwiftError>>, + // A SwiftError is returned in R8. + CCIfSwiftError>>, CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>, CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15]>>, CCDelegateTo ]>; //===----------------------------------------------------------------------===// // Callee-saved register lists. //===----------------------------------------------------------------------===// def CSR_NoRegs : CalleeSavedRegs<(add)>; def CSR_FPRegs : CalleeSavedRegs<(add (sequence "D%u", 0, 31))>; def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4, (sequence "D%u", 15, 8))>; // The order of callee-saved registers needs to match the order we actually push // them in FrameLowering, because this order is what's used by // PrologEpilogInserter to allocate frame index slots. So when R7 is the frame // pointer, we use this AAPCS alternative. 
def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4, R11, R10, R9, R8, (sequence "D%u", 15, 8))>; // Constructors and destructors return 'this' in the ARM C++ ABI; since 'this' // and the pointer return value are both passed in R0 in these cases, this can // be partially modelled by treating R0 as a callee-saved register // Only the resulting RegMask is used; the SaveList is ignored def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4, (sequence "D%u", 15, 8), R0)>; // iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register. // Also save R7-R4 first to match the stack frame fixed spill areas. def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>; -// R6 is used to pass swifterror, remove it from CSR. -def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R6)>; +// R8 is used to pass swifterror, remove it from CSR. +def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R8)>; def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS_ThisReturn, R9))>; def CSR_iOS_TLSCall : CalleeSavedRegs<(add LR, SP, (sequence "R%u", 12, 1), (sequence "D%u", 31, 0))>; // C++ TLS access function saves all registers except SP. Try to match // the order of CSRs in CSR_iOS. def CSR_iOS_CXX_TLS : CalleeSavedRegs<(add CSR_iOS, (sequence "R%u", 12, 1), (sequence "D%u", 31, 0))>; // CSRs that are handled by prologue, epilogue. def CSR_iOS_CXX_TLS_PE : CalleeSavedRegs<(add LR, R12, R11, R7, R5, R4)>; // CSRs that are handled explicitly via copies. def CSR_iOS_CXX_TLS_ViaCopy : CalleeSavedRegs<(sub CSR_iOS_CXX_TLS, CSR_iOS_CXX_TLS_PE)>; // The "interrupt" attribute is used to generate code that is acceptable in // exception-handlers of various kinds. It makes us use a different return // instruction (handled elsewhere) and affects which registers we must return to // our "caller" in the same state as we receive them. 
// For most interrupts, all registers except SP and LR are shared with // user-space. We mark LR to be saved anyway, since this is what the ARM backend // generally does rather than tracking its liveness as a normal register. def CSR_GenericInt : CalleeSavedRegs<(add LR, (sequence "R%u", 12, 0))>; // The fast interrupt handlers have more private state and get their own copies // of R8-R12, in addition to SP and LR. As before, mark LR for saving too. // FIXME: we mark R11 as callee-saved since it's often the frame-pointer, and // current frame lowering expects to encounter it while processing callee-saved // registers. def CSR_FIQ : CalleeSavedRegs<(add LR, R11, (sequence "R%u", 7, 0))>; Index: projects/clang400-import/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- projects/clang400-import/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp (revision 313893) +++ projects/clang400-import/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp (revision 313894) @@ -1,13482 +1,13483 @@ //===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines the interfaces that ARM uses to lower LLVM code into a // selection DAG. 
// //===----------------------------------------------------------------------===// #include "ARMISelLowering.h" #include "ARMCallingConv.h" #include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" #include "ARMPerfectShuffle.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" #include "ARMTargetObjectFile.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" #include using namespace llvm; #define DEBUG_TYPE "arm-isel" STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt"); STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments"); STATISTIC(NumConstpoolPromoted, "Number of constants with their storage promoted into constant pools"); static cl::opt ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking 
(for debugging only)"), cl::init(true)); static cl::opt EnableConstpoolPromotion( "arm-promote-constant", cl::Hidden, cl::desc("Enable / disable promotion of unnamed_addr constants into " "constant pools"), cl::init(true)); static cl::opt ConstpoolPromotionMaxSize( "arm-promote-constant-max-size", cl::Hidden, cl::desc("Maximum size of constant to promote into a constant pool"), cl::init(64)); static cl::opt ConstpoolPromotionMaxTotal( "arm-promote-constant-max-total", cl::Hidden, cl::desc("Maximum size of ALL constants to promote into a constant pool"), cl::init(128)); namespace { class ARMCCState : public CCState { public: ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, SmallVectorImpl &locs, LLVMContext &C, ParmContext PC) : CCState(CC, isVarArg, MF, locs, C) { assert(((PC == Call) || (PC == Prologue)) && "ARMCCState users must specify whether their context is call" "or prologue generation."); CallOrPrologue = PC; } }; } // The APCS parameter registers. static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT) { if (VT != PromotedLdStVT) { setOperationAction(ISD::LOAD, VT, Promote); AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT); setOperationAction(ISD::STORE, VT, Promote); AddPromotedToType (ISD::STORE, VT, PromotedLdStVT); } MVT ElemTy = VT.getVectorElementType(); if (ElemTy != MVT::f64) setOperationAction(ISD::SETCC, VT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); if (ElemTy == MVT::i32) { setOperationAction(ISD::SINT_TO_FP, VT, Custom); setOperationAction(ISD::UINT_TO_FP, VT, Custom); setOperationAction(ISD::FP_TO_SINT, VT, Custom); setOperationAction(ISD::FP_TO_UINT, VT, Custom); } else { setOperationAction(ISD::SINT_TO_FP, VT, Expand); setOperationAction(ISD::UINT_TO_FP, VT, Expand); setOperationAction(ISD::FP_TO_SINT, VT, Expand); 
setOperationAction(ISD::FP_TO_UINT, VT, Expand); } setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); setOperationAction(ISD::SELECT, VT, Expand); setOperationAction(ISD::SELECT_CC, VT, Expand); setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); if (VT.isInteger()) { setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); setOperationAction(ISD::SRL, VT, Custom); } // Promote all bit-wise operations. if (VT.isInteger() && VT != PromotedBitwiseVT) { setOperationAction(ISD::AND, VT, Promote); AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT); setOperationAction(ISD::OR, VT, Promote); AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT); setOperationAction(ISD::XOR, VT, Promote); AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT); } // Neon does not support vector divide/remainder operations. 
setOperationAction(ISD::SDIV, VT, Expand); setOperationAction(ISD::UDIV, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); setOperationAction(ISD::SREM, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64) for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) setOperationAction(Opcode, VT, Legal); } void ARMTargetLowering::addDRTypeForNEON(MVT VT) { addRegisterClass(VT, &ARM::DPRRegClass); addTypeForNEON(VT, MVT::f64, MVT::v2i32); } void ARMTargetLowering::addQRTypeForNEON(MVT VT) { addRegisterClass(VT, &ARM::DPairRegClass); addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); } ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, const ARMSubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { RegInfo = Subtarget->getRegisterInfo(); Itins = Subtarget->getInstrItineraryData(); setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() && !Subtarget->isTargetWatchOS()) { const auto &E = Subtarget->getTargetTriple().getEnvironment(); bool IsHFTarget = E == Triple::EABIHF || E == Triple::GNUEABIHF || E == Triple::MuslEABIHF; // Windows is a special case. Technically, we will replace all of the "GNU" // calls with calls to MSVCRT if appropriate and adjust the calling // convention then. IsHFTarget = IsHFTarget || Subtarget->isTargetWindows(); for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID) setLibcallCallingConv(static_cast(LCID), IsHFTarget ? CallingConv::ARM_AAPCS_VFP : CallingConv::ARM_AAPCS); } if (Subtarget->isTargetMachO()) { // Uses VFP for Thumb libfuncs if available. if (Subtarget->isThumb() && Subtarget->hasVFP2() && Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) { static const struct { const RTLIB::Libcall Op; const char * const Name; const ISD::CondCode Cond; } LibraryCalls[] = { // Single-precision floating-point arithmetic. 
{ RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID }, { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID }, { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID }, { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID }, // Double-precision floating-point arithmetic. { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID }, { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID }, { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID }, { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID }, // Single-precision comparisons. { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE }, { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE }, { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE }, { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE }, { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE }, { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE }, { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE }, { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ }, // Double-precision comparisons. { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE }, { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE }, { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE }, { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE }, { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE }, { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE }, { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE }, { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ }, // Floating-point to integer conversions. // i64 conversions are done via library routines even when generating VFP // instructions, so use the same ones. { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID }, { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID }, { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID }, { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID }, // Conversions between floating types. { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID }, { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID }, // Integer to floating-point conversions. 
// i64 conversions are done via library routines even when generating VFP // instructions, so use the same ones. // FIXME: There appears to be some naming inconsistency in ARM libgcc: // e.g., __floatunsidf vs. __floatunssidfvfp. { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID }, { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID }, { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID }, { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID }, }; for (const auto &LC : LibraryCalls) { setLibcallName(LC.Op, LC.Name); if (LC.Cond != ISD::SETCC_INVALID) setCmpLibcallCC(LC.Op, LC.Cond); } } // Set the correct calling convention for ARMv7k WatchOS. It's just // AAPCS_VFP for functions as simple as libcalls. if (Subtarget->isTargetWatchABI()) { for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP); } } // These libcalls are not available in 32-bit. setLibcallName(RTLIB::SHL_I128, nullptr); setLibcallName(RTLIB::SRL_I128, nullptr); setLibcallName(RTLIB::SRA_I128, nullptr); // RTLIB if (Subtarget->isAAPCS_ABI() && (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) { static const struct { const RTLIB::Libcall Op; const char * const Name; const CallingConv::ID CC; const ISD::CondCode Cond; } LibraryCalls[] = { // Double-precision floating-point arithmetic helper functions // RTABI chapter 4.1.2, Table 2 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, // Double-precision floating-point comparison helper functions // RTABI chapter 4.1.2, Table 3 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE 
}, { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ }, // Single-precision floating-point arithmetic helper functions // RTABI chapter 4.1.2, Table 4 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, // Single-precision floating-point comparison helper functions // RTABI chapter 4.1.2, Table 5 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ }, // Floating-point to integer conversions. 
// RTABI chapter 4.1.2, Table 6 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, // Conversions between floating types. // RTABI chapter 4.1.2, Table 7 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, // Integer to floating-point conversions. 
// RTABI chapter 4.1.2, Table 8 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, // Long long helper functions // RTABI chapter 4.2, Table 9 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, // Integer division functions // RTABI chapter 4.3.1 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, }; for (const auto &LC : LibraryCalls) { setLibcallName(LC.Op, LC.Name); setLibcallCallingConv(LC.Op, LC.CC); if (LC.Cond != ISD::SETCC_INVALID) setCmpLibcallCC(LC.Op, 
LC.Cond); } // EABI dependent RTLIB if (TM.Options.EABIVersion == EABI::EABI4 || TM.Options.EABIVersion == EABI::EABI5) { static const struct { const RTLIB::Libcall Op; const char *const Name; const CallingConv::ID CC; const ISD::CondCode Cond; } MemOpsLibraryCalls[] = { // Memory operations // RTABI chapter 4.3.4 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, }; for (const auto &LC : MemOpsLibraryCalls) { setLibcallName(LC.Op, LC.Name); setLibcallCallingConv(LC.Op, LC.CC); if (LC.Cond != ISD::SETCC_INVALID) setCmpLibcallCC(LC.Op, LC.Cond); } } } if (Subtarget->isTargetWindows()) { static const struct { const RTLIB::Libcall Op; const char * const Name; const CallingConv::ID CC; } LibraryCalls[] = { { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP }, { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP }, { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP }, { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP }, { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP }, { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP }, { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP }, { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP }, }; for (const auto &LC : LibraryCalls) { setLibcallName(LC.Op, LC.Name); setLibcallCallingConv(LC.Op, LC.CC); } } // Use divmod compiler-rt calls for iOS 5.0 and later. 
if (Subtarget->isTargetWatchOS() || (Subtarget->isTargetIOS() && !Subtarget->getTargetTriple().isOSVersionLT(5, 0))) { setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); } // The half <-> float conversion functions are always soft-float on // non-watchos platforms, but are needed for some targets which use a // hard-float calling convention by default. if (!Subtarget->isTargetWatchABI()) { if (Subtarget->isAAPCS_ABI()) { setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS); } else { setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS); setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS); setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS); } } // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have // a __gnu_ prefix (which is the default). 
if (Subtarget->isTargetAEABI()) { static const struct { const RTLIB::Libcall Op; const char * const Name; const CallingConv::ID CC; } LibraryCalls[] = { { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS }, { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS }, { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS }, }; for (const auto &LC : LibraryCalls) { setLibcallName(LC.Op, LC.Name); setLibcallCallingConv(LC.Op, LC.CC); } } if (Subtarget->isThumb1Only()) addRegisterClass(MVT::i32, &ARM::tGPRRegClass); else addRegisterClass(MVT::i32, &ARM::GPRRegClass); if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { addRegisterClass(MVT::f32, &ARM::SPRRegClass); addRegisterClass(MVT::f64, &ARM::DPRRegClass); } for (MVT VT : MVT::vector_valuetypes()) { for (MVT InnerVT : MVT::vector_valuetypes()) { setTruncStoreAction(VT, InnerVT, Expand); setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); } setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::SMUL_LOHI, VT, Expand); setOperationAction(ISD::MULHU, VT, Expand); setOperationAction(ISD::UMUL_LOHI, VT, Expand); setOperationAction(ISD::BSWAP, VT, Expand); } setOperationAction(ISD::ConstantFP, MVT::f32, Custom); setOperationAction(ISD::ConstantFP, MVT::f64, Custom); setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom); setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom); if (Subtarget->hasNEON()) { addDRTypeForNEON(MVT::v2f32); addDRTypeForNEON(MVT::v8i8); addDRTypeForNEON(MVT::v4i16); addDRTypeForNEON(MVT::v2i32); addDRTypeForNEON(MVT::v1i64); addQRTypeForNEON(MVT::v4f32); addQRTypeForNEON(MVT::v2f64); addQRTypeForNEON(MVT::v16i8); addQRTypeForNEON(MVT::v8i16); addQRTypeForNEON(MVT::v4i32); addQRTypeForNEON(MVT::v2i64); // v2f64 is legal so that QR subregs can be extracted as f64 elements, but // neither Neon nor VFP support any 
arithmetic operations on it. // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively // supported for v4f32. setOperationAction(ISD::FADD, MVT::v2f64, Expand); setOperationAction(ISD::FSUB, MVT::v2f64, Expand); setOperationAction(ISD::FMUL, MVT::v2f64, Expand); // FIXME: Code duplication: FDIV and FREM are expanded always, see // ARMTargetLowering::addTypeForNEON method for details. setOperationAction(ISD::FDIV, MVT::v2f64, Expand); setOperationAction(ISD::FREM, MVT::v2f64, Expand); // FIXME: Create unittest. // In another words, find a way when "copysign" appears in DAG with vector // operands. setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand); // FIXME: Code duplication: SETCC has custom operation action, see // ARMTargetLowering::addTypeForNEON method for details. setOperationAction(ISD::SETCC, MVT::v2f64, Expand); // FIXME: Create unittest for FNEG and for FABS. setOperationAction(ISD::FNEG, MVT::v2f64, Expand); setOperationAction(ISD::FABS, MVT::v2f64, Expand); setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); setOperationAction(ISD::FSIN, MVT::v2f64, Expand); setOperationAction(ISD::FCOS, MVT::v2f64, Expand); setOperationAction(ISD::FPOWI, MVT::v2f64, Expand); setOperationAction(ISD::FPOW, MVT::v2f64, Expand); setOperationAction(ISD::FLOG, MVT::v2f64, Expand); setOperationAction(ISD::FLOG2, MVT::v2f64, Expand); setOperationAction(ISD::FLOG10, MVT::v2f64, Expand); setOperationAction(ISD::FEXP, MVT::v2f64, Expand); setOperationAction(ISD::FEXP2, MVT::v2f64, Expand); // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR. 
setOperationAction(ISD::FCEIL, MVT::v2f64, Expand); setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand); setOperationAction(ISD::FRINT, MVT::v2f64, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); setOperationAction(ISD::FMA, MVT::v2f64, Expand); setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); setOperationAction(ISD::FSIN, MVT::v4f32, Expand); setOperationAction(ISD::FCOS, MVT::v4f32, Expand); setOperationAction(ISD::FPOWI, MVT::v4f32, Expand); setOperationAction(ISD::FPOW, MVT::v4f32, Expand); setOperationAction(ISD::FLOG, MVT::v4f32, Expand); setOperationAction(ISD::FLOG2, MVT::v4f32, Expand); setOperationAction(ISD::FLOG10, MVT::v4f32, Expand); setOperationAction(ISD::FEXP, MVT::v4f32, Expand); setOperationAction(ISD::FEXP2, MVT::v4f32, Expand); setOperationAction(ISD::FCEIL, MVT::v4f32, Expand); setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand); setOperationAction(ISD::FRINT, MVT::v4f32, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand); setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand); // Mark v2f32 intrinsics. setOperationAction(ISD::FSQRT, MVT::v2f32, Expand); setOperationAction(ISD::FSIN, MVT::v2f32, Expand); setOperationAction(ISD::FCOS, MVT::v2f32, Expand); setOperationAction(ISD::FPOWI, MVT::v2f32, Expand); setOperationAction(ISD::FPOW, MVT::v2f32, Expand); setOperationAction(ISD::FLOG, MVT::v2f32, Expand); setOperationAction(ISD::FLOG2, MVT::v2f32, Expand); setOperationAction(ISD::FLOG10, MVT::v2f32, Expand); setOperationAction(ISD::FEXP, MVT::v2f32, Expand); setOperationAction(ISD::FEXP2, MVT::v2f32, Expand); setOperationAction(ISD::FCEIL, MVT::v2f32, Expand); setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand); setOperationAction(ISD::FRINT, MVT::v2f32, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand); setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand); // Neon does not support some operations on v1i64 and v2i64 types. 
setOperationAction(ISD::MUL, MVT::v1i64, Expand); // Custom handling for some quad-vector types to detect VMULL. setOperationAction(ISD::MUL, MVT::v8i16, Custom); setOperationAction(ISD::MUL, MVT::v4i32, Custom); setOperationAction(ISD::MUL, MVT::v2i64, Custom); // Custom handling for some vector types to avoid expensive expansions setOperationAction(ISD::SDIV, MVT::v4i16, Custom); setOperationAction(ISD::SDIV, MVT::v8i8, Custom); setOperationAction(ISD::UDIV, MVT::v4i16, Custom); setOperationAction(ISD::UDIV, MVT::v8i8, Custom); // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with // a destination type that is wider than the source, and nor does // it have a FP_TO_[SU]INT instruction with a narrower destination than // source. setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand); setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); // NEON does not have single instruction CTPOP for vectors with element // types wider than 8-bits. However, custom lowering can leverage the // v8i8/v16i8 vcnt instruction. setOperationAction(ISD::CTPOP, MVT::v2i32, Custom); setOperationAction(ISD::CTPOP, MVT::v4i32, Custom); setOperationAction(ISD::CTPOP, MVT::v4i16, Custom); setOperationAction(ISD::CTPOP, MVT::v8i16, Custom); setOperationAction(ISD::CTPOP, MVT::v1i64, Expand); setOperationAction(ISD::CTPOP, MVT::v2i64, Expand); setOperationAction(ISD::CTLZ, MVT::v1i64, Expand); setOperationAction(ISD::CTLZ, MVT::v2i64, Expand); // NEON does not have single instruction CTTZ for vectors. 
setOperationAction(ISD::CTTZ, MVT::v8i8, Custom); setOperationAction(ISD::CTTZ, MVT::v4i16, Custom); setOperationAction(ISD::CTTZ, MVT::v2i32, Custom); setOperationAction(ISD::CTTZ, MVT::v1i64, Custom); setOperationAction(ISD::CTTZ, MVT::v16i8, Custom); setOperationAction(ISD::CTTZ, MVT::v8i16, Custom); setOperationAction(ISD::CTTZ, MVT::v4i32, Custom); setOperationAction(ISD::CTTZ, MVT::v2i64, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom); // NEON only has FMA instructions as of VFP4. if (!Subtarget->hasVFP4()) { setOperationAction(ISD::FMA, MVT::v2f32, Expand); setOperationAction(ISD::FMA, MVT::v4f32, Expand); } setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::ANY_EXTEND); setTargetDAGCombine(ISD::BUILD_VECTOR); setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::FP_TO_SINT); setTargetDAGCombine(ISD::FP_TO_UINT); setTargetDAGCombine(ISD::FDIV); setTargetDAGCombine(ISD::LOAD); // It is legal to extload from v4i8 to v4i16 or v4i32. 
for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16, MVT::v2i32}) { for (MVT VT : MVT::integer_vector_valuetypes()) { setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal); setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal); setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal); } } } // ARM and Thumb2 support UMLAL/SMLAL. if (!Subtarget->isThumb1Only()) setTargetDAGCombine(ISD::ADDC); if (Subtarget->isFPOnlySP()) { // When targeting a floating-point unit with only single-precision // operations, f64 is legal for the few double-precision instructions which // are present However, no double-precision operations other than moves, // loads and stores are provided by the hardware. setOperationAction(ISD::FADD, MVT::f64, Expand); setOperationAction(ISD::FSUB, MVT::f64, Expand); setOperationAction(ISD::FMUL, MVT::f64, Expand); setOperationAction(ISD::FMA, MVT::f64, Expand); setOperationAction(ISD::FDIV, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FGETSIGN, MVT::f64, Expand); setOperationAction(ISD::FNEG, MVT::f64, Expand); setOperationAction(ISD::FABS, MVT::f64, Expand); setOperationAction(ISD::FSQRT, MVT::f64, Expand); setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FPOWI, MVT::f64, Expand); setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FLOG, MVT::f64, Expand); setOperationAction(ISD::FLOG2, MVT::f64, Expand); setOperationAction(ISD::FLOG10, MVT::f64, Expand); setOperationAction(ISD::FEXP, MVT::f64, Expand); setOperationAction(ISD::FEXP2, MVT::f64, Expand); setOperationAction(ISD::FCEIL, MVT::f64, Expand); setOperationAction(ISD::FTRUNC, MVT::f64, Expand); setOperationAction(ISD::FRINT, MVT::f64, Expand); setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); setOperationAction(ISD::FFLOOR, MVT::f64, Expand); setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom); setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); } computeRegisterProperties(Subtarget->getRegisterInfo()); // ARM does not have floating-point extending loads. for (MVT VT : MVT::fp_valuetypes()) { setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); } // ... or truncating stores setTruncStoreAction(MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::f32, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f16, Expand); // ARM does not have i1 sign extending load. for (MVT VT : MVT::integer_valuetypes()) setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); // ARM supports all 4 flavors of integer indexed load / store. if (!Subtarget->isThumb1Only()) { for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { setIndexedLoadAction(im, MVT::i1, Legal); setIndexedLoadAction(im, MVT::i8, Legal); setIndexedLoadAction(im, MVT::i16, Legal); setIndexedLoadAction(im, MVT::i32, Legal); setIndexedStoreAction(im, MVT::i1, Legal); setIndexedStoreAction(im, MVT::i8, Legal); setIndexedStoreAction(im, MVT::i16, Legal); setIndexedStoreAction(im, MVT::i32, Legal); } } else { // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}. setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal); setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal); } setOperationAction(ISD::SADDO, MVT::i32, Custom); setOperationAction(ISD::UADDO, MVT::i32, Custom); setOperationAction(ISD::SSUBO, MVT::i32, Custom); setOperationAction(ISD::USUBO, MVT::i32, Custom); // i64 operation support. 
setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::MULHU, MVT::i32, Expand); if (Subtarget->isThumb1Only()) { setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); } if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops() || (Subtarget->isThumb2() && !Subtarget->hasDSP())) setOperationAction(ISD::MULHS, MVT::i32, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRL, MVT::i64, Custom); setOperationAction(ISD::SRA, MVT::i64, Custom); if (!Subtarget->isThumb1Only()) { // FIXME: We should do this for Thumb1 as well. setOperationAction(ISD::ADDC, MVT::i32, Custom); setOperationAction(ISD::ADDE, MVT::i32, Custom); setOperationAction(ISD::SUBC, MVT::i32, Custom); setOperationAction(ISD::SUBE, MVT::i32, Custom); } if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); // ARM does not have ROTL. setOperationAction(ISD::ROTL, MVT::i32, Expand); for (MVT VT : MVT::vector_valuetypes()) { setOperationAction(ISD::ROTL, VT, Expand); setOperationAction(ISD::ROTR, VT, Expand); } setOperationAction(ISD::CTTZ, MVT::i32, Custom); setOperationAction(ISD::CTPOP, MVT::i32, Expand); if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) setOperationAction(ISD::CTLZ, MVT::i32, Expand); // @llvm.readcyclecounter requires the Performance Monitors extension. // Default to the 0 expansion on unsupported platforms. // FIXME: Technically there are older ARM CPUs that have // implementation-specific ways of obtaining this information. if (Subtarget->hasPerfMon()) setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); // Only ARMv6 has BSWAP. if (!Subtarget->hasV6Ops()) setOperationAction(ISD::BSWAP, MVT::i32, Expand); bool hasDivide = Subtarget->isThumb() ? 
Subtarget->hasDivide() : Subtarget->hasDivideInARMMode(); if (!hasDivide) { // These are expanded into libcalls if the cpu doesn't have HW divider. setOperationAction(ISD::SDIV, MVT::i32, LibCall); setOperationAction(ISD::UDIV, MVT::i32, LibCall); } if (Subtarget->isTargetWindows() && !Subtarget->hasDivide()) { setOperationAction(ISD::SDIV, MVT::i32, Custom); setOperationAction(ISD::UDIV, MVT::i32, Custom); setOperationAction(ISD::SDIV, MVT::i64, Custom); setOperationAction(ISD::UDIV, MVT::i64, Custom); } setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); // Register based DivRem for AEABI (RTABI 4.2) if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() || Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() || Subtarget->isTargetWindows()) { setOperationAction(ISD::SREM, MVT::i64, Custom); setOperationAction(ISD::UREM, MVT::i64, Custom); HasStandaloneRem = false; if (Subtarget->isTargetWindows()) { const struct { const RTLIB::Libcall Op; const char * const Name; const CallingConv::ID CC; } LibraryCalls[] = { { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS }, { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS }, { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS }, { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS }, { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS }, { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS }, { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS }, { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS }, }; for (const auto &LC : LibraryCalls) { setLibcallName(LC.Op, LC.Name); setLibcallCallingConv(LC.Op, LC.CC); } } else { const struct { const RTLIB::Libcall Op; const char * const Name; const CallingConv::ID CC; } LibraryCalls[] = { { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, { RTLIB::SDIVREM_I32, "__aeabi_idivmod", 
CallingConv::ARM_AAPCS }, { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS }, { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS }, }; for (const auto &LC : LibraryCalls) { setLibcallName(LC.Op, LC.Name); setLibcallCallingConv(LC.Op, LC.CC); } } setOperationAction(ISD::SDIVREM, MVT::i32, Custom); setOperationAction(ISD::UDIVREM, MVT::i32, Custom); setOperationAction(ISD::SDIVREM, MVT::i64, Custom); setOperationAction(ISD::UDIVREM, MVT::i64, Custom); } else { setOperationAction(ISD::SDIVREM, MVT::i32, Expand); setOperationAction(ISD::UDIVREM, MVT::i32, Expand); } if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT()) for (auto &VT : {MVT::f32, MVT::f64}) setOperationAction(ISD::FPOWI, VT, Custom); setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); setOperationAction(ISD::TRAP, MVT::Other, Legal); // Use the default implementation. setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::VAARG, MVT::Other, Expand); setOperationAction(ISD::VACOPY, MVT::Other, Expand); setOperationAction(ISD::VAEND, MVT::Other, Expand); setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); else setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use // the default expansion. 
InsertFencesForAtomic = false; if (Subtarget->hasAnyDataBarrier() && (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) { // ATOMIC_FENCE needs custom lowering; the others should have been expanded // to ldrex/strex loops already. setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); if (!Subtarget->isThumb() || !Subtarget->isMClass()) setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); // On v8, we have particularly efficient implementations of atomic fences // if they can be combined with nearby atomic loads and stores. if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) { // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc. InsertFencesForAtomic = true; } } else { // If there's anything we can use as a barrier, go through custom lowering // for ATOMIC_FENCE. // If target has DMB in thumb, Fences can be inserted. if (Subtarget->hasDataBarrier()) InsertFencesForAtomic = true; setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Subtarget->hasAnyDataBarrier() ? Custom : Expand); // Set them all for expansion, which will force libcalls. setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the // Unordered/Monotonic case. 
if (!InsertFencesForAtomic) { setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); } } setOperationAction(ISD::PREFETCH, MVT::Other, Custom); // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. if (!Subtarget->hasV6Ops()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); } setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR // iff target supports vfp2. setOperationAction(ISD::BITCAST, MVT::i64, Custom); setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); } // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom); if (Subtarget->useSjLjEH()) setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); setOperationAction(ISD::SETCC, MVT::i32, Expand); setOperationAction(ISD::SETCC, MVT::f32, Expand); setOperationAction(ISD::SETCC, MVT::f64, Expand); setOperationAction(ISD::SELECT, MVT::i32, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); setOperationAction(ISD::SELECT, MVT::f64, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); // Thumb-1 cannot currently select ARMISD::SUBE. 
if (!Subtarget->isThumb1Only()) setOperationAction(ISD::SETCCE, MVT::i32, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Expand); setOperationAction(ISD::BR_CC, MVT::i32, Custom); setOperationAction(ISD::BR_CC, MVT::f32, Custom); setOperationAction(ISD::BR_CC, MVT::f64, Custom); setOperationAction(ISD::BR_JT, MVT::Other, Custom); // We don't support sin/cos/fmod/copysign/pow setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FSIN, MVT::f32, Expand); setOperationAction(ISD::FCOS, MVT::f32, Expand); setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f32, Expand); if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); } setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); if (!Subtarget->hasVFP4()) { setOperationAction(ISD::FMA, MVT::f64, Expand); setOperationAction(ISD::FMA, MVT::f32, Expand); } // Various VFP goodness if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) { // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded. if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) { setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); } // fp16 is a special v7 extension that adds f16 <-> f32 conversions. if (!Subtarget->hasFP16()) { setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); } } // Combine sin / cos into one node or libcall if possible. 
if (Subtarget->hasSinCos()) { setLibcallName(RTLIB::SINCOS_F32, "sincosf"); setLibcallName(RTLIB::SINCOS_F64, "sincos"); if (Subtarget->isTargetWatchABI()) { setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP); setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP); } if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) { // For iOS, we don't want to the normal expansion of a libcall to // sincos. We want to issue a libcall to __sincos_stret. setOperationAction(ISD::FSINCOS, MVT::f64, Custom); setOperationAction(ISD::FSINCOS, MVT::f32, Custom); } } // FP-ARMv8 implements a lot of rounding-like FP operations. if (Subtarget->hasFPARMv8()) { setOperationAction(ISD::FFLOOR, MVT::f32, Legal); setOperationAction(ISD::FCEIL, MVT::f32, Legal); setOperationAction(ISD::FROUND, MVT::f32, Legal); setOperationAction(ISD::FTRUNC, MVT::f32, Legal); setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); setOperationAction(ISD::FRINT, MVT::f32, Legal); setOperationAction(ISD::FMINNUM, MVT::f32, Legal); setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal); setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal); setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); if (!Subtarget->isFPOnlySP()) { setOperationAction(ISD::FFLOOR, MVT::f64, Legal); setOperationAction(ISD::FCEIL, MVT::f64, Legal); setOperationAction(ISD::FROUND, MVT::f64, Legal); setOperationAction(ISD::FTRUNC, MVT::f64, Legal); setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); setOperationAction(ISD::FRINT, MVT::f64, Legal); setOperationAction(ISD::FMINNUM, MVT::f64, Legal); setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); } } if (Subtarget->hasNEON()) { // vmin and vmax aren't available in a scalar form, so we use // a NEON instruction with an undef lane instead. 
setOperationAction(ISD::FMINNAN, MVT::f32, Legal); setOperationAction(ISD::FMAXNAN, MVT::f32, Legal); setOperationAction(ISD::FMINNAN, MVT::v2f32, Legal); setOperationAction(ISD::FMAXNAN, MVT::v2f32, Legal); setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal); setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal); } // We have target-specific dag combine patterns for the following nodes: // ARMISD::VMOVRRD - No need to call setTargetDAGCombine setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::XOR); if (Subtarget->hasV6Ops()) setTargetDAGCombine(ISD::SRL); setStackPointerRegisterToSaveRestore(ARM::SP); if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() || !Subtarget->hasVFP2()) setSchedulingPreference(Sched::RegPressure); else setSchedulingPreference(Sched::Hybrid); //// temporary - rewrite interface to use type MaxStoresPerMemset = 8; MaxStoresPerMemsetOptSize = 4; MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores MaxStoresPerMemcpyOptSize = 2; MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores MaxStoresPerMemmoveOptSize = 2; // On ARM arguments smaller than 4 bytes are extended, so all arguments // are at least 4 bytes aligned. setMinStackArgumentAlignment(4); // Prefer likely predicted branches to selects on out-of-order cores. PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder(); setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2); } bool ARMTargetLowering::useSoftFloat() const { return Subtarget->useSoftFloat(); } // FIXME: It might make sense to define the representative register class as the // nearest super-register that has a non-null superset. For example, DPR_VFP2 is // a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently, // SPR's representative would be DPR_VFP2. 
This should work well if register // pressure tracking were modified such that a register use would increment the // pressure of the register class's representative and all of it's super // classes' representatives transitively. We have not implemented this because // of the difficulty prior to coalescing of modeling operand register classes // due to the common occurrence of cross class copies and subregister insertions // and extractions. std::pair ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const { const TargetRegisterClass *RRC = nullptr; uint8_t Cost = 1; switch (VT.SimpleTy) { default: return TargetLowering::findRepresentativeClass(TRI, VT); // Use DPR as representative register class for all floating point // and vector types. Since there are 32 SPR registers and 32 DPR registers so // the cost is 1 for both f32 and f64. case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16: case MVT::v2i32: case MVT::v1i64: case MVT::v2f32: RRC = &ARM::DPRRegClass; // When NEON is used for SP, only half of the register file is available // because operations that define both SP and DP results will be constrained // to the VFP2 class (D0-D15). We currently model this constraint prior to // coalescing by double-counting the SP regs. See the FIXME above. 
if (Subtarget->useNEONForSinglePrecisionFP()) Cost = 2; break; case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: case MVT::v4f32: case MVT::v2f64: RRC = &ARM::DPRRegClass; Cost = 2; break; case MVT::v4i64: RRC = &ARM::DPRRegClass; Cost = 4; break; case MVT::v8i64: RRC = &ARM::DPRRegClass; Cost = 8; break; } return std::make_pair(RRC, Cost); } const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((ARMISD::NodeType)Opcode) { case ARMISD::FIRST_NUMBER: break; case ARMISD::Wrapper: return "ARMISD::Wrapper"; case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC"; case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL"; case ARMISD::CALL: return "ARMISD::CALL"; case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED"; case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK"; case ARMISD::BRCOND: return "ARMISD::BRCOND"; case ARMISD::BR_JT: return "ARMISD::BR_JT"; case ARMISD::BR2_JT: return "ARMISD::BR2_JT"; case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG"; case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; case ARMISD::CMP: return "ARMISD::CMP"; case ARMISD::CMN: return "ARMISD::CMN"; case ARMISD::CMPZ: return "ARMISD::CMPZ"; case ARMISD::CMPFP: return "ARMISD::CMPFP"; case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; case ARMISD::BCC_i64: return "ARMISD::BCC_i64"; case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; case ARMISD::CMOV: return "ARMISD::CMOV"; case ARMISD::SSAT: return "ARMISD::SSAT"; case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; case ARMISD::RRX: return "ARMISD::RRX"; case ARMISD::ADDC: return "ARMISD::ADDC"; case ARMISD::ADDE: return "ARMISD::ADDE"; case ARMISD::SUBC: return "ARMISD::SUBC"; case ARMISD::SUBE: return "ARMISD::SUBE"; case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD"; case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; case ARMISD::EH_SJLJ_SETJMP: 
return "ARMISD::EH_SJLJ_SETJMP"; case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP"; case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH"; case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN"; case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER"; case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC"; case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR"; case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK"; case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK"; case ARMISD::VCEQ: return "ARMISD::VCEQ"; case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; case ARMISD::VCGE: return "ARMISD::VCGE"; case ARMISD::VCGEZ: return "ARMISD::VCGEZ"; case ARMISD::VCLEZ: return "ARMISD::VCLEZ"; case ARMISD::VCGEU: return "ARMISD::VCGEU"; case ARMISD::VCGT: return "ARMISD::VCGT"; case ARMISD::VCGTZ: return "ARMISD::VCGTZ"; case ARMISD::VCLTZ: return "ARMISD::VCLTZ"; case ARMISD::VCGTU: return "ARMISD::VCGTU"; case ARMISD::VTST: return "ARMISD::VTST"; case ARMISD::VSHL: return "ARMISD::VSHL"; case ARMISD::VSHRs: return "ARMISD::VSHRs"; case ARMISD::VSHRu: return "ARMISD::VSHRu"; case ARMISD::VRSHRs: return "ARMISD::VRSHRs"; case ARMISD::VRSHRu: return "ARMISD::VRSHRu"; case ARMISD::VRSHRN: return "ARMISD::VRSHRN"; case ARMISD::VQSHLs: return "ARMISD::VQSHLs"; case ARMISD::VQSHLu: return "ARMISD::VQSHLu"; case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu"; case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs"; case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu"; case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu"; case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs"; case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu"; case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; case ARMISD::VSLI: return "ARMISD::VSLI"; case ARMISD::VSRI: return "ARMISD::VSRI"; case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM"; case 
ARMISD::VMVNIMM: return "ARMISD::VMVNIMM"; case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM"; case ARMISD::VDUP: return "ARMISD::VDUP"; case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; case ARMISD::VEXT: return "ARMISD::VEXT"; case ARMISD::VREV64: return "ARMISD::VREV64"; case ARMISD::VREV32: return "ARMISD::VREV32"; case ARMISD::VREV16: return "ARMISD::VREV16"; case ARMISD::VZIP: return "ARMISD::VZIP"; case ARMISD::VUZP: return "ARMISD::VUZP"; case ARMISD::VTRN: return "ARMISD::VTRN"; case ARMISD::VTBL1: return "ARMISD::VTBL1"; case ARMISD::VTBL2: return "ARMISD::VTBL2"; case ARMISD::VMULLs: return "ARMISD::VMULLs"; case ARMISD::VMULLu: return "ARMISD::VMULLu"; case ARMISD::UMAAL: return "ARMISD::UMAAL"; case ARMISD::UMLAL: return "ARMISD::UMLAL"; case ARMISD::SMLAL: return "ARMISD::SMLAL"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; case ARMISD::BFI: return "ARMISD::BFI"; case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; case ARMISD::VBSL: return "ARMISD::VBSL"; case ARMISD::MEMCPY: return "ARMISD::MEMCPY"; case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP"; case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP"; case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP"; case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD"; case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD"; case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD"; case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD"; case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD"; case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD"; case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD"; case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD"; case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD"; case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD"; case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD"; case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD"; case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD"; case ARMISD::VST3_UPD: return 
"ARMISD::VST3_UPD"; case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD"; case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD"; case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD"; case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD"; } return nullptr; } EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const { if (!VT.isVector()) return getPointerTy(DL); return VT.changeVectorElementTypeToInteger(); } /// getRegClassFor - Return the register class that should be used for the /// specified value type. const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const { // Map v4i64 to QQ registers but do not make the type legal. Similarly map // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to // load / store 4 to 8 consecutive D registers. if (Subtarget->hasNEON()) { if (VT == MVT::v4i64) return &ARM::QQPRRegClass; if (VT == MVT::v8i64) return &ARM::QQQQPRRegClass; } return TargetLowering::getRegClassFor(VT); } // memcpy, and other memory intrinsics, typically tries to use LDM/STM if the // source/dest is aligned and the copy size is large enough. We therefore want // to align such objects passed to memory intrinsics. bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, unsigned &PrefAlign) const { if (!isa(CI)) return false; MinSize = 8; // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1 // cycle faster than 4-byte aligned LDM. PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4); return true; } // Create a fast isel object. 
FastISel * ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const { return ARM::createFastISel(funcInfo, libInfo); } Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { unsigned NumVals = N->getNumValues(); if (!NumVals) return Sched::RegPressure; for (unsigned i = 0; i != NumVals; ++i) { EVT VT = N->getValueType(i); if (VT == MVT::Glue || VT == MVT::Other) continue; if (VT.isFloatingPoint() || VT.isVector()) return Sched::ILP; } if (!N->isMachineOpcode()) return Sched::RegPressure; // Load are scheduled for latency even if there instruction itinerary // is not available. const TargetInstrInfo *TII = Subtarget->getInstrInfo(); const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); if (MCID.getNumDefs() == 0) return Sched::RegPressure; if (!Itins->isEmpty() && Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2) return Sched::ILP; return Sched::RegPressure; } //===----------------------------------------------------------------------===// // Lowering Code //===----------------------------------------------------------------------===// /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { switch (CC) { default: llvm_unreachable("Unknown condition code!"); case ISD::SETNE: return ARMCC::NE; case ISD::SETEQ: return ARMCC::EQ; case ISD::SETGT: return ARMCC::GT; case ISD::SETGE: return ARMCC::GE; case ISD::SETLT: return ARMCC::LT; case ISD::SETLE: return ARMCC::LE; case ISD::SETUGT: return ARMCC::HI; case ISD::SETUGE: return ARMCC::HS; case ISD::SETULT: return ARMCC::LO; case ISD::SETULE: return ARMCC::LS; } } /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. 
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, ARMCC::CondCodes &CondCode2) { CondCode2 = ARMCC::AL; switch (CC) { default: llvm_unreachable("Unknown FP condition!"); case ISD::SETEQ: case ISD::SETOEQ: CondCode = ARMCC::EQ; break; case ISD::SETGT: case ISD::SETOGT: CondCode = ARMCC::GT; break; case ISD::SETGE: case ISD::SETOGE: CondCode = ARMCC::GE; break; case ISD::SETOLT: CondCode = ARMCC::MI; break; case ISD::SETOLE: CondCode = ARMCC::LS; break; case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; case ISD::SETO: CondCode = ARMCC::VC; break; case ISD::SETUO: CondCode = ARMCC::VS; break; case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break; case ISD::SETUGT: CondCode = ARMCC::HI; break; case ISD::SETUGE: CondCode = ARMCC::PL; break; case ISD::SETLT: case ISD::SETULT: CondCode = ARMCC::LT; break; case ISD::SETLE: case ISD::SETULE: CondCode = ARMCC::LE; break; case ISD::SETNE: case ISD::SETUNE: CondCode = ARMCC::NE; break; } } //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// #include "ARMGenCallingConv.inc" /// getEffectiveCallingConv - Get the effective calling convention, taking into /// account presence of floating point hardware and calling convention /// limitations, such as support for variadic functions. CallingConv::ID ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, bool isVarArg) const { switch (CC) { default: llvm_unreachable("Unsupported calling convention"); case CallingConv::ARM_AAPCS: case CallingConv::ARM_APCS: case CallingConv::GHC: return CC; case CallingConv::PreserveMost: return CallingConv::PreserveMost; case CallingConv::ARM_AAPCS_VFP: case CallingConv::Swift: return isVarArg ? 
CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP; case CallingConv::C: if (!Subtarget->isAAPCS_ABI()) return CallingConv::ARM_APCS; else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && getTargetMachine().Options.FloatABIType == FloatABI::Hard && !isVarArg) return CallingConv::ARM_AAPCS_VFP; else return CallingConv::ARM_AAPCS; case CallingConv::Fast: case CallingConv::CXX_FAST_TLS: if (!Subtarget->isAAPCS_ABI()) { if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) return CallingConv::Fast; return CallingConv::ARM_APCS; } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) return CallingConv::ARM_AAPCS_VFP; else return CallingConv::ARM_AAPCS; } } CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const { return CCAssignFnForNode(CC, false, isVarArg); } CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const { return CCAssignFnForNode(CC, true, isVarArg); } /// CCAssignFnForNode - Selects the correct CCAssignFn for the given /// CallingConvention. CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const { switch (getEffectiveCallingConv(CC, isVarArg)) { default: llvm_unreachable("Unsupported calling convention"); case CallingConv::ARM_APCS: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); case CallingConv::ARM_AAPCS: return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); case CallingConv::ARM_AAPCS_VFP: return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); case CallingConv::Fast: return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); case CallingConv::GHC: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC); case CallingConv::PreserveMost: return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); } } /// LowerCallResult - Lower the result values of a call into the /// appropriate copies out of appropriate physical registers. 
SDValue ARMTargetLowering::LowerCallResult( SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals, bool isThisReturn, SDValue ThisVal) const { // Assign locations to each value returned by this call. SmallVector RVLocs; ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext(), Call); CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg)); // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign VA = RVLocs[i]; // Pass 'this' value directly from the argument to return value, to avoid // reg unit interference if (i == 0 && isThisReturn) { assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 && "unexpected return calling convention register assignment"); InVals.push_back(ThisVal); continue; } SDValue Val; if (VA.needsCustom()) { // Handle f64 or half of a v2f64. SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); Chain = Lo.getValue(1); InFlag = Lo.getValue(2); VA = RVLocs[++i]; // skip ahead to next loc SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); if (!Subtarget->isLittle()) std::swap (Lo, Hi); Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); if (VA.getLocVT() == MVT::v2f64) { SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, DAG.getConstant(0, dl, MVT::i32)); VA = RVLocs[++i]; // skip ahead to next loc Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); Chain = Lo.getValue(1); InFlag = Lo.getValue(2); VA = RVLocs[++i]; // skip ahead to next loc Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); if (!Subtarget->isLittle()) std::swap (Lo, Hi); Val = 
DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, DAG.getConstant(1, dl, MVT::i32)); } } else { Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), InFlag); Chain = Val.getValue(1); InFlag = Val.getValue(2); } switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::BCvt: Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); break; } InVals.push_back(Val); } return Chain; } /// LowerMemOpCallTo - Store the argument to the stack. SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const { unsigned LocMemOffset = VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), StackPtr, PtrOff); return DAG.getStore( Chain, dl, Arg, PtrOff, MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset)); } void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg, RegsToPassVector &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, SDValue &StackPtr, SmallVectorImpl &MemOpChains, ISD::ArgFlagsTy Flags) const { SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Arg); unsigned id = Subtarget->isLittle() ? 0 : 1; RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id))); if (NextVA.isRegLoc()) RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id))); else { assert(NextVA.isMemLoc()); if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout())); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id), dl, DAG, NextVA, Flags)); } } /// LowerCall - Lowering a call into a callseq_start <- /// ARMISD:CALL <- callseq_end chain. 
Also add input and output parameter /// nodes. SDValue ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc &dl = CLI.DL; SmallVectorImpl &Outs = CLI.Outs; SmallVectorImpl &OutVals = CLI.OutVals; SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool doesNotRet = CLI.DoesNotReturn; bool isVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); bool isThisReturn = false; bool isSibCall = false; auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls"); // Disable tail calls if they're not supported. if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true") isTailCall = false; if (isTailCall) { // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(), Outs, OutVals, Ins, DAG); if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall()) report_fatal_error("failed to perform tail call elimination on a call " "site marked musttail"); // We don't support GuaranteedTailCallOpt for ARM, only automatically // detected sibcalls. if (isTailCall) { ++NumTailCalls; isSibCall = true; } } // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext(), Call); CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg)); // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); // For tail calls, memory operands are available in our caller's stack. if (isSibCall) NumBytes = 0; // Adjust the stack pointer for the new arguments... 
// These operations are automatically eliminated by the prolog/epilog pass if (!isSibCall) Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), dl); SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout())); RegsToPassVector RegsToPass; SmallVector MemOpChains; // Walk the register/memloc assignments, inserting copies/loads. In the case // of tail call optimization, arguments are handled later. for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e; ++i, ++realArgIdx) { CCValAssign &VA = ArgLocs[i]; SDValue Arg = OutVals[realArgIdx]; ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; bool isByVal = Flags.isByVal(); // Promote the value if needed. switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::SExt: Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::ZExt: Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::AExt: Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::BCvt: Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); break; } // f64 and v2f64 might be passed in i32 pairs and must be split into pieces if (VA.needsCustom()) { if (VA.getLocVT() == MVT::v2f64) { SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, DAG.getConstant(0, dl, MVT::i32)); SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, DAG.getConstant(1, dl, MVT::i32)); PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); VA = ArgLocs[++i]; // skip ahead to next loc if (VA.isRegLoc()) { PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); } else { assert(VA.isMemLoc()); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1, dl, DAG, VA, Flags)); } } else { PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i], 
StackPtr, MemOpChains, Flags); } } else if (VA.isRegLoc()) { - if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) { + if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() && + Outs[0].VT == MVT::i32) { assert(VA.getLocVT() == MVT::i32 && "unexpected calling convention register assignment"); assert(!Ins.empty() && Ins[0].VT == MVT::i32 && "unexpected use of 'returned'"); isThisReturn = true; } RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else if (isByVal) { assert(VA.isMemLoc()); unsigned offset = 0; // True if this byval aggregate will be split between registers // and memory. unsigned ByValArgsCount = CCInfo.getInRegsParamsCount(); unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed(); if (CurByValIdx < ByValArgsCount) { unsigned RegBegin, RegEnd; CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); unsigned int i, j; for (i = 0, j = RegBegin; j < RegEnd; i++, j++) { SDValue Const = DAG.getConstant(4*i, dl, MVT::i32); SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(), DAG.InferPtrAlignment(AddArg)); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(j, Load)); } // If parameter size outsides register area, "offset" value // helps us to calculate stack slot for remained part properly. 
offset = RegEnd - RegBegin; CCInfo.nextInRegsParam(); } if (Flags.getByValSize() > 4*offset) { auto PtrVT = getPointerTy(DAG.getDataLayout()); unsigned LocMemOffset = VA.getLocMemOffset(); SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff); SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl); SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset); SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl, MVT::i32); SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl, MVT::i32); SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode}; MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, Ops)); } } else if (!isSibCall) { assert(VA.isMemLoc()); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, dl, DAG, VA, Flags)); } } if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; // Tail call byval lowering might overwrite argument registers so in case of // tail call optimization the copies to registers are lowered later. if (!isTailCall) for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } // For tail calls lower the arguments to the 'real' stack slot. if (isTailCall) { // Force all the incoming stack arguments to be loaded from the stack // before any new outgoing arguments are stored to the stack, because the // outgoing stack slots may alias the incoming argument stack slots, and // the alias isn't otherwise explicit. 
This is slightly more conservative // than necessary, because it means that each store effectively depends // on every argument instead of just those arguments it would clobber. // Do not flag preceding copytoreg stuff together with the following stuff. InFlag = SDValue(); for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } InFlag = SDValue(); } // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. bool isDirect = false; const TargetMachine &TM = getTargetMachine(); const Module *Mod = MF.getFunction()->getParent(); const GlobalValue *GV = nullptr; if (GlobalAddressSDNode *G = dyn_cast(Callee)) GV = G->getGlobal(); bool isStub = !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO(); bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass()); bool isLocalARMFunc = false; ARMFunctionInfo *AFI = MF.getInfo(); auto PtrVt = getPointerTy(DAG.getDataLayout()); if (Subtarget->genLongCalls()) { assert((!isPositionIndependent() || Subtarget->isTargetWindows()) && "long-calls codegen is not position independent!"); // Handle a global address or an external symbol. If it's not one of // those, the target's already in a register, so we don't need to do // anything extra. 
if (isa(Callee)) { // Create a constant pool entry for the callee address unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0); // Get the address of the callee into a register SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad( PtrVt, dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); } else if (ExternalSymbolSDNode *S=dyn_cast(Callee)) { const char *Sym = S->getSymbol(); // Create a constant pool entry for the callee address unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, ARMPCLabelIndex, 0); // Get the address of the callee into a register SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad( PtrVt, dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); } } else if (isa(Callee)) { // If we're optimizing for minimum size and the function is called three or // more times in this block, we can improve codesize by calling indirectly // as BLXr has a 16-bit encoding. auto *GV = cast(Callee)->getGlobal(); auto *BB = CLI.CS->getParent(); bool PreferIndirect = Subtarget->isThumb() && MF.getFunction()->optForMinSize() && count_if(GV->users(), [&BB](const User *U) { return isa(U) && cast(U)->getParent() == BB; }) > 2; if (!PreferIndirect) { isDirect = true; bool isDef = GV->isStrongDefinitionForLinker(); // ARM call to a local ARM function is predicable. isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking); // tBX takes a register source operand. 
if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?"); Callee = DAG.getNode( ARMISD::WrapperPIC, dl, PtrVt, DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY)); Callee = DAG.getLoad( PtrVt, dl, DAG.getEntryNode(), Callee, MachinePointerInfo::getGOT(DAG.getMachineFunction()), /* Alignment = */ 0, MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant); } else if (Subtarget->isTargetCOFF()) { assert(Subtarget->isTargetWindows() && "Windows is the only supported COFF target"); unsigned TargetFlags = GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG; Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0, TargetFlags); if (GV->hasDLLImportStorageClass()) Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee), MachinePointerInfo::getGOT(DAG.getMachineFunction())); } else { Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0); } } } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { isDirect = true; // tBX takes a register source operand. const char *Sym = S->getSymbol(); if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, ARMPCLabelIndex, 4); SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad( PtrVt, dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel); } else { Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0); } } // FIXME: handle tail calls differently. 
unsigned CallOpc; if (Subtarget->isThumb()) { if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; else CallOpc = ARMISD::CALL; } else { if (!isDirect && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() && // Emit regular call when code size is the priority !MF.getFunction()->optForMinSize()) // "mov lr, pc; b _foo" to avoid confusing the RSP CallOpc = ARMISD::CALL_NOLINK; else CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL; } std::vector Ops; Ops.push_back(Chain); Ops.push_back(Callee); // Add argument registers to the end of the list so that they are known live // into the call. for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. if (!isTailCall) { const uint32_t *Mask; const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo(); if (isThisReturn) { // For 'this' returns, use the R0-preserving mask if applicable Mask = ARI->getThisReturnPreservedMask(MF, CallConv); if (!Mask) { // Set isThisReturn to false if the calling convention is not one that // allows 'returned' to be modeled in this way, so LowerCallResult does // not try to pass 'this' straight through isThisReturn = false; Mask = ARI->getCallPreservedMask(MF, CallConv); } } else Mask = ARI->getCallPreservedMask(MF, CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); } if (InFlag.getNode()) Ops.push_back(InFlag); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); if (isTailCall) { MF.getFrameInfo().setHasTailCall(); return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops); } // Returns a chain and a flag for retval copy to use. 
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), DAG.getIntPtrConstant(0, dl, true), InFlag, dl); if (!Ins.empty()) InFlag = Chain.getValue(1); // Handle result values, copying them out of physregs into vregs that we // return. return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, InVals, isThisReturn, isThisReturn ? OutVals[0] : SDValue()); } /// HandleByVal - Every parameter *after* a byval parameter is passed /// on the stack. Remember the next parameter register to allocate, /// and then confiscate the rest of the parameter registers to insure /// this. void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size, unsigned Align) const { assert((State->getCallOrPrologue() == Prologue || State->getCallOrPrologue() == Call) && "unhandled ParmContext"); // Byval (as with any stack) slots are always at least 4 byte aligned. Align = std::max(Align, 4U); unsigned Reg = State->AllocateReg(GPRArgRegs); if (!Reg) return; unsigned AlignInRegs = Align / 4; unsigned Waste = (ARM::R4 - Reg) % AlignInRegs; for (unsigned i = 0; i < Waste; ++i) Reg = State->AllocateReg(GPRArgRegs); if (!Reg) return; unsigned Excess = 4 * (ARM::R4 - Reg); // Special case when NSAA != SP and parameter size greater than size of // all remained GPR regs. In that case we can't split parameter, we must // send it to stack. We also must set NCRN to R4, so waste all // remained registers. const unsigned NSAAOffset = State->getNextStackOffset(); if (NSAAOffset != 0 && Size > Excess) { while (State->AllocateReg(GPRArgRegs)) ; return; } // First register for byval parameter is the first register that wasn't // allocated before this method call, so it would be "reg". 
// If parameter is small enough to be saved in range [reg, r4), then // the end (first after last) register would be reg + param-size-in-regs, // else parameter would be splitted between registers and stack, // end register would be r4 in this case. unsigned ByValRegBegin = Reg; unsigned ByValRegEnd = std::min(Reg + Size / 4, ARM::R4); State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd); // Note, first register is allocated in the beginning of function already, // allocate remained amount of registers we need. for (unsigned i = Reg + 1; i != ByValRegEnd; ++i) State->AllocateReg(GPRArgRegs); // A byval parameter that is split between registers and memory needs its // size truncated here. // In the case where the entire structure fits in registers, we set the // size in memory to zero. Size = std::max(Size - Excess, 0); } /// MatchingStackOffset - Return true if the given stack call argument is /// already available in the same position (relatively) of the caller's /// incoming argument stack. static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, const TargetInstrInfo *TII) { unsigned Bytes = Arg.getValueSizeInBits() / 8; int FI = INT_MAX; if (Arg.getOpcode() == ISD::CopyFromReg) { unsigned VR = cast(Arg.getOperand(1))->getReg(); if (!TargetRegisterInfo::isVirtualRegister(VR)) return false; MachineInstr *Def = MRI->getVRegDef(VR); if (!Def) return false; if (!Flags.isByVal()) { if (!TII->isLoadFromStackSlot(*Def, FI)) return false; } else { return false; } } else if (LoadSDNode *Ld = dyn_cast(Arg)) { if (Flags.isByVal()) // ByVal argument is passed in as a pointer but it's now being // dereferenced. e.g. 
// define @foo(%struct.X* %A) { // tail call @bar(%struct.X* byval %A) // } return false; SDValue Ptr = Ld->getBasePtr(); FrameIndexSDNode *FINode = dyn_cast(Ptr); if (!FINode) return false; FI = FINode->getIndex(); } else return false; assert(FI != INT_MAX); if (!MFI.isFixedObjectIndex(FI)) return false; return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI); } /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call /// optimization should implement this function. bool ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, bool isCalleeStructRet, bool isCallerStructRet, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, SelectionDAG& DAG) const { MachineFunction &MF = DAG.getMachineFunction(); const Function *CallerF = MF.getFunction(); CallingConv::ID CallerCC = CallerF->getCallingConv(); assert(Subtarget->supportsTailCall()); // Look for obvious safe cases to perform tail call optimization that do not // require ABI changes. This is what gcc calls sibcall. // Exception-handling functions need a special set of instructions to indicate // a return to the hardware. Tail-calling another function would probably // break this. if (CallerF->hasFnAttribute("interrupt")) return false; // Also avoid sibcall optimization if either caller or callee uses struct // return semantics. if (isCalleeStructRet || isCallerStructRet) return false; // Externally-defined functions with weak linkage should not be // tail-called on ARM when the OS does not support dynamic // pre-emption of symbols, as the AAELF spec requires normal calls // to undefined weak functions to be replaced with a NOP or jump to the // next instruction. 
The behaviour of branch instructions in this // situation (as used for tail calls) is implementation-defined, so we // cannot rely on the linker replacing the tail call with a return. if (GlobalAddressSDNode *G = dyn_cast(Callee)) { const GlobalValue *GV = G->getGlobal(); const Triple &TT = getTargetMachine().getTargetTriple(); if (GV->hasExternalWeakLinkage() && (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO())) return false; } // Check that the call results are passed in the same way. LLVMContext &C = *DAG.getContext(); if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, CCAssignFnForReturn(CalleeCC, isVarArg), CCAssignFnForReturn(CallerCC, isVarArg))) return false; // The callee has to preserve all registers the caller needs to preserve. const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); if (CalleeCC != CallerCC) { const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) return false; } // If Caller's vararg or byval argument has been split between registers and // stack, do not perform tail call, since part of the argument is in caller's // local frame. const ARMFunctionInfo *AFI_Caller = MF.getInfo(); if (AFI_Caller->getArgRegsSaveSize()) return false; // If the callee takes no arguments then go on to check the results of the // call. if (!Outs.empty()) { // Check if stack adjustment is needed. For now, do not do this if any // argument is passed on the stack. SmallVector ArgLocs; ARMCCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C, Call); CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg)); if (CCInfo.getNextStackOffset()) { // Check if the arguments are already laid out in the right way as // the caller's fixed stack objects. 
MachineFrameInfo &MFI = MF.getFrameInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo(); const TargetInstrInfo *TII = Subtarget->getInstrInfo(); for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e; ++i, ++realArgIdx) { CCValAssign &VA = ArgLocs[i]; EVT RegVT = VA.getLocVT(); SDValue Arg = OutVals[realArgIdx]; ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; if (VA.getLocInfo() == CCValAssign::Indirect) return false; if (VA.needsCustom()) { // f64 and vector types are split into multiple registers or // register/stack-slot combinations. The types will not match // the registers; give up on memory f64 refs until we figure // out what to do about this. if (!VA.isRegLoc()) return false; if (!ArgLocs[++i].isRegLoc()) return false; if (RegVT == MVT::v2f64) { if (!ArgLocs[++i].isRegLoc()) return false; if (!ArgLocs[++i].isRegLoc()) return false; } } else if (!VA.isRegLoc()) { if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI, TII)) return false; } } } const MachineRegisterInfo &MRI = MF.getRegInfo(); if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) return false; } return true; } bool ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const { SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg)); } static SDValue LowerInterruptReturn(SmallVectorImpl &RetOps, const SDLoc &DL, SelectionDAG &DAG) { const MachineFunction &MF = DAG.getMachineFunction(); const Function *F = MF.getFunction(); StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString(); // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset // version of the "preferred return address". These offsets affect the return // instruction if this is a return from PL1 without hypervisor extensions. 
// IRQ/FIQ: +4 "subs pc, lr, #4" // SWI: 0 "subs pc, lr, #0" // ABORT: +4 "subs pc, lr, #4" // UNDEF: +4/+2 "subs pc, lr, #0" // UNDEF varies depending on where the exception came from ARM or Thumb // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0. int64_t LROffset; if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" || IntKind == "ABORT") LROffset = 4; else if (IntKind == "SWI" || IntKind == "UNDEF") LROffset = 0; else report_fatal_error("Unsupported interrupt attribute. If present, value " "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF"); RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, DL, MVT::i32, false)); return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps); } SDValue ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to a location. SmallVector RVLocs; // CCState - Info about the registers and stack slots. ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext(), Call); // Analyze outgoing return values. CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg)); SDValue Flag; SmallVector RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) bool isLittleEndian = Subtarget->isLittle(); MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo(); AFI->setReturnRegsCount(RVLocs.size()); // Copy the result values into the output registers. 
for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size(); ++i, ++realRVLocIdx) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); SDValue Arg = OutVals[realRVLocIdx]; switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::BCvt: Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); break; } if (VA.needsCustom()) { if (VA.getLocVT() == MVT::v2f64) { // Extract the first half and return it in two registers. SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, DAG.getConstant(0, dl, MVT::i32)); SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Half); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs.getValue(isLittleEndian ? 0 : 1), Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs.getValue(isLittleEndian ? 1 : 0), Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc // Extract the 2nd half and fall through to handle it as an f64 value. Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, DAG.getConstant(1, dl, MVT::i32)); } // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is // available. SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Arg); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(isLittleEndian ? 0 : 1), Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(isLittleEndian ? 
1 : 0), Flag); } else Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); // Guarantee that all emitted copies are // stuck together, avoiding something bad. Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); if (I) { for (; *I; ++I) { if (ARM::GPRRegClass.contains(*I)) RetOps.push_back(DAG.getRegister(*I, MVT::i32)); else if (ARM::DPRRegClass.contains(*I)) RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64))); else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); } } // Update chain and glue. RetOps[0] = Chain; if (Flag.getNode()) RetOps.push_back(Flag); // CPUs which aren't M-class use a special sequence to return from // exceptions (roughly, any instruction setting pc and cpsr simultaneously, // though we use "subs pc, lr, #N"). // // M-class CPUs actually use a normal return sequence with a special // (hardware-provided) value in LR, so the normal code path works. if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") && !Subtarget->isMClass()) { if (Subtarget->isThumb1Only()) report_fatal_error("interrupt attribute is not supported in Thumb1"); return LowerInterruptReturn(RetOps, dl, DAG); } return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps); } bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { if (N->getNumValues() != 1) return false; if (!N->hasNUsesOfValue(1, 0)) return false; SDValue TCChain = Chain; SDNode *Copy = *N->use_begin(); if (Copy->getOpcode() == ISD::CopyToReg) { // If the copy has a glue operand, we conservatively assume it isn't safe to // perform a tail call. 
if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) return false; TCChain = Copy->getOperand(0); } else if (Copy->getOpcode() == ARMISD::VMOVRRD) { SDNode *VMov = Copy; // f64 returned in a pair of GPRs. SmallPtrSet Copies; for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); UI != UE; ++UI) { if (UI->getOpcode() != ISD::CopyToReg) return false; Copies.insert(*UI); } if (Copies.size() > 2) return false; for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); UI != UE; ++UI) { SDValue UseChain = UI->getOperand(0); if (Copies.count(UseChain.getNode())) // Second CopyToReg Copy = *UI; else { // We are at the top of this chain. // If the copy has a glue operand, we conservatively assume it // isn't safe to perform a tail call. if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue) return false; // First CopyToReg TCChain = UseChain; } } } else if (Copy->getOpcode() == ISD::BITCAST) { // f32 returned in a single GPR. if (!Copy->hasOneUse()) return false; Copy = *Copy->use_begin(); if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) return false; // If the copy has a glue operand, we conservatively assume it isn't safe to // perform a tail call. 
if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) return false; TCChain = Copy->getOperand(0); } else { return false; } bool HasRet = false; for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); UI != UE; ++UI) { if (UI->getOpcode() != ARMISD::RET_FLAG && UI->getOpcode() != ARMISD::INTRET_FLAG) return false; HasRet = true; } if (!HasRet) return false; Chain = TCChain; return true; } bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { if (!Subtarget->supportsTailCall()) return false; auto Attr = CI->getParent()->getParent()->getFnAttribute("disable-tail-calls"); if (!CI->isTailCall() || Attr.getValueAsString() == "true") return false; return true; } // Trying to write a 64 bit value so need to split into two 32 bit values first, // and pass the lower and high parts through. static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) { SDLoc DL(Op); SDValue WriteValue = Op->getOperand(2); // This function is only supposed to be called for i64 type argument. assert(WriteValue.getValueType() == MVT::i64 && "LowerWRITE_REGISTER called for non-i64 type argument."); SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue, DAG.getConstant(0, DL, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue, DAG.getConstant(1, DL, MVT::i32)); SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi }; return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops); } // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is // one of the above mentioned nodes. It has to be wrapped because otherwise // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only // be used to form addressing mode. These wrapped nodes will be selected // into MOVi. 
// Lower a constant-pool node to its target constant-pool counterpart wrapped
// in an ARMISD::Wrapper node. Machine constant-pool entries and plain
// constants go through different getTargetConstantPool overloads; both keep
// the alignment recorded on the original node.
//
// NOTE(review): in this copy of the file the explicit template argument lists
// on calls such as cast(...) and MF.getInfo() appear to have been lost
// (presumably cast<ConstantPoolSDNode>, getInfo<ARMFunctionInfo>, ...). The
// code tokens are left untouched here -- confirm against the upstream source.
static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
  EVT PtrVT = Op.getValueType();
  // FIXME there is no actual debug info here
  SDLoc dl(Op);
  ConstantPoolSDNode *CP = cast(Op);
  SDValue Res;
  if (CP->isMachineConstantPoolEntry())
    Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
                                    CP->getAlignment());
  else
    Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
                                    CP->getAlignment());
  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
}

// Jump tables always use the EK_Inline encoding on this target.
unsigned ARMTargetLowering::getJumpTableEncoding() const {
  return MachineJumpTableInfo::EK_Inline;
}

// Lower a block-address node by loading the address from a constant-pool
// entry. When the code is position independent (or ROPI), the entry is created
// with a fresh PIC label and the loaded value is combined with that label
// through an ARMISD::PIC_ADD node; otherwise the loaded value is returned
// directly.
SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo();
  unsigned ARMPCLabelIndex = 0;
  SDLoc DL(Op);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  const BlockAddress *BA = cast(Op)->getBlockAddress();
  SDValue CPAddr;
  bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
  if (!IsPositionIndependent) {
    CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
  } else {
    // PC adjustment recorded in the constant-pool entry: 4 in Thumb mode,
    // 8 otherwise.
    unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
    ARMPCLabelIndex = AFI->createPICLabelUId();
    ARMConstantPoolValue *CPV =
      ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
                                      ARMCP::CPBlockAddress, PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  }
  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
  SDValue Result = DAG.getLoad(
      PtrVT, DL, DAG.getEntryNode(), CPAddr,
      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
  if (!IsPositionIndependent)
    return Result;
  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
}

/// \brief Convert a TLS address reference into the correct sequence of loads
/// and calls to compute the variable's address for Darwin, and return an
/// SDValue containing the final node.
/// Darwin only has one TLS scheme which must be capable of dealing with the
/// fully general situation, in the worst case. This means:
///     + "extern __thread" declaration.
///     + Defined in a possibly unknown dynamic library.
///
/// The general system is that each __thread variable has a [3 x i32] descriptor
/// which contains information used by the runtime to calculate the address. The
/// only part of this the compiler needs to know about is the first word, which
/// contains a function pointer that must be called with the address of the
/// entire descriptor in "r0".
///
/// Since this descriptor may be in a different unit, in general access must
/// proceed along the usual ARM rules. A common sequence to produce is:
///
///     movw rT1, :lower16:_var$non_lazy_ptr
///     movt rT1, :upper16:_var$non_lazy_ptr
///     ldr r0, [rT1]
///     ldr rT2, [r0]
///     blx rT2
///     [...address now in r0...]
SDValue
ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
                                               SelectionDAG &DAG) const {
  assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
  SDLoc DL(Op);

  // First step is to get the address of the actual global symbol. This is where
  // the TLS descriptor lives.
  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);

  // The first entry in the descriptor is a function pointer that we must call
  // to obtain the address of the variable.
  SDValue Chain = DAG.getEntryNode();
  SDValue FuncTLVGet = DAG.getLoad(
      MVT::i32, DL, Chain, DescAddr,
      MachinePointerInfo::getGOT(DAG.getMachineFunction()),
      /* Alignment = */ 4,
      MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
          MachineMemOperand::MOInvariant);
  Chain = FuncTLVGet.getValue(1);

  // Record on the frame info that this function adjusts the stack.
  MachineFunction &F = DAG.getMachineFunction();
  MachineFrameInfo &MFI = F.getFrameInfo();
  MFI.setAdjustsStack(true);

  // TLS calls preserve all registers except those that absolutely must be
  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
  // silly).
  //
  // NOTE(review): the static_cast below has no target type in this copy of the
  // file; here and in similar cast(...)/getInfo()/SmallVector/std::pair uses
  // further down, the explicit template arguments appear to have been lost.
  // Tokens are left untouched -- confirm against the upstream source.
  auto TRI =
      getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo();
  auto ARI = static_cast(TRI);
  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());

  // Finally, we can make the call. This is just a degenerate version of a
  // normal ARM call node: r0 takes the address of the descriptor, and
  // returns the address of the variable in this thread.
  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
  Chain =
      DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
                  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
                  DAG.getRegisterMask(Mask), Chain.getValue(1));
  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
}

/// Lower a TLS address reference for Windows. The result is the sum of two
/// loaded values: the entry of the TLS array (found at offset 0x2c from the
/// TEB) selected by "_tls_index" scaled by 4, and a SECREL constant-pool
/// entry for the global.
SDValue
ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");

  SDValue Chain = DAG.getEntryNode();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDLoc DL(Op);

  // Load the current TEB (thread environment block)
  // This is done through the arm_mrc intrinsic with the constant operands
  // 15, 0, 13, 0, 2.
  SDValue Ops[] = {Chain,
                   DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
                   DAG.getConstant(15, DL, MVT::i32),
                   DAG.getConstant(0, DL, MVT::i32),
                   DAG.getConstant(13, DL, MVT::i32),
                   DAG.getConstant(0, DL, MVT::i32),
                   DAG.getConstant(2, DL, MVT::i32)};
  SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
                                   DAG.getVTList(MVT::i32, MVT::Other), Ops);

  SDValue TEB = CurrentTEB.getValue(0);
  Chain = CurrentTEB.getValue(1);

  // Load the ThreadLocalStoragePointer from the TEB
  // A pointer to the TLS array is located at offset 0x2c from the TEB.
  SDValue TLSArray =
      DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());

  // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
  // offset into the TLSArray.

  // Load the TLS index from the C runtime
  SDValue TLSIndex =
      DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
  TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
  TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());

  // Shifting left by 2 scales the index by 4.
  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
                              DAG.getConstant(2, DL, MVT::i32));
  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
                            DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
                            MachinePointerInfo());

  // Get the offset of the start of the .tls section (section base)
  const auto *GA = cast(Op);
  auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
  SDValue Offset = DAG.getLoad(
      PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
                                    DAG.getTargetConstantPool(CPV, PtrVT, 4)),
      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

  return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
}

// Lower ISD::GlobalTLSAddress using the "general dynamic" model
//
// A TLSGD constant-pool entry for the global is loaded and combined with its
// PIC label via ARMISD::PIC_ADD; the result is passed as the single argument
// of a call to "__tls_get_addr", whose return value is the lowered address.
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(GA);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  // PC adjustment recorded in the constant-pool entry: 4 in Thumb mode,
  // 8 otherwise.
  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo();
  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
  ARMConstantPoolValue *CPV =
    ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
                                    ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
  Argument = DAG.getLoad(
      PtrVT, dl, DAG.getEntryNode(), Argument,
      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
  // The call below is chained after the constant-pool load.
  SDValue Chain = Argument.getValue(1);

  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);

  // call __tls_get_addr.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Argument;
  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
  Args.push_back(Entry);

  // FIXME: is there useful debug info available here?
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(Chain)
    .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
               DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));

  // Only the first element of the returned pair is used.
  std::pair CallResult = LowerCallTo(CLI);
  return CallResult.first;
}

// Lower ISD::GlobalTLSAddress using the "initial exec" or
// "local exec" model.
//
// In both models the result is THREAD_POINTER + Offset. For initial exec the
// offset comes from a GOTTPOFF constant-pool entry that is loaded, combined
// with its PIC label, and then loaded through again; for local exec it is
// loaded directly from a TPOFF constant-pool entry.
SDValue
ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
                                        SelectionDAG &DAG,
                                        TLSModel::Model model) const {
  const GlobalValue *GV = GA->getGlobal();
  SDLoc dl(GA);
  SDValue Offset;
  SDValue Chain = DAG.getEntryNode();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  // Get the Thread Pointer
  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);

  if (model == TLSModel::InitialExec) {
    MachineFunction &MF = DAG.getMachineFunction();
    ARMFunctionInfo *AFI = MF.getInfo();
    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
    // Initial exec model.
    unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
    ARMConstantPoolValue *CPV =
      ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
                                      ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
                                      true);
    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
    Offset = DAG.getLoad(
        PtrVT, dl, Chain, Offset,
        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
    Chain = Offset.getValue(1);

    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
    Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);

    // Second load, through the address formed by PIC_ADD.
    Offset = DAG.getLoad(
        PtrVT, dl, Chain, Offset,
        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
  } else {
    // local exec model
    assert(model == TLSModel::LocalExec);
    ARMConstantPoolValue *CPV =
      ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
    Offset = DAG.getLoad(
        PtrVT, dl, Chain, Offset,
        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
  }

  // The address of the thread local variable is the add of the thread
  // pointer with the offset of the variable.
  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}

// Dispatch TLS address lowering: Darwin and Windows have dedicated lowerings;
// otherwise the target must be ELF, where emulated TLS is used if enabled and
// the TLS model of the global selects the lowering in all other cases.
SDValue
ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
  if (Subtarget->isTargetDarwin())
    return LowerGlobalTLSAddressDarwin(Op, DAG);

  if (Subtarget->isTargetWindows())
    return LowerGlobalTLSAddressWindows(Op, DAG);

  // TODO: implement the "local dynamic" model
  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
  GlobalAddressSDNode *GA = cast(Op);
  if (DAG.getTarget().Options.EmulatedTLS)
    return LowerToTLSEmulatedModel(GA, DAG);

  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());

  switch (model) {
    // Local dynamic is currently lowered the same way as general dynamic.
    case TLSModel::GeneralDynamic:
    case TLSModel::LocalDynamic:
      return LowerToTLSGeneralDynamicModel(GA, DAG);
    case TLSModel::InitialExec:
    case TLSModel::LocalExec:
      return LowerToTLSExecModels(GA, DAG, model);
  }
  llvm_unreachable("bogus TLS model");
}

/// Return true if all users of V are within function F, looking through
/// ConstantExprs.
static bool allUsersAreInFunction(const Value *V, const Function *F) {
  // Worklist of users still to be examined, seeded with V's direct users.
  SmallVector Worklist;
  for (auto *U : V->users())
    Worklist.push_back(U);
  while (!Worklist.empty()) {
    auto *U = Worklist.pop_back_val();
    // Users matched by this isa check are not judged themselves; their own
    // users are queued instead.
    if (isa(U)) {
      for (auto *UU : U->users())
        Worklist.push_back(UU);
      continue;
    }

    // A user that fails the dyn_cast, or whose parent's parent is not F,
    // disqualifies V.
    auto *I = dyn_cast(U);
    if (!I || I->getParent()->getParent() != F)
      return false;
  }
  return true;
}

/// Return true if all users of V are within some (any) function, looking through
/// ConstantExprs. In other words, are there any global constant users?
static bool allUsersAreInFunctions(const Value *V) {
  // Same worklist walk as allUsersAreInFunction, without the check against a
  // specific function.
  SmallVector Worklist;
  for (auto *U : V->users())
    Worklist.push_back(U);
  while (!Worklist.empty()) {
    auto *U = Worklist.pop_back_val();
    if (isa(U)) {
      for (auto *UU : U->users())
        Worklist.push_back(UU);
      continue;
    }

    if (!isa(U))
      return false;
  }
  return true;
}

// Return true if T is an integer, float or an array/vector of either.
// NOTE(review): throughout this part of the file, explicit template argument
// lists on cast(...)/dyn_cast(...)/isa(...)/static_cast(...), MF.getInfo() and
// SmallVector/SmallVectorImpl appear to have been lost in this copy. The code
// tokens are left untouched -- confirm against the upstream source.
static bool isSimpleType(Type *T) {
  if (T->isIntegerTy() || T->isFloatingPointTy())
    return true;
  // For arrays and vectors only the immediate element type is inspected.
  Type *SubT = nullptr;
  if (T->isArrayTy())
    SubT = T->getArrayElementType();
  else if (T->isVectorTy())
    SubT = T->getVectorElementType();
  else
    return false;
  return SubT->isIntegerTy() || SubT->isFloatingPointTy();
}

// Try to place the initializer of the global GV directly into the constant
// pool. Returns an ARMISD::Wrapper around the new constant-pool entry on
// success, or an empty SDValue when the global does not qualify.
static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
                                     EVT PtrVT, SDLoc dl) {
  // If we're creating a pool entry for a constant global with unnamed address,
  // and the global is small enough, we can emit it inline into the constant pool
  // to save ourselves an indirection.
  //
  // This is a win if the constant is only used in one function (so it doesn't
  // need to be duplicated) or duplicating the constant wouldn't increase code
  // size (implying the constant is no larger than 4 bytes).
  const Function *F = DAG.getMachineFunction().getFunction();

  // We rely on this decision to inline being idempotent and unrelated to the
  // use-site. We know that if we inline a variable at one use site, we'll
  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
  // doesn't know about this optimization, so bail out if it's enabled else
  // we could decide to inline here (and thus never emit the GV) but require
  // the GV from fast-isel generated code.
  if (!EnableConstpoolPromotion ||
      DAG.getMachineFunction().getTarget().Options.EnableFastISel)
      return SDValue();

  auto *GVar = dyn_cast(GV);
  if (!GVar || !GVar->hasInitializer() ||
      !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
      !GVar->hasLocalLinkage())
    return SDValue();

  // Ensure that we don't try and inline any type that contains pointers. If
  // we inline a value that contains relocations, we move the relocations from
  // .data to .text which is not ideal.
  auto *Init = GVar->getInitializer();
  if (!isSimpleType(Init->getType()))
    return SDValue();

  // The constant islands pass can only really deal with alignment requests
  // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
  // any type wanting greater alignment requirements than 4 bytes. We also
  // can only promote constants that are multiples of 4 bytes in size or
  // are paddable to a multiple of 4. Currently we only try and pad constants
  // that are strings for simplicity.
  auto *CDAInit = dyn_cast(Init);
  unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
  unsigned Align = GVar->getAlignment();
  // RequiredPadding is in [1, 4]; the value 4 means Size is already a multiple
  // of 4 and no padding is needed.
  unsigned RequiredPadding = 4 - (Size % 4);
  bool PaddingPossible =
    RequiredPadding == 4 || (CDAInit && CDAInit->isString());
  if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize)
    return SDValue();

  unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo();

  // We can't bloat the constant pool too much, else the ConstantIslands pass
  // may fail to converge. If we haven't promoted this global yet (it may have
  // multiple uses), and promoting it would increase the constant pool size (Sz
  // > 4), ensure we have space to do so up to MaxTotal.
  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
    if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
        ConstpoolPromotionMaxTotal)
      return SDValue();

  // This is only valid if all users are in a single function OR it has users
  // in multiple functions but it no larger than a pointer. We also check if
  // GVar has constant (non-ConstantExpr) users. If so, it essentially has its
  // address taken.
  if (!allUsersAreInFunction(GVar, F) &&
      !(Size <= 4 && allUsersAreInFunctions(GVar)))
    return SDValue();

  // We're going to inline this global. Pad it out if needed.
  if (RequiredPadding != 4) {
    // Copy the string bytes and append RequiredPadding zero bytes. CDAInit is
    // non-null here: PaddingPossible above required it whenever
    // RequiredPadding != 4.
    StringRef S = CDAInit->getAsString();

    SmallVector V(S.size());
    std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
    while (RequiredPadding--)
      V.push_back(0);
    Init = ConstantDataArray::get(*DAG.getContext(), V);
  }

  auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
  SDValue CPAddr =
    DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
  // Account for the constant-pool growth only the first time this global is
  // promoted in the current function.
  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
    AFI->markGlobalAsPromotedToConstantPool(GVar);
    AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
                                      PaddedSize - 4);
  }
  ++NumConstpoolPromoted;
  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
}

// Lower a global address for ELF. In order, the strategies tried are:
// constant-pool promotion (DSO-local globals, not execute-only), a PIC
// constant-pool load (optionally through the GOT), PC-relative addressing
// (ROPI, read-only data), SB-relative addressing off R9 (RWPI, writable data),
// a wrapped target global address when useMovt() holds, and finally a plain
// constant-pool load.
SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDLoc dl(Op);
  const GlobalValue *GV = cast(Op)->getGlobal();
  const TargetMachine &TM = getTargetMachine();
  // Look through an alias to its base object.
  if (const GlobalAlias *GA = dyn_cast(GV))
    GV = GA->getBaseObject();
  bool IsRO =
      (isa(GV) && cast(GV)->isConstant()) ||
      isa(GV);

  // promoteToConstantPool only if not generating XO text section
  if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
    if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl))
      return V;

  if (isPositionIndependent()) {
    // Globals that are not assumed DSO-local are reached through the GOT.
    bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);

    MachineFunction &MF = DAG.getMachineFunction();
    ARMFunctionInfo *AFI = MF.getInfo();
    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
    // NOTE(review): PtrVT and dl below re-declare (shadow) the outer locals of
    // the same names with the same initializers.
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    SDLoc dl(Op);
    unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
    ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
        GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
        UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
        /*AddCurrentAddress=*/UseGOT_PREL);
    SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    SDValue Result = DAG.getLoad(
        PtrVT, dl, DAG.getEntryNode(), CPAddr,
        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
    SDValue Chain = Result.getValue(1);
    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
    Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
    // With GOT_PREL the PIC_ADD result is loaded through once more, this time
    // described as a GOT access.
    if (UseGOT_PREL)
      Result =
          DAG.getLoad(PtrVT, dl, Chain, Result,
                      MachinePointerInfo::getGOT(DAG.getMachineFunction()));
    return Result;
  } else if (Subtarget->isROPI() && IsRO) {
    // PC-relative.
    SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
    SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
    return Result;
  } else if (Subtarget->isRWPI() && !IsRO) {
    // SB-relative.
    // The SBREL value is loaded from the constant pool and added to the
    // value copied from R9.
    ARMConstantPoolValue *CPV =
      ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
    SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    SDValue G = DAG.getLoad(
        PtrVT, dl, DAG.getEntryNode(), CPAddr,
        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
    SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
    SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, G);
    return Result;
  }

  // If we have T2 ops, we can materialize the address directly via movt/movw
  // pair. This is always cheaper.
  if (Subtarget->useMovt(DAG.getMachineFunction())) {
    ++NumMovwMovt;
    // FIXME: Once remat is capable of dealing with instructions with register
    // operands, expand this into two nodes.
    return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
                       DAG.getTargetGlobalAddress(GV, dl, PtrVT));
  } else {
    SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    return DAG.getLoad(
        PtrVT, dl, DAG.getEntryNode(), CPAddr,
        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
  }
}

// Lower a global address for Darwin: wrap a MO_NONLAZY target global address
// (WrapperPIC when position independent, Wrapper otherwise) and, for symbols
// reported as indirect by the subtarget, load the final address through it.
SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
                                                    SelectionDAG &DAG) const {
  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
         "ROPI/RWPI not currently supported for Darwin");
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDLoc dl(Op);
  const GlobalValue *GV = cast(Op)->getGlobal();

  // Only the statistic depends on useMovt(); the nodes built below do not.
  if (Subtarget->useMovt(DAG.getMachineFunction()))
    ++NumMovwMovt;

  // FIXME: Once remat is capable of dealing with instructions with register
  // operands, expand this into multiple nodes
  unsigned Wrapper =
      isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;

  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
  SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);

  if (Subtarget->isGVIndirectSymbol(GV))
    Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
                         MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  return Result;
}

// Lower a global address for Windows: wrap a target global address (flagged
// MO_DLLIMPORT for dllimport globals) and, for dllimport globals, load the
// final address through it.
SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
                                                     SelectionDAG &DAG) const {
  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
  assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
         "Windows on ARM expects to use movw/movt");
  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
         "ROPI/RWPI not currently supported for Windows");

  const GlobalValue *GV = cast(Op)->getGlobal();
  const ARMII::TOF TargetFlags =
    (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue Result;
  SDLoc DL(Op);

  ++NumMovwMovt;

  // FIXME: Once remat is capable of dealing with instructions with register
  // operands, expand this into two nodes.
  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
                       DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
                                                  TargetFlags));
  if (GV->hasDLLImportStorageClass())
    Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
                         MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  return Result;
}

// Lower to ARMISD::EH_SJLJ_SETJMP, forwarding operands 0 and 1 of Op and
// appending a constant 0 operand.
SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue Val = DAG.getConstant(0, dl, MVT::i32);
  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
                     DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
                     Op.getOperand(1), Val);
}

// Lower to ARMISD::EH_SJLJ_LONGJMP, forwarding operands 0 and 1 of Op and
// appending a constant 0 operand.
SDValue
ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
                     Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
}

// Lower to ARMISD::EH_SJLJ_SETUP_DISPATCH, forwarding only operand 0 of Op.
SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc dl(Op);
  return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
                     Op.getOperand(0));
}

// Custom-lower a handful of chain-less intrinsics; operand 0 of Op holds the
// intrinsic ID. Intrinsics not listed here yield an empty SDValue.
SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
                                          const ARMSubtarget *Subtarget) const {
  unsigned IntNo = cast(Op.getOperand(0))->getZExtValue();
  SDLoc dl(Op);
  switch (IntNo) {
  default: return SDValue();    // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
  }
  case Intrinsic::eh_sjlj_lsda: {
    // Load a CPLSDA constant-pool entry for the current function; when
    // position independent, combine it with its PIC label.
    MachineFunction &MF = DAG.getMachineFunction();
    ARMFunctionInfo *AFI = MF.getInfo();
    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    SDValue CPAddr;
    bool IsPositionIndependent = isPositionIndependent();
    // No PC adjustment is recorded unless the code is position independent.
    unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
    ARMConstantPoolValue *CPV =
      ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
                                      ARMCP::CPLSDA, PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    SDValue Result = DAG.getLoad(
        PtrVT, dl, DAG.getEntryNode(), CPAddr,
        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

    if (IsPositionIndependent) {
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
      Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
    }
    return Result;
  }
  case Intrinsic::arm_neon_vmulls:
  case Intrinsic::arm_neon_vmullu: {
    unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
      ? ARMISD::VMULLs : ARMISD::VMULLu;
    return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  }
  case Intrinsic::arm_neon_vminnm:
  case Intrinsic::arm_neon_vmaxnm: {
    unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
      ? ISD::FMINNUM : ISD::FMAXNUM;
    return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  }
  case Intrinsic::arm_neon_vminu:
  case Intrinsic::arm_neon_vmaxu: {
    // Floating-point result types are not custom lowered here.
    if (Op.getValueType().isFloatingPoint())
      return SDValue();
    unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
      ? ISD::UMIN : ISD::UMAX;
    return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
                         Op.getOperand(1), Op.getOperand(2));
  }
  case Intrinsic::arm_neon_vmins:
  case Intrinsic::arm_neon_vmaxs: {
    // v{min,max}s is overloaded between signed integers and floats.
    if (!Op.getValueType().isFloatingPoint()) {
      unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
        ? ISD::SMIN : ISD::SMAX;
      return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
                         Op.getOperand(1), Op.getOperand(2));
    }
    unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
      ? ISD::FMINNAN : ISD::FMAXNAN;
    return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  }
  }
}

// Lower an atomic fence: to ARMISD::MEMBARRIER_MCR when the subtarget has no
// data barrier, otherwise to the arm_dmb intrinsic with a barrier domain
// chosen from the subtarget and the fence ordering (operand 1 of Op).
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
                                 const ARMSubtarget *Subtarget) {
  // FIXME: handle "fence singlethread" more efficiently.
  SDLoc dl(Op);
  if (!Subtarget->hasDataBarrier()) {
    // Some ARMv6 cpus can support data barriers with an mcr instruction.
    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
    // here.
    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
           "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
                       DAG.getConstant(0, dl, MVT::i32));
  }

  ConstantSDNode *OrdN = cast(Op.getOperand(1));
  AtomicOrdering Ord = static_cast(OrdN->getZExtValue());
  // ISH is the default domain.
  ARM_MB::MemBOpt Domain = ARM_MB::ISH;
  if (Subtarget->isMClass()) {
    // Only a full system barrier exists in the M-class architectures.
    Domain = ARM_MB::SY;
  } else if (Subtarget->preferISHSTBarriers() &&
             Ord == AtomicOrdering::Release) {
    // Swift happens to implement ISHST barriers in a way that's compatible with
    // Release semantics but weaker than ISH so we'd be fools not to use
    // it. Beware: other processors probably don't!
    Domain = ARM_MB::ISHST;
  }

  return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
                     DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
                     DAG.getConstant(Domain, dl, MVT::i32));
}

// Lower a prefetch to ARMISD::PRELOAD, or drop it (returning just the chain,
// operand 0) when the subtarget lacks a suitable preload instruction.
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
                             const ARMSubtarget *Subtarget) {
  // ARM pre v5TE and Thumb1 does not have preload instructions.
  if (!(Subtarget->isThumb2() ||
        (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
    // Just preserve the chain.
    return Op.getOperand(0);

  SDLoc dl(Op);
  // isRead is the inverse of the low bit of operand 2.
  unsigned isRead = ~cast(Op.getOperand(2))->getZExtValue() & 1;
  if (!isRead &&
      (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
    // ARMv7 with MP extension has PLDW.
    return Op.getOperand(0);

  unsigned isData = cast(Op.getOperand(4))->getZExtValue();
  if (Subtarget->isThumb()) {
    // Invert the bits.
    isRead = ~isRead & 1;
    isData = ~isData & 1;
  }

  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
                     Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
                     DAG.getConstant(isData, dl, MVT::i32));
}

// Lower va_start: operand 0 is the chain, operand 1 the address to store to,
// and operand 2 carries the source value used for the pointer info.
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *FuncInfo = MF.getInfo();

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  SDLoc dl(Op);
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
  const Value *SV = cast(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

// Build an f64 formal argument from two i32 halves. The first half always
// arrives in the register given by VA; the second half is described by NextVA
// and is either loaded from a fixed 4-byte stack object or copied from a
// second register. The halves are swapped on non-little-endian subtargets
// before being combined with ARMISD::VMOVDRR.
SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
                                                CCValAssign &NextVA,
                                                SDValue &Root,
                                                SelectionDAG &DAG,
                                                const SDLoc &dl) const {
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo();

  const TargetRegisterClass *RC;
  if (AFI->isThumb1OnlyFunction())
    RC = &ARM::tGPRRegClass;
  else
    RC = &ARM::GPRRegClass;

  // Transform the arguments stored in physical registers into virtual ones.
  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);

  SDValue ArgValue2;
  if (NextVA.isMemLoc()) {
    MachineFrameInfo &MFI = MF.getFrameInfo();
    int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);

    // Create load node to retrieve arguments from the stack.
    SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    ArgValue2 = DAG.getLoad(
        MVT::i32, dl, Root, FIN,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
  } else {
    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
    ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
  }
  if (!Subtarget->isLittle())
    std::swap (ArgValue, ArgValue2);
  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
}

// The remaining GPRs hold either the beginning of variable-argument
// data, or the beginning of an aggregate passed by value (usually
// byval).  Either way, we allocate stack slots adjacent to the data
// provided by our caller, and store the unallocated registers there.
// If this is a variadic function, the va_list pointer will begin with
// these values; otherwise, this reassembles a (byval) structure that
// was split between registers and memory.
// Return: The frame index registers were stored into.
int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
                                      const SDLoc &dl, SDValue &Chain,
                                      const Value *OrigArg,
                                      unsigned InRegsParamRecordIdx,
                                      int ArgOffset, unsigned ArgSize) const {
  // Currently, two use-cases possible:
  // Case #1. Non-var-args function, and we meet first byval parameter.
  //          Setup first unallocated register as first byval register;
  //          eat all remained registers
  //          (these two actions are performed by HandleByVal method).
  //          Then, here, we initialize stack frame with
  //          "store-reg" instructions.
  // Case #2. Var-args function, that doesn't contain byval parameters.
  //          The same: eat all remained unallocated registers,
  //          initialize stack frame.

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo();
  // [RBegin, REnd) is the range of registers to store.
  unsigned RBegin, REnd;
  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
    CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
  } else {
    // No in-regs record for this index: use every still-unallocated GPR
    // argument register. Index 4 yields R4 as RBegin, i.e. an empty range.
    unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
    RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
    REnd = ARM::R4;
  }

  // With registers to store, the caller-supplied ArgOffset is replaced by a
  // negative offset of 4 bytes per register from RBegin up to R4.
  if (REnd != RBegin)
    ArgOffset = -4 * (ARM::R4 - RBegin);

  auto PtrVT = getPointerTy(DAG.getDataLayout());
  int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
  SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);

  SmallVector MemOps;
  const TargetRegisterClass *RC =
      AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;

  // Store each register to consecutive 4-byte slots starting at FIN.
  for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
    unsigned VReg = MF.addLiveIn(Reg, RC);
    SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
    SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                 MachinePointerInfo(OrigArg, 4 * i));
    MemOps.push_back(Store);
    FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
  }

  // Chain is only updated when at least one store was emitted.
  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
  return FrameIndex;
}

// Setup stack frame, the va_list pointer will start from.
void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
                                             const SDLoc &dl, SDValue &Chain,
                                             unsigned ArgOffset,
                                             unsigned TotalArgRegsSaveSize,
                                             bool ForceMutable) const {
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo();

  // Try to store any remaining integer argument regs
  // to their spots on the stack so that they may be loaded by dereferencing
  // the result of va_next.
  // If there are no regs to be stored, just point address after last
  // argument passed via stack.
  // NOTE(review): ArgOffset, TotalArgRegsSaveSize and ForceMutable are not
  // read in this body.
  int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
                                  CCInfo.getInRegsParamsCount(),
                                  CCInfo.getNextStackOffset(), 4);
  AFI->setVarArgsFrameIndex(FrameIndex);
}

SDValue ARMTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  ARMFunctionInfo *AFI = MF.getInfo();

  // Assign locations to all of the incoming arguments.
  SmallVector ArgLocs;
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                    *DAG.getContext(), Prologue);
  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));

  SmallVector ArgValues;
  SDValue ArgValue;
  Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
  unsigned CurArgIdx = 0;

  // Initially ArgRegsSaveSize is zero.
  // Then we increase this value each time we meet byval parameter.
  // We also increase this value in case of varargs function.
  AFI->setArgRegsSaveSize(0);

  // Calculate the amount of stack space that we need to allocate to store
  // byval and variadic arguments that are passed in registers.
  // We need to know this before we allocate the first byval or variadic
  // argument, as they will be allocated a stack slot below the CFA (Canonical
  // Frame Address, the stack pointer at entry to the function).
// (middle part of LowerFormalArguments; the signature is above this point)
  // Pass 1: find the lowest-numbered register used by any byval parameter that
  // is (partly) passed in registers. R4 means "no argument register to save".
  unsigned ArgRegBegin = ARM::R4;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    // Stop as soon as every recorded in-register byval range was consumed.
    if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
      break;

    CCValAssign &VA = ArgLocs[i];
    unsigned Index = VA.getValNo();
    ISD::ArgFlagsTy Flags = Ins[Index].Flags;
    if (!Flags.isByVal())
      continue;

    assert(VA.isMemLoc() && "unexpected byval pointer in reg");
    unsigned RBegin, REnd;
    CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
    ArgRegBegin = std::min(ArgRegBegin, RBegin);

    CCInfo.nextInRegsParam();
  }
  // Reset the byval bookkeeping so the main loop below can walk it again.
  CCInfo.rewindByValRegsInfo();

  int lastInsIndex = -1;
  // A variadic function that uses va_start also has to save every still
  // unallocated GPR argument register.
  if (isVarArg && MFI.hasVAStart()) {
    unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
    if (RegIdx != array_lengthof(GPRArgRegs))
      ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
  }

  // 4 bytes per register in [ArgRegBegin, R4).
  unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
  AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Pass 2: materialize a value for each argument location.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // Keep CurOrigArg pointing at the IR argument this location came from.
    if (Ins[VA.getValNo()].isOrigArg()) {
      std::advance(CurOrigArg,
                   Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
    }
    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      EVT RegVT = VA.getLocVT();

      if (VA.needsCustom()) {
        // f64 and vector types are split up into multiple registers or
        // combinations of registers and stack slots.
        if (VA.getLocVT() == MVT::v2f64) {
          // First f64 half; consumes the following location as well.
          SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
                                                   Chain, DAG, dl);
          // NOTE(review): VA is a reference, so this assignment copies the
          // next location into the element VA refers to rather than
          // rebinding VA -- confirm this is intended.
          VA = ArgLocs[++i]; // skip ahead to next loc
          SDValue ArgValue2;
          if (VA.isMemLoc()) {
            // Second half lives on the stack: load it from an 8-byte fixed
            // object at the location's offset.
            int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
            SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
            ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
                                    MachinePointerInfo::getFixedStack(
                                        DAG.getMachineFunction(), FI));
          } else {
            ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
                                             Chain, DAG, dl);
          }
          // Rebuild the v2f64 value from its two f64 elements.
          ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
                                 ArgValue, ArgValue1,
                                 DAG.getIntPtrConstant(0, dl));
          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
                                 ArgValue, ArgValue2,
                                 DAG.getIntPtrConstant(1, dl));
        } else
          ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);

      } else {
        // Pick the register class that matches the location type.
        const TargetRegisterClass *RC;

        if (RegVT == MVT::f32)
          RC = &ARM::SPRRegClass;
        else if (RegVT == MVT::f64)
          RC = &ARM::DPRRegClass;
        else if (RegVT == MVT::v2f64)
          RC = &ARM::QPRRegClass;
        else if (RegVT == MVT::i32)
          RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
                                           : &ARM::GPRRegClass;
        else
          llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");

        // Transform the arguments in physical registers into virtual ones.
        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
      }

      // If this is an 8 or 16-bit value, it is really passed promoted
      // to 32 bits. Insert an assert[sz]ext to capture this, then
      // truncate to the right size.
      switch (VA.getLocInfo()) {
      default: llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full: break;
      case CCValAssign::BCvt:
        ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
        break;
      case CCValAssign::SExt:
        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
        break;
      case CCValAssign::ZExt:
        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
        break;
      }

      InVals.push_back(ArgValue);

    } else { // VA.isRegLoc()

      // sanity check
      assert(VA.isMemLoc());
      assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");

      int index = VA.getValNo();

      // Some Ins[] entries become multiple ArgLoc[] entries.
      // Process them only once.
      if (index != lastInsIndex)
        {
          ISD::ArgFlagsTy Flags = Ins[index].Flags;
          // FIXME: For now, all byval parameter objects are marked mutable.
          // This can be changed with more analysis.
          // In case of tail call optimization mark all arguments mutable.
          // Since they could be overwritten by lowering of arguments in case of
          // a tail call.
          if (Flags.isByVal()) {
            assert(Ins[index].isOrigArg() &&
                   "Byval arguments cannot be implicit");
            unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();

            // The byval argument is represented by the frame index of the
            // object returned by StoreByValRegs.
            int FrameIndex = StoreByValRegs(
                CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
                VA.getLocMemOffset(), Flags.getByValSize());
            InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
            CCInfo.nextInRegsParam();
          } else {
            unsigned FIOffset = VA.getLocMemOffset();
            int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
                                           FIOffset, true);

            // Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), FI))); } lastInsIndex = index; } } } // varargs if (isVarArg && MFI.hasVAStart()) VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset(), TotalArgRegsSaveSize); AFI->setArgumentStackSize(CCInfo.getNextStackOffset()); return Chain; } /// isFloatingPointZero - Return true if this is +0.0. static bool isFloatingPointZero(SDValue Op) { if (ConstantFPSDNode *CFP = dyn_cast(Op)) return CFP->getValueAPF().isPosZero(); else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { // Maybe this has already been legalized into the constant pool? if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { SDValue WrapperOp = Op.getOperand(1).getOperand(0); if (ConstantPoolSDNode *CP = dyn_cast(WrapperOp)) if (const ConstantFP *CFP = dyn_cast(CP->getConstVal())) return CFP->getValueAPF().isPosZero(); } } else if (Op->getOpcode() == ISD::BITCAST && Op->getValueType(0) == MVT::f64) { // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64) // created by LowerConstantFP(). SDValue BitcastOp = Op->getOperand(0); if (BitcastOp->getOpcode() == ARMISD::VMOVIMM && isNullConstant(BitcastOp->getOperand(0))) return true; } return false; } /// Returns appropriate ARM CMP (cmp) and corresponding condition code for /// the given operands. SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMcc, SelectionDAG &DAG, const SDLoc &dl) const { if (ConstantSDNode *RHSC = dyn_cast(RHS.getNode())) { unsigned C = RHSC->getZExtValue(); if (!isLegalICmpImmediate(C)) { // Constant does not fit, try adjusting it by one? switch (CC) { default: break; case ISD::SETLT: case ISD::SETGE: if (C != 0x80000000 && isLegalICmpImmediate(C-1)) { CC = (CC == ISD::SETLT) ? 
            // (continuation of the conditional expression started above)
            ISD::SETLE : ISD::SETGT;
          RHS = DAG.getConstant(C - 1, dl, MVT::i32);
        }
        break;
      case ISD::SETULT:
      case ISD::SETUGE:
        // Unsigned variant of the same rewrite; C == 0 would wrap.
        if (C != 0 && isLegalICmpImmediate(C-1)) {
          CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
          RHS = DAG.getConstant(C - 1, dl, MVT::i32);
        }
        break;
      case ISD::SETLE:
      case ISD::SETGT:
        // x <= C  ==  x < C+1  and  x > C  ==  x >= C+1 (signed).
        if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
          CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
          RHS = DAG.getConstant(C + 1, dl, MVT::i32);
        }
        break;
      case ISD::SETULE:
      case ISD::SETUGT:
        // Unsigned variant; C == 0xffffffff would wrap.
        if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
          CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
          RHS = DAG.getConstant(C + 1, dl, MVT::i32);
        }
        break;
      }
    }
  }

  // Translate the (possibly adjusted) condition and select the compare node.
  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
  ARMISD::NodeType CompareType;
  switch (CondCode) {
  default:
    CompareType = ARMISD::CMP;
    break;
  case ARMCC::EQ:
  case ARMCC::NE:
    // Uses only Z Flag
    CompareType = ARMISD::CMPZ;
    break;
  }
  ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
}

/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
///
/// A right-hand side recognized by isFloatingPointZero() selects the
/// one-operand ARMISD::CMPFPw0 form; otherwise ARMISD::CMPFP is used. The
/// compare is wrapped in an ARMISD::FMSTAT node, which is returned.
SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
                                     SelectionDAG &DAG, const SDLoc &dl) const {
  assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
  SDValue Cmp;
  if (!isFloatingPointZero(RHS))
    Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
  else
    Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
}

/// duplicateCmp - Glue values can have only one use, so this function
/// duplicates a comparison node.
SDValue ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { unsigned Opc = Cmp.getOpcode(); SDLoc DL(Cmp); if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation"); Cmp = Cmp.getOperand(0); Opc = Cmp.getOpcode(); if (Opc == ARMISD::CMPFP) Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); else { assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"); Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); } return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); } std::pair ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, SDValue &ARMcc) const { assert(Op.getValueType() == MVT::i32 && "Unsupported value type"); SDValue Value, OverflowCmp; SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDLoc dl(Op); // FIXME: We are currently always generating CMPs because we don't support // generating CMN through the backend. This is not as good as the natural // CMP case because it causes a register dependency and cannot be folded // later. 
// (opcode dispatch of getARMXALUOOp; the signature is above this point)
  // Additions compare the result against LHS; subtractions compare LHS
  // against RHS. Signed forms use condition VC, unsigned forms use HS.
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unknown overflow instruction!");
  case ISD::SADDO:
    ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
    Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
    break;
  case ISD::UADDO:
    ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
    Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
    break;
  case ISD::SSUBO:
    ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
    Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
    break;
  case ISD::USUBO:
    ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
    Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
    break;
  } // switch (...)

  return std::make_pair(Value, OverflowCmp);
}

// Custom lowering of an overflow-checking arithmetic node. Returns an empty
// SDValue when the value type is not legal yet; the rest of the body follows
// below.
SDValue
ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
  // Let legalize expand this if it isn't a legal type yet.
  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
    return SDValue();

  SDValue Value, OverflowCmp;
  SDValue ARMcc;
  std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDLoc dl(Op);
  // We use 0 and 1 as false and true values.
SDValue TVal = DAG.getConstant(1, dl, MVT::i32); SDValue FVal = DAG.getConstant(0, dl, MVT::i32); EVT VT = Op.getValueType(); SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, ARMcc, CCR, OverflowCmp); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); } SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); SDValue SelectFalse = Op.getOperand(2); SDLoc dl(Op); unsigned Opc = Cond.getOpcode(); if (Cond.getResNo() == 1 && (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || Opc == ISD::USUBO)) { if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0))) return SDValue(); SDValue Value, OverflowCmp; SDValue ARMcc; std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); EVT VT = Op.getValueType(); return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR, OverflowCmp, DAG); } // Convert: // // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond) // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond) // if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) { const ConstantSDNode *CMOVTrue = dyn_cast(Cond.getOperand(0)); const ConstantSDNode *CMOVFalse = dyn_cast(Cond.getOperand(1)); if (CMOVTrue && CMOVFalse) { unsigned CMOVTrueVal = CMOVTrue->getZExtValue(); unsigned CMOVFalseVal = CMOVFalse->getZExtValue(); SDValue True; SDValue False; if (CMOVTrueVal == 1 && CMOVFalseVal == 0) { True = SelectTrue; False = SelectFalse; } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) { True = SelectFalse; False = SelectTrue; } if (True.getNode() && False.getNode()) { EVT VT = Op.getValueType(); SDValue ARMcc = Cond.getOperand(2); SDValue CCR = Cond.getOperand(3); SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); assert(True.getValueType() == VT); return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG); } } } // 
ARM's BooleanContents value is UndefinedBooleanContent. Mask out the // undefined bits before doing a full-word comparison with zero. Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond, DAG.getConstant(1, dl, Cond.getValueType())); return DAG.getSelectCC(dl, Cond, DAG.getConstant(0, dl, Cond.getValueType()), SelectTrue, SelectFalse, ISD::SETNE); } static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, bool &swpCmpOps, bool &swpVselOps) { // Start by selecting the GE condition code for opcodes that return true for // 'equality' if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE || CC == ISD::SETULE) CondCode = ARMCC::GE; // and GT for opcodes that return false for 'equality'. else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT || CC == ISD::SETULT) CondCode = ARMCC::GT; // Since we are constrained to GE/GT, if the opcode contains 'less', we need // to swap the compare operands. if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT || CC == ISD::SETULT) swpCmpOps = true; // Both GT and GE are ordered comparisons, and return false for 'unordered'. // If we have an unordered opcode, we need to swap the operands to the VSEL // instruction (effectively negating the condition). // // This also has the effect of swapping which one of 'less' or 'greater' // returns true, so we also swap the compare operands. It also switches // whether we return true for 'equality', so we compensate by picking the // opposite condition code to our original choice. if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE || CC == ISD::SETUGT) { swpCmpOps = !swpCmpOps; swpVselOps = !swpVselOps; CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT; } // 'ordered' is 'anything but unordered', so use the VS condition code and // swap the VSEL operands. 
// (tail of checkVSELConstraints; the signature is above this point)
  if (CC == ISD::SETO) {
    CondCode = ARMCC::VS;
    swpVselOps = true;
  }

  // 'unordered or not equal' is 'anything but equal', so use the EQ condition
  // code and swap the VSEL operands.
  if (CC == ISD::SETUNE) {
    CondCode = ARMCC::EQ;
    swpVselOps = true;
  }
}

// Builds an ARMISD::CMOV of (FalseVal, TrueVal) on condition ARMcc with the
// flags produced by Cmp. For f64 on an FP-only-single-precision subtarget the
// two values are split into i32 halves with VMOVRRD, each half gets its own
// CMOV (the second one on a duplicate of Cmp, since the compare's glue result
// is used once per CMOV), and the halves are recombined with VMOVDRR.
SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
                                   SDValue TrueVal, SDValue ARMcc, SDValue CCR,
                                   SDValue Cmp, SelectionDAG &DAG) const {
  if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
    FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
                           DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
    TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
                          DAG.getVTList(MVT::i32, MVT::i32), TrueVal);

    SDValue TrueLow = TrueVal.getValue(0);
    SDValue TrueHigh = TrueVal.getValue(1);
    SDValue FalseLow = FalseVal.getValue(0);
    SDValue FalseHigh = FalseVal.getValue(1);

    SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
                              ARMcc, CCR, Cmp);
    SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
                               ARMcc, CCR, duplicateCmp(Cmp, DAG));

    return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
  } else {
    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
                       Cmp);
  }
}

// True for the signed "greater" condition codes SETGT and SETGE.
static bool isGTorGE(ISD::CondCode CC) {
  return CC == ISD::SETGT || CC == ISD::SETGE;
}

// True for the signed "less" condition codes SETLT and SETLE.
static bool isLTorLE(ISD::CondCode CC) {
  return CC == ISD::SETLT || CC == ISD::SETLE;
}

// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
// All of these conditions (and their <= and >= counterparts) will do:
//          x < k ? k : x
//          x > k ? x : k
//          k < x ? x : k
//          k > x ? k : x
// K is the candidate bound; it must appear both as one compare operand and as
// the matching select result.
static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
                            const SDValue TrueVal, const SDValue FalseVal,
                            const ISD::CondCode CC, const SDValue K) {
  return (isGTorGE(CC) &&
          ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
         (isLTorLE(CC) &&
          ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
}

// Similar to isLowerSaturate(), but checks for upper-saturating conditions.
static bool isUpperSaturate(const SDValue LHS, const SDValue RHS, const SDValue TrueVal, const SDValue FalseVal, const ISD::CondCode CC, const SDValue K) { return (isGTorGE(CC) && ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))) || (isLTorLE(CC) && ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))); } // Check if two chained conditionals could be converted into SSAT. // // SSAT can replace a set of two conditional selectors that bound a number to an // interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples: // // x < -k ? -k : (x > k ? k : x) // x < -k ? -k : (x < k ? x : k) // x > -k ? (x > k ? k : x) : -k // x < k ? (x < -k ? -k : x) : k // etc. // // It returns true if the conversion can be done, false otherwise. // Additionally, the variable is returned in parameter V and the constant in K. static bool isSaturatingConditional(const SDValue &Op, SDValue &V, uint64_t &K) { SDValue LHS1 = Op.getOperand(0); SDValue RHS1 = Op.getOperand(1); SDValue TrueVal1 = Op.getOperand(2); SDValue FalseVal1 = Op.getOperand(3); ISD::CondCode CC1 = cast(Op.getOperand(4))->get(); const SDValue Op2 = isa(TrueVal1) ? FalseVal1 : TrueVal1; if (Op2.getOpcode() != ISD::SELECT_CC) return false; SDValue LHS2 = Op2.getOperand(0); SDValue RHS2 = Op2.getOperand(1); SDValue TrueVal2 = Op2.getOperand(2); SDValue FalseVal2 = Op2.getOperand(3); ISD::CondCode CC2 = cast(Op2.getOperand(4))->get(); // Find out which are the constants and which are the variables // in each conditional SDValue *K1 = isa(LHS1) ? &LHS1 : isa(RHS1) ? &RHS1 : NULL; SDValue *K2 = isa(LHS2) ? &LHS2 : isa(RHS2) ? &RHS2 : NULL; SDValue K2Tmp = isa(TrueVal2) ? TrueVal2 : FalseVal2; SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1; SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2; SDValue V2 = (K2Tmp == TrueVal2) ? FalseVal2 : TrueVal2; // We must detect cases where the original operations worked with 16- or // 8-bit values. 
In such case, V2Tmp != V2 because the comparison operations // must work with sign-extended values but the select operations return // the original non-extended value. SDValue V2TmpReg = V2Tmp; if (V2Tmp->getOpcode() == ISD::SIGN_EXTEND_INREG) V2TmpReg = V2Tmp->getOperand(0); // Check that the registers and the constants have the correct values // in both conditionals if (!K1 || !K2 || *K1 == Op2 || *K2 != K2Tmp || V1Tmp != V2Tmp || V2TmpReg != V2) return false; // Figure out which conditional is saturating the lower/upper bound. const SDValue *LowerCheckOp = isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1) ? &Op : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) ? &Op2 : NULL; const SDValue *UpperCheckOp = isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1) ? &Op : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) ? &Op2 : NULL; if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp) return false; // Check that the constant in the lower-bound check is // the opposite of the constant in the upper-bound check // in 1's complement. 
int64_t Val1 = cast(*K1)->getSExtValue(); int64_t Val2 = cast(*K2)->getSExtValue(); int64_t PosVal = std::max(Val1, Val2); if (((Val1 > Val2 && UpperCheckOp == &Op) || (Val1 < Val2 && UpperCheckOp == &Op2)) && Val1 == ~Val2 && isPowerOf2_64(PosVal + 1)) { V = V2; K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive return true; } return false; } SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc dl(Op); // Try to convert two saturating conditional selects into a single SSAT SDValue SatValue; uint64_t SatConstant; if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) && isSaturatingConditional(Op, SatValue, SatConstant)) return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue, DAG.getConstant(countTrailingOnes(SatConstant), dl, VT)); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); ISD::CondCode CC = cast(Op.getOperand(4))->get(); SDValue TrueVal = Op.getOperand(2); SDValue FalseVal = Op.getOperand(3); if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) { DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC, dl); // If softenSetCCOperands only returned one value, we should compare it to // zero. if (!RHS.getNode()) { RHS = DAG.getConstant(0, dl, LHS.getValueType()); CC = ISD::SETNE; } } if (LHS.getValueType() == MVT::i32) { // Try to generate VSEL on ARMv8. // The VSEL instruction can't use all the usual ARM condition // codes: it only has two bits to select the condition code, so it's // constrained to use only GE, GT, VS and EQ. 
// // To implement all the various ISD::SETXXX opcodes, we sometimes need to // swap the operands of the previous compare instruction (effectively // inverting the compare condition, swapping 'less' and 'greater') and // sometimes need to swap the operands to the VSEL (which inverts the // condition in the sense of firing whenever the previous condition didn't) if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || TrueVal.getValueType() == MVT::f64)) { ARMCC::CondCodes CondCode = IntCCToARMCC(CC); if (CondCode == ARMCC::LT || CondCode == ARMCC::LE || CondCode == ARMCC::VC || CondCode == ARMCC::NE) { CC = ISD::getSetCCInverse(CC, true); std::swap(TrueVal, FalseVal); } } SDValue ARMcc; SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); } ARMCC::CondCodes CondCode, CondCode2; FPCCToARMCC(CC, CondCode, CondCode2); // Try to generate VMAXNM/VMINNM on ARMv8. if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || TrueVal.getValueType() == MVT::f64)) { bool swpCmpOps = false; bool swpVselOps = false; checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps); if (CondCode == ARMCC::GT || CondCode == ARMCC::GE || CondCode == ARMCC::VS || CondCode == ARMCC::EQ) { if (swpCmpOps) std::swap(LHS, RHS); if (swpVselOps) std::swap(TrueVal, FalseVal); } } SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); if (CondCode2 != ARMCC::AL) { SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32); // FIXME: Needs another CMP because flag can have but one use. 
SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG); } return Result; } /// canChangeToInt - Given the fp compare operand, return true if it is suitable /// to morph to an integer compare sequence. static bool canChangeToInt(SDValue Op, bool &SeenZero, const ARMSubtarget *Subtarget) { SDNode *N = Op.getNode(); if (!N->hasOneUse()) // Otherwise it requires moving the value from fp to integer registers. return false; if (!N->getNumValues()) return false; EVT VT = Op.getValueType(); if (VT != MVT::f32 && !Subtarget->isFPBrccSlow()) // f32 case is generally profitable. f64 case only makes sense when vcmpe + // vmrs are very slow, e.g. cortex-a8. return false; if (isFloatingPointZero(Op)) { SeenZero = true; return true; } return ISD::isNormalLoad(N); } static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { if (isFloatingPointZero(Op)) return DAG.getConstant(0, SDLoc(Op), MVT::i32); if (LoadSDNode *Ld = dyn_cast(Op)) return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), Ld->getAlignment(), Ld->getMemOperand()->getFlags()); llvm_unreachable("Unknown VFP cmp argument!"); } static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, SDValue &RetVal1, SDValue &RetVal2) { SDLoc dl(Op); if (isFloatingPointZero(Op)) { RetVal1 = DAG.getConstant(0, dl, MVT::i32); RetVal2 = DAG.getConstant(0, dl, MVT::i32); return; } if (LoadSDNode *Ld = dyn_cast(Op)) { SDValue Ptr = Ld->getBasePtr(); RetVal1 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), Ld->getAlignment(), Ld->getMemOperand()->getFlags()); EVT PtrType = Ptr.getValueType(); unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); SDValue NewPtr = DAG.getNode(ISD::ADD, dl, PtrType, Ptr, DAG.getConstant(4, dl, PtrType)); RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr, Ld->getPointerInfo().getWithOffset(4), NewAlign, Ld->getMemOperand()->getFlags()); return; } llvm_unreachable("Unknown VFP cmp 
argument!"); } /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some /// f32 and even f64 comparisons to integer ones. SDValue ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); SDValue RHS = Op.getOperand(3); SDValue Dest = Op.getOperand(4); SDLoc dl(Op); bool LHSSeenZero = false; bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget); bool RHSSeenZero = false; bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget); if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) { // If unsafe fp math optimization is enabled and there are no other uses of // the CMP operands, and the condition code is EQ or NE, we can optimize it // to an integer comparison. if (CC == ISD::SETOEQ) CC = ISD::SETEQ; else if (CC == ISD::SETUNE) CC = ISD::SETNE; SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32); SDValue ARMcc; if (LHS.getValueType() == MVT::f32) { LHS = DAG.getNode(ISD::AND, dl, MVT::i32, bitcastf32Toi32(LHS, DAG), Mask); RHS = DAG.getNode(ISD::AND, dl, MVT::i32, bitcastf32Toi32(RHS, DAG), Mask); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR, Cmp); } SDValue LHS1, LHS2; SDValue RHS1, RHS2; expandf64Toi32(LHS, DAG, LHS1, LHS2); expandf64Toi32(RHS, DAG, RHS1, RHS2); LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask); RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask); ARMCC::CondCodes CondCode = IntCCToARMCC(CC); ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops); } return SDValue(); } SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); 
ISD::CondCode CC = cast(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); SDValue RHS = Op.getOperand(3); SDValue Dest = Op.getOperand(4); SDLoc dl(Op); if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) { DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC, dl); // If softenSetCCOperands only returned one value, we should compare it to // zero. if (!RHS.getNode()) { RHS = DAG.getConstant(0, dl, LHS.getValueType()); CC = ISD::SETNE; } } if (LHS.getValueType() == MVT::i32) { SDValue ARMcc; SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR, Cmp); } assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); if (getTargetMachine().Options.UnsafeFPMath && (CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETNE || CC == ISD::SETUNE)) { if (SDValue Result = OptimizeVFPBrcond(Op, DAG)) return Result; } ARMCC::CondCodes CondCode, CondCode2; FPCCToARMCC(CC, CondCode, CondCode2); SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); if (CondCode2 != ARMCC::AL) { ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32); SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); } return Res; } SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Table = Op.getOperand(1); SDValue Index = Op.getOperand(2); SDLoc dl(Op); EVT PTy = getPointerTy(DAG.getDataLayout()); JumpTableSDNode *JT = cast(Table); SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI); Index 
// (continuation of LowerBR_JT: completes the "Index = ..." assignment, which
// scales the index by 4)
      = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
  // Address of the selected entry: table base plus scaled index.
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
  if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
    // Thumb2 and ARMv8-M use a two-level jump. That is, it jumps into the jump table
    // which does another jump to the destination. This also makes it easier
    // to translate it to TBB / TBH later (Thumb2 only).
    // FIXME: This might not work if the function is extremely large.
    return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
                       Addr, Op.getOperand(2), JTI);
  }
  if (isPositionIndependent() || Subtarget->isROPI()) {
    // The loaded entry is added to the table address before branching.
    Addr =
        DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
                    MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
    Chain = Addr.getValue(1);
    Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
  } else {
    // The loaded entry is used as the branch target as-is.
    Addr =
        DAG.getLoad(PTy, dl, Chain, Addr,
                    MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
    Chain = Addr.getValue(1);
    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
  }
}

// Lowers a vector FP-to-integer conversion.
//  - i32 result elements: returned unchanged when the source elements are f32,
//    otherwise unrolled into scalar operations.
//  - other results: the source must be v4f32; only a v4i16 result is handled,
//    by converting to v4i32 and truncating. Anything else is unrolled.
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  SDLoc dl(Op);

  if (Op.getValueType().getVectorElementType() == MVT::i32) {
    if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
      return Op;
    return DAG.UnrollVectorOp(Op.getNode());
  }

  assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
         "Invalid type for custom lowering!");
  if (VT != MVT::v4i16)
    return DAG.UnrollVectorOp(Op.getNode());

  Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
  return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
}

// Custom lowering of FP_TO_SINT / FP_TO_UINT. Vector results are delegated to
// LowerVectorFP_TO_INT; an f64 source on an FP-only-single-precision
// subtarget becomes a runtime library call (continued below).
SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  if (VT.isVector())
    return LowerVectorFP_TO_INT(Op, DAG);
  if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
    RTLIB::Libcall LC;
    if (Op.getOpcode() == ISD::FP_TO_SINT)
      LC =
          // (continuation of LowerFP_TO_INT: right-hand side of "LC =")
          RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
                             Op.getValueType());
    else
      LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
                              Op.getValueType());
    return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
                       /*isSigned*/ false, SDLoc(Op)).first;
  }

  // Everything else is returned unchanged.
  return Op;
}

// Lowers a vector integer-to-FP conversion.
//  - i32 source elements: returned unchanged when the result elements are
//    f32, otherwise unrolled into scalar operations.
//  - other sources: the source must be v4i16; only a v4f32 result is handled,
//    by sign- or zero-extending to v4i32 (matching the opcode) and converting.
//    Anything else is unrolled.
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  SDLoc dl(Op);

  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
    if (VT.getVectorElementType() == MVT::f32)
      return Op;
    return DAG.UnrollVectorOp(Op.getNode());
  }

  assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
         "Invalid type for custom lowering!");
  if (VT != MVT::v4f32)
    return DAG.UnrollVectorOp(Op.getNode());

  unsigned CastOpc;
  unsigned Opc;
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Invalid opcode!");
  case ISD::SINT_TO_FP:
    CastOpc = ISD::SIGN_EXTEND;
    Opc = ISD::SINT_TO_FP;
    break;
  case ISD::UINT_TO_FP:
    CastOpc = ISD::ZERO_EXTEND;
    Opc = ISD::UINT_TO_FP;
    break;
  }

  Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
  return DAG.getNode(Opc, dl, VT, Op);
}

// Custom lowering of SINT_TO_FP / UINT_TO_FP. Vector results are delegated to
// LowerVectorINT_TO_FP; an f64 result on an FP-only-single-precision
// subtarget becomes a runtime library call; everything else is returned
// unchanged.
SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  if (VT.isVector())
    return LowerVectorINT_TO_FP(Op, DAG);
  if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
    RTLIB::Libcall LC;
    if (Op.getOpcode() == ISD::SINT_TO_FP)
      LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
                              Op.getValueType());
    else
      LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
                              Op.getValueType());
    return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
                       /*isSigned*/ false, SDLoc(Op)).first;
  }

  return Op;
}

// Custom lowering of FCOPYSIGN; operands are read below.
SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
  // Implement fcopysign with a fabs and a conditional fneg.
SDValue Tmp0 = Op.getOperand(0); SDValue Tmp1 = Op.getOperand(1); SDLoc dl(Op); EVT VT = Op.getValueType(); EVT SrcVT = Tmp1.getValueType(); bool InGPR = Tmp0.getOpcode() == ISD::BITCAST || Tmp0.getOpcode() == ARMISD::VMOVDRR; bool UseNEON = !InGPR && Subtarget->hasNEON(); if (UseNEON) { // Use VBSL to copy the sign bit. unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80); SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32, DAG.getTargetConstant(EncodedVal, dl, MVT::i32)); EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64; if (VT == MVT::f64) Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT, DAG.getNode(ISD::BITCAST, dl, OpVT, Mask), DAG.getConstant(32, dl, MVT::i32)); else /*if (VT == MVT::f32)*/ Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0); if (SrcVT == MVT::f32) { Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1); if (VT == MVT::f64) Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT, DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1), DAG.getConstant(32, dl, MVT::i32)); } else if (VT == MVT::f32) Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64, DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1), DAG.getConstant(32, dl, MVT::i32)); Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff), dl, MVT::i32); AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes)); SDValue Res = DAG.getNode(ISD::OR, dl, OpVT, DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask), DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot)); if (VT == MVT::f32) { Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res); Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, DAG.getConstant(0, dl, MVT::i32)); } else { Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res); } return Res; } // Bitcast operand 1 to i32. 
if (SrcVT == MVT::f64) Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Tmp1).getValue(1); Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); // Or in the signbit with integer operations. SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32); SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32); Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1); if (VT == MVT::f32) { Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32, DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2); return DAG.getNode(ISD::BITCAST, dl, MVT::f32, DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1)); } // f64: Or the high part with signbit and then combine two parts. Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Tmp0); SDValue Lo = Tmp0.getValue(0); SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); } SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MFI.setReturnAddressIsTaken(true); if (verifyReturnAddressArgumentIsConstant(Op, DAG)) return SDValue(); EVT VT = Op.getValueType(); SDLoc dl(Op); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); if (Depth) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = DAG.getConstant(4, dl, MVT::i32); return DAG.getLoad(VT, dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), MachinePointerInfo()); } // Return LR, which contains the return address. Mark it an implicit live-in. 
unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); } SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { const ARMBaseRegisterInfo &ARI = *static_cast(RegInfo); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MFI.setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); SDLoc dl(Op); // FIXME probably not meaningful unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); unsigned FrameReg = ARI.getFrameRegister(MF); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, MachinePointerInfo()); return FrameAddr; } // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const { unsigned Reg = StringSwitch(RegName) .Case("sp", ARM::SP) .Default(0); if (Reg) return Reg; report_fatal_error(Twine("Invalid register name \"" + StringRef(RegName) + "\".")); } // Result is 64 bit value so split into two 32 bit values and return as a // pair of values. static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) { SDLoc DL(N); // This function is only supposed to be called for i64 type destination. assert(N->getValueType(0) == MVT::i64 && "ExpandREAD_REGISTER called for non-i64 type result."); SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL, DAG.getVTList(MVT::i32, MVT::i32, MVT::Other), N->getOperand(0), N->getOperand(1)); Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0), Read.getValue(1))); Results.push_back(Read.getOperand(0)); } /// \p BC is a bitcast that is about to be turned into a VMOVDRR. 
/// When \p DstVT, the destination type of \p BC, is on the vector /// register bank and the source of bitcast, \p Op, operates on the same bank, /// it might be possible to combine them, such that everything stays on the /// vector register bank. /// \p return The node that would replace \p BT, if the combine /// is possible. static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC, SelectionDAG &DAG) { SDValue Op = BC->getOperand(0); EVT DstVT = BC->getValueType(0); // The only vector instruction that can produce a scalar (remember, // since the bitcast was about to be turned into VMOVDRR, the source // type is i64) from a vector is EXTRACT_VECTOR_ELT. // Moreover, we can do this combine only if there is one use. // Finally, if the destination type is not a vector, there is not // much point on forcing everything on the vector bank. if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || !Op.hasOneUse()) return SDValue(); // If the index is not constant, we will introduce an additional // multiply that will stick. // Give up in that case. ConstantSDNode *Index = dyn_cast(Op.getOperand(1)); if (!Index) return SDValue(); unsigned DstNumElt = DstVT.getVectorNumElements(); // Compute the new index. const APInt &APIntIndex = Index->getAPIntValue(); APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt); NewIndex *= APIntIndex; // Check if the new constant index fits into i32. 
if (NewIndex.getBitWidth() > 32) return SDValue(); // vMTy bitcast(i64 extractelt vNi64 src, i32 index) -> // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M) SDLoc dl(Op); SDValue ExtractSrc = Op.getOperand(0); EVT VecVT = EVT::getVectorVT( *DAG.getContext(), DstVT.getScalarType(), ExtractSrc.getValueType().getVectorNumElements() * DstNumElt); SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast, DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32)); } /// ExpandBITCAST - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 /// operand type is illegal (e.g., v2f32 for a target that doesn't support /// vectors), since the legalizer won't know what to do with that. static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc dl(N); SDValue Op = N->getOperand(0); // This function is only supposed to be called for i64 types, either as the // source or destination of the bit convert. EVT SrcVT = Op.getValueType(); EVT DstVT = N->getValueType(0); assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && "ExpandBITCAST called for non-i64 type"); // Turn i64->f64 into VMOVDRR. if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { // Do not force values to GPRs (this is what VMOVDRR does for the inputs) // if we can combine the bitcast with its source. if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG)) return Val; SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, DAG.getConstant(0, dl, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, DAG.getConstant(1, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, DstVT, DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi)); } // Turn f64->i64 into VMOVRRD. 
if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { SDValue Cvt; if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() && SrcVT.getVectorNumElements() > 1) Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op)); else Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Op); // Merge the pieces into a single i64 value. return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); } return SDValue(); } /// getZeroVector - Returns a vector of specified type with all zero elements. /// Zero vectors are used to represent vector negation and in those cases /// will be implemented with the NEON VNEG instruction. However, VNEG does /// not support i64 elements, so sometimes the zero vectors will need to be /// explicitly constructed. Regardless, use a canonical VMOV to create the /// zero vector. static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) { assert(VT.isVector() && "Expected a vector type"); // The canonical modified immediate encoding of a zero vector is....0! SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32); EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal); return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); } /// LowerShiftRightParts - Lower SRA_PARTS, which returns two /// i32 values and take a 2 x i32 value to shift plus a shift amount. SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); SDLoc dl(Op); SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); SDValue ARMcc; SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? 
ISD::SRA : ISD::SRL; assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, DAG.getConstant(VTBits, dl, MVT::i32), ShAmt); SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, DAG.getConstant(VTBits, dl, MVT::i32)); SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), ISD::SETGE, ARMcc, DAG, dl); SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift, ARMcc, CCR, CmpLo); SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); SDValue HiBigShift = Opc == ISD::SRA ? DAG.getNode(Opc, dl, VT, ShOpHi, DAG.getConstant(VTBits - 1, dl, VT)) : DAG.getConstant(0, dl, VT); SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), ISD::SETGE, ARMcc, DAG, dl); SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CCR, CmpHi); SDValue Ops[2] = { Lo, Hi }; return DAG.getMergeValues(Ops, dl); } /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two /// i32 values and take a 2 x i32 value to shift plus a shift amount. 
SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
                                               SelectionDAG &DAG) const {
  // Operand 0 is the low half, operand 1 the high half and operand 2 the
  // shift amount.  The result is the merged pair { Lo, Hi }.
  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Op.getValueType();
  unsigned VTBits = VT.getSizeInBits();
  SDLoc dl(Op);
  SDValue ShOpLo = Op.getOperand(0);
  SDValue ShOpHi = Op.getOperand(1);
  SDValue ShAmt = Op.getOperand(2);
  SDValue ARMcc;
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);

  assert(Op.getOpcode() == ISD::SHL_PARTS);

  // High half, small-shift case: (Lo >> (VTBits - ShAmt)) | (Hi << ShAmt).
  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
  SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);

  // High half, big-shift case: Lo << (ShAmt - VTBits).
  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
                                   DAG.getConstant(VTBits, dl, MVT::i32));
  SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
  // The CMOVs pick between the small- and big-shift values based on the
  // ExtraShAmt >= 0 comparison (presumably taking the big-shift value when
  // the comparison holds -- confirm CMOV operand order).
  SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
                            ISD::SETGE, ARMcc, DAG, dl);
  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
                           ARMcc, CCR, CmpHi);

  // Low half: Lo << ShAmt for the small-shift case, zero otherwise.
  SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
                            ISD::SETGE, ARMcc, DAG, dl);
  SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
                           DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);

  SDValue Ops[2] = { Lo, Hi };
  return DAG.getMergeValues(Ops, dl);
}

/// Lower FLT_ROUNDS_ by reading the FPSCR and remapping its rounding-mode
/// field to the FLT_ROUNDS encoding.
SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  // The rounding mode is in bits 23:22 of the FPSCR.
  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
  // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3
  // so that the shift and the AND get folded into a bitfield extract.
  SDLoc dl(Op);
  SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
                              DAG.getConstant(Intrinsic::arm_get_fpscr, dl,
                                              MVT::i32));
  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
                                  DAG.getConstant(1U << 22, dl, MVT::i32));
  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
                              DAG.getConstant(22, dl, MVT::i32));
  return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
                     DAG.getConstant(3, dl, MVT::i32));
}

/// Lower CTTZ / CTTZ_ZERO_UNDEF.
///
/// Vector types (NEON required) are computed from the least significant set
/// bit, using either ctpop(lsb - 1) or (width - 1) - ctlz(lsb).  Scalar types
/// are lowered to ctlz(bitreverse(x)) when the subtarget has V6T2 ops;
/// otherwise an empty SDValue is returned.
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
                         const ARMSubtarget *ST) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  if (VT.isVector()) {
    assert(ST->hasNEON());

    // Compute the least significant set bit: LSB = X & -X
    SDValue X = N->getOperand(0);
    SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
    SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);

    EVT ElemTy = VT.getVectorElementType();

    if (ElemTy == MVT::i8) {
      // Compute with: cttz(x) = ctpop(lsb - 1)
      SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
                                DAG.getTargetConstant(1, dl, ElemTy));
      SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
      return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
    }

    if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
        (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
      // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
      unsigned NumBits = ElemTy.getSizeInBits();
      SDValue WidthMinus1 =
          DAG.getNode(ARMISD::VMOVIMM, dl, VT,
                      DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
      SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
      return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
    }

    // Compute with: cttz(x) = ctpop(lsb - 1)

    // Since we can only compute the number of bits in a byte with vcnt.8, we
    // have to gather the result with pairwise addition (vpaddl) for i16, i32,
    // and i64.

    // Compute LSB - 1.
    SDValue Bits;
    if (ElemTy == MVT::i64) {
      // Load constant 0xffff'ffff'ffff'ffff to register.
      // Adding all-ones is used here in place of subtracting one.
      SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
                               DAG.getTargetConstant(0x1eff, dl, MVT::i32));
      Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
    } else {
      SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
                                DAG.getTargetConstant(1, dl, ElemTy));
      Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
    }

    // Count #bits with vcnt.8.
    EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
    SDValue BitsVT8 = DAG.getNode(ISD::BITCAST, dl, VT8Bit, Bits);
    SDValue Cnt8 = DAG.getNode(ISD::CTPOP, dl, VT8Bit, BitsVT8);

    // Gather the #bits with vpaddl (pairwise add.)
    EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
    SDValue Cnt16 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT16Bit,
        DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
        Cnt8);
    if (ElemTy == MVT::i16)
      return Cnt16;

    EVT VT32Bit = VT.is64BitVector() ? MVT::v2i32 : MVT::v4i32;
    SDValue Cnt32 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT32Bit,
        DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
        Cnt16);
    if (ElemTy == MVT::i32)
      return Cnt32;

    assert(ElemTy == MVT::i64);
    SDValue Cnt64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
        DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
        Cnt32);
    return Cnt64;
  }

  if (!ST->hasV6T2Ops())
    return SDValue();

  // Scalar case: count leading zeros of the bit-reversed input.
  SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
  return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
}

/// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
/// for each 16-bit element from operand, repeated.  The basic idea is to
/// leverage vcnt to get the 8-bit counts, gather and add the results.
/// /// Trace for v4i16: /// input = [v0 v1 v2 v3 ] (vi 16-bit element) /// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element) /// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi) /// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6] /// [b0 b1 b2 b3 b4 b5 b6 b7] /// +[b1 b0 b3 b2 b5 b4 b7 b6] /// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0, /// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits) static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); SDLoc DL(N); EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8; SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0)); SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0); SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1); SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2); return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3); } /// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the /// bit-count for each 16-bit element from the operand. We need slightly /// different sequencing for v4i16 and v8i16 to stay within NEON's available /// 64/128-bit registers. 
/// /// Trace for v4i16: /// input = [v0 v1 v2 v3 ] (vi 16-bit element) /// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi) /// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ] /// v4i16:Extracted = [k0 k1 k2 k3 ] static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); SDLoc DL(N); SDValue BitCounts = getCTPOP16BitCounts(N, DAG); if (VT.is64BitVector()) { SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended, DAG.getIntPtrConstant(0, DL)); } else { SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, BitCounts, DAG.getIntPtrConstant(0, DL)); return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted); } } /// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the /// bit-count for each 32-bit element from the operand. The idea here is /// to split the vector into 16-bit elements, leverage the 16-bit count /// routine, and then combine the results. /// /// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged): /// input = [v0 v1 ] (vi: 32-bit elements) /// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1]) /// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi) /// vrev: N0 = [k1 k0 k3 k2 ] /// [k0 k1 k2 k3 ] /// N1 =+[k1 k0 k3 k2 ] /// [k0 k2 k1 k3 ] /// N2 =+[k1 k3 k0 k2 ] /// [k0 k2 k1 k3 ] /// Extended =+[k1 k3 k0 k2 ] /// [k0 k2 ] /// Extracted=+[k1 k3 ] /// static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); SDLoc DL(N); EVT VT16Bit = VT.is64BitVector() ? 
MVT::v4i16 : MVT::v8i16; SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0)); SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG); SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16); SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0); SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1); if (VT.is64BitVector()) { SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended, DAG.getIntPtrConstant(0, DL)); } else { SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2, DAG.getIntPtrConstant(0, DL)); return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted); } } static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); assert(ST->hasNEON() && "Custom ctpop lowering requires NEON."); assert((VT == MVT::v2i32 || VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) && "Unexpected type for custom ctpop lowering"); if (VT.getVectorElementType() == MVT::i32) return lowerCTPOP32BitElements(N, DAG); else return lowerCTPOP16BitElements(N, DAG); } static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); SDLoc dl(N); if (!VT.isVector()) return SDValue(); // Lower vector shifts on NEON to use VSHL. assert(ST->hasNEON() && "unexpected vector shift"); // Left shifts translate directly to the vshiftu intrinsic. if (N->getOpcode() == ISD::SHL) return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::arm_neon_vshiftu, dl, MVT::i32), N->getOperand(0), N->getOperand(1)); assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode"); // NEON uses the same intrinsics for both left and right shifts. For // right shifts, the shift amounts are negative, so negate the vector of // shift amounts. 
EVT ShiftVT = N->getOperand(1).getValueType(); SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1)); Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? Intrinsic::arm_neon_vshifts : Intrinsic::arm_neon_vshiftu); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(vshiftInt, dl, MVT::i32), N->getOperand(0), NegatedCount); } static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); SDLoc dl(N); // We can get here for a node like i32 = ISD::SHL i32, i64 if (VT != MVT::i64) return SDValue(); assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && "Unknown shift to lower!"); // We only lower SRA, SRL of 1 here, all others use generic lowering. if (!isOneConstant(N->getOperand(1))) return SDValue(); // If we are in thumb mode, we don't have RRX. if (ST->isThumb1Only()) return SDValue(); // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), DAG.getConstant(0, dl, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), DAG.getConstant(1, dl, MVT::i32)); // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and // captures the result into a carry flag. unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi); // The low part is an ARMISD::RRX operand, which shifts the carry in. Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); // Merge the pieces into a single i64 value. 
return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); } static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { SDValue TmpOp0, TmpOp1; bool Invert = false; bool Swap = false; unsigned Opc = 0; SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue CC = Op.getOperand(2); EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger(); EVT VT = Op.getValueType(); ISD::CondCode SetCCOpcode = cast(CC)->get(); SDLoc dl(Op); if (Op0.getValueType().getVectorElementType() == MVT::i64 && (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) { // Special-case integer 64-bit equality comparisons. They aren't legal, // but they can be lowered with a few vector instructions. unsigned CmpElements = CmpVT.getVectorNumElements() * 2; EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements); SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0); SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1); SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1, DAG.getCondCode(ISD::SETEQ)); SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp); SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed); Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged); if (SetCCOpcode == ISD::SETNE) Merged = DAG.getNOT(dl, Merged, CmpVT); Merged = DAG.getSExtOrTrunc(Merged, dl, VT); return Merged; } if (CmpVT.getVectorElementType() == MVT::i64) // 64-bit comparisons are not legal in general. 
return SDValue(); if (Op1.getValueType().isFloatingPoint()) { switch (SetCCOpcode) { default: llvm_unreachable("Illegal FP comparison"); case ISD::SETUNE: case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH; case ISD::SETOEQ: case ISD::SETEQ: Opc = ARMISD::VCEQ; break; case ISD::SETOLT: case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETOGT: case ISD::SETGT: Opc = ARMISD::VCGT; break; case ISD::SETOLE: case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETOGE: case ISD::SETGE: Opc = ARMISD::VCGE; break; case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; case ISD::SETUGT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH; case ISD::SETONE: // Expand this to (OLT | OGT). TmpOp0 = Op0; TmpOp1 = Op1; Opc = ISD::OR; Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0); Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1); break; case ISD::SETUO: Invert = true; LLVM_FALLTHROUGH; case ISD::SETO: // Expand this to (OLT | OGE). TmpOp0 = Op0; TmpOp1 = Op1; Opc = ISD::OR; Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0); Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1); break; } } else { // Integer comparisons. switch (SetCCOpcode) { default: llvm_unreachable("Illegal integer comparison"); case ISD::SETNE: Invert = true; case ISD::SETEQ: Opc = ARMISD::VCEQ; break; case ISD::SETLT: Swap = true; case ISD::SETGT: Opc = ARMISD::VCGT; break; case ISD::SETLE: Swap = true; case ISD::SETGE: Opc = ARMISD::VCGE; break; case ISD::SETULT: Swap = true; case ISD::SETUGT: Opc = ARMISD::VCGTU; break; case ISD::SETULE: Swap = true; case ISD::SETUGE: Opc = ARMISD::VCGEU; break; } // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). 
if (Opc == ARMISD::VCEQ) { SDValue AndOp; if (ISD::isBuildVectorAllZeros(Op1.getNode())) AndOp = Op0; else if (ISD::isBuildVectorAllZeros(Op0.getNode())) AndOp = Op1; // Ignore bitconvert. if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST) AndOp = AndOp.getOperand(0); if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { Opc = ARMISD::VTST; Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0)); Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1)); Invert = !Invert; } } } if (Swap) std::swap(Op0, Op1); // If one of the operands is a constant vector zero, attempt to fold the // comparison to a specialized compare-against-zero form. SDValue SingleOp; if (ISD::isBuildVectorAllZeros(Op1.getNode())) SingleOp = Op0; else if (ISD::isBuildVectorAllZeros(Op0.getNode())) { if (Opc == ARMISD::VCGE) Opc = ARMISD::VCLEZ; else if (Opc == ARMISD::VCGT) Opc = ARMISD::VCLTZ; SingleOp = Op1; } SDValue Result; if (SingleOp.getNode()) { switch (Opc) { case ARMISD::VCEQ: Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break; case ARMISD::VCGE: Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break; case ARMISD::VCLEZ: Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break; case ARMISD::VCGT: Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break; case ARMISD::VCLTZ: Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break; default: Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1); } } else { Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1); } Result = DAG.getSExtOrTrunc(Result, dl, VT); if (Invert) Result = DAG.getNOT(dl, Result, VT); return Result; } static SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDValue Carry = Op.getOperand(2); SDValue Cond = Op.getOperand(3); SDLoc DL(Op); assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only."); assert(Carry.getOpcode() != ISD::CARRY_FALSE); SDVTList VTs = 
DAG.getVTList(LHS.getValueType(), MVT::i32); SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry); SDValue FVal = DAG.getConstant(0, DL, MVT::i32); SDValue TVal = DAG.getConstant(1, DL, MVT::i32); SDValue ARMcc = DAG.getConstant( IntCCToARMCC(cast(Cond)->get()), DL, MVT::i32); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR, Cmp.getValue(1), SDValue()); return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc, CCR, Chain.getValue(1)); } /// isNEONModifiedImm - Check if the specified splat value corresponds to a /// valid vector constant for a NEON instruction with a "modified immediate" /// operand (e.g., VMOV). If so, return the encoded value. static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, const SDLoc &dl, EVT &VT, bool is128Bits, NEONModImmType type) { unsigned OpCmode, Imm; // SplatBitSize is set to the smallest size that splats the vector, so a // zero vector will always have SplatBitSize == 8. However, NEON modified // immediate instructions others than VMOV do not support the 8-bit encoding // of a zero vector, and the default encoding of zero is supposed to be the // 32-bit version. if (SplatBits == 0) SplatBitSize = 32; switch (SplatBitSize) { case 8: if (type != VMOVModImm) return SDValue(); // Any 1-byte value is OK. Op=0, Cmode=1110. assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); OpCmode = 0xe; Imm = SplatBits; VT = is128Bits ? MVT::v16i8 : MVT::v8i8; break; case 16: // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. VT = is128Bits ? MVT::v8i16 : MVT::v4i16; if ((SplatBits & ~0xff) == 0) { // Value = 0x00nn: Op=x, Cmode=100x. OpCmode = 0x8; Imm = SplatBits; break; } if ((SplatBits & ~0xff00) == 0) { // Value = 0xnn00: Op=x, Cmode=101x. 
OpCmode = 0xa; Imm = SplatBits >> 8; break; } return SDValue(); case 32: // NEON's 32-bit VMOV supports splat values where: // * only one byte is nonzero, or // * the least significant byte is 0xff and the second byte is nonzero, or // * the least significant 2 bytes are 0xff and the third is nonzero. VT = is128Bits ? MVT::v4i32 : MVT::v2i32; if ((SplatBits & ~0xff) == 0) { // Value = 0x000000nn: Op=x, Cmode=000x. OpCmode = 0; Imm = SplatBits; break; } if ((SplatBits & ~0xff00) == 0) { // Value = 0x0000nn00: Op=x, Cmode=001x. OpCmode = 0x2; Imm = SplatBits >> 8; break; } if ((SplatBits & ~0xff0000) == 0) { // Value = 0x00nn0000: Op=x, Cmode=010x. OpCmode = 0x4; Imm = SplatBits >> 16; break; } if ((SplatBits & ~0xff000000) == 0) { // Value = 0xnn000000: Op=x, Cmode=011x. OpCmode = 0x6; Imm = SplatBits >> 24; break; } // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC if (type == OtherModImm) return SDValue(); if ((SplatBits & ~0xffff) == 0 && ((SplatBits | SplatUndef) & 0xff) == 0xff) { // Value = 0x0000nnff: Op=x, Cmode=1100. OpCmode = 0xc; Imm = SplatBits >> 8; break; } if ((SplatBits & ~0xffffff) == 0 && ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { // Value = 0x00nnffff: Op=x, Cmode=1101. OpCmode = 0xd; Imm = SplatBits >> 16; break; } // Note: there are a few 32-bit splat values (specifically: 00ffff00, // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not // VMOV.I32. A (very) minor optimization would be to replicate the value // and fall through here to test for a valid 64-bit splat. But, then the // caller would also need to check and handle the change in size. return SDValue(); case 64: { if (type != VMOVModImm) return SDValue(); // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. 
uint64_t BitMask = 0xff; uint64_t Val = 0; unsigned ImmMask = 1; Imm = 0; for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { if (((SplatBits | SplatUndef) & BitMask) == BitMask) { Val |= BitMask; Imm |= ImmMask; } else if ((SplatBits & BitMask) != 0) { return SDValue(); } BitMask <<= 8; ImmMask <<= 1; } if (DAG.getDataLayout().isBigEndian()) // swap higher and lower 32 bit word Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4); // Op=1, Cmode=1110. OpCmode = 0x1e; VT = is128Bits ? MVT::v2i64 : MVT::v1i64; break; } default: llvm_unreachable("unexpected size for isNEONModifiedImm"); } unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); return DAG.getTargetConstant(EncodedVal, dl, MVT::i32); } SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const { bool IsDouble = Op.getValueType() == MVT::f64; ConstantFPSDNode *CFP = cast(Op); const APFloat &FPVal = CFP->getValueAPF(); // Prevent floating-point constants from using literal loads // when execute-only is enabled. if (ST->genExecuteOnly()) { APInt INTVal = FPVal.bitcastToAPInt(); SDLoc DL(CFP); if (IsDouble) { SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32); SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32); if (!ST->isLittle()) std::swap(Lo, Hi); return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi); } else { return DAG.getConstant(INTVal, DL, MVT::i32); } } if (!ST->hasVFP3()) return SDValue(); // Use the default (constant pool) lowering for double constants when we have // an SP-only FPU if (IsDouble && Subtarget->isFPOnlySP()) return SDValue(); // Try splatting with a VMOV.f32... int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal); if (ImmVal != -1) { if (IsDouble || !ST->useNEONForSinglePrecisionFP()) { // We have code in place to select a valid ConstantFP already, no need to // do any mangling. return Op; } // It's a float and we are trying to use NEON operations where // possible. 
Lower it to a splat followed by an extract. SDLoc DL(Op); SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32); SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, NewVal); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant, DAG.getConstant(0, DL, MVT::i32)); } // The rest of our options are NEON only, make sure that's allowed before // proceeding.. if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP())) return SDValue(); EVT VMovVT; uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue(); // It wouldn't really be worth bothering for doubles except for one very // important value, which does happen to match: 0.0. So make sure we don't do // anything stupid. if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32)) return SDValue(); // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too). SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT, false, VMOVModImm); if (NewVal != SDValue()) { SDLoc DL(Op); SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT, NewVal); if (IsDouble) return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant); // It's a float: cast and extract a vector element. SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, VecConstant); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, DAG.getConstant(0, DL, MVT::i32)); } // Finally, try a VMVN.i32 NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT, false, VMVNModImm); if (NewVal != SDValue()) { SDLoc DL(Op); SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal); if (IsDouble) return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant); // It's a float: cast and extract a vector element. 
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, VecConstant); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, DAG.getConstant(0, DL, MVT::i32)); } return SDValue(); } // check if an VEXT instruction can handle the shuffle mask when the // vector sources of the shuffle are the same. static bool isSingletonVEXTMask(ArrayRef M, EVT VT, unsigned &Imm) { unsigned NumElts = VT.getVectorNumElements(); // Assume that the first shuffle index is not UNDEF. Fail if it is. if (M[0] < 0) return false; Imm = M[0]; // If this is a VEXT shuffle, the immediate value is the index of the first // element. The other shuffle indices must be the successive elements after // the first one. unsigned ExpectedElt = Imm; for (unsigned i = 1; i < NumElts; ++i) { // Increment the expected index. If it wraps around, just follow it // back to index zero and keep going. ++ExpectedElt; if (ExpectedElt == NumElts) ExpectedElt = 0; if (M[i] < 0) continue; // ignore UNDEF indices if (ExpectedElt != static_cast(M[i])) return false; } return true; } static bool isVEXTMask(ArrayRef M, EVT VT, bool &ReverseVEXT, unsigned &Imm) { unsigned NumElts = VT.getVectorNumElements(); ReverseVEXT = false; // Assume that the first shuffle index is not UNDEF. Fail if it is. if (M[0] < 0) return false; Imm = M[0]; // If this is a VEXT shuffle, the immediate value is the index of the first // element. The other shuffle indices must be the successive elements after // the first one. unsigned ExpectedElt = Imm; for (unsigned i = 1; i < NumElts; ++i) { // Increment the expected index. If it wraps around, it may still be // a VEXT but the source vectors must be swapped. ExpectedElt += 1; if (ExpectedElt == NumElts * 2) { ExpectedElt = 0; ReverseVEXT = true; } if (M[i] < 0) continue; // ignore UNDEF indices if (ExpectedElt != static_cast(M[i])) return false; } // Adjust the index value if the source operands will be swapped. 
if (ReverseVEXT) Imm -= NumElts; return true; } /// isVREVMask - Check if a vector shuffle corresponds to a VREV /// instruction with the specified blocksize. (The order of the elements /// within each block of the vector is reversed.) static bool isVREVMask(ArrayRef M, EVT VT, unsigned BlockSize) { assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && "Only possible block sizes for VREV are: 16, 32, 64"); unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); unsigned BlockElts = M[0] + 1; // If the first shuffle index is UNDEF, be optimistic. if (M[0] < 0) BlockElts = BlockSize / EltSz; if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) return false; for (unsigned i = 0; i < NumElts; ++i) { if (M[i] < 0) continue; // ignore UNDEF indices if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) return false; } return true; } static bool isVTBLMask(ArrayRef M, EVT VT) { // We can handle <8 x i8> vector shuffles. If the index in the mask is out of // range, then 0 is placed into the resulting vector. So pretty much any mask // of 8 elements can work here. return VT == MVT::v8i8 && M.size() == 8; } // Checks whether the shuffle mask represents a vector transpose (VTRN) by // checking that pairs of elements in the shuffle mask represent the same index // in each vector, incrementing the expected index by 2 at each step. // e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6] // v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g} // v2={e,f,g,h} // WhichResult gives the offset for each element in the mask based on which // of the two results it belongs to. 
// // The transpose can be represented either as: // result1 = shufflevector v1, v2, result1_shuffle_mask // result2 = shufflevector v1, v2, result2_shuffle_mask // where v1/v2 and the shuffle masks have the same number of elements // (here WhichResult (see below) indicates which result is being checked) // // or as: // results = shufflevector v1, v2, shuffle_mask // where both results are returned in one vector and the shuffle mask has twice // as many elements as v1/v2 (here WhichResult will always be 0 if true) here we // want to check the low half and high half of the shuffle mask as if it were // the other case static bool isVTRNMask(ArrayRef M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); if (M.size() != NumElts && M.size() != NumElts*2) return false; // If the mask is twice as long as the input vector then we need to check the // upper and lower parts of the mask with a matching value for WhichResult // FIXME: A mask with only even values will be rejected in case the first // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only // M[0] is used to determine WhichResult for (unsigned i = 0; i < M.size(); i += NumElts) { if (M.size() == NumElts * 2) WhichResult = i / NumElts; else WhichResult = M[i] == 0 ? 0 : 1; for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) || (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult)) return false; } } if (M.size() == NumElts*2) WhichResult = 0; return true; } /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. 
static bool isVTRN_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult){ unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); if (M.size() != NumElts && M.size() != NumElts*2) return false; for (unsigned i = 0; i < M.size(); i += NumElts) { if (M.size() == NumElts * 2) WhichResult = i / NumElts; else WhichResult = M[i] == 0 ? 0 : 1; for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) || (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult)) return false; } } if (M.size() == NumElts*2) WhichResult = 0; return true; } // Checks whether the shuffle mask represents a vector unzip (VUZP) by checking // that the mask elements are either all even and in steps of size 2 or all odd // and in steps of size 2. // e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6] // v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g} // v2={e,f,g,h} // Requires similar checks to that of isVTRNMask with // respect the how results are returned. static bool isVUZPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); if (M.size() != NumElts && M.size() != NumElts*2) return false; for (unsigned i = 0; i < M.size(); i += NumElts) { WhichResult = M[i] == 0 ? 0 : 1; for (unsigned j = 0; j < NumElts; ++j) { if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult) return false; } } if (M.size() == NumElts*2) WhichResult = 0; // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. if (VT.is64BitVector() && EltSz == 32) return false; return true; } /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 
/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, static bool isVUZP_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult){ unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); if (M.size() != NumElts && M.size() != NumElts*2) return false; unsigned Half = NumElts / 2; for (unsigned i = 0; i < M.size(); i += NumElts) { WhichResult = M[i] == 0 ? 0 : 1; for (unsigned j = 0; j < NumElts; j += Half) { unsigned Idx = WhichResult; for (unsigned k = 0; k < Half; ++k) { int MIdx = M[i + j + k]; if (MIdx >= 0 && (unsigned) MIdx != Idx) return false; Idx += 2; } } } if (M.size() == NumElts*2) WhichResult = 0; // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. if (VT.is64BitVector() && EltSz == 32) return false; return true; } // Checks whether the shuffle mask represents a vector zip (VZIP) by checking // that pairs of elements of the shufflemask represent the same index in each // vector incrementing sequentially through the vectors. // e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5] // v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f} // v2={e,f,g,h} // Requires similar checks to that of isVTRNMask with respect the how results // are returned. static bool isVZIPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); if (M.size() != NumElts && M.size() != NumElts*2) return false; for (unsigned i = 0; i < M.size(); i += NumElts) { WhichResult = M[i] == 0 ? 0 : 1; unsigned Idx = WhichResult * NumElts / 2; for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) || (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts)) return false; Idx += 1; } } if (M.size() == NumElts*2) WhichResult = 0; // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 
if (VT.is64BitVector() && EltSz == 32) return false; return true; } /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. static bool isVZIP_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult){ unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); if (M.size() != NumElts && M.size() != NumElts*2) return false; for (unsigned i = 0; i < M.size(); i += NumElts) { WhichResult = M[i] == 0 ? 0 : 1; unsigned Idx = WhichResult * NumElts / 2; for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) || (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx)) return false; Idx += 1; } } if (M.size() == NumElts*2) WhichResult = 0; // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. if (VT.is64BitVector() && EltSz == 32) return false; return true; } /// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN), /// and return the corresponding ARMISD opcode if it is, or 0 if it isn't. static unsigned isNEONTwoResultShuffleMask(ArrayRef ShuffleMask, EVT VT, unsigned &WhichResult, bool &isV_UNDEF) { isV_UNDEF = false; if (isVTRNMask(ShuffleMask, VT, WhichResult)) return ARMISD::VTRN; if (isVUZPMask(ShuffleMask, VT, WhichResult)) return ARMISD::VUZP; if (isVZIPMask(ShuffleMask, VT, WhichResult)) return ARMISD::VZIP; isV_UNDEF = true; if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) return ARMISD::VTRN; if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) return ARMISD::VUZP; if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) return ARMISD::VZIP; return 0; } /// \return true if this is a reverse operation on an vector. static bool isReverseMask(ArrayRef M, EVT VT) { unsigned NumElts = VT.getVectorNumElements(); // Make sure the mask has the right size. 
if (NumElts != M.size()) return false; // Look for <15, ..., 3, -1, 1, 0>. for (unsigned i = 0; i != NumElts; ++i) if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i)) return false; return true; } // If N is an integer constant that can be moved into a register in one // instruction, return an SDValue of such a constant (will become a MOV // instruction). Otherwise return null. static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, const ARMSubtarget *ST, const SDLoc &dl) { uint64_t Val; if (!isa(N)) return SDValue(); Val = cast(N)->getZExtValue(); if (ST->isThumb1Only()) { if (Val <= 255 || ~Val <= 255) return DAG.getConstant(Val, dl, MVT::i32); } else { if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1) return DAG.getConstant(Val, dl, MVT::i32); } return SDValue(); } // If this is a case we can't handle, return null and let the default // expansion code take care of it. SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const { BuildVectorSDNode *BVN = cast(Op.getNode()); SDLoc dl(Op); EVT VT = Op.getValueType(); APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { if (SplatUndef.isAllOnesValue()) return DAG.getUNDEF(VT); if (SplatBitSize <= 64) { // Check if an immediate VMOV works. EVT VmovVT; SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT, VT.is128BitVector(), VMOVModImm); if (Val.getNode()) { SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); } // Try an immediate VMVN. 
uint64_t NegatedImm = (~SplatBits).getZExtValue(); Val = isNEONModifiedImm(NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT, VT.is128BitVector(), VMVNModImm); if (Val.getNode()) { SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); } // Use vmov.f32 to materialize other v2f32 and v4f32 splats. if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) { int ImmVal = ARM_AM::getFP32Imm(SplatBits); if (ImmVal != -1) { SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32); return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val); } } } } // Scan through the operands to see if only one value is used. // // As an optimisation, even if more than one value is used it may be more // profitable to splat with one value then change some lanes. // // Heuristically we decide to do this if the vector has a "dominant" value, // defined as splatted to more than half of the lanes. unsigned NumElts = VT.getVectorNumElements(); bool isOnlyLowElement = true; bool usesOnlyOneValue = true; bool hasDominantValue = false; bool isConstant = true; // Map of the number of times a particular SDValue appears in the // element list. DenseMap ValueCounts; SDValue Value; for (unsigned i = 0; i < NumElts; ++i) { SDValue V = Op.getOperand(i); if (V.isUndef()) continue; if (i > 0) isOnlyLowElement = false; if (!isa(V) && !isa(V)) isConstant = false; ValueCounts.insert(std::make_pair(V, 0)); unsigned &Count = ValueCounts[V]; // Is this value dominant? (takes up more than half of the lanes) if (++Count > (NumElts / 2)) { hasDominantValue = true; Value = V; } } if (ValueCounts.size() != 1) usesOnlyOneValue = false; if (!Value.getNode() && ValueCounts.size() > 0) Value = ValueCounts.begin()->first; if (ValueCounts.size() == 0) return DAG.getUNDEF(VT); // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR. // Keep going if we are hitting this case. 
if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode())) return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); unsigned EltSize = VT.getScalarSizeInBits(); // Use VDUP for non-constant splats. For f32 constant splats, reduce to // i32 and try again. if (hasDominantValue && EltSize <= 32) { if (!isConstant) { SDValue N; // If we are VDUPing a value that comes directly from a vector, that will // cause an unnecessary move to and from a GPR, where instead we could // just use VDUPLANE. We can only do this if the lane being extracted // is at a constant index, as the VDUP from lane instructions only have // constant-index forms. ConstantSDNode *constIndex; if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT && (constIndex = dyn_cast(Value->getOperand(1)))) { // We need to create a new undef vector to use for the VDUPLANE if the // size of the vector from which we get the value is different than the // size of the vector that we need to create. We will insert the element // such that the register coalescer will remove unnecessary copies. if (VT != Value->getOperand(0).getValueType()) { unsigned index = constIndex->getAPIntValue().getLimitedValue() % VT.getVectorNumElements(); N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT), Value, DAG.getConstant(index, dl, MVT::i32)), DAG.getConstant(index, dl, MVT::i32)); } else N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, Value->getOperand(0), Value->getOperand(1)); } else N = DAG.getNode(ARMISD::VDUP, dl, VT, Value); if (!usesOnlyOneValue) { // The dominant value was splatted as 'N', but we now have to insert // all differing elements. 
for (unsigned I = 0; I < NumElts; ++I) { if (Op.getOperand(I) == Value) continue; SmallVector Ops; Ops.push_back(N); Ops.push_back(Op.getOperand(I)); Ops.push_back(DAG.getConstant(I, dl, MVT::i32)); N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops); } } return N; } if (VT.getVectorElementType().isFloatingPoint()) { SmallVector Ops; for (unsigned i = 0; i < NumElts; ++i) Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(i))); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); SDValue Val = DAG.getBuildVector(VecVT, dl, Ops); Val = LowerBUILD_VECTOR(Val, DAG, ST); if (Val.getNode()) return DAG.getNode(ISD::BITCAST, dl, VT, Val); } if (usesOnlyOneValue) { SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); if (isConstant && Val.getNode()) return DAG.getNode(ARMISD::VDUP, dl, VT, Val); } } // If all elements are constants and the case above didn't get hit, fall back // to the default expansion, which will generate a load from the constant // pool. if (isConstant) return SDValue(); // Empirical tests suggest this is rarely worth it for vectors of length <= 2. if (NumElts >= 4) { SDValue shuffle = ReconstructShuffle(Op, DAG); if (shuffle != SDValue()) return shuffle; } if (VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) { // If we haven't found an efficient lowering, try splitting a 128-bit vector // into two 64-bit vectors; we might discover a better way to lower it. 
SmallVector Ops(Op->op_begin(), Op->op_begin() + NumElts); EVT ExtVT = VT.getVectorElementType(); EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2); SDValue Lower = DAG.getBuildVector(HVT, dl, makeArrayRef(&Ops[0], NumElts / 2)); if (Lower.getOpcode() == ISD::BUILD_VECTOR) Lower = LowerBUILD_VECTOR(Lower, DAG, ST); SDValue Upper = DAG.getBuildVector( HVT, dl, makeArrayRef(&Ops[NumElts / 2], NumElts / 2)); if (Upper.getOpcode() == ISD::BUILD_VECTOR) Upper = LowerBUILD_VECTOR(Upper, DAG, ST); if (Lower && Upper) return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper); } // Vectors with 32- or 64-bit elements can be built by directly assigning // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands // will be legalized. if (EltSize >= 32) { // Do the expansion with floating-point types, since that is what the VFP // registers are defined to use, and since i64 is not legal. EVT EltVT = EVT::getFloatingPointVT(EltSize); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); SmallVector Ops; for (unsigned i = 0; i < NumElts; ++i) Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i))); SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, Val); } // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we // know the default expansion would otherwise fall back on something even // worse. For a vector with one or two non-undef values, that's // scalar_to_vector for the elements followed by a shuffle (provided the // shuffle is valid for the target) and materialization element by element // on the stack followed by a load for everything else. 
if (!isConstant && !usesOnlyOneValue) { SDValue Vec = DAG.getUNDEF(VT); for (unsigned i = 0 ; i < NumElts; ++i) { SDValue V = Op.getOperand(i); if (V.isUndef()) continue; SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32); Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx); } return Vec; } return SDValue(); } // Gather data to see if the operation can be modelled as a // shuffle in combination with VEXTs. SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!"); SDLoc dl(Op); EVT VT = Op.getValueType(); unsigned NumElts = VT.getVectorNumElements(); struct ShuffleSourceInfo { SDValue Vec; unsigned MinElt; unsigned MaxElt; // We may insert some combination of BITCASTs and VEXT nodes to force Vec to // be compatible with the shuffle we intend to construct. As a result // ShuffleVec will be some sliding window into the original Vec. SDValue ShuffleVec; // Code should guarantee that element i in Vec starts at element "WindowBase // + i * WindowScale in ShuffleVec". int WindowBase; int WindowScale; bool operator ==(SDValue OtherVec) { return Vec == OtherVec; } ShuffleSourceInfo(SDValue Vec) : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0), WindowScale(1) {} }; // First gather all vectors used as an immediate source for this BUILD_VECTOR // node. SmallVector Sources; for (unsigned i = 0; i < NumElts; ++i) { SDValue V = Op.getOperand(i); if (V.isUndef()) continue; else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) { // A shuffle can only come from building a vector from various // elements of other vectors. return SDValue(); } else if (!isa(V.getOperand(1))) { // Furthermore, shuffles require a constant mask, whereas extractelts // accept variable indices. return SDValue(); } // Add this element source to the list if it's not already there. 
SDValue SourceVec = V.getOperand(0); auto Source = find(Sources, SourceVec); if (Source == Sources.end()) Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec)); // Update the minimum and maximum lane number seen. unsigned EltNo = cast(V.getOperand(1))->getZExtValue(); Source->MinElt = std::min(Source->MinElt, EltNo); Source->MaxElt = std::max(Source->MaxElt, EltNo); } // Currently only do something sane when at most two source vectors // are involved. if (Sources.size() > 2) return SDValue(); // Find out the smallest element size among result and two sources, and use // it as element size to build the shuffle_vector. EVT SmallestEltTy = VT.getVectorElementType(); for (auto &Source : Sources) { EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType(); if (SrcEltTy.bitsLT(SmallestEltTy)) SmallestEltTy = SrcEltTy; } unsigned ResMultiplier = VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits(); NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits(); EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts); // If the source vector is too wide or too narrow, we may nevertheless be able // to construct a compatible shuffle either by concatenating it with UNDEF or // extracting a suitable range of elements. for (auto &Src : Sources) { EVT SrcVT = Src.ShuffleVec.getValueType(); if (SrcVT.getSizeInBits() == VT.getSizeInBits()) continue; // This stage of the search produces a source with the same element type as // the original, but with a total width matching the BUILD_VECTOR output. EVT EltVT = SrcVT.getVectorElementType(); unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits(); EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts); if (SrcVT.getSizeInBits() < VT.getSizeInBits()) { if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits()) return SDValue(); // We can pad out the smaller vector for free, so if it's part of a // shuffle... 
Src.ShuffleVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec, DAG.getUNDEF(Src.ShuffleVec.getValueType())); continue; } if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits()) return SDValue(); if (Src.MaxElt - Src.MinElt >= NumSrcElts) { // Span too large for a VEXT to cope return SDValue(); } if (Src.MinElt >= NumSrcElts) { // The extraction can just take the second half Src.ShuffleVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, DAG.getConstant(NumSrcElts, dl, MVT::i32)); Src.WindowBase = -NumSrcElts; } else if (Src.MaxElt < NumSrcElts) { // The extraction can just take the first half Src.ShuffleVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, DAG.getConstant(0, dl, MVT::i32)); } else { // An actual VEXT is needed SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, DAG.getConstant(0, dl, MVT::i32)); SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, DAG.getConstant(NumSrcElts, dl, MVT::i32)); Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1, VEXTSrc2, DAG.getConstant(Src.MinElt, dl, MVT::i32)); Src.WindowBase = -Src.MinElt; } } // Another possible incompatibility occurs from the vector element types. We // can fix this by bitcasting the source vectors to the same type we intend // for the shuffle. for (auto &Src : Sources) { EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType(); if (SrcEltTy == SmallestEltTy) continue; assert(ShuffleVT.getVectorElementType() == SmallestEltTy); Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec); Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits(); Src.WindowBase *= Src.WindowScale; } // Final sanity check before we try to actually produce a shuffle. DEBUG( for (auto Src : Sources) assert(Src.ShuffleVec.getValueType() == ShuffleVT); ); // The stars all align, our next step is to produce the mask for the shuffle. 
SmallVector Mask(ShuffleVT.getVectorNumElements(), -1); int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits(); for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) { SDValue Entry = Op.getOperand(i); if (Entry.isUndef()) continue; auto Src = find(Sources, Entry.getOperand(0)); int EltNo = cast(Entry.getOperand(1))->getSExtValue(); // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit // trunc. So only std::min(SrcBits, DestBits) actually get defined in this // segment. EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType(); int BitsDefined = std::min(OrigEltTy.getSizeInBits(), VT.getScalarSizeInBits()); int LanesDefined = BitsDefined / BitsPerShuffleLane; // This source is expected to fill ResMultiplier lanes of the final shuffle, // starting at the appropriate offset. int *LaneMask = &Mask[i * ResMultiplier]; int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase; ExtractBase += NumElts * (Src - Sources.begin()); for (int j = 0; j < LanesDefined; ++j) LaneMask[j] = ExtractBase + j; } // Final check before we try to produce nonsense... if (!isShuffleMaskLegal(Mask, ShuffleVT)) return SDValue(); // We can't handle more than two sources. This should have already // been checked before this point. assert(Sources.size() <= 2 && "Too many sources!"); SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) }; for (unsigned i = 0; i < Sources.size(); ++i) ShuffleOps[i] = Sources[i].ShuffleVec; SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0], ShuffleOps[1], Mask); return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle); } /// isShuffleMaskLegal - Targets can use this to indicate that they only /// support *some* VECTOR_SHUFFLE operations, those with specific masks. /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values /// are assumed to be legal. 
bool ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, EVT VT) const { if (VT.getVectorNumElements() == 4 && (VT.is128BitVector() || VT.is64BitVector())) { unsigned PFIndexes[4]; for (unsigned i = 0; i != 4; ++i) { if (M[i] < 0) PFIndexes[i] = 8; else PFIndexes[i] = M[i]; } // Compute the index in the perfect shuffle table. unsigned PFTableIndex = PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; unsigned Cost = (PFEntry >> 30); if (Cost <= 4) return true; } bool ReverseVEXT, isV_UNDEF; unsigned Imm, WhichResult; unsigned EltSize = VT.getScalarSizeInBits(); return (EltSize >= 32 || ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isVREVMask(M, VT, 64) || isVREVMask(M, VT, 32) || isVREVMask(M, VT, 16) || isVEXTMask(M, VT, ReverseVEXT, Imm) || isVTBLMask(M, VT) || isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF) || ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT))); } /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit /// the specified operations to build the shuffle. 
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl) { unsigned OpNum = (PFEntry >> 26) & 0x0F; unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); enum { OP_COPY = 0, // Copy, used for things like to say it is <0,1,2,3> OP_VREV, OP_VDUP0, OP_VDUP1, OP_VDUP2, OP_VDUP3, OP_VEXT1, OP_VEXT2, OP_VEXT3, OP_VUZPL, // VUZP, left result OP_VUZPR, // VUZP, right result OP_VZIPL, // VZIP, left result OP_VZIPR, // VZIP, right result OP_VTRNL, // VTRN, left result OP_VTRNR // VTRN, right result }; if (OpNum == OP_COPY) { if (LHSID == (1*9+2)*9+3) return LHS; assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); return RHS; } SDValue OpLHS, OpRHS; OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); EVT VT = OpLHS.getValueType(); switch (OpNum) { default: llvm_unreachable("Unknown shuffle opcode!"); case OP_VREV: // VREV divides the vector in half and swaps within the half. 
if (VT.getVectorElementType() == MVT::i32 || VT.getVectorElementType() == MVT::f32) return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS); // vrev <4 x i16> -> VREV32 if (VT.getVectorElementType() == MVT::i16) return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS); // vrev <4 x i8> -> VREV16 assert(VT.getVectorElementType() == MVT::i8); return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS); case OP_VDUP0: case OP_VDUP1: case OP_VDUP2: case OP_VDUP3: return DAG.getNode(ARMISD::VDUPLANE, dl, VT, OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32)); case OP_VEXT1: case OP_VEXT2: case OP_VEXT3: return DAG.getNode(ARMISD::VEXT, dl, VT, OpLHS, OpRHS, DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32)); case OP_VUZPL: case OP_VUZPR: return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS).getValue(OpNum-OP_VUZPL); case OP_VZIPL: case OP_VZIPR: return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS).getValue(OpNum-OP_VZIPL); case OP_VTRNL: case OP_VTRNR: return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS).getValue(OpNum-OP_VTRNL); } } static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, ArrayRef ShuffleMask, SelectionDAG &DAG) { // Check to see if we can use the VTBL instruction. 
SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); SDLoc DL(Op); SmallVector VTBLMask; for (ArrayRef::iterator I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I) VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32)); if (V2.getNode()->isUndef()) return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1, DAG.getBuildVector(MVT::v8i8, DL, VTBLMask)); return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2, DAG.getBuildVector(MVT::v8i8, DL, VTBLMask)); } static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, SelectionDAG &DAG) { SDLoc DL(Op); SDValue OpLHS = Op.getOperand(0); EVT VT = OpLHS.getValueType(); assert((VT == MVT::v8i16 || VT == MVT::v16i8) && "Expect an v8i16/v16i8 type"); OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS); // For a v16i8 type: After the VREV, we have got <8, ...15, 8, ..., 0>. Now, // extract the first 8 bytes into the top double word and the last 8 bytes // into the bottom double word. The v8i16 case is similar. unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4; return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS, DAG.getConstant(ExtractNum, DL, MVT::i32)); } static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); SDLoc dl(Op); EVT VT = Op.getValueType(); ShuffleVectorSDNode *SVN = cast(Op.getNode()); // Convert shuffles that are directly supported on NEON to target-specific // DAG nodes, instead of keeping them as shuffles and matching them again // during code selection. This is more efficient and avoids the possibility // of inconsistencies between legalization and selection. // FIXME: floating-point vectors should be canonicalized to integer vectors // of the same time so that they get CSEd properly. ArrayRef ShuffleMask = SVN->getMask(); unsigned EltSize = VT.getScalarSizeInBits(); if (EltSize <= 32) { if (SVN->isSplat()) { int Lane = SVN->getSplatIndex(); // If this is undef splat, generate it via "just" vdup, if possible. 
if (Lane == -1) Lane = 0; // Test if V1 is a SCALAR_TO_VECTOR. if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); } // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR // (and probably will turn into a SCALAR_TO_VECTOR once legalization // reaches it). if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && !isa(V1.getOperand(0))) { bool IsScalarToVector = true; for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) if (!V1.getOperand(i).isUndef()) { IsScalarToVector = false; break; } if (IsScalarToVector) return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); } return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, DAG.getConstant(Lane, dl, MVT::i32)); } bool ReverseVEXT; unsigned Imm; if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { if (ReverseVEXT) std::swap(V1, V2); return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, DAG.getConstant(Imm, dl, MVT::i32)); } if (isVREVMask(ShuffleMask, VT, 64)) return DAG.getNode(ARMISD::VREV64, dl, VT, V1); if (isVREVMask(ShuffleMask, VT, 32)) return DAG.getNode(ARMISD::VREV32, dl, VT, V1); if (isVREVMask(ShuffleMask, VT, 16)) return DAG.getNode(ARMISD::VREV16, dl, VT, V1); if (V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) { return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1, DAG.getConstant(Imm, dl, MVT::i32)); } // Check for Neon shuffles that modify both input vectors in place. // If both results are used, i.e., if there are two shuffles with the same // source operands and with masks corresponding to both results of one of // these operations, DAG memoization will ensure that a single node is // used for both shuffles. 
unsigned WhichResult; bool isV_UNDEF; if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask( ShuffleMask, VT, WhichResult, isV_UNDEF)) { if (isV_UNDEF) V2 = V1; return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2) .getValue(WhichResult); } // Also check for these shuffles through CONCAT_VECTORS: we canonicalize // shuffles that produce a result larger than their operands with: // shuffle(concat(v1, undef), concat(v2, undef)) // -> // shuffle(concat(v1, v2), undef) // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine). // // This is useful in the general case, but there are special cases where // native shuffles produce larger results: the two-result ops. // // Look through the concat when lowering them: // shuffle(concat(v1, v2), undef) // -> // concat(VZIP(v1, v2):0, :1) // if (V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) { SDValue SubV1 = V1->getOperand(0); SDValue SubV2 = V1->getOperand(1); EVT SubVT = SubV1.getValueType(); // We expect these to have been canonicalized to -1. assert(all_of(ShuffleMask, [&](int i) { return i < (int)VT.getVectorNumElements(); }) && "Unexpected shuffle index into UNDEF operand!"); if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask( ShuffleMask, SubVT, WhichResult, isV_UNDEF)) { if (isV_UNDEF) SubV2 = SubV1; assert((WhichResult == 0) && "In-place shuffle of concat can only have one result!"); SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT), SubV1, SubV2); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0), Res.getValue(1)); } } } // If the shuffle is not directly supported and it has 4 elements, use // the PerfectShuffle-generated table to synthesize it from other shuffles. unsigned NumElts = VT.getVectorNumElements(); if (NumElts == 4) { unsigned PFIndexes[4]; for (unsigned i = 0; i != 4; ++i) { if (ShuffleMask[i] < 0) PFIndexes[i] = 8; else PFIndexes[i] = ShuffleMask[i]; } // Compute the index in the perfect shuffle table. 
unsigned PFTableIndex = PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; unsigned Cost = (PFEntry >> 30); if (Cost <= 4) return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); } // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs. if (EltSize >= 32) { // Do the expansion with floating-point types, since that is what the VFP // registers are defined to use, and since i64 is not legal. EVT EltVT = EVT::getFloatingPointVT(EltSize); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1); V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2); SmallVector Ops; for (unsigned i = 0; i < NumElts; ++i) { if (ShuffleMask[i] < 0) Ops.push_back(DAG.getUNDEF(EltVT)); else Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ShuffleMask[i] < (int)NumElts ? V1 : V2, DAG.getConstant(ShuffleMask[i] & (NumElts-1), dl, MVT::i32))); } SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, Val); } if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT)) return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG); if (VT == MVT::v8i8) if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG)) return NewOp; return SDValue(); } static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { // INSERT_VECTOR_ELT is legal only for immediate indexes. SDValue Lane = Op.getOperand(2); if (!isa(Lane)) return SDValue(); return Op; } static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { // EXTRACT_VECTOR_ELT is legal only for immediate indexes. 
SDValue Lane = Op.getOperand(1); if (!isa(Lane)) return SDValue(); SDValue Vec = Op.getOperand(0); if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) { SDLoc dl(Op); return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); } return Op; } static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { // The only time a CONCAT_VECTORS operation can have legal types is when // two 64-bit vectors are concatenated to a 128-bit vector. assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && "unexpected CONCAT_VECTORS"); SDLoc dl(Op); SDValue Val = DAG.getUNDEF(MVT::v2f64); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); if (!Op0.isUndef()) Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0), DAG.getIntPtrConstant(0, dl)); if (!Op1.isUndef()) Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1), DAG.getIntPtrConstant(1, dl)); return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val); } /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each /// element has been zero/sign-extended, depending on the isSigned parameter, /// from an integer type half its size. static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, bool isSigned) { // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32. EVT VT = N->getValueType(0); if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) { SDNode *BVN = N->getOperand(0).getNode(); if (BVN->getValueType(0) != MVT::v4i32 || BVN->getOpcode() != ISD::BUILD_VECTOR) return false; unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 
1 : 0; unsigned HiElt = 1 - LoElt; ConstantSDNode *Lo0 = dyn_cast(BVN->getOperand(LoElt)); ConstantSDNode *Hi0 = dyn_cast(BVN->getOperand(HiElt)); ConstantSDNode *Lo1 = dyn_cast(BVN->getOperand(LoElt+2)); ConstantSDNode *Hi1 = dyn_cast(BVN->getOperand(HiElt+2)); if (!Lo0 || !Hi0 || !Lo1 || !Hi1) return false; if (isSigned) { if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 && Hi1->getSExtValue() == Lo1->getSExtValue() >> 32) return true; } else { if (Hi0->isNullValue() && Hi1->isNullValue()) return true; } return false; } if (N->getOpcode() != ISD::BUILD_VECTOR) return false; for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDNode *Elt = N->getOperand(i).getNode(); if (ConstantSDNode *C = dyn_cast(Elt)) { unsigned EltSize = VT.getScalarSizeInBits(); unsigned HalfSize = EltSize / 2; if (isSigned) { if (!isIntN(HalfSize, C->getSExtValue())) return false; } else { if (!isUIntN(HalfSize, C->getZExtValue())) return false; } continue; } return false; } return true; } /// isSignExtended - Check if a node is a vector value that is sign-extended /// or a constant BUILD_VECTOR with sign-extended elements. static bool isSignExtended(SDNode *N, SelectionDAG &DAG) { if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N)) return true; if (isExtendedBUILD_VECTOR(N, DAG, true)) return true; return false; } /// isZeroExtended - Check if a node is a vector value that is zero-extended /// or a constant BUILD_VECTOR with zero-extended elements. 
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N)) return true; if (isExtendedBUILD_VECTOR(N, DAG, false)) return true; return false; } static EVT getExtensionTo64Bits(const EVT &OrigVT) { if (OrigVT.getSizeInBits() >= 64) return OrigVT; assert(OrigVT.isSimple() && "Expecting a simple value type"); MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy; switch (OrigSimpleTy) { default: llvm_unreachable("Unexpected Vector Type"); case MVT::v2i8: case MVT::v2i16: return MVT::v2i32; case MVT::v4i8: return MVT::v4i16; } } /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL. /// We insert the required extension here to get the vector to fill a D register. static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, const EVT &OrigTy, const EVT &ExtTy, unsigned ExtOpcode) { // The vector originally had a size of OrigTy. It was then extended to ExtTy. // We expect the ExtTy to be 128-bits total. If the OrigTy is less than // 64-bits we need to insert a new extension so that it will be 64-bits. assert(ExtTy.is128BitVector() && "Unexpected extension size"); if (OrigTy.getSizeInBits() >= 64) return N; // Must extend size to at least 64 bits to be used as an operand for VMULL. EVT NewVT = getExtensionTo64Bits(OrigTy); return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N); } /// SkipLoadExtensionForVMULL - return a load of the original vector size that /// does not do any sign/zero extension. If the original vector is less /// than 64 bits, an appropriate extension will be added after the load to /// reach a total size of 64 bits. We have to add the extension separately /// because ARM does not have a sign/zero extending load for vectors. 
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT()); // The load already has the right type. if (ExtendedTy == LD->getMemoryVT()) return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), LD->getAlignment(), LD->getMemOperand()->getFlags()); // We need to create a zextload/sextload. We cannot just create a load // followed by a zext/zext node because LowerMUL is also run during normal // operation legalization where we can't create illegal types. return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy, LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), LD->getMemoryVT(), LD->getAlignment(), LD->getMemOperand()->getFlags()); } /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, /// extending load, or BUILD_VECTOR with extended elements, return the /// unextended value. The unextended vector should be 64 bits so that it can /// be used as an operand to a VMULL instruction. If the original vector size /// before extension is less than 64 bits we add a an extension to resize /// the vector to 64 bits. static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) return AddRequiredExtensionForVMULL(N->getOperand(0), DAG, N->getOperand(0)->getValueType(0), N->getValueType(0), N->getOpcode()); if (LoadSDNode *LD = dyn_cast(N)) return SkipLoadExtensionForVMULL(LD, DAG); // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will // have been legalized as a BITCAST from v4i32. if (N->getOpcode() == ISD::BITCAST) { SDNode *BVN = N->getOperand(0).getNode(); assert(BVN->getOpcode() == ISD::BUILD_VECTOR && BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"); unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 
1 : 0; return DAG.getBuildVector( MVT::v2i32, SDLoc(N), {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)}); } // Construct a new BUILD_VECTOR with elements truncated to half the size. assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR"); EVT VT = N->getValueType(0); unsigned EltSize = VT.getScalarSizeInBits() / 2; unsigned NumElts = VT.getVectorNumElements(); MVT TruncVT = MVT::getIntegerVT(EltSize); SmallVector Ops; SDLoc dl(N); for (unsigned i = 0; i != NumElts; ++i) { ConstantSDNode *C = cast(N->getOperand(i)); const APInt &CInt = C->getAPIntValue(); // Element types smaller than 32 bits are not legal, so use i32 elements. // The values are implicitly truncated so sext vs. zext doesn't matter. Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32)); } return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops); } static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { unsigned Opcode = N->getOpcode(); if (Opcode == ISD::ADD || Opcode == ISD::SUB) { SDNode *N0 = N->getOperand(0).getNode(); SDNode *N1 = N->getOperand(1).getNode(); return N0->hasOneUse() && N1->hasOneUse() && isSignExtended(N0, DAG) && isSignExtended(N1, DAG); } return false; } static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) { unsigned Opcode = N->getOpcode(); if (Opcode == ISD::ADD || Opcode == ISD::SUB) { SDNode *N0 = N->getOperand(0).getNode(); SDNode *N1 = N->getOperand(1).getNode(); return N0->hasOneUse() && N1->hasOneUse() && isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG); } return false; } static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { // Multiplications are only custom-lowered for 128-bit vectors so that // VMULL can be detected. Otherwise v2i64 multiplications are not legal. 
EVT VT = Op.getValueType(); assert(VT.is128BitVector() && VT.isInteger() && "unexpected type for custom-lowering ISD::MUL"); SDNode *N0 = Op.getOperand(0).getNode(); SDNode *N1 = Op.getOperand(1).getNode(); unsigned NewOpc = 0; bool isMLA = false; bool isN0SExt = isSignExtended(N0, DAG); bool isN1SExt = isSignExtended(N1, DAG); if (isN0SExt && isN1SExt) NewOpc = ARMISD::VMULLs; else { bool isN0ZExt = isZeroExtended(N0, DAG); bool isN1ZExt = isZeroExtended(N1, DAG); if (isN0ZExt && isN1ZExt) NewOpc = ARMISD::VMULLu; else if (isN1SExt || isN1ZExt) { // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these // into (s/zext A * s/zext C) + (s/zext B * s/zext C) if (isN1SExt && isAddSubSExt(N0, DAG)) { NewOpc = ARMISD::VMULLs; isMLA = true; } else if (isN1ZExt && isAddSubZExt(N0, DAG)) { NewOpc = ARMISD::VMULLu; isMLA = true; } else if (isN0ZExt && isAddSubZExt(N1, DAG)) { std::swap(N0, N1); NewOpc = ARMISD::VMULLu; isMLA = true; } } if (!NewOpc) { if (VT == MVT::v2i64) // Fall through to expand this. It is not legal. return SDValue(); else // Other vector multiplications are legal. return Op; } } // Legalize to a VMULL instruction. SDLoc DL(Op); SDValue Op0; SDValue Op1 = SkipExtensionForVMULL(N1, DAG); if (!isMLA) { Op0 = SkipExtensionForVMULL(N0, DAG); assert(Op0.getValueType().is64BitVector() && Op1.getValueType().is64BitVector() && "unexpected types for extended operands to VMULL"); return DAG.getNode(NewOpc, DL, VT, Op0, Op1); } // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during // isel lowering to take advantage of no-stall back to back vmul + vmla. 
// vmull q0, d4, d6 // vmlal q0, d5, d6 // is faster than // vaddl q0, d4, d5 // vmovl q1, d6 // vmul q0, q0, q1 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG); SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG); EVT Op1VT = Op1.getValueType(); return DAG.getNode(N0->getOpcode(), DL, VT, DAG.getNode(NewOpc, DL, VT, DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1), DAG.getNode(NewOpc, DL, VT, DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); } static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl, SelectionDAG &DAG) { // TODO: Should this propagate fast-math-flags? // Convert to float // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo)); // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo)); X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X); Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y); X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X); Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y); // Get reciprocal estimate. // float4 recip = vrecpeq_f32(yf); Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32), Y); // Because char has a smaller range than uchar, we can actually get away // without any newton steps. This requires that we use a weird bias // of 0xb000, however (again, this has been exhaustively tested). // float4 result = as_float4(as_int4(xf*recip) + 0xb000); X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y); X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X); Y = DAG.getConstant(0xb000, dl, MVT::v4i32); X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y); X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X); // Convert back to short. X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X); X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X); return X; } static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl, SelectionDAG &DAG) { // TODO: Should this propagate fast-math-flags? SDValue N2; // Convert to float. 
// float4 yf = vcvt_f32_s32(vmovl_s16(y)); // float4 xf = vcvt_f32_s32(vmovl_s16(x)); N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0); N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1); N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); // Use reciprocal estimate and one refinement step. // float4 recip = vrecpeq_f32(yf); // recip *= vrecpsq_f32(yf, recip); N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32), N1); N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32), N1, N2); N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); // Because short has a smaller range than ushort, we can actually get away // with only a single newton step. This requires that we use a weird bias // of 89, however (again, this has been exhaustively tested). // float4 result = as_float4(as_int4(xf*recip) + 0x89); N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); N1 = DAG.getConstant(0x89, dl, MVT::v4i32); N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); // Convert back to integer and return. 
// return vmovn_s32(vcvt_s32_f32(result)); N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); return N0; } static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); assert((VT == MVT::v4i16 || VT == MVT::v8i8) && "unexpected type for custom-lowering ISD::SDIV"); SDLoc dl(Op); SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2, N3; if (VT == MVT::v8i8) { N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0); N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1); N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, DAG.getIntPtrConstant(4, dl)); N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, DAG.getIntPtrConstant(4, dl)); N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, DAG.getIntPtrConstant(0, dl)); N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, DAG.getIntPtrConstant(0, dl)); N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); N0 = LowerCONCAT_VECTORS(N0, DAG); N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0); return N0; } return LowerSDIV_v4i16(N0, N1, dl, DAG); } static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { // TODO: Should this propagate fast-math-flags? 
EVT VT = Op.getValueType(); assert((VT == MVT::v4i16 || VT == MVT::v8i8) && "unexpected type for custom-lowering ISD::UDIV"); SDLoc dl(Op); SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2, N3; if (VT == MVT::v8i8) { N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0); N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1); N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, DAG.getIntPtrConstant(4, dl)); N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, DAG.getIntPtrConstant(4, dl)); N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, DAG.getIntPtrConstant(0, dl)); N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, DAG.getIntPtrConstant(0, dl)); N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); N0 = LowerCONCAT_VECTORS(N0, DAG); N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8, DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl, MVT::i32), N0); return N0; } // v4i16 sdiv ... Convert to float. // float4 yf = vcvt_f32_s32(vmovl_u16(y)); // float4 xf = vcvt_f32_s32(vmovl_u16(x)); N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0); N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1); N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); // Use reciprocal estimate and two refinement steps. 
// float4 recip = vrecpeq_f32(yf); // recip *= vrecpsq_f32(yf, recip); // recip *= vrecpsq_f32(yf, recip); N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32), BN1); N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32), BN1, N2); N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32), BN1, N2); N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); // Simply multiplying by the reciprocal estimate can leave us a few ulps // too low, so we add 2 ulps (exhaustive testing shows that this is enough, // and that it will never cause us to return an answer too large). // float4 result = as_float4(as_int4(xf*recip) + 2); N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); N1 = DAG.getConstant(2, dl, MVT::v4i32); N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); // Convert back to integer and return. 
// return vmovn_u32(vcvt_s32_f32(result)); N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); return N0; } static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getNode()->getValueType(0); SDVTList VTs = DAG.getVTList(VT, MVT::i32); unsigned Opc; bool ExtraOp = false; switch (Op.getOpcode()) { default: llvm_unreachable("Invalid code"); case ISD::ADDC: Opc = ARMISD::ADDC; break; case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break; case ISD::SUBC: Opc = ARMISD::SUBC; break; case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break; } if (!ExtraOp) return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1)); return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1), Op.getOperand(2)); } SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetDarwin()); // For iOS, we want to call an alternative entry point: __sincos_stret, // return values are passed via sret. SDLoc dl(Op); SDValue Arg = Op.getOperand(0); EVT ArgVT = Arg.getValueType(); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); auto PtrVT = getPointerTy(DAG.getDataLayout()); MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Pair of floats / doubles used to pass the result. Type *RetTy = StructType::get(ArgTy, ArgTy, nullptr); auto &DL = DAG.getDataLayout(); ArgListTy Args; bool ShouldUseSRet = Subtarget->isAPCS_ABI(); SDValue SRet; if (ShouldUseSRet) { // Create stack object for sret. 
const uint64_t ByteSize = DL.getTypeAllocSize(RetTy); const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy); int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false); SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL)); ArgListEntry Entry; Entry.Node = SRet; Entry.Ty = RetTy->getPointerTo(); Entry.isSExt = false; Entry.isZExt = false; Entry.isSRet = true; Args.push_back(Entry); RetTy = Type::getVoidTy(*DAG.getContext()); } ArgListEntry Entry; Entry.Node = Arg; Entry.Ty = ArgTy; Entry.isSExt = false; Entry.isZExt = false; Args.push_back(Entry); const char *LibcallName = (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret"; RTLIB::Libcall LC = (ArgVT == MVT::f64) ? RTLIB::SINCOS_F64 : RTLIB::SINCOS_F32; CallingConv::ID CC = getLibcallCallingConv(LC); SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL)); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl) .setChain(DAG.getEntryNode()) .setCallee(CC, RetTy, Callee, std::move(Args)) .setDiscardResult(ShouldUseSRet); std::pair CallResult = LowerCallTo(CLI); if (!ShouldUseSRet) return CallResult.first; SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo()); // Address of cos field. SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet, DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl)); SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo()); SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, LoadSin.getValue(0), LoadCos.getValue(0)); } SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG, bool Signed, SDValue &Chain) const { EVT VT = Op.getValueType(); assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected type for custom lowering DIV"); SDLoc dl(Op); const auto &DL = DAG.getDataLayout(); const auto &TLI = DAG.getTargetLoweringInfo(); const char *Name = nullptr; if (Signed) Name = (VT == MVT::i32) ? 
"__rt_sdiv" : "__rt_sdiv64"; else Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64"; SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL)); ARMTargetLowering::ArgListTy Args; for (auto AI : {1, 0}) { ArgListEntry Arg; Arg.Node = Op.getOperand(AI); Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext()); Args.push_back(Arg); } CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl) .setChain(Chain) .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()), ES, std::move(Args)); return LowerCallTo(CLI).first; } SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed) const { assert(Op.getValueType() == MVT::i32 && "unexpected type for custom lowering DIV"); SDLoc dl(Op); SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other, DAG.getEntryNode(), Op.getOperand(1)); return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK); } static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) { SDLoc DL(N); SDValue Op = N->getOperand(1); if (N->getValueType(0) == MVT::i32) return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op); SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op, DAG.getConstant(0, DL, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op, DAG.getConstant(1, DL, MVT::i32)); return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi)); } void ARMTargetLowering::ExpandDIV_Windows( SDValue Op, SelectionDAG &DAG, bool Signed, SmallVectorImpl &Results) const { const auto &DL = DAG.getDataLayout(); const auto &TLI = DAG.getTargetLoweringInfo(); assert(Op.getValueType() == MVT::i64 && "unexpected type for custom lowering DIV"); SDLoc dl(Op); SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode()); SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK); SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result); SDValue Upper = 
DAG.getNode(ISD::SRL, dl, MVT::i64, Result, DAG.getConstant(32, dl, TLI.getPointerTy(DL))); Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper); Results.push_back(Lower); Results.push_back(Upper); } static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { if (isStrongerThanMonotonic(cast(Op)->getOrdering())) // Acquire/Release load/store is not legal for targets without a dmb or // equivalent available. return SDValue(); // Monotonic load/store is legal for all targets. return Op; } static void ReplaceREADCYCLECOUNTER(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { SDLoc DL(N); // Under Power Management extensions, the cycle-count is: // mrc p15, #0, , c9, c13, #0 SDValue Ops[] = { N->getOperand(0), // Chain DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32), DAG.getConstant(15, DL, MVT::i32), DAG.getConstant(0, DL, MVT::i32), DAG.getConstant(9, DL, MVT::i32), DAG.getConstant(13, DL, MVT::i32), DAG.getConstant(0, DL, MVT::i32) }; SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, DAG.getVTList(MVT::i32, MVT::Other), Ops); Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32, DAG.getConstant(0, DL, MVT::i32))); Results.push_back(Cycles32.getValue(1)); } static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) { SDLoc dl(V.getNode()); SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32); SDValue VHi = DAG.getAnyExtOrTrunc( DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)), dl, MVT::i32); SDValue RegClass = DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32); SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32); SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32); const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 }; return SDValue( DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0); } static void ReplaceCMP_SWAP_64Results(SDNode *N, SmallVectorImpl & Results, SelectionDAG &DAG) { 
assert(N->getValueType(0) == MVT::i64 && "AtomicCmpSwap on types less than 64 should be legal"); SDValue Ops[] = {N->getOperand(1), createGPRPairNode(DAG, N->getOperand(2)), createGPRPairNode(DAG, N->getOperand(3)), N->getOperand(0)}; SDNode *CmpSwap = DAG.getMachineNode( ARM::CMP_SWAP_64, SDLoc(N), DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops); MachineFunction &MF = DAG.getMachineFunction(); MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1); MemOp[0] = cast(N)->getMemOperand(); cast(CmpSwap)->setMemRefs(MemOp, MemOp + 1); Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_0, SDLoc(N), MVT::i32, SDValue(CmpSwap, 0))); Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_1, SDLoc(N), MVT::i32, SDValue(CmpSwap, 0))); Results.push_back(SDValue(CmpSwap, 2)); } static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget, SelectionDAG &DAG) { const auto &TLI = DAG.getTargetLoweringInfo(); assert(Subtarget.getTargetTriple().isOSMSVCRT() && "Custom lowering is MSVCRT specific!"); SDLoc dl(Op); SDValue Val = Op.getOperand(0); MVT Ty = Val->getSimpleValueType(0); SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, dl, Ty, Op.getOperand(1)); SDValue Callee = DAG.getExternalSymbol(Ty == MVT::f32 ? "powf" : "pow", TLI.getPointerTy(DAG.getDataLayout())); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Val; Entry.Ty = Val.getValueType().getTypeForEVT(*DAG.getContext()); Entry.isZExt = true; Args.push_back(Entry); Entry.Node = Exponent; Entry.Ty = Exponent.getValueType().getTypeForEVT(*DAG.getContext()); Entry.isZExt = true; Args.push_back(Entry); Type *LCRTy = Val.getValueType().getTypeForEVT(*DAG.getContext()); // In the in-chain to the call is the entry node If we are emitting a // tailcall, the chain will be mutated if the node has a non-entry input // chain. 
SDValue InChain = DAG.getEntryNode(); SDValue TCChain = InChain; const auto *F = DAG.getMachineFunction().getFunction(); bool IsTC = TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) && F->getReturnType() == LCRTy; if (IsTC) InChain = TCChain; TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl) .setChain(InChain) .setCallee(CallingConv::ARM_AAPCS_VFP, LCRTy, Callee, std::move(Args)) .setTailCall(IsTC); std::pair CI = TLI.LowerCallTo(CLI); // Return the chain (the DAG root) if it is a tail call return !CI.second.getNode() ? DAG.getRoot() : CI.first; } SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG); case ISD::ConstantPool: if (Subtarget->genExecuteOnly()) llvm_unreachable("execute-only should not generate constant pools"); return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalAddress: switch (Subtarget->getTargetTriple().getObjectFormat()) { default: llvm_unreachable("unknown object format"); case Triple::COFF: return LowerGlobalAddressWindows(Op, DAG); case Triple::ELF: return LowerGlobalAddressELF(Op, DAG); case Triple::MachO: return LowerGlobalAddressDarwin(Op, DAG); } case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget); case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case 
ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, Subtarget); case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG); case ISD::SHL: case ISD::SRL: case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); case ISD::SREM: return LowerREM(Op.getNode(), DAG); case ISD::UREM: return LowerREM(Op.getNode(), DAG); case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); case ISD::SRL_PARTS: case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget); case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget); case ISD::SETCC: return LowerVSETCC(Op, DAG); case ISD::SETCCE: return LowerSETCCE(Op, DAG); case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); case ISD::SDIV: if (Subtarget->isTargetWindows() && !Op.getValueType().isVector()) return LowerDIV_Windows(Op, DAG, /* Signed */ true); return LowerSDIV(Op, DAG); case ISD::UDIV: if (Subtarget->isTargetWindows() && !Op.getValueType().isVector()) return LowerDIV_Windows(Op, DAG, /* Signed */ false); return LowerUDIV(Op, DAG); case ISD::ADDC: case ISD::ADDE: case ISD::SUBC: case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); case ISD::SADDO: case 
ISD::UADDO: case ISD::SSUBO: case ISD::USUBO: return LowerXALUO(Op, DAG); case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); case ISD::SDIVREM: case ISD::UDIVREM: return LowerDivRem(Op, DAG); case ISD::DYNAMIC_STACKALLOC: if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) return LowerDYNAMIC_STACKALLOC(Op, DAG); llvm_unreachable("Don't know how to custom lower this!"); case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); case ISD::FPOWI: return LowerFPOWI(Op, *Subtarget, DAG); case ARMISD::WIN__DBZCHK: return SDValue(); } } /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. void ARMTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { SDValue Res; switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to custom expand this!"); case ISD::READ_REGISTER: ExpandREAD_REGISTER(N, Results, DAG); break; case ISD::BITCAST: Res = ExpandBITCAST(N, DAG); break; case ISD::SRL: case ISD::SRA: Res = Expand64BitShift(N, DAG, Subtarget); break; case ISD::SREM: case ISD::UREM: Res = LowerREM(N, DAG); break; case ISD::SDIVREM: case ISD::UDIVREM: Res = LowerDivRem(SDValue(N, 0), DAG); assert(Res.getNumOperands() == 2 && "DivRem needs two values"); Results.push_back(Res.getValue(0)); Results.push_back(Res.getValue(1)); return; case ISD::READCYCLECOUNTER: ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget); return; case ISD::UDIV: case ISD::SDIV: assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows"); return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV, Results); case ISD::ATOMIC_CMP_SWAP: ReplaceCMP_SWAP_64Results(N, Results, DAG); return; } if (Res.getNode()) Results.push_back(Res); } //===----------------------------------------------------------------------===// // ARM 
Scheduler Hooks //===----------------------------------------------------------------------===// /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and /// registers the function context. void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB, MachineBasicBlock *DispatchBB, int FI) const { assert(!Subtarget->isROPI() && !Subtarget->isRWPI() && "ROPI/RWPI not currently supported with SjLj"); const TargetInstrInfo *TII = Subtarget->getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); MachineConstantPool *MCP = MF->getConstantPool(); ARMFunctionInfo *AFI = MF->getInfo(); const Function *F = MF->getFunction(); bool isThumb = Subtarget->isThumb(); bool isThumb2 = Subtarget->isThumb2(); unsigned PCLabelId = AFI->createPICLabelUId(); unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8; ARMConstantPoolValue *CPV = ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj); unsigned CPI = MCP->getConstantPoolIndex(CPV, 4); const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass; // Grab constant pool and fixed stack memory operands. MachineMemOperand *CPMMO = MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF), MachineMemOperand::MOLoad, 4, 4); MachineMemOperand *FIMMOSt = MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, 4, 4); // Load the address of the dispatch MBB into the jump buffer. if (isThumb2) { // Incoming value: jbuf // ldr.n r5, LCPI1_1 // orr r5, r5, #1 // add r5, pc // str r5, [$jbuf, #+4] ; &jbuf[1] unsigned NewVReg1 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1) .addConstantPoolIndex(CPI) .addMemOperand(CPMMO)); // Set the low bit because of thumb mode. 
unsigned NewVReg2 = MRI->createVirtualRegister(TRC); AddDefaultCC( AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2) .addReg(NewVReg1, RegState::Kill) .addImm(0x01))); unsigned NewVReg3 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3) .addReg(NewVReg2, RegState::Kill) .addImm(PCLabelId); AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12)) .addReg(NewVReg3, RegState::Kill) .addFrameIndex(FI) .addImm(36) // &jbuf[1] :: pc .addMemOperand(FIMMOSt)); } else if (isThumb) { // Incoming value: jbuf // ldr.n r1, LCPI1_4 // add r1, pc // mov r2, #1 // orrs r1, r2 // add r2, $jbuf, #+4 ; &jbuf[1] // str r1, [r2] unsigned NewVReg1 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1) .addConstantPoolIndex(CPI) .addMemOperand(CPMMO)); unsigned NewVReg2 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2) .addReg(NewVReg1, RegState::Kill) .addImm(PCLabelId); // Set the low bit because of thumb mode. 
unsigned NewVReg3 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3) .addReg(ARM::CPSR, RegState::Define) .addImm(1)); unsigned NewVReg4 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4) .addReg(ARM::CPSR, RegState::Define) .addReg(NewVReg2, RegState::Kill) .addReg(NewVReg3, RegState::Kill)); unsigned NewVReg5 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5) .addFrameIndex(FI) .addImm(36); // &jbuf[1] :: pc AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi)) .addReg(NewVReg4, RegState::Kill) .addReg(NewVReg5, RegState::Kill) .addImm(0) .addMemOperand(FIMMOSt)); } else { // Incoming value: jbuf // ldr r1, LCPI1_1 // add r1, pc, r1 // str r1, [$jbuf, #+4] ; &jbuf[1] unsigned NewVReg1 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1) .addConstantPoolIndex(CPI) .addImm(0) .addMemOperand(CPMMO)); unsigned NewVReg2 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2) .addReg(NewVReg1, RegState::Kill) .addImm(PCLabelId)); AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12)) .addReg(NewVReg2, RegState::Kill) .addFrameIndex(FI) .addImm(36) // &jbuf[1] :: pc .addMemOperand(FIMMOSt)); } } void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, MachineBasicBlock *MBB) const { const TargetInstrInfo *TII = Subtarget->getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); MachineFrameInfo &MFI = MF->getFrameInfo(); int FI = MFI.getFunctionContextIndex(); const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass : &ARM::GPRnopcRegClass; // Get a mapping of the call site numbers to all of the landing pads they're // associated with. 
DenseMap > CallSiteNumToLPad; unsigned MaxCSNum = 0; for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; ++BB) { if (!BB->isEHPad()) continue; // FIXME: We should assert that the EH_LABEL is the first MI in the landing // pad. for (MachineBasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE; ++II) { if (!II->isEHLabel()) continue; MCSymbol *Sym = II->getOperand(0).getMCSymbol(); if (!MF->hasCallSiteLandingPad(Sym)) continue; SmallVectorImpl &CallSiteIdxs = MF->getCallSiteLandingPad(Sym); for (SmallVectorImpl::iterator CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end(); CSI != CSE; ++CSI) { CallSiteNumToLPad[*CSI].push_back(&*BB); MaxCSNum = std::max(MaxCSNum, *CSI); } break; } } // Get an ordered list of the machine basic blocks for the jump table. std::vector LPadList; SmallPtrSet InvokeBBs; LPadList.reserve(CallSiteNumToLPad.size()); for (unsigned I = 1; I <= MaxCSNum; ++I) { SmallVectorImpl &MBBList = CallSiteNumToLPad[I]; for (SmallVectorImpl::iterator II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) { LPadList.push_back(*II); InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end()); } } assert(!LPadList.empty() && "No landing pad destinations for the dispatch jump table!"); // Create the jump table and associated information. MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline); unsigned MJTI = JTI->createJumpTableIndex(LPadList); // Create the MBBs for the dispatch code. // Shove the dispatch's address into the return slot in the function context. MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock(); DispatchBB->setIsEHPad(); MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); unsigned trap_opcode; if (Subtarget->isThumb()) trap_opcode = ARM::tTRAP; else trap_opcode = Subtarget->useNaClTrap() ? 
ARM::TRAPNaCl : ARM::TRAP; BuildMI(TrapBB, dl, TII->get(trap_opcode)); DispatchBB->addSuccessor(TrapBB); MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock(); DispatchBB->addSuccessor(DispContBB); // Insert and MBBs. MF->insert(MF->end(), DispatchBB); MF->insert(MF->end(), DispContBB); MF->insert(MF->end(), TrapBB); // Insert code into the entry block that creates and registers the function // context. SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI); MachineMemOperand *FIMMOLd = MF->getMachineMemOperand( MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, 4); MachineInstrBuilder MIB; MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup)); const ARMBaseInstrInfo *AII = static_cast(TII); const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); // Add a register mask with no preserved registers. This results in all // registers being marked as clobbered. This can't work if the dispatch block // is in a Thumb1 function and is linked with ARM code which uses the FP // registers, as there is no way to preserve the FP registers in Thumb1 mode. 
MIB.addRegMask(RI.getSjLjDispatchPreservedMask(*MF)); bool IsPositionIndependent = isPositionIndependent(); unsigned NumLPads = LPadList.size(); if (Subtarget->isThumb2()) { unsigned NewVReg1 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1) .addFrameIndex(FI) .addImm(4) .addMemOperand(FIMMOLd)); if (NumLPads < 256) { AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri)) .addReg(NewVReg1) .addImm(LPadList.size())); } else { unsigned VReg1 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1) .addImm(NumLPads & 0xFFFF)); unsigned VReg2 = VReg1; if ((NumLPads & 0xFFFF0000) != 0) { VReg2 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2) .addReg(VReg1) .addImm(NumLPads >> 16)); } AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr)) .addReg(NewVReg1) .addReg(VReg2)); } BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc)) .addMBB(TrapBB) .addImm(ARMCC::HI) .addReg(ARM::CPSR); unsigned NewVReg3 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3) .addJumpTableIndex(MJTI)); unsigned NewVReg4 = MRI->createVirtualRegister(TRC); AddDefaultCC( AddDefaultPred( BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4) .addReg(NewVReg3, RegState::Kill) .addReg(NewVReg1) .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT)) .addReg(NewVReg4, RegState::Kill) .addReg(NewVReg1) .addJumpTableIndex(MJTI); } else if (Subtarget->isThumb()) { unsigned NewVReg1 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1) .addFrameIndex(FI) .addImm(1) .addMemOperand(FIMMOLd)); if (NumLPads < 256) { AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8)) .addReg(NewVReg1) .addImm(NumLPads)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); Type 
*Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); const Constant *C = ConstantInt::get(Int32Ty, NumLPads); // MachineConstantPool wants an explicit alignment. unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty); if (Align == 0) Align = MF->getDataLayout().getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); unsigned VReg1 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci)) .addReg(VReg1, RegState::Define) .addConstantPoolIndex(Idx)); AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr)) .addReg(NewVReg1) .addReg(VReg1)); } BuildMI(DispatchBB, dl, TII->get(ARM::tBcc)) .addMBB(TrapBB) .addImm(ARMCC::HI) .addReg(ARM::CPSR); unsigned NewVReg2 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2) .addReg(ARM::CPSR, RegState::Define) .addReg(NewVReg1) .addImm(2)); unsigned NewVReg3 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3) .addJumpTableIndex(MJTI)); unsigned NewVReg4 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4) .addReg(ARM::CPSR, RegState::Define) .addReg(NewVReg2, RegState::Kill) .addReg(NewVReg3)); MachineMemOperand *JTMMOLd = MF->getMachineMemOperand( MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4); unsigned NewVReg5 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5) .addReg(NewVReg4, RegState::Kill) .addImm(0) .addMemOperand(JTMMOLd)); unsigned NewVReg6 = NewVReg5; if (IsPositionIndependent) { NewVReg6 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6) .addReg(ARM::CPSR, RegState::Define) .addReg(NewVReg5, RegState::Kill) .addReg(NewVReg3)); } BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr)) .addReg(NewVReg6, RegState::Kill) 
.addJumpTableIndex(MJTI); } else { unsigned NewVReg1 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1) .addFrameIndex(FI) .addImm(4) .addMemOperand(FIMMOLd)); if (NumLPads < 256) { AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri)) .addReg(NewVReg1) .addImm(NumLPads)); } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) { unsigned VReg1 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1) .addImm(NumLPads & 0xFFFF)); unsigned VReg2 = VReg1; if ((NumLPads & 0xFFFF0000) != 0) { VReg2 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2) .addReg(VReg1) .addImm(NumLPads >> 16)); } AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) .addReg(NewVReg1) .addReg(VReg2)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); const Constant *C = ConstantInt::get(Int32Ty, NumLPads); // MachineConstantPool wants an explicit alignment. 
unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty); if (Align == 0) Align = MF->getDataLayout().getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); unsigned VReg1 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp)) .addReg(VReg1, RegState::Define) .addConstantPoolIndex(Idx) .addImm(0)); AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) .addReg(NewVReg1) .addReg(VReg1, RegState::Kill)); } BuildMI(DispatchBB, dl, TII->get(ARM::Bcc)) .addMBB(TrapBB) .addImm(ARMCC::HI) .addReg(ARM::CPSR); unsigned NewVReg3 = MRI->createVirtualRegister(TRC); AddDefaultCC( AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3) .addReg(NewVReg1) .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); unsigned NewVReg4 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4) .addJumpTableIndex(MJTI)); MachineMemOperand *JTMMOLd = MF->getMachineMemOperand( MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4); unsigned NewVReg5 = MRI->createVirtualRegister(TRC); AddDefaultPred( BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5) .addReg(NewVReg3, RegState::Kill) .addReg(NewVReg4) .addImm(0) .addMemOperand(JTMMOLd)); if (IsPositionIndependent) { BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd)) .addReg(NewVReg5, RegState::Kill) .addReg(NewVReg4) .addJumpTableIndex(MJTI); } else { BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr)) .addReg(NewVReg5, RegState::Kill) .addJumpTableIndex(MJTI); } } // Add the jump table entries as successors to the MBB. SmallPtrSet SeenMBBs; for (std::vector::iterator I = LPadList.begin(), E = LPadList.end(); I != E; ++I) { MachineBasicBlock *CurMBB = *I; if (SeenMBBs.insert(CurMBB).second) DispContBB->addSuccessor(CurMBB); } // N.B. the order the invoke BBs are processed in doesn't matter here. 
const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF); SmallVector MBBLPads; for (MachineBasicBlock *BB : InvokeBBs) { // Remove the landing pad successor from the invoke block and replace it // with the new dispatch block. SmallVector Successors(BB->succ_begin(), BB->succ_end()); while (!Successors.empty()) { MachineBasicBlock *SMBB = Successors.pop_back_val(); if (SMBB->isEHPad()) { BB->removeSuccessor(SMBB); MBBLPads.push_back(SMBB); } } BB->addSuccessor(DispatchBB, BranchProbability::getZero()); BB->normalizeSuccProbs(); // Find the invoke call and mark all of the callee-saved registers as // 'implicit defined' so that they're spilled. This prevents code from // moving instructions to before the EH block, where they will never be // executed. for (MachineBasicBlock::reverse_iterator II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) { if (!II->isCall()) continue; DenseMap DefRegs; for (MachineInstr::mop_iterator OI = II->operands_begin(), OE = II->operands_end(); OI != OE; ++OI) { if (!OI->isReg()) continue; DefRegs[OI->getReg()] = true; } MachineInstrBuilder MIB(*MF, &*II); for (unsigned i = 0; SavedRegs[i] != 0; ++i) { unsigned Reg = SavedRegs[i]; if (Subtarget->isThumb2() && !ARM::tGPRRegClass.contains(Reg) && !ARM::hGPRRegClass.contains(Reg)) continue; if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg)) continue; if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg)) continue; if (!DefRegs[Reg]) MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead); } break; } } // Mark all former landing pads as non-landing pads. The dispatch is the only // landing pad now. for (SmallVectorImpl::iterator I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I) (*I)->setIsEHPad(false); // The instruction is gone now. 
MI.eraseFromParent(); } static MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) if (*I != Succ) return *I; llvm_unreachable("Expecting a BB with two successors!"); } /// Return the load opcode for a given load size. If load size >= 8, /// neon opcode will be returned. static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) { if (LdSize >= 8) return LdSize == 16 ? ARM::VLD1q32wb_fixed : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0; if (IsThumb1) return LdSize == 4 ? ARM::tLDRi : LdSize == 2 ? ARM::tLDRHi : LdSize == 1 ? ARM::tLDRBi : 0; if (IsThumb2) return LdSize == 4 ? ARM::t2LDR_POST : LdSize == 2 ? ARM::t2LDRH_POST : LdSize == 1 ? ARM::t2LDRB_POST : 0; return LdSize == 4 ? ARM::LDR_POST_IMM : LdSize == 2 ? ARM::LDRH_POST : LdSize == 1 ? ARM::LDRB_POST_IMM : 0; } /// Return the store opcode for a given store size. If store size >= 8, /// neon opcode will be returned. static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) { if (StSize >= 8) return StSize == 16 ? ARM::VST1q32wb_fixed : StSize == 8 ? ARM::VST1d32wb_fixed : 0; if (IsThumb1) return StSize == 4 ? ARM::tSTRi : StSize == 2 ? ARM::tSTRHi : StSize == 1 ? ARM::tSTRBi : 0; if (IsThumb2) return StSize == 4 ? ARM::t2STR_POST : StSize == 2 ? ARM::t2STRH_POST : StSize == 1 ? ARM::t2STRB_POST : 0; return StSize == 4 ? ARM::STR_POST_IMM : StSize == 2 ? ARM::STRH_POST : StSize == 1 ? ARM::STRB_POST_IMM : 0; } /// Emit a post-increment load operation with given size. The instructions /// will be added to BB at Pos. 
static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned LdSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2) { unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2); assert(LdOpc != 0 && "Should have a load opcode"); if (LdSize >= 8) { AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) .addReg(AddrOut, RegState::Define).addReg(AddrIn) .addImm(0)); } else if (IsThumb1) { // load + update AddrIn AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) .addReg(AddrIn).addImm(0)); MachineInstrBuilder MIB = BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut); MIB = AddDefaultT1CC(MIB); MIB.addReg(AddrIn).addImm(LdSize); AddDefaultPred(MIB); } else if (IsThumb2) { AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) .addReg(AddrOut, RegState::Define).addReg(AddrIn) .addImm(LdSize)); } else { // arm AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) .addReg(AddrOut, RegState::Define).addReg(AddrIn) .addReg(0).addImm(LdSize)); } } /// Emit a post-increment store operation with given size. The instructions /// will be added to BB at Pos. 
static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
                       const TargetInstrInfo *TII, const DebugLoc &dl,
                       unsigned StSize, unsigned Data, unsigned AddrIn,
                       unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
  // Pick the store opcode for this size/mode; 0 means "no such opcode".
  unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
  assert(StOpc != 0 && "Should have a store opcode");
  if (StSize >= 8) {
    // NEON form: AddrOut is the defined register; operands are AddrIn, an
    // immediate 0, then the data register.
    AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
                       .addReg(AddrIn).addImm(0).addReg(Data));
  } else if (IsThumb1) {
    // store + update AddrIn
    // The store does not define AddrOut, so a separate tADDi8 computes
    // AddrOut = AddrIn + StSize.
    AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc)).addReg(Data)
                       .addReg(AddrIn).addImm(0));
    MachineInstrBuilder MIB =
        BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
    MIB = AddDefaultT1CC(MIB);
    MIB.addReg(AddrIn).addImm(StSize);
    AddDefaultPred(MIB);
  } else if (IsThumb2) {
    AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
                       .addReg(Data).addReg(AddrIn).addImm(StSize));
  } else { // arm
    // Same as the Thumb2 form plus an extra register operand (0) before the
    // immediate.
    AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
                       .addReg(Data).addReg(AddrIn).addReg(0)
                       .addImm(StSize));
  }
}

/// Expand the struct-byval copy pseudo (dispatched from
/// EmitInstrWithCustomInserter for ARM::COPY_STRUCT_BYVAL_I32) into loads and
/// stores.
///
/// \param MI The pseudo instruction; it is erased before returning.
/// \param BB The block containing \p MI.
/// \returns \p BB when the copy is unrolled inline, or the newly created exit
/// block when a copy loop is emitted.
MachineBasicBlock *
ARMTargetLowering::EmitStructByval(MachineInstr &MI,
                                   MachineBasicBlock *BB) const {
  // The code below reads four operands from this pseudo instruction:
  // dst, src, size and alignment.
  // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
  // Otherwise, we will generate unrolled scalar copies.
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  unsigned dest = MI.getOperand(0).getReg();
  unsigned src = MI.getOperand(1).getReg();
  unsigned SizeVal = MI.getOperand(2).getImm();
  unsigned Align = MI.getOperand(3).getImm();
  DebugLoc dl = MI.getDebugLoc();

  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned UnitSize = 0;
  const TargetRegisterClass *TRC = nullptr;
  const TargetRegisterClass *VecTRC = nullptr;

  bool IsThumb1 = Subtarget->isThumb1Only();
  bool IsThumb2 = Subtarget->isThumb2();
  bool IsThumb = Subtarget->isThumb();

  // Choose the copy unit from the alignment: odd alignment copies bytes,
  // alignment with bit 1 set copies halfwords, everything else copies words
  // unless a NEON unit (16 or 8 bytes) qualifies.
  if (Align & 1) {
    UnitSize = 1;
  } else if (Align & 2) {
    UnitSize = 2;
  } else {
    // Check whether we can use NEON instructions.
    if (!MF->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) &&
        Subtarget->hasNEON()) {
      if ((Align % 16 == 0) && SizeVal >= 16)
        UnitSize = 16;
      else if ((Align % 8 == 0) && SizeVal >= 8)
        UnitSize = 8;
    }
    // Can't use NEON instructions.
    if (UnitSize == 0)
      UnitSize = 4;
  }

  // Select the correct opcode and register class for unit size load/store
  bool IsNeon = UnitSize >= 8;
  TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
  if (IsNeon)
    VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
                            : UnitSize == 8 ? &ARM::DPRRegClass
                                            : nullptr;

  // LoopSize is the part of the copy done in UnitSize chunks; BytesLeft is
  // the remainder, copied one byte at a time.
  unsigned BytesLeft = SizeVal % UnitSize;
  unsigned LoopSize = SizeVal - BytesLeft;

  if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
    // Use LDR and STR to copy.
    // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
    // [destOut] = STR_POST(scratch, destIn, UnitSize)
    unsigned srcIn = src;
    unsigned destIn = dest;
    for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
      unsigned srcOut = MRI.createVirtualRegister(TRC);
      unsigned destOut = MRI.createVirtualRegister(TRC);
      unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
      emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
                 IsThumb1, IsThumb2);
      emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
                 IsThumb1, IsThumb2);
      // Chain the updated addresses into the next iteration.
      srcIn = srcOut;
      destIn = destOut;
    }

    // Handle the leftover bytes with LDRB and STRB.
    // [scratch, srcOut] = LDRB_POST(srcIn, 1)
    // [destOut] = STRB_POST(scratch, destIn, 1)
    for (unsigned i = 0; i < BytesLeft; i++) {
      unsigned srcOut = MRI.createVirtualRegister(TRC);
      unsigned destOut = MRI.createVirtualRegister(TRC);
      unsigned scratch = MRI.createVirtualRegister(TRC);
      emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
                 IsThumb1, IsThumb2);
      emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
                 IsThumb1, IsThumb2);
      srcIn = srcOut;
      destIn = destOut;
    }
    MI.eraseFromParent(); // The instruction is gone now.
    return BB;
  }

  // Expand the pseudo op to a loop.
  // thisMBB:
  //   ...
  //   movw varEnd, # --> with thumb2
  //   movt varEnd, #
  //   ldrcp varEnd, idx --> without thumb2
  //   fallthrough --> loopMBB
  // loopMBB:
  //   PHI varPhi, varEnd, varLoop
  //   PHI srcPhi, src, srcLoop
  //   PHI destPhi, dst, destLoop
  //   [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
  //   [destLoop] = STR_POST(scratch, destPhi, UnitSize)
  //   subs varLoop, varPhi, #UnitSize
  //   bne loopMBB
  //   fallthrough --> exitMBB
  // exitMBB:
  //   epilogue to handle left-over bytes
  //   [scratch, srcOut] = LDRB_POST(srcLoop, 1)
  //   [destOut] = STRB_POST(scratch, destLoop, 1)
  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MF->insert(It, loopMBB);
  MF->insert(It, exitMBB);

  // Transfer the remainder of BB and its successor edges to exitMBB.
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  // Load an immediate to varEnd.
  unsigned varEnd = MRI.createVirtualRegister(TRC);
  if (Subtarget->useMovt(*MF)) {
    // Materialize LoopSize with a 16-bit move of the low half; a second
    // instruction supplying the high half is only emitted when the upper 16
    // bits are non-zero, in which case the low half goes through a temporary.
    unsigned Vtmp = varEnd;
    if ((LoopSize & 0xFFFF0000) != 0)
      Vtmp = MRI.createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(BB, dl,
                           TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16),
                           Vtmp).addImm(LoopSize & 0xFFFF));

    if ((LoopSize & 0xFFFF0000) != 0)
      AddDefaultPred(BuildMI(BB, dl,
                             TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16),
                             varEnd)
                         .addReg(Vtmp)
                         .addImm(LoopSize >> 16));
  } else {
    // No movt: place LoopSize in the constant pool and load it from there.
    MachineConstantPool *ConstantPool = MF->getConstantPool();
    Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
    const Constant *C = ConstantInt::get(Int32Ty, LoopSize);

    // MachineConstantPool wants an explicit alignment.
    // Note: this local Align shadows the operand-derived Align above.
    unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
    if (Align == 0)
      Align = MF->getDataLayout().getTypeAllocSize(C->getType());
    unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);

    if (IsThumb)
      AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg(
          varEnd, RegState::Define).addConstantPoolIndex(Idx));
    else
      AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg(
          varEnd, RegState::Define).addConstantPoolIndex(Idx).addImm(0));
  }
  BB->addSuccessor(loopMBB);

  // Generate the loop body:
  //   varPhi = PHI(varLoop, varEnd)
  //   srcPhi = PHI(srcLoop, src)
  //   destPhi = PHI(destLoop, dst)
  MachineBasicBlock *entryBB = BB;
  BB = loopMBB;
  unsigned varLoop = MRI.createVirtualRegister(TRC);
  unsigned varPhi = MRI.createVirtualRegister(TRC);
  unsigned srcLoop = MRI.createVirtualRegister(TRC);
  unsigned srcPhi = MRI.createVirtualRegister(TRC);
  unsigned destLoop = MRI.createVirtualRegister(TRC);
  unsigned destPhi = MRI.createVirtualRegister(TRC);

  BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
    .addReg(varLoop).addMBB(loopMBB)
    .addReg(varEnd).addMBB(entryBB);
  BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
    .addReg(srcLoop).addMBB(loopMBB)
    .addReg(src).addMBB(entryBB);
  BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
    .addReg(destLoop).addMBB(loopMBB)
    .addReg(dest).addMBB(entryBB);

  // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
  // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
  unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
  emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
             IsThumb1, IsThumb2);
  emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
             IsThumb1, IsThumb2);

  // Decrement loop variable by UnitSize.
  if (IsThumb1) {
    MachineInstrBuilder MIB =
        BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop);
    MIB = AddDefaultT1CC(MIB);
    MIB.addReg(varPhi).addImm(UnitSize);
    AddDefaultPred(MIB);
  } else {
    MachineInstrBuilder MIB =
        BuildMI(*BB, BB->end(), dl,
                TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
    AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
    // Turn operand 5 into a CPSR def so the subtract sets the flags the
    // branch below reads. NOTE(review): operand 5 is presumably the cc_out
    // operand appended by AddDefaultCC -- confirm against the helper.
    MIB->getOperand(5).setReg(ARM::CPSR);
    MIB->getOperand(5).setIsDef(true);
  }
  // Loop back while the counter is non-zero.
  BuildMI(*BB, BB->end(), dl,
          TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
      .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);

  // loopMBB can loop back to loopMBB or fall through to exitMBB.
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  // Add epilogue to handle BytesLeft.
  BB = exitMBB;
  auto StartOfExit = exitMBB->begin();

  // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
  // [destOut] = STRB_POST(scratch, destLoop, 1)
  unsigned srcIn = srcLoop;
  unsigned destIn = destLoop;
  for (unsigned i = 0; i < BytesLeft; i++) {
    unsigned srcOut = MRI.createVirtualRegister(TRC);
    unsigned destOut = MRI.createVirtualRegister(TRC);
    // Note: this per-iteration scratch shadows the loop-body scratch above.
    unsigned scratch = MRI.createVirtualRegister(TRC);
    emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
               IsThumb1, IsThumb2);
    emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
               IsThumb1, IsThumb2);
    srcIn = srcOut;
    destIn = destOut;
  }

  MI.eraseFromParent(); // The instruction is gone now.
  return BB;
}

/// Lower the Windows __chkstk pseudo (dispatched for ARM::WIN__CHKSTK): emit
/// a call to "__chkstk" followed by a subtraction of R4 from SP, then erase
/// \p MI.
///
/// \returns \p MBB; no new blocks are created.
MachineBasicBlock *
ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
                                       MachineBasicBlock *MBB) const {
  const TargetMachine &TM = getTargetMachine();
  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();

  assert(Subtarget->isTargetWindows() &&
         "__chkstk is only supported on Windows");
  assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");

  // __chkstk takes the number of words to allocate on the stack in R4, and
  // returns the stack adjustment in number of bytes in R4.  This will not
  // clobber any other registers (other than the obvious lr).
  //
  // Although, technically, IP should be considered a register which may be
  // clobbered, the call itself will not touch it.  Windows on ARM is a pure
  // thumb-2 environment, so there is no interworking required.  As a result, we
  // do not expect a veneer to be emitted by the linker, clobbering IP.
  //
  // Each module receives its own copy of __chkstk, so no import thunk is
  // required, again, ensuring that IP is not clobbered.
  //
  // Finally, although some linkers may theoretically provide a trampoline for
  // out of range calls (which is quite common due to a 32M range limitation of
  // branches for Thumb), we can generate the long-call version via
  // -mcmodel=large, alleviating the need for the trampoline which may clobber
  // IP.

  switch (TM.getCodeModel()) {
  case CodeModel::Small:
  case CodeModel::Medium:
  case CodeModel::Default:
  case CodeModel::Kernel:
    // Direct call; R4 is both consumed and redefined, R12 is marked as a
    // dead implicit def.
    BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
      .addImm((unsigned)ARMCC::AL).addReg(0)
      .addExternalSymbol("__chkstk")
      .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
      .addReg(ARM::R4, RegState::Implicit | RegState::Define)
      .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
    break;
  case CodeModel::Large:
  case CodeModel::JITDefault: {
    // Long call: materialize the symbol address in a fresh register and call
    // through it.
    MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
    unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);

    BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
      .addExternalSymbol("__chkstk");
    BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr))
      .addImm((unsigned)ARMCC::AL).addReg(0)
      .addReg(Reg, RegState::Kill)
      .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
      .addReg(ARM::R4, RegState::Implicit | RegState::Define)
      .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
    break;
  }
  }

  // SP = SP - R4, flagged as part of frame setup.
  AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr),
                                      ARM::SP)
                         .addReg(ARM::SP, RegState::Kill)
                         .addReg(ARM::R4, RegState::Kill)
                         .setMIFlags(MachineInstr::FrameSetup)));

  MI.eraseFromParent();
  return MBB;
}

/// Lower the divide-by-zero check pseudo (dispatched for ARM::WIN__DBZCHK):
/// compare operand 0 of \p MI against zero and branch to a new trap block
/// when it is equal.
///
/// \returns The continuation block that receives everything that followed
/// \p MI in \p MBB.
MachineBasicBlock *
ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
                                       MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();

  // Split MBB after MI: the tail and the successor edges move to ContBB.
  MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
  MF->insert(++MBB->getIterator(), ContBB);
  ContBB->splice(ContBB->begin(), MBB,
                 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  ContBB->transferSuccessorsAndUpdatePHIs(MBB);
  MBB->addSuccessor(ContBB);

  // The trap block holds a single t__brkdiv0 and is added via
  // MF->push_back().
  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
  BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
  MF->push_back(TrapBB);
  MBB->addSuccessor(TrapBB);

  AddDefaultPred(BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
                     .addReg(MI.getOperand(0).getReg())
                     .addImm(0));
  BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
      .addMBB(TrapBB)
      .addImm(ARMCC::EQ)
      .addReg(ARM::CPSR);

  MI.eraseFromParent();
  return ContBB;
}

/// Expand pseudo instructions that need custom insertion, dispatching on the
/// opcode of \p MI.
///
/// \returns The block in which instructions following \p MI now live (the
/// original \p BB for the simple cases, a newly created block when the
/// expansion splits the block).
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                               MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  DebugLoc dl = MI.getDebugLoc();
  bool isThumb2 = Subtarget->isThumb2();
  switch (MI.getOpcode()) {
  default: {
    MI.dump();
    llvm_unreachable("Unexpected instr type to insert");
  }

  // Thumb1 post-indexed loads are really just single-register LDMs.
  case ARM::tLDR_postidx: {
    BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
      .addOperand(MI.getOperand(1)) // Rn_wb
      .addOperand(MI.getOperand(2)) // Rn
      .addOperand(MI.getOperand(3)) // PredImm
      .addOperand(MI.getOperand(4)) // PredReg
      .addOperand(MI.getOperand(0)); // Rt
    MI.eraseFromParent();
    return BB;
  }

  // The Thumb2 pre-indexed stores have the same MI operands, they just
  // define them differently in the .td files from the isel patterns, so
  // they need pseudos.
  case ARM::t2STR_preidx:
    MI.setDesc(TII->get(ARM::t2STR_PRE));
    return BB;
  case ARM::t2STRB_preidx:
    MI.setDesc(TII->get(ARM::t2STRB_PRE));
    return BB;
  case ARM::t2STRH_preidx:
    MI.setDesc(TII->get(ARM::t2STRH_PRE));
    return BB;

  case ARM::STRi_preidx:
  case ARM::STRBi_preidx: {
    unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
                                                         : ARM::STRB_PRE_IMM;
    // Decode the offset.
    unsigned Offset = MI.getOperand(4).getImm();
    bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
    Offset = ARM_AM::getAM2Offset(Offset);
    if (isSub)
      Offset = -Offset;

    // The new instruction carries over the first memory operand of MI.
    MachineMemOperand *MMO = *MI.memoperands_begin();
    BuildMI(*BB, MI, dl, TII->get(NewOpc))
      .addOperand(MI.getOperand(0)) // Rn_wb
      .addOperand(MI.getOperand(1)) // Rt
      .addOperand(MI.getOperand(2)) // Rn
      .addImm(Offset)               // offset (skip GPR==zero_reg)
      .addOperand(MI.getOperand(5)) // pred
      .addOperand(MI.getOperand(6))
      .addMemOperand(MMO);
    MI.eraseFromParent();
    return BB;
  }
  case ARM::STRr_preidx:
  case ARM::STRBr_preidx:
  case ARM::STRH_preidx: {
    unsigned NewOpc;
    switch (MI.getOpcode()) {
    default: llvm_unreachable("unexpected opcode!");
    case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
    case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
    case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
    }
    // Same operand list, different opcode: copy every operand across.
    MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
    for (unsigned i = 0; i < MI.getNumOperands(); ++i)
      MIB.addOperand(MI.getOperand(i));
    MI.eraseFromParent();
    return BB;
  }

  case ARM::tMOVCCr_pseudo: {
    // To "insert" a SELECT_CC instruction, we actually have to insert the
    // diamond control-flow pattern.  The incoming instruction knows the
    // destination vreg to set, the condition code register to branch on, the
    // true/false values to select between, and a branch opcode to use.
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    MachineFunction::iterator It = ++BB->getIterator();

    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC copy1MBB
    //   fallthrough --> copy0MBB
    // (In the code below the conditional branch targets sinkMBB.)
    MachineBasicBlock *thisMBB = BB;
    MachineFunction *F = BB->getParent();
    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, copy0MBB);
    F->insert(It, sinkMBB);

    // Transfer the remainder of BB and its successor edges to sinkMBB.
    sinkMBB->splice(sinkMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    // Branch on the condition (operand 3) and condition register (operand 4)
    // taken from the pseudo.
    BuildMI(BB, dl, TII->get(ARM::tBcc))
        .addMBB(sinkMBB)
        .addImm(MI.getOperand(3).getImm())
        .addReg(MI.getOperand(4).getReg());

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
        .addReg(MI.getOperand(1).getReg())
        .addMBB(copy0MBB)
        .addReg(MI.getOperand(2).getReg())
        .addMBB(thisMBB);

    MI.eraseFromParent(); // The pseudo instruction is gone now.
    return BB;
  }

  case ARM::BCCi64:
  case ARM::BCCZi64: {
    // If there is an unconditional branch to the other successor, remove it.
    BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());

    // Compare both parts that make up the double comparison separately for
    // equality.
    bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;

    unsigned LHS1 = MI.getOperand(1).getReg();
    unsigned LHS2 = MI.getOperand(2).getReg();
    if (RHSisZero) {
      AddDefaultPred(BuildMI(BB, dl,
                             TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
                         .addReg(LHS1).addImm(0));
      // The second compare is predicated on EQ, i.e. on the outcome of the
      // first compare.
      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
        .addReg(LHS2).addImm(0)
        .addImm(ARMCC::EQ).addReg(ARM::CPSR);
    } else {
      unsigned RHS1 = MI.getOperand(3).getReg();
      unsigned RHS2 = MI.getOperand(4).getReg();
      AddDefaultPred(BuildMI(BB, dl,
                             TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
                         .addReg(LHS1).addReg(RHS1));
      // As above: the second compare is predicated on EQ.
      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
        .addReg(LHS2).addReg(RHS2)
        .addImm(ARMCC::EQ).addReg(ARM::CPSR);
    }

    // The destination block operand follows the compare operands, so its
    // index depends on whether a RHS register pair is present.
    MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
    MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
    // The emitted branch always tests EQ; for an NE pseudo the two targets
    // are swapped instead.
    if (MI.getOperand(0).getImm() == ARMCC::NE)
      std::swap(destMBB, exitMBB);

    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
      .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
    if (isThumb2)
      AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB));
    else
      BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);

    MI.eraseFromParent(); // The pseudo instruction is gone now.
    return BB;
  }

  // The setjmp pseudos are left untouched here.
  case ARM::Int_eh_sjlj_setjmp:
  case ARM::Int_eh_sjlj_setjmp_nofp:
  case ARM::tInt_eh_sjlj_setjmp:
  case ARM::t2Int_eh_sjlj_setjmp:
  case ARM::t2Int_eh_sjlj_setjmp_nofp:
    return BB;

  case ARM::Int_eh_sjlj_setup_dispatch:
    EmitSjLjDispatchBlock(MI, BB);
    return BB;

  case ARM::ABS:
  case ARM::t2ABS: {
    // To insert an ABS instruction, we have to insert the
    // diamond control-flow pattern.  The incoming instruction knows the
    // source vreg to test against 0, the destination vreg to set,
    // the condition code register to branch on, the
    // true/false values to select between, and a branch opcode to use.
    // It transforms
    //     V1 = ABS V0
    // into
    //     V2 = MOVS V0
    //     BCC                      (branch to SinkBB if V0 >= 0)
    //     RSBBB: V3 = RSBri V2, 0  (compute ABS if V2 < 0)
    //     SinkBB: V1 = PHI(V2, V3)
    // Note: the code below emits a compare of the source register against 0
    // rather than a MOVS, and the PHI uses the source register directly.
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    MachineFunction::iterator BBI = ++BB->getIterator();
    MachineFunction *Fn = BB->getParent();
    MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *SinkBB  = Fn->CreateMachineBasicBlock(LLVM_BB);
    Fn->insert(BBI, RSBBB);
    Fn->insert(BBI, SinkBB);

    unsigned int ABSSrcReg = MI.getOperand(1).getReg();
    unsigned int ABSDstReg = MI.getOperand(0).getReg();
    bool ABSSrcKIll = MI.getOperand(1).isKill();
    // NOTE(review): this redeclaration shadows the function-level isThumb2,
    // which is initialized from the same call; it looks redundant.
    bool isThumb2 = Subtarget->isThumb2();
    MachineRegisterInfo &MRI = Fn->getRegInfo();
    // In Thumb mode S must not be specified if source register is the SP or
    // PC and if destination register is the SP, so restrict register class
    unsigned NewRsbDstReg =
      MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass
                                         : &ARM::GPRRegClass);

    // Transfer the remainder of BB and its successor edges to sinkMBB.
    SinkBB->splice(SinkBB->begin(), BB,
                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
    SinkBB->transferSuccessorsAndUpdatePHIs(BB);

    BB->addSuccessor(RSBBB);
    BB->addSuccessor(SinkBB);

    // fall through to SinkMBB
    RSBBB->addSuccessor(SinkBB);

    // insert a cmp at the end of BB
    AddDefaultPred(BuildMI(BB, dl,
                           TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
                   .addReg(ABSSrcReg).addImm(0));

    // insert a bcc with opposite CC to ARMCC::MI at the end of BB
    BuildMI(BB, dl,
      TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
      .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);

    // insert rsbri in RSBBB
    // Note: BCC and rsbri will be converted into predicated rsbmi
    // by if-conversion pass
    BuildMI(*RSBBB, RSBBB->begin(), dl,
      TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
      .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
      .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);

    // insert PHI in SinkBB,
    // reuse ABSDstReg to not change uses of ABS instruction
    BuildMI(*SinkBB, SinkBB->begin(), dl,
      TII->get(ARM::PHI), ABSDstReg)
      .addReg(NewRsbDstReg).addMBB(RSBBB)
      .addReg(ABSSrcReg).addMBB(BB);

    // remove ABS instruction
    MI.eraseFromParent();

    // return last added BB
    return SinkBB;
  }
  case ARM::COPY_STRUCT_BYVAL_I32:
    ++NumLoopByVals;
    return EmitStructByval(MI, BB);
  case ARM::WIN__CHKSTK:
    return EmitLowered__chkstk(MI, BB);
  case ARM::WIN__DBZCHK:
    return EmitLowered__dbzchk(MI, BB);
  }
}

/// \brief Attaches vregs to MEMCPY that it will use as scratch registers
/// when it is expanded into LDM/STM. This is done as a post-isel lowering
/// instead of as a custom inserter because we need the use list from the SDNode.
static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget, MachineInstr &MI, const SDNode *Node) { bool isThumb1 = Subtarget->isThumb1Only(); DebugLoc DL = MI.getDebugLoc(); MachineFunction *MF = MI.getParent()->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); MachineInstrBuilder MIB(*MF, MI); // If the new dst/src is unused mark it as dead. if (!Node->hasAnyUseOfValue(0)) { MI.getOperand(0).setIsDead(true); } if (!Node->hasAnyUseOfValue(1)) { MI.getOperand(1).setIsDead(true); } // The MEMCPY both defines and kills the scratch registers. for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) { unsigned TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass); MIB.addReg(TmpReg, RegState::Define|RegState::Dead); } } void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const { if (MI.getOpcode() == ARM::MEMCPY) { attachMEMCPYScratchRegs(Subtarget, MI, Node); return; } const MCInstrDesc *MCID = &MI.getDesc(); // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB, // RSC. Coming out of isel, they have an implicit CPSR def, but the optional // operand is still set to noreg. If needed, set the optional operand's // register to CPSR, and remove the redundant implicit def. // // e.g. ADCS (..., CPSR) -> ADC (... opt:CPSR). // Rename pseudo opcodes. unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode()); if (NewOpc) { const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo(); MCID = &TII->get(NewOpc); assert(MCID->getNumOperands() == MI.getDesc().getNumOperands() + 1 && "converted opcode should be the same except for cc_out"); MI.setDesc(*MCID); // Add the optional cc_out operand MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true)); } unsigned ccOutIdx = MCID->getNumOperands() - 1; // Any ARM instruction that sets the 's' bit should specify an optional // "cc_out" operand in the last operand position. 
if (!MI.hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) { assert(!NewOpc && "Optional cc_out operand required"); return; } // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it // since we already have an optional CPSR def. bool definesCPSR = false; bool deadCPSR = false; for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) { definesCPSR = true; if (MO.isDead()) deadCPSR = true; MI.RemoveOperand(i); break; } } if (!definesCPSR) { assert(!NewOpc && "Optional cc_out operand required"); return; } assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag"); if (deadCPSR) { assert(!MI.getOperand(ccOutIdx).getReg() && "expect uninitialized optional cc_out operand"); return; } // If this instruction was defined with an optional CPSR def and its dag node // had a live implicit CPSR def, then activate the optional CPSR def. MachineOperand &MO = MI.getOperand(ccOutIdx); MO.setReg(ARM::CPSR); MO.setIsDef(true); } //===----------------------------------------------------------------------===// // ARM Optimization Hooks //===----------------------------------------------------------------------===// // Helper function that checks if N is a null or all ones constant. static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) { return AllOnes ? isAllOnesConstant(N) : isNullConstant(N); } // Return true if N is conditionally 0 or all ones. // Detects these expressions where cc is an i1 value: // // (select cc 0, y) [AllOnes=0] // (select cc y, 0) [AllOnes=0] // (zext cc) [AllOnes=0] // (sext cc) [AllOnes=0/1] // (select cc -1, y) [AllOnes=1] // (select cc y, -1) [AllOnes=1] // // Invert is set when N is the null/all ones constant when CC is false. // OtherOp is set to the alternative value of N. 
static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, SDValue &CC, bool &Invert, SDValue &OtherOp, SelectionDAG &DAG) { switch (N->getOpcode()) { default: return false; case ISD::SELECT: { CC = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); if (isZeroOrAllOnes(N1, AllOnes)) { Invert = false; OtherOp = N2; return true; } if (isZeroOrAllOnes(N2, AllOnes)) { Invert = true; OtherOp = N1; return true; } return false; } case ISD::ZERO_EXTEND: // (zext cc) can never be the all ones value. if (AllOnes) return false; LLVM_FALLTHROUGH; case ISD::SIGN_EXTEND: { SDLoc dl(N); EVT VT = N->getValueType(0); CC = N->getOperand(0); if (CC.getValueType() != MVT::i1) return false; Invert = !AllOnes; if (AllOnes) // When looking for an AllOnes constant, N is an sext, and the 'other' // value is 0. OtherOp = DAG.getConstant(0, dl, VT); else if (N->getOpcode() == ISD::ZERO_EXTEND) // When looking for a 0 constant, N can be zext or sext. OtherOp = DAG.getConstant(1, dl, VT); else OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl, VT); return true; } } } // Combine a constant select operand into its use: // // (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) // (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) // (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1] // (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) // (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) // // The transform is rejected if the select doesn't have a constant operand that // is null, or all ones when AllOnes is set. // // Also recognize sext/zext from i1: // // (add (zext cc), x) -> (select cc (add x, 1), x) // (add (sext cc), x) -> (select cc (add x, -1), x) // // These transformations eventually create predicated instructions. // // @param N The node to transform. // @param Slct The N operand that is a select. // @param OtherOp The other N operand (x above). // @param DCI Context. 
// @param AllOnes Require the select constant to be all ones instead of null. // @returns The new node, or SDValue() on failure. static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes = false) { SelectionDAG &DAG = DCI.DAG; EVT VT = N->getValueType(0); SDValue NonConstantVal; SDValue CCOp; bool SwapSelectOps; if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps, NonConstantVal, DAG)) return SDValue(); // Slct is now know to be the desired identity constant when CC is true. SDValue TrueVal = OtherOp; SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal); // Unless SwapSelectOps says CC should be false. if (SwapSelectOps) std::swap(TrueVal, FalseVal); return DAG.getNode(ISD::SELECT, SDLoc(N), VT, CCOp, TrueVal, FalseVal); } // Attempt combineSelectAndUse on each operand of a commutative operator N. static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); if (N0.getNode()->hasOneUse()) if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes)) return Result; if (N1.getNode()->hasOneUse()) if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes)) return Result; return SDValue(); } static bool IsVUZPShuffleNode(SDNode *N) { // VUZP shuffle node. if (N->getOpcode() == ARMISD::VUZP) return true; // "VUZP" on i32 is an alias for VTRN. if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32) return true; return false; } static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // Look for ADD(VUZP.0, VUZP.1). if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() || N0 == N1) return SDValue(); // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD. 
if (!N->getValueType(0).is64BitVector()) return SDValue(); // Generate vpadd. SelectionDAG &DAG = DCI.DAG; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc dl(N); SDNode *Unzip = N0.getNode(); EVT VT = N->getValueType(0); SmallVector Ops; Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl, TLI.getPointerTy(DAG.getDataLayout()))); Ops.push_back(Unzip->getOperand(0)); Ops.push_back(Unzip->getOperand(1)); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops); } static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // Check for two extended operands. if (!(N0.getOpcode() == ISD::SIGN_EXTEND && N1.getOpcode() == ISD::SIGN_EXTEND) && !(N0.getOpcode() == ISD::ZERO_EXTEND && N1.getOpcode() == ISD::ZERO_EXTEND)) return SDValue(); SDValue N00 = N0.getOperand(0); SDValue N10 = N1.getOperand(0); // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1)) if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() || N00 == N10) return SDValue(); // We only recognize Q register paddl here; this can't be reached until // after type legalization. if (!N00.getValueType().is64BitVector() || !N0.getValueType().is128BitVector()) return SDValue(); // Generate vpaddl. SelectionDAG &DAG = DCI.DAG; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc dl(N); EVT VT = N->getValueType(0); SmallVector Ops; // Form vpaddl.sN or vpaddl.uN depending on the kind of extension. 
unsigned Opcode; if (N0.getOpcode() == ISD::SIGN_EXTEND) Opcode = Intrinsic::arm_neon_vpaddls; else Opcode = Intrinsic::arm_neon_vpaddlu; Ops.push_back(DAG.getConstant(Opcode, dl, TLI.getPointerTy(DAG.getDataLayout()))); EVT ElemTy = N00.getValueType().getVectorElementType(); unsigned NumElts = VT.getVectorNumElements(); EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2); SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT, N00.getOperand(0), N00.getOperand(1)); Ops.push_back(Concat); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops); } // FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in // an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is // much easier to match. static SDValue AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // Only perform optimization if after legalize, and if NEON is available. We // also expected both operands to be BUILD_VECTORs. if (DCI.isBeforeLegalize() || !Subtarget->hasNEON() || N0.getOpcode() != ISD::BUILD_VECTOR || N1.getOpcode() != ISD::BUILD_VECTOR) return SDValue(); // Check output type since VPADDL operand elements can only be 8, 16, or 32. EVT VT = N->getValueType(0); if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64) return SDValue(); // Check that the vector operands are of the right form. // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR // operands, where N is the size of the formed vector. // Each EXTRACT_VECTOR should have the same input vector and odd or even // index such that we have a pair wise add pattern. // Grab the vector that all EXTRACT_VECTOR nodes should be referencing. 
if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT) return SDValue(); SDValue Vec = N0->getOperand(0)->getOperand(0); SDNode *V = Vec.getNode(); unsigned nextIndex = 0; // For each operands to the ADD which are BUILD_VECTORs, // check to see if each of their operands are an EXTRACT_VECTOR with // the same vector and appropriate index. for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) { if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) { SDValue ExtVec0 = N0->getOperand(i); SDValue ExtVec1 = N1->getOperand(i); // First operand is the vector, verify its the same. if (V != ExtVec0->getOperand(0).getNode() || V != ExtVec1->getOperand(0).getNode()) return SDValue(); // Second is the constant, verify its correct. ConstantSDNode *C0 = dyn_cast(ExtVec0->getOperand(1)); ConstantSDNode *C1 = dyn_cast(ExtVec1->getOperand(1)); // For the constant, we want to see all the even or all the odd. if (!C0 || !C1 || C0->getZExtValue() != nextIndex || C1->getZExtValue() != nextIndex+1) return SDValue(); // Increment index. nextIndex+=2; } else return SDValue(); } // Don't generate vpaddl+vmovn; we'll match it to vpadd later. if (Vec.getValueType().getVectorElementType() == VT.getVectorElementType()) return SDValue(); // Create VPADDL node. SelectionDAG &DAG = DCI.DAG; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc dl(N); // Build operand list. SmallVector Ops; Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl, TLI.getPointerTy(DAG.getDataLayout()))); // Input is the vector. Ops.push_back(Vec); // Get widened type and narrowed type. 
MVT widenType; unsigned numElem = VT.getVectorNumElements(); EVT inputLaneType = Vec.getValueType().getVectorElementType(); switch (inputLaneType.getSimpleVT().SimpleTy) { case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break; case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break; case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break; default: llvm_unreachable("Invalid vector element type for padd optimization."); } SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops); unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE; return DAG.getNode(ExtOp, dl, VT, tmp); } static SDValue findMUL_LOHI(SDValue V) { if (V->getOpcode() == ISD::UMUL_LOHI || V->getOpcode() == ISD::SMUL_LOHI) return V; return SDValue(); } static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // Look for multiply add opportunities. // The pattern is a ISD::UMUL_LOHI followed by two add nodes, where // each add nodes consumes a value from ISD::UMUL_LOHI and there is // a glue link from the first add to the second add. // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by // a S/UMLAL instruction. // UMUL_LOHI // / :lo \ :hi // / \ [no multiline comment] // loAdd -> ADDE | // \ :glue / // \ / // ADDC <- hiAdd // assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC"); SDValue AddcOp0 = AddcNode->getOperand(0); SDValue AddcOp1 = AddcNode->getOperand(1); // Check if the two operands are from the same mul_lohi node. if (AddcOp0.getNode() == AddcOp1.getNode()) return SDValue(); assert(AddcNode->getNumValues() == 2 && AddcNode->getValueType(0) == MVT::i32 && "Expect ADDC with two result values. First: i32"); // Check that we have a glued ADDC node. if (AddcNode->getValueType(1) != MVT::Glue) return SDValue(); // Check that the ADDC adds the low result of the S/UMUL_LOHI. 
if (AddcOp0->getOpcode() != ISD::UMUL_LOHI && AddcOp0->getOpcode() != ISD::SMUL_LOHI && AddcOp1->getOpcode() != ISD::UMUL_LOHI && AddcOp1->getOpcode() != ISD::SMUL_LOHI) return SDValue(); // Look for the glued ADDE. SDNode* AddeNode = AddcNode->getGluedUser(); if (!AddeNode) return SDValue(); // Make sure it is really an ADDE. if (AddeNode->getOpcode() != ISD::ADDE) return SDValue(); assert(AddeNode->getNumOperands() == 3 && AddeNode->getOperand(2).getValueType() == MVT::Glue && "ADDE node has the wrong inputs"); // Check for the triangle shape. SDValue AddeOp0 = AddeNode->getOperand(0); SDValue AddeOp1 = AddeNode->getOperand(1); // Make sure that the ADDE operands are not coming from the same node. if (AddeOp0.getNode() == AddeOp1.getNode()) return SDValue(); // Find the MUL_LOHI node walking up ADDE's operands. bool IsLeftOperandMUL = false; SDValue MULOp = findMUL_LOHI(AddeOp0); if (MULOp == SDValue()) MULOp = findMUL_LOHI(AddeOp1); else IsLeftOperandMUL = true; if (MULOp == SDValue()) return SDValue(); // Figure out the right opcode. unsigned Opc = MULOp->getOpcode(); unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL; // Figure out the high and low input values to the MLAL node. SDValue* HiAdd = nullptr; SDValue* LoMul = nullptr; SDValue* LowAdd = nullptr; // Ensure that ADDE is from high result of ISD::SMUL_LOHI. if ((AddeOp0 != MULOp.getValue(1)) && (AddeOp1 != MULOp.getValue(1))) return SDValue(); if (IsLeftOperandMUL) HiAdd = &AddeOp1; else HiAdd = &AddeOp0; // Ensure that LoMul and LowAdd are taken from correct ISD::SMUL_LOHI node // whose low result is fed to the ADDC we are checking. if (AddcOp0 == MULOp.getValue(0)) { LoMul = &AddcOp0; LowAdd = &AddcOp1; } if (AddcOp1 == MULOp.getValue(0)) { LoMul = &AddcOp1; LowAdd = &AddcOp0; } if (!LoMul) return SDValue(); // Create the merged node. SelectionDAG &DAG = DCI.DAG; // Build operand list. 
SmallVector Ops; Ops.push_back(LoMul->getOperand(0)); Ops.push_back(LoMul->getOperand(1)); Ops.push_back(*LowAdd); Ops.push_back(*HiAdd); SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode), DAG.getVTList(MVT::i32, MVT::i32), Ops); // Replace the ADDs' nodes uses by the MLA node's values. SDValue HiMLALResult(MLALNode.getNode(), 1); DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult); SDValue LoMLALResult(MLALNode.getNode(), 0); DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult); // Return original node to notify the driver to stop replacing. SDValue resNode(AddcNode, 0); return resNode; } static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // UMAAL is similar to UMLAL except that it adds two unsigned values. // While trying to combine for the other MLAL nodes, first search for the // chance to use UMAAL. Check if Addc uses another addc node which can first // be combined into a UMLAL. The other pattern is AddcNode being combined // into an UMLAL and then using another addc is handled in ISelDAGToDAG. if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() || (Subtarget->isThumb() && !Subtarget->hasThumb2())) return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget); SDNode *PrevAddc = nullptr; if (AddcNode->getOperand(0).getOpcode() == ISD::ADDC) PrevAddc = AddcNode->getOperand(0).getNode(); else if (AddcNode->getOperand(1).getOpcode() == ISD::ADDC) PrevAddc = AddcNode->getOperand(1).getNode(); // If there's no addc chains, just return a search for any MLAL. if (PrevAddc == nullptr) return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget); // Try to convert the addc operand to an MLAL and if that fails try to // combine AddcNode. SDValue MLAL = AddCombineTo64bitMLAL(PrevAddc, DCI, Subtarget); if (MLAL != SDValue(PrevAddc, 0)) return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget); // Find the converted UMAAL or quit if it doesn't exist. 
SDNode *UmlalNode = nullptr; SDValue AddHi; if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) { UmlalNode = AddcNode->getOperand(0).getNode(); AddHi = AddcNode->getOperand(1); } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) { UmlalNode = AddcNode->getOperand(1).getNode(); AddHi = AddcNode->getOperand(0); } else { return SDValue(); } // The ADDC should be glued to an ADDE node, which uses the same UMLAL as // the ADDC as well as Zero. auto *Zero = dyn_cast(UmlalNode->getOperand(3)); if (!Zero || Zero->getZExtValue() != 0) return SDValue(); // Check that we have a glued ADDC node. if (AddcNode->getValueType(1) != MVT::Glue) return SDValue(); // Look for the glued ADDE. SDNode* AddeNode = AddcNode->getGluedUser(); if (!AddeNode) return SDValue(); if ((AddeNode->getOperand(0).getNode() == Zero && AddeNode->getOperand(1).getNode() == UmlalNode) || (AddeNode->getOperand(0).getNode() == UmlalNode && AddeNode->getOperand(1).getNode() == Zero)) { SelectionDAG &DAG = DCI.DAG; SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1), UmlalNode->getOperand(2), AddHi }; SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode), DAG.getVTList(MVT::i32, MVT::i32), Ops); // Replace the ADDs' nodes uses by the UMAAL node's values. DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1)); DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0)); // Return original node to notify the driver to stop replacing. return SDValue(AddcNode, 0); } return SDValue(); } /// PerformADDCCombine - Target-specific dag combine transform from /// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL or /// ISD::ADDC, ISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL static SDValue PerformADDCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { if (Subtarget->isThumb1Only()) return SDValue(); // Only perform the checks after legalize when the pattern is available. 
if (DCI.isBeforeLegalize()) return SDValue(); return AddCombineTo64bitUMAAL(N, DCI, Subtarget); } /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with /// operands N0 and N1. This is a helper for PerformADDCombine that is /// called with the default operands, and if that fails, with commuted /// operands. static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget){ // Attempt to create vpadd for this add. if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget)) return Result; // Attempt to create vpaddl for this add. if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget)) return Result; if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI, Subtarget)) return Result; // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) if (N0.getNode()->hasOneUse()) if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI)) return Result; return SDValue(); } /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. /// static SDValue PerformADDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // First try with the default operand order. if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget)) return Result; // If that didn't work, try again with the operands commuted. return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget); } /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. 
/// static SDValue PerformSUBCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) if (N1.getNode()->hasOneUse()) if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI)) return Result; return SDValue(); } /// PerformVMULCombine /// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the /// special multiplier accumulator forwarding. /// vmul d3, d0, d2 /// vmla d3, d1, d2 /// is faster than /// vadd d3, d0, d1 /// vmul d3, d3, d2 // However, for (A + B) * (A + B), // vadd d2, d0, d1 // vmul d3, d0, d2 // vmla d3, d1, d2 // is slower than // vadd d2, d0, d1 // vmul d3, d2, d2 static SDValue PerformVMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { if (!Subtarget->hasVMLxForwarding()) return SDValue(); SelectionDAG &DAG = DCI.DAG; SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); unsigned Opcode = N0.getOpcode(); if (Opcode != ISD::ADD && Opcode != ISD::SUB && Opcode != ISD::FADD && Opcode != ISD::FSUB) { Opcode = N1.getOpcode(); if (Opcode != ISD::ADD && Opcode != ISD::SUB && Opcode != ISD::FADD && Opcode != ISD::FSUB) return SDValue(); std::swap(N0, N1); } if (N0 == N1) return SDValue(); EVT VT = N->getValueType(0); SDLoc DL(N); SDValue N00 = N0->getOperand(0); SDValue N01 = N0->getOperand(1); return DAG.getNode(Opcode, DL, VT, DAG.getNode(ISD::MUL, DL, VT, N00, N1), DAG.getNode(ISD::MUL, DL, VT, N01, N1)); } static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { SelectionDAG &DAG = DCI.DAG; if (Subtarget->isThumb1Only()) return SDValue(); if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) return SDValue(); EVT VT = N->getValueType(0); if (VT.is64BitVector() || VT.is128BitVector()) return PerformVMULCombine(N, DCI, Subtarget); if (VT != MVT::i32) return SDValue(); ConstantSDNode *C = 
dyn_cast(N->getOperand(1)); if (!C) return SDValue(); int64_t MulAmt = C->getSExtValue(); unsigned ShiftAmt = countTrailingZeros(MulAmt); ShiftAmt = ShiftAmt & (32 - 1); SDValue V = N->getOperand(0); SDLoc DL(N); SDValue Res; MulAmt >>= ShiftAmt; if (MulAmt >= 0) { if (isPowerOf2_32(MulAmt - 1)) { // (mul x, 2^N + 1) => (add (shl x, N), x) Res = DAG.getNode(ISD::ADD, DL, VT, V, DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Log2_32(MulAmt - 1), DL, MVT::i32))); } else if (isPowerOf2_32(MulAmt + 1)) { // (mul x, 2^N - 1) => (sub (shl x, N), x) Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Log2_32(MulAmt + 1), DL, MVT::i32)), V); } else return SDValue(); } else { uint64_t MulAmtAbs = -MulAmt; if (isPowerOf2_32(MulAmtAbs + 1)) { // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) Res = DAG.getNode(ISD::SUB, DL, VT, V, DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Log2_32(MulAmtAbs + 1), DL, MVT::i32))); } else if (isPowerOf2_32(MulAmtAbs - 1)) { // (mul x, -(2^N + 1)) => - (add (shl x, N), x) Res = DAG.getNode(ISD::ADD, DL, VT, V, DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Log2_32(MulAmtAbs - 1), DL, MVT::i32))); Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, MVT::i32), Res); } else return SDValue(); } if (ShiftAmt != 0) Res = DAG.getNode(ISD::SHL, DL, VT, Res, DAG.getConstant(ShiftAmt, DL, MVT::i32)); // Do not add new nodes to DAG combiner worklist. 
DCI.CombineTo(N, Res, false); return SDValue(); } static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // Attempt to use immediate-form VBIC BuildVectorSDNode *BVN = dyn_cast(N->getOperand(1)); SDLoc dl(N); EVT VT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; if (BVN && BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { if (SplatBitSize <= 64) { EVT VbicVT; SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(), SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VbicVT, VT.is128BitVector(), OtherModImm); if (Val.getNode()) { SDValue Input = DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0)); SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val); return DAG.getNode(ISD::BITCAST, dl, VT, Vbic); } } } if (!Subtarget->isThumb1Only()) { // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI)) return Result; } return SDValue(); } /// PerformORCombine - Target-specific dag combine xforms for ISD::OR static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // Attempt to use immediate-form VORR BuildVectorSDNode *BVN = dyn_cast(N->getOperand(1)); SDLoc dl(N); EVT VT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; if (BVN && Subtarget->hasNEON() && BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { if (SplatBitSize <= 64) { EVT VorrVT; SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VorrVT, VT.is128BitVector(), OtherModImm); if (Val.getNode()) { SDValue Input = 
DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); } } } if (!Subtarget->isThumb1Only()) { // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI)) return Result; } // The code below optimizes (or (and X, Y), Z). // The AND operand needs to have a single user to make these optimizations // profitable. SDValue N0 = N->getOperand(0); if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) return SDValue(); SDValue N1 = N->getOperand(1); // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant. if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT)) { APInt SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; APInt SplatBits0, SplatBits1; BuildVectorSDNode *BVN0 = dyn_cast(N0->getOperand(1)); BuildVectorSDNode *BVN1 = dyn_cast(N1->getOperand(1)); // Ensure that the second operand of both ands are constants if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, HasAnyUndefs) && !HasAnyUndefs) { if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, HasAnyUndefs) && !HasAnyUndefs) { // Ensure that the bit width of the constants are the same and that // the splat arguments are logical inverses as per the pattern we // are trying to simplify. if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() && SplatBits0 == ~SplatBits1) { // Canonicalize the vector type to make instruction selection // simpler. EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, N0->getOperand(1), N0->getOperand(0), N1->getOperand(0)); return DAG.getNode(ISD::BITCAST, dl, VT, Result); } } } } // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when // reasonable. 
// BFI is only available on V6T2+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) return SDValue(); SDLoc DL(N); // 1) or (and A, mask), val => ARMbfi A, val, mask // iff (val & mask) == val // // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2) // && mask == ~mask2 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2) // && ~mask == mask2 // (i.e., copy a bitfield value into another bitfield of the same width) if (VT != MVT::i32) return SDValue(); SDValue N00 = N0.getOperand(0); // The value and the mask need to be constants so we can verify this is // actually a bitfield set. If the mask is 0xffff, we can do better // via a movt instruction, so don't use BFI in that case. SDValue MaskOp = N0.getOperand(1); ConstantSDNode *MaskC = dyn_cast(MaskOp); if (!MaskC) return SDValue(); unsigned Mask = MaskC->getZExtValue(); if (Mask == 0xffff) return SDValue(); SDValue Res; // Case (1): or (and A, mask), val => ARMbfi A, val, mask ConstantSDNode *N1C = dyn_cast(N1); if (N1C) { unsigned Val = N1C->getZExtValue(); if ((Val & ~Mask) != Val) return SDValue(); if (ARM::isBitFieldInvertedMask(Mask)) { Val >>= countTrailingZeros(~Mask); Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, DAG.getConstant(Val, DL, MVT::i32), DAG.getConstant(Mask, DL, MVT::i32)); // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); return SDValue(); } } else if (N1.getOpcode() == ISD::AND) { // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask ConstantSDNode *N11C = dyn_cast(N1.getOperand(1)); if (!N11C) return SDValue(); unsigned Mask2 = N11C->getZExtValue(); // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern // as is to match. if (ARM::isBitFieldInvertedMask(Mask) && (Mask == ~Mask2)) { // The pack halfword instruction works better for masks that fit it, // so use that when it's available. 
if (Subtarget->hasT2ExtractPack() && (Mask == 0xffff || Mask == 0xffff0000)) return SDValue(); // 2a unsigned amt = countTrailingZeros(Mask2); Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), DAG.getConstant(amt, DL, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res, DAG.getConstant(Mask, DL, MVT::i32)); // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); return SDValue(); } else if (ARM::isBitFieldInvertedMask(~Mask) && (~Mask == Mask2)) { // The pack halfword instruction works better for masks that fit it, // so use that when it's available. if (Subtarget->hasT2ExtractPack() && (Mask2 == 0xffff || Mask2 == 0xffff0000)) return SDValue(); // 2b unsigned lsb = countTrailingZeros(Mask); Res = DAG.getNode(ISD::SRL, DL, VT, N00, DAG.getConstant(lsb, DL, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, DAG.getConstant(Mask2, DL, MVT::i32)); // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); return SDValue(); } } if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) && N00.getOpcode() == ISD::SHL && isa(N00.getOperand(1)) && ARM::isBitFieldInvertedMask(~Mask)) { // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask // where lsb(mask) == #shamt and masked bits of B are known zero. SDValue ShAmt = N00.getOperand(1); unsigned ShAmtC = cast(ShAmt)->getZExtValue(); unsigned LSB = countTrailingZeros(Mask); if (ShAmtC != LSB) return SDValue(); Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0), DAG.getConstant(~Mask, DL, MVT::i32)); // Do not add new nodes to DAG combiner worklist. 
DCI.CombineTo(N, Res, false); } return SDValue(); } static SDValue PerformXORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { EVT VT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); if (!Subtarget->isThumb1Only()) { // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI)) return Result; } return SDValue(); } // ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it, // and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and // their position in "to" (Rd). static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) { assert(N->getOpcode() == ARMISD::BFI); SDValue From = N->getOperand(1); ToMask = ~cast(N->getOperand(2))->getAPIntValue(); FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.countPopulation()); // If the Base came from a SHR #C, we can deduce that it is really testing bit // #C in the base of the SHR. if (From->getOpcode() == ISD::SRL && isa(From->getOperand(1))) { APInt Shift = cast(From->getOperand(1))->getAPIntValue(); assert(Shift.getLimitedValue() < 32 && "Shift too large!"); FromMask <<= Shift.getLimitedValue(31); From = From->getOperand(0); } return From; } // If A and B contain one contiguous set of bits, does A | B == A . B? // // Neither A nor B must be zero. static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) { unsigned LastActiveBitInA = A.countTrailingZeros(); unsigned FirstActiveBitInB = B.getBitWidth() - B.countLeadingZeros() - 1; return LastActiveBitInA - 1 == FirstActiveBitInB; } static SDValue FindBFIToCombineWith(SDNode *N) { // We have a BFI in N. Follow a possible chain of BFIs and find a BFI it can combine with, // if one exists. 
APInt ToMask, FromMask; SDValue From = ParseBFI(N, ToMask, FromMask); SDValue To = N->getOperand(0); // Now check for a compatible BFI to merge with. We can pass through BFIs that // aren't compatible, but not if they set the same bit in their destination as // we do (or that of any BFI we're going to combine with). SDValue V = To; APInt CombinedToMask = ToMask; while (V.getOpcode() == ARMISD::BFI) { APInt NewToMask, NewFromMask; SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask); if (NewFrom != From) { // This BFI has a different base. Keep going. CombinedToMask |= NewToMask; V = V.getOperand(0); continue; } // Do the written bits conflict with any we've seen so far? if ((NewToMask & CombinedToMask).getBoolValue()) // Conflicting bits - bail out because going further is unsafe. return SDValue(); // Are the new bits contiguous when combined with the old bits? if (BitsProperlyConcatenate(ToMask, NewToMask) && BitsProperlyConcatenate(FromMask, NewFromMask)) return V; if (BitsProperlyConcatenate(NewToMask, ToMask) && BitsProperlyConcatenate(NewFromMask, FromMask)) return V; // We've seen a write to some bits, so track it. CombinedToMask |= NewToMask; // Keep going... V = V.getOperand(0); } return SDValue(); } static SDValue PerformBFICombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SDValue N1 = N->getOperand(1); if (N1.getOpcode() == ISD::AND) { // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff // the bits being cleared by the AND are not demanded by the BFI. 
ConstantSDNode *N11C = dyn_cast(N1.getOperand(1)); if (!N11C) return SDValue(); unsigned InvMask = cast(N->getOperand(2))->getZExtValue(); unsigned LSB = countTrailingZeros(~InvMask); unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB; assert(Width < static_cast(std::numeric_limits::digits) && "undefined behavior"); unsigned Mask = (1u << Width) - 1; unsigned Mask2 = N11C->getZExtValue(); if ((Mask & (~Mask2)) == 0) return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0), N->getOperand(0), N1.getOperand(0), N->getOperand(2)); } else if (N->getOperand(0).getOpcode() == ARMISD::BFI) { // We have a BFI of a BFI. Walk up the BFI chain to see how long it goes. // Keep track of any consecutive bits set that all come from the same base // value. We can combine these together into a single BFI. SDValue CombineBFI = FindBFIToCombineWith(N); if (CombineBFI == SDValue()) return SDValue(); // We've found a BFI. APInt ToMask1, FromMask1; SDValue From1 = ParseBFI(N, ToMask1, FromMask1); APInt ToMask2, FromMask2; SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2); assert(From1 == From2); (void)From2; // First, unlink CombineBFI. DCI.DAG.ReplaceAllUsesWith(CombineBFI, CombineBFI.getOperand(0)); // Then create a new BFI, combining the two together. APInt NewFromMask = FromMask1 | FromMask2; APInt NewToMask = ToMask1 | ToMask2; EVT VT = N->getValueType(0); SDLoc dl(N); if (NewFromMask[0] == 0) From1 = DCI.DAG.getNode( ISD::SRL, dl, VT, From1, DCI.DAG.getConstant(NewFromMask.countTrailingZeros(), dl, VT)); return DCI.DAG.getNode(ARMISD::BFI, dl, VT, N->getOperand(0), From1, DCI.DAG.getConstant(~NewToMask, dl, VT)); } return SDValue(); } /// PerformVMOVRRDCombine - Target-specific dag combine xforms for /// ARMISD::VMOVRRD. 
static SDValue PerformVMOVRRDCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     const ARMSubtarget *Subtarget) {
  // vmovrrd(vmovdrr x, y) -> x,y
  SDValue InDouble = N->getOperand(0);
  if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP())
    return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));

  // vmovrrd(load f64) -> (load i32), (load i32)
  SDNode *InNode = InDouble.getNode();
  if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
      InNode->getValueType(0) == MVT::f64 &&
      InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
      !cast<LoadSDNode>(InNode)->isVolatile()) {
    // TODO: Should this be done for non-FrameIndex operands?
    LoadSDNode *LD = cast<LoadSDNode>(InNode);

    SelectionDAG &DAG = DCI.DAG;
    SDLoc DL(LD);
    SDValue BasePtr = LD->getBasePtr();

    // First word: same address, pointer info and alignment as the f64 load.
    SDValue NewLD1 =
        DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
                    LD->getAlignment(), LD->getMemOperand()->getFlags());

    // Second word lives 4 bytes past the base. Describe that to the memory
    // operand as well: the pointer info must carry the +4 offset, and the
    // alignment is whatever can still be assumed after adding 4 to an address
    // with the original alignment. The previous min(4, align / 2) produced 0
    // for a 1-byte-aligned load (0 is assumed here to select the type's
    // natural alignment — confirm against getLoad) and under-reported a
    // 4-byte-aligned one.
    SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
                                    DAG.getConstant(4, DL, MVT::i32));
    SDValue NewLD2 = DAG.getLoad(
        MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
        LD->getPointerInfo().getWithOffset(4),
        MinAlign(LD->getAlignment(), 4), LD->getMemOperand()->getFlags());

    // Users of the old load's chain now depend on the second new load.
    DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
    // The two loaded words are swapped for big-endian data layouts.
    if (DCI.DAG.getDataLayout().isBigEndian())
      std::swap (NewLD1, NewLD2);
    SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
    return Result;
  }

  return SDValue();
}

/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVDRR.  This is also used for BUILD_VECTORs with 2 operands.
static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X) SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); if (Op0.getOpcode() == ISD::BITCAST) Op0 = Op0.getOperand(0); if (Op1.getOpcode() == ISD::BITCAST) Op1 = Op1.getOperand(0); if (Op0.getOpcode() == ARMISD::VMOVRRD && Op0.getNode() == Op1.getNode() && Op0.getResNo() == 0 && Op1.getResNo() == 1) return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0.getOperand(0)); return SDValue(); } /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node /// are normal, non-volatile loads. If so, it is profitable to bitcast an /// i64 vector to have f64 elements, since the value can then be loaded /// directly into a VFP register. static bool hasNormalLoadOperand(SDNode *N) { unsigned NumElts = N->getValueType(0).getVectorNumElements(); for (unsigned i = 0; i < NumElts; ++i) { SDNode *Elt = N->getOperand(i).getNode(); if (ISD::isNormalLoad(Elt) && !cast(Elt)->isVolatile()) return true; } return false; } /// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for /// ISD::BUILD_VECTOR. static SDValue PerformBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X): // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value // into a pair of GPRs, which is fine when the value is used as a scalar, // but if the i64 value is converted to a vector, we need to undo the VMOVRRD. SelectionDAG &DAG = DCI.DAG; if (N->getNumOperands() == 2) if (SDValue RV = PerformVMOVDRRCombine(N, DAG)) return RV; // Load i64 elements as f64 values so that type legalization does not split // them up into i32 values. 
EVT VT = N->getValueType(0); if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N)) return SDValue(); SDLoc dl(N); SmallVector Ops; unsigned NumElts = VT.getVectorNumElements(); for (unsigned i = 0; i < NumElts; ++i) { SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i)); Ops.push_back(V); // Make the DAGCombiner fold the bitcast. DCI.AddToWorklist(V.getNode()); } EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts); SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, BV); } /// \brief Target-specific dag combine xforms for ARMISD::BUILD_VECTOR. static SDValue PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR. // At that time, we may have inserted bitcasts from integer to float. // If these bitcasts have survived DAGCombine, change the lowering of this // BUILD_VECTOR in something more vector friendly, i.e., that does not // force to use floating point types. // Make sure we can change the type of the vector. // This is possible iff: // 1. The vector is only used in a bitcast to a integer type. I.e., // 1.1. Vector is used only once. // 1.2. Use is a bit convert to an integer type. // 2. The size of its operands are 32-bits (64-bits are not legal). EVT VT = N->getValueType(0); EVT EltVT = VT.getVectorElementType(); // Check 1.1. and 2. if (EltVT.getSizeInBits() != 32 || !N->hasOneUse()) return SDValue(); // By construction, the input type must be float. assert(EltVT == MVT::f32 && "Unexpected type!"); // Check 1.2. SDNode *Use = *N->use_begin(); if (Use->getOpcode() != ISD::BITCAST || Use->getValueType(0).isFloatingPoint()) return SDValue(); // Check profitability. // Model is, if more than half of the relevant operands are bitcast from // i32, turn the build_vector into a sequence of insert_vector_elt. 
// Relevant operands are everything that is not statically // (i.e., at compile time) bitcasted. unsigned NumOfBitCastedElts = 0; unsigned NumElts = VT.getVectorNumElements(); unsigned NumOfRelevantElts = NumElts; for (unsigned Idx = 0; Idx < NumElts; ++Idx) { SDValue Elt = N->getOperand(Idx); if (Elt->getOpcode() == ISD::BITCAST) { // Assume only bit cast to i32 will go away. if (Elt->getOperand(0).getValueType() == MVT::i32) ++NumOfBitCastedElts; } else if (Elt.isUndef() || isa(Elt)) // Constants are statically casted, thus do not count them as // relevant operands. --NumOfRelevantElts; } // Check if more than half of the elements require a non-free bitcast. if (NumOfBitCastedElts <= NumOfRelevantElts / 2) return SDValue(); SelectionDAG &DAG = DCI.DAG; // Create the new vector type. EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); // Check if the type is legal. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!TLI.isTypeLegal(VecVT)) return SDValue(); // Combine: // ARMISD::BUILD_VECTOR E1, E2, ..., EN. // => BITCAST INSERT_VECTOR_ELT // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1), // (BITCAST EN), N. SDValue Vec = DAG.getUNDEF(VecVT); SDLoc dl(N); for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) { SDValue V = N->getOperand(Idx); if (V.isUndef()) continue; if (V.getOpcode() == ISD::BITCAST && V->getOperand(0).getValueType() == MVT::i32) // Fold obvious case. V = V.getOperand(0); else { V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V); // Make the DAGCombiner fold the bitcasts. DCI.AddToWorklist(V.getNode()); } SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32); Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx); } Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec); // Make the DAGCombiner fold the bitcasts. DCI.AddToWorklist(Vec.getNode()); return Vec; } /// PerformInsertEltCombine - Target-specific dag combine xforms for /// ISD::INSERT_VECTOR_ELT. 
static SDValue PerformInsertEltCombine(SDNode *N,
                                       TargetLowering::DAGCombinerInfo &DCI) {
  // Bitcast an i64 load inserted into a vector to f64.
  // Otherwise, the i64 value will be legalized to a pair of i32 values.
  EVT VT = N->getValueType(0);
  SDNode *Elt = N->getOperand(1).getNode();
  if (VT.getVectorElementType() != MVT::i64 ||
      !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
                                 VT.getVectorNumElements());
  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
  // Make the DAGCombiner fold the bitcasts.
  DCI.AddToWorklist(Vec.getNode());
  DCI.AddToWorklist(V.getNode());
  SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
                               Vec, V, N->getOperand(2));
  return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
}

/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
/// ISD::VECTOR_SHUFFLE.
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
  // The LLVM shufflevector instruction does not require the shuffle mask
  // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
  // have that requirement.  When translating to ISD::VECTOR_SHUFFLE, if the
  // operands do not match the mask length, they are extended by concatenating
  // them with undef vectors.  That is probably the right thing for other
  // targets, but for NEON it is better to concatenate two double-register
  // size vector operands into a single quad-register size vector.  Do that
  // transformation here:
  //   shuffle(concat(v1, undef), concat(v2, undef)) ->
  //   shuffle(concat(v1, v2), undef)
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
      Op1.getOpcode() != ISD::CONCAT_VECTORS ||
      Op0.getNumOperands() != 2 ||
      Op1.getNumOperands() != 2)
    return SDValue();
  SDValue Concat0Op1 = Op0.getOperand(1);
  SDValue Concat1Op1 = Op1.getOperand(1);
  if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
    return SDValue();
  // Skip the transformation if any of the types are illegal.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT VT = N->getValueType(0);
  if (!TLI.isTypeLegal(VT) ||
      !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
      !TLI.isTypeLegal(Concat1Op1.getValueType()))
    return SDValue();

  SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
                                  Op0.getOperand(0), Op1.getOperand(0));
  // Translate the shuffle mask.
  SmallVector<int, 16> NewMask;
  unsigned NumElts = VT.getVectorNumElements();
  unsigned HalfElts = NumElts/2;
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
  for (unsigned n = 0; n < NumElts; ++n) {
    int MaskElt = SVN->getMaskElt(n);
    // Lanes that referred to the undef halves stay -1 (undef) in the new mask.
    int NewElt = -1;
    if (MaskElt < (int)HalfElts)
      NewElt = MaskElt;
    else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
      NewElt = HalfElts + MaskElt - NumElts;
    NewMask.push_back(NewElt);
  }
  return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
                              DAG.getUNDEF(VT), NewMask);
}

/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
/// NEON load/store intrinsics, and generic vector load/stores, to merge
/// base address updates.
/// For generic load/stores, the memory type is assumed to be a vector.
/// The caller is assumed to have checked legality.
static SDValue CombineBaseUpdate(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SelectionDAG &DAG = DCI.DAG; const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID || N->getOpcode() == ISD::INTRINSIC_W_CHAIN); const bool isStore = N->getOpcode() == ISD::STORE; const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1); SDValue Addr = N->getOperand(AddrOpIdx); MemSDNode *MemN = cast(N); SDLoc dl(N); // Search for a use of the address operand that is an increment. for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE = Addr.getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User->getOpcode() != ISD::ADD || UI.getUse().getResNo() != Addr.getResNo()) continue; // Check that the add is independent of the load/store. Otherwise, folding // it would create a cycle. if (User->isPredecessorOf(N) || N->isPredecessorOf(User)) continue; // Find the new opcode for the updating load/store. bool isLoadOp = true; bool isLaneOp = false; unsigned NewOpc = 0; unsigned NumVecs = 0; if (isIntrinsic) { unsigned IntNo = cast(N->getOperand(1))->getZExtValue(); switch (IntNo) { default: llvm_unreachable("unexpected intrinsic for Neon base update"); case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD; NumVecs = 1; break; case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD; NumVecs = 2; break; case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD; NumVecs = 3; break; case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD; NumVecs = 4; break; case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD; NumVecs = 2; isLaneOp = true; break; case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD; NumVecs = 3; isLaneOp = true; break; case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD; NumVecs = 4; isLaneOp = true; break; case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD; NumVecs = 1; isLoadOp = false; break; case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD; NumVecs = 2; isLoadOp = false; break; case 
Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD; NumVecs = 3; isLoadOp = false; break; case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD; NumVecs = 4; isLoadOp = false; break; case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD; NumVecs = 2; isLoadOp = false; isLaneOp = true; break; case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD; NumVecs = 3; isLoadOp = false; isLaneOp = true; break; case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD; NumVecs = 4; isLoadOp = false; isLaneOp = true; break; } } else { isLaneOp = true; switch (N->getOpcode()) { default: llvm_unreachable("unexpected opcode for Neon base update"); case ARMISD::VLD1DUP: NewOpc = ARMISD::VLD1DUP_UPD; NumVecs = 1; break; case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break; case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break; case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break; case ISD::LOAD: NewOpc = ARMISD::VLD1_UPD; NumVecs = 1; isLaneOp = false; break; case ISD::STORE: NewOpc = ARMISD::VST1_UPD; NumVecs = 1; isLaneOp = false; isLoadOp = false; break; } } // Find the size of memory referenced by the load/store. EVT VecTy; if (isLoadOp) { VecTy = N->getValueType(0); } else if (isIntrinsic) { VecTy = N->getOperand(AddrOpIdx+1).getValueType(); } else { assert(isStore && "Node has to be a load, a store, or an intrinsic!"); VecTy = N->getOperand(1).getValueType(); } unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8; if (isLaneOp) NumBytes /= VecTy.getVectorNumElements(); // If the increment is a constant, it must match the memory ref size. SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 
1 : 0); if (ConstantSDNode *CInc = dyn_cast(Inc.getNode())) { uint64_t IncVal = CInc->getZExtValue(); if (IncVal != NumBytes) continue; } else if (NumBytes >= 3 * 16) { // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two // separate instructions that make it harder to use a non-constant update. continue; } // OK, we found an ADD we can fold into the base update. // Now, create a _UPD node, taking care of not breaking alignment. EVT AlignedVecTy = VecTy; unsigned Alignment = MemN->getAlignment(); // If this is a less-than-standard-aligned load/store, change the type to // match the standard alignment. // The alignment is overlooked when selecting _UPD variants; and it's // easier to introduce bitcasts here than fix that. // There are 3 ways to get to this base-update combine: // - intrinsics: they are assumed to be properly aligned (to the standard // alignment of the memory type), so we don't need to do anything. // - ARMISD::VLDx nodes: they are only generated from the aforementioned // intrinsics, so, likewise, there's nothing to do. // - generic load/store instructions: the alignment is specified as an // explicit operand, rather than implicitly as the standard alignment // of the memory type (like the intrisics). We need to change the // memory type to match the explicit alignment. That way, we don't // generate non-standard-aligned ARMISD::VLDx nodes. if (isa(N)) { if (Alignment == 0) Alignment = 1; if (Alignment < VecTy.getScalarSizeInBits() / 8) { MVT EltTy = MVT::getIntegerVT(Alignment * 8); assert(NumVecs == 1 && "Unexpected multi-element generic load/store."); assert(!isLaneOp && "Unexpected generic load/store lane."); unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8); AlignedVecTy = MVT::getVectorVT(EltTy, NumElts); } // Don't set an explicit alignment on regular load/stores that we want // to transform to VLD/VST 1_UPD nodes. 
// This matches the behavior of regular load/stores, which only get an // explicit alignment if the MMO alignment is larger than the standard // alignment of the memory type. // Intrinsics, however, always get an explicit alignment, set to the // alignment of the MMO. Alignment = 1; } // Create the new updating load/store node. // First, create an SDVTList for the new updating node's results. EVT Tys[6]; unsigned NumResultVecs = (isLoadOp ? NumVecs : 0); unsigned n; for (n = 0; n < NumResultVecs; ++n) Tys[n] = AlignedVecTy; Tys[n++] = MVT::i32; Tys[n] = MVT::Other; SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2)); // Then, gather the new node's operands. SmallVector Ops; Ops.push_back(N->getOperand(0)); // incoming chain Ops.push_back(N->getOperand(AddrOpIdx)); Ops.push_back(Inc); if (StoreSDNode *StN = dyn_cast(N)) { // Try to match the intrinsic's signature Ops.push_back(StN->getValue()); } else { // Loads (and of course intrinsics) match the intrinsics' signature, // so just add all but the alignment operand. for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands() - 1; ++i) Ops.push_back(N->getOperand(i)); } // For all node types, the alignment operand is always the last one. Ops.push_back(DAG.getConstant(Alignment, dl, MVT::i32)); // If this is a non-standard-aligned STORE, the penultimate operand is the // stored value. Bitcast it to the aligned type. if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) { SDValue &StVal = Ops[Ops.size()-2]; StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal); } EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy; SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT, MemN->getMemOperand()); // Update the uses. SmallVector NewResults; for (unsigned i = 0; i < NumResultVecs; ++i) NewResults.push_back(SDValue(UpdN.getNode(), i)); // If this is an non-standard-aligned LOAD, the first result is the loaded // value. Bitcast it to the expected result type. 
if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) { SDValue &LdVal = NewResults[0]; LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal); } NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain DCI.CombineTo(N, NewResults); DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); break; } return SDValue(); } static SDValue PerformVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) return SDValue(); return CombineBaseUpdate(N, DCI); } /// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a /// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic /// are also VDUPLANEs. If so, combine them to a vldN-dup operation and /// return true. static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SelectionDAG &DAG = DCI.DAG; EVT VT = N->getValueType(0); // vldN-dup instructions only support 64-bit vectors for N > 1. if (!VT.is64BitVector()) return false; // Check if the VDUPLANE operand is a vldN-dup intrinsic. SDNode *VLD = N->getOperand(0).getNode(); if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN) return false; unsigned NumVecs = 0; unsigned NewOpc = 0; unsigned IntNo = cast(VLD->getOperand(1))->getZExtValue(); if (IntNo == Intrinsic::arm_neon_vld2lane) { NumVecs = 2; NewOpc = ARMISD::VLD2DUP; } else if (IntNo == Intrinsic::arm_neon_vld3lane) { NumVecs = 3; NewOpc = ARMISD::VLD3DUP; } else if (IntNo == Intrinsic::arm_neon_vld4lane) { NumVecs = 4; NewOpc = ARMISD::VLD4DUP; } else { return false; } // First check that all the vldN-lane uses are VDUPLANEs and that the lane // numbers match the load. unsigned VLDLaneNo = cast(VLD->getOperand(NumVecs+3))->getZExtValue(); for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); UI != UE; ++UI) { // Ignore uses of the chain result. 
if (UI.getUse().getResNo() == NumVecs) continue; SDNode *User = *UI; if (User->getOpcode() != ARMISD::VDUPLANE || VLDLaneNo != cast(User->getOperand(1))->getZExtValue()) return false; } // Create the vldN-dup node. EVT Tys[5]; unsigned n; for (n = 0; n < NumVecs; ++n) Tys[n] = VT; Tys[n] = MVT::Other; SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumVecs+1)); SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; MemIntrinsicSDNode *VLDMemInt = cast(VLD); SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, Ops, VLDMemInt->getMemoryVT(), VLDMemInt->getMemOperand()); // Update the uses. for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); UI != UE; ++UI) { unsigned ResNo = UI.getUse().getResNo(); // Ignore uses of the chain result. if (ResNo == NumVecs) continue; SDNode *User = *UI; DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo)); } // Now the vldN-lane intrinsic is dead except for its chain result. // Update uses of the chain. std::vector VLDDupResults; for (unsigned n = 0; n < NumVecs; ++n) VLDDupResults.push_back(SDValue(VLDDup.getNode(), n)); VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs)); DCI.CombineTo(VLD, VLDDupResults); return true; } /// PerformVDUPLANECombine - Target-specific dag combine xforms for /// ARMISD::VDUPLANE. static SDValue PerformVDUPLANECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SDValue Op = N->getOperand(0); // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation. if (CombineVLDDUP(N, DCI)) return SDValue(N, 0); // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is // redundant. Ignore bit_converts for now; element sizes are checked below. 
while (Op.getOpcode() == ISD::BITCAST) Op = Op.getOperand(0); if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM) return SDValue(); // Make sure the VMOV element size is not bigger than the VDUPLANE elements. unsigned EltSize = Op.getScalarValueSizeInBits(); // The canonical VMOV for a zero vector uses a 32-bit element size. unsigned Imm = cast(Op.getOperand(0))->getZExtValue(); unsigned EltBits; if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0) EltSize = 8; EVT VT = N->getValueType(0); if (EltSize > VT.getScalarSizeInBits()) return SDValue(); return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); } /// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP. static SDValue PerformVDUPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SelectionDAG &DAG = DCI.DAG; SDValue Op = N->getOperand(0); // Match VDUP(LOAD) -> VLD1DUP. // We match this pattern here rather than waiting for isel because the // transform is only legal for unindexed loads. LoadSDNode *LD = dyn_cast(Op.getNode()); if (LD && Op.hasOneUse() && LD->isUnindexed() && LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) { SDValue Ops[] = { LD->getOperand(0), LD->getOperand(1), DAG.getConstant(LD->getAlignment(), SDLoc(N), MVT::i32) }; SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other); SDValue VLDDup = DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys, Ops, LD->getMemoryVT(), LD->getMemOperand()); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1)); return VLDDup; } return SDValue(); } static SDValue PerformLOADCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { EVT VT = N->getValueType(0); // If this is a legal vector load, try to combine it into a VLD1_UPD. if (ISD::isNormalLoad(N) && VT.isVector() && DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT)) return CombineBaseUpdate(N, DCI); return SDValue(); } /// PerformSTORECombine - Target-specific dag combine xforms for /// ISD::STORE. 
static SDValue PerformSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { StoreSDNode *St = cast(N); if (St->isVolatile()) return SDValue(); // Optimize trunc store (of multiple scalars) to shuffle and store. First, // pack all of the elements in one place. Next, store to memory in fewer // chunks. SDValue StVal = St->getValue(); EVT VT = StVal.getValueType(); if (St->isTruncatingStore() && VT.isVector()) { SelectionDAG &DAG = DCI.DAG; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT StVT = St->getMemoryVT(); unsigned NumElems = VT.getVectorNumElements(); assert(StVT != VT && "Cannot truncate to the same type"); unsigned FromEltSz = VT.getScalarSizeInBits(); unsigned ToEltSz = StVT.getScalarSizeInBits(); // From, To sizes and ElemCount must be pow of two if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue(); // We are going to use the original vector elt for storing. // Accumulated smaller vector elements must be a multiple of the store size. if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue(); unsigned SizeRatio = FromEltSz / ToEltSz; assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits()); // Create a type on which we perform the shuffle. EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), NumElems*SizeRatio); assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); SDLoc DL(St); SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); SmallVector ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = DAG.getDataLayout().isBigEndian() ? (i + 1) * SizeRatio - 1 : i * SizeRatio; // Can't shuffle using an illegal type. if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec, DAG.getUNDEF(WideVec.getValueType()), ShuffleVec); // At this point all of the data is stored at the bottom of the // register. We now need to save it to mem. 
// Find the largest store unit MVT StoreType = MVT::i8; for (MVT Tp : MVT::integer_valuetypes()) { if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz) StoreType = Tp; } // Didn't find a legal store type. if (!TLI.isTypeLegal(StoreType)) return SDValue(); // Bitcast the original vector into a vector of store-size units EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits()); assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff); SmallVector Chains; SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL, TLI.getPointerTy(DAG.getDataLayout())); SDValue BasePtr = St->getBasePtr(); // Perform one or more big stores into memory. unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits(); for (unsigned I = 0; I < E; I++) { SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreType, ShuffWide, DAG.getIntPtrConstant(I, DL)); SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags()); BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment); Chains.push_back(Ch); } return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); } if (!ISD::isNormalStore(St)) return SDValue(); // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and // ARM stores of arguments in the same cache line. if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && StVal.getNode()->hasOneUse()) { SelectionDAG &DAG = DCI.DAG; bool isBigEndian = DAG.getDataLayout().isBigEndian(); SDLoc DL(St); SDValue BasePtr = St->getBasePtr(); SDValue NewST1 = DAG.getStore( St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 
1 : 0), BasePtr, St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags()); SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, DAG.getConstant(4, DL, MVT::i32)); return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(isBigEndian ? 0 : 1), OffsetPtr, St->getPointerInfo(), std::min(4U, St->getAlignment() / 2), St->getMemOperand()->getFlags()); } if (StVal.getValueType() == MVT::i64 && StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) { // Bitcast an i64 store extracted from a vector to f64. // Otherwise, the i64 value will be legalized to a pair of i32 values. SelectionDAG &DAG = DCI.DAG; SDLoc dl(StVal); SDValue IntVec = StVal.getOperand(0); EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, IntVec.getValueType().getVectorNumElements()); SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec); SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Vec, StVal.getOperand(1)); dl = SDLoc(N); SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt); // Make the DAGCombiner fold the bitcasts. DCI.AddToWorklist(Vec.getNode()); DCI.AddToWorklist(ExtElt.getNode()); DCI.AddToWorklist(V.getNode()); return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags(), St->getAAInfo()); } // If this is a legal vector store, try to combine it into a VST1_UPD. if (ISD::isNormalStore(N) && VT.isVector() && DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT)) return CombineBaseUpdate(N, DCI); return SDValue(); } /// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) /// can replace combinations of VMUL and VCVT (floating-point to integer) /// when the VMUL has a constant operand that is a power of 2. 
/// /// Example (assume d17 = ): /// vmul.f32 d16, d17, d16 /// vcvt.s32.f32 d16, d16 /// becomes: /// vcvt.s32.f32 d16, d16, #3 static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { if (!Subtarget->hasNEON()) return SDValue(); SDValue Op = N->getOperand(0); if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() || Op.getOpcode() != ISD::FMUL) return SDValue(); SDValue ConstVec = Op->getOperand(1); if (!isa(ConstVec)) return SDValue(); MVT FloatTy = Op.getSimpleValueType().getVectorElementType(); uint32_t FloatBits = FloatTy.getSizeInBits(); MVT IntTy = N->getSimpleValueType(0).getVectorElementType(); uint32_t IntBits = IntTy.getSizeInBits(); unsigned NumLanes = Op.getValueType().getVectorNumElements(); if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) { // These instructions only exist converting from f32 to i32. We can handle // smaller integers by generating an extra truncate, but larger ones would // be lossy. We also can't handle more then 4 lanes, since these intructions // only support v2i32/v4i32 types. return SDValue(); } BitVector UndefElements; BuildVectorSDNode *BV = cast(ConstVec); int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33); if (C == -1 || C == 0 || C > 32) return SDValue(); SDLoc dl(N); bool isSigned = N->getOpcode() == ISD::FP_TO_SINT; unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs : Intrinsic::arm_neon_vcvtfp2fxu; SDValue FixConv = DAG.getNode( ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0), DAG.getConstant(C, dl, MVT::i32)); if (IntBits < FloatBits) FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv); return FixConv; } /// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) /// can replace combinations of VCVT (integer to floating-point) and VDIV /// when the VDIV has a constant operand that is a power of 2. 
/// /// Example (assume d17 = ): /// vcvt.f32.s32 d16, d16 /// vdiv.f32 d16, d17, d16 /// becomes: /// vcvt.f32.s32 d16, d16, #3 static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { if (!Subtarget->hasNEON()) return SDValue(); SDValue Op = N->getOperand(0); unsigned OpOpcode = Op.getNode()->getOpcode(); if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() || (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP)) return SDValue(); SDValue ConstVec = N->getOperand(1); if (!isa(ConstVec)) return SDValue(); MVT FloatTy = N->getSimpleValueType(0).getVectorElementType(); uint32_t FloatBits = FloatTy.getSizeInBits(); MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType(); uint32_t IntBits = IntTy.getSizeInBits(); unsigned NumLanes = Op.getValueType().getVectorNumElements(); if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) { // These instructions only exist converting from i32 to f32. We can handle // smaller integers by generating an extra extend, but larger ones would // be lossy. We also can't handle more then 4 lanes, since these intructions // only support v2i32/v4i32 types. return SDValue(); } BitVector UndefElements; BuildVectorSDNode *BV = cast(ConstVec); int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33); if (C == -1 || C == 0 || C > 32) return SDValue(); SDLoc dl(N); bool isSigned = OpOpcode == ISD::SINT_TO_FP; SDValue ConvInput = Op.getOperand(0); if (IntBits < FloatBits) ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, ConvInput); unsigned IntrinsicOpcode = isSigned ? 
Intrinsic::arm_neon_vcvtfxs2fp : Intrinsic::arm_neon_vcvtfxu2fp; return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), ConvInput, DAG.getConstant(C, dl, MVT::i32)); } /// Getvshiftimm - Check if this is a valid build_vector for the immediate /// operand of a vector shift operation, where all the elements of the /// build_vector must have the same constant integer value. static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { // Ignore bit_converts. while (Op.getOpcode() == ISD::BITCAST) Op = Op.getOperand(0); BuildVectorSDNode *BVN = dyn_cast(Op.getNode()); APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, ElementBits) || SplatBitSize > ElementBits) return false; Cnt = SplatBits.getSExtValue(); return true; } /// isVShiftLImm - Check if this is a valid build_vector for the immediate /// operand of a vector shift left operation. That value must be in the range: /// 0 <= Value < ElementBits for a left shift; or /// 0 <= Value <= ElementBits for a long left shift. static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { assert(VT.isVector() && "vector shift count is not a vector type"); int64_t ElementBits = VT.getScalarSizeInBits(); if (! getVShiftImm(Op, ElementBits, Cnt)) return false; return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits); } /// isVShiftRImm - Check if this is a valid build_vector for the immediate /// operand of a vector shift right operation. For a shift opcode, the value /// is positive, but for an intrinsic the value count must be negative. The /// absolute value must be in the range: /// 1 <= |Value| <= ElementBits for a right shift; or /// 1 <= |Value| <= ElementBits/2 for a narrow right shift. 
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, int64_t &Cnt) { assert(VT.isVector() && "vector shift count is not a vector type"); int64_t ElementBits = VT.getScalarSizeInBits(); if (! getVShiftImm(Op, ElementBits, Cnt)) return false; if (!isIntrinsic) return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits)); if (Cnt >= -(isNarrow ? ElementBits/2 : ElementBits) && Cnt <= -1) { Cnt = -Cnt; return true; } return false; } /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics. static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { unsigned IntNo = cast(N->getOperand(0))->getZExtValue(); switch (IntNo) { default: // Don't do anything for most intrinsics. break; // Vector shifts: check for immediate versions and lower them. // Note: This is done during DAG combining instead of DAG legalizing because // the build_vectors for 64-bit vector element shift counts are generally // not legal, and it is hard to see their values after they get legalized to // loads from a constant pool. case Intrinsic::arm_neon_vshifts: case Intrinsic::arm_neon_vshiftu: case Intrinsic::arm_neon_vrshifts: case Intrinsic::arm_neon_vrshiftu: case Intrinsic::arm_neon_vrshiftn: case Intrinsic::arm_neon_vqshifts: case Intrinsic::arm_neon_vqshiftu: case Intrinsic::arm_neon_vqshiftsu: case Intrinsic::arm_neon_vqshiftns: case Intrinsic::arm_neon_vqshiftnu: case Intrinsic::arm_neon_vqshiftnsu: case Intrinsic::arm_neon_vqrshiftns: case Intrinsic::arm_neon_vqrshiftnu: case Intrinsic::arm_neon_vqrshiftnsu: { EVT VT = N->getOperand(1).getValueType(); int64_t Cnt; unsigned VShiftOpc = 0; switch (IntNo) { case Intrinsic::arm_neon_vshifts: case Intrinsic::arm_neon_vshiftu: if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) { VShiftOpc = ARMISD::VSHL; break; } if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) { VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? 
ARMISD::VSHRs : ARMISD::VSHRu); break; } return SDValue(); case Intrinsic::arm_neon_vrshifts: case Intrinsic::arm_neon_vrshiftu: if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) break; return SDValue(); case Intrinsic::arm_neon_vqshifts: case Intrinsic::arm_neon_vqshiftu: if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) break; return SDValue(); case Intrinsic::arm_neon_vqshiftsu: if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) break; llvm_unreachable("invalid shift count for vqshlu intrinsic"); case Intrinsic::arm_neon_vrshiftn: case Intrinsic::arm_neon_vqshiftns: case Intrinsic::arm_neon_vqshiftnu: case Intrinsic::arm_neon_vqshiftnsu: case Intrinsic::arm_neon_vqrshiftns: case Intrinsic::arm_neon_vqrshiftnu: case Intrinsic::arm_neon_vqrshiftnsu: // Narrowing shifts require an immediate right shift. if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt)) break; llvm_unreachable("invalid shift count for narrowing vector shift " "intrinsic"); default: llvm_unreachable("unhandled vector shift"); } switch (IntNo) { case Intrinsic::arm_neon_vshifts: case Intrinsic::arm_neon_vshiftu: // Opcode already set above. 
break; case Intrinsic::arm_neon_vrshifts: VShiftOpc = ARMISD::VRSHRs; break; case Intrinsic::arm_neon_vrshiftu: VShiftOpc = ARMISD::VRSHRu; break; case Intrinsic::arm_neon_vrshiftn: VShiftOpc = ARMISD::VRSHRN; break; case Intrinsic::arm_neon_vqshifts: VShiftOpc = ARMISD::VQSHLs; break; case Intrinsic::arm_neon_vqshiftu: VShiftOpc = ARMISD::VQSHLu; break; case Intrinsic::arm_neon_vqshiftsu: VShiftOpc = ARMISD::VQSHLsu; break; case Intrinsic::arm_neon_vqshiftns: VShiftOpc = ARMISD::VQSHRNs; break; case Intrinsic::arm_neon_vqshiftnu: VShiftOpc = ARMISD::VQSHRNu; break; case Intrinsic::arm_neon_vqshiftnsu: VShiftOpc = ARMISD::VQSHRNsu; break; case Intrinsic::arm_neon_vqrshiftns: VShiftOpc = ARMISD::VQRSHRNs; break; case Intrinsic::arm_neon_vqrshiftnu: VShiftOpc = ARMISD::VQRSHRNu; break; case Intrinsic::arm_neon_vqrshiftnsu: VShiftOpc = ARMISD::VQRSHRNsu; break; } SDLoc dl(N); return DAG.getNode(VShiftOpc, dl, N->getValueType(0), N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32)); } case Intrinsic::arm_neon_vshiftins: { EVT VT = N->getOperand(1).getValueType(); int64_t Cnt; unsigned VShiftOpc = 0; if (isVShiftLImm(N->getOperand(3), VT, false, Cnt)) VShiftOpc = ARMISD::VSLI; else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt)) VShiftOpc = ARMISD::VSRI; else { llvm_unreachable("invalid shift count for vsli/vsri intrinsic"); } SDLoc dl(N); return DAG.getNode(VShiftOpc, dl, N->getValueType(0), N->getOperand(1), N->getOperand(2), DAG.getConstant(Cnt, dl, MVT::i32)); } case Intrinsic::arm_neon_vqrshifts: case Intrinsic::arm_neon_vqrshiftu: // No immediate versions of these to check for. break; } return SDValue(); } /// PerformShiftCombine - Checks for immediate versions of vector shifts and /// lowers them. 
As with the vector shift intrinsics, this is done during DAG /// combining instead of DAG legalizing because the build_vectors for 64-bit /// vector element shift counts are generally not legal, and it is hard to see /// their values after they get legalized to loads from a constant pool. static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) { // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16. SDValue N1 = N->getOperand(1); if (ConstantSDNode *C = dyn_cast(N1)) { SDValue N0 = N->getOperand(0); if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP && DAG.MaskedValueIsZero(N0.getOperand(0), APInt::getHighBitsSet(32, 16))) return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1); } } // Nothing to be done for scalar shifts. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!VT.isVector() || !TLI.isTypeLegal(VT)) return SDValue(); assert(ST->hasNEON() && "unexpected vector shift"); int64_t Cnt; switch (N->getOpcode()) { default: llvm_unreachable("unexpected shift opcode"); case ISD::SHL: if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) { SDLoc dl(N); return DAG.getNode(ARMISD::VSHL, dl, VT, N->getOperand(0), DAG.getConstant(Cnt, dl, MVT::i32)); } break; case ISD::SRA: case ISD::SRL: if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) { unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ? ARMISD::VSHRs : ARMISD::VSHRu); SDLoc dl(N); return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), DAG.getConstant(Cnt, dl, MVT::i32)); } } return SDValue(); } /// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, /// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND. 
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { SDValue N0 = N->getOperand(0); // Check for sign- and zero-extensions of vector extract operations of 8- // and 16-bit vector elements. NEON supports these directly. They are // handled during DAG combining because type legalization will promote them // to 32-bit types and it is messy to recognize the operations after that. if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { SDValue Vec = N0.getOperand(0); SDValue Lane = N0.getOperand(1); EVT VT = N->getValueType(0); EVT EltVT = N0.getValueType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (VT == MVT::i32 && (EltVT == MVT::i8 || EltVT == MVT::i16) && TLI.isTypeLegal(Vec.getValueType()) && isa(Lane)) { unsigned Opc = 0; switch (N->getOpcode()) { default: llvm_unreachable("unexpected opcode"); case ISD::SIGN_EXTEND: Opc = ARMISD::VGETLANEs; break; case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: Opc = ARMISD::VGETLANEu; break; } return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane); } } return SDValue(); } static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero, APInt &KnownOne) { if (Op.getOpcode() == ARMISD::BFI) { // Conservatively, we can recurse down the first operand // and just mask out all affected bits. computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne); // The operand to BFI is already a mask suitable for removing the bits it // sets. 
ConstantSDNode *CI = cast(Op.getOperand(2)); const APInt &Mask = CI->getAPIntValue(); KnownZero &= Mask; KnownOne &= Mask; return; } if (Op.getOpcode() == ARMISD::CMOV) { APInt KZ2(KnownZero.getBitWidth(), 0); APInt KO2(KnownOne.getBitWidth(), 0); computeKnownBits(DAG, Op.getOperand(1), KnownZero, KnownOne); computeKnownBits(DAG, Op.getOperand(2), KZ2, KO2); KnownZero &= KZ2; KnownOne &= KO2; return; } return DAG.computeKnownBits(Op, KnownZero, KnownOne); } SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const { // If we have a CMOV, OR and AND combination such as: // if (x & CN) // y |= CM; // // And: // * CN is a single bit; // * All bits covered by CM are known zero in y // // Then we can convert this into a sequence of BFI instructions. This will // always be a win if CM is a single bit, will always be no worse than the // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is // three bits (due to the extra IT instruction). SDValue Op0 = CMOV->getOperand(0); SDValue Op1 = CMOV->getOperand(1); auto CCNode = cast(CMOV->getOperand(2)); auto CC = CCNode->getAPIntValue().getLimitedValue(); SDValue CmpZ = CMOV->getOperand(4); // The compare must be against zero. if (!isNullConstant(CmpZ->getOperand(1))) return SDValue(); assert(CmpZ->getOpcode() == ARMISD::CMPZ); SDValue And = CmpZ->getOperand(0); if (And->getOpcode() != ISD::AND) return SDValue(); ConstantSDNode *AndC = dyn_cast(And->getOperand(1)); if (!AndC || !AndC->getAPIntValue().isPowerOf2()) return SDValue(); SDValue X = And->getOperand(0); if (CC == ARMCC::EQ) { // We're performing an "equal to zero" compare. Swap the operands so we // canonicalize on a "not equal to zero" compare. 
std::swap(Op0, Op1); } else { assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?"); } if (Op1->getOpcode() != ISD::OR) return SDValue(); ConstantSDNode *OrC = dyn_cast(Op1->getOperand(1)); if (!OrC) return SDValue(); SDValue Y = Op1->getOperand(0); if (Op0 != Y) return SDValue(); // Now, is it profitable to continue? APInt OrCI = OrC->getAPIntValue(); unsigned Heuristic = Subtarget->isThumb() ? 3 : 2; if (OrCI.countPopulation() > Heuristic) return SDValue(); // Lastly, can we determine that the bits defined by OrCI // are zero in Y? APInt KnownZero, KnownOne; computeKnownBits(DAG, Y, KnownZero, KnownOne); if ((OrCI & KnownZero) != OrCI) return SDValue(); // OK, we can do the combine. SDValue V = Y; SDLoc dl(X); EVT VT = X.getValueType(); unsigned BitInX = AndC->getAPIntValue().logBase2(); if (BitInX != 0) { // We must shift X first. X = DAG.getNode(ISD::SRL, dl, VT, X, DAG.getConstant(BitInX, dl, VT)); } for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits(); BitInY < NumActiveBits; ++BitInY) { if (OrCI[BitInY] == 0) continue; APInt Mask(VT.getSizeInBits(), 0); Mask.setBit(BitInY); V = DAG.getNode(ARMISD::BFI, dl, VT, V, X, // Confusingly, the operand is an *inverted* mask. DAG.getConstant(~Mask, dl, VT)); } return V; } /// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND. SDValue ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const { SDValue Cmp = N->getOperand(4); if (Cmp.getOpcode() != ARMISD::CMPZ) // Only looking at NE cases. 
return SDValue(); EVT VT = N->getValueType(0); SDLoc dl(N); SDValue LHS = Cmp.getOperand(0); SDValue RHS = Cmp.getOperand(1); SDValue Chain = N->getOperand(0); SDValue BB = N->getOperand(1); SDValue ARMcc = N->getOperand(2); ARMCC::CondCodes CC = (ARMCC::CondCodes)cast(ARMcc)->getZExtValue(); // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0)) // -> (brcond Chain BB CC CPSR Cmp) if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() && LHS->getOperand(0)->getOpcode() == ARMISD::CMOV && LHS->getOperand(0)->hasOneUse()) { auto *LHS00C = dyn_cast(LHS->getOperand(0)->getOperand(0)); auto *LHS01C = dyn_cast(LHS->getOperand(0)->getOperand(1)); auto *LHS1C = dyn_cast(LHS->getOperand(1)); auto *RHSC = dyn_cast(RHS); if ((LHS00C && LHS00C->getZExtValue() == 0) && (LHS01C && LHS01C->getZExtValue() == 1) && (LHS1C && LHS1C->getZExtValue() == 1) && (RHSC && RHSC->getZExtValue() == 0)) { return DAG.getNode( ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2), LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4)); } } return SDValue(); } /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV. SDValue ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { SDValue Cmp = N->getOperand(4); if (Cmp.getOpcode() != ARMISD::CMPZ) // Only looking at EQ and NE cases. return SDValue(); EVT VT = N->getValueType(0); SDLoc dl(N); SDValue LHS = Cmp.getOperand(0); SDValue RHS = Cmp.getOperand(1); SDValue FalseVal = N->getOperand(0); SDValue TrueVal = N->getOperand(1); SDValue ARMcc = N->getOperand(2); ARMCC::CondCodes CC = (ARMCC::CondCodes)cast(ARMcc)->getZExtValue(); // BFI is only available on V6T2+. 
if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) { SDValue R = PerformCMOVToBFICombine(N, DAG); if (R) return R; } // Simplify // mov r1, r0 // cmp r1, x // mov r0, y // moveq r0, x // to // cmp r0, x // movne r0, y // // mov r1, r0 // cmp r1, x // mov r0, x // movne r0, y // to // cmp r0, x // movne r0, y /// FIXME: Turn this into a target neutral optimization? SDValue Res; if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) { Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, N->getOperand(3), Cmp); } else if (CC == ARMCC::EQ && TrueVal == RHS) { SDValue ARMcc; SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl); Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, N->getOperand(3), NewCmp); } // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0)) // -> (cmov F T CC CPSR Cmp) if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse()) { auto *LHS0C = dyn_cast(LHS->getOperand(0)); auto *LHS1C = dyn_cast(LHS->getOperand(1)); auto *RHSC = dyn_cast(RHS); if ((LHS0C && LHS0C->getZExtValue() == 0) && (LHS1C && LHS1C->getZExtValue() == 1) && (RHSC && RHSC->getZExtValue() == 0)) { return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, LHS->getOperand(2), LHS->getOperand(3), LHS->getOperand(4)); } } if (Res.getNode()) { APInt KnownZero, KnownOne; DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne); // Capture demanded bits information that would be otherwise lost. 
if (KnownZero == 0xfffffffe) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, DAG.getValueType(MVT::i1)); else if (KnownZero == 0xffffff00) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, DAG.getValueType(MVT::i8)); else if (KnownZero == 0xffff0000) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, DAG.getValueType(MVT::i16)); } return Res; } SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { default: break; case ISD::ADDC: return PerformADDCCombine(N, DCI, Subtarget); case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget); case ISD::SUB: return PerformSUBCombine(N, DCI); case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); case ISD::OR: return PerformORCombine(N, DCI, Subtarget); case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); case ARMISD::BFI: return PerformBFICombine(N, DCI); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget); case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); case ISD::STORE: return PerformSTORECombine(N, DCI); case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget); case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); case ARMISD::VDUP: return PerformVDUPCombine(N, DCI); case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI.DAG, Subtarget); case ISD::FDIV: return PerformVDIVCombine(N, DCI.DAG, Subtarget); case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); case ISD::SHL: case ISD::SRA: case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget); case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG); case 
ARMISD::BRCOND: return PerformBRCONDCombine(N, DCI.DAG); case ISD::LOAD: return PerformLOADCombine(N, DCI); case ARMISD::VLD1DUP: case ARMISD::VLD2DUP: case ARMISD::VLD3DUP: case ARMISD::VLD4DUP: return PerformVLDCombine(N, DCI); case ARMISD::BUILD_VECTOR: return PerformARMBUILD_VECTORCombine(N, DCI); case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: switch (cast(N->getOperand(1))->getZExtValue()) { case Intrinsic::arm_neon_vld1: case Intrinsic::arm_neon_vld2: case Intrinsic::arm_neon_vld3: case Intrinsic::arm_neon_vld4: case Intrinsic::arm_neon_vld2lane: case Intrinsic::arm_neon_vld3lane: case Intrinsic::arm_neon_vld4lane: case Intrinsic::arm_neon_vst1: case Intrinsic::arm_neon_vst2: case Intrinsic::arm_neon_vst3: case Intrinsic::arm_neon_vst4: case Intrinsic::arm_neon_vst2lane: case Intrinsic::arm_neon_vst3lane: case Intrinsic::arm_neon_vst4lane: return PerformVLDCombine(N, DCI); default: break; } break; } return SDValue(); } bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const { return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE); } bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, unsigned, bool *Fast) const { // The AllowsUnaliged flag models the SCTLR.A setting in ARM cpus bool AllowsUnaligned = Subtarget->allowsUnalignedMem(); switch (VT.getSimpleVT().SimpleTy) { default: return false; case MVT::i8: case MVT::i16: case MVT::i32: { // Unaligned access can use (for example) LRDB, LRDH, LDR if (AllowsUnaligned) { if (Fast) *Fast = Subtarget->hasV7Ops(); return true; } return false; } case MVT::f64: case MVT::v2f64: { // For any little-endian targets with neon, we can support unaligned ld/st // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8. 
// A big-endian target may also explicitly support unaligned accesses if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) { if (Fast) *Fast = true; return true; } return false; } } } static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, unsigned AlignCheck) { return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) && (DstAlign == 0 || DstAlign % AlignCheck == 0)); } EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { const Function *F = MF.getFunction(); // See if we can use NEON instructions for this... if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() && !F->hasFnAttribute(Attribute::NoImplicitFloat)) { bool Fast; if (Size >= 16 && (memOpAlign(SrcAlign, DstAlign, 16) || (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) { return MVT::v2f64; } else if (Size >= 8 && (memOpAlign(SrcAlign, DstAlign, 8) || (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) && Fast))) { return MVT::f64; } } // Lowering to i32/i16 if the size permits. if (Size >= 4) return MVT::i32; else if (Size >= 2) return MVT::i16; // Let the target-independent logic figure it out. return MVT::Other; } bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { if (Val.getOpcode() != ISD::LOAD) return false; EVT VT1 = Val.getValueType(); if (!VT1.isSimple() || !VT1.isInteger() || !VT2.isSimple() || !VT2.isInteger()) return false; switch (VT1.getSimpleVT().SimpleTy) { default: break; case MVT::i1: case MVT::i8: case MVT::i16: // 8-bit and 16-bit loads implicitly zero-extend to 32-bits. return true; } return false; } bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { EVT VT = ExtVal.getValueType(); if (!isTypeLegal(VT)) return false; // Don't create a loadext if we can fold the extension into a wide/long // instruction. 
// If there's more than one user instruction, the loadext is desirable no // matter what. There can be two uses by the same instruction. if (ExtVal->use_empty() || !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode())) return true; SDNode *U = *ExtVal->use_begin(); if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB || U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHL)) return false; return true; } bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const { if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; if (!isTypeLegal(EVT::getEVT(Ty1))) return false; assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop"); // Assuming the caller doesn't have a zeroext or signext return parameter, // truncation all the way down to i1 is valid. return true; } int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const { if (isLegalAddressingMode(DL, AM, Ty, AS)) { if (Subtarget->hasFPAO()) return AM.Scale < 0 ? 1 : 0; // positive offsets execute faster return 0; } return -1; } static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { if (V < 0) return false; unsigned Scale = 1; switch (VT.getSimpleVT().SimpleTy) { default: return false; case MVT::i1: case MVT::i8: // Scale == 1; break; case MVT::i16: // Scale == 2; Scale = 2; break; case MVT::i32: // Scale == 4; Scale = 4; break; } if ((V & (Scale - 1)) != 0) return false; V /= Scale; return V == (V & ((1LL << 5) - 1)); } static bool isLegalT2AddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget) { bool isNeg = false; if (V < 0) { isNeg = true; V = - V; } switch (VT.getSimpleVT().SimpleTy) { default: return false; case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: // + imm12 or - imm8 if (isNeg) return V == (V & ((1LL << 8) - 1)); return V == (V & ((1LL << 12) - 1)); case MVT::f32: case MVT::f64: // Same as ARM mode. FIXME: NEON? 
if (!Subtarget->hasVFP2()) return false; if ((V & 3) != 0) return false; V >>= 2; return V == (V & ((1LL << 8) - 1)); } } /// isLegalAddressImmediate - Return true if the integer value can be used /// as the offset of the target addressing mode for load / store of the /// given type. static bool isLegalAddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget) { if (V == 0) return true; if (!VT.isSimple()) return false; if (Subtarget->isThumb1Only()) return isLegalT1AddressImmediate(V, VT); else if (Subtarget->isThumb2()) return isLegalT2AddressImmediate(V, VT, Subtarget); // ARM mode. if (V < 0) V = - V; switch (VT.getSimpleVT().SimpleTy) { default: return false; case MVT::i1: case MVT::i8: case MVT::i32: // +- imm12 return V == (V & ((1LL << 12) - 1)); case MVT::i16: // +- imm8 return V == (V & ((1LL << 8) - 1)); case MVT::f32: case MVT::f64: if (!Subtarget->hasVFP2()) // FIXME: NEON? return false; if ((V & 3) != 0) return false; V >>= 2; return V == (V & ((1LL << 8) - 1)); } } bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const { int Scale = AM.Scale; if (Scale < 0) return false; switch (VT.getSimpleVT().SimpleTy) { default: return false; case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: if (Scale == 1) return true; // r + r << imm Scale = Scale & ~1; return Scale == 2 || Scale == 4 || Scale == 8; case MVT::i64: // r + r if (((unsigned)AM.HasBaseReg + Scale) <= 2) return true; return false; case MVT::isVoid: // Note, we allow "void" uses (basically, uses that aren't loads or // stores), because arm allows folding a scale into many arithmetic // operations. This should be made more precise and revisited later. // Allow r << imm, but the imm has to be a multiple of two. if (Scale & 1) return false; return isPowerOf2_32(Scale); } } /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. 
bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const { EVT VT = getValueType(DL, Ty, true); if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) return false; // Can never fold addr of global into load/store. if (AM.BaseGV) return false; switch (AM.Scale) { case 0: // no scale reg, must be "r+i" or "r", or "i". break; case 1: if (Subtarget->isThumb1Only()) return false; LLVM_FALLTHROUGH; default: // ARM doesn't support any R+R*scale+imm addr modes. if (AM.BaseOffs) return false; if (!VT.isSimple()) return false; if (Subtarget->isThumb2()) return isLegalT2ScaledAddressingMode(AM, VT); int Scale = AM.Scale; switch (VT.getSimpleVT().SimpleTy) { default: return false; case MVT::i1: case MVT::i8: case MVT::i32: if (Scale < 0) Scale = -Scale; if (Scale == 1) return true; // r + r << imm return isPowerOf2_32(Scale & ~1); case MVT::i16: case MVT::i64: // r + r if (((unsigned)AM.HasBaseReg + Scale) <= 2) return true; return false; case MVT::isVoid: // Note, we allow "void" uses (basically, uses that aren't loads or // stores), because arm allows folding a scale into many arithmetic // operations. This should be made more precise and revisited later. // Allow r << imm, but the imm has to be a multiple of two. if (Scale & 1) return false; return isPowerOf2_32(Scale); } } return true; } /// isLegalICmpImmediate - Return true if the specified immediate is legal /// icmp immediate, that is the target has icmp instructions which can compare /// a register against the immediate without having to materialize the /// immediate into a register. bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { // Thumb2 and ARM modes can use cmn for negative immediates. if (!Subtarget->isThumb()) return ARM_AM::getSOImmVal(std::abs(Imm)) != -1; if (Subtarget->isThumb2()) return ARM_AM::getT2SOImmVal(std::abs(Imm)) != -1; // Thumb1 doesn't have cmn, and only 8-bit immediates. 
return Imm >= 0 && Imm <= 255; } /// isLegalAddImmediate - Return true if the specified immediate is a legal add /// *or sub* immediate, that is the target has add or sub instructions which can /// add a register with the immediate without having to materialize the /// immediate into a register. bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const { // Same encoding for add/sub, just flip the sign. int64_t AbsImm = std::abs(Imm); if (!Subtarget->isThumb()) return ARM_AM::getSOImmVal(AbsImm) != -1; if (Subtarget->isThumb2()) return ARM_AM::getT2SOImmVal(AbsImm) != -1; // Thumb1 only has 8-bit unsigned immediate. return AbsImm >= 0 && AbsImm <= 255; } static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG) { if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) return false; if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) { // AddressingMode 3 Base = Ptr->getOperand(0); if (ConstantSDNode *RHS = dyn_cast(Ptr->getOperand(1))) { int RHSC = (int)RHS->getZExtValue(); if (RHSC < 0 && RHSC > -256) { assert(Ptr->getOpcode() == ISD::ADD); isInc = false; Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0)); return true; } } isInc = (Ptr->getOpcode() == ISD::ADD); Offset = Ptr->getOperand(1); return true; } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) { // AddressingMode 2 if (ConstantSDNode *RHS = dyn_cast(Ptr->getOperand(1))) { int RHSC = (int)RHS->getZExtValue(); if (RHSC < 0 && RHSC > -0x1000) { assert(Ptr->getOpcode() == ISD::ADD); isInc = false; Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0)); Base = Ptr->getOperand(0); return true; } } if (Ptr->getOpcode() == ISD::ADD) { isInc = true; ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode()); if (ShOpcVal != ARM_AM::no_shift) { Base = Ptr->getOperand(1); Offset = Ptr->getOperand(0); } else { Base = Ptr->getOperand(0); 
Offset = Ptr->getOperand(1); } return true; } isInc = (Ptr->getOpcode() == ISD::ADD); Base = Ptr->getOperand(0); Offset = Ptr->getOperand(1); return true; } // FIXME: Use VLDM / VSTM to emulate indexed FP load / store. return false; } static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG) { if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) return false; Base = Ptr->getOperand(0); if (ConstantSDNode *RHS = dyn_cast(Ptr->getOperand(1))) { int RHSC = (int)RHS->getZExtValue(); if (RHSC < 0 && RHSC > -0x100) { // 8 bits. assert(Ptr->getOpcode() == ISD::ADD); isInc = false; Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0)); return true; } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero. isInc = Ptr->getOpcode() == ISD::ADD; Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0)); return true; } } return false; } /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. bool ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const { if (Subtarget->isThumb1Only()) return false; EVT VT; SDValue Ptr; bool isSEXTLoad = false; if (LoadSDNode *LD = dyn_cast(N)) { Ptr = LD->getBasePtr(); VT = LD->getMemoryVT(); isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; } else if (StoreSDNode *ST = dyn_cast(N)) { Ptr = ST->getBasePtr(); VT = ST->getMemoryVT(); } else return false; bool isInc; bool isLegal = false; if (Subtarget->isThumb2()) isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, Offset, isInc, DAG); else isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, Offset, isInc, DAG); if (!isLegal) return false; AM = isInc ? 
ISD::PRE_INC : ISD::PRE_DEC; return true; } /// getPostIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if this node can be /// combined with a load / store to form a post-indexed load / store. bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const { EVT VT; SDValue Ptr; bool isSEXTLoad = false, isNonExt; if (LoadSDNode *LD = dyn_cast(N)) { VT = LD->getMemoryVT(); Ptr = LD->getBasePtr(); isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD; } else if (StoreSDNode *ST = dyn_cast(N)) { VT = ST->getMemoryVT(); Ptr = ST->getBasePtr(); isNonExt = !ST->isTruncatingStore(); } else return false; if (Subtarget->isThumb1Only()) { // Thumb-1 can do a limited post-inc load or store as an updating LDM. It // must be non-extending/truncating, i32, with an offset of 4. assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!"); if (Op->getOpcode() != ISD::ADD || !isNonExt) return false; auto *RHS = dyn_cast(Op->getOperand(1)); if (!RHS || RHS->getZExtValue() != 4) return false; Offset = Op->getOperand(1); Base = Op->getOperand(0); AM = ISD::POST_INC; return true; } bool isInc; bool isLegal = false; if (Subtarget->isThumb2()) isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, isInc, DAG); else isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, isInc, DAG); if (!isLegal) return false; if (Ptr != Base) { // Swap base ptr and offset to catch more post-index load / store when // it's legal. In Thumb2 mode, offset must be an immediate. if (Ptr == Offset && Op->getOpcode() == ISD::ADD && !Subtarget->isThumb2()) std::swap(Base, Offset); // Post-indexed load / store update the base pointer. if (Ptr != Base) return false; } AM = isInc ? 
ISD::POST_INC : ISD::POST_DEC; return true; } void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { unsigned BitWidth = KnownOne.getBitWidth(); KnownZero = KnownOne = APInt(BitWidth, 0); switch (Op.getOpcode()) { default: break; case ARMISD::ADDC: case ARMISD::ADDE: case ARMISD::SUBC: case ARMISD::SUBE: // These nodes' second result is a boolean if (Op.getResNo() == 0) break; KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); if (KnownZero == 0 && KnownOne == 0) return; APInt KnownZeroRHS, KnownOneRHS; DAG.computeKnownBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); KnownZero &= KnownZeroRHS; KnownOne &= KnownOneRHS; return; } case ISD::INTRINSIC_W_CHAIN: { ConstantSDNode *CN = cast(Op->getOperand(1)); Intrinsic::ID IntID = static_cast(CN->getZExtValue()); switch (IntID) { default: return; case Intrinsic::arm_ldaex: case Intrinsic::arm_ldrex: { EVT VT = cast(Op)->getMemoryVT(); unsigned MemBits = VT.getScalarSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); return; } } } } } //===----------------------------------------------------------------------===// // ARM Inline Assembly Support //===----------------------------------------------------------------------===// bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const { // Looking for "rev" which is V6+. 
if (!Subtarget->hasV6Ops()) return false; InlineAsm *IA = cast(CI->getCalledValue()); std::string AsmStr = IA->getAsmString(); SmallVector AsmPieces; SplitString(AsmStr, AsmPieces, ";\n"); switch (AsmPieces.size()) { default: return false; case 1: AsmStr = AsmPieces[0]; AsmPieces.clear(); SplitString(AsmStr, AsmPieces, " \t,"); // rev $0, $1 if (AsmPieces.size() == 3 && AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" && IA->getConstraintString().compare(0, 4, "=l,l") == 0) { IntegerType *Ty = dyn_cast(CI->getType()); if (Ty && Ty->getBitWidth() == 32) return IntrinsicLowering::LowerToByteSwap(CI); } break; } return false; } const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const { // At this point, we have to lower this constraint to something else, so we // lower it to an "r" or "w". However, by doing this we will force the result // to be in register, while the X constraint is much more permissive. // // Although we are correct (we are free to emit anything, without // constraints), we might break use cases that would expect us to be more // efficient and emit something else. if (!Subtarget->hasVFP2()) return "r"; if (ConstraintVT.isFloatingPoint()) return "w"; if (ConstraintVT.isVector() && Subtarget->hasNEON() && (ConstraintVT.getSizeInBits() == 64 || ConstraintVT.getSizeInBits() == 128)) return "w"; return "r"; } /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. ARMTargetLowering::ConstraintType ARMTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default: break; case 'l': return C_RegisterClass; case 'w': return C_RegisterClass; case 'h': return C_RegisterClass; case 'x': return C_RegisterClass; case 't': return C_RegisterClass; case 'j': return C_Other; // Constant for movw. // An address with a single base register. 
Due to the way we // currently handle addresses it is the same as an 'r' memory constraint. case 'Q': return C_Memory; } } else if (Constraint.size() == 2) { switch (Constraint[0]) { default: break; // All 'U+' constraints are addresses. case 'U': return C_Memory; } } return TargetLowering::getConstraintType(Constraint); } /// Examine constraint type and operand type and determine a weight value. /// This object must already have been set up with the operand type /// and the current alternative constraint selected. TargetLowering::ConstraintWeight ARMTargetLowering::getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const { ConstraintWeight weight = CW_Invalid; Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); // Look at the constraint type. switch (*constraint) { default: weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); break; case 'l': if (type->isIntegerTy()) { if (Subtarget->isThumb()) weight = CW_SpecificReg; else weight = CW_Register; } break; case 'w': if (type->isFloatingPointTy()) weight = CW_Register; break; } return weight; } typedef std::pair RCPair; RCPair ARMTargetLowering::getRegForInlineAsmConstraint( const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { // GCC ARM Constraint Letters switch (Constraint[0]) { case 'l': // Low regs or general regs. if (Subtarget->isThumb()) return RCPair(0U, &ARM::tGPRRegClass); return RCPair(0U, &ARM::GPRRegClass); case 'h': // High regs or no regs. 
if (Subtarget->isThumb()) return RCPair(0U, &ARM::hGPRRegClass); break; case 'r': if (Subtarget->isThumb1Only()) return RCPair(0U, &ARM::tGPRRegClass); return RCPair(0U, &ARM::GPRRegClass); case 'w': if (VT == MVT::Other) break; if (VT == MVT::f32) return RCPair(0U, &ARM::SPRRegClass); if (VT.getSizeInBits() == 64) return RCPair(0U, &ARM::DPRRegClass); if (VT.getSizeInBits() == 128) return RCPair(0U, &ARM::QPRRegClass); break; case 'x': if (VT == MVT::Other) break; if (VT == MVT::f32) return RCPair(0U, &ARM::SPR_8RegClass); if (VT.getSizeInBits() == 64) return RCPair(0U, &ARM::DPR_8RegClass); if (VT.getSizeInBits() == 128) return RCPair(0U, &ARM::QPR_8RegClass); break; case 't': if (VT == MVT::f32) return RCPair(0U, &ARM::SPRRegClass); break; } } if (StringRef("{cc}").equals_lower(Constraint)) return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass); return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector&Ops, SelectionDAG &DAG) const { SDValue Result; // Currently only support length 1 constraints. if (Constraint.length() != 1) return; char ConstraintLetter = Constraint[0]; switch (ConstraintLetter) { default: break; case 'j': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': ConstantSDNode *C = dyn_cast(Op); if (!C) return; int64_t CVal64 = C->getSExtValue(); int CVal = (int) CVal64; // None of these constraints allow values larger than 32 bits. Check // that the value fits in an int. if (CVal != CVal64) return; switch (ConstraintLetter) { case 'j': // Constant suitable for movw, must be between 0 and // 65535. 
if (Subtarget->hasV6T2Ops()) if (CVal >= 0 && CVal <= 65535) break; return; case 'I': if (Subtarget->isThumb1Only()) { // This must be a constant between 0 and 255, for ADD // immediates. if (CVal >= 0 && CVal <= 255) break; } else if (Subtarget->isThumb2()) { // A constant that can be used as an immediate value in a // data-processing instruction. if (ARM_AM::getT2SOImmVal(CVal) != -1) break; } else { // A constant that can be used as an immediate value in a // data-processing instruction. if (ARM_AM::getSOImmVal(CVal) != -1) break; } return; case 'J': if (Subtarget->isThumb1Only()) { // This must be a constant between -255 and -1, for negated ADD // immediates. This can be used in GCC with an "n" modifier that // prints the negated value, for use with SUB instructions. It is // not useful otherwise but is implemented for compatibility. if (CVal >= -255 && CVal <= -1) break; } else { // This must be a constant between -4095 and 4095. It is not clear // what this constraint is intended for. Implemented for // compatibility with GCC. if (CVal >= -4095 && CVal <= 4095) break; } return; case 'K': if (Subtarget->isThumb1Only()) { // A 32-bit value where only one byte has a nonzero value. Exclude // zero to match GCC. This constraint is used by GCC internally for // constants that can be loaded with a move/shift combination. // It is not useful otherwise but is implemented for compatibility. if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal)) break; } else if (Subtarget->isThumb2()) { // A constant whose bitwise inverse can be used as an immediate // value in a data-processing instruction. This can be used in GCC // with a "B" modifier that prints the inverted value, for use with // BIC and MVN instructions. It is not useful otherwise but is // implemented for compatibility. if (ARM_AM::getT2SOImmVal(~CVal) != -1) break; } else { // A constant whose bitwise inverse can be used as an immediate // value in a data-processing instruction. 
This can be used in GCC // with a "B" modifier that prints the inverted value, for use with // BIC and MVN instructions. It is not useful otherwise but is // implemented for compatibility. if (ARM_AM::getSOImmVal(~CVal) != -1) break; } return; case 'L': if (Subtarget->isThumb1Only()) { // This must be a constant between -7 and 7, // for 3-operand ADD/SUB immediate instructions. if (CVal >= -7 && CVal < 7) break; } else if (Subtarget->isThumb2()) { // A constant whose negation can be used as an immediate value in a // data-processing instruction. This can be used in GCC with an "n" // modifier that prints the negated value, for use with SUB // instructions. It is not useful otherwise but is implemented for // compatibility. if (ARM_AM::getT2SOImmVal(-CVal) != -1) break; } else { // A constant whose negation can be used as an immediate value in a // data-processing instruction. This can be used in GCC with an "n" // modifier that prints the negated value, for use with SUB // instructions. It is not useful otherwise but is implemented for // compatibility. if (ARM_AM::getSOImmVal(-CVal) != -1) break; } return; case 'M': if (Subtarget->isThumb1Only()) { // This must be a multiple of 4 between 0 and 1020, for // ADD sp + immediate. if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0)) break; } else { // A power of two or a constant between 0 and 32. This is used in // GCC for the shift amount on shifted register operands, but it is // useful in general for any shift amounts. if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0)) break; } return; case 'N': if (Subtarget->isThumb()) { // FIXME thumb2 // This must be a constant between 0 and 31, for shift amounts. if (CVal >= 0 && CVal <= 31) break; } return; case 'O': if (Subtarget->isThumb()) { // FIXME thumb2 // This must be a multiple of 4 between -508 and 508, for // ADD/SUB sp = sp + immediate. 
if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0)) break; } return; } Result = DAG.getTargetConstant(CVal, SDLoc(Op), Op.getValueType()); break; } if (Result.getNode()) { Ops.push_back(Result); return; } return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } static RTLIB::Libcall getDivRemLibcall( const SDNode *N, MVT::SimpleValueType SVT) { assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM || N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) && "Unhandled Opcode in getDivRemLibcall"); bool isSigned = N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::SREM; RTLIB::Libcall LC; switch (SVT) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; } return LC; } static TargetLowering::ArgListTy getDivRemArgList( const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) { assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM || N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) && "Unhandled Opcode in getDivRemArgList"); bool isSigned = N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::SREM; TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { EVT ArgVT = N->getOperand(i).getValueType(); Type *ArgTy = ArgVT.getTypeForEVT(*Context); Entry.Node = N->getOperand(i); Entry.Ty = ArgTy; Entry.isSExt = isSigned; Entry.isZExt = !isSigned; Args.push_back(Entry); } if (Subtarget->isTargetWindows() && Args.size() >= 2) std::swap(Args[0], Args[1]); return Args; } SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { assert((Subtarget->isTargetAEABI() || 
Subtarget->isTargetAndroid() || Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() || Subtarget->isTargetWindows()) && "Register-based DivRem lowering only"); unsigned Opcode = Op->getOpcode(); assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) && "Invalid opcode for Div/Rem lowering"); bool isSigned = (Opcode == ISD::SDIVREM); EVT VT = Op->getValueType(0); Type *Ty = VT.getTypeForEVT(*DAG.getContext()); SDLoc dl(Op); // If the target has hardware divide, use divide + multiply + subtract: // div = a / b // rem = a - b * div // return {div, rem} // This should be lowered into UDIV/SDIV + MLS later on. if (Subtarget->hasDivide() && Op->getValueType(0).isSimple() && Op->getSimpleValueType(0) == MVT::i32) { unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV; const SDValue Dividend = Op->getOperand(0); const SDValue Divisor = Op->getOperand(1); SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor); SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor); SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul); SDValue Values[2] = {Div, Rem}; return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values); } RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(), VT.getSimpleVT().SimpleTy); SDValue InChain = DAG.getEntryNode(); TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(), DAG.getContext(), Subtarget); SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy(DAG.getDataLayout())); Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr); if (Subtarget->isTargetWindows()) InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(InChain) .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned); std::pair CallInfo = LowerCallTo(CLI); return CallInfo.first; } // Lowers REM using divmod helpers // see RTABI section 4.2/4.3 SDValue 
ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const { // Build return types (div and rem) std::vector RetTyParams; Type *RetTyElement; switch (N->getValueType(0).getSimpleVT().SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break; case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break; case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break; case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break; } RetTyParams.push_back(RetTyElement); RetTyParams.push_back(RetTyElement); ArrayRef ret = ArrayRef(RetTyParams); Type *RetTy = StructType::get(*DAG.getContext(), ret); RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT(). SimpleTy); SDValue InChain = DAG.getEntryNode(); TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext(), Subtarget); bool isSigned = N->getOpcode() == ISD::SREM; SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy(DAG.getDataLayout())); if (Subtarget->isTargetWindows()) InChain = WinDBZCheckDenominator(DAG, N, InChain); // Lower call CallLoweringInfo CLI(DAG); CLI.setChain(InChain) .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args)) .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N)); std::pair CallResult = LowerCallTo(CLI); // Return second (rem) result operand (first contains div) SDNode *ResNode = CallResult.first.getNode(); assert(ResNode->getNumOperands() == 2 && "divmod should return two operands"); return ResNode->getOperand(1); } SDValue ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetWindows() && "unsupported target platform"); SDLoc DL(Op); // Get the inputs. 
SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size, DAG.getConstant(2, DL, MVT::i32)); SDValue Flag; Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag); Flag = Chain.getValue(1); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag); SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32); Chain = NewSP.getValue(1); SDValue Ops[2] = { NewSP, Chain }; return DAG.getMergeValues(Ops, DL); } SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() && "Unexpected type for custom-lowering FP_EXTEND"); RTLIB::Libcall LC; LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType()); SDValue SrcVal = Op.getOperand(0); return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false, SDLoc(Op)).first; } SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { assert(Op.getOperand(0).getValueType() == MVT::f64 && Subtarget->isFPOnlySP() && "Unexpected type for custom-lowering FP_ROUND"); RTLIB::Libcall LC; LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType()); SDValue SrcVal = Op.getOperand(0); return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false, SDLoc(Op)).first; } bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The ARM target isn't yet aware of offsets. return false; } bool ARM::isBitFieldInvertedMask(unsigned v) { if (v == 0xffffffff) return false; // there can be 1's on either or both "outsides", all the "inside" // bits must be 0's return isShiftedMask_32(~v); } /// isFPImmLegal - Returns true if the target can instruction select the /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. 
bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { if (!Subtarget->hasVFP3()) return false; if (VT == MVT::f32) return ARM_AM::getFP32Imm(Imm) != -1; if (VT == MVT::f64 && !Subtarget->isFPOnlySP()) return ARM_AM::getFP64Imm(Imm) != -1; return false; } /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment /// specified in the intrinsic calls. bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { switch (Intrinsic) { case Intrinsic::arm_neon_vld1: case Intrinsic::arm_neon_vld2: case Intrinsic::arm_neon_vld3: case Intrinsic::arm_neon_vld4: case Intrinsic::arm_neon_vld2lane: case Intrinsic::arm_neon_vld3lane: case Intrinsic::arm_neon_vld4lane: { Info.opc = ISD::INTRINSIC_W_CHAIN; // Conservatively set memVT to the entire set of vectors loaded. auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64; Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); Info.align = cast(AlignArg)->getZExtValue(); Info.vol = false; // volatile loads with NEON intrinsics not supported Info.readMem = true; Info.writeMem = false; return true; } case Intrinsic::arm_neon_vst1: case Intrinsic::arm_neon_vst2: case Intrinsic::arm_neon_vst3: case Intrinsic::arm_neon_vst4: case Intrinsic::arm_neon_vst2lane: case Intrinsic::arm_neon_vst3lane: case Intrinsic::arm_neon_vst4lane: { Info.opc = ISD::INTRINSIC_VOID; // Conservatively set memVT to the entire set of vectors stored. 
auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); unsigned NumElts = 0; for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { Type *ArgTy = I.getArgOperand(ArgI)->getType(); if (!ArgTy->isVectorTy()) break; NumElts += DL.getTypeSizeInBits(ArgTy) / 64; } Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); Info.align = cast(AlignArg)->getZExtValue(); Info.vol = false; // volatile stores with NEON intrinsics not supported Info.readMem = false; Info.writeMem = true; return true; } case Intrinsic::arm_ldaex: case Intrinsic::arm_ldrex: { auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); PointerType *PtrTy = cast(I.getArgOperand(0)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(PtrTy->getElementType()); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); Info.vol = true; Info.readMem = true; Info.writeMem = false; return true; } case Intrinsic::arm_stlex: case Intrinsic::arm_strex: { auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); PointerType *PtrTy = cast(I.getArgOperand(1)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(PtrTy->getElementType()); Info.ptrVal = I.getArgOperand(1); Info.offset = 0; Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); Info.vol = true; Info.readMem = false; Info.writeMem = true; return true; } case Intrinsic::arm_stlexd: case Intrinsic::arm_strexd: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i64; Info.ptrVal = I.getArgOperand(2); Info.offset = 0; Info.align = 8; Info.vol = true; Info.readMem = false; Info.writeMem = true; return true; } case Intrinsic::arm_ldaexd: case Intrinsic::arm_ldrexd: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i64; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.align = 
8; Info.vol = true; Info.readMem = true; Info.writeMem = false; return true; } default: break; } return false; } /// \brief Returns true if it is beneficial to convert a load of a constant /// to just the constant itself. bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); unsigned Bits = Ty->getPrimitiveSizeInBits(); if (Bits == 0 || Bits > 32) return false; return true; } bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, unsigned Index) const { if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT)) return false; return (Index == 0 || Index == ResVT.getVectorNumElements()); } Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); // First, if the target has no DMB, see what fallback we can use. if (!Subtarget->hasDataBarrier()) { // Some ARMv6 cpus can support data barriers with an mcr instruction. // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get // here. if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) { Function *MCR = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_mcr); Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0), Builder.getInt32(0), Builder.getInt32(7), Builder.getInt32(10), Builder.getInt32(5)}; return Builder.CreateCall(MCR, args); } else { // Instead of using barriers, atomic accesses on these subtargets use // libcalls. llvm_unreachable("makeDMB on a target so old that it has no barriers"); } } else { Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb); // Only a full system barrier exists in the M-class architectures. Domain = Subtarget->isMClass() ? 
ARM_MB::SY : Domain; Constant *CDomain = Builder.getInt32(Domain); return Builder.CreateCall(DMB, CDomain); } } // Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const { switch (Ord) { case AtomicOrdering::NotAtomic: case AtomicOrdering::Unordered: llvm_unreachable("Invalid fence: unordered/non-atomic"); case AtomicOrdering::Monotonic: case AtomicOrdering::Acquire: return nullptr; // Nothing to do case AtomicOrdering::SequentiallyConsistent: if (!IsStore) return nullptr; // Nothing to do /*FALLTHROUGH*/ case AtomicOrdering::Release: case AtomicOrdering::AcquireRelease: if (Subtarget->preferISHSTBarriers()) return makeDMB(Builder, ARM_MB::ISHST); // FIXME: add a comment with a link to documentation justifying this. else return makeDMB(Builder, ARM_MB::ISH); } llvm_unreachable("Unknown fence ordering in emitLeadingFence"); } Instruction* ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const { switch (Ord) { case AtomicOrdering::NotAtomic: case AtomicOrdering::Unordered: llvm_unreachable("Invalid fence: unordered/not-atomic"); case AtomicOrdering::Monotonic: case AtomicOrdering::Release: return nullptr; // Nothing to do case AtomicOrdering::Acquire: case AtomicOrdering::AcquireRelease: case AtomicOrdering::SequentiallyConsistent: return makeDMB(Builder, ARM_MB::ISH); } llvm_unreachable("Unknown fence ordering in emitTrailingFence"); } // Loads and stores less than 64-bits are already atomic; ones above that // are doomed anyway, so defer to the default libcall and blame the OS when // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit // anything for those. 
bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); return (Size == 64) && !Subtarget->isMClass(); } // Loads and stores less than 64-bits are already atomic; ones above that // are doomed anyway, so defer to the default libcall and blame the OS when // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit // anything for those. // FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that // guarantee, see DDI0406C ARM architecture reference manual, // sections A8.8.72-74 LDRD) TargetLowering::AtomicExpansionKind ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { unsigned Size = LI->getType()->getPrimitiveSizeInBits(); return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly : AtomicExpansionKind::None; } // For the real atomic operations, we have ldrex/strex up to 32 bits, // and up to 64 bits on the non-M profiles TargetLowering::AtomicExpansionKind ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned Size = AI->getType()->getPrimitiveSizeInBits(); bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps(); return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None; } bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR( AtomicCmpXchgInst *AI) const { // At -O0, fast-regalloc cannot cope with the live vregs necessary to // implement cmpxchg without spilling. If the address being exchanged is also // on the stack and close enough to the spill slot, this can lead to a // situation where the monitor always gets cleared and the atomic operation // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead. 
bool hasAtomicCmpXchg = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps(); return getTargetMachine().getOptLevel() != 0 && hasAtomicCmpXchg; } bool ARMTargetLowering::shouldInsertFencesForAtomic( const Instruction *I) const { return InsertFencesForAtomic; } // This has so far only been implemented for MachO. bool ARMTargetLowering::useLoadStackGuardNode() const { return Subtarget->isTargetMachO(); } bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const { // If we do not have NEON, vector types are not natively supported. if (!Subtarget->hasNEON()) return false; // Floating point values and vector values map to the same register file. // Therefore, although we could do a store extract of a vector type, this is // better to leave at float as we have more freedom in the addressing mode for // those. if (VectorTy->isFPOrFPVectorTy()) return false; // If the index is unknown at compile time, this is very expensive to lower // and it is not possible to combine the store with the extract. if (!isa(Idx)) return false; assert(VectorTy->isVectorTy() && "VectorTy is not a vector type"); unsigned BitWidth = cast(VectorTy)->getBitWidth(); // We can do a store + vector extract on any vector that fits perfectly in a D // or Q register. if (BitWidth == 64 || BitWidth == 128) { Cost = 0; return true; } return false; } bool ARMTargetLowering::isCheapToSpeculateCttz() const { return Subtarget->hasV6T2Ops(); } bool ARMTargetLowering::isCheapToSpeculateCtlz() const { return Subtarget->hasV6T2Ops(); } Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); Type *ValTy = cast(Addr->getType())->getElementType(); bool IsAcquire = isAcquireOrStronger(Ord); // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd // intrinsic must return {i32, i32} and we have to recombine them into a // single i64 here. 
if (ValTy->getPrimitiveSizeInBits() == 64) { Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd; Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int); Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi"); Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo"); Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi"); if (!Subtarget->isLittle()) std::swap (Lo, Hi); Lo = Builder.CreateZExt(Lo, ValTy, "lo64"); Hi = Builder.CreateZExt(Hi, ValTy, "hi64"); return Builder.CreateOr( Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64"); } Type *Tys[] = { Addr->getType() }; Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex; Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys); return Builder.CreateTruncOrBitCast( Builder.CreateCall(Ldrex, Addr), cast(Addr->getType())->getElementType()); } void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance( IRBuilder<> &Builder) const { if (!Subtarget->hasV7Ops()) return; Module *M = Builder.GetInsertBlock()->getParent()->getParent(); Builder.CreateCall(llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_clrex)); } Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); bool IsRelease = isReleaseOrStronger(Ord); // Since the intrinsics must have legal type, the i64 intrinsics take two // parameters: "i32, i32". We must marshal Val into the appropriate form // before the call. if (Val->getType()->getPrimitiveSizeInBits() == 64) { Intrinsic::ID Int = IsRelease ? 
Intrinsic::arm_stlexd : Intrinsic::arm_strexd; Function *Strex = Intrinsic::getDeclaration(M, Int); Type *Int32Ty = Type::getInt32Ty(M->getContext()); Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo"); Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi"); if (!Subtarget->isLittle()) std::swap (Lo, Hi); Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); return Builder.CreateCall(Strex, {Lo, Hi, Addr}); } Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex; Type *Tys[] = { Addr->getType() }; Function *Strex = Intrinsic::getDeclaration(M, Int, Tys); return Builder.CreateCall( Strex, {Builder.CreateZExtOrBitCast( Val, Strex->getFunctionType()->getParamType(0)), Addr}); } /// \brief Lower an interleaved load into a vldN intrinsic. /// /// E.g. Lower an interleaved load (Factor = 2): /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4 /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements /// /// Into: /// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4) /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1 bool ARMTargetLowering::lowerInterleavedLoad( LoadInst *LI, ArrayRef Shuffles, ArrayRef Indices, unsigned Factor) const { assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && "Invalid interleave factor"); assert(!Shuffles.empty() && "Empty shufflevector input"); assert(Shuffles.size() == Indices.size() && "Unmatched number of shufflevectors and indices"); VectorType *VecTy = Shuffles[0]->getType(); Type *EltTy = VecTy->getVectorElementType(); const DataLayout &DL = LI->getModule()->getDataLayout(); unsigned VecSize = DL.getTypeSizeInBits(VecTy); bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64; // Skip if we do not have NEON and skip illegal vector types and vector types // with i64/f64 
elements (vldN doesn't support i64/f64 elements). if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128) || EltIs64Bits) return false; // A pointer vector can not be the return type of the ldN intrinsics. Need to // load integer vectors first and then convert to pointer vectors. if (EltTy->isPointerTy()) VecTy = VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements()); static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2, Intrinsic::arm_neon_vld3, Intrinsic::arm_neon_vld4}; IRBuilder<> Builder(LI); SmallVector Ops; Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace()); Ops.push_back(Builder.CreateBitCast(LI->getPointerOperand(), Int8Ptr)); Ops.push_back(Builder.getInt32(LI->getAlignment())); Type *Tys[] = { VecTy, Int8Ptr }; Function *VldnFunc = Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys); CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN"); // Replace uses of each shufflevector with the corresponding vector loaded // by ldN. for (unsigned i = 0; i < Shuffles.size(); i++) { ShuffleVectorInst *SV = Shuffles[i]; unsigned Index = Indices[i]; Value *SubVec = Builder.CreateExtractValue(VldN, Index); // Convert the integer vector to pointer vector if the element is pointer. if (EltTy->isPointerTy()) SubVec = Builder.CreateIntToPtr(SubVec, SV->getType()); SV->replaceAllUsesWith(SubVec); } return true; } /// \brief Get a mask consisting of sequential integers starting from \p Start. /// /// I.e. static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start, unsigned NumElts) { SmallVector Mask; for (unsigned i = 0; i < NumElts; i++) Mask.push_back(Builder.getInt32(Start + i)); return ConstantVector::get(Mask); } /// \brief Lower an interleaved store into a vstN intrinsic. /// /// E.g. 
Lower an interleaved store (Factor = 3): /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1, /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> /// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4 /// /// Into: /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> %v1, <0, 1, 2, 3> /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> %v1, <4, 5, 6, 7> /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> %v1, <8, 9, 10, 11> /// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4) /// /// Note that the new shufflevectors will be removed and we'll only generate one /// vst3 instruction in CodeGen. /// /// Example for a more general valid mask (Factor 3). Lower: /// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1, /// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19> /// store <12 x i32> %i.vec, <12 x i32>* %ptr /// /// Into: /// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> %v1, <4, 5, 6, 7> /// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> %v1, <32, 33, 34, 35> /// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> %v1, <16, 17, 18, 19> /// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4) bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const { assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && "Invalid interleave factor"); VectorType *VecTy = SVI->getType(); assert(VecTy->getVectorNumElements() % Factor == 0 && "Invalid interleaved store"); unsigned LaneLen = VecTy->getVectorNumElements() / Factor; Type *EltTy = VecTy->getVectorElementType(); VectorType *SubVecTy = VectorType::get(EltTy, LaneLen); const DataLayout &DL = SI->getModule()->getDataLayout(); unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64; // Skip if we do not have NEON and skip illegal vector types and vector types // with i64/f64 elements (vstN doesn't support i64/f64 elements). 
if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128) || EltIs64Bits) return false; Value *Op0 = SVI->getOperand(0); Value *Op1 = SVI->getOperand(1); IRBuilder<> Builder(SI); // StN intrinsics don't support pointer vectors as arguments. Convert pointer // vectors to integer vectors. if (EltTy->isPointerTy()) { Type *IntTy = DL.getIntPtrType(EltTy); // Convert to the corresponding integer vector. Type *IntVecTy = VectorType::get(IntTy, Op0->getType()->getVectorNumElements()); Op0 = Builder.CreatePtrToInt(Op0, IntVecTy); Op1 = Builder.CreatePtrToInt(Op1, IntVecTy); SubVecTy = VectorType::get(IntTy, LaneLen); } static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2, Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4}; SmallVector Ops; Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace()); Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), Int8Ptr)); Type *Tys[] = { Int8Ptr, SubVecTy }; Function *VstNFunc = Intrinsic::getDeclaration( SI->getModule(), StoreInts[Factor - 2], Tys); // Split the shufflevector operands into sub vectors for the new vstN call. auto Mask = SVI->getShuffleMask(); for (unsigned i = 0; i < Factor; i++) { if (Mask[i] >= 0) { Ops.push_back(Builder.CreateShuffleVector( Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen))); } else { unsigned StartMask = 0; for (unsigned j = 1; j < LaneLen; j++) { if (Mask[j*Factor + i] >= 0) { StartMask = Mask[j*Factor + i] - j; break; } } // Note: If all elements in a chunk are undefs, StartMask=0! // Note: Filling undef gaps with random elements is ok, since // those elements were being written anyway (with undefs). 
// In the case of all undefs we're defaulting to using elems from 0 // Note: StartMask cannot be negative, it's checked in isReInterleaveMask Ops.push_back(Builder.CreateShuffleVector( Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen))); } } Ops.push_back(Builder.getInt32(SI->getAlignment())); Builder.CreateCall(VstNFunc, Ops); return true; } /// Base element kinds tracked by isHomogeneousAggregate; HA_UNKNOWN means no leaf element has been seen yet. enum HABaseType { HA_UNKNOWN = 0, HA_FLOAT, HA_DOUBLE, HA_VECT64, HA_VECT128 }; /// Recursively walks struct and array element types. Every float, double or vector leaf must agree with \p Base, which is set by the first leaf seen (vectors must be 64 or 128 bits wide). \p Members accumulates the leaf count (array counts are multiplied by the array length). Vector leaves return directly from the switch; every other path returns true only if \p Members ends up in the range 1..4. static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, uint64_t &Members) { if (auto *ST = dyn_cast(Ty)) { for (unsigned i = 0; i < ST->getNumElements(); ++i) { uint64_t SubMembers = 0; if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers)) return false; Members += SubMembers; } } else if (auto *AT = dyn_cast(Ty)) { uint64_t SubMembers = 0; if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers)) return false; Members += SubMembers * AT->getNumElements(); } else if (Ty->isFloatTy()) { if (Base != HA_UNKNOWN && Base != HA_FLOAT) return false; Members = 1; Base = HA_FLOAT; } else if (Ty->isDoubleTy()) { if (Base != HA_UNKNOWN && Base != HA_DOUBLE) return false; Members = 1; Base = HA_DOUBLE; } else if (auto *VT = dyn_cast(Ty)) { Members = 1; switch (Base) { case HA_FLOAT: case HA_DOUBLE: return false; case HA_VECT64: return VT->getBitWidth() == 64; case HA_VECT128: return VT->getBitWidth() == 128; case HA_UNKNOWN: switch (VT->getBitWidth()) { case 64: Base = HA_VECT64; return true; case 128: Base = HA_VECT128; return true; default: return false; } } } return (Members > 0 && Members <= 4); } /// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate or one of /// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when /// passing according to AAPCS rules. 
bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( Type *Ty, CallingConv::ID CallConv, bool isVarArg) const { if (getEffectiveCallingConv(CallConv, isVarArg) != CallingConv::ARM_AAPCS_VFP) return false; HABaseType Base = HA_UNKNOWN; uint64_t Members = 0; bool IsHA = isHomogeneousAggregate(Ty, Base, Members); DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump()); bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy(); return IsHA || IsIntArray; } unsigned ARMTargetLowering::getExceptionPointerRegister( const Constant *PersonalityFn) const { // Platforms which do not use SjLj EH may return values in these registers // via the personality function. return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R0; } unsigned ARMTargetLowering::getExceptionSelectorRegister( const Constant *PersonalityFn) const { // Platforms which do not use SjLj EH may return values in these registers // via the personality function. return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1; } void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { // Update IsSplitCSR in ARMFunctionInfo. 
ARMFunctionInfo *AFI = Entry->getParent()->getInfo(); AFI->setIsSplitCSR(true); } void ARMTargetLowering::insertCopiesSplitCSR( MachineBasicBlock *Entry, const SmallVectorImpl &Exits) const { const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); if (!IStart) return; const TargetInstrInfo *TII = Subtarget->getInstrInfo(); MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); MachineBasicBlock::iterator MBBI = Entry->begin(); for (const MCPhysReg *I = IStart; *I; ++I) { const TargetRegisterClass *RC = nullptr; if (ARM::GPRRegClass.contains(*I)) RC = &ARM::GPRRegClass; else if (ARM::DPRRegClass.contains(*I)) RC = &ARM::DPRRegClass; else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); unsigned NewVR = MRI->createVirtualRegister(RC); // Create copy from CSR to a virtual register. // FIXME: this currently does not emit CFI pseudo-instructions, it works // fine for CXX_FAST_TLS since the C++-style TLS access functions should be // nounwind. If we want to generalize this later, we may need to emit // CFI pseudo-instructions. assert(Entry->getParent()->getFunction()->hasFnAttribute( Attribute::NoUnwind) && "Function should be nounwind in insertCopiesSplitCSR!"); Entry->addLiveIn(*I); BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(*I); // Insert the copy-back instructions right before the terminator. 
for (auto *Exit : Exits) BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), TII->get(TargetOpcode::COPY), *I) .addReg(NewVR); } } Index: projects/clang400-import/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp =================================================================== --- projects/clang400-import/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp (revision 313893) +++ projects/clang400-import/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp (revision 313894) @@ -1,2799 +1,2808 @@ //===-- AddressSanitizer.cpp - memory error detector ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file is a part of AddressSanitizer, an address sanity checker. // Details of the algorithm: // http://code.google.com/p/address-sanitizer/wiki/AddressSanitizerAlgorithm // //===----------------------------------------------------------------------===// #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/MC/MCSectionMachO.h" 
#include "llvm/Support/CommandLine.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" #include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/ASanStackFrameLayout.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include #include #include #include #include #include using namespace llvm; #define DEBUG_TYPE "asan" static const uint64_t kDefaultShadowScale = 3; static const uint64_t kDefaultShadowOffset32 = 1ULL << 29; static const uint64_t kDefaultShadowOffset64 = 1ULL << 44; static const uint64_t kDynamicShadowSentinel = ~(uint64_t)0; static const uint64_t kIOSShadowOffset32 = 1ULL << 30; static const uint64_t kIOSSimShadowOffset32 = 1ULL << 30; static const uint64_t kIOSSimShadowOffset64 = kDefaultShadowOffset64; static const uint64_t kSmallX86_64ShadowOffset = 0x7FFF8000; // < 2G. static const uint64_t kLinuxKasan_ShadowOffset64 = 0xdffffc0000000000; static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 41; static const uint64_t kSystemZ_ShadowOffset64 = 1ULL << 52; static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000; static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 37; static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36; static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30; static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46; static const uint64_t kWindowsShadowOffset32 = 3ULL << 28; // The shadow memory space is dynamically allocated. 
static const uint64_t kWindowsShadowOffset64 = kDynamicShadowSentinel; static const size_t kMinStackMallocSize = 1 << 6; // 64B static const size_t kMaxStackMallocSize = 1 << 16; // 64K static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3; static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E; static const char *const kAsanModuleCtorName = "asan.module_ctor"; static const char *const kAsanModuleDtorName = "asan.module_dtor"; static const uint64_t kAsanCtorAndDtorPriority = 1; static const char *const kAsanReportErrorTemplate = "__asan_report_"; static const char *const kAsanRegisterGlobalsName = "__asan_register_globals"; static const char *const kAsanUnregisterGlobalsName = "__asan_unregister_globals"; static const char *const kAsanRegisterImageGlobalsName = "__asan_register_image_globals"; static const char *const kAsanUnregisterImageGlobalsName = "__asan_unregister_image_globals"; static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; static const char *const kAsanInitName = "__asan_init"; static const char *const kAsanVersionCheckName = "__asan_version_mismatch_check_v8"; static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp"; static const char *const kAsanPtrSub = "__sanitizer_ptr_sub"; static const char *const kAsanHandleNoReturnName = "__asan_handle_no_return"; static const int kMaxAsanStackMallocSizeClass = 10; static const char *const kAsanStackMallocNameTemplate = "__asan_stack_malloc_"; static const char *const kAsanStackFreeNameTemplate = "__asan_stack_free_"; static const char *const kAsanGenPrefix = "__asan_gen_"; static const char *const kODRGenPrefix = "__odr_asan_gen_"; static const char *const kSanCovGenPrefix = "__sancov_gen_"; static const char *const kAsanSetShadowPrefix = "__asan_set_shadow_"; static const char *const kAsanPoisonStackMemoryName = "__asan_poison_stack_memory"; static const char *const 
kAsanUnpoisonStackMemoryName = "__asan_unpoison_stack_memory"; static const char *const kAsanGlobalsRegisteredFlagName = "__asan_globals_registered"; static const char *const kAsanOptionDetectUseAfterReturn = "__asan_option_detect_stack_use_after_return"; static const char *const kAsanShadowMemoryDynamicAddress = "__asan_shadow_memory_dynamic_address"; static const char *const kAsanAllocaPoison = "__asan_alloca_poison"; static const char *const kAsanAllocasUnpoison = "__asan_allocas_unpoison"; // Access sizes are powers of two: 1, 2, 4, 8, 16. static const size_t kNumberOfAccessSizes = 5; static const unsigned kAllocaRzSize = 32; // Command-line flags. static cl::opt ClEnableKasan( "asan-kernel", cl::desc("Enable KernelAddressSanitizer instrumentation"), cl::Hidden, cl::init(false)); static cl::opt ClRecover( "asan-recover", cl::desc("Enable recovery mode (continue-after-error)."), cl::Hidden, cl::init(false)); // This flag may need to be replaced with -f[no-]asan-reads. static cl::opt ClInstrumentReads("asan-instrument-reads", cl::desc("instrument read instructions"), cl::Hidden, cl::init(true)); static cl::opt ClInstrumentWrites( "asan-instrument-writes", cl::desc("instrument write instructions"), cl::Hidden, cl::init(true)); static cl::opt ClInstrumentAtomics( "asan-instrument-atomics", cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden, cl::init(true)); static cl::opt ClAlwaysSlowPath( "asan-always-slow-path", cl::desc("use instrumentation with slow path for all accesses"), cl::Hidden, cl::init(false)); static cl::opt ClForceDynamicShadow( "asan-force-dynamic-shadow", cl::desc("Load shadow address into a local variable for each function"), cl::Hidden, cl::init(false)); // This flag limits the number of instructions to be instrumented // in any given BB. Normally, this should be set to unlimited (INT_MAX), // but due to http://llvm.org/bugs/show_bug.cgi?id=12652 we temporarily // set it to 10000. 
static cl::opt ClMaxInsnsToInstrumentPerBB( "asan-max-ins-per-bb", cl::init(10000), cl::desc("maximal number of instructions to instrument in any given BB"), cl::Hidden); // This flag may need to be replaced with -f[no]asan-stack. static cl::opt ClStack("asan-stack", cl::desc("Handle stack memory"), cl::Hidden, cl::init(true)); static cl::opt ClMaxInlinePoisoningSize( "asan-max-inline-poisoning-size", cl::desc( "Inline shadow poisoning for blocks up to the given size in bytes."), cl::Hidden, cl::init(64)); static cl::opt ClUseAfterReturn("asan-use-after-return", cl::desc("Check stack-use-after-return"), cl::Hidden, cl::init(true)); static cl::opt ClUseAfterScope("asan-use-after-scope", cl::desc("Check stack-use-after-scope"), cl::Hidden, cl::init(false)); // This flag may need to be replaced with -f[no]asan-globals. static cl::opt ClGlobals("asan-globals", cl::desc("Handle global objects"), cl::Hidden, cl::init(true)); static cl::opt ClInitializers("asan-initialization-order", cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(true)); static cl::opt ClInvalidPointerPairs( "asan-detect-invalid-pointer-pair", cl::desc("Instrument <, <=, >, >=, - with pointer operands"), cl::Hidden, cl::init(false)); static cl::opt ClRealignStack( "asan-realign-stack", cl::desc("Realign stack to the value of this flag (power of two)"), cl::Hidden, cl::init(32)); static cl::opt ClInstrumentationWithCallsThreshold( "asan-instrumentation-with-call-threshold", cl::desc( "If the function being instrumented contains more than " "this number of memory accesses, use callbacks instead of " "inline checks (-1 means never use callbacks)."), cl::Hidden, cl::init(7000)); static cl::opt ClMemoryAccessCallbackPrefix( "asan-memory-access-callback-prefix", cl::desc("Prefix for memory access callbacks"), cl::Hidden, cl::init("__asan_")); static cl::opt ClInstrumentDynamicAllocas("asan-instrument-dynamic-allocas", cl::desc("instrument dynamic allocas"), cl::Hidden, cl::init(true)); static 
cl::opt ClSkipPromotableAllocas( "asan-skip-promotable-allocas", cl::desc("Do not instrument promotable allocas"), cl::Hidden, cl::init(true)); // These flags allow changing the shadow mapping. // The shadow mapping looks like // Shadow = (Mem >> scale) + offset static cl::opt ClMappingScale("asan-mapping-scale", cl::desc("scale of asan shadow mapping"), cl::Hidden, cl::init(0)); static cl::opt ClMappingOffset( "asan-mapping-offset", cl::desc("offset of asan shadow mapping [EXPERIMENTAL]"), cl::Hidden, cl::init(0)); // Optimization flags. Not user visible, used mostly for testing // and benchmarking the tool. static cl::opt ClOpt("asan-opt", cl::desc("Optimize instrumentation"), cl::Hidden, cl::init(true)); static cl::opt ClOptSameTemp( "asan-opt-same-temp", cl::desc("Instrument the same temp just once"), cl::Hidden, cl::init(true)); static cl::opt ClOptGlobals("asan-opt-globals", cl::desc("Don't instrument scalar globals"), cl::Hidden, cl::init(true)); static cl::opt ClOptStack( "asan-opt-stack", cl::desc("Don't instrument scalar stack variables"), cl::Hidden, cl::init(false)); static cl::opt ClDynamicAllocaStack( "asan-stack-dynamic-alloca", cl::desc("Use dynamic alloca to represent stack variables"), cl::Hidden, cl::init(true)); static cl::opt ClForceExperiment( "asan-force-experiment", cl::desc("Force optimization experiment (for testing)"), cl::Hidden, cl::init(0)); static cl::opt ClUsePrivateAliasForGlobals("asan-use-private-alias", cl::desc("Use private aliases for global" " variables"), cl::Hidden, cl::init(false)); static cl::opt ClUseMachOGlobalsSection("asan-globals-live-support", cl::desc("Use linker features to support dead " "code stripping of globals " "(Mach-O only)"), cl::Hidden, cl::init(true)); // Debug flags. 
static cl::opt ClDebug("asan-debug", cl::desc("debug"), cl::Hidden, cl::init(0)); static cl::opt ClDebugStack("asan-debug-stack", cl::desc("debug stack"), cl::Hidden, cl::init(0)); static cl::opt ClDebugFunc("asan-debug-func", cl::Hidden, cl::desc("Debug func")); static cl::opt ClDebugMin("asan-debug-min", cl::desc("Debug min inst"), cl::Hidden, cl::init(-1)); static cl::opt ClDebugMax("asan-debug-max", cl::desc("Debug max inst"), cl::Hidden, cl::init(-1)); STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); STATISTIC(NumOptimizedAccessesToGlobalVar, "Number of optimized accesses to global vars"); STATISTIC(NumOptimizedAccessesToStackVar, "Number of optimized accesses to stack vars"); namespace { /// Frontend-provided metadata for source location. struct LocationMetadata { StringRef Filename; int LineNo; int ColumnNo; LocationMetadata() : Filename(), LineNo(0), ColumnNo(0) {} bool empty() const { return Filename.empty(); } void parse(MDNode *MDN) { assert(MDN->getNumOperands() == 3); MDString *DIFilename = cast(MDN->getOperand(0)); Filename = DIFilename->getString(); LineNo = mdconst::extract(MDN->getOperand(1))->getLimitedValue(); ColumnNo = mdconst::extract(MDN->getOperand(2))->getLimitedValue(); } }; /// Frontend-provided metadata for global variables. class GlobalsMetadata { public: struct Entry { Entry() : SourceLoc(), Name(), IsDynInit(false), IsBlacklisted(false) {} LocationMetadata SourceLoc; StringRef Name; bool IsDynInit; bool IsBlacklisted; }; GlobalsMetadata() : inited_(false) {} void reset() { inited_ = false; Entries.clear(); } void init(Module &M) { assert(!inited_); inited_ = true; NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals"); if (!Globals) return; for (auto MDN : Globals->operands()) { // Metadata node contains the global and the fields of "Entry". 
assert(MDN->getNumOperands() == 5); auto *GV = mdconst::extract_or_null(MDN->getOperand(0)); // The optimizer may optimize away a global entirely. if (!GV) continue; // We can already have an entry for GV if it was merged with another // global. Entry &E = Entries[GV]; if (auto *Loc = cast_or_null(MDN->getOperand(1))) E.SourceLoc.parse(Loc); if (auto *Name = cast_or_null(MDN->getOperand(2))) E.Name = Name->getString(); ConstantInt *IsDynInit = mdconst::extract(MDN->getOperand(3)); E.IsDynInit |= IsDynInit->isOne(); ConstantInt *IsBlacklisted = mdconst::extract(MDN->getOperand(4)); E.IsBlacklisted |= IsBlacklisted->isOne(); } } /// Returns metadata entry for a given global. Entry get(GlobalVariable *G) const { auto Pos = Entries.find(G); return (Pos != Entries.end()) ? Pos->second : Entry(); } private: bool inited_; DenseMap Entries; }; /// This struct defines the shadow mapping using the rule: /// shadow = (mem >> Scale) ADD-or-OR Offset. struct ShadowMapping { int Scale; uint64_t Offset; bool OrShadowOffset; }; static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize, bool IsKasan) { bool IsAndroid = TargetTriple.isAndroid(); bool IsIOS = TargetTriple.isiOS() || TargetTriple.isWatchOS(); bool IsFreeBSD = TargetTriple.isOSFreeBSD(); bool IsLinux = TargetTriple.isOSLinux(); bool IsPPC64 = TargetTriple.getArch() == llvm::Triple::ppc64 || TargetTriple.getArch() == llvm::Triple::ppc64le; bool IsSystemZ = TargetTriple.getArch() == llvm::Triple::systemz; bool IsX86 = TargetTriple.getArch() == llvm::Triple::x86; bool IsX86_64 = TargetTriple.getArch() == llvm::Triple::x86_64; bool IsMIPS32 = TargetTriple.getArch() == llvm::Triple::mips || TargetTriple.getArch() == llvm::Triple::mipsel; bool IsMIPS64 = TargetTriple.getArch() == llvm::Triple::mips64 || TargetTriple.getArch() == llvm::Triple::mips64el; bool IsAArch64 = TargetTriple.getArch() == llvm::Triple::aarch64; bool IsWindows = TargetTriple.isOSWindows(); ShadowMapping Mapping; if (LongSize == 32) { // 
Android is always PIE, which means that the beginning of the address // space is always available. if (IsAndroid) Mapping.Offset = 0; else if (IsMIPS32) Mapping.Offset = kMIPS32_ShadowOffset32; else if (IsFreeBSD) Mapping.Offset = kFreeBSD_ShadowOffset32; else if (IsIOS) // If we're targeting iOS and x86, the binary is built for iOS simulator. Mapping.Offset = IsX86 ? kIOSSimShadowOffset32 : kIOSShadowOffset32; else if (IsWindows) Mapping.Offset = kWindowsShadowOffset32; else Mapping.Offset = kDefaultShadowOffset32; } else { // LongSize == 64 if (IsPPC64) Mapping.Offset = kPPC64_ShadowOffset64; else if (IsSystemZ) Mapping.Offset = kSystemZ_ShadowOffset64; else if (IsFreeBSD) Mapping.Offset = kFreeBSD_ShadowOffset64; else if (IsLinux && IsX86_64) { if (IsKasan) Mapping.Offset = kLinuxKasan_ShadowOffset64; else Mapping.Offset = kSmallX86_64ShadowOffset; } else if (IsWindows && IsX86_64) { Mapping.Offset = kWindowsShadowOffset64; } else if (IsMIPS64) Mapping.Offset = kMIPS64_ShadowOffset64; else if (IsIOS) // If we're targeting iOS and x86, the binary is built for iOS simulator. // We are using dynamic shadow offset on the 64-bit devices. Mapping.Offset = IsX86_64 ? kIOSSimShadowOffset64 : kDynamicShadowSentinel; else if (IsAArch64) Mapping.Offset = kAArch64_ShadowOffset64; else Mapping.Offset = kDefaultShadowOffset64; } if (ClForceDynamicShadow) { Mapping.Offset = kDynamicShadowSentinel; } Mapping.Scale = kDefaultShadowScale; if (ClMappingScale.getNumOccurrences() > 0) { Mapping.Scale = ClMappingScale; } if (ClMappingOffset.getNumOccurrences() > 0) { Mapping.Offset = ClMappingOffset; } // OR-ing shadow offset is more efficient (at least on x86) if the offset // is a power of two, but on ppc64 we have to use add since the shadow // offset is not necessarily 1/8-th of the address space. On SystemZ, // we could OR the constant in a single instruction, but it's more // efficient to load it once and use indexed addressing. 
Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64 && !IsSystemZ && !(Mapping.Offset & (Mapping.Offset - 1)) && Mapping.Offset != kDynamicShadowSentinel; return Mapping; } static size_t RedzoneSizeForScale(int MappingScale) { // Redzone used for stack and globals is at least 32 bytes. // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively. return std::max(32U, 1U << MappingScale); } /// AddressSanitizer: instrument the code in module to find memory bugs. struct AddressSanitizer : public FunctionPass { explicit AddressSanitizer(bool CompileKernel = false, bool Recover = false, bool UseAfterScope = false) : FunctionPass(ID), CompileKernel(CompileKernel || ClEnableKasan), Recover(Recover || ClRecover), UseAfterScope(UseAfterScope || ClUseAfterScope), LocalDynamicShadow(nullptr) { initializeAddressSanitizerPass(*PassRegistry::getPassRegistry()); } StringRef getPassName() const override { return "AddressSanitizerFunctionPass"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); } uint64_t getAllocaSizeInBytes(const AllocaInst &AI) const { uint64_t ArraySize = 1; if (AI.isArrayAllocation()) { const ConstantInt *CI = dyn_cast(AI.getArraySize()); assert(CI && "non-constant array size"); ArraySize = CI->getZExtValue(); } Type *Ty = AI.getAllocatedType(); uint64_t SizeInBytes = AI.getModule()->getDataLayout().getTypeAllocSize(Ty); return SizeInBytes * ArraySize; } /// Check if we want (and can) handle this alloca. bool isInterestingAlloca(const AllocaInst &AI); /// If it is an interesting memory access, return the PointerOperand /// and set IsWrite/Alignment. Otherwise return nullptr. /// MaybeMask is an output parameter for the mask Value, if we're looking at a /// masked load/store. 
Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite, uint64_t *TypeSize, unsigned *Alignment, Value **MaybeMask = nullptr); void instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, Instruction *I, bool UseCalls, const DataLayout &DL); void instrumentPointerComparisonOrSubtraction(Instruction *I); void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore, Value *Addr, uint32_t TypeSize, bool IsWrite, Value *SizeArgument, bool UseCalls, uint32_t Exp); void instrumentUnusualSizeOrAlignment(Instruction *I, Instruction *InsertBefore, Value *Addr, uint32_t TypeSize, bool IsWrite, Value *SizeArgument, bool UseCalls, uint32_t Exp); Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, Value *ShadowValue, uint32_t TypeSize); Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr, bool IsWrite, size_t AccessSizeIndex, Value *SizeArgument, uint32_t Exp); void instrumentMemIntrinsic(MemIntrinsic *MI); Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); bool runOnFunction(Function &F) override; bool maybeInsertAsanInitAtFunctionEntry(Function &F); void maybeInsertDynamicShadowAtFunctionEntry(Function &F); void markEscapedLocalAllocas(Function &F); bool doInitialization(Module &M) override; bool doFinalization(Module &M) override; static char ID; // Pass identification, replacement for typeid DominatorTree &getDominatorTree() const { return *DT; } private: void initializeCallbacks(Module &M); bool LooksLikeCodeInBug11395(Instruction *I); bool GlobalIsLinkerInitialized(GlobalVariable *G); bool isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis, Value *Addr, uint64_t TypeSize) const; /// Helper to cleanup per-function state. 
struct FunctionStateRAII { AddressSanitizer *Pass; FunctionStateRAII(AddressSanitizer *Pass) : Pass(Pass) { assert(Pass->ProcessedAllocas.empty() && "last pass forgot to clear cache"); assert(!Pass->LocalDynamicShadow); } ~FunctionStateRAII() { Pass->LocalDynamicShadow = nullptr; Pass->ProcessedAllocas.clear(); } }; LLVMContext *C; Triple TargetTriple; int LongSize; bool CompileKernel; bool Recover; bool UseAfterScope; Type *IntptrTy; ShadowMapping Mapping; DominatorTree *DT; Function *AsanCtorFunction = nullptr; Function *AsanInitFunction = nullptr; Function *AsanHandleNoReturnFunc; Function *AsanPtrCmpFunction, *AsanPtrSubFunction; // This array is indexed by AccessIsWrite, Experiment and log2(AccessSize). Function *AsanErrorCallback[2][2][kNumberOfAccessSizes]; Function *AsanMemoryAccessCallback[2][2][kNumberOfAccessSizes]; // This array is indexed by AccessIsWrite and Experiment. Function *AsanErrorCallbackSized[2][2]; Function *AsanMemoryAccessCallbackSized[2][2]; Function *AsanMemmove, *AsanMemcpy, *AsanMemset; InlineAsm *EmptyAsm; Value *LocalDynamicShadow; GlobalsMetadata GlobalsMD; DenseMap ProcessedAllocas; friend struct FunctionStackPoisoner; }; class AddressSanitizerModule : public ModulePass { public: explicit AddressSanitizerModule(bool CompileKernel = false, bool Recover = false) : ModulePass(ID), CompileKernel(CompileKernel || ClEnableKasan), Recover(Recover || ClRecover) {} bool runOnModule(Module &M) override; static char ID; // Pass identification, replacement for typeid StringRef getPassName() const override { return "AddressSanitizerModule"; } private: void initializeCallbacks(Module &M); bool InstrumentGlobals(IRBuilder<> &IRB, Module &M); void InstrumentGlobalsCOFF(IRBuilder<> &IRB, Module &M, ArrayRef ExtendedGlobals, ArrayRef MetadataInitializers); void InstrumentGlobalsMachO(IRBuilder<> &IRB, Module &M, ArrayRef ExtendedGlobals, ArrayRef MetadataInitializers); void InstrumentGlobalsWithMetadataArray(IRBuilder<> &IRB, Module &M, ArrayRef 
ExtendedGlobals, ArrayRef MetadataInitializers); GlobalVariable *CreateMetadataGlobal(Module &M, Constant *Initializer, StringRef OriginalName); void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata); IRBuilder<> CreateAsanModuleDtor(Module &M); bool ShouldInstrumentGlobal(GlobalVariable *G); bool ShouldUseMachOGlobalsSection() const; StringRef getGlobalMetadataSection() const; void poisonOneInitializer(Function &GlobalInit, GlobalValue *ModuleName); void createInitializerPoisonCalls(Module &M, GlobalValue *ModuleName); size_t MinRedzoneSizeForGlobal() const { return RedzoneSizeForScale(Mapping.Scale); } GlobalsMetadata GlobalsMD; bool CompileKernel; bool Recover; Type *IntptrTy; LLVMContext *C; Triple TargetTriple; ShadowMapping Mapping; Function *AsanPoisonGlobals; Function *AsanUnpoisonGlobals; Function *AsanRegisterGlobals; Function *AsanUnregisterGlobals; Function *AsanRegisterImageGlobals; Function *AsanUnregisterImageGlobals; }; // Stack poisoning does not play well with exception handling. // When an exception is thrown, we essentially bypass the code // that unpoisons the stack. This is why the run-time library has // to intercept __cxa_throw (as well as longjmp, etc) and unpoison the entire // stack in the interceptor. This however does not work inside the // actual function which catches the exception. Most likely because the // compiler hoists the load of the shadow value somewhere too high. // This causes asan to report a non-existing bug on 453.povray. // It sounds like an LLVM bug. 
struct FunctionStackPoisoner : public InstVisitor { Function &F; AddressSanitizer &ASan; DIBuilder DIB; LLVMContext *C; Type *IntptrTy; Type *IntptrPtrTy; ShadowMapping Mapping; SmallVector AllocaVec; SmallVector StaticAllocasToMoveUp; SmallVector RetVec; unsigned StackAlignment; Function *AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1], *AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1]; Function *AsanSetShadowFunc[0x100] = {}; Function *AsanPoisonStackMemoryFunc, *AsanUnpoisonStackMemoryFunc; Function *AsanAllocaPoisonFunc, *AsanAllocasUnpoisonFunc; // Stores a place and arguments of poisoning/unpoisoning call for alloca. struct AllocaPoisonCall { IntrinsicInst *InsBefore; AllocaInst *AI; uint64_t Size; bool DoPoison; }; SmallVector DynamicAllocaPoisonCallVec; SmallVector StaticAllocaPoisonCallVec; SmallVector DynamicAllocaVec; SmallVector StackRestoreVec; AllocaInst *DynamicAllocaLayout = nullptr; IntrinsicInst *LocalEscapeCall = nullptr; // Maps Value to an AllocaInst from which the Value is originated. typedef DenseMap AllocaForValueMapTy; AllocaForValueMapTy AllocaForValue; bool HasNonEmptyInlineAsm = false; bool HasReturnsTwiceCall = false; std::unique_ptr EmptyInlineAsm; FunctionStackPoisoner(Function &F, AddressSanitizer &ASan) : F(F), ASan(ASan), DIB(*F.getParent(), /*AllowUnresolved*/ false), C(ASan.C), IntptrTy(ASan.IntptrTy), IntptrPtrTy(PointerType::get(IntptrTy, 0)), Mapping(ASan.Mapping), StackAlignment(1 << Mapping.Scale), EmptyInlineAsm(CallInst::Create(ASan.EmptyAsm)) {} bool runOnFunction() { if (!ClStack) return false; // Collect alloca, ret, lifetime instructions etc. for (BasicBlock *BB : depth_first(&F.getEntryBlock())) visit(*BB); if (AllocaVec.empty() && DynamicAllocaVec.empty()) return false; initializeCallbacks(*F.getParent()); processDynamicAllocas(); processStaticAllocas(); if (ClDebugStack) { DEBUG(dbgs() << F); } return true; } // Finds all Alloca instructions and puts // poisoned red zones around all of them. 
// Then unpoison everything back before the function returns. void processStaticAllocas(); void processDynamicAllocas(); void createDynamicAllocasInitStorage(); // ----------------------- Visitors. /// \brief Collect all Ret instructions. void visitReturnInst(ReturnInst &RI) { RetVec.push_back(&RI); } /// \brief Collect all Resume instructions. void visitResumeInst(ResumeInst &RI) { RetVec.push_back(&RI); } /// \brief Collect all CatchReturnInst instructions. void visitCleanupReturnInst(CleanupReturnInst &CRI) { RetVec.push_back(&CRI); } void unpoisonDynamicAllocasBeforeInst(Instruction *InstBefore, Value *SavedStack) { IRBuilder<> IRB(InstBefore); Value *DynamicAreaPtr = IRB.CreatePtrToInt(SavedStack, IntptrTy); // When we insert _asan_allocas_unpoison before @llvm.stackrestore, we // need to adjust extracted SP to compute the address of the most recent // alloca. We have a special @llvm.get.dynamic.area.offset intrinsic for // this purpose. if (!isa(InstBefore)) { Function *DynamicAreaOffsetFunc = Intrinsic::getDeclaration( InstBefore->getModule(), Intrinsic::get_dynamic_area_offset, {IntptrTy}); Value *DynamicAreaOffset = IRB.CreateCall(DynamicAreaOffsetFunc, {}); DynamicAreaPtr = IRB.CreateAdd(IRB.CreatePtrToInt(SavedStack, IntptrTy), DynamicAreaOffset); } IRB.CreateCall(AsanAllocasUnpoisonFunc, {IRB.CreateLoad(DynamicAllocaLayout), DynamicAreaPtr}); } // Unpoison dynamic allocas redzones. void unpoisonDynamicAllocas() { for (auto &Ret : RetVec) unpoisonDynamicAllocasBeforeInst(Ret, DynamicAllocaLayout); for (auto &StackRestoreInst : StackRestoreVec) unpoisonDynamicAllocasBeforeInst(StackRestoreInst, StackRestoreInst->getOperand(0)); } // Deploy and poison redzones around dynamic alloca call. 
To do this, we // should replace this call with another one with changed parameters and // replace all its uses with new address, so // addr = alloca type, old_size, align // is replaced by // new_size = (old_size + additional_size) * sizeof(type) // tmp = alloca i8, new_size, max(align, 32) // addr = tmp + 32 (first 32 bytes are for the left redzone). // Additional_size is added to make new memory allocation contain not only // requested memory, but also left, partial and right redzones. void handleDynamicAllocaCall(AllocaInst *AI); /// \brief Collect Alloca instructions we want (and can) handle. void visitAllocaInst(AllocaInst &AI) { if (!ASan.isInterestingAlloca(AI)) { if (AI.isStaticAlloca()) { // Skip over allocas that are present *before* the first instrumented // alloca, we don't want to move those around. if (AllocaVec.empty()) return; StaticAllocasToMoveUp.push_back(&AI); } return; } StackAlignment = std::max(StackAlignment, AI.getAlignment()); if (!AI.isStaticAlloca()) DynamicAllocaVec.push_back(&AI); else AllocaVec.push_back(&AI); } /// \brief Collect lifetime intrinsic calls to check for use-after-scope /// errors. void visitIntrinsicInst(IntrinsicInst &II) { Intrinsic::ID ID = II.getIntrinsicID(); if (ID == Intrinsic::stackrestore) StackRestoreVec.push_back(&II); if (ID == Intrinsic::localescape) LocalEscapeCall = &II; if (!ASan.UseAfterScope) return; if (ID != Intrinsic::lifetime_start && ID != Intrinsic::lifetime_end) return; // Found lifetime intrinsic, add ASan instrumentation if necessary. ConstantInt *Size = dyn_cast(II.getArgOperand(0)); // If size argument is undefined, don't do anything. if (Size->isMinusOne()) return; // Check that size doesn't saturate uint64_t and can // be stored in IntptrTy. const uint64_t SizeValue = Size->getValue().getLimitedValue(); if (SizeValue == ~0ULL || !ConstantInt::isValueValidForType(IntptrTy, SizeValue)) return; // Find alloca instruction that corresponds to llvm.lifetime argument. 
AllocaInst *AI = findAllocaForValue(II.getArgOperand(1)); if (!AI || !ASan.isInterestingAlloca(*AI)) return; bool DoPoison = (ID == Intrinsic::lifetime_end); AllocaPoisonCall APC = {&II, AI, SizeValue, DoPoison}; if (AI->isStaticAlloca()) StaticAllocaPoisonCallVec.push_back(APC); else if (ClInstrumentDynamicAllocas) DynamicAllocaPoisonCallVec.push_back(APC); } void visitCallSite(CallSite CS) { Instruction *I = CS.getInstruction(); if (CallInst *CI = dyn_cast(I)) { HasNonEmptyInlineAsm |= CI->isInlineAsm() && !CI->isIdenticalTo(EmptyInlineAsm.get()); HasReturnsTwiceCall |= CI->canReturnTwice(); } } // ---------------------- Helpers. void initializeCallbacks(Module &M); bool doesDominateAllExits(const Instruction *I) const { for (auto Ret : RetVec) { if (!ASan.getDominatorTree().dominates(I, Ret)) return false; } return true; } /// Finds alloca where the value comes from. AllocaInst *findAllocaForValue(Value *V); // Copies bytes from ShadowBytes into shadow memory for indexes where // ShadowMask is not zero. If ShadowMask[i] is zero, we assume that // ShadowBytes[i] is constantly zero and doesn't need to be overwritten. 
void copyToShadow(ArrayRef ShadowMask, ArrayRef ShadowBytes, IRBuilder<> &IRB, Value *ShadowBase); void copyToShadow(ArrayRef ShadowMask, ArrayRef ShadowBytes, size_t Begin, size_t End, IRBuilder<> &IRB, Value *ShadowBase); void copyToShadowInline(ArrayRef ShadowMask, ArrayRef ShadowBytes, size_t Begin, size_t End, IRBuilder<> &IRB, Value *ShadowBase); void poisonAlloca(Value *V, uint64_t Size, IRBuilder<> &IRB, bool DoPoison); Value *createAllocaForLayout(IRBuilder<> &IRB, const ASanStackFrameLayout &L, bool Dynamic); PHINode *createPHI(IRBuilder<> &IRB, Value *Cond, Value *ValueIfTrue, Instruction *ThenTerm, Value *ValueIfFalse); }; } // anonymous namespace char AddressSanitizer::ID = 0; INITIALIZE_PASS_BEGIN( AddressSanitizer, "asan", "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END( AddressSanitizer, "asan", "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false, false) FunctionPass *llvm::createAddressSanitizerFunctionPass(bool CompileKernel, bool Recover, bool UseAfterScope) { assert(!CompileKernel || Recover); return new AddressSanitizer(CompileKernel, Recover, UseAfterScope); } char AddressSanitizerModule::ID = 0; INITIALIZE_PASS( AddressSanitizerModule, "asan-module", "AddressSanitizer: detects use-after-free and out-of-bounds bugs." "ModulePass", false, false) ModulePass *llvm::createAddressSanitizerModulePass(bool CompileKernel, bool Recover) { assert(!CompileKernel || Recover); return new AddressSanitizerModule(CompileKernel, Recover); } static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { size_t Res = countTrailingZeros(TypeSize / 8); assert(Res < kNumberOfAccessSizes); return Res; } // \brief Create a constant for Str so that we can pass it to the run-time lib. 
static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str, bool AllowMerging) { Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str); // We use private linkage for module-local strings. If they can be merged // with another one, we set the unnamed_addr attribute. GlobalVariable *GV = new GlobalVariable(M, StrConst->getType(), true, GlobalValue::PrivateLinkage, StrConst, kAsanGenPrefix); if (AllowMerging) GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); GV->setAlignment(1); // Strings may not be merged w/o setting align 1. return GV; } /// \brief Create a global describing a source location. static GlobalVariable *createPrivateGlobalForSourceLoc(Module &M, LocationMetadata MD) { Constant *LocData[] = { createPrivateGlobalForString(M, MD.Filename, true), ConstantInt::get(Type::getInt32Ty(M.getContext()), MD.LineNo), ConstantInt::get(Type::getInt32Ty(M.getContext()), MD.ColumnNo), }; auto LocStruct = ConstantStruct::getAnon(LocData); auto GV = new GlobalVariable(M, LocStruct->getType(), true, GlobalValue::PrivateLinkage, LocStruct, kAsanGenPrefix); GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); return GV; } /// \brief Check if \p G has been created by a trusted compiler pass. static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) { // Do not instrument asan globals. if (G->getName().startswith(kAsanGenPrefix) || G->getName().startswith(kSanCovGenPrefix) || G->getName().startswith(kODRGenPrefix)) return true; // Do not instrument gcov counter arrays. 
if (G->getName() == "__llvm_gcov_ctr") return true; return false; } Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { // Shadow >> scale Shadow = IRB.CreateLShr(Shadow, Mapping.Scale); if (Mapping.Offset == 0) return Shadow; // (Shadow >> scale) | offset Value *ShadowBase; if (LocalDynamicShadow) ShadowBase = LocalDynamicShadow; else ShadowBase = ConstantInt::get(IntptrTy, Mapping.Offset); if (Mapping.OrShadowOffset) return IRB.CreateOr(Shadow, ShadowBase); else return IRB.CreateAdd(Shadow, ShadowBase); } // Instrument memset/memmove/memcpy void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { IRBuilder<> IRB(MI); if (isa(MI)) { IRB.CreateCall( isa(MI) ? AsanMemmove : AsanMemcpy, {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()), IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()), IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)}); } else if (isa(MI)) { IRB.CreateCall( AsanMemset, {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()), IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false), IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)}); } MI->eraseFromParent(); } /// Check if we want (and can) handle this alloca. bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) { auto PreviouslySeenAllocaInfo = ProcessedAllocas.find(&AI); if (PreviouslySeenAllocaInfo != ProcessedAllocas.end()) return PreviouslySeenAllocaInfo->getSecond(); bool IsInteresting = (AI.getAllocatedType()->isSized() && // alloca() may be called with 0 size, ignore it. ((!AI.isStaticAlloca()) || getAllocaSizeInBytes(AI) > 0) && // We are only interested in allocas not promotable to registers. // Promotable allocas are common under -O0. (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)) && // inalloca allocas are not treated as static, and we don't want // dynamic alloca instrumentation for them as well. 
- !AI.isUsedWithInAlloca()); + !AI.isUsedWithInAlloca() && + // swifterror allocas are register promoted by ISel + !AI.isSwiftError()); ProcessedAllocas[&AI] = IsInteresting; return IsInteresting; } Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I, bool *IsWrite, uint64_t *TypeSize, unsigned *Alignment, Value **MaybeMask) { // Skip memory accesses inserted by another instrumentation. if (I->getMetadata("nosanitize")) return nullptr; // Do not instrument the load fetching the dynamic shadow address. if (LocalDynamicShadow == I) return nullptr; Value *PtrOperand = nullptr; const DataLayout &DL = I->getModule()->getDataLayout(); if (LoadInst *LI = dyn_cast(I)) { if (!ClInstrumentReads) return nullptr; *IsWrite = false; *TypeSize = DL.getTypeStoreSizeInBits(LI->getType()); *Alignment = LI->getAlignment(); PtrOperand = LI->getPointerOperand(); } else if (StoreInst *SI = dyn_cast(I)) { if (!ClInstrumentWrites) return nullptr; *IsWrite = true; *TypeSize = DL.getTypeStoreSizeInBits(SI->getValueOperand()->getType()); *Alignment = SI->getAlignment(); PtrOperand = SI->getPointerOperand(); } else if (AtomicRMWInst *RMW = dyn_cast(I)) { if (!ClInstrumentAtomics) return nullptr; *IsWrite = true; *TypeSize = DL.getTypeStoreSizeInBits(RMW->getValOperand()->getType()); *Alignment = 0; PtrOperand = RMW->getPointerOperand(); } else if (AtomicCmpXchgInst *XCHG = dyn_cast(I)) { if (!ClInstrumentAtomics) return nullptr; *IsWrite = true; *TypeSize = DL.getTypeStoreSizeInBits(XCHG->getCompareOperand()->getType()); *Alignment = 0; PtrOperand = XCHG->getPointerOperand(); } else if (auto CI = dyn_cast(I)) { auto *F = dyn_cast(CI->getCalledValue()); if (F && (F->getName().startswith("llvm.masked.load.") || F->getName().startswith("llvm.masked.store."))) { unsigned OpOffset = 0; if (F->getName().startswith("llvm.masked.store.")) { if (!ClInstrumentWrites) return nullptr; // Masked store has an initial operand for the value. 
OpOffset = 1; *IsWrite = true; } else { if (!ClInstrumentReads) return nullptr; *IsWrite = false; } auto BasePtr = CI->getOperand(0 + OpOffset); auto Ty = cast(BasePtr->getType())->getElementType(); *TypeSize = DL.getTypeStoreSizeInBits(Ty); if (auto AlignmentConstant = dyn_cast(CI->getOperand(1 + OpOffset))) *Alignment = (unsigned)AlignmentConstant->getZExtValue(); else *Alignment = 1; // No alignment guarantees. We probably got Undef if (MaybeMask) *MaybeMask = CI->getOperand(2 + OpOffset); PtrOperand = BasePtr; } } - // Do not instrument acesses from different address spaces; we cannot deal - // with them. if (PtrOperand) { + // Do not instrument acesses from different address spaces; we cannot deal + // with them. Type *PtrTy = cast(PtrOperand->getType()->getScalarType()); if (PtrTy->getPointerAddressSpace() != 0) + return nullptr; + + // Ignore swifterror addresses. + // swifterror memory addresses are mem2reg promoted by instruction + // selection. As such they cannot have regular uses like an instrumentation + // function and it makes no sense to track them as memory. + if (PtrOperand->isSwiftError()) return nullptr; } // Treat memory accesses to promotable allocas as non-interesting since they // will not cause memory violations. This greatly speeds up the instrumented // executable at -O0. if (ClSkipPromotableAllocas) if (auto AI = dyn_cast_or_null(PtrOperand)) return isInterestingAlloca(*AI) ? AI : nullptr; return PtrOperand; } static bool isPointerOperand(Value *V) { return V->getType()->isPointerTy() || isa(V); } // This is a rough heuristic; it may cause both false positives and // false negatives. The proper implementation requires cooperation with // the frontend. 
static bool isInterestingPointerComparisonOrSubtraction(Instruction *I) { if (ICmpInst *Cmp = dyn_cast(I)) { if (!Cmp->isRelational()) return false; } else if (BinaryOperator *BO = dyn_cast(I)) { if (BO->getOpcode() != Instruction::Sub) return false; } else { return false; } return isPointerOperand(I->getOperand(0)) && isPointerOperand(I->getOperand(1)); } bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) { // If a global variable does not have dynamic initialization we don't // have to instrument it. However, if a global does not have initializer // at all, we assume it has dynamic initializer (in other TU). return G->hasInitializer() && !GlobalsMD.get(G).IsDynInit; } void AddressSanitizer::instrumentPointerComparisonOrSubtraction( Instruction *I) { IRBuilder<> IRB(I); Function *F = isa(I) ? AsanPtrCmpFunction : AsanPtrSubFunction; Value *Param[2] = {I->getOperand(0), I->getOperand(1)}; for (Value *&i : Param) { if (i->getType()->isPointerTy()) i = IRB.CreatePointerCast(i, IntptrTy); } IRB.CreateCall(F, Param); } static void doInstrumentAddress(AddressSanitizer *Pass, Instruction *I, Instruction *InsertBefore, Value *Addr, unsigned Alignment, unsigned Granularity, uint32_t TypeSize, bool IsWrite, Value *SizeArgument, bool UseCalls, uint32_t Exp) { // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check // if the data is properly aligned. 
if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 || TypeSize == 128) && (Alignment >= Granularity || Alignment == 0 || Alignment >= TypeSize / 8)) return Pass->instrumentAddress(I, InsertBefore, Addr, TypeSize, IsWrite, nullptr, UseCalls, Exp); Pass->instrumentUnusualSizeOrAlignment(I, InsertBefore, Addr, TypeSize, IsWrite, nullptr, UseCalls, Exp); } static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass, const DataLayout &DL, Type *IntptrTy, Value *Mask, Instruction *I, Value *Addr, unsigned Alignment, unsigned Granularity, uint32_t TypeSize, bool IsWrite, Value *SizeArgument, bool UseCalls, uint32_t Exp) { auto *VTy = cast(Addr->getType())->getElementType(); uint64_t ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType()); unsigned Num = VTy->getVectorNumElements(); auto Zero = ConstantInt::get(IntptrTy, 0); for (unsigned Idx = 0; Idx < Num; ++Idx) { Value *InstrumentedAddress = nullptr; Instruction *InsertBefore = I; if (auto *Vector = dyn_cast(Mask)) { // dyn_cast as we might get UndefValue if (auto *Masked = dyn_cast(Vector->getOperand(Idx))) { if (Masked->isNullValue()) // Mask is constant false, so no instrumentation needed. 
continue; // If we have a true or undef value, fall through to doInstrumentAddress // with InsertBefore == I } } else { IRBuilder<> IRB(I); Value *MaskElem = IRB.CreateExtractElement(Mask, Idx); TerminatorInst *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false); InsertBefore = ThenTerm; } IRBuilder<> IRB(InsertBefore); InstrumentedAddress = IRB.CreateGEP(Addr, {Zero, ConstantInt::get(IntptrTy, Idx)}); doInstrumentAddress(Pass, I, InsertBefore, InstrumentedAddress, Alignment, Granularity, ElemTypeSize, IsWrite, SizeArgument, UseCalls, Exp); } } void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, Instruction *I, bool UseCalls, const DataLayout &DL) { bool IsWrite = false; unsigned Alignment = 0; uint64_t TypeSize = 0; Value *MaybeMask = nullptr; Value *Addr = isInterestingMemoryAccess(I, &IsWrite, &TypeSize, &Alignment, &MaybeMask); assert(Addr); // Optimization experiments. // The experiments can be used to evaluate potential optimizations that remove // instrumentation (assess false negatives). Instead of completely removing // some instrumentation, you set Exp to a non-zero value (mask of optimization // experiments that want to remove instrumentation of this instruction). // If Exp is non-zero, this pass will emit special calls into runtime // (e.g. __asan_report_exp_load1 instead of __asan_report_load1). These calls // make runtime terminate the program in a special way (with a different // exit status). Then you run the new compiler on a buggy corpus, collect // the special terminations (ideally, you don't see them at all -- no false // negatives) and make the decision on the optimization. uint32_t Exp = ClForceExperiment; if (ClOpt && ClOptGlobals) { // If initialization order checking is disabled, a simple access to a // dynamically initialized global is always valid. 
GlobalVariable *G = dyn_cast(GetUnderlyingObject(Addr, DL)); if (G && (!ClInitializers || GlobalIsLinkerInitialized(G)) && isSafeAccess(ObjSizeVis, Addr, TypeSize)) { NumOptimizedAccessesToGlobalVar++; return; } } if (ClOpt && ClOptStack) { // A direct inbounds access to a stack variable is always valid. if (isa(GetUnderlyingObject(Addr, DL)) && isSafeAccess(ObjSizeVis, Addr, TypeSize)) { NumOptimizedAccessesToStackVar++; return; } } if (IsWrite) NumInstrumentedWrites++; else NumInstrumentedReads++; unsigned Granularity = 1 << Mapping.Scale; if (MaybeMask) { instrumentMaskedLoadOrStore(this, DL, IntptrTy, MaybeMask, I, Addr, Alignment, Granularity, TypeSize, IsWrite, nullptr, UseCalls, Exp); } else { doInstrumentAddress(this, I, I, Addr, Alignment, Granularity, TypeSize, IsWrite, nullptr, UseCalls, Exp); } } Instruction *AddressSanitizer::generateCrashCode(Instruction *InsertBefore, Value *Addr, bool IsWrite, size_t AccessSizeIndex, Value *SizeArgument, uint32_t Exp) { IRBuilder<> IRB(InsertBefore); Value *ExpVal = Exp == 0 ? nullptr : ConstantInt::get(IRB.getInt32Ty(), Exp); CallInst *Call = nullptr; if (SizeArgument) { if (Exp == 0) Call = IRB.CreateCall(AsanErrorCallbackSized[IsWrite][0], {Addr, SizeArgument}); else Call = IRB.CreateCall(AsanErrorCallbackSized[IsWrite][1], {Addr, SizeArgument, ExpVal}); } else { if (Exp == 0) Call = IRB.CreateCall(AsanErrorCallback[IsWrite][0][AccessSizeIndex], Addr); else Call = IRB.CreateCall(AsanErrorCallback[IsWrite][1][AccessSizeIndex], {Addr, ExpVal}); } // We don't do Call->setDoesNotReturn() because the BB already has // UnreachableInst at the end. // This EmptyAsm is required to avoid callback merge. 
IRB.CreateCall(EmptyAsm, {}); return Call; } Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, Value *ShadowValue, uint32_t TypeSize) { size_t Granularity = static_cast(1) << Mapping.Scale; // Addr & (Granularity - 1) Value *LastAccessedByte = IRB.CreateAnd(AddrLong, ConstantInt::get(IntptrTy, Granularity - 1)); // (Addr & (Granularity - 1)) + size - 1 if (TypeSize / 8 > 1) LastAccessedByte = IRB.CreateAdd( LastAccessedByte, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)); // (uint8_t) ((Addr & (Granularity-1)) + size - 1) LastAccessedByte = IRB.CreateIntCast(LastAccessedByte, ShadowValue->getType(), false); // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue); } void AddressSanitizer::instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore, Value *Addr, uint32_t TypeSize, bool IsWrite, Value *SizeArgument, bool UseCalls, uint32_t Exp) { IRBuilder<> IRB(InsertBefore); Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize); if (UseCalls) { if (Exp == 0) IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][0][AccessSizeIndex], AddrLong); else IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][1][AccessSizeIndex], {AddrLong, ConstantInt::get(IRB.getInt32Ty(), Exp)}); return; } Type *ShadowTy = IntegerType::get(*C, std::max(8U, TypeSize >> Mapping.Scale)); Type *ShadowPtrTy = PointerType::get(ShadowTy, 0); Value *ShadowPtr = memToShadow(AddrLong, IRB); Value *CmpVal = Constant::getNullValue(ShadowTy); Value *ShadowValue = IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy)); Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal); size_t Granularity = 1ULL << Mapping.Scale; TerminatorInst *CrashTerm = nullptr; if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) { // We use branch weights for the slow path check, to indicate that the slow // path is rarely taken. This seems to be the case for SPEC benchmarks. 
TerminatorInst *CheckTerm = SplitBlockAndInsertIfThen( Cmp, InsertBefore, false, MDBuilder(*C).createBranchWeights(1, 100000)); assert(cast(CheckTerm)->isUnconditional()); BasicBlock *NextBB = CheckTerm->getSuccessor(0); IRB.SetInsertPoint(CheckTerm); Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize); if (Recover) { CrashTerm = SplitBlockAndInsertIfThen(Cmp2, CheckTerm, false); } else { BasicBlock *CrashBlock = BasicBlock::Create(*C, "", NextBB->getParent(), NextBB); CrashTerm = new UnreachableInst(*C, CrashBlock); BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2); ReplaceInstWithInst(CheckTerm, NewTerm); } } else { CrashTerm = SplitBlockAndInsertIfThen(Cmp, InsertBefore, !Recover); } Instruction *Crash = generateCrashCode(CrashTerm, AddrLong, IsWrite, AccessSizeIndex, SizeArgument, Exp); Crash->setDebugLoc(OrigIns->getDebugLoc()); } // Instrument unusual size or unusual alignment. // We can not do it with a single check, so we do 1-byte check for the first // and the last bytes. We call __asan_report_*_n(addr, real_size) to be able // to report the actual access size. 
void AddressSanitizer::instrumentUnusualSizeOrAlignment( Instruction *I, Instruction *InsertBefore, Value *Addr, uint32_t TypeSize, bool IsWrite, Value *SizeArgument, bool UseCalls, uint32_t Exp) { IRBuilder<> IRB(InsertBefore); Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8); Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); if (UseCalls) { if (Exp == 0) IRB.CreateCall(AsanMemoryAccessCallbackSized[IsWrite][0], {AddrLong, Size}); else IRB.CreateCall(AsanMemoryAccessCallbackSized[IsWrite][1], {AddrLong, Size, ConstantInt::get(IRB.getInt32Ty(), Exp)}); } else { Value *LastByte = IRB.CreateIntToPtr( IRB.CreateAdd(AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)), Addr->getType()); instrumentAddress(I, InsertBefore, Addr, 8, IsWrite, Size, false, Exp); instrumentAddress(I, InsertBefore, LastByte, 8, IsWrite, Size, false, Exp); } } void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit, GlobalValue *ModuleName) { // Set up the arguments to our poison/unpoison functions. IRBuilder<> IRB(&GlobalInit.front(), GlobalInit.front().getFirstInsertionPt()); // Add a call to poison all external globals before the given function starts. Value *ModuleNameAddr = ConstantExpr::getPointerCast(ModuleName, IntptrTy); IRB.CreateCall(AsanPoisonGlobals, ModuleNameAddr); // Add calls to unpoison all globals before each return instruction. for (auto &BB : GlobalInit.getBasicBlockList()) if (ReturnInst *RI = dyn_cast(BB.getTerminator())) CallInst::Create(AsanUnpoisonGlobals, "", RI); } void AddressSanitizerModule::createInitializerPoisonCalls( Module &M, GlobalValue *ModuleName) { GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); ConstantArray *CA = cast(GV->getInitializer()); for (Use &OP : CA->operands()) { if (isa(OP)) continue; ConstantStruct *CS = cast(OP); // Must have a function or null ptr. 
if (Function *F = dyn_cast(CS->getOperand(1))) { if (F->getName() == kAsanModuleCtorName) continue; ConstantInt *Priority = dyn_cast(CS->getOperand(0)); // Don't instrument CTORs that will run before asan.module_ctor. if (Priority->getLimitedValue() <= kAsanCtorAndDtorPriority) continue; poisonOneInitializer(*F, ModuleName); } } } bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { Type *Ty = G->getValueType(); DEBUG(dbgs() << "GLOBAL: " << *G << "\n"); if (GlobalsMD.get(G).IsBlacklisted) return false; if (!Ty->isSized()) return false; if (!G->hasInitializer()) return false; if (GlobalWasGeneratedByCompiler(G)) return false; // Our own globals. // Touch only those globals that will not be defined in other modules. // Don't handle ODR linkage types and COMDATs since other modules may be built // without ASan. if (G->getLinkage() != GlobalVariable::ExternalLinkage && G->getLinkage() != GlobalVariable::PrivateLinkage && G->getLinkage() != GlobalVariable::InternalLinkage) return false; if (G->hasComdat()) return false; // Two problems with thread-locals: // - The address of the main thread's copy can't be computed at link-time. // - Need to poison all copies, not just the main thread's one. if (G->isThreadLocal()) return false; // For now, just ignore this Global if the alignment is large. if (G->getAlignment() > MinRedzoneSizeForGlobal()) return false; if (G->hasSection()) { StringRef Section = G->getSection(); // Globals from llvm.metadata aren't emitted, do not instrument them. if (Section == "llvm.metadata") return false; // Do not instrument globals from special LLVM sections. if (Section.find("__llvm") != StringRef::npos || Section.find("__LLVM") != StringRef::npos) return false; // Do not instrument function pointers to initialization and termination // routines: dynamic linker will not properly handle redzones. 
if (Section.startswith(".preinit_array") || Section.startswith(".init_array") || Section.startswith(".fini_array")) { return false; } // Callbacks put into the CRT initializer/terminator sections // should not be instrumented. // See https://code.google.com/p/address-sanitizer/issues/detail?id=305 // and http://msdn.microsoft.com/en-US/en-en/library/bb918180(v=vs.120).aspx if (Section.startswith(".CRT")) { DEBUG(dbgs() << "Ignoring a global initializer callback: " << *G << "\n"); return false; } if (TargetTriple.isOSBinFormatMachO()) { StringRef ParsedSegment, ParsedSection; unsigned TAA = 0, StubSize = 0; bool TAAParsed; std::string ErrorCode = MCSectionMachO::ParseSectionSpecifier( Section, ParsedSegment, ParsedSection, TAA, TAAParsed, StubSize); assert(ErrorCode.empty() && "Invalid section specifier."); // Ignore the globals from the __OBJC section. The ObjC runtime assumes // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to // them. if (ParsedSegment == "__OBJC" || (ParsedSegment == "__DATA" && ParsedSection.startswith("__objc_"))) { DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G << "\n"); return false; } // See http://code.google.com/p/address-sanitizer/issues/detail?id=32 // Constant CFString instances are compiled in the following way: // -- the string buffer is emitted into // __TEXT,__cstring,cstring_literals // -- the constant NSConstantString structure referencing that buffer // is placed into __DATA,__cfstring // Therefore there's no point in placing redzones into __DATA,__cfstring. // Moreover, it causes the linker to crash on OS X 10.7 if (ParsedSegment == "__DATA" && ParsedSection == "__cfstring") { DEBUG(dbgs() << "Ignoring CFString: " << *G << "\n"); return false; } // The linker merges the contents of cstring_literals and removes the // trailing zeroes. 
if (ParsedSegment == "__TEXT" && (TAA & MachO::S_CSTRING_LITERALS)) { DEBUG(dbgs() << "Ignoring a cstring literal: " << *G << "\n"); return false; } } } return true; } // On Mach-O platforms, we emit global metadata in a separate section of the // binary in order to allow the linker to properly dead strip. This is only // supported on recent versions of ld64. bool AddressSanitizerModule::ShouldUseMachOGlobalsSection() const { if (!ClUseMachOGlobalsSection) return false; if (!TargetTriple.isOSBinFormatMachO()) return false; if (TargetTriple.isMacOSX() && !TargetTriple.isMacOSXVersionLT(10, 11)) return true; if (TargetTriple.isiOS() /* or tvOS */ && !TargetTriple.isOSVersionLT(9)) return true; if (TargetTriple.isWatchOS() && !TargetTriple.isOSVersionLT(2)) return true; return false; } StringRef AddressSanitizerModule::getGlobalMetadataSection() const { switch (TargetTriple.getObjectFormat()) { case Triple::COFF: return ".ASAN$GL"; case Triple::ELF: return "asan_globals"; case Triple::MachO: return "__DATA,__asan_globals,regular"; default: break; } llvm_unreachable("unsupported object format"); } void AddressSanitizerModule::initializeCallbacks(Module &M) { IRBuilder<> IRB(*C); // Declare our poisoning and unpoisoning functions. AsanPoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, nullptr)); AsanPoisonGlobals->setLinkage(Function::ExternalLinkage); AsanUnpoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanUnpoisonGlobalsName, IRB.getVoidTy(), nullptr)); AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage); // Declare functions that register/unregister globals. 
AsanRegisterGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); AsanRegisterGlobals->setLinkage(Function::ExternalLinkage); AsanUnregisterGlobals = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanUnregisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage); // Declare the functions that find globals in a shared object and then invoke // the (un)register function on them. AsanRegisterImageGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanRegisterImageGlobalsName, IRB.getVoidTy(), IntptrTy, nullptr)); AsanRegisterImageGlobals->setLinkage(Function::ExternalLinkage); AsanUnregisterImageGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanUnregisterImageGlobalsName, IRB.getVoidTy(), IntptrTy, nullptr)); AsanUnregisterImageGlobals->setLinkage(Function::ExternalLinkage); } // Put the metadata and the instrumented global in the same group. This ensures // that the metadata is discarded if the instrumented global is discarded. void AddressSanitizerModule::SetComdatForGlobalMetadata( GlobalVariable *G, GlobalVariable *Metadata) { Module &M = *G->getParent(); Comdat *C = G->getComdat(); if (!C) { if (!G->hasName()) { // If G is unnamed, it must be internal. Give it an artificial name // so we can put it in a comdat. assert(G->hasLocalLinkage()); G->setName(Twine(kAsanGenPrefix) + "_anon_global"); } C = M.getOrInsertComdat(G->getName()); // Make this IMAGE_COMDAT_SELECT_NODUPLICATES on COFF. if (TargetTriple.isOSBinFormatCOFF()) C->setSelectionKind(Comdat::NoDuplicates); G->setComdat(C); } assert(G->hasComdat()); Metadata->setComdat(G->getComdat()); } // Create a separate metadata global and put it in the appropriate ASan // global registration section. 
GlobalVariable * AddressSanitizerModule::CreateMetadataGlobal(Module &M, Constant *Initializer, StringRef OriginalName) { GlobalVariable *Metadata = new GlobalVariable(M, Initializer->getType(), false, GlobalVariable::InternalLinkage, Initializer, Twine("__asan_global_") + GlobalValue::getRealLinkageName(OriginalName)); Metadata->setSection(getGlobalMetadataSection()); return Metadata; } IRBuilder<> AddressSanitizerModule::CreateAsanModuleDtor(Module &M) { Function *AsanDtorFunction = Function::Create(FunctionType::get(Type::getVoidTy(*C), false), GlobalValue::InternalLinkage, kAsanModuleDtorName, &M); BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction); appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority); return IRBuilder<>(ReturnInst::Create(*C, AsanDtorBB)); } void AddressSanitizerModule::InstrumentGlobalsCOFF( IRBuilder<> &IRB, Module &M, ArrayRef ExtendedGlobals, ArrayRef MetadataInitializers) { assert(ExtendedGlobals.size() == MetadataInitializers.size()); auto &DL = M.getDataLayout(); for (size_t i = 0; i < ExtendedGlobals.size(); i++) { Constant *Initializer = MetadataInitializers[i]; GlobalVariable *G = ExtendedGlobals[i]; GlobalVariable *Metadata = CreateMetadataGlobal(M, Initializer, G->getName()); // The MSVC linker always inserts padding when linking incrementally. We // cope with that by aligning each struct to its size, which must be a power // of two. 
unsigned SizeOfGlobalStruct = DL.getTypeAllocSize(Initializer->getType()); assert(isPowerOf2_32(SizeOfGlobalStruct) && "global metadata will not be padded appropriately"); Metadata->setAlignment(SizeOfGlobalStruct); SetComdatForGlobalMetadata(G, Metadata); } } void AddressSanitizerModule::InstrumentGlobalsMachO( IRBuilder<> &IRB, Module &M, ArrayRef ExtendedGlobals, ArrayRef MetadataInitializers) { assert(ExtendedGlobals.size() == MetadataInitializers.size()); // On recent Mach-O platforms, use a structure which binds the liveness of // the global variable to the metadata struct. Keep the list of "Liveness" GV // created to be added to llvm.compiler.used StructType *LivenessTy = StructType::get(IntptrTy, IntptrTy, nullptr); SmallVector LivenessGlobals(ExtendedGlobals.size()); for (size_t i = 0; i < ExtendedGlobals.size(); i++) { Constant *Initializer = MetadataInitializers[i]; GlobalVariable *G = ExtendedGlobals[i]; GlobalVariable *Metadata = CreateMetadataGlobal(M, Initializer, G->getName()); // On recent Mach-O platforms, we emit the global metadata in a way that // allows the linker to properly strip dead globals. auto LivenessBinder = ConstantStruct::get( LivenessTy, Initializer->getAggregateElement(0u), ConstantExpr::getPointerCast(Metadata, IntptrTy), nullptr); GlobalVariable *Liveness = new GlobalVariable( M, LivenessTy, false, GlobalVariable::InternalLinkage, LivenessBinder, Twine("__asan_binder_") + G->getName()); Liveness->setSection("__DATA,__asan_liveness,regular,live_support"); LivenessGlobals[i] = Liveness; } // Update llvm.compiler.used, adding the new liveness globals. This is // needed so that during LTO these variables stay alive. The alternative // would be to have the linker handling the LTO symbols, but libLTO // current API does not expose access to the section for each symbol. if (!LivenessGlobals.empty()) appendToCompilerUsed(M, LivenessGlobals); // RegisteredFlag serves two purposes. 
First, we can pass it to dladdr() // to look up the loaded image that contains it. Second, we can store in it // whether registration has already occurred, to prevent duplicate // registration. // // common linkage ensures that there is only one global per shared library. GlobalVariable *RegisteredFlag = new GlobalVariable( M, IntptrTy, false, GlobalVariable::CommonLinkage, ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName); RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility); IRB.CreateCall(AsanRegisterImageGlobals, {IRB.CreatePointerCast(RegisteredFlag, IntptrTy)}); // We also need to unregister globals at the end, e.g., when a shared library // gets closed. IRBuilder<> IRB_Dtor = CreateAsanModuleDtor(M); IRB_Dtor.CreateCall(AsanUnregisterImageGlobals, {IRB.CreatePointerCast(RegisteredFlag, IntptrTy)}); } void AddressSanitizerModule::InstrumentGlobalsWithMetadataArray( IRBuilder<> &IRB, Module &M, ArrayRef ExtendedGlobals, ArrayRef MetadataInitializers) { assert(ExtendedGlobals.size() == MetadataInitializers.size()); unsigned N = ExtendedGlobals.size(); assert(N > 0); // On platforms that don't have a custom metadata section, we emit an array // of global metadata structures. ArrayType *ArrayOfGlobalStructTy = ArrayType::get(MetadataInitializers[0]->getType(), N); auto AllGlobals = new GlobalVariable( M, ArrayOfGlobalStructTy, false, GlobalVariable::InternalLinkage, ConstantArray::get(ArrayOfGlobalStructTy, MetadataInitializers), ""); IRB.CreateCall(AsanRegisterGlobals, {IRB.CreatePointerCast(AllGlobals, IntptrTy), ConstantInt::get(IntptrTy, N)}); // We also need to unregister globals at the end, e.g., when a shared library // gets closed. IRBuilder<> IRB_Dtor = CreateAsanModuleDtor(M); IRB_Dtor.CreateCall(AsanUnregisterGlobals, {IRB.CreatePointerCast(AllGlobals, IntptrTy), ConstantInt::get(IntptrTy, N)}); } // This function replaces all global variables with new variables that have // trailing redzones. 
It also creates a function that poisons // redzones and inserts this function into llvm.global_ctors. bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { GlobalsMD.init(M); SmallVector GlobalsToChange; for (auto &G : M.globals()) { if (ShouldInstrumentGlobal(&G)) GlobalsToChange.push_back(&G); } size_t n = GlobalsToChange.size(); if (n == 0) return false; auto &DL = M.getDataLayout(); // A global is described by a structure // size_t beg; // size_t size; // size_t size_with_redzone; // const char *name; // const char *module_name; // size_t has_dynamic_init; // void *source_location; // size_t odr_indicator; // We initialize an array of such structures and pass it to a run-time call. StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy, nullptr); SmallVector NewGlobals(n); SmallVector Initializers(n); bool HasDynamicallyInitializedGlobals = false; // We shouldn't merge same module names, as this string serves as unique // module ID in runtime. GlobalVariable *ModuleName = createPrivateGlobalForString( M, M.getModuleIdentifier(), /*AllowMerging*/ false); for (size_t i = 0; i < n; i++) { static const uint64_t kMaxGlobalRedzone = 1 << 18; GlobalVariable *G = GlobalsToChange[i]; auto MD = GlobalsMD.get(G); StringRef NameForGlobal = G->getName(); // Create string holding the global name (use global name from metadata // if it's available, otherwise just write the name of global variable). GlobalVariable *Name = createPrivateGlobalForString( M, MD.Name.empty() ? NameForGlobal : MD.Name, /*AllowMerging*/ true); Type *Ty = G->getValueType(); uint64_t SizeInBytes = DL.getTypeAllocSize(Ty); uint64_t MinRZ = MinRedzoneSizeForGlobal(); // MinRZ <= RZ <= kMaxGlobalRedzone // and trying to make RZ to be ~ 1/4 of SizeInBytes. 
uint64_t RZ = std::max( MinRZ, std::min(kMaxGlobalRedzone, (SizeInBytes / MinRZ / 4) * MinRZ)); uint64_t RightRedzoneSize = RZ; // Round up to MinRZ if (SizeInBytes % MinRZ) RightRedzoneSize += MinRZ - (SizeInBytes % MinRZ); assert(((RightRedzoneSize + SizeInBytes) % MinRZ) == 0); Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); StructType *NewTy = StructType::get(Ty, RightRedZoneTy, nullptr); Constant *NewInitializer = ConstantStruct::get(NewTy, G->getInitializer(), Constant::getNullValue(RightRedZoneTy), nullptr); // Create a new global variable with enough space for a redzone. GlobalValue::LinkageTypes Linkage = G->getLinkage(); if (G->isConstant() && Linkage == GlobalValue::PrivateLinkage) Linkage = GlobalValue::InternalLinkage; GlobalVariable *NewGlobal = new GlobalVariable(M, NewTy, G->isConstant(), Linkage, NewInitializer, "", G, G->getThreadLocalMode()); NewGlobal->copyAttributesFrom(G); NewGlobal->setAlignment(MinRZ); // Move null-terminated C strings to "__asan_cstring" section on Darwin. if (TargetTriple.isOSBinFormatMachO() && !G->hasSection() && G->isConstant()) { auto Seq = dyn_cast(G->getInitializer()); if (Seq && Seq->isCString()) NewGlobal->setSection("__TEXT,__asan_cstring,regular"); } // Transfer the debug info. The payload starts at offset zero so we can // copy the debug info over as is. 
SmallVector GVs; G->getDebugInfo(GVs); for (auto *GV : GVs) NewGlobal->addDebugInfo(GV); Value *Indices2[2]; Indices2[0] = IRB.getInt32(0); Indices2[1] = IRB.getInt32(0); G->replaceAllUsesWith( ConstantExpr::getGetElementPtr(NewTy, NewGlobal, Indices2, true)); NewGlobal->takeName(G); G->eraseFromParent(); NewGlobals[i] = NewGlobal; Constant *SourceLoc; if (!MD.SourceLoc.empty()) { auto SourceLocGlobal = createPrivateGlobalForSourceLoc(M, MD.SourceLoc); SourceLoc = ConstantExpr::getPointerCast(SourceLocGlobal, IntptrTy); } else { SourceLoc = ConstantInt::get(IntptrTy, 0); } Constant *ODRIndicator = ConstantExpr::getNullValue(IRB.getInt8PtrTy()); GlobalValue *InstrumentedGlobal = NewGlobal; bool CanUsePrivateAliases = TargetTriple.isOSBinFormatELF() || TargetTriple.isOSBinFormatMachO(); if (CanUsePrivateAliases && ClUsePrivateAliasForGlobals) { // Create local alias for NewGlobal to avoid crash on ODR between // instrumented and non-instrumented libraries. auto *GA = GlobalAlias::create(GlobalValue::InternalLinkage, NameForGlobal + M.getName(), NewGlobal); // With local aliases, we need to provide another externally visible // symbol __odr_asan_XXX to detect ODR violation. auto *ODRIndicatorSym = new GlobalVariable(M, IRB.getInt8Ty(), false, Linkage, Constant::getNullValue(IRB.getInt8Ty()), kODRGenPrefix + NameForGlobal, nullptr, NewGlobal->getThreadLocalMode()); // Set meaningful attributes for indicator symbol. 
ODRIndicatorSym->setVisibility(NewGlobal->getVisibility()); ODRIndicatorSym->setDLLStorageClass(NewGlobal->getDLLStorageClass()); ODRIndicatorSym->setAlignment(1); ODRIndicator = ODRIndicatorSym; InstrumentedGlobal = GA; } Constant *Initializer = ConstantStruct::get( GlobalStructTy, ConstantExpr::getPointerCast(InstrumentedGlobal, IntptrTy), ConstantInt::get(IntptrTy, SizeInBytes), ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize), ConstantExpr::getPointerCast(Name, IntptrTy), ConstantExpr::getPointerCast(ModuleName, IntptrTy), ConstantInt::get(IntptrTy, MD.IsDynInit), SourceLoc, ConstantExpr::getPointerCast(ODRIndicator, IntptrTy), nullptr); if (ClInitializers && MD.IsDynInit) HasDynamicallyInitializedGlobals = true; DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n"); Initializers[i] = Initializer; } if (TargetTriple.isOSBinFormatCOFF()) { InstrumentGlobalsCOFF(IRB, M, NewGlobals, Initializers); } else if (ShouldUseMachOGlobalsSection()) { InstrumentGlobalsMachO(IRB, M, NewGlobals, Initializers); } else { InstrumentGlobalsWithMetadataArray(IRB, M, NewGlobals, Initializers); } // Create calls for poisoning before initializers run and unpoisoning after. if (HasDynamicallyInitializedGlobals) createInitializerPoisonCalls(M, ModuleName); DEBUG(dbgs() << M); return true; } bool AddressSanitizerModule::runOnModule(Module &M) { C = &(M.getContext()); int LongSize = M.getDataLayout().getPointerSizeInBits(); IntptrTy = Type::getIntNTy(*C, LongSize); TargetTriple = Triple(M.getTargetTriple()); Mapping = getShadowMapping(TargetTriple, LongSize, CompileKernel); initializeCallbacks(M); bool Changed = false; // TODO(glider): temporarily disabled globals instrumentation for KASan. 
if (ClGlobals && !CompileKernel) { Function *CtorFunc = M.getFunction(kAsanModuleCtorName); assert(CtorFunc); IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator()); Changed |= InstrumentGlobals(IRB, M); } return Changed; } void AddressSanitizer::initializeCallbacks(Module &M) { IRBuilder<> IRB(*C); // Create __asan_report* callbacks. // IsWrite, TypeSize and Exp are encoded in the function name. for (int Exp = 0; Exp < 2; Exp++) { for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) { const std::string TypeStr = AccessIsWrite ? "store" : "load"; const std::string ExpStr = Exp ? "exp_" : ""; const std::string SuffixStr = CompileKernel ? "N" : "_n"; const std::string EndingStr = Recover ? "_noabort" : ""; Type *ExpType = Exp ? Type::getInt32Ty(*C) : nullptr; AsanErrorCallbackSized[AccessIsWrite][Exp] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanReportErrorTemplate + ExpStr + TypeStr + SuffixStr + EndingStr, IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr)); AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N" + EndingStr, IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr)); for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; AccessSizeIndex++) { const std::string Suffix = TypeStr + itostr(1ULL << AccessSizeIndex); AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanReportErrorTemplate + ExpStr + Suffix + EndingStr, IRB.getVoidTy(), IntptrTy, ExpType, nullptr)); AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( ClMemoryAccessCallbackPrefix + ExpStr + Suffix + EndingStr, IRB.getVoidTy(), IntptrTy, ExpType, nullptr)); } } } const std::string MemIntrinCallbackPrefix = CompileKernel ? 
std::string("") : ClMemoryAccessCallbackPrefix; AsanMemmove = checkSanitizerInterfaceFunction(M.getOrInsertFunction( MemIntrinCallbackPrefix + "memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr)); AsanMemcpy = checkSanitizerInterfaceFunction(M.getOrInsertFunction( MemIntrinCallbackPrefix + "memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr)); AsanMemset = checkSanitizerInterfaceFunction(M.getOrInsertFunction( MemIntrinCallbackPrefix + "memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy, nullptr)); AsanHandleNoReturnFunc = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy(), nullptr)); AsanPtrCmpFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanPtrCmp, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); AsanPtrSubFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanPtrSub, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); // We insert an empty inline asm after __asan_report* to avoid callback merge. EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), StringRef(""), StringRef(""), /*hasSideEffects=*/true); } // virtual bool AddressSanitizer::doInitialization(Module &M) { // Initialize the private fields. No one has accessed them before. 
GlobalsMD.init(M); C = &(M.getContext()); LongSize = M.getDataLayout().getPointerSizeInBits(); IntptrTy = Type::getIntNTy(*C, LongSize); TargetTriple = Triple(M.getTargetTriple()); if (!CompileKernel) { std::tie(AsanCtorFunction, AsanInitFunction) = createSanitizerCtorAndInitFunctions( M, kAsanModuleCtorName, kAsanInitName, /*InitArgTypes=*/{}, /*InitArgs=*/{}, kAsanVersionCheckName); appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority); } Mapping = getShadowMapping(TargetTriple, LongSize, CompileKernel); return true; } bool AddressSanitizer::doFinalization(Module &M) { GlobalsMD.reset(); return false; } bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { // For each NSObject descendant having a +load method, this method is invoked // by the ObjC runtime before any of the static constructors is called. // Therefore we need to instrument such methods with a call to __asan_init // at the beginning in order to initialize our runtime before any access to // the shadow memory. // We cannot just ignore these methods, because they may call other // instrumented functions. if (F.getName().find(" load]") != std::string::npos) { IRBuilder<> IRB(&F.front(), F.front().begin()); IRB.CreateCall(AsanInitFunction, {}); return true; } return false; } void AddressSanitizer::maybeInsertDynamicShadowAtFunctionEntry(Function &F) { // Generate code only when dynamic addressing is needed. if (Mapping.Offset != kDynamicShadowSentinel) return; IRBuilder<> IRB(&F.front().front()); Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal( kAsanShadowMemoryDynamicAddress, IntptrTy); LocalDynamicShadow = IRB.CreateLoad(GlobalDynamicAddress); } void AddressSanitizer::markEscapedLocalAllocas(Function &F) { // Find the one possible call to llvm.localescape and pre-mark allocas passed // to it as uninteresting. This assumes we haven't started processing allocas // yet. 
This check is done up front because iterating the use list in // isInterestingAlloca would be algorithmically slower. assert(ProcessedAllocas.empty() && "must process localescape before allocas"); // Try to get the declaration of llvm.localescape. If it's not in the module, // we can exit early. if (!F.getParent()->getFunction("llvm.localescape")) return; // Look for a call to llvm.localescape call in the entry block. It can't be in // any other block. for (Instruction &I : F.getEntryBlock()) { IntrinsicInst *II = dyn_cast(&I); if (II && II->getIntrinsicID() == Intrinsic::localescape) { // We found a call. Mark all the allocas passed in as uninteresting. for (Value *Arg : II->arg_operands()) { AllocaInst *AI = dyn_cast(Arg->stripPointerCasts()); assert(AI && AI->isStaticAlloca() && "non-static alloca arg to localescape"); ProcessedAllocas[AI] = false; } break; } } } bool AddressSanitizer::runOnFunction(Function &F) { if (&F == AsanCtorFunction) return false; if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false; if (!ClDebugFunc.empty() && ClDebugFunc == F.getName()) return false; if (F.getName().startswith("__asan_")) return false; bool FunctionModified = false; // If needed, insert __asan_init before checking for SanitizeAddress attr. // This function needs to be called even if the function body is not // instrumented. if (maybeInsertAsanInitAtFunctionEntry(F)) FunctionModified = true; // Leave if the function doesn't need instrumentation. if (!F.hasFnAttribute(Attribute::SanitizeAddress)) return FunctionModified; DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n"); initializeCallbacks(*F.getParent()); DT = &getAnalysis().getDomTree(); FunctionStateRAII CleanupObj(this); maybeInsertDynamicShadowAtFunctionEntry(F); // We can't instrument allocas used with llvm.localescape. Only static allocas // can be passed to that intrinsic. 
markEscapedLocalAllocas(F); // We want to instrument every address only once per basic block (unless there // are calls between uses). SmallSet TempsToInstrument; SmallVector ToInstrument; SmallVector NoReturnCalls; SmallVector AllBlocks; SmallVector PointerComparisonsOrSubtracts; int NumAllocas = 0; bool IsWrite; unsigned Alignment; uint64_t TypeSize; const TargetLibraryInfo *TLI = &getAnalysis().getTLI(); // Fill the set of memory operations to instrument. for (auto &BB : F) { AllBlocks.push_back(&BB); TempsToInstrument.clear(); int NumInsnsPerBB = 0; for (auto &Inst : BB) { if (LooksLikeCodeInBug11395(&Inst)) return false; Value *MaybeMask = nullptr; if (Value *Addr = isInterestingMemoryAccess(&Inst, &IsWrite, &TypeSize, &Alignment, &MaybeMask)) { if (ClOpt && ClOptSameTemp) { // If we have a mask, skip instrumentation if we've already // instrumented the full object. But don't add to TempsToInstrument // because we might get another load/store with a different mask. if (MaybeMask) { if (TempsToInstrument.count(Addr)) continue; // We've seen this (whole) temp in the current BB. } else { if (!TempsToInstrument.insert(Addr).second) continue; // We've seen this temp in the current BB. } } } else if (ClInvalidPointerPairs && isInterestingPointerComparisonOrSubtraction(&Inst)) { PointerComparisonsOrSubtracts.push_back(&Inst); continue; } else if (isa(Inst)) { // ok, take it. } else { if (isa(Inst)) NumAllocas++; CallSite CS(&Inst); if (CS) { // A call inside BB. 
TempsToInstrument.clear(); if (CS.doesNotReturn()) NoReturnCalls.push_back(CS.getInstruction()); } if (CallInst *CI = dyn_cast(&Inst)) maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI); continue; } ToInstrument.push_back(&Inst); NumInsnsPerBB++; if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB) break; } } bool UseCalls = CompileKernel || (ClInstrumentationWithCallsThreshold >= 0 && ToInstrument.size() > (unsigned)ClInstrumentationWithCallsThreshold); const DataLayout &DL = F.getParent()->getDataLayout(); ObjectSizeOffsetVisitor ObjSizeVis(DL, TLI, F.getContext(), /*RoundToAlign=*/true); // Instrument. int NumInstrumented = 0; for (auto Inst : ToInstrument) { if (ClDebugMin < 0 || ClDebugMax < 0 || (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) { if (isInterestingMemoryAccess(Inst, &IsWrite, &TypeSize, &Alignment)) instrumentMop(ObjSizeVis, Inst, UseCalls, F.getParent()->getDataLayout()); else instrumentMemIntrinsic(cast(Inst)); } NumInstrumented++; } FunctionStackPoisoner FSP(F, *this); bool ChangedStack = FSP.runOnFunction(); // We must unpoison the stack before every NoReturn call (throw, _exit, etc). // See e.g. http://code.google.com/p/address-sanitizer/issues/detail?id=37 for (auto CI : NoReturnCalls) { IRBuilder<> IRB(CI); IRB.CreateCall(AsanHandleNoReturnFunc, {}); } for (auto Inst : PointerComparisonsOrSubtracts) { instrumentPointerComparisonOrSubtraction(Inst); NumInstrumented++; } if (NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty()) FunctionModified = true; DEBUG(dbgs() << "ASAN done instrumenting: " << FunctionModified << " " << F << "\n"); return FunctionModified; } // Workaround for bug 11395: we don't want to instrument stack in functions // with large assembly blobs (32-bit only), otherwise reg alloc may crash. // FIXME: remove once the bug 11395 is fixed. 
bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) { if (LongSize != 32) return false; CallInst *CI = dyn_cast(I); if (!CI || !CI->isInlineAsm()) return false; if (CI->getNumArgOperands() <= 5) return false; // We have inline assembly with quite a few arguments. return true; } void FunctionStackPoisoner::initializeCallbacks(Module &M) { IRBuilder<> IRB(*C); for (int i = 0; i <= kMaxAsanStackMallocSizeClass; i++) { std::string Suffix = itostr(i); AsanStackMallocFunc[i] = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanStackMallocNameTemplate + Suffix, IntptrTy, IntptrTy, nullptr)); AsanStackFreeFunc[i] = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanStackFreeNameTemplate + Suffix, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); } if (ASan.UseAfterScope) { AsanPoisonStackMemoryFunc = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanPoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); AsanUnpoisonStackMemoryFunc = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanUnpoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); } for (size_t Val : {0x00, 0xf1, 0xf2, 0xf3, 0xf5, 0xf8}) { std::ostringstream Name; Name << kAsanSetShadowPrefix; Name << std::setw(2) << std::setfill('0') << std::hex << Val; AsanSetShadowFunc[Val] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( Name.str(), IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); } AsanAllocaPoisonFunc = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanAllocaPoison, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); AsanAllocasUnpoisonFunc = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanAllocasUnpoison, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); } void FunctionStackPoisoner::copyToShadowInline(ArrayRef ShadowMask, ArrayRef ShadowBytes, size_t Begin, size_t End, IRBuilder<> &IRB, Value *ShadowBase) { if (Begin >= End) return; const size_t LargestStoreSizeInBytes = std::min(sizeof(uint64_t), 
ASan.LongSize / 8); const bool IsLittleEndian = F.getParent()->getDataLayout().isLittleEndian(); // Poison given range in shadow using larges store size with out leading and // trailing zeros in ShadowMask. Zeros never change, so they need neither // poisoning nor up-poisoning. Still we don't mind if some of them get into a // middle of a store. for (size_t i = Begin; i < End;) { if (!ShadowMask[i]) { assert(!ShadowBytes[i]); ++i; continue; } size_t StoreSizeInBytes = LargestStoreSizeInBytes; // Fit store size into the range. while (StoreSizeInBytes > End - i) StoreSizeInBytes /= 2; // Minimize store size by trimming trailing zeros. for (size_t j = StoreSizeInBytes - 1; j && !ShadowMask[i + j]; --j) { while (j <= StoreSizeInBytes / 2) StoreSizeInBytes /= 2; } uint64_t Val = 0; for (size_t j = 0; j < StoreSizeInBytes; j++) { if (IsLittleEndian) Val |= (uint64_t)ShadowBytes[i + j] << (8 * j); else Val = (Val << 8) | ShadowBytes[i + j]; } Value *Ptr = IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i)); Value *Poison = IRB.getIntN(StoreSizeInBytes * 8, Val); IRB.CreateAlignedStore( Poison, IRB.CreateIntToPtr(Ptr, Poison->getType()->getPointerTo()), 1); i += StoreSizeInBytes; } } void FunctionStackPoisoner::copyToShadow(ArrayRef ShadowMask, ArrayRef ShadowBytes, IRBuilder<> &IRB, Value *ShadowBase) { copyToShadow(ShadowMask, ShadowBytes, 0, ShadowMask.size(), IRB, ShadowBase); } void FunctionStackPoisoner::copyToShadow(ArrayRef ShadowMask, ArrayRef ShadowBytes, size_t Begin, size_t End, IRBuilder<> &IRB, Value *ShadowBase) { assert(ShadowMask.size() == ShadowBytes.size()); size_t Done = Begin; for (size_t i = Begin, j = Begin + 1; i < End; i = j++) { if (!ShadowMask[i]) { assert(!ShadowBytes[i]); continue; } uint8_t Val = ShadowBytes[i]; if (!AsanSetShadowFunc[Val]) continue; // Skip same values. 
for (; j < End && ShadowMask[j] && Val == ShadowBytes[j]; ++j) { } if (j - i >= ClMaxInlinePoisoningSize) { copyToShadowInline(ShadowMask, ShadowBytes, Done, i, IRB, ShadowBase); IRB.CreateCall(AsanSetShadowFunc[Val], {IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i)), ConstantInt::get(IntptrTy, j - i)}); Done = j; } } copyToShadowInline(ShadowMask, ShadowBytes, Done, End, IRB, ShadowBase); } // Fake stack allocator (asan_fake_stack.h) has 11 size classes // for every power of 2 from kMinStackMallocSize to kMaxAsanStackMallocSizeClass static int StackMallocSizeClass(uint64_t LocalStackSize) { assert(LocalStackSize <= kMaxStackMallocSize); uint64_t MaxSize = kMinStackMallocSize; for (int i = 0;; i++, MaxSize *= 2) if (LocalStackSize <= MaxSize) return i; llvm_unreachable("impossible LocalStackSize"); } PHINode *FunctionStackPoisoner::createPHI(IRBuilder<> &IRB, Value *Cond, Value *ValueIfTrue, Instruction *ThenTerm, Value *ValueIfFalse) { PHINode *PHI = IRB.CreatePHI(IntptrTy, 2); BasicBlock *CondBlock = cast(Cond)->getParent(); PHI->addIncoming(ValueIfFalse, CondBlock); BasicBlock *ThenBlock = ThenTerm->getParent(); PHI->addIncoming(ValueIfTrue, ThenBlock); return PHI; } Value *FunctionStackPoisoner::createAllocaForLayout( IRBuilder<> &IRB, const ASanStackFrameLayout &L, bool Dynamic) { AllocaInst *Alloca; if (Dynamic) { Alloca = IRB.CreateAlloca(IRB.getInt8Ty(), ConstantInt::get(IRB.getInt64Ty(), L.FrameSize), "MyAlloca"); } else { Alloca = IRB.CreateAlloca(ArrayType::get(IRB.getInt8Ty(), L.FrameSize), nullptr, "MyAlloca"); assert(Alloca->isStaticAlloca()); } assert((ClRealignStack & (ClRealignStack - 1)) == 0); size_t FrameAlignment = std::max(L.FrameAlignment, (size_t)ClRealignStack); Alloca->setAlignment(FrameAlignment); return IRB.CreatePointerCast(Alloca, IntptrTy); } void FunctionStackPoisoner::createDynamicAllocasInitStorage() { BasicBlock &FirstBB = *F.begin(); IRBuilder<> IRB(dyn_cast(FirstBB.begin())); DynamicAllocaLayout = 
IRB.CreateAlloca(IntptrTy, nullptr); IRB.CreateStore(Constant::getNullValue(IntptrTy), DynamicAllocaLayout); DynamicAllocaLayout->setAlignment(32); } void FunctionStackPoisoner::processDynamicAllocas() { if (!ClInstrumentDynamicAllocas || DynamicAllocaVec.empty()) { assert(DynamicAllocaPoisonCallVec.empty()); return; } // Insert poison calls for lifetime intrinsics for dynamic allocas. for (const auto &APC : DynamicAllocaPoisonCallVec) { assert(APC.InsBefore); assert(APC.AI); assert(ASan.isInterestingAlloca(*APC.AI)); assert(!APC.AI->isStaticAlloca()); IRBuilder<> IRB(APC.InsBefore); poisonAlloca(APC.AI, APC.Size, IRB, APC.DoPoison); // Dynamic allocas will be unpoisoned unconditionally below in // unpoisonDynamicAllocas. // Flag that we need unpoison static allocas. } // Handle dynamic allocas. createDynamicAllocasInitStorage(); for (auto &AI : DynamicAllocaVec) handleDynamicAllocaCall(AI); unpoisonDynamicAllocas(); } void FunctionStackPoisoner::processStaticAllocas() { if (AllocaVec.empty()) { assert(StaticAllocaPoisonCallVec.empty()); return; } int StackMallocIdx = -1; DebugLoc EntryDebugLocation; if (auto SP = F.getSubprogram()) EntryDebugLocation = DebugLoc::get(SP->getScopeLine(), 0, SP); Instruction *InsBefore = AllocaVec[0]; IRBuilder<> IRB(InsBefore); IRB.SetCurrentDebugLocation(EntryDebugLocation); // Make sure non-instrumented allocas stay in the entry block. Otherwise, // debug info is broken, because only entry-block allocas are treated as // regular stack slots. auto InsBeforeB = InsBefore->getParent(); assert(InsBeforeB == &F.getEntryBlock()); for (auto *AI : StaticAllocasToMoveUp) if (AI->getParent() == InsBeforeB) AI->moveBefore(InsBefore); // If we have a call to llvm.localescape, keep it in the entry block. 
if (LocalEscapeCall) LocalEscapeCall->moveBefore(InsBefore); SmallVector SVD; SVD.reserve(AllocaVec.size()); for (AllocaInst *AI : AllocaVec) { ASanStackVariableDescription D = {AI->getName().data(), ASan.getAllocaSizeInBytes(*AI), 0, AI->getAlignment(), AI, 0, 0}; SVD.push_back(D); } // Minimal header size (left redzone) is 4 pointers, // i.e. 32 bytes on 64-bit platforms and 16 bytes in 32-bit platforms. size_t MinHeaderSize = ASan.LongSize / 2; const ASanStackFrameLayout &L = ComputeASanStackFrameLayout(SVD, 1ULL << Mapping.Scale, MinHeaderSize); // Build AllocaToSVDMap for ASanStackVariableDescription lookup. DenseMap AllocaToSVDMap; for (auto &Desc : SVD) AllocaToSVDMap[Desc.AI] = &Desc; // Update SVD with information from lifetime intrinsics. for (const auto &APC : StaticAllocaPoisonCallVec) { assert(APC.InsBefore); assert(APC.AI); assert(ASan.isInterestingAlloca(*APC.AI)); assert(APC.AI->isStaticAlloca()); ASanStackVariableDescription &Desc = *AllocaToSVDMap[APC.AI]; Desc.LifetimeSize = Desc.Size; if (const DILocation *FnLoc = EntryDebugLocation.get()) { if (const DILocation *LifetimeLoc = APC.InsBefore->getDebugLoc().get()) { if (LifetimeLoc->getFile() == FnLoc->getFile()) if (unsigned Line = LifetimeLoc->getLine()) Desc.Line = std::min(Desc.Line ? Desc.Line : Line, Line); } } } auto DescriptionString = ComputeASanStackFrameDescription(SVD); DEBUG(dbgs() << DescriptionString << " --- " << L.FrameSize << "\n"); uint64_t LocalStackSize = L.FrameSize; bool DoStackMalloc = ClUseAfterReturn && !ASan.CompileKernel && LocalStackSize <= kMaxStackMallocSize; bool DoDynamicAlloca = ClDynamicAllocaStack; // Don't do dynamic alloca or stack malloc if: // 1) There is inline asm: too often it makes assumptions on which registers // are available. // 2) There is a returns_twice call (typically setjmp), which is // optimization-hostile, and doesn't play well with introduced indirect // register-relative calculation of local variable addresses. 
DoDynamicAlloca &= !HasNonEmptyInlineAsm && !HasReturnsTwiceCall; DoStackMalloc &= !HasNonEmptyInlineAsm && !HasReturnsTwiceCall; Value *StaticAlloca = DoDynamicAlloca ? nullptr : createAllocaForLayout(IRB, L, false); Value *FakeStack; Value *LocalStackBase; if (DoStackMalloc) { // void *FakeStack = __asan_option_detect_stack_use_after_return // ? __asan_stack_malloc_N(LocalStackSize) // : nullptr; // void *LocalStackBase = (FakeStack) ? FakeStack : alloca(LocalStackSize); Constant *OptionDetectUseAfterReturn = F.getParent()->getOrInsertGlobal( kAsanOptionDetectUseAfterReturn, IRB.getInt32Ty()); Value *UseAfterReturnIsEnabled = IRB.CreateICmpNE(IRB.CreateLoad(OptionDetectUseAfterReturn), Constant::getNullValue(IRB.getInt32Ty())); Instruction *Term = SplitBlockAndInsertIfThen(UseAfterReturnIsEnabled, InsBefore, false); IRBuilder<> IRBIf(Term); IRBIf.SetCurrentDebugLocation(EntryDebugLocation); StackMallocIdx = StackMallocSizeClass(LocalStackSize); assert(StackMallocIdx <= kMaxAsanStackMallocSizeClass); Value *FakeStackValue = IRBIf.CreateCall(AsanStackMallocFunc[StackMallocIdx], ConstantInt::get(IntptrTy, LocalStackSize)); IRB.SetInsertPoint(InsBefore); IRB.SetCurrentDebugLocation(EntryDebugLocation); FakeStack = createPHI(IRB, UseAfterReturnIsEnabled, FakeStackValue, Term, ConstantInt::get(IntptrTy, 0)); Value *NoFakeStack = IRB.CreateICmpEQ(FakeStack, Constant::getNullValue(IntptrTy)); Term = SplitBlockAndInsertIfThen(NoFakeStack, InsBefore, false); IRBIf.SetInsertPoint(Term); IRBIf.SetCurrentDebugLocation(EntryDebugLocation); Value *AllocaValue = DoDynamicAlloca ? createAllocaForLayout(IRBIf, L, true) : StaticAlloca; IRB.SetInsertPoint(InsBefore); IRB.SetCurrentDebugLocation(EntryDebugLocation); LocalStackBase = createPHI(IRB, NoFakeStack, AllocaValue, Term, FakeStack); } else { // void *FakeStack = nullptr; // void *LocalStackBase = alloca(LocalStackSize); FakeStack = ConstantInt::get(IntptrTy, 0); LocalStackBase = DoDynamicAlloca ? 
createAllocaForLayout(IRB, L, true) : StaticAlloca; } // Replace Alloca instructions with base+offset. for (const auto &Desc : SVD) { AllocaInst *AI = Desc.AI; Value *NewAllocaPtr = IRB.CreateIntToPtr( IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)), AI->getType()); replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, /*Deref=*/true); AI->replaceAllUsesWith(NewAllocaPtr); } // The left-most redzone has enough space for at least 4 pointers. // Write the Magic value to redzone[0]. Value *BasePlus0 = IRB.CreateIntToPtr(LocalStackBase, IntptrPtrTy); IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic), BasePlus0); // Write the frame description constant to redzone[1]. Value *BasePlus1 = IRB.CreateIntToPtr( IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, ASan.LongSize / 8)), IntptrPtrTy); GlobalVariable *StackDescriptionGlobal = createPrivateGlobalForString(*F.getParent(), DescriptionString, /*AllowMerging*/ true); Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy); IRB.CreateStore(Description, BasePlus1); // Write the PC to redzone[2]. Value *BasePlus2 = IRB.CreateIntToPtr( IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, 2 * ASan.LongSize / 8)), IntptrPtrTy); IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2); const auto &ShadowAfterScope = GetShadowBytesAfterScope(SVD, L); // Poison the stack red zones at the entry. Value *ShadowBase = ASan.memToShadow(LocalStackBase, IRB); // As mask we must use most poisoned case: red zones and after scope. // As bytes we can use either the same or just red zones only. copyToShadow(ShadowAfterScope, ShadowAfterScope, IRB, ShadowBase); if (!StaticAllocaPoisonCallVec.empty()) { const auto &ShadowInScope = GetShadowBytes(SVD, L); // Poison static allocas near lifetime intrinsics. 
for (const auto &APC : StaticAllocaPoisonCallVec) { const ASanStackVariableDescription &Desc = *AllocaToSVDMap[APC.AI]; assert(Desc.Offset % L.Granularity == 0); size_t Begin = Desc.Offset / L.Granularity; size_t End = Begin + (APC.Size + L.Granularity - 1) / L.Granularity; IRBuilder<> IRB(APC.InsBefore); copyToShadow(ShadowAfterScope, APC.DoPoison ? ShadowAfterScope : ShadowInScope, Begin, End, IRB, ShadowBase); } } SmallVector ShadowClean(ShadowAfterScope.size(), 0); SmallVector ShadowAfterReturn; // (Un)poison the stack before all ret instructions. for (auto Ret : RetVec) { IRBuilder<> IRBRet(Ret); // Mark the current frame as retired. IRBRet.CreateStore(ConstantInt::get(IntptrTy, kRetiredStackFrameMagic), BasePlus0); if (DoStackMalloc) { assert(StackMallocIdx >= 0); // if FakeStack != 0 // LocalStackBase == FakeStack // // In use-after-return mode, poison the whole stack frame. // if StackMallocIdx <= 4 // // For small sizes inline the whole thing: // memset(ShadowBase, kAsanStackAfterReturnMagic, ShadowSize); // **SavedFlagPtr(FakeStack) = 0 // else // __asan_stack_free_N(FakeStack, LocalStackSize) // else // Value *Cmp = IRBRet.CreateICmpNE(FakeStack, Constant::getNullValue(IntptrTy)); TerminatorInst *ThenTerm, *ElseTerm; SplitBlockAndInsertIfThenElse(Cmp, Ret, &ThenTerm, &ElseTerm); IRBuilder<> IRBPoison(ThenTerm); if (StackMallocIdx <= 4) { int ClassSize = kMinStackMallocSize << StackMallocIdx; ShadowAfterReturn.resize(ClassSize / L.Granularity, kAsanStackUseAfterReturnMagic); copyToShadow(ShadowAfterReturn, ShadowAfterReturn, IRBPoison, ShadowBase); Value *SavedFlagPtrPtr = IRBPoison.CreateAdd( FakeStack, ConstantInt::get(IntptrTy, ClassSize - ASan.LongSize / 8)); Value *SavedFlagPtr = IRBPoison.CreateLoad( IRBPoison.CreateIntToPtr(SavedFlagPtrPtr, IntptrPtrTy)); IRBPoison.CreateStore( Constant::getNullValue(IRBPoison.getInt8Ty()), IRBPoison.CreateIntToPtr(SavedFlagPtr, IRBPoison.getInt8PtrTy())); } else { // For larger frames call __asan_stack_free_*. 
IRBPoison.CreateCall( AsanStackFreeFunc[StackMallocIdx], {FakeStack, ConstantInt::get(IntptrTy, LocalStackSize)}); } IRBuilder<> IRBElse(ElseTerm); copyToShadow(ShadowAfterScope, ShadowClean, IRBElse, ShadowBase); } else { copyToShadow(ShadowAfterScope, ShadowClean, IRBRet, ShadowBase); } } // We are done. Remove the old unused alloca instructions. for (auto AI : AllocaVec) AI->eraseFromParent(); } void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size, IRBuilder<> &IRB, bool DoPoison) { // For now just insert the call to ASan runtime. Value *AddrArg = IRB.CreatePointerCast(V, IntptrTy); Value *SizeArg = ConstantInt::get(IntptrTy, Size); IRB.CreateCall( DoPoison ? AsanPoisonStackMemoryFunc : AsanUnpoisonStackMemoryFunc, {AddrArg, SizeArg}); } // Handling llvm.lifetime intrinsics for a given %alloca: // (1) collect all llvm.lifetime.xxx(%size, %value) describing the alloca. // (2) if %size is constant, poison memory for llvm.lifetime.end (to detect // invalid accesses) and unpoison it for llvm.lifetime.start (the memory // could be poisoned by previous llvm.lifetime.end instruction, as the // variable may go in and out of scope several times, e.g. in loops). // (3) if we poisoned at least one %alloca in a function, // unpoison the whole stack frame at function exit. AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) { if (AllocaInst *AI = dyn_cast(V)) // We're interested only in allocas we can handle. return ASan.isInterestingAlloca(*AI) ? AI : nullptr; // See if we've already calculated (or started to calculate) alloca for a // given value. AllocaForValueMapTy::iterator I = AllocaForValue.find(V); if (I != AllocaForValue.end()) return I->second; // Store 0 while we're calculating alloca for value V to avoid // infinite recursion if the value references itself. 
AllocaForValue[V] = nullptr; AllocaInst *Res = nullptr; if (CastInst *CI = dyn_cast(V)) Res = findAllocaForValue(CI->getOperand(0)); else if (PHINode *PN = dyn_cast(V)) { for (Value *IncValue : PN->incoming_values()) { // Allow self-referencing phi-nodes. if (IncValue == PN) continue; AllocaInst *IncValueAI = findAllocaForValue(IncValue); // AI for incoming values should exist and should all be equal. if (IncValueAI == nullptr || (Res != nullptr && IncValueAI != Res)) return nullptr; Res = IncValueAI; } } else if (GetElementPtrInst *EP = dyn_cast(V)) { Res = findAllocaForValue(EP->getPointerOperand()); } else { DEBUG(dbgs() << "Alloca search canceled on unknown instruction: " << *V << "\n"); } if (Res) AllocaForValue[V] = Res; return Res; } void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) { IRBuilder<> IRB(AI); const unsigned Align = std::max(kAllocaRzSize, AI->getAlignment()); const uint64_t AllocaRedzoneMask = kAllocaRzSize - 1; Value *Zero = Constant::getNullValue(IntptrTy); Value *AllocaRzSize = ConstantInt::get(IntptrTy, kAllocaRzSize); Value *AllocaRzMask = ConstantInt::get(IntptrTy, AllocaRedzoneMask); // Since we need to extend alloca with additional memory to locate // redzones, and OldSize is number of allocated blocks with // ElementSize size, get allocated memory size in bytes by // OldSize * ElementSize. const unsigned ElementSize = F.getParent()->getDataLayout().getTypeAllocSize(AI->getAllocatedType()); Value *OldSize = IRB.CreateMul(IRB.CreateIntCast(AI->getArraySize(), IntptrTy, false), ConstantInt::get(IntptrTy, ElementSize)); // PartialSize = OldSize % 32 Value *PartialSize = IRB.CreateAnd(OldSize, AllocaRzMask); // Misalign = kAllocaRzSize - PartialSize; Value *Misalign = IRB.CreateSub(AllocaRzSize, PartialSize); // PartialPadding = Misalign != kAllocaRzSize ? 
Misalign : 0; Value *Cond = IRB.CreateICmpNE(Misalign, AllocaRzSize); Value *PartialPadding = IRB.CreateSelect(Cond, Misalign, Zero); // AdditionalChunkSize = Align + PartialPadding + kAllocaRzSize // Align is added to locate left redzone, PartialPadding for possible // partial redzone and kAllocaRzSize for right redzone respectively. Value *AdditionalChunkSize = IRB.CreateAdd( ConstantInt::get(IntptrTy, Align + kAllocaRzSize), PartialPadding); Value *NewSize = IRB.CreateAdd(OldSize, AdditionalChunkSize); // Insert new alloca with new NewSize and Align params. AllocaInst *NewAlloca = IRB.CreateAlloca(IRB.getInt8Ty(), NewSize); NewAlloca->setAlignment(Align); // NewAddress = Address + Align Value *NewAddress = IRB.CreateAdd(IRB.CreatePtrToInt(NewAlloca, IntptrTy), ConstantInt::get(IntptrTy, Align)); // Insert __asan_alloca_poison call for new created alloca. IRB.CreateCall(AsanAllocaPoisonFunc, {NewAddress, OldSize}); // Store the last alloca's address to DynamicAllocaLayout. We'll need this // for unpoisoning stuff. IRB.CreateStore(IRB.CreatePtrToInt(NewAlloca, IntptrTy), DynamicAllocaLayout); Value *NewAddressPtr = IRB.CreateIntToPtr(NewAddress, AI->getType()); // Replace all uses of AddessReturnedByAlloca with NewAddressPtr. AI->replaceAllUsesWith(NewAddressPtr); // We are done. Erase old alloca from parent. AI->eraseFromParent(); } // isSafeAccess returns true if Addr is always inbounds with respect to its // base object. For example, it is a field access or an array access with // constant inbounds index. bool AddressSanitizer::isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis, Value *Addr, uint64_t TypeSize) const { SizeOffsetType SizeOffset = ObjSizeVis.compute(Addr); if (!ObjSizeVis.bothKnown(SizeOffset)) return false; uint64_t Size = SizeOffset.first.getZExtValue(); int64_t Offset = SizeOffset.second.getSExtValue(); // Three checks are required to ensure safety: // . Offset >= 0 (since the offset is given from the base ptr) // . 
Size >= Offset (unsigned) // . Size - Offset >= NeededSize (unsigned) return Offset >= 0 && Size >= uint64_t(Offset) && Size - uint64_t(Offset) >= TypeSize / 8; } Index: projects/clang400-import/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp =================================================================== --- projects/clang400-import/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp (revision 313893) +++ projects/clang400-import/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp (revision 313894) @@ -1,693 +1,700 @@ //===-- ThreadSanitizer.cpp - race detector -------------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file is a part of ThreadSanitizer, a race detector. // // The tool is under development, for the details about previous versions see // http://code.google.com/p/data-race-test // // The instrumentation phase is quite simple: // - Insert calls to run-time library before every memory access. // - Optimizations may apply to avoid instrumenting some of the accesses. // - Insert calls at function entry/exit. // The rest is handled by the run-time library. 
//===----------------------------------------------------------------------===// #include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/EscapeEnumerator.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; #define DEBUG_TYPE "tsan" static cl::opt ClInstrumentMemoryAccesses( "tsan-instrument-memory-accesses", cl::init(true), cl::desc("Instrument memory accesses"), cl::Hidden); static cl::opt ClInstrumentFuncEntryExit( "tsan-instrument-func-entry-exit", cl::init(true), cl::desc("Instrument function entry and exit"), cl::Hidden); static cl::opt ClHandleCxxExceptions( "tsan-handle-cxx-exceptions", cl::init(true), cl::desc("Handle C++ exceptions (insert cleanup blocks for unwinding)"), cl::Hidden); static cl::opt ClInstrumentAtomics( "tsan-instrument-atomics", cl::init(true), cl::desc("Instrument atomics"), cl::Hidden); static cl::opt ClInstrumentMemIntrinsics( "tsan-instrument-memintrinsics", cl::init(true), cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden); STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); STATISTIC(NumInstrumentedWrites, "Number of 
instrumented writes"); STATISTIC(NumOmittedReadsBeforeWrite, "Number of reads ignored due to following writes"); STATISTIC(NumAccessesWithBadSize, "Number of accesses with bad size"); STATISTIC(NumInstrumentedVtableWrites, "Number of vtable ptr writes"); STATISTIC(NumInstrumentedVtableReads, "Number of vtable ptr reads"); STATISTIC(NumOmittedReadsFromConstantGlobals, "Number of reads from constant globals"); STATISTIC(NumOmittedReadsFromVtable, "Number of vtable reads"); STATISTIC(NumOmittedNonCaptured, "Number of accesses ignored due to capturing"); static const char *const kTsanModuleCtorName = "tsan.module_ctor"; static const char *const kTsanInitName = "__tsan_init"; namespace { /// ThreadSanitizer: instrument the code in module to find races. struct ThreadSanitizer : public FunctionPass { ThreadSanitizer() : FunctionPass(ID) {} StringRef getPassName() const override; void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnFunction(Function &F) override; bool doInitialization(Module &M) override; static char ID; // Pass identification, replacement for typeid. private: void initializeCallbacks(Module &M); bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL); bool instrumentAtomic(Instruction *I, const DataLayout &DL); bool instrumentMemIntrinsic(Instruction *I); void chooseInstructionsToInstrument(SmallVectorImpl &Local, SmallVectorImpl &All, const DataLayout &DL); bool addrPointsToConstantData(Value *Addr); int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL); void InsertRuntimeIgnores(Function &F); Type *IntptrTy; IntegerType *OrdTy; // Callbacks to run-time library are computed in doInitialization. Function *TsanFuncEntry; Function *TsanFuncExit; Function *TsanIgnoreBegin; Function *TsanIgnoreEnd; // Accesses sizes are powers of two: 1, 2, 4, 8, 16. 
static const size_t kNumberOfAccessSizes = 5; Function *TsanRead[kNumberOfAccessSizes]; Function *TsanWrite[kNumberOfAccessSizes]; Function *TsanUnalignedRead[kNumberOfAccessSizes]; Function *TsanUnalignedWrite[kNumberOfAccessSizes]; Function *TsanAtomicLoad[kNumberOfAccessSizes]; Function *TsanAtomicStore[kNumberOfAccessSizes]; Function *TsanAtomicRMW[AtomicRMWInst::LAST_BINOP + 1][kNumberOfAccessSizes]; Function *TsanAtomicCAS[kNumberOfAccessSizes]; Function *TsanAtomicThreadFence; Function *TsanAtomicSignalFence; Function *TsanVptrUpdate; Function *TsanVptrLoad; Function *MemmoveFn, *MemcpyFn, *MemsetFn; Function *TsanCtorFunction; }; } // namespace char ThreadSanitizer::ID = 0; INITIALIZE_PASS_BEGIN( ThreadSanitizer, "tsan", "ThreadSanitizer: detects data races.", false, false) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END( ThreadSanitizer, "tsan", "ThreadSanitizer: detects data races.", false, false) StringRef ThreadSanitizer::getPassName() const { return "ThreadSanitizer"; } void ThreadSanitizer::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); } FunctionPass *llvm::createThreadSanitizerPass() { return new ThreadSanitizer(); } void ThreadSanitizer::initializeCallbacks(Module &M) { IRBuilder<> IRB(M.getContext()); AttributeSet Attr; Attr = Attr.addAttribute(M.getContext(), AttributeSet::FunctionIndex, Attribute::NoUnwind); // Initialize the callbacks. 
TsanFuncEntry = checkSanitizerInterfaceFunction(M.getOrInsertFunction( "__tsan_func_entry", Attr, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); TsanFuncExit = checkSanitizerInterfaceFunction( M.getOrInsertFunction("__tsan_func_exit", Attr, IRB.getVoidTy(), nullptr)); TsanIgnoreBegin = checkSanitizerInterfaceFunction(M.getOrInsertFunction( "__tsan_ignore_thread_begin", Attr, IRB.getVoidTy(), nullptr)); TsanIgnoreEnd = checkSanitizerInterfaceFunction(M.getOrInsertFunction( "__tsan_ignore_thread_end", Attr, IRB.getVoidTy(), nullptr)); OrdTy = IRB.getInt32Ty(); for (size_t i = 0; i < kNumberOfAccessSizes; ++i) { const unsigned ByteSize = 1U << i; const unsigned BitSize = ByteSize * 8; std::string ByteSizeStr = utostr(ByteSize); std::string BitSizeStr = utostr(BitSize); SmallString<32> ReadName("__tsan_read" + ByteSizeStr); TsanRead[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( ReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); SmallString<32> WriteName("__tsan_write" + ByteSizeStr); TsanWrite[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( WriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); SmallString<64> UnalignedReadName("__tsan_unaligned_read" + ByteSizeStr); TsanUnalignedRead[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( UnalignedReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); SmallString<64> UnalignedWriteName("__tsan_unaligned_write" + ByteSizeStr); TsanUnalignedWrite[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( UnalignedWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); Type *Ty = Type::getIntNTy(M.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); SmallString<32> AtomicLoadName("__tsan_atomic" + BitSizeStr + "_load"); TsanAtomicLoad[i] = checkSanitizerInterfaceFunction( M.getOrInsertFunction(AtomicLoadName, Attr, Ty, PtrTy, OrdTy, nullptr)); SmallString<32> AtomicStoreName("__tsan_atomic" + BitSizeStr + "_store"); TsanAtomicStore[i] = 
checkSanitizerInterfaceFunction(M.getOrInsertFunction( AtomicStoreName, Attr, IRB.getVoidTy(), PtrTy, Ty, OrdTy, nullptr)); for (int op = AtomicRMWInst::FIRST_BINOP; op <= AtomicRMWInst::LAST_BINOP; ++op) { TsanAtomicRMW[op][i] = nullptr; const char *NamePart = nullptr; if (op == AtomicRMWInst::Xchg) NamePart = "_exchange"; else if (op == AtomicRMWInst::Add) NamePart = "_fetch_add"; else if (op == AtomicRMWInst::Sub) NamePart = "_fetch_sub"; else if (op == AtomicRMWInst::And) NamePart = "_fetch_and"; else if (op == AtomicRMWInst::Or) NamePart = "_fetch_or"; else if (op == AtomicRMWInst::Xor) NamePart = "_fetch_xor"; else if (op == AtomicRMWInst::Nand) NamePart = "_fetch_nand"; else continue; SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart); TsanAtomicRMW[op][i] = checkSanitizerInterfaceFunction( M.getOrInsertFunction(RMWName, Attr, Ty, PtrTy, Ty, OrdTy, nullptr)); } SmallString<32> AtomicCASName("__tsan_atomic" + BitSizeStr + "_compare_exchange_val"); TsanAtomicCAS[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( AtomicCASName, Attr, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy, nullptr)); } TsanVptrUpdate = checkSanitizerInterfaceFunction( M.getOrInsertFunction("__tsan_vptr_update", Attr, IRB.getVoidTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), nullptr)); TsanVptrLoad = checkSanitizerInterfaceFunction(M.getOrInsertFunction( "__tsan_vptr_read", Attr, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); TsanAtomicThreadFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction( "__tsan_atomic_thread_fence", Attr, IRB.getVoidTy(), OrdTy, nullptr)); TsanAtomicSignalFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction( "__tsan_atomic_signal_fence", Attr, IRB.getVoidTy(), OrdTy, nullptr)); MemmoveFn = checkSanitizerInterfaceFunction( M.getOrInsertFunction("memmove", Attr, IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr)); MemcpyFn = checkSanitizerInterfaceFunction( M.getOrInsertFunction("memcpy", Attr, 
IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr)); MemsetFn = checkSanitizerInterfaceFunction( M.getOrInsertFunction("memset", Attr, IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy, nullptr)); } bool ThreadSanitizer::doInitialization(Module &M) { const DataLayout &DL = M.getDataLayout(); IntptrTy = DL.getIntPtrType(M.getContext()); std::tie(TsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions( M, kTsanModuleCtorName, kTsanInitName, /*InitArgTypes=*/{}, /*InitArgs=*/{}); appendToGlobalCtors(M, TsanCtorFunction, 0); return true; } static bool isVtableAccess(Instruction *I) { if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa)) return Tag->isTBAAVtableAccess(); return false; } // Do not instrument known races/"benign races" that come from compiler // instrumentatin. The user has no way of suppressing them. static bool shouldInstrumentReadWriteFromAddress(Value *Addr) { // Peel off GEPs and BitCasts. Addr = Addr->stripInBoundsOffsets(); if (GlobalVariable *GV = dyn_cast(Addr)) { if (GV->hasSection()) { StringRef SectionName = GV->getSection(); // Check if the global is in the PGO counters section. if (SectionName.endswith(getInstrProfCountersSectionName( /*AddSegment=*/false))) return false; } // Check if the global is private gcov data. if (GV->getName().startswith("__llvm_gcov") || GV->getName().startswith("__llvm_gcda")) return false; } // Do not instrument acesses from different address spaces; we cannot deal // with them. if (Addr) { Type *PtrTy = cast(Addr->getType()->getScalarType()); if (PtrTy->getPointerAddressSpace() != 0) return false; } return true; } bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) { // If this is a GEP, just analyze its pointer operand. if (GetElementPtrInst *GEP = dyn_cast(Addr)) Addr = GEP->getPointerOperand(); if (GlobalVariable *GV = dyn_cast(Addr)) { if (GV->isConstant()) { // Reads from constant globals can not race with any writes. 
NumOmittedReadsFromConstantGlobals++; return true; } } else if (LoadInst *L = dyn_cast(Addr)) { if (isVtableAccess(L)) { // Reads from a vtable pointer can not race with any writes. NumOmittedReadsFromVtable++; return true; } } return false; } // Instrumenting some of the accesses may be proven redundant. // Currently handled: // - read-before-write (within same BB, no calls between) // - not captured variables // // We do not handle some of the patterns that should not survive // after the classic compiler optimizations. // E.g. two reads from the same temp should be eliminated by CSE, // two writes should be eliminated by DSE, etc. // // 'Local' is a vector of insns within the same BB (no calls between). // 'All' is a vector of insns that will be instrumented. void ThreadSanitizer::chooseInstructionsToInstrument( SmallVectorImpl &Local, SmallVectorImpl &All, const DataLayout &DL) { SmallSet WriteTargets; // Iterate from the end. for (Instruction *I : reverse(Local)) { if (StoreInst *Store = dyn_cast(I)) { Value *Addr = Store->getPointerOperand(); if (!shouldInstrumentReadWriteFromAddress(Addr)) continue; WriteTargets.insert(Addr); } else { LoadInst *Load = cast(I); Value *Addr = Load->getPointerOperand(); if (!shouldInstrumentReadWriteFromAddress(Addr)) continue; if (WriteTargets.count(Addr)) { // We will write to this temp, so no reason to analyze the read. NumOmittedReadsBeforeWrite++; continue; } if (addrPointsToConstantData(Addr)) { // Addr points to some constant data -- it can not race with any writes. continue; } } Value *Addr = isa(*I) ? cast(I)->getPointerOperand() : cast(I)->getPointerOperand(); if (isa(GetUnderlyingObject(Addr, DL)) && !PointerMayBeCaptured(Addr, true, true)) { // The variable is addressable but not captured, so it cannot be // referenced from a different thread and participate in a data race // (see llvm/Analysis/CaptureTracking.h for details). 
NumOmittedNonCaptured++; continue; } All.push_back(I); } Local.clear(); } static bool isAtomic(Instruction *I) { if (LoadInst *LI = dyn_cast(I)) return LI->isAtomic() && LI->getSynchScope() == CrossThread; if (StoreInst *SI = dyn_cast(I)) return SI->isAtomic() && SI->getSynchScope() == CrossThread; if (isa(I)) return true; if (isa(I)) return true; if (isa(I)) return true; return false; } void ThreadSanitizer::InsertRuntimeIgnores(Function &F) { IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI()); IRB.CreateCall(TsanIgnoreBegin); EscapeEnumerator EE(F, "tsan_ignore_cleanup", ClHandleCxxExceptions); while (IRBuilder<> *AtExit = EE.Next()) { AtExit->CreateCall(TsanIgnoreEnd); } } bool ThreadSanitizer::runOnFunction(Function &F) { // This is required to prevent instrumenting call to __tsan_init from within // the module constructor. if (&F == TsanCtorFunction) return false; initializeCallbacks(*F.getParent()); SmallVector AllLoadsAndStores; SmallVector LocalLoadsAndStores; SmallVector AtomicAccesses; SmallVector MemIntrinCalls; bool Res = false; bool HasCalls = false; bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeThread); const DataLayout &DL = F.getParent()->getDataLayout(); const TargetLibraryInfo *TLI = &getAnalysis().getTLI(); // Traverse all instructions, collect loads/stores/returns, check for calls. for (auto &BB : F) { for (auto &Inst : BB) { if (isAtomic(&Inst)) AtomicAccesses.push_back(&Inst); else if (isa(Inst) || isa(Inst)) LocalLoadsAndStores.push_back(&Inst); else if (isa(Inst) || isa(Inst)) { if (CallInst *CI = dyn_cast(&Inst)) maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI); if (isa(Inst)) MemIntrinCalls.push_back(&Inst); HasCalls = true; chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores, DL); } } chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores, DL); } // We have collected all loads and stores. // FIXME: many of these accesses do not need to be checked for races // (e.g. 
variables that do not escape, etc). // Instrument memory accesses only if we want to report bugs in the function. if (ClInstrumentMemoryAccesses && SanitizeFunction) for (auto Inst : AllLoadsAndStores) { Res |= instrumentLoadOrStore(Inst, DL); } // Instrument atomic memory accesses in any case (they can be used to // implement synchronization). if (ClInstrumentAtomics) for (auto Inst : AtomicAccesses) { Res |= instrumentAtomic(Inst, DL); } if (ClInstrumentMemIntrinsics && SanitizeFunction) for (auto Inst : MemIntrinCalls) { Res |= instrumentMemIntrinsic(Inst); } if (F.hasFnAttribute("sanitize_thread_no_checking_at_run_time")) { assert(!F.hasFnAttribute(Attribute::SanitizeThread)); if (HasCalls) InsertRuntimeIgnores(F); } // Instrument function entry/exit points if there were instrumented accesses. if ((Res || HasCalls) && ClInstrumentFuncEntryExit) { IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI()); Value *ReturnAddress = IRB.CreateCall( Intrinsic::getDeclaration(F.getParent(), Intrinsic::returnaddress), IRB.getInt32(0)); IRB.CreateCall(TsanFuncEntry, ReturnAddress); EscapeEnumerator EE(F, "tsan_cleanup", ClHandleCxxExceptions); while (IRBuilder<> *AtExit = EE.Next()) { AtExit->CreateCall(TsanFuncExit, {}); } Res = true; } return Res; } bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I, const DataLayout &DL) { IRBuilder<> IRB(I); bool IsWrite = isa(*I); Value *Addr = IsWrite ? cast(I)->getPointerOperand() : cast(I)->getPointerOperand(); + + // swifterror memory addresses are mem2reg promoted by instruction selection. + // As such they cannot have regular uses like an instrumentation function and + // it makes no sense to track them as memory. 
+ if (Addr->isSwiftError()) + return false; + int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; if (IsWrite && isVtableAccess(I)) { DEBUG(dbgs() << " VPTR : " << *I << "\n"); Value *StoredValue = cast(I)->getValueOperand(); // StoredValue may be a vector type if we are storing several vptrs at once. // In this case, just take the first element of the vector since this is // enough to find vptr races. if (isa(StoredValue->getType())) StoredValue = IRB.CreateExtractElement( StoredValue, ConstantInt::get(IRB.getInt32Ty(), 0)); if (StoredValue->getType()->isIntegerTy()) StoredValue = IRB.CreateIntToPtr(StoredValue, IRB.getInt8PtrTy()); // Call TsanVptrUpdate. IRB.CreateCall(TsanVptrUpdate, {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), IRB.CreatePointerCast(StoredValue, IRB.getInt8PtrTy())}); NumInstrumentedVtableWrites++; return true; } if (!IsWrite && isVtableAccess(I)) { IRB.CreateCall(TsanVptrLoad, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy())); NumInstrumentedVtableReads++; return true; } const unsigned Alignment = IsWrite ? cast(I)->getAlignment() : cast(I)->getAlignment(); Type *OrigTy = cast(Addr->getType())->getElementType(); const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy); Value *OnAccessFunc = nullptr; if (Alignment == 0 || Alignment >= 8 || (Alignment % (TypeSize / 8)) == 0) OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx]; else OnAccessFunc = IsWrite ? 
TsanUnalignedWrite[Idx] : TsanUnalignedRead[Idx]; IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy())); if (IsWrite) NumInstrumentedWrites++; else NumInstrumentedReads++; return true; } static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) { uint32_t v = 0; switch (ord) { case AtomicOrdering::NotAtomic: llvm_unreachable("unexpected atomic ordering!"); case AtomicOrdering::Unordered: LLVM_FALLTHROUGH; case AtomicOrdering::Monotonic: v = 0; break; // Not specified yet: // case AtomicOrdering::Consume: v = 1; break; case AtomicOrdering::Acquire: v = 2; break; case AtomicOrdering::Release: v = 3; break; case AtomicOrdering::AcquireRelease: v = 4; break; case AtomicOrdering::SequentiallyConsistent: v = 5; break; } return IRB->getInt32(v); } // If a memset intrinsic gets inlined by the code gen, we will miss races on it. // So, we either need to ensure the intrinsic is not inlined, or instrument it. // We do not instrument memset/memmove/memcpy intrinsics (too complicated), // instead we simply replace them with regular function calls, which are then // intercepted by the run-time. // Since tsan is running after everyone else, the calls should not be // replaced back with intrinsics. If that becomes wrong at some point, // we will need to call e.g. __tsan_memset to avoid the intrinsics. bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) { IRBuilder<> IRB(I); if (MemSetInst *M = dyn_cast(I)) { IRB.CreateCall( MemsetFn, {IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()), IRB.CreateIntCast(M->getArgOperand(1), IRB.getInt32Ty(), false), IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)}); I->eraseFromParent(); } else if (MemTransferInst *M = dyn_cast(I)) { IRB.CreateCall( isa(M) ? 
MemcpyFn : MemmoveFn, {IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()), IRB.CreatePointerCast(M->getArgOperand(1), IRB.getInt8PtrTy()), IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)}); I->eraseFromParent(); } return false; } // Both llvm and ThreadSanitizer atomic operations are based on C++11/C1x // standards. For background see C++11 standard. A slightly older, publicly // available draft of the standard (not entirely up-to-date, but close enough // for casual browsing) is available here: // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2011/n3242.pdf // The following page contains more background information: // http://www.hpl.hp.com/personal/Hans_Boehm/c++mm/ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { IRBuilder<> IRB(I); if (LoadInst *LI = dyn_cast(I)) { Value *Addr = LI->getPointerOperand(); int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; const unsigned ByteSize = 1U << Idx; const unsigned BitSize = ByteSize * 8; Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), createOrdering(&IRB, LI->getOrdering())}; Type *OrigTy = cast(Addr->getType())->getElementType(); Value *C = IRB.CreateCall(TsanAtomicLoad[Idx], Args); Value *Cast = IRB.CreateBitOrPointerCast(C, OrigTy); I->replaceAllUsesWith(Cast); } else if (StoreInst *SI = dyn_cast(I)) { Value *Addr = SI->getPointerOperand(); int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; const unsigned ByteSize = 1U << Idx; const unsigned BitSize = ByteSize * 8; Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), IRB.CreateBitOrPointerCast(SI->getValueOperand(), Ty), createOrdering(&IRB, SI->getOrdering())}; CallInst *C = CallInst::Create(TsanAtomicStore[Idx], Args); ReplaceInstWithInst(I, C); } else if (AtomicRMWInst *RMWI = 
dyn_cast(I)) { Value *Addr = RMWI->getPointerOperand(); int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx]; if (!F) return false; const unsigned ByteSize = 1U << Idx; const unsigned BitSize = ByteSize * 8; Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), IRB.CreateIntCast(RMWI->getValOperand(), Ty, false), createOrdering(&IRB, RMWI->getOrdering())}; CallInst *C = CallInst::Create(F, Args); ReplaceInstWithInst(I, C); } else if (AtomicCmpXchgInst *CASI = dyn_cast(I)) { Value *Addr = CASI->getPointerOperand(); int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; const unsigned ByteSize = 1U << Idx; const unsigned BitSize = ByteSize * 8; Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); Value *CmpOperand = IRB.CreateBitOrPointerCast(CASI->getCompareOperand(), Ty); Value *NewOperand = IRB.CreateBitOrPointerCast(CASI->getNewValOperand(), Ty); Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), CmpOperand, NewOperand, createOrdering(&IRB, CASI->getSuccessOrdering()), createOrdering(&IRB, CASI->getFailureOrdering())}; CallInst *C = IRB.CreateCall(TsanAtomicCAS[Idx], Args); Value *Success = IRB.CreateICmpEQ(C, CmpOperand); Value *OldVal = C; Type *OrigOldValTy = CASI->getNewValOperand()->getType(); if (Ty != OrigOldValTy) { // The value is a pointer, so we need to cast the return value. OldVal = IRB.CreateIntToPtr(C, OrigOldValTy); } Value *Res = IRB.CreateInsertValue(UndefValue::get(CASI->getType()), OldVal, 0); Res = IRB.CreateInsertValue(Res, Success, 1); I->replaceAllUsesWith(Res); I->eraseFromParent(); } else if (FenceInst *FI = dyn_cast(I)) { Value *Args[] = {createOrdering(&IRB, FI->getOrdering())}; Function *F = FI->getSynchScope() == SingleThread ? 
TsanAtomicSignalFence : TsanAtomicThreadFence; CallInst *C = CallInst::Create(F, Args); ReplaceInstWithInst(I, C); } return true; } int ThreadSanitizer::getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL) { Type *OrigPtrTy = Addr->getType(); Type *OrigTy = cast(OrigPtrTy)->getElementType(); assert(OrigTy->isSized()); uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy); if (TypeSize != 8 && TypeSize != 16 && TypeSize != 32 && TypeSize != 64 && TypeSize != 128) { NumAccessesWithBadSize++; // Ignore all unusual sizes. return -1; } size_t Idx = countTrailingZeros(TypeSize / 8); assert(Idx < kNumberOfAccessSizes); return Idx; } Index: projects/clang400-import/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp =================================================================== --- projects/clang400-import/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp (revision 313893) +++ projects/clang400-import/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp (revision 313894) @@ -1,775 +1,778 @@ //===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements some loop unrolling utilities. It does not define any // actual pass or policy, but provides a single function to perform loop // unrolling. // // The process of unrolling can produce extraneous basic blocks linked with // unconditional branches. This will be corrected in the future. 
// //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/UnrollLoop.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" using namespace llvm; #define DEBUG_TYPE "loop-unroll" // TODO: Should these be here or in LoopUnroll? STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled"); STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)"); static cl::opt UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden, cl::desc("Allow runtime unrolled loops to be unrolled " "with epilog instead of prolog.")); /// Convert the instruction operands from referencing the current values into /// those specified by VMap. 
static inline void remapInstruction(Instruction *I, ValueToValueMapTy &VMap) { for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) { Value *Op = I->getOperand(op); ValueToValueMapTy::iterator It = VMap.find(Op); if (It != VMap.end()) I->setOperand(op, It->second); } if (PHINode *PN = dyn_cast(I)) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i)); if (It != VMap.end()) PN->setIncomingBlock(i, cast(It->second)); } } } /// Folds a basic block into its predecessor if it only has one predecessor, and /// that predecessor only has one successor. /// The LoopInfo Analysis that is passed will be kept consistent. If folding is /// successful references to the containing loop must be removed from /// ScalarEvolution by calling ScalarEvolution::forgetLoop because SE may have /// references to the eliminated BB. The argument ForgottenLoops contains a set /// of loops that have already been forgotten to prevent redundant, expensive /// calls to ScalarEvolution::forgetLoop. Returns the new combined block. static BasicBlock * foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI, ScalarEvolution *SE, SmallPtrSetImpl &ForgottenLoops, DominatorTree *DT) { // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and // if there are no PHI nodes. BasicBlock *OnlyPred = BB->getSinglePredecessor(); if (!OnlyPred) return nullptr; if (OnlyPred->getTerminator()->getNumSuccessors() != 1) return nullptr; DEBUG(dbgs() << "Merging: " << *BB << "into: " << *OnlyPred); // Resolve any PHI nodes at the start of the block. They are all // guaranteed to have exactly one entry if they exist, unless there are // multiple duplicate (but guaranteed to be equal) entries for the // incoming edges. This occurs when there are multiple edges from // OnlyPred to OnlySucc. 
FoldSingleEntryPHINodes(BB); // Delete the unconditional branch from the predecessor... OnlyPred->getInstList().pop_back(); // Make all PHI nodes that referred to BB now refer to Pred as their // source... BB->replaceAllUsesWith(OnlyPred); // Move all definitions in the successor to the predecessor... OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList()); // OldName will be valid until erased. StringRef OldName = BB->getName(); // Erase the old block and update dominator info. if (DT) if (DomTreeNode *DTN = DT->getNode(BB)) { DomTreeNode *PredDTN = DT->getNode(OnlyPred); SmallVector Children(DTN->begin(), DTN->end()); for (auto *DI : Children) DT->changeImmediateDominator(DI, PredDTN); DT->eraseNode(BB); } // ScalarEvolution holds references to loop exit blocks. if (SE) { if (Loop *L = LI->getLoopFor(BB)) { if (ForgottenLoops.insert(L).second) SE->forgetLoop(L); } } LI->removeBlock(BB); // Inherit predecessor's name if it exists... if (!OldName.empty() && !OnlyPred->hasName()) OnlyPred->setName(OldName); BB->eraseFromParent(); return OnlyPred; } /// Check if unrolling created a situation where we need to insert phi nodes to /// preserve LCSSA form. /// \param Blocks is a vector of basic blocks representing unrolled loop. /// \param L is the outer loop. /// It's possible that some of the blocks are in L, and some are not. In this /// case, if there is a use is outside L, and definition is inside L, we need to /// insert a phi-node, otherwise LCSSA will be broken. /// The function is just a helper function for llvm::UnrollLoop that returns /// true if this situation occurs, indicating that LCSSA needs to be fixed. 
static bool needToInsertPhisForLCSSA(Loop *L, std::vector Blocks, LoopInfo *LI) { for (BasicBlock *BB : Blocks) { if (LI->getLoopFor(BB) == L) continue; for (Instruction &I : *BB) { for (Use &U : I.operands()) { if (auto Def = dyn_cast(U)) { Loop *DefLoop = LI->getLoopFor(Def->getParent()); if (!DefLoop) continue; if (DefLoop->contains(L)) return true; } } } } return false; } /// Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary /// and adds a mapping from the original loop to the new loop to NewLoops. /// Returns nullptr if no new loop was created and a pointer to the /// original loop OriginalBB was part of otherwise. const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB, BasicBlock *ClonedBB, LoopInfo *LI, NewLoopsMap &NewLoops) { // Figure out which loop New is in. const Loop *OldLoop = LI->getLoopFor(OriginalBB); assert(OldLoop && "Should (at least) be in the loop being unrolled!"); Loop *&NewLoop = NewLoops[OldLoop]; if (!NewLoop) { // Found a new sub-loop. assert(OriginalBB == OldLoop->getHeader() && "Header should be first in RPO"); + NewLoop = new Loop(); Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop()); - assert(NewLoopParent && - "Expected parent loop before sub-loop in RPO"); - NewLoop = new Loop; - NewLoopParent->addChildLoop(NewLoop); + + if (NewLoopParent) + NewLoopParent->addChildLoop(NewLoop); + else + LI->addTopLevelLoop(NewLoop); + NewLoop->addBasicBlockToLoop(ClonedBB, *LI); return OldLoop; } else { NewLoop->addBasicBlockToLoop(ClonedBB, *LI); return nullptr; } } /// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true /// if unrolling was successful, or false if the loop was unmodified. Unrolling /// can only fail when the loop's latch block is not terminated by a conditional /// branch instruction. However, if the trip count (and multiple) are not known, /// loop unrolling will mostly produce more code that is no faster. 
///
/// TripCount is the upper bound of the iteration on which control exits
/// LatchBlock. Control may exit the loop prior to TripCount iterations either
/// via an early branch in other loop block or via LatchBlock terminator. This
/// is relaxed from the general definition of trip count which is the number of
/// times the loop header executes. Note that UnrollLoop assumes that the loop
/// counter test is in LatchBlock in order to remove unnecessary instances of
/// the test. If control can exit the loop from the LatchBlock's terminator
/// prior to TripCount iterations, flag PreserveCondBr needs to be set.
///
/// PreserveCondBr indicates whether the conditional branch of the LatchBlock
/// needs to be preserved. It is needed when we use trip count upper bound to
/// fully unroll the loop. If PreserveOnlyFirst is also set then only the first
/// conditional branch needs to be preserved.
///
/// Similarly, TripMultiple divides the number of times that the LatchBlock may
/// execute without exiting the loop.
///
/// If AllowRuntime is true then UnrollLoop will consider unrolling loops that
/// have a runtime (i.e. not compile time constant) trip count. Unrolling these
/// loops requires an unroll "prologue" that runs "RuntimeTripCount % Count"
/// iterations before branching into the unrolled loop. UnrollLoop will not
/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
/// AllowExpensiveTripCount is false.
///
/// If we want to perform PGO-based loop peeling, PeelCount is set to the
/// number of iterations we want to peel off.
///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
/// DominatorTree if they are non-null.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime, bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst, unsigned TripMultiple, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); return false; } BasicBlock *LatchBlock = L->getLoopLatch(); if (!LatchBlock) { DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n"); return false; } // Loops with indirectbr cannot be cloned. if (!L->isSafeToClone()) { DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n"); return false; } BasicBlock *Header = L->getHeader(); BranchInst *BI = dyn_cast(LatchBlock->getTerminator()); if (!BI || BI->isUnconditional()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional branch.\n"); return false; } if (Header->hasAddressTaken()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << " Won't unroll loop: address of header block is taken.\n"); return false; } if (TripCount != 0) DEBUG(dbgs() << " Trip Count = " << TripCount << "\n"); if (TripMultiple != 1) DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n"); // Effectively "DCE" unrolled iterations that are beyond the tripcount // and will never be executed. if (TripCount != 0 && Count > TripCount) Count = TripCount; // Don't enter the unroll code if there is nothing to do. if (TripCount == 0 && Count < 2 && PeelCount == 0) return false; assert(Count > 0); assert(TripMultiple > 0); assert(TripCount == 0 || TripCount % TripMultiple == 0); // Are we eliminating the loop control altogether? 
bool CompletelyUnroll = Count == TripCount; SmallVector ExitBlocks; L->getExitBlocks(ExitBlocks); std::vector OriginalLoopBlocks = L->getBlocks(); // Go through all exits of L and see if there are any phi-nodes there. We just // conservatively assume that they're inserted to preserve LCSSA form, which // means that complete unrolling might break this form. We need to either fix // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For // now we just recompute LCSSA for the outer loop, but it should be possible // to fix it in-place. bool NeedToFixLCSSA = PreserveLCSSA && CompletelyUnroll && any_of(ExitBlocks, [](const BasicBlock *BB) { return isa(BB->begin()); }); // We assume a run-time trip count if the compiler cannot // figure out the loop trip count and the unroll-runtime // flag is specified. bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime); assert((!RuntimeTripCount || !PeelCount) && "Did not expect runtime trip-count unrolling " "and peeling for the same loop"); if (PeelCount) peelLoop(L, PeelCount, LI, SE, DT, PreserveLCSSA); // Loops containing convergent instructions must have a count that divides // their TripMultiple. DEBUG( { bool HasConvergent = false; for (auto &BB : L->blocks()) for (auto &I : *BB) if (auto CS = CallSite(&I)) HasConvergent |= CS.isConvergent(); assert((!HasConvergent || TripMultiple % Count == 0) && "Unroll count must divide trip multiple if loop contains a " "convergent operation."); }); if (RuntimeTripCount && TripMultiple % Count != 0 && !UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount, UnrollRuntimeEpilog, LI, SE, DT, PreserveLCSSA)) { if (Force) RuntimeTripCount = false; else return false; } // Notify ScalarEvolution that the loop will be substantially changed, // if not outright eliminated. if (SE) SE->forgetLoop(L); // If we know the trip count, we know the multiple... 
unsigned BreakoutTrip = 0; if (TripCount != 0) { BreakoutTrip = TripCount % Count; TripMultiple = 0; } else { // Figure out what multiple to use. BreakoutTrip = TripMultiple = (unsigned)GreatestCommonDivisor64(Count, TripMultiple); } using namespace ore; // Report the unrolling decision. if (CompletelyUnroll) { DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() << " with trip count " << TripCount << "!\n"); ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(), L->getHeader()) << "completely unrolled loop with " << NV("UnrollCount", TripCount) << " iterations"); } else if (PeelCount) { DEBUG(dbgs() << "PEELING loop %" << Header->getName() << " with iteration count " << PeelCount << "!\n"); ORE->emit(OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(), L->getHeader()) << " peeled loop by " << NV("PeelCount", PeelCount) << " iterations"); } else { OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(), L->getHeader()); Diag << "unrolled loop by a factor of " << NV("UnrollCount", Count); DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " << Count); if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); ORE->emit(Diag << " with a breakout at trip " << NV("BreakoutTrip", BreakoutTrip)); } else if (TripMultiple != 1) { DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); ORE->emit(Diag << " with " << NV("TripMultiple", TripMultiple) << " trips per branch"); } else if (RuntimeTripCount) { DEBUG(dbgs() << " with run-time trip count"); ORE->emit(Diag << " with run-time trip count"); } DEBUG(dbgs() << "!\n"); } bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); // For the first iteration of the loop, we should use the precloned values for // PHI nodes. Insert associations now. 
ValueToValueMapTy LastValueMap; std::vector OrigPHINode; for (BasicBlock::iterator I = Header->begin(); isa(I); ++I) { OrigPHINode.push_back(cast(I)); } std::vector Headers; std::vector Latches; Headers.push_back(Header); Latches.push_back(LatchBlock); // The current on-the-fly SSA update requires blocks to be processed in // reverse postorder so that LastValueMap contains the correct value at each // exit. LoopBlocksDFS DFS(L); DFS.perform(LI); // Stash the DFS iterators before adding blocks to the loop. LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); std::vector UnrolledLoopBlocks = L->getBlocks(); // Loop Unrolling might create new loops. While we do preserve LoopInfo, we // might break loop-simplified form for these loops (as they, e.g., would // share the same exit blocks). We'll keep track of loops for which we can // break this so that later we can re-simplify them. SmallSetVector LoopsToSimplify; for (Loop *SubLoop : *L) LoopsToSimplify.insert(SubLoop); for (unsigned It = 1; It != Count; ++It) { std::vector NewBlocks; SmallDenseMap NewLoops; NewLoops[L] = L; for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { ValueToValueMapTy VMap; BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It)); Header->getParent()->getBasicBlockList().push_back(New); // Tell LI about New. if (*BB == Header) { assert(LI->getLoopFor(*BB) == L && "Header should not be in a sub-loop"); L->addBasicBlockToLoop(New, *LI); } else { const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops); if (OldLoop) { LoopsToSimplify.insert(NewLoops[OldLoop]); // Forget the old loop, since its inputs may have changed. if (SE) SE->forgetLoop(OldLoop); } } if (*BB == Header) // Loop over all of the PHI nodes in the block, changing them to use // the incoming values from the previous block. 
for (PHINode *OrigPHI : OrigPHINode) { PHINode *NewPHI = cast(VMap[OrigPHI]); Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock); if (Instruction *InValI = dyn_cast(InVal)) if (It > 1 && L->contains(InValI)) InVal = LastValueMap[InValI]; VMap[OrigPHI] = InVal; New->getInstList().erase(NewPHI); } // Update our running map of newest clones LastValueMap[*BB] = New; for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); VI != VE; ++VI) LastValueMap[VI->first] = VI->second; // Add phi entries for newly created values to all exit blocks. for (BasicBlock *Succ : successors(*BB)) { if (L->contains(Succ)) continue; for (BasicBlock::iterator BBI = Succ->begin(); PHINode *phi = dyn_cast(BBI); ++BBI) { Value *Incoming = phi->getIncomingValueForBlock(*BB); ValueToValueMapTy::iterator It = LastValueMap.find(Incoming); if (It != LastValueMap.end()) Incoming = It->second; phi->addIncoming(Incoming, New); } } // Keep track of new headers and latches as we create them, so that // we can insert the proper branches later. if (*BB == Header) Headers.push_back(New); if (*BB == LatchBlock) Latches.push_back(New); NewBlocks.push_back(New); UnrolledLoopBlocks.push_back(New); // Update DomTree: since we just copy the loop body, and each copy has a // dedicated entry block (copy of the header block), this header's copy // dominates all copied blocks. That means, dominance relations in the // copied body are the same as in the original body. 
if (DT) { if (*BB == Header) DT->addNewBlock(New, Latches[It - 1]); else { auto BBDomNode = DT->getNode(*BB); auto BBIDom = BBDomNode->getIDom(); BasicBlock *OriginalBBIDom = BBIDom->getBlock(); DT->addNewBlock( New, cast(LastValueMap[cast(OriginalBBIDom)])); } } } // Remap all instructions in the most recent iteration for (BasicBlock *NewBlock : NewBlocks) { for (Instruction &I : *NewBlock) { ::remapInstruction(&I, LastValueMap); if (auto *II = dyn_cast(&I)) if (II->getIntrinsicID() == Intrinsic::assume) AC->registerAssumption(II); } } } // Loop over the PHI nodes in the original block, setting incoming values. for (PHINode *PN : OrigPHINode) { if (CompletelyUnroll) { PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader)); Header->getInstList().erase(PN); } else if (Count > 1) { Value *InVal = PN->removeIncomingValue(LatchBlock, false); // If this value was defined in the loop, take the value defined by the // last iteration of the loop. if (Instruction *InValI = dyn_cast(InVal)) { if (L->contains(InValI)) InVal = LastValueMap[InVal]; } assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch"); PN->addIncoming(InVal, Latches.back()); } } // Now that all the basic blocks for the unrolled iterations are in place, // set up the branches to connect them. for (unsigned i = 0, e = Latches.size(); i != e; ++i) { // The original branch was replicated in each unrolled iteration. BranchInst *Term = cast(Latches[i]->getTerminator()); // The branch destination. unsigned j = (i + 1) % e; BasicBlock *Dest = Headers[j]; bool NeedConditional = true; if (RuntimeTripCount && j != 0) { NeedConditional = false; } // For a complete unroll, make the last iteration end with a branch // to the exit block. if (CompletelyUnroll) { if (j == 0) Dest = LoopExit; // If using trip count upper bound to completely unroll, we need to keep // the conditional branch except the last one because the loop may exit // after any iteration. 
assert(NeedConditional && "NeedCondition cannot be modified by both complete " "unrolling and runtime unrolling"); NeedConditional = (PreserveCondBr && j && !(PreserveOnlyFirst && i != 0)); } else if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) { // If we know the trip count or a multiple of it, we can safely use an // unconditional branch for some iterations. NeedConditional = false; } if (NeedConditional) { // Update the conditional branch's successor for the following // iteration. Term->setSuccessor(!ContinueOnTrue, Dest); } else { // Remove phi operands at this loop exit if (Dest != LoopExit) { BasicBlock *BB = Latches[i]; for (BasicBlock *Succ: successors(BB)) { if (Succ == Headers[i]) continue; for (BasicBlock::iterator BBI = Succ->begin(); PHINode *Phi = dyn_cast(BBI); ++BBI) { Phi->removeIncomingValue(BB, false); } } } // Replace the conditional branch with an unconditional one. BranchInst::Create(Dest, Term); Term->eraseFromParent(); } } // Update dominators of blocks we might reach through exits. // Immediate dominator of such block might change, because we add more // routes which can lead to the exit: we can now reach it from the copied // iterations too. Thus, the new idom of the block will be the nearest // common dominator of the previous idom and common dominator of all copies of // the previous idom. This is equivalent to the nearest common dominator of // the previous idom and the first latch, which dominates all copies of the // previous idom. 
if (DT && Count > 1) { for (auto *BB : OriginalLoopBlocks) { auto *BBDomNode = DT->getNode(BB); SmallVector ChildrenToUpdate; for (auto *ChildDomNode : BBDomNode->getChildren()) { auto *ChildBB = ChildDomNode->getBlock(); if (!L->contains(ChildBB)) ChildrenToUpdate.push_back(ChildBB); } BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, Latches[0]); for (auto *ChildBB : ChildrenToUpdate) DT->changeImmediateDominator(ChildBB, NewIDom); } } // Merge adjacent basic blocks, if possible. SmallPtrSet ForgottenLoops; for (BasicBlock *Latch : Latches) { BranchInst *Term = cast(Latch->getTerminator()); if (Term->isUnconditional()) { BasicBlock *Dest = Term->getSuccessor(0); if (BasicBlock *Fold = foldBlockIntoPredecessor(Dest, LI, SE, ForgottenLoops, DT)) { // Dest has been folded into Fold. Update our worklists accordingly. std::replace(Latches.begin(), Latches.end(), Dest, Fold); UnrolledLoopBlocks.erase(std::remove(UnrolledLoopBlocks.begin(), UnrolledLoopBlocks.end(), Dest), UnrolledLoopBlocks.end()); } } } // FIXME: We only preserve DT info for complete unrolling now. Incrementally // updating domtree after partial loop unrolling should also be easy. if (DT && !CompletelyUnroll) DT->recalculate(*L->getHeader()->getParent()); else if (DT) DEBUG(DT->verifyDomTree()); // Simplify any new induction variables in the partially unrolled loop. if (SE && !CompletelyUnroll && Count > 1) { SmallVector DeadInsts; simplifyLoopIVs(L, SE, DT, LI, DeadInsts); // Aggressively clean up dead instructions that simplifyLoopIVs already // identified. Any remaining should be cleaned up below. while (!DeadInsts.empty()) if (Instruction *Inst = dyn_cast_or_null(&*DeadInsts.pop_back_val())) RecursivelyDeleteTriviallyDeadInstructions(Inst); } // At this point, the code is well formed. We now do a quick sweep over the // inserted code, doing constant propagation and dead code elimination as we // go. 
const DataLayout &DL = Header->getModule()->getDataLayout(); const std::vector &NewLoopBlocks = L->getBlocks(); for (BasicBlock *BB : NewLoopBlocks) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { Instruction *Inst = &*I++; if (Value *V = SimplifyInstruction(Inst, DL)) if (LI->replacementPreservesLCSSAForm(Inst, V)) Inst->replaceAllUsesWith(V); if (isInstructionTriviallyDead(Inst)) BB->getInstList().erase(Inst); } } // TODO: after peeling or unrolling, previously loop variant conditions are // likely to fold to constants, eagerly propagating those here will require // fewer cleanup passes to be run. Alternatively, a LoopEarlyCSE might be // appropriate. NumCompletelyUnrolled += CompletelyUnroll; ++NumUnrolled; Loop *OuterL = L->getParentLoop(); // Update LoopInfo if the loop is completely removed. if (CompletelyUnroll) LI->markAsRemoved(L); // After complete unrolling most of the blocks should be contained in OuterL. // However, some of them might happen to be out of OuterL (e.g. if they // precede a loop exit). In this case we might need to insert PHI nodes in // order to preserve LCSSA form. // We don't need to check this if we already know that we need to fix LCSSA // form. // TODO: For now we just recompute LCSSA for the outer loop in this case, but // it should be possible to fix it in-place. if (PreserveLCSSA && OuterL && CompletelyUnroll && !NeedToFixLCSSA) NeedToFixLCSSA |= ::needToInsertPhisForLCSSA(OuterL, UnrolledLoopBlocks, LI); // If we have a pass and a DominatorTree we should re-simplify impacted loops // to ensure subsequent analyses can rely on this form. We want to simplify // at least one layer outside of the loop that was unrolled so that any // changes to the parent loop exposed by the unrolling are considered. 
if (DT) { if (!OuterL && !CompletelyUnroll) OuterL = L; if (OuterL) { // OuterL includes all loops for which we can break loop-simplify, so // it's sufficient to simplify only it (it'll recursively simplify inner // loops too). // TODO: That potentially might be compile-time expensive. We should try // to fix the loop-simplified form incrementally. simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA); // LCSSA must be performed on the outermost affected loop. The unrolled // loop's last loop latch is guaranteed to be in the outermost loop after // LoopInfo's been updated by markAsRemoved. Loop *LatchLoop = LI->getLoopFor(Latches.back()); if (!OuterL->contains(LatchLoop)) while (OuterL->getParentLoop() != LatchLoop) OuterL = OuterL->getParentLoop(); if (NeedToFixLCSSA) formLCSSARecursively(*OuterL, *DT, LI, SE); else assert(OuterL->isLCSSAForm(*DT) && "Loops should be in LCSSA form after loop-unroll."); } else { // Simplify loops for which we might've broken loop-simplify form. for (Loop *SubLoop : LoopsToSimplify) simplifyLoop(SubLoop, DT, LI, SE, AC, PreserveLCSSA); } } return true; } /// Given an llvm.loop loop id metadata node, returns the loop hint metadata /// node with the given name (for example, "llvm.loop.unroll.count"). If no /// such metadata node exists, then nullptr is returned. MDNode *llvm::GetUnrollMetadata(MDNode *LoopID, StringRef Name) { // First operand should refer to the loop id itself. 
assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) { MDNode *MD = dyn_cast(LoopID->getOperand(i)); if (!MD) continue; MDString *S = dyn_cast(MD->getOperand(0)); if (!S) continue; if (Name.equals(S->getString())) return MD; } return nullptr; } Index: projects/clang400-import/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp =================================================================== --- projects/clang400-import/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp (revision 313893) +++ projects/clang400-import/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp (revision 313894) @@ -1,693 +1,698 @@ //===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements some loop unrolling utilities for loops with run-time // trip counts. See LoopUnroll.cpp for unrolling loops with compile-time // trip counts. // // The functions in this file are used to generate extra code when the // run-time trip count modulo the unroll factor is not 0. When this is the // case, we need to generate code to execute these 'left over' iterations. // // The current strategy generates an if-then-else sequence prior to the // unrolled loop to execute the 'left over' iterations before or after the // unrolled loop. 
// //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/UnrollLoop.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include using namespace llvm; #define DEBUG_TYPE "loop-unroll" STATISTIC(NumRuntimeUnrolled, "Number of loops unrolled with run-time trip counts"); /// Connect the unrolling prolog code to the original loop. /// The unrolling prolog code contains code to execute the /// 'extra' iterations if the run-time trip count modulo the /// unroll count is non-zero. /// /// This function performs the following: /// - Create PHI nodes at prolog end block to combine values /// that exit the prolog code and jump around the prolog. /// - Add a PHI operand to a PHI node at the loop exit block /// for values that exit the prolog and go around the loop. /// - Branch around the original loop if the trip count is less /// than the unroll factor. /// static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *PrologExit, BasicBlock *PreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); BasicBlock *PrologLatch = cast(VMap[Latch]); // Create a PHI node for each outgoing value from the original loop // (which means it is an outgoing value from the prolog code too). // The new PHI node is inserted in the prolog end basic block. 
// The new PHI node value is added as an operand of a PHI node in either // the loop header or the loop exit block. for (BasicBlock *Succ : successors(Latch)) { for (Instruction &BBI : *Succ) { PHINode *PN = dyn_cast(&BBI); // Exit when we passed all PHI nodes. if (!PN) break; // Add a new PHI node to the prolog end block and add the // appropriate incoming values. PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr", PrologExit->getFirstNonPHI()); // Adding a value to the new PHI node from the original loop preheader. // This is the value that skips all the prolog code. if (L->contains(PN)) { NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader); } else { NewPN->addIncoming(UndefValue::get(PN->getType()), PreHeader); } Value *V = PN->getIncomingValueForBlock(Latch); if (Instruction *I = dyn_cast(V)) { if (L->contains(I)) { V = VMap.lookup(I); } } // Adding a value to the new PHI node from the last prolog block // that was created. NewPN->addIncoming(V, PrologLatch); // Update the existing PHI node operand with the value from the // new PHI node. How this is done depends on if the existing // PHI node is in the original loop block, or the exit block. if (L->contains(PN)) { PN->setIncomingValue(PN->getBasicBlockIndex(NewPreHeader), NewPN); } else { PN->addIncoming(NewPN, PrologExit); } } } // Make sure that created prolog loop is in simplified form SmallVector PrologExitPreds; Loop *PrologLoop = LI->getLoopFor(PrologLatch); if (PrologLoop) { for (BasicBlock *PredBB : predecessors(PrologExit)) if (PrologLoop->contains(PredBB)) PrologExitPreds.push_back(PredBB); SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI, PreserveLCSSA); } // Create a branch around the original loop, which is taken if there are no // iterations remaining to be executed after running the prologue. 
Instruction *InsertPt = PrologExit->getTerminator(); IRBuilder<> B(InsertPt); assert(Count != 0 && "nonsensical Count!"); // If BECount getType(), Count - 1)); BasicBlock *Exit = L->getUniqueExitBlock(); assert(Exit && "Loop must have a single exit block only"); // Split the exit to maintain loop canonicalization guarantees SmallVector Preds(predecessors(Exit)); SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI, PreserveLCSSA); // Add the branch to the exit block (around the unrolled loop) B.CreateCondBr(BrLoopExit, Exit, NewPreHeader); InsertPt->eraseFromParent(); } /// Connect the unrolling epilog code to the original loop. /// The unrolling epilog code contains code to execute the /// 'extra' iterations if the run-time trip count modulo the /// unroll count is non-zero. /// /// This function performs the following: /// - Update PHI nodes at the unrolling loop exit and epilog loop exit /// - Create PHI nodes at the unrolling loop exit to combine /// values that exit the unrolling loop code and jump around it. /// - Update PHI operands in the epilog loop by the new PHI nodes /// - Branch around the epilog loop if extra iters (ModVal) is zero. /// static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, BasicBlock *Exit, BasicBlock *PreHeader, BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); BasicBlock *EpilogLatch = cast(VMap[Latch]); // Loop structure should be the following: // // PreHeader // NewPreHeader // Header // ... // Latch // NewExit (PN) // EpilogPreHeader // EpilogHeader // ... // EpilogLatch // Exit (EpilogPN) // Update PHI nodes at NewExit and Exit. for (Instruction &BBI : *NewExit) { PHINode *PN = dyn_cast(&BBI); // Exit when we passed all PHI nodes. 
if (!PN) break; // PN should be used in another PHI located in Exit block as // Exit was split by SplitBlockPredecessors into Exit and NewExit // Basicaly it should look like: // NewExit: // PN = PHI [I, Latch] // ... // Exit: // EpilogPN = PHI [PN, EpilogPreHeader] // // There is EpilogPreHeader incoming block instead of NewExit as // NewExit was spilt 1 more time to get EpilogPreHeader. assert(PN->hasOneUse() && "The phi should have 1 use"); PHINode *EpilogPN = cast (PN->use_begin()->getUser()); assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block"); // Add incoming PreHeader from branch around the Loop PN->addIncoming(UndefValue::get(PN->getType()), PreHeader); Value *V = PN->getIncomingValueForBlock(Latch); Instruction *I = dyn_cast(V); if (I && L->contains(I)) // If value comes from an instruction in the loop add VMap value. V = VMap.lookup(I); // For the instruction out of the loop, constant or undefined value // insert value itself. EpilogPN->addIncoming(V, EpilogLatch); assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 && "EpilogPN should have EpilogPreHeader incoming block"); // Change EpilogPreHeader incoming block to NewExit. EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader), NewExit); // Now PHIs should look like: // NewExit: // PN = PHI [I, Latch], [undef, PreHeader] // ... // Exit: // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch] } // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader). // Update corresponding PHI nodes in epilog loop. for (BasicBlock *Succ : successors(Latch)) { // Skip this as we already updated phis in exit blocks. if (!L->contains(Succ)) continue; for (Instruction &BBI : *Succ) { PHINode *PN = dyn_cast(&BBI); // Exit when we passed all PHI nodes. if (!PN) break; // Add new PHI nodes to the loop exit block and update epilog // PHIs with the new PHI values. 
PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr", NewExit->getFirstNonPHI()); // Adding a value to the new PHI node from the unrolling loop preheader. NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader); // Adding a value to the new PHI node from the unrolling loop latch. NewPN->addIncoming(PN->getIncomingValueForBlock(Latch), Latch); // Update the existing PHI node operand with the value from the new PHI // node. Corresponding instruction in epilog loop should be PHI. PHINode *VPN = cast(VMap[&BBI]); VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN); } } Instruction *InsertPt = NewExit->getTerminator(); IRBuilder<> B(InsertPt); Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod"); assert(Exit && "Loop must have a single exit block only"); // Split the exit to maintain loop canonicalization guarantees SmallVector Preds(predecessors(Exit)); SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, PreserveLCSSA); // Add the branch to the exit block (around the unrolling loop) B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit); InsertPt->eraseFromParent(); } /// Create a clone of the blocks in a loop and connect them together. /// If CreateRemainderLoop is false, loop structure will not be cloned, /// otherwise a new loop will be created including all cloned blocks, and the /// iterator of it switches to count NewIter down to 0. /// The cloned blocks should be inserted between InsertTop and InsertBot. /// If loop structure is cloned InsertTop should be new preheader, InsertBot /// new loop exit. /// static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, const bool UseEpilogRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, LoopInfo *LI) { StringRef suffix = UseEpilogRemainder ? 
"epil" : "prol"; BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); Function *F = Header->getParent(); LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); Loop *NewLoop = nullptr; Loop *ParentLoop = L->getParentLoop(); if (CreateRemainderLoop) { NewLoop = new Loop(); if (ParentLoop) ParentLoop->addChildLoop(NewLoop); else LI->addTopLevelLoop(NewLoop); } NewLoopsMap NewLoops; - NewLoops[L] = NewLoop; + if (NewLoop) + NewLoops[L] = NewLoop; + else if (ParentLoop) + NewLoops[L] = ParentLoop; + // For each block in the original loop, create a new copy, // and update the value map with the newly created values. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F); NewBlocks.push_back(NewBB); - - if (NewLoop) { + + // If we're unrolling the outermost loop, there's no remainder loop, + // and this block isn't in a nested loop, then the new block is not + // in any loop. Otherwise, add it to loopinfo. + if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop) addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops); - } else if (ParentLoop) - ParentLoop->addBasicBlockToLoop(NewBB, *LI); VMap[*BB] = NewBB; if (Header == *BB) { // For the first block, add a CFG connection to this newly // created block. InsertTop->getTerminator()->setSuccessor(0, NewBB); } if (Latch == *BB) { // For the last block, if CreateRemainderLoop is false, create a direct // jump to InsertBot. If not, create a loop back to cloned head. 
VMap.erase((*BB)->getTerminator()); BasicBlock *FirstLoopBB = cast(VMap[Header]); BranchInst *LatchBR = cast(NewBB->getTerminator()); IRBuilder<> Builder(LatchBR); if (!CreateRemainderLoop) { Builder.CreateBr(InsertBot); } else { PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, suffix + ".iter", FirstLoopBB->getFirstNonPHI()); Value *IdxSub = Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".sub"); Value *IdxCmp = Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp"); Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot); NewIdx->addIncoming(NewIter, InsertTop); NewIdx->addIncoming(IdxSub, NewBB); } LatchBR->eraseFromParent(); } } // Change the incoming values to the ones defined in the preheader or // cloned loop. for (BasicBlock::iterator I = Header->begin(); isa(I); ++I) { PHINode *NewPHI = cast(VMap[&*I]); if (!CreateRemainderLoop) { if (UseEpilogRemainder) { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); NewPHI->removeIncomingValue(Latch, false); } else { VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader); cast(VMap[Header])->getInstList().erase(NewPHI); } } else { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); NewPHI->setIncomingBlock(idx, InsertTop); BasicBlock *NewLatch = cast(VMap[Latch]); idx = NewPHI->getBasicBlockIndex(Latch); Value *InVal = NewPHI->getIncomingValue(idx); NewPHI->setIncomingBlock(idx, NewLatch); if (Value *V = VMap.lookup(InVal)) NewPHI->setIncomingValue(idx, V); } } if (NewLoop) { // Add unroll disable metadata to disable future unrolling for this loop. SmallVector MDs; // Reserve first location for self reference to the LoopID metadata node. MDs.push_back(nullptr); MDNode *LoopID = NewLoop->getLoopID(); if (LoopID) { // First remove any existing loop unrolling metadata. 
for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { bool IsUnrollMetadata = false; MDNode *MD = dyn_cast(LoopID->getOperand(i)); if (MD) { const MDString *S = dyn_cast(MD->getOperand(0)); IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll."); } if (!IsUnrollMetadata) MDs.push_back(LoopID->getOperand(i)); } } LLVMContext &Context = NewLoop->getHeader()->getContext(); SmallVector DisableOperands; DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable")); MDNode *DisableNode = MDNode::get(Context, DisableOperands); MDs.push_back(DisableNode); MDNode *NewLoopID = MDNode::get(Context, MDs); // Set operand 0 to refer to the loop id itself. NewLoopID->replaceOperandWith(0, NewLoopID); NewLoop->setLoopID(NewLoopID); } } /// Insert code in the prolog/epilog code when unrolling a loop with a /// run-time trip-count. /// /// This method assumes that the loop unroll factor is total number /// of loop bodies in the loop after unrolling. (Some folks refer /// to the unroll factor as the number of *extra* copies added). /// We assume also that the loop unroll factor is a power-of-two. So, after /// unrolling the loop, the number of loop bodies executed is 2, /// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch /// instruction in SimplifyCFG.cpp. Then, the backend decides how code for /// the switch instruction is generated. /// /// ***Prolog case*** /// extraiters = tripcount % loopfactor /// if (extraiters == 0) jump Loop: /// else jump Prol: /// Prol: LoopBody; /// extraiters -= 1 // Omitted if unroll factor is 2. /// if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2. /// if (tripcount < loopfactor) jump End: /// Loop: /// ... 
/// End: /// /// ***Epilog case*** /// extraiters = tripcount % loopfactor /// if (tripcount < loopfactor) jump LoopExit: /// unroll_iters = tripcount - extraiters /// Loop: LoopBody; (executes unroll_iter times); /// unroll_iter -= 1 /// if (unroll_iter != 0) jump Loop: /// LoopExit: /// if (extraiters == 0) jump EpilExit: /// Epil: LoopBody; (executes extraiters times) /// extraiters -= 1 // Omitted if unroll factor is 2. /// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2. /// EpilExit: bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, bool PreserveLCSSA) { // for now, only unroll loops that contain a single exit if (!L->getExitingBlock()) return false; // Make sure the loop is in canonical form, and there is a single // exit block only. if (!L->isLoopSimplifyForm()) return false; BasicBlock *Exit = L->getUniqueExitBlock(); // successor out of loop if (!Exit) return false; // Use Scalar Evolution to compute the trip count. This allows more loops to // be unrolled than relying on induction var simplification. if (!SE) return false; // Only unroll loops with a computable trip count, and the trip count needs // to be an int value (allowing a pointer type is a TODO item). const SCEV *BECountSC = SE->getBackedgeTakenCount(L); if (isa(BECountSC) || !BECountSC->getType()->isIntegerTy()) return false; unsigned BEWidth = cast(BECountSC->getType())->getBitWidth(); // Add 1 since the backedge count doesn't include the first loop iteration. 
const SCEV *TripCountSC = SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); if (isa(TripCountSC)) return false; BasicBlock *Header = L->getHeader(); BasicBlock *PreHeader = L->getLoopPreheader(); BranchInst *PreHeaderBR = cast(PreHeader->getTerminator()); const DataLayout &DL = Header->getModule()->getDataLayout(); SCEVExpander Expander(*SE, DL, "loop-unroll"); if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) return false; // This constraint lets us deal with an overflowing trip count easily; see the // comment on ModVal below. if (Log2_32(Count) > BEWidth) return false; BasicBlock *Latch = L->getLoopLatch(); // Loop structure is the following: // // PreHeader // Header // ... // Latch // Exit BasicBlock *NewPreHeader; BasicBlock *NewExit = nullptr; BasicBlock *PrologExit = nullptr; BasicBlock *EpilogPreHeader = nullptr; BasicBlock *PrologPreHeader = nullptr; if (UseEpilogRemainder) { // If epilog remainder // Split PreHeader to insert a branch around loop for unrolling. NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI); NewPreHeader->setName(PreHeader->getName() + ".new"); // Split Exit to create phi nodes from branch above. SmallVector Preds(predecessors(Exit)); NewExit = SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI, PreserveLCSSA); // Split NewExit to insert epilog remainder loop. EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI); EpilogPreHeader->setName(Header->getName() + ".epil.preheader"); } else { // If prolog remainder // Split the original preheader twice to insert prolog remainder loop PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI); PrologPreHeader->setName(Header->getName() + ".prol.preheader"); PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(), DT, LI); PrologExit->setName(Header->getName() + ".prol.loopexit"); // Split PrologExit to get NewPreHeader. 
NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI); NewPreHeader->setName(PreHeader->getName() + ".new"); } // Loop structure should be the following: // Epilog Prolog // // PreHeader PreHeader // *NewPreHeader *PrologPreHeader // Header *PrologExit // ... *NewPreHeader // Latch Header // *NewExit ... // *EpilogPreHeader Latch // Exit Exit // Calculate conditions for branch around loop for unrolling // in epilog case and around prolog remainder loop in prolog case. // Compute the number of extra iterations required, which is: // extra iterations = run-time trip count % loop unroll factor PreHeaderBR = cast(PreHeader->getTerminator()); Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), PreHeaderBR); Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(), PreHeaderBR); IRBuilder<> B(PreHeaderBR); Value *ModVal; // Calculate ModVal = (BECount + 1) % Count. // Note that TripCount is BECount + 1. if (isPowerOf2_32(Count)) { // When Count is power of 2 we don't BECount for epilog case, however we'll // need it for a branch around unrolling loop for prolog case. ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter"); // 1. There are no iterations to be run in the prolog/epilog loop. // OR // 2. The addition computing TripCount overflowed. // // If (2) is true, we know that TripCount really is (1 << BEWidth) and so // the number of iterations that remain to be run in the original loop is a // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we // explicitly check this above). } else { // As (BECount + 1) can potentially unsigned overflow we count // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count. Value *ModValTmp = B.CreateURem(BECount, ConstantInt::get(BECount->getType(), Count)); Value *ModValAdd = B.CreateAdd(ModValTmp, ConstantInt::get(ModValTmp->getType(), 1)); // At that point (BECount % Count) + 1 could be equal to Count. 
// To handle this case we need to take mod by Count one more time. ModVal = B.CreateURem(ModValAdd, ConstantInt::get(BECount->getType(), Count), "xtraiter"); } Value *BranchVal = UseEpilogRemainder ? B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)) : B.CreateIsNotNull(ModVal, "lcmp.mod"); BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader; BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit; // Branch to either remainder (extra iterations) loop or unrolling loop. B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop); PreHeaderBR->eraseFromParent(); Function *F = Header->getParent(); // Get an ordered list of blocks in the loop to help with the ordering of the // cloned blocks in the prolog/epilog code LoopBlocksDFS LoopBlocks(L); LoopBlocks.perform(LI); // // For each extra loop iteration, create a copy of the loop's basic blocks // and generate a condition that branches to the copy depending on the // number of 'left over' iterations. // std::vector NewBlocks; ValueToValueMapTy VMap; // For unroll factor 2 remainder loop will have 1 iterations. // Do not create 1 iteration loop. bool CreateRemainderLoop = (Count != 2); // Clone all the basic blocks in the loop. If Count is 2, we don't clone // the loop, otherwise we create a cloned loop to execute the extra // iterations. This function adds the appropriate CFG connections. BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit; BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI); // Insert the cloned blocks into the function. 
F->getBasicBlockList().splice(InsertBot->getIterator(), F->getBasicBlockList(), NewBlocks[0]->getIterator(), F->end()); // Loop structure should be the following: // Epilog Prolog // // PreHeader PreHeader // NewPreHeader PrologPreHeader // Header PrologHeader // ... ... // Latch PrologLatch // NewExit PrologExit // EpilogPreHeader NewPreHeader // EpilogHeader Header // ... ... // EpilogLatch Latch // Exit Exit // Rewrite the cloned instruction operands to use the values created when the // clone is created. for (BasicBlock *BB : NewBlocks) { for (Instruction &I : *BB) { RemapInstruction(&I, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); } } if (UseEpilogRemainder) { // Connect the epilog code to the original loop and update the // PHI functions. ConnectEpilog(L, ModVal, NewExit, Exit, PreHeader, EpilogPreHeader, NewPreHeader, VMap, DT, LI, PreserveLCSSA); // Update counter in loop for unrolling. // I should be multiply of Count. IRBuilder<> B2(NewPreHeader->getTerminator()); Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter"); BranchInst *LatchBR = cast(Latch->getTerminator()); B2.SetInsertPoint(LatchBR); PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter", Header->getFirstNonPHI()); Value *IdxSub = B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), NewIdx->getName() + ".nsub"); Value *IdxCmp; if (LatchBR->getSuccessor(0) == Header) IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp"); else IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp"); NewIdx->addIncoming(TestVal, NewPreHeader); NewIdx->addIncoming(IdxSub, Latch); LatchBR->setCondition(IdxCmp); } else { // Connect the prolog code to the original loop and update the // PHI functions. ConnectProlog(L, BECount, Count, PrologExit, PreHeader, NewPreHeader, VMap, DT, LI, PreserveLCSSA); } // If this loop is nested, then the loop unroller changes the code in the // parent loop, so the Scalar Evolution pass needs to be run again. 
if (Loop *ParentLoop = L->getParentLoop()) SE->forgetLoop(ParentLoop); NumRuntimeUnrolled++; return true; } Index: projects/clang400-import/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp =================================================================== --- projects/clang400-import/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp (revision 313893) +++ projects/clang400-import/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp (revision 313894) @@ -1,10540 +1,10560 @@ //===--- ExprConstant.cpp - Expression Constant Evaluator -----------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the Expr constant evaluator. // // Constant expression evaluation produces four main results: // // * A success/failure flag indicating whether constant folding was successful. // This is the 'bool' return value used by most of the code in this file. A // 'false' return value indicates that constant folding has failed, and any // appropriate diagnostic has already been produced. // // * An evaluated result, valid only if constant folding has not failed. // // * A flag indicating if evaluation encountered (unevaluated) side-effects. // These arise in cases such as (sideEffect(), 0) and (sideEffect() || 1), // where it is possible to determine the evaluated result regardless. // // * A set of notes indicating why the evaluation was not a constant expression // (under the C++11 / C++1y rules only, at the moment), or, if folding failed // too, why the expression could not be folded. // // If we are checking for a potential constant expression, failure to constant // fold a potential constant sub-expression will be indicated by a 'false' // return value (the expression could not be folded) and no diagnostic (the // expression is not necessarily non-constant). 
// //===----------------------------------------------------------------------===// #include "clang/AST/APValue.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTDiagnostic.h" #include "clang/AST/ASTLambda.h" #include "clang/AST/CharUnits.h" #include "clang/AST/Expr.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtVisitor.h" #include "clang/AST/TypeLoc.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/TargetInfo.h" #include "llvm/Support/raw_ostream.h" #include #include using namespace clang; using llvm::APSInt; using llvm::APFloat; static bool IsGlobalLValue(APValue::LValueBase B); namespace { struct LValue; struct CallStackFrame; struct EvalInfo; static QualType getType(APValue::LValueBase B) { if (!B) return QualType(); if (const ValueDecl *D = B.dyn_cast()) return D->getType(); const Expr *Base = B.get(); // For a materialized temporary, the type of the temporary we materialized // may not be the type of the expression. if (const MaterializeTemporaryExpr *MTE = dyn_cast(Base)) { SmallVector CommaLHSs; SmallVector Adjustments; const Expr *Temp = MTE->GetTemporaryExpr(); const Expr *Inner = Temp->skipRValueSubobjectAdjustments(CommaLHSs, Adjustments); // Keep any cv-qualifiers from the reference if we generated a temporary // for it directly. Otherwise use the type after adjustment. if (!Adjustments.empty()) return Inner->getType(); } return Base->getType(); } /// Get an LValue path entry, which is known to not be an array index, as a /// field or base class. static APValue::BaseOrMemberType getAsBaseOrMember(APValue::LValuePathEntry E) { APValue::BaseOrMemberType Value; Value.setFromOpaqueValue(E.BaseOrMember); return Value; } /// Get an LValue path entry, which is known to not be an array index, as a /// field declaration. 
static const FieldDecl *getAsField(APValue::LValuePathEntry E) { return dyn_cast(getAsBaseOrMember(E).getPointer()); } /// Get an LValue path entry, which is known to not be an array index, as a /// base class declaration. static const CXXRecordDecl *getAsBaseClass(APValue::LValuePathEntry E) { return dyn_cast(getAsBaseOrMember(E).getPointer()); } /// Determine whether this LValue path entry for a base class names a virtual /// base class. static bool isVirtualBaseClass(APValue::LValuePathEntry E) { return getAsBaseOrMember(E).getInt(); } /// Given a CallExpr, try to get the alloc_size attribute. May return null. static const AllocSizeAttr *getAllocSizeAttr(const CallExpr *CE) { const FunctionDecl *Callee = CE->getDirectCallee(); return Callee ? Callee->getAttr() : nullptr; } /// Attempts to unwrap a CallExpr (with an alloc_size attribute) from an Expr. /// This will look through a single cast. /// /// Returns null if we couldn't unwrap a function with alloc_size. static const CallExpr *tryUnwrapAllocSizeCall(const Expr *E) { if (!E->getType()->isPointerType()) return nullptr; E = E->IgnoreParens(); // If we're doing a variable assignment from e.g. malloc(N), there will // probably be a cast of some kind. Ignore it. if (const auto *Cast = dyn_cast(E)) E = Cast->getSubExpr()->IgnoreParens(); if (const auto *CE = dyn_cast(E)) return getAllocSizeAttr(CE) ? CE : nullptr; return nullptr; } /// Determines whether or not the given Base contains a call to a function /// with the alloc_size attribute. static bool isBaseAnAllocSizeCall(APValue::LValueBase Base) { const auto *E = Base.dyn_cast(); return E && E->getType()->isPointerType() && tryUnwrapAllocSizeCall(E); } /// Determines if an LValue with the given LValueBase will have an unsized /// array in its designator. /// Find the path length and type of the most-derived subobject in the given /// path, and find the size of the containing array, if any. 
static unsigned findMostDerivedSubobject(ASTContext &Ctx, APValue::LValueBase Base, ArrayRef Path, uint64_t &ArraySize, QualType &Type, bool &IsArray) { // This only accepts LValueBases from APValues, and APValues don't support // arrays that lack size info. assert(!isBaseAnAllocSizeCall(Base) && "Unsized arrays shouldn't appear here"); unsigned MostDerivedLength = 0; Type = getType(Base); for (unsigned I = 0, N = Path.size(); I != N; ++I) { if (Type->isArrayType()) { const ConstantArrayType *CAT = cast(Ctx.getAsArrayType(Type)); Type = CAT->getElementType(); ArraySize = CAT->getSize().getZExtValue(); MostDerivedLength = I + 1; IsArray = true; } else if (Type->isAnyComplexType()) { const ComplexType *CT = Type->castAs(); Type = CT->getElementType(); ArraySize = 2; MostDerivedLength = I + 1; IsArray = true; } else if (const FieldDecl *FD = getAsField(Path[I])) { Type = FD->getType(); ArraySize = 0; MostDerivedLength = I + 1; IsArray = false; } else { // Path[I] describes a base class. ArraySize = 0; IsArray = false; } } return MostDerivedLength; } // The order of this enum is important for diagnostics. enum CheckSubobjectKind { CSK_Base, CSK_Derived, CSK_Field, CSK_ArrayToPointer, CSK_ArrayIndex, CSK_This, CSK_Real, CSK_Imag }; /// A path from a glvalue to a subobject of that glvalue. struct SubobjectDesignator { /// True if the subobject was named in a manner not supported by C++11. Such /// lvalues can still be folded, but they are not core constant expressions /// and we cannot perform lvalue-to-rvalue conversions on them. unsigned Invalid : 1; /// Is this a pointer one past the end of an object? unsigned IsOnePastTheEnd : 1; /// Indicator of whether the first entry is an unsized array. unsigned FirstEntryIsAnUnsizedArray : 1; /// Indicator of whether the most-derived object is an array element. unsigned MostDerivedIsArrayElement : 1; /// The length of the path to the most-derived object of which this is a /// subobject. 
unsigned MostDerivedPathLength : 28; /// The size of the array of which the most-derived object is an element. /// This will always be 0 if the most-derived object is not an array /// element. 0 is not an indicator of whether or not the most-derived object /// is an array, however, because 0-length arrays are allowed. /// /// If the current array is an unsized array, the value of this is /// undefined. uint64_t MostDerivedArraySize; /// The type of the most derived object referred to by this address. QualType MostDerivedType; typedef APValue::LValuePathEntry PathEntry; /// The entries on the path from the glvalue to the designated subobject. SmallVector Entries; SubobjectDesignator() : Invalid(true) {} explicit SubobjectDesignator(QualType T) : Invalid(false), IsOnePastTheEnd(false), FirstEntryIsAnUnsizedArray(false), MostDerivedIsArrayElement(false), MostDerivedPathLength(0), MostDerivedArraySize(0), MostDerivedType(T) {} SubobjectDesignator(ASTContext &Ctx, const APValue &V) : Invalid(!V.isLValue() || !V.hasLValuePath()), IsOnePastTheEnd(false), FirstEntryIsAnUnsizedArray(false), MostDerivedIsArrayElement(false), MostDerivedPathLength(0), MostDerivedArraySize(0) { assert(V.isLValue() && "Non-LValue used to make an LValue designator?"); if (!Invalid) { IsOnePastTheEnd = V.isLValueOnePastTheEnd(); ArrayRef VEntries = V.getLValuePath(); Entries.insert(Entries.end(), VEntries.begin(), VEntries.end()); if (V.getLValueBase()) { bool IsArray = false; MostDerivedPathLength = findMostDerivedSubobject( Ctx, V.getLValueBase(), V.getLValuePath(), MostDerivedArraySize, MostDerivedType, IsArray); MostDerivedIsArrayElement = IsArray; } } } void setInvalid() { Invalid = true; Entries.clear(); } /// Determine whether the most derived subobject is an array without a /// known bound. 
bool isMostDerivedAnUnsizedArray() const { assert(!Invalid && "Calling this makes no sense on invalid designators"); return Entries.size() == 1 && FirstEntryIsAnUnsizedArray; } /// Determine what the most derived array's size is. Results in an assertion /// failure if the most derived array lacks a size. uint64_t getMostDerivedArraySize() const { assert(!isMostDerivedAnUnsizedArray() && "Unsized array has no size"); return MostDerivedArraySize; } /// Determine whether this is a one-past-the-end pointer. bool isOnePastTheEnd() const { assert(!Invalid); if (IsOnePastTheEnd) return true; if (!isMostDerivedAnUnsizedArray() && MostDerivedIsArrayElement && Entries[MostDerivedPathLength - 1].ArrayIndex == MostDerivedArraySize) return true; return false; } /// Check that this refers to a valid subobject. bool isValidSubobject() const { if (Invalid) return false; return !isOnePastTheEnd(); } /// Check that this refers to a valid subobject, and if not, produce a /// relevant diagnostic and set the designator as invalid. bool checkSubobject(EvalInfo &Info, const Expr *E, CheckSubobjectKind CSK); /// Update this designator to refer to the first element within this array. void addArrayUnchecked(const ConstantArrayType *CAT) { PathEntry Entry; Entry.ArrayIndex = 0; Entries.push_back(Entry); // This is a most-derived object. MostDerivedType = CAT->getElementType(); MostDerivedIsArrayElement = true; MostDerivedArraySize = CAT->getSize().getZExtValue(); MostDerivedPathLength = Entries.size(); } /// Update this designator to refer to the first element within the array of /// elements of type T. This is an array of unknown size. void addUnsizedArrayUnchecked(QualType ElemTy) { PathEntry Entry; Entry.ArrayIndex = 0; Entries.push_back(Entry); MostDerivedType = ElemTy; MostDerivedIsArrayElement = true; // The value in MostDerivedArraySize is undefined in this case. So, set it // to an arbitrary value that's likely to loudly break things if it's // used. 
MostDerivedArraySize = std::numeric_limits::max() / 2; MostDerivedPathLength = Entries.size(); } /// Update this designator to refer to the given base or member of this /// object. void addDeclUnchecked(const Decl *D, bool Virtual = false) { PathEntry Entry; APValue::BaseOrMemberType Value(D, Virtual); Entry.BaseOrMember = Value.getOpaqueValue(); Entries.push_back(Entry); // If this isn't a base class, it's a new most-derived object. if (const FieldDecl *FD = dyn_cast(D)) { MostDerivedType = FD->getType(); MostDerivedIsArrayElement = false; MostDerivedArraySize = 0; MostDerivedPathLength = Entries.size(); } } /// Update this designator to refer to the given complex component. void addComplexUnchecked(QualType EltTy, bool Imag) { PathEntry Entry; Entry.ArrayIndex = Imag; Entries.push_back(Entry); // This is technically a most-derived object, though in practice this // is unlikely to matter. MostDerivedType = EltTy; MostDerivedIsArrayElement = true; MostDerivedArraySize = 2; MostDerivedPathLength = Entries.size(); } void diagnosePointerArithmetic(EvalInfo &Info, const Expr *E, uint64_t N); /// Add N to the address of this subobject. void adjustIndex(EvalInfo &Info, const Expr *E, uint64_t N) { if (Invalid) return; if (isMostDerivedAnUnsizedArray()) { // Can't verify -- trust that the user is doing the right thing (or if // not, trust that the caller will catch the bad behavior). Entries.back().ArrayIndex += N; return; } if (MostDerivedPathLength == Entries.size() && MostDerivedIsArrayElement) { Entries.back().ArrayIndex += N; if (Entries.back().ArrayIndex > getMostDerivedArraySize()) { diagnosePointerArithmetic(Info, E, Entries.back().ArrayIndex); setInvalid(); } return; } // [expr.add]p4: For the purposes of these operators, a pointer to a // nonarray object behaves the same as a pointer to the first element of // an array of length one with the type of the object as its element type. 
if (IsOnePastTheEnd && N == (uint64_t)-1) IsOnePastTheEnd = false; else if (!IsOnePastTheEnd && N == 1) IsOnePastTheEnd = true; else if (N != 0) { diagnosePointerArithmetic(Info, E, uint64_t(IsOnePastTheEnd) + N); setInvalid(); } } }; /// A stack frame in the constexpr call stack. struct CallStackFrame { EvalInfo &Info; /// Parent - The caller of this stack frame. CallStackFrame *Caller; /// Callee - The function which was called. const FunctionDecl *Callee; /// This - The binding for the this pointer in this call, if any. const LValue *This; /// Arguments - Parameter bindings for this function call, indexed by /// parameters' function scope indices. APValue *Arguments; // Note that we intentionally use std::map here so that references to // values are stable. typedef std::map MapTy; typedef MapTy::const_iterator temp_iterator; /// Temporaries - Temporary lvalues materialized within this stack frame. MapTy Temporaries; /// CallLoc - The location of the call expression for this call. SourceLocation CallLoc; /// Index - The call index of this call. unsigned Index; CallStackFrame(EvalInfo &Info, SourceLocation CallLoc, const FunctionDecl *Callee, const LValue *This, APValue *Arguments); ~CallStackFrame(); APValue *getTemporary(const void *Key) { MapTy::iterator I = Temporaries.find(Key); return I == Temporaries.end() ? nullptr : &I->second; } APValue &createTemporary(const void *Key, bool IsLifetimeExtended); }; /// Temporarily override 'this'. class ThisOverrideRAII { public: ThisOverrideRAII(CallStackFrame &Frame, const LValue *NewThis, bool Enable) : Frame(Frame), OldThis(Frame.This) { if (Enable) Frame.This = NewThis; } ~ThisOverrideRAII() { Frame.This = OldThis; } private: CallStackFrame &Frame; const LValue *OldThis; }; /// A partial diagnostic which we might know in advance that we are not going /// to emit. 
class OptionalDiagnostic { PartialDiagnostic *Diag; public: explicit OptionalDiagnostic(PartialDiagnostic *Diag = nullptr) : Diag(Diag) {} template OptionalDiagnostic &operator<<(const T &v) { if (Diag) *Diag << v; return *this; } OptionalDiagnostic &operator<<(const APSInt &I) { if (Diag) { SmallVector Buffer; I.toString(Buffer); *Diag << StringRef(Buffer.data(), Buffer.size()); } return *this; } OptionalDiagnostic &operator<<(const APFloat &F) { if (Diag) { // FIXME: Force the precision of the source value down so we don't // print digits which are usually useless (we don't really care here if // we truncate a digit by accident in edge cases). Ideally, // APFloat::toString would automatically print the shortest // representation which rounds to the correct value, but it's a bit // tricky to implement. unsigned precision = llvm::APFloat::semanticsPrecision(F.getSemantics()); precision = (precision * 59 + 195) / 196; SmallVector Buffer; F.toString(Buffer, precision); *Diag << StringRef(Buffer.data(), Buffer.size()); } return *this; } }; /// A cleanup, and a flag indicating whether it is lifetime-extended. class Cleanup { llvm::PointerIntPair Value; public: Cleanup(APValue *Val, bool IsLifetimeExtended) : Value(Val, IsLifetimeExtended) {} bool isLifetimeExtended() const { return Value.getInt(); } void endLifetime() { *Value.getPointer() = APValue(); } }; /// EvalInfo - This is a private struct used by the evaluator to capture /// information about a subexpression as it is folded. It retains information /// about the AST context, but also maintains information about the folded /// expression. /// /// If an expression could be evaluated, it is still possible it is not a C /// "integer constant expression" or constant expression. If not, this struct /// captures information about how and why not. /// /// One bit of information passed *into* the request for constant folding /// indicates whether the subexpression is "evaluated" or not according to C /// rules. 
For example, the RHS of (0 && foo()) is not evaluated. We can /// evaluate the expression regardless of what the RHS is, but C only allows /// certain things in certain situations. struct LLVM_ALIGNAS(/*alignof(uint64_t)*/ 8) EvalInfo { ASTContext &Ctx; /// EvalStatus - Contains information about the evaluation. Expr::EvalStatus &EvalStatus; /// CurrentCall - The top of the constexpr call stack. CallStackFrame *CurrentCall; /// CallStackDepth - The number of calls in the call stack right now. unsigned CallStackDepth; /// NextCallIndex - The next call index to assign. unsigned NextCallIndex; /// StepsLeft - The remaining number of evaluation steps we're permitted /// to perform. This is essentially a limit for the number of statements /// we will evaluate. unsigned StepsLeft; /// BottomFrame - The frame in which evaluation started. This must be /// initialized after CurrentCall and CallStackDepth. CallStackFrame BottomFrame; /// A stack of values whose lifetimes end at the end of some surrounding /// evaluation frame. llvm::SmallVector CleanupStack; /// EvaluatingDecl - This is the declaration whose initializer is being /// evaluated, if any. APValue::LValueBase EvaluatingDecl; /// EvaluatingDeclValue - This is the value being constructed for the /// declaration whose initializer is being evaluated, if any. APValue *EvaluatingDeclValue; /// The current array initialization index, if we're performing array /// initialization. uint64_t ArrayInitIndex = -1; /// HasActiveDiagnostic - Was the previous diagnostic stored? If so, further /// notes attached to it will also be stored, otherwise they will not be. bool HasActiveDiagnostic; /// \brief Have we emitted a diagnostic explaining why we couldn't constant /// fold (not just why it's not strictly a constant expression)? bool HasFoldFailureDiagnostic; /// \brief Whether or not we're currently speculatively evaluating. bool IsSpeculativelyEvaluating; enum EvaluationMode { /// Evaluate as a constant expression. 
Stop if we find that the expression /// is not a constant expression. EM_ConstantExpression, /// Evaluate as a potential constant expression. Keep going if we hit a /// construct that we can't evaluate yet (because we don't yet know the /// value of something) but stop if we hit something that could never be /// a constant expression. EM_PotentialConstantExpression, /// Fold the expression to a constant. Stop if we hit a side-effect that /// we can't model. EM_ConstantFold, /// Evaluate the expression looking for integer overflow and similar /// issues. Don't worry about side-effects, and try to visit all /// subexpressions. EM_EvaluateForOverflow, /// Evaluate in any way we know how. Don't worry about side-effects that /// can't be modeled. EM_IgnoreSideEffects, /// Evaluate as a constant expression. Stop if we find that the expression /// is not a constant expression. Some expressions can be retried in the /// optimizer if we don't constant fold them here, but in an unevaluated /// context we try to fold them immediately since the optimizer never /// gets a chance to look at it. EM_ConstantExpressionUnevaluated, /// Evaluate as a potential constant expression. Keep going if we hit a /// construct that we can't evaluate yet (because we don't yet know the /// value of something) but stop if we hit something that could never be /// a constant expression. Some expressions can be retried in the /// optimizer if we don't constant fold them here, but in an unevaluated /// context we try to fold them immediately since the optimizer never /// gets a chance to look at it. EM_PotentialConstantExpressionUnevaluated, - /// Evaluate as a constant expression. Continue evaluating if either: - /// - We find a MemberExpr with a base that can't be evaluated. - /// - We find a variable initialized with a call to a function that has - /// the alloc_size attribute on it. + /// Evaluate as a constant expression. 
In certain scenarios, if: + /// - we find a MemberExpr with a base that can't be evaluated, or + /// - we find a variable initialized with a call to a function that has + /// the alloc_size attribute on it + /// then we may consider evaluation to have succeeded. + /// /// In either case, the LValue returned shall have an invalid base; in the /// former, the base will be the invalid MemberExpr, in the latter, the /// base will be either the alloc_size CallExpr or a CastExpr wrapping /// said CallExpr. EM_OffsetFold, } EvalMode; /// Are we checking whether the expression is a potential constant /// expression? bool checkingPotentialConstantExpression() const { return EvalMode == EM_PotentialConstantExpression || EvalMode == EM_PotentialConstantExpressionUnevaluated; } /// Are we checking an expression for overflow? // FIXME: We should check for any kind of undefined or suspicious behavior // in such constructs, not just overflow. bool checkingForOverflow() { return EvalMode == EM_EvaluateForOverflow; } EvalInfo(const ASTContext &C, Expr::EvalStatus &S, EvaluationMode Mode) : Ctx(const_cast(C)), EvalStatus(S), CurrentCall(nullptr), CallStackDepth(0), NextCallIndex(1), StepsLeft(getLangOpts().ConstexprStepLimit), BottomFrame(*this, SourceLocation(), nullptr, nullptr, nullptr), EvaluatingDecl((const ValueDecl *)nullptr), EvaluatingDeclValue(nullptr), HasActiveDiagnostic(false), HasFoldFailureDiagnostic(false), IsSpeculativelyEvaluating(false), EvalMode(Mode) {} void setEvaluatingDecl(APValue::LValueBase Base, APValue &Value) { EvaluatingDecl = Base; EvaluatingDeclValue = &Value; } const LangOptions &getLangOpts() const { return Ctx.getLangOpts(); } bool CheckCallLimit(SourceLocation Loc) { // Don't perform any constexpr calls (other than the call we're checking) // when checking a potential constant expression. if (checkingPotentialConstantExpression() && CallStackDepth > 1) return false; if (NextCallIndex == 0) { // NextCallIndex has wrapped around. 
FFDiag(Loc, diag::note_constexpr_call_limit_exceeded); return false; } if (CallStackDepth <= getLangOpts().ConstexprCallDepth) return true; FFDiag(Loc, diag::note_constexpr_depth_limit_exceeded) << getLangOpts().ConstexprCallDepth; return false; } CallStackFrame *getCallFrame(unsigned CallIndex) { assert(CallIndex && "no call index in getCallFrame"); // We will eventually hit BottomFrame, which has Index 1, so Frame can't // be null in this loop. CallStackFrame *Frame = CurrentCall; while (Frame->Index > CallIndex) Frame = Frame->Caller; return (Frame->Index == CallIndex) ? Frame : nullptr; } bool nextStep(const Stmt *S) { if (!StepsLeft) { FFDiag(S->getLocStart(), diag::note_constexpr_step_limit_exceeded); return false; } --StepsLeft; return true; } private: /// Add a diagnostic to the diagnostics list. PartialDiagnostic &addDiag(SourceLocation Loc, diag::kind DiagId) { PartialDiagnostic PD(DiagId, Ctx.getDiagAllocator()); EvalStatus.Diag->push_back(std::make_pair(Loc, PD)); return EvalStatus.Diag->back().second; } /// Add notes containing a call stack to the current point of evaluation. void addCallStack(unsigned Limit); private: OptionalDiagnostic Diag(SourceLocation Loc, diag::kind DiagId, unsigned ExtraNotes, bool IsCCEDiag) { if (EvalStatus.Diag) { // If we have a prior diagnostic, it will be noting that the expression // isn't a constant expression. This diagnostic is more important, // unless we require this evaluation to produce a constant expression. // // FIXME: We might want to show both diagnostics to the user in // EM_ConstantFold mode. if (!EvalStatus.Diag->empty()) { switch (EvalMode) { case EM_ConstantFold: case EM_IgnoreSideEffects: case EM_EvaluateForOverflow: if (!HasFoldFailureDiagnostic) break; // We've already failed to fold something. Keep that diagnostic. 
case EM_ConstantExpression: case EM_PotentialConstantExpression: case EM_ConstantExpressionUnevaluated: case EM_PotentialConstantExpressionUnevaluated: case EM_OffsetFold: HasActiveDiagnostic = false; return OptionalDiagnostic(); } } unsigned CallStackNotes = CallStackDepth - 1; unsigned Limit = Ctx.getDiagnostics().getConstexprBacktraceLimit(); if (Limit) CallStackNotes = std::min(CallStackNotes, Limit + 1); if (checkingPotentialConstantExpression()) CallStackNotes = 0; HasActiveDiagnostic = true; HasFoldFailureDiagnostic = !IsCCEDiag; EvalStatus.Diag->clear(); EvalStatus.Diag->reserve(1 + ExtraNotes + CallStackNotes); addDiag(Loc, DiagId); if (!checkingPotentialConstantExpression()) addCallStack(Limit); return OptionalDiagnostic(&(*EvalStatus.Diag)[0].second); } HasActiveDiagnostic = false; return OptionalDiagnostic(); } public: // Diagnose that the evaluation could not be folded (FF => FoldFailure) OptionalDiagnostic FFDiag(SourceLocation Loc, diag::kind DiagId = diag::note_invalid_subexpr_in_const_expr, unsigned ExtraNotes = 0) { return Diag(Loc, DiagId, ExtraNotes, false); } OptionalDiagnostic FFDiag(const Expr *E, diag::kind DiagId = diag::note_invalid_subexpr_in_const_expr, unsigned ExtraNotes = 0) { if (EvalStatus.Diag) return Diag(E->getExprLoc(), DiagId, ExtraNotes, /*IsCCEDiag*/false); HasActiveDiagnostic = false; return OptionalDiagnostic(); } /// Diagnose that the evaluation does not produce a C++11 core constant /// expression. /// /// FIXME: Stop evaluating if we're in EM_ConstantExpression or /// EM_PotentialConstantExpression mode and we produce one of these. OptionalDiagnostic CCEDiag(SourceLocation Loc, diag::kind DiagId = diag::note_invalid_subexpr_in_const_expr, unsigned ExtraNotes = 0) { // Don't override a previous diagnostic. Don't bother collecting // diagnostics if we're evaluating for overflow. 
if (!EvalStatus.Diag || !EvalStatus.Diag->empty()) { HasActiveDiagnostic = false; return OptionalDiagnostic(); } return Diag(Loc, DiagId, ExtraNotes, true); } OptionalDiagnostic CCEDiag(const Expr *E, diag::kind DiagId = diag::note_invalid_subexpr_in_const_expr, unsigned ExtraNotes = 0) { return CCEDiag(E->getExprLoc(), DiagId, ExtraNotes); } /// Add a note to a prior diagnostic. OptionalDiagnostic Note(SourceLocation Loc, diag::kind DiagId) { if (!HasActiveDiagnostic) return OptionalDiagnostic(); return OptionalDiagnostic(&addDiag(Loc, DiagId)); } /// Add a stack of notes to a prior diagnostic. void addNotes(ArrayRef Diags) { if (HasActiveDiagnostic) { EvalStatus.Diag->insert(EvalStatus.Diag->end(), Diags.begin(), Diags.end()); } } /// Should we continue evaluation after encountering a side-effect that we /// couldn't model? bool keepEvaluatingAfterSideEffect() { switch (EvalMode) { case EM_PotentialConstantExpression: case EM_PotentialConstantExpressionUnevaluated: case EM_EvaluateForOverflow: case EM_IgnoreSideEffects: return true; case EM_ConstantExpression: case EM_ConstantExpressionUnevaluated: case EM_ConstantFold: case EM_OffsetFold: return false; } llvm_unreachable("Missed EvalMode case"); } /// Note that we have had a side-effect, and determine whether we should /// keep evaluating. bool noteSideEffect() { EvalStatus.HasSideEffects = true; return keepEvaluatingAfterSideEffect(); } /// Should we continue evaluation after encountering undefined behavior? 
bool keepEvaluatingAfterUndefinedBehavior() { switch (EvalMode) { case EM_EvaluateForOverflow: case EM_IgnoreSideEffects: case EM_ConstantFold: case EM_OffsetFold: return true; case EM_PotentialConstantExpression: case EM_PotentialConstantExpressionUnevaluated: case EM_ConstantExpression: case EM_ConstantExpressionUnevaluated: return false; } llvm_unreachable("Missed EvalMode case"); } /// Note that we hit something that was technically undefined behavior, but /// that we can evaluate past it (such as signed overflow or floating-point /// division by zero.) bool noteUndefinedBehavior() { EvalStatus.HasUndefinedBehavior = true; return keepEvaluatingAfterUndefinedBehavior(); } /// Should we continue evaluation as much as possible after encountering a /// construct which can't be reduced to a value? bool keepEvaluatingAfterFailure() { if (!StepsLeft) return false; switch (EvalMode) { case EM_PotentialConstantExpression: case EM_PotentialConstantExpressionUnevaluated: case EM_EvaluateForOverflow: return true; case EM_ConstantExpression: case EM_ConstantExpressionUnevaluated: case EM_ConstantFold: case EM_IgnoreSideEffects: case EM_OffsetFold: return false; } llvm_unreachable("Missed EvalMode case"); } /// Notes that we failed to evaluate an expression that other expressions /// directly depend on, and determine if we should keep evaluating. This /// should only be called if we actually intend to keep evaluating. /// /// Call noteSideEffect() instead if we may be able to ignore the value that /// we failed to evaluate, e.g. if we failed to evaluate Foo() in: /// /// (Foo(), 1) // use noteSideEffect /// (Foo() || true) // use noteSideEffect /// Foo() + 1 // use noteFailure LLVM_NODISCARD bool noteFailure() { // Failure when evaluating some expression often means there is some // subexpression whose evaluation was skipped. 
Therefore, (because we // don't track whether we skipped an expression when unwinding after an // evaluation failure) every evaluation failure that bubbles up from a // subexpression implies that a side-effect has potentially happened. We // skip setting the HasSideEffects flag to true until we decide to // continue evaluating after that point, which happens here. bool KeepGoing = keepEvaluatingAfterFailure(); EvalStatus.HasSideEffects |= KeepGoing; return KeepGoing; } - bool allowInvalidBaseExpr() const { - return EvalMode == EM_OffsetFold; - } - class ArrayInitLoopIndex { EvalInfo &Info; uint64_t OuterIndex; public: ArrayInitLoopIndex(EvalInfo &Info) : Info(Info), OuterIndex(Info.ArrayInitIndex) { Info.ArrayInitIndex = 0; } ~ArrayInitLoopIndex() { Info.ArrayInitIndex = OuterIndex; } operator uint64_t&() { return Info.ArrayInitIndex; } }; }; /// Object used to treat all foldable expressions as constant expressions. struct FoldConstant { EvalInfo &Info; bool Enabled; bool HadNoPriorDiags; EvalInfo::EvaluationMode OldMode; explicit FoldConstant(EvalInfo &Info, bool Enabled) : Info(Info), Enabled(Enabled), HadNoPriorDiags(Info.EvalStatus.Diag && Info.EvalStatus.Diag->empty() && !Info.EvalStatus.HasSideEffects), OldMode(Info.EvalMode) { if (Enabled && (Info.EvalMode == EvalInfo::EM_ConstantExpression || Info.EvalMode == EvalInfo::EM_ConstantExpressionUnevaluated)) Info.EvalMode = EvalInfo::EM_ConstantFold; } void keepDiagnostics() { Enabled = false; } ~FoldConstant() { if (Enabled && HadNoPriorDiags && !Info.EvalStatus.Diag->empty() && !Info.EvalStatus.HasSideEffects) Info.EvalStatus.Diag->clear(); Info.EvalMode = OldMode; } }; /// RAII object used to treat the current evaluation as the correct pointer /// offset fold for the current EvalMode struct FoldOffsetRAII { EvalInfo &Info; EvalInfo::EvaluationMode OldMode; explicit FoldOffsetRAII(EvalInfo &Info) : Info(Info), OldMode(Info.EvalMode) { if (!Info.checkingPotentialConstantExpression()) Info.EvalMode = 
EvalInfo::EM_OffsetFold; } ~FoldOffsetRAII() { Info.EvalMode = OldMode; } }; /// RAII object used to optionally suppress diagnostics and side-effects from /// a speculative evaluation. class SpeculativeEvaluationRAII { /// Pair of EvalInfo, and a bit that stores whether or not we were /// speculatively evaluating when we created this RAII. llvm::PointerIntPair InfoAndOldSpecEval; Expr::EvalStatus Old; void moveFromAndCancel(SpeculativeEvaluationRAII &&Other) { InfoAndOldSpecEval = Other.InfoAndOldSpecEval; Old = Other.Old; Other.InfoAndOldSpecEval.setPointer(nullptr); } void maybeRestoreState() { EvalInfo *Info = InfoAndOldSpecEval.getPointer(); if (!Info) return; Info->EvalStatus = Old; Info->IsSpeculativelyEvaluating = InfoAndOldSpecEval.getInt(); } public: SpeculativeEvaluationRAII() = default; SpeculativeEvaluationRAII( EvalInfo &Info, SmallVectorImpl *NewDiag = nullptr) : InfoAndOldSpecEval(&Info, Info.IsSpeculativelyEvaluating), Old(Info.EvalStatus) { Info.EvalStatus.Diag = NewDiag; Info.IsSpeculativelyEvaluating = true; } SpeculativeEvaluationRAII(const SpeculativeEvaluationRAII &Other) = delete; SpeculativeEvaluationRAII(SpeculativeEvaluationRAII &&Other) { moveFromAndCancel(std::move(Other)); } SpeculativeEvaluationRAII &operator=(SpeculativeEvaluationRAII &&Other) { maybeRestoreState(); moveFromAndCancel(std::move(Other)); return *this; } ~SpeculativeEvaluationRAII() { maybeRestoreState(); } }; /// RAII object wrapping a full-expression or block scope, and handling /// the ending of the lifetime of temporaries created within it. template class ScopeRAII { EvalInfo &Info; unsigned OldStackSize; public: ScopeRAII(EvalInfo &Info) : Info(Info), OldStackSize(Info.CleanupStack.size()) {} ~ScopeRAII() { // Body moved to a static method to encourage the compiler to inline away // instances of this class. 
cleanup(Info, OldStackSize); } private: static void cleanup(EvalInfo &Info, unsigned OldStackSize) { unsigned NewEnd = OldStackSize; for (unsigned I = OldStackSize, N = Info.CleanupStack.size(); I != N; ++I) { if (IsFullExpression && Info.CleanupStack[I].isLifetimeExtended()) { // Full-expression cleanup of a lifetime-extended temporary: nothing // to do, just move this cleanup to the right place in the stack. std::swap(Info.CleanupStack[I], Info.CleanupStack[NewEnd]); ++NewEnd; } else { // End the lifetime of the object. Info.CleanupStack[I].endLifetime(); } } Info.CleanupStack.erase(Info.CleanupStack.begin() + NewEnd, Info.CleanupStack.end()); } }; typedef ScopeRAII BlockScopeRAII; typedef ScopeRAII FullExpressionRAII; } bool SubobjectDesignator::checkSubobject(EvalInfo &Info, const Expr *E, CheckSubobjectKind CSK) { if (Invalid) return false; if (isOnePastTheEnd()) { Info.CCEDiag(E, diag::note_constexpr_past_end_subobject) << CSK; setInvalid(); return false; } return true; } void SubobjectDesignator::diagnosePointerArithmetic(EvalInfo &Info, const Expr *E, uint64_t N) { // If we're complaining, we must be able to statically determine the size of // the most derived array. 
if (MostDerivedPathLength == Entries.size() && MostDerivedIsArrayElement) Info.CCEDiag(E, diag::note_constexpr_array_index) << static_cast(N) << /*array*/ 0 << static_cast(getMostDerivedArraySize()); else Info.CCEDiag(E, diag::note_constexpr_array_index) << static_cast(N) << /*non-array*/ 1; setInvalid(); } CallStackFrame::CallStackFrame(EvalInfo &Info, SourceLocation CallLoc, const FunctionDecl *Callee, const LValue *This, APValue *Arguments) : Info(Info), Caller(Info.CurrentCall), Callee(Callee), This(This), Arguments(Arguments), CallLoc(CallLoc), Index(Info.NextCallIndex++) { Info.CurrentCall = this; ++Info.CallStackDepth; } CallStackFrame::~CallStackFrame() { assert(Info.CurrentCall == this && "calls retired out of order"); --Info.CallStackDepth; Info.CurrentCall = Caller; } APValue &CallStackFrame::createTemporary(const void *Key, bool IsLifetimeExtended) { APValue &Result = Temporaries[Key]; assert(Result.isUninit() && "temporary created multiple times"); Info.CleanupStack.push_back(Cleanup(&Result, IsLifetimeExtended)); return Result; } static void describeCall(CallStackFrame *Frame, raw_ostream &Out); void EvalInfo::addCallStack(unsigned Limit) { // Determine which calls to skip, if any. unsigned ActiveCalls = CallStackDepth - 1; unsigned SkipStart = ActiveCalls, SkipEnd = SkipStart; if (Limit && Limit < ActiveCalls) { SkipStart = Limit / 2 + Limit % 2; SkipEnd = ActiveCalls - Limit / 2; } // Walk the call stack and add the diagnostics. unsigned CallIdx = 0; for (CallStackFrame *Frame = CurrentCall; Frame != &BottomFrame; Frame = Frame->Caller, ++CallIdx) { // Skip this call? if (CallIdx >= SkipStart && CallIdx < SkipEnd) { if (CallIdx == SkipStart) { // Note that we're skipping calls. addDiag(Frame->CallLoc, diag::note_constexpr_calls_suppressed) << unsigned(ActiveCalls - Limit); } continue; } // Use a different note for an inheriting constructor, because from the // user's perspective it's not really a function at all. 
if (auto *CD = dyn_cast_or_null(Frame->Callee)) { if (CD->isInheritingConstructor()) { addDiag(Frame->CallLoc, diag::note_constexpr_inherited_ctor_call_here) << CD->getParent(); continue; } } SmallVector Buffer; llvm::raw_svector_ostream Out(Buffer); describeCall(Frame, Out); addDiag(Frame->CallLoc, diag::note_constexpr_call_here) << Out.str(); } } namespace { struct ComplexValue { private: bool IsInt; public: APSInt IntReal, IntImag; APFloat FloatReal, FloatImag; ComplexValue() : FloatReal(APFloat::Bogus()), FloatImag(APFloat::Bogus()) {} void makeComplexFloat() { IsInt = false; } bool isComplexFloat() const { return !IsInt; } APFloat &getComplexFloatReal() { return FloatReal; } APFloat &getComplexFloatImag() { return FloatImag; } void makeComplexInt() { IsInt = true; } bool isComplexInt() const { return IsInt; } APSInt &getComplexIntReal() { return IntReal; } APSInt &getComplexIntImag() { return IntImag; } void moveInto(APValue &v) const { if (isComplexFloat()) v = APValue(FloatReal, FloatImag); else v = APValue(IntReal, IntImag); } void setFrom(const APValue &v) { assert(v.isComplexFloat() || v.isComplexInt()); if (v.isComplexFloat()) { makeComplexFloat(); FloatReal = v.getComplexFloatReal(); FloatImag = v.getComplexFloatImag(); } else { makeComplexInt(); IntReal = v.getComplexIntReal(); IntImag = v.getComplexIntImag(); } } }; struct LValue { APValue::LValueBase Base; CharUnits Offset; unsigned InvalidBase : 1; unsigned CallIndex : 31; SubobjectDesignator Designator; bool IsNullPtr; const APValue::LValueBase getLValueBase() const { return Base; } CharUnits &getLValueOffset() { return Offset; } const CharUnits &getLValueOffset() const { return Offset; } unsigned getLValueCallIndex() const { return CallIndex; } SubobjectDesignator &getLValueDesignator() { return Designator; } const SubobjectDesignator &getLValueDesignator() const { return Designator;} bool isNullPointer() const { return IsNullPtr;} void moveInto(APValue &V) const { if (Designator.Invalid) V = 
APValue(Base, Offset, APValue::NoLValuePath(), CallIndex, IsNullPtr); else { assert(!InvalidBase && "APValues can't handle invalid LValue bases"); assert(!Designator.FirstEntryIsAnUnsizedArray && "Unsized array with a valid base?"); V = APValue(Base, Offset, Designator.Entries, Designator.IsOnePastTheEnd, CallIndex, IsNullPtr); } } void setFrom(ASTContext &Ctx, const APValue &V) { assert(V.isLValue() && "Setting LValue from a non-LValue?"); Base = V.getLValueBase(); Offset = V.getLValueOffset(); InvalidBase = false; CallIndex = V.getLValueCallIndex(); Designator = SubobjectDesignator(Ctx, V); IsNullPtr = V.isNullPointer(); } void set(APValue::LValueBase B, unsigned I = 0, bool BInvalid = false, bool IsNullPtr_ = false, uint64_t Offset_ = 0) { #ifndef NDEBUG // We only allow a few types of invalid bases. Enforce that here. if (BInvalid) { const auto *E = B.get(); assert((isa(E) || tryUnwrapAllocSizeCall(E)) && "Unexpected type of invalid base"); } #endif Base = B; Offset = CharUnits::fromQuantity(Offset_); InvalidBase = BInvalid; CallIndex = I; Designator = SubobjectDesignator(getType(B)); IsNullPtr = IsNullPtr_; } void setInvalid(APValue::LValueBase B, unsigned I = 0) { set(B, I, true); } // Check that this LValue is not based on a null pointer. If it is, produce // a diagnostic and mark the designator as invalid. bool checkNullPointer(EvalInfo &Info, const Expr *E, CheckSubobjectKind CSK) { if (Designator.Invalid) return false; if (IsNullPtr) { Info.CCEDiag(E, diag::note_constexpr_null_subobject) << CSK; Designator.setInvalid(); return false; } return true; } // Check this LValue refers to an object. If not, set the designator to be // invalid and emit a diagnostic. 
bool checkSubobject(EvalInfo &Info, const Expr *E, CheckSubobjectKind CSK) { return (CSK == CSK_ArrayToPointer || checkNullPointer(Info, E, CSK)) && Designator.checkSubobject(Info, E, CSK); } void addDecl(EvalInfo &Info, const Expr *E, const Decl *D, bool Virtual = false) { if (checkSubobject(Info, E, isa(D) ? CSK_Field : CSK_Base)) Designator.addDeclUnchecked(D, Virtual); } void addUnsizedArray(EvalInfo &Info, QualType ElemTy) { assert(Designator.Entries.empty() && getType(Base)->isPointerType()); assert(isBaseAnAllocSizeCall(Base) && "Only alloc_size bases can have unsized arrays"); Designator.FirstEntryIsAnUnsizedArray = true; Designator.addUnsizedArrayUnchecked(ElemTy); } void addArray(EvalInfo &Info, const Expr *E, const ConstantArrayType *CAT) { if (checkSubobject(Info, E, CSK_ArrayToPointer)) Designator.addArrayUnchecked(CAT); } void addComplex(EvalInfo &Info, const Expr *E, QualType EltTy, bool Imag) { if (checkSubobject(Info, E, Imag ? CSK_Imag : CSK_Real)) Designator.addComplexUnchecked(EltTy, Imag); } void clearIsNullPointer() { IsNullPtr = false; } void adjustOffsetAndIndex(EvalInfo &Info, const Expr *E, uint64_t Index, CharUnits ElementSize) { // Compute the new offset in the appropriate width. Offset += Index * ElementSize; if (Index && checkNullPointer(Info, E, CSK_ArrayIndex)) Designator.adjustIndex(Info, E, Index); if (Index) clearIsNullPointer(); } void adjustOffset(CharUnits N) { Offset += N; if (N.getQuantity()) clearIsNullPointer(); } }; struct MemberPtr { MemberPtr() {} explicit MemberPtr(const ValueDecl *Decl) : DeclAndIsDerivedMember(Decl, false), Path() {} /// The member or (direct or indirect) field referred to by this member /// pointer, or 0 if this is a null member pointer. const ValueDecl *getDecl() const { return DeclAndIsDerivedMember.getPointer(); } /// Is this actually a member of some type derived from the relevant class? 
bool isDerivedMember() const { return DeclAndIsDerivedMember.getInt(); } /// Get the class which the declaration actually lives in. const CXXRecordDecl *getContainingRecord() const { return cast( DeclAndIsDerivedMember.getPointer()->getDeclContext()); } void moveInto(APValue &V) const { V = APValue(getDecl(), isDerivedMember(), Path); } void setFrom(const APValue &V) { assert(V.isMemberPointer()); DeclAndIsDerivedMember.setPointer(V.getMemberPointerDecl()); DeclAndIsDerivedMember.setInt(V.isMemberPointerToDerivedMember()); Path.clear(); ArrayRef P = V.getMemberPointerPath(); Path.insert(Path.end(), P.begin(), P.end()); } /// DeclAndIsDerivedMember - The member declaration, and a flag indicating /// whether the member is a member of some class derived from the class type /// of the member pointer. llvm::PointerIntPair DeclAndIsDerivedMember; /// Path - The path of base/derived classes from the member declaration's /// class (exclusive) to the class type of the member pointer (inclusive). SmallVector Path; /// Perform a cast towards the class of the Decl (either up or down the /// hierarchy). bool castBack(const CXXRecordDecl *Class) { assert(!Path.empty()); const CXXRecordDecl *Expected; if (Path.size() >= 2) Expected = Path[Path.size() - 2]; else Expected = getContainingRecord(); if (Expected->getCanonicalDecl() != Class->getCanonicalDecl()) { // C++11 [expr.static.cast]p12: In a conversion from (D::*) to (B::*), // if B does not contain the original member and is not a base or // derived class of the class containing the original member, the result // of the cast is undefined. // C++11 [conv.mem]p2 does not cover this case for a cast from (B::*) to // (D::*). We consider that to be a language defect. return false; } Path.pop_back(); return true; } /// Perform a base-to-derived member pointer cast. 
bool castToDerived(const CXXRecordDecl *Derived) { if (!getDecl()) return true; if (!isDerivedMember()) { Path.push_back(Derived); return true; } if (!castBack(Derived)) return false; if (Path.empty()) DeclAndIsDerivedMember.setInt(false); return true; } /// Perform a derived-to-base member pointer cast. bool castToBase(const CXXRecordDecl *Base) { if (!getDecl()) return true; if (Path.empty()) DeclAndIsDerivedMember.setInt(true); if (isDerivedMember()) { Path.push_back(Base); return true; } return castBack(Base); } }; /// Compare two member pointers, which are assumed to be of the same type. static bool operator==(const MemberPtr &LHS, const MemberPtr &RHS) { if (!LHS.getDecl() || !RHS.getDecl()) return !LHS.getDecl() && !RHS.getDecl(); if (LHS.getDecl()->getCanonicalDecl() != RHS.getDecl()->getCanonicalDecl()) return false; return LHS.Path == RHS.Path; } } static bool Evaluate(APValue &Result, EvalInfo &Info, const Expr *E); static bool EvaluateInPlace(APValue &Result, EvalInfo &Info, const LValue &This, const Expr *E, bool AllowNonLiteralTypes = false); -static bool EvaluateLValue(const Expr *E, LValue &Result, EvalInfo &Info); -static bool EvaluatePointer(const Expr *E, LValue &Result, EvalInfo &Info); +static bool EvaluateLValue(const Expr *E, LValue &Result, EvalInfo &Info, + bool InvalidBaseOK = false); +static bool EvaluatePointer(const Expr *E, LValue &Result, EvalInfo &Info, + bool InvalidBaseOK = false); static bool EvaluateMemberPointer(const Expr *E, MemberPtr &Result, EvalInfo &Info); static bool EvaluateTemporary(const Expr *E, LValue &Result, EvalInfo &Info); static bool EvaluateInteger(const Expr *E, APSInt &Result, EvalInfo &Info); static bool EvaluateIntegerOrLValue(const Expr *E, APValue &Result, EvalInfo &Info); static bool EvaluateFloat(const Expr *E, APFloat &Result, EvalInfo &Info); static bool EvaluateComplex(const Expr *E, ComplexValue &Res, EvalInfo &Info); static bool EvaluateAtomic(const Expr *E, APValue &Result, EvalInfo &Info); static 
bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result); //===----------------------------------------------------------------------===// // Misc utilities //===----------------------------------------------------------------------===// /// Produce a string describing the given constexpr call. static void describeCall(CallStackFrame *Frame, raw_ostream &Out) { unsigned ArgIndex = 0; bool IsMemberCall = isa(Frame->Callee) && !isa(Frame->Callee) && cast(Frame->Callee)->isInstance(); if (!IsMemberCall) Out << *Frame->Callee << '('; if (Frame->This && IsMemberCall) { APValue Val; Frame->This->moveInto(Val); Val.printPretty(Out, Frame->Info.Ctx, Frame->This->Designator.MostDerivedType); // FIXME: Add parens around Val if needed. Out << "->" << *Frame->Callee << '('; IsMemberCall = false; } for (FunctionDecl::param_const_iterator I = Frame->Callee->param_begin(), E = Frame->Callee->param_end(); I != E; ++I, ++ArgIndex) { if (ArgIndex > (unsigned)IsMemberCall) Out << ", "; const ParmVarDecl *Param = *I; const APValue &Arg = Frame->Arguments[ArgIndex]; Arg.printPretty(Out, Frame->Info.Ctx, Param->getType()); if (ArgIndex == 0 && IsMemberCall) Out << "->" << *Frame->Callee << '('; } Out << ')'; } /// Evaluate an expression to see if it had side-effects, and discard its /// result. /// \return \c true if the caller should keep evaluating. static bool EvaluateIgnoredValue(EvalInfo &Info, const Expr *E) { APValue Scratch; if (!Evaluate(Scratch, Info, E)) // We don't need the value, but we might have skipped a side effect here. return Info.noteSideEffect(); return true; } /// Sign- or zero-extend a value to 64 bits. If it's already 64 bits, just /// return its existing value. static int64_t getExtValue(const APSInt &Value) { return Value.isSigned() ? Value.getSExtValue() : static_cast(Value.getZExtValue()); } /// Should this call expression be treated as a string literal? 
static bool IsStringLiteralCall(const CallExpr *E) { unsigned Builtin = E->getBuiltinCallee(); return (Builtin == Builtin::BI__builtin___CFStringMakeConstantString || Builtin == Builtin::BI__builtin___NSStringMakeConstantString); } static bool IsGlobalLValue(APValue::LValueBase B) { // C++11 [expr.const]p3 An address constant expression is a prvalue core // constant expression of pointer type that evaluates to... // ... a null pointer value, or a prvalue core constant expression of type // std::nullptr_t. if (!B) return true; if (const ValueDecl *D = B.dyn_cast()) { // ... the address of an object with static storage duration, if (const VarDecl *VD = dyn_cast(D)) return VD->hasGlobalStorage(); // ... the address of a function, return isa(D); } const Expr *E = B.get(); switch (E->getStmtClass()) { default: return false; case Expr::CompoundLiteralExprClass: { const CompoundLiteralExpr *CLE = cast(E); return CLE->isFileScope() && CLE->isLValue(); } case Expr::MaterializeTemporaryExprClass: // A materialized temporary might have been lifetime-extended to static // storage duration. return cast(E)->getStorageDuration() == SD_Static; // A string literal has static storage duration. case Expr::StringLiteralClass: case Expr::PredefinedExprClass: case Expr::ObjCStringLiteralClass: case Expr::ObjCEncodeExprClass: case Expr::CXXTypeidExprClass: case Expr::CXXUuidofExprClass: return true; case Expr::CallExprClass: return IsStringLiteralCall(cast(E)); // For GCC compatibility, &&label has static storage duration. case Expr::AddrLabelExprClass: return true; // A Block literal expression may be used as the initialization value for // Block variables at global or local static scope. 
case Expr::BlockExprClass: return !cast(E)->getBlockDecl()->hasCaptures(); case Expr::ImplicitValueInitExprClass: // FIXME: // We can never form an lvalue with an implicit value initialization as its // base through expression evaluation, so these only appear in one case: the // implicit variable declaration we invent when checking whether a constexpr // constructor can produce a constant expression. We must assume that such // an expression might be a global lvalue. return true; } } static void NoteLValueLocation(EvalInfo &Info, APValue::LValueBase Base) { assert(Base && "no location for a null lvalue"); const ValueDecl *VD = Base.dyn_cast(); if (VD) Info.Note(VD->getLocation(), diag::note_declared_at); else Info.Note(Base.get()->getExprLoc(), diag::note_constexpr_temporary_here); } /// Check that this reference or pointer core constant expression is a valid /// value for an address or reference constant expression. Return true if we /// can fold this expression, whether or not it's a constant expression. static bool CheckLValueConstantExpression(EvalInfo &Info, SourceLocation Loc, QualType Type, const LValue &LVal) { bool IsReferenceType = Type->isReferenceType(); APValue::LValueBase Base = LVal.getLValueBase(); const SubobjectDesignator &Designator = LVal.getLValueDesignator(); // Check that the object is a global. Note that the fake 'this' object we // manufacture when checking potential constant expressions is conservatively // assumed to be global here. if (!IsGlobalLValue(Base)) { if (Info.getLangOpts().CPlusPlus11) { const ValueDecl *VD = Base.dyn_cast(); Info.FFDiag(Loc, diag::note_constexpr_non_global, 1) << IsReferenceType << !Designator.Entries.empty() << !!VD << VD; NoteLValueLocation(Info, Base); } else { Info.FFDiag(Loc); } // Don't allow references to temporaries to escape. 
return false; } assert((Info.checkingPotentialConstantExpression() || LVal.getLValueCallIndex() == 0) && "have call index for global lvalue"); if (const ValueDecl *VD = Base.dyn_cast()) { if (const VarDecl *Var = dyn_cast(VD)) { // Check if this is a thread-local variable. if (Var->getTLSKind()) return false; // A dllimport variable never acts like a constant. if (Var->hasAttr()) return false; } if (const auto *FD = dyn_cast(VD)) { // __declspec(dllimport) must be handled very carefully: // We must never initialize an expression with the thunk in C++. // Doing otherwise would allow the same id-expression to yield // different addresses for the same function in different translation // units. However, this means that we must dynamically initialize the // expression with the contents of the import address table at runtime. // // The C language has no notion of ODR; furthermore, it has no notion of // dynamic initialization. This means that we are permitted to // perform initialization with the address of the thunk. if (Info.getLangOpts().CPlusPlus && FD->hasAttr()) return false; } } // Allow address constant expressions to be past-the-end pointers. This is // an extension: the standard requires them to point to an object. if (!IsReferenceType) return true; // A reference constant expression must refer to an object. if (!Base) { // FIXME: diagnostic Info.CCEDiag(Loc); return true; } // Does this refer one past the end of some object? if (!Designator.Invalid && Designator.isOnePastTheEnd()) { const ValueDecl *VD = Base.dyn_cast(); Info.FFDiag(Loc, diag::note_constexpr_past_end, 1) << !Designator.Entries.empty() << !!VD << VD; NoteLValueLocation(Info, Base); } return true; } /// Check that this core constant expression is of literal type, and if not, /// produce an appropriate diagnostic. 
static bool CheckLiteralType(EvalInfo &Info, const Expr *E, const LValue *This = nullptr) { if (!E->isRValue() || E->getType()->isLiteralType(Info.Ctx)) return true; // C++1y: A constant initializer for an object o [...] may also invoke // constexpr constructors for o and its subobjects even if those objects // are of non-literal class types. // // C++11 missed this detail for aggregates, so classes like this: // struct foo_t { union { int i; volatile int j; } u; }; // are not (obviously) initializable like so: // __attribute__((__require_constant_initialization__)) // static const foo_t x = {{0}}; // because "i" is a subobject with non-literal initialization (due to the // volatile member of the union). See: // http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#1677 // Therefore, we use the C++1y behavior. if (This && Info.EvaluatingDecl == This->getLValueBase()) return true; // Prvalue constant expressions must be of literal types. if (Info.getLangOpts().CPlusPlus11) Info.FFDiag(E, diag::note_constexpr_nonliteral) << E->getType(); else Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr); return false; } /// Check that this core constant expression value is a valid value for a /// constant expression. If not, report an appropriate diagnostic. Does not /// check that the expression is of literal type. static bool CheckConstantExpression(EvalInfo &Info, SourceLocation DiagLoc, QualType Type, const APValue &Value) { if (Value.isUninit()) { Info.FFDiag(DiagLoc, diag::note_constexpr_uninitialized) << true << Type; return false; } // We allow _Atomic(T) to be initialized from anything that T can be // initialized from. if (const AtomicType *AT = Type->getAs()) Type = AT->getValueType(); // Core issue 1454: For a literal constant expression of array or class type, // each subobject of its value shall have been initialized by a constant // expression. 
if (Value.isArray()) { QualType EltTy = Type->castAsArrayTypeUnsafe()->getElementType(); for (unsigned I = 0, N = Value.getArrayInitializedElts(); I != N; ++I) { if (!CheckConstantExpression(Info, DiagLoc, EltTy, Value.getArrayInitializedElt(I))) return false; } if (!Value.hasArrayFiller()) return true; return CheckConstantExpression(Info, DiagLoc, EltTy, Value.getArrayFiller()); } if (Value.isUnion() && Value.getUnionField()) { return CheckConstantExpression(Info, DiagLoc, Value.getUnionField()->getType(), Value.getUnionValue()); } if (Value.isStruct()) { RecordDecl *RD = Type->castAs()->getDecl(); if (const CXXRecordDecl *CD = dyn_cast(RD)) { unsigned BaseIndex = 0; for (CXXRecordDecl::base_class_const_iterator I = CD->bases_begin(), End = CD->bases_end(); I != End; ++I, ++BaseIndex) { if (!CheckConstantExpression(Info, DiagLoc, I->getType(), Value.getStructBase(BaseIndex))) return false; } } for (const auto *I : RD->fields()) { if (!CheckConstantExpression(Info, DiagLoc, I->getType(), Value.getStructField(I->getFieldIndex()))) return false; } } if (Value.isLValue()) { LValue LVal; LVal.setFrom(Info.Ctx, Value); return CheckLValueConstantExpression(Info, DiagLoc, Type, LVal); } // Everything else is fine. 
return true; } static const ValueDecl *GetLValueBaseDecl(const LValue &LVal) { return LVal.Base.dyn_cast(); } static bool IsLiteralLValue(const LValue &Value) { if (Value.CallIndex) return false; const Expr *E = Value.Base.dyn_cast(); return E && !isa(E); } static bool IsWeakLValue(const LValue &Value) { const ValueDecl *Decl = GetLValueBaseDecl(Value); return Decl && Decl->isWeak(); } static bool isZeroSized(const LValue &Value) { const ValueDecl *Decl = GetLValueBaseDecl(Value); if (Decl && isa(Decl)) { QualType Ty = Decl->getType(); if (Ty->isArrayType()) return Ty->isIncompleteType() || Decl->getASTContext().getTypeSize(Ty) == 0; } return false; } static bool EvalPointerValueAsBool(const APValue &Value, bool &Result) { // A null base expression indicates a null pointer. These are always // evaluatable, and they are false unless the offset is zero. if (!Value.getLValueBase()) { Result = !Value.getLValueOffset().isZero(); return true; } // We have a non-null base. These are generally known to be true, but if it's // a weak declaration it can be null at runtime. 
Result = true; const ValueDecl *Decl = Value.getLValueBase().dyn_cast(); return !Decl || !Decl->isWeak(); } static bool HandleConversionToBool(const APValue &Val, bool &Result) { switch (Val.getKind()) { case APValue::Uninitialized: return false; case APValue::Int: Result = Val.getInt().getBoolValue(); return true; case APValue::Float: Result = !Val.getFloat().isZero(); return true; case APValue::ComplexInt: Result = Val.getComplexIntReal().getBoolValue() || Val.getComplexIntImag().getBoolValue(); return true; case APValue::ComplexFloat: Result = !Val.getComplexFloatReal().isZero() || !Val.getComplexFloatImag().isZero(); return true; case APValue::LValue: return EvalPointerValueAsBool(Val, Result); case APValue::MemberPointer: Result = Val.getMemberPointerDecl(); return true; case APValue::Vector: case APValue::Array: case APValue::Struct: case APValue::Union: case APValue::AddrLabelDiff: return false; } llvm_unreachable("unknown APValue kind"); } static bool EvaluateAsBooleanCondition(const Expr *E, bool &Result, EvalInfo &Info) { assert(E->isRValue() && "missing lvalue-to-rvalue conv in bool condition"); APValue Val; if (!Evaluate(Val, Info, E)) return false; return HandleConversionToBool(Val, Result); } template static bool HandleOverflow(EvalInfo &Info, const Expr *E, const T &SrcValue, QualType DestType) { Info.CCEDiag(E, diag::note_constexpr_overflow) << SrcValue << DestType; return Info.noteUndefinedBehavior(); } static bool HandleFloatToIntCast(EvalInfo &Info, const Expr *E, QualType SrcType, const APFloat &Value, QualType DestType, APSInt &Result) { unsigned DestWidth = Info.Ctx.getIntWidth(DestType); // Determine whether we are converting to unsigned or signed. 
bool DestSigned = DestType->isSignedIntegerOrEnumerationType(); Result = APSInt(DestWidth, !DestSigned); bool ignored; if (Value.convertToInteger(Result, llvm::APFloat::rmTowardZero, &ignored) & APFloat::opInvalidOp) return HandleOverflow(Info, E, Value, DestType); return true; } static bool HandleFloatToFloatCast(EvalInfo &Info, const Expr *E, QualType SrcType, QualType DestType, APFloat &Result) { APFloat Value = Result; bool ignored; if (Result.convert(Info.Ctx.getFloatTypeSemantics(DestType), APFloat::rmNearestTiesToEven, &ignored) & APFloat::opOverflow) return HandleOverflow(Info, E, Value, DestType); return true; } static APSInt HandleIntToIntCast(EvalInfo &Info, const Expr *E, QualType DestType, QualType SrcType, const APSInt &Value) { unsigned DestWidth = Info.Ctx.getIntWidth(DestType); APSInt Result = Value; // Figure out if this is a truncate, extend or noop cast. // If the input is signed, do a sign extend, noop, or truncate. Result = Result.extOrTrunc(DestWidth); Result.setIsUnsigned(DestType->isUnsignedIntegerOrEnumerationType()); return Result; } static bool HandleIntToFloatCast(EvalInfo &Info, const Expr *E, QualType SrcType, const APSInt &Value, QualType DestType, APFloat &Result) { Result = APFloat(Info.Ctx.getFloatTypeSemantics(DestType), 1); if (Result.convertFromAPInt(Value, Value.isSigned(), APFloat::rmNearestTiesToEven) & APFloat::opOverflow) return HandleOverflow(Info, E, Value, DestType); return true; } static bool truncateBitfieldValue(EvalInfo &Info, const Expr *E, APValue &Value, const FieldDecl *FD) { assert(FD->isBitField() && "truncateBitfieldValue on non-bitfield"); if (!Value.isInt()) { // Trying to store a pointer-cast-to-integer into a bitfield. // FIXME: In this case, we should provide the diagnostic for casting // a pointer to an integer. 
assert(Value.isLValue() && "integral value neither int nor lvalue?"); Info.FFDiag(E); return false; } APSInt &Int = Value.getInt(); unsigned OldBitWidth = Int.getBitWidth(); unsigned NewBitWidth = FD->getBitWidthValue(Info.Ctx); if (NewBitWidth < OldBitWidth) Int = Int.trunc(NewBitWidth).extend(OldBitWidth); return true; } static bool EvalAndBitcastToAPInt(EvalInfo &Info, const Expr *E, llvm::APInt &Res) { APValue SVal; if (!Evaluate(SVal, Info, E)) return false; if (SVal.isInt()) { Res = SVal.getInt(); return true; } if (SVal.isFloat()) { Res = SVal.getFloat().bitcastToAPInt(); return true; } if (SVal.isVector()) { QualType VecTy = E->getType(); unsigned VecSize = Info.Ctx.getTypeSize(VecTy); QualType EltTy = VecTy->castAs()->getElementType(); unsigned EltSize = Info.Ctx.getTypeSize(EltTy); bool BigEndian = Info.Ctx.getTargetInfo().isBigEndian(); Res = llvm::APInt::getNullValue(VecSize); for (unsigned i = 0; i < SVal.getVectorLength(); i++) { APValue &Elt = SVal.getVectorElt(i); llvm::APInt EltAsInt; if (Elt.isInt()) { EltAsInt = Elt.getInt(); } else if (Elt.isFloat()) { EltAsInt = Elt.getFloat().bitcastToAPInt(); } else { // Don't try to handle vectors of anything other than int or float // (not sure if it's possible to hit this case). Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr); return false; } unsigned BaseEltSize = EltAsInt.getBitWidth(); if (BigEndian) Res |= EltAsInt.zextOrTrunc(VecSize).rotr(i*EltSize+BaseEltSize); else Res |= EltAsInt.zextOrTrunc(VecSize).rotl(i*EltSize); } return true; } // Give up if the input isn't an int, float, or vector. For example, we // reject "(v4i16)(intptr_t)&a". Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr); return false; } /// Perform the given integer operation, which is known to need at most BitWidth /// bits, and check for overflow in the original type (if that type was not an /// unsigned type). 
template static bool CheckedIntArithmetic(EvalInfo &Info, const Expr *E, const APSInt &LHS, const APSInt &RHS, unsigned BitWidth, Operation Op, APSInt &Result) { if (LHS.isUnsigned()) { Result = Op(LHS, RHS); return true; } APSInt Value(Op(LHS.extend(BitWidth), RHS.extend(BitWidth)), false); Result = Value.trunc(LHS.getBitWidth()); if (Result.extend(BitWidth) != Value) { if (Info.checkingForOverflow()) Info.Ctx.getDiagnostics().Report(E->getExprLoc(), diag::warn_integer_constant_overflow) << Result.toString(10) << E->getType(); else return HandleOverflow(Info, E, Value, E->getType()); } return true; } /// Perform the given binary integer operation. static bool handleIntIntBinOp(EvalInfo &Info, const Expr *E, const APSInt &LHS, BinaryOperatorKind Opcode, APSInt RHS, APSInt &Result) { switch (Opcode) { default: Info.FFDiag(E); return false; case BO_Mul: return CheckedIntArithmetic(Info, E, LHS, RHS, LHS.getBitWidth() * 2, std::multiplies(), Result); case BO_Add: return CheckedIntArithmetic(Info, E, LHS, RHS, LHS.getBitWidth() + 1, std::plus(), Result); case BO_Sub: return CheckedIntArithmetic(Info, E, LHS, RHS, LHS.getBitWidth() + 1, std::minus(), Result); case BO_And: Result = LHS & RHS; return true; case BO_Xor: Result = LHS ^ RHS; return true; case BO_Or: Result = LHS | RHS; return true; case BO_Div: case BO_Rem: if (RHS == 0) { Info.FFDiag(E, diag::note_expr_divide_by_zero); return false; } Result = (Opcode == BO_Rem ? LHS % RHS : LHS / RHS); // Check for overflow case: INT_MIN / -1 or INT_MIN % -1. APSInt supports // this operation and gives the two's complement result. if (RHS.isNegative() && RHS.isAllOnesValue() && LHS.isSigned() && LHS.isMinSignedValue()) return HandleOverflow(Info, E, -LHS.extend(LHS.getBitWidth() + 1), E->getType()); return true; case BO_Shl: { if (Info.getLangOpts().OpenCL) // OpenCL 6.3j: shift values are effectively % word size of LHS. 
RHS &= APSInt(llvm::APInt(RHS.getBitWidth(), static_cast(LHS.getBitWidth() - 1)), RHS.isUnsigned()); else if (RHS.isSigned() && RHS.isNegative()) { // During constant-folding, a negative shift is an opposite shift. Such // a shift is not a constant expression. Info.CCEDiag(E, diag::note_constexpr_negative_shift) << RHS; RHS = -RHS; goto shift_right; } shift_left: // C++11 [expr.shift]p1: Shift width must be less than the bit width of // the shifted type. unsigned SA = (unsigned) RHS.getLimitedValue(LHS.getBitWidth()-1); if (SA != RHS) { Info.CCEDiag(E, diag::note_constexpr_large_shift) << RHS << E->getType() << LHS.getBitWidth(); } else if (LHS.isSigned()) { // C++11 [expr.shift]p2: A signed left shift must have a non-negative // operand, and must not overflow the corresponding unsigned type. if (LHS.isNegative()) Info.CCEDiag(E, diag::note_constexpr_lshift_of_negative) << LHS; else if (LHS.countLeadingZeros() < SA) Info.CCEDiag(E, diag::note_constexpr_lshift_discards); } Result = LHS << SA; return true; } case BO_Shr: { if (Info.getLangOpts().OpenCL) // OpenCL 6.3j: shift values are effectively % word size of LHS. RHS &= APSInt(llvm::APInt(RHS.getBitWidth(), static_cast(LHS.getBitWidth() - 1)), RHS.isUnsigned()); else if (RHS.isSigned() && RHS.isNegative()) { // During constant-folding, a negative shift is an opposite shift. Such a // shift is not a constant expression. Info.CCEDiag(E, diag::note_constexpr_negative_shift) << RHS; RHS = -RHS; goto shift_left; } shift_right: // C++11 [expr.shift]p1: Shift width must be less than the bit width of the // shifted type. 
unsigned SA = (unsigned) RHS.getLimitedValue(LHS.getBitWidth()-1); if (SA != RHS) Info.CCEDiag(E, diag::note_constexpr_large_shift) << RHS << E->getType() << LHS.getBitWidth(); Result = LHS >> SA; return true; } case BO_LT: Result = LHS < RHS; return true; case BO_GT: Result = LHS > RHS; return true; case BO_LE: Result = LHS <= RHS; return true; case BO_GE: Result = LHS >= RHS; return true; case BO_EQ: Result = LHS == RHS; return true; case BO_NE: Result = LHS != RHS; return true; } } /// Perform the given binary floating-point operation, in-place, on LHS. static bool handleFloatFloatBinOp(EvalInfo &Info, const Expr *E, APFloat &LHS, BinaryOperatorKind Opcode, const APFloat &RHS) { switch (Opcode) { default: Info.FFDiag(E); return false; case BO_Mul: LHS.multiply(RHS, APFloat::rmNearestTiesToEven); break; case BO_Add: LHS.add(RHS, APFloat::rmNearestTiesToEven); break; case BO_Sub: LHS.subtract(RHS, APFloat::rmNearestTiesToEven); break; case BO_Div: LHS.divide(RHS, APFloat::rmNearestTiesToEven); break; } if (LHS.isInfinity() || LHS.isNaN()) { Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << LHS.isNaN(); return Info.noteUndefinedBehavior(); } return true; } /// Cast an lvalue referring to a base subobject to a derived class, by /// truncating the lvalue's path to the given length. static bool CastToDerivedClass(EvalInfo &Info, const Expr *E, LValue &Result, const RecordDecl *TruncatedType, unsigned TruncatedElements) { SubobjectDesignator &D = Result.Designator; // Check we actually point to a derived class object. if (TruncatedElements == D.Entries.size()) return true; assert(TruncatedElements >= D.MostDerivedPathLength && "not casting to a derived class"); if (!Result.checkSubobject(Info, E, CSK_Derived)) return false; // Truncate the path to the subobject, and remove any derived-to-base offsets. 
const RecordDecl *RD = TruncatedType; for (unsigned I = TruncatedElements, N = D.Entries.size(); I != N; ++I) { if (RD->isInvalidDecl()) return false; const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(RD); const CXXRecordDecl *Base = getAsBaseClass(D.Entries[I]); if (isVirtualBaseClass(D.Entries[I])) Result.Offset -= Layout.getVBaseClassOffset(Base); else Result.Offset -= Layout.getBaseClassOffset(Base); RD = Base; } D.Entries.resize(TruncatedElements); return true; } static bool HandleLValueDirectBase(EvalInfo &Info, const Expr *E, LValue &Obj, const CXXRecordDecl *Derived, const CXXRecordDecl *Base, const ASTRecordLayout *RL = nullptr) { if (!RL) { if (Derived->isInvalidDecl()) return false; RL = &Info.Ctx.getASTRecordLayout(Derived); } Obj.getLValueOffset() += RL->getBaseClassOffset(Base); Obj.addDecl(Info, E, Base, /*Virtual*/ false); return true; } static bool HandleLValueBase(EvalInfo &Info, const Expr *E, LValue &Obj, const CXXRecordDecl *DerivedDecl, const CXXBaseSpecifier *Base) { const CXXRecordDecl *BaseDecl = Base->getType()->getAsCXXRecordDecl(); if (!Base->isVirtual()) return HandleLValueDirectBase(Info, E, Obj, DerivedDecl, BaseDecl); SubobjectDesignator &D = Obj.Designator; if (D.Invalid) return false; // Extract most-derived object and corresponding type. DerivedDecl = D.MostDerivedType->getAsCXXRecordDecl(); if (!CastToDerivedClass(Info, E, Obj, DerivedDecl, D.MostDerivedPathLength)) return false; // Find the virtual base class. 
if (DerivedDecl->isInvalidDecl()) return false; const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(DerivedDecl); Obj.getLValueOffset() += Layout.getVBaseClassOffset(BaseDecl); Obj.addDecl(Info, E, BaseDecl, /*Virtual*/ true); return true; } static bool HandleLValueBasePath(EvalInfo &Info, const CastExpr *E, QualType Type, LValue &Result) { for (CastExpr::path_const_iterator PathI = E->path_begin(), PathE = E->path_end(); PathI != PathE; ++PathI) { if (!HandleLValueBase(Info, E, Result, Type->getAsCXXRecordDecl(), *PathI)) return false; Type = (*PathI)->getType(); } return true; } /// Update LVal to refer to the given field, which must be a member of the type /// currently described by LVal. static bool HandleLValueMember(EvalInfo &Info, const Expr *E, LValue &LVal, const FieldDecl *FD, const ASTRecordLayout *RL = nullptr) { if (!RL) { if (FD->getParent()->isInvalidDecl()) return false; RL = &Info.Ctx.getASTRecordLayout(FD->getParent()); } unsigned I = FD->getFieldIndex(); LVal.adjustOffset(Info.Ctx.toCharUnitsFromBits(RL->getFieldOffset(I))); LVal.addDecl(Info, E, FD); return true; } /// Update LVal to refer to the given indirect field. static bool HandleLValueIndirectMember(EvalInfo &Info, const Expr *E, LValue &LVal, const IndirectFieldDecl *IFD) { for (const auto *C : IFD->chain()) if (!HandleLValueMember(Info, E, LVal, cast(C))) return false; return true; } /// Get the size of the given type in char units. static bool HandleSizeof(EvalInfo &Info, SourceLocation Loc, QualType Type, CharUnits &Size) { // sizeof(void), __alignof__(void), sizeof(function) = 1 as a gcc // extension. if (Type->isVoidType() || Type->isFunctionType()) { Size = CharUnits::One(); return true; } if (Type->isDependentType()) { Info.FFDiag(Loc); return false; } if (!Type->isConstantSizeType()) { // sizeof(vla) is not a constantexpr: C99 6.5.3.4p2. // FIXME: Better diagnostic. 
Info.FFDiag(Loc); return false; } Size = Info.Ctx.getTypeSizeInChars(Type); return true; } /// Update a pointer value to model pointer arithmetic. /// \param Info - Information about the ongoing evaluation. /// \param E - The expression being evaluated, for diagnostic purposes. /// \param LVal - The pointer value to be updated. /// \param EltTy - The pointee type represented by LVal. /// \param Adjustment - The adjustment, in objects of type EltTy, to add. static bool HandleLValueArrayAdjustment(EvalInfo &Info, const Expr *E, LValue &LVal, QualType EltTy, int64_t Adjustment) { CharUnits SizeOfPointee; if (!HandleSizeof(Info, E->getExprLoc(), EltTy, SizeOfPointee)) return false; LVal.adjustOffsetAndIndex(Info, E, Adjustment, SizeOfPointee); return true; } /// Update an lvalue to refer to a component of a complex number. /// \param Info - Information about the ongoing evaluation. /// \param LVal - The lvalue to be updated. /// \param EltTy - The complex number's component type. /// \param Imag - False for the real component, true for the imaginary. static bool HandleLValueComplexElement(EvalInfo &Info, const Expr *E, LValue &LVal, QualType EltTy, bool Imag) { if (Imag) { CharUnits SizeOfComponent; if (!HandleSizeof(Info, E->getExprLoc(), EltTy, SizeOfComponent)) return false; LVal.Offset += SizeOfComponent; } LVal.addComplex(Info, E, EltTy, Imag); return true; } /// Try to evaluate the initializer for a variable declaration. /// /// \param Info Information about the ongoing evaluation. /// \param E An expression to be used when printing diagnostics. /// \param VD The variable whose initializer should be obtained. /// \param Frame The frame in which the variable was created. Must be null /// if this variable is not local to the evaluation. /// \param Result Filled in with a pointer to the value of the variable. 
static bool evaluateVarDeclInit(EvalInfo &Info, const Expr *E, const VarDecl *VD, CallStackFrame *Frame, APValue *&Result) { // If this is a parameter to an active constexpr function call, perform // argument substitution. if (const ParmVarDecl *PVD = dyn_cast(VD)) { // Assume arguments of a potential constant expression are unknown // constant expressions. if (Info.checkingPotentialConstantExpression()) return false; if (!Frame || !Frame->Arguments) { Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr); return false; } Result = &Frame->Arguments[PVD->getFunctionScopeIndex()]; return true; } // If this is a local variable, dig out its value. if (Frame) { Result = Frame->getTemporary(VD); if (!Result) { // Assume variables referenced within a lambda's call operator that were // not declared within the call operator are captures and during checking // of a potential constant expression, assume they are unknown constant // expressions. assert(isLambdaCallOperator(Frame->Callee) && (VD->getDeclContext() != Frame->Callee || VD->isInitCapture()) && "missing value for local variable"); if (Info.checkingPotentialConstantExpression()) return false; // FIXME: implement capture evaluation during constant expr evaluation. Info.FFDiag(E->getLocStart(), diag::note_unimplemented_constexpr_lambda_feature_ast) << "captures not currently allowed"; return false; } return true; } // Dig out the initializer, and use the declaration which it's attached to. const Expr *Init = VD->getAnyInitializer(VD); if (!Init || Init->isValueDependent()) { // If we're checking a potential constant expression, the variable could be // initialized later. if (!Info.checkingPotentialConstantExpression()) Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr); return false; } // If we're currently evaluating the initializer of this declaration, use that // in-flight value. 
if (Info.EvaluatingDecl.dyn_cast() == VD) { Result = Info.EvaluatingDeclValue; return true; } // Never evaluate the initializer of a weak variable. We can't be sure that // this is the definition which will be used. if (VD->isWeak()) { Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr); return false; } // Check that we can fold the initializer. In C++, we will have already done // this in the cases where it matters for conformance. SmallVector Notes; if (!VD->evaluateValue(Notes)) { Info.FFDiag(E, diag::note_constexpr_var_init_non_constant, Notes.size() + 1) << VD; Info.Note(VD->getLocation(), diag::note_declared_at); Info.addNotes(Notes); return false; } else if (!VD->checkInitIsICE()) { Info.CCEDiag(E, diag::note_constexpr_var_init_non_constant, Notes.size() + 1) << VD; Info.Note(VD->getLocation(), diag::note_declared_at); Info.addNotes(Notes); } Result = VD->getEvaluatedValue(); return true; } static bool IsConstNonVolatile(QualType T) { Qualifiers Quals = T.getQualifiers(); return Quals.hasConst() && !Quals.hasVolatile(); } /// Get the base index of the given base class within an APValue representing /// the given derived class. static unsigned getBaseIndex(const CXXRecordDecl *Derived, const CXXRecordDecl *Base) { Base = Base->getCanonicalDecl(); unsigned Index = 0; for (CXXRecordDecl::base_class_const_iterator I = Derived->bases_begin(), E = Derived->bases_end(); I != E; ++I, ++Index) { if (I->getType()->getAsCXXRecordDecl()->getCanonicalDecl() == Base) return Index; } llvm_unreachable("base class missing from derived class's bases list"); } /// Extract the value of a character from a string literal. 
static APSInt extractStringLiteralCharacter(EvalInfo &Info, const Expr *Lit, uint64_t Index) { // FIXME: Support MakeStringConstant if (const auto *ObjCEnc = dyn_cast(Lit)) { std::string Str; Info.Ctx.getObjCEncodingForType(ObjCEnc->getEncodedType(), Str); assert(Index <= Str.size() && "Index too large"); return APSInt::getUnsigned(Str.c_str()[Index]); } if (auto PE = dyn_cast(Lit)) Lit = PE->getFunctionName(); const StringLiteral *S = cast(Lit); const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType(S->getType()); assert(CAT && "string literal isn't an array"); QualType CharType = CAT->getElementType(); assert(CharType->isIntegerType() && "unexpected character type"); APSInt Value(S->getCharByteWidth() * Info.Ctx.getCharWidth(), CharType->isUnsignedIntegerType()); if (Index < S->getLength()) Value = S->getCodeUnit(Index); return Value; } // Expand a string literal into an array of characters. static void expandStringLiteral(EvalInfo &Info, const Expr *Lit, APValue &Result) { const StringLiteral *S = cast(Lit); const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType(S->getType()); assert(CAT && "string literal isn't an array"); QualType CharType = CAT->getElementType(); assert(CharType->isIntegerType() && "unexpected character type"); unsigned Elts = CAT->getSize().getZExtValue(); Result = APValue(APValue::UninitArray(), std::min(S->getLength(), Elts), Elts); APSInt Value(S->getCharByteWidth() * Info.Ctx.getCharWidth(), CharType->isUnsignedIntegerType()); if (Result.hasArrayFiller()) Result.getArrayFiller() = APValue(Value); for (unsigned I = 0, N = Result.getArrayInitializedElts(); I != N; ++I) { Value = S->getCodeUnit(I); Result.getArrayInitializedElt(I) = APValue(Value); } } // Expand an array so that it has more than Index filled elements. static void expandArray(APValue &Array, unsigned Index) { unsigned Size = Array.getArraySize(); assert(Index < Size); // Always at least double the number of elements for which we store a value. 
unsigned OldElts = Array.getArrayInitializedElts(); unsigned NewElts = std::max(Index+1, OldElts * 2); NewElts = std::min(Size, std::max(NewElts, 8u)); // Copy the data across. APValue NewValue(APValue::UninitArray(), NewElts, Size); for (unsigned I = 0; I != OldElts; ++I) NewValue.getArrayInitializedElt(I).swap(Array.getArrayInitializedElt(I)); for (unsigned I = OldElts; I != NewElts; ++I) NewValue.getArrayInitializedElt(I) = Array.getArrayFiller(); if (NewValue.hasArrayFiller()) NewValue.getArrayFiller() = Array.getArrayFiller(); Array.swap(NewValue); } /// Determine whether a type would actually be read by an lvalue-to-rvalue /// conversion. If it's of class type, we may assume that the copy operation /// is trivial. Note that this is never true for a union type with fields /// (because the copy always "reads" the active member) and always true for /// a non-class type. static bool isReadByLvalueToRvalueConversion(QualType T) { CXXRecordDecl *RD = T->getBaseElementTypeUnsafe()->getAsCXXRecordDecl(); if (!RD || (RD->isUnion() && !RD->field_empty())) return true; if (RD->isEmpty()) return false; for (auto *Field : RD->fields()) if (isReadByLvalueToRvalueConversion(Field->getType())) return true; for (auto &BaseSpec : RD->bases()) if (isReadByLvalueToRvalueConversion(BaseSpec.getType())) return true; return false; } /// Diagnose an attempt to read from any unreadable field within the specified /// type, which might be a class type. static bool diagnoseUnreadableFields(EvalInfo &Info, const Expr *E, QualType T) { CXXRecordDecl *RD = T->getBaseElementTypeUnsafe()->getAsCXXRecordDecl(); if (!RD) return false; if (!RD->hasMutableFields()) return false; for (auto *Field : RD->fields()) { // If we're actually going to read this field in some way, then it can't // be mutable. If we're in a union, then assigning to a mutable field // (even an empty one) can change the active member, so that's not OK. // FIXME: Add core issue number for the union case. 
if (Field->isMutable() && (RD->isUnion() || isReadByLvalueToRvalueConversion(Field->getType()))) { Info.FFDiag(E, diag::note_constexpr_ltor_mutable, 1) << Field; Info.Note(Field->getLocation(), diag::note_declared_at); return true; } if (diagnoseUnreadableFields(Info, E, Field->getType())) return true; } for (auto &BaseSpec : RD->bases()) if (diagnoseUnreadableFields(Info, E, BaseSpec.getType())) return true; // All mutable fields were empty, and thus not actually read. return false; } /// Kinds of access we can perform on an object, for diagnostics. enum AccessKinds { AK_Read, AK_Assign, AK_Increment, AK_Decrement }; namespace { /// A handle to a complete object (an object that is not a subobject of /// another object). struct CompleteObject { /// The value of the complete object. APValue *Value; /// The type of the complete object. QualType Type; CompleteObject() : Value(nullptr) {} CompleteObject(APValue *Value, QualType Type) : Value(Value), Type(Type) { assert(Value && "missing value for complete object"); } explicit operator bool() const { return Value; } }; } // end anonymous namespace /// Find the designated sub-object of an rvalue. template typename SubobjectHandler::result_type findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj, const SubobjectDesignator &Sub, SubobjectHandler &handler) { if (Sub.Invalid) // A diagnostic will have already been produced. return handler.failed(); if (Sub.isOnePastTheEnd()) { if (Info.getLangOpts().CPlusPlus11) Info.FFDiag(E, diag::note_constexpr_access_past_end) << handler.AccessKind; else Info.FFDiag(E); return handler.failed(); } APValue *O = Obj.Value; QualType ObjType = Obj.Type; const FieldDecl *LastField = nullptr; // Walk the designator's path to find the subobject. 
for (unsigned I = 0, N = Sub.Entries.size(); /**/; ++I) { if (O->isUninit()) { if (!Info.checkingPotentialConstantExpression()) Info.FFDiag(E, diag::note_constexpr_access_uninit) << handler.AccessKind; return handler.failed(); } if (I == N) { // If we are reading an object of class type, there may still be more // things we need to check: if there are any mutable subobjects, we // cannot perform this read. (This only happens when performing a trivial // copy or assignment.) if (ObjType->isRecordType() && handler.AccessKind == AK_Read && diagnoseUnreadableFields(Info, E, ObjType)) return handler.failed(); if (!handler.found(*O, ObjType)) return false; // If we modified a bit-field, truncate it to the right width. if (handler.AccessKind != AK_Read && LastField && LastField->isBitField() && !truncateBitfieldValue(Info, E, *O, LastField)) return false; return true; } LastField = nullptr; if (ObjType->isArrayType()) { // Next subobject is an array element. const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType(ObjType); assert(CAT && "vla in literal type?"); uint64_t Index = Sub.Entries[I].ArrayIndex; if (CAT->getSize().ule(Index)) { // Note, it should not be possible to form a pointer with a valid // designator which points more than one past the end of the array. if (Info.getLangOpts().CPlusPlus11) Info.FFDiag(E, diag::note_constexpr_access_past_end) << handler.AccessKind; else Info.FFDiag(E); return handler.failed(); } ObjType = CAT->getElementType(); // An array object is represented as either an Array APValue or as an // LValue which refers to a string literal. 
if (O->isLValue()) { assert(I == N - 1 && "extracting subobject of character?"); assert(!O->hasLValuePath() || O->getLValuePath().empty()); if (handler.AccessKind != AK_Read) expandStringLiteral(Info, O->getLValueBase().get(), *O); else return handler.foundString(*O, ObjType, Index); } if (O->getArrayInitializedElts() > Index) O = &O->getArrayInitializedElt(Index); else if (handler.AccessKind != AK_Read) { expandArray(*O, Index); O = &O->getArrayInitializedElt(Index); } else O = &O->getArrayFiller(); } else if (ObjType->isAnyComplexType()) { // Next subobject is a complex number. uint64_t Index = Sub.Entries[I].ArrayIndex; if (Index > 1) { if (Info.getLangOpts().CPlusPlus11) Info.FFDiag(E, diag::note_constexpr_access_past_end) << handler.AccessKind; else Info.FFDiag(E); return handler.failed(); } bool WasConstQualified = ObjType.isConstQualified(); ObjType = ObjType->castAs()->getElementType(); if (WasConstQualified) ObjType.addConst(); assert(I == N - 1 && "extracting subobject of scalar?"); if (O->isComplexInt()) { return handler.found(Index ? O->getComplexIntImag() : O->getComplexIntReal(), ObjType); } else { assert(O->isComplexFloat()); return handler.found(Index ? O->getComplexFloatImag() : O->getComplexFloatReal(), ObjType); } } else if (const FieldDecl *Field = getAsField(Sub.Entries[I])) { if (Field->isMutable() && handler.AccessKind == AK_Read) { Info.FFDiag(E, diag::note_constexpr_ltor_mutable, 1) << Field; Info.Note(Field->getLocation(), diag::note_declared_at); return handler.failed(); } // Next subobject is a class, struct or union field. 
RecordDecl *RD = ObjType->castAs()->getDecl(); if (RD->isUnion()) { const FieldDecl *UnionField = O->getUnionField(); if (!UnionField || UnionField->getCanonicalDecl() != Field->getCanonicalDecl()) { Info.FFDiag(E, diag::note_constexpr_access_inactive_union_member) << handler.AccessKind << Field << !UnionField << UnionField; return handler.failed(); } O = &O->getUnionValue(); } else O = &O->getStructField(Field->getFieldIndex()); bool WasConstQualified = ObjType.isConstQualified(); ObjType = Field->getType(); if (WasConstQualified && !Field->isMutable()) ObjType.addConst(); if (ObjType.isVolatileQualified()) { if (Info.getLangOpts().CPlusPlus) { // FIXME: Include a description of the path to the volatile subobject. Info.FFDiag(E, diag::note_constexpr_access_volatile_obj, 1) << handler.AccessKind << 2 << Field; Info.Note(Field->getLocation(), diag::note_declared_at); } else { Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr); } return handler.failed(); } LastField = Field; } else { // Next subobject is a base class. 
const CXXRecordDecl *Derived = ObjType->getAsCXXRecordDecl(); const CXXRecordDecl *Base = getAsBaseClass(Sub.Entries[I]); O = &O->getStructBase(getBaseIndex(Derived, Base)); bool WasConstQualified = ObjType.isConstQualified(); ObjType = Info.Ctx.getRecordType(Base); if (WasConstQualified) ObjType.addConst(); } } } namespace { struct ExtractSubobjectHandler { EvalInfo &Info; APValue &Result; static const AccessKinds AccessKind = AK_Read; typedef bool result_type; bool failed() { return false; } bool found(APValue &Subobj, QualType SubobjType) { Result = Subobj; return true; } bool found(APSInt &Value, QualType SubobjType) { Result = APValue(Value); return true; } bool found(APFloat &Value, QualType SubobjType) { Result = APValue(Value); return true; } bool foundString(APValue &Subobj, QualType SubobjType, uint64_t Character) { Result = APValue(extractStringLiteralCharacter( Info, Subobj.getLValueBase().get(), Character)); return true; } }; } // end anonymous namespace const AccessKinds ExtractSubobjectHandler::AccessKind; /// Extract the designated sub-object of an rvalue. static bool extractSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj, const SubobjectDesignator &Sub, APValue &Result) { ExtractSubobjectHandler Handler = { Info, Result }; return findSubobject(Info, E, Obj, Sub, Handler); } namespace { struct ModifySubobjectHandler { EvalInfo &Info; APValue &NewVal; const Expr *E; typedef bool result_type; static const AccessKinds AccessKind = AK_Assign; bool checkConst(QualType QT) { // Assigning to a const object has undefined behavior. if (QT.isConstQualified()) { Info.FFDiag(E, diag::note_constexpr_modify_const_type) << QT; return false; } return true; } bool failed() { return false; } bool found(APValue &Subobj, QualType SubobjType) { if (!checkConst(SubobjType)) return false; // We've been given ownership of NewVal, so just swap it in. 
Subobj.swap(NewVal); return true; } bool found(APSInt &Value, QualType SubobjType) { if (!checkConst(SubobjType)) return false; if (!NewVal.isInt()) { // Maybe trying to write a cast pointer value into a complex? Info.FFDiag(E); return false; } Value = NewVal.getInt(); return true; } bool found(APFloat &Value, QualType SubobjType) { if (!checkConst(SubobjType)) return false; Value = NewVal.getFloat(); return true; } bool foundString(APValue &Subobj, QualType SubobjType, uint64_t Character) { llvm_unreachable("shouldn't encounter string elements with ExpandArrays"); } }; } // end anonymous namespace const AccessKinds ModifySubobjectHandler::AccessKind; /// Update the designated sub-object of an rvalue to the given value. static bool modifySubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj, const SubobjectDesignator &Sub, APValue &NewVal) { ModifySubobjectHandler Handler = { Info, NewVal, E }; return findSubobject(Info, E, Obj, Sub, Handler); } /// Find the position where two subobject designators diverge, or equivalently /// the length of the common initial subsequence. static unsigned FindDesignatorMismatch(QualType ObjType, const SubobjectDesignator &A, const SubobjectDesignator &B, bool &WasArrayIndex) { unsigned I = 0, N = std::min(A.Entries.size(), B.Entries.size()); for (/**/; I != N; ++I) { if (!ObjType.isNull() && (ObjType->isArrayType() || ObjType->isAnyComplexType())) { // Next subobject is an array element. if (A.Entries[I].ArrayIndex != B.Entries[I].ArrayIndex) { WasArrayIndex = true; return I; } if (ObjType->isAnyComplexType()) ObjType = ObjType->castAs()->getElementType(); else ObjType = ObjType->castAsArrayTypeUnsafe()->getElementType(); } else { if (A.Entries[I].BaseOrMember != B.Entries[I].BaseOrMember) { WasArrayIndex = false; return I; } if (const FieldDecl *FD = getAsField(A.Entries[I])) // Next subobject is a field. ObjType = FD->getType(); else // Next subobject is a base class. 
ObjType = QualType(); } } WasArrayIndex = false; return I; } /// Determine whether the given subobject designators refer to elements of the /// same array object. static bool AreElementsOfSameArray(QualType ObjType, const SubobjectDesignator &A, const SubobjectDesignator &B) { if (A.Entries.size() != B.Entries.size()) return false; bool IsArray = A.MostDerivedIsArrayElement; if (IsArray && A.MostDerivedPathLength != A.Entries.size()) // A is a subobject of the array element. return false; // If A (and B) designates an array element, the last entry will be the array // index. That doesn't have to match. Otherwise, we're in the 'implicit array // of length 1' case, and the entire path must match. bool WasArrayIndex; unsigned CommonLength = FindDesignatorMismatch(ObjType, A, B, WasArrayIndex); return CommonLength >= A.Entries.size() - IsArray; } /// Find the complete object to which an LValue refers. static CompleteObject findCompleteObject(EvalInfo &Info, const Expr *E, AccessKinds AK, const LValue &LVal, QualType LValType) { if (!LVal.Base) { Info.FFDiag(E, diag::note_constexpr_access_null) << AK; return CompleteObject(); } CallStackFrame *Frame = nullptr; if (LVal.CallIndex) { Frame = Info.getCallFrame(LVal.CallIndex); if (!Frame) { Info.FFDiag(E, diag::note_constexpr_lifetime_ended, 1) << AK << LVal.Base.is(); NoteLValueLocation(Info, LVal.Base); return CompleteObject(); } } // C++11 DR1311: An lvalue-to-rvalue conversion on a volatile-qualified type // is not a constant expression (even if the object is non-volatile). We also // apply this rule to C++98, in order to conform to the expected 'volatile' // semantics. if (LValType.isVolatileQualified()) { if (Info.getLangOpts().CPlusPlus) Info.FFDiag(E, diag::note_constexpr_access_volatile_type) << AK << LValType; else Info.FFDiag(E); return CompleteObject(); } // Compute value storage location and type of base object. 
APValue *BaseVal = nullptr; QualType BaseType = getType(LVal.Base); if (const ValueDecl *D = LVal.Base.dyn_cast()) { // In C++98, const, non-volatile integers initialized with ICEs are ICEs. // In C++11, constexpr, non-volatile variables initialized with constant // expressions are constant expressions too. Inside constexpr functions, // parameters are constant expressions even if they're non-const. // In C++1y, objects local to a constant expression (those with a Frame) are // both readable and writable inside constant expressions. // In C, such things can also be folded, although they are not ICEs. const VarDecl *VD = dyn_cast(D); if (VD) { if (const VarDecl *VDef = VD->getDefinition(Info.Ctx)) VD = VDef; } if (!VD || VD->isInvalidDecl()) { Info.FFDiag(E); return CompleteObject(); } // Accesses of volatile-qualified objects are not allowed. if (BaseType.isVolatileQualified()) { if (Info.getLangOpts().CPlusPlus) { Info.FFDiag(E, diag::note_constexpr_access_volatile_obj, 1) << AK << 1 << VD; Info.Note(VD->getLocation(), diag::note_declared_at); } else { Info.FFDiag(E); } return CompleteObject(); } // Unless we're looking at a local variable or argument in a constexpr call, // the variable we're reading must be const. if (!Frame) { if (Info.getLangOpts().CPlusPlus14 && VD == Info.EvaluatingDecl.dyn_cast()) { // OK, we can read and modify an object if we're in the process of // evaluating its initializer, because its lifetime began in this // evaluation. } else if (AK != AK_Read) { // All the remaining cases only permit reading. Info.FFDiag(E, diag::note_constexpr_modify_global); return CompleteObject(); } else if (VD->isConstexpr()) { // OK, we can read this variable. } else if (BaseType->isIntegralOrEnumerationType()) { // In OpenCL if a variable is in constant address space it is a const value. 
if (!(BaseType.isConstQualified() || (Info.getLangOpts().OpenCL && BaseType.getAddressSpace() == LangAS::opencl_constant))) { if (Info.getLangOpts().CPlusPlus) { Info.FFDiag(E, diag::note_constexpr_ltor_non_const_int, 1) << VD; Info.Note(VD->getLocation(), diag::note_declared_at); } else { Info.FFDiag(E); } return CompleteObject(); } } else if (BaseType->isFloatingType() && BaseType.isConstQualified()) { // We support folding of const floating-point types, in order to make // static const data members of such types (supported as an extension) // more useful. if (Info.getLangOpts().CPlusPlus11) { Info.CCEDiag(E, diag::note_constexpr_ltor_non_constexpr, 1) << VD; Info.Note(VD->getLocation(), diag::note_declared_at); } else { Info.CCEDiag(E); } } else if (BaseType.isConstQualified() && VD->hasDefinition(Info.Ctx)) { Info.CCEDiag(E, diag::note_constexpr_ltor_non_constexpr) << VD; // Keep evaluating to see what we can do. } else { // FIXME: Allow folding of values of any literal type in all languages. if (Info.checkingPotentialConstantExpression() && VD->getType().isConstQualified() && !VD->hasDefinition(Info.Ctx)) { // The definition of this variable could be constexpr. We can't // access it right now, but may be able to in future. } else if (Info.getLangOpts().CPlusPlus11) { Info.FFDiag(E, diag::note_constexpr_ltor_non_constexpr, 1) << VD; Info.Note(VD->getLocation(), diag::note_declared_at); } else { Info.FFDiag(E); } return CompleteObject(); } } if (!evaluateVarDeclInit(Info, E, VD, Frame, BaseVal)) return CompleteObject(); } else { const Expr *Base = LVal.Base.dyn_cast(); if (!Frame) { if (const MaterializeTemporaryExpr *MTE = dyn_cast(Base)) { assert(MTE->getStorageDuration() == SD_Static && "should have a frame for a non-global materialized temporary"); // Per C++1y [expr.const]p2: // an lvalue-to-rvalue conversion [is not allowed unless it applies to] // - a [...] glvalue of integral or enumeration type that refers to // a non-volatile const object [...] 
// [...] // - a [...] glvalue of literal type that refers to a non-volatile // object whose lifetime began within the evaluation of e. // // C++11 misses the 'began within the evaluation of e' check and // instead allows all temporaries, including things like: // int &&r = 1; // int x = ++r; // constexpr int k = r; // Therefore we use the C++1y rules in C++11 too. const ValueDecl *VD = Info.EvaluatingDecl.dyn_cast(); const ValueDecl *ED = MTE->getExtendingDecl(); if (!(BaseType.isConstQualified() && BaseType->isIntegralOrEnumerationType()) && !(VD && VD->getCanonicalDecl() == ED->getCanonicalDecl())) { Info.FFDiag(E, diag::note_constexpr_access_static_temporary, 1) << AK; Info.Note(MTE->getExprLoc(), diag::note_constexpr_temporary_here); return CompleteObject(); } BaseVal = Info.Ctx.getMaterializedTemporaryValue(MTE, false); assert(BaseVal && "got reference to unevaluated temporary"); } else { Info.FFDiag(E); return CompleteObject(); } } else { BaseVal = Frame->getTemporary(Base); assert(BaseVal && "missing value for temporary"); } // Volatile temporary objects cannot be accessed in constant expressions. if (BaseType.isVolatileQualified()) { if (Info.getLangOpts().CPlusPlus) { Info.FFDiag(E, diag::note_constexpr_access_volatile_obj, 1) << AK << 0; Info.Note(Base->getExprLoc(), diag::note_constexpr_temporary_here); } else { Info.FFDiag(E); } return CompleteObject(); } } // During the construction of an object, it is not yet 'const'. // FIXME: We don't set up EvaluatingDecl for local variables or temporaries, // and this doesn't do quite the right thing for const subobjects of the // object under construction. if (LVal.getLValueBase() == Info.EvaluatingDecl) { BaseType = Info.Ctx.getCanonicalType(BaseType); BaseType.removeLocalConst(); } // In C++1y, we can't safely access any mutable state when we might be // evaluating after an unmodeled side effect. // // FIXME: Not all local state is mutable. 
Allow local constant subobjects // to be read here (but take care with 'mutable' fields). if ((Frame && Info.getLangOpts().CPlusPlus14 && Info.EvalStatus.HasSideEffects) || (AK != AK_Read && Info.IsSpeculativelyEvaluating)) return CompleteObject(); return CompleteObject(BaseVal, BaseType); } /// \brief Perform an lvalue-to-rvalue conversion on the given glvalue. This /// can also be used for 'lvalue-to-lvalue' conversions for looking up the /// glvalue referred to by an entity of reference type. /// /// \param Info - Information about the ongoing evaluation. /// \param Conv - The expression for which we are performing the conversion. /// Used for diagnostics. /// \param Type - The type of the glvalue (before stripping cv-qualifiers in the /// case of a non-class type). /// \param LVal - The glvalue on which we are attempting to perform this action. /// \param RVal - The produced value will be placed here. static bool handleLValueToRValueConversion(EvalInfo &Info, const Expr *Conv, QualType Type, const LValue &LVal, APValue &RVal) { if (LVal.Designator.Invalid) return false; // Check for special cases where there is no existing APValue to look at. const Expr *Base = LVal.Base.dyn_cast(); if (Base && !LVal.CallIndex && !Type.isVolatileQualified()) { if (const CompoundLiteralExpr *CLE = dyn_cast(Base)) { // In C99, a CompoundLiteralExpr is an lvalue, and we defer evaluating the // initializer until now for such expressions. Such an expression can't be // an ICE in C, so this only matters for fold. if (Type.isVolatileQualified()) { Info.FFDiag(Conv); return false; } APValue Lit; if (!Evaluate(Lit, Info, CLE->getInitializer())) return false; CompleteObject LitObj(&Lit, Base->getType()); return extractSubobject(Info, Conv, LitObj, LVal.Designator, RVal); } else if (isa(Base) || isa(Base)) { // We represent a string literal array as an lvalue pointing at the // corresponding expression, rather than building an array of chars. 
// FIXME: Support ObjCEncodeExpr, MakeStringConstant APValue Str(Base, CharUnits::Zero(), APValue::NoLValuePath(), 0); CompleteObject StrObj(&Str, Base->getType()); return extractSubobject(Info, Conv, StrObj, LVal.Designator, RVal); } } CompleteObject Obj = findCompleteObject(Info, Conv, AK_Read, LVal, Type); return Obj && extractSubobject(Info, Conv, Obj, LVal.Designator, RVal); } /// Perform an assignment of Val to LVal. Takes ownership of Val. static bool handleAssignment(EvalInfo &Info, const Expr *E, const LValue &LVal, QualType LValType, APValue &Val) { if (LVal.Designator.Invalid) return false; if (!Info.getLangOpts().CPlusPlus14) { Info.FFDiag(E); return false; } CompleteObject Obj = findCompleteObject(Info, E, AK_Assign, LVal, LValType); return Obj && modifySubobject(Info, E, Obj, LVal.Designator, Val); } static bool isOverflowingIntegerType(ASTContext &Ctx, QualType T) { return T->isSignedIntegerType() && Ctx.getIntWidth(T) >= Ctx.getIntWidth(Ctx.IntTy); } namespace { struct CompoundAssignSubobjectHandler { EvalInfo &Info; const Expr *E; QualType PromotedLHSType; BinaryOperatorKind Opcode; const APValue &RHS; static const AccessKinds AccessKind = AK_Assign; typedef bool result_type; bool checkConst(QualType QT) { // Assigning to a const object has undefined behavior. if (QT.isConstQualified()) { Info.FFDiag(E, diag::note_constexpr_modify_const_type) << QT; return false; } return true; } bool failed() { return false; } bool found(APValue &Subobj, QualType SubobjType) { switch (Subobj.getKind()) { case APValue::Int: return found(Subobj.getInt(), SubobjType); case APValue::Float: return found(Subobj.getFloat(), SubobjType); case APValue::ComplexInt: case APValue::ComplexFloat: // FIXME: Implement complex compound assignment. Info.FFDiag(E); return false; case APValue::LValue: return foundPointer(Subobj, SubobjType); default: // FIXME: can this happen? 
Info.FFDiag(E); return false; } } bool found(APSInt &Value, QualType SubobjType) { if (!checkConst(SubobjType)) return false; if (!SubobjType->isIntegerType() || !RHS.isInt()) { // We don't support compound assignment on integer-cast-to-pointer // values. Info.FFDiag(E); return false; } APSInt LHS = HandleIntToIntCast(Info, E, PromotedLHSType, SubobjType, Value); if (!handleIntIntBinOp(Info, E, LHS, Opcode, RHS.getInt(), LHS)) return false; Value = HandleIntToIntCast(Info, E, SubobjType, PromotedLHSType, LHS); return true; } bool found(APFloat &Value, QualType SubobjType) { return checkConst(SubobjType) && HandleFloatToFloatCast(Info, E, SubobjType, PromotedLHSType, Value) && handleFloatFloatBinOp(Info, E, Value, Opcode, RHS.getFloat()) && HandleFloatToFloatCast(Info, E, PromotedLHSType, SubobjType, Value); } bool foundPointer(APValue &Subobj, QualType SubobjType) { if (!checkConst(SubobjType)) return false; QualType PointeeType; if (const PointerType *PT = SubobjType->getAs()) PointeeType = PT->getPointeeType(); if (PointeeType.isNull() || !RHS.isInt() || (Opcode != BO_Add && Opcode != BO_Sub)) { Info.FFDiag(E); return false; } int64_t Offset = getExtValue(RHS.getInt()); if (Opcode == BO_Sub) Offset = -Offset; LValue LVal; LVal.setFrom(Info.Ctx, Subobj); if (!HandleLValueArrayAdjustment(Info, E, LVal, PointeeType, Offset)) return false; LVal.moveInto(Subobj); return true; } bool foundString(APValue &Subobj, QualType SubobjType, uint64_t Character) { llvm_unreachable("shouldn't encounter string elements here"); } }; } // end anonymous namespace const AccessKinds CompoundAssignSubobjectHandler::AccessKind; /// Perform a compound assignment of LVal = RVal. 
static bool handleCompoundAssignment( EvalInfo &Info, const Expr *E, const LValue &LVal, QualType LValType, QualType PromotedLValType, BinaryOperatorKind Opcode, const APValue &RVal) { if (LVal.Designator.Invalid) return false; if (!Info.getLangOpts().CPlusPlus14) { Info.FFDiag(E); return false; } CompleteObject Obj = findCompleteObject(Info, E, AK_Assign, LVal, LValType); CompoundAssignSubobjectHandler Handler = { Info, E, PromotedLValType, Opcode, RVal }; return Obj && findSubobject(Info, E, Obj, LVal.Designator, Handler); } namespace { struct IncDecSubobjectHandler { EvalInfo &Info; const Expr *E; AccessKinds AccessKind; APValue *Old; typedef bool result_type; bool checkConst(QualType QT) { // Assigning to a const object has undefined behavior. if (QT.isConstQualified()) { Info.FFDiag(E, diag::note_constexpr_modify_const_type) << QT; return false; } return true; } bool failed() { return false; } bool found(APValue &Subobj, QualType SubobjType) { // Stash the old value. Also clear Old, so we don't clobber it later // if we're post-incrementing a complex. if (Old) { *Old = Subobj; Old = nullptr; } switch (Subobj.getKind()) { case APValue::Int: return found(Subobj.getInt(), SubobjType); case APValue::Float: return found(Subobj.getFloat(), SubobjType); case APValue::ComplexInt: return found(Subobj.getComplexIntReal(), SubobjType->castAs()->getElementType() .withCVRQualifiers(SubobjType.getCVRQualifiers())); case APValue::ComplexFloat: return found(Subobj.getComplexFloatReal(), SubobjType->castAs()->getElementType() .withCVRQualifiers(SubobjType.getCVRQualifiers())); case APValue::LValue: return foundPointer(Subobj, SubobjType); default: // FIXME: can this happen? Info.FFDiag(E); return false; } } bool found(APSInt &Value, QualType SubobjType) { if (!checkConst(SubobjType)) return false; if (!SubobjType->isIntegerType()) { // We don't support increment / decrement on integer-cast-to-pointer // values. 
Info.FFDiag(E); return false; } if (Old) *Old = APValue(Value); // bool arithmetic promotes to int, and the conversion back to bool // doesn't reduce mod 2^n, so special-case it. if (SubobjType->isBooleanType()) { if (AccessKind == AK_Increment) Value = 1; else Value = !Value; return true; } bool WasNegative = Value.isNegative(); if (AccessKind == AK_Increment) { ++Value; if (!WasNegative && Value.isNegative() && isOverflowingIntegerType(Info.Ctx, SubobjType)) { APSInt ActualValue(Value, /*IsUnsigned*/true); return HandleOverflow(Info, E, ActualValue, SubobjType); } } else { --Value; if (WasNegative && !Value.isNegative() && isOverflowingIntegerType(Info.Ctx, SubobjType)) { unsigned BitWidth = Value.getBitWidth(); APSInt ActualValue(Value.sext(BitWidth + 1), /*IsUnsigned*/false); ActualValue.setBit(BitWidth); return HandleOverflow(Info, E, ActualValue, SubobjType); } } return true; } bool found(APFloat &Value, QualType SubobjType) { if (!checkConst(SubobjType)) return false; if (Old) *Old = APValue(Value); APFloat One(Value.getSemantics(), 1); if (AccessKind == AK_Increment) Value.add(One, APFloat::rmNearestTiesToEven); else Value.subtract(One, APFloat::rmNearestTiesToEven); return true; } bool foundPointer(APValue &Subobj, QualType SubobjType) { if (!checkConst(SubobjType)) return false; QualType PointeeType; if (const PointerType *PT = SubobjType->getAs()) PointeeType = PT->getPointeeType(); else { Info.FFDiag(E); return false; } LValue LVal; LVal.setFrom(Info.Ctx, Subobj); if (!HandleLValueArrayAdjustment(Info, E, LVal, PointeeType, AccessKind == AK_Increment ? 1 : -1)) return false; LVal.moveInto(Subobj); return true; } bool foundString(APValue &Subobj, QualType SubobjType, uint64_t Character) { llvm_unreachable("shouldn't encounter string elements here"); } }; } // end anonymous namespace /// Perform an increment or decrement on LVal. 
static bool handleIncDec(EvalInfo &Info, const Expr *E, const LValue &LVal, QualType LValType, bool IsIncrement, APValue *Old) { if (LVal.Designator.Invalid) return false; if (!Info.getLangOpts().CPlusPlus14) { Info.FFDiag(E); return false; } AccessKinds AK = IsIncrement ? AK_Increment : AK_Decrement; CompleteObject Obj = findCompleteObject(Info, E, AK, LVal, LValType); IncDecSubobjectHandler Handler = { Info, E, AK, Old }; return Obj && findSubobject(Info, E, Obj, LVal.Designator, Handler); } /// Build an lvalue for the object argument of a member function call. static bool EvaluateObjectArgument(EvalInfo &Info, const Expr *Object, LValue &This) { if (Object->getType()->isPointerType()) return EvaluatePointer(Object, This, Info); if (Object->isGLValue()) return EvaluateLValue(Object, This, Info); if (Object->getType()->isLiteralType(Info.Ctx)) return EvaluateTemporary(Object, This, Info); Info.FFDiag(Object, diag::note_constexpr_nonliteral) << Object->getType(); return false; } /// HandleMemberPointerAccess - Evaluate a member access operation and build an /// lvalue referring to the result. /// /// \param Info - Information about the ongoing evaluation. /// \param LV - An lvalue referring to the base of the member pointer. /// \param RHS - The member pointer expression. /// \param IncludeMember - Specifies whether the member itself is included in /// the resulting LValue subobject designator. This is not possible when /// creating a bound member function. /// \return The field or method declaration to which the member pointer refers, /// or 0 if evaluation fails. static const ValueDecl *HandleMemberPointerAccess(EvalInfo &Info, QualType LVType, LValue &LV, const Expr *RHS, bool IncludeMember = true) { MemberPtr MemPtr; if (!EvaluateMemberPointer(RHS, MemPtr, Info)) return nullptr; // C++11 [expr.mptr.oper]p6: If the second operand is the null pointer to // member value, the behavior is undefined. if (!MemPtr.getDecl()) { // FIXME: Specific diagnostic. 
Info.FFDiag(RHS); return nullptr; } if (MemPtr.isDerivedMember()) { // This is a member of some derived class. Truncate LV appropriately. // The end of the derived-to-base path for the base object must match the // derived-to-base path for the member pointer. if (LV.Designator.MostDerivedPathLength + MemPtr.Path.size() > LV.Designator.Entries.size()) { Info.FFDiag(RHS); return nullptr; } unsigned PathLengthToMember = LV.Designator.Entries.size() - MemPtr.Path.size(); for (unsigned I = 0, N = MemPtr.Path.size(); I != N; ++I) { const CXXRecordDecl *LVDecl = getAsBaseClass( LV.Designator.Entries[PathLengthToMember + I]); const CXXRecordDecl *MPDecl = MemPtr.Path[I]; if (LVDecl->getCanonicalDecl() != MPDecl->getCanonicalDecl()) { Info.FFDiag(RHS); return nullptr; } } // Truncate the lvalue to the appropriate derived class. if (!CastToDerivedClass(Info, RHS, LV, MemPtr.getContainingRecord(), PathLengthToMember)) return nullptr; } else if (!MemPtr.Path.empty()) { // Extend the LValue path with the member pointer's path. LV.Designator.Entries.reserve(LV.Designator.Entries.size() + MemPtr.Path.size() + IncludeMember); // Walk down to the appropriate base class. if (const PointerType *PT = LVType->getAs()) LVType = PT->getPointeeType(); const CXXRecordDecl *RD = LVType->getAsCXXRecordDecl(); assert(RD && "member pointer access on non-class-type expression"); // The first class in the path is that of the lvalue. for (unsigned I = 1, N = MemPtr.Path.size(); I != N; ++I) { const CXXRecordDecl *Base = MemPtr.Path[N - I - 1]; if (!HandleLValueDirectBase(Info, RHS, LV, RD, Base)) return nullptr; RD = Base; } // Finally cast to the class containing the member. if (!HandleLValueDirectBase(Info, RHS, LV, RD, MemPtr.getContainingRecord())) return nullptr; } // Add the member. Note that we cannot build bound member functions here. 
if (IncludeMember) { if (const FieldDecl *FD = dyn_cast(MemPtr.getDecl())) { if (!HandleLValueMember(Info, RHS, LV, FD)) return nullptr; } else if (const IndirectFieldDecl *IFD = dyn_cast(MemPtr.getDecl())) { if (!HandleLValueIndirectMember(Info, RHS, LV, IFD)) return nullptr; } else { llvm_unreachable("can't construct reference to bound member function"); } } return MemPtr.getDecl(); } static const ValueDecl *HandleMemberPointerAccess(EvalInfo &Info, const BinaryOperator *BO, LValue &LV, bool IncludeMember = true) { assert(BO->getOpcode() == BO_PtrMemD || BO->getOpcode() == BO_PtrMemI); if (!EvaluateObjectArgument(Info, BO->getLHS(), LV)) { if (Info.noteFailure()) { MemberPtr MemPtr; EvaluateMemberPointer(BO->getRHS(), MemPtr, Info); } return nullptr; } return HandleMemberPointerAccess(Info, BO->getLHS()->getType(), LV, BO->getRHS(), IncludeMember); } /// HandleBaseToDerivedCast - Apply the given base-to-derived cast operation on /// the provided lvalue, which currently refers to the base object. static bool HandleBaseToDerivedCast(EvalInfo &Info, const CastExpr *E, LValue &Result) { SubobjectDesignator &D = Result.Designator; if (D.Invalid || !Result.checkNullPointer(Info, E, CSK_Derived)) return false; QualType TargetQT = E->getType(); if (const PointerType *PT = TargetQT->getAs()) TargetQT = PT->getPointeeType(); // Check this cast lands within the final derived-to-base subobject path. if (D.MostDerivedPathLength + E->path_size() > D.Entries.size()) { Info.CCEDiag(E, diag::note_constexpr_invalid_downcast) << D.MostDerivedType << TargetQT; return false; } // Check the type of the final cast. We don't need to check the path, // since a cast can only be formed if the path is unique. 
unsigned NewEntriesSize = D.Entries.size() - E->path_size(); const CXXRecordDecl *TargetType = TargetQT->getAsCXXRecordDecl(); const CXXRecordDecl *FinalType; if (NewEntriesSize == D.MostDerivedPathLength) FinalType = D.MostDerivedType->getAsCXXRecordDecl(); else FinalType = getAsBaseClass(D.Entries[NewEntriesSize - 1]); if (FinalType->getCanonicalDecl() != TargetType->getCanonicalDecl()) { Info.CCEDiag(E, diag::note_constexpr_invalid_downcast) << D.MostDerivedType << TargetQT; return false; } // Truncate the lvalue to the appropriate derived class. return CastToDerivedClass(Info, E, Result, TargetType, NewEntriesSize); } namespace { enum EvalStmtResult { /// Evaluation failed. ESR_Failed, /// Hit a 'return' statement. ESR_Returned, /// Evaluation succeeded. ESR_Succeeded, /// Hit a 'continue' statement. ESR_Continue, /// Hit a 'break' statement. ESR_Break, /// Still scanning for 'case' or 'default' statement. ESR_CaseNotFound }; } static bool EvaluateVarDecl(EvalInfo &Info, const VarDecl *VD) { // We don't need to evaluate the initializer for a static local. if (!VD->hasLocalStorage()) return true; LValue Result; Result.set(VD, Info.CurrentCall->Index); APValue &Val = Info.CurrentCall->createTemporary(VD, true); const Expr *InitE = VD->getInit(); if (!InitE) { Info.FFDiag(VD->getLocStart(), diag::note_constexpr_uninitialized) << false << VD->getType(); Val = APValue(); return false; } if (InitE->isValueDependent()) return false; if (!EvaluateInPlace(Val, Info, Result, InitE)) { // Wipe out any partially-computed value, to allow tracking that this // evaluation failed. 
Val = APValue(); return false; } return true; } static bool EvaluateDecl(EvalInfo &Info, const Decl *D) { bool OK = true; if (const VarDecl *VD = dyn_cast(D)) OK &= EvaluateVarDecl(Info, VD); if (const DecompositionDecl *DD = dyn_cast(D)) for (auto *BD : DD->bindings()) if (auto *VD = BD->getHoldingVar()) OK &= EvaluateDecl(Info, VD); return OK; } /// Evaluate a condition (either a variable declaration or an expression). static bool EvaluateCond(EvalInfo &Info, const VarDecl *CondDecl, const Expr *Cond, bool &Result) { FullExpressionRAII Scope(Info); if (CondDecl && !EvaluateDecl(Info, CondDecl)) return false; return EvaluateAsBooleanCondition(Cond, Result, Info); } namespace { /// \brief A location where the result (returned value) of evaluating a /// statement should be stored. struct StmtResult { /// The APValue that should be filled in with the returned value. APValue &Value; /// The location containing the result, if any (used to support RVO). const LValue *Slot; }; } static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, const Stmt *S, const SwitchCase *SC = nullptr); /// Evaluate the body of a loop, and translate the result as appropriate. static EvalStmtResult EvaluateLoopBody(StmtResult &Result, EvalInfo &Info, const Stmt *Body, const SwitchCase *Case = nullptr) { BlockScopeRAII Scope(Info); switch (EvalStmtResult ESR = EvaluateStmt(Result, Info, Body, Case)) { case ESR_Break: return ESR_Succeeded; case ESR_Succeeded: case ESR_Continue: return ESR_Continue; case ESR_Failed: case ESR_Returned: case ESR_CaseNotFound: return ESR; } llvm_unreachable("Invalid EvalStmtResult!"); } /// Evaluate a switch statement. static EvalStmtResult EvaluateSwitch(StmtResult &Result, EvalInfo &Info, const SwitchStmt *SS) { BlockScopeRAII Scope(Info); // Evaluate the switch condition. 
APSInt Value; { FullExpressionRAII Scope(Info); if (const Stmt *Init = SS->getInit()) { EvalStmtResult ESR = EvaluateStmt(Result, Info, Init); if (ESR != ESR_Succeeded) return ESR; } if (SS->getConditionVariable() && !EvaluateDecl(Info, SS->getConditionVariable())) return ESR_Failed; if (!EvaluateInteger(SS->getCond(), Value, Info)) return ESR_Failed; } // Find the switch case corresponding to the value of the condition. // FIXME: Cache this lookup. const SwitchCase *Found = nullptr; for (const SwitchCase *SC = SS->getSwitchCaseList(); SC; SC = SC->getNextSwitchCase()) { if (isa(SC)) { Found = SC; continue; } const CaseStmt *CS = cast(SC); APSInt LHS = CS->getLHS()->EvaluateKnownConstInt(Info.Ctx); APSInt RHS = CS->getRHS() ? CS->getRHS()->EvaluateKnownConstInt(Info.Ctx) : LHS; if (LHS <= Value && Value <= RHS) { Found = SC; break; } } if (!Found) return ESR_Succeeded; // Search the switch body for the switch case and evaluate it from there. switch (EvalStmtResult ESR = EvaluateStmt(Result, Info, SS->getBody(), Found)) { case ESR_Break: return ESR_Succeeded; case ESR_Succeeded: case ESR_Continue: case ESR_Failed: case ESR_Returned: return ESR; case ESR_CaseNotFound: // This can only happen if the switch case is nested within a statement // expression. We have no intention of supporting that. Info.FFDiag(Found->getLocStart(), diag::note_constexpr_stmt_expr_unsupported); return ESR_Failed; } llvm_unreachable("Invalid EvalStmtResult!"); } // Evaluate a statement. static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, const Stmt *S, const SwitchCase *Case) { if (!Info.nextStep(S)) return ESR_Failed; // If we're hunting down a 'case' or 'default' label, recurse through // substatements until we hit the label. if (Case) { // FIXME: We don't start the lifetime of objects whose initialization we // jump over. 
However, such objects must be of class type with a trivial // default constructor that initialize all subobjects, so must be empty, // so this almost never matters. switch (S->getStmtClass()) { case Stmt::CompoundStmtClass: // FIXME: Precompute which substatement of a compound statement we // would jump to, and go straight there rather than performing a // linear scan each time. case Stmt::LabelStmtClass: case Stmt::AttributedStmtClass: case Stmt::DoStmtClass: break; case Stmt::CaseStmtClass: case Stmt::DefaultStmtClass: if (Case == S) Case = nullptr; break; case Stmt::IfStmtClass: { // FIXME: Precompute which side of an 'if' we would jump to, and go // straight there rather than scanning both sides. const IfStmt *IS = cast(S); // Wrap the evaluation in a block scope, in case it's a DeclStmt // preceded by our switch label. BlockScopeRAII Scope(Info); EvalStmtResult ESR = EvaluateStmt(Result, Info, IS->getThen(), Case); if (ESR != ESR_CaseNotFound || !IS->getElse()) return ESR; return EvaluateStmt(Result, Info, IS->getElse(), Case); } case Stmt::WhileStmtClass: { EvalStmtResult ESR = EvaluateLoopBody(Result, Info, cast(S)->getBody(), Case); if (ESR != ESR_Continue) return ESR; break; } case Stmt::ForStmtClass: { const ForStmt *FS = cast(S); EvalStmtResult ESR = EvaluateLoopBody(Result, Info, FS->getBody(), Case); if (ESR != ESR_Continue) return ESR; if (FS->getInc()) { FullExpressionRAII IncScope(Info); if (!EvaluateIgnoredValue(Info, FS->getInc())) return ESR_Failed; } break; } case Stmt::DeclStmtClass: // FIXME: If the variable has initialization that can't be jumped over, // bail out of any immediately-surrounding compound-statement too. default: return ESR_CaseNotFound; } } switch (S->getStmtClass()) { default: if (const Expr *E = dyn_cast(S)) { // Don't bother evaluating beyond an expression-statement which couldn't // be evaluated. 
FullExpressionRAII Scope(Info); if (!EvaluateIgnoredValue(Info, E)) return ESR_Failed; return ESR_Succeeded; } Info.FFDiag(S->getLocStart()); return ESR_Failed; case Stmt::NullStmtClass: return ESR_Succeeded; case Stmt::DeclStmtClass: { const DeclStmt *DS = cast(S); for (const auto *DclIt : DS->decls()) { // Each declaration initialization is its own full-expression. // FIXME: This isn't quite right; if we're performing aggregate // initialization, each braced subexpression is its own full-expression. FullExpressionRAII Scope(Info); if (!EvaluateDecl(Info, DclIt) && !Info.noteFailure()) return ESR_Failed; } return ESR_Succeeded; } case Stmt::ReturnStmtClass: { const Expr *RetExpr = cast(S)->getRetValue(); FullExpressionRAII Scope(Info); if (RetExpr && !(Result.Slot ? EvaluateInPlace(Result.Value, Info, *Result.Slot, RetExpr) : Evaluate(Result.Value, Info, RetExpr))) return ESR_Failed; return ESR_Returned; } case Stmt::CompoundStmtClass: { BlockScopeRAII Scope(Info); const CompoundStmt *CS = cast(S); for (const auto *BI : CS->body()) { EvalStmtResult ESR = EvaluateStmt(Result, Info, BI, Case); if (ESR == ESR_Succeeded) Case = nullptr; else if (ESR != ESR_CaseNotFound) return ESR; } return Case ? ESR_CaseNotFound : ESR_Succeeded; } case Stmt::IfStmtClass: { const IfStmt *IS = cast(S); // Evaluate the condition, as either a var decl or as an expression. BlockScopeRAII Scope(Info); if (const Stmt *Init = IS->getInit()) { EvalStmtResult ESR = EvaluateStmt(Result, Info, Init); if (ESR != ESR_Succeeded) return ESR; } bool Cond; if (!EvaluateCond(Info, IS->getConditionVariable(), IS->getCond(), Cond)) return ESR_Failed; if (const Stmt *SubStmt = Cond ? 
IS->getThen() : IS->getElse()) { EvalStmtResult ESR = EvaluateStmt(Result, Info, SubStmt); if (ESR != ESR_Succeeded) return ESR; } return ESR_Succeeded; } case Stmt::WhileStmtClass: { const WhileStmt *WS = cast(S); while (true) { BlockScopeRAII Scope(Info); bool Continue; if (!EvaluateCond(Info, WS->getConditionVariable(), WS->getCond(), Continue)) return ESR_Failed; if (!Continue) break; EvalStmtResult ESR = EvaluateLoopBody(Result, Info, WS->getBody()); if (ESR != ESR_Continue) return ESR; } return ESR_Succeeded; } case Stmt::DoStmtClass: { const DoStmt *DS = cast(S); bool Continue; do { EvalStmtResult ESR = EvaluateLoopBody(Result, Info, DS->getBody(), Case); if (ESR != ESR_Continue) return ESR; Case = nullptr; FullExpressionRAII CondScope(Info); if (!EvaluateAsBooleanCondition(DS->getCond(), Continue, Info)) return ESR_Failed; } while (Continue); return ESR_Succeeded; } case Stmt::ForStmtClass: { const ForStmt *FS = cast(S); BlockScopeRAII Scope(Info); if (FS->getInit()) { EvalStmtResult ESR = EvaluateStmt(Result, Info, FS->getInit()); if (ESR != ESR_Succeeded) return ESR; } while (true) { BlockScopeRAII Scope(Info); bool Continue = true; if (FS->getCond() && !EvaluateCond(Info, FS->getConditionVariable(), FS->getCond(), Continue)) return ESR_Failed; if (!Continue) break; EvalStmtResult ESR = EvaluateLoopBody(Result, Info, FS->getBody()); if (ESR != ESR_Continue) return ESR; if (FS->getInc()) { FullExpressionRAII IncScope(Info); if (!EvaluateIgnoredValue(Info, FS->getInc())) return ESR_Failed; } } return ESR_Succeeded; } case Stmt::CXXForRangeStmtClass: { const CXXForRangeStmt *FS = cast(S); BlockScopeRAII Scope(Info); // Initialize the __range variable. EvalStmtResult ESR = EvaluateStmt(Result, Info, FS->getRangeStmt()); if (ESR != ESR_Succeeded) return ESR; // Create the __begin and __end iterators. 
ESR = EvaluateStmt(Result, Info, FS->getBeginStmt()); if (ESR != ESR_Succeeded) return ESR; ESR = EvaluateStmt(Result, Info, FS->getEndStmt()); if (ESR != ESR_Succeeded) return ESR; while (true) { // Condition: __begin != __end. { bool Continue = true; FullExpressionRAII CondExpr(Info); if (!EvaluateAsBooleanCondition(FS->getCond(), Continue, Info)) return ESR_Failed; if (!Continue) break; } // User's variable declaration, initialized by *__begin. BlockScopeRAII InnerScope(Info); ESR = EvaluateStmt(Result, Info, FS->getLoopVarStmt()); if (ESR != ESR_Succeeded) return ESR; // Loop body. ESR = EvaluateLoopBody(Result, Info, FS->getBody()); if (ESR != ESR_Continue) return ESR; // Increment: ++__begin if (!EvaluateIgnoredValue(Info, FS->getInc())) return ESR_Failed; } return ESR_Succeeded; } case Stmt::SwitchStmtClass: return EvaluateSwitch(Result, Info, cast(S)); case Stmt::ContinueStmtClass: return ESR_Continue; case Stmt::BreakStmtClass: return ESR_Break; case Stmt::LabelStmtClass: return EvaluateStmt(Result, Info, cast(S)->getSubStmt(), Case); case Stmt::AttributedStmtClass: // As a general principle, C++11 attributes can be ignored without // any semantic impact. return EvaluateStmt(Result, Info, cast(S)->getSubStmt(), Case); case Stmt::CaseStmtClass: case Stmt::DefaultStmtClass: return EvaluateStmt(Result, Info, cast(S)->getSubStmt(), Case); } } /// CheckTrivialDefaultConstructor - Check whether a constructor is a trivial /// default constructor. If so, we'll fold it whether or not it's marked as /// constexpr. If it is marked as constexpr, we will never implicitly define it, /// so we need special handling. 
static bool CheckTrivialDefaultConstructor(EvalInfo &Info, SourceLocation Loc, const CXXConstructorDecl *CD, bool IsValueInitialization) { if (!CD->isTrivial() || !CD->isDefaultConstructor()) return false; // Value-initialization does not call a trivial default constructor, so such a // call is a core constant expression whether or not the constructor is // constexpr. if (!CD->isConstexpr() && !IsValueInitialization) { if (Info.getLangOpts().CPlusPlus11) { // FIXME: If DiagDecl is an implicitly-declared special member function, // we should be much more explicit about why it's not constexpr. Info.CCEDiag(Loc, diag::note_constexpr_invalid_function, 1) << /*IsConstexpr*/0 << /*IsConstructor*/1 << CD; Info.Note(CD->getLocation(), diag::note_declared_at); } else { Info.CCEDiag(Loc, diag::note_invalid_subexpr_in_const_expr); } } return true; } /// CheckConstexprFunction - Check that a function can be called in a constant /// expression. static bool CheckConstexprFunction(EvalInfo &Info, SourceLocation CallLoc, const FunctionDecl *Declaration, const FunctionDecl *Definition, const Stmt *Body) { // Potential constant expressions can contain calls to declared, but not yet // defined, constexpr functions. if (Info.checkingPotentialConstantExpression() && !Definition && Declaration->isConstexpr()) return false; // Bail out with no diagnostic if the function declaration itself is invalid. // We will have produced a relevant diagnostic while parsing it. if (Declaration->isInvalidDecl()) return false; // Can we evaluate this function call? if (Definition && Definition->isConstexpr() && !Definition->isInvalidDecl() && Body) return true; if (Info.getLangOpts().CPlusPlus11) { const FunctionDecl *DiagDecl = Definition ? Definition : Declaration; // If this function is not constexpr because it is an inherited // non-constexpr constructor, diagnose that directly. 
auto *CD = dyn_cast(DiagDecl); if (CD && CD->isInheritingConstructor()) { auto *Inherited = CD->getInheritedConstructor().getConstructor(); if (!Inherited->isConstexpr()) DiagDecl = CD = Inherited; } // FIXME: If DiagDecl is an implicitly-declared special member function // or an inheriting constructor, we should be much more explicit about why // it's not constexpr. if (CD && CD->isInheritingConstructor()) Info.FFDiag(CallLoc, diag::note_constexpr_invalid_inhctor, 1) << CD->getInheritedConstructor().getConstructor()->getParent(); else Info.FFDiag(CallLoc, diag::note_constexpr_invalid_function, 1) << DiagDecl->isConstexpr() << (bool)CD << DiagDecl; Info.Note(DiagDecl->getLocation(), diag::note_declared_at); } else { Info.FFDiag(CallLoc, diag::note_invalid_subexpr_in_const_expr); } return false; } /// Determine if a class has any fields that might need to be copied by a /// trivial copy or move operation. static bool hasFields(const CXXRecordDecl *RD) { if (!RD || RD->isEmpty()) return false; for (auto *FD : RD->fields()) { if (FD->isUnnamedBitfield()) continue; return true; } for (auto &Base : RD->bases()) if (hasFields(Base.getType()->getAsCXXRecordDecl())) return true; return false; } namespace { typedef SmallVector ArgVector; } /// EvaluateArgs - Evaluate the arguments to a function call. static bool EvaluateArgs(ArrayRef Args, ArgVector &ArgValues, EvalInfo &Info) { bool Success = true; for (ArrayRef::iterator I = Args.begin(), E = Args.end(); I != E; ++I) { if (!Evaluate(ArgValues[I - Args.begin()], Info, *I)) { // If we're checking for a potential constant expression, evaluate all // initializers even if some of them fail. if (!Info.noteFailure()) return false; Success = false; } } return Success; } /// Evaluate a function call. 
static bool HandleFunctionCall(SourceLocation CallLoc, const FunctionDecl *Callee, const LValue *This, ArrayRef Args, const Stmt *Body, EvalInfo &Info, APValue &Result, const LValue *ResultSlot) { ArgVector ArgValues(Args.size()); if (!EvaluateArgs(Args, ArgValues, Info)) return false; if (!Info.CheckCallLimit(CallLoc)) return false; CallStackFrame Frame(Info, CallLoc, Callee, This, ArgValues.data()); // For a trivial copy or move assignment, perform an APValue copy. This is // essential for unions, where the operations performed by the assignment // operator cannot be represented as statements. // // Skip this for non-union classes with no fields; in that case, the defaulted // copy/move does not actually read the object. const CXXMethodDecl *MD = dyn_cast(Callee); if (MD && MD->isDefaulted() && (MD->getParent()->isUnion() || (MD->isTrivial() && hasFields(MD->getParent())))) { assert(This && (MD->isCopyAssignmentOperator() || MD->isMoveAssignmentOperator())); LValue RHS; RHS.setFrom(Info.Ctx, ArgValues[0]); APValue RHSValue; if (!handleLValueToRValueConversion(Info, Args[0], Args[0]->getType(), RHS, RHSValue)) return false; if (!handleAssignment(Info, Args[0], *This, MD->getThisType(Info.Ctx), RHSValue)) return false; This->moveInto(Result); return true; } StmtResult Ret = {Result, ResultSlot}; EvalStmtResult ESR = EvaluateStmt(Ret, Info, Body); if (ESR == ESR_Succeeded) { if (Callee->getReturnType()->isVoidType()) return true; Info.FFDiag(Callee->getLocEnd(), diag::note_constexpr_no_return); } return ESR == ESR_Returned; } /// Evaluate a constructor call. 
static bool HandleConstructorCall(const Expr *E, const LValue &This, APValue *ArgValues, const CXXConstructorDecl *Definition, EvalInfo &Info, APValue &Result) { SourceLocation CallLoc = E->getExprLoc(); if (!Info.CheckCallLimit(CallLoc)) return false; const CXXRecordDecl *RD = Definition->getParent(); if (RD->getNumVBases()) { Info.FFDiag(CallLoc, diag::note_constexpr_virtual_base) << RD; return false; } CallStackFrame Frame(Info, CallLoc, Definition, &This, ArgValues); // FIXME: Creating an APValue just to hold a nonexistent return value is // wasteful. APValue RetVal; StmtResult Ret = {RetVal, nullptr}; // If it's a delegating constructor, delegate. if (Definition->isDelegatingConstructor()) { CXXConstructorDecl::init_const_iterator I = Definition->init_begin(); { FullExpressionRAII InitScope(Info); if (!EvaluateInPlace(Result, Info, This, (*I)->getInit())) return false; } return EvaluateStmt(Ret, Info, Definition->getBody()) != ESR_Failed; } // For a trivial copy or move constructor, perform an APValue copy. This is // essential for unions (or classes with anonymous union members), where the // operations performed by the constructor cannot be represented by // ctor-initializers. // // Skip this for empty non-union classes; we should not perform an // lvalue-to-rvalue conversion on them because their copy constructor does not // actually read them. if (Definition->isDefaulted() && Definition->isCopyOrMoveConstructor() && (Definition->getParent()->isUnion() || (Definition->isTrivial() && hasFields(Definition->getParent())))) { LValue RHS; RHS.setFrom(Info.Ctx, ArgValues[0]); return handleLValueToRValueConversion( Info, E, Definition->getParamDecl(0)->getType().getNonReferenceType(), RHS, Result); } // Reserve space for the struct members. 
if (!RD->isUnion() && Result.isUninit()) Result = APValue(APValue::UninitStruct(), RD->getNumBases(), std::distance(RD->field_begin(), RD->field_end())); if (RD->isInvalidDecl()) return false; const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(RD); // A scope for temporaries lifetime-extended by reference members. BlockScopeRAII LifetimeExtendedScope(Info); bool Success = true; unsigned BasesSeen = 0; #ifndef NDEBUG CXXRecordDecl::base_class_const_iterator BaseIt = RD->bases_begin(); #endif for (const auto *I : Definition->inits()) { LValue Subobject = This; APValue *Value = &Result; // Determine the subobject to initialize. FieldDecl *FD = nullptr; if (I->isBaseInitializer()) { QualType BaseType(I->getBaseClass(), 0); #ifndef NDEBUG // Non-virtual base classes are initialized in the order in the class // definition. We have already checked for virtual base classes. assert(!BaseIt->isVirtual() && "virtual base for literal type"); assert(Info.Ctx.hasSameType(BaseIt->getType(), BaseType) && "base class initializers not in expected order"); ++BaseIt; #endif if (!HandleLValueDirectBase(Info, I->getInit(), Subobject, RD, BaseType->getAsCXXRecordDecl(), &Layout)) return false; Value = &Result.getStructBase(BasesSeen++); } else if ((FD = I->getMember())) { if (!HandleLValueMember(Info, I->getInit(), Subobject, FD, &Layout)) return false; if (RD->isUnion()) { Result = APValue(FD); Value = &Result.getUnionValue(); } else { Value = &Result.getStructField(FD->getFieldIndex()); } } else if (IndirectFieldDecl *IFD = I->getIndirectMember()) { // Walk the indirect field decl's chain to find the object to initialize, // and make sure we've initialized every step along it. for (auto *C : IFD->chain()) { FD = cast(C); CXXRecordDecl *CD = cast(FD->getParent()); // Switch the union field if it differs. This happens if we had // preceding zero-initialization, and we're now initializing a union // subobject other than the first. 
// FIXME: In this case, the values of the other subobjects are // specified, since zero-initialization sets all padding bits to zero. if (Value->isUninit() || (Value->isUnion() && Value->getUnionField() != FD)) { if (CD->isUnion()) *Value = APValue(FD); else *Value = APValue(APValue::UninitStruct(), CD->getNumBases(), std::distance(CD->field_begin(), CD->field_end())); } if (!HandleLValueMember(Info, I->getInit(), Subobject, FD)) return false; if (CD->isUnion()) Value = &Value->getUnionValue(); else Value = &Value->getStructField(FD->getFieldIndex()); } } else { llvm_unreachable("unknown base initializer kind"); } FullExpressionRAII InitScope(Info); if (!EvaluateInPlace(*Value, Info, Subobject, I->getInit()) || (FD && FD->isBitField() && !truncateBitfieldValue(Info, I->getInit(), *Value, FD))) { // If we're checking for a potential constant expression, evaluate all // initializers even if some of them fail. if (!Info.noteFailure()) return false; Success = false; } } return Success && EvaluateStmt(Ret, Info, Definition->getBody()) != ESR_Failed; } static bool HandleConstructorCall(const Expr *E, const LValue &This, ArrayRef Args, const CXXConstructorDecl *Definition, EvalInfo &Info, APValue &Result) { ArgVector ArgValues(Args.size()); if (!EvaluateArgs(Args, ArgValues, Info)) return false; return HandleConstructorCall(E, This, ArgValues.data(), Definition, Info, Result); } //===----------------------------------------------------------------------===// // Generic Evaluation //===----------------------------------------------------------------------===// namespace { template class ExprEvaluatorBase : public ConstStmtVisitor { private: Derived &getDerived() { return static_cast(*this); } bool DerivedSuccess(const APValue &V, const Expr *E) { return getDerived().Success(V, E); } bool DerivedZeroInitialization(const Expr *E) { return getDerived().ZeroInitialization(E); } // Check whether a conditional operator with a non-constant condition is a // potential constant 
expression. If neither arm is a potential constant // expression, then the conditional operator is not either. template void CheckPotentialConstantConditional(const ConditionalOperator *E) { assert(Info.checkingPotentialConstantExpression()); // Speculatively evaluate both arms. SmallVector Diag; { SpeculativeEvaluationRAII Speculate(Info, &Diag); StmtVisitorTy::Visit(E->getFalseExpr()); if (Diag.empty()) return; } { SpeculativeEvaluationRAII Speculate(Info, &Diag); Diag.clear(); StmtVisitorTy::Visit(E->getTrueExpr()); if (Diag.empty()) return; } Error(E, diag::note_constexpr_conditional_never_const); } template bool HandleConditionalOperator(const ConditionalOperator *E) { bool BoolResult; if (!EvaluateAsBooleanCondition(E->getCond(), BoolResult, Info)) { if (Info.checkingPotentialConstantExpression() && Info.noteFailure()) CheckPotentialConstantConditional(E); return false; } Expr *EvalExpr = BoolResult ? E->getTrueExpr() : E->getFalseExpr(); return StmtVisitorTy::Visit(EvalExpr); } protected: EvalInfo &Info; typedef ConstStmtVisitor StmtVisitorTy; typedef ExprEvaluatorBase ExprEvaluatorBaseTy; OptionalDiagnostic CCEDiag(const Expr *E, diag::kind D) { return Info.CCEDiag(E, D); } bool ZeroInitialization(const Expr *E) { return Error(E); } public: ExprEvaluatorBase(EvalInfo &Info) : Info(Info) {} EvalInfo &getEvalInfo() { return Info; } /// Report an evaluation error. This should only be called when an error is /// first discovered. When propagating an error, just return false. 
bool Error(const Expr *E, diag::kind D) { Info.FFDiag(E, D); return false; } bool Error(const Expr *E) { return Error(E, diag::note_invalid_subexpr_in_const_expr); } bool VisitStmt(const Stmt *) { llvm_unreachable("Expression evaluator should not be called on stmts"); } bool VisitExpr(const Expr *E) { return Error(E); } bool VisitParenExpr(const ParenExpr *E) { return StmtVisitorTy::Visit(E->getSubExpr()); } bool VisitUnaryExtension(const UnaryOperator *E) { return StmtVisitorTy::Visit(E->getSubExpr()); } bool VisitUnaryPlus(const UnaryOperator *E) { return StmtVisitorTy::Visit(E->getSubExpr()); } bool VisitChooseExpr(const ChooseExpr *E) { return StmtVisitorTy::Visit(E->getChosenSubExpr()); } bool VisitGenericSelectionExpr(const GenericSelectionExpr *E) { return StmtVisitorTy::Visit(E->getResultExpr()); } bool VisitSubstNonTypeTemplateParmExpr(const SubstNonTypeTemplateParmExpr *E) { return StmtVisitorTy::Visit(E->getReplacement()); } bool VisitCXXDefaultArgExpr(const CXXDefaultArgExpr *E) { return StmtVisitorTy::Visit(E->getExpr()); } bool VisitCXXDefaultInitExpr(const CXXDefaultInitExpr *E) { // The initializer may not have been parsed yet, or might be erroneous. if (!E->getExpr()) return Error(E); return StmtVisitorTy::Visit(E->getExpr()); } // We cannot create any objects for which cleanups are required, so there is // nothing to do here; all cleanups must come from unevaluated subexpressions. 
bool VisitExprWithCleanups(const ExprWithCleanups *E) { return StmtVisitorTy::Visit(E->getSubExpr()); } bool VisitCXXReinterpretCastExpr(const CXXReinterpretCastExpr *E) { CCEDiag(E, diag::note_constexpr_invalid_cast) << 0; return static_cast(this)->VisitCastExpr(E); } bool VisitCXXDynamicCastExpr(const CXXDynamicCastExpr *E) { CCEDiag(E, diag::note_constexpr_invalid_cast) << 1; return static_cast(this)->VisitCastExpr(E); } bool VisitBinaryOperator(const BinaryOperator *E) { switch (E->getOpcode()) { default: return Error(E); case BO_Comma: VisitIgnoredValue(E->getLHS()); return StmtVisitorTy::Visit(E->getRHS()); case BO_PtrMemD: case BO_PtrMemI: { LValue Obj; if (!HandleMemberPointerAccess(Info, E, Obj)) return false; APValue Result; if (!handleLValueToRValueConversion(Info, E, E->getType(), Obj, Result)) return false; return DerivedSuccess(Result, E); } } } bool VisitBinaryConditionalOperator(const BinaryConditionalOperator *E) { // Evaluate and cache the common expression. We treat it as a temporary, // even though it's not quite the same thing. if (!Evaluate(Info.CurrentCall->createTemporary(E->getOpaqueValue(), false), Info, E->getCommon())) return false; return HandleConditionalOperator(E); } bool VisitConditionalOperator(const ConditionalOperator *E) { bool IsBcpCall = false; // If the condition (ignoring parens) is a __builtin_constant_p call, // the result is a constant expression if it can be folded without // side-effects. This is an important GNU extension. See GCC PR38377 // for discussion. if (const CallExpr *CallCE = dyn_cast(E->getCond()->IgnoreParenCasts())) if (CallCE->getBuiltinCallee() == Builtin::BI__builtin_constant_p) IsBcpCall = true; // Always assume __builtin_constant_p(...) ? ... : ... is a potential // constant expression; we can't check whether it's potentially foldable. 
if (Info.checkingPotentialConstantExpression() && IsBcpCall) return false; FoldConstant Fold(Info, IsBcpCall); if (!HandleConditionalOperator(E)) { Fold.keepDiagnostics(); return false; } return true; } bool VisitOpaqueValueExpr(const OpaqueValueExpr *E) { if (APValue *Value = Info.CurrentCall->getTemporary(E)) return DerivedSuccess(*Value, E); const Expr *Source = E->getSourceExpr(); if (!Source) return Error(E); if (Source == E) { // sanity checking. assert(0 && "OpaqueValueExpr recursively refers to itself"); return Error(E); } return StmtVisitorTy::Visit(Source); } bool VisitCallExpr(const CallExpr *E) { APValue Result; if (!handleCallExpr(E, Result, nullptr)) return false; return DerivedSuccess(Result, E); } bool handleCallExpr(const CallExpr *E, APValue &Result, const LValue *ResultSlot) { const Expr *Callee = E->getCallee()->IgnoreParens(); QualType CalleeType = Callee->getType(); const FunctionDecl *FD = nullptr; LValue *This = nullptr, ThisVal; auto Args = llvm::makeArrayRef(E->getArgs(), E->getNumArgs()); bool HasQualifier = false; // Extract function decl and 'this' pointer from the callee. if (CalleeType->isSpecificBuiltinType(BuiltinType::BoundMember)) { const ValueDecl *Member = nullptr; if (const MemberExpr *ME = dyn_cast(Callee)) { // Explicit bound member calls, such as x.f() or p->g(); if (!EvaluateObjectArgument(Info, ME->getBase(), ThisVal)) return false; Member = ME->getMemberDecl(); This = &ThisVal; HasQualifier = ME->hasQualifier(); } else if (const BinaryOperator *BE = dyn_cast(Callee)) { // Indirect bound member calls ('.*' or '->*'). 
Member = HandleMemberPointerAccess(Info, BE, ThisVal, false); if (!Member) return false; This = &ThisVal; } else return Error(Callee); FD = dyn_cast(Member); if (!FD) return Error(Callee); } else if (CalleeType->isFunctionPointerType()) { LValue Call; if (!EvaluatePointer(Callee, Call, Info)) return false; if (!Call.getLValueOffset().isZero()) return Error(Callee); FD = dyn_cast_or_null( Call.getLValueBase().dyn_cast()); if (!FD) return Error(Callee); // Don't call function pointers which have been cast to some other type. // Per DR (no number yet), the caller and callee can differ in noexcept. if (!Info.Ctx.hasSameFunctionTypeIgnoringExceptionSpec( CalleeType->getPointeeType(), FD->getType())) { return Error(E); } // Overloaded operator calls to member functions are represented as normal // calls with '*this' as the first argument. const CXXMethodDecl *MD = dyn_cast(FD); if (MD && !MD->isStatic()) { // FIXME: When selecting an implicit conversion for an overloaded // operator delete, we sometimes try to evaluate calls to conversion // operators without a 'this' parameter! if (Args.empty()) return Error(E); if (!EvaluateObjectArgument(Info, Args[0], ThisVal)) return false; This = &ThisVal; Args = Args.slice(1); } else if (MD && MD->isLambdaStaticInvoker()) { // Map the static invoker for the lambda back to the call operator. // Conveniently, we don't have to slice out the 'this' argument (as is // being done for the non-static case), since a static member function // doesn't have an implicit argument passed in. const CXXRecordDecl *ClosureClass = MD->getParent(); assert( ClosureClass->captures_begin() == ClosureClass->captures_end() && "Number of captures must be zero for conversion to function-ptr"); const CXXMethodDecl *LambdaCallOp = ClosureClass->getLambdaCallOperator(); // Set 'FD', the function that will be called below, to the call // operator. If the closure object represents a generic lambda, find // the corresponding specialization of the call operator. 
if (ClosureClass->isGenericLambda()) { assert(MD->isFunctionTemplateSpecialization() && "A generic lambda's static-invoker function must be a " "template specialization"); const TemplateArgumentList *TAL = MD->getTemplateSpecializationArgs(); FunctionTemplateDecl *CallOpTemplate = LambdaCallOp->getDescribedFunctionTemplate(); void *InsertPos = nullptr; FunctionDecl *CorrespondingCallOpSpecialization = CallOpTemplate->findSpecialization(TAL->asArray(), InsertPos); assert(CorrespondingCallOpSpecialization && "We must always have a function call operator specialization " "that corresponds to our static invoker specialization"); FD = cast(CorrespondingCallOpSpecialization); } else FD = LambdaCallOp; } } else return Error(E); if (This && !This->checkSubobject(Info, E, CSK_This)) return false; // DR1358 allows virtual constexpr functions in some cases. Don't allow // calls to such functions in constant expressions. if (This && !HasQualifier && isa(FD) && cast(FD)->isVirtual()) return Error(E, diag::note_constexpr_virtual_call); const FunctionDecl *Definition = nullptr; Stmt *Body = FD->getBody(Definition); if (!CheckConstexprFunction(Info, E->getExprLoc(), FD, Definition, Body) || !HandleFunctionCall(E->getExprLoc(), Definition, This, Args, Body, Info, Result, ResultSlot)) return false; return true; } bool VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) { return StmtVisitorTy::Visit(E->getInitializer()); } bool VisitInitListExpr(const InitListExpr *E) { if (E->getNumInits() == 0) return DerivedZeroInitialization(E); if (E->getNumInits() == 1) return StmtVisitorTy::Visit(E->getInit(0)); return Error(E); } bool VisitImplicitValueInitExpr(const ImplicitValueInitExpr *E) { return DerivedZeroInitialization(E); } bool VisitCXXScalarValueInitExpr(const CXXScalarValueInitExpr *E) { return DerivedZeroInitialization(E); } bool VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *E) { return DerivedZeroInitialization(E); } /// A member expression where the object is a 
prvalue is itself a prvalue. bool VisitMemberExpr(const MemberExpr *E) { assert(!E->isArrow() && "missing call to bound member function?"); APValue Val; if (!Evaluate(Val, Info, E->getBase())) return false; QualType BaseTy = E->getBase()->getType(); const FieldDecl *FD = dyn_cast(E->getMemberDecl()); if (!FD) return Error(E); assert(!FD->getType()->isReferenceType() && "prvalue reference?"); assert(BaseTy->castAs()->getDecl()->getCanonicalDecl() == FD->getParent()->getCanonicalDecl() && "record / field mismatch"); CompleteObject Obj(&Val, BaseTy); SubobjectDesignator Designator(BaseTy); Designator.addDeclUnchecked(FD); APValue Result; return extractSubobject(Info, E, Obj, Designator, Result) && DerivedSuccess(Result, E); } bool VisitCastExpr(const CastExpr *E) { switch (E->getCastKind()) { default: break; case CK_AtomicToNonAtomic: { APValue AtomicVal; if (!EvaluateAtomic(E->getSubExpr(), AtomicVal, Info)) return false; return DerivedSuccess(AtomicVal, E); } case CK_NoOp: case CK_UserDefinedConversion: return StmtVisitorTy::Visit(E->getSubExpr()); case CK_LValueToRValue: { LValue LVal; if (!EvaluateLValue(E->getSubExpr(), LVal, Info)) return false; APValue RVal; // Note, we use the subexpression's type in order to retain cv-qualifiers. 
if (!handleLValueToRValueConversion(Info, E, E->getSubExpr()->getType(), LVal, RVal)) return false; return DerivedSuccess(RVal, E); } } return Error(E); } bool VisitUnaryPostInc(const UnaryOperator *UO) { return VisitUnaryPostIncDec(UO); } bool VisitUnaryPostDec(const UnaryOperator *UO) { return VisitUnaryPostIncDec(UO); } bool VisitUnaryPostIncDec(const UnaryOperator *UO) { if (!Info.getLangOpts().CPlusPlus14 && !Info.keepEvaluatingAfterFailure()) return Error(UO); LValue LVal; if (!EvaluateLValue(UO->getSubExpr(), LVal, Info)) return false; APValue RVal; if (!handleIncDec(this->Info, UO, LVal, UO->getSubExpr()->getType(), UO->isIncrementOp(), &RVal)) return false; return DerivedSuccess(RVal, UO); } bool VisitStmtExpr(const StmtExpr *E) { // We will have checked the full-expressions inside the statement expression // when they were completed, and don't need to check them again now. if (Info.checkingForOverflow()) return Error(E); BlockScopeRAII Scope(Info); const CompoundStmt *CS = E->getSubStmt(); if (CS->body_empty()) return true; for (CompoundStmt::const_body_iterator BI = CS->body_begin(), BE = CS->body_end(); /**/; ++BI) { if (BI + 1 == BE) { const Expr *FinalExpr = dyn_cast(*BI); if (!FinalExpr) { Info.FFDiag((*BI)->getLocStart(), diag::note_constexpr_stmt_expr_unsupported); return false; } return this->Visit(FinalExpr); } APValue ReturnValue; StmtResult Result = { ReturnValue, nullptr }; EvalStmtResult ESR = EvaluateStmt(Result, Info, *BI); if (ESR != ESR_Succeeded) { // FIXME: If the statement-expression terminated due to 'return', // 'break', or 'continue', it would be nice to propagate that to // the outer statement evaluation rather than bailing out. if (ESR != ESR_Failed) Info.FFDiag((*BI)->getLocStart(), diag::note_constexpr_stmt_expr_unsupported); return false; } } llvm_unreachable("Return from function from the loop above."); } /// Visit a value which is evaluated, but whose value is ignored. 
void VisitIgnoredValue(const Expr *E) { EvaluateIgnoredValue(Info, E); } /// Potentially visit a MemberExpr's base expression. void VisitIgnoredBaseExpression(const Expr *E) { // While MSVC doesn't evaluate the base expression, it does diagnose the // presence of side-effecting behavior. if (Info.getLangOpts().MSVCCompat && !E->HasSideEffects(Info.Ctx)) return; VisitIgnoredValue(E); } }; } //===----------------------------------------------------------------------===// // Common base class for lvalue and temporary evaluation. //===----------------------------------------------------------------------===// namespace { template class LValueExprEvaluatorBase : public ExprEvaluatorBase { protected: LValue &Result; + bool InvalidBaseOK; typedef LValueExprEvaluatorBase LValueExprEvaluatorBaseTy; typedef ExprEvaluatorBase ExprEvaluatorBaseTy; bool Success(APValue::LValueBase B) { Result.set(B); return true; } + bool evaluatePointer(const Expr *E, LValue &Result) { + return EvaluatePointer(E, Result, this->Info, InvalidBaseOK); + } + public: - LValueExprEvaluatorBase(EvalInfo &Info, LValue &Result) : - ExprEvaluatorBaseTy(Info), Result(Result) {} + LValueExprEvaluatorBase(EvalInfo &Info, LValue &Result, bool InvalidBaseOK) + : ExprEvaluatorBaseTy(Info), Result(Result), + InvalidBaseOK(InvalidBaseOK) {} bool Success(const APValue &V, const Expr *E) { Result.setFrom(this->Info.Ctx, V); return true; } bool VisitMemberExpr(const MemberExpr *E) { // Handle non-static data members. 
QualType BaseTy; bool EvalOK; if (E->isArrow()) { - EvalOK = EvaluatePointer(E->getBase(), Result, this->Info); + EvalOK = evaluatePointer(E->getBase(), Result); BaseTy = E->getBase()->getType()->castAs()->getPointeeType(); } else if (E->getBase()->isRValue()) { assert(E->getBase()->getType()->isRecordType()); EvalOK = EvaluateTemporary(E->getBase(), Result, this->Info); BaseTy = E->getBase()->getType(); } else { EvalOK = this->Visit(E->getBase()); BaseTy = E->getBase()->getType(); } if (!EvalOK) { - if (!this->Info.allowInvalidBaseExpr()) + if (!InvalidBaseOK) return false; Result.setInvalid(E); return true; } const ValueDecl *MD = E->getMemberDecl(); if (const FieldDecl *FD = dyn_cast(E->getMemberDecl())) { assert(BaseTy->getAs()->getDecl()->getCanonicalDecl() == FD->getParent()->getCanonicalDecl() && "record / field mismatch"); (void)BaseTy; if (!HandleLValueMember(this->Info, E, Result, FD)) return false; } else if (const IndirectFieldDecl *IFD = dyn_cast(MD)) { if (!HandleLValueIndirectMember(this->Info, E, Result, IFD)) return false; } else return this->Error(E); if (MD->getType()->isReferenceType()) { APValue RefValue; if (!handleLValueToRValueConversion(this->Info, E, MD->getType(), Result, RefValue)) return false; return Success(RefValue, E); } return true; } bool VisitBinaryOperator(const BinaryOperator *E) { switch (E->getOpcode()) { default: return ExprEvaluatorBaseTy::VisitBinaryOperator(E); case BO_PtrMemD: case BO_PtrMemI: return HandleMemberPointerAccess(this->Info, E, Result); } } bool VisitCastExpr(const CastExpr *E) { switch (E->getCastKind()) { default: return ExprEvaluatorBaseTy::VisitCastExpr(E); case CK_DerivedToBase: case CK_UncheckedDerivedToBase: if (!this->Visit(E->getSubExpr())) return false; // Now figure out the necessary offset to add to the base LV to get from // the derived class to the base class. 
return HandleLValueBasePath(this->Info, E, E->getSubExpr()->getType(), Result); } } }; } //===----------------------------------------------------------------------===// // LValue Evaluation // // This is used for evaluating lvalues (in C and C++), xvalues (in C++11), // function designators (in C), decl references to void objects (in C), and // temporaries (if building with -Wno-address-of-temporary). // // LValue evaluation produces values comprising a base expression of one of the // following types: // - Declarations // * VarDecl // * FunctionDecl // - Literals // * CompoundLiteralExpr in C (and in global scope in C++) // * StringLiteral // * CXXTypeidExpr // * PredefinedExpr // * ObjCStringLiteralExpr // * ObjCEncodeExpr // * AddrLabelExpr // * BlockExpr // * CallExpr for a MakeStringConstant builtin // - Locals and temporaries // * MaterializeTemporaryExpr // * Any Expr, with a CallIndex indicating the function in which the temporary // was evaluated, for cases where the MaterializeTemporaryExpr is missing // from the AST (FIXME). // * A MaterializeTemporaryExpr that has static storage duration, with no // CallIndex, for a lifetime-extended temporary. // plus an offset in bytes. 
//===----------------------------------------------------------------------===// namespace { class LValueExprEvaluator : public LValueExprEvaluatorBase { public: - LValueExprEvaluator(EvalInfo &Info, LValue &Result) : - LValueExprEvaluatorBaseTy(Info, Result) {} + LValueExprEvaluator(EvalInfo &Info, LValue &Result, bool InvalidBaseOK) : + LValueExprEvaluatorBaseTy(Info, Result, InvalidBaseOK) {} bool VisitVarDecl(const Expr *E, const VarDecl *VD); bool VisitUnaryPreIncDec(const UnaryOperator *UO); bool VisitDeclRefExpr(const DeclRefExpr *E); bool VisitPredefinedExpr(const PredefinedExpr *E) { return Success(E); } bool VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *E); bool VisitCompoundLiteralExpr(const CompoundLiteralExpr *E); bool VisitMemberExpr(const MemberExpr *E); bool VisitStringLiteral(const StringLiteral *E) { return Success(E); } bool VisitObjCEncodeExpr(const ObjCEncodeExpr *E) { return Success(E); } bool VisitCXXTypeidExpr(const CXXTypeidExpr *E); bool VisitCXXUuidofExpr(const CXXUuidofExpr *E); bool VisitArraySubscriptExpr(const ArraySubscriptExpr *E); bool VisitUnaryDeref(const UnaryOperator *E); bool VisitUnaryReal(const UnaryOperator *E); bool VisitUnaryImag(const UnaryOperator *E); bool VisitUnaryPreInc(const UnaryOperator *UO) { return VisitUnaryPreIncDec(UO); } bool VisitUnaryPreDec(const UnaryOperator *UO) { return VisitUnaryPreIncDec(UO); } bool VisitBinAssign(const BinaryOperator *BO); bool VisitCompoundAssignOperator(const CompoundAssignOperator *CAO); bool VisitCastExpr(const CastExpr *E) { switch (E->getCastKind()) { default: return LValueExprEvaluatorBaseTy::VisitCastExpr(E); case CK_LValueBitCast: this->CCEDiag(E, diag::note_constexpr_invalid_cast) << 2; if (!Visit(E->getSubExpr())) return false; Result.Designator.setInvalid(); return true; case CK_BaseToDerived: if (!Visit(E->getSubExpr())) return false; return HandleBaseToDerivedCast(Info, E, Result); } } }; } // end anonymous namespace /// Evaluate an expression as an 
lvalue. This can be legitimately called on /// expressions which are not glvalues, in three cases: /// * function designators in C, and /// * "extern void" objects /// * @selector() expressions in Objective-C -static bool EvaluateLValue(const Expr *E, LValue &Result, EvalInfo &Info) { +static bool EvaluateLValue(const Expr *E, LValue &Result, EvalInfo &Info, + bool InvalidBaseOK) { assert(E->isGLValue() || E->getType()->isFunctionType() || E->getType()->isVoidType() || isa(E)); - return LValueExprEvaluator(Info, Result).Visit(E); + return LValueExprEvaluator(Info, Result, InvalidBaseOK).Visit(E); } bool LValueExprEvaluator::VisitDeclRefExpr(const DeclRefExpr *E) { if (const FunctionDecl *FD = dyn_cast(E->getDecl())) return Success(FD); if (const VarDecl *VD = dyn_cast(E->getDecl())) return VisitVarDecl(E, VD); if (const BindingDecl *BD = dyn_cast(E->getDecl())) return Visit(BD->getBinding()); return Error(E); } bool LValueExprEvaluator::VisitVarDecl(const Expr *E, const VarDecl *VD) { CallStackFrame *Frame = nullptr; if (VD->hasLocalStorage() && Info.CurrentCall->Index > 1) { // Only if a local variable was declared in the function currently being // evaluated, do we expect to be able to find its value in the current // frame. (Otherwise it was likely declared in an enclosing context and // could either have a valid evaluatable value (for e.g. a constexpr // variable) or be ill-formed (and trigger an appropriate evaluation // diagnostic)). 
if (Info.CurrentCall->Callee && Info.CurrentCall->Callee->Equals(VD->getDeclContext())) { Frame = Info.CurrentCall; } } if (!VD->getType()->isReferenceType()) { if (Frame) { Result.set(VD, Frame->Index); return true; } return Success(VD); } APValue *V; if (!evaluateVarDeclInit(Info, E, VD, Frame, V)) return false; if (V->isUninit()) { if (!Info.checkingPotentialConstantExpression()) Info.FFDiag(E, diag::note_constexpr_use_uninit_reference); return false; } return Success(*V, E); } bool LValueExprEvaluator::VisitMaterializeTemporaryExpr( const MaterializeTemporaryExpr *E) { // Walk through the expression to find the materialized temporary itself. SmallVector CommaLHSs; SmallVector Adjustments; const Expr *Inner = E->GetTemporaryExpr()-> skipRValueSubobjectAdjustments(CommaLHSs, Adjustments); // If we passed any comma operators, evaluate their LHSs. for (unsigned I = 0, N = CommaLHSs.size(); I != N; ++I) if (!EvaluateIgnoredValue(Info, CommaLHSs[I])) return false; // A materialized temporary with static storage duration can appear within the // result of a constant expression evaluation, so we need to preserve its // value for use outside this evaluation. APValue *Value; if (E->getStorageDuration() == SD_Static) { Value = Info.Ctx.getMaterializedTemporaryValue(E, true); *Value = APValue(); Result.set(E); } else { Value = &Info.CurrentCall-> createTemporary(E, E->getStorageDuration() == SD_Automatic); Result.set(E, Info.CurrentCall->Index); } QualType Type = Inner->getType(); // Materialize the temporary itself. if (!EvaluateInPlace(*Value, Info, Result, Inner) || (E->getStorageDuration() == SD_Static && !CheckConstantExpression(Info, E->getExprLoc(), Type, *Value))) { *Value = APValue(); return false; } // Adjust our lvalue to refer to the desired subobject. 
for (unsigned I = Adjustments.size(); I != 0; /**/) { --I; switch (Adjustments[I].Kind) { case SubobjectAdjustment::DerivedToBaseAdjustment: if (!HandleLValueBasePath(Info, Adjustments[I].DerivedToBase.BasePath, Type, Result)) return false; Type = Adjustments[I].DerivedToBase.BasePath->getType(); break; case SubobjectAdjustment::FieldAdjustment: if (!HandleLValueMember(Info, E, Result, Adjustments[I].Field)) return false; Type = Adjustments[I].Field->getType(); break; case SubobjectAdjustment::MemberPointerAdjustment: if (!HandleMemberPointerAccess(this->Info, Type, Result, Adjustments[I].Ptr.RHS)) return false; Type = Adjustments[I].Ptr.MPT->getPointeeType(); break; } } return true; } bool LValueExprEvaluator::VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) { assert((!Info.getLangOpts().CPlusPlus || E->isFileScope()) && "lvalue compound literal in c++?"); // Defer visiting the literal until the lvalue-to-rvalue conversion. We can // only see this when folding in C, so there's no standard to follow here. return Success(E); } bool LValueExprEvaluator::VisitCXXTypeidExpr(const CXXTypeidExpr *E) { if (!E->isPotentiallyEvaluated()) return Success(E); Info.FFDiag(E, diag::note_constexpr_typeid_polymorphic) << E->getExprOperand()->getType() << E->getExprOperand()->getSourceRange(); return false; } bool LValueExprEvaluator::VisitCXXUuidofExpr(const CXXUuidofExpr *E) { return Success(E); } bool LValueExprEvaluator::VisitMemberExpr(const MemberExpr *E) { // Handle static data members. if (const VarDecl *VD = dyn_cast(E->getMemberDecl())) { VisitIgnoredBaseExpression(E->getBase()); return VisitVarDecl(E, VD); } // Handle static member functions. if (const CXXMethodDecl *MD = dyn_cast(E->getMemberDecl())) { if (MD->isStatic()) { VisitIgnoredBaseExpression(E->getBase()); return Success(MD); } } // Handle non-static data members. 
return LValueExprEvaluatorBaseTy::VisitMemberExpr(E); } bool LValueExprEvaluator::VisitArraySubscriptExpr(const ArraySubscriptExpr *E) { // FIXME: Deal with vectors as array subscript bases. if (E->getBase()->getType()->isVectorType()) return Error(E); - if (!EvaluatePointer(E->getBase(), Result, Info)) + if (!evaluatePointer(E->getBase(), Result)) return false; APSInt Index; if (!EvaluateInteger(E->getIdx(), Index, Info)) return false; return HandleLValueArrayAdjustment(Info, E, Result, E->getType(), getExtValue(Index)); } bool LValueExprEvaluator::VisitUnaryDeref(const UnaryOperator *E) { - return EvaluatePointer(E->getSubExpr(), Result, Info); + return evaluatePointer(E->getSubExpr(), Result); } bool LValueExprEvaluator::VisitUnaryReal(const UnaryOperator *E) { if (!Visit(E->getSubExpr())) return false; // __real is a no-op on scalar lvalues. if (E->getSubExpr()->getType()->isAnyComplexType()) HandleLValueComplexElement(Info, E, Result, E->getType(), false); return true; } bool LValueExprEvaluator::VisitUnaryImag(const UnaryOperator *E) { assert(E->getSubExpr()->getType()->isAnyComplexType() && "lvalue __imag__ on scalar?"); if (!Visit(E->getSubExpr())) return false; HandleLValueComplexElement(Info, E, Result, E->getType(), true); return true; } bool LValueExprEvaluator::VisitUnaryPreIncDec(const UnaryOperator *UO) { if (!Info.getLangOpts().CPlusPlus14 && !Info.keepEvaluatingAfterFailure()) return Error(UO); if (!this->Visit(UO->getSubExpr())) return false; return handleIncDec( this->Info, UO, Result, UO->getSubExpr()->getType(), UO->isIncrementOp(), nullptr); } bool LValueExprEvaluator::VisitCompoundAssignOperator( const CompoundAssignOperator *CAO) { if (!Info.getLangOpts().CPlusPlus14 && !Info.keepEvaluatingAfterFailure()) return Error(CAO); APValue RHS; // The overall lvalue result is the result of evaluating the LHS. 
if (!this->Visit(CAO->getLHS())) { if (Info.noteFailure()) Evaluate(RHS, this->Info, CAO->getRHS()); return false; } if (!Evaluate(RHS, this->Info, CAO->getRHS())) return false; return handleCompoundAssignment( this->Info, CAO, Result, CAO->getLHS()->getType(), CAO->getComputationLHSType(), CAO->getOpForCompoundAssignment(CAO->getOpcode()), RHS); } bool LValueExprEvaluator::VisitBinAssign(const BinaryOperator *E) { if (!Info.getLangOpts().CPlusPlus14 && !Info.keepEvaluatingAfterFailure()) return Error(E); APValue NewVal; if (!this->Visit(E->getLHS())) { if (Info.noteFailure()) Evaluate(NewVal, this->Info, E->getRHS()); return false; } if (!Evaluate(NewVal, this->Info, E->getRHS())) return false; return handleAssignment(this->Info, E, Result, E->getLHS()->getType(), NewVal); } //===----------------------------------------------------------------------===// // Pointer Evaluation //===----------------------------------------------------------------------===// /// \brief Attempts to compute the number of bytes available at the pointer /// returned by a function with the alloc_size attribute. Returns true if we /// were successful. Places an unsigned number into `Result`. /// /// This expects the given CallExpr to be a call to a function with an /// alloc_size attribute. static bool getBytesReturnedByAllocSizeCall(const ASTContext &Ctx, const CallExpr *Call, llvm::APInt &Result) { const AllocSizeAttr *AllocSize = getAllocSizeAttr(Call); // alloc_size args are 1-indexed, 0 means not present. 
assert(AllocSize && AllocSize->getElemSizeParam() != 0); unsigned SizeArgNo = AllocSize->getElemSizeParam() - 1; unsigned BitsInSizeT = Ctx.getTypeSize(Ctx.getSizeType()); if (Call->getNumArgs() <= SizeArgNo) return false; auto EvaluateAsSizeT = [&](const Expr *E, APSInt &Into) { if (!E->EvaluateAsInt(Into, Ctx, Expr::SE_AllowSideEffects)) return false; if (Into.isNegative() || !Into.isIntN(BitsInSizeT)) return false; Into = Into.zextOrSelf(BitsInSizeT); return true; }; APSInt SizeOfElem; if (!EvaluateAsSizeT(Call->getArg(SizeArgNo), SizeOfElem)) return false; if (!AllocSize->getNumElemsParam()) { Result = std::move(SizeOfElem); return true; } APSInt NumberOfElems; // Argument numbers start at 1 unsigned NumArgNo = AllocSize->getNumElemsParam() - 1; if (!EvaluateAsSizeT(Call->getArg(NumArgNo), NumberOfElems)) return false; bool Overflow; llvm::APInt BytesAvailable = SizeOfElem.umul_ov(NumberOfElems, Overflow); if (Overflow) return false; Result = std::move(BytesAvailable); return true; } /// \brief Convenience function. LVal's base must be a call to an alloc_size /// function. static bool getBytesReturnedByAllocSizeCall(const ASTContext &Ctx, const LValue &LVal, llvm::APInt &Result) { assert(isBaseAnAllocSizeCall(LVal.getLValueBase()) && "Can't get the size of a non alloc_size function"); const auto *Base = LVal.getLValueBase().get(); const CallExpr *CE = tryUnwrapAllocSizeCall(Base); return getBytesReturnedByAllocSizeCall(Ctx, CE, Result); } /// \brief Attempts to evaluate the given LValueBase as the result of a call to /// a function with the alloc_size attribute. If it was possible to do so, this /// function will return true, make Result's Base point to said function call, /// and mark Result's Base as invalid. 
static bool evaluateLValueAsAllocSize(EvalInfo &Info, APValue::LValueBase Base, LValue &Result) { - if (!Info.allowInvalidBaseExpr() || Base.isNull()) + if (Base.isNull()) return false; // Because we do no form of static analysis, we only support const variables. // // Additionally, we can't support parameters, nor can we support static // variables (in the latter case, use-before-assign isn't UB; in the former, // we have no clue what they'll be assigned to). const auto *VD = dyn_cast_or_null(Base.dyn_cast()); if (!VD || !VD->isLocalVarDecl() || !VD->getType().isConstQualified()) return false; const Expr *Init = VD->getAnyInitializer(); if (!Init) return false; const Expr *E = Init->IgnoreParens(); if (!tryUnwrapAllocSizeCall(E)) return false; // Store E instead of E unwrapped so that the type of the LValue's base is // what the user wanted. Result.setInvalid(E); QualType Pointee = E->getType()->castAs()->getPointeeType(); Result.addUnsizedArray(Info, Pointee); return true; } namespace { class PointerExprEvaluator : public ExprEvaluatorBase { LValue &Result; + bool InvalidBaseOK; bool Success(const Expr *E) { Result.set(E); return true; } + bool evaluateLValue(const Expr *E, LValue &Result) { + return EvaluateLValue(E, Result, Info, InvalidBaseOK); + } + + bool evaluatePointer(const Expr *E, LValue &Result) { + return EvaluatePointer(E, Result, Info, InvalidBaseOK); + } + bool visitNonBuiltinCallExpr(const CallExpr *E); public: - PointerExprEvaluator(EvalInfo &info, LValue &Result) - : ExprEvaluatorBaseTy(info), Result(Result) {} + PointerExprEvaluator(EvalInfo &info, LValue &Result, bool InvalidBaseOK) + : ExprEvaluatorBaseTy(info), Result(Result), + InvalidBaseOK(InvalidBaseOK) {} bool Success(const APValue &V, const Expr *E) { Result.setFrom(Info.Ctx, V); return true; } bool ZeroInitialization(const Expr *E) { auto Offset = Info.Ctx.getTargetNullPointerValue(E->getType()); Result.set((Expr*)nullptr, 0, false, true, Offset); return true; } bool 
VisitBinaryOperator(const BinaryOperator *E); bool VisitCastExpr(const CastExpr* E); bool VisitUnaryAddrOf(const UnaryOperator *E); bool VisitObjCStringLiteral(const ObjCStringLiteral *E) { return Success(E); } bool VisitObjCBoxedExpr(const ObjCBoxedExpr *E) { return Success(E); } bool VisitAddrLabelExpr(const AddrLabelExpr *E) { return Success(E); } bool VisitCallExpr(const CallExpr *E); bool VisitBuiltinCallExpr(const CallExpr *E, unsigned BuiltinOp); bool VisitBlockExpr(const BlockExpr *E) { if (!E->getBlockDecl()->hasCaptures()) return Success(E); return Error(E); } bool VisitCXXThisExpr(const CXXThisExpr *E) { // Can't look at 'this' when checking a potential constant expression. if (Info.checkingPotentialConstantExpression()) return false; if (!Info.CurrentCall->This) { if (Info.getLangOpts().CPlusPlus11) Info.FFDiag(E, diag::note_constexpr_this) << E->isImplicit(); else Info.FFDiag(E); return false; } Result = *Info.CurrentCall->This; return true; } // FIXME: Missing: @protocol, @selector }; } // end anonymous namespace -static bool EvaluatePointer(const Expr* E, LValue& Result, EvalInfo &Info) { +static bool EvaluatePointer(const Expr* E, LValue& Result, EvalInfo &Info, + bool InvalidBaseOK) { assert(E->isRValue() && E->getType()->hasPointerRepresentation()); - return PointerExprEvaluator(Info, Result).Visit(E); + return PointerExprEvaluator(Info, Result, InvalidBaseOK).Visit(E); } bool PointerExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) { if (E->getOpcode() != BO_Add && E->getOpcode() != BO_Sub) return ExprEvaluatorBaseTy::VisitBinaryOperator(E); const Expr *PExp = E->getLHS(); const Expr *IExp = E->getRHS(); if (IExp->getType()->isPointerType()) std::swap(PExp, IExp); - bool EvalPtrOK = EvaluatePointer(PExp, Result, Info); + bool EvalPtrOK = evaluatePointer(PExp, Result); if (!EvalPtrOK && !Info.noteFailure()) return false; llvm::APSInt Offset; if (!EvaluateInteger(IExp, Offset, Info) || !EvalPtrOK) return false; int64_t AdditionalOffset = 
getExtValue(Offset); if (E->getOpcode() == BO_Sub) AdditionalOffset = -AdditionalOffset; QualType Pointee = PExp->getType()->castAs()->getPointeeType(); return HandleLValueArrayAdjustment(Info, E, Result, Pointee, AdditionalOffset); } bool PointerExprEvaluator::VisitUnaryAddrOf(const UnaryOperator *E) { - return EvaluateLValue(E->getSubExpr(), Result, Info); + return evaluateLValue(E->getSubExpr(), Result); } bool PointerExprEvaluator::VisitCastExpr(const CastExpr* E) { const Expr* SubExpr = E->getSubExpr(); switch (E->getCastKind()) { default: break; case CK_BitCast: case CK_CPointerToObjCPointerCast: case CK_BlockPointerToObjCPointerCast: case CK_AnyPointerToBlockPointerCast: case CK_AddressSpaceConversion: if (!Visit(SubExpr)) return false; // Bitcasts to cv void* are static_casts, not reinterpret_casts, so are // permitted in constant expressions in C++11. Bitcasts from cv void* are // also static_casts, but we disallow them as a resolution to DR1312. if (!E->getType()->isVoidPointerType()) { Result.Designator.setInvalid(); if (SubExpr->getType()->isVoidPointerType()) CCEDiag(E, diag::note_constexpr_invalid_cast) << 3 << SubExpr->getType(); else CCEDiag(E, diag::note_constexpr_invalid_cast) << 2; } if (E->getCastKind() == CK_AddressSpaceConversion && Result.IsNullPtr) ZeroInitialization(E); return true; case CK_DerivedToBase: case CK_UncheckedDerivedToBase: - if (!EvaluatePointer(E->getSubExpr(), Result, Info)) + if (!evaluatePointer(E->getSubExpr(), Result)) return false; if (!Result.Base && Result.Offset.isZero()) return true; // Now figure out the necessary offset to add to the base LV to get from // the derived class to the base class. 
return HandleLValueBasePath(Info, E, E->getSubExpr()->getType()-> castAs()->getPointeeType(), Result); case CK_BaseToDerived: if (!Visit(E->getSubExpr())) return false; if (!Result.Base && Result.Offset.isZero()) return true; return HandleBaseToDerivedCast(Info, E, Result); case CK_NullToPointer: VisitIgnoredValue(E->getSubExpr()); return ZeroInitialization(E); case CK_IntegralToPointer: { CCEDiag(E, diag::note_constexpr_invalid_cast) << 2; APValue Value; if (!EvaluateIntegerOrLValue(SubExpr, Value, Info)) break; if (Value.isInt()) { unsigned Size = Info.Ctx.getTypeSize(E->getType()); uint64_t N = Value.getInt().extOrTrunc(Size).getZExtValue(); Result.Base = (Expr*)nullptr; Result.InvalidBase = false; Result.Offset = CharUnits::fromQuantity(N); Result.CallIndex = 0; Result.Designator.setInvalid(); Result.IsNullPtr = false; return true; } else { // Cast is of an lvalue, no need to change value. Result.setFrom(Info.Ctx, Value); return true; } } case CK_ArrayToPointerDecay: if (SubExpr->isGLValue()) { - if (!EvaluateLValue(SubExpr, Result, Info)) + if (!evaluateLValue(SubExpr, Result)) return false; } else { Result.set(SubExpr, Info.CurrentCall->Index); if (!EvaluateInPlace(Info.CurrentCall->createTemporary(SubExpr, false), Info, Result, SubExpr)) return false; } // The result is a pointer to the first element of the array. if (const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType(SubExpr->getType())) Result.addArray(Info, E, CAT); else Result.Designator.setInvalid(); return true; case CK_FunctionToPointerDecay: - return EvaluateLValue(SubExpr, Result, Info); + return evaluateLValue(SubExpr, Result); case CK_LValueToRValue: { LValue LVal; - if (!EvaluateLValue(E->getSubExpr(), LVal, Info)) + if (!evaluateLValue(E->getSubExpr(), LVal)) return false; APValue RVal; // Note, we use the subexpression's type in order to retain cv-qualifiers. 
if (!handleLValueToRValueConversion(Info, E, E->getSubExpr()->getType(), LVal, RVal)) - return evaluateLValueAsAllocSize(Info, LVal.Base, Result); + return InvalidBaseOK && + evaluateLValueAsAllocSize(Info, LVal.Base, Result); return Success(RVal, E); } } return ExprEvaluatorBaseTy::VisitCastExpr(E); } static CharUnits GetAlignOfType(EvalInfo &Info, QualType T) { // C++ [expr.alignof]p3: // When alignof is applied to a reference type, the result is the // alignment of the referenced type. if (const ReferenceType *Ref = T->getAs()) T = Ref->getPointeeType(); // __alignof is defined to return the preferred alignment. return Info.Ctx.toCharUnitsFromBits( Info.Ctx.getPreferredTypeAlign(T.getTypePtr())); } static CharUnits GetAlignOfExpr(EvalInfo &Info, const Expr *E) { E = E->IgnoreParens(); // The kinds of expressions that we have special-case logic here for // should be kept up to date with the special checks for those // expressions in Sema. // alignof decl is always accepted, even if it doesn't make sense: we default // to 1 in those cases. if (const DeclRefExpr *DRE = dyn_cast(E)) return Info.Ctx.getDeclAlign(DRE->getDecl(), /*RefAsPointee*/true); if (const MemberExpr *ME = dyn_cast(E)) return Info.Ctx.getDeclAlign(ME->getMemberDecl(), /*RefAsPointee*/true); return GetAlignOfType(Info, E->getType()); } // To be clear: this happily visits unsupported builtins. Better name welcomed. 
bool PointerExprEvaluator::visitNonBuiltinCallExpr(const CallExpr *E) { if (ExprEvaluatorBaseTy::VisitCallExpr(E)) return true; - if (!(Info.allowInvalidBaseExpr() && getAllocSizeAttr(E))) + if (!(InvalidBaseOK && getAllocSizeAttr(E))) return false; Result.setInvalid(E); QualType PointeeTy = E->getType()->castAs()->getPointeeType(); Result.addUnsizedArray(Info, PointeeTy); return true; } bool PointerExprEvaluator::VisitCallExpr(const CallExpr *E) { if (IsStringLiteralCall(E)) return Success(E); if (unsigned BuiltinOp = E->getBuiltinCallee()) return VisitBuiltinCallExpr(E, BuiltinOp); return visitNonBuiltinCallExpr(E); } bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, unsigned BuiltinOp) { switch (BuiltinOp) { case Builtin::BI__builtin_addressof: - return EvaluateLValue(E->getArg(0), Result, Info); + return evaluateLValue(E->getArg(0), Result); case Builtin::BI__builtin_assume_aligned: { // We need to be very careful here because: if the pointer does not have the // asserted alignment, then the behavior is undefined, and undefined // behavior is non-constant. - if (!EvaluatePointer(E->getArg(0), Result, Info)) + if (!evaluatePointer(E->getArg(0), Result)) return false; LValue OffsetResult(Result); APSInt Alignment; if (!EvaluateInteger(E->getArg(1), Alignment, Info)) return false; CharUnits Align = CharUnits::fromQuantity(getExtValue(Alignment)); if (E->getNumArgs() > 2) { APSInt Offset; if (!EvaluateInteger(E->getArg(2), Offset, Info)) return false; int64_t AdditionalOffset = -getExtValue(Offset); OffsetResult.Offset += CharUnits::fromQuantity(AdditionalOffset); } // If there is a base object, then it must have the correct alignment. 
if (OffsetResult.Base) { CharUnits BaseAlignment; if (const ValueDecl *VD = OffsetResult.Base.dyn_cast()) { BaseAlignment = Info.Ctx.getDeclAlign(VD); } else { BaseAlignment = GetAlignOfExpr(Info, OffsetResult.Base.get()); } if (BaseAlignment < Align) { Result.Designator.setInvalid(); // FIXME: Quantities here cast to integers because the plural modifier // does not work on APSInts yet. CCEDiag(E->getArg(0), diag::note_constexpr_baa_insufficient_alignment) << 0 << (int) BaseAlignment.getQuantity() << (unsigned) getExtValue(Alignment); return false; } } // The offset must also have the correct alignment. if (OffsetResult.Offset.alignTo(Align) != OffsetResult.Offset) { Result.Designator.setInvalid(); APSInt Offset(64, false); Offset = OffsetResult.Offset.getQuantity(); if (OffsetResult.Base) CCEDiag(E->getArg(0), diag::note_constexpr_baa_insufficient_alignment) << 1 << (int) getExtValue(Offset) << (unsigned) getExtValue(Alignment); else CCEDiag(E->getArg(0), diag::note_constexpr_baa_value_insufficient_alignment) << Offset << (unsigned) getExtValue(Alignment); return false; } return true; } case Builtin::BIstrchr: case Builtin::BIwcschr: case Builtin::BImemchr: case Builtin::BIwmemchr: if (Info.getLangOpts().CPlusPlus11) Info.CCEDiag(E, diag::note_constexpr_invalid_function) << /*isConstexpr*/0 << /*isConstructor*/0 << (std::string("'") + Info.Ctx.BuiltinInfo.getName(BuiltinOp) + "'"); else Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr); // Fall through. 
case Builtin::BI__builtin_strchr: case Builtin::BI__builtin_wcschr: case Builtin::BI__builtin_memchr: case Builtin::BI__builtin_char_memchr: case Builtin::BI__builtin_wmemchr: { if (!Visit(E->getArg(0))) return false; APSInt Desired; if (!EvaluateInteger(E->getArg(1), Desired, Info)) return false; uint64_t MaxLength = uint64_t(-1); if (BuiltinOp != Builtin::BIstrchr && BuiltinOp != Builtin::BIwcschr && BuiltinOp != Builtin::BI__builtin_strchr && BuiltinOp != Builtin::BI__builtin_wcschr) { APSInt N; if (!EvaluateInteger(E->getArg(2), N, Info)) return false; MaxLength = N.getExtValue(); } QualType CharTy = E->getArg(0)->getType()->getPointeeType(); // Figure out what value we're actually looking for (after converting to // the corresponding unsigned type if necessary). uint64_t DesiredVal; bool StopAtNull = false; switch (BuiltinOp) { case Builtin::BIstrchr: case Builtin::BI__builtin_strchr: // strchr compares directly to the passed integer, and therefore // always fails if given an int that is not a char. if (!APSInt::isSameValue(HandleIntToIntCast(Info, E, CharTy, E->getArg(1)->getType(), Desired), Desired)) return ZeroInitialization(E); StopAtNull = true; // Fall through. case Builtin::BImemchr: case Builtin::BI__builtin_memchr: case Builtin::BI__builtin_char_memchr: // memchr compares by converting both sides to unsigned char. That's also // correct for strchr if we get this far (to cope with plain char being // unsigned in the strchr case). DesiredVal = Desired.trunc(Info.Ctx.getCharWidth()).getZExtValue(); break; case Builtin::BIwcschr: case Builtin::BI__builtin_wcschr: StopAtNull = true; // Fall through. case Builtin::BIwmemchr: case Builtin::BI__builtin_wmemchr: // wcschr and wmemchr are given a wchar_t to look for. Just use it. 
DesiredVal = Desired.getZExtValue(); break; } for (; MaxLength; --MaxLength) { APValue Char; if (!handleLValueToRValueConversion(Info, E, CharTy, Result, Char) || !Char.isInt()) return false; if (Char.getInt().getZExtValue() == DesiredVal) return true; if (StopAtNull && !Char.getInt()) break; if (!HandleLValueArrayAdjustment(Info, E, Result, CharTy, 1)) return false; } // Not found: return nullptr. return ZeroInitialization(E); } default: return visitNonBuiltinCallExpr(E); } } //===----------------------------------------------------------------------===// // Member Pointer Evaluation //===----------------------------------------------------------------------===// namespace { class MemberPointerExprEvaluator : public ExprEvaluatorBase { MemberPtr &Result; bool Success(const ValueDecl *D) { Result = MemberPtr(D); return true; } public: MemberPointerExprEvaluator(EvalInfo &Info, MemberPtr &Result) : ExprEvaluatorBaseTy(Info), Result(Result) {} bool Success(const APValue &V, const Expr *E) { Result.setFrom(V); return true; } bool ZeroInitialization(const Expr *E) { return Success((const ValueDecl*)nullptr); } bool VisitCastExpr(const CastExpr *E); bool VisitUnaryAddrOf(const UnaryOperator *E); }; } // end anonymous namespace static bool EvaluateMemberPointer(const Expr *E, MemberPtr &Result, EvalInfo &Info) { assert(E->isRValue() && E->getType()->isMemberPointerType()); return MemberPointerExprEvaluator(Info, Result).Visit(E); } bool MemberPointerExprEvaluator::VisitCastExpr(const CastExpr *E) { switch (E->getCastKind()) { default: return ExprEvaluatorBaseTy::VisitCastExpr(E); case CK_NullToMemberPointer: VisitIgnoredValue(E->getSubExpr()); return ZeroInitialization(E); case CK_BaseToDerivedMemberPointer: { if (!Visit(E->getSubExpr())) return false; if (E->path_empty()) return true; // Base-to-derived member pointer casts store the path in derived-to-base // order, so iterate backwards. 
The CXXBaseSpecifier also provides us with // the wrong end of the derived->base arc, so stagger the path by one class. typedef std::reverse_iterator ReverseIter; for (ReverseIter PathI(E->path_end() - 1), PathE(E->path_begin()); PathI != PathE; ++PathI) { assert(!(*PathI)->isVirtual() && "memptr cast through vbase"); const CXXRecordDecl *Derived = (*PathI)->getType()->getAsCXXRecordDecl(); if (!Result.castToDerived(Derived)) return Error(E); } const Type *FinalTy = E->getType()->castAs()->getClass(); if (!Result.castToDerived(FinalTy->getAsCXXRecordDecl())) return Error(E); return true; } case CK_DerivedToBaseMemberPointer: if (!Visit(E->getSubExpr())) return false; for (CastExpr::path_const_iterator PathI = E->path_begin(), PathE = E->path_end(); PathI != PathE; ++PathI) { assert(!(*PathI)->isVirtual() && "memptr cast through vbase"); const CXXRecordDecl *Base = (*PathI)->getType()->getAsCXXRecordDecl(); if (!Result.castToBase(Base)) return Error(E); } return true; } } bool MemberPointerExprEvaluator::VisitUnaryAddrOf(const UnaryOperator *E) { // C++11 [expr.unary.op]p3 has very strict rules on how the address of a // member can be formed. 
return Success(cast(E->getSubExpr())->getDecl()); } //===----------------------------------------------------------------------===// // Record Evaluation //===----------------------------------------------------------------------===// namespace { class RecordExprEvaluator : public ExprEvaluatorBase { const LValue &This; APValue &Result; public: RecordExprEvaluator(EvalInfo &info, const LValue &This, APValue &Result) : ExprEvaluatorBaseTy(info), This(This), Result(Result) {} bool Success(const APValue &V, const Expr *E) { Result = V; return true; } bool ZeroInitialization(const Expr *E) { return ZeroInitialization(E, E->getType()); } bool ZeroInitialization(const Expr *E, QualType T); bool VisitCallExpr(const CallExpr *E) { return handleCallExpr(E, Result, &This); } bool VisitCastExpr(const CastExpr *E); bool VisitInitListExpr(const InitListExpr *E); bool VisitCXXConstructExpr(const CXXConstructExpr *E) { return VisitCXXConstructExpr(E, E->getType()); } bool VisitLambdaExpr(const LambdaExpr *E); bool VisitCXXInheritedCtorInitExpr(const CXXInheritedCtorInitExpr *E); bool VisitCXXConstructExpr(const CXXConstructExpr *E, QualType T); bool VisitCXXStdInitializerListExpr(const CXXStdInitializerListExpr *E); }; } /// Perform zero-initialization on an object of non-union class type. /// C++11 [dcl.init]p5: /// To zero-initialize an object or reference of type T means: /// [...] /// -- if T is a (possibly cv-qualified) non-union class type, /// each non-static data member and each base-class subobject is /// zero-initialized static bool HandleClassZeroInitialization(EvalInfo &Info, const Expr *E, const RecordDecl *RD, const LValue &This, APValue &Result) { assert(!RD->isUnion() && "Expected non-union class type"); const CXXRecordDecl *CD = dyn_cast(RD); Result = APValue(APValue::UninitStruct(), CD ? 
CD->getNumBases() : 0, std::distance(RD->field_begin(), RD->field_end())); if (RD->isInvalidDecl()) return false; const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(RD); if (CD) { unsigned Index = 0; for (CXXRecordDecl::base_class_const_iterator I = CD->bases_begin(), End = CD->bases_end(); I != End; ++I, ++Index) { const CXXRecordDecl *Base = I->getType()->getAsCXXRecordDecl(); LValue Subobject = This; if (!HandleLValueDirectBase(Info, E, Subobject, CD, Base, &Layout)) return false; if (!HandleClassZeroInitialization(Info, E, Base, Subobject, Result.getStructBase(Index))) return false; } } for (const auto *I : RD->fields()) { // -- if T is a reference type, no initialization is performed. if (I->getType()->isReferenceType()) continue; LValue Subobject = This; if (!HandleLValueMember(Info, E, Subobject, I, &Layout)) return false; ImplicitValueInitExpr VIE(I->getType()); if (!EvaluateInPlace( Result.getStructField(I->getFieldIndex()), Info, Subobject, &VIE)) return false; } return true; } bool RecordExprEvaluator::ZeroInitialization(const Expr *E, QualType T) { const RecordDecl *RD = T->castAs()->getDecl(); if (RD->isInvalidDecl()) return false; if (RD->isUnion()) { // C++11 [dcl.init]p5: If T is a (possibly cv-qualified) union type, the // object's first non-static named data member is zero-initialized RecordDecl::field_iterator I = RD->field_begin(); if (I == RD->field_end()) { Result = APValue((const FieldDecl*)nullptr); return true; } LValue Subobject = This; if (!HandleLValueMember(Info, E, Subobject, *I)) return false; Result = APValue(*I); ImplicitValueInitExpr VIE(I->getType()); return EvaluateInPlace(Result.getUnionValue(), Info, Subobject, &VIE); } if (isa(RD) && cast(RD)->getNumVBases()) { Info.FFDiag(E, diag::note_constexpr_virtual_base) << RD; return false; } return HandleClassZeroInitialization(Info, E, RD, This, Result); } bool RecordExprEvaluator::VisitCastExpr(const CastExpr *E) { switch (E->getCastKind()) { default: return 
ExprEvaluatorBaseTy::VisitCastExpr(E); case CK_ConstructorConversion: return Visit(E->getSubExpr()); case CK_DerivedToBase: case CK_UncheckedDerivedToBase: { APValue DerivedObject; if (!Evaluate(DerivedObject, Info, E->getSubExpr())) return false; if (!DerivedObject.isStruct()) return Error(E->getSubExpr()); // Derived-to-base rvalue conversion: just slice off the derived part. APValue *Value = &DerivedObject; const CXXRecordDecl *RD = E->getSubExpr()->getType()->getAsCXXRecordDecl(); for (CastExpr::path_const_iterator PathI = E->path_begin(), PathE = E->path_end(); PathI != PathE; ++PathI) { assert(!(*PathI)->isVirtual() && "record rvalue with virtual base"); const CXXRecordDecl *Base = (*PathI)->getType()->getAsCXXRecordDecl(); Value = &Value->getStructBase(getBaseIndex(RD, Base)); RD = Base; } Result = *Value; return true; } } } bool RecordExprEvaluator::VisitInitListExpr(const InitListExpr *E) { if (E->isTransparent()) return Visit(E->getInit(0)); const RecordDecl *RD = E->getType()->castAs()->getDecl(); if (RD->isInvalidDecl()) return false; const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(RD); if (RD->isUnion()) { const FieldDecl *Field = E->getInitializedFieldInUnion(); Result = APValue(Field); if (!Field) return true; // If the initializer list for a union does not contain any elements, the // first element of the union is value-initialized. // FIXME: The element should be initialized from an initializer list. // Is this difference ever observable for initializer lists which // we don't build? ImplicitValueInitExpr VIE(Field->getType()); const Expr *InitExpr = E->getNumInits() ? E->getInit(0) : &VIE; LValue Subobject = This; if (!HandleLValueMember(Info, InitExpr, Subobject, Field, &Layout)) return false; // Temporarily override This, in case there's a CXXDefaultInitExpr in here. 
ThisOverrideRAII ThisOverride(*Info.CurrentCall, &This, isa(InitExpr)); return EvaluateInPlace(Result.getUnionValue(), Info, Subobject, InitExpr); } auto *CXXRD = dyn_cast(RD); if (Result.isUninit()) Result = APValue(APValue::UninitStruct(), CXXRD ? CXXRD->getNumBases() : 0, std::distance(RD->field_begin(), RD->field_end())); unsigned ElementNo = 0; bool Success = true; // Initialize base classes. if (CXXRD) { for (const auto &Base : CXXRD->bases()) { assert(ElementNo < E->getNumInits() && "missing init for base class"); const Expr *Init = E->getInit(ElementNo); LValue Subobject = This; if (!HandleLValueBase(Info, Init, Subobject, CXXRD, &Base)) return false; APValue &FieldVal = Result.getStructBase(ElementNo); if (!EvaluateInPlace(FieldVal, Info, Subobject, Init)) { if (!Info.noteFailure()) return false; Success = false; } ++ElementNo; } } // Initialize members. for (const auto *Field : RD->fields()) { // Anonymous bit-fields are not considered members of the class for // purposes of aggregate initialization. if (Field->isUnnamedBitfield()) continue; LValue Subobject = This; bool HaveInit = ElementNo < E->getNumInits(); // FIXME: Diagnostics here should point to the end of the initializer // list, not the start. if (!HandleLValueMember(Info, HaveInit ? E->getInit(ElementNo) : E, Subobject, Field, &Layout)) return false; // Perform an implicit value-initialization for members beyond the end of // the initializer list. ImplicitValueInitExpr VIE(HaveInit ? Info.Ctx.IntTy : Field->getType()); const Expr *Init = HaveInit ? E->getInit(ElementNo++) : &VIE; // Temporarily override This, in case there's a CXXDefaultInitExpr in here. 
ThisOverrideRAII ThisOverride(*Info.CurrentCall, &This, isa(Init)); APValue &FieldVal = Result.getStructField(Field->getFieldIndex()); if (!EvaluateInPlace(FieldVal, Info, Subobject, Init) || (Field->isBitField() && !truncateBitfieldValue(Info, Init, FieldVal, Field))) { if (!Info.noteFailure()) return false; Success = false; } } return Success; } bool RecordExprEvaluator::VisitCXXConstructExpr(const CXXConstructExpr *E, QualType T) { // Note that E's type is not necessarily the type of our class here; we might // be initializing an array element instead. const CXXConstructorDecl *FD = E->getConstructor(); if (FD->isInvalidDecl() || FD->getParent()->isInvalidDecl()) return false; bool ZeroInit = E->requiresZeroInitialization(); if (CheckTrivialDefaultConstructor(Info, E->getExprLoc(), FD, ZeroInit)) { // If we've already performed zero-initialization, we're already done. if (!Result.isUninit()) return true; // We can get here in two different ways: // 1) We're performing value-initialization, and should zero-initialize // the object, or // 2) We're performing default-initialization of an object with a trivial // constexpr default constructor, in which case we should start the // lifetimes of all the base subobjects (there can be no data member // subobjects in this case) per [basic.life]p1. // Either way, ZeroInitialization is appropriate. return ZeroInitialization(E, T); } const FunctionDecl *Definition = nullptr; auto Body = FD->getBody(Definition); if (!CheckConstexprFunction(Info, E->getExprLoc(), FD, Definition, Body)) return false; // Avoid materializing a temporary for an elidable copy/move constructor. 
if (E->isElidable() && !ZeroInit) if (const MaterializeTemporaryExpr *ME = dyn_cast(E->getArg(0))) return Visit(ME->GetTemporaryExpr()); if (ZeroInit && !ZeroInitialization(E, T)) return false; auto Args = llvm::makeArrayRef(E->getArgs(), E->getNumArgs()); return HandleConstructorCall(E, This, Args, cast(Definition), Info, Result); } bool RecordExprEvaluator::VisitCXXInheritedCtorInitExpr( const CXXInheritedCtorInitExpr *E) { if (!Info.CurrentCall) { assert(Info.checkingPotentialConstantExpression()); return false; } const CXXConstructorDecl *FD = E->getConstructor(); if (FD->isInvalidDecl() || FD->getParent()->isInvalidDecl()) return false; const FunctionDecl *Definition = nullptr; auto Body = FD->getBody(Definition); if (!CheckConstexprFunction(Info, E->getExprLoc(), FD, Definition, Body)) return false; return HandleConstructorCall(E, This, Info.CurrentCall->Arguments, cast(Definition), Info, Result); } bool RecordExprEvaluator::VisitCXXStdInitializerListExpr( const CXXStdInitializerListExpr *E) { const ConstantArrayType *ArrayType = Info.Ctx.getAsConstantArrayType(E->getSubExpr()->getType()); LValue Array; if (!EvaluateLValue(E->getSubExpr(), Array, Info)) return false; // Get a pointer to the first element of the array. Array.addArray(Info, E, ArrayType); // FIXME: Perform the checks on the field types in SemaInit. RecordDecl *Record = E->getType()->castAs()->getDecl(); RecordDecl::field_iterator Field = Record->field_begin(); if (Field == Record->field_end()) return Error(E); // Start pointer. if (!Field->getType()->isPointerType() || !Info.Ctx.hasSameType(Field->getType()->getPointeeType(), ArrayType->getElementType())) return Error(E); // FIXME: What if the initializer_list type has base classes, etc? 
Result = APValue(APValue::UninitStruct(), 0, 2); Array.moveInto(Result.getStructField(0)); if (++Field == Record->field_end()) return Error(E); if (Field->getType()->isPointerType() && Info.Ctx.hasSameType(Field->getType()->getPointeeType(), ArrayType->getElementType())) { // End pointer. if (!HandleLValueArrayAdjustment(Info, E, Array, ArrayType->getElementType(), ArrayType->getSize().getZExtValue())) return false; Array.moveInto(Result.getStructField(1)); } else if (Info.Ctx.hasSameType(Field->getType(), Info.Ctx.getSizeType())) // Length. Result.getStructField(1) = APValue(APSInt(ArrayType->getSize())); else return Error(E); if (++Field != Record->field_end()) return Error(E); return true; } bool RecordExprEvaluator::VisitLambdaExpr(const LambdaExpr *E) { const CXXRecordDecl *ClosureClass = E->getLambdaClass(); if (ClosureClass->isInvalidDecl()) return false; if (Info.checkingPotentialConstantExpression()) return true; if (E->capture_size()) { Info.FFDiag(E, diag::note_unimplemented_constexpr_lambda_feature_ast) << "can not evaluate lambda expressions with captures"; return false; } // FIXME: Implement captures. Result = APValue(APValue::UninitStruct(), /*NumBases*/0, /*NumFields*/0); return true; } static bool EvaluateRecord(const Expr *E, const LValue &This, APValue &Result, EvalInfo &Info) { assert(E->isRValue() && E->getType()->isRecordType() && "can't evaluate expression as a record rvalue"); return RecordExprEvaluator(Info, This, Result).Visit(E); } //===----------------------------------------------------------------------===// // Temporary Evaluation // // Temporaries are represented in the AST as rvalues, but generally behave like // lvalues. The full-object of which the temporary is a subobject is implicitly // materialized so that a reference can bind to it. 
//===----------------------------------------------------------------------===// namespace { class TemporaryExprEvaluator : public LValueExprEvaluatorBase { public: TemporaryExprEvaluator(EvalInfo &Info, LValue &Result) : - LValueExprEvaluatorBaseTy(Info, Result) {} + LValueExprEvaluatorBaseTy(Info, Result, false) {} /// Visit an expression which constructs the value of this temporary. bool VisitConstructExpr(const Expr *E) { Result.set(E, Info.CurrentCall->Index); return EvaluateInPlace(Info.CurrentCall->createTemporary(E, false), Info, Result, E); } bool VisitCastExpr(const CastExpr *E) { switch (E->getCastKind()) { default: return LValueExprEvaluatorBaseTy::VisitCastExpr(E); case CK_ConstructorConversion: return VisitConstructExpr(E->getSubExpr()); } } bool VisitInitListExpr(const InitListExpr *E) { return VisitConstructExpr(E); } bool VisitCXXConstructExpr(const CXXConstructExpr *E) { return VisitConstructExpr(E); } bool VisitCallExpr(const CallExpr *E) { return VisitConstructExpr(E); } bool VisitCXXStdInitializerListExpr(const CXXStdInitializerListExpr *E) { return VisitConstructExpr(E); } bool VisitLambdaExpr(const LambdaExpr *E) { return VisitConstructExpr(E); } }; } // end anonymous namespace /// Evaluate an expression of record type as a temporary. static bool EvaluateTemporary(const Expr *E, LValue &Result, EvalInfo &Info) { assert(E->isRValue() && E->getType()->isRecordType()); return TemporaryExprEvaluator(Info, Result).Visit(E); } //===----------------------------------------------------------------------===// // Vector Evaluation //===----------------------------------------------------------------------===// namespace { class VectorExprEvaluator : public ExprEvaluatorBase { APValue &Result; public: VectorExprEvaluator(EvalInfo &info, APValue &Result) : ExprEvaluatorBaseTy(info), Result(Result) {} bool Success(ArrayRef V, const Expr *E) { assert(V.size() == E->getType()->castAs()->getNumElements()); // FIXME: remove this APValue copy. 
Result = APValue(V.data(), V.size()); return true; } bool Success(const APValue &V, const Expr *E) { assert(V.isVector()); Result = V; return true; } bool ZeroInitialization(const Expr *E); bool VisitUnaryReal(const UnaryOperator *E) { return Visit(E->getSubExpr()); } bool VisitCastExpr(const CastExpr* E); bool VisitInitListExpr(const InitListExpr *E); bool VisitUnaryImag(const UnaryOperator *E); // FIXME: Missing: unary -, unary ~, binary add/sub/mul/div, // binary comparisons, binary and/or/xor, // shufflevector, ExtVectorElementExpr }; } // end anonymous namespace static bool EvaluateVector(const Expr* E, APValue& Result, EvalInfo &Info) { assert(E->isRValue() && E->getType()->isVectorType() &&"not a vector rvalue"); return VectorExprEvaluator(Info, Result).Visit(E); } bool VectorExprEvaluator::VisitCastExpr(const CastExpr *E) { const VectorType *VTy = E->getType()->castAs(); unsigned NElts = VTy->getNumElements(); const Expr *SE = E->getSubExpr(); QualType SETy = SE->getType(); switch (E->getCastKind()) { case CK_VectorSplat: { APValue Val = APValue(); if (SETy->isIntegerType()) { APSInt IntResult; if (!EvaluateInteger(SE, IntResult, Info)) return false; Val = APValue(std::move(IntResult)); } else if (SETy->isRealFloatingType()) { APFloat FloatResult(0.0); if (!EvaluateFloat(SE, FloatResult, Info)) return false; Val = APValue(std::move(FloatResult)); } else { return Error(E); } // Splat and create vector APValue. SmallVector Elts(NElts, Val); return Success(Elts, E); } case CK_BitCast: { // Evaluate the operand into an APInt we can extract from. 
llvm::APInt SValInt; if (!EvalAndBitcastToAPInt(Info, SE, SValInt)) return false; // Extract the elements QualType EltTy = VTy->getElementType(); unsigned EltSize = Info.Ctx.getTypeSize(EltTy); bool BigEndian = Info.Ctx.getTargetInfo().isBigEndian(); SmallVector Elts; if (EltTy->isRealFloatingType()) { const llvm::fltSemantics &Sem = Info.Ctx.getFloatTypeSemantics(EltTy); unsigned FloatEltSize = EltSize; if (&Sem == &APFloat::x87DoubleExtended()) FloatEltSize = 80; for (unsigned i = 0; i < NElts; i++) { llvm::APInt Elt; if (BigEndian) Elt = SValInt.rotl(i*EltSize+FloatEltSize).trunc(FloatEltSize); else Elt = SValInt.rotr(i*EltSize).trunc(FloatEltSize); Elts.push_back(APValue(APFloat(Sem, Elt))); } } else if (EltTy->isIntegerType()) { for (unsigned i = 0; i < NElts; i++) { llvm::APInt Elt; if (BigEndian) Elt = SValInt.rotl(i*EltSize+EltSize).zextOrTrunc(EltSize); else Elt = SValInt.rotr(i*EltSize).zextOrTrunc(EltSize); Elts.push_back(APValue(APSInt(Elt, EltTy->isSignedIntegerType()))); } } else { return Error(E); } return Success(Elts, E); } default: return ExprEvaluatorBaseTy::VisitCastExpr(E); } } bool VectorExprEvaluator::VisitInitListExpr(const InitListExpr *E) { const VectorType *VT = E->getType()->castAs(); unsigned NumInits = E->getNumInits(); unsigned NumElements = VT->getNumElements(); QualType EltTy = VT->getElementType(); SmallVector Elements; // The number of initializers can be less than the number of // vector elements. For OpenCL, this can be due to nested vector // initialization. For GCC compatibility, missing trailing elements // should be initialized with zeroes. unsigned CountInits = 0, CountElts = 0; while (CountElts < NumElements) { // Handle nested vector initialization. 
if (CountInits < NumInits && E->getInit(CountInits)->getType()->isVectorType()) { APValue v; if (!EvaluateVector(E->getInit(CountInits), v, Info)) return Error(E); unsigned vlen = v.getVectorLength(); for (unsigned j = 0; j < vlen; j++) Elements.push_back(v.getVectorElt(j)); CountElts += vlen; } else if (EltTy->isIntegerType()) { llvm::APSInt sInt(32); if (CountInits < NumInits) { if (!EvaluateInteger(E->getInit(CountInits), sInt, Info)) return false; } else // trailing integer zero. sInt = Info.Ctx.MakeIntValue(0, EltTy); Elements.push_back(APValue(sInt)); CountElts++; } else { llvm::APFloat f(0.0); if (CountInits < NumInits) { if (!EvaluateFloat(E->getInit(CountInits), f, Info)) return false; } else // trailing float zero. f = APFloat::getZero(Info.Ctx.getFloatTypeSemantics(EltTy)); Elements.push_back(APValue(f)); CountElts++; } CountInits++; } return Success(Elements, E); } bool VectorExprEvaluator::ZeroInitialization(const Expr *E) { const VectorType *VT = E->getType()->getAs(); QualType EltTy = VT->getElementType(); APValue ZeroElement; if (EltTy->isIntegerType()) ZeroElement = APValue(Info.Ctx.MakeIntValue(0, EltTy)); else ZeroElement = APValue(APFloat::getZero(Info.Ctx.getFloatTypeSemantics(EltTy))); SmallVector Elements(VT->getNumElements(), ZeroElement); return Success(Elements, E); } bool VectorExprEvaluator::VisitUnaryImag(const UnaryOperator *E) { VisitIgnoredValue(E->getSubExpr()); return ZeroInitialization(E); } //===----------------------------------------------------------------------===// // Array Evaluation //===----------------------------------------------------------------------===// namespace { class ArrayExprEvaluator : public ExprEvaluatorBase { const LValue &This; APValue &Result; public: ArrayExprEvaluator(EvalInfo &Info, const LValue &This, APValue &Result) : ExprEvaluatorBaseTy(Info), This(This), Result(Result) {} bool Success(const APValue &V, const Expr *E) { assert((V.isArray() || V.isLValue()) && "expected array or string literal"); 
Result = V; return true; } bool ZeroInitialization(const Expr *E) { const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType(E->getType()); if (!CAT) return Error(E); Result = APValue(APValue::UninitArray(), 0, CAT->getSize().getZExtValue()); if (!Result.hasArrayFiller()) return true; // Zero-initialize all elements. LValue Subobject = This; Subobject.addArray(Info, E, CAT); ImplicitValueInitExpr VIE(CAT->getElementType()); return EvaluateInPlace(Result.getArrayFiller(), Info, Subobject, &VIE); } bool VisitCallExpr(const CallExpr *E) { return handleCallExpr(E, Result, &This); } bool VisitInitListExpr(const InitListExpr *E); bool VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E); bool VisitCXXConstructExpr(const CXXConstructExpr *E); bool VisitCXXConstructExpr(const CXXConstructExpr *E, const LValue &Subobject, APValue *Value, QualType Type); }; } // end anonymous namespace static bool EvaluateArray(const Expr *E, const LValue &This, APValue &Result, EvalInfo &Info) { assert(E->isRValue() && E->getType()->isArrayType() && "not an array rvalue"); return ArrayExprEvaluator(Info, This, Result).Visit(E); } bool ArrayExprEvaluator::VisitInitListExpr(const InitListExpr *E) { const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType(E->getType()); if (!CAT) return Error(E); // C++11 [dcl.init.string]p1: A char array [...] can be initialized by [...] // an appropriately-typed string literal enclosed in braces. if (E->isStringLiteralInit()) { LValue LV; if (!EvaluateLValue(E->getInit(0), LV, Info)) return false; APValue Val; LV.moveInto(Val); return Success(Val, E); } bool Success = true; assert((!Result.isArray() || Result.getArrayInitializedElts() == 0) && "zero-initialized array shouldn't have any initialized elts"); APValue Filler; if (Result.isArray() && Result.hasArrayFiller()) Filler = Result.getArrayFiller(); unsigned NumEltsToInit = E->getNumInits(); unsigned NumElts = CAT->getSize().getZExtValue(); const Expr *FillerExpr = E->hasArrayFiller() ? 
E->getArrayFiller() : nullptr; // If the initializer might depend on the array index, run it for each // array element. For now, just whitelist non-class value-initialization. if (NumEltsToInit != NumElts && !isa(FillerExpr)) NumEltsToInit = NumElts; Result = APValue(APValue::UninitArray(), NumEltsToInit, NumElts); // If the array was previously zero-initialized, preserve the // zero-initialized values. if (!Filler.isUninit()) { for (unsigned I = 0, E = Result.getArrayInitializedElts(); I != E; ++I) Result.getArrayInitializedElt(I) = Filler; if (Result.hasArrayFiller()) Result.getArrayFiller() = Filler; } LValue Subobject = This; Subobject.addArray(Info, E, CAT); for (unsigned Index = 0; Index != NumEltsToInit; ++Index) { const Expr *Init = Index < E->getNumInits() ? E->getInit(Index) : FillerExpr; if (!EvaluateInPlace(Result.getArrayInitializedElt(Index), Info, Subobject, Init) || !HandleLValueArrayAdjustment(Info, Init, Subobject, CAT->getElementType(), 1)) { if (!Info.noteFailure()) return false; Success = false; } } if (!Result.hasArrayFiller()) return Success; // If we get here, we have a trivial filler, which we can just evaluate // once and splat over the rest of the array elements. 
assert(FillerExpr && "no array filler for incomplete init list"); return EvaluateInPlace(Result.getArrayFiller(), Info, Subobject, FillerExpr) && Success; } bool ArrayExprEvaluator::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E) { if (E->getCommonExpr() && !Evaluate(Info.CurrentCall->createTemporary(E->getCommonExpr(), false), Info, E->getCommonExpr()->getSourceExpr())) return false; auto *CAT = cast(E->getType()->castAsArrayTypeUnsafe()); uint64_t Elements = CAT->getSize().getZExtValue(); Result = APValue(APValue::UninitArray(), Elements, Elements); LValue Subobject = This; Subobject.addArray(Info, E, CAT); bool Success = true; for (EvalInfo::ArrayInitLoopIndex Index(Info); Index != Elements; ++Index) { if (!EvaluateInPlace(Result.getArrayInitializedElt(Index), Info, Subobject, E->getSubExpr()) || !HandleLValueArrayAdjustment(Info, E, Subobject, CAT->getElementType(), 1)) { if (!Info.noteFailure()) return false; Success = false; } } return Success; } bool ArrayExprEvaluator::VisitCXXConstructExpr(const CXXConstructExpr *E) { return VisitCXXConstructExpr(E, This, &Result, E->getType()); } bool ArrayExprEvaluator::VisitCXXConstructExpr(const CXXConstructExpr *E, const LValue &Subobject, APValue *Value, QualType Type) { bool HadZeroInit = !Value->isUninit(); if (const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType(Type)) { unsigned N = CAT->getSize().getZExtValue(); // Preserve the array filler if we had prior zero-initialization. APValue Filler = HadZeroInit && Value->hasArrayFiller() ? Value->getArrayFiller() : APValue(); *Value = APValue(APValue::UninitArray(), N, N); if (HadZeroInit) for (unsigned I = 0; I != N; ++I) Value->getArrayInitializedElt(I) = Filler; // Initialize the elements. 
LValue ArrayElt = Subobject; ArrayElt.addArray(Info, E, CAT); for (unsigned I = 0; I != N; ++I) if (!VisitCXXConstructExpr(E, ArrayElt, &Value->getArrayInitializedElt(I), CAT->getElementType()) || !HandleLValueArrayAdjustment(Info, E, ArrayElt, CAT->getElementType(), 1)) return false; return true; } if (!Type->isRecordType()) return Error(E); return RecordExprEvaluator(Info, Subobject, *Value) .VisitCXXConstructExpr(E, Type); } //===----------------------------------------------------------------------===// // Integer Evaluation // // As a GNU extension, we support casting pointers to sufficiently-wide integer // types and back in constant folding. Integer values are thus represented // either as an integer-valued APValue, or as an lvalue-valued APValue. //===----------------------------------------------------------------------===// namespace { class IntExprEvaluator : public ExprEvaluatorBase { APValue &Result; public: IntExprEvaluator(EvalInfo &info, APValue &result) : ExprEvaluatorBaseTy(info), Result(result) {} bool Success(const llvm::APSInt &SI, const Expr *E, APValue &Result) { assert(E->getType()->isIntegralOrEnumerationType() && "Invalid evaluation result."); assert(SI.isSigned() == E->getType()->isSignedIntegerOrEnumerationType() && "Invalid evaluation result."); assert(SI.getBitWidth() == Info.Ctx.getIntWidth(E->getType()) && "Invalid evaluation result."); Result = APValue(SI); return true; } bool Success(const llvm::APSInt &SI, const Expr *E) { return Success(SI, E, Result); } bool Success(const llvm::APInt &I, const Expr *E, APValue &Result) { assert(E->getType()->isIntegralOrEnumerationType() && "Invalid evaluation result."); assert(I.getBitWidth() == Info.Ctx.getIntWidth(E->getType()) && "Invalid evaluation result."); Result = APValue(APSInt(I)); Result.getInt().setIsUnsigned( E->getType()->isUnsignedIntegerOrEnumerationType()); return true; } bool Success(const llvm::APInt &I, const Expr *E) { return Success(I, E, Result); } bool Success(uint64_t 
Value, const Expr *E, APValue &Result) { assert(E->getType()->isIntegralOrEnumerationType() && "Invalid evaluation result."); Result = APValue(Info.Ctx.MakeIntValue(Value, E->getType())); return true; } bool Success(uint64_t Value, const Expr *E) { return Success(Value, E, Result); } bool Success(CharUnits Size, const Expr *E) { return Success(Size.getQuantity(), E); } bool Success(const APValue &V, const Expr *E) { if (V.isLValue() || V.isAddrLabelDiff()) { Result = V; return true; } return Success(V.getInt(), E); } bool ZeroInitialization(const Expr *E) { return Success(0, E); } //===--------------------------------------------------------------------===// // Visitor Methods //===--------------------------------------------------------------------===// bool VisitIntegerLiteral(const IntegerLiteral *E) { return Success(E->getValue(), E); } bool VisitCharacterLiteral(const CharacterLiteral *E) { return Success(E->getValue(), E); } bool CheckReferencedDecl(const Expr *E, const Decl *D); bool VisitDeclRefExpr(const DeclRefExpr *E) { if (CheckReferencedDecl(E, E->getDecl())) return true; return ExprEvaluatorBaseTy::VisitDeclRefExpr(E); } bool VisitMemberExpr(const MemberExpr *E) { if (CheckReferencedDecl(E, E->getMemberDecl())) { VisitIgnoredBaseExpression(E->getBase()); return true; } return ExprEvaluatorBaseTy::VisitMemberExpr(E); } bool VisitCallExpr(const CallExpr *E); bool VisitBuiltinCallExpr(const CallExpr *E, unsigned BuiltinOp); bool VisitBinaryOperator(const BinaryOperator *E); bool VisitOffsetOfExpr(const OffsetOfExpr *E); bool VisitUnaryOperator(const UnaryOperator *E); bool VisitCastExpr(const CastExpr* E); bool VisitUnaryExprOrTypeTraitExpr(const UnaryExprOrTypeTraitExpr *E); bool VisitCXXBoolLiteralExpr(const CXXBoolLiteralExpr *E) { return Success(E->getValue(), E); } bool VisitObjCBoolLiteralExpr(const ObjCBoolLiteralExpr *E) { return Success(E->getValue(), E); } bool VisitArrayInitIndexExpr(const ArrayInitIndexExpr *E) { if (Info.ArrayInitIndex == 
uint64_t(-1)) { // We were asked to evaluate this subexpression independent of the // enclosing ArrayInitLoopExpr. We can't do that. Info.FFDiag(E); return false; } return Success(Info.ArrayInitIndex, E); } // Note, GNU defines __null as an integer, not a pointer. bool VisitGNUNullExpr(const GNUNullExpr *E) { return ZeroInitialization(E); } bool VisitTypeTraitExpr(const TypeTraitExpr *E) { return Success(E->getValue(), E); } bool VisitArrayTypeTraitExpr(const ArrayTypeTraitExpr *E) { return Success(E->getValue(), E); } bool VisitExpressionTraitExpr(const ExpressionTraitExpr *E) { return Success(E->getValue(), E); } bool VisitUnaryReal(const UnaryOperator *E); bool VisitUnaryImag(const UnaryOperator *E); bool VisitCXXNoexceptExpr(const CXXNoexceptExpr *E); bool VisitSizeOfPackExpr(const SizeOfPackExpr *E); // FIXME: Missing: array subscript of vector, member of vector }; } // end anonymous namespace /// EvaluateIntegerOrLValue - Evaluate an rvalue integral-typed expression, and /// produce either the integer value or a pointer. /// /// GCC has a heinous extension which folds casts between pointer types and /// pointer-sized integral types. We support this by allowing the evaluation of /// an integer rvalue to produce a pointer (represented as an lvalue) instead. /// Some simple arithmetic on such values is supported (they are treated much /// like char*). static bool EvaluateIntegerOrLValue(const Expr *E, APValue &Result, EvalInfo &Info) { assert(E->isRValue() && E->getType()->isIntegralOrEnumerationType()); return IntExprEvaluator(Info, Result).Visit(E); } static bool EvaluateInteger(const Expr *E, APSInt &Result, EvalInfo &Info) { APValue Val; if (!EvaluateIntegerOrLValue(E, Val, Info)) return false; if (!Val.isInt()) { // FIXME: It would be better to produce the diagnostic for casting // a pointer to an integer. 
Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr); return false; } Result = Val.getInt(); return true; } /// Check whether the given declaration can be directly converted to an integral /// rvalue. If not, no diagnostic is produced; there are other things we can /// try. bool IntExprEvaluator::CheckReferencedDecl(const Expr* E, const Decl* D) { // Enums are integer constant exprs. if (const EnumConstantDecl *ECD = dyn_cast(D)) { // Check for signedness/width mismatches between E type and ECD value. bool SameSign = (ECD->getInitVal().isSigned() == E->getType()->isSignedIntegerOrEnumerationType()); bool SameWidth = (ECD->getInitVal().getBitWidth() == Info.Ctx.getIntWidth(E->getType())); if (SameSign && SameWidth) return Success(ECD->getInitVal(), E); else { // Get rid of mismatch (otherwise Success assertions will fail) // by computing a new value matching the type of E. llvm::APSInt Val = ECD->getInitVal(); if (!SameSign) Val.setIsSigned(!ECD->getInitVal().isSigned()); if (!SameWidth) Val = Val.extOrTrunc(Info.Ctx.getIntWidth(E->getType())); return Success(Val, E); } } return false; } /// EvaluateBuiltinClassifyType - Evaluate __builtin_classify_type the same way /// as GCC. static int EvaluateBuiltinClassifyType(const CallExpr *E, const LangOptions &LangOpts) { // The following enum mimics the values returned by GCC. // FIXME: Does GCC differ between lvalue and rvalue references here? enum gcc_type_class { no_type_class = -1, void_type_class, integer_type_class, char_type_class, enumeral_type_class, boolean_type_class, pointer_type_class, reference_type_class, offset_type_class, real_type_class, complex_type_class, function_type_class, method_type_class, record_type_class, union_type_class, array_type_class, string_type_class, lang_type_class }; // If no argument was supplied, default to "no_type_class". This isn't // ideal, however it is what gcc does. 
if (E->getNumArgs() == 0) return no_type_class; QualType CanTy = E->getArg(0)->getType().getCanonicalType(); const BuiltinType *BT = dyn_cast(CanTy); switch (CanTy->getTypeClass()) { #define TYPE(ID, BASE) #define DEPENDENT_TYPE(ID, BASE) case Type::ID: #define NON_CANONICAL_TYPE(ID, BASE) case Type::ID: #define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(ID, BASE) case Type::ID: #include "clang/AST/TypeNodes.def" llvm_unreachable("CallExpr::isBuiltinClassifyType(): unimplemented type"); case Type::Builtin: switch (BT->getKind()) { #define BUILTIN_TYPE(ID, SINGLETON_ID) #define SIGNED_TYPE(ID, SINGLETON_ID) case BuiltinType::ID: return integer_type_class; #define FLOATING_TYPE(ID, SINGLETON_ID) case BuiltinType::ID: return real_type_class; #define PLACEHOLDER_TYPE(ID, SINGLETON_ID) case BuiltinType::ID: break; #include "clang/AST/BuiltinTypes.def" case BuiltinType::Void: return void_type_class; case BuiltinType::Bool: return boolean_type_class; case BuiltinType::Char_U: // gcc doesn't appear to use char_type_class case BuiltinType::UChar: case BuiltinType::UShort: case BuiltinType::UInt: case BuiltinType::ULong: case BuiltinType::ULongLong: case BuiltinType::UInt128: return integer_type_class; case BuiltinType::NullPtr: return pointer_type_class; case BuiltinType::WChar_U: case BuiltinType::Char16: case BuiltinType::Char32: case BuiltinType::ObjCId: case BuiltinType::ObjCClass: case BuiltinType::ObjCSel: #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ case BuiltinType::Id: #include "clang/Basic/OpenCLImageTypes.def" case BuiltinType::OCLSampler: case BuiltinType::OCLEvent: case BuiltinType::OCLClkEvent: case BuiltinType::OCLQueue: case BuiltinType::OCLNDRange: case BuiltinType::OCLReserveID: case BuiltinType::Dependent: llvm_unreachable("CallExpr::isBuiltinClassifyType(): unimplemented type"); }; case Type::Enum: return LangOpts.CPlusPlus ? 
enumeral_type_class : integer_type_class; break; case Type::Pointer: return pointer_type_class; break; case Type::MemberPointer: if (CanTy->isMemberDataPointerType()) return offset_type_class; else { // We expect member pointers to be either data or function pointers, // nothing else. assert(CanTy->isMemberFunctionPointerType()); return method_type_class; } case Type::Complex: return complex_type_class; case Type::FunctionNoProto: case Type::FunctionProto: return LangOpts.CPlusPlus ? function_type_class : pointer_type_class; case Type::Record: if (const RecordType *RT = CanTy->getAs()) { switch (RT->getDecl()->getTagKind()) { case TagTypeKind::TTK_Struct: case TagTypeKind::TTK_Class: case TagTypeKind::TTK_Interface: return record_type_class; case TagTypeKind::TTK_Enum: return LangOpts.CPlusPlus ? enumeral_type_class : integer_type_class; case TagTypeKind::TTK_Union: return union_type_class; } } llvm_unreachable("CallExpr::isBuiltinClassifyType(): unimplemented type"); case Type::ConstantArray: case Type::VariableArray: case Type::IncompleteArray: return LangOpts.CPlusPlus ? array_type_class : pointer_type_class; case Type::BlockPointer: case Type::LValueReference: case Type::RValueReference: case Type::Vector: case Type::ExtVector: case Type::Auto: case Type::ObjCObject: case Type::ObjCInterface: case Type::ObjCObjectPointer: case Type::Pipe: case Type::Atomic: llvm_unreachable("CallExpr::isBuiltinClassifyType(): unimplemented type"); } llvm_unreachable("CallExpr::isBuiltinClassifyType(): unimplemented type"); } /// EvaluateBuiltinConstantPForLValue - Determine the result of /// __builtin_constant_p when applied to the given lvalue. /// /// An lvalue is only "constant" if it is a pointer or reference to the first /// character of a string literal. 
template static bool EvaluateBuiltinConstantPForLValue(const LValue &LV) { const Expr *E = LV.getLValueBase().template dyn_cast(); return E && isa(E) && LV.getLValueOffset().isZero(); } /// EvaluateBuiltinConstantP - Evaluate __builtin_constant_p as similarly to /// GCC as we can manage. static bool EvaluateBuiltinConstantP(ASTContext &Ctx, const Expr *Arg) { QualType ArgType = Arg->getType(); // __builtin_constant_p always has one operand. The rules which gcc follows // are not precisely documented, but are as follows: // // - If the operand is of integral, floating, complex or enumeration type, // and can be folded to a known value of that type, it returns 1. // - If the operand and can be folded to a pointer to the first character // of a string literal (or such a pointer cast to an integral type), it // returns 1. // // Otherwise, it returns 0. // // FIXME: GCC also intends to return 1 for literals of aggregate types, but // its support for this does not currently work. if (ArgType->isIntegralOrEnumerationType()) { Expr::EvalResult Result; if (!Arg->EvaluateAsRValue(Result, Ctx) || Result.HasSideEffects) return false; APValue &V = Result.Val; if (V.getKind() == APValue::Int) return true; if (V.getKind() == APValue::LValue) return EvaluateBuiltinConstantPForLValue(V); } else if (ArgType->isFloatingType() || ArgType->isAnyComplexType()) { return Arg->isEvaluatable(Ctx); } else if (ArgType->isPointerType() || Arg->isGLValue()) { LValue LV; Expr::EvalStatus Status; EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold); if ((Arg->isGLValue() ? EvaluateLValue(Arg, LV, Info) : EvaluatePointer(Arg, LV, Info)) && !Status.HasSideEffects) return EvaluateBuiltinConstantPForLValue(LV); } // Anything else isn't considered to be sufficiently constant. return false; } /// Retrieves the "underlying object type" of the given expression, /// as used by __builtin_object_size. 
static QualType getObjectType(APValue::LValueBase B) { if (const ValueDecl *D = B.dyn_cast()) { if (const VarDecl *VD = dyn_cast(D)) return VD->getType(); } else if (const Expr *E = B.get()) { if (isa(E)) return E->getType(); } return QualType(); } /// A more selective version of E->IgnoreParenCasts for /// tryEvaluateBuiltinObjectSize. This ignores some casts/parens that serve only /// to change the type of E. /// Ex. For E = `(short*)((char*)(&foo))`, returns `&foo` /// /// Always returns an RValue with a pointer representation. static const Expr *ignorePointerCastsAndParens(const Expr *E) { assert(E->isRValue() && E->getType()->hasPointerRepresentation()); auto *NoParens = E->IgnoreParens(); auto *Cast = dyn_cast(NoParens); if (Cast == nullptr) return NoParens; // We only conservatively allow a few kinds of casts, because this code is // inherently a simple solution that seeks to support the common case. auto CastKind = Cast->getCastKind(); if (CastKind != CK_NoOp && CastKind != CK_BitCast && CastKind != CK_AddressSpaceConversion) return NoParens; auto *SubExpr = Cast->getSubExpr(); if (!SubExpr->getType()->hasPointerRepresentation() || !SubExpr->isRValue()) return NoParens; return ignorePointerCastsAndParens(SubExpr); } /// Checks to see if the given LValue's Designator is at the end of the LValue's /// record layout. e.g. /// struct { struct { int a, b; } fst, snd; } obj; /// obj.fst // no /// obj.snd // yes /// obj.fst.a // no /// obj.fst.b // no /// obj.snd.a // no /// obj.snd.b // yes /// /// Please note: this function is specialized for how __builtin_object_size /// views "objects". /// /// If this encounters an invalid RecordDecl, it will always return true. 
static bool isDesignatorAtObjectEnd(const ASTContext &Ctx, const LValue &LVal) { assert(!LVal.Designator.Invalid); auto IsLastOrInvalidFieldDecl = [&Ctx](const FieldDecl *FD, bool &Invalid) { const RecordDecl *Parent = FD->getParent(); Invalid = Parent->isInvalidDecl(); if (Invalid || Parent->isUnion()) return true; const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(Parent); return FD->getFieldIndex() + 1 == Layout.getFieldCount(); }; auto &Base = LVal.getLValueBase(); if (auto *ME = dyn_cast_or_null(Base.dyn_cast())) { if (auto *FD = dyn_cast(ME->getMemberDecl())) { bool Invalid; if (!IsLastOrInvalidFieldDecl(FD, Invalid)) return Invalid; } else if (auto *IFD = dyn_cast(ME->getMemberDecl())) { for (auto *FD : IFD->chain()) { bool Invalid; if (!IsLastOrInvalidFieldDecl(cast(FD), Invalid)) return Invalid; } } } unsigned I = 0; QualType BaseType = getType(Base); if (LVal.Designator.FirstEntryIsAnUnsizedArray) { assert(isBaseAnAllocSizeCall(Base) && "Unsized array in non-alloc_size call?"); // If this is an alloc_size base, we should ignore the initial array index ++I; BaseType = BaseType->castAs()->getPointeeType(); } for (unsigned E = LVal.Designator.Entries.size(); I != E; ++I) { const auto &Entry = LVal.Designator.Entries[I]; if (BaseType->isArrayType()) { // Because __builtin_object_size treats arrays as objects, we can ignore // the index iff this is the last array in the Designator. 
if (I + 1 == E) return true; const auto *CAT = cast(Ctx.getAsArrayType(BaseType)); uint64_t Index = Entry.ArrayIndex; if (Index + 1 != CAT->getSize()) return false; BaseType = CAT->getElementType(); } else if (BaseType->isAnyComplexType()) { const auto *CT = BaseType->castAs(); uint64_t Index = Entry.ArrayIndex; if (Index != 1) return false; BaseType = CT->getElementType(); } else if (auto *FD = getAsField(Entry)) { bool Invalid; if (!IsLastOrInvalidFieldDecl(FD, Invalid)) return Invalid; BaseType = FD->getType(); } else { assert(getAsBaseClass(Entry) && "Expecting cast to a base class"); return false; } } return true; } /// Tests to see if the LValue has a user-specified designator (that isn't /// necessarily valid). Note that this always returns 'true' if the LValue has /// an unsized array as its first designator entry, because there's currently no /// way to tell if the user typed *foo or foo[0]. static bool refersToCompleteObject(const LValue &LVal) { if (LVal.Designator.Invalid) return false; if (!LVal.Designator.Entries.empty()) return LVal.Designator.isMostDerivedAnUnsizedArray(); if (!LVal.InvalidBase) return true; // If `E` is a MemberExpr, then the first part of the designator is hiding in // the LValueBase. const auto *E = LVal.Base.dyn_cast(); return !E || !isa(E); } /// Attempts to detect a user writing into a piece of memory that's impossible /// to figure out the size of by just using types. static bool isUserWritingOffTheEnd(const ASTContext &Ctx, const LValue &LVal) { const SubobjectDesignator &Designator = LVal.Designator; // Notes: // - Users can only write off of the end when we have an invalid base. Invalid // bases imply we don't know where the memory came from. // - We used to be a bit more aggressive here; we'd only be conservative if // the array at the end was flexible, or if it had 0 or 1 elements. This // broke some common standard library extensions (PR30346), but was // otherwise seemingly fine. 
It may be useful to reintroduce this behavior // with some sort of whitelist. OTOH, it seems that GCC is always // conservative with the last element in structs (if it's an array), so our // current behavior is more compatible than a whitelisting approach would // be. return LVal.InvalidBase && Designator.Entries.size() == Designator.MostDerivedPathLength && Designator.MostDerivedIsArrayElement && isDesignatorAtObjectEnd(Ctx, LVal); } /// Converts the given APInt to CharUnits, assuming the APInt is unsigned. /// Fails if the conversion would cause loss of precision. static bool convertUnsignedAPIntToCharUnits(const llvm::APInt &Int, CharUnits &Result) { auto CharUnitsMax = std::numeric_limits::max(); if (Int.ugt(CharUnitsMax)) return false; Result = CharUnits::fromQuantity(Int.getZExtValue()); return true; } /// Helper for tryEvaluateBuiltinObjectSize -- Given an LValue, this will /// determine how many bytes exist from the beginning of the object to either /// the end of the current subobject, or the end of the object itself, depending /// on what the LValue looks like + the value of Type. /// /// If this returns false, the value of Result is undefined. static bool determineEndOffset(EvalInfo &Info, SourceLocation ExprLoc, unsigned Type, const LValue &LVal, CharUnits &EndOffset) { bool DetermineForCompleteObject = refersToCompleteObject(LVal); auto CheckedHandleSizeof = [&](QualType Ty, CharUnits &Result) { if (Ty.isNull() || Ty->isIncompleteType() || Ty->isFunctionType()) return false; return HandleSizeof(Info, ExprLoc, Ty, Result); }; // We want to evaluate the size of the entire object. This is a valid fallback // for when Type=1 and the designator is invalid, because we're asked for an // upper-bound. if (!(Type & 1) || LVal.Designator.Invalid || DetermineForCompleteObject) { // Type=3 wants a lower bound, so we can't fall back to this. 
if (Type == 3 && !DetermineForCompleteObject) return false; llvm::APInt APEndOffset; if (isBaseAnAllocSizeCall(LVal.getLValueBase()) && getBytesReturnedByAllocSizeCall(Info.Ctx, LVal, APEndOffset)) return convertUnsignedAPIntToCharUnits(APEndOffset, EndOffset); if (LVal.InvalidBase) return false; QualType BaseTy = getObjectType(LVal.getLValueBase()); return CheckedHandleSizeof(BaseTy, EndOffset); } // We want to evaluate the size of a subobject. const SubobjectDesignator &Designator = LVal.Designator; // The following is a moderately common idiom in C: // // struct Foo { int a; char c[1]; }; // struct Foo *F = (struct Foo *)malloc(sizeof(struct Foo) + strlen(Bar)); // strcpy(&F->c[0], Bar); // // In order to not break too much legacy code, we need to support it. if (isUserWritingOffTheEnd(Info.Ctx, LVal)) { // If we can resolve this to an alloc_size call, we can hand that back, // because we know for certain how many bytes there are to write to. llvm::APInt APEndOffset; if (isBaseAnAllocSizeCall(LVal.getLValueBase()) && getBytesReturnedByAllocSizeCall(Info.Ctx, LVal, APEndOffset)) return convertUnsignedAPIntToCharUnits(APEndOffset, EndOffset); // If we cannot determine the size of the initial allocation, then we can't // given an accurate upper-bound. However, we are still able to give // conservative lower-bounds for Type=3. if (Type == 1) return false; } CharUnits BytesPerElem; if (!CheckedHandleSizeof(Designator.MostDerivedType, BytesPerElem)) return false; // According to the GCC documentation, we want the size of the subobject // denoted by the pointer. But that's not quite right -- what we actually // want is the size of the immediately-enclosing array, if there is one. int64_t ElemsRemaining; if (Designator.MostDerivedIsArrayElement && Designator.Entries.size() == Designator.MostDerivedPathLength) { uint64_t ArraySize = Designator.getMostDerivedArraySize(); uint64_t ArrayIndex = Designator.Entries.back().ArrayIndex; ElemsRemaining = ArraySize <= ArrayIndex ? 
0 : ArraySize - ArrayIndex; } else { ElemsRemaining = Designator.isOnePastTheEnd() ? 0 : 1; } EndOffset = LVal.getLValueOffset() + BytesPerElem * ElemsRemaining; return true; } /// \brief Tries to evaluate the __builtin_object_size for @p E. If successful, /// returns true and stores the result in @p Size. /// /// If @p WasError is non-null, this will report whether the failure to evaluate /// is to be treated as an Error in IntExprEvaluator. static bool tryEvaluateBuiltinObjectSize(const Expr *E, unsigned Type, EvalInfo &Info, uint64_t &Size) { // Determine the denoted object. LValue LVal; { // The operand of __builtin_object_size is never evaluated for side-effects. // If there are any, but we can determine the pointed-to object anyway, then // ignore the side-effects. SpeculativeEvaluationRAII SpeculativeEval(Info); FoldOffsetRAII Fold(Info); if (E->isGLValue()) { // It's possible for us to be given GLValues if we're called via // Expr::tryEvaluateObjectSize. APValue RVal; if (!EvaluateAsRValue(Info, E, RVal)) return false; LVal.setFrom(Info.Ctx, RVal); - } else if (!EvaluatePointer(ignorePointerCastsAndParens(E), LVal, Info)) + } else if (!EvaluatePointer(ignorePointerCastsAndParens(E), LVal, Info, + /*InvalidBaseOK=*/true)) return false; } // If we point to before the start of the object, there are no accessible // bytes. if (LVal.getLValueOffset().isNegative()) { Size = 0; return true; } CharUnits EndOffset; if (!determineEndOffset(Info, E->getExprLoc(), Type, LVal, EndOffset)) return false; // If we've fallen outside of the end offset, just pretend there's nothing to // write to/read from. 
if (EndOffset <= LVal.getLValueOffset()) Size = 0; else Size = (EndOffset - LVal.getLValueOffset()).getQuantity(); return true; } bool IntExprEvaluator::VisitCallExpr(const CallExpr *E) { if (unsigned BuiltinOp = E->getBuiltinCallee()) return VisitBuiltinCallExpr(E, BuiltinOp); return ExprEvaluatorBaseTy::VisitCallExpr(E); } bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, unsigned BuiltinOp) { switch (unsigned BuiltinOp = E->getBuiltinCallee()) { default: return ExprEvaluatorBaseTy::VisitCallExpr(E); case Builtin::BI__builtin_object_size: { // The type was checked when we built the expression. unsigned Type = E->getArg(1)->EvaluateKnownConstInt(Info.Ctx).getZExtValue(); assert(Type <= 3 && "unexpected type"); uint64_t Size; if (tryEvaluateBuiltinObjectSize(E->getArg(0), Type, Info, Size)) return Success(Size, E); if (E->getArg(0)->HasSideEffects(Info.Ctx)) return Success((Type & 2) ? 0 : -1, E); // Expression had no side effects, but we couldn't statically determine the // size of the referenced object. switch (Info.EvalMode) { case EvalInfo::EM_ConstantExpression: case EvalInfo::EM_PotentialConstantExpression: case EvalInfo::EM_ConstantFold: case EvalInfo::EM_EvaluateForOverflow: case EvalInfo::EM_IgnoreSideEffects: case EvalInfo::EM_OffsetFold: // Leave it to IR generation. return Error(E); case EvalInfo::EM_ConstantExpressionUnevaluated: case EvalInfo::EM_PotentialConstantExpressionUnevaluated: // Reduce it to a constant now. return Success((Type & 2) ? 
0 : -1, E); } llvm_unreachable("unexpected EvalMode"); } case Builtin::BI__builtin_bswap16: case Builtin::BI__builtin_bswap32: case Builtin::BI__builtin_bswap64: { APSInt Val; if (!EvaluateInteger(E->getArg(0), Val, Info)) return false; return Success(Val.byteSwap(), E); } case Builtin::BI__builtin_classify_type: return Success(EvaluateBuiltinClassifyType(E, Info.getLangOpts()), E); // FIXME: BI__builtin_clrsb // FIXME: BI__builtin_clrsbl // FIXME: BI__builtin_clrsbll case Builtin::BI__builtin_clz: case Builtin::BI__builtin_clzl: case Builtin::BI__builtin_clzll: case Builtin::BI__builtin_clzs: { APSInt Val; if (!EvaluateInteger(E->getArg(0), Val, Info)) return false; if (!Val) return Error(E); return Success(Val.countLeadingZeros(), E); } case Builtin::BI__builtin_constant_p: return Success(EvaluateBuiltinConstantP(Info.Ctx, E->getArg(0)), E); case Builtin::BI__builtin_ctz: case Builtin::BI__builtin_ctzl: case Builtin::BI__builtin_ctzll: case Builtin::BI__builtin_ctzs: { APSInt Val; if (!EvaluateInteger(E->getArg(0), Val, Info)) return false; if (!Val) return Error(E); return Success(Val.countTrailingZeros(), E); } case Builtin::BI__builtin_eh_return_data_regno: { int Operand = E->getArg(0)->EvaluateKnownConstInt(Info.Ctx).getZExtValue(); Operand = Info.Ctx.getTargetInfo().getEHDataRegisterNumber(Operand); return Success(Operand, E); } case Builtin::BI__builtin_expect: return Visit(E->getArg(0)); case Builtin::BI__builtin_ffs: case Builtin::BI__builtin_ffsl: case Builtin::BI__builtin_ffsll: { APSInt Val; if (!EvaluateInteger(E->getArg(0), Val, Info)) return false; unsigned N = Val.countTrailingZeros(); return Success(N == Val.getBitWidth() ? 0 : N + 1, E); } case Builtin::BI__builtin_fpclassify: { APFloat Val(0.0); if (!EvaluateFloat(E->getArg(5), Val, Info)) return false; unsigned Arg; switch (Val.getCategory()) { case APFloat::fcNaN: Arg = 0; break; case APFloat::fcInfinity: Arg = 1; break; case APFloat::fcNormal: Arg = Val.isDenormal() ? 
3 : 2; break; case APFloat::fcZero: Arg = 4; break; } return Visit(E->getArg(Arg)); } case Builtin::BI__builtin_isinf_sign: { APFloat Val(0.0); return EvaluateFloat(E->getArg(0), Val, Info) && Success(Val.isInfinity() ? (Val.isNegative() ? -1 : 1) : 0, E); } case Builtin::BI__builtin_isinf: { APFloat Val(0.0); return EvaluateFloat(E->getArg(0), Val, Info) && Success(Val.isInfinity() ? 1 : 0, E); } case Builtin::BI__builtin_isfinite: { APFloat Val(0.0); return EvaluateFloat(E->getArg(0), Val, Info) && Success(Val.isFinite() ? 1 : 0, E); } case Builtin::BI__builtin_isnan: { APFloat Val(0.0); return EvaluateFloat(E->getArg(0), Val, Info) && Success(Val.isNaN() ? 1 : 0, E); } case Builtin::BI__builtin_isnormal: { APFloat Val(0.0); return EvaluateFloat(E->getArg(0), Val, Info) && Success(Val.isNormal() ? 1 : 0, E); } case Builtin::BI__builtin_parity: case Builtin::BI__builtin_parityl: case Builtin::BI__builtin_parityll: { APSInt Val; if (!EvaluateInteger(E->getArg(0), Val, Info)) return false; return Success(Val.countPopulation() % 2, E); } case Builtin::BI__builtin_popcount: case Builtin::BI__builtin_popcountl: case Builtin::BI__builtin_popcountll: { APSInt Val; if (!EvaluateInteger(E->getArg(0), Val, Info)) return false; return Success(Val.countPopulation(), E); } case Builtin::BIstrlen: case Builtin::BIwcslen: // A call to strlen is not a constant expression. if (Info.getLangOpts().CPlusPlus11) Info.CCEDiag(E, diag::note_constexpr_invalid_function) << /*isConstexpr*/0 << /*isConstructor*/0 << (std::string("'") + Info.Ctx.BuiltinInfo.getName(BuiltinOp) + "'"); else Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr); // Fall through. case Builtin::BI__builtin_strlen: case Builtin::BI__builtin_wcslen: { // As an extension, we support __builtin_strlen() as a constant expression, // and support folding strlen() to a constant. 
LValue String; if (!EvaluatePointer(E->getArg(0), String, Info)) return false; QualType CharTy = E->getArg(0)->getType()->getPointeeType(); // Fast path: if it's a string literal, search the string value. if (const StringLiteral *S = dyn_cast_or_null( String.getLValueBase().dyn_cast())) { // The string literal may have embedded null characters. Find the first // one and truncate there. StringRef Str = S->getBytes(); int64_t Off = String.Offset.getQuantity(); if (Off >= 0 && (uint64_t)Off <= (uint64_t)Str.size() && S->getCharByteWidth() == 1 && // FIXME: Add fast-path for wchar_t too. Info.Ctx.hasSameUnqualifiedType(CharTy, Info.Ctx.CharTy)) { Str = Str.substr(Off); StringRef::size_type Pos = Str.find(0); if (Pos != StringRef::npos) Str = Str.substr(0, Pos); return Success(Str.size(), E); } // Fall through to slow path to issue appropriate diagnostic. } // Slow path: scan the bytes of the string looking for the terminating 0. for (uint64_t Strlen = 0; /**/; ++Strlen) { APValue Char; if (!handleLValueToRValueConversion(Info, E, CharTy, String, Char) || !Char.isInt()) return false; if (!Char.getInt()) return Success(Strlen, E); if (!HandleLValueArrayAdjustment(Info, E, String, CharTy, 1)) return false; } } case Builtin::BIstrcmp: case Builtin::BIwcscmp: case Builtin::BIstrncmp: case Builtin::BIwcsncmp: case Builtin::BImemcmp: case Builtin::BIwmemcmp: // A call to strlen is not a constant expression. if (Info.getLangOpts().CPlusPlus11) Info.CCEDiag(E, diag::note_constexpr_invalid_function) << /*isConstexpr*/0 << /*isConstructor*/0 << (std::string("'") + Info.Ctx.BuiltinInfo.getName(BuiltinOp) + "'"); else Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr); // Fall through. 
case Builtin::BI__builtin_strcmp: case Builtin::BI__builtin_wcscmp: case Builtin::BI__builtin_strncmp: case Builtin::BI__builtin_wcsncmp: case Builtin::BI__builtin_memcmp: case Builtin::BI__builtin_wmemcmp: { LValue String1, String2; if (!EvaluatePointer(E->getArg(0), String1, Info) || !EvaluatePointer(E->getArg(1), String2, Info)) return false; QualType CharTy = E->getArg(0)->getType()->getPointeeType(); uint64_t MaxLength = uint64_t(-1); if (BuiltinOp != Builtin::BIstrcmp && BuiltinOp != Builtin::BIwcscmp && BuiltinOp != Builtin::BI__builtin_strcmp && BuiltinOp != Builtin::BI__builtin_wcscmp) { APSInt N; if (!EvaluateInteger(E->getArg(2), N, Info)) return false; MaxLength = N.getExtValue(); } bool StopAtNull = (BuiltinOp != Builtin::BImemcmp && BuiltinOp != Builtin::BIwmemcmp && BuiltinOp != Builtin::BI__builtin_memcmp && BuiltinOp != Builtin::BI__builtin_wmemcmp); for (; MaxLength; --MaxLength) { APValue Char1, Char2; if (!handleLValueToRValueConversion(Info, E, CharTy, String1, Char1) || !handleLValueToRValueConversion(Info, E, CharTy, String2, Char2) || !Char1.isInt() || !Char2.isInt()) return false; if (Char1.getInt() != Char2.getInt()) return Success(Char1.getInt() < Char2.getInt() ? -1 : 1, E); if (StopAtNull && !Char1.getInt()) return Success(0, E); assert(!(StopAtNull && !Char2.getInt())); if (!HandleLValueArrayAdjustment(Info, E, String1, CharTy, 1) || !HandleLValueArrayAdjustment(Info, E, String2, CharTy, 1)) return false; } // We hit the strncmp / memcmp limit. return Success(0, E); } case Builtin::BI__atomic_always_lock_free: case Builtin::BI__atomic_is_lock_free: case Builtin::BI__c11_atomic_is_lock_free: { APSInt SizeVal; if (!EvaluateInteger(E->getArg(0), SizeVal, Info)) return false; // For __atomic_is_lock_free(sizeof(_Atomic(T))), if the size is a power // of two less than the maximum inline atomic width, we know it is // lock-free. 
If the size isn't a power of two, or greater than the // maximum alignment where we promote atomics, we know it is not lock-free // (at least not in the sense of atomic_is_lock_free). Otherwise, // the answer can only be determined at runtime; for example, 16-byte // atomics have lock-free implementations on some, but not all, // x86-64 processors. // Check power-of-two. CharUnits Size = CharUnits::fromQuantity(SizeVal.getZExtValue()); if (Size.isPowerOfTwo()) { // Check against inlining width. unsigned InlineWidthBits = Info.Ctx.getTargetInfo().getMaxAtomicInlineWidth(); if (Size <= Info.Ctx.toCharUnitsFromBits(InlineWidthBits)) { if (BuiltinOp == Builtin::BI__c11_atomic_is_lock_free || Size == CharUnits::One() || E->getArg(1)->isNullPointerConstant(Info.Ctx, Expr::NPC_NeverValueDependent)) // OK, we will inline appropriately-aligned operations of this size, // and _Atomic(T) is appropriately-aligned. return Success(1, E); QualType PointeeType = E->getArg(1)->IgnoreImpCasts()->getType()-> castAs()->getPointeeType(); if (!PointeeType->isIncompleteType() && Info.Ctx.getTypeAlignInChars(PointeeType) >= Size) { // OK, we will inline operations on this object. return Success(1, E); } } } return BuiltinOp == Builtin::BI__atomic_always_lock_free ? Success(0, E) : Error(E); } } } static bool HasSameBase(const LValue &A, const LValue &B) { if (!A.getLValueBase()) return !B.getLValueBase(); if (!B.getLValueBase()) return false; if (A.getLValueBase().getOpaqueValue() != B.getLValueBase().getOpaqueValue()) { const Decl *ADecl = GetLValueBaseDecl(A); if (!ADecl) return false; const Decl *BDecl = GetLValueBaseDecl(B); if (!BDecl || ADecl->getCanonicalDecl() != BDecl->getCanonicalDecl()) return false; } return IsGlobalLValue(A.getLValueBase()) || A.getLValueCallIndex() == B.getLValueCallIndex(); } /// \brief Determine whether this is a pointer past the end of the complete /// object referred to by the lvalue. 
static bool isOnePastTheEndOfCompleteObject(const ASTContext &Ctx, const LValue &LV) { // A null pointer can be viewed as being "past the end" but we don't // choose to look at it that way here. if (!LV.getLValueBase()) return false; // If the designator is valid and refers to a subobject, we're not pointing // past the end. if (!LV.getLValueDesignator().Invalid && !LV.getLValueDesignator().isOnePastTheEnd()) return false; // A pointer to an incomplete type might be past-the-end if the type's size is // zero. We cannot tell because the type is incomplete. QualType Ty = getType(LV.getLValueBase()); if (Ty->isIncompleteType()) return true; // We're a past-the-end pointer if we point to the byte after the object, // no matter what our type or path is. auto Size = Ctx.getTypeSizeInChars(Ty); return LV.getLValueOffset() == Size; } namespace { /// \brief Data recursive integer evaluator of certain binary operators. /// /// We use a data recursive algorithm for binary operators so that we are able /// to handle extreme cases of chained binary operators without causing stack /// overflow. class DataRecursiveIntBinOpEvaluator { struct EvalResult { APValue Val; bool Failed; EvalResult() : Failed(false) { } void swap(EvalResult &RHS) { Val.swap(RHS.Val); Failed = RHS.Failed; RHS.Failed = false; } }; struct Job { const Expr *E; EvalResult LHSResult; // meaningful only for binary operator expression. enum { AnyExprKind, BinOpKind, BinOpVisitedLHSKind } Kind; Job() = default; Job(Job &&) = default; void startSpeculativeEval(EvalInfo &Info) { SpecEvalRAII = SpeculativeEvaluationRAII(Info); } private: SpeculativeEvaluationRAII SpecEvalRAII; }; SmallVector Queue; IntExprEvaluator &IntEval; EvalInfo &Info; APValue &FinalResult; public: DataRecursiveIntBinOpEvaluator(IntExprEvaluator &IntEval, APValue &Result) : IntEval(IntEval), Info(IntEval.getEvalInfo()), FinalResult(Result) { } /// \brief True if \param E is a binary operator that we are going to handle /// data recursively. 
/// We handle binary operators that are comma, logical, or that have operands /// with integral or enumeration type. static bool shouldEnqueue(const BinaryOperator *E) { return E->getOpcode() == BO_Comma || E->isLogicalOp() || (E->isRValue() && E->getType()->isIntegralOrEnumerationType() && E->getLHS()->getType()->isIntegralOrEnumerationType() && E->getRHS()->getType()->isIntegralOrEnumerationType()); } bool Traverse(const BinaryOperator *E) { enqueue(E); EvalResult PrevResult; while (!Queue.empty()) process(PrevResult); if (PrevResult.Failed) return false; FinalResult.swap(PrevResult.Val); return true; } private: bool Success(uint64_t Value, const Expr *E, APValue &Result) { return IntEval.Success(Value, E, Result); } bool Success(const APSInt &Value, const Expr *E, APValue &Result) { return IntEval.Success(Value, E, Result); } bool Error(const Expr *E) { return IntEval.Error(E); } bool Error(const Expr *E, diag::kind D) { return IntEval.Error(E, D); } OptionalDiagnostic CCEDiag(const Expr *E, diag::kind D) { return Info.CCEDiag(E, D); } // \brief Returns true if visiting the RHS is necessary, false otherwise. bool VisitBinOpLHSOnly(EvalResult &LHSResult, const BinaryOperator *E, bool &SuppressRHSDiags); bool VisitBinOp(const EvalResult &LHSResult, const EvalResult &RHSResult, const BinaryOperator *E, APValue &Result); void EvaluateExpr(const Expr *E, EvalResult &Result) { Result.Failed = !Evaluate(Result.Val, Info, E); if (Result.Failed) Result.Val = APValue(); } void process(EvalResult &Result); void enqueue(const Expr *E) { E = E->IgnoreParens(); Queue.resize(Queue.size()+1); Queue.back().E = E; Queue.back().Kind = Job::AnyExprKind; } }; } bool DataRecursiveIntBinOpEvaluator:: VisitBinOpLHSOnly(EvalResult &LHSResult, const BinaryOperator *E, bool &SuppressRHSDiags) { if (E->getOpcode() == BO_Comma) { // Ignore LHS but note if we could not evaluate it. 
if (LHSResult.Failed) return Info.noteSideEffect(); return true; } if (E->isLogicalOp()) { bool LHSAsBool; if (!LHSResult.Failed && HandleConversionToBool(LHSResult.Val, LHSAsBool)) { // We were able to evaluate the LHS, see if we can get away with not // evaluating the RHS: 0 && X -> 0, 1 || X -> 1 if (LHSAsBool == (E->getOpcode() == BO_LOr)) { Success(LHSAsBool, E, LHSResult.Val); return false; // Ignore RHS } } else { LHSResult.Failed = true; // Since we weren't able to evaluate the left hand side, it // might have had side effects. if (!Info.noteSideEffect()) return false; // We can't evaluate the LHS; however, sometimes the result // is determined by the RHS: X && 0 -> 0, X || 1 -> 1. // Don't ignore RHS and suppress diagnostics from this arm. SuppressRHSDiags = true; } return true; } assert(E->getLHS()->getType()->isIntegralOrEnumerationType() && E->getRHS()->getType()->isIntegralOrEnumerationType()); if (LHSResult.Failed && !Info.noteFailure()) return false; // Ignore RHS; return true; } bool DataRecursiveIntBinOpEvaluator:: VisitBinOp(const EvalResult &LHSResult, const EvalResult &RHSResult, const BinaryOperator *E, APValue &Result) { if (E->getOpcode() == BO_Comma) { if (RHSResult.Failed) return false; Result = RHSResult.Val; return true; } if (E->isLogicalOp()) { bool lhsResult, rhsResult; bool LHSIsOK = HandleConversionToBool(LHSResult.Val, lhsResult); bool RHSIsOK = HandleConversionToBool(RHSResult.Val, rhsResult); if (LHSIsOK) { if (RHSIsOK) { if (E->getOpcode() == BO_LOr) return Success(lhsResult || rhsResult, E, Result); else return Success(lhsResult && rhsResult, E, Result); } } else { if (RHSIsOK) { // We can't evaluate the LHS; however, sometimes the result // is determined by the RHS: X && 0 -> 0, X || 1 -> 1. 
if (rhsResult == (E->getOpcode() == BO_LOr)) return Success(rhsResult, E, Result); } } return false; } assert(E->getLHS()->getType()->isIntegralOrEnumerationType() && E->getRHS()->getType()->isIntegralOrEnumerationType()); if (LHSResult.Failed || RHSResult.Failed) return false; const APValue &LHSVal = LHSResult.Val; const APValue &RHSVal = RHSResult.Val; // Handle cases like (unsigned long)&a + 4. if (E->isAdditiveOp() && LHSVal.isLValue() && RHSVal.isInt()) { Result = LHSVal; CharUnits AdditionalOffset = CharUnits::fromQuantity(RHSVal.getInt().getZExtValue()); if (E->getOpcode() == BO_Add) Result.getLValueOffset() += AdditionalOffset; else Result.getLValueOffset() -= AdditionalOffset; return true; } // Handle cases like 4 + (unsigned long)&a if (E->getOpcode() == BO_Add && RHSVal.isLValue() && LHSVal.isInt()) { Result = RHSVal; Result.getLValueOffset() += CharUnits::fromQuantity(LHSVal.getInt().getZExtValue()); return true; } if (E->getOpcode() == BO_Sub && LHSVal.isLValue() && RHSVal.isLValue()) { // Handle (intptr_t)&&A - (intptr_t)&&B. if (!LHSVal.getLValueOffset().isZero() || !RHSVal.getLValueOffset().isZero()) return false; const Expr *LHSExpr = LHSVal.getLValueBase().dyn_cast(); const Expr *RHSExpr = RHSVal.getLValueBase().dyn_cast(); if (!LHSExpr || !RHSExpr) return false; const AddrLabelExpr *LHSAddrExpr = dyn_cast(LHSExpr); const AddrLabelExpr *RHSAddrExpr = dyn_cast(RHSExpr); if (!LHSAddrExpr || !RHSAddrExpr) return false; // Make sure both labels come from the same function. if (LHSAddrExpr->getLabel()->getDeclContext() != RHSAddrExpr->getLabel()->getDeclContext()) return false; Result = APValue(LHSAddrExpr, RHSAddrExpr); return true; } // All the remaining cases expect both operands to be an integer if (!LHSVal.isInt() || !RHSVal.isInt()) return Error(E); // Set up the width and signedness manually, in case it can't be deduced // from the operation we're performing. // FIXME: Don't do this in the cases where we can deduce it. 
APSInt Value(Info.Ctx.getIntWidth(E->getType()), E->getType()->isUnsignedIntegerOrEnumerationType()); if (!handleIntIntBinOp(Info, E, LHSVal.getInt(), E->getOpcode(), RHSVal.getInt(), Value)) return false; return Success(Value, E, Result); } void DataRecursiveIntBinOpEvaluator::process(EvalResult &Result) { Job &job = Queue.back(); switch (job.Kind) { case Job::AnyExprKind: { if (const BinaryOperator *Bop = dyn_cast(job.E)) { if (shouldEnqueue(Bop)) { job.Kind = Job::BinOpKind; enqueue(Bop->getLHS()); return; } } EvaluateExpr(job.E, Result); Queue.pop_back(); return; } case Job::BinOpKind: { const BinaryOperator *Bop = cast(job.E); bool SuppressRHSDiags = false; if (!VisitBinOpLHSOnly(Result, Bop, SuppressRHSDiags)) { Queue.pop_back(); return; } if (SuppressRHSDiags) job.startSpeculativeEval(Info); job.LHSResult.swap(Result); job.Kind = Job::BinOpVisitedLHSKind; enqueue(Bop->getRHS()); return; } case Job::BinOpVisitedLHSKind: { const BinaryOperator *Bop = cast(job.E); EvalResult RHS; RHS.swap(Result); Result.Failed = !VisitBinOp(job.LHSResult, RHS, Bop, Result.Val); Queue.pop_back(); return; } } llvm_unreachable("Invalid Job::Kind!"); } namespace { /// Used when we determine that we should fail, but can keep evaluating prior to /// noting that we had a failure. class DelayedNoteFailureRAII { EvalInfo &Info; bool NoteFailure; public: DelayedNoteFailureRAII(EvalInfo &Info, bool NoteFailure = true) : Info(Info), NoteFailure(NoteFailure) {} ~DelayedNoteFailureRAII() { if (NoteFailure) { bool ContinueAfterFailure = Info.noteFailure(); (void)ContinueAfterFailure; assert(ContinueAfterFailure && "Shouldn't have kept evaluating on failure."); } } }; } bool IntExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) { // We don't call noteFailure immediately because the assignment happens after // we evaluate LHS and RHS. 
if (!Info.keepEvaluatingAfterFailure() && E->isAssignmentOp()) return Error(E); DelayedNoteFailureRAII MaybeNoteFailureLater(Info, E->isAssignmentOp()); if (DataRecursiveIntBinOpEvaluator::shouldEnqueue(E)) return DataRecursiveIntBinOpEvaluator(*this, Result).Traverse(E); QualType LHSTy = E->getLHS()->getType(); QualType RHSTy = E->getRHS()->getType(); if (LHSTy->isAnyComplexType() || RHSTy->isAnyComplexType()) { ComplexValue LHS, RHS; bool LHSOK; if (E->isAssignmentOp()) { LValue LV; EvaluateLValue(E->getLHS(), LV, Info); LHSOK = false; } else if (LHSTy->isRealFloatingType()) { LHSOK = EvaluateFloat(E->getLHS(), LHS.FloatReal, Info); if (LHSOK) { LHS.makeComplexFloat(); LHS.FloatImag = APFloat(LHS.FloatReal.getSemantics()); } } else { LHSOK = EvaluateComplex(E->getLHS(), LHS, Info); } if (!LHSOK && !Info.noteFailure()) return false; if (E->getRHS()->getType()->isRealFloatingType()) { if (!EvaluateFloat(E->getRHS(), RHS.FloatReal, Info) || !LHSOK) return false; RHS.makeComplexFloat(); RHS.FloatImag = APFloat(RHS.FloatReal.getSemantics()); } else if (!EvaluateComplex(E->getRHS(), RHS, Info) || !LHSOK) return false; if (LHS.isComplexFloat()) { APFloat::cmpResult CR_r = LHS.getComplexFloatReal().compare(RHS.getComplexFloatReal()); APFloat::cmpResult CR_i = LHS.getComplexFloatImag().compare(RHS.getComplexFloatImag()); if (E->getOpcode() == BO_EQ) return Success((CR_r == APFloat::cmpEqual && CR_i == APFloat::cmpEqual), E); else { assert(E->getOpcode() == BO_NE && "Invalid complex comparison."); return Success(((CR_r == APFloat::cmpGreaterThan || CR_r == APFloat::cmpLessThan || CR_r == APFloat::cmpUnordered) || (CR_i == APFloat::cmpGreaterThan || CR_i == APFloat::cmpLessThan || CR_i == APFloat::cmpUnordered)), E); } } else { if (E->getOpcode() == BO_EQ) return Success((LHS.getComplexIntReal() == RHS.getComplexIntReal() && LHS.getComplexIntImag() == RHS.getComplexIntImag()), E); else { assert(E->getOpcode() == BO_NE && "Invalid compex comparison."); return 
Success((LHS.getComplexIntReal() != RHS.getComplexIntReal() || LHS.getComplexIntImag() != RHS.getComplexIntImag()), E); } } } if (LHSTy->isRealFloatingType() && RHSTy->isRealFloatingType()) { APFloat RHS(0.0), LHS(0.0); bool LHSOK = EvaluateFloat(E->getRHS(), RHS, Info); if (!LHSOK && !Info.noteFailure()) return false; if (!EvaluateFloat(E->getLHS(), LHS, Info) || !LHSOK) return false; APFloat::cmpResult CR = LHS.compare(RHS); switch (E->getOpcode()) { default: llvm_unreachable("Invalid binary operator!"); case BO_LT: return Success(CR == APFloat::cmpLessThan, E); case BO_GT: return Success(CR == APFloat::cmpGreaterThan, E); case BO_LE: return Success(CR == APFloat::cmpLessThan || CR == APFloat::cmpEqual, E); case BO_GE: return Success(CR == APFloat::cmpGreaterThan || CR == APFloat::cmpEqual, E); case BO_EQ: return Success(CR == APFloat::cmpEqual, E); case BO_NE: return Success(CR == APFloat::cmpGreaterThan || CR == APFloat::cmpLessThan || CR == APFloat::cmpUnordered, E); } } if (LHSTy->isPointerType() && RHSTy->isPointerType()) { if (E->getOpcode() == BO_Sub || E->isComparisonOp()) { LValue LHSValue, RHSValue; bool LHSOK = EvaluatePointer(E->getLHS(), LHSValue, Info); if (!LHSOK && !Info.noteFailure()) return false; if (!EvaluatePointer(E->getRHS(), RHSValue, Info) || !LHSOK) return false; // Reject differing bases from the normal codepath; we special-case // comparisons to null. if (!HasSameBase(LHSValue, RHSValue)) { if (E->getOpcode() == BO_Sub) { // Handle &&A - &&B. if (!LHSValue.Offset.isZero() || !RHSValue.Offset.isZero()) return Error(E); const Expr *LHSExpr = LHSValue.Base.dyn_cast(); const Expr *RHSExpr = RHSValue.Base.dyn_cast(); if (!LHSExpr || !RHSExpr) return Error(E); const AddrLabelExpr *LHSAddrExpr = dyn_cast(LHSExpr); const AddrLabelExpr *RHSAddrExpr = dyn_cast(RHSExpr); if (!LHSAddrExpr || !RHSAddrExpr) return Error(E); // Make sure both labels come from the same function. 
if (LHSAddrExpr->getLabel()->getDeclContext() != RHSAddrExpr->getLabel()->getDeclContext()) return Error(E); return Success(APValue(LHSAddrExpr, RHSAddrExpr), E); } // Inequalities and subtractions between unrelated pointers have // unspecified or undefined behavior. if (!E->isEqualityOp()) return Error(E); // A constant address may compare equal to the address of a symbol. // The one exception is that address of an object cannot compare equal // to a null pointer constant. if ((!LHSValue.Base && !LHSValue.Offset.isZero()) || (!RHSValue.Base && !RHSValue.Offset.isZero())) return Error(E); // It's implementation-defined whether distinct literals will have // distinct addresses. In clang, the result of such a comparison is // unspecified, so it is not a constant expression. However, we do know // that the address of a literal will be non-null. if ((IsLiteralLValue(LHSValue) || IsLiteralLValue(RHSValue)) && LHSValue.Base && RHSValue.Base) return Error(E); // We can't tell whether weak symbols will end up pointing to the same // object. if (IsWeakLValue(LHSValue) || IsWeakLValue(RHSValue)) return Error(E); // We can't compare the address of the start of one object with the // past-the-end address of another object, per C++ DR1652. if ((LHSValue.Base && LHSValue.Offset.isZero() && isOnePastTheEndOfCompleteObject(Info.Ctx, RHSValue)) || (RHSValue.Base && RHSValue.Offset.isZero() && isOnePastTheEndOfCompleteObject(Info.Ctx, LHSValue))) return Error(E); // We can't tell whether an object is at the same address as another // zero sized object. if ((RHSValue.Base && isZeroSized(LHSValue)) || (LHSValue.Base && isZeroSized(RHSValue))) return Error(E); // Pointers with different bases cannot represent the same object. // (Note that clang defaults to -fmerge-all-constants, which can // lead to inconsistent results for comparisons involving the address // of a constant; this generally doesn't matter in practice.) 
return Success(E->getOpcode() == BO_NE, E); } const CharUnits &LHSOffset = LHSValue.getLValueOffset(); const CharUnits &RHSOffset = RHSValue.getLValueOffset(); SubobjectDesignator &LHSDesignator = LHSValue.getLValueDesignator(); SubobjectDesignator &RHSDesignator = RHSValue.getLValueDesignator(); if (E->getOpcode() == BO_Sub) { // C++11 [expr.add]p6: // Unless both pointers point to elements of the same array object, or // one past the last element of the array object, the behavior is // undefined. if (!LHSDesignator.Invalid && !RHSDesignator.Invalid && !AreElementsOfSameArray(getType(LHSValue.Base), LHSDesignator, RHSDesignator)) CCEDiag(E, diag::note_constexpr_pointer_subtraction_not_same_array); QualType Type = E->getLHS()->getType(); QualType ElementType = Type->getAs()->getPointeeType(); CharUnits ElementSize; if (!HandleSizeof(Info, E->getExprLoc(), ElementType, ElementSize)) return false; // As an extension, a type may have zero size (empty struct or union in // C, array of zero length). Pointer subtraction in such cases has // undefined behavior, so is not constant. if (ElementSize.isZero()) { Info.FFDiag(E, diag::note_constexpr_pointer_subtraction_zero_size) << ElementType; return false; } // FIXME: LLVM and GCC both compute LHSOffset - RHSOffset at runtime, // and produce incorrect results when it overflows. Such behavior // appears to be non-conforming, but is common, so perhaps we should // assume the standard intended for such cases to be undefined behavior // and check for them. // Compute (LHSOffset - RHSOffset) / Size carefully, checking for // overflow in the final conversion to ptrdiff_t. 
APSInt LHS( llvm::APInt(65, (int64_t)LHSOffset.getQuantity(), true), false); APSInt RHS( llvm::APInt(65, (int64_t)RHSOffset.getQuantity(), true), false); APSInt ElemSize( llvm::APInt(65, (int64_t)ElementSize.getQuantity(), true), false); APSInt TrueResult = (LHS - RHS) / ElemSize; APSInt Result = TrueResult.trunc(Info.Ctx.getIntWidth(E->getType())); if (Result.extend(65) != TrueResult && !HandleOverflow(Info, E, TrueResult, E->getType())) return false; return Success(Result, E); } // C++11 [expr.rel]p3: // Pointers to void (after pointer conversions) can be compared, with a // result defined as follows: If both pointers represent the same // address or are both the null pointer value, the result is true if the // operator is <= or >= and false otherwise; otherwise the result is // unspecified. // We interpret this as applying to pointers to *cv* void. if (LHSTy->isVoidPointerType() && LHSOffset != RHSOffset && E->isRelationalOp()) CCEDiag(E, diag::note_constexpr_void_comparison); // C++11 [expr.rel]p2: // - If two pointers point to non-static data members of the same object, // or to subobjects or array elements fo such members, recursively, the // pointer to the later declared member compares greater provided the // two members have the same access control and provided their class is // not a union. // [...] // - Otherwise pointer comparisons are unspecified. if (!LHSDesignator.Invalid && !RHSDesignator.Invalid && E->isRelationalOp()) { bool WasArrayIndex; unsigned Mismatch = FindDesignatorMismatch(getType(LHSValue.Base), LHSDesignator, RHSDesignator, WasArrayIndex); // At the point where the designators diverge, the comparison has a // specified value if: // - we are comparing array indices // - we are comparing fields of a union, or fields with the same access // Otherwise, the result is unspecified and thus the comparison is not a // constant expression. 
if (!WasArrayIndex && Mismatch < LHSDesignator.Entries.size() && Mismatch < RHSDesignator.Entries.size()) { const FieldDecl *LF = getAsField(LHSDesignator.Entries[Mismatch]); const FieldDecl *RF = getAsField(RHSDesignator.Entries[Mismatch]); if (!LF && !RF) CCEDiag(E, diag::note_constexpr_pointer_comparison_base_classes); else if (!LF) CCEDiag(E, diag::note_constexpr_pointer_comparison_base_field) << getAsBaseClass(LHSDesignator.Entries[Mismatch]) << RF->getParent() << RF; else if (!RF) CCEDiag(E, diag::note_constexpr_pointer_comparison_base_field) << getAsBaseClass(RHSDesignator.Entries[Mismatch]) << LF->getParent() << LF; else if (!LF->getParent()->isUnion() && LF->getAccess() != RF->getAccess()) CCEDiag(E, diag::note_constexpr_pointer_comparison_differing_access) << LF << LF->getAccess() << RF << RF->getAccess() << LF->getParent(); } } // The comparison here must be unsigned, and performed with the same // width as the pointer. unsigned PtrSize = Info.Ctx.getTypeSize(LHSTy); uint64_t CompareLHS = LHSOffset.getQuantity(); uint64_t CompareRHS = RHSOffset.getQuantity(); assert(PtrSize <= 64 && "Unexpected pointer width"); uint64_t Mask = ~0ULL >> (64 - PtrSize); CompareLHS &= Mask; CompareRHS &= Mask; // If there is a base and this is a relational operator, we can only // compare pointers within the object in question; otherwise, the result // depends on where the object is located in memory. 
if (!LHSValue.Base.isNull() && E->isRelationalOp()) { QualType BaseTy = getType(LHSValue.Base); if (BaseTy->isIncompleteType()) return Error(E); CharUnits Size = Info.Ctx.getTypeSizeInChars(BaseTy); uint64_t OffsetLimit = Size.getQuantity(); if (CompareLHS > OffsetLimit || CompareRHS > OffsetLimit) return Error(E); } switch (E->getOpcode()) { default: llvm_unreachable("missing comparison operator"); case BO_LT: return Success(CompareLHS < CompareRHS, E); case BO_GT: return Success(CompareLHS > CompareRHS, E); case BO_LE: return Success(CompareLHS <= CompareRHS, E); case BO_GE: return Success(CompareLHS >= CompareRHS, E); case BO_EQ: return Success(CompareLHS == CompareRHS, E); case BO_NE: return Success(CompareLHS != CompareRHS, E); } } } if (LHSTy->isMemberPointerType()) { assert(E->isEqualityOp() && "unexpected member pointer operation"); assert(RHSTy->isMemberPointerType() && "invalid comparison"); MemberPtr LHSValue, RHSValue; bool LHSOK = EvaluateMemberPointer(E->getLHS(), LHSValue, Info); if (!LHSOK && !Info.noteFailure()) return false; if (!EvaluateMemberPointer(E->getRHS(), RHSValue, Info) || !LHSOK) return false; // C++11 [expr.eq]p2: // If both operands are null, they compare equal. Otherwise if only one is // null, they compare unequal. if (!LHSValue.getDecl() || !RHSValue.getDecl()) { bool Equal = !LHSValue.getDecl() && !RHSValue.getDecl(); return Success(E->getOpcode() == BO_EQ ? Equal : !Equal, E); } // Otherwise if either is a pointer to a virtual member function, the // result is unspecified. 
if (const CXXMethodDecl *MD = dyn_cast(LHSValue.getDecl())) if (MD->isVirtual()) CCEDiag(E, diag::note_constexpr_compare_virtual_mem_ptr) << MD; if (const CXXMethodDecl *MD = dyn_cast(RHSValue.getDecl())) if (MD->isVirtual()) CCEDiag(E, diag::note_constexpr_compare_virtual_mem_ptr) << MD; // Otherwise they compare equal if and only if they would refer to the // same member of the same most derived object or the same subobject if // they were dereferenced with a hypothetical object of the associated // class type. bool Equal = LHSValue == RHSValue; return Success(E->getOpcode() == BO_EQ ? Equal : !Equal, E); } if (LHSTy->isNullPtrType()) { assert(E->isComparisonOp() && "unexpected nullptr operation"); assert(RHSTy->isNullPtrType() && "missing pointer conversion"); // C++11 [expr.rel]p4, [expr.eq]p3: If two operands of type std::nullptr_t // are compared, the result is true of the operator is <=, >= or ==, and // false otherwise. BinaryOperator::Opcode Opcode = E->getOpcode(); return Success(Opcode == BO_EQ || Opcode == BO_LE || Opcode == BO_GE, E); } assert((!LHSTy->isIntegralOrEnumerationType() || !RHSTy->isIntegralOrEnumerationType()) && "DataRecursiveIntBinOpEvaluator should have handled integral types"); // We can't continue from here for non-integral types. return ExprEvaluatorBaseTy::VisitBinaryOperator(E); } /// VisitUnaryExprOrTypeTraitExpr - Evaluate a sizeof, alignof or vec_step with /// a result as the expression's type. bool IntExprEvaluator::VisitUnaryExprOrTypeTraitExpr( const UnaryExprOrTypeTraitExpr *E) { switch(E->getKind()) { case UETT_AlignOf: { if (E->isArgumentType()) return Success(GetAlignOfType(Info, E->getArgumentType()), E); else return Success(GetAlignOfExpr(Info, E->getArgumentExpr()), E); } case UETT_VecStep: { QualType Ty = E->getTypeOfArgument(); if (Ty->isVectorType()) { unsigned n = Ty->castAs()->getNumElements(); // The vec_step built-in functions that take a 3-component // vector return 4. 
(OpenCL 1.1 spec 6.11.12) if (n == 3) n = 4; return Success(n, E); } else return Success(1, E); } case UETT_SizeOf: { QualType SrcTy = E->getTypeOfArgument(); // C++ [expr.sizeof]p2: "When applied to a reference or a reference type, // the result is the size of the referenced type." if (const ReferenceType *Ref = SrcTy->getAs()) SrcTy = Ref->getPointeeType(); CharUnits Sizeof; if (!HandleSizeof(Info, E->getExprLoc(), SrcTy, Sizeof)) return false; return Success(Sizeof, E); } case UETT_OpenMPRequiredSimdAlign: assert(E->isArgumentType()); return Success( Info.Ctx.toCharUnitsFromBits( Info.Ctx.getOpenMPDefaultSimdAlign(E->getArgumentType())) .getQuantity(), E); } llvm_unreachable("unknown expr/type trait"); } bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) { CharUnits Result; unsigned n = OOE->getNumComponents(); if (n == 0) return Error(OOE); QualType CurrentType = OOE->getTypeSourceInfo()->getType(); for (unsigned i = 0; i != n; ++i) { OffsetOfNode ON = OOE->getComponent(i); switch (ON.getKind()) { case OffsetOfNode::Array: { const Expr *Idx = OOE->getIndexExpr(ON.getArrayExprIndex()); APSInt IdxResult; if (!EvaluateInteger(Idx, IdxResult, Info)) return false; const ArrayType *AT = Info.Ctx.getAsArrayType(CurrentType); if (!AT) return Error(OOE); CurrentType = AT->getElementType(); CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType); Result += IdxResult.getSExtValue() * ElementSize; break; } case OffsetOfNode::Field: { FieldDecl *MemberDecl = ON.getField(); const RecordType *RT = CurrentType->getAs(); if (!RT) return Error(OOE); RecordDecl *RD = RT->getDecl(); if (RD->isInvalidDecl()) return false; const ASTRecordLayout &RL = Info.Ctx.getASTRecordLayout(RD); unsigned i = MemberDecl->getFieldIndex(); assert(i < RL.getFieldCount() && "offsetof field in wrong type"); Result += Info.Ctx.toCharUnitsFromBits(RL.getFieldOffset(i)); CurrentType = MemberDecl->getType().getNonReferenceType(); break; } case OffsetOfNode::Identifier: 
llvm_unreachable("dependent __builtin_offsetof"); case OffsetOfNode::Base: { CXXBaseSpecifier *BaseSpec = ON.getBase(); if (BaseSpec->isVirtual()) return Error(OOE); // Find the layout of the class whose base we are looking into. const RecordType *RT = CurrentType->getAs(); if (!RT) return Error(OOE); RecordDecl *RD = RT->getDecl(); if (RD->isInvalidDecl()) return false; const ASTRecordLayout &RL = Info.Ctx.getASTRecordLayout(RD); // Find the base class itself. CurrentType = BaseSpec->getType(); const RecordType *BaseRT = CurrentType->getAs(); if (!BaseRT) return Error(OOE); // Add the offset to the base. Result += RL.getBaseClassOffset(cast(BaseRT->getDecl())); break; } } } return Success(Result, OOE); } bool IntExprEvaluator::VisitUnaryOperator(const UnaryOperator *E) { switch (E->getOpcode()) { default: // Address, indirect, pre/post inc/dec, etc are not valid constant exprs. // See C99 6.6p3. return Error(E); case UO_Extension: // FIXME: Should extension allow i-c-e extension expressions in its scope? // If so, we could clear the diagnostic ID. return Visit(E->getSubExpr()); case UO_Plus: // The result is just the value. return Visit(E->getSubExpr()); case UO_Minus: { if (!Visit(E->getSubExpr())) return false; if (!Result.isInt()) return Error(E); const APSInt &Value = Result.getInt(); if (Value.isSigned() && Value.isMinSignedValue() && !HandleOverflow(Info, E, -Value.extend(Value.getBitWidth() + 1), E->getType())) return false; return Success(-Value, E); } case UO_Not: { if (!Visit(E->getSubExpr())) return false; if (!Result.isInt()) return Error(E); return Success(~Result.getInt(), E); } case UO_LNot: { bool bres; if (!EvaluateAsBooleanCondition(E->getSubExpr(), bres, Info)) return false; return Success(!bres, E); } } } /// HandleCast - This is used to evaluate implicit or explicit casts where the /// result type is integer. 
bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) { const Expr *SubExpr = E->getSubExpr(); QualType DestType = E->getType(); QualType SrcType = SubExpr->getType(); switch (E->getCastKind()) { case CK_BaseToDerived: case CK_DerivedToBase: case CK_UncheckedDerivedToBase: case CK_Dynamic: case CK_ToUnion: case CK_ArrayToPointerDecay: case CK_FunctionToPointerDecay: case CK_NullToPointer: case CK_NullToMemberPointer: case CK_BaseToDerivedMemberPointer: case CK_DerivedToBaseMemberPointer: case CK_ReinterpretMemberPointer: case CK_ConstructorConversion: case CK_IntegralToPointer: case CK_ToVoid: case CK_VectorSplat: case CK_IntegralToFloating: case CK_FloatingCast: case CK_CPointerToObjCPointerCast: case CK_BlockPointerToObjCPointerCast: case CK_AnyPointerToBlockPointerCast: case CK_ObjCObjectLValueCast: case CK_FloatingRealToComplex: case CK_FloatingComplexToReal: case CK_FloatingComplexCast: case CK_FloatingComplexToIntegralComplex: case CK_IntegralRealToComplex: case CK_IntegralComplexCast: case CK_IntegralComplexToFloatingComplex: case CK_BuiltinFnToFnPtr: case CK_ZeroToOCLEvent: case CK_ZeroToOCLQueue: case CK_NonAtomicToAtomic: case CK_AddressSpaceConversion: case CK_IntToOCLSampler: llvm_unreachable("invalid cast kind for integral value"); case CK_BitCast: case CK_Dependent: case CK_LValueBitCast: case CK_ARCProduceObject: case CK_ARCConsumeObject: case CK_ARCReclaimReturnedObject: case CK_ARCExtendBlockObject: case CK_CopyAndAutoreleaseBlockObject: return Error(E); case CK_UserDefinedConversion: case CK_LValueToRValue: case CK_AtomicToNonAtomic: case CK_NoOp: return ExprEvaluatorBaseTy::VisitCastExpr(E); case CK_MemberPointerToBoolean: case CK_PointerToBoolean: case CK_IntegralToBoolean: case CK_FloatingToBoolean: case CK_BooleanToSignedIntegral: case CK_FloatingComplexToBoolean: case CK_IntegralComplexToBoolean: { bool BoolResult; if (!EvaluateAsBooleanCondition(SubExpr, BoolResult, Info)) return false; uint64_t IntResult = BoolResult; if (BoolResult && 
E->getCastKind() == CK_BooleanToSignedIntegral) IntResult = (uint64_t)-1; return Success(IntResult, E); } case CK_IntegralCast: { if (!Visit(SubExpr)) return false; if (!Result.isInt()) { // Allow casts of address-of-label differences if they are no-ops // or narrowing. (The narrowing case isn't actually guaranteed to // be constant-evaluatable except in some narrow cases which are hard // to detect here. We let it through on the assumption the user knows // what they are doing.) if (Result.isAddrLabelDiff()) return Info.Ctx.getTypeSize(DestType) <= Info.Ctx.getTypeSize(SrcType); // Only allow casts of lvalues if they are lossless. return Info.Ctx.getTypeSize(DestType) == Info.Ctx.getTypeSize(SrcType); } return Success(HandleIntToIntCast(Info, E, DestType, SrcType, Result.getInt()), E); } case CK_PointerToIntegral: { CCEDiag(E, diag::note_constexpr_invalid_cast) << 2; LValue LV; if (!EvaluatePointer(SubExpr, LV, Info)) return false; if (LV.getLValueBase()) { // Only allow based lvalue casts if they are lossless. // FIXME: Allow a larger integer size than the pointer size, and allow // narrowing back down to pointer width in subsequent integral casts. // FIXME: Check integer type's active bits, not its type size. 
if (Info.Ctx.getTypeSize(DestType) != Info.Ctx.getTypeSize(SrcType)) return Error(E); LV.Designator.setInvalid(); LV.moveInto(Result); return true; } uint64_t V; if (LV.isNullPointer()) V = Info.Ctx.getTargetNullPointerValue(SrcType); else V = LV.getLValueOffset().getQuantity(); APSInt AsInt = Info.Ctx.MakeIntValue(V, SrcType); return Success(HandleIntToIntCast(Info, E, DestType, SrcType, AsInt), E); } case CK_IntegralComplexToReal: { ComplexValue C; if (!EvaluateComplex(SubExpr, C, Info)) return false; return Success(C.getComplexIntReal(), E); } case CK_FloatingToIntegral: { APFloat F(0.0); if (!EvaluateFloat(SubExpr, F, Info)) return false; APSInt Value; if (!HandleFloatToIntCast(Info, E, SrcType, F, DestType, Value)) return false; return Success(Value, E); } } llvm_unreachable("unknown cast resulting in integral value"); } bool IntExprEvaluator::VisitUnaryReal(const UnaryOperator *E) { if (E->getSubExpr()->getType()->isAnyComplexType()) { ComplexValue LV; if (!EvaluateComplex(E->getSubExpr(), LV, Info)) return false; if (!LV.isComplexInt()) return Error(E); return Success(LV.getComplexIntReal(), E); } return Visit(E->getSubExpr()); } bool IntExprEvaluator::VisitUnaryImag(const UnaryOperator *E) { if (E->getSubExpr()->getType()->isComplexIntegerType()) { ComplexValue LV; if (!EvaluateComplex(E->getSubExpr(), LV, Info)) return false; if (!LV.isComplexInt()) return Error(E); return Success(LV.getComplexIntImag(), E); } VisitIgnoredValue(E->getSubExpr()); return Success(0, E); } bool IntExprEvaluator::VisitSizeOfPackExpr(const SizeOfPackExpr *E) { return Success(E->getPackLength(), E); } bool IntExprEvaluator::VisitCXXNoexceptExpr(const CXXNoexceptExpr *E) { return Success(E->getValue(), E); } //===----------------------------------------------------------------------===// // Float Evaluation //===----------------------------------------------------------------------===// namespace { class FloatExprEvaluator : public ExprEvaluatorBase { APFloat &Result; public: 
FloatExprEvaluator(EvalInfo &info, APFloat &result) : ExprEvaluatorBaseTy(info), Result(result) {} bool Success(const APValue &V, const Expr *e) { Result = V.getFloat(); return true; } bool ZeroInitialization(const Expr *E) { Result = APFloat::getZero(Info.Ctx.getFloatTypeSemantics(E->getType())); return true; } bool VisitCallExpr(const CallExpr *E); bool VisitUnaryOperator(const UnaryOperator *E); bool VisitBinaryOperator(const BinaryOperator *E); bool VisitFloatingLiteral(const FloatingLiteral *E); bool VisitCastExpr(const CastExpr *E); bool VisitUnaryReal(const UnaryOperator *E); bool VisitUnaryImag(const UnaryOperator *E); // FIXME: Missing: array subscript of vector, member of vector }; } // end anonymous namespace static bool EvaluateFloat(const Expr* E, APFloat& Result, EvalInfo &Info) { assert(E->isRValue() && E->getType()->isRealFloatingType()); return FloatExprEvaluator(Info, Result).Visit(E); } static bool TryEvaluateBuiltinNaN(const ASTContext &Context, QualType ResultTy, const Expr *Arg, bool SNaN, llvm::APFloat &Result) { const StringLiteral *S = dyn_cast(Arg->IgnoreParenCasts()); if (!S) return false; const llvm::fltSemantics &Sem = Context.getFloatTypeSemantics(ResultTy); llvm::APInt fill; // Treat empty strings as if they were zero. if (S->getString().empty()) fill = llvm::APInt(32, 0); else if (S->getString().getAsInteger(0, fill)) return false; if (Context.getTargetInfo().isNan2008()) { if (SNaN) Result = llvm::APFloat::getSNaN(Sem, false, &fill); else Result = llvm::APFloat::getQNaN(Sem, false, &fill); } else { // Prior to IEEE 754-2008, architectures were allowed to choose whether // the first bit of their significand was set for qNaN or sNaN. MIPS chose // a different encoding to what became a standard in 2008, and for pre- // 2008 revisions, MIPS interpreted sNaN-2008 as qNan and qNaN-2008 as // sNaN. This is now known as "legacy NaN" encoding. 
if (SNaN) Result = llvm::APFloat::getQNaN(Sem, false, &fill); else Result = llvm::APFloat::getSNaN(Sem, false, &fill); } return true; } bool FloatExprEvaluator::VisitCallExpr(const CallExpr *E) { switch (E->getBuiltinCallee()) { default: return ExprEvaluatorBaseTy::VisitCallExpr(E); case Builtin::BI__builtin_huge_val: case Builtin::BI__builtin_huge_valf: case Builtin::BI__builtin_huge_vall: case Builtin::BI__builtin_inf: case Builtin::BI__builtin_inff: case Builtin::BI__builtin_infl: { const llvm::fltSemantics &Sem = Info.Ctx.getFloatTypeSemantics(E->getType()); Result = llvm::APFloat::getInf(Sem); return true; } case Builtin::BI__builtin_nans: case Builtin::BI__builtin_nansf: case Builtin::BI__builtin_nansl: if (!TryEvaluateBuiltinNaN(Info.Ctx, E->getType(), E->getArg(0), true, Result)) return Error(E); return true; case Builtin::BI__builtin_nan: case Builtin::BI__builtin_nanf: case Builtin::BI__builtin_nanl: // If this is __builtin_nan() turn this into a nan, otherwise we // can't constant fold it. 
if (!TryEvaluateBuiltinNaN(Info.Ctx, E->getType(), E->getArg(0), false, Result)) return Error(E); return true; case Builtin::BI__builtin_fabs: case Builtin::BI__builtin_fabsf: case Builtin::BI__builtin_fabsl: if (!EvaluateFloat(E->getArg(0), Result, Info)) return false; if (Result.isNegative()) Result.changeSign(); return true; // FIXME: Builtin::BI__builtin_powi // FIXME: Builtin::BI__builtin_powif // FIXME: Builtin::BI__builtin_powil case Builtin::BI__builtin_copysign: case Builtin::BI__builtin_copysignf: case Builtin::BI__builtin_copysignl: { APFloat RHS(0.); if (!EvaluateFloat(E->getArg(0), Result, Info) || !EvaluateFloat(E->getArg(1), RHS, Info)) return false; Result.copySign(RHS); return true; } } } bool FloatExprEvaluator::VisitUnaryReal(const UnaryOperator *E) { if (E->getSubExpr()->getType()->isAnyComplexType()) { ComplexValue CV; if (!EvaluateComplex(E->getSubExpr(), CV, Info)) return false; Result = CV.FloatReal; return true; } return Visit(E->getSubExpr()); } bool FloatExprEvaluator::VisitUnaryImag(const UnaryOperator *E) { if (E->getSubExpr()->getType()->isAnyComplexType()) { ComplexValue CV; if (!EvaluateComplex(E->getSubExpr(), CV, Info)) return false; Result = CV.FloatImag; return true; } VisitIgnoredValue(E->getSubExpr()); const llvm::fltSemantics &Sem = Info.Ctx.getFloatTypeSemantics(E->getType()); Result = llvm::APFloat::getZero(Sem); return true; } bool FloatExprEvaluator::VisitUnaryOperator(const UnaryOperator *E) { switch (E->getOpcode()) { default: return Error(E); case UO_Plus: return EvaluateFloat(E->getSubExpr(), Result, Info); case UO_Minus: if (!EvaluateFloat(E->getSubExpr(), Result, Info)) return false; Result.changeSign(); return true; } } bool FloatExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) { if (E->isPtrMemOp() || E->isAssignmentOp() || E->getOpcode() == BO_Comma) return ExprEvaluatorBaseTy::VisitBinaryOperator(E); APFloat RHS(0.0); bool LHSOK = EvaluateFloat(E->getLHS(), Result, Info); if (!LHSOK && 
!Info.noteFailure()) return false; return EvaluateFloat(E->getRHS(), RHS, Info) && LHSOK && handleFloatFloatBinOp(Info, E, Result, E->getOpcode(), RHS); } bool FloatExprEvaluator::VisitFloatingLiteral(const FloatingLiteral *E) { Result = E->getValue(); return true; } bool FloatExprEvaluator::VisitCastExpr(const CastExpr *E) { const Expr* SubExpr = E->getSubExpr(); switch (E->getCastKind()) { default: return ExprEvaluatorBaseTy::VisitCastExpr(E); case CK_IntegralToFloating: { APSInt IntResult; return EvaluateInteger(SubExpr, IntResult, Info) && HandleIntToFloatCast(Info, E, SubExpr->getType(), IntResult, E->getType(), Result); } case CK_FloatingCast: { if (!Visit(SubExpr)) return false; return HandleFloatToFloatCast(Info, E, SubExpr->getType(), E->getType(), Result); } case CK_FloatingComplexToReal: { ComplexValue V; if (!EvaluateComplex(SubExpr, V, Info)) return false; Result = V.getComplexFloatReal(); return true; } } } //===----------------------------------------------------------------------===// // Complex Evaluation (for float and integer) //===----------------------------------------------------------------------===// namespace { class ComplexExprEvaluator : public ExprEvaluatorBase { ComplexValue &Result; public: ComplexExprEvaluator(EvalInfo &info, ComplexValue &Result) : ExprEvaluatorBaseTy(info), Result(Result) {} bool Success(const APValue &V, const Expr *e) { Result.setFrom(V); return true; } bool ZeroInitialization(const Expr *E); //===--------------------------------------------------------------------===// // Visitor Methods //===--------------------------------------------------------------------===// bool VisitImaginaryLiteral(const ImaginaryLiteral *E); bool VisitCastExpr(const CastExpr *E); bool VisitBinaryOperator(const BinaryOperator *E); bool VisitUnaryOperator(const UnaryOperator *E); bool VisitInitListExpr(const InitListExpr *E); }; } // end anonymous namespace static bool EvaluateComplex(const Expr *E, ComplexValue &Result, EvalInfo &Info) 
{ assert(E->isRValue() && E->getType()->isAnyComplexType()); return ComplexExprEvaluator(Info, Result).Visit(E); } bool ComplexExprEvaluator::ZeroInitialization(const Expr *E) { QualType ElemTy = E->getType()->castAs()->getElementType(); if (ElemTy->isRealFloatingType()) { Result.makeComplexFloat(); APFloat Zero = APFloat::getZero(Info.Ctx.getFloatTypeSemantics(ElemTy)); Result.FloatReal = Zero; Result.FloatImag = Zero; } else { Result.makeComplexInt(); APSInt Zero = Info.Ctx.MakeIntValue(0, ElemTy); Result.IntReal = Zero; Result.IntImag = Zero; } return true; } bool ComplexExprEvaluator::VisitImaginaryLiteral(const ImaginaryLiteral *E) { const Expr* SubExpr = E->getSubExpr(); if (SubExpr->getType()->isRealFloatingType()) { Result.makeComplexFloat(); APFloat &Imag = Result.FloatImag; if (!EvaluateFloat(SubExpr, Imag, Info)) return false; Result.FloatReal = APFloat(Imag.getSemantics()); return true; } else { assert(SubExpr->getType()->isIntegerType() && "Unexpected imaginary literal."); Result.makeComplexInt(); APSInt &Imag = Result.IntImag; if (!EvaluateInteger(SubExpr, Imag, Info)) return false; Result.IntReal = APSInt(Imag.getBitWidth(), !Imag.isSigned()); return true; } } bool ComplexExprEvaluator::VisitCastExpr(const CastExpr *E) { switch (E->getCastKind()) { case CK_BitCast: case CK_BaseToDerived: case CK_DerivedToBase: case CK_UncheckedDerivedToBase: case CK_Dynamic: case CK_ToUnion: case CK_ArrayToPointerDecay: case CK_FunctionToPointerDecay: case CK_NullToPointer: case CK_NullToMemberPointer: case CK_BaseToDerivedMemberPointer: case CK_DerivedToBaseMemberPointer: case CK_MemberPointerToBoolean: case CK_ReinterpretMemberPointer: case CK_ConstructorConversion: case CK_IntegralToPointer: case CK_PointerToIntegral: case CK_PointerToBoolean: case CK_ToVoid: case CK_VectorSplat: case CK_IntegralCast: case CK_BooleanToSignedIntegral: case CK_IntegralToBoolean: case CK_IntegralToFloating: case CK_FloatingToIntegral: case CK_FloatingToBoolean: case CK_FloatingCast: 
case CK_CPointerToObjCPointerCast: case CK_BlockPointerToObjCPointerCast: case CK_AnyPointerToBlockPointerCast: case CK_ObjCObjectLValueCast: case CK_FloatingComplexToReal: case CK_FloatingComplexToBoolean: case CK_IntegralComplexToReal: case CK_IntegralComplexToBoolean: case CK_ARCProduceObject: case CK_ARCConsumeObject: case CK_ARCReclaimReturnedObject: case CK_ARCExtendBlockObject: case CK_CopyAndAutoreleaseBlockObject: case CK_BuiltinFnToFnPtr: case CK_ZeroToOCLEvent: case CK_ZeroToOCLQueue: case CK_NonAtomicToAtomic: case CK_AddressSpaceConversion: case CK_IntToOCLSampler: llvm_unreachable("invalid cast kind for complex value"); case CK_LValueToRValue: case CK_AtomicToNonAtomic: case CK_NoOp: return ExprEvaluatorBaseTy::VisitCastExpr(E); case CK_Dependent: case CK_LValueBitCast: case CK_UserDefinedConversion: return Error(E); case CK_FloatingRealToComplex: { APFloat &Real = Result.FloatReal; if (!EvaluateFloat(E->getSubExpr(), Real, Info)) return false; Result.makeComplexFloat(); Result.FloatImag = APFloat(Real.getSemantics()); return true; } case CK_FloatingComplexCast: { if (!Visit(E->getSubExpr())) return false; QualType To = E->getType()->getAs()->getElementType(); QualType From = E->getSubExpr()->getType()->getAs()->getElementType(); return HandleFloatToFloatCast(Info, E, From, To, Result.FloatReal) && HandleFloatToFloatCast(Info, E, From, To, Result.FloatImag); } case CK_FloatingComplexToIntegralComplex: { if (!Visit(E->getSubExpr())) return false; QualType To = E->getType()->getAs()->getElementType(); QualType From = E->getSubExpr()->getType()->getAs()->getElementType(); Result.makeComplexInt(); return HandleFloatToIntCast(Info, E, From, Result.FloatReal, To, Result.IntReal) && HandleFloatToIntCast(Info, E, From, Result.FloatImag, To, Result.IntImag); } case CK_IntegralRealToComplex: { APSInt &Real = Result.IntReal; if (!EvaluateInteger(E->getSubExpr(), Real, Info)) return false; Result.makeComplexInt(); Result.IntImag = APSInt(Real.getBitWidth(), 
!Real.isSigned());
    return true;
  }

  case CK_IntegralComplexCast: {
    if (!Visit(E->getSubExpr()))
      return false;

    QualType To = E->getType()->getAs()->getElementType();
    QualType From
      = E->getSubExpr()->getType()->getAs()->getElementType();

    Result.IntReal = HandleIntToIntCast(Info, E, To, From, Result.IntReal);
    Result.IntImag = HandleIntToIntCast(Info, E, To, From, Result.IntImag);
    return true;
  }

  case CK_IntegralComplexToFloatingComplex: {
    if (!Visit(E->getSubExpr()))
      return false;

    QualType To = E->getType()->castAs()->getElementType();
    QualType From
      = E->getSubExpr()->getType()->castAs()->getElementType();
    Result.makeComplexFloat();
    return HandleIntToFloatCast(Info, E, From, Result.IntReal,
                                To, Result.FloatReal) &&
           HandleIntToFloatCast(Info, E, From, Result.IntImag,
                                To, Result.FloatImag);
  }
  }

  llvm_unreachable("unknown cast resulting in complex value");
}

/// Evaluate a binary operator whose result is a complex value, leaving the
/// result in 'Result'.
///
/// Pointer-to-member, assignment and comma operators are forwarded to the base
/// evaluator. BO_Add, BO_Sub, BO_Mul and BO_Div are computed here for both the
/// floating and the integer representation; any other opcode yields Error(E).
///
/// \returns true on success, false if an operand could not be evaluated or
/// the operation is rejected (e.g. integer division by a zero complex value).
bool ComplexExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) {
  if (E->isPtrMemOp() || E->isAssignmentOp() || E->getOpcode() == BO_Comma)
    return ExprEvaluatorBaseTy::VisitBinaryOperator(E);

  // Track whether the LHS or RHS is real at the type system level. When this is
  // the case we can simplify our evaluation strategy.
  bool LHSReal = false, RHSReal = false;

  bool LHSOK;
  if (E->getLHS()->getType()->isRealFloatingType()) {
    LHSReal = true;
    APFloat &Real = Result.FloatReal;
    LHSOK = EvaluateFloat(E->getLHS(), Real, Info);
    if (LHSOK) {
      // Promote the real operand to a complex value; the imaginary part is a
      // default-constructed APFloat of the same semantics.
      Result.makeComplexFloat();
      Result.FloatImag = APFloat(Real.getSemantics());
    }
  } else {
    LHSOK = Visit(E->getLHS());
  }
  // A failed LHS only stops evaluation here when noteFailure() says so;
  // otherwise the RHS is still visited below, but the overall result is false.
  if (!LHSOK && !Info.noteFailure())
    return false;

  ComplexValue RHS;
  if (E->getRHS()->getType()->isRealFloatingType()) {
    RHSReal = true;
    APFloat &Real = RHS.FloatReal;
    if (!EvaluateFloat(E->getRHS(), Real, Info) || !LHSOK)
      return false;
    RHS.makeComplexFloat();
    RHS.FloatImag = APFloat(Real.getSemantics());
  } else if (!EvaluateComplex(E->getRHS(), RHS, Info) || !LHSOK)
    return false;

  assert(!(LHSReal && RHSReal) &&
         "Cannot have both operands of a complex operation be real.");
  switch (E->getOpcode()) {
  default: return Error(E);
  case BO_Add:
    if (Result.isComplexFloat()) {
      Result.getComplexFloatReal().add(RHS.getComplexFloatReal(),
                                       APFloat::rmNearestTiesToEven);
      // With a real LHS the imaginary part is taken from the RHS unchanged;
      // with a real RHS the LHS imaginary part is left as it is.
      if (LHSReal)
        Result.getComplexFloatImag() = RHS.getComplexFloatImag();
      else if (!RHSReal)
        Result.getComplexFloatImag().add(RHS.getComplexFloatImag(),
                                         APFloat::rmNearestTiesToEven);
    } else {
      Result.getComplexIntReal() += RHS.getComplexIntReal();
      Result.getComplexIntImag() += RHS.getComplexIntImag();
    }
    break;
  case BO_Sub:
    if (Result.isComplexFloat()) {
      Result.getComplexFloatReal().subtract(RHS.getComplexFloatReal(),
                                            APFloat::rmNearestTiesToEven);
      if (LHSReal) {
        // Real LHS: the imaginary part is the negated RHS imaginary part.
        Result.getComplexFloatImag() = RHS.getComplexFloatImag();
        Result.getComplexFloatImag().changeSign();
      } else if (!RHSReal) {
        Result.getComplexFloatImag().subtract(RHS.getComplexFloatImag(),
                                              APFloat::rmNearestTiesToEven);
      }
    } else {
      Result.getComplexIntReal() -= RHS.getComplexIntReal();
      Result.getComplexIntImag() -= RHS.getComplexIntImag();
    }
    break;
  case BO_Mul:
    if (Result.isComplexFloat()) {
      // This is an implementation of complex multiplication according to the
      // constraints laid out in C11 Annex G. The implementation uses the
      // following naming scheme:
      //   (a + ib) * (c + id)
      // LHS is a copy of Result so that A and B stay readable while ResR and
      // ResI (which alias Result) are overwritten.
      ComplexValue LHS = Result;
      APFloat &A = LHS.getComplexFloatReal();
      APFloat &B = LHS.getComplexFloatImag();
      APFloat &C = RHS.getComplexFloatReal();
      APFloat &D = RHS.getComplexFloatImag();
      APFloat &ResR = Result.getComplexFloatReal();
      APFloat &ResI = Result.getComplexFloatImag();
      if (LHSReal) {
        assert(!RHSReal && "Cannot have two real operands for a complex op!");
        ResR = A * C;
        ResI = A * D;
      } else if (RHSReal) {
        ResR = C * A;
        ResI = C * B;
      } else {
        // In the fully general case, we need to handle NaNs and infinities
        // robustly.
        APFloat AC = A * C;
        APFloat BD = B * D;
        APFloat AD = A * D;
        APFloat BC = B * C;
        ResR = AC - BD;
        ResI = AD + BC;
        // Only when both parts came out NaN are the operands adjusted and the
        // product recomputed.
        if (ResR.isNaN() && ResI.isNaN()) {
          bool Recalc = false;
          if (A.isInfinity() || B.isInfinity()) {
            // Replace each LHS component by 1 (if infinite) or 0 (otherwise),
            // keeping its sign.
            A = APFloat::copySign(
                APFloat(A.getSemantics(), A.isInfinity() ? 1 : 0), A);
            B = APFloat::copySign(
                APFloat(B.getSemantics(), B.isInfinity() ? 1 : 0), B);
            if (C.isNaN())
              C = APFloat::copySign(APFloat(C.getSemantics()), C);
            if (D.isNaN())
              D = APFloat::copySign(APFloat(D.getSemantics()), D);
            Recalc = true;
          }
          if (C.isInfinity() || D.isInfinity()) {
            // Same adjustment with the roles of the operands swapped.
            C = APFloat::copySign(
                APFloat(C.getSemantics(), C.isInfinity() ? 1 : 0), C);
            D = APFloat::copySign(
                APFloat(D.getSemantics(), D.isInfinity() ? 1 : 0), D);
            if (A.isNaN())
              A = APFloat::copySign(APFloat(A.getSemantics()), A);
            if (B.isNaN())
              B = APFloat::copySign(APFloat(B.getSemantics()), B);
            Recalc = true;
          }
          // No operand was infinite, but one of the partial products was.
          if (!Recalc && (AC.isInfinity() || BD.isInfinity() ||
                          AD.isInfinity() || BC.isInfinity())) {
            if (A.isNaN())
              A = APFloat::copySign(APFloat(A.getSemantics()), A);
            if (B.isNaN())
              B = APFloat::copySign(APFloat(B.getSemantics()), B);
            if (C.isNaN())
              C = APFloat::copySign(APFloat(C.getSemantics()), C);
            if (D.isNaN())
              D = APFloat::copySign(APFloat(D.getSemantics()), D);
            Recalc = true;
          }
          if (Recalc) {
            ResR = APFloat::getInf(A.getSemantics()) * (A * C - B * D);
            ResI = APFloat::getInf(A.getSemantics()) * (A * D + B * C);
          }
        }
      }
    } else {
      ComplexValue LHS = Result;
      Result.getComplexIntReal() =
        (LHS.getComplexIntReal() * RHS.getComplexIntReal() -
         LHS.getComplexIntImag() * RHS.getComplexIntImag());
      Result.getComplexIntImag() =
        (LHS.getComplexIntReal() * RHS.getComplexIntImag() +
         LHS.getComplexIntImag() * RHS.getComplexIntReal());
    }
    break;
  case BO_Div:
    if (Result.isComplexFloat()) {
      // This is an implementation of complex division according to the
      // constraints laid out in C11 Annex G. The implementation uses the
      // following naming scheme:
      //   (a + ib) / (c + id)
      ComplexValue LHS = Result;
      APFloat &A = LHS.getComplexFloatReal();
      APFloat &B = LHS.getComplexFloatImag();
      APFloat &C = RHS.getComplexFloatReal();
      APFloat &D = RHS.getComplexFloatImag();
      APFloat &ResR = Result.getComplexFloatReal();
      APFloat &ResI = Result.getComplexFloatImag();
      if (RHSReal) {
        ResR = A / C;
        ResI = B / C;
      } else {
        if (LHSReal) {
          // No real optimizations we can do here, stub out with zero.
          B = APFloat::getZero(A.getSemantics());
        }
        // Scale C and D by 2^-ilogb(max(|C|, |D|)) before forming the
        // denominator; both quotients are rescaled by the same exponent.
        // NOTE(review): presumably this avoids overflow/underflow in
        // C*C + D*D - confirm against the upstream rationale.
        int DenomLogB = 0;
        APFloat MaxCD = maxnum(abs(C), abs(D));
        if (MaxCD.isFinite()) {
          DenomLogB = ilogb(MaxCD);
          C = scalbn(C, -DenomLogB, APFloat::rmNearestTiesToEven);
          D = scalbn(D, -DenomLogB, APFloat::rmNearestTiesToEven);
        }
        APFloat Denom = C * C + D * D;
        ResR = scalbn((A * C + B * D) / Denom, -DenomLogB,
                      APFloat::rmNearestTiesToEven);
        ResI = scalbn((B * C - A * D) / Denom, -DenomLogB,
                      APFloat::rmNearestTiesToEven);
        // Recovery paths, taken only when both parts came out NaN.
        if (ResR.isNaN() && ResI.isNaN()) {
          if (Denom.isPosZero() && (!A.isNaN() || !B.isNaN())) {
            // Zero denominator with a non-NaN numerator component: infinity
            // carrying the sign of C, times each numerator component.
            ResR = APFloat::getInf(ResR.getSemantics(), C.isNegative()) * A;
            ResI = APFloat::getInf(ResR.getSemantics(), C.isNegative()) * B;
          } else if ((A.isInfinity() || B.isInfinity()) && C.isFinite() &&
                     D.isFinite()) {
            // Infinite numerator over a finite denominator.
            A = APFloat::copySign(
                APFloat(A.getSemantics(), A.isInfinity() ? 1 : 0), A);
            B = APFloat::copySign(
                APFloat(B.getSemantics(), B.isInfinity() ? 1 : 0), B);
            ResR = APFloat::getInf(ResR.getSemantics()) * (A * C + B * D);
            ResI = APFloat::getInf(ResI.getSemantics()) * (B * C - A * D);
          } else if (MaxCD.isInfinity() && A.isFinite() && B.isFinite()) {
            // Finite numerator over an infinite denominator.
            C = APFloat::copySign(
                APFloat(C.getSemantics(), C.isInfinity() ? 1 : 0), C);
            D = APFloat::copySign(
                APFloat(D.getSemantics(), D.isInfinity() ? 1 : 0), D);
            ResR = APFloat::getZero(ResR.getSemantics()) * (A * C + B * D);
            ResI = APFloat::getZero(ResI.getSemantics()) * (B * C - A * D);
          }
        }
      }
    } else {
      // Integer complex division by 0 + 0i is not a constant expression.
      if (RHS.getComplexIntReal() == 0 && RHS.getComplexIntImag() == 0)
        return Error(E, diag::note_expr_divide_by_zero);

      ComplexValue LHS = Result;
      APSInt Den = RHS.getComplexIntReal() * RHS.getComplexIntReal() +
        RHS.getComplexIntImag() * RHS.getComplexIntImag();
      Result.getComplexIntReal() =
        (LHS.getComplexIntReal() * RHS.getComplexIntReal() +
         LHS.getComplexIntImag() * RHS.getComplexIntImag()) / Den;
      Result.getComplexIntImag() =
        (LHS.getComplexIntImag() * RHS.getComplexIntReal() -
         LHS.getComplexIntReal() * RHS.getComplexIntImag()) / Den;
    }
    break;
  }

  return true;
}

/// Evaluate a unary operator applied to a complex operand.
///
/// UO_Extension and UO_Plus leave the operand value unchanged, UO_Minus
/// negates both components, and UO_Not negates only the imaginary component.
/// Any other opcode yields Error(E).
bool ComplexExprEvaluator::VisitUnaryOperator(const UnaryOperator *E) {
  // Get the operand value into 'Result'.
  if (!Visit(E->getSubExpr()))
    return false;

  switch (E->getOpcode()) {
  default:
    return Error(E);
  case UO_Extension:
    return true;
  case UO_Plus:
    // The result is always just the subexpr.
    return true;
  case UO_Minus:
    if (Result.isComplexFloat()) {
      Result.getComplexFloatReal().changeSign();
      Result.getComplexFloatImag().changeSign();
    }
    else {
      Result.getComplexIntReal() = -Result.getComplexIntReal();
      Result.getComplexIntImag() = -Result.getComplexIntImag();
    }
    return true;
  case UO_Not:
    if (Result.isComplexFloat())
      Result.getComplexFloatImag().changeSign();
    else
      Result.getComplexIntImag() = -Result.getComplexIntImag();
    return true;
  }
}

/// Evaluate an initializer list producing a complex value.
///
/// A list with exactly two initializers is read as {real, imaginary}: both are
/// evaluated as floats when the list's type satisfies isComplexType(), and as
/// integers otherwise. Any other list is forwarded to the base evaluator.
bool ComplexExprEvaluator::VisitInitListExpr(const InitListExpr *E) {
  if (E->getNumInits() == 2) {
    if (E->getType()->isComplexType()) {
      Result.makeComplexFloat();
      if (!EvaluateFloat(E->getInit(0), Result.FloatReal, Info))
        return false;
      if (!EvaluateFloat(E->getInit(1), Result.FloatImag, Info))
        return false;
    } else {
      Result.makeComplexInt();
      if (!EvaluateInteger(E->getInit(0), Result.IntReal, Info))
        return false;
      if (!EvaluateInteger(E->getInit(1), Result.IntImag, Info))
        return false;
    }
    return true;
  }
  return ExprEvaluatorBaseTy::VisitInitListExpr(E);
}

//===----------------------------------------------------------------------===//
// Atomic expression evaluation, essentially just handling the NonAtomicToAtomic
// implicit conversion.
//===----------------------------------------------------------------------===//

namespace {
/// Evaluator for expressions of atomic type. The only cast handled here is
/// CK_NonAtomicToAtomic, whose operand is evaluated directly into Result;
/// everything else goes to the base evaluator.
// NOTE(review): the base class is written without a template argument list in
// this copy of the file - it looks stripped; confirm against upstream.
class AtomicExprEvaluator :
    public ExprEvaluatorBase {
  APValue &Result;
public:
  AtomicExprEvaluator(EvalInfo &Info, APValue &Result)
      : ExprEvaluatorBaseTy(Info), Result(Result) {}

  /// Store the evaluated value V as the result.
  bool Success(const APValue &V, const Expr *E) {
    Result = V;
    return true;
  }

  /// Zero-initialize by evaluating an ImplicitValueInitExpr of the type
  /// returned by getValueType() for E's type.
  bool ZeroInitialization(const Expr *E) {
    ImplicitValueInitExpr VIE(
        E->getType()->castAs()->getValueType());
    return Evaluate(Result, Info, &VIE);
  }

  bool VisitCastExpr(const CastExpr *E) {
    switch (E->getCastKind()) {
    default:
      return ExprEvaluatorBaseTy::VisitCastExpr(E);
    case CK_NonAtomicToAtomic:
      return Evaluate(Result, Info, E->getSubExpr());
    }
  }
};
} // end anonymous namespace

/// Evaluate the rvalue expression E of atomic type into Result.
static bool EvaluateAtomic(const Expr *E, APValue &Result, EvalInfo &Info) {
  assert(E->isRValue() && E->getType()->isAtomicType());
  return AtomicExprEvaluator(Info, Result).Visit(E);
}

//===----------------------------------------------------------------------===//
// Void expression evaluation, primarily for a cast to void on the LHS of a
// comma operator
//===----------------------------------------------------------------------===//

namespace {
/// Evaluator for expressions of void type; it produces no value and only
/// reports whether evaluation succeeded.
// NOTE(review): as above, the base class template arguments look stripped.
class VoidExprEvaluator
  : public ExprEvaluatorBase {
public:
  VoidExprEvaluator(EvalInfo &Info) : ExprEvaluatorBaseTy(Info) {}

  /// There is no value to record for a void expression.
  bool Success(const APValue &V, const Expr *e) { return true; }

  bool VisitCastExpr(const CastExpr *E) {
    switch (E->getCastKind()) {
    default:
      return ExprEvaluatorBaseTy::VisitCastExpr(E);
    case CK_ToVoid:
      // The operand's value is discarded; the cast itself always succeeds.
      VisitIgnoredValue(E->getSubExpr());
      return true;
    }
  }

  bool VisitCallExpr(const CallExpr *E) {
    switch (E->getBuiltinCallee()) {
    default:
      return ExprEvaluatorBaseTy::VisitCallExpr(E);
    case Builtin::BI__assume:
    case Builtin::BI__builtin_assume:
      // The argument is not evaluated!
      return true;
    }
  }
};
} // end anonymous namespace

/// Evaluate the rvalue expression E of void type.
static bool EvaluateVoid(const Expr *E, EvalInfo &Info) {
  assert(E->isRValue() && E->getType()->isVoidType());
  return VoidExprEvaluator(Info).Visit(E);
}

//===----------------------------------------------------------------------===//
// Top level Expr::EvaluateAsRValue method.
//===----------------------------------------------------------------------===//

/// Evaluate E by dispatching on its value category and type to the matching
/// specialised evaluator, storing the value in Result where one is produced.
///
/// The checks are ordered: glvalues and function types first, then vector,
/// integral/enumeration, pointer-like, real floating, complex, member pointer,
/// array, record, void and atomic types. A type matching none of these emits a
/// diagnostic and fails.
///
/// \returns true on success, false if the selected evaluator failed or the
/// type is not handled.
static bool Evaluate(APValue &Result, EvalInfo &Info, const Expr *E) {
  // In C, function designators are not lvalues, but we evaluate them as if they
  // are.
  QualType T = E->getType();
  if (E->isGLValue() || T->isFunctionType()) {
    LValue LV;
    if (!EvaluateLValue(E, LV, Info))
      return false;
    LV.moveInto(Result);
  } else if (T->isVectorType()) {
    if (!EvaluateVector(E, Result, Info))
      return false;
  } else if (T->isIntegralOrEnumerationType()) {
    if (!IntExprEvaluator(Info, Result).Visit(E))
      return false;
  } else if (T->hasPointerRepresentation()) {
    LValue LV;
    if (!EvaluatePointer(E, LV, Info))
      return false;
    LV.moveInto(Result);
  } else if (T->isRealFloatingType()) {
    llvm::APFloat F(0.0);
    if (!EvaluateFloat(E, F, Info))
      return false;
    Result = APValue(F);
  } else if (T->isAnyComplexType()) {
    ComplexValue C;
    if (!EvaluateComplex(E, C, Info))
      return false;
    C.moveInto(Result);
  } else if (T->isMemberPointerType()) {
    MemberPtr P;
    if (!EvaluateMemberPointer(E, P, Info))
      return false;
    P.moveInto(Result);
    return true;
  } else if (T->isArrayType()) {
    // Arrays and records are evaluated into a temporary created in the
    // current call frame and then copied into Result.
    LValue LV;
    LV.set(E, Info.CurrentCall->Index);
    APValue &Value = Info.CurrentCall->createTemporary(E, false);
    if (!EvaluateArray(E, LV, Value, Info))
      return false;
    Result = Value;
  } else if (T->isRecordType()) {
    LValue LV;
    LV.set(E, Info.CurrentCall->Index);
    APValue &Value = Info.CurrentCall->createTemporary(E, false);
    if (!EvaluateRecord(E, LV, Value, Info))
      return false;
    Result = Value;
  } else if (T->isVoidType()) {
    // Outside C++11 mode a void expression gets a CCEDiag note, but
    // evaluation still proceeds.
    if (!Info.getLangOpts().CPlusPlus11)
      Info.CCEDiag(E, diag::note_constexpr_nonliteral)
        << E->getType();
    if (!EvaluateVoid(E, Info))
      return false;
  } else if (T->isAtomicType()) {
    if (!EvaluateAtomic(E, Result, Info))
      return false;
  } else if (Info.getLangOpts().CPlusPlus11) {
    Info.FFDiag(E, diag::note_constexpr_nonliteral) << E->getType();
    return false;
  } else {
    Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr);
    return false;
  }

  return true;
}

/// EvaluateInPlace - Evaluate an expression in-place in an APValue. In some
/// cases, the in-place evaluation is essential, since later initializers for
/// an object can indirectly refer to subobjects which were initialized earlier.
///
/// Unless AllowNonLiteralTypes is set, the expression must first pass
/// CheckLiteralType. E must not be value-dependent (asserted).
static bool EvaluateInPlace(APValue &Result, EvalInfo &Info, const LValue &This,
                            const Expr *E, bool AllowNonLiteralTypes) {
  assert(!E->isValueDependent());

  if (!AllowNonLiteralTypes && !CheckLiteralType(Info, E, &This))
    return false;

  if (E->isRValue()) {
    // Evaluate arrays and record types in-place, so that later initializers can
    // refer to earlier-initialized members of the object.
    if (E->getType()->isArrayType())
      return EvaluateArray(E, This, Result, Info);
    else if (E->getType()->isRecordType())
      return EvaluateRecord(E, This, Result, Info);
  }

  // For any other type, in-place evaluation is unimportant.
  return Evaluate(Result, Info, E);
}

/// EvaluateAsRValue - Try to evaluate this expression, performing an implicit
/// lvalue-to-rvalue cast if it is an lvalue.
///
/// Fails up front if E has a null type or does not pass CheckLiteralType.
static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result) {
  if (E->getType().isNull())
    return false;

  if (!CheckLiteralType(Info, E))
    return false;

  if (!::Evaluate(Result, Info, E))
    return false;

  if (E->isGLValue()) {
    // Result currently holds the lvalue; load the value it designates.
    LValue LV;
    LV.setFrom(Info.Ctx, Result);
    if (!handleLValueToRValueConversion(Info, E, E->getType(), LV, Result))
      return false;
  }

  // Check this core constant expression is a constant expression.
  return CheckConstantExpression(Info, E->getExprLoc(), E->getType(), Result);
}

/// Try to decide the outcome of an rvalue evaluation without running the full
/// evaluator.
///
/// \returns true if the outcome was decided here, in which case IsConst says
/// whether the expression is constant (and Result.Val is set for integer
/// literals); false if the caller must perform a full evaluation (IsConst is
/// then left unset).
static bool FastEvaluateAsRValue(const Expr *Exp, Expr::EvalResult &Result,
                                 const ASTContext &Ctx, bool &IsConst) {
  // Fast-path evaluations of integer literals, since we sometimes see files
  // containing vast quantities of these.
  if (const IntegerLiteral *L = dyn_cast(Exp)) {
    Result.Val = APValue(APSInt(L->getValue(),
                                L->getType()->isUnsignedIntegerType()));
    IsConst = true;
    return true;
  }

  // This case should be rare, but we need to check it before we check on
  // the type below.
  if (Exp->getType().isNull()) {
    IsConst = false;
    return true;
  }

  // FIXME: Evaluating values of large array and record types can cause
  // performance problems. Only do so in C++11 for now.
  if (Exp->isRValue() && (Exp->getType()->isArrayType() ||
                          Exp->getType()->isRecordType()) &&
      !Ctx.getLangOpts().CPlusPlus11) {
    IsConst = false;
    return true;
  }
  return false;
}

/// EvaluateAsRValue - Return true if this is a constant which we can fold using
/// any crazy technique (that has nothing to do with language standards) that
/// we want to. If this function returns true, it returns the folded constant
/// in Result. If this expression is a glvalue, an lvalue-to-rvalue conversion
/// will be applied to the result.
bool Expr::EvaluateAsRValue(EvalResult &Result, const ASTContext &Ctx) const {
  // Try the fast path first; when it decides the outcome, IsConst is the
  // answer.
  bool IsConst;
  if (FastEvaluateAsRValue(this, Result, Ctx, IsConst))
    return IsConst;

  EvalInfo Info(Ctx, Result, EvalInfo::EM_IgnoreSideEffects);
  return ::EvaluateAsRValue(Info, this, Result.Val);
}

/// Evaluate this expression as an rvalue and convert the value to bool.
///
/// \returns true if both the evaluation and the conversion succeeded, in which
/// case Result holds the boolean value.
bool Expr::EvaluateAsBooleanCondition(bool &Result,
                                      const ASTContext &Ctx) const {
  EvalResult Scratch;
  return EvaluateAsRValue(Scratch, Ctx) &&
         HandleConversionToBool(Scratch.Val, Result);
}

/// \returns true if Result recorded side effects while SEK is below
/// SE_AllowSideEffects, or recorded undefined behavior while SEK is below
/// SE_AllowUndefinedBehavior.
static bool hasUnacceptableSideEffect(Expr::EvalStatus &Result,
                                      Expr::SideEffectsKind SEK) {
  return (SEK < Expr::SE_AllowSideEffects && Result.HasSideEffects) ||
         (SEK < Expr::SE_AllowUndefinedBehavior && Result.HasUndefinedBehavior);
}

/// Evaluate this expression as an integer.
///
/// Fails if the expression is not of integral or enumeration type, cannot be
/// evaluated as an rvalue, does not evaluate to an integer value, or has a
/// side effect not permitted by AllowSideEffects. On success the value is
/// stored in Result.
bool Expr::EvaluateAsInt(APSInt &Result, const ASTContext &Ctx,
                         SideEffectsKind AllowSideEffects) const {
  if (!getType()->isIntegralOrEnumerationType())
    return false;

  EvalResult ExprResult;
  if (!EvaluateAsRValue(ExprResult, Ctx) || !ExprResult.Val.isInt() ||
      hasUnacceptableSideEffect(ExprResult, AllowSideEffects))
    return false;

  Result = ExprResult.Val.getInt();
  return true;
}

/// Evaluate this expression as a floating-point value.
///
/// Fails if the expression is not of real floating type, cannot be evaluated
/// as an rvalue, does not evaluate to a float value, or has a side effect not
/// permitted by AllowSideEffects. On success the value is stored in Result.
bool Expr::EvaluateAsFloat(APFloat &Result, const ASTContext &Ctx,
                           SideEffectsKind AllowSideEffects) const {
  if (!getType()->isRealFloatingType())
    return false;

  EvalResult ExprResult;
  if (!EvaluateAsRValue(ExprResult, Ctx) || !ExprResult.Val.isFloat() ||
      hasUnacceptableSideEffect(ExprResult, AllowSideEffects))
    return false;

  Result = ExprResult.Val.getFloat();
  return true;
}

/// Evaluate this expression as an lvalue in EM_ConstantFold mode.
///
/// Fails if lvalue evaluation fails, if side effects were recorded, or if the
/// lvalue does not pass CheckLValueConstantExpression. On success the lvalue
/// is moved into Result.Val.
bool Expr::EvaluateAsLValue(EvalResult &Result, const ASTContext &Ctx) const {
  EvalInfo Info(Ctx, Result, EvalInfo::EM_ConstantFold);

  LValue LV;
  if (!EvaluateLValue(this, LV, Info) || Result.HasSideEffects ||
      !CheckLValueConstantExpression(Info, getExprLoc(),
                                     Ctx.getLValueReferenceType(getType()), LV))
    return false;

  LV.moveInto(Result.Val);
  return true;
}

/// Evaluate this expression as the initializer of the variable VD, writing the
/// value into Value and collecting diagnostic notes in Notes.
///
/// The evaluation mode is EM_ConstantExpression for constexpr variables and
/// EM_ConstantFold otherwise.
///
/// \returns true if the initializer was evaluated without side effects and the
/// result passes CheckConstantExpression.
// NOTE(review): 'SmallVectorImpl &Notes' has no template argument in this copy
// of the file - it looks stripped; confirm against upstream.
bool Expr::EvaluateAsInitializer(APValue &Value, const ASTContext &Ctx,
                                 const VarDecl *VD,
                            SmallVectorImpl &Notes) const {
  // FIXME: Evaluating initializers for large array and record types can cause
  // performance problems. Only do so in C++11 for now.
  if (isRValue() && (getType()->isArrayType() || getType()->isRecordType()) &&
      !Ctx.getLangOpts().CPlusPlus11)
    return false;

  Expr::EvalStatus EStatus;
  EStatus.Diag = &Notes;

  EvalInfo InitInfo(Ctx, EStatus, VD->isConstexpr()
                                      ? EvalInfo::EM_ConstantExpression
                                      : EvalInfo::EM_ConstantFold);
  InitInfo.setEvaluatingDecl(VD, Value);

  LValue LVal;
  LVal.set(VD);

  // C++11 [basic.start.init]p2:
  //  Variables with static storage duration or thread storage duration shall be
  //  zero-initialized before any other initialization takes place.
  // This behavior is not present in C.
  if (Ctx.getLangOpts().CPlusPlus && !VD->hasLocalStorage() &&
      !VD->getType()->isReferenceType()) {
    ImplicitValueInitExpr VIE(VD->getType());
    if (!EvaluateInPlace(Value, InitInfo, LVal, &VIE,
                         /*AllowNonLiteralTypes=*/true))
      return false;
  }

  if (!EvaluateInPlace(Value, InitInfo, LVal, this,
                       /*AllowNonLiteralTypes=*/true) ||
      EStatus.HasSideEffects)
    return false;

  return CheckConstantExpression(InitInfo, VD->getLocation(), VD->getType(),
                                 Value);
}

/// isEvaluatable - Call EvaluateAsRValue to see if this expression can be
/// constant folded, but discard the result.
bool Expr::isEvaluatable(const ASTContext &Ctx, SideEffectsKind SEK) const {
  // Foldable only if evaluation succeeds and any recorded side effect or
  // undefined behavior is permitted by SEK.
  EvalResult Result;
  return EvaluateAsRValue(Result, Ctx) &&
         !hasUnacceptableSideEffect(Result, SEK);
}

/// Evaluate an expression that the caller knows to be an integer constant and
/// return its value. Failure to evaluate, or a non-integer result, is only
/// caught by assertions. Diagnostics, if any, are collected through Diag.
// NOTE(review): 'SmallVectorImpl *Diag' has no template argument in this copy
// of the file - it looks stripped; confirm against upstream.
APSInt Expr::EvaluateKnownConstInt(const ASTContext &Ctx,
                    SmallVectorImpl *Diag) const {
  EvalResult EvalResult;
  EvalResult.Diag = Diag;
  bool Result = EvaluateAsRValue(EvalResult, Ctx);
  (void)Result;
  assert(Result && "Could not evaluate expression");
  assert(EvalResult.Val.isInt() && "Expression did not evaluate to integer");

  return EvalResult.Val.getInt();
}

/// Run the evaluator over this expression in EM_EvaluateForOverflow mode and
/// discard the value. Nothing is evaluated when the fast path already decides
/// the outcome.
void Expr::EvaluateForOverflow(const ASTContext &Ctx) const {
  bool IsConst;
  EvalResult EvalResult;
  if (!FastEvaluateAsRValue(this, EvalResult, Ctx, IsConst)) {
    EvalInfo Info(Ctx, EvalResult, EvalInfo::EM_EvaluateForOverflow);
    (void)::EvaluateAsRValue(Info, this, EvalResult.Val);
  }
}

/// \returns whether the base of this lvalue result satisfies IsGlobalLValue.
/// The value must be an lvalue (asserted).
bool Expr::EvalResult::isGlobalLValue() const {
  assert(Val.isLValue());
  return IsGlobalLValue(Val.getLValueBase());
}


/// isIntegerConstantExpr - this recursive routine will test if an expression is
/// an integer constant expression.

/// FIXME: Pass up a reason why! Invalid operation in i-c-e, division by zero,
/// comma, etc

// CheckICE - This function does the fundamental ICE checking: the returned
// ICEDiag contains an ICEKind indicating whether the expression is an ICE,
// and a (possibly null) SourceLocation indicating the location of the problem.
//
// Note that to reduce code duplication, this helper does no evaluation
// itself; the caller checks whether the expression is evaluatable, and
// in the rare cases where CheckICE actually cares about the evaluated
// value, it calls into Evaluate.

namespace {

// The enumerators are declared in increasing order of severity; Worst() relies
// on that ordering.
enum ICEKind {
  /// This expression is an ICE.
  IK_ICE,
  /// This expression is not an ICE, but if it isn't evaluated, it's
  /// a legal subexpression for an ICE. This return value is used to handle
  /// the comma operator in C99 mode, and non-constant subexpressions.
  IK_ICEIfUnevaluated,
  /// This expression is not an ICE, and is not a legal subexpression for one.
  IK_NotICE
};

/// Result of an ICE check: the classification plus the location of the
/// offending subexpression.
struct ICEDiag {
  ICEKind Kind;
  SourceLocation Loc;

  ICEDiag(ICEKind IK, SourceLocation l) : Kind(IK), Loc(l) {}
};

}

/// The "is an ICE" result, carrying a default-constructed location.
static ICEDiag NoDiag() { return ICEDiag(IK_ICE, SourceLocation()); }

/// \returns whichever of A and B has the larger ICEKind; A wins ties.
static ICEDiag Worst(ICEDiag A, ICEDiag B) { return A.Kind >= B.Kind ? A : B; }

/// Classify E by actually evaluating it: it is an ICE only if it evaluates as
/// an rvalue to an integer without side effects.
static ICEDiag CheckEvalInICE(const Expr* E, const ASTContext &Ctx) {
  Expr::EvalResult EVResult;
  if (!E->EvaluateAsRValue(EVResult, Ctx) || EVResult.HasSideEffects ||
      !EVResult.Val.isInt())
    return ICEDiag(IK_NotICE, E->getLocStart());

  return NoDiag();
}

/// Classify E as an ICE, an ICE only if unevaluated, or not an ICE, by
/// switching on its statement class and recursing into subexpressions.
/// E must not be value-dependent (asserted).
// NOTE(review): the cast/dyn_cast/isa calls in this function have no template
// arguments in this copy of the file - they look stripped; confirm against
// upstream before relying on any of them.
static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) {
  assert(!E->isValueDependent() && "Should not see value dependent exprs!");
  if (!E->getType()->isIntegralOrEnumerationType())
    return ICEDiag(IK_NotICE, E->getLocStart());

  switch (E->getStmtClass()) {
#define ABSTRACT_STMT(Node)
#define STMT(Node, Base) case Expr::Node##Class:
#define EXPR(Node, Base)
#include "clang/AST/StmtNodes.inc"
  case Expr::PredefinedExprClass:
  case Expr::FloatingLiteralClass:
  case Expr::ImaginaryLiteralClass:
  case Expr::StringLiteralClass:
  case Expr::ArraySubscriptExprClass:
  case Expr::OMPArraySectionExprClass:
  case Expr::MemberExprClass:
  case Expr::CompoundAssignOperatorClass:
  case Expr::CompoundLiteralExprClass:
  case Expr::ExtVectorElementExprClass:
  case Expr::DesignatedInitExprClass:
  case Expr::ArrayInitLoopExprClass:
  case Expr::ArrayInitIndexExprClass:
  case Expr::NoInitExprClass:
  case Expr::DesignatedInitUpdateExprClass:
  case Expr::ImplicitValueInitExprClass:
  case Expr::ParenListExprClass:
  case Expr::VAArgExprClass:
  case Expr::AddrLabelExprClass:
  case Expr::StmtExprClass:
  case Expr::CXXMemberCallExprClass:
  case Expr::CUDAKernelCallExprClass:
  case Expr::CXXDynamicCastExprClass:
  case Expr::CXXTypeidExprClass:
  case Expr::CXXUuidofExprClass:
  case Expr::MSPropertyRefExprClass:
  case Expr::MSPropertySubscriptExprClass:
  case Expr::CXXNullPtrLiteralExprClass:
  case Expr::UserDefinedLiteralClass:
  case Expr::CXXThisExprClass:
  case Expr::CXXThrowExprClass:
  case Expr::CXXNewExprClass:
  case Expr::CXXDeleteExprClass:
  case Expr::CXXPseudoDestructorExprClass:
  case Expr::UnresolvedLookupExprClass:
  case Expr::TypoExprClass:
  case Expr::DependentScopeDeclRefExprClass:
  case Expr::CXXConstructExprClass:
  case Expr::CXXInheritedCtorInitExprClass:
  case Expr::CXXStdInitializerListExprClass:
  case Expr::CXXBindTemporaryExprClass:
  case Expr::ExprWithCleanupsClass:
  case Expr::CXXTemporaryObjectExprClass:
  case Expr::CXXUnresolvedConstructExprClass:
  case Expr::CXXDependentScopeMemberExprClass:
  case Expr::UnresolvedMemberExprClass:
  case Expr::ObjCStringLiteralClass:
  case Expr::ObjCBoxedExprClass:
  case Expr::ObjCArrayLiteralClass:
  case Expr::ObjCDictionaryLiteralClass:
  case Expr::ObjCEncodeExprClass:
  case Expr::ObjCMessageExprClass:
  case Expr::ObjCSelectorExprClass:
  case Expr::ObjCProtocolExprClass:
  case Expr::ObjCIvarRefExprClass:
  case Expr::ObjCPropertyRefExprClass:
  case Expr::ObjCSubscriptRefExprClass:
  case Expr::ObjCIsaExprClass:
  case Expr::ObjCAvailabilityCheckExprClass:
  case Expr::ShuffleVectorExprClass:
  case Expr::ConvertVectorExprClass:
  case Expr::BlockExprClass:
  case Expr::NoStmtClass:
  case Expr::OpaqueValueExprClass:
  case Expr::PackExpansionExprClass:
  case Expr::SubstNonTypeTemplateParmPackExprClass:
  case Expr::FunctionParmPackExprClass:
  case Expr::AsTypeExprClass:
  case Expr::ObjCIndirectCopyRestoreExprClass:
  case Expr::MaterializeTemporaryExprClass:
  case Expr::PseudoObjectExprClass:
  case Expr::AtomicExprClass:
  case Expr::LambdaExprClass:
  case Expr::CXXFoldExprClass:
  case Expr::CoawaitExprClass:
  case Expr::CoyieldExprClass:
    return ICEDiag(IK_NotICE, E->getLocStart());

  case Expr::InitListExprClass: {
    // C++03 [dcl.init]p13: If T is a scalar type, then a declaration of the
    // form "T x = { a };" is equivalent to "T x = a;".
    // Unless we're initializing a reference, T is a scalar as it is known to be
    // of integral or enumeration type.
    if (E->isRValue())
      if (cast(E)->getNumInits() == 1)
        return CheckICE(cast(E)->getInit(0), Ctx);
    return ICEDiag(IK_NotICE, E->getLocStart());
  }

  case Expr::SizeOfPackExprClass:
  case Expr::GNUNullExprClass:
    // GCC considers the GNU __null value to be an integral constant expression.
    return NoDiag();

  case Expr::SubstNonTypeTemplateParmExprClass:
    return
      CheckICE(cast(E)->getReplacement(), Ctx);

  case Expr::ParenExprClass:
    return CheckICE(cast(E)->getSubExpr(), Ctx);
  case Expr::GenericSelectionExprClass:
    return CheckICE(cast(E)->getResultExpr(), Ctx);
  case Expr::IntegerLiteralClass:
  case Expr::CharacterLiteralClass:
  case Expr::ObjCBoolLiteralExprClass:
  case Expr::CXXBoolLiteralExprClass:
  case Expr::CXXScalarValueInitExprClass:
  case Expr::TypeTraitExprClass:
  case Expr::ArrayTypeTraitExprClass:
  case Expr::ExpressionTraitExprClass:
  case Expr::CXXNoexceptExprClass:
    return NoDiag();
  case Expr::CallExprClass:
  case Expr::CXXOperatorCallExprClass: {
    // C99 6.6/3 allows function calls within unevaluated subexpressions of
    // constant expressions, but they can never be ICEs because an ICE cannot
    // contain an operand of (pointer to) function type.
    const CallExpr *CE = cast(E);
    // Builtin calls are the exception: they are classified by evaluation.
    if (CE->getBuiltinCallee())
      return CheckEvalInICE(E, Ctx);
    return ICEDiag(IK_NotICE, E->getLocStart());
  }
  case Expr::DeclRefExprClass: {
    if (isa(cast(E)->getDecl()))
      return NoDiag();
    const ValueDecl *D = dyn_cast(cast(E)->getDecl());
    if (Ctx.getLangOpts().CPlusPlus &&
        D && IsConstNonVolatile(D->getType())) {
      // Parameter variables are never constants. Without this check,
      // getAnyInitializer() can find a default argument, which leads
      // to chaos.
      if (isa(D))
        return ICEDiag(IK_NotICE, cast(E)->getLocation());

      // C++ 7.1.5.1p2
      //   A variable of non-volatile const-qualified integral or enumeration
      //   type initialized by an ICE can be used in ICEs.
      if (const VarDecl *Dcl = dyn_cast(D)) {
        if (!Dcl->getType()->isIntegralOrEnumerationType())
          return ICEDiag(IK_NotICE, cast(E)->getLocation());

        const VarDecl *VD;
        // Look for a declaration of this variable that has an initializer, and
        // check whether it is an ICE.
        if (Dcl->getAnyInitializer(VD) && VD->checkInitIsICE())
          return NoDiag();
        else
          return ICEDiag(IK_NotICE, cast(E)->getLocation());
      }
    }
    return ICEDiag(IK_NotICE, E->getLocStart());
  }
  case Expr::UnaryOperatorClass: {
    const UnaryOperator *Exp = cast(E);
    switch (Exp->getOpcode()) {
    case UO_PostInc:
    case UO_PostDec:
    case UO_PreInc:
    case UO_PreDec:
    case UO_AddrOf:
    case UO_Deref:
    case UO_Coawait:
      // C99 6.6/3 allows increment and decrement within unevaluated
      // subexpressions of constant expressions, but they can never be ICEs
      // because an ICE cannot contain an lvalue operand.
      return ICEDiag(IK_NotICE, E->getLocStart());
    case UO_Extension:
    case UO_LNot:
    case UO_Plus:
    case UO_Minus:
    case UO_Not:
    case UO_Real:
    case UO_Imag:
      return CheckICE(Exp->getSubExpr(), Ctx);
    }

    // OffsetOf falls through here.
  }
  case Expr::OffsetOfExprClass: {
    // Note that per C99, offsetof must be an ICE. And AFAIK, using
    // EvaluateAsRValue matches the proposed gcc behavior for cases like
    // "offsetof(struct s{int x[4];}, x[1.0])". This doesn't affect
    // compliance: we should warn earlier for offsetof expressions with
    // array subscripts that aren't ICEs, and if the array subscripts
    // are ICEs, the value of the offsetof must be an integer constant.
    return CheckEvalInICE(E, Ctx);
  }
  case Expr::UnaryExprOrTypeTraitExprClass: {
    const UnaryExprOrTypeTraitExpr *Exp = cast(E);
    // sizeof applied to a variable array type is not an ICE.
    if ((Exp->getKind() == UETT_SizeOf) &&
        Exp->getTypeOfArgument()->isVariableArrayType())
      return ICEDiag(IK_NotICE, E->getLocStart());
    return NoDiag();
  }
  case Expr::BinaryOperatorClass: {
    const BinaryOperator *Exp = cast(E);
    switch (Exp->getOpcode()) {
    case BO_PtrMemD:
    case BO_PtrMemI:
    case BO_Assign:
    case BO_MulAssign:
    case BO_DivAssign:
    case BO_RemAssign:
    case BO_AddAssign:
    case BO_SubAssign:
    case BO_ShlAssign:
    case BO_ShrAssign:
    case BO_AndAssign:
    case BO_XorAssign:
    case BO_OrAssign:
      // C99 6.6/3 allows assignments within unevaluated subexpressions of
      // constant expressions, but they can never be ICEs because an ICE cannot
      // contain an lvalue operand.
      return ICEDiag(IK_NotICE, E->getLocStart());

    case BO_Mul:
    case BO_Div:
    case BO_Rem:
    case BO_Add:
    case BO_Sub:
    case BO_Shl:
    case BO_Shr:
    case BO_LT:
    case BO_GT:
    case BO_LE:
    case BO_GE:
    case BO_EQ:
    case BO_NE:
    case BO_And:
    case BO_Xor:
    case BO_Or:
    case BO_Comma: {
      ICEDiag LHSResult = CheckICE(Exp->getLHS(), Ctx);
      ICEDiag RHSResult = CheckICE(Exp->getRHS(), Ctx);
      if (Exp->getOpcode() == BO_Div ||
          Exp->getOpcode() == BO_Rem) {
        // EvaluateAsRValue gives an error for undefined Div/Rem, so make sure
        // we don't evaluate one.
        if (LHSResult.Kind == IK_ICE && RHSResult.Kind == IK_ICE) {
          llvm::APSInt REval = Exp->getRHS()->EvaluateKnownConstInt(Ctx);
          // Division or remainder by zero.
          if (REval == 0)
            return ICEDiag(IK_ICEIfUnevaluated, E->getLocStart());
          // Signed minimum value divided by -1.
          if (REval.isSigned() && REval.isAllOnesValue()) {
            llvm::APSInt LEval = Exp->getLHS()->EvaluateKnownConstInt(Ctx);
            if (LEval.isMinSignedValue())
              return ICEDiag(IK_ICEIfUnevaluated, E->getLocStart());
          }
        }
      }
      if (Exp->getOpcode() == BO_Comma) {
        if (Ctx.getLangOpts().C99) {
          // C99 6.6p3 introduces a strange edge case: comma can be in an ICE
          // if it isn't evaluated.
          if (LHSResult.Kind == IK_ICE && RHSResult.Kind == IK_ICE)
            return ICEDiag(IK_ICEIfUnevaluated, E->getLocStart());
        } else {
          // In both C89 and C++, commas in ICEs are illegal.
          return ICEDiag(IK_NotICE, E->getLocStart());
        }
      }
      return Worst(LHSResult, RHSResult);
    }
    case BO_LAnd:
    case BO_LOr: {
      ICEDiag LHSResult = CheckICE(Exp->getLHS(), Ctx);
      ICEDiag RHSResult = CheckICE(Exp->getRHS(), Ctx);
      if (LHSResult.Kind == IK_ICE && RHSResult.Kind == IK_ICEIfUnevaluated) {
        // Rare case where the RHS has a comma "side-effect"; we need
        // to actually check the condition to see whether the side
        // with the comma is evaluated.
        if ((Exp->getOpcode() == BO_LAnd) !=
            (Exp->getLHS()->EvaluateKnownConstInt(Ctx) == 0))
          return RHSResult;
        return NoDiag();
      }

      return Worst(LHSResult, RHSResult);
    }
    }
  }
  case Expr::ImplicitCastExprClass:
  case Expr::CStyleCastExprClass:
  case Expr::CXXFunctionalCastExprClass:
  case Expr::CXXStaticCastExprClass:
  case Expr::CXXReinterpretCastExprClass:
  case Expr::CXXConstCastExprClass:
  case Expr::ObjCBridgedCastExprClass: {
    const Expr *SubExpr = cast(E)->getSubExpr();
    if (isa(E)) {
      if (const FloatingLiteral *FL
            = dyn_cast(SubExpr->IgnoreParenImpCasts())) {
        unsigned DestWidth = Ctx.getIntWidth(E->getType());
        bool DestSigned = E->getType()->isSignedIntegerOrEnumerationType();
        APSInt IgnoredVal(DestWidth, !DestSigned);
        bool Ignored;
        // If the value does not fit in the destination type, the behavior is
        // undefined, so we are not required to treat it as a constant
        // expression.
        if (FL->getValue().convertToInteger(IgnoredVal,
                                            llvm::APFloat::rmTowardZero,
                                            &Ignored) & APFloat::opInvalidOp)
          return ICEDiag(IK_NotICE, E->getLocStart());
        return NoDiag();
      }
    }
    // Only these cast kinds are looked through; every other kind is not an ICE.
    switch (cast(E)->getCastKind()) {
    case CK_LValueToRValue:
    case CK_AtomicToNonAtomic:
    case CK_NonAtomicToAtomic:
    case CK_NoOp:
    case CK_IntegralToBoolean:
    case CK_IntegralCast:
      return CheckICE(SubExpr, Ctx);
    default:
      return ICEDiag(IK_NotICE, E->getLocStart());
    }
  }
  case Expr::BinaryConditionalOperatorClass: {
    const BinaryConditionalOperator *Exp = cast(E);
    ICEDiag CommonResult = CheckICE(Exp->getCommon(), Ctx);
    if (CommonResult.Kind == IK_NotICE) return CommonResult;
    ICEDiag FalseResult = CheckICE(Exp->getFalseExpr(), Ctx);
    if (FalseResult.Kind == IK_NotICE) return FalseResult;
    if (CommonResult.Kind == IK_ICEIfUnevaluated) return CommonResult;
    // The false arm is not evaluated when the common expression is nonzero.
    if (FalseResult.Kind == IK_ICEIfUnevaluated &&
        Exp->getCommon()->EvaluateKnownConstInt(Ctx) != 0) return NoDiag();
    return FalseResult;
  }
  case Expr::ConditionalOperatorClass: {
    const ConditionalOperator *Exp = cast(E);
    // If the condition (ignoring parens) is a __builtin_constant_p call,
    // then only the true side is actually considered in an integer constant
    // expression, and it is fully evaluated. This is an important GNU
    // extension. See GCC PR38377 for discussion.
    if (const CallExpr *CallCE
        = dyn_cast(Exp->getCond()->IgnoreParenCasts()))
      if (CallCE->getBuiltinCallee() == Builtin::BI__builtin_constant_p)
        return CheckEvalInICE(E, Ctx);
    ICEDiag CondResult = CheckICE(Exp->getCond(), Ctx);
    if (CondResult.Kind == IK_NotICE)
      return CondResult;

    ICEDiag TrueResult = CheckICE(Exp->getTrueExpr(), Ctx);
    ICEDiag FalseResult = CheckICE(Exp->getFalseExpr(), Ctx);

    if (TrueResult.Kind == IK_NotICE)
      return TrueResult;
    if (FalseResult.Kind == IK_NotICE)
      return FalseResult;
    if (CondResult.Kind == IK_ICEIfUnevaluated)
      return CondResult;
    if (TrueResult.Kind == IK_ICE && FalseResult.Kind == IK_ICE)
      return NoDiag();
    // Rare case where the diagnostics depend on which side is evaluated
    // Note that if we get here, CondResult is 0, and at least one of
    // TrueResult and FalseResult is non-zero.
    if (Exp->getCond()->EvaluateKnownConstInt(Ctx) == 0)
      return FalseResult;
    return TrueResult;
  }
  case Expr::CXXDefaultArgExprClass:
    return CheckICE(cast(E)->getExpr(), Ctx);
  case Expr::CXXDefaultInitExprClass:
    return CheckICE(cast(E)->getExpr(), Ctx);
  case Expr::ChooseExprClass: {
    return CheckICE(cast(E)->getChosenSubExpr(), Ctx);
  }
  }

  llvm_unreachable("Invalid StmtClass!");
}

/// Evaluate an expression as a C++11 integral constant expression.
static bool EvaluateCPlusPlus11IntegralConstantExpr(const ASTContext &Ctx, const Expr *E, llvm::APSInt *Value, SourceLocation *Loc) { if (!E->getType()->isIntegralOrEnumerationType()) { if (Loc) *Loc = E->getExprLoc(); return false; } APValue Result; if (!E->isCXX11ConstantExpr(Ctx, &Result, Loc)) return false; if (!Result.isInt()) { if (Loc) *Loc = E->getExprLoc(); return false; } if (Value) *Value = Result.getInt(); return true; } bool Expr::isIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc) const { if (Ctx.getLangOpts().CPlusPlus11) return EvaluateCPlusPlus11IntegralConstantExpr(Ctx, this, nullptr, Loc); ICEDiag D = CheckICE(this, Ctx); if (D.Kind != IK_ICE) { if (Loc) *Loc = D.Loc; return false; } return true; } bool Expr::isIntegerConstantExpr(llvm::APSInt &Value, const ASTContext &Ctx, SourceLocation *Loc, bool isEvaluated) const { if (Ctx.getLangOpts().CPlusPlus11) return EvaluateCPlusPlus11IntegralConstantExpr(Ctx, this, &Value, Loc); if (!isIntegerConstantExpr(Ctx, Loc)) return false; // The only possible side-effects here are due to UB discovered in the // evaluation (for instance, INT_MAX + 1). In such a case, we are still // required to treat the expression as an ICE, so we produce the folded // value. if (!EvaluateAsInt(Value, Ctx, SE_AllowSideEffects)) llvm_unreachable("ICE cannot be evaluated!"); return true; } bool Expr::isCXX98IntegralConstantExpr(const ASTContext &Ctx) const { return CheckICE(this, Ctx).Kind == IK_ICE; } bool Expr::isCXX11ConstantExpr(const ASTContext &Ctx, APValue *Result, SourceLocation *Loc) const { // We support this checking in C++98 mode in order to diagnose compatibility // issues. assert(Ctx.getLangOpts().CPlusPlus); // Build evaluation settings. Expr::EvalStatus Status; SmallVector Diags; Status.Diag = &Diags; EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantExpression); APValue Scratch; bool IsConstExpr = ::EvaluateAsRValue(Info, this, Result ? 
*Result : Scratch); if (!Diags.empty()) { IsConstExpr = false; if (Loc) *Loc = Diags[0].first; } else if (!IsConstExpr) { // FIXME: This shouldn't happen. if (Loc) *Loc = getExprLoc(); } return IsConstExpr; } bool Expr::EvaluateWithSubstitution(APValue &Value, ASTContext &Ctx, const FunctionDecl *Callee, ArrayRef Args, const Expr *This) const { Expr::EvalStatus Status; EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantExpressionUnevaluated); LValue ThisVal; const LValue *ThisPtr = nullptr; if (This) { #ifndef NDEBUG auto *MD = dyn_cast(Callee); assert(MD && "Don't provide `this` for non-methods."); assert(!MD->isStatic() && "Don't provide `this` for static methods."); #endif if (EvaluateObjectArgument(Info, This, ThisVal)) ThisPtr = &ThisVal; if (Info.EvalStatus.HasSideEffects) return false; } ArgVector ArgValues(Args.size()); for (ArrayRef::iterator I = Args.begin(), E = Args.end(); I != E; ++I) { if ((*I)->isValueDependent() || !Evaluate(ArgValues[I - Args.begin()], Info, *I)) // If evaluation fails, throw away the argument entirely. ArgValues[I - Args.begin()] = APValue(); if (Info.EvalStatus.HasSideEffects) return false; } // Build fake call to Callee. CallStackFrame Frame(Info, Callee->getLocation(), Callee, ThisPtr, ArgValues.data()); return Evaluate(Value, Info, this) && !Info.EvalStatus.HasSideEffects; } bool Expr::isPotentialConstantExpr(const FunctionDecl *FD, SmallVectorImpl< PartialDiagnosticAt> &Diags) { // FIXME: It would be useful to check constexpr function templates, but at the // moment the constant expression evaluator cannot cope with the non-rigorous // ASTs which we build for dependent expressions. if (FD->isDependentContext()) return true; Expr::EvalStatus Status; Status.Diag = &Diags; EvalInfo Info(FD->getASTContext(), Status, EvalInfo::EM_PotentialConstantExpression); const CXXMethodDecl *MD = dyn_cast(FD); const CXXRecordDecl *RD = MD ? 
MD->getParent()->getCanonicalDecl() : nullptr; // Fabricate an arbitrary expression on the stack and pretend that it // is a temporary being used as the 'this' pointer. LValue This; ImplicitValueInitExpr VIE(RD ? Info.Ctx.getRecordType(RD) : Info.Ctx.IntTy); This.set(&VIE, Info.CurrentCall->Index); ArrayRef Args; APValue Scratch; if (const CXXConstructorDecl *CD = dyn_cast(FD)) { // Evaluate the call as a constant initializer, to allow the construction // of objects of non-literal types. Info.setEvaluatingDecl(This.getLValueBase(), Scratch); HandleConstructorCall(&VIE, This, Args, CD, Info, Scratch); } else { SourceLocation Loc = FD->getLocation(); HandleFunctionCall(Loc, FD, (MD && MD->isInstance()) ? &This : nullptr, Args, FD->getBody(), Info, Scratch, nullptr); } return Diags.empty(); } bool Expr::isPotentialConstantExprUnevaluated(Expr *E, const FunctionDecl *FD, SmallVectorImpl< PartialDiagnosticAt> &Diags) { Expr::EvalStatus Status; Status.Diag = &Diags; EvalInfo Info(FD->getASTContext(), Status, EvalInfo::EM_PotentialConstantExpressionUnevaluated); // Fabricate a call stack frame to give the arguments a plausible cover story. 
ArrayRef Args; ArgVector ArgValues(0); bool Success = EvaluateArgs(Args, ArgValues, Info); (void)Success; assert(Success && "Failed to set up arguments for potential constant evaluation"); CallStackFrame Frame(Info, SourceLocation(), FD, nullptr, ArgValues.data()); APValue ResultScratch; Evaluate(ResultScratch, Info, E); return Diags.empty(); } bool Expr::tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, unsigned Type) const { if (!getType()->isPointerType()) return false; Expr::EvalStatus Status; EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold); return tryEvaluateBuiltinObjectSize(this, Type, Info, Result); } Index: projects/clang400-import/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h =================================================================== --- projects/clang400-import/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h (revision 313893) +++ projects/clang400-import/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h (revision 313894) @@ -1,1311 +1,1311 @@ //===--- CodeGenModule.h - Per-Module state for LLVM CodeGen ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This is the internal per-translation-unit state used for llvm translation. 
// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_LIB_CODEGEN_CODEGENMODULE_H #define LLVM_CLANG_LIB_CODEGEN_CODEGENMODULE_H #include "CGVTables.h" #include "CodeGenTypeCache.h" #include "CodeGenTypes.h" #include "SanitizerMetadata.h" #include "clang/AST/Attr.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclOpenMP.h" #include "clang/AST/GlobalDecl.h" #include "clang/AST/Mangle.h" #include "clang/Basic/ABI.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/Module.h" #include "clang/Basic/SanitizerBlacklist.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/IR/Module.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Transforms/Utils/SanitizerStats.h" namespace llvm { class Module; class Constant; class ConstantInt; class Function; class GlobalValue; class DataLayout; class FunctionType; class LLVMContext; class IndexedInstrProfReader; } namespace clang { class ASTContext; class AtomicType; class FunctionDecl; class IdentifierInfo; class ObjCMethodDecl; class ObjCImplementationDecl; class ObjCCategoryImplDecl; class ObjCProtocolDecl; class ObjCEncodeExpr; class BlockExpr; class CharUnits; class Decl; class Expr; class Stmt; class InitListExpr; class StringLiteral; class NamedDecl; class ValueDecl; class VarDecl; class LangOptions; class CodeGenOptions; class HeaderSearchOptions; class PreprocessorOptions; class DiagnosticsEngine; class AnnotateAttr; class CXXDestructorDecl; class Module; class CoverageSourceInfo; namespace CodeGen { class CallArgList; class CodeGenFunction; class CodeGenTBAA; class CGCXXABI; class CGDebugInfo; class CGObjCRuntime; class CGOpenCLRuntime; class CGOpenMPRuntime; class CGCUDARuntime; class BlockFieldFlags; class FunctionArgList; class CoverageMappingModuleGen; class TargetCodeGenInfo; enum ForDefinition_t : bool { NotForDefinition = false, 
ForDefinition = true }; struct OrderGlobalInits { unsigned int priority; unsigned int lex_order; OrderGlobalInits(unsigned int p, unsigned int l) : priority(p), lex_order(l) {} bool operator==(const OrderGlobalInits &RHS) const { return priority == RHS.priority && lex_order == RHS.lex_order; } bool operator<(const OrderGlobalInits &RHS) const { return std::tie(priority, lex_order) < std::tie(RHS.priority, RHS.lex_order); } }; struct ObjCEntrypoints { ObjCEntrypoints() { memset(this, 0, sizeof(*this)); } /// void objc_autoreleasePoolPop(void*); llvm::Constant *objc_autoreleasePoolPop; /// void *objc_autoreleasePoolPush(void); llvm::Constant *objc_autoreleasePoolPush; /// id objc_autorelease(id); llvm::Constant *objc_autorelease; /// id objc_autoreleaseReturnValue(id); llvm::Constant *objc_autoreleaseReturnValue; /// void objc_copyWeak(id *dest, id *src); llvm::Constant *objc_copyWeak; /// void objc_destroyWeak(id*); llvm::Constant *objc_destroyWeak; /// id objc_initWeak(id*, id); llvm::Constant *objc_initWeak; /// id objc_loadWeak(id*); llvm::Constant *objc_loadWeak; /// id objc_loadWeakRetained(id*); llvm::Constant *objc_loadWeakRetained; /// void objc_moveWeak(id *dest, id *src); llvm::Constant *objc_moveWeak; /// id objc_retain(id); llvm::Constant *objc_retain; /// id objc_retainAutorelease(id); llvm::Constant *objc_retainAutorelease; /// id objc_retainAutoreleaseReturnValue(id); llvm::Constant *objc_retainAutoreleaseReturnValue; /// id objc_retainAutoreleasedReturnValue(id); llvm::Constant *objc_retainAutoreleasedReturnValue; /// id objc_retainBlock(id); llvm::Constant *objc_retainBlock; /// void objc_release(id); llvm::Constant *objc_release; - /// id objc_storeStrong(id*, id); + /// void objc_storeStrong(id*, id); llvm::Constant *objc_storeStrong; /// id objc_storeWeak(id*, id); llvm::Constant *objc_storeWeak; /// id objc_unsafeClaimAutoreleasedReturnValue(id); llvm::Constant *objc_unsafeClaimAutoreleasedReturnValue; /// A void(void) inline asm to use to mark 
that the return value of /// a call will be immediately retain. llvm::InlineAsm *retainAutoreleasedReturnValueMarker; /// void clang.arc.use(...); llvm::Constant *clang_arc_use; }; /// This class records statistics on instrumentation based profiling. class InstrProfStats { uint32_t VisitedInMainFile; uint32_t MissingInMainFile; uint32_t Visited; uint32_t Missing; uint32_t Mismatched; public: InstrProfStats() : VisitedInMainFile(0), MissingInMainFile(0), Visited(0), Missing(0), Mismatched(0) {} /// Record that we've visited a function and whether or not that function was /// in the main source file. void addVisited(bool MainFile) { if (MainFile) ++VisitedInMainFile; ++Visited; } /// Record that a function we've visited has no profile data. void addMissing(bool MainFile) { if (MainFile) ++MissingInMainFile; ++Missing; } /// Record that a function we've visited has mismatched profile data. void addMismatched(bool MainFile) { ++Mismatched; } /// Whether or not the stats we've gathered indicate any potential problems. bool hasDiagnostics() { return Missing || Mismatched; } /// Report potential problems we've found to \c Diags. void reportDiagnostics(DiagnosticsEngine &Diags, StringRef MainFile); }; /// A pair of helper functions for a __block variable. class BlockByrefHelpers : public llvm::FoldingSetNode { // MSVC requires this type to be complete in order to process this // header. public: llvm::Constant *CopyHelper; llvm::Constant *DisposeHelper; /// The alignment of the field. This is important because /// different offsets to the field within the byref struct need to /// have different helper functions. 
CharUnits Alignment; BlockByrefHelpers(CharUnits alignment) : Alignment(alignment) {} BlockByrefHelpers(const BlockByrefHelpers &) = default; virtual ~BlockByrefHelpers(); void Profile(llvm::FoldingSetNodeID &id) const { id.AddInteger(Alignment.getQuantity()); profileImpl(id); } virtual void profileImpl(llvm::FoldingSetNodeID &id) const = 0; virtual bool needsCopy() const { return true; } virtual void emitCopy(CodeGenFunction &CGF, Address dest, Address src) = 0; virtual bool needsDispose() const { return true; } virtual void emitDispose(CodeGenFunction &CGF, Address field) = 0; }; /// This class organizes the cross-function state that is used while generating /// LLVM code. class CodeGenModule : public CodeGenTypeCache { CodeGenModule(const CodeGenModule &) = delete; void operator=(const CodeGenModule &) = delete; public: struct Structor { Structor() : Priority(0), Initializer(nullptr), AssociatedData(nullptr) {} Structor(int Priority, llvm::Constant *Initializer, llvm::Constant *AssociatedData) : Priority(Priority), Initializer(Initializer), AssociatedData(AssociatedData) {} int Priority; llvm::Constant *Initializer; llvm::Constant *AssociatedData; }; typedef std::vector CtorList; private: ASTContext &Context; const LangOptions &LangOpts; const HeaderSearchOptions &HeaderSearchOpts; // Only used for debug info. const PreprocessorOptions &PreprocessorOpts; // Only used for debug info. const CodeGenOptions &CodeGenOpts; llvm::Module &TheModule; DiagnosticsEngine &Diags; const TargetInfo &Target; std::unique_ptr ABI; llvm::LLVMContext &VMContext; std::unique_ptr TBAA; mutable std::unique_ptr TheTargetCodeGenInfo; // This should not be moved earlier, since its initialization depends on some // of the previous reference members being already initialized and also checks // if TheTargetCodeGenInfo is NULL CodeGenTypes Types; /// Holds information about C++ vtables. 
CodeGenVTables VTables; std::unique_ptr ObjCRuntime; std::unique_ptr OpenCLRuntime; std::unique_ptr OpenMPRuntime; std::unique_ptr CUDARuntime; std::unique_ptr DebugInfo; std::unique_ptr ObjCData; llvm::MDNode *NoObjCARCExceptionsMetadata = nullptr; std::unique_ptr PGOReader; InstrProfStats PGOStats; std::unique_ptr SanStats; // A set of references that have only been seen via a weakref so far. This is // used to remove the weak of the reference if we ever see a direct reference // or a definition. llvm::SmallPtrSet WeakRefReferences; /// This contains all the decls which have definitions but/ which are deferred /// for emission and therefore should only be output if they are actually /// used. If a decl is in this, then it is known to have not been referenced /// yet. std::map DeferredDecls; /// This is a list of deferred decls which we have seen that *are* actually /// referenced. These get code generated when the module is done. struct DeferredGlobal { DeferredGlobal(llvm::GlobalValue *GV, GlobalDecl GD) : GV(GV), GD(GD) {} llvm::TrackingVH GV; GlobalDecl GD; }; std::vector DeferredDeclsToEmit; void addDeferredDeclToEmit(llvm::GlobalValue *GV, GlobalDecl GD) { DeferredDeclsToEmit.emplace_back(GV, GD); } /// List of alias we have emitted. Used to make sure that what they point to /// is defined once we get to the end of the of the translation unit. std::vector Aliases; typedef llvm::StringMap > ReplacementsTy; ReplacementsTy Replacements; /// List of global values to be replaced with something else. Used when we /// want to replace a GlobalValue but can't identify it by its mangled name /// anymore (because the name is already taken). llvm::SmallVector, 8> GlobalValReplacements; /// Set of global decls for which we already diagnosed mangled name conflict. /// Required to not issue a warning (on a mangling conflict) multiple times /// for the same decl. llvm::DenseSet DiagnosedConflictingDefinitions; /// A queue of (optional) vtables to consider emitting. 
std::vector DeferredVTables; /// List of global values which are required to be present in the object file; /// bitcast to i8*. This is used for forcing visibility of symbols which may /// otherwise be optimized out. std::vector LLVMUsed; std::vector LLVMCompilerUsed; /// Store the list of global constructors and their respective priorities to /// be emitted when the translation unit is complete. CtorList GlobalCtors; /// Store the list of global destructors and their respective priorities to be /// emitted when the translation unit is complete. CtorList GlobalDtors; /// An ordered map of canonical GlobalDecls to their mangled names. llvm::MapVector MangledDeclNames; llvm::StringMap Manglings; /// Global annotations. std::vector Annotations; /// Map used to get unique annotation strings. llvm::StringMap AnnotationStrings; llvm::StringMap CFConstantStringMap; llvm::DenseMap ConstantStringMap; llvm::DenseMap StaticLocalDeclMap; llvm::DenseMap StaticLocalDeclGuardMap; llvm::DenseMap MaterializedGlobalTemporaryMap; llvm::DenseMap AtomicSetterHelperFnMap; llvm::DenseMap AtomicGetterHelperFnMap; /// Map used to get unique type descriptor constants for sanitizers. llvm::DenseMap TypeDescriptorMap; /// Map used to track internal linkage functions declared within /// extern "C" regions. typedef llvm::MapVector StaticExternCMap; StaticExternCMap StaticExternCValues; /// \brief thread_local variables defined or used in this TU. std::vector CXXThreadLocals; /// \brief thread_local variables with initializers that need to run /// before any thread_local variable in this TU is odr-used. std::vector CXXThreadLocalInits; std::vector CXXThreadLocalInitVars; /// Global variables with initializers that need to run before main. std::vector CXXGlobalInits; /// When a C++ decl with an initializer is deferred, null is /// appended to CXXGlobalInits, and the index of that null is placed /// here so that the initializer will be performed in the correct /// order. 
Once the decl is emitted, the index is replaced with ~0U to ensure /// that we don't re-emit the initializer. llvm::DenseMap DelayedCXXInitPosition; typedef std::pair GlobalInitData; struct GlobalInitPriorityCmp { bool operator()(const GlobalInitData &LHS, const GlobalInitData &RHS) const { return LHS.first.priority < RHS.first.priority; } }; /// Global variables with initializers whose order of initialization is set by /// init_priority attribute. SmallVector PrioritizedCXXGlobalInits; /// Global destructor functions and arguments that need to run on termination. std::vector > CXXGlobalDtors; /// \brief The complete set of modules that has been imported. llvm::SetVector ImportedModules; /// \brief The set of modules for which the module initializers /// have been emitted. llvm::SmallPtrSet EmittedModuleInitializers; /// \brief A vector of metadata strings. SmallVector LinkerOptionsMetadata; /// @name Cache for Objective-C runtime types /// @{ /// Cached reference to the class for constant strings. This value has type /// int * but is actually an Obj-C class pointer. llvm::WeakVH CFConstantStringClassRef; /// \brief The type used to describe the state of a fast enumeration in /// Objective-C's for..in loop. QualType ObjCFastEnumerationStateType; /// @} /// Lazily create the Objective-C runtime void createObjCRuntime(); void createOpenCLRuntime(); void createOpenMPRuntime(); void createCUDARuntime(); bool isTriviallyRecursive(const FunctionDecl *F); bool shouldEmitFunction(GlobalDecl GD); /// Map used to be sure we don't emit the same CompoundLiteral twice. llvm::DenseMap EmittedCompoundLiterals; /// Map of the global blocks we've emitted, so that we don't have to re-emit /// them if the constexpr evaluator gets aggressive. 
llvm::DenseMap EmittedGlobalBlocks; /// @name Cache for Blocks Runtime Globals /// @{ llvm::Constant *NSConcreteGlobalBlock = nullptr; llvm::Constant *NSConcreteStackBlock = nullptr; llvm::Constant *BlockObjectAssign = nullptr; llvm::Constant *BlockObjectDispose = nullptr; llvm::Type *BlockDescriptorType = nullptr; llvm::Type *GenericBlockLiteralType = nullptr; struct { int GlobalUniqueCount; } Block; /// void @llvm.lifetime.start(i64 %size, i8* nocapture ) llvm::Constant *LifetimeStartFn = nullptr; /// void @llvm.lifetime.end(i64 %size, i8* nocapture ) llvm::Constant *LifetimeEndFn = nullptr; GlobalDecl initializedGlobalDecl; std::unique_ptr SanitizerMD; /// @} llvm::DenseMap DeferredEmptyCoverageMappingDecls; std::unique_ptr CoverageMapping; /// Mapping from canonical types to their metadata identifiers. We need to /// maintain this mapping because identifiers may be formed from distinct /// MDNodes. llvm::DenseMap MetadataIdMap; public: CodeGenModule(ASTContext &C, const HeaderSearchOptions &headersearchopts, const PreprocessorOptions &ppopts, const CodeGenOptions &CodeGenOpts, llvm::Module &M, DiagnosticsEngine &Diags, CoverageSourceInfo *CoverageInfo = nullptr); ~CodeGenModule(); void clear(); /// Finalize LLVM code generation. void Release(); /// Return a reference to the configured Objective-C runtime. CGObjCRuntime &getObjCRuntime() { if (!ObjCRuntime) createObjCRuntime(); return *ObjCRuntime; } /// Return true iff an Objective-C runtime has been configured. bool hasObjCRuntime() { return !!ObjCRuntime; } /// Return a reference to the configured OpenCL runtime. CGOpenCLRuntime &getOpenCLRuntime() { assert(OpenCLRuntime != nullptr); return *OpenCLRuntime; } /// Return a reference to the configured OpenMP runtime. CGOpenMPRuntime &getOpenMPRuntime() { assert(OpenMPRuntime != nullptr); return *OpenMPRuntime; } /// Return a reference to the configured CUDA runtime. 
CGCUDARuntime &getCUDARuntime() {
  assert(CUDARuntime != nullptr);
  return *CUDARuntime;
}

/// Return the Objective-C entry point table. Asserts that ObjCData is set.
ObjCEntrypoints &getObjCEntrypoints() const {
  assert(ObjCData != nullptr);
  return *ObjCData;
}

/// Return the profiling statistics collected for this module.
InstrProfStats &getPGOStats() { return PGOStats; }
/// Return the raw pointer held by PGOReader (null if none is set).
llvm::IndexedInstrProfReader *getPGOReader() const { return PGOReader.get(); }

/// Return the raw pointer held by CoverageMapping (null if none is set).
CoverageMappingModuleGen *getCoverageMapping() const {
  return CoverageMapping.get();
}

/// Return the constant recorded for the static local \p D.
/// NOTE(review): this indexes StaticLocalDeclMap with operator[], which
/// presumably inserts a null entry for an unknown key -- confirm.
llvm::Constant *getStaticLocalDeclAddress(const VarDecl *D) {
  return StaticLocalDeclMap[D];
}
/// Record \p C as the address of the static local \p D.
void setStaticLocalDeclAddress(const VarDecl *D,
                               llvm::Constant *C) {
  StaticLocalDeclMap[D] = C;
}

llvm::Constant *
getOrCreateStaticVarDecl(const VarDecl &D,
                         llvm::GlobalValue::LinkageTypes Linkage);

/// Return the guard variable recorded for the static local \p D.
llvm::GlobalVariable *getStaticLocalDeclGuardAddress(const VarDecl *D) {
  return StaticLocalDeclGuardMap[D];
}
/// Record \p C as the guard variable of the static local \p D.
void setStaticLocalDeclGuardAddress(const VarDecl *D,
                                    llvm::GlobalVariable *C) {
  StaticLocalDeclGuardMap[D] = C;
}

bool lookupRepresentativeDecl(StringRef MangledName,
                              GlobalDecl &Result) const;

/// Return the cached atomic setter helper for \p Ty.
llvm::Constant *getAtomicSetterHelperFnMap(QualType Ty) {
  return AtomicSetterHelperFnMap[Ty];
}
/// Cache \p Fn as the atomic setter helper for \p Ty.
void setAtomicSetterHelperFnMap(QualType Ty,
                          llvm::Constant *Fn) {
  AtomicSetterHelperFnMap[Ty] = Fn;
}

/// Return the cached atomic getter helper for \p Ty.
llvm::Constant *getAtomicGetterHelperFnMap(QualType Ty) {
  return AtomicGetterHelperFnMap[Ty];
}
/// Cache \p Fn as the atomic getter helper for \p Ty.
void setAtomicGetterHelperFnMap(QualType Ty,
                          llvm::Constant *Fn) {
  AtomicGetterHelperFnMap[Ty] = Fn;
}

/// Return the cached sanitizer type descriptor for \p Ty.
llvm::Constant *getTypeDescriptorFromMap(QualType Ty) {
  return TypeDescriptorMap[Ty];
}
/// Cache \p C as the sanitizer type descriptor for \p Ty.
void setTypeDescriptorInMap(QualType Ty, llvm::Constant *C) {
  TypeDescriptorMap[Ty] = C;
}

/// Return the raw pointer held by DebugInfo (null if none is set).
CGDebugInfo *getModuleDebugInfo() { return DebugInfo.get(); }

/// Return the NoObjCARCExceptionsMetadata node, creating an empty MDNode on
/// first use and caching it.
llvm::MDNode *getNoObjCARCExceptionsMetadata() {
  if (!NoObjCARCExceptionsMetadata)
    NoObjCARCExceptionsMetadata = llvm::MDNode::get(getLLVMContext(), None);
  return NoObjCARCExceptionsMetadata;
}

// Accessors for the references captured at construction time.
ASTContext &getContext() const { return Context; }
const LangOptions &getLangOpts() const { return LangOpts; }
const HeaderSearchOptions &getHeaderSearchOpts()
  const { return HeaderSearchOpts; }
const PreprocessorOptions &getPreprocessorOpts()
  const { return PreprocessorOpts; }
const CodeGenOptions &getCodeGenOpts() const { return CodeGenOpts; }
llvm::Module &getModule() const { return TheModule; }
DiagnosticsEngine &getDiags() const { return Diags; }
/// Return the data layout of the module being generated.
const llvm::DataLayout &getDataLayout() const {
  return TheModule.getDataLayout();
}
const TargetInfo &getTarget() const { return Target; }
/// Return the triple of the current target.
const llvm::Triple &getTriple() const { return Target.getTriple(); }
bool supportsCOMDAT() const;
void maybeSetTrivialComdat(const Decl &D, llvm::GlobalObject &GO);

/// Return the C++ ABI object; dereferences ABI without a null check.
CGCXXABI &getCXXABI() const { return *ABI; }
llvm::LLVMContext &getLLVMContext() { return VMContext; }

/// Return true if a TBAA object has been created for this module.
bool shouldUseTBAA() const { return TBAA != nullptr; }

const TargetCodeGenInfo &getTargetCodeGenInfo();

CodeGenTypes &getTypes() { return Types; }

CodeGenVTables &getVTables() { return VTables; }

/// Forward to the Itanium vtable context owned by VTables.
ItaniumVTableContext &getItaniumVTableContext() {
  return VTables.getItaniumVTableContext();
}

/// Forward to the Microsoft vtable context owned by VTables.
MicrosoftVTableContext &getMicrosoftVTableContext() {
  return VTables.getMicrosoftVTableContext();
}

/// Return the mutable list of global constructors.
CtorList &getGlobalCtors() { return GlobalCtors; }
/// Return the mutable list of global destructors.
CtorList &getGlobalDtors() { return GlobalDtors; }

llvm::MDNode *getTBAAInfo(QualType QTy);
llvm::MDNode *getTBAAInfoForVTablePtr();
llvm::MDNode *getTBAAStructInfo(QualType QTy);
/// Return the path-aware tag for given base type, access node and offset.
llvm::MDNode *getTBAAStructTagInfo(QualType BaseTy, llvm::MDNode *AccessN,
                                   uint64_t O);

bool isTypeConstant(QualType QTy, bool ExcludeCtorDtor);

bool isPaddedAtomicType(QualType type);
bool isPaddedAtomicType(const AtomicType *type);

/// Decorate the instruction with a TBAA tag. For scalar TBAA, the tag
/// is the same as the type. For struct-path aware TBAA, the tag
/// is different from the type: base type, access type and offset.
/// When ConvertTypeToTag is true, we create a tag based on the scalar type.
void DecorateInstructionWithTBAA(llvm::Instruction *Inst, llvm::MDNode *TBAAInfo, bool ConvertTypeToTag = true); /// Adds !invariant.barrier !tag to instruction void DecorateInstructionWithInvariantGroup(llvm::Instruction *I, const CXXRecordDecl *RD); /// Emit the given number of characters as a value of type size_t. llvm::ConstantInt *getSize(CharUnits numChars); /// Set the visibility for the given LLVM GlobalValue. void setGlobalVisibility(llvm::GlobalValue *GV, const NamedDecl *D) const; /// Set the TLS mode for the given LLVM GlobalValue for the thread-local /// variable declaration D. void setTLSMode(llvm::GlobalValue *GV, const VarDecl &D) const; static llvm::GlobalValue::VisibilityTypes GetLLVMVisibility(Visibility V) { switch (V) { case DefaultVisibility: return llvm::GlobalValue::DefaultVisibility; case HiddenVisibility: return llvm::GlobalValue::HiddenVisibility; case ProtectedVisibility: return llvm::GlobalValue::ProtectedVisibility; } llvm_unreachable("unknown visibility!"); } llvm::Constant *GetAddrOfGlobal(GlobalDecl GD, ForDefinition_t IsForDefinition = NotForDefinition); /// Will return a global variable of the given type. If a variable with a /// different type already exists then a new variable with the right type /// will be created and all uses of the old variable will be replaced with a /// bitcast to the new variable. llvm::GlobalVariable * CreateOrReplaceCXXRuntimeVariable(StringRef Name, llvm::Type *Ty, llvm::GlobalValue::LinkageTypes Linkage); llvm::Function * CreateGlobalInitOrDestructFunction(llvm::FunctionType *ty, const Twine &name, const CGFunctionInfo &FI, SourceLocation Loc = SourceLocation(), bool TLS = false); /// Return the address space of the underlying global variable for D, as /// determined by its declaration. Normally this is the same as the address /// space of D's type, but in CUDA, address spaces are associated with /// declarations, not types. 
unsigned GetGlobalVarAddressSpace(const VarDecl *D, unsigned AddrSpace);

/// Return the llvm::Constant for the address of the given global variable.
/// If Ty is non-null and if the global doesn't exist, then it will be created
/// with the specified type instead of whatever the normal requested type
/// would be. If IsForDefinition is true, it is guaranteed that an actual
/// global with type Ty will be returned, not a conversion of a variable with
/// the same mangled name but some other type.
llvm::Constant *GetAddrOfGlobalVar(const VarDecl *D,
                                   llvm::Type *Ty = nullptr,
                                   ForDefinition_t IsForDefinition
                                     = NotForDefinition);

/// Return the address of the given function. If Ty is non-null, then this
/// function will use the specified type if it has to create it.
llvm::Constant *GetAddrOfFunction(GlobalDecl GD, llvm::Type *Ty = nullptr,
                                  bool ForVTable = false,
                                  bool DontDefer = false,
                                  ForDefinition_t IsForDefinition
                                    = NotForDefinition);

/// Get the address of the RTTI descriptor for the given type.
llvm::Constant *GetAddrOfRTTIDescriptor(QualType Ty, bool ForEH = false);

/// Get the address of a uuid descriptor.
ConstantAddress GetAddrOfUuidDescriptor(const CXXUuidofExpr* E);

/// Get the address of the thunk for the given global decl.
llvm::Constant *GetAddrOfThunk(GlobalDecl GD, const ThunkInfo &Thunk);

/// Get a reference to the target of VD.
ConstantAddress GetWeakRefReference(const ValueDecl *VD);

/// Returns the assumed alignment of an opaque pointer to the given class.
CharUnits getClassPointerAlignment(const CXXRecordDecl *CD);

/// Returns the assumed alignment of a virtual base of a class.
CharUnits getVBaseAlignment(CharUnits DerivedAlign,
                            const CXXRecordDecl *Derived,
                            const CXXRecordDecl *VBase);

/// Given a class pointer with an actual known alignment, and the
/// expected alignment of an object at a dynamic offset w.r.t that
/// pointer, return the alignment to assume at the offset.
CharUnits getDynamicOffsetAlignment(CharUnits ActualAlign, const CXXRecordDecl *Class, CharUnits ExpectedTargetAlign); CharUnits computeNonVirtualBaseClassOffset(const CXXRecordDecl *DerivedClass, CastExpr::path_const_iterator Start, CastExpr::path_const_iterator End); /// Returns the offset from a derived class to a class. Returns null if the /// offset is 0. llvm::Constant * GetNonVirtualBaseClassOffset(const CXXRecordDecl *ClassDecl, CastExpr::path_const_iterator PathBegin, CastExpr::path_const_iterator PathEnd); llvm::FoldingSet ByrefHelpersCache; /// Fetches the global unique block count. int getUniqueBlockCount() { return ++Block.GlobalUniqueCount; } /// Fetches the type of a generic block descriptor. llvm::Type *getBlockDescriptorType(); /// The type of a generic block literal. llvm::Type *getGenericBlockLiteralType(); /// Gets the address of a block which requires no captures. llvm::Constant *GetAddrOfGlobalBlock(const BlockExpr *BE, StringRef Name); /// Returns the address of a block which requires no caputres, or null if /// we've yet to emit the block for BE. llvm::Constant *getAddrOfGlobalBlockIfEmitted(const BlockExpr *BE) { return EmittedGlobalBlocks.lookup(BE); } /// Notes that BE's global block is available via Addr. Asserts that BE /// isn't already emitted. void setAddrOfGlobalBlock(const BlockExpr *BE, llvm::Constant *Addr); /// Return a pointer to a constant CFString object for the given string. ConstantAddress GetAddrOfConstantCFString(const StringLiteral *Literal); /// Return a pointer to a constant NSString object for the given string. Or a /// user defined String object as defined via /// -fconstant-string-class=class_name option. ConstantAddress GetAddrOfConstantString(const StringLiteral *Literal); /// Return a constant array for the given string. llvm::Constant *GetConstantArrayFromStringLiteral(const StringLiteral *E); /// Return a pointer to a constant array for the given string literal. 
ConstantAddress GetAddrOfConstantStringFromLiteral(const StringLiteral *S, StringRef Name = ".str"); /// Return a pointer to a constant array for the given ObjCEncodeExpr node. ConstantAddress GetAddrOfConstantStringFromObjCEncode(const ObjCEncodeExpr *); /// Returns a pointer to a character array containing the literal and a /// terminating '\0' character. The result has pointer to array type. /// /// \param GlobalName If provided, the name to use for the global (if one is /// created). ConstantAddress GetAddrOfConstantCString(const std::string &Str, const char *GlobalName = nullptr); /// Returns a pointer to a constant global variable for the given file-scope /// compound literal expression. ConstantAddress GetAddrOfConstantCompoundLiteral(const CompoundLiteralExpr*E); /// If it's been emitted already, returns the GlobalVariable corresponding to /// a compound literal. Otherwise, returns null. llvm::GlobalVariable * getAddrOfConstantCompoundLiteralIfEmitted(const CompoundLiteralExpr *E); /// Notes that CLE's GlobalVariable is GV. Asserts that CLE isn't already /// emitted. void setAddrOfConstantCompoundLiteral(const CompoundLiteralExpr *CLE, llvm::GlobalVariable *GV); /// \brief Returns a pointer to a global variable representing a temporary /// with static or thread storage duration. ConstantAddress GetAddrOfGlobalTemporary(const MaterializeTemporaryExpr *E, const Expr *Inner); /// \brief Retrieve the record type that describes the state of an /// Objective-C fast enumeration loop (for..in). QualType getObjCFastEnumerationStateType(); // Produce code for this constructor/destructor. This method doesn't try // to apply any ABI rules about which other constructors/destructors // are needed or if they are alias to each other. llvm::Function *codegenCXXStructor(const CXXMethodDecl *MD, StructorType Type); /// Return the address of the constructor/destructor of the given type. 
llvm::Constant * getAddrOfCXXStructor(const CXXMethodDecl *MD, StructorType Type, const CGFunctionInfo *FnInfo = nullptr, llvm::FunctionType *FnType = nullptr, bool DontDefer = false, ForDefinition_t IsForDefinition = NotForDefinition); /// Given a builtin id for a function like "__builtin_fabsf", return a /// Function* for "fabsf". llvm::Constant *getBuiltinLibFunction(const FunctionDecl *FD, unsigned BuiltinID); llvm::Function *getIntrinsic(unsigned IID, ArrayRef<llvm::Type*> Tys = None); /// Emit code for a single top level declaration. void EmitTopLevelDecl(Decl *D); /// \brief Store a deferred empty coverage mapping for an unused /// and thus uninstrumented top level declaration. void AddDeferredUnusedCoverageMapping(Decl *D); /// \brief Remove the deferred empty coverage mapping as this /// declaration is actually instrumented. void ClearUnusedCoverageMapping(const Decl *D); /// \brief Emit all the deferred coverage mappings /// for the uninstrumented functions. void EmitDeferredUnusedCoverageMappings(); /// Tell the consumer that this variable has been instantiated. void HandleCXXStaticMemberVarInstantiation(VarDecl *VD); /// \brief If the declaration has internal linkage but is inside an /// extern "C" linkage specification, prepare to emit an alias for it /// to the expected name. template<typename SomeDecl> void MaybeHandleStaticInExternC(const SomeDecl *D, llvm::GlobalValue *GV); /// Add a global to a list to be added to the llvm.used metadata. void addUsedGlobal(llvm::GlobalValue *GV); /// Add a global to a list to be added to the llvm.compiler.used metadata. void addCompilerUsedGlobal(llvm::GlobalValue *GV); /// Add a destructor and object to add to the C++ global destructor function. void AddCXXDtorEntry(llvm::Constant *DtorFn, llvm::Constant *Object) { CXXGlobalDtors.emplace_back(DtorFn, Object); } /// Create a new runtime function with the specified type and name. 
llvm::Constant * CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeSet ExtraAttrs = llvm::AttributeSet(), bool Local = false); /// Create a new compiler builtin function with the specified type and name. llvm::Constant *CreateBuiltinFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeSet ExtraAttrs = llvm::AttributeSet()); /// Create a new runtime global variable with the specified type and name. llvm::Constant *CreateRuntimeVariable(llvm::Type *Ty, StringRef Name); ///@name Custom Blocks Runtime Interfaces ///@{ llvm::Constant *getNSConcreteGlobalBlock(); llvm::Constant *getNSConcreteStackBlock(); llvm::Constant *getBlockObjectAssign(); llvm::Constant *getBlockObjectDispose(); ///@} llvm::Constant *getLLVMLifetimeStartFn(); llvm::Constant *getLLVMLifetimeEndFn(); // Make sure that this type is translated. void UpdateCompletedType(const TagDecl *TD); llvm::Constant *getMemberPointerConstant(const UnaryOperator *e); /// Try to emit the initializer for the given declaration as a constant; /// returns 0 if the expression cannot be emitted as a constant. llvm::Constant *EmitConstantInit(const VarDecl &D, CodeGenFunction *CGF = nullptr); /// Try to emit the given expression as a constant; returns 0 if the /// expression cannot be emitted as a constant. llvm::Constant *EmitConstantExpr(const Expr *E, QualType DestType, CodeGenFunction *CGF = nullptr); /// Emit the given constant value as a constant, in the type's scalar /// representation. llvm::Constant *EmitConstantValue(const APValue &Value, QualType DestType, CodeGenFunction *CGF = nullptr); /// Emit the given constant value as a constant, in the type's memory /// representation. llvm::Constant *EmitConstantValueForMemory(const APValue &Value, QualType DestType, CodeGenFunction *CGF = nullptr); /// \brief Emit type info if type of an expression is a variably modified /// type. Also emit proper debug info for cast types. 
void EmitExplicitCastExprType(const ExplicitCastExpr *E, CodeGenFunction *CGF = nullptr); /// Return the result of value-initializing the given type, i.e. a null /// expression of the given type. This is usually, but not always, an LLVM /// null constant. llvm::Constant *EmitNullConstant(QualType T); /// Return a null constant appropriate for zero-initializing a base class with /// the given type. This is usually, but not always, an LLVM null constant. llvm::Constant *EmitNullConstantForBase(const CXXRecordDecl *Record); /// Emit a general error that something can't be done. void Error(SourceLocation loc, StringRef error); /// Print out an error that codegen doesn't support the specified stmt yet. void ErrorUnsupported(const Stmt *S, const char *Type); /// Print out an error that codegen doesn't support the specified decl yet. void ErrorUnsupported(const Decl *D, const char *Type); /// Set the attributes on the LLVM function for the given decl and function /// info. This applies attributes necessary for handling the ABI as well as /// user specified attributes like section. void SetInternalFunctionAttributes(const Decl *D, llvm::Function *F, const CGFunctionInfo &FI); /// Set the LLVM function attributes (sext, zext, etc). void SetLLVMFunctionAttributes(const Decl *D, const CGFunctionInfo &Info, llvm::Function *F); /// Set the LLVM function attributes which only apply to a function /// definition. void SetLLVMFunctionAttributesForDefinition(const Decl *D, llvm::Function *F); /// Return true iff the given type uses 'sret' when used as a return type. bool ReturnTypeUsesSRet(const CGFunctionInfo &FI); /// Return true iff the given type uses an argument slot when 'sret' is used /// as a return type. bool ReturnSlotInterferesWithArgs(const CGFunctionInfo &FI); /// Return true iff the given type uses 'fpret' when used as a return type. bool ReturnTypeUsesFPRet(QualType ResultType); /// Return true iff the given type uses 'fp2ret' when used as a return type. 
bool ReturnTypeUsesFP2Ret(QualType ResultType); /// Get the LLVM attributes and calling convention to use for a particular /// function type. /// /// \param Name - The function name. /// \param Info - The function type information. /// \param CalleeInfo - The callee information these attributes are being /// constructed for. If valid, the attributes applied to this decl may /// contribute to the function attributes and calling convention. /// \param PAL [out] - On return, the attribute list to use. /// \param CallingConv [out] - On return, the LLVM calling convention to use. void ConstructAttributeList(StringRef Name, const CGFunctionInfo &Info, CGCalleeInfo CalleeInfo, AttributeListType &PAL, unsigned &CallingConv, bool AttrOnCallSite); // Fills in the supplied string map with the set of target features for the // passed in function. void getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, const FunctionDecl *FD); StringRef getMangledName(GlobalDecl GD); StringRef getBlockMangledName(GlobalDecl GD, const BlockDecl *BD); void EmitTentativeDefinition(const VarDecl *D); void EmitVTable(CXXRecordDecl *Class); void RefreshTypeCacheForClass(const CXXRecordDecl *Class); /// \brief Appends Opts to the "Linker Options" metadata value. void AppendLinkerOptions(StringRef Opts); /// \brief Appends a detect mismatch command to the linker options. void AddDetectMismatch(StringRef Name, StringRef Value); /// \brief Appends a dependent lib to the "Linker Options" metadata value. void AddDependentLib(StringRef Lib); llvm::GlobalVariable::LinkageTypes getFunctionLinkage(GlobalDecl GD); void setFunctionLinkage(GlobalDecl GD, llvm::Function *F) { F->setLinkage(getFunctionLinkage(GD)); } /// Set the DLL storage class on F. void setFunctionDLLStorageClass(GlobalDecl GD, llvm::Function *F); /// Return the appropriate linkage for the vtable, VTT, and type information /// of the given class. 
llvm::GlobalVariable::LinkageTypes getVTableLinkage(const CXXRecordDecl *RD); /// Return the store size, in character units, of the given LLVM type. CharUnits GetTargetTypeStoreSize(llvm::Type *Ty) const; /// Returns LLVM linkage for a declarator. llvm::GlobalValue::LinkageTypes getLLVMLinkageForDeclarator(const DeclaratorDecl *D, GVALinkage Linkage, bool IsConstantVariable); /// Returns LLVM linkage for a declarator. llvm::GlobalValue::LinkageTypes getLLVMLinkageVarDefinition(const VarDecl *VD, bool IsConstant); /// Emit all the global annotations. void EmitGlobalAnnotations(); /// Emit an annotation string. llvm::Constant *EmitAnnotationString(StringRef Str); /// Emit the annotation's translation unit. llvm::Constant *EmitAnnotationUnit(SourceLocation Loc); /// Emit the annotation line number. llvm::Constant *EmitAnnotationLineNo(SourceLocation L); /// Generate the llvm::ConstantStruct which contains the annotation /// information for a given GlobalValue. The annotation struct is /// {i8 *, i8 *, i8 *, i32}. The first field is a constant expression, the /// GlobalValue being annotated. The second field is the constant string /// created from the AnnotateAttr's annotation. The third field is a constant /// string containing the name of the translation unit. The fourth field is /// the line number in the file of the annotated value declaration. llvm::Constant *EmitAnnotateAttr(llvm::GlobalValue *GV, const AnnotateAttr *AA, SourceLocation L); /// Add global annotations that are set on D, for the global GV. Those /// annotations are emitted during finalization of the LLVM code. 
void AddGlobalAnnotations(const ValueDecl *D, llvm::GlobalValue *GV); bool isInSanitizerBlacklist(llvm::Function *Fn, SourceLocation Loc) const; bool isInSanitizerBlacklist(llvm::GlobalVariable *GV, SourceLocation Loc, QualType Ty, StringRef Category = StringRef()) const; SanitizerMetadata *getSanitizerMetadata() { return SanitizerMD.get(); } void addDeferredVTable(const CXXRecordDecl *RD) { DeferredVTables.push_back(RD); } /// Emit code for a single global function or var decl. Forward declarations /// are emitted lazily. void EmitGlobal(GlobalDecl D); bool TryEmitDefinitionAsAlias(GlobalDecl Alias, GlobalDecl Target, bool InEveryTU); bool TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D); /// Set attributes for a global definition. void setFunctionDefinitionAttributes(const FunctionDecl *D, llvm::Function *F); llvm::GlobalValue *GetGlobalValue(StringRef Ref); /// Set attributes which are common to any form of a global definition (alias, /// Objective-C method, function, global variable). /// /// NOTE: This should only be called for definitions. void SetCommonAttributes(const Decl *D, llvm::GlobalValue *GV); /// Set attributes which must be preserved by an alias. This includes common /// attributes (i.e. it includes a call to SetCommonAttributes). /// /// NOTE: This should only be called for definitions. void setAliasAttributes(const Decl *D, llvm::GlobalValue *GV); void addReplacement(StringRef Name, llvm::Constant *C); void addGlobalValReplacement(llvm::GlobalValue *GV, llvm::Constant *C); /// \brief Emit code for a threadprivate directive. /// \param D Threadprivate declaration. void EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D); /// \brief Emit code for a declare reduction construct. void EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D, CodeGenFunction *CGF = nullptr); /// Returns whether the given record has hidden LTO visibility and therefore /// may participate in (single-module) CFI and whole-program vtable /// optimization. 
bool HasHiddenLTOVisibility(const CXXRecordDecl *RD); /// Emit type metadata for the given vtable using the given layout. void EmitVTableTypeMetadata(llvm::GlobalVariable *VTable, const VTableLayout &VTLayout); /// Generate a cross-DSO type identifier for MD. llvm::ConstantInt *CreateCrossDsoCfiTypeId(llvm::Metadata *MD); /// Create a metadata identifier for the given type. This may either be an /// MDString (for external identifiers) or a distinct unnamed MDNode (for /// internal identifiers). llvm::Metadata *CreateMetadataIdentifierForType(QualType T); /// Create and attach type metadata to the given function. void CreateFunctionTypeMetadata(const FunctionDecl *FD, llvm::Function *F); /// Returns whether this module needs the "all-vtables" type identifier. bool NeedAllVtablesTypeId() const; /// Create and attach type metadata for the given vtable. void AddVTableTypeMetadata(llvm::GlobalVariable *VTable, CharUnits Offset, const CXXRecordDecl *RD); /// \brief Get the declaration of std::terminate for the platform. llvm::Constant *getTerminateFn(); llvm::SanitizerStatReport &getSanStats(); llvm::Value * createOpenCLIntToSamplerConversion(const Expr *E, CodeGenFunction &CGF); /// Get target specific null pointer. /// \param T is the LLVM type of the null pointer. /// \param QT is the clang QualType of the null pointer. llvm::Constant *getNullPointer(llvm::PointerType *T, QualType QT); private: llvm::Constant * GetOrCreateLLVMFunction(StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable, bool DontDefer = false, bool IsThunk = false, llvm::AttributeSet ExtraAttrs = llvm::AttributeSet(), ForDefinition_t IsForDefinition = NotForDefinition); llvm::Constant *GetOrCreateLLVMGlobal(StringRef MangledName, llvm::PointerType *PTy, const VarDecl *D, ForDefinition_t IsForDefinition = NotForDefinition); void setNonAliasAttributes(const Decl *D, llvm::GlobalObject *GO); /// Set function attributes for a function declaration. 
void SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, bool IsIncompleteFunction, bool IsThunk); void EmitGlobalDefinition(GlobalDecl D, llvm::GlobalValue *GV = nullptr); void EmitGlobalFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV); void EmitGlobalVarDefinition(const VarDecl *D, bool IsTentative = false); void EmitAliasDefinition(GlobalDecl GD); void emitIFuncDefinition(GlobalDecl GD); void EmitObjCPropertyImplementations(const ObjCImplementationDecl *D); void EmitObjCIvarInitializations(ObjCImplementationDecl *D); // C++ related functions. void EmitDeclContext(const DeclContext *DC); void EmitLinkageSpec(const LinkageSpecDecl *D); void CompleteDIClassType(const CXXMethodDecl* D); /// \brief Emit the function that initializes C++ thread_local variables. void EmitCXXThreadLocalInitFunc(); /// Emit the function that initializes C++ globals. void EmitCXXGlobalInitFunc(); /// Emit the function that destroys C++ globals. void EmitCXXGlobalDtorFunc(); /// Emit the function that initializes the specified global (if PerformInit is /// true) and registers its destructor. void EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, llvm::GlobalVariable *Addr, bool PerformInit); void EmitPointerToInitFunc(const VarDecl *VD, llvm::GlobalVariable *Addr, llvm::Function *InitFunc, InitSegAttr *ISA); // FIXME: Hardcoding priority here is gross. void AddGlobalCtor(llvm::Function *Ctor, int Priority = 65535, llvm::Constant *AssociatedData = nullptr); void AddGlobalDtor(llvm::Function *Dtor, int Priority = 65535); /// EmitCtorList - Generates a global array of functions and priorities using /// the given list and name. This array will have appending linkage and is /// suitable for use as a LLVM constructor or destructor array. Clears Fns. void EmitCtorList(CtorList &Fns, const char *GlobalName); /// Emit any needed decls for which code generation was deferred. void EmitDeferred(); /// Call replaceAllUsesWith on all pairs in Replacements. 
void applyReplacements(); /// Call replaceAllUsesWith on all pairs in GlobalValReplacements. void applyGlobalValReplacements(); void checkAliases(); /// Emit any vtables which we deferred and still have a use for. void EmitDeferredVTables(); /// Emit the llvm.used and llvm.compiler.used metadata. void emitLLVMUsed(); /// \brief Emit the link options introduced by imported modules. void EmitModuleLinkOptions(); /// \brief Emit aliases for internal-linkage declarations inside "C" language /// linkage specifications, giving them the "expected" name where possible. void EmitStaticExternCAliases(); void EmitDeclMetadata(); /// \brief Emit the Clang version as llvm.ident metadata. void EmitVersionIdentMetadata(); /// Emits target specific Metadata for global declarations. void EmitTargetMetadata(); /// Emit the llvm.gcov metadata used to tell LLVM where to emit the .gcno and /// .gcda files in a way that persists in .bc files. void EmitCoverageFile(); /// Emits the initializer for a uuidof string. llvm::Constant *EmitUuidofInitializer(StringRef uuidstr); /// Determine whether the definition must be emitted; if this returns \c /// false, the definition can be emitted lazily if it's used. bool MustBeEmitted(const ValueDecl *D); /// Determine whether the definition can be emitted eagerly, or should be /// delayed until the end of the translation unit. This is relevant for /// definitions whose linkage can change, e.g. implicit function instantiations /// which may later be explicitly instantiated. bool MayBeEmittedEagerly(const ValueDecl *D); /// Check whether we can use a "simpler", more core exceptions personality /// function. 
void SimplifyPersonality(); }; } // end namespace CodeGen } // end namespace clang #endif // LLVM_CLANG_LIB_CODEGEN_CODEGENMODULE_H Index: projects/clang400-import/contrib/llvm/tools/clang/lib/Parse/ParseExpr.cpp =================================================================== --- projects/clang400-import/contrib/llvm/tools/clang/lib/Parse/ParseExpr.cpp (revision 313893) +++ projects/clang400-import/contrib/llvm/tools/clang/lib/Parse/ParseExpr.cpp (revision 313894) @@ -1,2998 +1,2998 @@ //===--- ParseExpr.cpp - Expression Parsing -------------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// /// /// \file /// \brief Provides the Expression parsing implementation. /// /// Expressions in C99 basically consist of a bunch of binary operators with /// unary operators and other random stuff at the leaves. /// /// In the C99 grammar, these unary operators bind tightest and are represented /// as the 'cast-expression' production. Everything else is either a binary /// operator (e.g. '/') or a ternary operator ("?:"). The unary leaves are /// handled by ParseCastExpression, the higher level pieces are handled by /// ParseBinaryExpression. /// //===----------------------------------------------------------------------===// #include "RAIIObjectsForParser.h" #include "clang/AST/ASTContext.h" #include "clang/Basic/PrettyStackTrace.h" #include "clang/Parse/Parser.h" #include "clang/Sema/DeclSpec.h" #include "clang/Sema/ParsedTemplate.h" #include "clang/Sema/Scope.h" #include "clang/Sema/TypoCorrection.h" #include "llvm/ADT/SmallVector.h" using namespace clang; /// \brief Simple precedence-based parser for binary/ternary operators. /// /// Note: we diverge from the C99 grammar when parsing the assignment-expression /// production. 
C99 specifies that the LHS of an assignment operator should be /// parsed as a unary-expression, but consistency dictates that it be a /// conditional-expression. In practice, the important thing here is that the /// LHS of an assignment has to be an l-value, which productions between /// unary-expression and conditional-expression don't produce. Because we want /// consistency, we parse the LHS as a conditional-expression, then check for /// l-value-ness in semantic analysis stages. /// /// \verbatim /// pm-expression: [C++ 5.5] /// cast-expression /// pm-expression '.*' cast-expression /// pm-expression '->*' cast-expression /// /// multiplicative-expression: [C99 6.5.5] /// Note: in C++, apply pm-expression instead of cast-expression /// cast-expression /// multiplicative-expression '*' cast-expression /// multiplicative-expression '/' cast-expression /// multiplicative-expression '%' cast-expression /// /// additive-expression: [C99 6.5.6] /// multiplicative-expression /// additive-expression '+' multiplicative-expression /// additive-expression '-' multiplicative-expression /// /// shift-expression: [C99 6.5.7] /// additive-expression /// shift-expression '<<' additive-expression /// shift-expression '>>' additive-expression /// /// relational-expression: [C99 6.5.8] /// shift-expression /// relational-expression '<' shift-expression /// relational-expression '>' shift-expression /// relational-expression '<=' shift-expression /// relational-expression '>=' shift-expression /// /// equality-expression: [C99 6.5.9] /// relational-expression /// equality-expression '==' relational-expression /// equality-expression '!=' relational-expression /// /// AND-expression: [C99 6.5.10] /// equality-expression /// AND-expression '&' equality-expression /// /// exclusive-OR-expression: [C99 6.5.11] /// AND-expression /// exclusive-OR-expression '^' AND-expression /// /// inclusive-OR-expression: [C99 6.5.12] /// exclusive-OR-expression /// inclusive-OR-expression '|' 
exclusive-OR-expression /// /// logical-AND-expression: [C99 6.5.13] /// inclusive-OR-expression /// logical-AND-expression '&&' inclusive-OR-expression /// /// logical-OR-expression: [C99 6.5.14] /// logical-AND-expression /// logical-OR-expression '||' logical-AND-expression /// /// conditional-expression: [C99 6.5.15] /// logical-OR-expression /// logical-OR-expression '?' expression ':' conditional-expression /// [GNU] logical-OR-expression '?' ':' conditional-expression /// [C++] the third operand is an assignment-expression /// /// assignment-expression: [C99 6.5.16] /// conditional-expression /// unary-expression assignment-operator assignment-expression /// [C++] throw-expression [C++ 15] /// /// assignment-operator: one of /// = *= /= %= += -= <<= >>= &= ^= |= /// /// expression: [C99 6.5.17] /// assignment-expression ...[opt] /// expression ',' assignment-expression ...[opt] /// \endverbatim ExprResult Parser::ParseExpression(TypeCastState isTypeCast) { ExprResult LHS(ParseAssignmentExpression(isTypeCast)); return ParseRHSOfBinaryExpression(LHS, prec::Comma); } /// This routine is called when the '@' is seen and consumed. /// Current token is an Identifier and is not a 'try'. This /// routine is necessary to disambiguate \@try-statement from, /// for example, \@encode-expression. /// ExprResult Parser::ParseExpressionWithLeadingAt(SourceLocation AtLoc) { ExprResult LHS(ParseObjCAtExpression(AtLoc)); return ParseRHSOfBinaryExpression(LHS, prec::Comma); } /// This routine is called when a leading '__extension__' is seen and /// consumed. This is necessary because the token gets consumed in the /// process of disambiguating between an expression and a declaration. 
ExprResult Parser::ParseExpressionWithLeadingExtension(SourceLocation ExtLoc) { ExprResult LHS(true); { // Silence extension warnings in the sub-expression ExtensionRAIIObject O(Diags); LHS = ParseCastExpression(false); } if (!LHS.isInvalid()) LHS = Actions.ActOnUnaryOp(getCurScope(), ExtLoc, tok::kw___extension__, LHS.get()); return ParseRHSOfBinaryExpression(LHS, prec::Comma); } /// \brief Parse an expr that doesn't include (top-level) commas. ExprResult Parser::ParseAssignmentExpression(TypeCastState isTypeCast) { if (Tok.is(tok::code_completion)) { Actions.CodeCompleteOrdinaryName(getCurScope(), Sema::PCC_Expression); cutOffParsing(); return ExprError(); } if (Tok.is(tok::kw_throw)) return ParseThrowExpression(); if (Tok.is(tok::kw_co_yield)) return ParseCoyieldExpression(); ExprResult LHS = ParseCastExpression(/*isUnaryExpression=*/false, /*isAddressOfOperand=*/false, isTypeCast); return ParseRHSOfBinaryExpression(LHS, prec::Assignment); } /// \brief Parse an assignment expression where part of an Objective-C message /// send has already been parsed. /// /// In this case \p LBracLoc indicates the location of the '[' of the message /// send, and either \p ReceiverName or \p ReceiverExpr is non-null indicating /// the receiver of the message. /// /// Since this handles full assignment-expression's, it handles postfix /// expressions and other binary operators for these expressions as well. ExprResult Parser::ParseAssignmentExprWithObjCMessageExprStart(SourceLocation LBracLoc, SourceLocation SuperLoc, ParsedType ReceiverType, Expr *ReceiverExpr) { ExprResult R = ParseObjCMessageExpressionBody(LBracLoc, SuperLoc, ReceiverType, ReceiverExpr); R = ParsePostfixExpressionSuffix(R); return ParseRHSOfBinaryExpression(R, prec::Assignment); } ExprResult Parser::ParseConstantExpression(TypeCastState isTypeCast) { // C++03 [basic.def.odr]p2: // An expression is potentially evaluated unless it appears where an // integral constant expression is required (see 5.19) [...]. 
// C++98 and C++11 have no such rule, but this is only a defect in C++98. EnterExpressionEvaluationContext ConstantEvaluated(Actions, Sema::ConstantEvaluated); ExprResult LHS(ParseCastExpression(false, false, isTypeCast)); ExprResult Res(ParseRHSOfBinaryExpression(LHS, prec::Conditional)); return Actions.ActOnConstantExpression(Res); } /// \brief Parse a constraint-expression. /// /// \verbatim /// constraint-expression: [Concepts TS temp.constr.decl p1] /// logical-or-expression /// \endverbatim ExprResult Parser::ParseConstraintExpression() { // FIXME: this may erroneously consume a function-body as the braced // initializer list of a compound literal // // FIXME: this may erroneously consume a parenthesized rvalue reference // declarator as a parenthesized address-of-label expression ExprResult LHS(ParseCastExpression(/*isUnaryExpression=*/false)); ExprResult Res(ParseRHSOfBinaryExpression(LHS, prec::LogicalOr)); return Res; } bool Parser::isNotExpressionStart() { tok::TokenKind K = Tok.getKind(); if (K == tok::l_brace || K == tok::r_brace || K == tok::kw_for || K == tok::kw_while || K == tok::kw_if || K == tok::kw_else || K == tok::kw_goto || K == tok::kw_try) return true; // If this is a decl-specifier, we can't be at the start of an expression. return isKnownToBeDeclarationSpecifier(); } static bool isFoldOperator(prec::Level Level) { return Level > prec::Unknown && Level != prec::Conditional; } static bool isFoldOperator(tok::TokenKind Kind) { return isFoldOperator(getBinOpPrecedence(Kind, false, true)); } /// \brief Parse a binary expression that starts with \p LHS and has a /// precedence of at least \p MinPrec. ExprResult Parser::ParseRHSOfBinaryExpression(ExprResult LHS, prec::Level MinPrec) { prec::Level NextTokPrec = getBinOpPrecedence(Tok.getKind(), GreaterThanIsOperator, getLangOpts().CPlusPlus11); SourceLocation ColonLoc; while (1) { // If this token has a lower precedence than we are allowed to parse (e.g. 
// because we are called recursively, or because the token is not a binop), // then we are done! if (NextTokPrec < MinPrec) return LHS; // Consume the operator, saving the operator token for error reporting. Token OpToken = Tok; ConsumeToken(); if (OpToken.is(tok::caretcaret)) { return ExprError(Diag(Tok, diag::err_opencl_logical_exclusive_or)); } // Bail out when encountering a comma followed by a token which can't // possibly be the start of an expression. For instance: // int f() { return 1, } // We can't do this before consuming the comma, because // isNotExpressionStart() looks at the token stream. if (OpToken.is(tok::comma) && isNotExpressionStart()) { PP.EnterToken(Tok); Tok = OpToken; return LHS; } // If the next token is an ellipsis, then this is a fold-expression. Leave // it alone so we can handle it in the paren expression. if (isFoldOperator(NextTokPrec) && Tok.is(tok::ellipsis)) { // FIXME: We can't check this via lookahead before we consume the token // because that tickles a lexer bug. PP.EnterToken(Tok); Tok = OpToken; return LHS; } // Special case handling for the ternary operator. ExprResult TernaryMiddle(true); if (NextTokPrec == prec::Conditional) { if (Tok.isNot(tok::colon)) { // Don't parse FOO:BAR as if it were a typo for FOO::BAR. ColonProtectionRAIIObject X(*this); // Handle this production specially: // logical-OR-expression '?' expression ':' conditional-expression // In particular, the RHS of the '?' is 'expression', not // 'logical-OR-expression' as we might expect. TernaryMiddle = ParseExpression(); if (TernaryMiddle.isInvalid()) { Actions.CorrectDelayedTyposInExpr(LHS); LHS = ExprError(); TernaryMiddle = nullptr; } } else { // Special case handling of "X ? Y : Z" where Y is empty: // logical-OR-expression '?' ':' conditional-expression [GNU] TernaryMiddle = nullptr; Diag(Tok, diag::ext_gnu_conditional_expr); } if (!TryConsumeToken(tok::colon, ColonLoc)) { // Otherwise, we're missing a ':'. 
Assume that this was a typo that // the user forgot. If we're not in a macro expansion, we can suggest // a fixit hint. If there were two spaces before the current token, // suggest inserting the colon in between them, otherwise insert ": ". SourceLocation FILoc = Tok.getLocation(); const char *FIText = ": "; const SourceManager &SM = PP.getSourceManager(); if (FILoc.isFileID() || PP.isAtStartOfMacroExpansion(FILoc, &FILoc)) { assert(FILoc.isFileID()); bool IsInvalid = false; const char *SourcePtr = SM.getCharacterData(FILoc.getLocWithOffset(-1), &IsInvalid); if (!IsInvalid && *SourcePtr == ' ') { SourcePtr = SM.getCharacterData(FILoc.getLocWithOffset(-2), &IsInvalid); if (!IsInvalid && *SourcePtr == ' ') { FILoc = FILoc.getLocWithOffset(-1); FIText = ":"; } } } Diag(Tok, diag::err_expected) << tok::colon << FixItHint::CreateInsertion(FILoc, FIText); Diag(OpToken, diag::note_matching) << tok::question; ColonLoc = Tok.getLocation(); } } // Code completion for the right-hand side of an assignment expression // goes through a special hook that takes the left-hand side into account. if (Tok.is(tok::code_completion) && NextTokPrec == prec::Assignment) { Actions.CodeCompleteAssignmentRHS(getCurScope(), LHS.get()); cutOffParsing(); return ExprError(); } // Parse another leaf here for the RHS of the operator. // ParseCastExpression works here because all RHS expressions in C have it // as a prefix, at least. However, in C++, an assignment-expression could // be a throw-expression, which is not a valid cast-expression. // Therefore we need some special-casing here. // Also note that the third operand of the conditional operator is // an assignment-expression in C++, and in C++11, we can have a // braced-init-list on the RHS of an assignment. For better diagnostics, // parse as if we were allowed braced-init-lists everywhere, and check that // they only appear on the RHS of assignments later. 
ExprResult RHS; bool RHSIsInitList = false; if (getLangOpts().CPlusPlus11 && Tok.is(tok::l_brace)) { RHS = ParseBraceInitializer(); RHSIsInitList = true; } else if (getLangOpts().CPlusPlus && NextTokPrec <= prec::Conditional) RHS = ParseAssignmentExpression(); else RHS = ParseCastExpression(false); if (RHS.isInvalid()) { // FIXME: Errors generated by the delayed typo correction should be // printed before errors from parsing the RHS, not after. Actions.CorrectDelayedTyposInExpr(LHS); if (TernaryMiddle.isUsable()) TernaryMiddle = Actions.CorrectDelayedTyposInExpr(TernaryMiddle); LHS = ExprError(); } // Remember the precedence of this operator and get the precedence of the // operator immediately to the right of the RHS. prec::Level ThisPrec = NextTokPrec; NextTokPrec = getBinOpPrecedence(Tok.getKind(), GreaterThanIsOperator, getLangOpts().CPlusPlus11); // Assignment and conditional expressions are right-associative. bool isRightAssoc = ThisPrec == prec::Conditional || ThisPrec == prec::Assignment; // Get the precedence of the operator to the right of the RHS. If it binds // more tightly with RHS than we do, evaluate it completely first. if (ThisPrec < NextTokPrec || (ThisPrec == NextTokPrec && isRightAssoc)) { if (!RHS.isInvalid() && RHSIsInitList) { Diag(Tok, diag::err_init_list_bin_op) << /*LHS*/0 << PP.getSpelling(Tok) << Actions.getExprRange(RHS.get()); RHS = ExprError(); } // If this is left-associative, only parse things on the RHS that bind // more tightly than the current operator. If it is right-associative, it // is okay to bind exactly as tightly. For example, compile A=B=C=D as // A=(B=(C=D)), where each paren is a level of recursion here. // The function takes ownership of the RHS. RHS = ParseRHSOfBinaryExpression(RHS, static_cast<prec::Level>(ThisPrec + !isRightAssoc)); RHSIsInitList = false; if (RHS.isInvalid()) { // FIXME: Errors generated by the delayed typo correction should be // printed before errors from ParseRHSOfBinaryExpression, not after. 
Actions.CorrectDelayedTyposInExpr(LHS); if (TernaryMiddle.isUsable()) TernaryMiddle = Actions.CorrectDelayedTyposInExpr(TernaryMiddle); LHS = ExprError(); } NextTokPrec = getBinOpPrecedence(Tok.getKind(), GreaterThanIsOperator, getLangOpts().CPlusPlus11); } if (!RHS.isInvalid() && RHSIsInitList) { if (ThisPrec == prec::Assignment) { Diag(OpToken, diag::warn_cxx98_compat_generalized_initializer_lists) << Actions.getExprRange(RHS.get()); } else { Diag(OpToken, diag::err_init_list_bin_op) << /*RHS*/1 << PP.getSpelling(OpToken) << Actions.getExprRange(RHS.get()); LHS = ExprError(); } } ExprResult OrigLHS = LHS; if (!LHS.isInvalid()) { // Combine the LHS and RHS into the LHS (e.g. build AST). if (TernaryMiddle.isInvalid()) { // If we're using '>>' as an operator within a template // argument list (in C++98), suggest the addition of // parentheses so that the code remains well-formed in C++0x. if (!GreaterThanIsOperator && OpToken.is(tok::greatergreater)) SuggestParentheses(OpToken.getLocation(), diag::warn_cxx11_right_shift_in_template_arg, SourceRange(Actions.getExprRange(LHS.get()).getBegin(), Actions.getExprRange(RHS.get()).getEnd())); LHS = Actions.ActOnBinOp(getCurScope(), OpToken.getLocation(), OpToken.getKind(), LHS.get(), RHS.get()); } else { LHS = Actions.ActOnConditionalOp(OpToken.getLocation(), ColonLoc, LHS.get(), TernaryMiddle.get(), RHS.get()); } // In this case, ActOnBinOp or ActOnConditionalOp performed the // CorrectDelayedTyposInExpr check. if (!getLangOpts().CPlusPlus) continue; } // Ensure potential typos aren't left undiagnosed. if (LHS.isInvalid()) { Actions.CorrectDelayedTyposInExpr(OrigLHS); Actions.CorrectDelayedTyposInExpr(TernaryMiddle); Actions.CorrectDelayedTyposInExpr(RHS); } } } /// \brief Parse a cast-expression, or, if \p isUnaryExpression is true, /// parse a unary-expression. /// /// \p isAddressOfOperand exists because an id-expression that is the /// operand of address-of gets special treatment due to member pointers. 
/// ExprResult Parser::ParseCastExpression(bool isUnaryExpression, bool isAddressOfOperand, TypeCastState isTypeCast) { bool NotCastExpr; ExprResult Res = ParseCastExpression(isUnaryExpression, isAddressOfOperand, NotCastExpr, isTypeCast); if (NotCastExpr) Diag(Tok, diag::err_expected_expression); return Res; } namespace { class CastExpressionIdValidator : public CorrectionCandidateCallback { public: CastExpressionIdValidator(Token Next, bool AllowTypes, bool AllowNonTypes) : NextToken(Next), AllowNonTypes(AllowNonTypes) { WantTypeSpecifiers = WantFunctionLikeCasts = AllowTypes; } bool ValidateCandidate(const TypoCorrection &candidate) override { NamedDecl *ND = candidate.getCorrectionDecl(); if (!ND) return candidate.isKeyword(); if (isa(ND)) return WantTypeSpecifiers; if (!AllowNonTypes || !CorrectionCandidateCallback::ValidateCandidate(candidate)) return false; if (!NextToken.isOneOf(tok::equal, tok::arrow, tok::period)) return true; for (auto *C : candidate) { NamedDecl *ND = C->getUnderlyingDecl(); if (isa(ND) && !isa(ND)) return true; } return false; } private: Token NextToken; bool AllowNonTypes; }; } /// \brief Parse a cast-expression, or, if \pisUnaryExpression is true, parse /// a unary-expression. /// /// \p isAddressOfOperand exists because an id-expression that is the operand /// of address-of gets special treatment due to member pointers. NotCastExpr /// is set to true if the token is not the start of a cast-expression, and no /// diagnostic is emitted in this case and no tokens are consumed. /// /// \verbatim /// cast-expression: [C99 6.5.4] /// unary-expression /// '(' type-name ')' cast-expression /// /// unary-expression: [C99 6.5.3] /// postfix-expression /// '++' unary-expression /// '--' unary-expression /// [Coro] 'co_await' cast-expression /// unary-operator cast-expression /// 'sizeof' unary-expression /// 'sizeof' '(' type-name ')' /// [C++11] 'sizeof' '...' 
'(' identifier ')' /// [GNU] '__alignof' unary-expression /// [GNU] '__alignof' '(' type-name ')' /// [C11] '_Alignof' '(' type-name ')' /// [C++11] 'alignof' '(' type-id ')' /// [GNU] '&&' identifier /// [C++11] 'noexcept' '(' expression ')' [C++11 5.3.7] /// [C++] new-expression /// [C++] delete-expression /// /// unary-operator: one of /// '&' '*' '+' '-' '~' '!' /// [GNU] '__extension__' '__real' '__imag' /// /// primary-expression: [C99 6.5.1] /// [C99] identifier /// [C++] id-expression /// constant /// string-literal /// [C++] boolean-literal [C++ 2.13.5] /// [C++11] 'nullptr' [C++11 2.14.7] /// [C++11] user-defined-literal /// '(' expression ')' /// [C11] generic-selection /// '__func__' [C99 6.4.2.2] /// [GNU] '__FUNCTION__' /// [MS] '__FUNCDNAME__' /// [MS] 'L__FUNCTION__' /// [GNU] '__PRETTY_FUNCTION__' /// [GNU] '(' compound-statement ')' /// [GNU] '__builtin_va_arg' '(' assignment-expression ',' type-name ')' /// [GNU] '__builtin_offsetof' '(' type-name ',' offsetof-member-designator')' /// [GNU] '__builtin_choose_expr' '(' assign-expr ',' assign-expr ',' /// assign-expr ')' /// [GNU] '__builtin_types_compatible_p' '(' type-name ',' type-name ')' /// [GNU] '__null' /// [OBJC] '[' objc-message-expr ']' /// [OBJC] '\@selector' '(' objc-selector-arg ')' /// [OBJC] '\@protocol' '(' identifier ')' /// [OBJC] '\@encode' '(' type-name ')' /// [OBJC] objc-string-literal /// [C++] simple-type-specifier '(' expression-list[opt] ')' [C++ 5.2.3] /// [C++11] simple-type-specifier braced-init-list [C++11 5.2.3] /// [C++] typename-specifier '(' expression-list[opt] ')' [C++ 5.2.3] /// [C++11] typename-specifier braced-init-list [C++11 5.2.3] /// [C++] 'const_cast' '<' type-name '>' '(' expression ')' [C++ 5.2p1] /// [C++] 'dynamic_cast' '<' type-name '>' '(' expression ')' [C++ 5.2p1] /// [C++] 'reinterpret_cast' '<' type-name '>' '(' expression ')' [C++ 5.2p1] /// [C++] 'static_cast' '<' type-name '>' '(' expression ')' [C++ 5.2p1] /// [C++] 'typeid' '(' expression 
')' [C++ 5.2p1] /// [C++] 'typeid' '(' type-id ')' [C++ 5.2p1] /// [C++] 'this' [C++ 9.3.2] /// [G++] unary-type-trait '(' type-id ')' /// [G++] binary-type-trait '(' type-id ',' type-id ')' [TODO] /// [EMBT] array-type-trait '(' type-id ',' integer ')' /// [clang] '^' block-literal /// /// constant: [C99 6.4.4] /// integer-constant /// floating-constant /// enumeration-constant -> identifier /// character-constant /// /// id-expression: [C++ 5.1] /// unqualified-id /// qualified-id /// /// unqualified-id: [C++ 5.1] /// identifier /// operator-function-id /// conversion-function-id /// '~' class-name /// template-id /// /// new-expression: [C++ 5.3.4] /// '::'[opt] 'new' new-placement[opt] new-type-id /// new-initializer[opt] /// '::'[opt] 'new' new-placement[opt] '(' type-id ')' /// new-initializer[opt] /// /// delete-expression: [C++ 5.3.5] /// '::'[opt] 'delete' cast-expression /// '::'[opt] 'delete' '[' ']' cast-expression /// /// [GNU/Embarcadero] unary-type-trait: /// '__is_arithmetic' /// '__is_floating_point' /// '__is_integral' /// '__is_lvalue_expr' /// '__is_rvalue_expr' /// '__is_complete_type' /// '__is_void' /// '__is_array' /// '__is_function' /// '__is_reference' /// '__is_lvalue_reference' /// '__is_rvalue_reference' /// '__is_fundamental' /// '__is_object' /// '__is_scalar' /// '__is_compound' /// '__is_pointer' /// '__is_member_object_pointer' /// '__is_member_function_pointer' /// '__is_member_pointer' /// '__is_const' /// '__is_volatile' /// '__is_trivial' /// '__is_standard_layout' /// '__is_signed' /// '__is_unsigned' /// /// [GNU] unary-type-trait: /// '__has_nothrow_assign' /// '__has_nothrow_copy' /// '__has_nothrow_constructor' /// '__has_trivial_assign' [TODO] /// '__has_trivial_copy' [TODO] /// '__has_trivial_constructor' /// '__has_trivial_destructor' /// '__has_virtual_destructor' /// '__is_abstract' [TODO] /// '__is_class' /// '__is_empty' [TODO] /// '__is_enum' /// '__is_final' /// '__is_pod' /// '__is_polymorphic' /// '__is_sealed' 
[MS] /// '__is_trivial' /// '__is_union' /// /// [Clang] unary-type-trait: /// '__trivially_copyable' /// /// binary-type-trait: /// [GNU] '__is_base_of' /// [MS] '__is_convertible_to' /// '__is_convertible' /// '__is_same' /// /// [Embarcadero] array-type-trait: /// '__array_rank' /// '__array_extent' /// /// [Embarcadero] expression-trait: /// '__is_lvalue_expr' /// '__is_rvalue_expr' /// \endverbatim /// ExprResult Parser::ParseCastExpression(bool isUnaryExpression, bool isAddressOfOperand, bool &NotCastExpr, TypeCastState isTypeCast) { ExprResult Res; tok::TokenKind SavedKind = Tok.getKind(); NotCastExpr = false; // This handles all of cast-expression, unary-expression, postfix-expression, // and primary-expression. We handle them together like this for efficiency // and to simplify handling of an expression starting with a '(' token: which // may be one of a parenthesized expression, cast-expression, compound literal // expression, or statement expression. // // If the parsed tokens consist of a primary-expression, the cases below // break out of the switch; at the end we call ParsePostfixExpressionSuffix // to handle the postfix expression suffixes. Cases that cannot be followed // by postfix exprs should return without invoking // ParsePostfixExpressionSuffix. switch (SavedKind) { case tok::l_paren: { // If this expression is limited to being a unary-expression, the parent can // not start a cast expression. ParenParseOption ParenExprType = (isUnaryExpression && !getLangOpts().CPlusPlus) ? CompoundLiteral : CastExpr; ParsedType CastTy; SourceLocation RParenLoc; Res = ParseParenExpression(ParenExprType, false/*stopIfCastExr*/, isTypeCast == IsTypeCast, CastTy, RParenLoc); switch (ParenExprType) { case SimpleExpr: break; // Nothing else to do. case CompoundStmt: break; // Nothing else to do. case CompoundLiteral: // We parsed '(' type-name ')' '{' ... '}'. If any suffixes of // postfix-expression exist, parse them now. 
break; case CastExpr: // We have parsed the cast-expression and no postfix-expr pieces are // following. return Res; } break; } // primary-expression case tok::numeric_constant: // constant: integer-constant // constant: floating-constant Res = Actions.ActOnNumericConstant(Tok, /*UDLScope*/getCurScope()); ConsumeToken(); break; case tok::kw_true: case tok::kw_false: return ParseCXXBoolLiteral(); case tok::kw___objc_yes: case tok::kw___objc_no: return ParseObjCBoolLiteral(); case tok::kw_nullptr: Diag(Tok, diag::warn_cxx98_compat_nullptr); return Actions.ActOnCXXNullPtrLiteral(ConsumeToken()); case tok::annot_primary_expr: assert(Res.get() == nullptr && "Stray primary-expression annotation?"); Res = getExprAnnotation(Tok); ConsumeToken(); break; case tok::kw___super: case tok::kw_decltype: // Annotate the token and tail recurse. if (TryAnnotateTypeOrScopeToken()) return ExprError(); assert(Tok.isNot(tok::kw_decltype) && Tok.isNot(tok::kw___super)); return ParseCastExpression(isUnaryExpression, isAddressOfOperand); case tok::identifier: { // primary-expression: identifier // unqualified-id: identifier // constant: enumeration-constant // Turn a potentially qualified name into a annot_typename or // annot_cxxscope if it would be valid. This handles things like x::y, etc. if (getLangOpts().CPlusPlus) { // Avoid the unnecessary parse-time lookup in the common case // where the syntax forbids a type. const Token &Next = NextToken(); // If this identifier was reverted from a token ID, and the next token // is a parenthesis, this is likely to be a use of a type trait. Check // those tokens. if (Next.is(tok::l_paren) && Tok.is(tok::identifier) && Tok.getIdentifierInfo()->hasRevertedTokenIDToIdentifier()) { IdentifierInfo *II = Tok.getIdentifierInfo(); // Build up the mapping of revertible type traits, for future use. 
if (RevertibleTypeTraits.empty()) { #define RTT_JOIN(X,Y) X##Y #define REVERTIBLE_TYPE_TRAIT(Name) \ RevertibleTypeTraits[PP.getIdentifierInfo(#Name)] \ = RTT_JOIN(tok::kw_,Name) REVERTIBLE_TYPE_TRAIT(__is_abstract); REVERTIBLE_TYPE_TRAIT(__is_arithmetic); REVERTIBLE_TYPE_TRAIT(__is_array); REVERTIBLE_TYPE_TRAIT(__is_assignable); REVERTIBLE_TYPE_TRAIT(__is_base_of); REVERTIBLE_TYPE_TRAIT(__is_class); REVERTIBLE_TYPE_TRAIT(__is_complete_type); REVERTIBLE_TYPE_TRAIT(__is_compound); REVERTIBLE_TYPE_TRAIT(__is_const); REVERTIBLE_TYPE_TRAIT(__is_constructible); REVERTIBLE_TYPE_TRAIT(__is_convertible); REVERTIBLE_TYPE_TRAIT(__is_convertible_to); REVERTIBLE_TYPE_TRAIT(__is_destructible); REVERTIBLE_TYPE_TRAIT(__is_empty); REVERTIBLE_TYPE_TRAIT(__is_enum); REVERTIBLE_TYPE_TRAIT(__is_floating_point); REVERTIBLE_TYPE_TRAIT(__is_final); REVERTIBLE_TYPE_TRAIT(__is_function); REVERTIBLE_TYPE_TRAIT(__is_fundamental); REVERTIBLE_TYPE_TRAIT(__is_integral); REVERTIBLE_TYPE_TRAIT(__is_interface_class); REVERTIBLE_TYPE_TRAIT(__is_literal); REVERTIBLE_TYPE_TRAIT(__is_lvalue_expr); REVERTIBLE_TYPE_TRAIT(__is_lvalue_reference); REVERTIBLE_TYPE_TRAIT(__is_member_function_pointer); REVERTIBLE_TYPE_TRAIT(__is_member_object_pointer); REVERTIBLE_TYPE_TRAIT(__is_member_pointer); REVERTIBLE_TYPE_TRAIT(__is_nothrow_assignable); REVERTIBLE_TYPE_TRAIT(__is_nothrow_constructible); REVERTIBLE_TYPE_TRAIT(__is_nothrow_destructible); REVERTIBLE_TYPE_TRAIT(__is_object); REVERTIBLE_TYPE_TRAIT(__is_pod); REVERTIBLE_TYPE_TRAIT(__is_pointer); REVERTIBLE_TYPE_TRAIT(__is_polymorphic); REVERTIBLE_TYPE_TRAIT(__is_reference); REVERTIBLE_TYPE_TRAIT(__is_rvalue_expr); REVERTIBLE_TYPE_TRAIT(__is_rvalue_reference); REVERTIBLE_TYPE_TRAIT(__is_same); REVERTIBLE_TYPE_TRAIT(__is_scalar); REVERTIBLE_TYPE_TRAIT(__is_sealed); REVERTIBLE_TYPE_TRAIT(__is_signed); REVERTIBLE_TYPE_TRAIT(__is_standard_layout); REVERTIBLE_TYPE_TRAIT(__is_trivial); REVERTIBLE_TYPE_TRAIT(__is_trivially_assignable); 
REVERTIBLE_TYPE_TRAIT(__is_trivially_constructible); REVERTIBLE_TYPE_TRAIT(__is_trivially_copyable); REVERTIBLE_TYPE_TRAIT(__is_union); REVERTIBLE_TYPE_TRAIT(__is_unsigned); REVERTIBLE_TYPE_TRAIT(__is_void); REVERTIBLE_TYPE_TRAIT(__is_volatile); #undef REVERTIBLE_TYPE_TRAIT #undef RTT_JOIN } // If we find that this is in fact the name of a type trait, // update the token kind in place and parse again to treat it as // the appropriate kind of type trait. llvm::SmallDenseMap::iterator Known = RevertibleTypeTraits.find(II); if (Known != RevertibleTypeTraits.end()) { Tok.setKind(Known->second); return ParseCastExpression(isUnaryExpression, isAddressOfOperand, NotCastExpr, isTypeCast); } } if ((!ColonIsSacred && Next.is(tok::colon)) || Next.isOneOf(tok::coloncolon, tok::less, tok::l_paren, tok::l_brace)) { // If TryAnnotateTypeOrScopeToken annotates the token, tail recurse. if (TryAnnotateTypeOrScopeToken()) return ExprError(); if (!Tok.is(tok::identifier)) return ParseCastExpression(isUnaryExpression, isAddressOfOperand); } } // Consume the identifier so that we can see if it is followed by a '(' or // '.'. IdentifierInfo &II = *Tok.getIdentifierInfo(); SourceLocation ILoc = ConsumeToken(); // Support 'Class.property' and 'super.property' notation. if (getLangOpts().ObjC1 && Tok.is(tok::period) && (Actions.getTypeName(II, ILoc, getCurScope()) || // Allow the base to be 'super' if in an objc-method. (&II == Ident_super && getCurScope()->isInObjcMethodScope()))) { ConsumeToken(); if (Tok.is(tok::code_completion) && &II != Ident_super) { Actions.CodeCompleteObjCClassPropertyRefExpr( getCurScope(), II, ILoc, ExprStatementTokLoc == ILoc); cutOffParsing(); return ExprError(); } // Allow either an identifier or the keyword 'class' (in C++). 
if (Tok.isNot(tok::identifier) && !(getLangOpts().CPlusPlus && Tok.is(tok::kw_class))) { Diag(Tok, diag::err_expected_property_name); return ExprError(); } IdentifierInfo &PropertyName = *Tok.getIdentifierInfo(); SourceLocation PropertyLoc = ConsumeToken(); Res = Actions.ActOnClassPropertyRefExpr(II, PropertyName, ILoc, PropertyLoc); break; } // In an Objective-C method, if we have "super" followed by an identifier, // the token sequence is ill-formed. However, if there's a ':' or ']' after // that identifier, this is probably a message send with a missing open // bracket. Treat it as such. if (getLangOpts().ObjC1 && &II == Ident_super && !InMessageExpression && getCurScope()->isInObjcMethodScope() && ((Tok.is(tok::identifier) && (NextToken().is(tok::colon) || NextToken().is(tok::r_square))) || Tok.is(tok::code_completion))) { Res = ParseObjCMessageExpressionBody(SourceLocation(), ILoc, nullptr, nullptr); break; } // If we have an Objective-C class name followed by an identifier // and either ':' or ']', this is an Objective-C class message // send that's missing the opening '['. Recovery // appropriately. Also take this path if we're performing code // completion after an Objective-C class name. if (getLangOpts().ObjC1 && ((Tok.is(tok::identifier) && !InMessageExpression) || Tok.is(tok::code_completion))) { const Token& Next = NextToken(); if (Tok.is(tok::code_completion) || Next.is(tok::colon) || Next.is(tok::r_square)) if (ParsedType Typ = Actions.getTypeName(II, ILoc, getCurScope())) if (Typ.get()->isObjCObjectOrInterfaceType()) { // Fake up a Declarator to use with ActOnTypeName. 
DeclSpec DS(AttrFactory); DS.SetRangeStart(ILoc); DS.SetRangeEnd(ILoc); const char *PrevSpec = nullptr; unsigned DiagID; DS.SetTypeSpecType(TST_typename, ILoc, PrevSpec, DiagID, Typ, Actions.getASTContext().getPrintingPolicy()); Declarator DeclaratorInfo(DS, Declarator::TypeNameContext); TypeResult Ty = Actions.ActOnTypeName(getCurScope(), DeclaratorInfo); if (Ty.isInvalid()) break; Res = ParseObjCMessageExpressionBody(SourceLocation(), SourceLocation(), Ty.get(), nullptr); break; } } // Make sure to pass down the right value for isAddressOfOperand. if (isAddressOfOperand && isPostfixExpressionSuffixStart()) isAddressOfOperand = false; // Function designators are allowed to be undeclared (C99 6.5.1p2), so we // need to know whether or not this identifier is a function designator or // not. UnqualifiedId Name; CXXScopeSpec ScopeSpec; SourceLocation TemplateKWLoc; Token Replacement; auto Validator = llvm::make_unique( Tok, isTypeCast != NotTypeCast, isTypeCast != IsTypeCast); Validator->IsAddressOfOperand = isAddressOfOperand; if (Tok.isOneOf(tok::periodstar, tok::arrowstar)) { Validator->WantExpressionKeywords = false; Validator->WantRemainingKeywords = false; } else { Validator->WantRemainingKeywords = Tok.isNot(tok::r_paren); } Name.setIdentifier(&II, ILoc); Res = Actions.ActOnIdExpression( getCurScope(), ScopeSpec, TemplateKWLoc, Name, Tok.is(tok::l_paren), isAddressOfOperand, std::move(Validator), /*IsInlineAsmIdentifier=*/false, Tok.is(tok::r_paren) ? 
nullptr : &Replacement); if (!Res.isInvalid() && !Res.get()) { UnconsumeToken(Replacement); return ParseCastExpression(isUnaryExpression, isAddressOfOperand, NotCastExpr, isTypeCast); } break; } case tok::char_constant: // constant: character-constant case tok::wide_char_constant: case tok::utf8_char_constant: case tok::utf16_char_constant: case tok::utf32_char_constant: Res = Actions.ActOnCharacterConstant(Tok, /*UDLScope*/getCurScope()); ConsumeToken(); break; case tok::kw___func__: // primary-expression: __func__ [C99 6.4.2.2] case tok::kw___FUNCTION__: // primary-expression: __FUNCTION__ [GNU] case tok::kw___FUNCDNAME__: // primary-expression: __FUNCDNAME__ [MS] case tok::kw___FUNCSIG__: // primary-expression: __FUNCSIG__ [MS] case tok::kw_L__FUNCTION__: // primary-expression: L__FUNCTION__ [MS] case tok::kw___PRETTY_FUNCTION__: // primary-expression: __P..Y_F..N__ [GNU] Res = Actions.ActOnPredefinedExpr(Tok.getLocation(), SavedKind); ConsumeToken(); break; case tok::string_literal: // primary-expression: string-literal case tok::wide_string_literal: case tok::utf8_string_literal: case tok::utf16_string_literal: case tok::utf32_string_literal: Res = ParseStringLiteralExpression(true); break; case tok::kw__Generic: // primary-expression: generic-selection [C11 6.5.1] Res = ParseGenericSelectionExpression(); break; case tok::kw___builtin_available: return ParseAvailabilityCheckExpr(Tok.getLocation()); case tok::kw___builtin_va_arg: case tok::kw___builtin_offsetof: case tok::kw___builtin_choose_expr: case tok::kw___builtin_astype: // primary-expression: [OCL] as_type() case tok::kw___builtin_convertvector: return ParseBuiltinPrimaryExpression(); case tok::kw___null: return Actions.ActOnGNUNullExpr(ConsumeToken()); case tok::plusplus: // unary-expression: '++' unary-expression [C99] case tok::minusminus: { // unary-expression: '--' unary-expression [C99] // C++ [expr.unary] has: // unary-expression: // ++ cast-expression // -- cast-expression Token SavedTok = Tok; 
ConsumeToken(); // One special case is implicitly handled here: if the preceding tokens are // an ambiguous cast expression, such as "(T())++", then we recurse to // determine whether the '++' is prefix or postfix. Res = ParseCastExpression(!getLangOpts().CPlusPlus, /*isAddressOfOperand*/false, NotCastExpr, NotTypeCast); if (NotCastExpr) { // If we return with NotCastExpr = true, we must not consume any tokens, // so put the token back where we found it. assert(Res.isInvalid()); UnconsumeToken(SavedTok); return ExprError(); } if (!Res.isInvalid()) Res = Actions.ActOnUnaryOp(getCurScope(), SavedTok.getLocation(), SavedKind, Res.get()); return Res; } case tok::amp: { // unary-expression: '&' cast-expression // Special treatment because of member pointers SourceLocation SavedLoc = ConsumeToken(); Res = ParseCastExpression(false, true); if (!Res.isInvalid()) Res = Actions.ActOnUnaryOp(getCurScope(), SavedLoc, SavedKind, Res.get()); return Res; } case tok::star: // unary-expression: '*' cast-expression case tok::plus: // unary-expression: '+' cast-expression case tok::minus: // unary-expression: '-' cast-expression case tok::tilde: // unary-expression: '~' cast-expression case tok::exclaim: // unary-expression: '!' cast-expression case tok::kw___real: // unary-expression: '__real' cast-expression [GNU] case tok::kw___imag: { // unary-expression: '__imag' cast-expression [GNU] SourceLocation SavedLoc = ConsumeToken(); Res = ParseCastExpression(false); if (!Res.isInvalid()) Res = Actions.ActOnUnaryOp(getCurScope(), SavedLoc, SavedKind, Res.get()); return Res; } case tok::kw_co_await: { // unary-expression: 'co_await' cast-expression SourceLocation CoawaitLoc = ConsumeToken(); Res = ParseCastExpression(false); if (!Res.isInvalid()) Res = Actions.ActOnCoawaitExpr(getCurScope(), CoawaitLoc, Res.get()); return Res; } case tok::kw___extension__:{//unary-expression:'__extension__' cast-expr [GNU] // __extension__ silences extension warnings in the subexpression. 
ExtensionRAIIObject O(Diags); // Use RAII to do this. SourceLocation SavedLoc = ConsumeToken(); Res = ParseCastExpression(false); if (!Res.isInvalid()) Res = Actions.ActOnUnaryOp(getCurScope(), SavedLoc, SavedKind, Res.get()); return Res; } case tok::kw__Alignof: // unary-expression: '_Alignof' '(' type-name ')' if (!getLangOpts().C11) Diag(Tok, diag::ext_c11_alignment) << Tok.getName(); // fallthrough case tok::kw_alignof: // unary-expression: 'alignof' '(' type-id ')' case tok::kw___alignof: // unary-expression: '__alignof' unary-expression // unary-expression: '__alignof' '(' type-name ')' case tok::kw_sizeof: // unary-expression: 'sizeof' unary-expression // unary-expression: 'sizeof' '(' type-name ')' case tok::kw_vec_step: // unary-expression: OpenCL 'vec_step' expression // unary-expression: '__builtin_omp_required_simd_align' '(' type-name ')' case tok::kw___builtin_omp_required_simd_align: return ParseUnaryExprOrTypeTraitExpression(); case tok::ampamp: { // unary-expression: '&&' identifier SourceLocation AmpAmpLoc = ConsumeToken(); if (Tok.isNot(tok::identifier)) return ExprError(Diag(Tok, diag::err_expected) << tok::identifier); if (getCurScope()->getFnParent() == nullptr) return ExprError(Diag(Tok, diag::err_address_of_label_outside_fn)); Diag(AmpAmpLoc, diag::ext_gnu_address_of_label); LabelDecl *LD = Actions.LookupOrCreateLabel(Tok.getIdentifierInfo(), Tok.getLocation()); Res = Actions.ActOnAddrLabel(AmpAmpLoc, Tok.getLocation(), LD); ConsumeToken(); return Res; } case tok::kw_const_cast: case tok::kw_dynamic_cast: case tok::kw_reinterpret_cast: case tok::kw_static_cast: Res = ParseCXXCasts(); break; case tok::kw_typeid: Res = ParseCXXTypeid(); break; case tok::kw___uuidof: Res = ParseCXXUuidof(); break; case tok::kw_this: Res = ParseCXXThis(); break; case tok::annot_typename: if (isStartOfObjCClassMessageMissingOpenBracket()) { ParsedType Type = getTypeAnnotation(Tok); // Fake up a Declarator to use with ActOnTypeName. 
DeclSpec DS(AttrFactory); DS.SetRangeStart(Tok.getLocation()); DS.SetRangeEnd(Tok.getLastLoc()); const char *PrevSpec = nullptr; unsigned DiagID; DS.SetTypeSpecType(TST_typename, Tok.getAnnotationEndLoc(), PrevSpec, DiagID, Type, Actions.getASTContext().getPrintingPolicy()); Declarator DeclaratorInfo(DS, Declarator::TypeNameContext); TypeResult Ty = Actions.ActOnTypeName(getCurScope(), DeclaratorInfo); if (Ty.isInvalid()) break; ConsumeToken(); Res = ParseObjCMessageExpressionBody(SourceLocation(), SourceLocation(), Ty.get(), nullptr); break; } // Fall through case tok::annot_decltype: case tok::kw_char: case tok::kw_wchar_t: case tok::kw_char16_t: case tok::kw_char32_t: case tok::kw_bool: case tok::kw_short: case tok::kw_int: case tok::kw_long: case tok::kw___int64: case tok::kw___int128: case tok::kw_signed: case tok::kw_unsigned: case tok::kw_half: case tok::kw_float: case tok::kw_double: case tok::kw___float128: case tok::kw_void: case tok::kw_typename: case tok::kw_typeof: case tok::kw___vector: #define GENERIC_IMAGE_TYPE(ImgType, Id) case tok::kw_##ImgType##_t: #include "clang/Basic/OpenCLImageTypes.def" { if (!getLangOpts().CPlusPlus) { Diag(Tok, diag::err_expected_expression); return ExprError(); } if (SavedKind == tok::kw_typename) { // postfix-expression: typename-specifier '(' expression-list[opt] ')' // typename-specifier braced-init-list if (TryAnnotateTypeOrScopeToken()) return ExprError(); if (!Actions.isSimpleTypeSpecifier(Tok.getKind())) // We are trying to parse a simple-type-specifier but might not get such // a token after error recovery. 
return ExprError(); } // postfix-expression: simple-type-specifier '(' expression-list[opt] ')' // simple-type-specifier braced-init-list // DeclSpec DS(AttrFactory); ParseCXXSimpleTypeSpecifier(DS); if (Tok.isNot(tok::l_paren) && (!getLangOpts().CPlusPlus11 || Tok.isNot(tok::l_brace))) return ExprError(Diag(Tok, diag::err_expected_lparen_after_type) << DS.getSourceRange()); if (Tok.is(tok::l_brace)) Diag(Tok, diag::warn_cxx98_compat_generalized_initializer_lists); Res = ParseCXXTypeConstructExpression(DS); break; } case tok::annot_cxxscope: { // [C++] id-expression: qualified-id // If TryAnnotateTypeOrScopeToken annotates the token, tail recurse. // (We can end up in this situation after tentative parsing.) if (TryAnnotateTypeOrScopeToken()) return ExprError(); if (!Tok.is(tok::annot_cxxscope)) return ParseCastExpression(isUnaryExpression, isAddressOfOperand, NotCastExpr, isTypeCast); Token Next = NextToken(); if (Next.is(tok::annot_template_id)) { TemplateIdAnnotation *TemplateId = takeTemplateIdAnnotation(Next); if (TemplateId->Kind == TNK_Type_template) { // We have a qualified template-id that we know refers to a // type, translate it into a type and continue parsing as a // cast expression. CXXScopeSpec SS; ParseOptionalCXXScopeSpecifier(SS, nullptr, /*EnteringContext=*/false); AnnotateTemplateIdTokenAsType(); return ParseCastExpression(isUnaryExpression, isAddressOfOperand, NotCastExpr, isTypeCast); } } // Parse as an id-expression. Res = ParseCXXIdExpression(isAddressOfOperand); break; } case tok::annot_template_id: { // [C++] template-id TemplateIdAnnotation *TemplateId = takeTemplateIdAnnotation(Tok); if (TemplateId->Kind == TNK_Type_template) { // We have a template-id that we know refers to a type, // translate it into a type and continue parsing as a cast // expression. 
AnnotateTemplateIdTokenAsType(); return ParseCastExpression(isUnaryExpression, isAddressOfOperand, NotCastExpr, isTypeCast); } // Fall through to treat the template-id as an id-expression. } case tok::kw_operator: // [C++] id-expression: operator/conversion-function-id Res = ParseCXXIdExpression(isAddressOfOperand); break; case tok::coloncolon: { // ::foo::bar -> global qualified name etc. If TryAnnotateTypeOrScopeToken // annotates the token, tail recurse. if (TryAnnotateTypeOrScopeToken()) return ExprError(); if (!Tok.is(tok::coloncolon)) return ParseCastExpression(isUnaryExpression, isAddressOfOperand); // ::new -> [C++] new-expression // ::delete -> [C++] delete-expression SourceLocation CCLoc = ConsumeToken(); if (Tok.is(tok::kw_new)) return ParseCXXNewExpression(true, CCLoc); if (Tok.is(tok::kw_delete)) return ParseCXXDeleteExpression(true, CCLoc); // This is not a type name or scope specifier, it is an invalid expression. Diag(CCLoc, diag::err_expected_expression); return ExprError(); } case tok::kw_new: // [C++] new-expression return ParseCXXNewExpression(false, Tok.getLocation()); case tok::kw_delete: // [C++] delete-expression return ParseCXXDeleteExpression(false, Tok.getLocation()); case tok::kw_noexcept: { // [C++0x] 'noexcept' '(' expression ')' Diag(Tok, diag::warn_cxx98_compat_noexcept_expr); SourceLocation KeyLoc = ConsumeToken(); BalancedDelimiterTracker T(*this, tok::l_paren); if (T.expectAndConsume(diag::err_expected_lparen_after, "noexcept")) return ExprError(); // C++11 [expr.unary.noexcept]p1: // The noexcept operator determines whether the evaluation of its operand, // which is an unevaluated operand, can throw an exception. 
EnterExpressionEvaluationContext Unevaluated(Actions, Sema::Unevaluated); ExprResult Result = ParseExpression(); T.consumeClose(); if (!Result.isInvalid()) Result = Actions.ActOnNoexceptExpr(KeyLoc, T.getOpenLocation(), Result.get(), T.getCloseLocation()); return Result; } #define TYPE_TRAIT(N,Spelling,K) \ case tok::kw_##Spelling: #include "clang/Basic/TokenKinds.def" return ParseTypeTrait(); case tok::kw___array_rank: case tok::kw___array_extent: return ParseArrayTypeTrait(); case tok::kw___is_lvalue_expr: case tok::kw___is_rvalue_expr: return ParseExpressionTrait(); case tok::at: { SourceLocation AtLoc = ConsumeToken(); return ParseObjCAtExpression(AtLoc); } case tok::caret: Res = ParseBlockLiteralExpression(); break; case tok::code_completion: { Actions.CodeCompleteOrdinaryName(getCurScope(), Sema::PCC_Expression); cutOffParsing(); return ExprError(); } case tok::l_square: if (getLangOpts().CPlusPlus11) { if (getLangOpts().ObjC1) { // C++11 lambda expressions and Objective-C message sends both start with a // square bracket. There are three possibilities here: // we have a valid lambda expression, we have an invalid lambda // expression, or we have something that doesn't appear to be a lambda. // If we're in the last case, we fall back to ParseObjCMessageExpression. Res = TryParseLambdaExpression(); if (!Res.isInvalid() && !Res.get()) Res = ParseObjCMessageExpression(); break; } Res = ParseLambdaExpression(); break; } if (getLangOpts().ObjC1) { Res = ParseObjCMessageExpression(); break; } // FALL THROUGH. default: NotCastExpr = true; return ExprError(); } // Check to see whether Res is a function designator only. If it is and we // are compiling for OpenCL, we need to return an error as this implies // that the address of the function is being taken, which is illegal in CL. // These can be followed by postfix-expr pieces. 
// NOTE(review): the statements down to the first closing brace finish a
// definition whose beginning lies above this excerpt; they are kept as-is.
  Res = ParsePostfixExpressionSuffix(Res);
  // In OpenCL mode, reject a postfix expression whose type is a function type.
  if (getLangOpts().OpenCL)
    if (Expr *PostfixExpr = Res.get()) {
      QualType Ty = PostfixExpr->getType();
      if (!Ty.isNull() && Ty->isFunctionType()) {
        Diag(PostfixExpr->getExprLoc(),
             diag::err_opencl_taking_function_address_parser);
        return ExprError();
      }
    }

  return Res;
}

/// \brief Once the leading part of a postfix-expression is parsed, this
/// method parses any suffixes that apply.
///
/// The function loops over the current token: subscripts (including OpenMP
/// array sections), calls (including '<<<' ... '>>>' execution
/// configurations), member accesses and postfix '++' / '--' are folded into
/// \p LHS one after another until a token that starts no suffix is seen.
///
/// \param LHS the expression parsed so far; may already be invalid, in which
///        case the suffix tokens are still consumed for recovery.
/// \returns \p LHS (possibly rebuilt) when the current token is not a suffix,
///          or ExprError() after code completion or a prohibited '[['.
///
/// \verbatim
///       postfix-expression: [C99 6.5.2]
///         primary-expression
///         postfix-expression '[' expression ']'
///         postfix-expression '[' braced-init-list ']'
///         postfix-expression '(' argument-expression-list[opt] ')'
///         postfix-expression '.' identifier
///         postfix-expression '->' identifier
///         postfix-expression '++'
///         postfix-expression '--'
///         '(' type-name ')' '{' initializer-list '}'
///         '(' type-name ')' '{' initializer-list ',' '}'
///
///       argument-expression-list: [C99 6.5.2]
///         argument-expression ...[opt]
///         argument-expression-list ',' assignment-expression ...[opt]
/// \endverbatim
ExprResult
Parser::ParsePostfixExpressionSuffix(ExprResult LHS) {
  // Now that the primary-expression piece of the postfix-expression has been
  // parsed, see if there are any postfix-expression pieces here.
  SourceLocation Loc;
  while (1) {
    switch (Tok.getKind()) {
    case tok::code_completion:
      if (InMessageExpression)
        return LHS;

      Actions.CodeCompletePostfixExpression(getCurScope(), LHS);
      cutOffParsing();
      return ExprError();

    case tok::identifier:
      // If we see identifier: after an expression, and we're not already in a
      // message send, then this is probably a message send with a missing
      // opening bracket '['.
      if (getLangOpts().ObjC1 && !InMessageExpression &&
          (NextToken().is(tok::colon) || NextToken().is(tok::r_square))) {
        LHS = ParseObjCMessageExpressionBody(SourceLocation(), SourceLocation(),
                                             nullptr, LHS.get());
        break;
      }

      // Fall through; this isn't a message send.

    default:  // Not a postfix-expression suffix.
      return LHS;
    case tok::l_square: {  // postfix-expression: p-e '[' expression ']'
      // If we have an array postfix expression that starts on a new line and
      // Objective-C is enabled, it is highly likely that the user forgot a
      // semicolon after the base expression and that the array postfix-expr is
      // actually another message send.  In this case, do some look-ahead to see
      // if the contents of the square brackets are obviously not a valid
      // expression and recover by pretending there is no suffix.
      if (getLangOpts().ObjC1 && Tok.isAtStartOfLine() &&
          isSimpleObjCMessageExpression())
        return LHS;

      // Reject array indices starting with a lambda-expression. '[[' is
      // reserved for attributes.
      if (CheckProhibitedCXX11Attribute()) {
        (void)Actions.CorrectDelayedTyposInExpr(LHS);
        return ExprError();
      }

      BalancedDelimiterTracker T(*this, tok::l_square);
      T.consumeOpen();
      Loc = T.getOpenLocation();
      ExprResult Idx, Length;
      SourceLocation ColonLoc;
      if (getLangOpts().CPlusPlus11 && Tok.is(tok::l_brace)) {
        Diag(Tok, diag::warn_cxx98_compat_generalized_initializer_lists);
        Idx = ParseBraceInitializer();
      } else if (getLangOpts().OpenMP) {
        ColonProtectionRAIIObject RAII(*this);
        // Parse [: or [ expr or [ expr :
        if (!Tok.is(tok::colon)) {
          // [ expr
          Idx = ParseExpression();
        }
        if (Tok.is(tok::colon)) {
          // Consume ':'
          ColonLoc = ConsumeToken();
          if (Tok.isNot(tok::r_square))
            Length = ParseExpression();
        }
      } else
        Idx = ParseExpression();

      SourceLocation RLoc = Tok.getLocation();

      // Keep the pre-subscript LHS so its delayed typos can still be
      // corrected if building the subscript fails below.
      ExprResult OrigLHS = LHS;
      if (!LHS.isInvalid() && !Idx.isInvalid() && !Length.isInvalid() &&
          Tok.is(tok::r_square)) {
        // A ':' was consumed, so this is an array section, not a subscript.
        if (ColonLoc.isValid()) {
          LHS = Actions.ActOnOMPArraySectionExpr(LHS.get(), Loc, Idx.get(),
                                                 ColonLoc, Length.get(), RLoc);
        } else {
          LHS = Actions.ActOnArraySubscriptExpr(getCurScope(), LHS.get(), Loc,
                                                Idx.get(), RLoc);
        }
      } else {
        LHS = ExprError();
      }
      if (LHS.isInvalid()) {
        (void)Actions.CorrectDelayedTyposInExpr(OrigLHS);
        (void)Actions.CorrectDelayedTyposInExpr(Idx);
        (void)Actions.CorrectDelayedTyposInExpr(Length);
        LHS = ExprError();
        Idx = ExprError();
      }

      // Match the ']'.
      T.consumeClose();
      break;
    }

    case tok::l_paren:         // p-e: p-e '(' argument-expression-list[opt] ')'
    case tok::lesslessless: {  // p-e: p-e '<<<' argument-expression-list '>>>'
                               //   '(' argument-expression-list[opt] ')'
      tok::TokenKind OpKind = Tok.getKind();
      InMessageExpressionRAIIObject InMessage(*this, false);

      Expr *ExecConfig = nullptr;

      BalancedDelimiterTracker PT(*this, tok::l_paren);

      if (OpKind == tok::lesslessless) {
        ExprVector ExecConfigExprs;
        CommaLocsTy ExecConfigCommaLocs;
        SourceLocation OpenLoc = ConsumeToken();

        if (ParseSimpleExpressionList(ExecConfigExprs, ExecConfigCommaLocs)) {
          (void)Actions.CorrectDelayedTyposInExpr(LHS);
          LHS = ExprError();
        }

        SourceLocation CloseLoc;
        if (TryConsumeToken(tok::greatergreatergreater, CloseLoc)) {
        } else if (LHS.isInvalid()) {
          SkipUntil(tok::greatergreatergreater, StopAtSemi);
        } else {
          // There was an error closing the brackets
          Diag(Tok, diag::err_expected) << tok::greatergreatergreater;
          Diag(OpenLoc, diag::note_matching) << tok::lesslessless;
          SkipUntil(tok::greatergreatergreater, StopAtSemi);
          LHS = ExprError();
        }

        // The '(' after '>>>' is consumed directly here, not through PT.
        if (!LHS.isInvalid()) {
          if (ExpectAndConsume(tok::l_paren))
            LHS = ExprError();
          else
            Loc = PrevTokLocation;
        }

        if (!LHS.isInvalid()) {
          ExprResult ECResult = Actions.ActOnCUDAExecConfigExpr(getCurScope(),
                                    OpenLoc,
                                    ExecConfigExprs,
                                    CloseLoc);
          if (ECResult.isInvalid())
            LHS = ExprError();
          else
            ExecConfig = ECResult.get();
        }
      } else {
        PT.consumeOpen();
        Loc = PT.getOpenLocation();
      }

      ExprVector ArgExprs;
      CommaLocsTy CommaLocs;

      if (Tok.is(tok::code_completion)) {
        Actions.CodeCompleteCall(getCurScope(), LHS.get(), None);
        cutOffParsing();
        return ExprError();
      }

      if (OpKind == tok::l_paren || !LHS.isInvalid()) {
        if (Tok.isNot(tok::r_paren)) {
          // The lambda is the completion callback handed to
          // ParseExpressionList.
          if (ParseExpressionList(ArgExprs, CommaLocs, [&] {
                Actions.CodeCompleteCall(getCurScope(), LHS.get(), ArgExprs);
             })) {
            (void)Actions.CorrectDelayedTyposInExpr(LHS);
            LHS = ExprError();
          } else if (LHS.isInvalid()) {
            for (auto &E : ArgExprs)
              Actions.CorrectDelayedTyposInExpr(E);
          }
        }
      }

      // Match the ')'.
      if (LHS.isInvalid()) {
        SkipUntil(tok::r_paren, StopAtSemi);
      } else if (Tok.isNot(tok::r_paren)) {
        bool HadDelayedTypo = false;
        if (Actions.CorrectDelayedTyposInExpr(LHS).get() != LHS.get())
          HadDelayedTypo = true;
        for (auto &E : ArgExprs)
          if (Actions.CorrectDelayedTyposInExpr(E).get() != E)
            HadDelayedTypo = true;
        // If there were delayed typos in the LHS or ArgExprs, call SkipUntil
        // instead of PT.consumeClose() to avoid emitting extra diagnostics for
        // the unmatched l_paren.
        if (HadDelayedTypo)
          SkipUntil(tok::r_paren, StopAtSemi);
        else
          PT.consumeClose();
        LHS = ExprError();
      } else {
        assert((ArgExprs.size() == 0 ||
                ArgExprs.size()-1 == CommaLocs.size())&&
               "Unexpected number of commas!");
        LHS = Actions.ActOnCallExpr(getCurScope(), LHS.get(), Loc,
                                    ArgExprs, Tok.getLocation(),
                                    ExecConfig);
        PT.consumeClose();
      }

      break;
    }
    case tok::arrow:
    case tok::period: {
      // postfix-expression: p-e '->' template[opt] id-expression
      // postfix-expression: p-e '.' template[opt] id-expression
      tok::TokenKind OpKind = Tok.getKind();
      SourceLocation OpLoc = ConsumeToken();  // Eat the "." or "->" token.

      CXXScopeSpec SS;
      ParsedType ObjectType;
      bool MayBePseudoDestructor = false;
      if (getLangOpts().CPlusPlus && !LHS.isInvalid()) {
        Expr *Base = LHS.get();
        const Type* BaseType = Base->getType().getTypePtrOrNull();
        // '.' or '->' applied to a function (or bound member) and followed by
        // '(': diagnose with a fix-it removing the operator, then continue
        // parsing suffixes on the base.
        if (BaseType && Tok.is(tok::l_paren) &&
            (BaseType->isFunctionType() ||
             BaseType->isSpecificPlaceholderType(BuiltinType::BoundMember))) {
          Diag(OpLoc, diag::err_function_is_not_record)
              << OpKind << Base->getSourceRange()
              << FixItHint::CreateRemoval(OpLoc);
          return ParsePostfixExpressionSuffix(Base);
        }

        LHS = Actions.ActOnStartCXXMemberReference(getCurScope(), Base,
                                                   OpLoc, OpKind, ObjectType,
                                                   MayBePseudoDestructor);
        if (LHS.isInvalid())
          break;

        ParseOptionalCXXScopeSpecifier(SS, ObjectType,
                                       /*EnteringContext=*/false,
                                       &MayBePseudoDestructor);
        if (SS.isNotEmpty())
          ObjectType = nullptr;
      }

      if (Tok.is(tok::code_completion)) {
        // Code completion for a member access expression.
        if (Expr *Base = LHS.get())
          Actions.CodeCompleteMemberReferenceExpr(
              getCurScope(), Base, OpLoc, OpKind == tok::arrow,
              ExprStatementTokLoc == Base->getLocStart());

        cutOffParsing();
        return ExprError();
      }

      if (MayBePseudoDestructor && !LHS.isInvalid()) {
        LHS = ParseCXXPseudoDestructor(LHS.get(), OpLoc, OpKind, SS,
                                       ObjectType);
        break;
      }

      // Either the action has told us that this cannot be a
      // pseudo-destructor expression (based on the type of base
      // expression), or we didn't see a '~' in the right place. We
      // can still parse a destructor name here, but in that case it
      // names a real destructor.
      // Allow explicit constructor calls in Microsoft mode.
      // FIXME: Add support for explicit call of template constructor.
      SourceLocation TemplateKWLoc;
      UnqualifiedId Name;
      if (getLangOpts().ObjC2 && OpKind == tok::period &&
          Tok.is(tok::kw_class)) {
        // Objective-C++:
        //   After a '.' in a member access expression, treat the keyword
        //   'class' as if it were an identifier.
        //
        // This hack allows property access to the 'class' method because it is
        // such a common method name. For other C++ keywords that are
        // Objective-C method names, one must use the message send syntax.
        IdentifierInfo *Id = Tok.getIdentifierInfo();
        SourceLocation Loc = ConsumeToken();
        Name.setIdentifier(Id, Loc);
      } else if (ParseUnqualifiedId(SS,
                                    /*EnteringContext=*/false,
                                    /*AllowDestructorName=*/true,
                                    /*AllowConstructorName=*/
                                      getLangOpts().MicrosoftExt,
                                    ObjectType, TemplateKWLoc, Name)) {
        (void)Actions.CorrectDelayedTyposInExpr(LHS);
        LHS = ExprError();
      }

      if (!LHS.isInvalid())
        LHS = Actions.ActOnMemberAccessExpr(getCurScope(), LHS.get(), OpLoc,
                                            OpKind, SS, TemplateKWLoc, Name,
                                 CurParsedObjCImpl ? CurParsedObjCImpl->Dcl
                                                   : nullptr);
      break;
    }
    case tok::plusplus:    // postfix-expression: postfix-expression '++'
    case tok::minusminus:  // postfix-expression: postfix-expression '--'
      if (!LHS.isInvalid()) {
        LHS = Actions.ActOnPostfixUnaryOp(getCurScope(), Tok.getLocation(),
                                          Tok.getKind(), LHS.get());
      }
      // The operator token is consumed even when LHS is already invalid.
      ConsumeToken();
      break;
    }
  }
}

/// ParseExprAfterUnaryExprOrTypeTrait - We parsed a typeof/sizeof/alignof/
/// vec_step and we are at the start of an expression or a parenthesized
/// type-id. OpTok is the operand token (typeof/sizeof/alignof). Returns the
/// expression (isCastExpr == false) or the type (isCastExpr == true).
///
/// \verbatim
///       unary-expression:  [C99 6.5.3]
///         'sizeof' unary-expression
///         'sizeof' '(' type-name ')'
/// [GNU]   '__alignof' unary-expression
/// [GNU]   '__alignof' '(' type-name ')'
/// [C11]   '_Alignof' '(' type-name ')'
/// [C++0x] 'alignof' '(' type-id ')'
///
/// [GNU]   typeof-specifier:
///           typeof ( expressions )
///           typeof ( type-name )
/// [GNU/C++] typeof unary-expression
///
/// [OpenCL 1.1 6.11.12] vec_step built-in function:
///           vec_step ( expressions )
///           vec_step ( type-name )
/// \endverbatim
///
/// \param OpTok      the keyword token that introduced the operand.
/// \param isCastExpr set to true when a type was parsed, false when an
///                   expression was parsed.
/// \param CastTy     receives the type from ParseParenExpression in the
///                   parenthesized case.
/// \param CastRange  set to the '(' ... ')' range in the parenthesized case.
ExprResult
Parser::ParseExprAfterUnaryExprOrTypeTrait(const Token &OpTok,
                                           bool &isCastExpr,
                                           ParsedType &CastTy,
                                           SourceRange &CastRange) {

  assert(OpTok.isOneOf(tok::kw_typeof, tok::kw_sizeof, tok::kw___alignof,
                       tok::kw_alignof, tok::kw__Alignof, tok::kw_vec_step,
                       tok::kw___builtin_omp_required_simd_align) &&
         "Not a typeof/sizeof/alignof/vec_step expression!");

  ExprResult Operand;

  // If the operand doesn't start with an '(', it must be an expression.
  if (Tok.isNot(tok::l_paren)) {
    // If the construct allows a form without parentheses, the user may have
    // forgotten to put parentheses around a type name.
    if (OpTok.isOneOf(tok::kw_sizeof, tok::kw___alignof, tok::kw_alignof,
                      tok::kw__Alignof)) {
      if (isTypeIdUnambiguously()) {
        // Parse the type anyway so the fix-it can point just past it.
        DeclSpec DS(AttrFactory);
        ParseSpecifierQualifierList(DS);
        Declarator DeclaratorInfo(DS, Declarator::TypeNameContext);
        ParseDeclarator(DeclaratorInfo);

        SourceLocation LParenLoc = PP.getLocForEndOfToken(OpTok.getLocation());
        SourceLocation RParenLoc = PP.getLocForEndOfToken(PrevTokLocation);
        Diag(LParenLoc, diag::err_expected_parentheses_around_typename)
          << OpTok.getName()
          << FixItHint::CreateInsertion(LParenLoc, "(")
          << FixItHint::CreateInsertion(RParenLoc, ")");
        isCastExpr = true;
        return ExprEmpty();
      }
    }

    isCastExpr = false;
    if (OpTok.is(tok::kw_typeof) && !getLangOpts().CPlusPlus) {
      Diag(Tok, diag::err_expected_after) << OpTok.getIdentifierInfo()
                                          << tok::l_paren;
      return ExprError();
    }

    Operand = ParseCastExpression(true/*isUnaryExpression*/);
  } else {
    // If it starts with a '(', we know that it is either a parenthesized
    // type-name, or it is a unary-expression that starts with a compound
    // literal, or starts with a primary-expression that is a parenthesized
    // expression.
    ParenParseOption ExprType = CastExpr;
    SourceLocation LParenLoc = Tok.getLocation(), RParenLoc;

    Operand = ParseParenExpression(ExprType, true/*stopIfCastExpr*/,
                                   false, CastTy, RParenLoc);
    CastRange = SourceRange(LParenLoc, RParenLoc);

    // If ParseParenExpression parsed a '(typename)' sequence only, then this is
    // a type.
    if (ExprType == CastExpr) {
      isCastExpr = true;
      return ExprEmpty();
    }

    if (getLangOpts().CPlusPlus || OpTok.isNot(tok::kw_typeof)) {
      // GNU typeof in C requires the expression to be parenthesized. Not so for
      // sizeof/alignof or in C++. Therefore, the parenthesized expression is
      // the start of a unary-expression, but doesn't include any postfix
      // pieces. Parse these now if present.
      if (!Operand.isInvalid())
        Operand = ParsePostfixExpressionSuffix(Operand.get());
    }
  }

  // If we get here, the operand to the typeof/sizeof/alignof was an expression.
  isCastExpr = false;
  return Operand;
}


/// \brief Parse a sizeof or alignof expression.
///
/// The keyword token is consumed here. 'sizeof...' is handled inline; every
/// other form parses its operand via ParseExprAfterUnaryExprOrTypeTrait inside
/// an unevaluated expression context.
///
/// \verbatim
///       unary-expression:  [C99 6.5.3]
///         'sizeof' unary-expression
///         'sizeof' '(' type-name ')'
/// [C++11] 'sizeof' '...' '(' identifier ')'
/// [GNU]   '__alignof' unary-expression
/// [GNU]   '__alignof' '(' type-name ')'
/// [C11]   '_Alignof' '(' type-name ')'
/// [C++11] 'alignof' '(' type-id ')'
/// \endverbatim
ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
  assert(Tok.isOneOf(tok::kw_sizeof, tok::kw___alignof, tok::kw_alignof,
                     tok::kw__Alignof, tok::kw_vec_step,
                     tok::kw___builtin_omp_required_simd_align) &&
         "Not a sizeof/alignof/vec_step expression!");
  Token OpTok = Tok;
  ConsumeToken();

  // [C++11] 'sizeof' '...' '(' identifier ')'
  if (Tok.is(tok::ellipsis) && OpTok.is(tok::kw_sizeof)) {
    SourceLocation EllipsisLoc = ConsumeToken();
    SourceLocation LParenLoc, RParenLoc;
    IdentifierInfo *Name = nullptr;
    SourceLocation NameLoc;
    if (Tok.is(tok::l_paren)) {
      BalancedDelimiterTracker T(*this, tok::l_paren);
      T.consumeOpen();
      LParenLoc = T.getOpenLocation();
      if (Tok.is(tok::identifier)) {
        Name = Tok.getIdentifierInfo();
        NameLoc = ConsumeToken();
        T.consumeClose();
        RParenLoc = T.getCloseLocation();
        // No valid ')' location: fall back to the end of the identifier.
        if (RParenLoc.isInvalid())
          RParenLoc = PP.getLocForEndOfToken(NameLoc);
      } else {
        Diag(Tok, diag::err_expected_parameter_pack);
        SkipUntil(tok::r_paren, StopAtSemi);
      }
    } else if (Tok.is(tok::identifier)) {
      // 'sizeof... name' without parentheses: diagnose with fix-its that
      // insert them, and carry on using the identifier.
      Name = Tok.getIdentifierInfo();
      NameLoc = ConsumeToken();
      LParenLoc = PP.getLocForEndOfToken(EllipsisLoc);
      RParenLoc = PP.getLocForEndOfToken(NameLoc);
      Diag(LParenLoc, diag::err_paren_sizeof_parameter_pack)
        << Name
        << FixItHint::CreateInsertion(LParenLoc, "(")
        << FixItHint::CreateInsertion(RParenLoc, ")");
    } else {
      Diag(Tok, diag::err_sizeof_parameter_pack);
    }

    if (!Name)
      return ExprError();

    EnterExpressionEvaluationContext Unevaluated(Actions, Sema::Unevaluated,
                                                 Sema::ReuseLambdaContextDecl);

    return Actions.ActOnSizeofParameterPackExpr(getCurScope(),
                                                OpTok.getLocation(),
                                                *Name, NameLoc,
                                                RParenLoc);
  }

  if (OpTok.isOneOf(tok::kw_alignof, tok::kw__Alignof))
    Diag(OpTok, diag::warn_cxx98_compat_alignof);

  EnterExpressionEvaluationContext Unevaluated(Actions, Sema::Unevaluated,
                                               Sema::ReuseLambdaContextDecl);

  bool isCastExpr;
  ParsedType CastTy;
  SourceRange CastRange;
  ExprResult Operand = ParseExprAfterUnaryExprOrTypeTrait(OpTok,
                                                          isCastExpr,
                                                          CastTy,
                                                          CastRange);

  // Map the keyword to the trait kind; sizeof is the default.
  UnaryExprOrTypeTrait ExprKind = UETT_SizeOf;
  if (OpTok.isOneOf(tok::kw_alignof, tok::kw___alignof, tok::kw__Alignof))
    ExprKind = UETT_AlignOf;
  else if (OpTok.is(tok::kw_vec_step))
    ExprKind = UETT_VecStep;
  else if (OpTok.is(tok::kw___builtin_omp_required_simd_align))
    ExprKind = UETT_OpenMPRequiredSimdAlign;

  if (isCastExpr)
    return Actions.ActOnUnaryExprOrTypeTraitExpr(OpTok.getLocation(),
                                                 ExprKind,
                                                 /*isType=*/true,
                                                 CastTy.getAsOpaquePtr(),
                                                 CastRange);

  if (OpTok.isOneOf(tok::kw_alignof, tok::kw__Alignof))
    Diag(OpTok, diag::ext_alignof_expr) << OpTok.getIdentifierInfo();

  // If we get here, the operand to the sizeof/alignof was an expression.
  if (!Operand.isInvalid())
    Operand = Actions.ActOnUnaryExprOrTypeTraitExpr(OpTok.getLocation(),
                                                    ExprKind,
                                                    /*isType=*/false,
                                                    Operand.get(),
                                                    CastRange);
  return Operand;
}

/// ParseBuiltinPrimaryExpression
///
/// \verbatim
///       primary-expression: [C99 6.5.1]
/// [GNU]   '__builtin_va_arg' '(' assignment-expression ',' type-name ')'
/// [GNU]   '__builtin_offsetof' '(' type-name ',' offsetof-member-designator')'
/// [GNU]   '__builtin_choose_expr' '(' assign-expr ',' assign-expr ','
///                                     assign-expr ')'
/// [GNU]   '__builtin_types_compatible_p' '(' type-name ',' type-name ')'
/// [OCL]   '__builtin_astype' '(' assignment-expression ',' type-name ')'
///
/// [GNU] offsetof-member-designator:
/// [GNU]   identifier
/// [GNU]   offsetof-member-designator '.'
identifier /// [GNU] offsetof-member-designator '[' expression ']' /// \endverbatim ExprResult Parser::ParseBuiltinPrimaryExpression() { ExprResult Res; const IdentifierInfo *BuiltinII = Tok.getIdentifierInfo(); tok::TokenKind T = Tok.getKind(); SourceLocation StartLoc = ConsumeToken(); // Eat the builtin identifier. // All of these start with an open paren. if (Tok.isNot(tok::l_paren)) return ExprError(Diag(Tok, diag::err_expected_after) << BuiltinII << tok::l_paren); BalancedDelimiterTracker PT(*this, tok::l_paren); PT.consumeOpen(); // TODO: Build AST. switch (T) { default: llvm_unreachable("Not a builtin primary expression!"); case tok::kw___builtin_va_arg: { ExprResult Expr(ParseAssignmentExpression()); if (ExpectAndConsume(tok::comma)) { SkipUntil(tok::r_paren, StopAtSemi); Expr = ExprError(); } TypeResult Ty = ParseTypeName(); if (Tok.isNot(tok::r_paren)) { Diag(Tok, diag::err_expected) << tok::r_paren; Expr = ExprError(); } if (Expr.isInvalid() || Ty.isInvalid()) Res = ExprError(); else Res = Actions.ActOnVAArg(StartLoc, Expr.get(), Ty.get(), ConsumeParen()); break; } case tok::kw___builtin_offsetof: { SourceLocation TypeLoc = Tok.getLocation(); TypeResult Ty = ParseTypeName(); if (Ty.isInvalid()) { SkipUntil(tok::r_paren, StopAtSemi); return ExprError(); } if (ExpectAndConsume(tok::comma)) { SkipUntil(tok::r_paren, StopAtSemi); return ExprError(); } // We must have at least one identifier here. if (Tok.isNot(tok::identifier)) { Diag(Tok, diag::err_expected) << tok::identifier; SkipUntil(tok::r_paren, StopAtSemi); return ExprError(); } // Keep track of the various subcomponents we see. SmallVector Comps; Comps.push_back(Sema::OffsetOfComponent()); Comps.back().isBrackets = false; Comps.back().U.IdentInfo = Tok.getIdentifierInfo(); Comps.back().LocStart = Comps.back().LocEnd = ConsumeToken(); // FIXME: This loop leaks the index expressions on error. while (1) { if (Tok.is(tok::period)) { // offsetof-member-designator: offsetof-member-designator '.' 
identifier Comps.push_back(Sema::OffsetOfComponent()); Comps.back().isBrackets = false; Comps.back().LocStart = ConsumeToken(); if (Tok.isNot(tok::identifier)) { Diag(Tok, diag::err_expected) << tok::identifier; SkipUntil(tok::r_paren, StopAtSemi); return ExprError(); } Comps.back().U.IdentInfo = Tok.getIdentifierInfo(); Comps.back().LocEnd = ConsumeToken(); } else if (Tok.is(tok::l_square)) { if (CheckProhibitedCXX11Attribute()) return ExprError(); // offsetof-member-designator: offsetof-member-design '[' expression ']' Comps.push_back(Sema::OffsetOfComponent()); Comps.back().isBrackets = true; BalancedDelimiterTracker ST(*this, tok::l_square); ST.consumeOpen(); Comps.back().LocStart = ST.getOpenLocation(); Res = ParseExpression(); if (Res.isInvalid()) { SkipUntil(tok::r_paren, StopAtSemi); return Res; } Comps.back().U.E = Res.get(); ST.consumeClose(); Comps.back().LocEnd = ST.getCloseLocation(); } else { if (Tok.isNot(tok::r_paren)) { PT.consumeClose(); Res = ExprError(); } else if (Ty.isInvalid()) { Res = ExprError(); } else { PT.consumeClose(); Res = Actions.ActOnBuiltinOffsetOf(getCurScope(), StartLoc, TypeLoc, Ty.get(), Comps, PT.getCloseLocation()); } break; } } break; } case tok::kw___builtin_choose_expr: { ExprResult Cond(ParseAssignmentExpression()); if (Cond.isInvalid()) { SkipUntil(tok::r_paren, StopAtSemi); return Cond; } if (ExpectAndConsume(tok::comma)) { SkipUntil(tok::r_paren, StopAtSemi); return ExprError(); } ExprResult Expr1(ParseAssignmentExpression()); if (Expr1.isInvalid()) { SkipUntil(tok::r_paren, StopAtSemi); return Expr1; } if (ExpectAndConsume(tok::comma)) { SkipUntil(tok::r_paren, StopAtSemi); return ExprError(); } ExprResult Expr2(ParseAssignmentExpression()); if (Expr2.isInvalid()) { SkipUntil(tok::r_paren, StopAtSemi); return Expr2; } if (Tok.isNot(tok::r_paren)) { Diag(Tok, diag::err_expected) << tok::r_paren; return ExprError(); } Res = Actions.ActOnChooseExpr(StartLoc, Cond.get(), Expr1.get(), Expr2.get(), ConsumeParen()); break; 
} case tok::kw___builtin_astype: { // The first argument is an expression to be converted, followed by a comma. ExprResult Expr(ParseAssignmentExpression()); if (Expr.isInvalid()) { SkipUntil(tok::r_paren, StopAtSemi); return ExprError(); } if (ExpectAndConsume(tok::comma)) { SkipUntil(tok::r_paren, StopAtSemi); return ExprError(); } // Second argument is the type to bitcast to. TypeResult DestTy = ParseTypeName(); if (DestTy.isInvalid()) return ExprError(); // Attempt to consume the r-paren. if (Tok.isNot(tok::r_paren)) { Diag(Tok, diag::err_expected) << tok::r_paren; SkipUntil(tok::r_paren, StopAtSemi); return ExprError(); } Res = Actions.ActOnAsTypeExpr(Expr.get(), DestTy.get(), StartLoc, ConsumeParen()); break; } case tok::kw___builtin_convertvector: { // The first argument is an expression to be converted, followed by a comma. ExprResult Expr(ParseAssignmentExpression()); if (Expr.isInvalid()) { SkipUntil(tok::r_paren, StopAtSemi); return ExprError(); } if (ExpectAndConsume(tok::comma)) { SkipUntil(tok::r_paren, StopAtSemi); return ExprError(); } // Second argument is the type to bitcast to. TypeResult DestTy = ParseTypeName(); if (DestTy.isInvalid()) return ExprError(); // Attempt to consume the r-paren. if (Tok.isNot(tok::r_paren)) { Diag(Tok, diag::err_expected) << tok::r_paren; SkipUntil(tok::r_paren, StopAtSemi); return ExprError(); } Res = Actions.ActOnConvertVectorExpr(Expr.get(), DestTy.get(), StartLoc, ConsumeParen()); break; } } if (Res.isInvalid()) return ExprError(); // These can be followed by postfix-expr pieces because they are // primary-expressions. return ParsePostfixExpressionSuffix(Res.get()); } /// ParseParenExpression - This parses the unit that starts with a '(' token, /// based on what is allowed by ExprType. The actual thing parsed is returned /// in ExprType. If stopIfCastExpr is true, it will only return the parsed type, /// not the parsed cast-expression. 
/// /// \verbatim /// primary-expression: [C99 6.5.1] /// '(' expression ')' /// [GNU] '(' compound-statement ')' (if !ParenExprOnly) /// postfix-expression: [C99 6.5.2] /// '(' type-name ')' '{' initializer-list '}' /// '(' type-name ')' '{' initializer-list ',' '}' /// cast-expression: [C99 6.5.4] /// '(' type-name ')' cast-expression /// [ARC] bridged-cast-expression /// [ARC] bridged-cast-expression: /// (__bridge type-name) cast-expression /// (__bridge_transfer type-name) cast-expression /// (__bridge_retained type-name) cast-expression /// fold-expression: [C++1z] /// '(' cast-expression fold-operator '...' ')' /// '(' '...' fold-operator cast-expression ')' /// '(' cast-expression fold-operator '...' /// fold-operator cast-expression ')' /// \endverbatim ExprResult Parser::ParseParenExpression(ParenParseOption &ExprType, bool stopIfCastExpr, bool isTypeCast, ParsedType &CastTy, SourceLocation &RParenLoc) { assert(Tok.is(tok::l_paren) && "Not a paren expr!"); ColonProtectionRAIIObject ColonProtection(*this, false); BalancedDelimiterTracker T(*this, tok::l_paren); if (T.consumeOpen()) return ExprError(); SourceLocation OpenLoc = T.getOpenLocation(); ExprResult Result(true); bool isAmbiguousTypeId; CastTy = nullptr; if (Tok.is(tok::code_completion)) { Actions.CodeCompleteOrdinaryName(getCurScope(), ExprType >= CompoundLiteral? Sema::PCC_ParenthesizedExpression : Sema::PCC_Expression); cutOffParsing(); return ExprError(); } // Diagnose use of bridge casts in non-arc mode. 
bool BridgeCast = (getLangOpts().ObjC2 && Tok.isOneOf(tok::kw___bridge, tok::kw___bridge_transfer, tok::kw___bridge_retained, tok::kw___bridge_retain)); if (BridgeCast && !getLangOpts().ObjCAutoRefCount) { if (!TryConsumeToken(tok::kw___bridge)) { StringRef BridgeCastName = Tok.getName(); SourceLocation BridgeKeywordLoc = ConsumeToken(); if (!PP.getSourceManager().isInSystemHeader(BridgeKeywordLoc)) Diag(BridgeKeywordLoc, diag::warn_arc_bridge_cast_nonarc) << BridgeCastName << FixItHint::CreateReplacement(BridgeKeywordLoc, ""); } BridgeCast = false; } // None of these cases should fall through with an invalid Result // unless they've already reported an error. if (ExprType >= CompoundStmt && Tok.is(tok::l_brace)) { Diag(Tok, diag::ext_gnu_statement_expr); if (!getCurScope()->getFnParent() && !getCurScope()->getBlockParent()) { Result = ExprError(Diag(OpenLoc, diag::err_stmtexpr_file_scope)); } else { // Find the nearest non-record decl context. Variables declared in a // statement expression behave as if they were declared in the enclosing // function, block, or other code construct. DeclContext *CodeDC = Actions.CurContext; while (CodeDC->isRecord() || isa(CodeDC)) { CodeDC = CodeDC->getParent(); assert(CodeDC && !CodeDC->isFileContext() && "statement expr not in code context"); } Sema::ContextRAII SavedContext(Actions, CodeDC, /*NewThisContext=*/false); Actions.ActOnStartStmtExpr(); StmtResult Stmt(ParseCompoundStatement(true)); ExprType = CompoundStmt; // If the substmt parsed correctly, build the AST node. if (!Stmt.isInvalid()) { Result = Actions.ActOnStmtExpr(OpenLoc, Stmt.get(), Tok.getLocation()); } else { Actions.ActOnStmtExprError(); } } } else if (ExprType >= CompoundLiteral && BridgeCast) { tok::TokenKind tokenKind = Tok.getKind(); SourceLocation BridgeKeywordLoc = ConsumeToken(); // Parse an Objective-C ARC ownership cast expression. 
ObjCBridgeCastKind Kind; if (tokenKind == tok::kw___bridge) Kind = OBC_Bridge; else if (tokenKind == tok::kw___bridge_transfer) Kind = OBC_BridgeTransfer; else if (tokenKind == tok::kw___bridge_retained) Kind = OBC_BridgeRetained; else { // As a hopefully temporary workaround, allow __bridge_retain as // a synonym for __bridge_retained, but only in system headers. assert(tokenKind == tok::kw___bridge_retain); Kind = OBC_BridgeRetained; if (!PP.getSourceManager().isInSystemHeader(BridgeKeywordLoc)) Diag(BridgeKeywordLoc, diag::err_arc_bridge_retain) << FixItHint::CreateReplacement(BridgeKeywordLoc, "__bridge_retained"); } TypeResult Ty = ParseTypeName(); T.consumeClose(); ColonProtection.restore(); RParenLoc = T.getCloseLocation(); ExprResult SubExpr = ParseCastExpression(/*isUnaryExpression=*/false); if (Ty.isInvalid() || SubExpr.isInvalid()) return ExprError(); return Actions.ActOnObjCBridgedCast(getCurScope(), OpenLoc, Kind, BridgeKeywordLoc, Ty.get(), RParenLoc, SubExpr.get()); } else if (ExprType >= CompoundLiteral && isTypeIdInParens(isAmbiguousTypeId)) { // Otherwise, this is a compound literal expression or cast expression. // In C++, if the type-id is ambiguous we disambiguate based on context. // If stopIfCastExpr is true the context is a typeof/sizeof/alignof // in which case we should treat it as type-id. // if stopIfCastExpr is false, we need to determine the context past the // parens, so we defer to ParseCXXAmbiguousParenExpression for that. if (isAmbiguousTypeId && !stopIfCastExpr) { ExprResult res = ParseCXXAmbiguousParenExpression(ExprType, CastTy, T, ColonProtection); RParenLoc = T.getCloseLocation(); return res; } // Parse the type declarator. DeclSpec DS(AttrFactory); ParseSpecifierQualifierList(DS); Declarator DeclaratorInfo(DS, Declarator::TypeNameContext); ParseDeclarator(DeclaratorInfo); // If our type is followed by an identifier and either ':' or ']', then // this is probably an Objective-C message send where the leading '[' is // missing. 
Recover as if that were the case. if (!DeclaratorInfo.isInvalidType() && Tok.is(tok::identifier) && !InMessageExpression && getLangOpts().ObjC1 && (NextToken().is(tok::colon) || NextToken().is(tok::r_square))) { TypeResult Ty; { InMessageExpressionRAIIObject InMessage(*this, false); Ty = Actions.ActOnTypeName(getCurScope(), DeclaratorInfo); } Result = ParseObjCMessageExpressionBody(SourceLocation(), SourceLocation(), Ty.get(), nullptr); } else { // Match the ')'. T.consumeClose(); ColonProtection.restore(); RParenLoc = T.getCloseLocation(); if (Tok.is(tok::l_brace)) { ExprType = CompoundLiteral; TypeResult Ty; { InMessageExpressionRAIIObject InMessage(*this, false); Ty = Actions.ActOnTypeName(getCurScope(), DeclaratorInfo); } return ParseCompoundLiteralExpression(Ty.get(), OpenLoc, RParenLoc); } if (ExprType == CastExpr) { // We parsed '(' type-name ')' and the thing after it wasn't a '{'. if (DeclaratorInfo.isInvalidType()) return ExprError(); // Note that this doesn't parse the subsequent cast-expression, it just // returns the parsed type to the callee. if (stopIfCastExpr) { TypeResult Ty; { InMessageExpressionRAIIObject InMessage(*this, false); Ty = Actions.ActOnTypeName(getCurScope(), DeclaratorInfo); } CastTy = Ty.get(); return ExprResult(); } // Reject the cast of super idiom in ObjC. if (Tok.is(tok::identifier) && getLangOpts().ObjC1 && Tok.getIdentifierInfo() == Ident_super && getCurScope()->isInObjcMethodScope() && GetLookAheadToken(1).isNot(tok::period)) { Diag(Tok.getLocation(), diag::err_illegal_super_cast) << SourceRange(OpenLoc, RParenLoc); return ExprError(); } // Parse the cast-expression that follows it next. // TODO: For cast expression with CastTy. 
Result = ParseCastExpression(/*isUnaryExpression=*/false, /*isAddressOfOperand=*/false, /*isTypeCast=*/IsTypeCast); if (!Result.isInvalid()) { Result = Actions.ActOnCastExpr(getCurScope(), OpenLoc, DeclaratorInfo, CastTy, RParenLoc, Result.get()); } return Result; } Diag(Tok, diag::err_expected_lbrace_in_compound_literal); return ExprError(); } } else if (Tok.is(tok::ellipsis) && isFoldOperator(NextToken().getKind())) { return ParseFoldExpression(ExprResult(), T); } else if (isTypeCast) { // Parse the expression-list. InMessageExpressionRAIIObject InMessage(*this, false); ExprVector ArgExprs; CommaLocsTy CommaLocs; if (!ParseSimpleExpressionList(ArgExprs, CommaLocs)) { // FIXME: If we ever support comma expressions as operands to // fold-expressions, we'll need to allow multiple ArgExprs here. if (ArgExprs.size() == 1 && isFoldOperator(Tok.getKind()) && NextToken().is(tok::ellipsis)) - return ParseFoldExpression(Result, T); + return ParseFoldExpression(ArgExprs[0], T); ExprType = SimpleExpr; Result = Actions.ActOnParenListExpr(OpenLoc, Tok.getLocation(), ArgExprs); } } else { InMessageExpressionRAIIObject InMessage(*this, false); Result = ParseExpression(MaybeTypeCast); if (!getLangOpts().CPlusPlus && MaybeTypeCast && Result.isUsable()) { // Correct typos in non-C++ code earlier so that implicit-cast-like // expressions are parsed correctly. Result = Actions.CorrectDelayedTyposInExpr(Result); } ExprType = SimpleExpr; if (isFoldOperator(Tok.getKind()) && NextToken().is(tok::ellipsis)) return ParseFoldExpression(Result, T); // Don't build a paren expression unless we actually match a ')'. if (!Result.isInvalid() && Tok.is(tok::r_paren)) Result = Actions.ActOnParenExpr(OpenLoc, Tok.getLocation(), Result.get()); } // Match the ')'. 
if (Result.isInvalid()) { SkipUntil(tok::r_paren, StopAtSemi); return ExprError(); } T.consumeClose(); RParenLoc = T.getCloseLocation(); return Result; } /// ParseCompoundLiteralExpression - We have parsed the parenthesized type-name /// and we are at the left brace. /// /// \verbatim /// postfix-expression: [C99 6.5.2] /// '(' type-name ')' '{' initializer-list '}' /// '(' type-name ')' '{' initializer-list ',' '}' /// \endverbatim ExprResult Parser::ParseCompoundLiteralExpression(ParsedType Ty, SourceLocation LParenLoc, SourceLocation RParenLoc) { assert(Tok.is(tok::l_brace) && "Not a compound literal!"); if (!getLangOpts().C99) // Compound literals don't exist in C90. Diag(LParenLoc, diag::ext_c99_compound_literal); ExprResult Result = ParseInitializer(); if (!Result.isInvalid() && Ty) return Actions.ActOnCompoundLiteral(LParenLoc, Ty, RParenLoc, Result.get()); return Result; } /// ParseStringLiteralExpression - This handles the various token types that /// form string literals, and also handles string concatenation [C99 5.1.1.2, /// translation phase #6]. /// /// \verbatim /// primary-expression: [C99 6.5.1] /// string-literal /// \verbatim ExprResult Parser::ParseStringLiteralExpression(bool AllowUserDefinedLiteral) { assert(isTokenStringLiteral() && "Not a string literal!"); // String concat. Note that keywords like __func__ and __FUNCTION__ are not // considered to be strings for concatenation purposes. SmallVector StringToks; do { StringToks.push_back(Tok); ConsumeStringToken(); } while (isTokenStringLiteral()); // Pass the set of string tokens, ready for concatenation, to the actions. return Actions.ActOnStringLiteral(StringToks, AllowUserDefinedLiteral ? getCurScope() : nullptr); } /// ParseGenericSelectionExpression - Parse a C11 generic-selection /// [C11 6.5.1.1]. 
///
/// \verbatim
///    generic-selection:
///           _Generic ( assignment-expression , generic-assoc-list )
///    generic-assoc-list:
///           generic-association
///           generic-assoc-list , generic-association
///    generic-association:
///           type-name : assignment-expression
///           default : assignment-expression
/// \endverbatim
///
/// \returns the generic-selection expression, or ExprError() after skipping
///          to the closing ')' when any part fails to parse.
ExprResult Parser::ParseGenericSelectionExpression() {
  assert(Tok.is(tok::kw__Generic) && "_Generic keyword expected");
  SourceLocation KeyLoc = ConsumeToken();

  if (!getLangOpts().C11)
    Diag(KeyLoc, diag::ext_c11_generic_selection);

  BalancedDelimiterTracker T(*this, tok::l_paren);
  if (T.expectAndConsume())
    return ExprError();

  ExprResult ControllingExpr;
  {
    // C11 6.5.1.1p3 "The controlling expression of a generic selection is
    // not evaluated."
    EnterExpressionEvaluationContext Unevaluated(Actions, Sema::Unevaluated);
    ControllingExpr =
        Actions.CorrectDelayedTyposInExpr(ParseAssignmentExpression());
    if (ControllingExpr.isInvalid()) {
      SkipUntil(tok::r_paren, StopAtSemi);
      return ExprError();
    }
  }

  if (ExpectAndConsume(tok::comma)) {
    SkipUntil(tok::r_paren, StopAtSemi);
    return ExprError();
  }

  // Types and Exprs are filled in lockstep, one entry per association; the
  // 'default' association is recorded with a null type.
  SourceLocation DefaultLoc;
  TypeVector Types;
  ExprVector Exprs;
  do {
    ParsedType Ty;
    if (Tok.is(tok::kw_default)) {
      // C11 6.5.1.1p2 "A generic selection shall have no more than one default
      // generic association."
      if (!DefaultLoc.isInvalid()) {
        Diag(Tok, diag::err_duplicate_default_assoc);
        Diag(DefaultLoc, diag::note_previous_default_assoc);
        SkipUntil(tok::r_paren, StopAtSemi);
        return ExprError();
      }
      DefaultLoc = ConsumeToken();
      Ty = nullptr;
    } else {
      ColonProtectionRAIIObject X(*this);
      TypeResult TR = ParseTypeName();
      if (TR.isInvalid()) {
        SkipUntil(tok::r_paren, StopAtSemi);
        return ExprError();
      }
      Ty = TR.get();
    }
    Types.push_back(Ty);

    if (ExpectAndConsume(tok::colon)) {
      SkipUntil(tok::r_paren, StopAtSemi);
      return ExprError();
    }

    // FIXME: These expressions should be parsed in a potentially
    // evaluated context.
    ExprResult ER(
        Actions.CorrectDelayedTyposInExpr(ParseAssignmentExpression()));
    if (ER.isInvalid()) {
      SkipUntil(tok::r_paren, StopAtSemi);
      return ExprError();
    }
    Exprs.push_back(ER.get());
  } while (TryConsumeToken(tok::comma));

  T.consumeClose();
  if (T.getCloseLocation().isInvalid())
    return ExprError();

  return Actions.ActOnGenericSelectionExpr(KeyLoc, DefaultLoc,
                                           T.getCloseLocation(),
                                           ControllingExpr.get(),
                                           Types, Exprs);
}

/// \brief Parse A C++1z fold-expression after the opening paren and optional
/// left-hand-side expression.
///
/// \param LHS the operand already parsed before the fold operator; a result
///        that is not usable means the expression starts with '...'.
/// \param T   tracker for the enclosing '(', used to consume or skip to ')'.
///
/// \verbatim
///   fold-expression:
///       ( cast-expression fold-operator ... )
///       ( ... fold-operator cast-expression )
///       ( cast-expression fold-operator ... fold-operator cast-expression )
/// \endverbatim
ExprResult Parser::ParseFoldExpression(ExprResult LHS,
                                       BalancedDelimiterTracker &T) {
  if (LHS.isInvalid()) {
    T.skipToEnd();
    return true;
  }

  tok::TokenKind Kind = tok::unknown;
  SourceLocation FirstOpLoc;
  if (LHS.isUsable()) {
    Kind = Tok.getKind();
    assert(isFoldOperator(Kind) && "missing fold-operator");
    FirstOpLoc = ConsumeToken();
  }

  assert(Tok.is(tok::ellipsis) && "not a fold-expression");
  SourceLocation EllipsisLoc = ConsumeToken();

  ExprResult RHS;
  if (Tok.isNot(tok::r_paren)) {
    if (!isFoldOperator(Tok.getKind()))
      return Diag(Tok.getLocation(), diag::err_expected_fold_operator);

    // Both operators of a binary fold have to be the same token kind; a
    // mismatch is diagnosed but parsing continues with the second one.
    if (Kind != tok::unknown && Tok.getKind() != Kind)
      Diag(Tok.getLocation(), diag::err_fold_operator_mismatch)
        << SourceRange(FirstOpLoc);
    Kind = Tok.getKind();
    ConsumeToken();

    RHS = ParseExpression();
    if (RHS.isInvalid()) {
      T.skipToEnd();
      return true;
    }
  }

  Diag(EllipsisLoc, getLangOpts().CPlusPlus1z
                        ? diag::warn_cxx14_compat_fold_expression
                        : diag::ext_fold_expression);

  T.consumeClose();
  return Actions.ActOnCXXFoldExpr(T.getOpenLocation(), LHS.get(), Kind,
                                  EllipsisLoc, RHS.get(), T.getCloseLocation());
}

/// ParseExpressionList - Used for C/C++ (argument-)expression-list.
/// /// \verbatim /// argument-expression-list: /// assignment-expression /// argument-expression-list , assignment-expression /// /// [C++] expression-list: /// [C++] assignment-expression /// [C++] expression-list , assignment-expression /// /// [C++0x] expression-list: /// [C++0x] initializer-list /// /// [C++0x] initializer-list /// [C++0x] initializer-clause ...[opt] /// [C++0x] initializer-list , initializer-clause ...[opt] /// /// [C++0x] initializer-clause: /// [C++0x] assignment-expression /// [C++0x] braced-init-list /// \endverbatim bool Parser::ParseExpressionList(SmallVectorImpl &Exprs, SmallVectorImpl &CommaLocs, std::function Completer) { bool SawError = false; while (1) { if (Tok.is(tok::code_completion)) { if (Completer) Completer(); else Actions.CodeCompleteOrdinaryName(getCurScope(), Sema::PCC_Expression); cutOffParsing(); return true; } ExprResult Expr; if (getLangOpts().CPlusPlus11 && Tok.is(tok::l_brace)) { Diag(Tok, diag::warn_cxx98_compat_generalized_initializer_lists); Expr = ParseBraceInitializer(); } else Expr = ParseAssignmentExpression(); if (Tok.is(tok::ellipsis)) Expr = Actions.ActOnPackExpansion(Expr.get(), ConsumeToken()); if (Expr.isInvalid()) { SkipUntil(tok::comma, tok::r_paren, StopBeforeMatch); SawError = true; } else { Exprs.push_back(Expr.get()); } if (Tok.isNot(tok::comma)) break; // Move to the next argument, remember where the comma was. CommaLocs.push_back(ConsumeToken()); } if (SawError) { // Ensure typos get diagnosed when errors were encountered while parsing the // expression list. for (auto &E : Exprs) { ExprResult Expr = Actions.CorrectDelayedTyposInExpr(E); if (Expr.isUsable()) E = Expr.get(); } } return SawError; } /// ParseSimpleExpressionList - A simple comma-separated list of expressions, /// used for misc language extensions. 
/// /// \verbatim /// simple-expression-list: /// assignment-expression /// simple-expression-list , assignment-expression /// \endverbatim bool Parser::ParseSimpleExpressionList(SmallVectorImpl &Exprs, SmallVectorImpl &CommaLocs) { while (1) { ExprResult Expr = ParseAssignmentExpression(); if (Expr.isInvalid()) return true; Exprs.push_back(Expr.get()); if (Tok.isNot(tok::comma)) return false; // Move to the next argument, remember where the comma was. CommaLocs.push_back(ConsumeToken()); } } /// ParseBlockId - Parse a block-id, which roughly looks like int (int x). /// /// \verbatim /// [clang] block-id: /// [clang] specifier-qualifier-list block-declarator /// \endverbatim void Parser::ParseBlockId(SourceLocation CaretLoc) { if (Tok.is(tok::code_completion)) { Actions.CodeCompleteOrdinaryName(getCurScope(), Sema::PCC_Type); return cutOffParsing(); } // Parse the specifier-qualifier-list piece. DeclSpec DS(AttrFactory); ParseSpecifierQualifierList(DS); // Parse the block-declarator. Declarator DeclaratorInfo(DS, Declarator::BlockLiteralContext); DeclaratorInfo.setFunctionDefinitionKind(FDK_Definition); ParseDeclarator(DeclaratorInfo); MaybeParseGNUAttributes(DeclaratorInfo); // Inform sema that we are starting a block. Actions.ActOnBlockArguments(CaretLoc, DeclaratorInfo, getCurScope()); } /// ParseBlockLiteralExpression - Parse a block literal, which roughly looks /// like ^(int x){ return x+1; } /// /// \verbatim /// block-literal: /// [clang] '^' block-args[opt] compound-statement /// [clang] '^' block-id compound-statement /// [clang] block-args: /// [clang] '(' parameter-list ')' /// \endverbatim ExprResult Parser::ParseBlockLiteralExpression() { assert(Tok.is(tok::caret) && "block literal starts with ^"); SourceLocation CaretLoc = ConsumeToken(); PrettyStackTraceLoc CrashInfo(PP.getSourceManager(), CaretLoc, "block literal parsing"); // Enter a scope to hold everything within the block. 
This includes the // argument decls, decls within the compound expression, etc. This also // allows determining whether a variable reference inside the block is // within or outside of the block. ParseScope BlockScope(this, Scope::BlockScope | Scope::FnScope | Scope::DeclScope); // Inform sema that we are starting a block. Actions.ActOnBlockStart(CaretLoc, getCurScope()); // Parse the return type if present. DeclSpec DS(AttrFactory); Declarator ParamInfo(DS, Declarator::BlockLiteralContext); ParamInfo.setFunctionDefinitionKind(FDK_Definition); // FIXME: Since the return type isn't actually parsed, it can't be used to // fill ParamInfo with an initial valid range, so do it manually. ParamInfo.SetSourceRange(SourceRange(Tok.getLocation(), Tok.getLocation())); // If this block has arguments, parse them. There is no ambiguity here with // the expression case, because the expression case requires a parameter list. if (Tok.is(tok::l_paren)) { ParseParenDeclarator(ParamInfo); // Parse the pieces after the identifier as if we had "int(...)". // SetIdentifier sets the source range end, but in this case we're past // that location. SourceLocation Tmp = ParamInfo.getSourceRange().getEnd(); ParamInfo.SetIdentifier(nullptr, CaretLoc); ParamInfo.SetRangeEnd(Tmp); if (ParamInfo.isInvalidType()) { // If there was an error parsing the arguments, they may have // tried to use ^(x+y) which requires an argument list. Just // skip the whole block literal. Actions.ActOnBlockError(CaretLoc, getCurScope()); return ExprError(); } MaybeParseGNUAttributes(ParamInfo); // Inform sema that we are starting a block. Actions.ActOnBlockArguments(CaretLoc, ParamInfo, getCurScope()); } else if (!Tok.is(tok::l_brace)) { ParseBlockId(CaretLoc); } else { // Otherwise, pretend we saw (void). 
ParsedAttributes attrs(AttrFactory); SourceLocation NoLoc; ParamInfo.AddTypeInfo(DeclaratorChunk::getFunction(/*HasProto=*/true, /*IsAmbiguous=*/false, /*RParenLoc=*/NoLoc, /*ArgInfo=*/nullptr, /*NumArgs=*/0, /*EllipsisLoc=*/NoLoc, /*RParenLoc=*/NoLoc, /*TypeQuals=*/0, /*RefQualifierIsLvalueRef=*/true, /*RefQualifierLoc=*/NoLoc, /*ConstQualifierLoc=*/NoLoc, /*VolatileQualifierLoc=*/NoLoc, /*RestrictQualifierLoc=*/NoLoc, /*MutableLoc=*/NoLoc, EST_None, /*ESpecRange=*/SourceRange(), /*Exceptions=*/nullptr, /*ExceptionRanges=*/nullptr, /*NumExceptions=*/0, /*NoexceptExpr=*/nullptr, /*ExceptionSpecTokens=*/nullptr, /*DeclsInPrototype=*/None, CaretLoc, CaretLoc, ParamInfo), attrs, CaretLoc); MaybeParseGNUAttributes(ParamInfo); // Inform sema that we are starting a block. Actions.ActOnBlockArguments(CaretLoc, ParamInfo, getCurScope()); } ExprResult Result(true); if (!Tok.is(tok::l_brace)) { // Saw something like: ^expr Diag(Tok, diag::err_expected_expression); Actions.ActOnBlockError(CaretLoc, getCurScope()); return ExprError(); } StmtResult Stmt(ParseCompoundStatementBody()); BlockScope.Exit(); if (!Stmt.isInvalid()) Result = Actions.ActOnBlockStmtExpr(CaretLoc, Stmt.get(), getCurScope()); else Actions.ActOnBlockError(CaretLoc, getCurScope()); return Result; } /// ParseObjCBoolLiteral - This handles the objective-c Boolean literals. /// /// '__objc_yes' /// '__objc_no' ExprResult Parser::ParseObjCBoolLiteral() { tok::TokenKind Kind = Tok.getKind(); return Actions.ActOnObjCBoolLiteral(ConsumeToken(), Kind); } /// Validate availability spec list, emitting diagnostics if necessary. Returns /// true if invalid. 
static bool CheckAvailabilitySpecList(Parser &P, ArrayRef AvailSpecs) { llvm::SmallSet Platforms; bool HasOtherPlatformSpec = false; bool Valid = true; for (const auto &Spec : AvailSpecs) { if (Spec.isOtherPlatformSpec()) { if (HasOtherPlatformSpec) { P.Diag(Spec.getBeginLoc(), diag::err_availability_query_repeated_star); Valid = false; } HasOtherPlatformSpec = true; continue; } bool Inserted = Platforms.insert(Spec.getPlatform()).second; if (!Inserted) { // Rule out multiple version specs referring to the same platform. // For example, we emit an error for: // @available(macos 10.10, macos 10.11, *) StringRef Platform = Spec.getPlatform(); P.Diag(Spec.getBeginLoc(), diag::err_availability_query_repeated_platform) << Spec.getEndLoc() << Platform; Valid = false; } } if (!HasOtherPlatformSpec) { SourceLocation InsertWildcardLoc = AvailSpecs.back().getEndLoc(); P.Diag(InsertWildcardLoc, diag::err_availability_query_wildcard_required) << FixItHint::CreateInsertion(InsertWildcardLoc, ", *"); return true; } return !Valid; } /// Parse availability query specification. /// /// availability-spec: /// '*' /// identifier version-tuple Optional Parser::ParseAvailabilitySpec() { if (Tok.is(tok::star)) { return AvailabilitySpec(ConsumeToken()); } else { // Parse the platform name. 
if (Tok.isNot(tok::identifier)) { Diag(Tok, diag::err_avail_query_expected_platform_name); return None; } IdentifierLoc *PlatformIdentifier = ParseIdentifierLoc(); SourceRange VersionRange; VersionTuple Version = ParseVersionTuple(VersionRange); if (Version.empty()) return None; StringRef Platform = PlatformIdentifier->Ident->getName(); if (AvailabilityAttr::getPrettyPlatformName(Platform).empty()) { Diag(PlatformIdentifier->Loc, diag::err_avail_query_unrecognized_platform_name) << Platform; return None; } return AvailabilitySpec(Version, Platform, PlatformIdentifier->Loc, VersionRange.getEnd()); } } ExprResult Parser::ParseAvailabilityCheckExpr(SourceLocation BeginLoc) { assert(Tok.is(tok::kw___builtin_available) || Tok.isObjCAtKeyword(tok::objc_available)); // Eat the available or __builtin_available. ConsumeToken(); BalancedDelimiterTracker Parens(*this, tok::l_paren); if (Parens.expectAndConsume()) return ExprError(); SmallVector AvailSpecs; bool HasError = false; while (true) { Optional Spec = ParseAvailabilitySpec(); if (!Spec) HasError = true; else AvailSpecs.push_back(*Spec); if (!TryConsumeToken(tok::comma)) break; } if (HasError) { SkipUntil(tok::r_paren, StopAtSemi); return ExprError(); } CheckAvailabilitySpecList(*this, AvailSpecs); if (Parens.consumeClose()) return ExprError(); return Actions.ActOnObjCAvailabilityCheckExpr(AvailSpecs, BeginLoc, Parens.getCloseLocation()); } Index: projects/clang400-import/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp =================================================================== --- projects/clang400-import/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp (revision 313893) +++ projects/clang400-import/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp (revision 313894) @@ -1,5124 +1,5127 @@ //===--------------------- SemaLookup.cpp - Name Lookup ------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. 
See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements name lookup for C, C++, Objective-C, and // Objective-C++. // //===----------------------------------------------------------------------===// #include "clang/AST/ASTContext.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclLookups.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/LangOptions.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/ModuleLoader.h" #include "clang/Lex/Preprocessor.h" #include "clang/Sema/DeclSpec.h" #include "clang/Sema/Lookup.h" #include "clang/Sema/Overload.h" #include "clang/Sema/Scope.h" #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/Sema.h" #include "clang/Sema/SemaInternal.h" #include "clang/Sema/TemplateDeduction.h" #include "clang/Sema/TypoCorrection.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/ADT/edit_distance.h" #include "llvm/Support/ErrorHandling.h" #include #include #include #include #include #include using namespace clang; using namespace sema; namespace { class UnqualUsingEntry { const DeclContext *Nominated; const DeclContext *CommonAncestor; public: UnqualUsingEntry(const DeclContext *Nominated, const DeclContext *CommonAncestor) : Nominated(Nominated), CommonAncestor(CommonAncestor) { } const DeclContext *getCommonAncestor() const { return CommonAncestor; } const DeclContext *getNominatedNamespace() const { return Nominated; } // Sort by the pointer value of the common ancestor. 
struct Comparator { bool operator()(const UnqualUsingEntry &L, const UnqualUsingEntry &R) { return L.getCommonAncestor() < R.getCommonAncestor(); } bool operator()(const UnqualUsingEntry &E, const DeclContext *DC) { return E.getCommonAncestor() < DC; } bool operator()(const DeclContext *DC, const UnqualUsingEntry &E) { return DC < E.getCommonAncestor(); } }; }; /// A collection of using directives, as used by C++ unqualified /// lookup. class UnqualUsingDirectiveSet { typedef SmallVector ListTy; ListTy list; llvm::SmallPtrSet visited; public: UnqualUsingDirectiveSet() {} void visitScopeChain(Scope *S, Scope *InnermostFileScope) { // C++ [namespace.udir]p1: // During unqualified name lookup, the names appear as if they // were declared in the nearest enclosing namespace which contains // both the using-directive and the nominated namespace. DeclContext *InnermostFileDC = InnermostFileScope->getEntity(); assert(InnermostFileDC && InnermostFileDC->isFileContext()); for (; S; S = S->getParent()) { // C++ [namespace.udir]p1: // A using-directive shall not appear in class scope, but may // appear in namespace scope or in block scope. DeclContext *Ctx = S->getEntity(); if (Ctx && Ctx->isFileContext()) { visit(Ctx, Ctx); } else if (!Ctx || Ctx->isFunctionOrMethod()) { for (auto *I : S->using_directives()) visit(I, InnermostFileDC); } } } // Visits a context and collect all of its using directives // recursively. Treats all using directives as if they were // declared in the context. // // A given context is only every visited once, so it is important // that contexts be visited from the inside out in order to get // the effective DCs right. void visit(DeclContext *DC, DeclContext *EffectiveDC) { if (!visited.insert(DC).second) return; addUsingDirectives(DC, EffectiveDC); } // Visits a using directive and collects all of its using // directives recursively. Treats all using directives as if they // were declared in the effective DC. 
void visit(UsingDirectiveDecl *UD, DeclContext *EffectiveDC) { DeclContext *NS = UD->getNominatedNamespace(); if (!visited.insert(NS).second) return; addUsingDirective(UD, EffectiveDC); addUsingDirectives(NS, EffectiveDC); } // Adds all the using directives in a context (and those nominated // by its using directives, transitively) as if they appeared in // the given effective context. void addUsingDirectives(DeclContext *DC, DeclContext *EffectiveDC) { SmallVector queue; while (true) { for (auto UD : DC->using_directives()) { DeclContext *NS = UD->getNominatedNamespace(); if (visited.insert(NS).second) { addUsingDirective(UD, EffectiveDC); queue.push_back(NS); } } if (queue.empty()) return; DC = queue.pop_back_val(); } } // Add a using directive as if it had been declared in the given // context. This helps implement C++ [namespace.udir]p3: // The using-directive is transitive: if a scope contains a // using-directive that nominates a second namespace that itself // contains using-directives, the effect is as if the // using-directives from the second namespace also appeared in // the first. void addUsingDirective(UsingDirectiveDecl *UD, DeclContext *EffectiveDC) { // Find the common ancestor between the effective context and // the nominated namespace. 
DeclContext *Common = UD->getNominatedNamespace(); while (!Common->Encloses(EffectiveDC)) Common = Common->getParent(); Common = Common->getPrimaryContext(); list.push_back(UnqualUsingEntry(UD->getNominatedNamespace(), Common)); } void done() { std::sort(list.begin(), list.end(), UnqualUsingEntry::Comparator()); } typedef ListTy::const_iterator const_iterator; const_iterator begin() const { return list.begin(); } const_iterator end() const { return list.end(); } llvm::iterator_range getNamespacesFor(DeclContext *DC) const { return llvm::make_range(std::equal_range(begin(), end(), DC->getPrimaryContext(), UnqualUsingEntry::Comparator())); } }; } // end anonymous namespace // Retrieve the set of identifier namespaces that correspond to a // specific kind of name lookup. static inline unsigned getIDNS(Sema::LookupNameKind NameKind, bool CPlusPlus, bool Redeclaration) { unsigned IDNS = 0; switch (NameKind) { case Sema::LookupObjCImplicitSelfParam: case Sema::LookupOrdinaryName: case Sema::LookupRedeclarationWithLinkage: case Sema::LookupLocalFriendName: IDNS = Decl::IDNS_Ordinary; if (CPlusPlus) { IDNS |= Decl::IDNS_Tag | Decl::IDNS_Member | Decl::IDNS_Namespace; if (Redeclaration) IDNS |= Decl::IDNS_TagFriend | Decl::IDNS_OrdinaryFriend; } if (Redeclaration) IDNS |= Decl::IDNS_LocalExtern; break; case Sema::LookupOperatorName: // Operator lookup is its own crazy thing; it is not the same // as (e.g.) looking up an operator name for redeclaration. assert(!Redeclaration && "cannot do redeclaration operator lookup"); IDNS = Decl::IDNS_NonMemberOperator; break; case Sema::LookupTagName: if (CPlusPlus) { IDNS = Decl::IDNS_Type; // When looking for a redeclaration of a tag name, we add: // 1) TagFriend to find undeclared friend decls // 2) Namespace because they can't "overload" with tag decls. // 3) Tag because it includes class templates, which can't // "overload" with tag decls. 
if (Redeclaration) IDNS |= Decl::IDNS_Tag | Decl::IDNS_TagFriend | Decl::IDNS_Namespace; } else { IDNS = Decl::IDNS_Tag; } break; case Sema::LookupLabel: IDNS = Decl::IDNS_Label; break; case Sema::LookupMemberName: IDNS = Decl::IDNS_Member; if (CPlusPlus) IDNS |= Decl::IDNS_Tag | Decl::IDNS_Ordinary; break; case Sema::LookupNestedNameSpecifierName: IDNS = Decl::IDNS_Type | Decl::IDNS_Namespace; break; case Sema::LookupNamespaceName: IDNS = Decl::IDNS_Namespace; break; case Sema::LookupUsingDeclName: assert(Redeclaration && "should only be used for redecl lookup"); IDNS = Decl::IDNS_Ordinary | Decl::IDNS_Tag | Decl::IDNS_Member | Decl::IDNS_Using | Decl::IDNS_TagFriend | Decl::IDNS_OrdinaryFriend | Decl::IDNS_LocalExtern; break; case Sema::LookupObjCProtocolName: IDNS = Decl::IDNS_ObjCProtocol; break; case Sema::LookupOMPReductionName: IDNS = Decl::IDNS_OMPReduction; break; case Sema::LookupAnyName: IDNS = Decl::IDNS_Ordinary | Decl::IDNS_Tag | Decl::IDNS_Member | Decl::IDNS_Using | Decl::IDNS_Namespace | Decl::IDNS_ObjCProtocol | Decl::IDNS_Type; break; } return IDNS; } void LookupResult::configure() { IDNS = getIDNS(LookupKind, getSema().getLangOpts().CPlusPlus, isForRedeclaration()); // If we're looking for one of the allocation or deallocation // operators, make sure that the implicitly-declared new and delete // operators can be found. switch (NameInfo.getName().getCXXOverloadedOperator()) { case OO_New: case OO_Delete: case OO_Array_New: case OO_Array_Delete: getSema().DeclareGlobalNewDelete(); break; default: break; } // Compiler builtins are always visible, regardless of where they end // up being declared. if (IdentifierInfo *Id = NameInfo.getName().getAsIdentifierInfo()) { if (unsigned BuiltinID = Id->getBuiltinID()) { if (!getSema().Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID)) AllowHidden = true; } } } bool LookupResult::sanity() const { // This function is never called by NDEBUG builds. 
assert(ResultKind != NotFound || Decls.size() == 0); assert(ResultKind != Found || Decls.size() == 1); assert(ResultKind != FoundOverloaded || Decls.size() > 1 || (Decls.size() == 1 && isa((*begin())->getUnderlyingDecl()))); assert(ResultKind != FoundUnresolvedValue || sanityCheckUnresolved()); assert(ResultKind != Ambiguous || Decls.size() > 1 || (Decls.size() == 1 && (Ambiguity == AmbiguousBaseSubobjects || Ambiguity == AmbiguousBaseSubobjectTypes))); assert((Paths != nullptr) == (ResultKind == Ambiguous && (Ambiguity == AmbiguousBaseSubobjectTypes || Ambiguity == AmbiguousBaseSubobjects))); return true; } // Necessary because CXXBasePaths is not complete in Sema.h void LookupResult::deletePaths(CXXBasePaths *Paths) { delete Paths; } /// Get a representative context for a declaration such that two declarations /// will have the same context if they were found within the same scope. static DeclContext *getContextForScopeMatching(Decl *D) { // For function-local declarations, use that function as the context. This // doesn't account for scopes within the function; the caller must deal with // those. DeclContext *DC = D->getLexicalDeclContext(); if (DC->isFunctionOrMethod()) return DC; // Otherwise, look at the semantic context of the declaration. The // declaration must have been found there. return D->getDeclContext()->getRedeclContext(); } /// \brief Determine whether \p D is a better lookup result than \p Existing, /// given that they declare the same entity. static bool isPreferredLookupResult(Sema &S, Sema::LookupNameKind Kind, NamedDecl *D, NamedDecl *Existing) { // When looking up redeclarations of a using declaration, prefer a using // shadow declaration over any other declaration of the same entity. 
if (Kind == Sema::LookupUsingDeclName && isa(D) && !isa(Existing)) return true; auto *DUnderlying = D->getUnderlyingDecl(); auto *EUnderlying = Existing->getUnderlyingDecl(); // If they have different underlying declarations, prefer a typedef over the // original type (this happens when two type declarations denote the same // type), per a generous reading of C++ [dcl.typedef]p3 and p4. The typedef // might carry additional semantic information, such as an alignment override. // However, per C++ [dcl.typedef]p5, when looking up a tag name, prefer a tag // declaration over a typedef. if (DUnderlying->getCanonicalDecl() != EUnderlying->getCanonicalDecl()) { assert(isa(DUnderlying) && isa(EUnderlying)); bool HaveTag = isa(EUnderlying); bool WantTag = Kind == Sema::LookupTagName; return HaveTag != WantTag; } // Pick the function with more default arguments. // FIXME: In the presence of ambiguous default arguments, we should keep both, // so we can diagnose the ambiguity if the default argument is needed. // See C++ [over.match.best]p3. if (auto *DFD = dyn_cast(DUnderlying)) { auto *EFD = cast(EUnderlying); unsigned DMin = DFD->getMinRequiredArguments(); unsigned EMin = EFD->getMinRequiredArguments(); // If D has more default arguments, it is preferred. if (DMin != EMin) return DMin < EMin; // FIXME: When we track visibility for default function arguments, check // that we pick the declaration with more visible default arguments. } // Pick the template with more default template arguments. if (auto *DTD = dyn_cast(DUnderlying)) { auto *ETD = cast(EUnderlying); unsigned DMin = DTD->getTemplateParameters()->getMinRequiredArguments(); unsigned EMin = ETD->getTemplateParameters()->getMinRequiredArguments(); // If D has more default arguments, it is preferred. Note that default // arguments (and their visibility) is monotonically increasing across the // redeclaration chain, so this is a quick proxy for "is more recent". 
if (DMin != EMin) return DMin < EMin; // If D has more *visible* default arguments, it is preferred. Note, an // earlier default argument being visible does not imply that a later // default argument is visible, so we can't just check the first one. for (unsigned I = DMin, N = DTD->getTemplateParameters()->size(); I != N; ++I) { if (!S.hasVisibleDefaultArgument( ETD->getTemplateParameters()->getParam(I)) && S.hasVisibleDefaultArgument( DTD->getTemplateParameters()->getParam(I))) return true; } } // VarDecl can have incomplete array types, prefer the one with more complete // array type. if (VarDecl *DVD = dyn_cast(DUnderlying)) { VarDecl *EVD = cast(EUnderlying); if (EVD->getType()->isIncompleteType() && !DVD->getType()->isIncompleteType()) { // Prefer the decl with a more complete type if visible. return S.isVisible(DVD); } return false; // Avoid picking up a newer decl, just because it was newer. } // For most kinds of declaration, it doesn't really matter which one we pick. if (!isa(DUnderlying) && !isa(DUnderlying)) { // If the existing declaration is hidden, prefer the new one. Otherwise, // keep what we've got. return !S.isVisible(Existing); } // Pick the newer declaration; it might have a more precise type. for (Decl *Prev = DUnderlying->getPreviousDecl(); Prev; Prev = Prev->getPreviousDecl()) if (Prev == EUnderlying) return true; return false; } /// Determine whether \p D can hide a tag declaration. static bool canHideTag(NamedDecl *D) { // C++ [basic.scope.declarative]p4: // Given a set of declarations in a single declarative region [...] // exactly one declaration shall declare a class name or enumeration name // that is not a typedef name and the other declarations shall all refer to // the same variable, non-static data member, or enumerator, or all refer // to functions and function templates; in this case the class name or // enumeration name is hidden. 
// C++ [basic.scope.hiding]p2: // A class name or enumeration name can be hidden by the name of a // variable, data member, function, or enumerator declared in the same // scope. // An UnresolvedUsingValueDecl always instantiates to one of these. D = D->getUnderlyingDecl(); return isa(D) || isa(D) || isa(D) || isa(D) || isa(D) || isa(D); } /// Resolves the result kind of this lookup. void LookupResult::resolveKind() { unsigned N = Decls.size(); // Fast case: no possible ambiguity. if (N == 0) { assert(ResultKind == NotFound || ResultKind == NotFoundInCurrentInstantiation); return; } // If there's a single decl, we need to examine it to decide what // kind of lookup this is. if (N == 1) { NamedDecl *D = (*Decls.begin())->getUnderlyingDecl(); if (isa(D)) ResultKind = FoundOverloaded; else if (isa(D)) ResultKind = FoundUnresolvedValue; return; } // Don't do any extra resolution if we've already resolved as ambiguous. if (ResultKind == Ambiguous) return; llvm::SmallDenseMap Unique; llvm::SmallDenseMap UniqueTypes; bool Ambiguous = false; bool HasTag = false, HasFunction = false; bool HasFunctionTemplate = false, HasUnresolved = false; NamedDecl *HasNonFunction = nullptr; llvm::SmallVector EquivalentNonFunctions; unsigned UniqueTagIndex = 0; unsigned I = 0; while (I < N) { NamedDecl *D = Decls[I]->getUnderlyingDecl(); D = cast(D->getCanonicalDecl()); // Ignore an invalid declaration unless it's the only one left. if (D->isInvalidDecl() && !(I == 0 && N == 1)) { Decls[I] = Decls[--N]; continue; } llvm::Optional ExistingI; // Redeclarations of types via typedef can occur both within a scope // and, through using declarations and directives, across scopes. There is // no ambiguity if they all refer to the same type, so unique based on the // canonical type. 
if (TypeDecl *TD = dyn_cast(D)) { QualType T = getSema().Context.getTypeDeclType(TD); auto UniqueResult = UniqueTypes.insert( std::make_pair(getSema().Context.getCanonicalType(T), I)); if (!UniqueResult.second) { // The type is not unique. ExistingI = UniqueResult.first->second; } } // For non-type declarations, check for a prior lookup result naming this // canonical declaration. if (!ExistingI) { auto UniqueResult = Unique.insert(std::make_pair(D, I)); if (!UniqueResult.second) { // We've seen this entity before. ExistingI = UniqueResult.first->second; } } if (ExistingI) { // This is not a unique lookup result. Pick one of the results and // discard the other. if (isPreferredLookupResult(getSema(), getLookupKind(), Decls[I], Decls[*ExistingI])) Decls[*ExistingI] = Decls[I]; Decls[I] = Decls[--N]; continue; } // Otherwise, do some decl type analysis and then continue. if (isa(D)) { HasUnresolved = true; } else if (isa(D)) { if (HasTag) Ambiguous = true; UniqueTagIndex = I; HasTag = true; } else if (isa(D)) { HasFunction = true; HasFunctionTemplate = true; } else if (isa(D)) { HasFunction = true; } else { if (HasNonFunction) { // If we're about to create an ambiguity between two declarations that // are equivalent, but one is an internal linkage declaration from one // module and the other is an internal linkage declaration from another // module, just skip it. if (getSema().isEquivalentInternalLinkageDeclaration(HasNonFunction, D)) { EquivalentNonFunctions.push_back(D); Decls[I] = Decls[--N]; continue; } Ambiguous = true; } HasNonFunction = D; } I++; } // C++ [basic.scope.hiding]p2: // A class name or enumeration name can be hidden by the name of // an object, function, or enumerator declared in the same // scope. If a class or enumeration name and an object, function, // or enumerator are declared in the same scope (in any order) // with the same name, the class or enumeration name is hidden // wherever the object, function, or enumerator name is visible. 
// But it's still an error if there are distinct tag types found, // even if they're not visible. (ref?) if (N > 1 && HideTags && HasTag && !Ambiguous && (HasFunction || HasNonFunction || HasUnresolved)) { NamedDecl *OtherDecl = Decls[UniqueTagIndex ? 0 : N - 1]; if (isa(Decls[UniqueTagIndex]->getUnderlyingDecl()) && getContextForScopeMatching(Decls[UniqueTagIndex])->Equals( getContextForScopeMatching(OtherDecl)) && canHideTag(OtherDecl)) Decls[UniqueTagIndex] = Decls[--N]; else Ambiguous = true; } // FIXME: This diagnostic should really be delayed until we're done with // the lookup result, in case the ambiguity is resolved by the caller. if (!EquivalentNonFunctions.empty() && !Ambiguous) getSema().diagnoseEquivalentInternalLinkageDeclarations( getNameLoc(), HasNonFunction, EquivalentNonFunctions); Decls.set_size(N); if (HasNonFunction && (HasFunction || HasUnresolved)) Ambiguous = true; if (Ambiguous) setAmbiguous(LookupResult::AmbiguousReference); else if (HasUnresolved) ResultKind = LookupResult::FoundUnresolvedValue; else if (N > 1 || HasFunctionTemplate) ResultKind = LookupResult::FoundOverloaded; else ResultKind = LookupResult::Found; } void LookupResult::addDeclsFromBasePaths(const CXXBasePaths &P) { CXXBasePaths::const_paths_iterator I, E; for (I = P.begin(), E = P.end(); I != E; ++I) for (DeclContext::lookup_iterator DI = I->Decls.begin(), DE = I->Decls.end(); DI != DE; ++DI) addDecl(*DI); } void LookupResult::setAmbiguousBaseSubobjects(CXXBasePaths &P) { Paths = new CXXBasePaths; Paths->swap(P); addDeclsFromBasePaths(*Paths); resolveKind(); setAmbiguous(AmbiguousBaseSubobjects); } void LookupResult::setAmbiguousBaseSubobjectTypes(CXXBasePaths &P) { Paths = new CXXBasePaths; Paths->swap(P); addDeclsFromBasePaths(*Paths); resolveKind(); setAmbiguous(AmbiguousBaseSubobjectTypes); } void LookupResult::print(raw_ostream &Out) { Out << Decls.size() << " result(s)"; if (isAmbiguous()) Out << ", ambiguous"; if (Paths) Out << ", base paths present"; for (iterator 
I = begin(), E = end(); I != E; ++I) { Out << "\n"; (*I)->print(Out, 2); } } LLVM_DUMP_METHOD void LookupResult::dump() { llvm::errs() << "lookup results for " << getLookupName().getAsString() << ":\n"; for (NamedDecl *D : *this) D->dump(); } /// \brief Lookup a builtin function, when name lookup would otherwise /// fail. static bool LookupBuiltin(Sema &S, LookupResult &R) { Sema::LookupNameKind NameKind = R.getLookupKind(); // If we didn't find a use of this identifier, and if the identifier // corresponds to a compiler builtin, create the decl object for the builtin // now, injecting it into translation unit scope, and return it. if (NameKind == Sema::LookupOrdinaryName || NameKind == Sema::LookupRedeclarationWithLinkage) { IdentifierInfo *II = R.getLookupName().getAsIdentifierInfo(); if (II) { if (S.getLangOpts().CPlusPlus && NameKind == Sema::LookupOrdinaryName) { if (II == S.getASTContext().getMakeIntegerSeqName()) { R.addDecl(S.getASTContext().getMakeIntegerSeqDecl()); return true; } else if (II == S.getASTContext().getTypePackElementName()) { R.addDecl(S.getASTContext().getTypePackElementDecl()); return true; } } // If this is a builtin on this (or all) targets, create the decl. if (unsigned BuiltinID = II->getBuiltinID()) { // In C++ and OpenCL (spec v1.2 s6.9.f), we don't have any predefined // library functions like 'malloc'. Instead, we'll just error. if ((S.getLangOpts().CPlusPlus || S.getLangOpts().OpenCL) && S.Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID)) return false; if (NamedDecl *D = S.LazilyCreateBuiltin((IdentifierInfo *)II, BuiltinID, S.TUScope, R.isForRedeclaration(), R.getNameLoc())) { R.addDecl(D); return true; } } } } return false; } /// \brief Determine whether we can declare a special member function within /// the class at this point. static bool CanDeclareSpecialMemberFunction(const CXXRecordDecl *Class) { // We need to have a definition for the class. 
if (!Class->getDefinition() || Class->isDependentContext()) return false; // We can't be in the middle of defining the class. return !Class->isBeingDefined(); } void Sema::ForceDeclarationOfImplicitMembers(CXXRecordDecl *Class) { if (!CanDeclareSpecialMemberFunction(Class)) return; // If the default constructor has not yet been declared, do so now. if (Class->needsImplicitDefaultConstructor()) DeclareImplicitDefaultConstructor(Class); // If the copy constructor has not yet been declared, do so now. if (Class->needsImplicitCopyConstructor()) DeclareImplicitCopyConstructor(Class); // If the copy assignment operator has not yet been declared, do so now. if (Class->needsImplicitCopyAssignment()) DeclareImplicitCopyAssignment(Class); if (getLangOpts().CPlusPlus11) { // If the move constructor has not yet been declared, do so now. if (Class->needsImplicitMoveConstructor()) DeclareImplicitMoveConstructor(Class); // If the move assignment operator has not yet been declared, do so now. if (Class->needsImplicitMoveAssignment()) DeclareImplicitMoveAssignment(Class); } // If the destructor has not yet been declared, do so now. if (Class->needsImplicitDestructor()) DeclareImplicitDestructor(Class); } /// \brief Determine whether this is the name of an implicitly-declared /// special member function. static bool isImplicitlyDeclaredMemberFunctionName(DeclarationName Name) { switch (Name.getNameKind()) { case DeclarationName::CXXConstructorName: case DeclarationName::CXXDestructorName: return true; case DeclarationName::CXXOperatorName: return Name.getCXXOverloadedOperator() == OO_Equal; default: break; } return false; } /// \brief If there are any implicit member functions with the given name /// that need to be declared in the given declaration context, do so. 
static void DeclareImplicitMemberFunctionsWithName(Sema &S,
                                                   DeclarationName Name,
                                                   const DeclContext *DC) {
  // A null context is accepted and simply ignored.
  if (!DC)
    return;

  // NOTE(review): the dyn_cast/const_cast calls in this function carry no
  // explicit template argument; it looks like the '<...>' lists were lost
  // when this text was extracted. Confirm against the upstream file.
  switch (Name.getNameKind()) {
  case DeclarationName::CXXConstructorName:
    // Constructor name: declare whichever implicit constructors are pending.
    if (const CXXRecordDecl *Record = dyn_cast(DC))
      if (Record->getDefinition() && CanDeclareSpecialMemberFunction(Record)) {
        CXXRecordDecl *Class = const_cast(Record);
        if (Record->needsImplicitDefaultConstructor())
          S.DeclareImplicitDefaultConstructor(Class);
        if (Record->needsImplicitCopyConstructor())
          S.DeclareImplicitCopyConstructor(Class);
        if (S.getLangOpts().CPlusPlus11 &&
            Record->needsImplicitMoveConstructor())
          S.DeclareImplicitMoveConstructor(Class);
      }
    break;

  case DeclarationName::CXXDestructorName:
    if (const CXXRecordDecl *Record = dyn_cast(DC))
      if (Record->getDefinition() && Record->needsImplicitDestructor() &&
          CanDeclareSpecialMemberFunction(Record))
        S.DeclareImplicitDestructor(const_cast(Record));
    break;

  case DeclarationName::CXXOperatorName:
    // Only operator= can be implicitly declared.
    if (Name.getCXXOverloadedOperator() != OO_Equal)
      break;

    if (const CXXRecordDecl *Record = dyn_cast(DC)) {
      if (Record->getDefinition() && CanDeclareSpecialMemberFunction(Record)) {
        CXXRecordDecl *Class = const_cast(Record);
        if (Record->needsImplicitCopyAssignment())
          S.DeclareImplicitCopyAssignment(Class);
        if (S.getLangOpts().CPlusPlus11 &&
            Record->needsImplicitMoveAssignment())
          S.DeclareImplicitMoveAssignment(Class);
      }
    }
    break;

  default:
    break;
  }
}

// Adds all qualifying matches for a name within a decl context to the
// given lookup result. Returns true if any matches were found.
//
// Besides the plain DeclContext lookup this also (a) falls back to
// LookupBuiltin() when nothing was found in the translation unit, and
// (b) for non-dependent conversion-function names, adds specializations
// deduced from the record's conversion function templates.
static bool LookupDirect(Sema &S, LookupResult &R, const DeclContext *DC) {
  bool Found = false;

  // Lazily declare C++ special member functions.
  if (S.getLangOpts().CPlusPlus)
    DeclareImplicitMemberFunctionsWithName(S, R.getLookupName(), DC);

  // Perform lookup into this declaration context.
  DeclContext::lookup_result DR = DC->lookup(R.getLookupName());
  for (DeclContext::lookup_iterator I = DR.begin(), E = DR.end(); I != E;
       ++I) {
    NamedDecl *D = *I;
    // getAcceptableDecl() may hand back a different declaration than the one
    // passed in, or null; only a non-null result is recorded.
    if ((D = R.getAcceptableDecl(D))) {
      R.addDecl(D);
      Found = true;
    }
  }

  if (!Found && DC->isTranslationUnit() && LookupBuiltin(S, R))
    return true;

  // Everything below applies only to non-dependent conversion function names.
  // NOTE(review): isa/cast/dyn_cast/getAs below have no explicit template
  // argument; the '<...>' lists appear to have been stripped. Confirm.
  if (R.getLookupName().getNameKind()
        != DeclarationName::CXXConversionFunctionName ||
      R.getLookupName().getCXXNameType()->isDependentType() ||
      !isa(DC))
    return Found;

  // C++ [temp.mem]p6:
  //   A specialization of a conversion function template is not found by
  //   name lookup. Instead, any conversion function templates visible in the
  //   context of the use are considered. [...]
  const CXXRecordDecl *Record = cast(DC);
  if (!Record->isCompleteDefinition())
    return Found;

  for (CXXRecordDecl::conversion_iterator U = Record->conversion_begin(),
         UEnd = Record->conversion_end(); U != UEnd; ++U) {
    FunctionTemplateDecl *ConvTemplate = dyn_cast(*U);
    if (!ConvTemplate)
      continue;

    // When we're performing lookup for the purposes of redeclaration, just
    // add the conversion function template. When we deduce template
    // arguments for specializations, we'll end up unifying the return
    // type of the new declaration with the type of the function template.
    if (R.isForRedeclaration()) {
      R.addDecl(ConvTemplate);
      Found = true;
      continue;
    }

    // C++ [temp.mem]p6:
    //   [...] For each such operator, if argument deduction succeeds
    //   (14.9.2.3), the resulting specialization is used as if found by
    //   name lookup.
    //
    // When referencing a conversion function for any purpose other than
    // a redeclaration (such that we'll be building an expression with the
    // result), perform template argument deduction and place the
    // specialization into the result set. We do this to avoid forcing all
    // callers to perform special deduction for conversion functions.
    TemplateDeductionInfo Info(R.getNameLoc());
    FunctionDecl *Specialization = nullptr;

    const FunctionProtoType *ConvProto
      = ConvTemplate->getTemplatedDecl()->getType()->getAs();
    assert(ConvProto && "Nonsensical conversion function template type");

    // Compute the type of the function that we would expect the conversion
    // function to have, if it were to match the name given.
    // FIXME: Calling convention!
    FunctionProtoType::ExtProtoInfo EPI = ConvProto->getExtProtoInfo();
    EPI.ExtInfo = EPI.ExtInfo.withCallingConv(CC_C);
    EPI.ExceptionSpec = EST_None;
    QualType ExpectedType
      = R.getSema().Context.getFunctionType(R.getLookupName().getCXXNameType(),
                                            None, EPI);

    // Perform template argument deduction against the type that we would
    // expect the function to have.
    if (R.getSema().DeduceTemplateArguments(ConvTemplate, nullptr, ExpectedType,
                                            Specialization, Info)
          == Sema::TDK_Success) {
      R.addDecl(Specialization);
      Found = true;
    }
  }

  return Found;
}

// Performs C++ unqualified lookup into the given file context.
//
// Looks directly into NS and into every namespace that UDirs reports for NS,
// then resolves the kind of the accumulated result. Returns true if any of
// those direct lookups found something.
static bool
CppNamespaceLookup(Sema &S, LookupResult &R, ASTContext &Context,
                   DeclContext *NS, UnqualUsingDirectiveSet &UDirs) {

  assert(NS && NS->isFileContext() && "CppNamespaceLookup() requires namespace!");

  // Perform direct name lookup into the LookupCtx.
  bool Found = LookupDirect(S, R, NS);

  // Perform direct name lookup into the namespaces nominated by the
  // using directives whose common ancestor is this namespace.
  for (const UnqualUsingEntry &UUE : UDirs.getNamespacesFor(NS))
    if (LookupDirect(S, R, UUE.getNominatedNamespace()))
      Found = true;

  // Resolved unconditionally, even when nothing was found.
  R.resolveKind();

  return Found;
}

// Returns true if the scope has an associated entity and that entity is a
// file context; a scope without an entity yields false.
static bool isNamespaceOrTranslationUnitScope(Scope *S) {
  if (DeclContext *Ctx = S->getEntity())
    return Ctx->isFileContext();
  return false;
}

// Find the next outer declaration context from this scope. This
// routine actually returns the semantic outer context, which may
// differ from the lexical context (encoded directly in the Scope
// stack) when we are parsing a member of a class template.
// In this case, the second element of the pair will be true, to indicate that
// name lookup should continue searching in this semantic context when
// it leaves the current template parameter scope.
//
// NOTE(review): the return type below reads 'std::pair' with no template
// arguments; the '<...>' list appears to have been stripped from this text.
// Confirm against the upstream file.
static std::pair findOuterContext(Scope *S) {
  DeclContext *DC = S->getEntity();
  DeclContext *Lexical = nullptr;
  // The lexical outer context is the entity of the nearest enclosing scope
  // that has one.
  for (Scope *OuterS = S->getParent(); OuterS;
       OuterS = OuterS->getParent()) {
    if (OuterS->getEntity()) {
      Lexical = OuterS->getEntity();
      break;
    }
  }

  // C++ [temp.local]p8:
  //   In the definition of a member of a class template that appears
  //   outside of the namespace containing the class template
  //   definition, the name of a template-parameter hides the name of
  //   a member of this namespace.
  //
  // Example:
  //
  //   namespace N {
  //     class C { };
  //
  //     template<class T> class B {
  //       void f(T);
  //     };
  //   }
  //
  //   template<class C> void N::B<C>::f(C) {
  //     C b;  // C is the template parameter, not N::C
  //   }
  //
  // In this example, the lexical context we return is the
  // TranslationUnit, while the semantic context is the namespace N.
  if (!Lexical || !DC || !S->getParent() ||
      !S->getParent()->isTemplateParamScope())
    return std::make_pair(Lexical, false);

  // Find the outermost template parameter scope.
  // For the example, this is the scope for the template parameters of
  // template<class C>.
  // NOTE(review): OutermostTemplateScope is computed here but is not read
  // again anywhere in this function.
  Scope *OutermostTemplateScope = S->getParent();
  while (OutermostTemplateScope->getParent() &&
         OutermostTemplateScope->getParent()->isTemplateParamScope())
    OutermostTemplateScope = OutermostTemplateScope->getParent();

  // Find the namespace context in which the original scope occurs. In
  // the example, this is namespace N.
  DeclContext *Semantic = DC;
  while (!Semantic->isFileContext())
    Semantic = Semantic->getParent();

  // Find the declaration context just outside of the template
  // parameter scope. This is the context in which the template is
  // being lexically declared (a namespace context). In the
  // example, this is the global scope.
  if (Lexical->isFileContext() && !Lexical->Equals(Semantic) &&
      Lexical->Encloses(Semantic))
    return std::make_pair(Semantic, true);

  return std::make_pair(Lexical, false);
}

namespace {
/// An RAII object to specify that we want to find block scope extern
/// declarations.
///
/// The constructor records whether IDNS_LocalExtern is set in R's identifier
/// namespace and then sets the find-local-extern flag from IDNS_Ordinary.
/// restore() writes the recorded value back, and the destructor calls
/// restore().
struct FindLocalExternScope {
  FindLocalExternScope(LookupResult &R)
      : R(R), OldFindLocalExtern(R.getIdentifierNamespace() &
                                 Decl::IDNS_LocalExtern) {
    R.setFindLocalExtern(R.getIdentifierNamespace() & Decl::IDNS_Ordinary);
  }
  void restore() {
    R.setFindLocalExtern(OldFindLocalExtern);
  }
  ~FindLocalExternScope() {
    restore();
  }
  LookupResult &R;
  bool OldFindLocalExtern;
};
} // end anonymous namespace

/// Performs C++ unqualified name lookup for \p R, starting at scope \p S.
///
/// The walk has two phases that share the IdResolver iterator I: first every
/// scope that is not a namespace / translation-unit scope, then the remaining
/// (file-level) scopes, consulting the collected using-directives.
///
/// \returns true if lookup found something, false otherwise.
bool Sema::CppLookupName(LookupResult &R, Scope *S) {
  assert(getLangOpts().CPlusPlus && "Can perform only C++ lookup");

  DeclarationName Name = R.getLookupName();
  Sema::LookupNameKind NameKind = R.getLookupKind();

  // If this is the name of an implicitly-declared special member function,
  // go through the scope stack to implicitly declare
  if (isImplicitlyDeclaredMemberFunctionName(Name)) {
    for (Scope *PreS = S; PreS; PreS = PreS->getParent())
      if (DeclContext *DC = PreS->getEntity())
        DeclareImplicitMemberFunctionsWithName(*this, Name, DC);
  }

  // Implicitly declare member functions with the name we're looking for, if in
  // fact we are in a scope where it matters.

  // Remember the starting scope; S itself is advanced by the loops below.
  Scope *Initial = S;
  IdentifierResolver::iterator
    I = IdResolver.begin(Name),
    IEnd = IdResolver.end();

  // First we lookup local scope.
  // We don't consider using-directives, as per 7.3.4.p1 [namespace.udir]
  // ...During unqualified name lookup (3.4.1), the names appear as if
  // they were declared in the nearest enclosing namespace which contains
  // both the using-directive and the nominated namespace.
  // [Note: in this context, "contains" means "contains directly or
  // indirectly".
  //
  // For example:
  // namespace A { int i; }
  // void foo() {
  //   int i;
  //   {
  //     using namespace A;
  //     ++i; // finds local 'i', A::i appears at global scope
  //   }
  // }
  //
  UnqualUsingDirectiveSet UDirs;
  bool VisitedUsingDirectives = false;
  bool LeftStartingScope = false;
  DeclContext *OutsideOfTemplateParamDC = nullptr;

  // When performing a scope lookup, we want to find local extern decls.
  FindLocalExternScope FindLocals(R);

  // Phase 1: scopes that are not namespace / translation-unit scopes.
  for (; S && !isNamespaceOrTranslationUnitScope(S); S = S->getParent()) {
    DeclContext *Ctx = S->getEntity();
    bool SearchNamespaceScope = true;
    // Check whether the IdResolver has anything in this scope.
    for (; I != IEnd && S->isDeclScope(*I); ++I) {
      if (NamedDecl *ND = R.getAcceptableDecl(*I)) {
        if (NameKind == LookupRedeclarationWithLinkage &&
            !(*I)->isTemplateParameter()) {
          // If it's a template parameter, we still find it, so we can diagnose
          // the invalid redeclaration.

          // Determine whether this (or a previous) declaration is
          // out-of-scope.
          if (!LeftStartingScope && !Initial->isDeclScope(*I))
            LeftStartingScope = true;

          // If we found something outside of our starting scope that
          // does not have linkage, skip it.
          if (LeftStartingScope && !((*I)->hasLinkage())) {
            R.setShadowed();
            continue;
          }
        } else {
          // We found something in this scope, we should not look at the
          // namespace scope
          SearchNamespaceScope = false;
        }
        R.addDecl(ND);
      }
    }
    if (!SearchNamespaceScope) {
      R.resolveKind();
      // NOTE(review): dyn_cast_or_null here has no explicit template
      // argument; the '<...>' list appears to have been stripped. Confirm.
      if (S->isClassScope())
        if (CXXRecordDecl *Record = dyn_cast_or_null(Ctx))
          R.setNamingClass(Record);
      return true;
    }

    if (NameKind == LookupLocalFriendName && !S->isClassScope()) {
      // C++11 [class.friend]p11:
      //   If a friend declaration appears in a local class and the name
      //   specified is an unqualified name, a prior declaration is
      //   looked up without considering scopes that are outside the
      //   innermost enclosing non-class scope.
      return false;
    }

    if (!Ctx && S->isTemplateParamScope() && OutsideOfTemplateParamDC &&
        S->getParent() && !S->getParent()->isTemplateParamScope()) {
      // We've just searched the last template parameter scope and
      // found nothing, so look into the contexts between the
      // lexical and semantic declaration contexts returned by
      // findOuterContext(). This implements the name lookup behavior
      // of C++ [temp.local]p8.
      Ctx = OutsideOfTemplateParamDC;
      OutsideOfTemplateParamDC = nullptr;
    }

    if (Ctx) {
      DeclContext *OuterCtx;
      bool SearchAfterTemplateScope;
      std::tie(OuterCtx, SearchAfterTemplateScope) = findOuterContext(S);
      if (SearchAfterTemplateScope)
        OutsideOfTemplateParamDC = OuterCtx;

      for (; Ctx && !Ctx->Equals(OuterCtx); Ctx = Ctx->getLookupParent()) {
        // We do not directly look into transparent contexts, since
        // those entities will be found in the nearest enclosing
        // non-transparent context.
        if (Ctx->isTransparentContext())
          continue;

        // We do not look directly into function or method contexts,
        // since all of the local variables and parameters of the
        // function/method are present within the Scope.
        if (Ctx->isFunctionOrMethod()) {
          // If we have an Objective-C instance method, look for ivars
          // in the corresponding interface.
          // NOTE(review): dyn_cast below has no explicit template argument
          // (apparently stripped). Confirm.
          if (ObjCMethodDecl *Method = dyn_cast(Ctx)) {
            if (Method->isInstanceMethod() && Name.getAsIdentifierInfo())
              if (ObjCInterfaceDecl *Class = Method->getClassInterface()) {
                ObjCInterfaceDecl *ClassDeclared;
                if (ObjCIvarDecl *Ivar = Class->lookupInstanceVariable(
                                                 Name.getAsIdentifierInfo(),
                                                             ClassDeclared)) {
                  if (NamedDecl *ND = R.getAcceptableDecl(Ivar)) {
                    R.addDecl(ND);
                    R.resolveKind();
                    return true;
                  }
                }
              }
          }

          continue;
        }

        // If this is a file context, we need to perform unqualified name
        // lookup considering using directives.
        if (Ctx->isFileContext()) {
          // If we haven't handled using directives yet, do so now.
          if (!VisitedUsingDirectives) {
            // Add using directives from this context up to the top level.
            for (DeclContext *UCtx = Ctx; UCtx; UCtx = UCtx->getParent()) {
              if (UCtx->isTransparentContext())
                continue;

              UDirs.visit(UCtx, UCtx);
            }

            // Find the innermost file scope, so we can add using directives
            // from local scopes.
            Scope *InnermostFileScope = S;
            while (InnermostFileScope &&
                   !isNamespaceOrTranslationUnitScope(InnermostFileScope))
              InnermostFileScope = InnermostFileScope->getParent();
            UDirs.visitScopeChain(Initial, InnermostFileScope);

            UDirs.done();

            VisitedUsingDirectives = true;
          }

          if (CppNamespaceLookup(*this, R, Context, Ctx, UDirs)) {
            R.resolveKind();
            return true;
          }

          continue;
        }

        // Perform qualified name lookup into this context.
        // FIXME: In some cases, we know that every name that could be found by
        // this qualified name lookup will also be on the identifier chain. For
        // example, inside a class without any base classes, we never need to
        // perform qualified lookup because all of the members are on top of the
        // identifier chain.
        if (LookupQualifiedName(R, Ctx, /*InUnqualifiedLookup=*/true))
          return true;
      }
    }
  }

  // Stop if we ran out of scopes.
  // FIXME:  This really, really shouldn't be happening.
  if (!S) return false;

  // If we are looking for members, no need to look into global/namespace scope.
  if (NameKind == LookupMemberName)
    return false;

  // Collect UsingDirectiveDecls in all scopes, and recursively all
  // nominated namespaces by those using-directives.
  //
  // FIXME: Cache this sorted list in Scope structure, and DeclContext, so we
  // don't build it for each lookup!
  if (!VisitedUsingDirectives) {
    UDirs.visitScopeChain(Initial, S);
    UDirs.done();
  }

  // If we're not performing redeclaration lookup, do not look for local
  // extern declarations outside of a function scope.
  if (!R.isForRedeclaration())
    FindLocals.restore();

  // Lookup namespace scope, and global scope.
  // Unqualified name lookup in C++ requires looking into scopes
  // that aren't strictly lexical, and therefore we walk through the
  // context as well as walking through the scopes.
  // Phase 2: continues from the scope where phase 1 stopped.
  for (; S; S = S->getParent()) {
    // Check whether the IdResolver has anything in this scope.
    bool Found = false;
    for (; I != IEnd && S->isDeclScope(*I); ++I) {
      if (NamedDecl *ND = R.getAcceptableDecl(*I)) {
        // We found something.  Look for anything else in our scope
        // with this same name and in an acceptable identifier
        // namespace, so that we can construct an overload set if we
        // need to.
        Found = true;
        R.addDecl(ND);
      }
    }

    if (Found && S->isTemplateParamScope()) {
      R.resolveKind();
      return true;
    }

    DeclContext *Ctx = S->getEntity();
    if (!Ctx && S->isTemplateParamScope() && OutsideOfTemplateParamDC &&
        S->getParent() && !S->getParent()->isTemplateParamScope()) {
      // We've just searched the last template parameter scope and
      // found nothing, so look into the contexts between the
      // lexical and semantic declaration contexts returned by
      // findOuterContext(). This implements the name lookup behavior
      // of C++ [temp.local]p8.
      Ctx = OutsideOfTemplateParamDC;
      OutsideOfTemplateParamDC = nullptr;
    }

    if (Ctx) {
      DeclContext *OuterCtx;
      bool SearchAfterTemplateScope;
      std::tie(OuterCtx, SearchAfterTemplateScope) = findOuterContext(S);
      if (SearchAfterTemplateScope)
        OutsideOfTemplateParamDC = OuterCtx;

      for (; Ctx && !Ctx->Equals(OuterCtx); Ctx = Ctx->getLookupParent()) {
        // We do not directly look into transparent contexts, since
        // those entities will be found in the nearest enclosing
        // non-transparent context.
        if (Ctx->isTransparentContext())
          continue;

        // If we have a context, and it's not a context stashed in the
        // template parameter scope for an out-of-line definition, also
        // look into that context.
        if (!(Found && S->isTemplateParamScope())) {
          assert(Ctx->isFileContext() &&
              "We should have been looking only at file context here already.");

          // Look into context considering using-directives.
          if (CppNamespaceLookup(*this, R, Context, Ctx, UDirs))
            Found = true;
        }

        if (Found) {
          R.resolveKind();
          return true;
        }

        // Redeclaration lookup stops at the first non-transparent context.
        if (R.isForRedeclaration() && !Ctx->isTransparentContext())
          return false;
      }
    }

    if (R.isForRedeclaration() && Ctx && !Ctx->isTransparentContext())
      return false;
  }

  return !R.empty();
}

/// \brief Find the declaration that a class temploid member specialization was
/// instantiated from, or the member itself if it is an explicit specialization.
static Decl *getInstantiatedFrom(Decl *D, MemberSpecializationInfo *MSInfo) {
  return MSInfo->isExplicitSpecialization() ? D : MSInfo->getInstantiatedFrom();
}

/// Returns the module that owns \p Entity, computing and caching a local
/// owning module for hidden, non-imported declarations when needed. May
/// return null (see the early return and the invalid-declaration branch).
///
/// NOTE(review): the isa/cast calls in this function have no explicit
/// template argument; the '<...>' lists appear to have been stripped from
/// this text. Confirm against the upstream file.
Module *Sema::getOwningModule(Decl *Entity) {
  // If it's imported, grab its owning module.
  Module *M = Entity->getImportedOwningModule();
  if (M || !isa(Entity) || !cast(Entity)->isHidden())
    return M;
  assert(!Entity->isFromASTFile() &&
         "hidden entity from AST file has no owning module");

  if (!getLangOpts().ModulesLocalVisibility) {
    // If we're not tracking visibility locally, the only way a declaration
    // can be hidden and local is if it's hidden because its parent is (for
    // instance, maybe this is a lazily-declared special member of an imported
    // class).
    auto *Parent = cast(Entity->getDeclContext());
    assert(Parent->isHidden() && "unexpectedly hidden decl");
    return getOwningModule(Parent);
  }

  // It's local and hidden; grab or compute its owning module.
  M = Entity->getLocalOwningModule();
  if (M)
    return M;

  if (auto *Containing =
          PP.getModuleContainingLocation(Entity->getLocation())) {
    M = Containing;
  } else if (Entity->isInvalidDecl() || Entity->getLocation().isInvalid()) {
    // Don't bother tracking visibility for invalid declarations with broken
    // locations.
    cast(Entity)->setHidden(false);
  } else {
    // We need to assign a module to an entity that exists outside of any
    // module, so that we can hide it from modules that we textually enter.
    // Invent a fake module for all such entities.
    if (!CachedFakeTopLevelModule) {
      // NOTE(review): the module name passed here is an empty string; it may
      // originally have been an angle-bracketed name lost in extraction.
      // Confirm against the upstream file.
      CachedFakeTopLevelModule =
          PP.getHeaderSearchInfo().getModuleMap().findOrCreateModule(
              "", nullptr, false, false).first;

      auto &SrcMgr = PP.getSourceManager();
      SourceLocation StartLoc =
          SrcMgr.getLocForStartOfFile(SrcMgr.getMainFileID());
      auto &TopLevel = ModuleScopes.empty()
                           ? VisibleModules
                           : ModuleScopes[0].OuterVisibleModules;
      TopLevel.setVisible(CachedFakeTopLevelModule, StartLoc);
    }

    M = CachedFakeTopLevelModule;
  }

  // Cache the result on the entity; M is still null on the invalid-decl path.
  if (M)
    Entity->setLocalOwningModule(M);
  return M;
}

/// Makes the merged definition \p ND visible at \p Loc: merges it into the
/// module containing \p Loc if there is one, otherwise clears its hidden
/// flag. Template parameters of a template declaration are processed
/// recursively.
void Sema::makeMergedDefinitionVisible(NamedDecl *ND, SourceLocation Loc) {
  if (auto *M = PP.getModuleContainingLocation(Loc))
    Context.mergeDefinitionIntoModule(ND, M);
  else
    // We're not building a module; just make the definition visible.
    ND->setHidden(false);

  // If ND is a template declaration, make the template parameters
  // visible too. They're not (necessarily) within a mergeable DeclContext.
  // NOTE(review): dyn_cast below has no explicit template argument
  // (apparently stripped). Confirm.
  if (auto *TD = dyn_cast(ND))
    for (auto *Param : *TD->getTemplateParameters())
      makeMergedDefinitionVisible(Param, Loc);
}

/// \brief Find the module in which the given declaration was defined.
///
/// NOTE(review): the dyn_cast/cast calls in this function have no explicit
/// template argument (apparently stripped). Confirm.
static Module *getDefiningModule(Sema &S, Decl *Entity) {
  if (FunctionDecl *FD = dyn_cast(Entity)) {
    // If this function was instantiated from a template, the defining module is
    // the module containing the pattern.
    if (FunctionDecl *Pattern = FD->getTemplateInstantiationPattern())
      Entity = Pattern;
  } else if (CXXRecordDecl *RD = dyn_cast(Entity)) {
    if (CXXRecordDecl *Pattern = RD->getTemplateInstantiationPattern())
      Entity = Pattern;
  } else if (EnumDecl *ED = dyn_cast(Entity)) {
    if (MemberSpecializationInfo *MSInfo = ED->getMemberSpecializationInfo())
      Entity = getInstantiatedFrom(ED, MSInfo);
  } else if (VarDecl *VD = dyn_cast(Entity)) {
    // FIXME: Map from variable template specializations back to the template.
    if (MemberSpecializationInfo *MSInfo = VD->getMemberSpecializationInfo())
      Entity = getInstantiatedFrom(VD, MSInfo);
  }

  // Walk up to the containing context. That might also have been instantiated
  // from a template.
  DeclContext *Context = Entity->getDeclContext();
  if (Context->isFileContext())
    return S.getOwningModule(Entity);
  return getDefiningModule(S, cast(Context));
}

/// Brings ActiveTemplateInstantiationLookupModules up to one entry per active
/// template instantiation and returns LookupModulesCache. For each new entry
/// the defining module is inserted into the cache; a null is recorded instead
/// when there is no module or it was already in the cache.
///
/// NOTE(review): 'llvm::DenseSet' below has no template argument (apparently
/// stripped). Confirm.
llvm::DenseSet &Sema::getLookupModules() {
  unsigned N = ActiveTemplateInstantiations.size();
  for (unsigned I = ActiveTemplateInstantiationLookupModules.size();
       I != N; ++I) {
    Module *M =
        getDefiningModule(*this, ActiveTemplateInstantiations[I].Entity);
    if (M && !LookupModulesCache.insert(M).second)
      M = nullptr;
    ActiveTemplateInstantiationLookupModules.push_back(M);
  }
  return LookupModulesCache;
}

/// Returns true if any module into which a definition of \p Def has been
/// merged is currently visible.
bool Sema::hasVisibleMergedDefinition(NamedDecl *Def) {
  for (Module *Merged : Context.getModulesWithMergedDefinition(Def))
    if (isModuleVisible(Merged))
      return true;
  return false;
}

// Walks the chain of inherited default arguments starting at D and returns
// true as soon as a parameter holding a non-inherited default argument is
// visible. When Modules is non-null, the owning and merged-definition modules
// of each non-visible candidate are appended to it.
//
// NOTE(review): the 'template' header, 'llvm::SmallVectorImpl' and
// 'const_cast' below have no template parameter/argument lists (apparently
// stripped). Confirm against the upstream file.
template static bool hasVisibleDefaultArgument(Sema &S, const ParmDecl *D,
                                               llvm::SmallVectorImpl *Modules) {
  if (!D->hasDefaultArgument())
    return false;

  while (D) {
    auto &DefaultArg = D->getDefaultArgStorage();
    if (!DefaultArg.isInherited() && S.isVisible(D))
      return true;

    if (!DefaultArg.isInherited() && Modules) {
      auto *NonConstD = const_cast(D);
      Modules->push_back(S.getOwningModule(NonConstD));
      const auto &Merged = S.Context.getModulesWithMergedDefinition(NonConstD);
      Modules->insert(Modules->end(), Merged.begin(), Merged.end());
    }

    // If there was a previous default argument, maybe its parameter is visible.
    D = DefaultArg.getInheritedFrom();
  }
  return false;
}

/// Dispatches on the dynamic kind of \p D to the file-local
/// hasVisibleDefaultArgument helper above.
///
/// NOTE(review): the dyn_cast/cast calls and 'llvm::SmallVectorImpl' have no
/// explicit template argument (apparently stripped). Confirm.
bool Sema::hasVisibleDefaultArgument(const NamedDecl *D,
                                     llvm::SmallVectorImpl *Modules) {
  if (auto *P = dyn_cast(D))
    return ::hasVisibleDefaultArgument(*this, P, Modules);
  if (auto *P = dyn_cast(D))
    return ::hasVisibleDefaultArgument(*this, P, Modules);
  return ::hasVisibleDefaultArgument(*this, cast(D), Modules);
}

/// Returns true if some redeclaration of \p D that is lexically declared in a
/// file context is visible. When \p Modules is non-null, the owning and
/// merged-definition modules of each non-visible candidate are appended.
///
/// NOTE(review): isa/cast/const_cast and 'llvm::SmallVectorImpl' below have
/// no explicit template argument (apparently stripped). Confirm.
bool Sema::hasVisibleMemberSpecialization(
    const NamedDecl *D, llvm::SmallVectorImpl *Modules) {
  assert(isa(D->getDeclContext()) &&
         "not a member specialization");
  for (auto *Redecl : D->redecls()) {
    // If the specialization is declared at namespace scope, then it's a member
    // specialization declaration. If it's lexically inside the class
    // definition then it was instantiated.
    //
    // FIXME: This is a hack. There should be a better way to determine this.
    // FIXME: What about MS-style explicit specializations declared within a
    //        class definition?
    if (Redecl->getLexicalDeclContext()->isFileContext()) {
      auto *NonConstR = const_cast(cast(Redecl));

      if (isVisible(NonConstR))
        return true;

      if (Modules) {
        Modules->push_back(getOwningModule(NonConstR));
        const auto &Merged = Context.getModulesWithMergedDefinition(NonConstR);
        Modules->insert(Modules->end(), Merged.begin(), Merged.end());
      }
    }
  }

  return false;
}

/// \brief Determine whether a declaration is visible to name lookup.
///
/// This routine determines whether the declaration D is visible in the current
/// lookup context, taking into account the current template instantiation
/// stack. During template instantiation, a declaration is visible if it is
/// visible from a module containing any entity on the template instantiation
/// path (by instantiating a template, you allow it to see the declarations that
/// your module can see, including those later on in your module).
bool LookupResult::isVisibleSlow(Sema &SemaRef, NamedDecl *D) {
  assert(D->isHidden() && "should not call this: not in slow case");

  Module *DeclModule = nullptr;

  if (SemaRef.getLangOpts().ModulesLocalVisibility) {
    DeclModule = SemaRef.getOwningModule(D);
    if (!DeclModule) {
      // getOwningModule() may have decided the declaration should not be hidden.
      assert(!D->isHidden() && "hidden decl not from a module");
      return true;
    }

    // If the owning module is visible, and the decl is not module private,
    // then the decl is visible too. (Module private is ignored within the same
    // top-level module.)
    if ((!D->isFromASTFile() || !D->isModulePrivate()) &&
        (SemaRef.isModuleVisible(DeclModule) ||
         SemaRef.hasVisibleMergedDefinition(D)))
      return true;
  }

  // If this declaration is not at namespace scope nor module-private,
  // then it is visible if its lexical parent has a visible definition.
  // NOTE(review): the isa/cast calls in this block have no explicit template
  // argument; the '<...>' lists appear to have been stripped from this text.
  // Confirm against the upstream file.
  DeclContext *DC = D->getLexicalDeclContext();
  if (!D->isModulePrivate() &&
      DC && !DC->isFileContext() && !isa(DC) && !isa(DC)) {
    // For a parameter, check whether our current template declaration's
    // lexical context is visible, not whether there's some other visible
    // definition of it, because parameters aren't "within" the definition.
    //
    // In C++ we need to check for a visible definition due to ODR merging,
    // and in C we must not because each declaration of a function gets its own
    // set of declarations for tags in prototype scope.
    if ((D->isTemplateParameter() || isa(D)
         || (isa(DC) && !SemaRef.getLangOpts().CPlusPlus))
            ? isVisible(SemaRef, cast(DC))
            : SemaRef.hasVisibleDefinition(cast(DC))) {
      if (SemaRef.ActiveTemplateInstantiations.empty() &&
          // FIXME: Do something better in this case.
          !SemaRef.getLangOpts().ModulesLocalVisibility) {
        // Cache the fact that this declaration is implicitly visible because
        // its parent has a visible definition.
        D->setHidden(false);
      }
      return true;
    }
    return false;
  }

  // Find the extra places where we need to look.
  // NOTE(review): 'llvm::DenseSet' has no template argument here (apparently
  // stripped). Confirm.
  llvm::DenseSet &LookupModules = SemaRef.getLookupModules();
  if (LookupModules.empty())
    return false;

  // DeclModule is still null here when ModulesLocalVisibility is off.
  if (!DeclModule) {
    DeclModule = SemaRef.getOwningModule(D);
    assert(DeclModule && "hidden decl not from a module");
  }

  // If our lookup set contains the decl's module, it's visible.
  if (LookupModules.count(DeclModule))
    return true;

  // If the declaration isn't exported, it's not visible in any other module.
  if (D->isModulePrivate())
    return false;

  // Check whether DeclModule is transitively exported to an import of
  // the lookup set.
  return std::any_of(LookupModules.begin(), LookupModules.end(),
                     [&](Module *M) { return M->isModuleVisible(DeclModule); });
}

/// Forwards to LookupResult::isVisible() after casting away constness.
/// NOTE(review): const_cast has no explicit template argument here
/// (apparently stripped). Confirm.
bool Sema::isVisibleSlow(const NamedDecl *D) {
  return LookupResult::isVisible(*this, const_cast(D));
}

/// Returns true if any declaration in \p R is visible; otherwise returns
/// whether \p New is externally visible.
bool Sema::shouldLinkPossiblyHiddenDecl(LookupResult &R, const NamedDecl *New) {
  for (auto *D : R) {
    if (isVisible(D))
      return true;
  }
  return New->isExternallyVisible();
}

/// \brief Retrieve the visible declaration corresponding to D, if any.
///
/// This routine determines whether the declaration D is visible in the current
/// module, with the current imports. If not, it checks whether any
/// redeclaration of D is visible, and if so, returns that declaration.
///
/// \returns D, or a visible previous declaration of D, whichever is more recent
/// and visible. If no declaration of D is visible, returns null.
static NamedDecl *findAcceptableDecl(Sema &SemaRef, NamedDecl *D) {
  assert(!LookupResult::isVisible(SemaRef, D) && "not in slow case");

  for (auto RD : D->redecls()) {
    // Don't bother with extra checks if we already know this one isn't visible.
    if (RD == D)
      continue;

    // NOTE(review): cast has no explicit template argument here (apparently
    // stripped). Confirm.
    auto ND = cast(RD);
    // FIXME: This is wrong in the case where the previous declaration is not
    // visible in the same scope as D. This needs to be done much more
    // carefully.
    if (LookupResult::isVisible(SemaRef, ND))
      return ND;
  }

  return nullptr;
}

/// Returns true if any redeclaration of \p D is visible. When \p Modules is
/// non-null, the owning and merged-definition modules of each non-visible
/// redeclaration are appended to it.
///
/// NOTE(review): 'llvm::SmallVectorImpl', const_cast and cast have no
/// explicit template argument here (apparently stripped). Confirm.
bool Sema::hasVisibleDeclarationSlow(const NamedDecl *D,
                                     llvm::SmallVectorImpl *Modules) {
  assert(!isVisible(D) && "not in slow case");

  for (auto *Redecl : D->redecls()) {
    auto *NonConstR = const_cast(cast(Redecl));
    if (isVisible(NonConstR))
      return true;

    if (Modules) {
      Modules->push_back(getOwningModule(NonConstR));
      const auto &Merged = Context.getModulesWithMergedDefinition(NonConstR);
      Modules->insert(Modules->end(), Merged.begin(), Merged.end());
    }
  }

  return false;
}

/// Slow path for finding an acceptable declaration for \p D. The first branch
/// answers from (and populates) Sema's VisibleNamespaceCache, keyed by the
/// canonical declaration; every other declaration goes straight to
/// findAcceptableDecl().
NamedDecl *LookupResult::getAcceptableDeclSlow(NamedDecl *D) const {
  // NOTE(review): dyn_cast has no explicit template argument here
  // (apparently stripped). Confirm.
  if (auto *ND = dyn_cast(D)) {
    // Namespaces are a bit of a special case: we expect there to be a lot of
    // redeclarations of some namespaces, all declarations of a namespace are
    // essentially interchangeable, all declarations are found by name lookup
    // if any is, and namespaces are never looked up during template
    // instantiation. So we benefit from caching the check in this case, and
    // it is correct to do so.
    auto *Key = ND->getCanonicalDecl();
    if (auto *Acceptable = getSema().VisibleNamespaceCache.lookup(Key))
      return Acceptable;
    auto *Acceptable =
        isVisible(getSema(), Key) ? Key : findAcceptableDecl(getSema(), Key);
    // Only successful answers are cached.
    if (Acceptable)
      getSema().VisibleNamespaceCache.insert(std::make_pair(Key, Acceptable));
    return Acceptable;
  }

  return findAcceptableDecl(getSema(), D);
}

/// @brief Perform unqualified name lookup starting from a given
/// scope.
///
/// Unqualified name lookup (C++ [basic.lookup.unqual], C99 6.2.1) is
/// used to find names within the current scope. For example, 'x' in
/// @code
/// int x;
/// int f() {
///   return x; // unqualified name lookup finds 'x' in the global scope
/// }
/// @endcode
///
/// Different lookup criteria can find different names. For example, a
/// particular scope can have both a struct and a function of the same
/// name, and each can be found by certain lookup criteria.
/// For more information about lookup criteria, see the documentation for the
/// class LookupCriteria.
///
/// @param S        The scope from which unqualified name lookup will
/// begin. If the lookup criteria permits, name lookup may also search
/// in the parent scopes.
///
/// @param [in,out] R Specifies the lookup to perform (e.g., the name to
/// look up and the lookup kind), and is updated with the results of lookup
/// including zero or more declarations and possibly additional information
/// used to diagnose ambiguities.
///
/// @param AllowBuiltinCreation When true and the lookup above found nothing,
/// LookupBuiltin() is tried before the external source is consulted.
///
/// @returns \c true if lookup succeeded and false otherwise.
bool Sema::LookupName(LookupResult &R, Scope *S, bool AllowBuiltinCreation) {
  DeclarationName Name = R.getLookupName();
  if (!Name) return false;

  LookupNameKind NameKind = R.getLookupKind();

  if (!getLangOpts().CPlusPlus) {
    // Unqualified name lookup in C/Objective-C is purely lexical, so
    // search in the declarations attached to the name.
    if (NameKind == Sema::LookupRedeclarationWithLinkage) {
      // Find the nearest non-transparent declaration scope.
      while (!(S->getFlags() & Scope::DeclScope) ||
             (S->getEntity() && S->getEntity()->isTransparentContext()))
        S = S->getParent();
    }

    // When performing a scope lookup, we want to find local extern decls.
    FindLocalExternScope FindLocals(R);

    // Scan up the scope chain looking for a decl that matches this
    // identifier that is in the appropriate namespace.  This search
    // should not take long, as shadowing of names is uncommon, and
    // deep shadowing is extremely uncommon.
    bool LeftStartingScope = false;

    for (IdentifierResolver::iterator I = IdResolver.begin(Name),
                                   IEnd = IdResolver.end();
         I != IEnd; ++I)
      if (NamedDecl *D = R.getAcceptableDecl(*I)) {
        if (NameKind == LookupRedeclarationWithLinkage) {
          // Determine whether this (or a previous) declaration is
          // out-of-scope.
          if (!LeftStartingScope && !S->isDeclScope(*I))
            LeftStartingScope = true;

          // If we found something outside of our starting scope that
          // does not have linkage, skip it.
          if (LeftStartingScope && !((*I)->hasLinkage())) {
            R.setShadowed();
            continue;
          }
        }
        // NOTE(review): isa below has no explicit template argument; the
        // '<...>' list appears to have been stripped from this text. Confirm
        // against the upstream file.
        else if (NameKind == LookupObjCImplicitSelfParam &&
                 !isa(*I))
          continue;

        R.addDecl(D);

        // Check whether there are any other declarations with the same name
        // and in the same scope.
        // NOTE: this condition is always true at this point; the enclosing
        // loop only runs its body while I != IEnd and I has not been advanced
        // since that check.
        if (I != IEnd) {
          // Find the scope in which this declaration was declared (if it
          // actually exists in a Scope).
          while (S && !S->isDeclScope(D))
            S = S->getParent();

          // If the scope containing the declaration is the translation unit,
          // then we'll need to perform our checks based on the matching
          // DeclContexts rather than matching scopes.
          if (S && isNamespaceOrTranslationUnitScope(S))
            S = nullptr;

          // Compute the DeclContext, if we need it.
          DeclContext *DC = nullptr;
          if (!S)
            DC = (*I)->getDeclContext()->getRedeclContext();

          // Scan forward with a copy of the iterator so I itself is untouched.
          IdentifierResolver::iterator LastI = I;
          for (++LastI; LastI != IEnd; ++LastI) {
            if (S) {
              // Match based on scope.
              if (!S->isDeclScope(*LastI))
                break;
            } else {
              // Match based on DeclContext.
              DeclContext *LastDC
                = (*LastI)->getDeclContext()->getRedeclContext();
              if (!LastDC->Equals(DC))
                break;
            }

            // If the declaration is in the right namespace and visible, add it.
            if (NamedDecl *LastD = R.getAcceptableDecl(*LastI))
              R.addDecl(LastD);
          }

          R.resolveKind();
        }

        return true;
      }
  } else {
    // Perform C++ unqualified name lookup.
    if (CppLookupName(R, S))
      return true;
  }

  // If we didn't find a use of this identifier, and if the identifier
  // corresponds to a compiler builtin, create the decl object for the builtin
  // now, injecting it into translation unit scope, and return it.
  if (AllowBuiltinCreation && LookupBuiltin(*this, R))
    return true;

  // If we didn't find a use of this identifier, the ExternalSource
  // may be able to handle the situation.
  // Note: some lookup failures are expected!
  // See e.g. R.isForRedeclaration().
  return (ExternalSource && ExternalSource->LookupUnqualified(R, S));
}

/// @brief Perform qualified name lookup in the namespaces nominated by
/// using directives by the given context.
/// /// C++98 [namespace.qual]p2: /// Given X::m (where X is a user-declared namespace), or given \::m /// (where X is the global namespace), let S be the set of all /// declarations of m in X and in the transitive closure of all /// namespaces nominated by using-directives in X and its used /// namespaces, except that using-directives are ignored in any /// namespace, including X, directly containing one or more /// declarations of m. No namespace is searched more than once in /// the lookup of a name. If S is the empty set, the program is /// ill-formed. Otherwise, if S has exactly one member, or if the /// context of the reference is a using-declaration /// (namespace.udecl), S is the required set of declarations of /// m. Otherwise if the use of m is not one that allows a unique /// declaration to be chosen from S, the program is ill-formed. /// /// C++98 [namespace.qual]p5: /// During the lookup of a qualified namespace member name, if the /// lookup finds more than one declaration of the member, and if one /// declaration introduces a class name or enumeration name and the /// other declarations either introduce the same object, the same /// enumerator or a set of functions, the non-type name hides the /// class or enumeration name if and only if the declarations are /// from the same namespace; otherwise (the declarations are from /// different namespaces), the program is ill-formed. static bool LookupQualifiedNameInUsingDirectives(Sema &S, LookupResult &R, DeclContext *StartDC) { assert(StartDC->isFileContext() && "start context is not a file context"); DeclContext::udir_range UsingDirectives = StartDC->using_directives(); if (UsingDirectives.begin() == UsingDirectives.end()) return false; // We have at least added all these contexts to the queue. llvm::SmallPtrSet Visited; Visited.insert(StartDC); // We have not yet looked into these namespaces, much less added // their "using-children" to the queue. 
SmallVector Queue; // We have already looked into the initial namespace; seed the queue // with its using-children. for (auto *I : UsingDirectives) { NamespaceDecl *ND = I->getNominatedNamespace()->getOriginalNamespace(); if (Visited.insert(ND).second) Queue.push_back(ND); } // The easiest way to implement the restriction in [namespace.qual]p5 // is to check whether any of the individual results found a tag // and, if so, to declare an ambiguity if the final result is not // a tag. bool FoundTag = false; bool FoundNonTag = false; LookupResult LocalR(LookupResult::Temporary, R); bool Found = false; while (!Queue.empty()) { NamespaceDecl *ND = Queue.pop_back_val(); // We go through some convolutions here to avoid copying results // between LookupResults. bool UseLocal = !R.empty(); LookupResult &DirectR = UseLocal ? LocalR : R; bool FoundDirect = LookupDirect(S, DirectR, ND); if (FoundDirect) { // First do any local hiding. DirectR.resolveKind(); // If the local result is a tag, remember that. if (DirectR.isSingleTagDecl()) FoundTag = true; else FoundNonTag = true; // Append the local results to the total results if necessary. if (UseLocal) { R.addAllDecls(LocalR); LocalR.clear(); } } // If we find names in this namespace, ignore its using directives. if (FoundDirect) { Found = true; continue; } for (auto I : ND->using_directives()) { NamespaceDecl *Nom = I->getNominatedNamespace(); if (Visited.insert(Nom).second) Queue.push_back(Nom); } } if (Found) { if (FoundTag && FoundNonTag) R.setAmbiguousQualifiedTagHiding(); else R.resolveKind(); } return Found; } /// \brief Callback that looks for any member of a class with the given name. 
static bool LookupAnyMember(const CXXBaseSpecifier *Specifier, CXXBasePath &Path, DeclarationName Name) { RecordDecl *BaseRecord = Specifier->getType()->getAs()->getDecl(); Path.Decls = BaseRecord->lookup(Name); return !Path.Decls.empty(); } /// \brief Determine whether the given set of member declarations contains only /// static members, nested types, and enumerators. template static bool HasOnlyStaticMembers(InputIterator First, InputIterator Last) { Decl *D = (*First)->getUnderlyingDecl(); if (isa(D) || isa(D) || isa(D)) return true; if (isa(D)) { // Determine whether all of the methods are static. bool AllMethodsAreStatic = true; for(; First != Last; ++First) { D = (*First)->getUnderlyingDecl(); if (!isa(D)) { assert(isa(D) && "Non-function must be a tag decl"); break; } if (!cast(D)->isStatic()) { AllMethodsAreStatic = false; break; } } if (AllMethodsAreStatic) return true; } return false; } /// \brief Perform qualified name lookup into a given context. /// /// Qualified name lookup (C++ [basic.lookup.qual]) is used to find /// names when the context of those names is explicit specified, e.g., /// "std::vector" or "x->member", or as part of unqualified name lookup. /// /// Different lookup criteria can find different names. For example, a /// particular scope can have both a struct and a function of the same /// name, and each can be found by certain lookup criteria. For more /// information about lookup criteria, see the documentation for the /// class LookupCriteria. /// /// \param R captures both the lookup criteria and any lookup results found. /// /// \param LookupCtx The context in which qualified name lookup will /// search. If the lookup criteria permits, name lookup may also search /// in the parent contexts or (for C++ classes) base classes. /// /// \param InUnqualifiedLookup true if this is qualified name lookup that /// occurs as part of unqualified name lookup. /// /// \returns true if lookup succeeded, false if it failed. 
bool Sema::LookupQualifiedName(LookupResult &R, DeclContext *LookupCtx, bool InUnqualifiedLookup) { assert(LookupCtx && "Sema::LookupQualifiedName requires a lookup context"); if (!R.getLookupName()) return false; // Make sure that the declaration context is complete. assert((!isa(LookupCtx) || LookupCtx->isDependentContext() || cast(LookupCtx)->isCompleteDefinition() || cast(LookupCtx)->isBeingDefined()) && "Declaration context must already be complete!"); struct QualifiedLookupInScope { bool oldVal; DeclContext *Context; // Set flag in DeclContext informing debugger that we're looking for qualified name QualifiedLookupInScope(DeclContext *ctx) : Context(ctx) { oldVal = ctx->setUseQualifiedLookup(); } ~QualifiedLookupInScope() { Context->setUseQualifiedLookup(oldVal); } } QL(LookupCtx); if (LookupDirect(*this, R, LookupCtx)) { R.resolveKind(); if (isa(LookupCtx)) R.setNamingClass(cast(LookupCtx)); return true; } // Don't descend into implied contexts for redeclarations. // C++98 [namespace.qual]p6: // In a declaration for a namespace member in which the // declarator-id is a qualified-id, given that the qualified-id // for the namespace member has the form // nested-name-specifier unqualified-id // the unqualified-id shall name a member of the namespace // designated by the nested-name-specifier. // See also [class.mfct]p5 and [class.static.data]p2. if (R.isForRedeclaration()) return false; // If this is a namespace, look it up in the implied namespaces. if (LookupCtx->isFileContext()) return LookupQualifiedNameInUsingDirectives(*this, R, LookupCtx); // If this isn't a C++ class, we aren't allowed to look into base // classes, we're done. CXXRecordDecl *LookupRec = dyn_cast(LookupCtx); if (!LookupRec || !LookupRec->getDefinition()) return false; // If we're performing qualified name lookup into a dependent class, // then we are actually looking into a current instantiation. 
If we have any // dependent base classes, then we either have to delay lookup until // template instantiation time (at which point all bases will be available) // or we have to fail. if (!InUnqualifiedLookup && LookupRec->isDependentContext() && LookupRec->hasAnyDependentBases()) { R.setNotFoundInCurrentInstantiation(); return false; } // Perform lookup into our base classes. CXXBasePaths Paths; Paths.setOrigin(LookupRec); // Look for this member in our base classes bool (*BaseCallback)(const CXXBaseSpecifier *Specifier, CXXBasePath &Path, DeclarationName Name) = nullptr; switch (R.getLookupKind()) { case LookupObjCImplicitSelfParam: case LookupOrdinaryName: case LookupMemberName: case LookupRedeclarationWithLinkage: case LookupLocalFriendName: BaseCallback = &CXXRecordDecl::FindOrdinaryMember; break; case LookupTagName: BaseCallback = &CXXRecordDecl::FindTagMember; break; case LookupAnyName: BaseCallback = &LookupAnyMember; break; case LookupOMPReductionName: BaseCallback = &CXXRecordDecl::FindOMPReductionMember; break; case LookupUsingDeclName: // This lookup is for redeclarations only. case LookupOperatorName: case LookupNamespaceName: case LookupObjCProtocolName: case LookupLabel: // These lookups will never find a member in a C++ class (or base class). return false; case LookupNestedNameSpecifierName: BaseCallback = &CXXRecordDecl::FindNestedNameSpecifierMember; break; } DeclarationName Name = R.getLookupName(); if (!LookupRec->lookupInBases( [=](const CXXBaseSpecifier *Specifier, CXXBasePath &Path) { return BaseCallback(Specifier, Path, Name); }, Paths)) return false; R.setNamingClass(LookupRec); // C++ [class.member.lookup]p2: // [...] If the resulting set of declarations are not all from // sub-objects of the same type, or the set has a nonstatic member // and includes members from distinct sub-objects, there is an // ambiguity and the program is ill-formed. Otherwise that set is // the result of the lookup. 
QualType SubobjectType; int SubobjectNumber = 0; AccessSpecifier SubobjectAccess = AS_none; for (CXXBasePaths::paths_iterator Path = Paths.begin(), PathEnd = Paths.end(); Path != PathEnd; ++Path) { const CXXBasePathElement &PathElement = Path->back(); // Pick the best (i.e. most permissive i.e. numerically lowest) access // across all paths. SubobjectAccess = std::min(SubobjectAccess, Path->Access); // Determine whether we're looking at a distinct sub-object or not. if (SubobjectType.isNull()) { // This is the first subobject we've looked at. Record its type. SubobjectType = Context.getCanonicalType(PathElement.Base->getType()); SubobjectNumber = PathElement.SubobjectNumber; continue; } if (SubobjectType != Context.getCanonicalType(PathElement.Base->getType())) { // We found members of the given name in two subobjects of // different types. If the declaration sets aren't the same, this // lookup is ambiguous. if (HasOnlyStaticMembers(Path->Decls.begin(), Path->Decls.end())) { CXXBasePaths::paths_iterator FirstPath = Paths.begin(); DeclContext::lookup_iterator FirstD = FirstPath->Decls.begin(); DeclContext::lookup_iterator CurrentD = Path->Decls.begin(); while (FirstD != FirstPath->Decls.end() && CurrentD != Path->Decls.end()) { if ((*FirstD)->getUnderlyingDecl()->getCanonicalDecl() != (*CurrentD)->getUnderlyingDecl()->getCanonicalDecl()) break; ++FirstD; ++CurrentD; } if (FirstD == FirstPath->Decls.end() && CurrentD == Path->Decls.end()) continue; } R.setAmbiguousBaseSubobjectTypes(Paths); return true; } if (SubobjectNumber != PathElement.SubobjectNumber) { // We have a different subobject of the same type. // C++ [class.member.lookup]p5: // A static member, a nested type or an enumerator defined in // a base class T can unambiguously be found even if an object // has more than one base class subobject of type T. if (HasOnlyStaticMembers(Path->Decls.begin(), Path->Decls.end())) continue; // We have found a nonstatic member name in multiple, distinct // subobjects. 
Name lookup is ambiguous. R.setAmbiguousBaseSubobjects(Paths); return true; } } // Lookup in a base class succeeded; return these results. for (auto *D : Paths.front().Decls) { AccessSpecifier AS = CXXRecordDecl::MergeAccess(SubobjectAccess, D->getAccess()); R.addDecl(D, AS); } R.resolveKind(); return true; } /// \brief Performs qualified name lookup or special type of lookup for /// "__super::" scope specifier. /// /// This routine is a convenience overload meant to be called from contexts /// that need to perform a qualified name lookup with an optional C++ scope /// specifier that might require special kind of lookup. /// /// \param R captures both the lookup criteria and any lookup results found. /// /// \param LookupCtx The context in which qualified name lookup will /// search. /// /// \param SS An optional C++ scope-specifier. /// /// \returns true if lookup succeeded, false if it failed. bool Sema::LookupQualifiedName(LookupResult &R, DeclContext *LookupCtx, CXXScopeSpec &SS) { auto *NNS = SS.getScopeRep(); if (NNS && NNS->getKind() == NestedNameSpecifier::Super) return LookupInSuper(R, NNS->getAsRecordDecl()); else return LookupQualifiedName(R, LookupCtx); } /// @brief Performs name lookup for a name that was parsed in the /// source code, and may contain a C++ scope specifier. /// /// This routine is a convenience routine meant to be called from /// contexts that receive a name and an optional C++ scope specifier /// (e.g., "N::M::x"). It will then perform either qualified or /// unqualified name lookup (with LookupQualifiedName or LookupName, /// respectively) on the given name and return those results. It will /// perform a special type of lookup for "__super::" scope specifier. /// /// @param S The scope from which unqualified name lookup will /// begin. /// /// @param SS An optional C++ scope-specifier, e.g., "::N::M". /// /// @param EnteringContext Indicates whether we are going to enter the /// context of the scope-specifier SS (if present). 
/// /// @returns True if any decls were found (but possibly ambiguous) bool Sema::LookupParsedName(LookupResult &R, Scope *S, CXXScopeSpec *SS, bool AllowBuiltinCreation, bool EnteringContext) { if (SS && SS->isInvalid()) { // When the scope specifier is invalid, don't even look for // anything. return false; } if (SS && SS->isSet()) { NestedNameSpecifier *NNS = SS->getScopeRep(); if (NNS->getKind() == NestedNameSpecifier::Super) return LookupInSuper(R, NNS->getAsRecordDecl()); if (DeclContext *DC = computeDeclContext(*SS, EnteringContext)) { // We have resolved the scope specifier to a particular declaration // contex, and will perform name lookup in that context. if (!DC->isDependentContext() && RequireCompleteDeclContext(*SS, DC)) return false; R.setContextRange(SS->getRange()); return LookupQualifiedName(R, DC); } // We could not resolve the scope specified to a specific declaration // context, which means that SS refers to an unknown specialization. // Name lookup can't find anything in this case. R.setNotFoundInCurrentInstantiation(); R.setContextRange(SS->getRange()); return false; } // Perform unqualified name lookup starting in the given scope. return LookupName(R, S, AllowBuiltinCreation); } /// \brief Perform qualified name lookup into all base classes of the given /// class. /// /// \param R captures both the lookup criteria and any lookup results found. /// /// \param Class The context in which qualified name lookup will /// search. Name lookup will search in all base classes merging the results. /// /// @returns True if any decls were found (but possibly ambiguous) bool Sema::LookupInSuper(LookupResult &R, CXXRecordDecl *Class) { // The access-control rules we use here are essentially the rules for // doing a lookup in Class that just magically skipped the direct // members of Class itself. That is, the naming class is Class, and the // access includes the access of the base. 
for (const auto &BaseSpec : Class->bases()) { CXXRecordDecl *RD = cast( BaseSpec.getType()->castAs()->getDecl()); LookupResult Result(*this, R.getLookupNameInfo(), R.getLookupKind()); Result.setBaseObjectType(Context.getRecordType(Class)); LookupQualifiedName(Result, RD); // Copy the lookup results into the target, merging the base's access into // the path access. for (auto I = Result.begin(), E = Result.end(); I != E; ++I) { R.addDecl(I.getDecl(), CXXRecordDecl::MergeAccess(BaseSpec.getAccessSpecifier(), I.getAccess())); } Result.suppressDiagnostics(); } R.resolveKind(); R.setNamingClass(Class); return !R.empty(); } /// \brief Produce a diagnostic describing the ambiguity that resulted /// from name lookup. /// /// \param Result The result of the ambiguous lookup to be diagnosed. void Sema::DiagnoseAmbiguousLookup(LookupResult &Result) { assert(Result.isAmbiguous() && "Lookup result must be ambiguous"); DeclarationName Name = Result.getLookupName(); SourceLocation NameLoc = Result.getNameLoc(); SourceRange LookupRange = Result.getContextRange(); switch (Result.getAmbiguityKind()) { case LookupResult::AmbiguousBaseSubobjects: { CXXBasePaths *Paths = Result.getBasePaths(); QualType SubobjectType = Paths->front().back().Base->getType(); Diag(NameLoc, diag::err_ambiguous_member_multiple_subobjects) << Name << SubobjectType << getAmbiguousPathsDisplayString(*Paths) << LookupRange; DeclContext::lookup_iterator Found = Paths->front().Decls.begin(); while (isa(*Found) && cast(*Found)->isStatic()) ++Found; Diag((*Found)->getLocation(), diag::note_ambiguous_member_found); break; } case LookupResult::AmbiguousBaseSubobjectTypes: { Diag(NameLoc, diag::err_ambiguous_member_multiple_subobject_types) << Name << LookupRange; CXXBasePaths *Paths = Result.getBasePaths(); std::set DeclsPrinted; for (CXXBasePaths::paths_iterator Path = Paths->begin(), PathEnd = Paths->end(); Path != PathEnd; ++Path) { Decl *D = Path->Decls.front(); if (DeclsPrinted.insert(D).second) 
Diag(D->getLocation(), diag::note_ambiguous_member_found); } break; } case LookupResult::AmbiguousTagHiding: { Diag(NameLoc, diag::err_ambiguous_tag_hiding) << Name << LookupRange; llvm::SmallPtrSet TagDecls; for (auto *D : Result) if (TagDecl *TD = dyn_cast(D)) { TagDecls.insert(TD); Diag(TD->getLocation(), diag::note_hidden_tag); } for (auto *D : Result) if (!isa(D)) Diag(D->getLocation(), diag::note_hiding_object); // For recovery purposes, go ahead and implement the hiding. LookupResult::Filter F = Result.makeFilter(); while (F.hasNext()) { if (TagDecls.count(F.next())) F.erase(); } F.done(); break; } case LookupResult::AmbiguousReference: { Diag(NameLoc, diag::err_ambiguous_reference) << Name << LookupRange; for (auto *D : Result) Diag(D->getLocation(), diag::note_ambiguous_candidate) << D; break; } } } namespace { struct AssociatedLookup { AssociatedLookup(Sema &S, SourceLocation InstantiationLoc, Sema::AssociatedNamespaceSet &Namespaces, Sema::AssociatedClassSet &Classes) : S(S), Namespaces(Namespaces), Classes(Classes), InstantiationLoc(InstantiationLoc) { } Sema &S; Sema::AssociatedNamespaceSet &Namespaces; Sema::AssociatedClassSet &Classes; SourceLocation InstantiationLoc; }; } // end anonymous namespace static void addAssociatedClassesAndNamespaces(AssociatedLookup &Result, QualType T); static void CollectEnclosingNamespace(Sema::AssociatedNamespaceSet &Namespaces, DeclContext *Ctx) { // Add the associated namespace for this class. // We don't use DeclContext::getEnclosingNamespaceContext() as this may // be a locally scoped record. // We skip out of inline namespaces. The innermost non-inline namespace // contains all names of all its nested inline namespaces anyway, so we can // replace the entire inline namespace tree with its root. 
while (Ctx->isRecord() || Ctx->isTransparentContext() || Ctx->isInlineNamespace()) Ctx = Ctx->getParent(); if (Ctx->isFileContext()) Namespaces.insert(Ctx->getPrimaryContext()); } // \brief Add the associated classes and namespaces for argument-dependent // lookup that involves a template argument (C++ [basic.lookup.koenig]p2). static void addAssociatedClassesAndNamespaces(AssociatedLookup &Result, const TemplateArgument &Arg) { // C++ [basic.lookup.koenig]p2, last bullet: // -- [...] ; switch (Arg.getKind()) { case TemplateArgument::Null: break; case TemplateArgument::Type: // [...] the namespaces and classes associated with the types of the // template arguments provided for template type parameters (excluding // template template parameters) addAssociatedClassesAndNamespaces(Result, Arg.getAsType()); break; case TemplateArgument::Template: case TemplateArgument::TemplateExpansion: { // [...] the namespaces in which any template template arguments are // defined; and the classes in which any member templates used as // template template arguments are defined. TemplateName Template = Arg.getAsTemplateOrTemplatePattern(); if (ClassTemplateDecl *ClassTemplate = dyn_cast(Template.getAsTemplateDecl())) { DeclContext *Ctx = ClassTemplate->getDeclContext(); if (CXXRecordDecl *EnclosingClass = dyn_cast(Ctx)) Result.Classes.insert(EnclosingClass); // Add the associated namespace for this class. CollectEnclosingNamespace(Result.Namespaces, Ctx); } break; } case TemplateArgument::Declaration: case TemplateArgument::Integral: case TemplateArgument::Expression: case TemplateArgument::NullPtr: // [Note: non-type template arguments do not contribute to the set of // associated namespaces. ] break; case TemplateArgument::Pack: for (const auto &P : Arg.pack_elements()) addAssociatedClassesAndNamespaces(Result, P); break; } } // \brief Add the associated classes and namespaces for // argument-dependent lookup with an argument of class type // (C++ [basic.lookup.koenig]p2). 
static void addAssociatedClassesAndNamespaces(AssociatedLookup &Result, CXXRecordDecl *Class) { // Just silently ignore anything whose name is __va_list_tag. if (Class->getDeclName() == Result.S.VAListTagName) return; // C++ [basic.lookup.koenig]p2: // [...] // -- If T is a class type (including unions), its associated // classes are: the class itself; the class of which it is a // member, if any; and its direct and indirect base // classes. Its associated namespaces are the namespaces in // which its associated classes are defined. // Add the class of which it is a member, if any. DeclContext *Ctx = Class->getDeclContext(); if (CXXRecordDecl *EnclosingClass = dyn_cast(Ctx)) Result.Classes.insert(EnclosingClass); // Add the associated namespace for this class. CollectEnclosingNamespace(Result.Namespaces, Ctx); // Add the class itself. If we've already seen this class, we don't // need to visit base classes. // // FIXME: That's not correct, we may have added this class only because it // was the enclosing class of another class, and in that case we won't have // added its base classes yet. if (!Result.Classes.insert(Class)) return; // -- If T is a template-id, its associated namespaces and classes are // the namespace in which the template is defined; for member // templates, the member template's class; the namespaces and classes // associated with the types of the template arguments provided for // template type parameters (excluding template template parameters); the // namespaces in which any template template arguments are defined; and // the classes in which any member templates used as template template // arguments are defined. [Note: non-type template arguments do not // contribute to the set of associated namespaces. 
] if (ClassTemplateSpecializationDecl *Spec = dyn_cast(Class)) { DeclContext *Ctx = Spec->getSpecializedTemplate()->getDeclContext(); if (CXXRecordDecl *EnclosingClass = dyn_cast(Ctx)) Result.Classes.insert(EnclosingClass); // Add the associated namespace for this class. CollectEnclosingNamespace(Result.Namespaces, Ctx); const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs(); for (unsigned I = 0, N = TemplateArgs.size(); I != N; ++I) addAssociatedClassesAndNamespaces(Result, TemplateArgs[I]); } // Only recurse into base classes for complete types. if (!Result.S.isCompleteType(Result.InstantiationLoc, Result.S.Context.getRecordType(Class))) return; // Add direct and indirect base classes along with their associated // namespaces. SmallVector Bases; Bases.push_back(Class); while (!Bases.empty()) { // Pop this class off the stack. Class = Bases.pop_back_val(); // Visit the base classes. for (const auto &Base : Class->bases()) { const RecordType *BaseType = Base.getType()->getAs(); // In dependent contexts, we do ADL twice, and the first time around, // the base type might be a dependent TemplateSpecializationType, or a // TemplateTypeParmType. If that happens, simply ignore it. // FIXME: If we want to support export, we probably need to add the // namespace of the template in a TemplateSpecializationType, or even // the classes and namespaces of known non-dependent arguments. if (!BaseType) continue; CXXRecordDecl *BaseDecl = cast(BaseType->getDecl()); if (Result.Classes.insert(BaseDecl)) { // Find the associated namespace for this base class. DeclContext *BaseCtx = BaseDecl->getDeclContext(); CollectEnclosingNamespace(Result.Namespaces, BaseCtx); // Make sure we visit the bases of this base class. if (BaseDecl->bases_begin() != BaseDecl->bases_end()) Bases.push_back(BaseDecl); } } } } // \brief Add the associated classes and namespaces for // argument-dependent lookup with an argument of type T // (C++ [basic.lookup.koenig]p2). 
static void addAssociatedClassesAndNamespaces(AssociatedLookup &Result, QualType Ty) { // C++ [basic.lookup.koenig]p2: // // For each argument type T in the function call, there is a set // of zero or more associated namespaces and a set of zero or more // associated classes to be considered. The sets of namespaces and // classes is determined entirely by the types of the function // arguments (and the namespace of any template template // argument). Typedef names and using-declarations used to specify // the types do not contribute to this set. The sets of namespaces // and classes are determined in the following way: SmallVector Queue; const Type *T = Ty->getCanonicalTypeInternal().getTypePtr(); while (true) { switch (T->getTypeClass()) { #define TYPE(Class, Base) #define DEPENDENT_TYPE(Class, Base) case Type::Class: #define NON_CANONICAL_TYPE(Class, Base) case Type::Class: #define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class: #define ABSTRACT_TYPE(Class, Base) #include "clang/AST/TypeNodes.def" // T is canonical. We can also ignore dependent types because // we don't need to do ADL at the definition point, but if we // wanted to implement template export (or if we find some other // use for associated classes and namespaces...) this would be // wrong. break; // -- If T is a pointer to U or an array of U, its associated // namespaces and classes are those associated with U. case Type::Pointer: T = cast(T)->getPointeeType().getTypePtr(); continue; case Type::ConstantArray: case Type::IncompleteArray: case Type::VariableArray: T = cast(T)->getElementType().getTypePtr(); continue; // -- If T is a fundamental type, its associated sets of // namespaces and classes are both empty. case Type::Builtin: break; // -- If T is a class type (including unions), its associated // classes are: the class itself; the class of which it is a // member, if any; and its direct and indirect base // classes. 
Its associated namespaces are the namespaces in // which its associated classes are defined. case Type::Record: { CXXRecordDecl *Class = cast(cast(T)->getDecl()); addAssociatedClassesAndNamespaces(Result, Class); break; } // -- If T is an enumeration type, its associated namespace is // the namespace in which it is defined. If it is class // member, its associated class is the member's class; else // it has no associated class. case Type::Enum: { EnumDecl *Enum = cast(T)->getDecl(); DeclContext *Ctx = Enum->getDeclContext(); if (CXXRecordDecl *EnclosingClass = dyn_cast(Ctx)) Result.Classes.insert(EnclosingClass); // Add the associated namespace for this class. CollectEnclosingNamespace(Result.Namespaces, Ctx); break; } // -- If T is a function type, its associated namespaces and // classes are those associated with the function parameter // types and those associated with the return type. case Type::FunctionProto: { const FunctionProtoType *Proto = cast(T); for (const auto &Arg : Proto->param_types()) Queue.push_back(Arg.getTypePtr()); // fallthrough } case Type::FunctionNoProto: { const FunctionType *FnType = cast(T); T = FnType->getReturnType().getTypePtr(); continue; } // -- If T is a pointer to a member function of a class X, its // associated namespaces and classes are those associated // with the function parameter types and return type, // together with those associated with X. // // -- If T is a pointer to a data member of class X, its // associated namespaces and classes are those associated // with the member type together with those associated with // X. case Type::MemberPointer: { const MemberPointerType *MemberPtr = cast(T); // Queue up the class type into which this points. Queue.push_back(MemberPtr->getClass()); // And directly continue with the pointee type. T = MemberPtr->getPointeeType().getTypePtr(); continue; } // As an extension, treat this like a normal pointer. 
case Type::BlockPointer: T = cast(T)->getPointeeType().getTypePtr(); continue; // References aren't covered by the standard, but that's such an // obvious defect that we cover them anyway. case Type::LValueReference: case Type::RValueReference: T = cast(T)->getPointeeType().getTypePtr(); continue; // These are fundamental types. case Type::Vector: case Type::ExtVector: case Type::Complex: break; // Non-deduced auto types only get here for error cases. case Type::Auto: break; // If T is an Objective-C object or interface type, or a pointer to an // object or interface type, the associated namespace is the global // namespace. case Type::ObjCObject: case Type::ObjCInterface: case Type::ObjCObjectPointer: Result.Namespaces.insert(Result.S.Context.getTranslationUnitDecl()); break; // Atomic types are just wrappers; use the associations of the // contained type. case Type::Atomic: T = cast(T)->getValueType().getTypePtr(); continue; case Type::Pipe: T = cast(T)->getElementType().getTypePtr(); continue; } if (Queue.empty()) break; T = Queue.pop_back_val(); } } /// \brief Find the associated classes and namespaces for /// argument-dependent lookup for a call with the given set of /// arguments. /// /// This routine computes the sets of associated classes and associated /// namespaces searched by argument-dependent lookup /// (C++ [basic.lookup.argdep]) for a given set of arguments. void Sema::FindAssociatedClassesAndNamespaces( SourceLocation InstantiationLoc, ArrayRef Args, AssociatedNamespaceSet &AssociatedNamespaces, AssociatedClassSet &AssociatedClasses) { AssociatedNamespaces.clear(); AssociatedClasses.clear(); AssociatedLookup Result(*this, InstantiationLoc, AssociatedNamespaces, AssociatedClasses); // C++ [basic.lookup.koenig]p2: // For each argument type T in the function call, there is a set // of zero or more associated namespaces and a set of zero or more // associated classes to be considered. 
The sets of namespaces and // classes is determined entirely by the types of the function // arguments (and the namespace of any template template // argument). for (unsigned ArgIdx = 0; ArgIdx != Args.size(); ++ArgIdx) { Expr *Arg = Args[ArgIdx]; if (Arg->getType() != Context.OverloadTy) { addAssociatedClassesAndNamespaces(Result, Arg->getType()); continue; } // [...] In addition, if the argument is the name or address of a // set of overloaded functions and/or function templates, its // associated classes and namespaces are the union of those // associated with each of the members of the set: the namespace // in which the function or function template is defined and the // classes and namespaces associated with its (non-dependent) // parameter types and return type. Arg = Arg->IgnoreParens(); if (UnaryOperator *unaryOp = dyn_cast(Arg)) if (unaryOp->getOpcode() == UO_AddrOf) Arg = unaryOp->getSubExpr(); UnresolvedLookupExpr *ULE = dyn_cast(Arg); if (!ULE) continue; for (const auto *D : ULE->decls()) { // Look through any using declarations to find the underlying function. const FunctionDecl *FDecl = D->getUnderlyingDecl()->getAsFunction(); // Add the classes and namespaces associated with the parameter // types and return type of this function. addAssociatedClassesAndNamespaces(Result, FDecl->getType()); } } } NamedDecl *Sema::LookupSingleName(Scope *S, DeclarationName Name, SourceLocation Loc, LookupNameKind NameKind, RedeclarationKind Redecl) { LookupResult R(*this, Name, Loc, NameKind, Redecl); LookupName(R, S); return R.getAsSingle(); } /// \brief Find the protocol with the given name, if any. 
ObjCProtocolDecl *Sema::LookupProtocol(IdentifierInfo *II, SourceLocation IdLoc, RedeclarationKind Redecl) { Decl *D = LookupSingleName(TUScope, II, IdLoc, LookupObjCProtocolName, Redecl); return cast_or_null(D); } void Sema::LookupOverloadedOperatorName(OverloadedOperatorKind Op, Scope *S, QualType T1, QualType T2, UnresolvedSetImpl &Functions) { // C++ [over.match.oper]p3: // -- The set of non-member candidates is the result of the // unqualified lookup of operator@ in the context of the // expression according to the usual rules for name lookup in // unqualified function calls (3.4.2) except that all member // functions are ignored. DeclarationName OpName = Context.DeclarationNames.getCXXOperatorName(Op); LookupResult Operators(*this, OpName, SourceLocation(), LookupOperatorName); LookupName(Operators, S); assert(!Operators.isAmbiguous() && "Operator lookup cannot be ambiguous"); Functions.append(Operators.begin(), Operators.end()); } Sema::SpecialMemberOverloadResult *Sema::LookupSpecialMember(CXXRecordDecl *RD, CXXSpecialMember SM, bool ConstArg, bool VolatileArg, bool RValueThis, bool ConstThis, bool VolatileThis) { assert(CanDeclareSpecialMemberFunction(RD) && "doing special member lookup into record that isn't fully complete"); RD = RD->getDefinition(); if (RValueThis || ConstThis || VolatileThis) assert((SM == CXXCopyAssignment || SM == CXXMoveAssignment) && "constructors and destructors always have unqualified lvalue this"); if (ConstArg || VolatileArg) assert((SM != CXXDefaultConstructor && SM != CXXDestructor) && "parameter-less special members can't have qualified arguments"); + // FIXME: Get the caller to pass in a location for the lookup. 
+ SourceLocation LookupLoc = RD->getLocation(); + llvm::FoldingSetNodeID ID; ID.AddPointer(RD); ID.AddInteger(SM); ID.AddInteger(ConstArg); ID.AddInteger(VolatileArg); ID.AddInteger(RValueThis); ID.AddInteger(ConstThis); ID.AddInteger(VolatileThis); void *InsertPoint; SpecialMemberOverloadResult *Result = SpecialMemberCache.FindNodeOrInsertPos(ID, InsertPoint); // This was already cached if (Result) return Result; Result = BumpAlloc.Allocate(); Result = new (Result) SpecialMemberOverloadResult(ID); SpecialMemberCache.InsertNode(Result, InsertPoint); if (SM == CXXDestructor) { if (RD->needsImplicitDestructor()) DeclareImplicitDestructor(RD); CXXDestructorDecl *DD = RD->getDestructor(); assert(DD && "record without a destructor"); Result->setMethod(DD); Result->setKind(DD->isDeleted() ? SpecialMemberOverloadResult::NoMemberOrDeleted : SpecialMemberOverloadResult::Success); return Result; } // Prepare for overload resolution. Here we construct a synthetic argument // if necessary and make sure that implicit functions are declared. 
CanQualType CanTy = Context.getCanonicalType(Context.getTagDeclType(RD)); DeclarationName Name; Expr *Arg = nullptr; unsigned NumArgs; QualType ArgType = CanTy; ExprValueKind VK = VK_LValue; if (SM == CXXDefaultConstructor) { Name = Context.DeclarationNames.getCXXConstructorName(CanTy); NumArgs = 0; if (RD->needsImplicitDefaultConstructor()) DeclareImplicitDefaultConstructor(RD); } else { if (SM == CXXCopyConstructor || SM == CXXMoveConstructor) { Name = Context.DeclarationNames.getCXXConstructorName(CanTy); if (RD->needsImplicitCopyConstructor()) DeclareImplicitCopyConstructor(RD); if (getLangOpts().CPlusPlus11 && RD->needsImplicitMoveConstructor()) DeclareImplicitMoveConstructor(RD); } else { Name = Context.DeclarationNames.getCXXOperatorName(OO_Equal); if (RD->needsImplicitCopyAssignment()) DeclareImplicitCopyAssignment(RD); if (getLangOpts().CPlusPlus11 && RD->needsImplicitMoveAssignment()) DeclareImplicitMoveAssignment(RD); } if (ConstArg) ArgType.addConst(); if (VolatileArg) ArgType.addVolatile(); // This isn't /really/ specified by the standard, but it's implied // we should be working from an RValue in the case of move to ensure // that we prefer to bind to rvalue references, and an LValue in the // case of copy to ensure we don't bind to rvalue references. // Possibly an XValue is actually correct in the case of move, but // there is no semantic difference for class types in this restricted // case. if (SM == CXXCopyConstructor || SM == CXXCopyAssignment) VK = VK_LValue; else VK = VK_RValue; } - OpaqueValueExpr FakeArg(SourceLocation(), ArgType, VK); + OpaqueValueExpr FakeArg(LookupLoc, ArgType, VK); if (SM != CXXDefaultConstructor) { NumArgs = 1; Arg = &FakeArg; } // Create the object argument QualType ThisTy = CanTy; if (ConstThis) ThisTy.addConst(); if (VolatileThis) ThisTy.addVolatile(); Expr::Classification Classification = - OpaqueValueExpr(SourceLocation(), ThisTy, + OpaqueValueExpr(LookupLoc, ThisTy, RValueThis ? 
VK_RValue : VK_LValue).Classify(Context); // Now we perform lookup on the name we computed earlier and do overload // resolution. Lookup is only performed directly into the class since there // will always be a (possibly implicit) declaration to shadow any others. - OverloadCandidateSet OCS(RD->getLocation(), OverloadCandidateSet::CSK_Normal); + OverloadCandidateSet OCS(LookupLoc, OverloadCandidateSet::CSK_Normal); DeclContext::lookup_result R = RD->lookup(Name); if (R.empty()) { // We might have no default constructor because we have a lambda's closure // type, rather than because there's some other declared constructor. // Every class has a copy/move constructor, copy/move assignment, and // destructor. assert(SM == CXXDefaultConstructor && "lookup for a constructor or assignment operator was empty"); Result->setMethod(nullptr); Result->setKind(SpecialMemberOverloadResult::NoMemberOrDeleted); return Result; } // Copy the candidates as our processing of them may load new declarations // from an external source and invalidate lookup_result. 
SmallVector Candidates(R.begin(), R.end()); for (NamedDecl *CandDecl : Candidates) { if (CandDecl->isInvalidDecl()) continue; DeclAccessPair Cand = DeclAccessPair::make(CandDecl, AS_public); auto CtorInfo = getConstructorInfo(Cand); if (CXXMethodDecl *M = dyn_cast(Cand->getUnderlyingDecl())) { if (SM == CXXCopyAssignment || SM == CXXMoveAssignment) AddMethodCandidate(M, Cand, RD, ThisTy, Classification, llvm::makeArrayRef(&Arg, NumArgs), OCS, true); else if (CtorInfo) AddOverloadCandidate(CtorInfo.Constructor, CtorInfo.FoundDecl, llvm::makeArrayRef(&Arg, NumArgs), OCS, true); else AddOverloadCandidate(M, Cand, llvm::makeArrayRef(&Arg, NumArgs), OCS, true); } else if (FunctionTemplateDecl *Tmpl = dyn_cast(Cand->getUnderlyingDecl())) { if (SM == CXXCopyAssignment || SM == CXXMoveAssignment) AddMethodTemplateCandidate( Tmpl, Cand, RD, nullptr, ThisTy, Classification, llvm::makeArrayRef(&Arg, NumArgs), OCS, true); else if (CtorInfo) AddTemplateOverloadCandidate( CtorInfo.ConstructorTmpl, CtorInfo.FoundDecl, nullptr, llvm::makeArrayRef(&Arg, NumArgs), OCS, true); else AddTemplateOverloadCandidate( Tmpl, Cand, nullptr, llvm::makeArrayRef(&Arg, NumArgs), OCS, true); } else { assert(isa(Cand.getDecl()) && "illegal Kind of operator = Decl"); } } OverloadCandidateSet::iterator Best; - switch (OCS.BestViableFunction(*this, SourceLocation(), Best)) { + switch (OCS.BestViableFunction(*this, LookupLoc, Best)) { case OR_Success: Result->setMethod(cast(Best->Function)); Result->setKind(SpecialMemberOverloadResult::Success); break; case OR_Deleted: Result->setMethod(cast(Best->Function)); Result->setKind(SpecialMemberOverloadResult::NoMemberOrDeleted); break; case OR_Ambiguous: Result->setMethod(nullptr); Result->setKind(SpecialMemberOverloadResult::Ambiguous); break; case OR_No_Viable_Function: Result->setMethod(nullptr); Result->setKind(SpecialMemberOverloadResult::NoMemberOrDeleted); break; } return Result; } /// \brief Look up the default constructor for the given class. 
CXXConstructorDecl *Sema::LookupDefaultConstructor(CXXRecordDecl *Class) { SpecialMemberOverloadResult *Result = LookupSpecialMember(Class, CXXDefaultConstructor, false, false, false, false, false); return cast_or_null(Result->getMethod()); } /// \brief Look up the copying constructor for the given class. CXXConstructorDecl *Sema::LookupCopyingConstructor(CXXRecordDecl *Class, unsigned Quals) { assert(!(Quals & ~(Qualifiers::Const | Qualifiers::Volatile)) && "non-const, non-volatile qualifiers for copy ctor arg"); SpecialMemberOverloadResult *Result = LookupSpecialMember(Class, CXXCopyConstructor, Quals & Qualifiers::Const, Quals & Qualifiers::Volatile, false, false, false); return cast_or_null(Result->getMethod()); } /// \brief Look up the moving constructor for the given class. CXXConstructorDecl *Sema::LookupMovingConstructor(CXXRecordDecl *Class, unsigned Quals) { SpecialMemberOverloadResult *Result = LookupSpecialMember(Class, CXXMoveConstructor, Quals & Qualifiers::Const, Quals & Qualifiers::Volatile, false, false, false); return cast_or_null(Result->getMethod()); } /// \brief Look up the constructors for the given class. DeclContext::lookup_result Sema::LookupConstructors(CXXRecordDecl *Class) { // If the implicit constructors have not yet been declared, do so now. if (CanDeclareSpecialMemberFunction(Class)) { if (Class->needsImplicitDefaultConstructor()) DeclareImplicitDefaultConstructor(Class); if (Class->needsImplicitCopyConstructor()) DeclareImplicitCopyConstructor(Class); if (getLangOpts().CPlusPlus11 && Class->needsImplicitMoveConstructor()) DeclareImplicitMoveConstructor(Class); } CanQualType T = Context.getCanonicalType(Context.getTypeDeclType(Class)); DeclarationName Name = Context.DeclarationNames.getCXXConstructorName(T); return Class->lookup(Name); } /// \brief Look up the copying assignment operator for the given class. 
CXXMethodDecl *Sema::LookupCopyingAssignment(CXXRecordDecl *Class, unsigned Quals, bool RValueThis, unsigned ThisQuals) { assert(!(Quals & ~(Qualifiers::Const | Qualifiers::Volatile)) && "non-const, non-volatile qualifiers for copy assignment arg"); assert(!(ThisQuals & ~(Qualifiers::Const | Qualifiers::Volatile)) && "non-const, non-volatile qualifiers for copy assignment this"); SpecialMemberOverloadResult *Result = LookupSpecialMember(Class, CXXCopyAssignment, Quals & Qualifiers::Const, Quals & Qualifiers::Volatile, RValueThis, ThisQuals & Qualifiers::Const, ThisQuals & Qualifiers::Volatile); return Result->getMethod(); } /// \brief Look up the moving assignment operator for the given class. CXXMethodDecl *Sema::LookupMovingAssignment(CXXRecordDecl *Class, unsigned Quals, bool RValueThis, unsigned ThisQuals) { assert(!(ThisQuals & ~(Qualifiers::Const | Qualifiers::Volatile)) && "non-const, non-volatile qualifiers for copy assignment this"); SpecialMemberOverloadResult *Result = LookupSpecialMember(Class, CXXMoveAssignment, Quals & Qualifiers::Const, Quals & Qualifiers::Volatile, RValueThis, ThisQuals & Qualifiers::Const, ThisQuals & Qualifiers::Volatile); return Result->getMethod(); } /// \brief Look for the destructor of the given class. /// /// During semantic analysis, this routine should be used in lieu of /// CXXRecordDecl::getDestructor(). /// /// \returns The destructor for this class. CXXDestructorDecl *Sema::LookupDestructor(CXXRecordDecl *Class) { return cast(LookupSpecialMember(Class, CXXDestructor, false, false, false, false, false)->getMethod()); } /// LookupLiteralOperator - Determine which literal operator should be used for /// a user-defined literal, per C++11 [lex.ext]. /// /// Normal overload resolution is not used to select which literal operator to /// call for a user-defined literal. Look up the provided literal operator name, /// and filter the results to the appropriate set for the given argument types. 
Sema::LiteralOperatorLookupResult Sema::LookupLiteralOperator(Scope *S, LookupResult &R, ArrayRef ArgTys, bool AllowRaw, bool AllowTemplate, bool AllowStringTemplate) { LookupName(R, S); assert(R.getResultKind() != LookupResult::Ambiguous && "literal operator lookup can't be ambiguous"); // Filter the lookup results appropriately. LookupResult::Filter F = R.makeFilter(); bool FoundRaw = false; bool FoundTemplate = false; bool FoundStringTemplate = false; bool FoundExactMatch = false; while (F.hasNext()) { Decl *D = F.next(); if (UsingShadowDecl *USD = dyn_cast(D)) D = USD->getTargetDecl(); // If the declaration we found is invalid, skip it. if (D->isInvalidDecl()) { F.erase(); continue; } bool IsRaw = false; bool IsTemplate = false; bool IsStringTemplate = false; bool IsExactMatch = false; if (FunctionDecl *FD = dyn_cast(D)) { if (FD->getNumParams() == 1 && FD->getParamDecl(0)->getType()->getAs()) IsRaw = true; else if (FD->getNumParams() == ArgTys.size()) { IsExactMatch = true; for (unsigned ArgIdx = 0; ArgIdx != ArgTys.size(); ++ArgIdx) { QualType ParamTy = FD->getParamDecl(ArgIdx)->getType(); if (!Context.hasSameUnqualifiedType(ArgTys[ArgIdx], ParamTy)) { IsExactMatch = false; break; } } } } if (FunctionTemplateDecl *FD = dyn_cast(D)) { TemplateParameterList *Params = FD->getTemplateParameters(); if (Params->size() == 1) IsTemplate = true; else IsStringTemplate = true; } if (IsExactMatch) { FoundExactMatch = true; AllowRaw = false; AllowTemplate = false; AllowStringTemplate = false; if (FoundRaw || FoundTemplate || FoundStringTemplate) { // Go through again and remove the raw and template decls we've // already found. 
F.restart(); FoundRaw = FoundTemplate = FoundStringTemplate = false; } } else if (AllowRaw && IsRaw) { FoundRaw = true; } else if (AllowTemplate && IsTemplate) { FoundTemplate = true; } else if (AllowStringTemplate && IsStringTemplate) { FoundStringTemplate = true; } else { F.erase(); } } F.done(); // C++11 [lex.ext]p3, p4: If S contains a literal operator with a matching // parameter type, that is used in preference to a raw literal operator // or literal operator template. if (FoundExactMatch) return LOLR_Cooked; // C++11 [lex.ext]p3, p4: S shall contain a raw literal operator or a literal // operator template, but not both. if (FoundRaw && FoundTemplate) { Diag(R.getNameLoc(), diag::err_ovl_ambiguous_call) << R.getLookupName(); for (LookupResult::iterator I = R.begin(), E = R.end(); I != E; ++I) NoteOverloadCandidate(*I, (*I)->getUnderlyingDecl()->getAsFunction()); return LOLR_Error; } if (FoundRaw) return LOLR_Raw; if (FoundTemplate) return LOLR_Template; if (FoundStringTemplate) return LOLR_StringTemplate; // Didn't find anything we could use. Diag(R.getNameLoc(), diag::err_ovl_no_viable_literal_operator) << R.getLookupName() << (int)ArgTys.size() << ArgTys[0] << (ArgTys.size() == 2 ? ArgTys[1] : QualType()) << AllowRaw << (AllowTemplate || AllowStringTemplate); return LOLR_Error; } void ADLResult::insert(NamedDecl *New) { NamedDecl *&Old = Decls[cast(New->getCanonicalDecl())]; // If we haven't yet seen a decl for this key, or the last decl // was exactly this one, we're done. if (Old == nullptr || Old == New) { Old = New; return; } // Otherwise, decide which is a more recent redeclaration. FunctionDecl *OldFD = Old->getAsFunction(); FunctionDecl *NewFD = New->getAsFunction(); FunctionDecl *Cursor = NewFD; while (true) { Cursor = Cursor->getPreviousDecl(); // If we got to the end without finding OldFD, OldFD is the newer // declaration; leave things as they are. if (!Cursor) return; // If we do find OldFD, then NewFD is newer. 
if (Cursor == OldFD) break; // Otherwise, keep looking. } Old = New; } void Sema::ArgumentDependentLookup(DeclarationName Name, SourceLocation Loc, ArrayRef Args, ADLResult &Result) { // Find all of the associated namespaces and classes based on the // arguments we have. AssociatedNamespaceSet AssociatedNamespaces; AssociatedClassSet AssociatedClasses; FindAssociatedClassesAndNamespaces(Loc, Args, AssociatedNamespaces, AssociatedClasses); // C++ [basic.lookup.argdep]p3: // Let X be the lookup set produced by unqualified lookup (3.4.1) // and let Y be the lookup set produced by argument dependent // lookup (defined as follows). If X contains [...] then Y is // empty. Otherwise Y is the set of declarations found in the // namespaces associated with the argument types as described // below. The set of declarations found by the lookup of the name // is the union of X and Y. // // Here, we compute Y and add its members to the overloaded // candidate set. for (auto *NS : AssociatedNamespaces) { // When considering an associated namespace, the lookup is the // same as the lookup performed when the associated namespace is // used as a qualifier (3.4.3.2) except that: // // -- Any using-directives in the associated namespace are // ignored. // // -- Any namespace-scope friend functions declared in // associated classes are visible within their respective // namespaces even if they are not visible during an ordinary // lookup (11.4). DeclContext::lookup_result R = NS->lookup(Name); for (auto *D : R) { // If the only declaration here is an ordinary friend, consider // it only if it was declared in an associated classes. if ((D->getIdentifierNamespace() & Decl::IDNS_Ordinary) == 0) { // If it's neither ordinarily visible nor a friend, we can't find it. 
if ((D->getIdentifierNamespace() & Decl::IDNS_OrdinaryFriend) == 0) continue; bool DeclaredInAssociatedClass = false; for (Decl *DI = D; DI; DI = DI->getPreviousDecl()) { DeclContext *LexDC = DI->getLexicalDeclContext(); if (isa(LexDC) && AssociatedClasses.count(cast(LexDC)) && isVisible(cast(DI))) { DeclaredInAssociatedClass = true; break; } } if (!DeclaredInAssociatedClass) continue; } if (isa(D)) D = cast(D)->getTargetDecl(); if (!isa(D) && !isa(D)) continue; if (!isVisible(D) && !(D = findAcceptableDecl(*this, D))) continue; Result.insert(D); } } } //---------------------------------------------------------------------------- // Search for all visible declarations. //---------------------------------------------------------------------------- VisibleDeclConsumer::~VisibleDeclConsumer() { } bool VisibleDeclConsumer::includeHiddenDecls() const { return false; } namespace { class ShadowContextRAII; class VisibleDeclsRecord { public: /// \brief An entry in the shadow map, which is optimized to store a /// single declaration (the common case) but can also store a list /// of declarations. typedef llvm::TinyPtrVector ShadowMapEntry; private: /// \brief A mapping from declaration names to the declarations that have /// this name within a particular scope. typedef llvm::DenseMap ShadowMap; /// \brief A list of shadow maps, which is used to model name hiding. std::list ShadowMaps; /// \brief The declaration contexts we have already visited. llvm::SmallPtrSet VisitedContexts; friend class ShadowContextRAII; public: /// \brief Determine whether we have already visited this context /// (and, if not, note that we are going to visit that context now). bool visitedContext(DeclContext *Ctx) { return !VisitedContexts.insert(Ctx).second; } bool alreadyVisitedContext(DeclContext *Ctx) { return VisitedContexts.count(Ctx); } /// \brief Determine whether the given declaration is hidden in the /// current scope. 
/// /// \returns the declaration that hides the given declaration, or /// NULL if no such declaration exists. NamedDecl *checkHidden(NamedDecl *ND); /// \brief Add a declaration to the current shadow map. void add(NamedDecl *ND) { ShadowMaps.back()[ND->getDeclName()].push_back(ND); } }; /// \brief RAII object that records when we've entered a shadow context. class ShadowContextRAII { VisibleDeclsRecord &Visible; typedef VisibleDeclsRecord::ShadowMap ShadowMap; public: ShadowContextRAII(VisibleDeclsRecord &Visible) : Visible(Visible) { Visible.ShadowMaps.emplace_back(); } ~ShadowContextRAII() { Visible.ShadowMaps.pop_back(); } }; } // end anonymous namespace NamedDecl *VisibleDeclsRecord::checkHidden(NamedDecl *ND) { unsigned IDNS = ND->getIdentifierNamespace(); std::list::reverse_iterator SM = ShadowMaps.rbegin(); for (std::list::reverse_iterator SMEnd = ShadowMaps.rend(); SM != SMEnd; ++SM) { ShadowMap::iterator Pos = SM->find(ND->getDeclName()); if (Pos == SM->end()) continue; for (auto *D : Pos->second) { // A tag declaration does not hide a non-tag declaration. if (D->hasTagIdentifierNamespace() && (IDNS & (Decl::IDNS_Member | Decl::IDNS_Ordinary | Decl::IDNS_ObjCProtocol))) continue; // Protocols are in distinct namespaces from everything else. if (((D->getIdentifierNamespace() & Decl::IDNS_ObjCProtocol) || (IDNS & Decl::IDNS_ObjCProtocol)) && D->getIdentifierNamespace() != IDNS) continue; // Functions and function templates in the same scope overload // rather than hide. FIXME: Look for hiding based on function // signatures! if (D->getUnderlyingDecl()->isFunctionOrFunctionTemplate() && ND->getUnderlyingDecl()->isFunctionOrFunctionTemplate() && SM == ShadowMaps.rbegin()) continue; // We've found a declaration that hides this one. 
return D; } } return nullptr; } static void LookupVisibleDecls(DeclContext *Ctx, LookupResult &Result, bool QualifiedNameLookup, bool InBaseClass, VisibleDeclConsumer &Consumer, VisibleDeclsRecord &Visited) { if (!Ctx) return; // Make sure we don't visit the same context twice. if (Visited.visitedContext(Ctx->getPrimaryContext())) return; // Outside C++, lookup results for the TU live on identifiers. if (isa(Ctx) && !Result.getSema().getLangOpts().CPlusPlus) { auto &S = Result.getSema(); auto &Idents = S.Context.Idents; // Ensure all external identifiers are in the identifier table. if (IdentifierInfoLookup *External = Idents.getExternalIdentifierLookup()) { std::unique_ptr Iter(External->getIdentifiers()); for (StringRef Name = Iter->Next(); !Name.empty(); Name = Iter->Next()) Idents.get(Name); } // Walk all lookup results in the TU for each identifier. for (const auto &Ident : Idents) { for (auto I = S.IdResolver.begin(Ident.getValue()), E = S.IdResolver.end(); I != E; ++I) { if (S.IdResolver.isDeclInScope(*I, Ctx)) { if (NamedDecl *ND = Result.getAcceptableDecl(*I)) { Consumer.FoundDecl(ND, Visited.checkHidden(ND), Ctx, InBaseClass); Visited.add(ND); } } } } return; } if (CXXRecordDecl *Class = dyn_cast(Ctx)) Result.getSema().ForceDeclarationOfImplicitMembers(Class); // Enumerate all of the results in this context. for (DeclContextLookupResult R : Ctx->lookups()) { for (auto *D : R) { if (auto *ND = Result.getAcceptableDecl(D)) { Consumer.FoundDecl(ND, Visited.checkHidden(ND), Ctx, InBaseClass); Visited.add(ND); } } } // Traverse using directives for qualified name lookup. if (QualifiedNameLookup) { ShadowContextRAII Shadow(Visited); for (auto I : Ctx->using_directives()) { LookupVisibleDecls(I->getNominatedNamespace(), Result, QualifiedNameLookup, InBaseClass, Consumer, Visited); } } // Traverse the contexts of inherited C++ classes. 
if (CXXRecordDecl *Record = dyn_cast(Ctx)) { if (!Record->hasDefinition()) return; for (const auto &B : Record->bases()) { QualType BaseType = B.getType(); // Don't look into dependent bases, because name lookup can't look // there anyway. if (BaseType->isDependentType()) continue; const RecordType *Record = BaseType->getAs(); if (!Record) continue; // FIXME: It would be nice to be able to determine whether referencing // a particular member would be ambiguous. For example, given // // struct A { int member; }; // struct B { int member; }; // struct C : A, B { }; // // void f(C *c) { c->### } // // accessing 'member' would result in an ambiguity. However, we // could be smart enough to qualify the member with the base // class, e.g., // // c->B::member // // or // // c->A::member // Find results in this base class (and its bases). ShadowContextRAII Shadow(Visited); LookupVisibleDecls(Record->getDecl(), Result, QualifiedNameLookup, true, Consumer, Visited); } } // Traverse the contexts of Objective-C classes. if (ObjCInterfaceDecl *IFace = dyn_cast(Ctx)) { // Traverse categories. for (auto *Cat : IFace->visible_categories()) { ShadowContextRAII Shadow(Visited); LookupVisibleDecls(Cat, Result, QualifiedNameLookup, false, Consumer, Visited); } // Traverse protocols. for (auto *I : IFace->all_referenced_protocols()) { ShadowContextRAII Shadow(Visited); LookupVisibleDecls(I, Result, QualifiedNameLookup, false, Consumer, Visited); } // Traverse the superclass. if (IFace->getSuperClass()) { ShadowContextRAII Shadow(Visited); LookupVisibleDecls(IFace->getSuperClass(), Result, QualifiedNameLookup, true, Consumer, Visited); } // If there is an implementation, traverse it. We do this to find // synthesized ivars. 
if (IFace->getImplementation()) { ShadowContextRAII Shadow(Visited); LookupVisibleDecls(IFace->getImplementation(), Result, QualifiedNameLookup, InBaseClass, Consumer, Visited); } } else if (ObjCProtocolDecl *Protocol = dyn_cast(Ctx)) { for (auto *I : Protocol->protocols()) { ShadowContextRAII Shadow(Visited); LookupVisibleDecls(I, Result, QualifiedNameLookup, false, Consumer, Visited); } } else if (ObjCCategoryDecl *Category = dyn_cast(Ctx)) { for (auto *I : Category->protocols()) { ShadowContextRAII Shadow(Visited); LookupVisibleDecls(I, Result, QualifiedNameLookup, false, Consumer, Visited); } // If there is an implementation, traverse it. if (Category->getImplementation()) { ShadowContextRAII Shadow(Visited); LookupVisibleDecls(Category->getImplementation(), Result, QualifiedNameLookup, true, Consumer, Visited); } } } static void LookupVisibleDecls(Scope *S, LookupResult &Result, UnqualUsingDirectiveSet &UDirs, VisibleDeclConsumer &Consumer, VisibleDeclsRecord &Visited) { if (!S) return; if (!S->getEntity() || (!S->getParent() && !Visited.alreadyVisitedContext(S->getEntity())) || (S->getEntity())->isFunctionOrMethod()) { FindLocalExternScope FindLocals(Result); // Walk through the declarations in this Scope. for (auto *D : S->decls()) { if (NamedDecl *ND = dyn_cast(D)) if ((ND = Result.getAcceptableDecl(ND))) { Consumer.FoundDecl(ND, Visited.checkHidden(ND), nullptr, false); Visited.add(ND); } } } // FIXME: C++ [temp.local]p8 DeclContext *Entity = nullptr; if (S->getEntity()) { // Look into this scope's declaration context, along with any of its // parent lookup contexts (e.g., enclosing classes), up to the point // where we hit the context stored in the next outer scope. 
Entity = S->getEntity(); DeclContext *OuterCtx = findOuterContext(S).first; // FIXME for (DeclContext *Ctx = Entity; Ctx && !Ctx->Equals(OuterCtx); Ctx = Ctx->getLookupParent()) { if (ObjCMethodDecl *Method = dyn_cast(Ctx)) { if (Method->isInstanceMethod()) { // For instance methods, look for ivars in the method's interface. LookupResult IvarResult(Result.getSema(), Result.getLookupName(), Result.getNameLoc(), Sema::LookupMemberName); if (ObjCInterfaceDecl *IFace = Method->getClassInterface()) { LookupVisibleDecls(IFace, IvarResult, /*QualifiedNameLookup=*/false, /*InBaseClass=*/false, Consumer, Visited); } } // We've already performed all of the name lookup that we need // to for Objective-C methods; the next context will be the // outer scope. break; } if (Ctx->isFunctionOrMethod()) continue; LookupVisibleDecls(Ctx, Result, /*QualifiedNameLookup=*/false, /*InBaseClass=*/false, Consumer, Visited); } } else if (!S->getParent()) { // Look into the translation unit scope. We walk through the translation // unit's declaration context, because the Scope itself won't have all of // the declarations if we loaded a precompiled header. // FIXME: We would like the translation unit's Scope object to point to the // translation unit, so we don't need this special "if" branch. However, // doing so would force the normal C++ name-lookup code to look into the // translation unit decl when the IdentifierInfo chains would suffice. // Once we fix that problem (which is part of a more general "don't look // in DeclContexts unless we have to" optimization), we can eliminate this. Entity = Result.getSema().Context.getTranslationUnitDecl(); LookupVisibleDecls(Entity, Result, /*QualifiedNameLookup=*/false, /*InBaseClass=*/false, Consumer, Visited); } if (Entity) { // Lookup visible declarations in any namespaces found by using // directives. 
for (const UnqualUsingEntry &UUE : UDirs.getNamespacesFor(Entity)) LookupVisibleDecls(const_cast(UUE.getNominatedNamespace()), Result, /*QualifiedNameLookup=*/false, /*InBaseClass=*/false, Consumer, Visited); } // Lookup names in the parent scope. ShadowContextRAII Shadow(Visited); LookupVisibleDecls(S->getParent(), Result, UDirs, Consumer, Visited); } void Sema::LookupVisibleDecls(Scope *S, LookupNameKind Kind, VisibleDeclConsumer &Consumer, bool IncludeGlobalScope) { // Determine the set of using directives available during // unqualified name lookup. Scope *Initial = S; UnqualUsingDirectiveSet UDirs; if (getLangOpts().CPlusPlus) { // Find the first namespace or translation-unit scope. while (S && !isNamespaceOrTranslationUnitScope(S)) S = S->getParent(); UDirs.visitScopeChain(Initial, S); } UDirs.done(); // Look for visible declarations. LookupResult Result(*this, DeclarationName(), SourceLocation(), Kind); Result.setAllowHidden(Consumer.includeHiddenDecls()); VisibleDeclsRecord Visited; if (!IncludeGlobalScope) Visited.visitedContext(Context.getTranslationUnitDecl()); ShadowContextRAII Shadow(Visited); ::LookupVisibleDecls(Initial, Result, UDirs, Consumer, Visited); } void Sema::LookupVisibleDecls(DeclContext *Ctx, LookupNameKind Kind, VisibleDeclConsumer &Consumer, bool IncludeGlobalScope) { LookupResult Result(*this, DeclarationName(), SourceLocation(), Kind); Result.setAllowHidden(Consumer.includeHiddenDecls()); VisibleDeclsRecord Visited; if (!IncludeGlobalScope) Visited.visitedContext(Context.getTranslationUnitDecl()); ShadowContextRAII Shadow(Visited); ::LookupVisibleDecls(Ctx, Result, /*QualifiedNameLookup=*/true, /*InBaseClass=*/false, Consumer, Visited); } /// LookupOrCreateLabel - Do a name lookup of a label with the specified name. /// If GnuLabelLoc is a valid source location, then this is a definition /// of an __label__ label name, otherwise it is a normal label definition /// or use. 
LabelDecl *Sema::LookupOrCreateLabel(IdentifierInfo *II, SourceLocation Loc, SourceLocation GnuLabelLoc) { // Do a lookup to see if we have a label with this name already. NamedDecl *Res = nullptr; if (GnuLabelLoc.isValid()) { // Local label definitions always shadow existing labels. Res = LabelDecl::Create(Context, CurContext, Loc, II, GnuLabelLoc); Scope *S = CurScope; PushOnScopeChains(Res, S, true); return cast(Res); } // Not a GNU local label. Res = LookupSingleName(CurScope, II, Loc, LookupLabel, NotForRedeclaration); // If we found a label, check to see if it is in the same context as us. // When in a Block, we don't want to reuse a label in an enclosing function. if (Res && Res->getDeclContext() != CurContext) Res = nullptr; if (!Res) { // If not forward referenced or defined already, create the backing decl. Res = LabelDecl::Create(Context, CurContext, Loc, II); Scope *S = CurScope->getFnParent(); assert(S && "Not in a function?"); PushOnScopeChains(Res, S, true); } return cast(Res); } //===----------------------------------------------------------------------===// // Typo correction //===----------------------------------------------------------------------===// static bool isCandidateViable(CorrectionCandidateCallback &CCC, TypoCorrection &Candidate) { Candidate.setCallbackDistance(CCC.RankCandidate(Candidate)); return Candidate.getEditDistance(false) != TypoCorrection::InvalidDistance; } static void LookupPotentialTypoResult(Sema &SemaRef, LookupResult &Res, IdentifierInfo *Name, Scope *S, CXXScopeSpec *SS, DeclContext *MemberContext, bool EnteringContext, bool isObjCIvarLookup, bool FindHidden); /// \brief Check whether the declarations found for a typo correction are /// visible, and if none of them are, convert the correction to an 'import /// a module' correction. 
static void checkCorrectionVisibility(Sema &SemaRef, TypoCorrection &TC) { if (TC.begin() == TC.end()) return; TypoCorrection::decl_iterator DI = TC.begin(), DE = TC.end(); for (/**/; DI != DE; ++DI) if (!LookupResult::isVisible(SemaRef, *DI)) break; // Nothing to do if all decls are visible. if (DI == DE) return; llvm::SmallVector NewDecls(TC.begin(), DI); bool AnyVisibleDecls = !NewDecls.empty(); for (/**/; DI != DE; ++DI) { NamedDecl *VisibleDecl = *DI; if (!LookupResult::isVisible(SemaRef, *DI)) VisibleDecl = findAcceptableDecl(SemaRef, *DI); if (VisibleDecl) { if (!AnyVisibleDecls) { // Found a visible decl, discard all hidden ones. AnyVisibleDecls = true; NewDecls.clear(); } NewDecls.push_back(VisibleDecl); } else if (!AnyVisibleDecls && !(*DI)->isModulePrivate()) NewDecls.push_back(*DI); } if (NewDecls.empty()) TC = TypoCorrection(); else { TC.setCorrectionDecls(NewDecls); TC.setRequiresImport(!AnyVisibleDecls); } } // Fill the supplied vector with the IdentifierInfo pointers for each piece of // the given NestedNameSpecifier (i.e. given a NestedNameSpecifier "foo::bar::", // fill the vector with the IdentifierInfo pointers for "foo" and "bar"). 
static void getNestedNameSpecifierIdentifiers( NestedNameSpecifier *NNS, SmallVectorImpl &Identifiers) { if (NestedNameSpecifier *Prefix = NNS->getPrefix()) getNestedNameSpecifierIdentifiers(Prefix, Identifiers); else Identifiers.clear(); const IdentifierInfo *II = nullptr; switch (NNS->getKind()) { case NestedNameSpecifier::Identifier: II = NNS->getAsIdentifier(); break; case NestedNameSpecifier::Namespace: if (NNS->getAsNamespace()->isAnonymousNamespace()) return; II = NNS->getAsNamespace()->getIdentifier(); break; case NestedNameSpecifier::NamespaceAlias: II = NNS->getAsNamespaceAlias()->getIdentifier(); break; case NestedNameSpecifier::TypeSpecWithTemplate: case NestedNameSpecifier::TypeSpec: II = QualType(NNS->getAsType(), 0).getBaseTypeIdentifier(); break; case NestedNameSpecifier::Global: case NestedNameSpecifier::Super: return; } if (II) Identifiers.push_back(II); } void TypoCorrectionConsumer::FoundDecl(NamedDecl *ND, NamedDecl *Hiding, DeclContext *Ctx, bool InBaseClass) { // Don't consider hidden names for typo correction. if (Hiding) return; // Only consider entities with identifiers for names, ignoring // special names (constructors, overloaded operators, selectors, // etc.). IdentifierInfo *Name = ND->getIdentifier(); if (!Name) return; // Only consider visible declarations and declarations from modules with // names that exactly match. if (!LookupResult::isVisible(SemaRef, ND) && Name != Typo && !findAcceptableDecl(SemaRef, ND)) return; FoundName(Name->getName()); } void TypoCorrectionConsumer::FoundName(StringRef Name) { // Compute the edit distance between the typo and the name of this // entity, and add the identifier to the list of results. addName(Name, nullptr); } void TypoCorrectionConsumer::addKeywordResult(StringRef Keyword) { // Compute the edit distance between the typo and this keyword, // and add the keyword to the list of results. 
addName(Keyword, nullptr, nullptr, true); } void TypoCorrectionConsumer::addName(StringRef Name, NamedDecl *ND, NestedNameSpecifier *NNS, bool isKeyword) { // Use a simple length-based heuristic to determine the minimum possible // edit distance. If the minimum isn't good enough, bail out early. StringRef TypoStr = Typo->getName(); unsigned MinED = abs((int)Name.size() - (int)TypoStr.size()); if (MinED && TypoStr.size() / MinED < 3) return; // Compute an upper bound on the allowable edit distance, so that the // edit-distance algorithm can short-circuit. unsigned UpperBound = (TypoStr.size() + 2) / 3 + 1; unsigned ED = TypoStr.edit_distance(Name, true, UpperBound); if (ED >= UpperBound) return; TypoCorrection TC(&SemaRef.Context.Idents.get(Name), ND, NNS, ED); if (isKeyword) TC.makeKeyword(); TC.setCorrectionRange(nullptr, Result.getLookupNameInfo()); addCorrection(TC); } static const unsigned MaxTypoDistanceResultSets = 5; void TypoCorrectionConsumer::addCorrection(TypoCorrection Correction) { StringRef TypoStr = Typo->getName(); StringRef Name = Correction.getCorrectionAsIdentifierInfo()->getName(); // For very short typos, ignore potential corrections that have a different // base identifier from the typo or which have a normalized edit distance // longer than the typo itself. if (TypoStr.size() < 3 && (Name != TypoStr || Correction.getEditDistance(true) > TypoStr.size())) return; // If the correction is resolved but is not viable, ignore it. 
if (Correction.isResolved()) { checkCorrectionVisibility(SemaRef, Correction); if (!Correction || !isCandidateViable(*CorrectionValidator, Correction)) return; } TypoResultList &CList = CorrectionResults[Correction.getEditDistance(false)][Name]; if (!CList.empty() && !CList.back().isResolved()) CList.pop_back(); if (NamedDecl *NewND = Correction.getCorrectionDecl()) { std::string CorrectionStr = Correction.getAsString(SemaRef.getLangOpts()); for (TypoResultList::iterator RI = CList.begin(), RIEnd = CList.end(); RI != RIEnd; ++RI) { // If the Correction refers to a decl already in the result list, // replace the existing result if the string representation of Correction // comes before the current result alphabetically, then stop as there is // nothing more to be done to add Correction to the candidate set. if (RI->getCorrectionDecl() == NewND) { if (CorrectionStr < RI->getAsString(SemaRef.getLangOpts())) *RI = Correction; return; } } } if (CList.empty() || Correction.isResolved()) CList.push_back(Correction); while (CorrectionResults.size() > MaxTypoDistanceResultSets) CorrectionResults.erase(std::prev(CorrectionResults.end())); } void TypoCorrectionConsumer::addNamespaces( const llvm::MapVector &KnownNamespaces) { SearchNamespaces = true; for (auto KNPair : KnownNamespaces) Namespaces.addNameSpecifier(KNPair.first); bool SSIsTemplate = false; if (NestedNameSpecifier *NNS = (SS && SS->isValid()) ? SS->getScopeRep() : nullptr) { if (const Type *T = NNS->getAsType()) SSIsTemplate = T->getTypeClass() == Type::TemplateSpecialization; } // Do not transform this into an iterator-based loop. The loop body can // trigger the creation of further types (through lazy deserialization) and // invalide iterators into this list. 
auto &Types = SemaRef.getASTContext().getTypes(); for (unsigned I = 0; I != Types.size(); ++I) { const auto *TI = Types[I]; if (CXXRecordDecl *CD = TI->getAsCXXRecordDecl()) { CD = CD->getCanonicalDecl(); if (!CD->isDependentType() && !CD->isAnonymousStructOrUnion() && !CD->isUnion() && CD->getIdentifier() && (SSIsTemplate || !isa(CD)) && (CD->isBeingDefined() || CD->isCompleteDefinition())) Namespaces.addNameSpecifier(CD); } } } const TypoCorrection &TypoCorrectionConsumer::getNextCorrection() { if (++CurrentTCIndex < ValidatedCorrections.size()) return ValidatedCorrections[CurrentTCIndex]; CurrentTCIndex = ValidatedCorrections.size(); while (!CorrectionResults.empty()) { auto DI = CorrectionResults.begin(); if (DI->second.empty()) { CorrectionResults.erase(DI); continue; } auto RI = DI->second.begin(); if (RI->second.empty()) { DI->second.erase(RI); performQualifiedLookups(); continue; } TypoCorrection TC = RI->second.pop_back_val(); if (TC.isResolved() || TC.requiresImport() || resolveCorrection(TC)) { ValidatedCorrections.push_back(TC); return ValidatedCorrections[CurrentTCIndex]; } } return ValidatedCorrections[0]; // The empty correction. 
} bool TypoCorrectionConsumer::resolveCorrection(TypoCorrection &Candidate) { IdentifierInfo *Name = Candidate.getCorrectionAsIdentifierInfo(); DeclContext *TempMemberContext = MemberContext; CXXScopeSpec *TempSS = SS.get(); retry_lookup: LookupPotentialTypoResult(SemaRef, Result, Name, S, TempSS, TempMemberContext, EnteringContext, CorrectionValidator->IsObjCIvarLookup, Name == Typo && !Candidate.WillReplaceSpecifier()); switch (Result.getResultKind()) { case LookupResult::NotFound: case LookupResult::NotFoundInCurrentInstantiation: case LookupResult::FoundUnresolvedValue: if (TempSS) { // Immediately retry the lookup without the given CXXScopeSpec TempSS = nullptr; Candidate.WillReplaceSpecifier(true); goto retry_lookup; } if (TempMemberContext) { if (SS && !TempSS) TempSS = SS.get(); TempMemberContext = nullptr; goto retry_lookup; } if (SearchNamespaces) QualifiedResults.push_back(Candidate); break; case LookupResult::Ambiguous: // We don't deal with ambiguities. break; case LookupResult::Found: case LookupResult::FoundOverloaded: // Store all of the Decls for overloaded symbols for (auto *TRD : Result) Candidate.addCorrectionDecl(TRD); checkCorrectionVisibility(SemaRef, Candidate); if (!isCandidateViable(*CorrectionValidator, Candidate)) { if (SearchNamespaces) QualifiedResults.push_back(Candidate); break; } Candidate.setCorrectionRange(SS.get(), Result.getLookupNameInfo()); return true; } return false; } void TypoCorrectionConsumer::performQualifiedLookups() { unsigned TypoLen = Typo->getName().size(); for (const TypoCorrection &QR : QualifiedResults) { for (const auto &NSI : Namespaces) { DeclContext *Ctx = NSI.DeclCtx; const Type *NSType = NSI.NameSpecifier->getAsType(); // If the current NestedNameSpecifier refers to a class and the // current correction candidate is the name of that class, then skip // it as it is unlikely a qualified version of the class' constructor // is an appropriate correction. if (CXXRecordDecl *NSDecl = NSType ? 
NSType->getAsCXXRecordDecl() : nullptr) { if (NSDecl->getIdentifier() == QR.getCorrectionAsIdentifierInfo()) continue; } TypoCorrection TC(QR); TC.ClearCorrectionDecls(); TC.setCorrectionSpecifier(NSI.NameSpecifier); TC.setQualifierDistance(NSI.EditDistance); TC.setCallbackDistance(0); // Reset the callback distance // If the current correction candidate and namespace combination are // too far away from the original typo based on the normalized edit // distance, then skip performing a qualified name lookup. unsigned TmpED = TC.getEditDistance(true); if (QR.getCorrectionAsIdentifierInfo() != Typo && TmpED && TypoLen / TmpED < 3) continue; Result.clear(); Result.setLookupName(QR.getCorrectionAsIdentifierInfo()); if (!SemaRef.LookupQualifiedName(Result, Ctx)) continue; // Any corrections added below will be validated in subsequent // iterations of the main while() loop over the Consumer's contents. switch (Result.getResultKind()) { case LookupResult::Found: case LookupResult::FoundOverloaded: { if (SS && SS->isValid()) { std::string NewQualified = TC.getAsString(SemaRef.getLangOpts()); std::string OldQualified; llvm::raw_string_ostream OldOStream(OldQualified); SS->getScopeRep()->print(OldOStream, SemaRef.getPrintingPolicy()); OldOStream << Typo->getName(); // If correction candidate would be an identical written qualified // identifer, then the existing CXXScopeSpec probably included a // typedef that didn't get accounted for properly. if (OldOStream.str() == NewQualified) break; } for (LookupResult::iterator TRD = Result.begin(), TRDEnd = Result.end(); TRD != TRDEnd; ++TRD) { if (SemaRef.CheckMemberAccess(TC.getCorrectionRange().getBegin(), NSType ? 
NSType->getAsCXXRecordDecl() : nullptr, TRD.getPair()) == Sema::AR_accessible) TC.addCorrectionDecl(*TRD); } if (TC.isResolved()) { TC.setCorrectionRange(SS.get(), Result.getLookupNameInfo()); addCorrection(TC); } break; } case LookupResult::NotFound: case LookupResult::NotFoundInCurrentInstantiation: case LookupResult::Ambiguous: case LookupResult::FoundUnresolvedValue: break; } } } QualifiedResults.clear(); } TypoCorrectionConsumer::NamespaceSpecifierSet::NamespaceSpecifierSet( ASTContext &Context, DeclContext *CurContext, CXXScopeSpec *CurScopeSpec) : Context(Context), CurContextChain(buildContextChain(CurContext)) { if (NestedNameSpecifier *NNS = CurScopeSpec ? CurScopeSpec->getScopeRep() : nullptr) { llvm::raw_string_ostream SpecifierOStream(CurNameSpecifier); NNS->print(SpecifierOStream, Context.getPrintingPolicy()); getNestedNameSpecifierIdentifiers(NNS, CurNameSpecifierIdentifiers); } // Build the list of identifiers that would be used for an absolute // (from the global context) NestedNameSpecifier referring to the current // context. 
for (DeclContext *C : llvm::reverse(CurContextChain)) { if (auto *ND = dyn_cast_or_null(C)) CurContextIdentifiers.push_back(ND->getIdentifier()); } // Add the global context as a NestedNameSpecifier SpecifierInfo SI = {cast(Context.getTranslationUnitDecl()), NestedNameSpecifier::GlobalSpecifier(Context), 1}; DistanceMap[1].push_back(SI); } auto TypoCorrectionConsumer::NamespaceSpecifierSet::buildContextChain( DeclContext *Start) -> DeclContextList { assert(Start && "Building a context chain from a null context"); DeclContextList Chain; for (DeclContext *DC = Start->getPrimaryContext(); DC != nullptr; DC = DC->getLookupParent()) { NamespaceDecl *ND = dyn_cast_or_null(DC); if (!DC->isInlineNamespace() && !DC->isTransparentContext() && !(ND && ND->isAnonymousNamespace())) Chain.push_back(DC->getPrimaryContext()); } return Chain; } unsigned TypoCorrectionConsumer::NamespaceSpecifierSet::buildNestedNameSpecifier( DeclContextList &DeclChain, NestedNameSpecifier *&NNS) { unsigned NumSpecifiers = 0; for (DeclContext *C : llvm::reverse(DeclChain)) { if (auto *ND = dyn_cast_or_null(C)) { NNS = NestedNameSpecifier::Create(Context, NNS, ND); ++NumSpecifiers; } else if (auto *RD = dyn_cast_or_null(C)) { NNS = NestedNameSpecifier::Create(Context, NNS, RD->isTemplateDecl(), RD->getTypeForDecl()); ++NumSpecifiers; } } return NumSpecifiers; } void TypoCorrectionConsumer::NamespaceSpecifierSet::addNameSpecifier( DeclContext *Ctx) { NestedNameSpecifier *NNS = nullptr; unsigned NumSpecifiers = 0; DeclContextList NamespaceDeclChain(buildContextChain(Ctx)); DeclContextList FullNamespaceDeclChain(NamespaceDeclChain); // Eliminate common elements from the two DeclContext chains. 
for (DeclContext *C : llvm::reverse(CurContextChain)) { if (NamespaceDeclChain.empty() || NamespaceDeclChain.back() != C) break; NamespaceDeclChain.pop_back(); } // Build the NestedNameSpecifier from what is left of the NamespaceDeclChain NumSpecifiers = buildNestedNameSpecifier(NamespaceDeclChain, NNS); // Add an explicit leading '::' specifier if needed. if (NamespaceDeclChain.empty()) { // Rebuild the NestedNameSpecifier as a globally-qualified specifier. NNS = NestedNameSpecifier::GlobalSpecifier(Context); NumSpecifiers = buildNestedNameSpecifier(FullNamespaceDeclChain, NNS); } else if (NamedDecl *ND = dyn_cast_or_null(NamespaceDeclChain.back())) { IdentifierInfo *Name = ND->getIdentifier(); bool SameNameSpecifier = false; if (std::find(CurNameSpecifierIdentifiers.begin(), CurNameSpecifierIdentifiers.end(), Name) != CurNameSpecifierIdentifiers.end()) { std::string NewNameSpecifier; llvm::raw_string_ostream SpecifierOStream(NewNameSpecifier); SmallVector NewNameSpecifierIdentifiers; getNestedNameSpecifierIdentifiers(NNS, NewNameSpecifierIdentifiers); NNS->print(SpecifierOStream, Context.getPrintingPolicy()); SpecifierOStream.flush(); SameNameSpecifier = NewNameSpecifier == CurNameSpecifier; } if (SameNameSpecifier || std::find(CurContextIdentifiers.begin(), CurContextIdentifiers.end(), Name) != CurContextIdentifiers.end()) { // Rebuild the NestedNameSpecifier as a globally-qualified specifier. NNS = NestedNameSpecifier::GlobalSpecifier(Context); NumSpecifiers = buildNestedNameSpecifier(FullNamespaceDeclChain, NNS); } } // If the built NestedNameSpecifier would be replacing an existing // NestedNameSpecifier, use the number of component identifiers that // would need to be changed as the edit distance instead of the number // of components in the built NestedNameSpecifier. 
if (NNS && !CurNameSpecifierIdentifiers.empty()) { SmallVector NewNameSpecifierIdentifiers; getNestedNameSpecifierIdentifiers(NNS, NewNameSpecifierIdentifiers); NumSpecifiers = llvm::ComputeEditDistance( llvm::makeArrayRef(CurNameSpecifierIdentifiers), llvm::makeArrayRef(NewNameSpecifierIdentifiers)); } SpecifierInfo SI = {Ctx, NNS, NumSpecifiers}; DistanceMap[NumSpecifiers].push_back(SI); } /// \brief Perform name lookup for a possible result for typo correction. static void LookupPotentialTypoResult(Sema &SemaRef, LookupResult &Res, IdentifierInfo *Name, Scope *S, CXXScopeSpec *SS, DeclContext *MemberContext, bool EnteringContext, bool isObjCIvarLookup, bool FindHidden) { Res.suppressDiagnostics(); Res.clear(); Res.setLookupName(Name); Res.setAllowHidden(FindHidden); if (MemberContext) { if (ObjCInterfaceDecl *Class = dyn_cast(MemberContext)) { if (isObjCIvarLookup) { if (ObjCIvarDecl *Ivar = Class->lookupInstanceVariable(Name)) { Res.addDecl(Ivar); Res.resolveKind(); return; } } if (ObjCPropertyDecl *Prop = Class->FindPropertyDeclaration( Name, ObjCPropertyQueryKind::OBJC_PR_query_instance)) { Res.addDecl(Prop); Res.resolveKind(); return; } } SemaRef.LookupQualifiedName(Res, MemberContext); return; } SemaRef.LookupParsedName(Res, S, SS, /*AllowBuiltinCreation=*/false, EnteringContext); // Fake ivar lookup; this should really be part of // LookupParsedName. if (ObjCMethodDecl *Method = SemaRef.getCurMethodDecl()) { if (Method->isInstanceMethod() && Method->getClassInterface() && (Res.empty() || (Res.isSingleResult() && Res.getFoundDecl()->isDefinedOutsideFunctionOrMethod()))) { if (ObjCIvarDecl *IV = Method->getClassInterface()->lookupInstanceVariable(Name)) { Res.addDecl(IV); Res.resolveKind(); } } } } /// \brief Add keywords to the consumer as possible typo corrections. 
static void AddKeywordsToConsumer(Sema &SemaRef, TypoCorrectionConsumer &Consumer, Scope *S, CorrectionCandidateCallback &CCC, bool AfterNestedNameSpecifier) { if (AfterNestedNameSpecifier) { // For 'X::', we know exactly which keywords can appear next. Consumer.addKeywordResult("template"); if (CCC.WantExpressionKeywords) Consumer.addKeywordResult("operator"); return; } if (CCC.WantObjCSuper) Consumer.addKeywordResult("super"); if (CCC.WantTypeSpecifiers) { // Add type-specifier keywords to the set of results. static const char *const CTypeSpecs[] = { "char", "const", "double", "enum", "float", "int", "long", "short", "signed", "struct", "union", "unsigned", "void", "volatile", "_Complex", "_Imaginary", // storage-specifiers as well "extern", "inline", "static", "typedef" }; const unsigned NumCTypeSpecs = llvm::array_lengthof(CTypeSpecs); for (unsigned I = 0; I != NumCTypeSpecs; ++I) Consumer.addKeywordResult(CTypeSpecs[I]); if (SemaRef.getLangOpts().C99) Consumer.addKeywordResult("restrict"); if (SemaRef.getLangOpts().Bool || SemaRef.getLangOpts().CPlusPlus) Consumer.addKeywordResult("bool"); else if (SemaRef.getLangOpts().C99) Consumer.addKeywordResult("_Bool"); if (SemaRef.getLangOpts().CPlusPlus) { Consumer.addKeywordResult("class"); Consumer.addKeywordResult("typename"); Consumer.addKeywordResult("wchar_t"); if (SemaRef.getLangOpts().CPlusPlus11) { Consumer.addKeywordResult("char16_t"); Consumer.addKeywordResult("char32_t"); Consumer.addKeywordResult("constexpr"); Consumer.addKeywordResult("decltype"); Consumer.addKeywordResult("thread_local"); } } if (SemaRef.getLangOpts().GNUMode) Consumer.addKeywordResult("typeof"); } else if (CCC.WantFunctionLikeCasts) { static const char *const CastableTypeSpecs[] = { "char", "double", "float", "int", "long", "short", "signed", "unsigned", "void" }; for (auto *kw : CastableTypeSpecs) Consumer.addKeywordResult(kw); } if (CCC.WantCXXNamedCasts && SemaRef.getLangOpts().CPlusPlus) { Consumer.addKeywordResult("const_cast"); 
Consumer.addKeywordResult("dynamic_cast"); Consumer.addKeywordResult("reinterpret_cast"); Consumer.addKeywordResult("static_cast"); } if (CCC.WantExpressionKeywords) { Consumer.addKeywordResult("sizeof"); if (SemaRef.getLangOpts().Bool || SemaRef.getLangOpts().CPlusPlus) { Consumer.addKeywordResult("false"); Consumer.addKeywordResult("true"); } if (SemaRef.getLangOpts().CPlusPlus) { static const char *const CXXExprs[] = { "delete", "new", "operator", "throw", "typeid" }; const unsigned NumCXXExprs = llvm::array_lengthof(CXXExprs); for (unsigned I = 0; I != NumCXXExprs; ++I) Consumer.addKeywordResult(CXXExprs[I]); if (isa(SemaRef.CurContext) && cast(SemaRef.CurContext)->isInstance()) Consumer.addKeywordResult("this"); if (SemaRef.getLangOpts().CPlusPlus11) { Consumer.addKeywordResult("alignof"); Consumer.addKeywordResult("nullptr"); } } if (SemaRef.getLangOpts().C11) { // FIXME: We should not suggest _Alignof if the alignof macro // is present. Consumer.addKeywordResult("_Alignof"); } } if (CCC.WantRemainingKeywords) { if (SemaRef.getCurFunctionOrMethodDecl() || SemaRef.getCurBlock()) { // Statements. 
static const char *const CStmts[] = { "do", "else", "for", "goto", "if", "return", "switch", "while" }; const unsigned NumCStmts = llvm::array_lengthof(CStmts); for (unsigned I = 0; I != NumCStmts; ++I) Consumer.addKeywordResult(CStmts[I]); if (SemaRef.getLangOpts().CPlusPlus) { Consumer.addKeywordResult("catch"); Consumer.addKeywordResult("try"); } if (S && S->getBreakParent()) Consumer.addKeywordResult("break"); if (S && S->getContinueParent()) Consumer.addKeywordResult("continue"); if (!SemaRef.getCurFunction()->SwitchStack.empty()) { Consumer.addKeywordResult("case"); Consumer.addKeywordResult("default"); } } else { if (SemaRef.getLangOpts().CPlusPlus) { Consumer.addKeywordResult("namespace"); Consumer.addKeywordResult("template"); } if (S && S->isClassScope()) { Consumer.addKeywordResult("explicit"); Consumer.addKeywordResult("friend"); Consumer.addKeywordResult("mutable"); Consumer.addKeywordResult("private"); Consumer.addKeywordResult("protected"); Consumer.addKeywordResult("public"); Consumer.addKeywordResult("virtual"); } } if (SemaRef.getLangOpts().CPlusPlus) { Consumer.addKeywordResult("using"); if (SemaRef.getLangOpts().CPlusPlus11) Consumer.addKeywordResult("static_assert"); } } } std::unique_ptr Sema::makeTypoCorrectionConsumer( const DeclarationNameInfo &TypoName, Sema::LookupNameKind LookupKind, Scope *S, CXXScopeSpec *SS, std::unique_ptr CCC, DeclContext *MemberContext, bool EnteringContext, const ObjCObjectPointerType *OPT, bool ErrorRecovery) { if (Diags.hasFatalErrorOccurred() || !getLangOpts().SpellChecking || DisableTypoCorrection) return nullptr; // In Microsoft mode, don't perform typo correction in a template member // function dependent context because it interferes with the "lookup into // dependent bases of class templates" feature. if (getLangOpts().MSVCCompat && CurContext->isDependentContext() && isa(CurContext)) return nullptr; // We only attempt to correct typos for identifiers. 
IdentifierInfo *Typo = TypoName.getName().getAsIdentifierInfo(); if (!Typo) return nullptr; // If the scope specifier itself was invalid, don't try to correct // typos. if (SS && SS->isInvalid()) return nullptr; // Never try to correct typos during template deduction or // instantiation. if (!ActiveTemplateInstantiations.empty()) return nullptr; // Don't try to correct 'super'. if (S && S->isInObjcMethodScope() && Typo == getSuperIdentifier()) return nullptr; // Abort if typo correction already failed for this specific typo. IdentifierSourceLocations::iterator locs = TypoCorrectionFailures.find(Typo); if (locs != TypoCorrectionFailures.end() && locs->second.count(TypoName.getLoc())) return nullptr; // Don't try to correct the identifier "vector" when in AltiVec mode. // TODO: Figure out why typo correction misbehaves in this case, fix it, and // remove this workaround. if ((getLangOpts().AltiVec || getLangOpts().ZVector) && Typo->isStr("vector")) return nullptr; // Provide a stop gap for files that are just seriously broken. Trying // to correct all typos can turn into a HUGE performance penalty, causing // some files to take minutes to get rejected by the parser. unsigned Limit = getDiagnostics().getDiagnosticOptions().SpellCheckingLimit; if (Limit && TyposCorrected >= Limit) return nullptr; ++TyposCorrected; // If we're handling a missing symbol error, using modules, and the // special search all modules option is used, look for a missing import. if (ErrorRecovery && getLangOpts().Modules && getLangOpts().ModulesSearchAll) { // The following has the side effect of loading the missing module. getModuleLoader().lookupMissingImports(Typo->getName(), TypoName.getLocStart()); } CorrectionCandidateCallback &CCCRef = *CCC; auto Consumer = llvm::make_unique( *this, TypoName, LookupKind, S, SS, std::move(CCC), MemberContext, EnteringContext); // Perform name lookup to find visible, similarly-named entities. 
bool IsUnqualifiedLookup = false; DeclContext *QualifiedDC = MemberContext; if (MemberContext) { LookupVisibleDecls(MemberContext, LookupKind, *Consumer); // Look in qualified interfaces. if (OPT) { for (auto *I : OPT->quals()) LookupVisibleDecls(I, LookupKind, *Consumer); } } else if (SS && SS->isSet()) { QualifiedDC = computeDeclContext(*SS, EnteringContext); if (!QualifiedDC) return nullptr; LookupVisibleDecls(QualifiedDC, LookupKind, *Consumer); } else { IsUnqualifiedLookup = true; } // Determine whether we are going to search in the various namespaces for // corrections. bool SearchNamespaces = getLangOpts().CPlusPlus && (IsUnqualifiedLookup || (SS && SS->isSet())); if (IsUnqualifiedLookup || SearchNamespaces) { // For unqualified lookup, look through all of the names that we have // seen in this translation unit. // FIXME: Re-add the ability to skip very unlikely potential corrections. for (const auto &I : Context.Idents) Consumer->FoundName(I.getKey()); // Walk through identifiers in external identifier sources. // FIXME: Re-add the ability to skip very unlikely potential corrections. if (IdentifierInfoLookup *External = Context.Idents.getExternalIdentifierLookup()) { std::unique_ptr Iter(External->getIdentifiers()); do { StringRef Name = Iter->Next(); if (Name.empty()) break; Consumer->FoundName(Name); } while (true); } } AddKeywordsToConsumer(*this, *Consumer, S, CCCRef, SS && SS->isNotEmpty()); // Build the NestedNameSpecifiers for the KnownNamespaces, if we're going // to search those namespaces. if (SearchNamespaces) { // Load any externally-known namespaces. 
if (ExternalSource && !LoadedExternalKnownNamespaces) { SmallVector ExternalKnownNamespaces; LoadedExternalKnownNamespaces = true; ExternalSource->ReadKnownNamespaces(ExternalKnownNamespaces); for (auto *N : ExternalKnownNamespaces) KnownNamespaces[N] = true; } Consumer->addNamespaces(KnownNamespaces); } return Consumer; } /// \brief Try to "correct" a typo in the source code by finding /// visible declarations whose names are similar to the name that was /// present in the source code. /// /// \param TypoName the \c DeclarationNameInfo structure that contains /// the name that was present in the source code along with its location. /// /// \param LookupKind the name-lookup criteria used to search for the name. /// /// \param S the scope in which name lookup occurs. /// /// \param SS the nested-name-specifier that precedes the name we're /// looking for, if present. /// /// \param CCC A CorrectionCandidateCallback object that provides further /// validation of typo correction candidates. It also provides flags for /// determining the set of keywords permitted. /// /// \param MemberContext if non-NULL, the context in which to look for /// a member access expression. /// /// \param EnteringContext whether we're entering the context described by /// the nested-name-specifier SS. /// /// \param OPT when non-NULL, the search for visible declarations will /// also walk the protocols in the qualified interfaces of \p OPT. /// /// \returns a \c TypoCorrection containing the corrected name if the typo /// along with information such as the \c NamedDecl where the corrected name /// was declared, and any additional \c NestedNameSpecifier needed to access /// it (C++ only). The \c TypoCorrection is empty if there is no correction. 
TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
                                 Sema::LookupNameKind LookupKind,
                                 Scope *S, CXXScopeSpec *SS,
                                 std::unique_ptr<CorrectionCandidateCallback> CCC,
                                 CorrectTypoKind Mode,
                                 DeclContext *MemberContext,
                                 bool EnteringContext,
                                 const ObjCObjectPointerType *OPT,
                                 bool RecordFailure) {
  assert(CCC && "CorrectTypo requires a CorrectionCandidateCallback");

  // Always let the ExternalSource have the first chance at correction, even
  // if we would otherwise have given up.
  if (ExternalSource) {
    if (TypoCorrection Correction = ExternalSource->CorrectTypo(
        TypoName, LookupKind, S, SS, *CCC, MemberContext, EnteringContext, OPT))
      return Correction;
  }

  // Ugly hack equivalent to CTC == CTC_ObjCMessageReceiver;
  // WantObjCSuper is only true for CTC_ObjCMessageReceiver and for
  // some instances of CTC_Unknown, while WantRemainingKeywords is true
  // for CTC_Unknown but not for CTC_ObjCMessageReceiver.
  // This must be read before CCC is moved into the consumer below.
  bool ObjCMessageReceiver = CCC->WantObjCSuper && !CCC->WantRemainingKeywords;

  IdentifierInfo *Typo = TypoName.getName().getAsIdentifierInfo();
  auto Consumer = makeTypoCorrectionConsumer(
      TypoName, LookupKind, S, SS, std::move(CCC), MemberContext,
      EnteringContext, OPT, Mode == CTK_ErrorRecovery);

  if (!Consumer)
    return TypoCorrection();

  // If we haven't found anything, we're done.
  if (Consumer->empty())
    return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure);

  // Make sure the best edit distance (prior to adding any namespace qualifiers)
  // is not more than about a third of the length of the typo's identifier.
  unsigned ED = Consumer->getBestEditDistance(true);
  unsigned TypoLen = Typo->getName().size();
  if (ED > 0 && TypoLen / ED < 3)
    return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure);

  TypoCorrection BestTC = Consumer->getNextCorrection();
  TypoCorrection SecondBestTC = Consumer->getNextCorrection();
  if (!BestTC)
    return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure);

  ED = BestTC.getEditDistance();

  if (TypoLen >= 3 && ED > 0 && TypoLen / ED < 3) {
    // If this was an unqualified lookup and we believe the callback
    // object wouldn't have filtered out possible corrections, note
    // that no correction was found.
    return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure);
  }

  // If only a single name remains, return that result.
  if (!SecondBestTC ||
      SecondBestTC.getEditDistance(false) > BestTC.getEditDistance(false)) {
    const TypoCorrection &Result = BestTC;

    // Don't correct to a keyword that's the same as the typo; the keyword
    // wasn't actually in scope.
    if (ED == 0 && Result.isKeyword())
      return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure);

    TypoCorrection TC = Result;
    TC.setCorrectionRange(SS, TypoName);
    checkCorrectionVisibility(*this, TC);
    return TC;
  } else if (SecondBestTC && ObjCMessageReceiver) {
    // Prefer 'super' when we're completing in a message-receiver
    // context.

    if (BestTC.getCorrection().getAsString() != "super") {
      if (SecondBestTC.getCorrection().getAsString() == "super") {
        BestTC = SecondBestTC;
      } else {
        // NOTE(review): the consumer's operator[] is defined outside this
        // chunk; guard against it handing back an empty candidate list, on
        // which front() must not be called.
        auto &&SuperCandidates = (*Consumer)["super"];
        if (!SuperCandidates.empty() && SuperCandidates.front().isKeyword())
          BestTC = SuperCandidates.front();
      }
    }
    // Don't correct to a keyword that's the same as the typo; the keyword
    // wasn't actually in scope.
    if (BestTC.getEditDistance() == 0 ||
        BestTC.getCorrection().getAsString() != "super")
      return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure);

    BestTC.setCorrectionRange(SS, TypoName);
    return BestTC;
  }

  // Record the failure's location if needed and return an empty correction. If
  // this was an unqualified lookup and we believe the callback object did not
  // filter out possible corrections, also cache the failure for the typo.
  return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure && !SecondBestTC);
}

/// \brief Try to "correct" a typo in the source code by finding
/// visible declarations whose names are similar to the name that was
/// present in the source code.
///
/// \param TypoName the \c DeclarationNameInfo structure that contains
/// the name that was present in the source code along with its location.
///
/// \param LookupKind the name-lookup criteria used to search for the name.
///
/// \param S the scope in which name lookup occurs.
///
/// \param SS the nested-name-specifier that precedes the name we're
/// looking for, if present.
///
/// \param CCC A CorrectionCandidateCallback object that provides further
/// validation of typo correction candidates. It also provides flags for
/// determining the set of keywords permitted.
///
/// \param TDG A TypoDiagnosticGenerator functor that will be used to print
/// diagnostics when the actual typo correction is attempted.
///
/// \param TRC A TypoRecoveryCallback functor that will be used to build an
/// Expr from a typo correction candidate.
///
/// \param MemberContext if non-NULL, the context in which to look for
/// a member access expression.
///
/// \param EnteringContext whether we're entering the context described by
/// the nested-name-specifier SS.
///
/// \param OPT when non-NULL, the search for visible declarations will
/// also walk the protocols in the qualified interfaces of \p OPT.
///
/// \returns a new \c TypoExpr that will later be replaced in the AST with an
/// Expr representing the result of performing typo correction, or nullptr if
/// typo correction is not possible. If nullptr is returned, no diagnostics will
/// be emitted and it is the responsibility of the caller to emit any that are
/// needed.
TypoExpr *Sema::CorrectTypoDelayed( const DeclarationNameInfo &TypoName, Sema::LookupNameKind LookupKind, Scope *S, CXXScopeSpec *SS, std::unique_ptr CCC, TypoDiagnosticGenerator TDG, TypoRecoveryCallback TRC, CorrectTypoKind Mode, DeclContext *MemberContext, bool EnteringContext, const ObjCObjectPointerType *OPT) { assert(CCC && "CorrectTypoDelayed requires a CorrectionCandidateCallback"); auto Consumer = makeTypoCorrectionConsumer( TypoName, LookupKind, S, SS, std::move(CCC), MemberContext, EnteringContext, OPT, Mode == CTK_ErrorRecovery); // Give the external sema source a chance to correct the typo. TypoCorrection ExternalTypo; if (ExternalSource && Consumer) { ExternalTypo = ExternalSource->CorrectTypo( TypoName, LookupKind, S, SS, *Consumer->getCorrectionValidator(), MemberContext, EnteringContext, OPT); if (ExternalTypo) Consumer->addCorrection(ExternalTypo); } if (!Consumer || Consumer->empty()) return nullptr; // Make sure the best edit distance (prior to adding any namespace qualifiers) // is not more that about a third of the length of the typo's identifier. 
unsigned ED = Consumer->getBestEditDistance(true); IdentifierInfo *Typo = TypoName.getName().getAsIdentifierInfo(); if (!ExternalTypo && ED > 0 && Typo->getName().size() / ED < 3) return nullptr; ExprEvalContexts.back().NumTypos++; return createDelayedTypo(std::move(Consumer), std::move(TDG), std::move(TRC)); } void TypoCorrection::addCorrectionDecl(NamedDecl *CDecl) { if (!CDecl) return; if (isKeyword()) CorrectionDecls.clear(); CorrectionDecls.push_back(CDecl); if (!CorrectionName) CorrectionName = CDecl->getDeclName(); } std::string TypoCorrection::getAsString(const LangOptions &LO) const { if (CorrectionNameSpec) { std::string tmpBuffer; llvm::raw_string_ostream PrefixOStream(tmpBuffer); CorrectionNameSpec->print(PrefixOStream, PrintingPolicy(LO)); PrefixOStream << CorrectionName; return PrefixOStream.str(); } return CorrectionName.getAsString(); } bool CorrectionCandidateCallback::ValidateCandidate( const TypoCorrection &candidate) { if (!candidate.isResolved()) return true; if (candidate.isKeyword()) return WantTypeSpecifiers || WantExpressionKeywords || WantCXXNamedCasts || WantRemainingKeywords || WantObjCSuper; bool HasNonType = false; bool HasStaticMethod = false; bool HasNonStaticMethod = false; for (Decl *D : candidate) { if (FunctionTemplateDecl *FTD = dyn_cast(D)) D = FTD->getTemplatedDecl(); if (CXXMethodDecl *Method = dyn_cast(D)) { if (Method->isStatic()) HasStaticMethod = true; else HasNonStaticMethod = true; } if (!isa(D)) HasNonType = true; } if (IsAddressOfOperand && HasNonStaticMethod && !HasStaticMethod && !candidate.getCorrectionSpecifier()) return false; return WantTypeSpecifiers || HasNonType; } FunctionCallFilterCCC::FunctionCallFilterCCC(Sema &SemaRef, unsigned NumArgs, bool HasExplicitTemplateArgs, MemberExpr *ME) : NumArgs(NumArgs), HasExplicitTemplateArgs(HasExplicitTemplateArgs), CurContext(SemaRef.CurContext), MemberFn(ME) { WantTypeSpecifiers = false; WantFunctionLikeCasts = SemaRef.getLangOpts().CPlusPlus && NumArgs == 1; 
WantRemainingKeywords = false; } bool FunctionCallFilterCCC::ValidateCandidate(const TypoCorrection &candidate) { if (!candidate.getCorrectionDecl()) return candidate.isKeyword(); for (auto *C : candidate) { FunctionDecl *FD = nullptr; NamedDecl *ND = C->getUnderlyingDecl(); if (FunctionTemplateDecl *FTD = dyn_cast(ND)) FD = FTD->getTemplatedDecl(); if (!HasExplicitTemplateArgs && !FD) { if (!(FD = dyn_cast(ND)) && isa(ND)) { // If the Decl is neither a function nor a template function, // determine if it is a pointer or reference to a function. If so, // check against the number of arguments expected for the pointee. QualType ValType = cast(ND)->getType(); if (ValType->isAnyPointerType() || ValType->isReferenceType()) ValType = ValType->getPointeeType(); if (const FunctionProtoType *FPT = ValType->getAs()) if (FPT->getNumParams() == NumArgs) return true; } } // Skip the current candidate if it is not a FunctionDecl or does not accept // the current number of arguments. if (!FD || !(FD->getNumParams() >= NumArgs && FD->getMinRequiredArguments() <= NumArgs)) continue; // If the current candidate is a non-static C++ method, skip the candidate // unless the method being corrected--or the current DeclContext, if the // function being corrected is not a method--is a method in the same class // or a descendent class of the candidate's parent class. if (CXXMethodDecl *MD = dyn_cast(FD)) { if (MemberFn || !MD->isStatic()) { CXXMethodDecl *CurMD = MemberFn ? dyn_cast_or_null(MemberFn->getMemberDecl()) : dyn_cast_or_null(CurContext); CXXRecordDecl *CurRD = CurMD ? 
CurMD->getParent()->getCanonicalDecl() : nullptr; CXXRecordDecl *RD = MD->getParent()->getCanonicalDecl(); if (!CurRD || (CurRD != RD && !CurRD->isDerivedFrom(RD))) continue; } } return true; } return false; } void Sema::diagnoseTypo(const TypoCorrection &Correction, const PartialDiagnostic &TypoDiag, bool ErrorRecovery) { diagnoseTypo(Correction, TypoDiag, PDiag(diag::note_previous_decl), ErrorRecovery); } /// Find which declaration we should import to provide the definition of /// the given declaration. static NamedDecl *getDefinitionToImport(NamedDecl *D) { if (VarDecl *VD = dyn_cast(D)) return VD->getDefinition(); if (FunctionDecl *FD = dyn_cast(D)) return FD->getDefinition(); if (TagDecl *TD = dyn_cast(D)) return TD->getDefinition(); if (ObjCInterfaceDecl *ID = dyn_cast(D)) return ID->getDefinition(); if (ObjCProtocolDecl *PD = dyn_cast(D)) return PD->getDefinition(); if (TemplateDecl *TD = dyn_cast(D)) return getDefinitionToImport(TD->getTemplatedDecl()); return nullptr; } void Sema::diagnoseMissingImport(SourceLocation Loc, NamedDecl *Decl, MissingImportKind MIK, bool Recover) { assert(!isVisible(Decl) && "missing import for non-hidden decl?"); // Suggest importing a module providing the definition of this entity, if // possible. NamedDecl *Def = getDefinitionToImport(Decl); if (!Def) Def = Decl; Module *Owner = getOwningModule(Decl); assert(Owner && "definition of hidden declaration is not in a module"); llvm::SmallVector OwningModules; OwningModules.push_back(Owner); auto Merged = Context.getModulesWithMergedDefinition(Decl); OwningModules.insert(OwningModules.end(), Merged.begin(), Merged.end()); diagnoseMissingImport(Loc, Decl, Decl->getLocation(), OwningModules, MIK, Recover); } /// \brief Get a "quoted.h" or include path to use in a diagnostic /// suggesting the addition of a #include of the specified file. 
static std::string getIncludeStringForHeader(Preprocessor &PP, const FileEntry *E) { bool IsSystem; auto Path = PP.getHeaderSearchInfo().suggestPathToFileForDiagnostics(E, &IsSystem); return (IsSystem ? '<' : '"') + Path + (IsSystem ? '>' : '"'); } void Sema::diagnoseMissingImport(SourceLocation UseLoc, NamedDecl *Decl, SourceLocation DeclLoc, ArrayRef Modules, MissingImportKind MIK, bool Recover) { assert(!Modules.empty()); if (Modules.size() > 1) { std::string ModuleList; unsigned N = 0; for (Module *M : Modules) { ModuleList += "\n "; if (++N == 5 && N != Modules.size()) { ModuleList += "[...]"; break; } ModuleList += M->getFullModuleName(); } Diag(UseLoc, diag::err_module_unimported_use_multiple) << (int)MIK << Decl << ModuleList; } else if (const FileEntry *E = PP.getModuleHeaderToIncludeForDiagnostics(UseLoc, DeclLoc)) { // The right way to make the declaration visible is to include a header; // suggest doing so. // // FIXME: Find a smart place to suggest inserting a #include, and add // a FixItHint there. Diag(UseLoc, diag::err_module_unimported_use_header) << (int)MIK << Decl << Modules[0]->getFullModuleName() << getIncludeStringForHeader(PP, E); } else { // FIXME: Add a FixItHint that imports the corresponding module. Diag(UseLoc, diag::err_module_unimported_use) << (int)MIK << Decl << Modules[0]->getFullModuleName(); } unsigned DiagID; switch (MIK) { case MissingImportKind::Declaration: DiagID = diag::note_previous_declaration; break; case MissingImportKind::Definition: DiagID = diag::note_previous_definition; break; case MissingImportKind::DefaultArgument: DiagID = diag::note_default_argument_declared_here; break; case MissingImportKind::ExplicitSpecialization: DiagID = diag::note_explicit_specialization_declared_here; break; case MissingImportKind::PartialSpecialization: DiagID = diag::note_partial_specialization_declared_here; break; } Diag(DeclLoc, DiagID); // Try to recover by implicitly importing this module. 
if (Recover) createImplicitModuleImportForErrorRecovery(UseLoc, Modules[0]); } /// \brief Diagnose a successfully-corrected typo. Separated from the correction /// itself to allow external validation of the result, etc. /// /// \param Correction The result of performing typo correction. /// \param TypoDiag The diagnostic to produce. This will have the corrected /// string added to it (and usually also a fixit). /// \param PrevNote A note to use when indicating the location of the entity to /// which we are correcting. Will have the correction string added to it. /// \param ErrorRecovery If \c true (the default), the caller is going to /// recover from the typo as if the corrected string had been typed. /// In this case, \c PDiag must be an error, and we will attach a fixit /// to it. void Sema::diagnoseTypo(const TypoCorrection &Correction, const PartialDiagnostic &TypoDiag, const PartialDiagnostic &PrevNote, bool ErrorRecovery) { std::string CorrectedStr = Correction.getAsString(getLangOpts()); std::string CorrectedQuotedStr = Correction.getQuoted(getLangOpts()); FixItHint FixTypo = FixItHint::CreateReplacement( Correction.getCorrectionRange(), CorrectedStr); // Maybe we're just missing a module import. if (Correction.requiresImport()) { NamedDecl *Decl = Correction.getFoundDecl(); assert(Decl && "import required but no declaration to import"); diagnoseMissingImport(Correction.getCorrectionRange().getBegin(), Decl, MissingImportKind::Declaration, ErrorRecovery); return; } Diag(Correction.getCorrectionRange().getBegin(), TypoDiag) << CorrectedQuotedStr << (ErrorRecovery ? FixTypo : FixItHint()); NamedDecl *ChosenDecl = Correction.isKeyword() ? nullptr : Correction.getFoundDecl(); if (PrevNote.getDiagID() && ChosenDecl) Diag(ChosenDecl->getLocation(), PrevNote) << CorrectedQuotedStr << (ErrorRecovery ? FixItHint() : FixTypo); // Add any extra diagnostics. 
for (const PartialDiagnostic &PD : Correction.getExtraDiagnostics()) Diag(Correction.getCorrectionRange().getBegin(), PD); } TypoExpr *Sema::createDelayedTypo(std::unique_ptr TCC, TypoDiagnosticGenerator TDG, TypoRecoveryCallback TRC) { assert(TCC && "createDelayedTypo requires a valid TypoCorrectionConsumer"); auto TE = new (Context) TypoExpr(Context.DependentTy); auto &State = DelayedTypos[TE]; State.Consumer = std::move(TCC); State.DiagHandler = std::move(TDG); State.RecoveryHandler = std::move(TRC); return TE; } const Sema::TypoExprState &Sema::getTypoExprState(TypoExpr *TE) const { auto Entry = DelayedTypos.find(TE); assert(Entry != DelayedTypos.end() && "Failed to get the state for a TypoExpr!"); return Entry->second; } void Sema::clearDelayedTypo(TypoExpr *TE) { DelayedTypos.erase(TE); } void Sema::ActOnPragmaDump(Scope *S, SourceLocation IILoc, IdentifierInfo *II) { DeclarationNameInfo Name(II, IILoc); LookupResult R(*this, Name, LookupAnyName, Sema::NotForRedeclaration); R.suppressDiagnostics(); R.setHideTags(false); LookupName(R, S); R.dump(); } Index: projects/clang400-import/contrib/llvm/tools/clang/lib/Sema/SemaStmt.cpp =================================================================== --- projects/clang400-import/contrib/llvm/tools/clang/lib/Sema/SemaStmt.cpp (revision 313893) +++ projects/clang400-import/contrib/llvm/tools/clang/lib/Sema/SemaStmt.cpp (revision 313894) @@ -1,4010 +1,4012 @@ //===--- SemaStmt.cpp - Semantic Analysis for Statements ------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements semantic analysis for statements. 
// //===----------------------------------------------------------------------===// #include "clang/Sema/SemaInternal.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTDiagnostic.h" #include "clang/AST/CharUnits.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/EvaluatedExprVisitor.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtObjC.h" #include "clang/AST/TypeLoc.h" #include "clang/AST/TypeOrdering.h" #include "clang/Basic/TargetInfo.h" #include "clang/Lex/Preprocessor.h" #include "clang/Sema/Initialization.h" #include "clang/Sema/Lookup.h" #include "clang/Sema/Scope.h" #include "clang/Sema/ScopeInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" using namespace clang; using namespace sema; StmtResult Sema::ActOnExprStmt(ExprResult FE) { if (FE.isInvalid()) return StmtError(); FE = ActOnFinishFullExpr(FE.get(), FE.get()->getExprLoc(), /*DiscardedValue*/ true); if (FE.isInvalid()) return StmtError(); // C99 6.8.3p2: The expression in an expression statement is evaluated as a // void expression for its side effects. Conversion to void allows any // operand, even incomplete types. // Same thing in for stmt first clause (when expr) and third clause. return StmtResult(FE.getAs()); } StmtResult Sema::ActOnExprStmtError() { DiscardCleanupsInEvaluationContext(); return StmtError(); } StmtResult Sema::ActOnNullStmt(SourceLocation SemiLoc, bool HasLeadingEmptyMacro) { return new (Context) NullStmt(SemiLoc, HasLeadingEmptyMacro); } StmtResult Sema::ActOnDeclStmt(DeclGroupPtrTy dg, SourceLocation StartLoc, SourceLocation EndLoc) { DeclGroupRef DG = dg.get(); // If we have an invalid decl, just return an error. 
if (DG.isNull()) return StmtError(); return new (Context) DeclStmt(DG, StartLoc, EndLoc); } void Sema::ActOnForEachDeclStmt(DeclGroupPtrTy dg) { DeclGroupRef DG = dg.get(); // If we don't have a declaration, or we have an invalid declaration, // just return. if (DG.isNull() || !DG.isSingleDecl()) return; Decl *decl = DG.getSingleDecl(); if (!decl || decl->isInvalidDecl()) return; // Only variable declarations are permitted. VarDecl *var = dyn_cast(decl); if (!var) { Diag(decl->getLocation(), diag::err_non_variable_decl_in_for); decl->setInvalidDecl(); return; } // foreach variables are never actually initialized in the way that // the parser came up with. var->setInit(nullptr); // In ARC, we don't need to retain the iteration variable of a fast // enumeration loop. Rather than actually trying to catch that // during declaration processing, we remove the consequences here. if (getLangOpts().ObjCAutoRefCount) { QualType type = var->getType(); // Only do this if we inferred the lifetime. Inferred lifetime // will show up as a local qualifier because explicit lifetime // should have shown up as an AttributedType instead. if (type.getLocalQualifiers().getObjCLifetime() == Qualifiers::OCL_Strong) { // Add 'const' and mark the variable as pseudo-strong. var->setType(type.withConst()); var->setARCPseudoStrong(true); } } } /// \brief Diagnose unused comparisons, both builtin and overloaded operators. /// For '==' and '!=', suggest fixits for '=' or '|='. /// /// Adding a cast to void (or other expression wrappers) will prevent the /// warning from firing. 
static bool DiagnoseUnusedComparison(Sema &S, const Expr *E) { SourceLocation Loc; bool IsNotEqual, CanAssign, IsRelational; if (const BinaryOperator *Op = dyn_cast(E)) { if (!Op->isComparisonOp()) return false; IsRelational = Op->isRelationalOp(); Loc = Op->getOperatorLoc(); IsNotEqual = Op->getOpcode() == BO_NE; CanAssign = Op->getLHS()->IgnoreParenImpCasts()->isLValue(); } else if (const CXXOperatorCallExpr *Op = dyn_cast(E)) { switch (Op->getOperator()) { default: return false; case OO_EqualEqual: case OO_ExclaimEqual: IsRelational = false; break; case OO_Less: case OO_Greater: case OO_GreaterEqual: case OO_LessEqual: IsRelational = true; break; } Loc = Op->getOperatorLoc(); IsNotEqual = Op->getOperator() == OO_ExclaimEqual; CanAssign = Op->getArg(0)->IgnoreParenImpCasts()->isLValue(); } else { // Not a typo-prone comparison. return false; } // Suppress warnings when the operator, suspicious as it may be, comes from // a macro expansion. if (S.SourceMgr.isMacroBodyExpansion(Loc)) return false; S.Diag(Loc, diag::warn_unused_comparison) << (unsigned)IsRelational << (unsigned)IsNotEqual << E->getSourceRange(); // If the LHS is a plausible entity to assign to, provide a fixit hint to // correct common typos. if (!IsRelational && CanAssign) { if (IsNotEqual) S.Diag(Loc, diag::note_inequality_comparison_to_or_assign) << FixItHint::CreateReplacement(Loc, "|="); else S.Diag(Loc, diag::note_equality_comparison_to_assign) << FixItHint::CreateReplacement(Loc, "="); } return true; } void Sema::DiagnoseUnusedExprResult(const Stmt *S) { if (const LabelStmt *Label = dyn_cast_or_null(S)) return DiagnoseUnusedExprResult(Label->getSubStmt()); const Expr *E = dyn_cast_or_null(S); if (!E) return; // If we are in an unevaluated expression context, then there can be no unused // results because the results aren't expected to be used in the first place. 
if (isUnevaluatedContext()) return; SourceLocation ExprLoc = E->IgnoreParenImpCasts()->getExprLoc(); // In most cases, we don't want to warn if the expression is written in a // macro body, or if the macro comes from a system header. If the offending // expression is a call to a function with the warn_unused_result attribute, // we warn no matter the location. Because of the order in which the various // checks need to happen, we factor out the macro-related test here. bool ShouldSuppress = SourceMgr.isMacroBodyExpansion(ExprLoc) || SourceMgr.isInSystemMacro(ExprLoc); const Expr *WarnExpr; SourceLocation Loc; SourceRange R1, R2; if (!E->isUnusedResultAWarning(WarnExpr, Loc, R1, R2, Context)) return; // If this is a GNU statement expression expanded from a macro, it is probably // unused because it is a function-like macro that can be used as either an // expression or statement. Don't warn, because it is almost certainly a // false positive. if (isa(E) && Loc.isMacroID()) return; // Check if this is the UNREFERENCED_PARAMETER from the Microsoft headers. // That macro is frequently used to suppress "unused parameter" warnings, // but its implementation makes clang's -Wunused-value fire. Prevent this. if (isa(E->IgnoreImpCasts()) && Loc.isMacroID()) { SourceLocation SpellLoc = Loc; if (findMacroSpelling(SpellLoc, "UNREFERENCED_PARAMETER")) return; } // Okay, we have an unused result. Depending on what the base expression is, // we might want to make a more specific diagnostic. Check for one of these // cases now. 
unsigned DiagID = diag::warn_unused_expr; if (const ExprWithCleanups *Temps = dyn_cast(E)) E = Temps->getSubExpr(); if (const CXXBindTemporaryExpr *TempExpr = dyn_cast(E)) E = TempExpr->getSubExpr(); if (DiagnoseUnusedComparison(*this, E)) return; E = WarnExpr; if (const CallExpr *CE = dyn_cast(E)) { if (E->getType()->isVoidType()) return; // If the callee has attribute pure, const, or warn_unused_result, warn with // a more specific message to make it clear what is happening. If the call // is written in a macro body, only warn if it has the warn_unused_result // attribute. if (const Decl *FD = CE->getCalleeDecl()) { if (const Attr *A = isa(FD) ? cast(FD)->getUnusedResultAttr() : FD->getAttr()) { Diag(Loc, diag::warn_unused_result) << A << R1 << R2; return; } if (ShouldSuppress) return; if (FD->hasAttr()) { Diag(Loc, diag::warn_unused_call) << R1 << R2 << "pure"; return; } if (FD->hasAttr()) { Diag(Loc, diag::warn_unused_call) << R1 << R2 << "const"; return; } } } else if (ShouldSuppress) return; if (const ObjCMessageExpr *ME = dyn_cast(E)) { if (getLangOpts().ObjCAutoRefCount && ME->isDelegateInitCall()) { Diag(Loc, diag::err_arc_unused_init_message) << R1; return; } const ObjCMethodDecl *MD = ME->getMethodDecl(); if (MD) { if (const auto *A = MD->getAttr()) { Diag(Loc, diag::warn_unused_result) << A << R1 << R2; return; } } } else if (const PseudoObjectExpr *POE = dyn_cast(E)) { const Expr *Source = POE->getSyntacticForm(); if (isa(Source)) DiagID = diag::warn_unused_container_subscript_expr; else DiagID = diag::warn_unused_property_expr; } else if (const CXXFunctionalCastExpr *FC = dyn_cast(E)) { if (isa(FC->getSubExpr()) || isa(FC->getSubExpr())) return; } // Diagnose "(void*) blah" as a typo for "(void) blah". else if (const CStyleCastExpr *CE = dyn_cast(E)) { TypeSourceInfo *TI = CE->getTypeInfoAsWritten(); QualType T = TI->getType(); // We really do want to use the non-canonical type here. 
if (T == Context.VoidPtrTy) { PointerTypeLoc TL = TI->getTypeLoc().castAs(); Diag(Loc, diag::warn_unused_voidptr) << FixItHint::CreateRemoval(TL.getStarLoc()); return; } } if (E->isGLValue() && E->getType().isVolatileQualified()) { Diag(Loc, diag::warn_unused_volatile) << R1 << R2; return; } DiagRuntimeBehavior(Loc, nullptr, PDiag(DiagID) << R1 << R2); } void Sema::ActOnStartOfCompoundStmt() { PushCompoundScope(); } void Sema::ActOnFinishOfCompoundStmt() { PopCompoundScope(); } sema::CompoundScopeInfo &Sema::getCurCompoundScope() const { return getCurFunction()->CompoundScopes.back(); } StmtResult Sema::ActOnCompoundStmt(SourceLocation L, SourceLocation R, ArrayRef Elts, bool isStmtExpr) { const unsigned NumElts = Elts.size(); // If we're in C89 mode, check that we don't have any decls after stmts. If // so, emit an extension diagnostic. if (!getLangOpts().C99 && !getLangOpts().CPlusPlus) { // Note that __extension__ can be around a decl. unsigned i = 0; // Skip over all declarations. for (; i != NumElts && isa(Elts[i]); ++i) /*empty*/; // We found the end of the list or a statement. Scan for another declstmt. for (; i != NumElts && !isa(Elts[i]); ++i) /*empty*/; if (i != NumElts) { Decl *D = *cast(Elts[i])->decl_begin(); Diag(D->getLocation(), diag::ext_mixed_decls_code); } } // Warn about unused expressions in statements. for (unsigned i = 0; i != NumElts; ++i) { // Ignore statements that are last in a statement expression. if (isStmtExpr && i == NumElts - 1) continue; DiagnoseUnusedExprResult(Elts[i]); } // Check for suspicious empty body (null statement) in `for' and `while' // statements. Don't do anything for template instantiations, this just adds // noise. 
if (NumElts != 0 && !CurrentInstantiationScope && getCurCompoundScope().HasEmptyLoopBodies) { for (unsigned i = 0; i != NumElts - 1; ++i) DiagnoseEmptyLoopBody(Elts[i], Elts[i + 1]); } return new (Context) CompoundStmt(Context, Elts, L, R); } StmtResult Sema::ActOnCaseStmt(SourceLocation CaseLoc, Expr *LHSVal, SourceLocation DotDotDotLoc, Expr *RHSVal, SourceLocation ColonLoc) { assert(LHSVal && "missing expression in case statement"); if (getCurFunction()->SwitchStack.empty()) { Diag(CaseLoc, diag::err_case_not_in_switch); return StmtError(); } ExprResult LHS = CorrectDelayedTyposInExpr(LHSVal, [this](class Expr *E) { if (!getLangOpts().CPlusPlus11) return VerifyIntegerConstantExpression(E); if (Expr *CondExpr = getCurFunction()->SwitchStack.back()->getCond()) { QualType CondType = CondExpr->getType(); llvm::APSInt TempVal; return CheckConvertedConstantExpression(E, CondType, TempVal, CCEK_CaseValue); } return ExprError(); }); if (LHS.isInvalid()) return StmtError(); LHSVal = LHS.get(); if (!getLangOpts().CPlusPlus11) { // C99 6.8.4.2p3: The expression shall be an integer constant. // However, GCC allows any evaluatable integer expression. if (!LHSVal->isTypeDependent() && !LHSVal->isValueDependent()) { LHSVal = VerifyIntegerConstantExpression(LHSVal).get(); if (!LHSVal) return StmtError(); } // GCC extension: The expression shall be an integer constant. if (RHSVal && !RHSVal->isTypeDependent() && !RHSVal->isValueDependent()) { RHSVal = VerifyIntegerConstantExpression(RHSVal).get(); // Recover from an error by just forgetting about it. } } LHS = ActOnFinishFullExpr(LHSVal, LHSVal->getExprLoc(), false, getLangOpts().CPlusPlus11); if (LHS.isInvalid()) return StmtError(); auto RHS = RHSVal ? 
ActOnFinishFullExpr(RHSVal, RHSVal->getExprLoc(), false, getLangOpts().CPlusPlus11) : ExprResult(); if (RHS.isInvalid()) return StmtError(); CaseStmt *CS = new (Context) CaseStmt(LHS.get(), RHS.get(), CaseLoc, DotDotDotLoc, ColonLoc); getCurFunction()->SwitchStack.back()->addSwitchCase(CS); return CS; } /// ActOnCaseStmtBody - This installs a statement as the body of a case. void Sema::ActOnCaseStmtBody(Stmt *caseStmt, Stmt *SubStmt) { DiagnoseUnusedExprResult(SubStmt); CaseStmt *CS = static_cast(caseStmt); CS->setSubStmt(SubStmt); } StmtResult Sema::ActOnDefaultStmt(SourceLocation DefaultLoc, SourceLocation ColonLoc, Stmt *SubStmt, Scope *CurScope) { DiagnoseUnusedExprResult(SubStmt); if (getCurFunction()->SwitchStack.empty()) { Diag(DefaultLoc, diag::err_default_not_in_switch); return SubStmt; } DefaultStmt *DS = new (Context) DefaultStmt(DefaultLoc, ColonLoc, SubStmt); getCurFunction()->SwitchStack.back()->addSwitchCase(DS); return DS; } StmtResult Sema::ActOnLabelStmt(SourceLocation IdentLoc, LabelDecl *TheDecl, SourceLocation ColonLoc, Stmt *SubStmt) { // If the label was multiply defined, reject it now. if (TheDecl->getStmt()) { Diag(IdentLoc, diag::err_redefinition_of_label) << TheDecl->getDeclName(); Diag(TheDecl->getLocation(), diag::note_previous_definition); return SubStmt; } // Otherwise, things are good. Fill in the declaration and return it. LabelStmt *LS = new (Context) LabelStmt(IdentLoc, TheDecl, SubStmt); TheDecl->setStmt(LS); if (!TheDecl->isGnuLocal()) { TheDecl->setLocStart(IdentLoc); if (!TheDecl->isMSAsmLabel()) { // Don't update the location of MS ASM labels. These will result in // a diagnostic, and changing the location here will mess that up. TheDecl->setLocation(IdentLoc); } } return LS; } StmtResult Sema::ActOnAttributedStmt(SourceLocation AttrLoc, ArrayRef Attrs, Stmt *SubStmt) { // Fill in the declaration and return it. 
AttributedStmt *LS = AttributedStmt::Create(Context, AttrLoc, Attrs, SubStmt); return LS; } namespace { class CommaVisitor : public EvaluatedExprVisitor { typedef EvaluatedExprVisitor Inherited; Sema &SemaRef; public: CommaVisitor(Sema &SemaRef) : Inherited(SemaRef.Context), SemaRef(SemaRef) {} void VisitBinaryOperator(BinaryOperator *E) { if (E->getOpcode() == BO_Comma) SemaRef.DiagnoseCommaOperator(E->getLHS(), E->getExprLoc()); EvaluatedExprVisitor::VisitBinaryOperator(E); } }; } StmtResult Sema::ActOnIfStmt(SourceLocation IfLoc, bool IsConstexpr, Stmt *InitStmt, ConditionResult Cond, Stmt *thenStmt, SourceLocation ElseLoc, Stmt *elseStmt) { if (Cond.isInvalid()) Cond = ConditionResult( *this, nullptr, MakeFullExpr(new (Context) OpaqueValueExpr(SourceLocation(), Context.BoolTy, VK_RValue), IfLoc), false); Expr *CondExpr = Cond.get().second; if (!Diags.isIgnored(diag::warn_comma_operator, CondExpr->getExprLoc())) CommaVisitor(*this).Visit(CondExpr); if (!elseStmt) DiagnoseEmptyStmtBody(CondExpr->getLocEnd(), thenStmt, diag::warn_empty_if_body); return BuildIfStmt(IfLoc, IsConstexpr, InitStmt, Cond, thenStmt, ElseLoc, elseStmt); } StmtResult Sema::BuildIfStmt(SourceLocation IfLoc, bool IsConstexpr, Stmt *InitStmt, ConditionResult Cond, Stmt *thenStmt, SourceLocation ElseLoc, Stmt *elseStmt) { if (Cond.isInvalid()) return StmtError(); if (IsConstexpr || isa(Cond.get().second)) getCurFunction()->setHasBranchProtectedScope(); DiagnoseUnusedExprResult(thenStmt); DiagnoseUnusedExprResult(elseStmt); return new (Context) IfStmt(Context, IfLoc, IsConstexpr, InitStmt, Cond.get().first, Cond.get().second, thenStmt, ElseLoc, elseStmt); } namespace { struct CaseCompareFunctor { bool operator()(const std::pair &LHS, const llvm::APSInt &RHS) { return LHS.first < RHS; } bool operator()(const std::pair &LHS, const std::pair &RHS) { return LHS.first < RHS.first; } bool operator()(const llvm::APSInt &LHS, const std::pair &RHS) { return LHS < RHS.first; } }; } /// CmpCaseVals - 
Comparison predicate for sorting case values. /// static bool CmpCaseVals(const std::pair& lhs, const std::pair& rhs) { if (lhs.first < rhs.first) return true; if (lhs.first == rhs.first && lhs.second->getCaseLoc().getRawEncoding() < rhs.second->getCaseLoc().getRawEncoding()) return true; return false; } /// CmpEnumVals - Comparison predicate for sorting enumeration values. /// static bool CmpEnumVals(const std::pair& lhs, const std::pair& rhs) { return lhs.first < rhs.first; } /// EqEnumVals - Comparison preficate for uniqing enumeration values. /// static bool EqEnumVals(const std::pair& lhs, const std::pair& rhs) { return lhs.first == rhs.first; } /// GetTypeBeforeIntegralPromotion - Returns the pre-promotion type of /// potentially integral-promoted expression @p expr. static QualType GetTypeBeforeIntegralPromotion(Expr *&expr) { if (ExprWithCleanups *cleanups = dyn_cast(expr)) expr = cleanups->getSubExpr(); while (ImplicitCastExpr *impcast = dyn_cast(expr)) { if (impcast->getCastKind() != CK_IntegralCast) break; expr = impcast->getSubExpr(); } return expr->getType(); } ExprResult Sema::CheckSwitchCondition(SourceLocation SwitchLoc, Expr *Cond) { class SwitchConvertDiagnoser : public ICEConvertDiagnoser { Expr *Cond; public: SwitchConvertDiagnoser(Expr *Cond) : ICEConvertDiagnoser(/*AllowScopedEnumerations*/true, false, true), Cond(Cond) {} SemaDiagnosticBuilder diagnoseNotInt(Sema &S, SourceLocation Loc, QualType T) override { return S.Diag(Loc, diag::err_typecheck_statement_requires_integer) << T; } SemaDiagnosticBuilder diagnoseIncomplete( Sema &S, SourceLocation Loc, QualType T) override { return S.Diag(Loc, diag::err_switch_incomplete_class_type) << T << Cond->getSourceRange(); } SemaDiagnosticBuilder diagnoseExplicitConv( Sema &S, SourceLocation Loc, QualType T, QualType ConvTy) override { return S.Diag(Loc, diag::err_switch_explicit_conversion) << T << ConvTy; } SemaDiagnosticBuilder noteExplicitConv( Sema &S, CXXConversionDecl *Conv, QualType ConvTy) 
override { return S.Diag(Conv->getLocation(), diag::note_switch_conversion) << ConvTy->isEnumeralType() << ConvTy; } SemaDiagnosticBuilder diagnoseAmbiguous(Sema &S, SourceLocation Loc, QualType T) override { return S.Diag(Loc, diag::err_switch_multiple_conversions) << T; } SemaDiagnosticBuilder noteAmbiguous( Sema &S, CXXConversionDecl *Conv, QualType ConvTy) override { return S.Diag(Conv->getLocation(), diag::note_switch_conversion) << ConvTy->isEnumeralType() << ConvTy; } SemaDiagnosticBuilder diagnoseConversion( Sema &S, SourceLocation Loc, QualType T, QualType ConvTy) override { llvm_unreachable("conversion functions are permitted"); } } SwitchDiagnoser(Cond); ExprResult CondResult = PerformContextualImplicitConversion(SwitchLoc, Cond, SwitchDiagnoser); if (CondResult.isInvalid()) return ExprError(); // C99 6.8.4.2p5 - Integer promotions are performed on the controlling expr. return UsualUnaryConversions(CondResult.get()); } StmtResult Sema::ActOnStartOfSwitchStmt(SourceLocation SwitchLoc, Stmt *InitStmt, ConditionResult Cond) { if (Cond.isInvalid()) return StmtError(); getCurFunction()->setHasBranchIntoScope(); SwitchStmt *SS = new (Context) SwitchStmt(Context, InitStmt, Cond.get().first, Cond.get().second); getCurFunction()->SwitchStack.push_back(SS); return SS; } static void AdjustAPSInt(llvm::APSInt &Val, unsigned BitWidth, bool IsSigned) { Val = Val.extOrTrunc(BitWidth); Val.setIsSigned(IsSigned); } /// Check the specified case value is in range for the given unpromoted switch /// type. static void checkCaseValue(Sema &S, SourceLocation Loc, const llvm::APSInt &Val, unsigned UnpromotedWidth, bool UnpromotedSign) { // If the case value was signed and negative and the switch expression is // unsigned, don't bother to warn: this is implementation-defined behavior. // FIXME: Introduce a second, default-ignored warning for this case? 
if (UnpromotedWidth < Val.getBitWidth()) { llvm::APSInt ConvVal(Val); AdjustAPSInt(ConvVal, UnpromotedWidth, UnpromotedSign); AdjustAPSInt(ConvVal, Val.getBitWidth(), Val.isSigned()); // FIXME: Use different diagnostics for overflow in conversion to promoted // type versus "switch expression cannot have this value". Use proper // IntRange checking rather than just looking at the unpromoted type here. if (ConvVal != Val) S.Diag(Loc, diag::warn_case_value_overflow) << Val.toString(10) << ConvVal.toString(10); } } typedef SmallVector, 64> EnumValsTy; /// Returns true if we should emit a diagnostic about this case expression not /// being a part of the enum used in the switch controlling expression. static bool ShouldDiagnoseSwitchCaseNotInEnum(const Sema &S, const EnumDecl *ED, const Expr *CaseExpr, EnumValsTy::iterator &EI, EnumValsTy::iterator &EIEnd, const llvm::APSInt &Val) { if (const DeclRefExpr *DRE = dyn_cast(CaseExpr->IgnoreParenImpCasts())) { if (const VarDecl *VD = dyn_cast(DRE->getDecl())) { QualType VarType = VD->getType(); QualType EnumType = S.Context.getTypeDeclType(ED); if (VD->hasGlobalStorage() && VarType.isConstQualified() && S.Context.hasSameUnqualifiedType(EnumType, VarType)) return false; } } if (ED->hasAttr()) { return !S.IsValueInFlagEnum(ED, Val, false); } else { while (EI != EIEnd && EI->first < Val) EI++; if (EI != EIEnd && EI->first == Val) return false; } return true; } StmtResult Sema::ActOnFinishSwitchStmt(SourceLocation SwitchLoc, Stmt *Switch, Stmt *BodyStmt) { SwitchStmt *SS = cast(Switch); assert(SS == getCurFunction()->SwitchStack.back() && "switch stack missing push/pop!"); getCurFunction()->SwitchStack.pop_back(); if (!BodyStmt) return StmtError(); SS->setBody(BodyStmt, SwitchLoc); Expr *CondExpr = SS->getCond(); if (!CondExpr) return StmtError(); QualType CondType = CondExpr->getType(); Expr *CondExprBeforePromotion = CondExpr; QualType CondTypeBeforePromotion = GetTypeBeforeIntegralPromotion(CondExprBeforePromotion); // C++ 
6.4.2.p2: // Integral promotions are performed (on the switch condition). // // A case value unrepresentable by the original switch condition // type (before the promotion) doesn't make sense, even when it can // be represented by the promoted type. Therefore we need to find // the pre-promotion type of the switch condition. if (!CondExpr->isTypeDependent()) { // We have already converted the expression to an integral or enumeration // type, when we started the switch statement. If we don't have an // appropriate type now, just return an error. if (!CondType->isIntegralOrEnumerationType()) return StmtError(); if (CondExpr->isKnownToHaveBooleanValue()) { // switch(bool_expr) {...} is often a programmer error, e.g. // switch(n && mask) { ... } // Doh - should be "n & mask". // One can always use an if statement instead of switch(bool_expr). Diag(SwitchLoc, diag::warn_bool_switch_condition) << CondExpr->getSourceRange(); } } // Get the bitwidth of the switched-on value after promotions. We must // convert the integer case values to this width before comparison. bool HasDependentValue = CondExpr->isTypeDependent() || CondExpr->isValueDependent(); unsigned CondWidth = HasDependentValue ? 0 : Context.getIntWidth(CondType); bool CondIsSigned = CondType->isSignedIntegerOrEnumerationType(); // Get the width and signedness that the condition might actually have, for // warning purposes. // FIXME: Grab an IntRange for the condition rather than using the unpromoted // type. unsigned CondWidthBeforePromotion = HasDependentValue ? 0 : Context.getIntWidth(CondTypeBeforePromotion); bool CondIsSignedBeforePromotion = CondTypeBeforePromotion->isSignedIntegerOrEnumerationType(); // Accumulate all of the case values in a vector so that we can sort them // and detect duplicates. This vector contains the APInt for the case after // it has been converted to the condition type. typedef SmallVector, 64> CaseValsTy; CaseValsTy CaseVals; // Keep track of any GNU case ranges we see. 
The APSInt is the low value. typedef std::vector > CaseRangesTy; CaseRangesTy CaseRanges; DefaultStmt *TheDefaultStmt = nullptr; bool CaseListIsErroneous = false; for (SwitchCase *SC = SS->getSwitchCaseList(); SC && !HasDependentValue; SC = SC->getNextSwitchCase()) { if (DefaultStmt *DS = dyn_cast(SC)) { if (TheDefaultStmt) { Diag(DS->getDefaultLoc(), diag::err_multiple_default_labels_defined); Diag(TheDefaultStmt->getDefaultLoc(), diag::note_duplicate_case_prev); // FIXME: Remove the default statement from the switch block so that // we'll return a valid AST. This requires recursing down the AST and // finding it, not something we are set up to do right now. For now, // just lop the entire switch stmt out of the AST. CaseListIsErroneous = true; } TheDefaultStmt = DS; } else { CaseStmt *CS = cast(SC); Expr *Lo = CS->getLHS(); if (Lo->isTypeDependent() || Lo->isValueDependent()) { HasDependentValue = true; break; } llvm::APSInt LoVal; if (getLangOpts().CPlusPlus11) { // C++11 [stmt.switch]p2: the constant-expression shall be a converted // constant expression of the promoted type of the switch condition. ExprResult ConvLo = CheckConvertedConstantExpression(Lo, CondType, LoVal, CCEK_CaseValue); if (ConvLo.isInvalid()) { CaseListIsErroneous = true; continue; } Lo = ConvLo.get(); } else { // We already verified that the expression has a i-c-e value (C99 // 6.8.4.2p3) - get that value now. LoVal = Lo->EvaluateKnownConstInt(Context); // If the LHS is not the same type as the condition, insert an implicit // cast. Lo = DefaultLvalueConversion(Lo).get(); Lo = ImpCastExprToType(Lo, CondType, CK_IntegralCast).get(); } // Check the unconverted value is within the range of possible values of // the switch expression. checkCaseValue(*this, Lo->getLocStart(), LoVal, CondWidthBeforePromotion, CondIsSignedBeforePromotion); // Convert the value to the same width/sign as the condition. 
AdjustAPSInt(LoVal, CondWidth, CondIsSigned); CS->setLHS(Lo); // If this is a case range, remember it in CaseRanges, otherwise CaseVals. if (CS->getRHS()) { if (CS->getRHS()->isTypeDependent() || CS->getRHS()->isValueDependent()) { HasDependentValue = true; break; } CaseRanges.push_back(std::make_pair(LoVal, CS)); } else CaseVals.push_back(std::make_pair(LoVal, CS)); } } if (!HasDependentValue) { // If we don't have a default statement, check whether the // condition is constant. llvm::APSInt ConstantCondValue; bool HasConstantCond = false; if (!HasDependentValue && !TheDefaultStmt) { HasConstantCond = CondExpr->EvaluateAsInt(ConstantCondValue, Context, Expr::SE_AllowSideEffects); assert(!HasConstantCond || (ConstantCondValue.getBitWidth() == CondWidth && ConstantCondValue.isSigned() == CondIsSigned)); } bool ShouldCheckConstantCond = HasConstantCond; // Sort all the scalar case values so we can easily detect duplicates. std::stable_sort(CaseVals.begin(), CaseVals.end(), CmpCaseVals); if (!CaseVals.empty()) { for (unsigned i = 0, e = CaseVals.size(); i != e; ++i) { if (ShouldCheckConstantCond && CaseVals[i].first == ConstantCondValue) ShouldCheckConstantCond = false; if (i != 0 && CaseVals[i].first == CaseVals[i-1].first) { // If we have a duplicate, report it. // First, determine if either case value has a name StringRef PrevString, CurrString; Expr *PrevCase = CaseVals[i-1].second->getLHS()->IgnoreParenCasts(); Expr *CurrCase = CaseVals[i].second->getLHS()->IgnoreParenCasts(); if (DeclRefExpr *DeclRef = dyn_cast(PrevCase)) { PrevString = DeclRef->getDecl()->getName(); } if (DeclRefExpr *DeclRef = dyn_cast(CurrCase)) { CurrString = DeclRef->getDecl()->getName(); } SmallString<16> CaseValStr; CaseVals[i-1].first.toString(CaseValStr); if (PrevString == CurrString) Diag(CaseVals[i].second->getLHS()->getLocStart(), diag::err_duplicate_case) << (PrevString.empty() ? 
StringRef(CaseValStr) : PrevString); else Diag(CaseVals[i].second->getLHS()->getLocStart(), diag::err_duplicate_case_differing_expr) << (PrevString.empty() ? StringRef(CaseValStr) : PrevString) << (CurrString.empty() ? StringRef(CaseValStr) : CurrString) << CaseValStr; Diag(CaseVals[i-1].second->getLHS()->getLocStart(), diag::note_duplicate_case_prev); // FIXME: We really want to remove the bogus case stmt from the // substmt, but we have no way to do this right now. CaseListIsErroneous = true; } } } // Detect duplicate case ranges, which usually don't exist at all in // the first place. if (!CaseRanges.empty()) { // Sort all the case ranges by their low value so we can easily detect // overlaps between ranges. std::stable_sort(CaseRanges.begin(), CaseRanges.end()); // Scan the ranges, computing the high values and removing empty ranges. std::vector HiVals; for (unsigned i = 0, e = CaseRanges.size(); i != e; ++i) { llvm::APSInt &LoVal = CaseRanges[i].first; CaseStmt *CR = CaseRanges[i].second; Expr *Hi = CR->getRHS(); llvm::APSInt HiVal; if (getLangOpts().CPlusPlus11) { // C++11 [stmt.switch]p2: the constant-expression shall be a converted // constant expression of the promoted type of the switch condition. ExprResult ConvHi = CheckConvertedConstantExpression(Hi, CondType, HiVal, CCEK_CaseValue); if (ConvHi.isInvalid()) { CaseListIsErroneous = true; continue; } Hi = ConvHi.get(); } else { HiVal = Hi->EvaluateKnownConstInt(Context); // If the RHS is not the same type as the condition, insert an // implicit cast. Hi = DefaultLvalueConversion(Hi).get(); Hi = ImpCastExprToType(Hi, CondType, CK_IntegralCast).get(); } // Check the unconverted value is within the range of possible values of // the switch expression. checkCaseValue(*this, Hi->getLocStart(), HiVal, CondWidthBeforePromotion, CondIsSignedBeforePromotion); // Convert the value to the same width/sign as the condition. 
AdjustAPSInt(HiVal, CondWidth, CondIsSigned); CR->setRHS(Hi); // If the low value is bigger than the high value, the case is empty. if (LoVal > HiVal) { Diag(CR->getLHS()->getLocStart(), diag::warn_case_empty_range) << SourceRange(CR->getLHS()->getLocStart(), Hi->getLocEnd()); CaseRanges.erase(CaseRanges.begin()+i); --i; --e; continue; } if (ShouldCheckConstantCond && LoVal <= ConstantCondValue && ConstantCondValue <= HiVal) ShouldCheckConstantCond = false; HiVals.push_back(HiVal); } // Rescan the ranges, looking for overlap with singleton values and other // ranges. Since the range list is sorted, we only need to compare case // ranges with their neighbors. for (unsigned i = 0, e = CaseRanges.size(); i != e; ++i) { llvm::APSInt &CRLo = CaseRanges[i].first; llvm::APSInt &CRHi = HiVals[i]; CaseStmt *CR = CaseRanges[i].second; // Check to see whether the case range overlaps with any // singleton cases. CaseStmt *OverlapStmt = nullptr; llvm::APSInt OverlapVal(32); // Find the smallest value >= the lower bound. If I is in the // case range, then we have overlap. CaseValsTy::iterator I = std::lower_bound(CaseVals.begin(), CaseVals.end(), CRLo, CaseCompareFunctor()); if (I != CaseVals.end() && I->first < CRHi) { OverlapVal = I->first; // Found overlap with scalar. OverlapStmt = I->second; } // Find the smallest value bigger than the upper bound. I = std::upper_bound(I, CaseVals.end(), CRHi, CaseCompareFunctor()); if (I != CaseVals.begin() && (I-1)->first >= CRLo) { OverlapVal = (I-1)->first; // Found overlap with scalar. OverlapStmt = (I-1)->second; } // Check to see if this case stmt overlaps with the subsequent // case range. if (i && CRLo <= HiVals[i-1]) { OverlapVal = HiVals[i-1]; // Found overlap with range. OverlapStmt = CaseRanges[i-1].second; } if (OverlapStmt) { // If we have a duplicate, report it. 
Diag(CR->getLHS()->getLocStart(), diag::err_duplicate_case) << OverlapVal.toString(10); Diag(OverlapStmt->getLHS()->getLocStart(), diag::note_duplicate_case_prev); // FIXME: We really want to remove the bogus case stmt from the // substmt, but we have no way to do this right now. CaseListIsErroneous = true; } } } // Complain if we have a constant condition and we didn't find a match. if (!CaseListIsErroneous && ShouldCheckConstantCond) { // TODO: it would be nice if we printed enums as enums, chars as // chars, etc. Diag(CondExpr->getExprLoc(), diag::warn_missing_case_for_condition) << ConstantCondValue.toString(10) << CondExpr->getSourceRange(); } // Check to see if switch is over an Enum and handles all of its // values. We only issue a warning if there is not 'default:', but // we still do the analysis to preserve this information in the AST // (which can be used by flow-based analyes). // const EnumType *ET = CondTypeBeforePromotion->getAs(); // If switch has default case, then ignore it. if (!CaseListIsErroneous && !HasConstantCond && ET && ET->getDecl()->isCompleteDefinition()) { const EnumDecl *ED = ET->getDecl(); EnumValsTy EnumVals; // Gather all enum values, set their type and sort them, // allowing easier comparison with CaseVals. for (auto *EDI : ED->enumerators()) { llvm::APSInt Val = EDI->getInitVal(); AdjustAPSInt(Val, CondWidth, CondIsSigned); EnumVals.push_back(std::make_pair(Val, EDI)); } std::stable_sort(EnumVals.begin(), EnumVals.end(), CmpEnumVals); auto EI = EnumVals.begin(), EIEnd = std::unique(EnumVals.begin(), EnumVals.end(), EqEnumVals); // See which case values aren't in enum. 
for (CaseValsTy::const_iterator CI = CaseVals.begin(); CI != CaseVals.end(); CI++) { Expr *CaseExpr = CI->second->getLHS(); if (ShouldDiagnoseSwitchCaseNotInEnum(*this, ED, CaseExpr, EI, EIEnd, CI->first)) Diag(CaseExpr->getExprLoc(), diag::warn_not_in_enum) << CondTypeBeforePromotion; } // See which of case ranges aren't in enum EI = EnumVals.begin(); for (CaseRangesTy::const_iterator RI = CaseRanges.begin(); RI != CaseRanges.end(); RI++) { Expr *CaseExpr = RI->second->getLHS(); if (ShouldDiagnoseSwitchCaseNotInEnum(*this, ED, CaseExpr, EI, EIEnd, RI->first)) Diag(CaseExpr->getExprLoc(), diag::warn_not_in_enum) << CondTypeBeforePromotion; llvm::APSInt Hi = RI->second->getRHS()->EvaluateKnownConstInt(Context); AdjustAPSInt(Hi, CondWidth, CondIsSigned); CaseExpr = RI->second->getRHS(); if (ShouldDiagnoseSwitchCaseNotInEnum(*this, ED, CaseExpr, EI, EIEnd, Hi)) Diag(CaseExpr->getExprLoc(), diag::warn_not_in_enum) << CondTypeBeforePromotion; } // Check which enum vals aren't in switch auto CI = CaseVals.begin(); auto RI = CaseRanges.begin(); bool hasCasesNotInSwitch = false; SmallVector UnhandledNames; for (EI = EnumVals.begin(); EI != EIEnd; EI++){ // Drop unneeded case values while (CI != CaseVals.end() && CI->first < EI->first) CI++; if (CI != CaseVals.end() && CI->first == EI->first) continue; // Drop unneeded case ranges for (; RI != CaseRanges.end(); RI++) { llvm::APSInt Hi = RI->second->getRHS()->EvaluateKnownConstInt(Context); AdjustAPSInt(Hi, CondWidth, CondIsSigned); if (EI->first <= Hi) break; } if (RI == CaseRanges.end() || EI->first < RI->first) { hasCasesNotInSwitch = true; UnhandledNames.push_back(EI->second->getDeclName()); } } if (TheDefaultStmt && UnhandledNames.empty()) Diag(TheDefaultStmt->getDefaultLoc(), diag::warn_unreachable_default); // Produce a nice diagnostic if multiple values aren't handled. if (!UnhandledNames.empty()) { DiagnosticBuilder DB = Diag(CondExpr->getExprLoc(), TheDefaultStmt ? 
diag::warn_def_missing_case : diag::warn_missing_case) << (int)UnhandledNames.size(); for (size_t I = 0, E = std::min(UnhandledNames.size(), (size_t)3); I != E; ++I) DB << UnhandledNames[I]; } if (!hasCasesNotInSwitch) SS->setAllEnumCasesCovered(); } } if (BodyStmt) DiagnoseEmptyStmtBody(CondExpr->getLocEnd(), BodyStmt, diag::warn_empty_switch_body); // FIXME: If the case list was broken is some way, we don't have a good system // to patch it up. Instead, just return the whole substmt as broken. if (CaseListIsErroneous) return StmtError(); return SS; } void Sema::DiagnoseAssignmentEnum(QualType DstType, QualType SrcType, Expr *SrcExpr) { if (Diags.isIgnored(diag::warn_not_in_enum_assignment, SrcExpr->getExprLoc())) return; if (const EnumType *ET = DstType->getAs()) if (!Context.hasSameUnqualifiedType(SrcType, DstType) && SrcType->isIntegerType()) { if (!SrcExpr->isTypeDependent() && !SrcExpr->isValueDependent() && SrcExpr->isIntegerConstantExpr(Context)) { // Get the bitwidth of the enum value before promotions. unsigned DstWidth = Context.getIntWidth(DstType); bool DstIsSigned = DstType->isSignedIntegerOrEnumerationType(); llvm::APSInt RhsVal = SrcExpr->EvaluateKnownConstInt(Context); AdjustAPSInt(RhsVal, DstWidth, DstIsSigned); const EnumDecl *ED = ET->getDecl(); if (ED->hasAttr()) { if (!IsValueInFlagEnum(ED, RhsVal, true)) Diag(SrcExpr->getExprLoc(), diag::warn_not_in_enum_assignment) << DstType.getUnqualifiedType(); } else { typedef SmallVector, 64> EnumValsTy; EnumValsTy EnumVals; // Gather all enum values, set their type and sort them, // allowing easier comparison with rhs constant. 
for (auto *EDI : ED->enumerators()) { llvm::APSInt Val = EDI->getInitVal(); AdjustAPSInt(Val, DstWidth, DstIsSigned); EnumVals.push_back(std::make_pair(Val, EDI)); } if (EnumVals.empty()) return; std::stable_sort(EnumVals.begin(), EnumVals.end(), CmpEnumVals); EnumValsTy::iterator EIend = std::unique(EnumVals.begin(), EnumVals.end(), EqEnumVals); // See which values aren't in the enum. EnumValsTy::const_iterator EI = EnumVals.begin(); while (EI != EIend && EI->first < RhsVal) EI++; if (EI == EIend || EI->first != RhsVal) { Diag(SrcExpr->getExprLoc(), diag::warn_not_in_enum_assignment) << DstType.getUnqualifiedType(); } } } } } StmtResult Sema::ActOnWhileStmt(SourceLocation WhileLoc, ConditionResult Cond, Stmt *Body) { if (Cond.isInvalid()) return StmtError(); auto CondVal = Cond.get(); CheckBreakContinueBinding(CondVal.second); if (CondVal.second && !Diags.isIgnored(diag::warn_comma_operator, CondVal.second->getExprLoc())) CommaVisitor(*this).Visit(CondVal.second); DiagnoseUnusedExprResult(Body); if (isa(Body)) getCurCompoundScope().setHasEmptyLoopBodies(); return new (Context) WhileStmt(Context, CondVal.first, CondVal.second, Body, WhileLoc); } StmtResult Sema::ActOnDoStmt(SourceLocation DoLoc, Stmt *Body, SourceLocation WhileLoc, SourceLocation CondLParen, Expr *Cond, SourceLocation CondRParen) { assert(Cond && "ActOnDoStmt(): missing expression"); CheckBreakContinueBinding(Cond); ExprResult CondResult = CheckBooleanCondition(DoLoc, Cond); if (CondResult.isInvalid()) return StmtError(); Cond = CondResult.get(); CondResult = ActOnFinishFullExpr(Cond, DoLoc); if (CondResult.isInvalid()) return StmtError(); Cond = CondResult.get(); DiagnoseUnusedExprResult(Body); return new (Context) DoStmt(Body, Cond, DoLoc, WhileLoc, CondRParen); } namespace { // This visitor will traverse a conditional statement and store all // the evaluated decls into a vector. Simple is set to true if none // of the excluded constructs are used. 
class DeclExtractor : public EvaluatedExprVisitor { llvm::SmallPtrSetImpl &Decls; SmallVectorImpl &Ranges; bool Simple; public: typedef EvaluatedExprVisitor Inherited; DeclExtractor(Sema &S, llvm::SmallPtrSetImpl &Decls, SmallVectorImpl &Ranges) : Inherited(S.Context), Decls(Decls), Ranges(Ranges), Simple(true) {} bool isSimple() { return Simple; } // Replaces the method in EvaluatedExprVisitor. void VisitMemberExpr(MemberExpr* E) { Simple = false; } // Any Stmt not whitelisted will cause the condition to be marked complex. void VisitStmt(Stmt *S) { Simple = false; } void VisitBinaryOperator(BinaryOperator *E) { Visit(E->getLHS()); Visit(E->getRHS()); } void VisitCastExpr(CastExpr *E) { Visit(E->getSubExpr()); } void VisitUnaryOperator(UnaryOperator *E) { // Skip checking conditionals with derefernces. if (E->getOpcode() == UO_Deref) Simple = false; else Visit(E->getSubExpr()); } void VisitConditionalOperator(ConditionalOperator *E) { Visit(E->getCond()); Visit(E->getTrueExpr()); Visit(E->getFalseExpr()); } void VisitParenExpr(ParenExpr *E) { Visit(E->getSubExpr()); } void VisitBinaryConditionalOperator(BinaryConditionalOperator *E) { Visit(E->getOpaqueValue()->getSourceExpr()); Visit(E->getFalseExpr()); } void VisitIntegerLiteral(IntegerLiteral *E) { } void VisitFloatingLiteral(FloatingLiteral *E) { } void VisitCXXBoolLiteralExpr(CXXBoolLiteralExpr *E) { } void VisitCharacterLiteral(CharacterLiteral *E) { } void VisitGNUNullExpr(GNUNullExpr *E) { } void VisitImaginaryLiteral(ImaginaryLiteral *E) { } void VisitDeclRefExpr(DeclRefExpr *E) { VarDecl *VD = dyn_cast(E->getDecl()); if (!VD) return; Ranges.push_back(E->getSourceRange()); Decls.insert(VD); } }; // end class DeclExtractor // DeclMatcher checks to see if the decls are used in a non-evaluated // context. 
class DeclMatcher : public EvaluatedExprVisitor { llvm::SmallPtrSetImpl &Decls; bool FoundDecl; public: typedef EvaluatedExprVisitor Inherited; DeclMatcher(Sema &S, llvm::SmallPtrSetImpl &Decls, Stmt *Statement) : Inherited(S.Context), Decls(Decls), FoundDecl(false) { if (!Statement) return; Visit(Statement); } void VisitReturnStmt(ReturnStmt *S) { FoundDecl = true; } void VisitBreakStmt(BreakStmt *S) { FoundDecl = true; } void VisitGotoStmt(GotoStmt *S) { FoundDecl = true; } void VisitCastExpr(CastExpr *E) { if (E->getCastKind() == CK_LValueToRValue) CheckLValueToRValueCast(E->getSubExpr()); else Visit(E->getSubExpr()); } void CheckLValueToRValueCast(Expr *E) { E = E->IgnoreParenImpCasts(); if (isa(E)) { return; } if (ConditionalOperator *CO = dyn_cast(E)) { Visit(CO->getCond()); CheckLValueToRValueCast(CO->getTrueExpr()); CheckLValueToRValueCast(CO->getFalseExpr()); return; } if (BinaryConditionalOperator *BCO = dyn_cast(E)) { CheckLValueToRValueCast(BCO->getOpaqueValue()->getSourceExpr()); CheckLValueToRValueCast(BCO->getFalseExpr()); return; } Visit(E); } void VisitDeclRefExpr(DeclRefExpr *E) { if (VarDecl *VD = dyn_cast(E->getDecl())) if (Decls.count(VD)) FoundDecl = true; } void VisitPseudoObjectExpr(PseudoObjectExpr *POE) { // Only need to visit the semantics for POE. // SyntaticForm doesn't really use the Decal. for (auto *S : POE->semantics()) { if (auto *OVE = dyn_cast(S)) // Look past the OVE into the expression it binds. 
Visit(OVE->getSourceExpr()); else Visit(S); } } bool FoundDeclInUse() { return FoundDecl; } }; // end class DeclMatcher void CheckForLoopConditionalStatement(Sema &S, Expr *Second, Expr *Third, Stmt *Body) { // Condition is empty if (!Second) return; if (S.Diags.isIgnored(diag::warn_variables_not_in_loop_body, Second->getLocStart())) return; PartialDiagnostic PDiag = S.PDiag(diag::warn_variables_not_in_loop_body); llvm::SmallPtrSet Decls; SmallVector Ranges; DeclExtractor DE(S, Decls, Ranges); DE.Visit(Second); // Don't analyze complex conditionals. if (!DE.isSimple()) return; // No decls found. if (Decls.size() == 0) return; // Don't warn on volatile, static, or global variables. for (llvm::SmallPtrSetImpl::iterator I = Decls.begin(), E = Decls.end(); I != E; ++I) if ((*I)->getType().isVolatileQualified() || (*I)->hasGlobalStorage()) return; if (DeclMatcher(S, Decls, Second).FoundDeclInUse() || DeclMatcher(S, Decls, Third).FoundDeclInUse() || DeclMatcher(S, Decls, Body).FoundDeclInUse()) return; // Load decl names into diagnostic. if (Decls.size() > 4) PDiag << 0; else { PDiag << Decls.size(); for (llvm::SmallPtrSetImpl::iterator I = Decls.begin(), E = Decls.end(); I != E; ++I) PDiag << (*I)->getDeclName(); } // Load SourceRanges into diagnostic if there is room. // Otherwise, load the SourceRange of the conditional expression. if (Ranges.size() <= PartialDiagnostic::MaxArguments) for (SmallVectorImpl::iterator I = Ranges.begin(), E = Ranges.end(); I != E; ++I) PDiag << *I; else PDiag << Second->getSourceRange(); S.Diag(Ranges.begin()->getBegin(), PDiag); } // If Statement is an incemement or decrement, return true and sets the // variables Increment and DRE. 
bool ProcessIterationStmt(Sema &S, Stmt* Statement, bool &Increment, DeclRefExpr *&DRE) { if (auto Cleanups = dyn_cast(Statement)) if (!Cleanups->cleanupsHaveSideEffects()) Statement = Cleanups->getSubExpr(); if (UnaryOperator *UO = dyn_cast(Statement)) { switch (UO->getOpcode()) { default: return false; case UO_PostInc: case UO_PreInc: Increment = true; break; case UO_PostDec: case UO_PreDec: Increment = false; break; } DRE = dyn_cast(UO->getSubExpr()); return DRE; } if (CXXOperatorCallExpr *Call = dyn_cast(Statement)) { FunctionDecl *FD = Call->getDirectCallee(); if (!FD || !FD->isOverloadedOperator()) return false; switch (FD->getOverloadedOperator()) { default: return false; case OO_PlusPlus: Increment = true; break; case OO_MinusMinus: Increment = false; break; } DRE = dyn_cast(Call->getArg(0)); return DRE; } return false; } // A visitor to determine if a continue or break statement is a // subexpression. class BreakContinueFinder : public EvaluatedExprVisitor { SourceLocation BreakLoc; SourceLocation ContinueLoc; public: BreakContinueFinder(Sema &S, Stmt* Body) : Inherited(S.Context) { Visit(Body); } typedef EvaluatedExprVisitor Inherited; void VisitContinueStmt(ContinueStmt* E) { ContinueLoc = E->getContinueLoc(); } void VisitBreakStmt(BreakStmt* E) { BreakLoc = E->getBreakLoc(); } bool ContinueFound() { return ContinueLoc.isValid(); } bool BreakFound() { return BreakLoc.isValid(); } SourceLocation GetContinueLoc() { return ContinueLoc; } SourceLocation GetBreakLoc() { return BreakLoc; } }; // end class BreakContinueFinder // Emit a warning when a loop increment/decrement appears twice per loop // iteration. The conditions which trigger this warning are: // 1) The last statement in the loop body and the third expression in the // for loop are both increment or both decrement of the same variable // 2) No continue statements in the loop body. void CheckForRedundantIteration(Sema &S, Expr *Third, Stmt *Body) { // Return when there is nothing to check. 
if (!Body || !Third) return; if (S.Diags.isIgnored(diag::warn_redundant_loop_iteration, Third->getLocStart())) return; // Get the last statement from the loop body. CompoundStmt *CS = dyn_cast(Body); if (!CS || CS->body_empty()) return; Stmt *LastStmt = CS->body_back(); if (!LastStmt) return; bool LoopIncrement, LastIncrement; DeclRefExpr *LoopDRE, *LastDRE; if (!ProcessIterationStmt(S, Third, LoopIncrement, LoopDRE)) return; if (!ProcessIterationStmt(S, LastStmt, LastIncrement, LastDRE)) return; // Check that the two statements are both increments or both decrements // on the same variable. if (LoopIncrement != LastIncrement || LoopDRE->getDecl() != LastDRE->getDecl()) return; if (BreakContinueFinder(S, Body).ContinueFound()) return; S.Diag(LastDRE->getLocation(), diag::warn_redundant_loop_iteration) << LastDRE->getDecl() << LastIncrement; S.Diag(LoopDRE->getLocation(), diag::note_loop_iteration_here) << LoopIncrement; } } // end namespace void Sema::CheckBreakContinueBinding(Expr *E) { if (!E || getLangOpts().CPlusPlus) return; BreakContinueFinder BCFinder(*this, E); Scope *BreakParent = CurScope->getBreakParent(); if (BCFinder.BreakFound() && BreakParent) { if (BreakParent->getFlags() & Scope::SwitchScope) { Diag(BCFinder.GetBreakLoc(), diag::warn_break_binds_to_switch); } else { Diag(BCFinder.GetBreakLoc(), diag::warn_loop_ctrl_binds_to_inner) << "break"; } } else if (BCFinder.ContinueFound() && CurScope->getContinueParent()) { Diag(BCFinder.GetContinueLoc(), diag::warn_loop_ctrl_binds_to_inner) << "continue"; } } StmtResult Sema::ActOnForStmt(SourceLocation ForLoc, SourceLocation LParenLoc, Stmt *First, ConditionResult Second, FullExprArg third, SourceLocation RParenLoc, Stmt *Body) { if (Second.isInvalid()) return StmtError(); if (!getLangOpts().CPlusPlus) { if (DeclStmt *DS = dyn_cast_or_null(First)) { // C99 6.8.5p3: The declaration part of a 'for' statement shall only // declare identifiers for objects having storage class 'auto' or // 'register'. 
for (auto *DI : DS->decls()) { VarDecl *VD = dyn_cast(DI); if (VD && VD->isLocalVarDecl() && !VD->hasLocalStorage()) VD = nullptr; if (!VD) { Diag(DI->getLocation(), diag::err_non_local_variable_decl_in_for); DI->setInvalidDecl(); } } } } CheckBreakContinueBinding(Second.get().second); CheckBreakContinueBinding(third.get()); if (!Second.get().first) CheckForLoopConditionalStatement(*this, Second.get().second, third.get(), Body); CheckForRedundantIteration(*this, third.get(), Body); if (Second.get().second && !Diags.isIgnored(diag::warn_comma_operator, Second.get().second->getExprLoc())) CommaVisitor(*this).Visit(Second.get().second); Expr *Third = third.release().getAs(); DiagnoseUnusedExprResult(First); DiagnoseUnusedExprResult(Third); DiagnoseUnusedExprResult(Body); if (isa(Body)) getCurCompoundScope().setHasEmptyLoopBodies(); return new (Context) ForStmt(Context, First, Second.get().second, Second.get().first, Third, Body, ForLoc, LParenLoc, RParenLoc); } /// In an Objective C collection iteration statement: /// for (x in y) /// x can be an arbitrary l-value expression. Bind it up as a /// full-expression. StmtResult Sema::ActOnForEachLValueExpr(Expr *E) { // Reduce placeholder expressions here. Note that this rejects the // use of pseudo-object l-values in this position. ExprResult result = CheckPlaceholderExpr(E); if (result.isInvalid()) return StmtError(); E = result.get(); ExprResult FullExpr = ActOnFinishFullExpr(E); if (FullExpr.isInvalid()) return StmtError(); return StmtResult(static_cast(FullExpr.get())); } ExprResult Sema::CheckObjCForCollectionOperand(SourceLocation forLoc, Expr *collection) { if (!collection) return ExprError(); ExprResult result = CorrectDelayedTyposInExpr(collection); if (!result.isUsable()) return ExprError(); collection = result.get(); // Bail out early if we've got a type-dependent expression. if (collection->isTypeDependent()) return collection; // Perform normal l-value conversion. 
result = DefaultFunctionArrayLvalueConversion(collection); if (result.isInvalid()) return ExprError(); collection = result.get(); // The operand needs to have object-pointer type. // TODO: should we do a contextual conversion? const ObjCObjectPointerType *pointerType = collection->getType()->getAs(); if (!pointerType) return Diag(forLoc, diag::err_collection_expr_type) << collection->getType() << collection->getSourceRange(); // Check that the operand provides // - countByEnumeratingWithState:objects:count: const ObjCObjectType *objectType = pointerType->getObjectType(); ObjCInterfaceDecl *iface = objectType->getInterface(); // If we have a forward-declared type, we can't do this check. // Under ARC, it is an error not to have a forward-declared class. if (iface && (getLangOpts().ObjCAutoRefCount ? RequireCompleteType(forLoc, QualType(objectType, 0), diag::err_arc_collection_forward, collection) : !isCompleteType(forLoc, QualType(objectType, 0)))) { // Otherwise, if we have any useful type information, check that // the type declares the appropriate method. } else if (iface || !objectType->qual_empty()) { IdentifierInfo *selectorIdents[] = { &Context.Idents.get("countByEnumeratingWithState"), &Context.Idents.get("objects"), &Context.Idents.get("count") }; Selector selector = Context.Selectors.getSelector(3, &selectorIdents[0]); ObjCMethodDecl *method = nullptr; // If there's an interface, look in both the public and private APIs. if (iface) { method = iface->lookupInstanceMethod(selector); if (!method) method = iface->lookupPrivateMethod(selector); } // Also check protocol qualifiers. if (!method) method = LookupMethodInQualifiedType(selector, pointerType, /*instance*/ true); // If we didn't find it anywhere, give up. if (!method) { Diag(forLoc, diag::warn_collection_expr_type) << collection->getType() << selector << collection->getSourceRange(); } // TODO: check for an incompatible signature? } // Wrap up any cleanups in the expression. 
return collection; } StmtResult Sema::ActOnObjCForCollectionStmt(SourceLocation ForLoc, Stmt *First, Expr *collection, SourceLocation RParenLoc) { ExprResult CollectionExprResult = CheckObjCForCollectionOperand(ForLoc, collection); if (First) { QualType FirstType; if (DeclStmt *DS = dyn_cast(First)) { if (!DS->isSingleDecl()) return StmtError(Diag((*DS->decl_begin())->getLocation(), diag::err_toomany_element_decls)); VarDecl *D = dyn_cast(DS->getSingleDecl()); if (!D || D->isInvalidDecl()) return StmtError(); FirstType = D->getType(); // C99 6.8.5p3: The declaration part of a 'for' statement shall only // declare identifiers for objects having storage class 'auto' or // 'register'. if (!D->hasLocalStorage()) return StmtError(Diag(D->getLocation(), diag::err_non_local_variable_decl_in_for)); // If the type contained 'auto', deduce the 'auto' to 'id'. if (FirstType->getContainedAutoType()) { OpaqueValueExpr OpaqueId(D->getLocation(), Context.getObjCIdType(), VK_RValue); Expr *DeducedInit = &OpaqueId; if (DeduceAutoType(D->getTypeSourceInfo(), DeducedInit, FirstType) == DAR_Failed) DiagnoseAutoDeductionFailure(D, DeducedInit); if (FirstType.isNull()) { D->setInvalidDecl(); return StmtError(); } D->setType(FirstType); if (ActiveTemplateInstantiations.empty()) { SourceLocation Loc = D->getTypeSourceInfo()->getTypeLoc().getBeginLoc(); Diag(Loc, diag::warn_auto_var_is_id) << D->getDeclName(); } } } else { Expr *FirstE = cast(First); if (!FirstE->isTypeDependent() && !FirstE->isLValue()) return StmtError(Diag(First->getLocStart(), diag::err_selector_element_not_lvalue) << First->getSourceRange()); FirstType = static_cast(First)->getType(); if (FirstType.isConstQualified()) Diag(ForLoc, diag::err_selector_element_const_type) << FirstType << First->getSourceRange(); } if (!FirstType->isDependentType() && !FirstType->isObjCObjectPointerType() && !FirstType->isBlockPointerType()) return StmtError(Diag(ForLoc, diag::err_selector_element_type) << FirstType << 
First->getSourceRange()); } if (CollectionExprResult.isInvalid()) return StmtError(); CollectionExprResult = ActOnFinishFullExpr(CollectionExprResult.get()); if (CollectionExprResult.isInvalid()) return StmtError(); return new (Context) ObjCForCollectionStmt(First, CollectionExprResult.get(), nullptr, ForLoc, RParenLoc); } /// Finish building a variable declaration for a for-range statement. /// \return true if an error occurs. static bool FinishForRangeVarDecl(Sema &SemaRef, VarDecl *Decl, Expr *Init, SourceLocation Loc, int DiagID) { if (Decl->getType()->isUndeducedType()) { ExprResult Res = SemaRef.CorrectDelayedTyposInExpr(Init); if (!Res.isUsable()) { Decl->setInvalidDecl(); return true; } Init = Res.get(); } // Deduce the type for the iterator variable now rather than leaving it to // AddInitializerToDecl, so we can produce a more suitable diagnostic. QualType InitType; if ((!isa(Init) && Init->getType()->isVoidType()) || SemaRef.DeduceAutoType(Decl->getTypeSourceInfo(), Init, InitType) == Sema::DAR_Failed) SemaRef.Diag(Loc, DiagID) << Init->getType(); if (InitType.isNull()) { Decl->setInvalidDecl(); return true; } Decl->setType(InitType); // In ARC, infer lifetime. // FIXME: ARC may want to turn this into 'const __unsafe_unretained' if // we're doing the equivalent of fast iteration. if (SemaRef.getLangOpts().ObjCAutoRefCount && SemaRef.inferObjCARCLifetime(Decl)) Decl->setInvalidDecl(); SemaRef.AddInitializerToDecl(Decl, Init, /*DirectInit=*/false); SemaRef.FinalizeDeclaration(Decl); SemaRef.CurContext->addHiddenDecl(Decl); return false; } namespace { // An enum to represent whether something is dealing with a call to begin() // or a call to end() in a range-based for loop. enum BeginEndFunction { BEF_begin, BEF_end }; /// Produce a note indicating which begin/end function was implicitly called /// by a C++11 for-range statement. 
This is often not obvious from the code, /// nor from the diagnostics produced when analysing the implicit expressions /// required in a for-range statement. void NoteForRangeBeginEndFunction(Sema &SemaRef, Expr *E, BeginEndFunction BEF) { CallExpr *CE = dyn_cast(E); if (!CE) return; FunctionDecl *D = dyn_cast(CE->getCalleeDecl()); if (!D) return; SourceLocation Loc = D->getLocation(); std::string Description; bool IsTemplate = false; if (FunctionTemplateDecl *FunTmpl = D->getPrimaryTemplate()) { Description = SemaRef.getTemplateArgumentBindingsText( FunTmpl->getTemplateParameters(), *D->getTemplateSpecializationArgs()); IsTemplate = true; } SemaRef.Diag(Loc, diag::note_for_range_begin_end) << BEF << IsTemplate << Description << E->getType(); } /// Build a variable declaration for a for-range statement. VarDecl *BuildForRangeVarDecl(Sema &SemaRef, SourceLocation Loc, QualType Type, const char *Name) { DeclContext *DC = SemaRef.CurContext; IdentifierInfo *II = &SemaRef.PP.getIdentifierTable().get(Name); TypeSourceInfo *TInfo = SemaRef.Context.getTrivialTypeSourceInfo(Type, Loc); VarDecl *Decl = VarDecl::Create(SemaRef.Context, DC, Loc, Loc, II, Type, TInfo, SC_None); Decl->setImplicit(); return Decl; } } static bool ObjCEnumerationCollection(Expr *Collection) { return !Collection->isTypeDependent() && Collection->getType()->getAs() != nullptr; } /// ActOnCXXForRangeStmt - Check and build a C++11 for-range statement. /// /// C++11 [stmt.ranged]: /// A range-based for statement is equivalent to /// /// { /// auto && __range = range-init; /// for ( auto __begin = begin-expr, /// __end = end-expr; /// __begin != __end; /// ++__begin ) { /// for-range-declaration = *__begin; /// statement /// } /// } /// /// The body of the loop is not available yet, since it cannot be analysed until /// we have determined the type of the for-range-declaration. 
///
/// \param S          Scope forwarded to ActOnCoawaitExpr for 'for co_await'.
/// \param ForLoc     Location of the 'for' keyword.
/// \param CoawaitLoc Location of 'co_await'; when valid, the range expression
///                   is wrapped in a co_await expression.
/// \param First      The for-range-declaration; must be a DeclStmt declaring
///                   exactly one entity.
/// \param ColonLoc   Location of the ':' token.
/// \param Range      The range initializer; a null Range is an error.
/// \param RParenLoc  Location of the closing parenthesis.
/// \param Kind       Forwarded unchanged to BuildCXXForRangeStmt.
/// \returns StmtError() on failure, otherwise whatever BuildCXXForRangeStmt
///          (or ActOnObjCForCollectionStmt) produces.
StmtResult Sema::ActOnCXXForRangeStmt(Scope *S, SourceLocation ForLoc,
                                      SourceLocation CoawaitLoc, Stmt *First,
                                      SourceLocation ColonLoc, Expr *Range,
                                      SourceLocation RParenLoc,
                                      BuildForRangeKind Kind) {
  if (!First)
    return StmtError();

  // Ranges accepted by ObjCEnumerationCollection are delegated to the
  // Objective-C for-collection path instead of the C++ one.
  if (Range && ObjCEnumerationCollection(Range))
    return ActOnObjCForCollectionStmt(ForLoc, First, Range, RParenLoc);

  DeclStmt *DS = dyn_cast<DeclStmt>(First);
  assert(DS && "first part of for range not a decl stmt");

  if (!DS->isSingleDecl()) {
    Diag(DS->getStartLoc(), diag::err_type_defined_in_for_range);
    return StmtError();
  }

  // Every failure from here on also marks the loop variable invalid.
  Decl *LoopVar = DS->getSingleDecl();
  if (LoopVar->isInvalidDecl() || !Range ||
      DiagnoseUnexpandedParameterPack(Range, UPPC_Expression)) {
    LoopVar->setInvalidDecl();
    return StmtError();
  }

  // Coroutines: 'for co_await' implicitly co_awaits its range.
  if (CoawaitLoc.isValid()) {
    ExprResult Coawait = ActOnCoawaitExpr(S, CoawaitLoc, Range);
    if (Coawait.isInvalid())
      return StmtError();
    Range = Coawait.get();
  }

  // Build  auto && __range = range-init
  SourceLocation RangeLoc = Range->getLocStart();
  VarDecl *RangeVar = BuildForRangeVarDecl(*this, RangeLoc,
                                           Context.getAutoRRefDeductType(),
                                           "__range");
  if (FinishForRangeVarDecl(*this, RangeVar, Range, RangeLoc,
                            diag::err_for_range_deduction_failure)) {
    LoopVar->setInvalidDecl();
    return StmtError();
  }

  // Claim the type doesn't contain auto: we've already done the checking.
  DeclGroupPtrTy RangeGroup =
      BuildDeclaratorGroup(MutableArrayRef<Decl *>((Decl **)&RangeVar, 1));
  StmtResult RangeDecl = ActOnDeclStmt(RangeGroup, RangeLoc, RangeLoc);
  if (RangeDecl.isInvalid()) {
    LoopVar->setInvalidDecl();
    return StmtError();
  }

  // The begin/end statements, condition and increment are left null here;
  // BuildCXXForRangeStmt constructs them.
  return BuildCXXForRangeStmt(ForLoc, CoawaitLoc, ColonLoc, RangeDecl.get(),
                              /*BeginStmt=*/nullptr, /*EndStmt=*/nullptr,
                              /*Cond=*/nullptr, /*Inc=*/nullptr,
                              DS, RParenLoc, Kind);
}

/// \brief Create the initialization, compare, and increment steps for
/// the range-based for loop expression.
/// This function does not handle array-based for loops, /// which are created in Sema::BuildCXXForRangeStmt. /// /// \returns a ForRangeStatus indicating success or what kind of error occurred. /// BeginExpr and EndExpr are set and FRS_Success is returned on success; /// CandidateSet and BEF are set and some non-success value is returned on /// failure. static Sema::ForRangeStatus BuildNonArrayForRange(Sema &SemaRef, Expr *BeginRange, Expr *EndRange, QualType RangeType, VarDecl *BeginVar, VarDecl *EndVar, SourceLocation ColonLoc, OverloadCandidateSet *CandidateSet, ExprResult *BeginExpr, ExprResult *EndExpr, BeginEndFunction *BEF) { DeclarationNameInfo BeginNameInfo( &SemaRef.PP.getIdentifierTable().get("begin"), ColonLoc); DeclarationNameInfo EndNameInfo(&SemaRef.PP.getIdentifierTable().get("end"), ColonLoc); LookupResult BeginMemberLookup(SemaRef, BeginNameInfo, Sema::LookupMemberName); LookupResult EndMemberLookup(SemaRef, EndNameInfo, Sema::LookupMemberName); if (CXXRecordDecl *D = RangeType->getAsCXXRecordDecl()) { // - if _RangeT is a class type, the unqualified-ids begin and end are // looked up in the scope of class _RangeT as if by class member access // lookup (3.4.5), and if either (or both) finds at least one // declaration, begin-expr and end-expr are __range.begin() and // __range.end(), respectively; SemaRef.LookupQualifiedName(BeginMemberLookup, D); SemaRef.LookupQualifiedName(EndMemberLookup, D); if (BeginMemberLookup.empty() != EndMemberLookup.empty()) { SourceLocation RangeLoc = BeginVar->getLocation(); *BEF = BeginMemberLookup.empty() ? BEF_end : BEF_begin; SemaRef.Diag(RangeLoc, diag::err_for_range_member_begin_end_mismatch) << RangeLoc << BeginRange->getType() << *BEF; return Sema::FRS_DiagnosticIssued; } } else { // - otherwise, begin-expr and end-expr are begin(__range) and // end(__range), respectively, where begin and end are looked up with // argument-dependent lookup (3.4.2). 
For the purposes of this name // lookup, namespace std is an associated namespace. } *BEF = BEF_begin; Sema::ForRangeStatus RangeStatus = SemaRef.BuildForRangeBeginEndCall(ColonLoc, ColonLoc, BeginNameInfo, BeginMemberLookup, CandidateSet, BeginRange, BeginExpr); if (RangeStatus != Sema::FRS_Success) { if (RangeStatus == Sema::FRS_DiagnosticIssued) SemaRef.Diag(BeginRange->getLocStart(), diag::note_in_for_range) << ColonLoc << BEF_begin << BeginRange->getType(); return RangeStatus; } if (FinishForRangeVarDecl(SemaRef, BeginVar, BeginExpr->get(), ColonLoc, diag::err_for_range_iter_deduction_failure)) { NoteForRangeBeginEndFunction(SemaRef, BeginExpr->get(), *BEF); return Sema::FRS_DiagnosticIssued; } *BEF = BEF_end; RangeStatus = SemaRef.BuildForRangeBeginEndCall(ColonLoc, ColonLoc, EndNameInfo, EndMemberLookup, CandidateSet, EndRange, EndExpr); if (RangeStatus != Sema::FRS_Success) { if (RangeStatus == Sema::FRS_DiagnosticIssued) SemaRef.Diag(EndRange->getLocStart(), diag::note_in_for_range) << ColonLoc << BEF_end << EndRange->getType(); return RangeStatus; } if (FinishForRangeVarDecl(SemaRef, EndVar, EndExpr->get(), ColonLoc, diag::err_for_range_iter_deduction_failure)) { NoteForRangeBeginEndFunction(SemaRef, EndExpr->get(), *BEF); return Sema::FRS_DiagnosticIssued; } return Sema::FRS_Success; } /// Speculatively attempt to dereference an invalid range expression. /// If the attempt fails, this function will return a valid, null StmtResult /// and emit no diagnostics. static StmtResult RebuildForRangeWithDereference(Sema &SemaRef, Scope *S, SourceLocation ForLoc, SourceLocation CoawaitLoc, Stmt *LoopVarDecl, SourceLocation ColonLoc, Expr *Range, SourceLocation RangeLoc, SourceLocation RParenLoc) { // Determine whether we can rebuild the for-range statement with a // dereferenced range expression. 
ExprResult AdjustedRange; { Sema::SFINAETrap Trap(SemaRef); AdjustedRange = SemaRef.BuildUnaryOp(S, RangeLoc, UO_Deref, Range); if (AdjustedRange.isInvalid()) return StmtResult(); StmtResult SR = SemaRef.ActOnCXXForRangeStmt( S, ForLoc, CoawaitLoc, LoopVarDecl, ColonLoc, AdjustedRange.get(), RParenLoc, Sema::BFRK_Check); if (SR.isInvalid()) return StmtResult(); } // The attempt to dereference worked well enough that it could produce a valid // loop. Produce a fixit, and rebuild the loop with diagnostics enabled, in // case there are any other (non-fatal) problems with it. SemaRef.Diag(RangeLoc, diag::err_for_range_dereference) << Range->getType() << FixItHint::CreateInsertion(RangeLoc, "*"); return SemaRef.ActOnCXXForRangeStmt(S, ForLoc, CoawaitLoc, LoopVarDecl, ColonLoc, AdjustedRange.get(), RParenLoc, Sema::BFRK_Rebuild); } namespace { /// RAII object to automatically invalidate a declaration if an error occurs. struct InvalidateOnErrorScope { InvalidateOnErrorScope(Sema &SemaRef, Decl *D, bool Enabled) : Trap(SemaRef.Diags), D(D), Enabled(Enabled) {} ~InvalidateOnErrorScope() { if (Enabled && Trap.hasErrorOccurred()) D->setInvalidDecl(); } DiagnosticErrorTrap Trap; Decl *D; bool Enabled; }; } /// BuildCXXForRangeStmt - Build or instantiate a C++11 for-range statement. StmtResult Sema::BuildCXXForRangeStmt(SourceLocation ForLoc, SourceLocation CoawaitLoc, SourceLocation ColonLoc, Stmt *RangeDecl, Stmt *Begin, Stmt *End, Expr *Cond, Expr *Inc, Stmt *LoopVarDecl, SourceLocation RParenLoc, BuildForRangeKind Kind) { // FIXME: This should not be used during template instantiation. We should // pick up the set of unqualified lookup results for the != and + operators // in the initial parse. 
// // Testcase (accepts-invalid): // template void f() { for (auto x : T()) {} } // namespace N { struct X { X begin(); X end(); int operator*(); }; } // bool operator!=(N::X, N::X); void operator++(N::X); // void g() { f(); } Scope *S = getCurScope(); DeclStmt *RangeDS = cast(RangeDecl); VarDecl *RangeVar = cast(RangeDS->getSingleDecl()); QualType RangeVarType = RangeVar->getType(); DeclStmt *LoopVarDS = cast(LoopVarDecl); VarDecl *LoopVar = cast(LoopVarDS->getSingleDecl()); // If we hit any errors, mark the loop variable as invalid if its type // contains 'auto'. InvalidateOnErrorScope Invalidate(*this, LoopVar, LoopVar->getType()->isUndeducedType()); StmtResult BeginDeclStmt = Begin; StmtResult EndDeclStmt = End; ExprResult NotEqExpr = Cond, IncrExpr = Inc; if (RangeVarType->isDependentType()) { // The range is implicitly used as a placeholder when it is dependent. RangeVar->markUsed(Context); // Deduce any 'auto's in the loop variable as 'DependentTy'. We'll fill // them in properly when we instantiate the loop. if (!LoopVar->isInvalidDecl() && Kind != BFRK_Check) LoopVar->setType(SubstAutoType(LoopVar->getType(), Context.DependentTy)); } else if (!BeginDeclStmt.get()) { SourceLocation RangeLoc = RangeVar->getLocation(); const QualType RangeVarNonRefType = RangeVarType.getNonReferenceType(); ExprResult BeginRangeRef = BuildDeclRefExpr(RangeVar, RangeVarNonRefType, VK_LValue, ColonLoc); if (BeginRangeRef.isInvalid()) return StmtError(); ExprResult EndRangeRef = BuildDeclRefExpr(RangeVar, RangeVarNonRefType, VK_LValue, ColonLoc); if (EndRangeRef.isInvalid()) return StmtError(); QualType AutoType = Context.getAutoDeductType(); Expr *Range = RangeVar->getInit(); if (!Range) return StmtError(); QualType RangeType = Range->getType(); if (RequireCompleteType(RangeLoc, RangeType, diag::err_for_range_incomplete_type)) return StmtError(); // Build auto __begin = begin-expr, __end = end-expr. 
VarDecl *BeginVar = BuildForRangeVarDecl(*this, ColonLoc, AutoType, "__begin"); VarDecl *EndVar = BuildForRangeVarDecl(*this, ColonLoc, AutoType, "__end"); // Build begin-expr and end-expr and attach to __begin and __end variables. ExprResult BeginExpr, EndExpr; if (const ArrayType *UnqAT = RangeType->getAsArrayTypeUnsafe()) { // - if _RangeT is an array type, begin-expr and end-expr are __range and // __range + __bound, respectively, where __bound is the array bound. If // _RangeT is an array of unknown size or an array of incomplete type, // the program is ill-formed; // begin-expr is __range. BeginExpr = BeginRangeRef; if (FinishForRangeVarDecl(*this, BeginVar, BeginRangeRef.get(), ColonLoc, diag::err_for_range_iter_deduction_failure)) { NoteForRangeBeginEndFunction(*this, BeginExpr.get(), BEF_begin); return StmtError(); } // Find the array bound. ExprResult BoundExpr; if (const ConstantArrayType *CAT = dyn_cast(UnqAT)) BoundExpr = IntegerLiteral::Create( Context, CAT->getSize(), Context.getPointerDiffType(), RangeLoc); else if (const VariableArrayType *VAT = dyn_cast(UnqAT)) BoundExpr = VAT->getSizeExpr(); else { // Can't be a DependentSizedArrayType or an IncompleteArrayType since // UnqAT is not incomplete and Range is not type-dependent. llvm_unreachable("Unexpected array type in for-range"); } // end-expr is __range + __bound. 
EndExpr = ActOnBinOp(S, ColonLoc, tok::plus, EndRangeRef.get(), BoundExpr.get()); if (EndExpr.isInvalid()) return StmtError(); if (FinishForRangeVarDecl(*this, EndVar, EndExpr.get(), ColonLoc, diag::err_for_range_iter_deduction_failure)) { NoteForRangeBeginEndFunction(*this, EndExpr.get(), BEF_end); return StmtError(); } } else { OverloadCandidateSet CandidateSet(RangeLoc, OverloadCandidateSet::CSK_Normal); BeginEndFunction BEFFailure; ForRangeStatus RangeStatus = BuildNonArrayForRange(*this, BeginRangeRef.get(), EndRangeRef.get(), RangeType, BeginVar, EndVar, ColonLoc, &CandidateSet, &BeginExpr, &EndExpr, &BEFFailure); if (Kind == BFRK_Build && RangeStatus == FRS_NoViableFunction && BEFFailure == BEF_begin) { // If the range is being built from an array parameter, emit a // a diagnostic that it is being treated as a pointer. if (DeclRefExpr *DRE = dyn_cast(Range)) { if (ParmVarDecl *PVD = dyn_cast(DRE->getDecl())) { QualType ArrayTy = PVD->getOriginalType(); QualType PointerTy = PVD->getType(); if (PointerTy->isPointerType() && ArrayTy->isArrayType()) { Diag(Range->getLocStart(), diag::err_range_on_array_parameter) << RangeLoc << PVD << ArrayTy << PointerTy; Diag(PVD->getLocation(), diag::note_declared_at); return StmtError(); } } } // If building the range failed, try dereferencing the range expression // unless a diagnostic was issued or the end function is problematic. StmtResult SR = RebuildForRangeWithDereference(*this, S, ForLoc, CoawaitLoc, LoopVarDecl, ColonLoc, Range, RangeLoc, RParenLoc); if (SR.isInvalid() || SR.isUsable()) return SR; } // Otherwise, emit diagnostics if we haven't already. if (RangeStatus == FRS_NoViableFunction) { Expr *Range = BEFFailure ? EndRangeRef.get() : BeginRangeRef.get(); Diag(Range->getLocStart(), diag::err_for_range_invalid) << RangeLoc << Range->getType() << BEFFailure; CandidateSet.NoteCandidates(*this, OCD_AllCandidates, Range); } // Return an error if no fix was discovered. 
if (RangeStatus != FRS_Success) return StmtError(); } assert(!BeginExpr.isInvalid() && !EndExpr.isInvalid() && "invalid range expression in for loop"); // C++11 [dcl.spec.auto]p7: BeginType and EndType must be the same. // C++1z removes this restriction. QualType BeginType = BeginVar->getType(), EndType = EndVar->getType(); if (!Context.hasSameType(BeginType, EndType)) { Diag(RangeLoc, getLangOpts().CPlusPlus1z ? diag::warn_for_range_begin_end_types_differ : diag::ext_for_range_begin_end_types_differ) << BeginType << EndType; NoteForRangeBeginEndFunction(*this, BeginExpr.get(), BEF_begin); NoteForRangeBeginEndFunction(*this, EndExpr.get(), BEF_end); } BeginDeclStmt = ActOnDeclStmt(ConvertDeclToDeclGroup(BeginVar), ColonLoc, ColonLoc); EndDeclStmt = ActOnDeclStmt(ConvertDeclToDeclGroup(EndVar), ColonLoc, ColonLoc); const QualType BeginRefNonRefType = BeginType.getNonReferenceType(); ExprResult BeginRef = BuildDeclRefExpr(BeginVar, BeginRefNonRefType, VK_LValue, ColonLoc); if (BeginRef.isInvalid()) return StmtError(); ExprResult EndRef = BuildDeclRefExpr(EndVar, EndType.getNonReferenceType(), VK_LValue, ColonLoc); if (EndRef.isInvalid()) return StmtError(); // Build and check __begin != __end expression. NotEqExpr = ActOnBinOp(S, ColonLoc, tok::exclaimequal, BeginRef.get(), EndRef.get()); if (!NotEqExpr.isInvalid()) NotEqExpr = CheckBooleanCondition(ColonLoc, NotEqExpr.get()); if (!NotEqExpr.isInvalid()) NotEqExpr = ActOnFinishFullExpr(NotEqExpr.get()); if (NotEqExpr.isInvalid()) { Diag(RangeLoc, diag::note_for_range_invalid_iterator) << RangeLoc << 0 << BeginRangeRef.get()->getType(); NoteForRangeBeginEndFunction(*this, BeginExpr.get(), BEF_begin); if (!Context.hasSameType(BeginType, EndType)) NoteForRangeBeginEndFunction(*this, EndExpr.get(), BEF_end); return StmtError(); } // Build and check ++__begin expression. 
BeginRef = BuildDeclRefExpr(BeginVar, BeginRefNonRefType, VK_LValue, ColonLoc); if (BeginRef.isInvalid()) return StmtError(); IncrExpr = ActOnUnaryOp(S, ColonLoc, tok::plusplus, BeginRef.get()); if (!IncrExpr.isInvalid() && CoawaitLoc.isValid()) IncrExpr = ActOnCoawaitExpr(S, CoawaitLoc, IncrExpr.get()); if (!IncrExpr.isInvalid()) IncrExpr = ActOnFinishFullExpr(IncrExpr.get()); if (IncrExpr.isInvalid()) { Diag(RangeLoc, diag::note_for_range_invalid_iterator) << RangeLoc << 2 << BeginRangeRef.get()->getType() ; NoteForRangeBeginEndFunction(*this, BeginExpr.get(), BEF_begin); return StmtError(); } // Build and check *__begin expression. BeginRef = BuildDeclRefExpr(BeginVar, BeginRefNonRefType, VK_LValue, ColonLoc); if (BeginRef.isInvalid()) return StmtError(); ExprResult DerefExpr = ActOnUnaryOp(S, ColonLoc, tok::star, BeginRef.get()); if (DerefExpr.isInvalid()) { Diag(RangeLoc, diag::note_for_range_invalid_iterator) << RangeLoc << 1 << BeginRangeRef.get()->getType(); NoteForRangeBeginEndFunction(*this, BeginExpr.get(), BEF_begin); return StmtError(); } // Attach *__begin as initializer for VD. Don't touch it if we're just // trying to determine whether this would be a valid range. if (!LoopVar->isInvalidDecl() && Kind != BFRK_Check) { AddInitializerToDecl(LoopVar, DerefExpr.get(), /*DirectInit=*/false); if (LoopVar->isInvalidDecl()) NoteForRangeBeginEndFunction(*this, BeginExpr.get(), BEF_begin); } } // Don't bother to actually allocate the result if we're just trying to // determine whether it would be valid. if (Kind == BFRK_Check) return StmtResult(); return new (Context) CXXForRangeStmt( RangeDS, cast_or_null(BeginDeclStmt.get()), cast_or_null(EndDeclStmt.get()), NotEqExpr.get(), IncrExpr.get(), LoopVarDS, /*Body=*/nullptr, ForLoc, CoawaitLoc, ColonLoc, RParenLoc); } /// FinishObjCForCollectionStmt - Attach the body to a objective-C foreach /// statement. 
StmtResult Sema::FinishObjCForCollectionStmt(Stmt *S, Stmt *B) { if (!S || !B) return StmtError(); ObjCForCollectionStmt * ForStmt = cast(S); ForStmt->setBody(B); return S; } // Warn when the loop variable is a const reference that creates a copy. // Suggest using the non-reference type for copies. If a copy can be prevented // suggest the const reference type that would do so. // For instance, given "for (const &Foo : Range)", suggest // "for (const Foo : Range)" to denote a copy is made for the loop. If // possible, also suggest "for (const &Bar : Range)" if this type prevents // the copy altogether. static void DiagnoseForRangeReferenceVariableCopies(Sema &SemaRef, const VarDecl *VD, QualType RangeInitType) { const Expr *InitExpr = VD->getInit(); if (!InitExpr) return; QualType VariableType = VD->getType(); if (auto Cleanups = dyn_cast(InitExpr)) if (!Cleanups->cleanupsHaveSideEffects()) InitExpr = Cleanups->getSubExpr(); const MaterializeTemporaryExpr *MTE = dyn_cast(InitExpr); // No copy made. if (!MTE) return; const Expr *E = MTE->GetTemporaryExpr()->IgnoreImpCasts(); // Searching for either UnaryOperator for dereference of a pointer or // CXXOperatorCallExpr for handling iterators. while (!isa(E) && !isa(E)) { if (const CXXConstructExpr *CCE = dyn_cast(E)) { E = CCE->getArg(0); } else if (const CXXMemberCallExpr *Call = dyn_cast(E)) { const MemberExpr *ME = cast(Call->getCallee()); E = ME->getBase(); } else { const MaterializeTemporaryExpr *MTE = cast(E); E = MTE->GetTemporaryExpr(); } E = E->IgnoreImpCasts(); } bool ReturnsReference = false; if (isa(E)) { ReturnsReference = true; } else { const CXXOperatorCallExpr *Call = cast(E); const FunctionDecl *FD = Call->getDirectCallee(); QualType ReturnType = FD->getReturnType(); ReturnsReference = ReturnType->isReferenceType(); } if (ReturnsReference) { // Loop variable creates a temporary. 
Suggest either to go with // non-reference loop variable to indiciate a copy is made, or // the correct time to bind a const reference. SemaRef.Diag(VD->getLocation(), diag::warn_for_range_const_reference_copy) << VD << VariableType << E->getType(); QualType NonReferenceType = VariableType.getNonReferenceType(); NonReferenceType.removeLocalConst(); QualType NewReferenceType = SemaRef.Context.getLValueReferenceType(E->getType().withConst()); SemaRef.Diag(VD->getLocStart(), diag::note_use_type_or_non_reference) << NonReferenceType << NewReferenceType << VD->getSourceRange(); } else { // The range always returns a copy, so a temporary is always created. // Suggest removing the reference from the loop variable. SemaRef.Diag(VD->getLocation(), diag::warn_for_range_variable_always_copy) << VD << RangeInitType; QualType NonReferenceType = VariableType.getNonReferenceType(); NonReferenceType.removeLocalConst(); SemaRef.Diag(VD->getLocStart(), diag::note_use_non_reference_type) << NonReferenceType << VD->getSourceRange(); } } // Warns when the loop variable can be changed to a reference type to // prevent a copy. For instance, if given "for (const Foo x : Range)" suggest // "for (const Foo &x : Range)" if this form does not make a copy. static void DiagnoseForRangeConstVariableCopies(Sema &SemaRef, const VarDecl *VD) { const Expr *InitExpr = VD->getInit(); if (!InitExpr) return; QualType VariableType = VD->getType(); if (const CXXConstructExpr *CE = dyn_cast(InitExpr)) { if (!CE->getConstructor()->isCopyConstructor()) return; } else if (const CastExpr *CE = dyn_cast(InitExpr)) { if (CE->getCastKind() != CK_LValueToRValue) return; } else { return; } // TODO: Determine a maximum size that a POD type can be before a diagnostic // should be emitted. Also, only ignore POD types with trivial copy // constructors. if (VariableType.isPODType(SemaRef.Context)) return; // Suggest changing from a const variable to a const reference variable // if doing so will prevent a copy. 
SemaRef.Diag(VD->getLocation(), diag::warn_for_range_copy) << VD << VariableType << InitExpr->getType(); SemaRef.Diag(VD->getLocStart(), diag::note_use_reference_type) << SemaRef.Context.getLValueReferenceType(VariableType) << VD->getSourceRange(); } /// DiagnoseForRangeVariableCopies - Diagnose three cases and fixes for them. /// 1) for (const foo &x : foos) where foos only returns a copy. Suggest /// using "const foo x" to show that a copy is made /// 2) for (const bar &x : foos) where bar is a temporary intialized by bar. /// Suggest either "const bar x" to keep the copying or "const foo& x" to /// prevent the copy. /// 3) for (const foo x : foos) where x is constructed from a reference foo. /// Suggest "const foo &x" to prevent the copy. static void DiagnoseForRangeVariableCopies(Sema &SemaRef, const CXXForRangeStmt *ForStmt) { if (SemaRef.Diags.isIgnored(diag::warn_for_range_const_reference_copy, ForStmt->getLocStart()) && SemaRef.Diags.isIgnored(diag::warn_for_range_variable_always_copy, ForStmt->getLocStart()) && SemaRef.Diags.isIgnored(diag::warn_for_range_copy, ForStmt->getLocStart())) { return; } const VarDecl *VD = ForStmt->getLoopVariable(); if (!VD) return; QualType VariableType = VD->getType(); if (VariableType->isIncompleteType()) return; const Expr *InitExpr = VD->getInit(); if (!InitExpr) return; if (VariableType->isReferenceType()) { DiagnoseForRangeReferenceVariableCopies(SemaRef, VD, ForStmt->getRangeInit()->getType()); } else if (VariableType.isConstQualified()) { DiagnoseForRangeConstVariableCopies(SemaRef, VD); } } /// FinishCXXForRangeStmt - Attach the body to a C++0x for-range statement. /// This is a separate step from ActOnCXXForRangeStmt because analysis of the /// body cannot be performed until after the type of the range variable is /// determined. 
StmtResult Sema::FinishCXXForRangeStmt(Stmt *S, Stmt *B) { if (!S || !B) return StmtError(); if (isa(S)) return FinishObjCForCollectionStmt(S, B); CXXForRangeStmt *ForStmt = cast(S); ForStmt->setBody(B); DiagnoseEmptyStmtBody(ForStmt->getRParenLoc(), B, diag::warn_empty_range_based_for_body); DiagnoseForRangeVariableCopies(*this, ForStmt); return S; } StmtResult Sema::ActOnGotoStmt(SourceLocation GotoLoc, SourceLocation LabelLoc, LabelDecl *TheDecl) { getCurFunction()->setHasBranchIntoScope(); TheDecl->markUsed(Context); return new (Context) GotoStmt(TheDecl, GotoLoc, LabelLoc); } StmtResult Sema::ActOnIndirectGotoStmt(SourceLocation GotoLoc, SourceLocation StarLoc, Expr *E) { // Convert operand to void* if (!E->isTypeDependent()) { QualType ETy = E->getType(); QualType DestTy = Context.getPointerType(Context.VoidTy.withConst()); ExprResult ExprRes = E; AssignConvertType ConvTy = CheckSingleAssignmentConstraints(DestTy, ExprRes); if (ExprRes.isInvalid()) return StmtError(); E = ExprRes.get(); if (DiagnoseAssignmentResult(ConvTy, StarLoc, DestTy, ETy, E, AA_Passing)) return StmtError(); } ExprResult ExprRes = ActOnFinishFullExpr(E); if (ExprRes.isInvalid()) return StmtError(); E = ExprRes.get(); getCurFunction()->setHasIndirectGoto(); return new (Context) IndirectGotoStmt(GotoLoc, StarLoc, E); } static void CheckJumpOutOfSEHFinally(Sema &S, SourceLocation Loc, const Scope &DestScope) { if (!S.CurrentSEHFinally.empty() && DestScope.Contains(*S.CurrentSEHFinally.back())) { S.Diag(Loc, diag::warn_jump_out_of_seh_finally); } } StmtResult Sema::ActOnContinueStmt(SourceLocation ContinueLoc, Scope *CurScope) { Scope *S = CurScope->getContinueParent(); if (!S) { // C99 6.8.6.2p1: A break shall appear only in or as a loop body. 
return StmtError(Diag(ContinueLoc, diag::err_continue_not_in_loop)); } CheckJumpOutOfSEHFinally(*this, ContinueLoc, *S); return new (Context) ContinueStmt(ContinueLoc); } StmtResult Sema::ActOnBreakStmt(SourceLocation BreakLoc, Scope *CurScope) { Scope *S = CurScope->getBreakParent(); if (!S) { // C99 6.8.6.3p1: A break shall appear only in or as a switch/loop body. return StmtError(Diag(BreakLoc, diag::err_break_not_in_loop_or_switch)); } if (S->isOpenMPLoopScope()) return StmtError(Diag(BreakLoc, diag::err_omp_loop_cannot_use_stmt) << "break"); CheckJumpOutOfSEHFinally(*this, BreakLoc, *S); return new (Context) BreakStmt(BreakLoc); } /// \brief Determine whether the given expression is a candidate for /// copy elision in either a return statement or a throw expression. /// /// \param ReturnType If we're determining the copy elision candidate for /// a return statement, this is the return type of the function. If we're /// determining the copy elision candidate for a throw expression, this will /// be a NULL type. /// /// \param E The expression being returned from the function or block, or /// being thrown. /// /// \param AllowParamOrMoveConstructible Whether we allow function parameters or /// id-expressions that could be moved out of the function to be considered NRVO /// candidates. C++ prohibits these for NRVO itself, but we re-use this logic to /// determine whether we should try to move as part of a return or throw (which /// does allow function parameters). /// /// \returns The NRVO candidate variable, if the return statement may use the /// NRVO, or NULL if there is no such candidate. VarDecl *Sema::getCopyElisionCandidate(QualType ReturnType, Expr *E, bool AllowParamOrMoveConstructible) { if (!getLangOpts().CPlusPlus) return nullptr; // - in a return statement in a function [where] ... // ... the expression is the name of a non-volatile automatic object ... 
DeclRefExpr *DR = dyn_cast(E->IgnoreParens()); if (!DR || DR->refersToEnclosingVariableOrCapture()) return nullptr; VarDecl *VD = dyn_cast(DR->getDecl()); if (!VD) return nullptr; if (isCopyElisionCandidate(ReturnType, VD, AllowParamOrMoveConstructible)) return VD; return nullptr; } bool Sema::isCopyElisionCandidate(QualType ReturnType, const VarDecl *VD, bool AllowParamOrMoveConstructible) { QualType VDType = VD->getType(); // - in a return statement in a function with ... // ... a class return type ... if (!ReturnType.isNull() && !ReturnType->isDependentType()) { if (!ReturnType->isRecordType()) return false; // ... the same cv-unqualified type as the function return type ... // When considering moving this expression out, allow dissimilar types. if (!AllowParamOrMoveConstructible && !VDType->isDependentType() && !Context.hasSameUnqualifiedType(ReturnType, VDType)) return false; } // ...object (other than a function or catch-clause parameter)... if (VD->getKind() != Decl::Var && !(AllowParamOrMoveConstructible && VD->getKind() == Decl::ParmVar)) return false; if (VD->isExceptionVariable()) return false; // ...automatic... if (!VD->hasLocalStorage()) return false; + // Return false if VD is a __block variable. We don't want to implicitly move + // out of a __block variable during a return because we cannot assume the + // variable will no longer be used. + if (VD->hasAttr()) return false; + if (AllowParamOrMoveConstructible) return true; // ...non-volatile... if (VD->getType().isVolatileQualified()) return false; - - // __block variables can't be allocated in a way that permits NRVO. - if (VD->hasAttr()) return false; // Variables with higher required alignment than their type's ABI // alignment cannot use NRVO. 
if (!VD->getType()->isDependentType() && VD->hasAttr() && Context.getDeclAlign(VD) > Context.getTypeAlignInChars(VD->getType())) return false; return true; } /// \brief Perform the initialization of a potentially-movable value, which /// is the result of return value. /// /// This routine implements C++14 [class.copy]p32, which attempts to treat /// returned lvalues as rvalues in certain cases (to prefer move construction), /// then falls back to treating them as lvalues if that failed. ExprResult Sema::PerformMoveOrCopyInitialization(const InitializedEntity &Entity, const VarDecl *NRVOCandidate, QualType ResultType, Expr *Value, bool AllowNRVO) { // C++14 [class.copy]p32: // When the criteria for elision of a copy/move operation are met, but not for // an exception-declaration, and the object to be copied is designated by an // lvalue, or when the expression in a return statement is a (possibly // parenthesized) id-expression that names an object with automatic storage // duration declared in the body or parameter-declaration-clause of the // innermost enclosing function or lambda-expression, overload resolution to // select the constructor for the copy is first performed as if the object // were designated by an rvalue. 
ExprResult Res = ExprError(); if (AllowNRVO && !NRVOCandidate) NRVOCandidate = getCopyElisionCandidate(ResultType, Value, true); if (AllowNRVO && NRVOCandidate) { ImplicitCastExpr AsRvalue(ImplicitCastExpr::OnStack, Value->getType(), CK_NoOp, Value, VK_XValue); Expr *InitExpr = &AsRvalue; InitializationKind Kind = InitializationKind::CreateCopy( Value->getLocStart(), Value->getLocStart()); InitializationSequence Seq(*this, Entity, Kind, InitExpr); if (Seq) { for (const InitializationSequence::Step &Step : Seq.steps()) { if (!(Step.Kind == InitializationSequence::SK_ConstructorInitialization || (Step.Kind == InitializationSequence::SK_UserConversion && isa(Step.Function.Function)))) continue; CXXConstructorDecl *Constructor = cast(Step.Function.Function); const RValueReferenceType *RRefType = Constructor->getParamDecl(0)->getType() ->getAs(); // [...] If the first overload resolution fails or was not performed, or // if the type of the first parameter of the selected constructor is not // an rvalue reference to the object’s type (possibly cv-qualified), // overload resolution is performed again, considering the object as an // lvalue. if (!RRefType || !Context.hasSameUnqualifiedType(RRefType->getPointeeType(), NRVOCandidate->getType())) break; // Promote "AsRvalue" to the heap, since we now need this // expression node to persist. Value = ImplicitCastExpr::Create(Context, Value->getType(), CK_NoOp, Value, nullptr, VK_XValue); // Complete type-checking the initialization of the return type // using the constructor we found. Res = Seq.Perform(*this, Entity, Kind, Value); } } } // Either we didn't meet the criteria for treating an lvalue as an rvalue, // above, or overload resolution failed. Either way, we need to try // (again) now with the return value expression as written. 
if (Res.isInvalid()) Res = PerformCopyInitialization(Entity, SourceLocation(), Value); return Res; } /// \brief Determine whether the declared return type of the specified function /// contains 'auto'. static bool hasDeducedReturnType(FunctionDecl *FD) { const FunctionProtoType *FPT = FD->getTypeSourceInfo()->getType()->castAs(); return FPT->getReturnType()->isUndeducedType(); } /// ActOnCapScopeReturnStmt - Utility routine to type-check return statements /// for capturing scopes. /// StmtResult Sema::ActOnCapScopeReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) { // If this is the first return we've seen, infer the return type. // [expr.prim.lambda]p4 in C++11; block literals follow the same rules. CapturingScopeInfo *CurCap = cast(getCurFunction()); QualType FnRetType = CurCap->ReturnType; LambdaScopeInfo *CurLambda = dyn_cast(CurCap); bool HasDeducedReturnType = CurLambda && hasDeducedReturnType(CurLambda->CallOperator); if (ExprEvalContexts.back().Context == DiscardedStatement && (HasDeducedReturnType || CurCap->HasImplicitReturnType)) { if (RetValExp) { ExprResult ER = ActOnFinishFullExpr(RetValExp, ReturnLoc); if (ER.isInvalid()) return StmtError(); RetValExp = ER.get(); } return new (Context) ReturnStmt(ReturnLoc, RetValExp, nullptr); } if (HasDeducedReturnType) { // In C++1y, the return type may involve 'auto'. // FIXME: Blocks might have a return type of 'auto' explicitly specified. 
FunctionDecl *FD = CurLambda->CallOperator; if (CurCap->ReturnType.isNull()) CurCap->ReturnType = FD->getReturnType(); AutoType *AT = CurCap->ReturnType->getContainedAutoType(); assert(AT && "lost auto type from lambda return type"); if (DeduceFunctionTypeFromReturnExpr(FD, ReturnLoc, RetValExp, AT)) { FD->setInvalidDecl(); return StmtError(); } CurCap->ReturnType = FnRetType = FD->getReturnType(); } else if (CurCap->HasImplicitReturnType) { // For blocks/lambdas with implicit return types, we check each return // statement individually, and deduce the common return type when the block // or lambda is completed. // FIXME: Fold this into the 'auto' codepath above. if (RetValExp && !isa(RetValExp)) { ExprResult Result = DefaultFunctionArrayLvalueConversion(RetValExp); if (Result.isInvalid()) return StmtError(); RetValExp = Result.get(); // DR1048: even prior to C++14, we should use the 'auto' deduction rules // when deducing a return type for a lambda-expression (or by extension // for a block). These rules differ from the stated C++11 rules only in // that they remove top-level cv-qualifiers. if (!CurContext->isDependentContext()) FnRetType = RetValExp->getType().getUnqualifiedType(); else FnRetType = CurCap->ReturnType = Context.DependentTy; } else { if (RetValExp) { // C++11 [expr.lambda.prim]p4 bans inferring the result from an // initializer list, because it is not an expression (even // though we represent it as one). We still deduce 'void'. Diag(ReturnLoc, diag::err_lambda_return_init_list) << RetValExp->getSourceRange(); } FnRetType = Context.VoidTy; } // Although we'll properly infer the type of the block once it's completed, // make sure we provide a return type now for better error recovery. 
if (CurCap->ReturnType.isNull()) CurCap->ReturnType = FnRetType; } assert(!FnRetType.isNull()); if (BlockScopeInfo *CurBlock = dyn_cast(CurCap)) { if (CurBlock->FunctionType->getAs()->getNoReturnAttr()) { Diag(ReturnLoc, diag::err_noreturn_block_has_return_expr); return StmtError(); } } else if (CapturedRegionScopeInfo *CurRegion = dyn_cast(CurCap)) { Diag(ReturnLoc, diag::err_return_in_captured_stmt) << CurRegion->getRegionName(); return StmtError(); } else { assert(CurLambda && "unknown kind of captured scope"); if (CurLambda->CallOperator->getType()->getAs() ->getNoReturnAttr()) { Diag(ReturnLoc, diag::err_noreturn_lambda_has_return_expr); return StmtError(); } } // Otherwise, verify that this result type matches the previous one. We are // pickier with blocks than for normal functions because we don't have GCC // compatibility to worry about here. const VarDecl *NRVOCandidate = nullptr; if (FnRetType->isDependentType()) { // Delay processing for now. TODO: there are lots of dependent // types we can conclusively prove aren't void. } else if (FnRetType->isVoidType()) { if (RetValExp && !isa(RetValExp) && !(getLangOpts().CPlusPlus && (RetValExp->isTypeDependent() || RetValExp->getType()->isVoidType()))) { if (!getLangOpts().CPlusPlus && RetValExp->getType()->isVoidType()) Diag(ReturnLoc, diag::ext_return_has_void_expr) << "literal" << 2; else { Diag(ReturnLoc, diag::err_return_block_has_expr); RetValExp = nullptr; } } } else if (!RetValExp) { return StmtError(Diag(ReturnLoc, diag::err_block_return_missing_expr)); } else if (!RetValExp->isTypeDependent()) { // we have a non-void block with an expression, continue checking // C99 6.8.6.4p3(136): The return statement is not an assignment. The // overlap restriction of subclause 6.5.16.1 does not apply to the case of // function return. // In C++ the return statement is handled via a copy initialization. // the C version of which boils down to CheckSingleAssignmentConstraints. 
NRVOCandidate = getCopyElisionCandidate(FnRetType, RetValExp, false); InitializedEntity Entity = InitializedEntity::InitializeResult(ReturnLoc, FnRetType, NRVOCandidate != nullptr); ExprResult Res = PerformMoveOrCopyInitialization(Entity, NRVOCandidate, FnRetType, RetValExp); if (Res.isInvalid()) { // FIXME: Cleanup temporaries here, anyway? return StmtError(); } RetValExp = Res.get(); CheckReturnValExpr(RetValExp, FnRetType, ReturnLoc); } else { NRVOCandidate = getCopyElisionCandidate(FnRetType, RetValExp, false); } if (RetValExp) { ExprResult ER = ActOnFinishFullExpr(RetValExp, ReturnLoc); if (ER.isInvalid()) return StmtError(); RetValExp = ER.get(); } ReturnStmt *Result = new (Context) ReturnStmt(ReturnLoc, RetValExp, NRVOCandidate); // If we need to check for the named return value optimization, // or if we need to infer the return type, // save the return statement in our scope for later processing. if (CurCap->HasImplicitReturnType || NRVOCandidate) FunctionScopes.back()->Returns.push_back(Result); if (FunctionScopes.back()->FirstReturnLoc.isInvalid()) FunctionScopes.back()->FirstReturnLoc = ReturnLoc; return Result; } namespace { /// \brief Marks all typedefs in all local classes in a type referenced. /// /// In a function like /// auto f() { /// struct S { typedef int a; }; /// return S(); /// } /// /// the local type escapes and could be referenced in some TUs but not in /// others. Pretend that all local typedefs are always referenced, to not warn /// on this. This isn't necessary if f has internal linkage, or the typedef /// is private. 
class LocalTypedefNameReferencer : public RecursiveASTVisitor { public: LocalTypedefNameReferencer(Sema &S) : S(S) {} bool VisitRecordType(const RecordType *RT); private: Sema &S; }; bool LocalTypedefNameReferencer::VisitRecordType(const RecordType *RT) { auto *R = dyn_cast(RT->getDecl()); if (!R || !R->isLocalClass() || !R->isLocalClass()->isExternallyVisible() || R->isDependentType()) return true; for (auto *TmpD : R->decls()) if (auto *T = dyn_cast(TmpD)) if (T->getAccess() != AS_private || R->hasFriends()) S.MarkAnyDeclReferenced(T->getLocation(), T, /*OdrUse=*/false); return true; } } TypeLoc Sema::getReturnTypeLoc(FunctionDecl *FD) const { TypeLoc TL = FD->getTypeSourceInfo()->getTypeLoc().IgnoreParens(); while (auto ATL = TL.getAs()) TL = ATL.getModifiedLoc().IgnoreParens(); return TL.castAs().getReturnLoc(); } /// Deduce the return type for a function from a returned expression, per /// C++1y [dcl.spec.auto]p6. bool Sema::DeduceFunctionTypeFromReturnExpr(FunctionDecl *FD, SourceLocation ReturnLoc, Expr *&RetExpr, AutoType *AT) { TypeLoc OrigResultType = getReturnTypeLoc(FD); QualType Deduced; if (RetExpr && isa(RetExpr)) { // If the deduction is for a return statement and the initializer is // a braced-init-list, the program is ill-formed. Diag(RetExpr->getExprLoc(), getCurLambda() ? diag::err_lambda_return_init_list : diag::err_auto_fn_return_init_list) << RetExpr->getSourceRange(); return true; } if (FD->isDependentContext()) { // C++1y [dcl.spec.auto]p12: // Return type deduction [...] occurs when the definition is // instantiated even if the function body contains a return // statement with a non-type-dependent operand. assert(AT->isDeduced() && "should have deduced to dependent type"); return false; } if (RetExpr) { // Otherwise, [...] deduce a value for U using the rules of template // argument deduction. 
DeduceAutoResult DAR = DeduceAutoType(OrigResultType, RetExpr, Deduced); if (DAR == DAR_Failed && !FD->isInvalidDecl()) Diag(RetExpr->getExprLoc(), diag::err_auto_fn_deduction_failure) << OrigResultType.getType() << RetExpr->getType(); if (DAR != DAR_Succeeded) return true; // If a local type is part of the returned type, mark its fields as // referenced. LocalTypedefNameReferencer Referencer(*this); Referencer.TraverseType(RetExpr->getType()); } else { // In the case of a return with no operand, the initializer is considered // to be void(). // // Deduction here can only succeed if the return type is exactly 'cv auto' // or 'decltype(auto)', so just check for that case directly. if (!OrigResultType.getType()->getAs()) { Diag(ReturnLoc, diag::err_auto_fn_return_void_but_not_auto) << OrigResultType.getType(); return true; } // We always deduce U = void in this case. Deduced = SubstAutoType(OrigResultType.getType(), Context.VoidTy); if (Deduced.isNull()) return true; } // If a function with a declared return type that contains a placeholder type // has multiple return statements, the return type is deduced for each return // statement. [...] if the type deduced is not the same in each deduction, // the program is ill-formed. QualType DeducedT = AT->getDeducedType(); if (!DeducedT.isNull() && !FD->isInvalidDecl()) { AutoType *NewAT = Deduced->getContainedAutoType(); // It is possible that NewAT->getDeducedType() is null. When that happens, // we should not crash, instead we ignore this deduction. 
if (NewAT->getDeducedType().isNull()) return false; CanQualType OldDeducedType = Context.getCanonicalFunctionResultType( DeducedT); CanQualType NewDeducedType = Context.getCanonicalFunctionResultType( NewAT->getDeducedType()); if (!FD->isDependentContext() && OldDeducedType != NewDeducedType) { const LambdaScopeInfo *LambdaSI = getCurLambda(); if (LambdaSI && LambdaSI->HasImplicitReturnType) { Diag(ReturnLoc, diag::err_typecheck_missing_return_type_incompatible) << NewAT->getDeducedType() << DeducedT << true /*IsLambda*/; } else { Diag(ReturnLoc, diag::err_auto_fn_different_deductions) << (AT->isDecltypeAuto() ? 1 : 0) << NewAT->getDeducedType() << DeducedT; } return true; } } else if (!FD->isInvalidDecl()) { // Update all declarations of the function to have the deduced return type. Context.adjustDeducedFunctionResultType(FD, Deduced); } return false; } StmtResult Sema::ActOnReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp, Scope *CurScope) { StmtResult R = BuildReturnStmt(ReturnLoc, RetValExp); if (R.isInvalid() || ExprEvalContexts.back().Context == DiscardedStatement) return R; if (VarDecl *VD = const_cast(cast(R.get())->getNRVOCandidate())) { CurScope->addNRVOCandidate(VD); } else { CurScope->setNoNRVO(); } CheckJumpOutOfSEHFinally(*this, ReturnLoc, *CurScope->getFnParent()); return R; } StmtResult Sema::BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) { // Check for unexpanded parameter packs. 
if (RetValExp && DiagnoseUnexpandedParameterPack(RetValExp)) return StmtError(); if (isa(getCurFunction())) return ActOnCapScopeReturnStmt(ReturnLoc, RetValExp); QualType FnRetType; QualType RelatedRetType; const AttrVec *Attrs = nullptr; bool isObjCMethod = false; if (const FunctionDecl *FD = getCurFunctionDecl()) { FnRetType = FD->getReturnType(); if (FD->hasAttrs()) Attrs = &FD->getAttrs(); if (FD->isNoReturn()) Diag(ReturnLoc, diag::warn_noreturn_function_has_return_expr) << FD->getDeclName(); if (FD->isMain() && RetValExp) if (isa(RetValExp)) Diag(ReturnLoc, diag::warn_main_returns_bool_literal) << RetValExp->getSourceRange(); } else if (ObjCMethodDecl *MD = getCurMethodDecl()) { FnRetType = MD->getReturnType(); isObjCMethod = true; if (MD->hasAttrs()) Attrs = &MD->getAttrs(); if (MD->hasRelatedResultType() && MD->getClassInterface()) { // In the implementation of a method with a related return type, the // type used to type-check the validity of return statements within the // method body is a pointer to the type of the class being implemented. RelatedRetType = Context.getObjCInterfaceType(MD->getClassInterface()); RelatedRetType = Context.getObjCObjectPointerType(RelatedRetType); } } else // If we don't have a function/method context, bail. return StmtError(); // C++1z: discarded return statements are not considered when deducing a // return type. if (ExprEvalContexts.back().Context == DiscardedStatement && FnRetType->getContainedAutoType()) { if (RetValExp) { ExprResult ER = ActOnFinishFullExpr(RetValExp, ReturnLoc); if (ER.isInvalid()) return StmtError(); RetValExp = ER.get(); } return new (Context) ReturnStmt(ReturnLoc, RetValExp, nullptr); } // FIXME: Add a flag to the ScopeInfo to indicate whether we're performing // deduction. 
if (getLangOpts().CPlusPlus14) { if (AutoType *AT = FnRetType->getContainedAutoType()) { FunctionDecl *FD = cast(CurContext); if (DeduceFunctionTypeFromReturnExpr(FD, ReturnLoc, RetValExp, AT)) { FD->setInvalidDecl(); return StmtError(); } else { FnRetType = FD->getReturnType(); } } } bool HasDependentReturnType = FnRetType->isDependentType(); ReturnStmt *Result = nullptr; if (FnRetType->isVoidType()) { if (RetValExp) { if (isa(RetValExp)) { // We simply never allow init lists as the return value of void // functions. This is compatible because this was never allowed before, // so there's no legacy code to deal with. NamedDecl *CurDecl = getCurFunctionOrMethodDecl(); int FunctionKind = 0; if (isa(CurDecl)) FunctionKind = 1; else if (isa(CurDecl)) FunctionKind = 2; else if (isa(CurDecl)) FunctionKind = 3; Diag(ReturnLoc, diag::err_return_init_list) << CurDecl->getDeclName() << FunctionKind << RetValExp->getSourceRange(); // Drop the expression. RetValExp = nullptr; } else if (!RetValExp->isTypeDependent()) { // C99 6.8.6.4p1 (ext_ since GCC warns) unsigned D = diag::ext_return_has_expr; if (RetValExp->getType()->isVoidType()) { NamedDecl *CurDecl = getCurFunctionOrMethodDecl(); if (isa(CurDecl) || isa(CurDecl)) D = diag::err_ctor_dtor_returns_void; else D = diag::ext_return_has_void_expr; } else { ExprResult Result = RetValExp; Result = IgnoredValueConversions(Result.get()); if (Result.isInvalid()) return StmtError(); RetValExp = Result.get(); RetValExp = ImpCastExprToType(RetValExp, Context.VoidTy, CK_ToVoid).get(); } // return of void in constructor/destructor is illegal in C++. if (D == diag::err_ctor_dtor_returns_void) { NamedDecl *CurDecl = getCurFunctionOrMethodDecl(); Diag(ReturnLoc, D) << CurDecl->getDeclName() << isa(CurDecl) << RetValExp->getSourceRange(); } // return (some void expression); is legal in C++. 
else if (D != diag::ext_return_has_void_expr || !getLangOpts().CPlusPlus) { NamedDecl *CurDecl = getCurFunctionOrMethodDecl(); int FunctionKind = 0; if (isa(CurDecl)) FunctionKind = 1; else if (isa(CurDecl)) FunctionKind = 2; else if (isa(CurDecl)) FunctionKind = 3; Diag(ReturnLoc, D) << CurDecl->getDeclName() << FunctionKind << RetValExp->getSourceRange(); } } if (RetValExp) { ExprResult ER = ActOnFinishFullExpr(RetValExp, ReturnLoc); if (ER.isInvalid()) return StmtError(); RetValExp = ER.get(); } } Result = new (Context) ReturnStmt(ReturnLoc, RetValExp, nullptr); } else if (!RetValExp && !HasDependentReturnType) { FunctionDecl *FD = getCurFunctionDecl(); unsigned DiagID; if (getLangOpts().CPlusPlus11 && FD && FD->isConstexpr()) { // C++11 [stmt.return]p2 DiagID = diag::err_constexpr_return_missing_expr; FD->setInvalidDecl(); } else if (getLangOpts().C99) { // C99 6.8.6.4p1 (ext_ since GCC warns) DiagID = diag::ext_return_missing_expr; } else { // C90 6.6.6.4p4 DiagID = diag::warn_return_missing_expr; } if (FD) Diag(ReturnLoc, DiagID) << FD->getIdentifier() << 0/*fn*/; else Diag(ReturnLoc, DiagID) << getCurMethodDecl()->getDeclName() << 1/*meth*/; Result = new (Context) ReturnStmt(ReturnLoc); } else { assert(RetValExp || HasDependentReturnType); const VarDecl *NRVOCandidate = nullptr; QualType RetType = RelatedRetType.isNull() ? FnRetType : RelatedRetType; // C99 6.8.6.4p3(136): The return statement is not an assignment. The // overlap restriction of subclause 6.5.16.1 does not apply to the case of // function return. // In C++ the return statement is handled via a copy initialization, // the C version of which boils down to CheckSingleAssignmentConstraints. 
if (RetValExp) NRVOCandidate = getCopyElisionCandidate(FnRetType, RetValExp, false); if (!HasDependentReturnType && !RetValExp->isTypeDependent()) { // we have a non-void function with an expression, continue checking InitializedEntity Entity = InitializedEntity::InitializeResult(ReturnLoc, RetType, NRVOCandidate != nullptr); ExprResult Res = PerformMoveOrCopyInitialization(Entity, NRVOCandidate, RetType, RetValExp); if (Res.isInvalid()) { // FIXME: Clean up temporaries here anyway? return StmtError(); } RetValExp = Res.getAs(); // If we have a related result type, we need to implicitly // convert back to the formal result type. We can't pretend to // initialize the result again --- we might end double-retaining // --- so instead we initialize a notional temporary. if (!RelatedRetType.isNull()) { Entity = InitializedEntity::InitializeRelatedResult(getCurMethodDecl(), FnRetType); Res = PerformCopyInitialization(Entity, ReturnLoc, RetValExp); if (Res.isInvalid()) { // FIXME: Clean up temporaries here anyway? return StmtError(); } RetValExp = Res.getAs(); } CheckReturnValExpr(RetValExp, FnRetType, ReturnLoc, isObjCMethod, Attrs, getCurFunctionDecl()); } if (RetValExp) { ExprResult ER = ActOnFinishFullExpr(RetValExp, ReturnLoc); if (ER.isInvalid()) return StmtError(); RetValExp = ER.get(); } Result = new (Context) ReturnStmt(ReturnLoc, RetValExp, NRVOCandidate); } // If we need to check for the named return value optimization, save the // return statement in our scope for later processing. 
if (Result->getNRVOCandidate()) FunctionScopes.back()->Returns.push_back(Result); if (FunctionScopes.back()->FirstReturnLoc.isInvalid()) FunctionScopes.back()->FirstReturnLoc = ReturnLoc; return Result; } StmtResult Sema::ActOnObjCAtCatchStmt(SourceLocation AtLoc, SourceLocation RParen, Decl *Parm, Stmt *Body) { VarDecl *Var = cast_or_null(Parm); if (Var && Var->isInvalidDecl()) return StmtError(); return new (Context) ObjCAtCatchStmt(AtLoc, RParen, Var, Body); } StmtResult Sema::ActOnObjCAtFinallyStmt(SourceLocation AtLoc, Stmt *Body) { return new (Context) ObjCAtFinallyStmt(AtLoc, Body); } StmtResult Sema::ActOnObjCAtTryStmt(SourceLocation AtLoc, Stmt *Try, MultiStmtArg CatchStmts, Stmt *Finally) { if (!getLangOpts().ObjCExceptions) Diag(AtLoc, diag::err_objc_exceptions_disabled) << "@try"; getCurFunction()->setHasBranchProtectedScope(); unsigned NumCatchStmts = CatchStmts.size(); return ObjCAtTryStmt::Create(Context, AtLoc, Try, CatchStmts.data(), NumCatchStmts, Finally); } StmtResult Sema::BuildObjCAtThrowStmt(SourceLocation AtLoc, Expr *Throw) { if (Throw) { ExprResult Result = DefaultLvalueConversion(Throw); if (Result.isInvalid()) return StmtError(); Result = ActOnFinishFullExpr(Result.get()); if (Result.isInvalid()) return StmtError(); Throw = Result.get(); QualType ThrowType = Throw->getType(); // Make sure the expression type is an ObjC pointer or "void *". 
if (!ThrowType->isDependentType() && !ThrowType->isObjCObjectPointerType()) { const PointerType *PT = ThrowType->getAs(); if (!PT || !PT->getPointeeType()->isVoidType()) return StmtError(Diag(AtLoc, diag::err_objc_throw_expects_object) << Throw->getType() << Throw->getSourceRange()); } } return new (Context) ObjCAtThrowStmt(AtLoc, Throw); } StmtResult Sema::ActOnObjCAtThrowStmt(SourceLocation AtLoc, Expr *Throw, Scope *CurScope) { if (!getLangOpts().ObjCExceptions) Diag(AtLoc, diag::err_objc_exceptions_disabled) << "@throw"; if (!Throw) { // @throw without an expression designates a rethrow (which must occur // in the context of an @catch clause). Scope *AtCatchParent = CurScope; while (AtCatchParent && !AtCatchParent->isAtCatchScope()) AtCatchParent = AtCatchParent->getParent(); if (!AtCatchParent) return StmtError(Diag(AtLoc, diag::err_rethrow_used_outside_catch)); } return BuildObjCAtThrowStmt(AtLoc, Throw); } ExprResult Sema::ActOnObjCAtSynchronizedOperand(SourceLocation atLoc, Expr *operand) { ExprResult result = DefaultLvalueConversion(operand); if (result.isInvalid()) return ExprError(); operand = result.get(); // Make sure the expression type is an ObjC pointer or "void *". 
QualType type = operand->getType(); if (!type->isDependentType() && !type->isObjCObjectPointerType()) { const PointerType *pointerType = type->getAs(); if (!pointerType || !pointerType->getPointeeType()->isVoidType()) { if (getLangOpts().CPlusPlus) { if (RequireCompleteType(atLoc, type, diag::err_incomplete_receiver_type)) return Diag(atLoc, diag::err_objc_synchronized_expects_object) << type << operand->getSourceRange(); ExprResult result = PerformContextuallyConvertToObjCPointer(operand); if (result.isInvalid()) return ExprError(); if (!result.isUsable()) return Diag(atLoc, diag::err_objc_synchronized_expects_object) << type << operand->getSourceRange(); operand = result.get(); } else { return Diag(atLoc, diag::err_objc_synchronized_expects_object) << type << operand->getSourceRange(); } } } // The operand to @synchronized is a full-expression. return ActOnFinishFullExpr(operand); } StmtResult Sema::ActOnObjCAtSynchronizedStmt(SourceLocation AtLoc, Expr *SyncExpr, Stmt *SyncBody) { // We can't jump into or indirect-jump out of a @synchronized block. getCurFunction()->setHasBranchProtectedScope(); return new (Context) ObjCAtSynchronizedStmt(AtLoc, SyncExpr, SyncBody); } /// ActOnCXXCatchBlock - Takes an exception declaration and a handler block /// and creates a proper catch handler from them. StmtResult Sema::ActOnCXXCatchBlock(SourceLocation CatchLoc, Decl *ExDecl, Stmt *HandlerBlock) { // There's nothing to test that ActOnExceptionDecl didn't already test. return new (Context) CXXCatchStmt(CatchLoc, cast_or_null(ExDecl), HandlerBlock); } StmtResult Sema::ActOnObjCAutoreleasePoolStmt(SourceLocation AtLoc, Stmt *Body) { getCurFunction()->setHasBranchProtectedScope(); return new (Context) ObjCAutoreleasePoolStmt(AtLoc, Body); } namespace { class CatchHandlerType { QualType QT; unsigned IsPointer : 1; // This is a special constructor to be used only with DenseMapInfo's // getEmptyKey() and getTombstoneKey() functions. 
friend struct llvm::DenseMapInfo; enum Unique { ForDenseMap }; CatchHandlerType(QualType QT, Unique) : QT(QT), IsPointer(false) {} public: /// Used when creating a CatchHandlerType from a handler type; will determine /// whether the type is a pointer or reference and will strip off the top /// level pointer and cv-qualifiers. CatchHandlerType(QualType Q) : QT(Q), IsPointer(false) { if (QT->isPointerType()) IsPointer = true; if (IsPointer || QT->isReferenceType()) QT = QT->getPointeeType(); QT = QT.getUnqualifiedType(); } /// Used when creating a CatchHandlerType from a base class type; pretends the /// type passed in had the pointer qualifier, does not need to get an /// unqualified type. CatchHandlerType(QualType QT, bool IsPointer) : QT(QT), IsPointer(IsPointer) {} QualType underlying() const { return QT; } bool isPointer() const { return IsPointer; } friend bool operator==(const CatchHandlerType &LHS, const CatchHandlerType &RHS) { // If the pointer qualification does not match, we can return early. if (LHS.IsPointer != RHS.IsPointer) return false; // Otherwise, check the underlying type without cv-qualifiers. 
return LHS.QT == RHS.QT; } }; } // namespace namespace llvm { template <> struct DenseMapInfo { static CatchHandlerType getEmptyKey() { return CatchHandlerType(DenseMapInfo::getEmptyKey(), CatchHandlerType::ForDenseMap); } static CatchHandlerType getTombstoneKey() { return CatchHandlerType(DenseMapInfo::getTombstoneKey(), CatchHandlerType::ForDenseMap); } static unsigned getHashValue(const CatchHandlerType &Base) { return DenseMapInfo::getHashValue(Base.underlying()); } static bool isEqual(const CatchHandlerType &LHS, const CatchHandlerType &RHS) { return LHS == RHS; } }; } namespace { class CatchTypePublicBases { ASTContext &Ctx; const llvm::DenseMap &TypesToCheck; const bool CheckAgainstPointer; CXXCatchStmt *FoundHandler; CanQualType FoundHandlerType; public: CatchTypePublicBases( ASTContext &Ctx, const llvm::DenseMap &T, bool C) : Ctx(Ctx), TypesToCheck(T), CheckAgainstPointer(C), FoundHandler(nullptr) {} CXXCatchStmt *getFoundHandler() const { return FoundHandler; } CanQualType getFoundHandlerType() const { return FoundHandlerType; } bool operator()(const CXXBaseSpecifier *S, CXXBasePath &) { if (S->getAccessSpecifier() == AccessSpecifier::AS_public) { CatchHandlerType Check(S->getType(), CheckAgainstPointer); const auto &M = TypesToCheck; auto I = M.find(Check); if (I != M.end()) { FoundHandler = I->second; FoundHandlerType = Ctx.getCanonicalType(S->getType()); return true; } } return false; } }; } /// ActOnCXXTryBlock - Takes a try compound-statement and a number of /// handlers and creates a try statement from them. StmtResult Sema::ActOnCXXTryBlock(SourceLocation TryLoc, Stmt *TryBlock, ArrayRef Handlers) { // Don't report an error if 'try' is used in system headers. if (!getLangOpts().CXXExceptions && !getSourceManager().isInSystemHeader(TryLoc)) Diag(TryLoc, diag::err_exceptions_disabled) << "try"; // Exceptions aren't allowed in CUDA device code. 
if (getLangOpts().CUDA) CUDADiagIfDeviceCode(TryLoc, diag::err_cuda_device_exceptions) << "try" << CurrentCUDATarget(); if (getCurScope() && getCurScope()->isOpenMPSimdDirectiveScope()) Diag(TryLoc, diag::err_omp_simd_region_cannot_use_stmt) << "try"; sema::FunctionScopeInfo *FSI = getCurFunction(); // C++ try is incompatible with SEH __try. if (!getLangOpts().Borland && FSI->FirstSEHTryLoc.isValid()) { Diag(TryLoc, diag::err_mixing_cxx_try_seh_try); Diag(FSI->FirstSEHTryLoc, diag::note_conflicting_try_here) << "'__try'"; } const unsigned NumHandlers = Handlers.size(); assert(!Handlers.empty() && "The parser shouldn't call this if there are no handlers."); llvm::DenseMap HandledTypes; for (unsigned i = 0; i < NumHandlers; ++i) { CXXCatchStmt *H = cast(Handlers[i]); // Diagnose when the handler is a catch-all handler, but it isn't the last // handler for the try block. [except.handle]p5. Also, skip exception // declarations that are invalid, since we can't usefully report on them. if (!H->getExceptionDecl()) { if (i < NumHandlers - 1) return StmtError(Diag(H->getLocStart(), diag::err_early_catch_all)); continue; } else if (H->getExceptionDecl()->isInvalidDecl()) continue; // Walk the type hierarchy to diagnose when this type has already been // handled (duplication), or cannot be handled (derivation inversion). We // ignore top-level cv-qualifiers, per [except.handle]p3 CatchHandlerType HandlerCHT = (QualType)Context.getCanonicalType(H->getCaughtType()); // We can ignore whether the type is a reference or a pointer; we need the // underlying declaration type in order to get at the underlying record // decl, if there is one. QualType Underlying = HandlerCHT.underlying(); if (auto *RD = Underlying->getAsCXXRecordDecl()) { if (!RD->hasDefinition()) continue; // Check that none of the public, unambiguous base classes are in the // map ([except.handle]p1). Give the base classes the same pointer // qualification as the original type we are basing off of. 
This allows // comparison against the handler type using the same top-level pointer // as the original type. CXXBasePaths Paths; Paths.setOrigin(RD); CatchTypePublicBases CTPB(Context, HandledTypes, HandlerCHT.isPointer()); if (RD->lookupInBases(CTPB, Paths)) { const CXXCatchStmt *Problem = CTPB.getFoundHandler(); if (!Paths.isAmbiguous(CTPB.getFoundHandlerType())) { Diag(H->getExceptionDecl()->getTypeSpecStartLoc(), diag::warn_exception_caught_by_earlier_handler) << H->getCaughtType(); Diag(Problem->getExceptionDecl()->getTypeSpecStartLoc(), diag::note_previous_exception_handler) << Problem->getCaughtType(); } } } // Add the type the list of ones we have handled; diagnose if we've already // handled it. auto R = HandledTypes.insert(std::make_pair(H->getCaughtType(), H)); if (!R.second) { const CXXCatchStmt *Problem = R.first->second; Diag(H->getExceptionDecl()->getTypeSpecStartLoc(), diag::warn_exception_caught_by_earlier_handler) << H->getCaughtType(); Diag(Problem->getExceptionDecl()->getTypeSpecStartLoc(), diag::note_previous_exception_handler) << Problem->getCaughtType(); } } FSI->setHasCXXTry(TryLoc); return CXXTryStmt::Create(Context, TryLoc, TryBlock, Handlers); } StmtResult Sema::ActOnSEHTryBlock(bool IsCXXTry, SourceLocation TryLoc, Stmt *TryBlock, Stmt *Handler) { assert(TryBlock && Handler); sema::FunctionScopeInfo *FSI = getCurFunction(); // SEH __try is incompatible with C++ try. Borland appears to support this, // however. if (!getLangOpts().Borland) { if (FSI->FirstCXXTryLoc.isValid()) { Diag(TryLoc, diag::err_mixing_cxx_try_seh_try); Diag(FSI->FirstCXXTryLoc, diag::note_conflicting_try_here) << "'try'"; } } FSI->setHasSEHTry(TryLoc); // Reject __try in Obj-C methods, blocks, and captured decls, since we don't // track if they use SEH. 
DeclContext *DC = CurContext; while (DC && !DC->isFunctionOrMethod()) DC = DC->getParent(); FunctionDecl *FD = dyn_cast_or_null(DC); if (FD) FD->setUsesSEHTry(true); else Diag(TryLoc, diag::err_seh_try_outside_functions); // Reject __try on unsupported targets. if (!Context.getTargetInfo().isSEHTrySupported()) Diag(TryLoc, diag::err_seh_try_unsupported); return SEHTryStmt::Create(Context, IsCXXTry, TryLoc, TryBlock, Handler); } StmtResult Sema::ActOnSEHExceptBlock(SourceLocation Loc, Expr *FilterExpr, Stmt *Block) { assert(FilterExpr && Block); if(!FilterExpr->getType()->isIntegerType()) { return StmtError(Diag(FilterExpr->getExprLoc(), diag::err_filter_expression_integral) << FilterExpr->getType()); } return SEHExceptStmt::Create(Context,Loc,FilterExpr,Block); } void Sema::ActOnStartSEHFinallyBlock() { CurrentSEHFinally.push_back(CurScope); } void Sema::ActOnAbortSEHFinallyBlock() { CurrentSEHFinally.pop_back(); } StmtResult Sema::ActOnFinishSEHFinallyBlock(SourceLocation Loc, Stmt *Block) { assert(Block); CurrentSEHFinally.pop_back(); return SEHFinallyStmt::Create(Context, Loc, Block); } StmtResult Sema::ActOnSEHLeaveStmt(SourceLocation Loc, Scope *CurScope) { Scope *SEHTryParent = CurScope; while (SEHTryParent && !SEHTryParent->isSEHTryScope()) SEHTryParent = SEHTryParent->getParent(); if (!SEHTryParent) return StmtError(Diag(Loc, diag::err_ms___leave_not_in___try)); CheckJumpOutOfSEHFinally(*this, Loc, *SEHTryParent); return new (Context) SEHLeaveStmt(Loc); } StmtResult Sema::BuildMSDependentExistsStmt(SourceLocation KeywordLoc, bool IsIfExists, NestedNameSpecifierLoc QualifierLoc, DeclarationNameInfo NameInfo, Stmt *Nested) { return new (Context) MSDependentExistsStmt(KeywordLoc, IsIfExists, QualifierLoc, NameInfo, cast(Nested)); } StmtResult Sema::ActOnMSDependentExistsStmt(SourceLocation KeywordLoc, bool IsIfExists, CXXScopeSpec &SS, UnqualifiedId &Name, Stmt *Nested) { return BuildMSDependentExistsStmt(KeywordLoc, IsIfExists, 
SS.getWithLocInContext(Context), GetNameFromUnqualifiedId(Name), Nested); } RecordDecl* Sema::CreateCapturedStmtRecordDecl(CapturedDecl *&CD, SourceLocation Loc, unsigned NumParams) { DeclContext *DC = CurContext; while (!(DC->isFunctionOrMethod() || DC->isRecord() || DC->isFileContext())) DC = DC->getParent(); RecordDecl *RD = nullptr; if (getLangOpts().CPlusPlus) RD = CXXRecordDecl::Create(Context, TTK_Struct, DC, Loc, Loc, /*Id=*/nullptr); else RD = RecordDecl::Create(Context, TTK_Struct, DC, Loc, Loc, /*Id=*/nullptr); RD->setCapturedRecord(); DC->addDecl(RD); RD->setImplicit(); RD->startDefinition(); assert(NumParams > 0 && "CapturedStmt requires context parameter"); CD = CapturedDecl::Create(Context, CurContext, NumParams); DC->addDecl(CD); return RD; } static void buildCapturedStmtCaptureList( SmallVectorImpl &Captures, SmallVectorImpl &CaptureInits, ArrayRef Candidates) { typedef ArrayRef::const_iterator CaptureIter; for (CaptureIter Cap = Candidates.begin(); Cap != Candidates.end(); ++Cap) { if (Cap->isThisCapture()) { Captures.push_back(CapturedStmt::Capture(Cap->getLocation(), CapturedStmt::VCK_This)); CaptureInits.push_back(Cap->getInitExpr()); continue; } else if (Cap->isVLATypeCapture()) { Captures.push_back( CapturedStmt::Capture(Cap->getLocation(), CapturedStmt::VCK_VLAType)); CaptureInits.push_back(nullptr); continue; } Captures.push_back(CapturedStmt::Capture(Cap->getLocation(), Cap->isReferenceCapture() ? 
CapturedStmt::VCK_ByRef : CapturedStmt::VCK_ByCopy, Cap->getVariable())); CaptureInits.push_back(Cap->getInitExpr()); } } void Sema::ActOnCapturedRegionStart(SourceLocation Loc, Scope *CurScope, CapturedRegionKind Kind, unsigned NumParams) { CapturedDecl *CD = nullptr; RecordDecl *RD = CreateCapturedStmtRecordDecl(CD, Loc, NumParams); // Build the context parameter DeclContext *DC = CapturedDecl::castToDeclContext(CD); IdentifierInfo *ParamName = &Context.Idents.get("__context"); QualType ParamType = Context.getPointerType(Context.getTagDeclType(RD)); ImplicitParamDecl *Param = ImplicitParamDecl::Create(Context, DC, Loc, ParamName, ParamType); DC->addDecl(Param); CD->setContextParam(0, Param); // Enter the capturing scope for this captured region. PushCapturedRegionScope(CurScope, CD, RD, Kind); if (CurScope) PushDeclContext(CurScope, CD); else CurContext = CD; PushExpressionEvaluationContext(PotentiallyEvaluated); } void Sema::ActOnCapturedRegionStart(SourceLocation Loc, Scope *CurScope, CapturedRegionKind Kind, ArrayRef Params) { CapturedDecl *CD = nullptr; RecordDecl *RD = CreateCapturedStmtRecordDecl(CD, Loc, Params.size()); // Build the context parameter DeclContext *DC = CapturedDecl::castToDeclContext(CD); bool ContextIsFound = false; unsigned ParamNum = 0; for (ArrayRef::iterator I = Params.begin(), E = Params.end(); I != E; ++I, ++ParamNum) { if (I->second.isNull()) { assert(!ContextIsFound && "null type has been found already for '__context' parameter"); IdentifierInfo *ParamName = &Context.Idents.get("__context"); QualType ParamType = Context.getPointerType(Context.getTagDeclType(RD)); ImplicitParamDecl *Param = ImplicitParamDecl::Create(Context, DC, Loc, ParamName, ParamType); DC->addDecl(Param); CD->setContextParam(ParamNum, Param); ContextIsFound = true; } else { IdentifierInfo *ParamName = &Context.Idents.get(I->first); ImplicitParamDecl *Param = ImplicitParamDecl::Create(Context, DC, Loc, ParamName, I->second); DC->addDecl(Param); 
CD->setParam(ParamNum, Param); } } assert(ContextIsFound && "no null type for '__context' parameter"); if (!ContextIsFound) { // Add __context implicitly if it is not specified. IdentifierInfo *ParamName = &Context.Idents.get("__context"); QualType ParamType = Context.getPointerType(Context.getTagDeclType(RD)); ImplicitParamDecl *Param = ImplicitParamDecl::Create(Context, DC, Loc, ParamName, ParamType); DC->addDecl(Param); CD->setContextParam(ParamNum, Param); } // Enter the capturing scope for this captured region. PushCapturedRegionScope(CurScope, CD, RD, Kind); if (CurScope) PushDeclContext(CurScope, CD); else CurContext = CD; PushExpressionEvaluationContext(PotentiallyEvaluated); } void Sema::ActOnCapturedRegionError() { DiscardCleanupsInEvaluationContext(); PopExpressionEvaluationContext(); CapturedRegionScopeInfo *RSI = getCurCapturedRegion(); RecordDecl *Record = RSI->TheRecordDecl; Record->setInvalidDecl(); SmallVector Fields(Record->fields()); ActOnFields(/*Scope=*/nullptr, Record->getLocation(), Record, Fields, SourceLocation(), SourceLocation(), /*AttributeList=*/nullptr); PopDeclContext(); PopFunctionScopeInfo(); } StmtResult Sema::ActOnCapturedRegionEnd(Stmt *S) { CapturedRegionScopeInfo *RSI = getCurCapturedRegion(); SmallVector Captures; SmallVector CaptureInits; buildCapturedStmtCaptureList(Captures, CaptureInits, RSI->Captures); CapturedDecl *CD = RSI->TheCapturedDecl; RecordDecl *RD = RSI->TheRecordDecl; CapturedStmt *Res = CapturedStmt::Create( getASTContext(), S, static_cast(RSI->CapRegionKind), Captures, CaptureInits, CD, RD); CD->setBody(Res->getCapturedStmt()); RD->completeDefinition(); DiscardCleanupsInEvaluationContext(); PopExpressionEvaluationContext(); PopDeclContext(); PopFunctionScopeInfo(); return Res; } Index: projects/clang400-import/contrib/llvm/tools/clang/lib/Sema/SemaTemplateVariadic.cpp =================================================================== --- 
projects/clang400-import/contrib/llvm/tools/clang/lib/Sema/SemaTemplateVariadic.cpp (revision 313893) +++ projects/clang400-import/contrib/llvm/tools/clang/lib/Sema/SemaTemplateVariadic.cpp (revision 313894) @@ -1,1083 +1,1090 @@ //===------- SemaTemplateVariadic.cpp - C++ Variadic Templates ------------===/ // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. //===----------------------------------------------------------------------===/ // // This file implements semantic analysis for C++0x variadic templates. //===----------------------------------------------------------------------===/ #include "clang/Sema/Sema.h" #include "TypeLocBuilder.h" #include "clang/AST/Expr.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/TypeLoc.h" #include "clang/Sema/Lookup.h" #include "clang/Sema/ParsedTemplate.h" #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/SemaInternal.h" #include "clang/Sema/Template.h" using namespace clang; //---------------------------------------------------------------------------- // Visitor that collects unexpanded parameter packs //---------------------------------------------------------------------------- namespace { /// \brief A class that collects unexpanded parameter packs. class CollectUnexpandedParameterPacksVisitor : public RecursiveASTVisitor { typedef RecursiveASTVisitor inherited; SmallVectorImpl &Unexpanded; bool InLambda; public: explicit CollectUnexpandedParameterPacksVisitor( SmallVectorImpl &Unexpanded) : Unexpanded(Unexpanded), InLambda(false) { } bool shouldWalkTypesOfTypeLocs() const { return false; } //------------------------------------------------------------------------ // Recording occurrences of (unexpanded) parameter packs. //------------------------------------------------------------------------ /// \brief Record occurrences of template type parameter packs. 
bool VisitTemplateTypeParmTypeLoc(TemplateTypeParmTypeLoc TL) { if (TL.getTypePtr()->isParameterPack()) Unexpanded.push_back(std::make_pair(TL.getTypePtr(), TL.getNameLoc())); return true; } /// \brief Record occurrences of template type parameter packs /// when we don't have proper source-location information for /// them. /// /// Ideally, this routine would never be used. bool VisitTemplateTypeParmType(TemplateTypeParmType *T) { if (T->isParameterPack()) Unexpanded.push_back(std::make_pair(T, SourceLocation())); return true; } /// \brief Record occurrences of function and non-type template /// parameter packs in an expression. bool VisitDeclRefExpr(DeclRefExpr *E) { if (E->getDecl()->isParameterPack()) Unexpanded.push_back(std::make_pair(E->getDecl(), E->getLocation())); return true; } /// \brief Record occurrences of template template parameter packs. bool TraverseTemplateName(TemplateName Template) { if (TemplateTemplateParmDecl *TTP = dyn_cast_or_null( Template.getAsTemplateDecl())) if (TTP->isParameterPack()) Unexpanded.push_back(std::make_pair(TTP, SourceLocation())); return inherited::TraverseTemplateName(Template); } /// \brief Suppress traversal into Objective-C container literal /// elements that are pack expansions. bool TraverseObjCDictionaryLiteral(ObjCDictionaryLiteral *E) { if (!E->containsUnexpandedParameterPack()) return true; for (unsigned I = 0, N = E->getNumElements(); I != N; ++I) { ObjCDictionaryElement Element = E->getKeyValueElement(I); if (Element.isPackExpansion()) continue; TraverseStmt(Element.Key); TraverseStmt(Element.Value); } return true; } //------------------------------------------------------------------------ // Pruning the search for unexpanded parameter packs. //------------------------------------------------------------------------ /// \brief Suppress traversal into statements and expressions that /// do not contain unexpanded parameter packs. 
bool TraverseStmt(Stmt *S) { Expr *E = dyn_cast_or_null(S); if ((E && E->containsUnexpandedParameterPack()) || InLambda) return inherited::TraverseStmt(S); return true; } /// \brief Suppress traversal into types that do not contain /// unexpanded parameter packs. bool TraverseType(QualType T) { if ((!T.isNull() && T->containsUnexpandedParameterPack()) || InLambda) return inherited::TraverseType(T); return true; } /// \brief Suppress traversel into types with location information /// that do not contain unexpanded parameter packs. bool TraverseTypeLoc(TypeLoc TL) { if ((!TL.getType().isNull() && TL.getType()->containsUnexpandedParameterPack()) || InLambda) return inherited::TraverseTypeLoc(TL); return true; } /// \brief Suppress traversal of non-parameter declarations, since /// they cannot contain unexpanded parameter packs. bool TraverseDecl(Decl *D) { if ((D && isa(D)) || InLambda) return inherited::TraverseDecl(D); return true; } /// \brief Suppress traversal of template argument pack expansions. bool TraverseTemplateArgument(const TemplateArgument &Arg) { if (Arg.isPackExpansion()) return true; return inherited::TraverseTemplateArgument(Arg); } /// \brief Suppress traversal of template argument pack expansions. bool TraverseTemplateArgumentLoc(const TemplateArgumentLoc &ArgLoc) { if (ArgLoc.getArgument().isPackExpansion()) return true; return inherited::TraverseTemplateArgumentLoc(ArgLoc); } /// \brief Note whether we're traversing a lambda containing an unexpanded /// parameter pack. In this case, the unexpanded pack can occur anywhere, /// including all the places where we normally wouldn't look. Within a /// lambda, we don't propagate the 'contains unexpanded parameter pack' bit /// outside an expression. bool TraverseLambdaExpr(LambdaExpr *Lambda) { // The ContainsUnexpandedParameterPack bit on a lambda is always correct, // even if it's contained within another lambda. 
if (!Lambda->containsUnexpandedParameterPack()) return true; bool WasInLambda = InLambda; InLambda = true; // If any capture names a function parameter pack, that pack is expanded // when the lambda is expanded. for (LambdaExpr::capture_iterator I = Lambda->capture_begin(), E = Lambda->capture_end(); I != E; ++I) { if (I->capturesVariable()) { VarDecl *VD = I->getCapturedVar(); if (VD->isParameterPack()) Unexpanded.push_back(std::make_pair(VD, I->getLocation())); } } inherited::TraverseLambdaExpr(Lambda); InLambda = WasInLambda; return true; } }; } /// \brief Determine whether it's possible for an unexpanded parameter pack to /// be valid in this location. This only happens when we're in a declaration /// that is nested within an expression that could be expanded, such as a /// lambda-expression within a function call. /// /// This is conservatively correct, but may claim that some unexpanded packs are /// permitted when they are not. bool Sema::isUnexpandedParameterPackPermitted() { for (auto *SI : FunctionScopes) if (isa(SI)) return true; return false; } /// \brief Diagnose all of the unexpanded parameter packs in the given /// vector. bool Sema::DiagnoseUnexpandedParameterPacks(SourceLocation Loc, UnexpandedParameterPackContext UPPC, ArrayRef Unexpanded) { if (Unexpanded.empty()) return false; // If we are within a lambda expression, that lambda contains an unexpanded // parameter pack, and we are done. // FIXME: Store 'Unexpanded' on the lambda so we don't need to recompute it // later. 
for (unsigned N = FunctionScopes.size(); N; --N) { if (sema::LambdaScopeInfo *LSI = dyn_cast(FunctionScopes[N-1])) { LSI->ContainsUnexpandedParameterPack = true; return false; } } SmallVector Locations; SmallVector Names; llvm::SmallPtrSet NamesKnown; for (unsigned I = 0, N = Unexpanded.size(); I != N; ++I) { IdentifierInfo *Name = nullptr; if (const TemplateTypeParmType *TTP = Unexpanded[I].first.dyn_cast()) Name = TTP->getIdentifier(); else Name = Unexpanded[I].first.get()->getIdentifier(); if (Name && NamesKnown.insert(Name).second) Names.push_back(Name); if (Unexpanded[I].second.isValid()) Locations.push_back(Unexpanded[I].second); } DiagnosticBuilder DB = Diag(Loc, diag::err_unexpanded_parameter_pack) << (int)UPPC << (int)Names.size(); for (size_t I = 0, E = std::min(Names.size(), (size_t)2); I != E; ++I) DB << Names[I]; for (unsigned I = 0, N = Locations.size(); I != N; ++I) DB << SourceRange(Locations[I]); return true; } bool Sema::DiagnoseUnexpandedParameterPack(SourceLocation Loc, TypeSourceInfo *T, UnexpandedParameterPackContext UPPC) { // C++0x [temp.variadic]p5: // An appearance of a name of a parameter pack that is not expanded is // ill-formed. if (!T->getType()->containsUnexpandedParameterPack()) return false; SmallVector Unexpanded; CollectUnexpandedParameterPacksVisitor(Unexpanded).TraverseTypeLoc( T->getTypeLoc()); assert(!Unexpanded.empty() && "Unable to find unexpanded parameter packs"); return DiagnoseUnexpandedParameterPacks(Loc, UPPC, Unexpanded); } bool Sema::DiagnoseUnexpandedParameterPack(Expr *E, UnexpandedParameterPackContext UPPC) { // C++0x [temp.variadic]p5: // An appearance of a name of a parameter pack that is not expanded is // ill-formed. 
if (!E->containsUnexpandedParameterPack()) return false; SmallVector Unexpanded; CollectUnexpandedParameterPacksVisitor(Unexpanded).TraverseStmt(E); assert(!Unexpanded.empty() && "Unable to find unexpanded parameter packs"); return DiagnoseUnexpandedParameterPacks(E->getLocStart(), UPPC, Unexpanded); } bool Sema::DiagnoseUnexpandedParameterPack(const CXXScopeSpec &SS, UnexpandedParameterPackContext UPPC) { // C++0x [temp.variadic]p5: // An appearance of a name of a parameter pack that is not expanded is // ill-formed. if (!SS.getScopeRep() || !SS.getScopeRep()->containsUnexpandedParameterPack()) return false; SmallVector Unexpanded; CollectUnexpandedParameterPacksVisitor(Unexpanded) .TraverseNestedNameSpecifier(SS.getScopeRep()); assert(!Unexpanded.empty() && "Unable to find unexpanded parameter packs"); return DiagnoseUnexpandedParameterPacks(SS.getRange().getBegin(), UPPC, Unexpanded); } bool Sema::DiagnoseUnexpandedParameterPack(const DeclarationNameInfo &NameInfo, UnexpandedParameterPackContext UPPC) { // C++0x [temp.variadic]p5: // An appearance of a name of a parameter pack that is not expanded is // ill-formed. switch (NameInfo.getName().getNameKind()) { case DeclarationName::Identifier: case DeclarationName::ObjCZeroArgSelector: case DeclarationName::ObjCOneArgSelector: case DeclarationName::ObjCMultiArgSelector: case DeclarationName::CXXOperatorName: case DeclarationName::CXXLiteralOperatorName: case DeclarationName::CXXUsingDirective: return false; case DeclarationName::CXXConstructorName: case DeclarationName::CXXDestructorName: case DeclarationName::CXXConversionFunctionName: // FIXME: We shouldn't need this null check! 
if (TypeSourceInfo *TSInfo = NameInfo.getNamedTypeInfo()) return DiagnoseUnexpandedParameterPack(NameInfo.getLoc(), TSInfo, UPPC); if (!NameInfo.getName().getCXXNameType()->containsUnexpandedParameterPack()) return false; break; } SmallVector Unexpanded; CollectUnexpandedParameterPacksVisitor(Unexpanded) .TraverseType(NameInfo.getName().getCXXNameType()); assert(!Unexpanded.empty() && "Unable to find unexpanded parameter packs"); return DiagnoseUnexpandedParameterPacks(NameInfo.getLoc(), UPPC, Unexpanded); } bool Sema::DiagnoseUnexpandedParameterPack(SourceLocation Loc, TemplateName Template, UnexpandedParameterPackContext UPPC) { if (Template.isNull() || !Template.containsUnexpandedParameterPack()) return false; SmallVector Unexpanded; CollectUnexpandedParameterPacksVisitor(Unexpanded) .TraverseTemplateName(Template); assert(!Unexpanded.empty() && "Unable to find unexpanded parameter packs"); return DiagnoseUnexpandedParameterPacks(Loc, UPPC, Unexpanded); } bool Sema::DiagnoseUnexpandedParameterPack(TemplateArgumentLoc Arg, UnexpandedParameterPackContext UPPC) { if (Arg.getArgument().isNull() || !Arg.getArgument().containsUnexpandedParameterPack()) return false; SmallVector Unexpanded; CollectUnexpandedParameterPacksVisitor(Unexpanded) .TraverseTemplateArgumentLoc(Arg); assert(!Unexpanded.empty() && "Unable to find unexpanded parameter packs"); return DiagnoseUnexpandedParameterPacks(Arg.getLocation(), UPPC, Unexpanded); } void Sema::collectUnexpandedParameterPacks(TemplateArgument Arg, SmallVectorImpl &Unexpanded) { CollectUnexpandedParameterPacksVisitor(Unexpanded) .TraverseTemplateArgument(Arg); } void Sema::collectUnexpandedParameterPacks(TemplateArgumentLoc Arg, SmallVectorImpl &Unexpanded) { CollectUnexpandedParameterPacksVisitor(Unexpanded) .TraverseTemplateArgumentLoc(Arg); } void Sema::collectUnexpandedParameterPacks(QualType T, SmallVectorImpl &Unexpanded) { CollectUnexpandedParameterPacksVisitor(Unexpanded).TraverseType(T); } void 
Sema::collectUnexpandedParameterPacks(TypeLoc TL, SmallVectorImpl &Unexpanded) { CollectUnexpandedParameterPacksVisitor(Unexpanded).TraverseTypeLoc(TL); } void Sema::collectUnexpandedParameterPacks( NestedNameSpecifierLoc NNS, SmallVectorImpl &Unexpanded) { CollectUnexpandedParameterPacksVisitor(Unexpanded) .TraverseNestedNameSpecifierLoc(NNS); } void Sema::collectUnexpandedParameterPacks( const DeclarationNameInfo &NameInfo, SmallVectorImpl &Unexpanded) { CollectUnexpandedParameterPacksVisitor(Unexpanded) .TraverseDeclarationNameInfo(NameInfo); } ParsedTemplateArgument Sema::ActOnPackExpansion(const ParsedTemplateArgument &Arg, SourceLocation EllipsisLoc) { if (Arg.isInvalid()) return Arg; switch (Arg.getKind()) { case ParsedTemplateArgument::Type: { TypeResult Result = ActOnPackExpansion(Arg.getAsType(), EllipsisLoc); if (Result.isInvalid()) return ParsedTemplateArgument(); return ParsedTemplateArgument(Arg.getKind(), Result.get().getAsOpaquePtr(), Arg.getLocation()); } case ParsedTemplateArgument::NonType: { ExprResult Result = ActOnPackExpansion(Arg.getAsExpr(), EllipsisLoc); if (Result.isInvalid()) return ParsedTemplateArgument(); return ParsedTemplateArgument(Arg.getKind(), Result.get(), Arg.getLocation()); } case ParsedTemplateArgument::Template: if (!Arg.getAsTemplate().get().containsUnexpandedParameterPack()) { SourceRange R(Arg.getLocation()); if (Arg.getScopeSpec().isValid()) R.setBegin(Arg.getScopeSpec().getBeginLoc()); Diag(EllipsisLoc, diag::err_pack_expansion_without_parameter_packs) << R; return ParsedTemplateArgument(); } return Arg.getTemplatePackExpansion(EllipsisLoc); } llvm_unreachable("Unhandled template argument kind?"); } TypeResult Sema::ActOnPackExpansion(ParsedType Type, SourceLocation EllipsisLoc) { TypeSourceInfo *TSInfo; GetTypeFromParser(Type, &TSInfo); if (!TSInfo) return true; TypeSourceInfo *TSResult = CheckPackExpansion(TSInfo, EllipsisLoc, None); if (!TSResult) return true; return CreateParsedType(TSResult->getType(), TSResult); 
} TypeSourceInfo * Sema::CheckPackExpansion(TypeSourceInfo *Pattern, SourceLocation EllipsisLoc, Optional NumExpansions) { // Create the pack expansion type and source-location information. QualType Result = CheckPackExpansion(Pattern->getType(), Pattern->getTypeLoc().getSourceRange(), EllipsisLoc, NumExpansions); if (Result.isNull()) return nullptr; TypeLocBuilder TLB; TLB.pushFullCopy(Pattern->getTypeLoc()); PackExpansionTypeLoc TL = TLB.push(Result); TL.setEllipsisLoc(EllipsisLoc); return TLB.getTypeSourceInfo(Context, Result); } QualType Sema::CheckPackExpansion(QualType Pattern, SourceRange PatternRange, SourceLocation EllipsisLoc, Optional NumExpansions) { // C++0x [temp.variadic]p5: // The pattern of a pack expansion shall name one or more // parameter packs that are not expanded by a nested pack // expansion. if (!Pattern->containsUnexpandedParameterPack()) { Diag(EllipsisLoc, diag::err_pack_expansion_without_parameter_packs) << PatternRange; return QualType(); } return Context.getPackExpansionType(Pattern, NumExpansions); } ExprResult Sema::ActOnPackExpansion(Expr *Pattern, SourceLocation EllipsisLoc) { return CheckPackExpansion(Pattern, EllipsisLoc, None); } ExprResult Sema::CheckPackExpansion(Expr *Pattern, SourceLocation EllipsisLoc, Optional NumExpansions) { if (!Pattern) return ExprError(); // C++0x [temp.variadic]p5: // The pattern of a pack expansion shall name one or more // parameter packs that are not expanded by a nested pack // expansion. if (!Pattern->containsUnexpandedParameterPack()) { Diag(EllipsisLoc, diag::err_pack_expansion_without_parameter_packs) << Pattern->getSourceRange(); return ExprError(); } // Create the pack expansion expression and source-location information. return new (Context) PackExpansionExpr(Context.DependentTy, Pattern, EllipsisLoc, NumExpansions); } /// \brief Retrieve the depth and index of a parameter pack. 
static std::pair getDepthAndIndex(NamedDecl *ND) { if (TemplateTypeParmDecl *TTP = dyn_cast(ND)) return std::make_pair(TTP->getDepth(), TTP->getIndex()); if (NonTypeTemplateParmDecl *NTTP = dyn_cast(ND)) return std::make_pair(NTTP->getDepth(), NTTP->getIndex()); TemplateTemplateParmDecl *TTP = cast(ND); return std::make_pair(TTP->getDepth(), TTP->getIndex()); } bool Sema::CheckParameterPacksForExpansion( SourceLocation EllipsisLoc, SourceRange PatternRange, ArrayRef Unexpanded, const MultiLevelTemplateArgumentList &TemplateArgs, bool &ShouldExpand, bool &RetainExpansion, Optional &NumExpansions) { ShouldExpand = true; RetainExpansion = false; std::pair FirstPack; bool HaveFirstPack = false; for (ArrayRef::iterator i = Unexpanded.begin(), end = Unexpanded.end(); i != end; ++i) { // Compute the depth and index for this parameter pack. unsigned Depth = 0, Index = 0; IdentifierInfo *Name; bool IsFunctionParameterPack = false; if (const TemplateTypeParmType *TTP = i->first.dyn_cast()) { Depth = TTP->getDepth(); Index = TTP->getIndex(); Name = TTP->getIdentifier(); } else { NamedDecl *ND = i->first.get(); if (isa(ND)) IsFunctionParameterPack = true; else std::tie(Depth, Index) = getDepthAndIndex(ND); Name = ND->getIdentifier(); } // Determine the size of this argument pack. unsigned NewPackSize; if (IsFunctionParameterPack) { // Figure out whether we're instantiating to an argument pack or not. typedef LocalInstantiationScope::DeclArgumentPack DeclArgumentPack; llvm::PointerUnion *Instantiation = CurrentInstantiationScope->findInstantiationOf( i->first.get()); if (Instantiation->is()) { // We could expand this function parameter pack. NewPackSize = Instantiation->get()->size(); } else { // We can't expand this function parameter pack, so we can't expand // the pack expansion. ShouldExpand = false; continue; } } else { // If we don't have a template argument at this depth/index, then we // cannot expand the pack expansion. 
Make a note of this, but we still // want to check any parameter packs we *do* have arguments for. if (Depth >= TemplateArgs.getNumLevels() || !TemplateArgs.hasTemplateArgument(Depth, Index)) { ShouldExpand = false; continue; } // Determine the size of the argument pack. NewPackSize = TemplateArgs(Depth, Index).pack_size(); } // C++0x [temp.arg.explicit]p9: // Template argument deduction can extend the sequence of template // arguments corresponding to a template parameter pack, even when the // sequence contains explicitly specified template arguments. if (!IsFunctionParameterPack && CurrentInstantiationScope) { if (NamedDecl *PartialPack = CurrentInstantiationScope->getPartiallySubstitutedPack()){ unsigned PartialDepth, PartialIndex; std::tie(PartialDepth, PartialIndex) = getDepthAndIndex(PartialPack); if (PartialDepth == Depth && PartialIndex == Index) RetainExpansion = true; } } if (!NumExpansions) { // The is the first pack we've seen for which we have an argument. // Record it. NumExpansions = NewPackSize; FirstPack.first = Name; FirstPack.second = i->second; HaveFirstPack = true; continue; } if (NewPackSize != *NumExpansions) { // C++0x [temp.variadic]p5: // All of the parameter packs expanded by a pack expansion shall have // the same number of arguments specified. 
if (HaveFirstPack) Diag(EllipsisLoc, diag::err_pack_expansion_length_conflict) << FirstPack.first << Name << *NumExpansions << NewPackSize << SourceRange(FirstPack.second) << SourceRange(i->second); else Diag(EllipsisLoc, diag::err_pack_expansion_length_conflict_multilevel) << Name << *NumExpansions << NewPackSize << SourceRange(i->second); return true; } } return false; } Optional Sema::getNumArgumentsInExpansion(QualType T, const MultiLevelTemplateArgumentList &TemplateArgs) { QualType Pattern = cast(T)->getPattern(); SmallVector Unexpanded; CollectUnexpandedParameterPacksVisitor(Unexpanded).TraverseType(Pattern); Optional Result; for (unsigned I = 0, N = Unexpanded.size(); I != N; ++I) { // Compute the depth and index for this parameter pack. unsigned Depth; unsigned Index; if (const TemplateTypeParmType *TTP = Unexpanded[I].first.dyn_cast()) { Depth = TTP->getDepth(); Index = TTP->getIndex(); } else { NamedDecl *ND = Unexpanded[I].first.get(); if (isa(ND)) { // Function parameter pack. typedef LocalInstantiationScope::DeclArgumentPack DeclArgumentPack; llvm::PointerUnion *Instantiation = CurrentInstantiationScope->findInstantiationOf( Unexpanded[I].first.get()); if (Instantiation->is()) // The pattern refers to an unexpanded pack. We're not ready to expand // this pack yet. return None; unsigned Size = Instantiation->get()->size(); assert((!Result || *Result == Size) && "inconsistent pack sizes"); Result = Size; continue; } std::tie(Depth, Index) = getDepthAndIndex(ND); } if (Depth >= TemplateArgs.getNumLevels() || !TemplateArgs.hasTemplateArgument(Depth, Index)) // The pattern refers to an unknown template argument. We're not ready to // expand this pack yet. return None; // Determine the size of the argument pack. 
unsigned Size = TemplateArgs(Depth, Index).pack_size(); assert((!Result || *Result == Size) && "inconsistent pack sizes"); Result = Size; } return Result; } bool Sema::containsUnexpandedParameterPacks(Declarator &D) { const DeclSpec &DS = D.getDeclSpec(); switch (DS.getTypeSpecType()) { case TST_typename: case TST_typeofType: case TST_underlyingType: case TST_atomic: { QualType T = DS.getRepAsType().get(); if (!T.isNull() && T->containsUnexpandedParameterPack()) return true; break; } case TST_typeofExpr: case TST_decltype: if (DS.getRepAsExpr() && DS.getRepAsExpr()->containsUnexpandedParameterPack()) return true; break; case TST_unspecified: case TST_void: case TST_char: case TST_wchar: case TST_char16: case TST_char32: case TST_int: case TST_int128: case TST_half: case TST_float: case TST_double: case TST_float128: case TST_bool: case TST_decimal32: case TST_decimal64: case TST_decimal128: case TST_enum: case TST_union: case TST_struct: case TST_interface: case TST_class: case TST_auto: case TST_auto_type: case TST_decltype_auto: #define GENERIC_IMAGE_TYPE(ImgType, Id) case TST_##ImgType##_t: #include "clang/Basic/OpenCLImageTypes.def" case TST_unknown_anytype: case TST_error: break; } for (unsigned I = 0, N = D.getNumTypeObjects(); I != N; ++I) { const DeclaratorChunk &Chunk = D.getTypeObject(I); switch (Chunk.Kind) { case DeclaratorChunk::Pointer: case DeclaratorChunk::Reference: case DeclaratorChunk::Paren: case DeclaratorChunk::Pipe: case DeclaratorChunk::BlockPointer: // These declarator chunks cannot contain any parameter packs. 
break; case DeclaratorChunk::Array: if (Chunk.Arr.NumElts && Chunk.Arr.NumElts->containsUnexpandedParameterPack()) return true; break; case DeclaratorChunk::Function: for (unsigned i = 0, e = Chunk.Fun.NumParams; i != e; ++i) { ParmVarDecl *Param = cast(Chunk.Fun.Params[i].Param); QualType ParamTy = Param->getType(); assert(!ParamTy.isNull() && "Couldn't parse type?"); if (ParamTy->containsUnexpandedParameterPack()) return true; } if (Chunk.Fun.getExceptionSpecType() == EST_Dynamic) { for (unsigned i = 0; i != Chunk.Fun.getNumExceptions(); ++i) { if (Chunk.Fun.Exceptions[i] .Ty.get() ->containsUnexpandedParameterPack()) return true; } } else if (Chunk.Fun.getExceptionSpecType() == EST_ComputedNoexcept && Chunk.Fun.NoexceptExpr->containsUnexpandedParameterPack()) return true; if (Chunk.Fun.hasTrailingReturnType()) { QualType T = Chunk.Fun.getTrailingReturnType().get(); if (!T.isNull() && T->containsUnexpandedParameterPack()) return true; } break; case DeclaratorChunk::MemberPointer: if (Chunk.Mem.Scope().getScopeRep() && Chunk.Mem.Scope().getScopeRep()->containsUnexpandedParameterPack()) return true; break; } } return false; } namespace { // Callback to only accept typo corrections that refer to parameter packs. class ParameterPackValidatorCCC : public CorrectionCandidateCallback { public: bool ValidateCandidate(const TypoCorrection &candidate) override { NamedDecl *ND = candidate.getCorrectionDecl(); return ND && ND->isParameterPack(); } }; } /// \brief Called when an expression computing the size of a parameter pack /// is parsed. /// /// \code /// template struct count { /// static const unsigned value = sizeof...(Types); /// }; /// \endcode /// // /// \param OpLoc The location of the "sizeof" keyword. /// \param Name The name of the parameter pack whose size will be determined. /// \param NameLoc The source location of the name of the parameter pack. /// \param RParenLoc The location of the closing parentheses. 
ExprResult Sema::ActOnSizeofParameterPackExpr(Scope *S, SourceLocation OpLoc, IdentifierInfo &Name, SourceLocation NameLoc, SourceLocation RParenLoc) { // C++0x [expr.sizeof]p5: // The identifier in a sizeof... expression shall name a parameter pack. LookupResult R(*this, &Name, NameLoc, LookupOrdinaryName); LookupName(R, S); NamedDecl *ParameterPack = nullptr; switch (R.getResultKind()) { case LookupResult::Found: ParameterPack = R.getFoundDecl(); break; case LookupResult::NotFound: case LookupResult::NotFoundInCurrentInstantiation: if (TypoCorrection Corrected = CorrectTypo(R.getLookupNameInfo(), R.getLookupKind(), S, nullptr, llvm::make_unique(), CTK_ErrorRecovery)) { diagnoseTypo(Corrected, PDiag(diag::err_sizeof_pack_no_pack_name_suggest) << &Name, PDiag(diag::note_parameter_pack_here)); ParameterPack = Corrected.getCorrectionDecl(); } case LookupResult::FoundOverloaded: case LookupResult::FoundUnresolvedValue: break; case LookupResult::Ambiguous: DiagnoseAmbiguousLookup(R); return ExprError(); } if (!ParameterPack || !ParameterPack->isParameterPack()) { Diag(NameLoc, diag::err_sizeof_pack_no_pack_name) << &Name; return ExprError(); } MarkAnyDeclReferenced(OpLoc, ParameterPack, true); return SizeOfPackExpr::Create(Context, OpLoc, ParameterPack, NameLoc, RParenLoc); } TemplateArgumentLoc Sema::getTemplateArgumentPackExpansionPattern( TemplateArgumentLoc OrigLoc, SourceLocation &Ellipsis, Optional &NumExpansions) const { const TemplateArgument &Argument = OrigLoc.getArgument(); assert(Argument.isPackExpansion()); switch (Argument.getKind()) { case TemplateArgument::Type: { // FIXME: We shouldn't ever have to worry about missing // type-source info! 
TypeSourceInfo *ExpansionTSInfo = OrigLoc.getTypeSourceInfo(); if (!ExpansionTSInfo) ExpansionTSInfo = Context.getTrivialTypeSourceInfo(Argument.getAsType(), Ellipsis); PackExpansionTypeLoc Expansion = ExpansionTSInfo->getTypeLoc().castAs(); Ellipsis = Expansion.getEllipsisLoc(); TypeLoc Pattern = Expansion.getPatternLoc(); NumExpansions = Expansion.getTypePtr()->getNumExpansions(); // We need to copy the TypeLoc because TemplateArgumentLocs store a // TypeSourceInfo. // FIXME: Find some way to avoid the copy? TypeLocBuilder TLB; TLB.pushFullCopy(Pattern); TypeSourceInfo *PatternTSInfo = TLB.getTypeSourceInfo(Context, Pattern.getType()); return TemplateArgumentLoc(TemplateArgument(Pattern.getType()), PatternTSInfo); } case TemplateArgument::Expression: { PackExpansionExpr *Expansion = cast(Argument.getAsExpr()); Expr *Pattern = Expansion->getPattern(); Ellipsis = Expansion->getEllipsisLoc(); NumExpansions = Expansion->getNumExpansions(); return TemplateArgumentLoc(Pattern, Pattern); } case TemplateArgument::TemplateExpansion: Ellipsis = OrigLoc.getTemplateEllipsisLoc(); NumExpansions = Argument.getNumTemplateExpansions(); return TemplateArgumentLoc(Argument.getPackExpansionPattern(), OrigLoc.getTemplateQualifierLoc(), OrigLoc.getTemplateNameLoc()); case TemplateArgument::Declaration: case TemplateArgument::NullPtr: case TemplateArgument::Template: case TemplateArgument::Integral: case TemplateArgument::Pack: case TemplateArgument::Null: return TemplateArgumentLoc(); } llvm_unreachable("Invalid TemplateArgument Kind!"); } Optional Sema::getFullyPackExpandedSize(TemplateArgument Arg) { assert(Arg.containsUnexpandedParameterPack()); // If this is a substituted pack, grab that pack. If not, we don't know // the size yet. // FIXME: We could find a size in more cases by looking for a substituted // pack anywhere within this argument, but that's not necessary in the common // case for 'sizeof...(A)' handling. 
TemplateArgument Pack; switch (Arg.getKind()) { case TemplateArgument::Type: if (auto *Subst = Arg.getAsType()->getAs()) Pack = Subst->getArgumentPack(); else return None; break; case TemplateArgument::Expression: if (auto *Subst = dyn_cast(Arg.getAsExpr())) Pack = Subst->getArgumentPack(); else if (auto *Subst = dyn_cast(Arg.getAsExpr())) { for (ParmVarDecl *PD : *Subst) if (PD->isParameterPack()) return None; return Subst->getNumExpansions(); } else return None; break; case TemplateArgument::Template: if (SubstTemplateTemplateParmPackStorage *Subst = Arg.getAsTemplate().getAsSubstTemplateTemplateParmPack()) Pack = Subst->getArgumentPack(); else return None; break; case TemplateArgument::Declaration: case TemplateArgument::NullPtr: case TemplateArgument::TemplateExpansion: case TemplateArgument::Integral: case TemplateArgument::Pack: case TemplateArgument::Null: return None; } // Check that no argument in the pack is itself a pack expansion. for (TemplateArgument Elem : Pack.pack_elements()) { // There's no point recursing in this case; we would have already // expanded this pack expansion into the enclosing pack if we could. if (Elem.isPackExpansion()) return None; } return Pack.pack_size(); } static void CheckFoldOperand(Sema &S, Expr *E) { if (!E) return; E = E->IgnoreImpCasts(); auto *OCE = dyn_cast(E); if ((OCE && OCE->isInfixBinaryOp()) || isa(E) || isa(E)) { S.Diag(E->getExprLoc(), diag::err_fold_expression_bad_operand) << E->getSourceRange() << FixItHint::CreateInsertion(E->getLocStart(), "(") << FixItHint::CreateInsertion(E->getLocEnd(), ")"); } } ExprResult Sema::ActOnCXXFoldExpr(SourceLocation LParenLoc, Expr *LHS, tok::TokenKind Operator, SourceLocation EllipsisLoc, Expr *RHS, SourceLocation RParenLoc) { // LHS and RHS must be cast-expressions. We allow an arbitrary expression // in the parser and reduce down to just cast-expressions here. 
CheckFoldOperand(*this, LHS); CheckFoldOperand(*this, RHS); + auto DiscardOperands = [&] { + CorrectDelayedTyposInExpr(LHS); + CorrectDelayedTyposInExpr(RHS); + }; + // [expr.prim.fold]p3: // In a binary fold, op1 and op2 shall be the same fold-operator, and // either e1 shall contain an unexpanded parameter pack or e2 shall contain // an unexpanded parameter pack, but not both. if (LHS && RHS && LHS->containsUnexpandedParameterPack() == RHS->containsUnexpandedParameterPack()) { + DiscardOperands(); return Diag(EllipsisLoc, LHS->containsUnexpandedParameterPack() ? diag::err_fold_expression_packs_both_sides : diag::err_pack_expansion_without_parameter_packs) << LHS->getSourceRange() << RHS->getSourceRange(); } // [expr.prim.fold]p2: // In a unary fold, the cast-expression shall contain an unexpanded // parameter pack. if (!LHS || !RHS) { Expr *Pack = LHS ? LHS : RHS; assert(Pack && "fold expression with neither LHS nor RHS"); + DiscardOperands(); if (!Pack->containsUnexpandedParameterPack()) return Diag(EllipsisLoc, diag::err_pack_expansion_without_parameter_packs) << Pack->getSourceRange(); } BinaryOperatorKind Opc = ConvertTokenKindToBinaryOpcode(Operator); return BuildCXXFoldExpr(LParenLoc, LHS, Opc, EllipsisLoc, RHS, RParenLoc); } ExprResult Sema::BuildCXXFoldExpr(SourceLocation LParenLoc, Expr *LHS, BinaryOperatorKind Operator, SourceLocation EllipsisLoc, Expr *RHS, SourceLocation RParenLoc) { return new (Context) CXXFoldExpr(Context.DependentTy, LParenLoc, LHS, Operator, EllipsisLoc, RHS, RParenLoc); } ExprResult Sema::BuildEmptyCXXFoldExpr(SourceLocation EllipsisLoc, BinaryOperatorKind Operator) { // [temp.variadic]p9: // If N is zero for a unary fold-expression, the value of the expression is // && -> true // || -> false // , -> void() // if the operator is not listed [above], the instantiation is ill-formed. // // Note that we need to use something like int() here, not merely 0, to // prevent the result from being a null pointer constant. 
QualType ScalarType; switch (Operator) { case BO_LOr: return ActOnCXXBoolLiteral(EllipsisLoc, tok::kw_false); case BO_LAnd: return ActOnCXXBoolLiteral(EllipsisLoc, tok::kw_true); case BO_Comma: ScalarType = Context.VoidTy; break; default: return Diag(EllipsisLoc, diag::err_fold_expression_empty) << BinaryOperator::getOpcodeStr(Operator); } return new (Context) CXXScalarValueInitExpr( ScalarType, Context.getTrivialTypeSourceInfo(ScalarType, EllipsisLoc), EllipsisLoc); } Index: projects/clang400-import/contrib/llvm/tools/clang =================================================================== --- projects/clang400-import/contrib/llvm/tools/clang (revision 313893) +++ projects/clang400-import/contrib/llvm/tools/clang (revision 313894) Property changes on: projects/clang400-import/contrib/llvm/tools/clang ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /vendor/clang/dist:r313643-313891 Index: projects/clang400-import/contrib/llvm/tools/lld =================================================================== --- projects/clang400-import/contrib/llvm/tools/lld (revision 313893) +++ projects/clang400-import/contrib/llvm/tools/lld (revision 313894) Property changes on: projects/clang400-import/contrib/llvm/tools/lld ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /vendor/lld/dist:r313643-313891 Index: projects/clang400-import/contrib/llvm/tools/lldb =================================================================== --- projects/clang400-import/contrib/llvm/tools/lldb (revision 313893) +++ projects/clang400-import/contrib/llvm/tools/lldb (revision 313894) Property changes on: projects/clang400-import/contrib/llvm/tools/lldb ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /vendor/lldb/dist:r313643-313891 Index: projects/clang400-import/contrib/llvm 
=================================================================== --- projects/clang400-import/contrib/llvm (revision 313893) +++ projects/clang400-import/contrib/llvm (revision 313894) Property changes on: projects/clang400-import/contrib/llvm ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /vendor/llvm/dist:r313643-313891 Index: projects/clang400-import/lib/clang/include/clang/Basic/Version.inc =================================================================== --- projects/clang400-import/lib/clang/include/clang/Basic/Version.inc (revision 313893) +++ projects/clang400-import/lib/clang/include/clang/Basic/Version.inc (revision 313894) @@ -1,11 +1,11 @@ /* $FreeBSD$ */ #define CLANG_VERSION 4.0.0 #define CLANG_VERSION_STRING "4.0.0" #define CLANG_VERSION_MAJOR 4 #define CLANG_VERSION_MINOR 0 #define CLANG_VERSION_PATCHLEVEL 0 #define CLANG_VENDOR "FreeBSD " -#define SVN_REVISION "294803" +#define SVN_REVISION "295380" Index: projects/clang400-import/lib/clang/include/lld/Config/Version.inc =================================================================== --- projects/clang400-import/lib/clang/include/lld/Config/Version.inc (revision 313893) +++ projects/clang400-import/lib/clang/include/lld/Config/Version.inc (revision 313894) @@ -1,8 +1,8 @@ // $FreeBSD$ #define LLD_VERSION 4.0.0 #define LLD_VERSION_STRING "4.0.0" #define LLD_VERSION_MAJOR 4 #define LLD_VERSION_MINOR 0 -#define LLD_REVISION_STRING "294803" +#define LLD_REVISION_STRING "295380" #define LLD_REPOSITORY_STRING "FreeBSD"