Index: vendor/llvm/dist-release_70/include/llvm/MC/MCAsmBackend.h =================================================================== --- vendor/llvm/dist-release_70/include/llvm/MC/MCAsmBackend.h (revision 341364) +++ vendor/llvm/dist-release_70/include/llvm/MC/MCAsmBackend.h (revision 341365) @@ -1,205 +1,210 @@ //===- llvm/MC/MCAsmBackend.h - MC Asm Backend ------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #ifndef LLVM_MC_MCASMBACKEND_H #define LLVM_MC_MCASMBACKEND_H #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFragment.h" #include "llvm/Support/Endian.h" #include #include namespace llvm { class MCAsmLayout; class MCAssembler; class MCCFIInstruction; class MCCodePadder; struct MCFixupKindInfo; class MCFragment; class MCInst; class MCObjectStreamer; class MCObjectTargetWriter; class MCObjectWriter; struct MCCodePaddingContext; class MCRelaxableFragment; class MCSubtargetInfo; class MCValue; class raw_pwrite_stream; /// Generic interface to target specific assembler backends. class MCAsmBackend { std::unique_ptr CodePadder; protected: // Can only create subclasses. MCAsmBackend(support::endianness Endian); public: MCAsmBackend(const MCAsmBackend &) = delete; MCAsmBackend &operator=(const MCAsmBackend &) = delete; virtual ~MCAsmBackend(); const support::endianness Endian; /// lifetime management virtual void reset() {} /// Create a new MCObjectWriter instance for use by the assembler backend to /// emit the final object file. std::unique_ptr createObjectWriter(raw_pwrite_stream &OS) const; /// Create an MCObjectWriter that writes two object files: a .o file which is /// linked into the final program and a .dwo file which is used by debuggers. /// This function is only supported with ELF targets. std::unique_ptr createDwoObjectWriter(raw_pwrite_stream &OS, raw_pwrite_stream &DwoOS) const; virtual std::unique_ptr createObjectTargetWriter() const = 0; /// \name Target Fixup Interfaces /// @{ /// Get the number of target specific fixup kinds. virtual unsigned getNumFixupKinds() const = 0; /// Map a relocation name used in .reloc to a fixup kind. virtual Optional getFixupKind(StringRef Name) const; /// Get information on a fixup kind. virtual const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const; /// Hook to check if a relocation is needed for some target specific reason. virtual bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target) { return false; } /// Apply the \p Value for given \p Fixup into the provided data fragment, at /// the offset specified by the fixup and following the fixup kind as /// appropriate. Errors (such as an out of range fixup value) should be /// reported via \p Ctx. /// The \p STI is present only for fragments of type MCRelaxableFragment and /// MCDataFragment with hasInstructions() == true. virtual void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, uint64_t Value, bool IsResolved, const MCSubtargetInfo *STI) const = 0; /// Check whether the given target requires emitting differences of two /// symbols as a set of relocations. 
virtual bool requiresDiffExpressionRelocations() const { return false; } /// @} /// \name Target Relaxation Interfaces /// @{ /// Check whether the given instruction may need relaxation. /// /// \param Inst - The instruction to test. /// \param STI - The MCSubtargetInfo in effect when the instruction was /// encoded. virtual bool mayNeedRelaxation(const MCInst &Inst, const MCSubtargetInfo &STI) const = 0; /// Target specific predicate for whether a given fixup requires the /// associated instruction to be relaxed. virtual bool fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, bool Resolved, uint64_t Value, const MCRelaxableFragment *DF, const MCAsmLayout &Layout, const bool WasForced) const; /// Simple predicate for targets where !Resolved implies requiring relaxation virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const = 0; /// Relax the instruction in the given fragment to the next wider instruction. /// /// \param Inst The instruction to relax, which may be the same as the /// output. /// \param STI the subtarget information for the associated instruction. /// \param [out] Res On return, the relaxed instruction. virtual void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, MCInst &Res) const = 0; /// @} /// Returns the minimum size of a nop in bytes on this target. The assembler /// will use this to emit excess padding in situations where the padding /// required for simple alignment would be less than the minimum nop size. /// virtual unsigned getMinimumNopSize() const { return 1; } /// Write an (optimal) nop sequence of Count bytes to the given output. If the /// target cannot generate such a sequence, it should return an error. /// /// \return - True on success. virtual bool writeNopData(raw_ostream &OS, uint64_t Count) const = 0; /// Give backend an opportunity to finish layout after relaxation virtual void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const {} /// Handle any target-specific assembler flags. By default, do nothing. virtual void handleAssemblerFlag(MCAssemblerFlag Flag) {} /// Generate the compact unwind encoding for the CFI instructions. virtual uint32_t generateCompactUnwindEncoding(ArrayRef) const { return 0; } + /// Check whether a given symbol has been flagged with MICROMIPS flag. + virtual bool isMicroMips(const MCSymbol *Sym) const { + return false; + } + /// Handles all target related code padding when starting to write a new /// basic block to an object file. /// /// \param OS The streamer used for writing the padding data and function. /// \param Context the context of the padding, Embeds the basic block's /// parameters. void handleCodePaddingBasicBlockStart(MCObjectStreamer *OS, const MCCodePaddingContext &Context); /// Handles all target related code padding after writing a block to an object /// file. /// /// \param Context the context of the padding, Embeds the basic block's /// parameters. void handleCodePaddingBasicBlockEnd(const MCCodePaddingContext &Context); /// Handles all target related code padding before writing a new instruction /// to an object file. /// /// \param Inst the instruction. void handleCodePaddingInstructionBegin(const MCInst &Inst); /// Handles all target related code padding after writing an instruction to an /// object file. /// /// \param Inst the instruction. void handleCodePaddingInstructionEnd(const MCInst &Inst); /// Relaxes a fragment (changes the size of the padding) according to target /// requirements. 
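As a rough illustration of how a target plugs into this interface, here is a minimal sketch of a hypothetical backend that overrides the pure-virtual hooks declared above, plus the isMicroMips() hook added in this revision. The "Foo" names, the 4-byte fixup width, and the single-byte nop are invented for the example; only the method signatures come from this header.

// Illustrative sketch only -- not part of this patch or of any real target.
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>

namespace {
using namespace llvm;

class FooAsmBackend : public MCAsmBackend {
public:
  FooAsmBackend() : MCAsmBackend(support::little) {}

  std::unique_ptr<MCObjectTargetWriter> createObjectTargetWriter() const override {
    return nullptr; // A real target returns its ELF/MachO/COFF target writer.
  }

  unsigned getNumFixupKinds() const override {
    return 0; // No target-specific fixup kinds in this sketch.
  }

  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                  const MCValue &Target, MutableArrayRef<char> Data,
                  uint64_t Value, bool IsResolved,
                  const MCSubtargetInfo *STI) const override {
    // Patch the resolved value into the fragment, little-endian, assuming
    // (for the example) that every Foo fixup is 32 bits wide.
    const unsigned NumBytes = 4;
    for (unsigned I = 0; I != NumBytes; ++I)
      Data[Fixup.getOffset() + I] |= uint8_t(Value >> (I * 8));
  }

  bool mayNeedRelaxation(const MCInst &Inst,
                         const MCSubtargetInfo &STI) const override {
    return false; // Foo has no short-vs-long instruction forms.
  }

  bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
                            const MCRelaxableFragment *DF,
                            const MCAsmLayout &Layout) const override {
    return false;
  }

  void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
                        MCInst &Res) const override {
    Res = Inst; // Nothing to relax in this sketch.
  }

  bool writeNopData(raw_ostream &OS, uint64_t Count) const override {
    for (uint64_t I = 0; I != Count; ++I)
      OS << char(0x00); // Pretend a Foo nop is a single zero byte.
    return true;
  }

  // The hook added by this revision: a MIPS backend would inspect the
  // symbol's ELF flags here; every other target keeps the default 'false'.
  bool isMicroMips(const MCSymbol *Sym) const override { return false; }
};
} // end anonymous namespace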
The new size computation is done w.r.t a layout. /// /// \param PF The fragment to relax. /// \param Layout Code layout information. /// /// \returns true iff any relaxation occurred. bool relaxFragment(MCPaddingFragment *PF, MCAsmLayout &Layout); }; } // end namespace llvm #endif // LLVM_MC_MCASMBACKEND_H Index: vendor/llvm/dist-release_70/include/llvm/Support/GenericDomTreeConstruction.h =================================================================== --- vendor/llvm/dist-release_70/include/llvm/Support/GenericDomTreeConstruction.h (revision 341364) +++ vendor/llvm/dist-release_70/include/llvm/Support/GenericDomTreeConstruction.h (revision 341365) @@ -1,1690 +1,1704 @@ //===- GenericDomTreeConstruction.h - Dominator Calculation ------*- C++ -*-==// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// /// \file /// /// Generic dominator tree construction - This file provides routines to /// construct immediate dominator information for a flow-graph based on the /// Semi-NCA algorithm described in this dissertation: /// /// Linear-Time Algorithms for Dominators and Related Problems /// Loukas Georgiadis, Princeton University, November 2005, pp. 21-23: /// ftp://ftp.cs.princeton.edu/reports/2005/737.pdf /// /// This implements the O(n*log(n)) versions of EVAL and LINK, because it turns /// out that the theoretically slower O(n*log(n)) implementation is actually /// faster than the almost-linear O(n*alpha(n)) version, even for large CFGs. /// /// The file uses the Depth Based Search algorithm to perform incremental /// updates (insertion and deletions). The implemented algorithm is based on /// this publication: /// /// An Experimental Study of Dynamic Dominators /// Loukas Georgiadis, et al., April 12 2016, pp. 5-7, 9-10: /// https://arxiv.org/pdf/1604.02711.pdf /// //===----------------------------------------------------------------------===// #ifndef LLVM_SUPPORT_GENERICDOMTREECONSTRUCTION_H #define LLVM_SUPPORT_GENERICDOMTREECONSTRUCTION_H #include #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GenericDomTree.h" #define DEBUG_TYPE "dom-tree-builder" namespace llvm { namespace DomTreeBuilder { template struct SemiNCAInfo { using NodePtr = typename DomTreeT::NodePtr; using NodeT = typename DomTreeT::NodeType; using TreeNodePtr = DomTreeNodeBase *; using RootsT = decltype(DomTreeT::Roots); static constexpr bool IsPostDom = DomTreeT::IsPostDominator; // Information record used by Semi-NCA during tree construction. struct InfoRec { unsigned DFSNum = 0; unsigned Parent = 0; unsigned Semi = 0; NodePtr Label = nullptr; NodePtr IDom = nullptr; SmallVector ReverseChildren; }; // Number to node mapping is 1-based. Initialize the mapping to start with // a dummy element. std::vector NumToNode = {nullptr}; DenseMap NodeToInfo; using UpdateT = typename DomTreeT::UpdateType; struct BatchUpdateInfo { SmallVector Updates; using NodePtrAndKind = PointerIntPair; // In order to be able to walk a CFG that is out of sync with the CFG // DominatorTree last knew about, use the list of updates to reconstruct // previous CFG versions of the current CFG. 
For each node, we store a set // of its virtually added/deleted future successors and predecessors. // Note that these children are from the future relative to what the // DominatorTree knows about -- using them to gets us some snapshot of the // CFG from the past (relative to the state of the CFG). DenseMap> FutureSuccessors; DenseMap> FuturePredecessors; // Remembers if the whole tree was recalculated at some point during the // current batch update. bool IsRecalculated = false; }; BatchUpdateInfo *BatchUpdates; using BatchUpdatePtr = BatchUpdateInfo *; // If BUI is a nullptr, then there's no batch update in progress. SemiNCAInfo(BatchUpdatePtr BUI) : BatchUpdates(BUI) {} void clear() { NumToNode = {nullptr}; // Restore to initial state with a dummy start node. NodeToInfo.clear(); // Don't reset the pointer to BatchUpdateInfo here -- if there's an update // in progress, we need this information to continue it. } template struct ChildrenGetter { using ResultTy = SmallVector; static ResultTy Get(NodePtr N, std::integral_constant) { auto RChildren = reverse(children(N)); return ResultTy(RChildren.begin(), RChildren.end()); } static ResultTy Get(NodePtr N, std::integral_constant) { auto IChildren = inverse_children(N); return ResultTy(IChildren.begin(), IChildren.end()); } using Tag = std::integral_constant; // The function below is the core part of the batch updater. It allows the // Depth Based Search algorithm to perform incremental updates in lockstep // with updates to the CFG. We emulated lockstep CFG updates by getting its // next snapshots by reverse-applying future updates. static ResultTy Get(NodePtr N, BatchUpdatePtr BUI) { ResultTy Res = Get(N, Tag()); // If there's no batch update in progress, simply return node's children. if (!BUI) return Res; // CFG children are actually its *most current* children, and we have to // reverse-apply the future updates to get the node's children at the // point in time the update was performed. auto &FutureChildren = (Inverse != IsPostDom) ? BUI->FuturePredecessors : BUI->FutureSuccessors; auto FCIt = FutureChildren.find(N); if (FCIt == FutureChildren.end()) return Res; for (auto ChildAndKind : FCIt->second) { const NodePtr Child = ChildAndKind.getPointer(); const UpdateKind UK = ChildAndKind.getInt(); // Reverse-apply the future update. if (UK == UpdateKind::Insert) { // If there's an insertion in the future, it means that the edge must // exist in the current CFG, but was not present in it before. assert(llvm::find(Res, Child) != Res.end() && "Expected child not found in the CFG"); Res.erase(std::remove(Res.begin(), Res.end(), Child), Res.end()); LLVM_DEBUG(dbgs() << "\tHiding edge " << BlockNamePrinter(N) << " -> " << BlockNamePrinter(Child) << "\n"); } else { // If there's an deletion in the future, it means that the edge cannot // exist in the current CFG, but existed in it before. assert(llvm::find(Res, Child) == Res.end() && "Unexpected child found in the CFG"); LLVM_DEBUG(dbgs() << "\tShowing virtual edge " << BlockNamePrinter(N) << " -> " << BlockNamePrinter(Child) << "\n"); Res.push_back(Child); } } return Res; } }; NodePtr getIDom(NodePtr BB) const { auto InfoIt = NodeToInfo.find(BB); if (InfoIt == NodeToInfo.end()) return nullptr; return InfoIt->second.IDom; } TreeNodePtr getNodeForBlock(NodePtr BB, DomTreeT &DT) { if (TreeNodePtr Node = DT.getNode(BB)) return Node; // Haven't calculated this node yet? Get or calculate the node for the // immediate dominator. 
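The reverse-apply rule above can be shown with a small standalone toy (plain int node ids, not LLVM types): a future insertion means the edge is already present in the current CFG but must be hidden to recover the older snapshot, while a future deletion means the edge is gone now but has to be shown as a virtual edge.

// Toy illustration only of the snapshot reconstruction idea behind
// ChildrenGetter; it is not the templated LLVM code.
#include <algorithm>
#include <utility>
#include <vector>

enum class Kind { Insert, Delete };

// CurrentChildren: the node's successors in the CFG as it looks *now*.
// Future: updates the tree has not processed yet, as (child, kind) pairs.
std::vector<int>
childrenInSnapshot(std::vector<int> CurrentChildren,
                   const std::vector<std::pair<int, Kind>> &Future) {
  for (const auto &CK : Future) {
    if (CK.second == Kind::Insert) {
      // The tree has not seen this insertion yet: the edge exists in the
      // current CFG but not in the snapshot the tree describes -- hide it.
      CurrentChildren.erase(std::remove(CurrentChildren.begin(),
                                        CurrentChildren.end(), CK.first),
                            CurrentChildren.end());
    } else {
      // The tree has not seen this deletion yet: the edge is absent now but
      // existed in the snapshot -- show it as a virtual edge.
      CurrentChildren.push_back(CK.first);
    }
  }
  return CurrentChildren;
}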
NodePtr IDom = getIDom(BB); assert(IDom || DT.DomTreeNodes[nullptr]); TreeNodePtr IDomNode = getNodeForBlock(IDom, DT); // Add a new tree node for this NodeT, and link it as a child of // IDomNode return (DT.DomTreeNodes[BB] = IDomNode->addChild( llvm::make_unique>(BB, IDomNode))) .get(); } static bool AlwaysDescend(NodePtr, NodePtr) { return true; } struct BlockNamePrinter { NodePtr N; BlockNamePrinter(NodePtr Block) : N(Block) {} BlockNamePrinter(TreeNodePtr TN) : N(TN ? TN->getBlock() : nullptr) {} friend raw_ostream &operator<<(raw_ostream &O, const BlockNamePrinter &BP) { if (!BP.N) O << "nullptr"; else BP.N->printAsOperand(O, false); return O; } }; // Custom DFS implementation which can skip nodes based on a provided // predicate. It also collects ReverseChildren so that we don't have to spend // time getting predecessors in SemiNCA. // // If IsReverse is set to true, the DFS walk will be performed backwards // relative to IsPostDom -- using reverse edges for dominators and forward // edges for postdominators. template unsigned runDFS(NodePtr V, unsigned LastNum, DescendCondition Condition, unsigned AttachToNum) { assert(V); SmallVector WorkList = {V}; if (NodeToInfo.count(V) != 0) NodeToInfo[V].Parent = AttachToNum; while (!WorkList.empty()) { const NodePtr BB = WorkList.pop_back_val(); auto &BBInfo = NodeToInfo[BB]; // Visited nodes always have positive DFS numbers. if (BBInfo.DFSNum != 0) continue; BBInfo.DFSNum = BBInfo.Semi = ++LastNum; BBInfo.Label = BB; NumToNode.push_back(BB); constexpr bool Direction = IsReverse != IsPostDom; // XOR. for (const NodePtr Succ : ChildrenGetter::Get(BB, BatchUpdates)) { const auto SIT = NodeToInfo.find(Succ); // Don't visit nodes more than once but remember to collect // ReverseChildren. if (SIT != NodeToInfo.end() && SIT->second.DFSNum != 0) { if (Succ != BB) SIT->second.ReverseChildren.push_back(BB); continue; } if (!Condition(BB, Succ)) continue; // It's fine to add Succ to the map, because we know that it will be // visited later. auto &SuccInfo = NodeToInfo[Succ]; WorkList.push_back(Succ); SuccInfo.Parent = LastNum; SuccInfo.ReverseChildren.push_back(BB); } } return LastNum; } NodePtr eval(NodePtr VIn, unsigned LastLinked) { auto &VInInfo = NodeToInfo[VIn]; if (VInInfo.DFSNum < LastLinked) return VIn; SmallVector Work; SmallPtrSet Visited; if (VInInfo.Parent >= LastLinked) Work.push_back(VIn); while (!Work.empty()) { NodePtr V = Work.back(); auto &VInfo = NodeToInfo[V]; NodePtr VAncestor = NumToNode[VInfo.Parent]; // Process Ancestor first if (Visited.insert(VAncestor).second && VInfo.Parent >= LastLinked) { Work.push_back(VAncestor); continue; } Work.pop_back(); // Update VInfo based on Ancestor info if (VInfo.Parent < LastLinked) continue; auto &VAInfo = NodeToInfo[VAncestor]; NodePtr VAncestorLabel = VAInfo.Label; NodePtr VLabel = VInfo.Label; if (NodeToInfo[VAncestorLabel].Semi < NodeToInfo[VLabel].Semi) VInfo.Label = VAncestorLabel; VInfo.Parent = VAInfo.Parent; } return VInInfo.Label; } // This function requires DFS to be run before calling it. void runSemiNCA(DomTreeT &DT, const unsigned MinLevel = 0) { const unsigned NextDFSNum(NumToNode.size()); // Initialize IDoms to spanning tree parents. for (unsigned i = 1; i < NextDFSNum; ++i) { const NodePtr V = NumToNode[i]; auto &VInfo = NodeToInfo[V]; VInfo.IDom = NumToNode[VInfo.Parent]; } // Step #1: Calculate the semidominators of all vertices. 
for (unsigned i = NextDFSNum - 1; i >= 2; --i) { NodePtr W = NumToNode[i]; auto &WInfo = NodeToInfo[W]; // Initialize the semi dominator to point to the parent node. WInfo.Semi = WInfo.Parent; for (const auto &N : WInfo.ReverseChildren) { if (NodeToInfo.count(N) == 0) // Skip unreachable predecessors. continue; const TreeNodePtr TN = DT.getNode(N); // Skip predecessors whose level is above the subtree we are processing. if (TN && TN->getLevel() < MinLevel) continue; unsigned SemiU = NodeToInfo[eval(N, i + 1)].Semi; if (SemiU < WInfo.Semi) WInfo.Semi = SemiU; } } // Step #2: Explicitly define the immediate dominator of each vertex. // IDom[i] = NCA(SDom[i], SpanningTreeParent(i)). // Note that the parents were stored in IDoms and later got invalidated // during path compression in Eval. for (unsigned i = 2; i < NextDFSNum; ++i) { const NodePtr W = NumToNode[i]; auto &WInfo = NodeToInfo[W]; const unsigned SDomNum = NodeToInfo[NumToNode[WInfo.Semi]].DFSNum; NodePtr WIDomCandidate = WInfo.IDom; while (NodeToInfo[WIDomCandidate].DFSNum > SDomNum) WIDomCandidate = NodeToInfo[WIDomCandidate].IDom; WInfo.IDom = WIDomCandidate; } } // PostDominatorTree always has a virtual root that represents a virtual CFG // node that serves as a single exit from the function. All the other exits // (CFG nodes with terminators and nodes in infinite loops are logically // connected to this virtual CFG exit node). // This functions maps a nullptr CFG node to the virtual root tree node. void addVirtualRoot() { assert(IsPostDom && "Only postdominators have a virtual root"); assert(NumToNode.size() == 1 && "SNCAInfo must be freshly constructed"); auto &BBInfo = NodeToInfo[nullptr]; BBInfo.DFSNum = BBInfo.Semi = 1; BBInfo.Label = nullptr; NumToNode.push_back(nullptr); // NumToNode[1] = nullptr; } // For postdominators, nodes with no forward successors are trivial roots that // are always selected as tree roots. Roots with forward successors correspond // to CFG nodes within infinite loops. static bool HasForwardSuccessors(const NodePtr N, BatchUpdatePtr BUI) { assert(N && "N must be a valid node"); return !ChildrenGetter::Get(N, BUI).empty(); } static NodePtr GetEntryNode(const DomTreeT &DT) { assert(DT.Parent && "Parent not set"); return GraphTraits::getEntryNode(DT.Parent); } // Finds all roots without relaying on the set of roots already stored in the // tree. // We define roots to be some non-redundant set of the CFG nodes static RootsT FindRoots(const DomTreeT &DT, BatchUpdatePtr BUI) { assert(DT.Parent && "Parent pointer is not set"); RootsT Roots; // For dominators, function entry CFG node is always a tree root node. if (!IsPostDom) { Roots.push_back(GetEntryNode(DT)); return Roots; } SemiNCAInfo SNCA(BUI); // PostDominatorTree always has a virtual root. SNCA.addVirtualRoot(); unsigned Num = 1; LLVM_DEBUG(dbgs() << "\t\tLooking for trivial roots\n"); // Step #1: Find all the trivial roots that are going to will definitely // remain tree roots. unsigned Total = 0; // It may happen that there are some new nodes in the CFG that are result of // the ongoing batch update, but we cannot really pretend that they don't // exist -- we won't see any outgoing or incoming edges to them, so it's // fine to discover them here, as they would end up appearing in the CFG at // some point anyway. for (const NodePtr N : nodes(DT.Parent)) { ++Total; // If it has no *successors*, it is definitely a root. if (!HasForwardSuccessors(N, BUI)) { Roots.push_back(N); // Run DFS not to walk this part of CFG later. 
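The two runSemiNCA() steps above can also be seen in isolation. The following standalone toy (int vertex ids, a recursive DFS, and the naive O(n^2) EVAL instead of path compression) computes immediate dominators the same way: semidominators in reverse preorder, then climbing the spanning tree to the nearest common ancestor. It is an illustration only, not the templated LLVM implementation.

#include <algorithm>
#include <cstdio>
#include <vector>

struct ToySemiNCA {
  int N;                                     // vertices are 1..N
  std::vector<std::vector<int>> Succ, Pred;  // CFG edges
  std::vector<int> DFSNum, Order, Parent, Semi, IDom;

  explicit ToySemiNCA(int NumVertices)
      : N(NumVertices), Succ(N + 1), Pred(N + 1) {}

  void addEdge(int From, int To) {
    Succ[From].push_back(To);
    Pred[To].push_back(From);
  }

  void dfs(int V) {                          // preorder numbering, like runDFS
    DFSNum[V] = (int)Order.size();
    Order.push_back(V);
    for (int S : Succ[V])
      if (DFSNum[S] == 0) { Parent[S] = V; dfs(S); }
  }

  // Naive EVAL: among V and its spanning-tree ancestors whose DFS number is
  // at least LastLinked, return the vertex with the smallest semidominator.
  int eval(int V, int LastLinked) {
    if (DFSNum[V] < LastLinked) return V;
    int Best = V;
    while (DFSNum[Parent[V]] >= LastLinked) {
      V = Parent[V];
      if (Semi[DFSNum[V]] < Semi[DFSNum[Best]]) Best = V;
    }
    return Best;
  }

  void run(int Entry) {
    DFSNum.assign(N + 1, 0);
    Parent.assign(N + 1, 0);
    IDom.assign(N + 1, 0);
    Order = {0};                             // 1-based, dummy element 0
    dfs(Entry);
    const int Last = (int)Order.size() - 1;
    Semi.assign(Last + 1, 0);
    // Initialize semidominators to self and IDoms to spanning-tree parents.
    for (int i = 1; i <= Last; ++i) {
      Semi[i] = i;
      IDom[Order[i]] = Parent[Order[i]];
    }
    // Step #1: semidominators, processed in reverse preorder.
    for (int i = Last; i >= 2; --i) {
      int W = Order[i];
      Semi[i] = DFSNum[Parent[W]];
      for (int P : Pred[W]) {
        if (DFSNum[P] == 0) continue;        // skip unreachable predecessors
        Semi[i] = std::min(Semi[i], Semi[DFSNum[eval(P, i + 1)]]);
      }
    }
    // Step #2: IDom[W] = NCA(SDom[W], SpanningTreeParent(W)) on the tree.
    for (int i = 2; i <= Last; ++i) {
      int W = Order[i], Cand = IDom[W];
      while (DFSNum[Cand] > Semi[i]) Cand = IDom[Cand];
      IDom[W] = Cand;
    }
  }
};

int main() {
  // Diamond with a back edge: 1->2, 1->3, 2->4, 3->4, 4->2.
  ToySemiNCA G(4);
  G.addEdge(1, 2); G.addEdge(1, 3); G.addEdge(2, 4);
  G.addEdge(3, 4); G.addEdge(4, 2);
  G.run(1);
  for (int V = 2; V <= 4; ++V)
    std::printf("idom(%d) = %d\n", V, G.IDom[V]); // idom(2)=idom(3)=idom(4)=1
  return 0;
}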
Num = SNCA.runDFS(N, Num, AlwaysDescend, 1); LLVM_DEBUG(dbgs() << "Found a new trivial root: " << BlockNamePrinter(N) << "\n"); LLVM_DEBUG(dbgs() << "Last visited node: " << BlockNamePrinter(SNCA.NumToNode[Num]) << "\n"); } } LLVM_DEBUG(dbgs() << "\t\tLooking for non-trivial roots\n"); // Step #2: Find all non-trivial root candidates. Those are CFG nodes that // are reverse-unreachable were not visited by previous DFS walks (i.e. CFG // nodes in infinite loops). bool HasNonTrivialRoots = false; // Accounting for the virtual exit, see if we had any reverse-unreachable // nodes. if (Total + 1 != Num) { HasNonTrivialRoots = true; // Make another DFS pass over all other nodes to find the // reverse-unreachable blocks, and find the furthest paths we'll be able // to make. // Note that this looks N^2, but it's really 2N worst case, if every node // is unreachable. This is because we are still going to only visit each // unreachable node once, we may just visit it in two directions, // depending on how lucky we get. SmallPtrSet ConnectToExitBlock; for (const NodePtr I : nodes(DT.Parent)) { if (SNCA.NodeToInfo.count(I) == 0) { LLVM_DEBUG(dbgs() << "\t\t\tVisiting node " << BlockNamePrinter(I) << "\n"); // Find the furthest away we can get by following successors, then // follow them in reverse. This gives us some reasonable answer about // the post-dom tree inside any infinite loop. In particular, it // guarantees we get to the farthest away point along *some* // path. This also matches the GCC's behavior. // If we really wanted a totally complete picture of dominance inside // this infinite loop, we could do it with SCC-like algorithms to find // the lowest and highest points in the infinite loop. In theory, it // would be nice to give the canonical backedge for the loop, but it's // expensive and does not always lead to a minimal set of roots. LLVM_DEBUG(dbgs() << "\t\t\tRunning forward DFS\n"); const unsigned NewNum = SNCA.runDFS(I, Num, AlwaysDescend, Num); const NodePtr FurthestAway = SNCA.NumToNode[NewNum]; LLVM_DEBUG(dbgs() << "\t\t\tFound a new furthest away node " << "(non-trivial root): " << BlockNamePrinter(FurthestAway) << "\n"); ConnectToExitBlock.insert(FurthestAway); Roots.push_back(FurthestAway); LLVM_DEBUG(dbgs() << "\t\t\tPrev DFSNum: " << Num << ", new DFSNum: " << NewNum << "\n\t\t\tRemoving DFS info\n"); for (unsigned i = NewNum; i > Num; --i) { const NodePtr N = SNCA.NumToNode[i]; LLVM_DEBUG(dbgs() << "\t\t\t\tRemoving DFS info for " << BlockNamePrinter(N) << "\n"); SNCA.NodeToInfo.erase(N); SNCA.NumToNode.pop_back(); } const unsigned PrevNum = Num; LLVM_DEBUG(dbgs() << "\t\t\tRunning reverse DFS\n"); Num = SNCA.runDFS(FurthestAway, Num, AlwaysDescend, 1); for (unsigned i = PrevNum + 1; i <= Num; ++i) LLVM_DEBUG(dbgs() << "\t\t\t\tfound node " << BlockNamePrinter(SNCA.NumToNode[i]) << "\n"); } } } LLVM_DEBUG(dbgs() << "Total: " << Total << ", Num: " << Num << "\n"); LLVM_DEBUG(dbgs() << "Discovered CFG nodes:\n"); LLVM_DEBUG(for (size_t i = 0; i <= Num; ++i) dbgs() << i << ": " << BlockNamePrinter(SNCA.NumToNode[i]) << "\n"); assert((Total + 1 == Num) && "Everything should have been visited"); // Step #3: If we found some non-trivial roots, make them non-redundant. if (HasNonTrivialRoots) RemoveRedundantRoots(DT, BUI, Roots); LLVM_DEBUG(dbgs() << "Found roots: "); LLVM_DEBUG(for (auto *Root : Roots) dbgs() << BlockNamePrinter(Root) << " "); LLVM_DEBUG(dbgs() << "\n"); return Roots; } // This function only makes sense for postdominators. 
// We define roots to be some set of CFG nodes where (reverse) DFS walks have // to start in order to visit all the CFG nodes (including the // reverse-unreachable ones). // When the search for non-trivial roots is done it may happen that some of // the non-trivial roots are reverse-reachable from other non-trivial roots, // which makes them redundant. This function removes them from the set of // input roots. static void RemoveRedundantRoots(const DomTreeT &DT, BatchUpdatePtr BUI, RootsT &Roots) { assert(IsPostDom && "This function is for postdominators only"); LLVM_DEBUG(dbgs() << "Removing redundant roots\n"); SemiNCAInfo SNCA(BUI); for (unsigned i = 0; i < Roots.size(); ++i) { auto &Root = Roots[i]; // Trivial roots are always non-redundant. if (!HasForwardSuccessors(Root, BUI)) continue; LLVM_DEBUG(dbgs() << "\tChecking if " << BlockNamePrinter(Root) << " remains a root\n"); SNCA.clear(); // Do a forward walk looking for the other roots. const unsigned Num = SNCA.runDFS(Root, 0, AlwaysDescend, 0); // Skip the start node and begin from the second one (note that DFS uses // 1-based indexing). for (unsigned x = 2; x <= Num; ++x) { const NodePtr N = SNCA.NumToNode[x]; // If we wound another root in a (forward) DFS walk, remove the current // root from the set of roots, as it is reverse-reachable from the other // one. if (llvm::find(Roots, N) != Roots.end()) { LLVM_DEBUG(dbgs() << "\tForward DFS walk found another root " << BlockNamePrinter(N) << "\n\tRemoving root " << BlockNamePrinter(Root) << "\n"); std::swap(Root, Roots.back()); Roots.pop_back(); // Root at the back takes the current root's place. // Start the next loop iteration with the same index. --i; break; } } } } template void doFullDFSWalk(const DomTreeT &DT, DescendCondition DC) { if (!IsPostDom) { assert(DT.Roots.size() == 1 && "Dominators should have a singe root"); runDFS(DT.Roots[0], 0, DC, 0); return; } addVirtualRoot(); unsigned Num = 1; for (const NodePtr Root : DT.Roots) Num = runDFS(Root, Num, DC, 0); } static void CalculateFromScratch(DomTreeT &DT, BatchUpdatePtr BUI) { auto *Parent = DT.Parent; DT.reset(); DT.Parent = Parent; SemiNCAInfo SNCA(nullptr); // Since we are rebuilding the whole tree, // there's no point doing it incrementally. // Step #0: Number blocks in depth-first order and initialize variables used // in later stages of the algorithm. DT.Roots = FindRoots(DT, nullptr); SNCA.doFullDFSWalk(DT, AlwaysDescend); SNCA.runSemiNCA(DT); if (BUI) { BUI->IsRecalculated = true; LLVM_DEBUG( dbgs() << "DomTree recalculated, skipping future batch updates\n"); } if (DT.Roots.empty()) return; // Add a node for the root. If the tree is a PostDominatorTree it will be // the virtual exit (denoted by (BasicBlock *) nullptr) which postdominates // all real exits (including multiple exit blocks, infinite loops). NodePtr Root = IsPostDom ? nullptr : DT.Roots[0]; DT.RootNode = (DT.DomTreeNodes[Root] = llvm::make_unique>(Root, nullptr)) .get(); SNCA.attachNewSubtree(DT, DT.RootNode); } void attachNewSubtree(DomTreeT& DT, const TreeNodePtr AttachTo) { // Attach the first unreachable block to AttachTo. NodeToInfo[NumToNode[1]].IDom = AttachTo->getBlock(); // Loop over all of the discovered blocks in the function... for (size_t i = 1, e = NumToNode.size(); i != e; ++i) { NodePtr W = NumToNode[i]; LLVM_DEBUG(dbgs() << "\tdiscovered a new reachable node " << BlockNamePrinter(W) << "\n"); // Don't replace this with 'count', the insertion side effect is important if (DT.DomTreeNodes[W]) continue; // Haven't calculated this node yet? 
NodePtr ImmDom = getIDom(W); // Get or calculate the node for the immediate dominator. TreeNodePtr IDomNode = getNodeForBlock(ImmDom, DT); // Add a new tree node for this BasicBlock, and link it as a child of // IDomNode. DT.DomTreeNodes[W] = IDomNode->addChild( llvm::make_unique>(W, IDomNode)); } } void reattachExistingSubtree(DomTreeT &DT, const TreeNodePtr AttachTo) { NodeToInfo[NumToNode[1]].IDom = AttachTo->getBlock(); for (size_t i = 1, e = NumToNode.size(); i != e; ++i) { const NodePtr N = NumToNode[i]; const TreeNodePtr TN = DT.getNode(N); assert(TN); const TreeNodePtr NewIDom = DT.getNode(NodeToInfo[N].IDom); TN->setIDom(NewIDom); } } // Helper struct used during edge insertions. struct InsertionInfo { using BucketElementTy = std::pair; struct DecreasingLevel { bool operator()(const BucketElementTy &First, const BucketElementTy &Second) const { return First.first > Second.first; } }; std::priority_queue, DecreasingLevel> Bucket; // Queue of tree nodes sorted by level in descending order. SmallDenseSet Affected; SmallDenseMap Visited; SmallVector AffectedQueue; SmallVector VisitedNotAffectedQueue; }; static void InsertEdge(DomTreeT &DT, const BatchUpdatePtr BUI, const NodePtr From, const NodePtr To) { assert((From || IsPostDom) && "From has to be a valid CFG node or a virtual root"); assert(To && "Cannot be a nullptr"); LLVM_DEBUG(dbgs() << "Inserting edge " << BlockNamePrinter(From) << " -> " << BlockNamePrinter(To) << "\n"); TreeNodePtr FromTN = DT.getNode(From); if (!FromTN) { // Ignore edges from unreachable nodes for (forward) dominators. if (!IsPostDom) return; // The unreachable node becomes a new root -- a tree node for it. TreeNodePtr VirtualRoot = DT.getNode(nullptr); FromTN = (DT.DomTreeNodes[From] = VirtualRoot->addChild( llvm::make_unique>(From, VirtualRoot))) .get(); DT.Roots.push_back(From); } DT.DFSInfoValid = false; const TreeNodePtr ToTN = DT.getNode(To); if (!ToTN) InsertUnreachable(DT, BUI, FromTN, To); else InsertReachable(DT, BUI, FromTN, ToTN); } // Determines if some existing root becomes reverse-reachable after the // insertion. Rebuilds the whole tree if that situation happens. static bool UpdateRootsBeforeInsertion(DomTreeT &DT, const BatchUpdatePtr BUI, const TreeNodePtr From, const TreeNodePtr To) { assert(IsPostDom && "This function is only for postdominators"); // Destination node is not attached to the virtual root, so it cannot be a // root. if (!DT.isVirtualRoot(To->getIDom())) return false; auto RIt = llvm::find(DT.Roots, To->getBlock()); if (RIt == DT.Roots.end()) return false; // To is not a root, nothing to update. LLVM_DEBUG(dbgs() << "\t\tAfter the insertion, " << BlockNamePrinter(To) << " is no longer a root\n\t\tRebuilding the tree!!!\n"); CalculateFromScratch(DT, BUI); return true; } // Updates the set of roots after insertion or deletion. This ensures that // roots are the same when after a series of updates and when the tree would // be built from scratch. static void UpdateRootsAfterUpdate(DomTreeT &DT, const BatchUpdatePtr BUI) { assert(IsPostDom && "This function is only for postdominators"); // The tree has only trivial roots -- nothing to update. if (std::none_of(DT.Roots.begin(), DT.Roots.end(), [BUI](const NodePtr N) { return HasForwardSuccessors(N, BUI); })) return; // Recalculate the set of roots. auto Roots = FindRoots(DT, BUI); if (DT.Roots.size() != Roots.size() || !std::is_permutation(DT.Roots.begin(), DT.Roots.end(), Roots.begin())) { // The roots chosen in the CFG have changed. 
This is because the // incremental algorithm does not really know or use the set of roots and // can make a different (implicit) decision about which node within an // infinite loop becomes a root. LLVM_DEBUG(dbgs() << "Roots are different in updated trees\n" << "The entire tree needs to be rebuilt\n"); // It may be possible to update the tree without recalculating it, but // we do not know yet how to do it, and it happens rarely in practise. CalculateFromScratch(DT, BUI); return; } } // Handles insertion to a node already in the dominator tree. static void InsertReachable(DomTreeT &DT, const BatchUpdatePtr BUI, const TreeNodePtr From, const TreeNodePtr To) { LLVM_DEBUG(dbgs() << "\tReachable " << BlockNamePrinter(From->getBlock()) << " -> " << BlockNamePrinter(To->getBlock()) << "\n"); if (IsPostDom && UpdateRootsBeforeInsertion(DT, BUI, From, To)) return; // DT.findNCD expects both pointers to be valid. When From is a virtual // root, then its CFG block pointer is a nullptr, so we have to 'compute' // the NCD manually. const NodePtr NCDBlock = (From->getBlock() && To->getBlock()) ? DT.findNearestCommonDominator(From->getBlock(), To->getBlock()) : nullptr; assert(NCDBlock || DT.isPostDominator()); const TreeNodePtr NCD = DT.getNode(NCDBlock); assert(NCD); LLVM_DEBUG(dbgs() << "\t\tNCA == " << BlockNamePrinter(NCD) << "\n"); const TreeNodePtr ToIDom = To->getIDom(); // Nothing affected -- NCA property holds. // (Based on the lemma 2.5 from the second paper.) if (NCD == To || NCD == ToIDom) return; // Identify and collect affected nodes. InsertionInfo II; LLVM_DEBUG(dbgs() << "Marking " << BlockNamePrinter(To) << " as affected\n"); II.Affected.insert(To); const unsigned ToLevel = To->getLevel(); LLVM_DEBUG(dbgs() << "Putting " << BlockNamePrinter(To) << " into a Bucket\n"); II.Bucket.push({ToLevel, To}); while (!II.Bucket.empty()) { const TreeNodePtr CurrentNode = II.Bucket.top().second; const unsigned CurrentLevel = CurrentNode->getLevel(); II.Bucket.pop(); LLVM_DEBUG(dbgs() << "\tAdding to Visited and AffectedQueue: " << BlockNamePrinter(CurrentNode) << "\n"); II.Visited.insert({CurrentNode, CurrentLevel}); II.AffectedQueue.push_back(CurrentNode); // Discover and collect affected successors of the current node. VisitInsertion(DT, BUI, CurrentNode, CurrentLevel, NCD, II); } // Finish by updating immediate dominators and levels. UpdateInsertion(DT, BUI, NCD, II); } // Visits an affected node and collect its affected successors. static void VisitInsertion(DomTreeT &DT, const BatchUpdatePtr BUI, const TreeNodePtr TN, const unsigned RootLevel, const TreeNodePtr NCD, InsertionInfo &II) { const unsigned NCDLevel = NCD->getLevel(); LLVM_DEBUG(dbgs() << "Visiting " << BlockNamePrinter(TN) << ", RootLevel " << RootLevel << "\n"); SmallVector Stack = {TN}; assert(TN->getBlock() && II.Visited.count(TN) && "Preconditions!"); SmallPtrSet Processed; do { TreeNodePtr Next = Stack.pop_back_val(); LLVM_DEBUG(dbgs() << " Next: " << BlockNamePrinter(Next) << "\n"); for (const NodePtr Succ : ChildrenGetter::Get(Next->getBlock(), BUI)) { const TreeNodePtr SuccTN = DT.getNode(Succ); assert(SuccTN && "Unreachable successor found at reachable insertion"); const unsigned SuccLevel = SuccTN->getLevel(); LLVM_DEBUG(dbgs() << "\tSuccessor " << BlockNamePrinter(Succ) << ", level = " << SuccLevel << "\n"); // Do not process the same node multiple times. if (Processed.count(Next) > 0) continue; // Succ dominated by subtree From -- not affected. // (Based on the lemma 2.5 from the second paper.) 
if (SuccLevel > RootLevel) { LLVM_DEBUG(dbgs() << "\t\tDominated by subtree From\n"); if (II.Visited.count(SuccTN) != 0) { LLVM_DEBUG(dbgs() << "\t\t\talready visited at level " << II.Visited[SuccTN] << "\n\t\t\tcurrent level " << RootLevel << ")\n"); // A node can be necessary to visit again if we see it again at // a lower level than before. if (II.Visited[SuccTN] >= RootLevel) continue; } LLVM_DEBUG(dbgs() << "\t\tMarking visited not affected " << BlockNamePrinter(Succ) << "\n"); II.Visited.insert({SuccTN, RootLevel}); II.VisitedNotAffectedQueue.push_back(SuccTN); Stack.push_back(SuccTN); } else if ((SuccLevel > NCDLevel + 1) && II.Affected.count(SuccTN) == 0) { LLVM_DEBUG(dbgs() << "\t\tMarking affected and adding " << BlockNamePrinter(Succ) << " to a Bucket\n"); II.Affected.insert(SuccTN); II.Bucket.push({SuccLevel, SuccTN}); } } Processed.insert(Next); } while (!Stack.empty()); } // Updates immediate dominators and levels after insertion. static void UpdateInsertion(DomTreeT &DT, const BatchUpdatePtr BUI, const TreeNodePtr NCD, InsertionInfo &II) { LLVM_DEBUG(dbgs() << "Updating NCD = " << BlockNamePrinter(NCD) << "\n"); for (const TreeNodePtr TN : II.AffectedQueue) { LLVM_DEBUG(dbgs() << "\tIDom(" << BlockNamePrinter(TN) << ") = " << BlockNamePrinter(NCD) << "\n"); TN->setIDom(NCD); } UpdateLevelsAfterInsertion(II); if (IsPostDom) UpdateRootsAfterUpdate(DT, BUI); } static void UpdateLevelsAfterInsertion(InsertionInfo &II) { LLVM_DEBUG( dbgs() << "Updating levels for visited but not affected nodes\n"); for (const TreeNodePtr TN : II.VisitedNotAffectedQueue) { LLVM_DEBUG(dbgs() << "\tlevel(" << BlockNamePrinter(TN) << ") = (" << BlockNamePrinter(TN->getIDom()) << ") " << TN->getIDom()->getLevel() << " + 1\n"); TN->UpdateLevel(); } } // Handles insertion to previously unreachable nodes. static void InsertUnreachable(DomTreeT &DT, const BatchUpdatePtr BUI, const TreeNodePtr From, const NodePtr To) { LLVM_DEBUG(dbgs() << "Inserting " << BlockNamePrinter(From) << " -> (unreachable) " << BlockNamePrinter(To) << "\n"); // Collect discovered edges to already reachable nodes. SmallVector, 8> DiscoveredEdgesToReachable; // Discover and connect nodes that became reachable with the insertion. ComputeUnreachableDominators(DT, BUI, To, From, DiscoveredEdgesToReachable); LLVM_DEBUG(dbgs() << "Inserted " << BlockNamePrinter(From) << " -> (prev unreachable) " << BlockNamePrinter(To) << "\n"); // Used the discovered edges and inset discovered connecting (incoming) // edges. for (const auto &Edge : DiscoveredEdgesToReachable) { LLVM_DEBUG(dbgs() << "\tInserting discovered connecting edge " << BlockNamePrinter(Edge.first) << " -> " << BlockNamePrinter(Edge.second) << "\n"); InsertReachable(DT, BUI, DT.getNode(Edge.first), Edge.second); } } // Connects nodes that become reachable with an insertion. static void ComputeUnreachableDominators( DomTreeT &DT, const BatchUpdatePtr BUI, const NodePtr Root, const TreeNodePtr Incoming, SmallVectorImpl> &DiscoveredConnectingEdges) { assert(!DT.getNode(Root) && "Root must not be reachable"); // Visit only previously unreachable nodes. 
auto UnreachableDescender = [&DT, &DiscoveredConnectingEdges](NodePtr From, NodePtr To) { const TreeNodePtr ToTN = DT.getNode(To); if (!ToTN) return true; DiscoveredConnectingEdges.push_back({From, ToTN}); return false; }; SemiNCAInfo SNCA(BUI); SNCA.runDFS(Root, 0, UnreachableDescender, 0); SNCA.runSemiNCA(DT); SNCA.attachNewSubtree(DT, Incoming); LLVM_DEBUG(dbgs() << "After adding unreachable nodes\n"); } static void DeleteEdge(DomTreeT &DT, const BatchUpdatePtr BUI, const NodePtr From, const NodePtr To) { assert(From && To && "Cannot disconnect nullptrs"); LLVM_DEBUG(dbgs() << "Deleting edge " << BlockNamePrinter(From) << " -> " << BlockNamePrinter(To) << "\n"); #ifndef NDEBUG // Ensure that the edge was in fact deleted from the CFG before informing // the DomTree about it. // The check is O(N), so run it only in debug configuration. auto IsSuccessor = [BUI](const NodePtr SuccCandidate, const NodePtr Of) { auto Successors = ChildrenGetter::Get(Of, BUI); return llvm::find(Successors, SuccCandidate) != Successors.end(); }; (void)IsSuccessor; assert(!IsSuccessor(To, From) && "Deleted edge still exists in the CFG!"); #endif const TreeNodePtr FromTN = DT.getNode(From); // Deletion in an unreachable subtree -- nothing to do. if (!FromTN) return; const TreeNodePtr ToTN = DT.getNode(To); if (!ToTN) { LLVM_DEBUG( dbgs() << "\tTo (" << BlockNamePrinter(To) << ") already unreachable -- there is no edge to delete\n"); return; } const NodePtr NCDBlock = DT.findNearestCommonDominator(From, To); const TreeNodePtr NCD = DT.getNode(NCDBlock); // If To dominates From -- nothing to do. if (ToTN != NCD) { DT.DFSInfoValid = false; const TreeNodePtr ToIDom = ToTN->getIDom(); LLVM_DEBUG(dbgs() << "\tNCD " << BlockNamePrinter(NCD) << ", ToIDom " << BlockNamePrinter(ToIDom) << "\n"); // To remains reachable after deletion. // (Based on the caption under Figure 4. from the second paper.) if (FromTN != ToIDom || HasProperSupport(DT, BUI, ToTN)) DeleteReachable(DT, BUI, FromTN, ToTN); else DeleteUnreachable(DT, BUI, ToTN); } if (IsPostDom) UpdateRootsAfterUpdate(DT, BUI); } // Handles deletions that leave destination nodes reachable. static void DeleteReachable(DomTreeT &DT, const BatchUpdatePtr BUI, const TreeNodePtr FromTN, const TreeNodePtr ToTN) { LLVM_DEBUG(dbgs() << "Deleting reachable " << BlockNamePrinter(FromTN) << " -> " << BlockNamePrinter(ToTN) << "\n"); LLVM_DEBUG(dbgs() << "\tRebuilding subtree\n"); // Find the top of the subtree that needs to be rebuilt. // (Based on the lemma 2.6 from the second paper.) const NodePtr ToIDom = DT.findNearestCommonDominator(FromTN->getBlock(), ToTN->getBlock()); assert(ToIDom || DT.isPostDominator()); const TreeNodePtr ToIDomTN = DT.getNode(ToIDom); assert(ToIDomTN); const TreeNodePtr PrevIDomSubTree = ToIDomTN->getIDom(); // Top of the subtree to rebuild is the root node. Rebuild the tree from // scratch. if (!PrevIDomSubTree) { LLVM_DEBUG(dbgs() << "The entire tree needs to be rebuilt\n"); CalculateFromScratch(DT, BUI); return; } // Only visit nodes in the subtree starting at To. 
const unsigned Level = ToIDomTN->getLevel(); auto DescendBelow = [Level, &DT](NodePtr, NodePtr To) { return DT.getNode(To)->getLevel() > Level; }; LLVM_DEBUG(dbgs() << "\tTop of subtree: " << BlockNamePrinter(ToIDomTN) << "\n"); SemiNCAInfo SNCA(BUI); SNCA.runDFS(ToIDom, 0, DescendBelow, 0); LLVM_DEBUG(dbgs() << "\tRunning Semi-NCA\n"); SNCA.runSemiNCA(DT, Level); SNCA.reattachExistingSubtree(DT, PrevIDomSubTree); } // Checks if a node has proper support, as defined on the page 3 and later // explained on the page 7 of the second paper. static bool HasProperSupport(DomTreeT &DT, const BatchUpdatePtr BUI, const TreeNodePtr TN) { LLVM_DEBUG(dbgs() << "IsReachableFromIDom " << BlockNamePrinter(TN) << "\n"); for (const NodePtr Pred : ChildrenGetter::Get(TN->getBlock(), BUI)) { LLVM_DEBUG(dbgs() << "\tPred " << BlockNamePrinter(Pred) << "\n"); if (!DT.getNode(Pred)) continue; const NodePtr Support = DT.findNearestCommonDominator(TN->getBlock(), Pred); LLVM_DEBUG(dbgs() << "\tSupport " << BlockNamePrinter(Support) << "\n"); if (Support != TN->getBlock()) { LLVM_DEBUG(dbgs() << "\t" << BlockNamePrinter(TN) << " is reachable from support " << BlockNamePrinter(Support) << "\n"); return true; } } return false; } // Handle deletions that make destination node unreachable. // (Based on the lemma 2.7 from the second paper.) static void DeleteUnreachable(DomTreeT &DT, const BatchUpdatePtr BUI, const TreeNodePtr ToTN) { LLVM_DEBUG(dbgs() << "Deleting unreachable subtree " << BlockNamePrinter(ToTN) << "\n"); assert(ToTN); assert(ToTN->getBlock()); if (IsPostDom) { // Deletion makes a region reverse-unreachable and creates a new root. // Simulate that by inserting an edge from the virtual root to ToTN and // adding it as a new root. LLVM_DEBUG(dbgs() << "\tDeletion made a region reverse-unreachable\n"); LLVM_DEBUG(dbgs() << "\tAdding new root " << BlockNamePrinter(ToTN) << "\n"); DT.Roots.push_back(ToTN->getBlock()); InsertReachable(DT, BUI, DT.getNode(nullptr), ToTN); return; } SmallVector AffectedQueue; const unsigned Level = ToTN->getLevel(); // Traverse destination node's descendants with greater level in the tree // and collect visited nodes. auto DescendAndCollect = [Level, &AffectedQueue, &DT](NodePtr, NodePtr To) { const TreeNodePtr TN = DT.getNode(To); assert(TN); if (TN->getLevel() > Level) return true; if (llvm::find(AffectedQueue, To) == AffectedQueue.end()) AffectedQueue.push_back(To); return false; }; SemiNCAInfo SNCA(BUI); unsigned LastDFSNum = SNCA.runDFS(ToTN->getBlock(), 0, DescendAndCollect, 0); TreeNodePtr MinNode = ToTN; // Identify the top of the subtree to rebuild by finding the NCD of all // the affected nodes. for (const NodePtr N : AffectedQueue) { const TreeNodePtr TN = DT.getNode(N); const NodePtr NCDBlock = DT.findNearestCommonDominator(TN->getBlock(), ToTN->getBlock()); assert(NCDBlock || DT.isPostDominator()); const TreeNodePtr NCD = DT.getNode(NCDBlock); assert(NCD); LLVM_DEBUG(dbgs() << "Processing affected node " << BlockNamePrinter(TN) << " with NCD = " << BlockNamePrinter(NCD) << ", MinNode =" << BlockNamePrinter(MinNode) << "\n"); if (NCD != TN && NCD->getLevel() < MinNode->getLevel()) MinNode = NCD; } // Root reached, rebuild the whole tree from scratch. if (!MinNode->getIDom()) { LLVM_DEBUG(dbgs() << "The entire tree needs to be rebuilt\n"); CalculateFromScratch(DT, BUI); return; } // Erase the unreachable subtree in reverse preorder to process all children // before deleting their parent. 
for (unsigned i = LastDFSNum; i > 0; --i) { const NodePtr N = SNCA.NumToNode[i]; const TreeNodePtr TN = DT.getNode(N); LLVM_DEBUG(dbgs() << "Erasing node " << BlockNamePrinter(TN) << "\n"); EraseNode(DT, TN); } // The affected subtree start at the To node -- there's no extra work to do. if (MinNode == ToTN) return; LLVM_DEBUG(dbgs() << "DeleteUnreachable: running DFS with MinNode = " << BlockNamePrinter(MinNode) << "\n"); const unsigned MinLevel = MinNode->getLevel(); const TreeNodePtr PrevIDom = MinNode->getIDom(); assert(PrevIDom); SNCA.clear(); // Identify nodes that remain in the affected subtree. auto DescendBelow = [MinLevel, &DT](NodePtr, NodePtr To) { const TreeNodePtr ToTN = DT.getNode(To); return ToTN && ToTN->getLevel() > MinLevel; }; SNCA.runDFS(MinNode->getBlock(), 0, DescendBelow, 0); LLVM_DEBUG(dbgs() << "Previous IDom(MinNode) = " << BlockNamePrinter(PrevIDom) << "\nRunning Semi-NCA\n"); // Rebuild the remaining part of affected subtree. SNCA.runSemiNCA(DT, MinLevel); SNCA.reattachExistingSubtree(DT, PrevIDom); } // Removes leaf tree nodes from the dominator tree. static void EraseNode(DomTreeT &DT, const TreeNodePtr TN) { assert(TN); assert(TN->getNumChildren() == 0 && "Not a tree leaf"); const TreeNodePtr IDom = TN->getIDom(); assert(IDom); auto ChIt = llvm::find(IDom->Children, TN); assert(ChIt != IDom->Children.end()); std::swap(*ChIt, IDom->Children.back()); IDom->Children.pop_back(); DT.DomTreeNodes.erase(TN->getBlock()); } //~~ //===--------------------- DomTree Batch Updater --------------------------=== //~~ static void ApplyUpdates(DomTreeT &DT, ArrayRef Updates) { const size_t NumUpdates = Updates.size(); if (NumUpdates == 0) return; // Take the fast path for a single update and avoid running the batch update // machinery. if (NumUpdates == 1) { const auto &Update = Updates.front(); if (Update.getKind() == UpdateKind::Insert) DT.insertEdge(Update.getFrom(), Update.getTo()); else DT.deleteEdge(Update.getFrom(), Update.getTo()); return; } BatchUpdateInfo BUI; LegalizeUpdates(Updates, BUI.Updates); const size_t NumLegalized = BUI.Updates.size(); BUI.FutureSuccessors.reserve(NumLegalized); BUI.FuturePredecessors.reserve(NumLegalized); // Use the legalized future updates to initialize future successors and // predecessors. Note that these sets will only decrease size over time, as // the next CFG snapshots slowly approach the actual (current) CFG. for (UpdateT &U : BUI.Updates) { BUI.FutureSuccessors[U.getFrom()].push_back({U.getTo(), U.getKind()}); BUI.FuturePredecessors[U.getTo()].push_back({U.getFrom(), U.getKind()}); } LLVM_DEBUG(dbgs() << "About to apply " << NumLegalized << " updates\n"); LLVM_DEBUG(if (NumLegalized < 32) for (const auto &U : reverse(BUI.Updates)) dbgs() << '\t' << U << "\n"); LLVM_DEBUG(dbgs() << "\n"); + // Recalculate the DominatorTree when the number of updates + // exceeds a threshold, which usually makes direct updating slower than + // recalculation. We select this threshold proportional to the + // size of the DominatorTree. The constant is selected + // by choosing the one with an acceptable performance on some real-world + // inputs. + + // Make unittests of the incremental algorithm work + if (DT.DomTreeNodes.size() <= 100) { + if (NumLegalized > DT.DomTreeNodes.size()) + CalculateFromScratch(DT, &BUI); + } else if (NumLegalized > DT.DomTreeNodes.size() / 40) + CalculateFromScratch(DT, &BUI); + // If the DominatorTree was recalculated at some point, stop the batch // updates. 
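From a client's perspective, the point of the batching (and of the recalculation threshold added in this revision) is that a pass hands all of its CFG edits to the tree at once instead of one edge at a time. Below is a hedged usage sketch; it assumes DominatorTree exposes insertEdge()/deleteEdge() (used by the fast path above) and an applyUpdates() wrapper over the builder entry points in this file, and every other name in it is invented.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include <utility>

using namespace llvm;

// A single rewired edge can take the fast path directly.
static void onSingleEdgeRewired(DominatorTree &DT, BasicBlock *From,
                                BasicBlock *OldSucc, BasicBlock *NewSucc) {
  DT.deleteEdge(From, OldSucc);
  DT.insertEdge(From, NewSucc);
}

// Many edits are collected first and applied as one batch, so they can be
// legalized and, if numerous enough relative to the tree size, replaced by a
// single recalculation (the heuristic introduced above).
static void onManyEdgesRewired(
    DominatorTree &DT,
    ArrayRef<std::pair<BasicBlock *, BasicBlock *>> DeletedEdges,
    ArrayRef<std::pair<BasicBlock *, BasicBlock *>> InsertedEdges) {
  SmallVector<DominatorTree::UpdateType, 8> Updates;
  for (const auto &E : DeletedEdges)
    Updates.push_back({DominatorTree::Delete, E.first, E.second});
  for (const auto &E : InsertedEdges)
    Updates.push_back({DominatorTree::Insert, E.first, E.second});
  DT.applyUpdates(Updates);
}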
Full recalculations ignore batch updates and look at the actual // CFG. for (size_t i = 0; i < NumLegalized && !BUI.IsRecalculated; ++i) ApplyNextUpdate(DT, BUI); } // This function serves double purpose: // a) It removes redundant updates, which makes it easier to reverse-apply // them when traversing CFG. // b) It optimizes away updates that cancel each other out, as the end result // is the same. // // It relies on the property of the incremental updates that says that the // order of updates doesn't matter. This allows us to reorder them and end up // with the exact same DomTree every time. // // Following the same logic, the function doesn't care about the order of // input updates, so it's OK to pass it an unordered sequence of updates, that // doesn't make sense when applied sequentially, eg. performing double // insertions or deletions and then doing an opposite update. // // In the future, it should be possible to schedule updates in way that // minimizes the amount of work needed done during incremental updates. static void LegalizeUpdates(ArrayRef AllUpdates, SmallVectorImpl &Result) { LLVM_DEBUG(dbgs() << "Legalizing " << AllUpdates.size() << " updates\n"); // Count the total number of inserions of each edge. // Each insertion adds 1 and deletion subtracts 1. The end number should be // one of {-1 (deletion), 0 (NOP), +1 (insertion)}. Otherwise, the sequence // of updates contains multiple updates of the same kind and we assert for // that case. SmallDenseMap, int, 4> Operations; Operations.reserve(AllUpdates.size()); for (const auto &U : AllUpdates) { NodePtr From = U.getFrom(); NodePtr To = U.getTo(); if (IsPostDom) std::swap(From, To); // Reverse edge for postdominators. Operations[{From, To}] += (U.getKind() == UpdateKind::Insert ? 1 : -1); } Result.clear(); Result.reserve(Operations.size()); for (auto &Op : Operations) { const int NumInsertions = Op.second; assert(std::abs(NumInsertions) <= 1 && "Unbalanced operations!"); if (NumInsertions == 0) continue; const UpdateKind UK = NumInsertions > 0 ? UpdateKind::Insert : UpdateKind::Delete; Result.push_back({UK, Op.first.first, Op.first.second}); } // Make the order consistent by not relying on pointer values within the // set. Reuse the old Operations map. // In the future, we should sort by something else to minimize the amount // of work needed to perform the series of updates. for (size_t i = 0, e = AllUpdates.size(); i != e; ++i) { const auto &U = AllUpdates[i]; if (!IsPostDom) Operations[{U.getFrom(), U.getTo()}] = int(i); else Operations[{U.getTo(), U.getFrom()}] = int(i); } llvm::sort(Result.begin(), Result.end(), [&Operations](const UpdateT &A, const UpdateT &B) { return Operations[{A.getFrom(), A.getTo()}] > Operations[{B.getFrom(), B.getTo()}]; }); } static void ApplyNextUpdate(DomTreeT &DT, BatchUpdateInfo &BUI) { assert(!BUI.Updates.empty() && "No updates to apply!"); UpdateT CurrentUpdate = BUI.Updates.pop_back_val(); LLVM_DEBUG(dbgs() << "Applying update: " << CurrentUpdate << "\n"); // Move to the next snapshot of the CFG by removing the reverse-applied // current update. Since updates are performed in the same order they are // legalized it's sufficient to pop the last item here. 
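The counting trick used by LegalizeUpdates can be restated as a small toy on plain ints (the deterministic re-sorting step is omitted): each edge accumulates +1 per insertion and -1 per deletion, and only edges with a nonzero net count survive, so opposite updates cancel out.

// Toy restatement of the legalization counting rule; not LLVM code.
#include <cassert>
#include <map>
#include <utility>
#include <vector>

enum class Kind { Insert, Delete };
struct Update { Kind K; int From, To; };

std::vector<Update> legalize(const std::vector<Update> &All) {
  std::map<std::pair<int, int>, int> Net;
  for (const Update &U : All)
    Net[{U.From, U.To}] += (U.K == Kind::Insert ? 1 : -1);
  std::vector<Update> Out;
  for (const auto &E : Net) {
    assert(E.second >= -1 && E.second <= 1 && "Unbalanced operations!");
    if (E.second != 0)
      Out.push_back({E.second > 0 ? Kind::Insert : Kind::Delete,
                     E.first.first, E.first.second});
  }
  return Out;
}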
auto &FS = BUI.FutureSuccessors[CurrentUpdate.getFrom()]; assert(FS.back().getPointer() == CurrentUpdate.getTo() && FS.back().getInt() == CurrentUpdate.getKind()); FS.pop_back(); if (FS.empty()) BUI.FutureSuccessors.erase(CurrentUpdate.getFrom()); auto &FP = BUI.FuturePredecessors[CurrentUpdate.getTo()]; assert(FP.back().getPointer() == CurrentUpdate.getFrom() && FP.back().getInt() == CurrentUpdate.getKind()); FP.pop_back(); if (FP.empty()) BUI.FuturePredecessors.erase(CurrentUpdate.getTo()); if (CurrentUpdate.getKind() == UpdateKind::Insert) InsertEdge(DT, &BUI, CurrentUpdate.getFrom(), CurrentUpdate.getTo()); else DeleteEdge(DT, &BUI, CurrentUpdate.getFrom(), CurrentUpdate.getTo()); } //~~ //===--------------- DomTree correctness verification ---------------------=== //~~ // Check if the tree has correct roots. A DominatorTree always has a single // root which is the function's entry node. A PostDominatorTree can have // multiple roots - one for each node with no successors and for infinite // loops. // Running time: O(N). bool verifyRoots(const DomTreeT &DT) { if (!DT.Parent && !DT.Roots.empty()) { errs() << "Tree has no parent but has roots!\n"; errs().flush(); return false; } if (!IsPostDom) { if (DT.Roots.empty()) { errs() << "Tree doesn't have a root!\n"; errs().flush(); return false; } if (DT.getRoot() != GetEntryNode(DT)) { errs() << "Tree's root is not its parent's entry node!\n"; errs().flush(); return false; } } RootsT ComputedRoots = FindRoots(DT, nullptr); if (DT.Roots.size() != ComputedRoots.size() || !std::is_permutation(DT.Roots.begin(), DT.Roots.end(), ComputedRoots.begin())) { errs() << "Tree has different roots than freshly computed ones!\n"; errs() << "\tPDT roots: "; for (const NodePtr N : DT.Roots) errs() << BlockNamePrinter(N) << ", "; errs() << "\n\tComputed roots: "; for (const NodePtr N : ComputedRoots) errs() << BlockNamePrinter(N) << ", "; errs() << "\n"; errs().flush(); return false; } return true; } // Checks if the tree contains all reachable nodes in the input graph. // Running time: O(N). bool verifyReachability(const DomTreeT &DT) { clear(); doFullDFSWalk(DT, AlwaysDescend); for (auto &NodeToTN : DT.DomTreeNodes) { const TreeNodePtr TN = NodeToTN.second.get(); const NodePtr BB = TN->getBlock(); // Virtual root has a corresponding virtual CFG node. if (DT.isVirtualRoot(TN)) continue; if (NodeToInfo.count(BB) == 0) { errs() << "DomTree node " << BlockNamePrinter(BB) << " not found by DFS walk!\n"; errs().flush(); return false; } } for (const NodePtr N : NumToNode) { if (N && !DT.getNode(N)) { errs() << "CFG node " << BlockNamePrinter(N) << " not found in the DomTree!\n"; errs().flush(); return false; } } return true; } // Check if for every parent with a level L in the tree all of its children // have level L + 1. // Running time: O(N). 
static bool VerifyLevels(const DomTreeT &DT) { for (auto &NodeToTN : DT.DomTreeNodes) { const TreeNodePtr TN = NodeToTN.second.get(); const NodePtr BB = TN->getBlock(); if (!BB) continue; const TreeNodePtr IDom = TN->getIDom(); if (!IDom && TN->getLevel() != 0) { errs() << "Node without an IDom " << BlockNamePrinter(BB) << " has a nonzero level " << TN->getLevel() << "!\n"; errs().flush(); return false; } if (IDom && TN->getLevel() != IDom->getLevel() + 1) { errs() << "Node " << BlockNamePrinter(BB) << " has level " << TN->getLevel() << " while its IDom " << BlockNamePrinter(IDom->getBlock()) << " has level " << IDom->getLevel() << "!\n"; errs().flush(); return false; } } return true; } // Check if the computed DFS numbers are correct. Note that DFS info may not // be valid, and when that is the case, we don't verify the numbers. // Running time: O(N log(N)). static bool VerifyDFSNumbers(const DomTreeT &DT) { if (!DT.DFSInfoValid || !DT.Parent) return true; const NodePtr RootBB = IsPostDom ? nullptr : DT.getRoots()[0]; const TreeNodePtr Root = DT.getNode(RootBB); auto PrintNodeAndDFSNums = [](const TreeNodePtr TN) { errs() << BlockNamePrinter(TN) << " {" << TN->getDFSNumIn() << ", " << TN->getDFSNumOut() << '}'; }; // Verify the root's DFS In number. Although DFS numbering would also work // if we started from some other value, we assume 0-based numbering. if (Root->getDFSNumIn() != 0) { errs() << "DFSIn number for the tree root is not:\n\t"; PrintNodeAndDFSNums(Root); errs() << '\n'; errs().flush(); return false; } // For each tree node verify if children's DFS numbers cover their parent's // DFS numbers with no gaps. for (const auto &NodeToTN : DT.DomTreeNodes) { const TreeNodePtr Node = NodeToTN.second.get(); // Handle tree leaves. if (Node->getChildren().empty()) { if (Node->getDFSNumIn() + 1 != Node->getDFSNumOut()) { errs() << "Tree leaf should have DFSOut = DFSIn + 1:\n\t"; PrintNodeAndDFSNums(Node); errs() << '\n'; errs().flush(); return false; } continue; } // Make a copy and sort it such that it is possible to check if there are // no gaps between DFS numbers of adjacent children. SmallVector Children(Node->begin(), Node->end()); llvm::sort(Children.begin(), Children.end(), [](const TreeNodePtr Ch1, const TreeNodePtr Ch2) { return Ch1->getDFSNumIn() < Ch2->getDFSNumIn(); }); auto PrintChildrenError = [Node, &Children, PrintNodeAndDFSNums]( const TreeNodePtr FirstCh, const TreeNodePtr SecondCh) { assert(FirstCh); errs() << "Incorrect DFS numbers for:\n\tParent "; PrintNodeAndDFSNums(Node); errs() << "\n\tChild "; PrintNodeAndDFSNums(FirstCh); if (SecondCh) { errs() << "\n\tSecond child "; PrintNodeAndDFSNums(SecondCh); } errs() << "\nAll children: "; for (const TreeNodePtr Ch : Children) { PrintNodeAndDFSNums(Ch); errs() << ", "; } errs() << '\n'; errs().flush(); }; if (Children.front()->getDFSNumIn() != Node->getDFSNumIn() + 1) { PrintChildrenError(Children.front(), nullptr); return false; } if (Children.back()->getDFSNumOut() + 1 != Node->getDFSNumOut()) { PrintChildrenError(Children.back(), nullptr); return false; } for (size_t i = 0, e = Children.size() - 1; i != e; ++i) { if (Children[i]->getDFSNumOut() + 1 != Children[i + 1]->getDFSNumIn()) { PrintChildrenError(Children[i], Children[i + 1]); return false; } } } return true; } // The below routines verify the correctness of the dominator tree relative to // the CFG it's coming from. A tree is a dominator tree iff it has two // properties, called the parent property and the sibling property. 
Tarjan // and Lengauer prove (but don't explicitly name) the properties as part of // the proofs in their 1972 paper, but the proofs are mostly part of proving // things about semidominators and idoms, and some of them are simply asserted // based on even earlier papers (see, e.g., lemma 2). Some papers refer to // these properties as "valid" and "co-valid". See, e.g., "Dominators, // directed bipolar orders, and independent spanning trees" by Loukas // Georgiadis and Robert E. Tarjan, as well as "Dominator Tree Verification // and Vertex-Disjoint Paths " by the same authors. // A very simple and direct explanation of these properties can be found in // "An Experimental Study of Dynamic Dominators", found at // https://arxiv.org/abs/1604.02711 // The easiest way to think of the parent property is that it's a requirement // of being a dominator. Let's just take immediate dominators. For PARENT to // be an immediate dominator of CHILD, all paths in the CFG must go through // PARENT before they hit CHILD. This implies that if you were to cut PARENT // out of the CFG, there should be no paths to CHILD that are reachable. If // there are, then you now have a path from PARENT to CHILD that goes around // PARENT and still reaches CHILD, which by definition, means PARENT can't be // a dominator of CHILD (let alone an immediate one). // The sibling property is similar. It says that for each pair of sibling // nodes in the dominator tree (LEFT and RIGHT) , they must not dominate each // other. If sibling LEFT dominated sibling RIGHT, it means there are no // paths in the CFG from sibling LEFT to sibling RIGHT that do not go through // LEFT, and thus, LEFT is really an ancestor (in the dominator tree) of // RIGHT, not a sibling. // It is possible to verify the parent and sibling properties in // linear time, but the algorithms are complex. Instead, we do it in a // straightforward N^2 and N^3 way below, using direct path reachability. // Checks if the tree has the parent property: if for all edges from V to W in // the input graph, such that V is reachable, the parent of W in the tree is // an ancestor of V in the tree. // Running time: O(N^2). // // This means that if a node gets disconnected from the graph, then all of // the nodes it dominated previously will now become unreachable. bool verifyParentProperty(const DomTreeT &DT) { for (auto &NodeToTN : DT.DomTreeNodes) { const TreeNodePtr TN = NodeToTN.second.get(); const NodePtr BB = TN->getBlock(); if (!BB || TN->getChildren().empty()) continue; LLVM_DEBUG(dbgs() << "Verifying parent property of node " << BlockNamePrinter(TN) << "\n"); clear(); doFullDFSWalk(DT, [BB](NodePtr From, NodePtr To) { return From != BB && To != BB; }); for (TreeNodePtr Child : TN->getChildren()) if (NodeToInfo.count(Child->getBlock()) != 0) { errs() << "Child " << BlockNamePrinter(Child) << " reachable after its parent " << BlockNamePrinter(BB) << " is removed!\n"; errs().flush(); return false; } } return true; } // Check if the tree has sibling property: if a node V does not dominate a // node W for all siblings V and W in the tree. // Running time: O(N^3). // // This means that if a node gets disconnected from the graph, then all of its // siblings will now still be reachable. 
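// A minimal standalone sketch of the brute-force idea described above: cut a
// candidate node out of a toy CFG and re-run reachability from the entry,
// which is the same filter doFullDFSWalk() applies in the verifiers below.
// The adjacency list and node numbers are made up for illustration; this is
// not LLVM code.
#include <cstdio>
#include <vector>

using Graph = std::vector<std::vector<int>>; // Node 0 is the entry.

// Which nodes stay reachable from the entry when Removed is cut out?
static std::vector<bool> reachableWithout(const Graph &G, int Removed) {
  std::vector<bool> Seen(G.size(), false);
  std::vector<int> Stack;
  if (Removed != 0) {
    Seen[0] = true;
    Stack.push_back(0);
  }
  while (!Stack.empty()) {
    int V = Stack.back();
    Stack.pop_back();
    for (int S : G[V])
      if (S != Removed && !Seen[S]) {
        Seen[S] = true;
        Stack.push_back(S);
      }
  }
  return Seen;
}

int main() {
  // 0 -> 1 -> 2 and 0 -> 3 -> 2: node 2 is reached on two disjoint paths, so
  // neither 1 nor 3 dominates it. Cutting 1 must leave 2 reachable (sibling
  // property), while cutting the entry 0 must leave nothing reachable (parent
  // property, since the entry dominates everything).
  Graph G = {{1, 3}, {2}, {}, {2}};
  std::printf("2 reachable without 1: %d\n", reachableWithout(G, 1)[2] ? 1 : 0);
  std::printf("2 reachable without 0: %d\n", reachableWithout(G, 0)[2] ? 1 : 0);
}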
  bool verifySiblingProperty(const DomTreeT &DT) {
    for (auto &NodeToTN : DT.DomTreeNodes) {
      const TreeNodePtr TN = NodeToTN.second.get();
      const NodePtr BB = TN->getBlock();
      if (!BB || TN->getChildren().empty()) continue;

      const auto &Siblings = TN->getChildren();
      for (const TreeNodePtr N : Siblings) {
        clear();
        NodePtr BBN = N->getBlock();
        doFullDFSWalk(DT, [BBN](NodePtr From, NodePtr To) {
          return From != BBN && To != BBN;
        });

        for (const TreeNodePtr S : Siblings) {
          if (S == N) continue;

          if (NodeToInfo.count(S->getBlock()) == 0) {
            errs() << "Node " << BlockNamePrinter(S)
                   << " not reachable when its sibling " << BlockNamePrinter(N)
                   << " is removed!\n";
            errs().flush();

            return false;
          }
        }
      }
    }

    return true;
  }

  // Check if the given tree is the same as a freshly computed one for the same
  // Parent.
  // Running time: O(N^2), but faster in practice (same as tree construction).
  //
  // Note that this does not check that the tree construction algorithm is
  // correct and should only be used for fast (but possibly unsound)
  // verification.
  static bool IsSameAsFreshTree(const DomTreeT &DT) {
    DomTreeT FreshTree;
    FreshTree.recalculate(*DT.Parent);
    const bool Different = DT.compare(FreshTree);

    if (Different) {
      errs() << (DT.isPostDominator() ? "Post" : "")
             << "DominatorTree is different than a freshly computed one!\n"
             << "\tCurrent:\n";
      DT.print(errs());
      errs() << "\n\tFreshly computed tree:\n";
      FreshTree.print(errs());
      errs().flush();
    }

    return !Different;
  }
};

template <class DomTreeT>
void Calculate(DomTreeT &DT) {
  SemiNCAInfo<DomTreeT>::CalculateFromScratch(DT, nullptr);
}

template <class DomTreeT>
void InsertEdge(DomTreeT &DT, typename DomTreeT::NodePtr From,
                typename DomTreeT::NodePtr To) {
  if (DT.isPostDominator()) std::swap(From, To);
  SemiNCAInfo<DomTreeT>::InsertEdge(DT, nullptr, From, To);
}

template <class DomTreeT>
void DeleteEdge(DomTreeT &DT, typename DomTreeT::NodePtr From,
                typename DomTreeT::NodePtr To) {
  if (DT.isPostDominator()) std::swap(From, To);
  SemiNCAInfo<DomTreeT>::DeleteEdge(DT, nullptr, From, To);
}

template <class DomTreeT>
void ApplyUpdates(DomTreeT &DT,
                  ArrayRef<typename DomTreeT::UpdateType> Updates) {
  SemiNCAInfo<DomTreeT>::ApplyUpdates(DT, Updates);
}

template <class DomTreeT>
bool Verify(const DomTreeT &DT, typename DomTreeT::VerificationLevel VL) {
  SemiNCAInfo<DomTreeT> SNCA(nullptr);

  // Simplest check is to compare against a new tree. This will also
  // usefully print the old and new trees, if they are different.
  if (!SNCA.IsSameAsFreshTree(DT))
    return false;

  // Common checks to verify the properties of the tree. O(N log N) at worst
  if (!SNCA.verifyRoots(DT) || !SNCA.verifyReachability(DT) ||
      !SNCA.VerifyLevels(DT) || !SNCA.VerifyDFSNumbers(DT))
    return false;

  // Extra checks depending on VerificationLevel. Up to O(N^3)
  if (VL == DomTreeT::VerificationLevel::Basic ||
      VL == DomTreeT::VerificationLevel::Full)
    if (!SNCA.verifyParentProperty(DT))
      return false;
  if (VL == DomTreeT::VerificationLevel::Full)
    if (!SNCA.verifySiblingProperty(DT))
      return false;

  return true;
}

}  // namespace DomTreeBuilder
}  // namespace llvm

#undef DEBUG_TYPE

#endif
Index: vendor/llvm/dist-release_70/include/llvm/Transforms/Utils/SSAUpdater.h
===================================================================
--- vendor/llvm/dist-release_70/include/llvm/Transforms/Utils/SSAUpdater.h (revision 341364)
+++ vendor/llvm/dist-release_70/include/llvm/Transforms/Utils/SSAUpdater.h (revision 341365)
@@ -1,173 +1,177 @@
//===- SSAUpdater.h - Unstructured SSA Update Tool --------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
// //===----------------------------------------------------------------------===// // // This file declares the SSAUpdater class. // //===----------------------------------------------------------------------===// #ifndef LLVM_TRANSFORMS_UTILS_SSAUPDATER_H #define LLVM_TRANSFORMS_UTILS_SSAUPDATER_H #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include namespace llvm { class BasicBlock; class Instruction; class LoadInst; class PHINode; template class SmallVectorImpl; template class SSAUpdaterTraits; class Type; class Use; class Value; /// Helper class for SSA formation on a set of values defined in /// multiple blocks. /// /// This is used when code duplication or another unstructured /// transformation wants to rewrite a set of uses of one value with uses of a /// set of values. class SSAUpdater { friend class SSAUpdaterTraits; private: /// This keeps track of which value to use on a per-block basis. When we /// insert PHI nodes, we keep track of them here. void *AV = nullptr; /// ProtoType holds the type of the values being rewritten. Type *ProtoType = nullptr; /// PHI nodes are given a name based on ProtoName. std::string ProtoName; /// If this is non-null, the SSAUpdater adds all PHI nodes that it creates to /// the vector. SmallVectorImpl *InsertedPHIs; public: /// If InsertedPHIs is specified, it will be filled /// in with all PHI Nodes created by rewriting. explicit SSAUpdater(SmallVectorImpl *InsertedPHIs = nullptr); SSAUpdater(const SSAUpdater &) = delete; SSAUpdater &operator=(const SSAUpdater &) = delete; ~SSAUpdater(); /// Reset this object to get ready for a new set of SSA updates with /// type 'Ty'. /// /// PHI nodes get a name based on 'Name'. void Initialize(Type *Ty, StringRef Name); /// Indicate that a rewritten value is available in the specified block /// with the specified value. void AddAvailableValue(BasicBlock *BB, Value *V); /// Return true if the SSAUpdater already has a value for the specified /// block. bool HasValueForBlock(BasicBlock *BB) const; + /// Return the value for the specified block if the SSAUpdater has one, + /// otherwise return nullptr. + Value *FindValueForBlock(BasicBlock *BB) const; + /// Construct SSA form, materializing a value that is live at the end /// of the specified block. Value *GetValueAtEndOfBlock(BasicBlock *BB); /// Construct SSA form, materializing a value that is live in the /// middle of the specified block. /// /// \c GetValueInMiddleOfBlock is the same as \c GetValueAtEndOfBlock except /// in one important case: if there is a definition of the rewritten value /// after the 'use' in BB. Consider code like this: /// /// \code /// X1 = ... /// SomeBB: /// use(X) /// X2 = ... /// br Cond, SomeBB, OutBB /// \endcode /// /// In this case, there are two values (X1 and X2) added to the AvailableVals /// set by the client of the rewriter, and those values are both live out of /// their respective blocks. However, the use of X happens in the *middle* of /// a block. Because of this, we need to insert a new PHI node in SomeBB to /// merge the appropriate values, and this value isn't live out of the block. Value *GetValueInMiddleOfBlock(BasicBlock *BB); /// Rewrite a use of the symbolic value. /// /// This handles PHI nodes, which use their value in the corresponding /// predecessor. Note that this will not work if the use is supposed to be /// rewritten to a value defined in the same block as the use, but above it. /// Any 'AddAvailableValue's added for the use's block will be considered to /// be below it. 
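// An illustrative client-side sketch of the interface declared here: two
// definitions of the same value arrive from two predecessor blocks, and every
// use held by one instruction is rewritten to whichever definition reaches
// it, with PHIs inserted as needed. The function name and the idea that the
// caller supplies OrigV/BB1/V1/BB2/V2/UserInst are assumptions of this
// example, not part of the header.
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"

static void rewriteAcrossBlocks(llvm::Value *OrigV, llvm::BasicBlock *BB1,
                                llvm::Value *V1, llvm::BasicBlock *BB2,
                                llvm::Value *V2, llvm::Instruction *UserInst) {
  llvm::SSAUpdater Updater;
  Updater.Initialize(OrigV->getType(), OrigV->getName());
  Updater.AddAvailableValue(BB1, V1);
  Updater.AddAvailableValue(BB2, V2);

  // RewriteUse() picks the reaching definition for each use, creating PHI
  // nodes at join points when both definitions can reach it.
  for (llvm::Use &U : UserInst->operands())
    if (U.get() == OrigV)
      Updater.RewriteUse(U);
}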
void RewriteUse(Use &U); /// Rewrite a use like \c RewriteUse but handling in-block definitions. /// /// This version of the method can rewrite uses in the same block as /// a definition, because it assumes that all uses of a value are below any /// inserted values. void RewriteUseAfterInsertions(Use &U); private: Value *GetValueAtEndOfBlockInternal(BasicBlock *BB); }; /// Helper class for promoting a collection of loads and stores into SSA /// Form using the SSAUpdater. /// /// This handles complexities that SSAUpdater doesn't, such as multiple loads /// and stores in one block. /// /// Clients of this class are expected to subclass this and implement the /// virtual methods. class LoadAndStorePromoter { protected: SSAUpdater &SSA; public: LoadAndStorePromoter(ArrayRef Insts, SSAUpdater &S, StringRef Name = StringRef()); virtual ~LoadAndStorePromoter() = default; /// This does the promotion. /// /// Insts is a list of loads and stores to promote, and Name is the basename /// for the PHIs to insert. After this is complete, the loads and stores are /// removed from the code. void run(const SmallVectorImpl &Insts) const; /// Return true if the specified instruction is in the Inst list. /// /// The Insts list is the one passed into the constructor. Clients should /// implement this with a more efficient version if possible. virtual bool isInstInList(Instruction *I, const SmallVectorImpl &Insts) const; /// This hook is invoked after all the stores are found and inserted as /// available values. virtual void doExtraRewritesBeforeFinalDeletion() const {} /// Clients can choose to implement this to get notified right before /// a load is RAUW'd another value. virtual void replaceLoadWithValue(LoadInst *LI, Value *V) const {} /// Called before each instruction is deleted. virtual void instructionDeleted(Instruction *I) const {} /// Called to update debug info associated with the instruction. virtual void updateDebugInfo(Instruction *I) const {} }; } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_SSAUPDATER_H Index: vendor/llvm/dist-release_70/include/llvm/Transforms/Utils/SSAUpdaterImpl.h =================================================================== --- vendor/llvm/dist-release_70/include/llvm/Transforms/Utils/SSAUpdaterImpl.h (revision 341364) +++ vendor/llvm/dist-release_70/include/llvm/Transforms/Utils/SSAUpdaterImpl.h (revision 341365) @@ -1,469 +1,468 @@ //===- SSAUpdaterImpl.h - SSA Updater Implementation ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file provides a template that implements the core algorithm for the // SSAUpdater and MachineSSAUpdater. // //===----------------------------------------------------------------------===// #ifndef LLVM_TRANSFORMS_UTILS_SSAUPDATERIMPL_H #define LLVM_TRANSFORMS_UTILS_SSAUPDATERIMPL_H #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "ssaupdater" namespace llvm { template class SSAUpdaterTraits; template class SSAUpdaterImpl { private: UpdaterT *Updater; using Traits = SSAUpdaterTraits; using BlkT = typename Traits::BlkT; using ValT = typename Traits::ValT; using PhiT = typename Traits::PhiT; /// BBInfo - Per-basic block information used internally by SSAUpdaterImpl. 
/// The predecessors of each block are cached here since pred_iterator is /// slow and we need to iterate over the blocks at least a few times. class BBInfo { public: // Back-pointer to the corresponding block. BlkT *BB; // Value to use in this block. ValT AvailableVal; // Block that defines the available value. BBInfo *DefBB; // Postorder number. int BlkNum = 0; // Immediate dominator. BBInfo *IDom = nullptr; // Number of predecessor blocks. unsigned NumPreds = 0; // Array[NumPreds] of predecessor blocks. BBInfo **Preds = nullptr; // Marker for existing PHIs that match. PhiT *PHITag = nullptr; BBInfo(BlkT *ThisBB, ValT V) : BB(ThisBB), AvailableVal(V), DefBB(V ? this : nullptr) {} }; using AvailableValsTy = DenseMap; AvailableValsTy *AvailableVals; SmallVectorImpl *InsertedPHIs; using BlockListTy = SmallVectorImpl; using BBMapTy = DenseMap; BBMapTy BBMap; BumpPtrAllocator Allocator; public: explicit SSAUpdaterImpl(UpdaterT *U, AvailableValsTy *A, SmallVectorImpl *Ins) : Updater(U), AvailableVals(A), InsertedPHIs(Ins) {} /// GetValue - Check to see if AvailableVals has an entry for the specified /// BB and if so, return it. If not, construct SSA form by first /// calculating the required placement of PHIs and then inserting new PHIs /// where needed. ValT GetValue(BlkT *BB) { SmallVector BlockList; BBInfo *PseudoEntry = BuildBlockList(BB, &BlockList); // Special case: bail out if BB is unreachable. if (BlockList.size() == 0) { ValT V = Traits::GetUndefVal(BB, Updater); (*AvailableVals)[BB] = V; return V; } FindDominators(&BlockList, PseudoEntry); FindPHIPlacement(&BlockList); FindAvailableVals(&BlockList); return BBMap[BB]->DefBB->AvailableVal; } /// BuildBlockList - Starting from the specified basic block, traverse back /// through its predecessors until reaching blocks with known values. /// Create BBInfo structures for the blocks and append them to the block /// list. BBInfo *BuildBlockList(BlkT *BB, BlockListTy *BlockList) { SmallVector RootList; SmallVector WorkList; BBInfo *Info = new (Allocator) BBInfo(BB, 0); BBMap[BB] = Info; WorkList.push_back(Info); // Search backward from BB, creating BBInfos along the way and stopping // when reaching blocks that define the value. Record those defining // blocks on the RootList. SmallVector Preds; while (!WorkList.empty()) { Info = WorkList.pop_back_val(); Preds.clear(); Traits::FindPredecessorBlocks(Info->BB, &Preds); Info->NumPreds = Preds.size(); if (Info->NumPreds == 0) Info->Preds = nullptr; else Info->Preds = static_cast(Allocator.Allocate( Info->NumPreds * sizeof(BBInfo *), alignof(BBInfo *))); for (unsigned p = 0; p != Info->NumPreds; ++p) { BlkT *Pred = Preds[p]; // Check if BBMap already has a BBInfo for the predecessor block. typename BBMapTy::value_type &BBMapBucket = BBMap.FindAndConstruct(Pred); if (BBMapBucket.second) { Info->Preds[p] = BBMapBucket.second; continue; } // Create a new BBInfo for the predecessor. ValT PredVal = AvailableVals->lookup(Pred); BBInfo *PredInfo = new (Allocator) BBInfo(Pred, PredVal); BBMapBucket.second = PredInfo; Info->Preds[p] = PredInfo; if (PredInfo->AvailableVal) { RootList.push_back(PredInfo); continue; } WorkList.push_back(PredInfo); } } // Now that we know what blocks are backwards-reachable from the starting // block, do a forward depth-first traversal to assign postorder numbers // to those blocks. BBInfo *PseudoEntry = new (Allocator) BBInfo(nullptr, 0); unsigned BlkNum = 1; // Initialize the worklist with the roots from the backward traversal. 
while (!RootList.empty()) { Info = RootList.pop_back_val(); Info->IDom = PseudoEntry; Info->BlkNum = -1; WorkList.push_back(Info); } while (!WorkList.empty()) { Info = WorkList.back(); if (Info->BlkNum == -2) { // All the successors have been handled; assign the postorder number. Info->BlkNum = BlkNum++; // If not a root, put it on the BlockList. if (!Info->AvailableVal) BlockList->push_back(Info); WorkList.pop_back(); continue; } // Leave this entry on the worklist, but set its BlkNum to mark that its // successors have been put on the worklist. When it returns to the top // the list, after handling its successors, it will be assigned a // number. Info->BlkNum = -2; // Add unvisited successors to the work list. for (typename Traits::BlkSucc_iterator SI = Traits::BlkSucc_begin(Info->BB), E = Traits::BlkSucc_end(Info->BB); SI != E; ++SI) { BBInfo *SuccInfo = BBMap[*SI]; if (!SuccInfo || SuccInfo->BlkNum) continue; SuccInfo->BlkNum = -1; WorkList.push_back(SuccInfo); } } PseudoEntry->BlkNum = BlkNum; return PseudoEntry; } /// IntersectDominators - This is the dataflow lattice "meet" operation for /// finding dominators. Given two basic blocks, it walks up the dominator /// tree until it finds a common dominator of both. It uses the postorder /// number of the blocks to determine how to do that. BBInfo *IntersectDominators(BBInfo *Blk1, BBInfo *Blk2) { while (Blk1 != Blk2) { while (Blk1->BlkNum < Blk2->BlkNum) { Blk1 = Blk1->IDom; if (!Blk1) return Blk2; } while (Blk2->BlkNum < Blk1->BlkNum) { Blk2 = Blk2->IDom; if (!Blk2) return Blk1; } } return Blk1; } /// FindDominators - Calculate the dominator tree for the subset of the CFG /// corresponding to the basic blocks on the BlockList. This uses the /// algorithm from: "A Simple, Fast Dominance Algorithm" by Cooper, Harvey /// and Kennedy, published in Software--Practice and Experience, 2001, /// 4:1-10. Because the CFG subset does not include any edges leading into /// blocks that define the value, the results are not the usual dominator /// tree. The CFG subset has a single pseudo-entry node with edges to a set /// of root nodes for blocks that define the value. The dominators for this /// subset CFG are not the standard dominators but they are adequate for /// placing PHIs within the subset CFG. void FindDominators(BlockListTy *BlockList, BBInfo *PseudoEntry) { bool Changed; do { Changed = false; // Iterate over the list in reverse order, i.e., forward on CFG edges. for (typename BlockListTy::reverse_iterator I = BlockList->rbegin(), E = BlockList->rend(); I != E; ++I) { BBInfo *Info = *I; BBInfo *NewIDom = nullptr; // Iterate through the block's predecessors. for (unsigned p = 0; p != Info->NumPreds; ++p) { BBInfo *Pred = Info->Preds[p]; // Treat an unreachable predecessor as a definition with 'undef'. if (Pred->BlkNum == 0) { Pred->AvailableVal = Traits::GetUndefVal(Pred->BB, Updater); (*AvailableVals)[Pred->BB] = Pred->AvailableVal; Pred->DefBB = Pred; Pred->BlkNum = PseudoEntry->BlkNum; PseudoEntry->BlkNum++; } if (!NewIDom) NewIDom = Pred; else NewIDom = IntersectDominators(NewIDom, Pred); } // Check if the IDom value has changed. if (NewIDom && NewIDom != Info->IDom) { Info->IDom = NewIDom; Changed = true; } } } while (Changed); } /// IsDefInDomFrontier - Search up the dominator tree from Pred to IDom for /// any blocks containing definitions of the value. If one is found, then /// the successor of Pred is in the dominance frontier for the definition, /// and this function returns true. 
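// A standalone sketch of the "meet" step used by IntersectDominators() and
// FindDominators() above (the two-finger walk from Cooper, Harvey and
// Kennedy): both blocks climb their current idom chain, always advancing the
// one with the smaller postorder number, until they land on a common
// ancestor. The Doms[] array and the sample numbering are made up for
// illustration, and unlike the LLVM version this assumes every chain reaches
// the entry; this is not LLVM code.
#include <cstdio>
#include <vector>

// Doms[B] is the current idom guess for block B; indices are postorder
// numbers, so larger numbers are closer to the (pseudo-)entry.
static int intersect(const std::vector<int> &Doms, int B1, int B2) {
  while (B1 != B2) {
    while (B1 < B2)
      B1 = Doms[B1]; // Climb the finger that is deeper in the tree.
    while (B2 < B1)
      B2 = Doms[B2];
  }
  return B1;
}

int main() {
  // Entry has postorder number 3; blocks 0, 1 and 2 currently point at it.
  std::vector<int> Doms = {3, 3, 3, 3};
  std::printf("intersect(1, 2) = %d\n", intersect(Doms, 1, 2)); // Prints 3.
}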
bool IsDefInDomFrontier(const BBInfo *Pred, const BBInfo *IDom) { for (; Pred != IDom; Pred = Pred->IDom) { if (Pred->DefBB == Pred) return true; } return false; } /// FindPHIPlacement - PHIs are needed in the iterated dominance frontiers /// of the known definitions. Iteratively add PHIs in the dom frontiers /// until nothing changes. Along the way, keep track of the nearest /// dominating definitions for non-PHI blocks. void FindPHIPlacement(BlockListTy *BlockList) { bool Changed; do { Changed = false; // Iterate over the list in reverse order, i.e., forward on CFG edges. for (typename BlockListTy::reverse_iterator I = BlockList->rbegin(), E = BlockList->rend(); I != E; ++I) { BBInfo *Info = *I; // If this block already needs a PHI, there is nothing to do here. if (Info->DefBB == Info) continue; // Default to use the same def as the immediate dominator. BBInfo *NewDefBB = Info->IDom->DefBB; for (unsigned p = 0; p != Info->NumPreds; ++p) { if (IsDefInDomFrontier(Info->Preds[p], Info->IDom)) { // Need a PHI here. NewDefBB = Info; break; } } // Check if anything changed. if (NewDefBB != Info->DefBB) { Info->DefBB = NewDefBB; Changed = true; } } } while (Changed); } /// FindAvailableVal - If this block requires a PHI, first check if an /// existing PHI matches the PHI placement and reaching definitions computed /// earlier, and if not, create a new PHI. Visit all the block's /// predecessors to calculate the available value for each one and fill in /// the incoming values for a new PHI. void FindAvailableVals(BlockListTy *BlockList) { // Go through the worklist in forward order (i.e., backward through the CFG) // and check if existing PHIs can be used. If not, create empty PHIs where // they are needed. for (typename BlockListTy::iterator I = BlockList->begin(), E = BlockList->end(); I != E; ++I) { BBInfo *Info = *I; // Check if there needs to be a PHI in BB. if (Info->DefBB != Info) continue; // Look for an existing PHI. FindExistingPHI(Info->BB, BlockList); if (Info->AvailableVal) continue; ValT PHI = Traits::CreateEmptyPHI(Info->BB, Info->NumPreds, Updater); Info->AvailableVal = PHI; (*AvailableVals)[Info->BB] = PHI; } // Now go back through the worklist in reverse order to fill in the // arguments for any new PHIs added in the forward traversal. for (typename BlockListTy::reverse_iterator I = BlockList->rbegin(), E = BlockList->rend(); I != E; ++I) { BBInfo *Info = *I; if (Info->DefBB != Info) { - // Record the available value at join nodes to speed up subsequent - // uses of this SSAUpdater for the same value. - if (Info->NumPreds > 1) - (*AvailableVals)[Info->BB] = Info->DefBB->AvailableVal; + // Record the available value to speed up subsequent uses of this + // SSAUpdater for the same value. + (*AvailableVals)[Info->BB] = Info->DefBB->AvailableVal; continue; } // Check if this block contains a newly added PHI. PhiT *PHI = Traits::ValueIsNewPHI(Info->AvailableVal, Updater); if (!PHI) continue; // Iterate through the block's predecessors. for (unsigned p = 0; p != Info->NumPreds; ++p) { BBInfo *PredInfo = Info->Preds[p]; BlkT *Pred = PredInfo->BB; // Skip to the nearest preceding definition. if (PredInfo->DefBB != PredInfo) PredInfo = PredInfo->DefBB; Traits::AddPHIOperand(PHI, PredInfo->AvailableVal, Pred); } LLVM_DEBUG(dbgs() << " Inserted PHI: " << *PHI << "\n"); // If the client wants to know about all new instructions, tell it. 
if (InsertedPHIs) InsertedPHIs->push_back(PHI); } } /// FindExistingPHI - Look through the PHI nodes in a block to see if any of /// them match what is needed. void FindExistingPHI(BlkT *BB, BlockListTy *BlockList) { for (auto &SomePHI : BB->phis()) { if (CheckIfPHIMatches(&SomePHI)) { RecordMatchingPHIs(BlockList); break; } // Match failed: clear all the PHITag values. for (typename BlockListTy::iterator I = BlockList->begin(), E = BlockList->end(); I != E; ++I) (*I)->PHITag = nullptr; } } /// CheckIfPHIMatches - Check if a PHI node matches the placement and values /// in the BBMap. bool CheckIfPHIMatches(PhiT *PHI) { SmallVector WorkList; WorkList.push_back(PHI); // Mark that the block containing this PHI has been visited. BBMap[PHI->getParent()]->PHITag = PHI; while (!WorkList.empty()) { PHI = WorkList.pop_back_val(); // Iterate through the PHI's incoming values. for (typename Traits::PHI_iterator I = Traits::PHI_begin(PHI), E = Traits::PHI_end(PHI); I != E; ++I) { ValT IncomingVal = I.getIncomingValue(); BBInfo *PredInfo = BBMap[I.getIncomingBlock()]; // Skip to the nearest preceding definition. if (PredInfo->DefBB != PredInfo) PredInfo = PredInfo->DefBB; // Check if it matches the expected value. if (PredInfo->AvailableVal) { if (IncomingVal == PredInfo->AvailableVal) continue; return false; } // Check if the value is a PHI in the correct block. PhiT *IncomingPHIVal = Traits::ValueIsPHI(IncomingVal, Updater); if (!IncomingPHIVal || IncomingPHIVal->getParent() != PredInfo->BB) return false; // If this block has already been visited, check if this PHI matches. if (PredInfo->PHITag) { if (IncomingPHIVal == PredInfo->PHITag) continue; return false; } PredInfo->PHITag = IncomingPHIVal; WorkList.push_back(IncomingPHIVal); } } return true; } /// RecordMatchingPHIs - For each PHI node that matches, record it in both /// the BBMap and the AvailableVals mapping. void RecordMatchingPHIs(BlockListTy *BlockList) { for (typename BlockListTy::iterator I = BlockList->begin(), E = BlockList->end(); I != E; ++I) if (PhiT *PHI = (*I)->PHITag) { BlkT *BB = PHI->getParent(); ValT PHIVal = Traits::GetPHIValue(PHI); (*AvailableVals)[BB] = PHIVal; BBMap[BB]->AvailableVal = PHIVal; } } }; } // end namespace llvm #undef DEBUG_TYPE // "ssaupdater" #endif // LLVM_TRANSFORMS_UTILS_SSAUPDATERIMPL_H Index: vendor/llvm/dist-release_70/lib/CodeGen/TargetLoweringObjectFileImpl.cpp =================================================================== --- vendor/llvm/dist-release_70/lib/CodeGen/TargetLoweringObjectFileImpl.cpp (revision 341364) +++ vendor/llvm/dist-release_70/lib/CodeGen/TargetLoweringObjectFileImpl.cpp (revision 341365) @@ -1,1587 +1,1588 @@ //===- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info ---===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements classes used to handle lowerings specific to common // object file formats. 
// //===----------------------------------------------------------------------===// #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/BinaryFormat/MachO.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalObject.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSectionWasm.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/SectionKind.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Format.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include #include using namespace llvm; using namespace dwarf; static void GetObjCImageInfo(Module &M, unsigned &Version, unsigned &Flags, StringRef &Section) { SmallVector ModuleFlags; M.getModuleFlagsMetadata(ModuleFlags); for (const auto &MFE: ModuleFlags) { // Ignore flags with 'Require' behaviour. 
if (MFE.Behavior == Module::Require) continue; StringRef Key = MFE.Key->getString(); if (Key == "Objective-C Image Info Version") { Version = mdconst::extract(MFE.Val)->getZExtValue(); } else if (Key == "Objective-C Garbage Collection" || Key == "Objective-C GC Only" || Key == "Objective-C Is Simulated" || Key == "Objective-C Class Properties" || Key == "Objective-C Image Swift Version") { Flags |= mdconst::extract(MFE.Val)->getZExtValue(); } else if (Key == "Objective-C Image Info Section") { Section = cast(MFE.Val)->getString(); } } } //===----------------------------------------------------------------------===// // ELF //===----------------------------------------------------------------------===// void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, const TargetMachine &TgtM) { TargetLoweringObjectFile::Initialize(Ctx, TgtM); TM = &TgtM; } void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, Module &M) const { auto &C = getContext(); if (NamedMDNode *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) { auto *S = C.getELFSection(".linker-options", ELF::SHT_LLVM_LINKER_OPTIONS, ELF::SHF_EXCLUDE); Streamer.SwitchSection(S); for (const auto &Operand : LinkerOptions->operands()) { if (cast(Operand)->getNumOperands() != 2) report_fatal_error("invalid llvm.linker.options"); for (const auto &Option : cast(Operand)->operands()) { Streamer.EmitBytes(cast(Option)->getString()); Streamer.EmitIntValue(0, 1); } } } unsigned Version = 0; unsigned Flags = 0; StringRef Section; GetObjCImageInfo(M, Version, Flags, Section); if (!Section.empty()) { auto *S = C.getELFSection(Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC); Streamer.SwitchSection(S); Streamer.EmitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); Streamer.EmitIntValue(Version, 4); Streamer.EmitIntValue(Flags, 4); Streamer.AddBlankLine(); } SmallVector ModuleFlags; M.getModuleFlagsMetadata(ModuleFlags); MDNode *CFGProfile = nullptr; for (const auto &MFE : ModuleFlags) { StringRef Key = MFE.Key->getString(); if (Key == "CG Profile") { CFGProfile = cast(MFE.Val); break; } } if (!CFGProfile) return; auto GetSym = [this](const MDOperand &MDO) -> MCSymbol * { if (!MDO) return nullptr; auto V = cast(MDO); const Function *F = cast(V->getValue()); return TM->getSymbol(F); }; for (const auto &Edge : CFGProfile->operands()) { MDNode *E = cast(Edge); const MCSymbol *From = GetSym(E->getOperand(0)); const MCSymbol *To = GetSym(E->getOperand(1)); // Skip null functions. This can happen if functions are dead stripped after // the CGProfile pass has been run. 
if (!From || !To) continue; uint64_t Count = cast(E->getOperand(2)) ->getValue() ->getUniqueInteger() .getZExtValue(); Streamer.emitCGProfileEntry( MCSymbolRefExpr::create(From, MCSymbolRefExpr::VK_None, C), MCSymbolRefExpr::create(To, MCSymbolRefExpr::VK_None, C), Count); } } MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol( const GlobalValue *GV, const TargetMachine &TM, MachineModuleInfo *MMI) const { unsigned Encoding = getPersonalityEncoding(); if ((Encoding & 0x80) == DW_EH_PE_indirect) return getContext().getOrCreateSymbol(StringRef("DW.ref.") + TM.getSymbol(GV)->getName()); if ((Encoding & 0x70) == DW_EH_PE_absptr) return TM.getSymbol(GV); report_fatal_error("We do not support this DWARF encoding yet!"); } void TargetLoweringObjectFileELF::emitPersonalityValue( MCStreamer &Streamer, const DataLayout &DL, const MCSymbol *Sym) const { SmallString<64> NameData("DW.ref."); NameData += Sym->getName(); MCSymbolELF *Label = cast(getContext().getOrCreateSymbol(NameData)); Streamer.EmitSymbolAttribute(Label, MCSA_Hidden); Streamer.EmitSymbolAttribute(Label, MCSA_Weak); unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP; MCSection *Sec = getContext().getELFNamedSection(".data", Label->getName(), ELF::SHT_PROGBITS, Flags, 0); unsigned Size = DL.getPointerSize(); Streamer.SwitchSection(Sec); Streamer.EmitValueToAlignment(DL.getPointerABIAlignment(0)); Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); const MCExpr *E = MCConstantExpr::create(Size, getContext()); Streamer.emitELFSize(Label, E); Streamer.EmitLabel(Label); Streamer.EmitSymbolValue(Sym, Size); } const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference( const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM, MachineModuleInfo *MMI, MCStreamer &Streamer) const { if (Encoding & DW_EH_PE_indirect) { MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo(); MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, ".DW.stub", TM); // Add information about the stub reference to ELFMMI so that the stub // gets emitted by the asmprinter. MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym); if (!StubSym.getPointer()) { MCSymbol *Sym = TM.getSymbol(GV); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); } return TargetLoweringObjectFile:: getTTypeReference(MCSymbolRefExpr::create(SSym, getContext()), Encoding & ~DW_EH_PE_indirect, Streamer); } return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, TM, MMI, Streamer); } static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) { // N.B.: The defaults used in here are not the same ones used in MC. // We follow gcc, MC follows gas. For example, given ".section .eh_frame", // both gas and MC will produce a section with no flags. Given // section(".eh_frame") gcc will produce: // // .section .eh_frame,"a",@progbits if (Name == getInstrProfSectionName(IPSK_covmap, Triple::ELF, /*AddSegmentInfo=*/false)) return SectionKind::getMetadata(); if (Name.empty() || Name[0] != '.') return K; // Default implementation based on some magic section names. 
if (Name == ".bss" || Name.startswith(".bss.") || Name.startswith(".gnu.linkonce.b.") || Name.startswith(".llvm.linkonce.b.") || Name == ".sbss" || Name.startswith(".sbss.") || Name.startswith(".gnu.linkonce.sb.") || Name.startswith(".llvm.linkonce.sb.")) return SectionKind::getBSS(); if (Name == ".tdata" || Name.startswith(".tdata.") || Name.startswith(".gnu.linkonce.td.") || Name.startswith(".llvm.linkonce.td.")) return SectionKind::getThreadData(); if (Name == ".tbss" || Name.startswith(".tbss.") || Name.startswith(".gnu.linkonce.tb.") || Name.startswith(".llvm.linkonce.tb.")) return SectionKind::getThreadBSS(); return K; } static unsigned getELFSectionType(StringRef Name, SectionKind K) { // Use SHT_NOTE for section whose name starts with ".note" to allow // emitting ELF notes from C variable declaration. // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77609 if (Name.startswith(".note")) return ELF::SHT_NOTE; if (Name == ".init_array") return ELF::SHT_INIT_ARRAY; if (Name == ".fini_array") return ELF::SHT_FINI_ARRAY; if (Name == ".preinit_array") return ELF::SHT_PREINIT_ARRAY; if (K.isBSS() || K.isThreadBSS()) return ELF::SHT_NOBITS; return ELF::SHT_PROGBITS; } static unsigned getELFSectionFlags(SectionKind K) { unsigned Flags = 0; if (!K.isMetadata()) Flags |= ELF::SHF_ALLOC; if (K.isText()) Flags |= ELF::SHF_EXECINSTR; if (K.isExecuteOnly()) Flags |= ELF::SHF_ARM_PURECODE; if (K.isWriteable()) Flags |= ELF::SHF_WRITE; if (K.isThreadLocal()) Flags |= ELF::SHF_TLS; if (K.isMergeableCString() || K.isMergeableConst()) Flags |= ELF::SHF_MERGE; if (K.isMergeableCString()) Flags |= ELF::SHF_STRINGS; return Flags; } static const Comdat *getELFComdat(const GlobalValue *GV) { const Comdat *C = GV->getComdat(); if (!C) return nullptr; if (C->getSelectionKind() != Comdat::Any) report_fatal_error("ELF COMDATs only support SelectionKind::Any, '" + C->getName() + "' cannot be lowered."); return C; } static const MCSymbolELF *getAssociatedSymbol(const GlobalObject *GO, const TargetMachine &TM) { MDNode *MD = GO->getMetadata(LLVMContext::MD_associated); if (!MD) return nullptr; const MDOperand &Op = MD->getOperand(0); if (!Op.get()) return nullptr; auto *VM = dyn_cast(Op); if (!VM) report_fatal_error("MD_associated operand is not ValueAsMetadata"); GlobalObject *OtherGO = dyn_cast(VM->getValue()); return OtherGO ? dyn_cast(TM.getSymbol(OtherGO)) : nullptr; } MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { StringRef SectionName = GO->getSection(); // Check if '#pragma clang section' name is applicable. // Note that pragma directive overrides -ffunction-section, -fdata-section // and so section name is exactly as user specified and not uniqued. const GlobalVariable *GV = dyn_cast(GO); if (GV && GV->hasImplicitSection()) { auto Attrs = GV->getAttributes(); if (Attrs.hasAttribute("bss-section") && Kind.isBSS()) { SectionName = Attrs.getAttribute("bss-section").getValueAsString(); } else if (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly()) { SectionName = Attrs.getAttribute("rodata-section").getValueAsString(); } else if (Attrs.hasAttribute("data-section") && Kind.isData()) { SectionName = Attrs.getAttribute("data-section").getValueAsString(); } } const Function *F = dyn_cast(GO); if (F && F->hasFnAttribute("implicit-section-name")) { SectionName = F->getFnAttribute("implicit-section-name").getValueAsString(); } // Infer section flags from the section name if we can. 
Kind = getELFKindForNamedSection(SectionName, Kind); StringRef Group = ""; unsigned Flags = getELFSectionFlags(Kind); if (const Comdat *C = getELFComdat(GO)) { Group = C->getName(); Flags |= ELF::SHF_GROUP; } // A section can have at most one associated section. Put each global with // MD_associated in a unique section. unsigned UniqueID = MCContext::GenericSectionID; const MCSymbolELF *AssociatedSymbol = getAssociatedSymbol(GO, TM); if (AssociatedSymbol) { UniqueID = NextUniqueID++; Flags |= ELF::SHF_LINK_ORDER; } MCSectionELF *Section = getContext().getELFSection( SectionName, getELFSectionType(SectionName, Kind), Flags, /*EntrySize=*/0, Group, UniqueID, AssociatedSymbol); // Make sure that we did not get some other section with incompatible sh_link. // This should not be possible due to UniqueID code above. assert(Section->getAssociatedSymbol() == AssociatedSymbol && "Associated symbol mismatch between sections"); return Section; } /// Return the section prefix name used by options FunctionsSections and /// DataSections. static StringRef getSectionPrefixForGlobal(SectionKind Kind) { if (Kind.isText()) return ".text"; if (Kind.isReadOnly()) return ".rodata"; if (Kind.isBSS()) return ".bss"; if (Kind.isThreadData()) return ".tdata"; if (Kind.isThreadBSS()) return ".tbss"; if (Kind.isData()) return ".data"; assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); return ".data.rel.ro"; } static unsigned getEntrySizeForKind(SectionKind Kind) { if (Kind.isMergeable1ByteCString()) return 1; else if (Kind.isMergeable2ByteCString()) return 2; else if (Kind.isMergeable4ByteCString()) return 4; else if (Kind.isMergeableConst4()) return 4; else if (Kind.isMergeableConst8()) return 8; else if (Kind.isMergeableConst16()) return 16; else if (Kind.isMergeableConst32()) return 32; else { // We shouldn't have mergeable C strings or mergeable constants that we // didn't handle above. assert(!Kind.isMergeableCString() && "unknown string width"); assert(!Kind.isMergeableConst() && "unknown data width"); return 0; } } static MCSectionELF *selectELFSectionForGlobal( MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang, const TargetMachine &TM, bool EmitUniqueSection, unsigned Flags, unsigned *NextUniqueID, const MCSymbolELF *AssociatedSymbol) { StringRef Group = ""; if (const Comdat *C = getELFComdat(GO)) { Flags |= ELF::SHF_GROUP; Group = C->getName(); } // Get the section entry size based on the kind. unsigned EntrySize = getEntrySizeForKind(Kind); SmallString<128> Name; if (Kind.isMergeableCString()) { // We also need alignment here. // FIXME: this is getting the alignment of the character, not the // alignment of the global! unsigned Align = GO->getParent()->getDataLayout().getPreferredAlignment( cast(GO)); std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + "."; Name = SizeSpec + utostr(Align); } else if (Kind.isMergeableConst()) { Name = ".rodata.cst"; Name += utostr(EntrySize); } else { Name = getSectionPrefixForGlobal(Kind); } if (const auto *F = dyn_cast(GO)) { const auto &OptionalPrefix = F->getSectionPrefix(); if (OptionalPrefix) Name += *OptionalPrefix; } unsigned UniqueID = MCContext::GenericSectionID; if (EmitUniqueSection) { if (TM.getUniqueSectionNames()) { Name.push_back('.'); TM.getNameWithPrefix(Name, GO, Mang, true /*MayAlwaysUsePrivate*/); } else { UniqueID = *NextUniqueID; (*NextUniqueID)++; } } // Use 0 as the unique ID for execute-only text. 
if (Kind.isExecuteOnly()) UniqueID = 0; return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags, EntrySize, Group, UniqueID, AssociatedSymbol); } MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { unsigned Flags = getELFSectionFlags(Kind); // If we have -ffunction-section or -fdata-section then we should emit the // global value to a uniqued section specifically for it. bool EmitUniqueSection = false; if (!(Flags & ELF::SHF_MERGE) && !Kind.isCommon()) { if (Kind.isText()) EmitUniqueSection = TM.getFunctionSections(); else EmitUniqueSection = TM.getDataSections(); } EmitUniqueSection |= GO->hasComdat(); const MCSymbolELF *AssociatedSymbol = getAssociatedSymbol(GO, TM); if (AssociatedSymbol) { EmitUniqueSection = true; Flags |= ELF::SHF_LINK_ORDER; } MCSectionELF *Section = selectELFSectionForGlobal( getContext(), GO, Kind, getMangler(), TM, EmitUniqueSection, Flags, &NextUniqueID, AssociatedSymbol); assert(Section->getAssociatedSymbol() == AssociatedSymbol); return Section; } MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable( const Function &F, const TargetMachine &TM) const { // If the function can be removed, produce a unique section so that // the table doesn't prevent the removal. const Comdat *C = F.getComdat(); bool EmitUniqueSection = TM.getFunctionSections() || C; if (!EmitUniqueSection) return ReadOnlySection; return selectELFSectionForGlobal(getContext(), &F, SectionKind::getReadOnly(), getMangler(), TM, EmitUniqueSection, ELF::SHF_ALLOC, &NextUniqueID, /* AssociatedSymbol */ nullptr); } bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection( bool UsesLabelDifference, const Function &F) const { // We can always create relative relocations, so use another section // that can be marked non-executable. return false; } /// Given a mergeable constant with the specified size and relocation /// information, return a section that it should be placed in. MCSection *TargetLoweringObjectFileELF::getSectionForConstant( const DataLayout &DL, SectionKind Kind, const Constant *C, unsigned &Align) const { if (Kind.isMergeableConst4() && MergeableConst4Section) return MergeableConst4Section; if (Kind.isMergeableConst8() && MergeableConst8Section) return MergeableConst8Section; if (Kind.isMergeableConst16() && MergeableConst16Section) return MergeableConst16Section; if (Kind.isMergeableConst32() && MergeableConst32Section) return MergeableConst32Section; if (Kind.isReadOnly()) return ReadOnlySection; assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); return DataRelROSection; } static MCSectionELF *getStaticStructorSection(MCContext &Ctx, bool UseInitArray, bool IsCtor, unsigned Priority, const MCSymbol *KeySym) { std::string Name; unsigned Type; unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE; StringRef COMDAT = KeySym ? KeySym->getName() : ""; if (KeySym) Flags |= ELF::SHF_GROUP; if (UseInitArray) { if (IsCtor) { Type = ELF::SHT_INIT_ARRAY; Name = ".init_array"; } else { Type = ELF::SHT_FINI_ARRAY; Name = ".fini_array"; } if (Priority != 65535) { Name += '.'; Name += utostr(Priority); } } else { // The default scheme is .ctor / .dtor, so we have to invert the priority // numbering. 
if (IsCtor) Name = ".ctors"; else Name = ".dtors"; if (Priority != 65535) raw_string_ostream(Name) << format(".%05u", 65535 - Priority); Type = ELF::SHT_PROGBITS; } return Ctx.getELFSection(Name, Type, Flags, 0, COMDAT); } MCSection *TargetLoweringObjectFileELF::getStaticCtorSection( unsigned Priority, const MCSymbol *KeySym) const { return getStaticStructorSection(getContext(), UseInitArray, true, Priority, KeySym); } MCSection *TargetLoweringObjectFileELF::getStaticDtorSection( unsigned Priority, const MCSymbol *KeySym) const { return getStaticStructorSection(getContext(), UseInitArray, false, Priority, KeySym); } const MCExpr *TargetLoweringObjectFileELF::lowerRelativeReference( const GlobalValue *LHS, const GlobalValue *RHS, const TargetMachine &TM) const { // We may only use a PLT-relative relocation to refer to unnamed_addr // functions. if (!LHS->hasGlobalUnnamedAddr() || !LHS->getValueType()->isFunctionTy()) return nullptr; // Basic sanity checks. if (LHS->getType()->getPointerAddressSpace() != 0 || RHS->getType()->getPointerAddressSpace() != 0 || LHS->isThreadLocal() || RHS->isThreadLocal()) return nullptr; return MCBinaryExpr::createSub( MCSymbolRefExpr::create(TM.getSymbol(LHS), PLTRelativeVariantKind, getContext()), MCSymbolRefExpr::create(TM.getSymbol(RHS), getContext()), getContext()); } void TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) { UseInitArray = UseInitArray_; MCContext &Ctx = getContext(); if (!UseInitArray) { StaticCtorSection = Ctx.getELFSection(".ctors", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_WRITE); StaticDtorSection = Ctx.getELFSection(".dtors", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_WRITE); return; } StaticCtorSection = Ctx.getELFSection(".init_array", ELF::SHT_INIT_ARRAY, ELF::SHF_WRITE | ELF::SHF_ALLOC); StaticDtorSection = Ctx.getELFSection(".fini_array", ELF::SHT_FINI_ARRAY, ELF::SHF_WRITE | ELF::SHF_ALLOC); } //===----------------------------------------------------------------------===// // MachO //===----------------------------------------------------------------------===// TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO() : TargetLoweringObjectFile() { SupportIndirectSymViaGOTPCRel = true; } void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, const TargetMachine &TM) { TargetLoweringObjectFile::Initialize(Ctx, TM); if (TM.getRelocationModel() == Reloc::Static) { StaticCtorSection = Ctx.getMachOSection("__TEXT", "__constructor", 0, SectionKind::getData()); StaticDtorSection = Ctx.getMachOSection("__TEXT", "__destructor", 0, SectionKind::getData()); } else { StaticCtorSection = Ctx.getMachOSection("__DATA", "__mod_init_func", MachO::S_MOD_INIT_FUNC_POINTERS, SectionKind::getData()); StaticDtorSection = Ctx.getMachOSection("__DATA", "__mod_term_func", MachO::S_MOD_TERM_FUNC_POINTERS, SectionKind::getData()); } } void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer, Module &M) const { // Emit the linker options if present. if (auto *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) { for (const auto &Option : LinkerOptions->operands()) { SmallVector StrOptions; for (const auto &Piece : cast(Option)->operands()) StrOptions.push_back(cast(Piece)->getString()); Streamer.EmitLinkerOptions(StrOptions); } } unsigned VersionVal = 0; unsigned ImageInfoFlags = 0; StringRef SectionVal; GetObjCImageInfo(M, VersionVal, ImageInfoFlags, SectionVal); // The section is mandatory. If we don't have it, then we don't have GC info. 
if (SectionVal.empty()) return; StringRef Segment, Section; unsigned TAA = 0, StubSize = 0; bool TAAParsed; std::string ErrorCode = MCSectionMachO::ParseSectionSpecifier(SectionVal, Segment, Section, TAA, TAAParsed, StubSize); if (!ErrorCode.empty()) // If invalid, report the error with report_fatal_error. report_fatal_error("Invalid section specifier '" + Section + "': " + ErrorCode + "."); // Get the section. MCSectionMachO *S = getContext().getMachOSection( Segment, Section, TAA, StubSize, SectionKind::getData()); Streamer.SwitchSection(S); Streamer.EmitLabel(getContext(). getOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO"))); Streamer.EmitIntValue(VersionVal, 4); Streamer.EmitIntValue(ImageInfoFlags, 4); Streamer.AddBlankLine(); } static void checkMachOComdat(const GlobalValue *GV) { const Comdat *C = GV->getComdat(); if (!C) return; report_fatal_error("MachO doesn't support COMDATs, '" + C->getName() + "' cannot be lowered."); } MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { // Parse the section specifier and create it if valid. StringRef Segment, Section; unsigned TAA = 0, StubSize = 0; bool TAAParsed; checkMachOComdat(GO); std::string ErrorCode = MCSectionMachO::ParseSectionSpecifier(GO->getSection(), Segment, Section, TAA, TAAParsed, StubSize); if (!ErrorCode.empty()) { // If invalid, report the error with report_fatal_error. report_fatal_error("Global variable '" + GO->getName() + "' has an invalid section specifier '" + GO->getSection() + "': " + ErrorCode + "."); } // Get the section. MCSectionMachO *S = getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind); // If TAA wasn't set by ParseSectionSpecifier() above, // use the value returned by getMachOSection() as a default. if (!TAAParsed) TAA = S->getTypeAndAttributes(); // Okay, now that we got the section, verify that the TAA & StubSize agree. // If the user declared multiple globals with different section flags, we need // to reject it here. if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) { // If invalid, report the error with report_fatal_error. report_fatal_error("Global variable '" + GO->getName() + "' section type or attributes does not match previous" " section specifier"); } return S; } MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { checkMachOComdat(GO); // Handle thread local data. if (Kind.isThreadBSS()) return TLSBSSSection; if (Kind.isThreadData()) return TLSDataSection; if (Kind.isText()) return GO->isWeakForLinker() ? TextCoalSection : TextSection; // If this is weak/linkonce, put this in a coalescable section, either in text // or data depending on if it is writable. if (GO->isWeakForLinker()) { if (Kind.isReadOnly()) return ConstTextCoalSection; if (Kind.isReadOnlyWithRel()) return ConstDataCoalSection; return DataCoalSection; } // FIXME: Alignment check should be handled by section classifier. if (Kind.isMergeable1ByteCString() && GO->getParent()->getDataLayout().getPreferredAlignment( cast(GO)) < 32) return CStringSection; // Do not put 16-bit arrays in the UString section if they have an // externally visible label, this runs into issues with certain linker // versions. 
if (Kind.isMergeable2ByteCString() && !GO->hasExternalLinkage() && GO->getParent()->getDataLayout().getPreferredAlignment( cast(GO)) < 32) return UStringSection; // With MachO only variables whose corresponding symbol starts with 'l' or // 'L' can be merged, so we only try merging GVs with private linkage. if (GO->hasPrivateLinkage() && Kind.isMergeableConst()) { if (Kind.isMergeableConst4()) return FourByteConstantSection; if (Kind.isMergeableConst8()) return EightByteConstantSection; if (Kind.isMergeableConst16()) return SixteenByteConstantSection; } // Otherwise, if it is readonly, but not something we can specially optimize, // just drop it in .const. if (Kind.isReadOnly()) return ReadOnlySection; // If this is marked const, put it into a const section. But if the dynamic // linker needs to write to it, put it in the data segment. if (Kind.isReadOnlyWithRel()) return ConstDataSection; // Put zero initialized globals with strong external linkage in the // DATA, __common section with the .zerofill directive. if (Kind.isBSSExtern()) return DataCommonSection; // Put zero initialized globals with local linkage in __DATA,__bss directive // with the .zerofill directive (aka .lcomm). if (Kind.isBSSLocal()) return DataBSSSection; // Otherwise, just drop the variable in the normal data section. return DataSection; } MCSection *TargetLoweringObjectFileMachO::getSectionForConstant( const DataLayout &DL, SectionKind Kind, const Constant *C, unsigned &Align) const { // If this constant requires a relocation, we have to put it in the data // segment, not in the text segment. if (Kind.isData() || Kind.isReadOnlyWithRel()) return ConstDataSection; if (Kind.isMergeableConst4()) return FourByteConstantSection; if (Kind.isMergeableConst8()) return EightByteConstantSection; if (Kind.isMergeableConst16()) return SixteenByteConstantSection; return ReadOnlySection; // .const } const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference( const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM, MachineModuleInfo *MMI, MCStreamer &Streamer) const { // The mach-o version of this method defaults to returning a stub reference. if (Encoding & DW_EH_PE_indirect) { MachineModuleInfoMachO &MachOMMI = MMI->getObjFileInfo(); MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", TM); // Add information about the stub reference to MachOMMI so that the stub // gets emitted by the asmprinter. MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); if (!StubSym.getPointer()) { MCSymbol *Sym = TM.getSymbol(GV); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); } return TargetLoweringObjectFile:: getTTypeReference(MCSymbolRefExpr::create(SSym, getContext()), Encoding & ~DW_EH_PE_indirect, Streamer); } return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, TM, MMI, Streamer); } MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol( const GlobalValue *GV, const TargetMachine &TM, MachineModuleInfo *MMI) const { // The mach-o version of this method defaults to returning a stub reference. MachineModuleInfoMachO &MachOMMI = MMI->getObjFileInfo(); MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", TM); // Add information about the stub reference to MachOMMI so that the stub // gets emitted by the asmprinter. 
MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); if (!StubSym.getPointer()) { MCSymbol *Sym = TM.getSymbol(GV); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); } return SSym; } const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel( const MCSymbol *Sym, const MCValue &MV, int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const { // Although MachO 32-bit targets do not explicitly have a GOTPCREL relocation // as 64-bit do, we replace the GOT equivalent by accessing the final symbol // through a non_lazy_ptr stub instead. One advantage is that it allows the // computation of deltas to final external symbols. Example: // // _extgotequiv: // .long _extfoo // // _delta: // .long _extgotequiv-_delta // // is transformed to: // // _delta: // .long L_extfoo$non_lazy_ptr-(_delta+0) // // .section __IMPORT,__pointers,non_lazy_symbol_pointers // L_extfoo$non_lazy_ptr: // .indirect_symbol _extfoo // .long 0 // MachineModuleInfoMachO &MachOMMI = MMI->getObjFileInfo(); MCContext &Ctx = getContext(); // The offset must consider the original displacement from the base symbol // since 32-bit targets don't have a GOTPCREL to fold the PC displacement. Offset = -MV.getConstant(); const MCSymbol *BaseSym = &MV.getSymB()->getSymbol(); // Access the final symbol via sym$non_lazy_ptr and generate the appropriated // non_lazy_ptr stubs. SmallString<128> Name; StringRef Suffix = "$non_lazy_ptr"; Name += MMI->getModule()->getDataLayout().getPrivateGlobalPrefix(); Name += Sym->getName(); Name += Suffix; MCSymbol *Stub = Ctx.getOrCreateSymbol(Name); MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(Stub); if (!StubSym.getPointer()) StubSym = MachineModuleInfoImpl:: StubValueTy(const_cast(Sym), true /* access indirectly */); const MCExpr *BSymExpr = MCSymbolRefExpr::create(BaseSym, MCSymbolRefExpr::VK_None, Ctx); const MCExpr *LHS = MCSymbolRefExpr::create(Stub, MCSymbolRefExpr::VK_None, Ctx); if (!Offset) return MCBinaryExpr::createSub(LHS, BSymExpr, Ctx); const MCExpr *RHS = MCBinaryExpr::createAdd(BSymExpr, MCConstantExpr::create(Offset, Ctx), Ctx); return MCBinaryExpr::createSub(LHS, RHS, Ctx); } static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo, const MCSection &Section) { if (!AsmInfo.isSectionAtomizableBySymbols(Section)) return true; // If it is not dead stripped, it is safe to use private labels. 
const MCSectionMachO &SMO = cast(Section); if (SMO.hasAttribute(MachO::S_ATTR_NO_DEAD_STRIP)) return true; return false; } void TargetLoweringObjectFileMachO::getNameWithPrefix( SmallVectorImpl &OutName, const GlobalValue *GV, const TargetMachine &TM) const { bool CannotUsePrivateLabel = true; if (auto *GO = GV->getBaseObject()) { SectionKind GOKind = TargetLoweringObjectFile::getKindForGlobal(GO, TM); const MCSection *TheSection = SectionForGlobal(GO, GOKind, TM); CannotUsePrivateLabel = !canUsePrivateLabel(*TM.getMCAsmInfo(), *TheSection); } getMangler().getNameWithPrefix(OutName, GV, CannotUsePrivateLabel); } //===----------------------------------------------------------------------===// // COFF //===----------------------------------------------------------------------===// static unsigned getCOFFSectionFlags(SectionKind K, const TargetMachine &TM) { unsigned Flags = 0; bool isThumb = TM.getTargetTriple().getArch() == Triple::thumb; if (K.isMetadata()) Flags |= COFF::IMAGE_SCN_MEM_DISCARDABLE; else if (K.isText()) Flags |= COFF::IMAGE_SCN_MEM_EXECUTE | COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_CNT_CODE | (isThumb ? COFF::IMAGE_SCN_MEM_16BIT : (COFF::SectionCharacteristics)0); else if (K.isBSS()) Flags |= COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE; else if (K.isThreadLocal()) Flags |= COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE; else if (K.isReadOnly() || K.isReadOnlyWithRel()) Flags |= COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ; else if (K.isWriteable()) Flags |= COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE; return Flags; } static const GlobalValue *getComdatGVForCOFF(const GlobalValue *GV) { const Comdat *C = GV->getComdat(); assert(C && "expected GV to have a Comdat!"); StringRef ComdatGVName = C->getName(); const GlobalValue *ComdatGV = GV->getParent()->getNamedValue(ComdatGVName); if (!ComdatGV) report_fatal_error("Associative COMDAT symbol '" + ComdatGVName + "' does not exist."); if (ComdatGV->getComdat() != C) report_fatal_error("Associative COMDAT symbol '" + ComdatGVName + "' is not a key for its COMDAT."); return ComdatGV; } static int getSelectionForCOFF(const GlobalValue *GV) { if (const Comdat *C = GV->getComdat()) { const GlobalValue *ComdatKey = getComdatGVForCOFF(GV); if (const auto *GA = dyn_cast(ComdatKey)) ComdatKey = GA->getBaseObject(); if (ComdatKey == GV) { switch (C->getSelectionKind()) { case Comdat::Any: return COFF::IMAGE_COMDAT_SELECT_ANY; case Comdat::ExactMatch: return COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH; case Comdat::Largest: return COFF::IMAGE_COMDAT_SELECT_LARGEST; case Comdat::NoDuplicates: return COFF::IMAGE_COMDAT_SELECT_NODUPLICATES; case Comdat::SameSize: return COFF::IMAGE_COMDAT_SELECT_SAME_SIZE; } } else { return COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE; } } return 0; } MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { int Selection = 0; unsigned Characteristics = getCOFFSectionFlags(Kind, TM); StringRef Name = GO->getSection(); StringRef COMDATSymName = ""; if (GO->hasComdat()) { Selection = getSelectionForCOFF(GO); const GlobalValue *ComdatGV; if (Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) ComdatGV = getComdatGVForCOFF(GO); else ComdatGV = GO; if (!ComdatGV->hasPrivateLinkage()) { MCSymbol *Sym = TM.getSymbol(ComdatGV); COMDATSymName = Sym->getName(); Characteristics |= 
COFF::IMAGE_SCN_LNK_COMDAT; } else { Selection = 0; } } return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName, Selection); } static StringRef getCOFFSectionNameForUniqueGlobal(SectionKind Kind) { if (Kind.isText()) return ".text"; if (Kind.isBSS()) return ".bss"; if (Kind.isThreadLocal()) return ".tls$"; if (Kind.isReadOnly() || Kind.isReadOnlyWithRel()) return ".rdata"; return ".data"; } MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { // If we have -ffunction-sections then we should emit the global value to a // uniqued section specifically for it. bool EmitUniquedSection; if (Kind.isText()) EmitUniquedSection = TM.getFunctionSections(); else EmitUniquedSection = TM.getDataSections(); if ((EmitUniquedSection && !Kind.isCommon()) || GO->hasComdat()) { SmallString<256> Name = getCOFFSectionNameForUniqueGlobal(Kind); unsigned Characteristics = getCOFFSectionFlags(Kind, TM); Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; int Selection = getSelectionForCOFF(GO); if (!Selection) Selection = COFF::IMAGE_COMDAT_SELECT_NODUPLICATES; const GlobalValue *ComdatGV; if (GO->hasComdat()) ComdatGV = getComdatGVForCOFF(GO); else ComdatGV = GO; unsigned UniqueID = MCContext::GenericSectionID; if (EmitUniquedSection) UniqueID = NextUniqueID++; if (!ComdatGV->hasPrivateLinkage()) { MCSymbol *Sym = TM.getSymbol(ComdatGV); StringRef COMDATSymName = Sym->getName(); - // Append "$symbol" to the section name when targetting mingw. The ld.bfd + // Append "$symbol" to the section name *before* IR-level mangling is + // applied when targetting mingw. This is what GCC does, and the ld.bfd // COFF linker will not properly handle comdats otherwise. if (getTargetTriple().isWindowsGNUEnvironment()) - raw_svector_ostream(Name) << '$' << COMDATSymName; + raw_svector_ostream(Name) << '$' << ComdatGV->getName(); return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName, Selection, UniqueID); } else { SmallString<256> TmpData; getMangler().getNameWithPrefix(TmpData, GO, /*CannotUsePrivateLabel=*/true); return getContext().getCOFFSection(Name, Characteristics, Kind, TmpData, Selection, UniqueID); } } if (Kind.isText()) return TextSection; if (Kind.isThreadLocal()) return TLSDataSection; if (Kind.isReadOnly() || Kind.isReadOnlyWithRel()) return ReadOnlySection; // Note: we claim that common symbols are put in BSSSection, but they are // really emitted with the magic .comm directive, which creates a symbol table // entry but not a section. if (Kind.isBSS() || Kind.isCommon()) return BSSSection; return DataSection; } void TargetLoweringObjectFileCOFF::getNameWithPrefix( SmallVectorImpl &OutName, const GlobalValue *GV, const TargetMachine &TM) const { bool CannotUsePrivateLabel = false; if (GV->hasPrivateLinkage() && ((isa(GV) && TM.getFunctionSections()) || (isa(GV) && TM.getDataSections()))) CannotUsePrivateLabel = true; getMangler().getNameWithPrefix(OutName, GV, CannotUsePrivateLabel); } MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable( const Function &F, const TargetMachine &TM) const { // If the function can be removed, produce a unique section so that // the table doesn't prevent the removal. const Comdat *C = F.getComdat(); bool EmitUniqueSection = TM.getFunctionSections() || C; if (!EmitUniqueSection) return ReadOnlySection; // FIXME: we should produce a symbol for F instead. 
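// Editor's illustrative sketch (standalone C++): the mingw hunk above switches
// the uniqued-COMDAT section suffix from the mangled symbol (COMDATSymName) to
// the IR-level name (ComdatGV->getName()). The "_foo" mangling below is a
// hypothetical underscore-prefixing target, used only to show the difference.
#include <iostream>
#include <string>

int main() {
  std::string Prefix = ".text";      // from getCOFFSectionNameForUniqueGlobal
  std::string IRName = "foo";        // ComdatGV->getName()
  std::string Mangled = "_foo";      // COMDATSymName under the assumed mangling

  std::cout << "before: " << Prefix + "$" + Mangled << "\n"; // .text$_foo
  std::cout << "after:  " << Prefix + "$" + IRName << "\n";  // .text$foo
}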
if (F.hasPrivateLinkage()) return ReadOnlySection; MCSymbol *Sym = TM.getSymbol(&F); StringRef COMDATSymName = Sym->getName(); SectionKind Kind = SectionKind::getReadOnly(); StringRef SecName = getCOFFSectionNameForUniqueGlobal(Kind); unsigned Characteristics = getCOFFSectionFlags(Kind, TM); Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; unsigned UniqueID = NextUniqueID++; return getContext().getCOFFSection( SecName, Characteristics, Kind, COMDATSymName, COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, UniqueID); } void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer, Module &M) const { if (NamedMDNode *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) { // Emit the linker options to the linker .drectve section. According to the // spec, this section is a space-separated string containing flags for // linker. MCSection *Sec = getDrectveSection(); Streamer.SwitchSection(Sec); for (const auto &Option : LinkerOptions->operands()) { for (const auto &Piece : cast(Option)->operands()) { // Lead with a space for consistency with our dllexport implementation. std::string Directive(" "); Directive.append(cast(Piece)->getString()); Streamer.EmitBytes(Directive); } } } unsigned Version = 0; unsigned Flags = 0; StringRef Section; GetObjCImageInfo(M, Version, Flags, Section); if (Section.empty()) return; auto &C = getContext(); auto *S = C.getCOFFSection( Section, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, SectionKind::getReadOnly()); Streamer.SwitchSection(S); Streamer.EmitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); Streamer.EmitIntValue(Version, 4); Streamer.EmitIntValue(Flags, 4); Streamer.AddBlankLine(); } void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, const TargetMachine &TM) { TargetLoweringObjectFile::Initialize(Ctx, TM); const Triple &T = TM.getTargetTriple(); if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) { StaticCtorSection = Ctx.getCOFFSection(".CRT$XCU", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, SectionKind::getReadOnly()); StaticDtorSection = Ctx.getCOFFSection(".CRT$XTX", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, SectionKind::getReadOnly()); } else { StaticCtorSection = Ctx.getCOFFSection( ".ctors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE, SectionKind::getData()); StaticDtorSection = Ctx.getCOFFSection( ".dtors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE, SectionKind::getData()); } } static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx, const Triple &T, bool IsCtor, unsigned Priority, const MCSymbol *KeySym, MCSectionCOFF *Default) { if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) return Ctx.getAssociativeCOFFSection(Default, KeySym, 0); std::string Name = IsCtor ? 
".ctors" : ".dtors"; if (Priority != 65535) raw_string_ostream(Name) << format(".%05u", 65535 - Priority); return Ctx.getAssociativeCOFFSection( Ctx.getCOFFSection(Name, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE, SectionKind::getData()), KeySym, 0); } MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection( unsigned Priority, const MCSymbol *KeySym) const { return getCOFFStaticStructorSection(getContext(), getTargetTriple(), true, Priority, KeySym, cast(StaticCtorSection)); } MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection( unsigned Priority, const MCSymbol *KeySym) const { return getCOFFStaticStructorSection(getContext(), getTargetTriple(), false, Priority, KeySym, cast(StaticDtorSection)); } void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal( raw_ostream &OS, const GlobalValue *GV) const { emitLinkerFlagsForGlobalCOFF(OS, GV, getTargetTriple(), getMangler()); } void TargetLoweringObjectFileCOFF::emitLinkerFlagsForUsed( raw_ostream &OS, const GlobalValue *GV) const { emitLinkerFlagsForUsedCOFF(OS, GV, getTargetTriple(), getMangler()); } const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference( const GlobalValue *LHS, const GlobalValue *RHS, const TargetMachine &TM) const { const Triple &T = TM.getTargetTriple(); if (!T.isKnownWindowsMSVCEnvironment() && !T.isWindowsItaniumEnvironment() && !T.isWindowsCoreCLREnvironment()) return nullptr; // Our symbols should exist in address space zero, cowardly no-op if // otherwise. if (LHS->getType()->getPointerAddressSpace() != 0 || RHS->getType()->getPointerAddressSpace() != 0) return nullptr; // Both ptrtoint instructions must wrap global objects: // - Only global variables are eligible for image relative relocations. // - The subtrahend refers to the special symbol __ImageBase, a GlobalVariable. // We expect __ImageBase to be a global variable without a section, externally // defined. 
// // It should look something like this: @__ImageBase = external constant i8 if (!isa(LHS) || !isa(RHS) || LHS->isThreadLocal() || RHS->isThreadLocal() || RHS->getName() != "__ImageBase" || !RHS->hasExternalLinkage() || cast(RHS)->hasInitializer() || RHS->hasSection()) return nullptr; return MCSymbolRefExpr::create(TM.getSymbol(LHS), MCSymbolRefExpr::VK_COFF_IMGREL32, getContext()); } static std::string APIntToHexString(const APInt &AI) { unsigned Width = (AI.getBitWidth() / 8) * 2; std::string HexString = utohexstr(AI.getLimitedValue(), /*LowerCase=*/true); unsigned Size = HexString.size(); assert(Width >= Size && "hex string is too large!"); HexString.insert(HexString.begin(), Width - Size, '0'); return HexString; } static std::string scalarConstantToHexString(const Constant *C) { Type *Ty = C->getType(); if (isa(C)) { return APIntToHexString(APInt::getNullValue(Ty->getPrimitiveSizeInBits())); } else if (const auto *CFP = dyn_cast(C)) { return APIntToHexString(CFP->getValueAPF().bitcastToAPInt()); } else if (const auto *CI = dyn_cast(C)) { return APIntToHexString(CI->getValue()); } else { unsigned NumElements; if (isa(Ty)) NumElements = Ty->getVectorNumElements(); else NumElements = Ty->getArrayNumElements(); std::string HexString; for (int I = NumElements - 1, E = -1; I != E; --I) HexString += scalarConstantToHexString(C->getAggregateElement(I)); return HexString; } } MCSection *TargetLoweringObjectFileCOFF::getSectionForConstant( const DataLayout &DL, SectionKind Kind, const Constant *C, unsigned &Align) const { if (Kind.isMergeableConst() && C && getContext().getAsmInfo()->hasCOFFComdatConstants()) { // This creates comdat sections with the given symbol name, but unless // AsmPrinter::GetCPISymbol actually makes the symbol global, the symbol // will be created with a null storage class, which makes GNU binutils // error out. const unsigned Characteristics = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_LNK_COMDAT; std::string COMDATSymName; if (Kind.isMergeableConst4()) { if (Align <= 4) { COMDATSymName = "__real@" + scalarConstantToHexString(C); Align = 4; } } else if (Kind.isMergeableConst8()) { if (Align <= 8) { COMDATSymName = "__real@" + scalarConstantToHexString(C); Align = 8; } } else if (Kind.isMergeableConst16()) { // FIXME: These may not be appropriate for non-x86 architectures. if (Align <= 16) { COMDATSymName = "__xmm@" + scalarConstantToHexString(C); Align = 16; } } else if (Kind.isMergeableConst32()) { if (Align <= 32) { COMDATSymName = "__ymm@" + scalarConstantToHexString(C); Align = 32; } } if (!COMDATSymName.empty()) return getContext().getCOFFSection(".rdata", Characteristics, Kind, COMDATSymName, COFF::IMAGE_COMDAT_SELECT_ANY); } return TargetLoweringObjectFile::getSectionForConstant(DL, Kind, C, Align); } //===----------------------------------------------------------------------===// // Wasm //===----------------------------------------------------------------------===// static const Comdat *getWasmComdat(const GlobalValue *GV) { const Comdat *C = GV->getComdat(); if (!C) return nullptr; if (C->getSelectionKind() != Comdat::Any) report_fatal_error("WebAssembly COMDATs only support " "SelectionKind::Any, '" + C->getName() + "' cannot be " "lowered."); return C; } static SectionKind getWasmKindForNamedSection(StringRef Name, SectionKind K) { // If we're told we have function data, then use that. 
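// Editor's illustrative sketch (standalone C++, not LLVM's APIntToHexString):
// for an 8-byte mergeable constant the code above names the comdat
// "__real@" followed by the zero-padded lowercase hex of the value's bits.
#include <cstdint>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>

static std::string realComdatName(double D) {
  std::uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));   // bit-cast without UB
  std::ostringstream OS;
  OS << std::hex << std::setw(16) << std::setfill('0') << Bits;
  return "__real@" + OS.str();
}

int main() {
  std::cout << realComdatName(2.0) << "\n";  // __real@4000000000000000
}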
if (K.isText()) return SectionKind::getText(); // Otherwise, ignore whatever section type the generic impl detected and use // a plain data section. return SectionKind::getData(); } MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { // We don't support explict section names for functions in the wasm object // format. Each function has to be in its own unique section. if (isa(GO)) { return SelectSectionForGlobal(GO, Kind, TM); } StringRef Name = GO->getSection(); Kind = getWasmKindForNamedSection(Name, Kind); StringRef Group = ""; if (const Comdat *C = getWasmComdat(GO)) { Group = C->getName(); } return getContext().getWasmSection(Name, Kind, Group, MCContext::GenericSectionID); } static MCSectionWasm *selectWasmSectionForGlobal( MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang, const TargetMachine &TM, bool EmitUniqueSection, unsigned *NextUniqueID) { StringRef Group = ""; if (const Comdat *C = getWasmComdat(GO)) { Group = C->getName(); } bool UniqueSectionNames = TM.getUniqueSectionNames(); SmallString<128> Name = getSectionPrefixForGlobal(Kind); if (const auto *F = dyn_cast(GO)) { const auto &OptionalPrefix = F->getSectionPrefix(); if (OptionalPrefix) Name += *OptionalPrefix; } if (EmitUniqueSection && UniqueSectionNames) { Name.push_back('.'); TM.getNameWithPrefix(Name, GO, Mang, true); } unsigned UniqueID = MCContext::GenericSectionID; if (EmitUniqueSection && !UniqueSectionNames) { UniqueID = *NextUniqueID; (*NextUniqueID)++; } return Ctx.getWasmSection(Name, Kind, Group, UniqueID); } MCSection *TargetLoweringObjectFileWasm::SelectSectionForGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { if (Kind.isCommon()) report_fatal_error("mergable sections not supported yet on wasm"); // If we have -ffunction-section or -fdata-section then we should emit the // global value to a uniqued section specifically for it. bool EmitUniqueSection = false; if (Kind.isText()) EmitUniqueSection = TM.getFunctionSections(); else EmitUniqueSection = TM.getDataSections(); EmitUniqueSection |= GO->hasComdat(); return selectWasmSectionForGlobal(getContext(), GO, Kind, getMangler(), TM, EmitUniqueSection, &NextUniqueID); } bool TargetLoweringObjectFileWasm::shouldPutJumpTableInFunctionSection( bool UsesLabelDifference, const Function &F) const { // We can always create relative relocations, so use another section // that can be marked non-executable. return false; } const MCExpr *TargetLoweringObjectFileWasm::lowerRelativeReference( const GlobalValue *LHS, const GlobalValue *RHS, const TargetMachine &TM) const { // We may only use a PLT-relative relocation to refer to unnamed_addr // functions. if (!LHS->hasGlobalUnnamedAddr() || !LHS->getValueType()->isFunctionTy()) return nullptr; // Basic sanity checks. if (LHS->getType()->getPointerAddressSpace() != 0 || RHS->getType()->getPointerAddressSpace() != 0 || LHS->isThreadLocal() || RHS->isThreadLocal()) return nullptr; return MCBinaryExpr::createSub( MCSymbolRefExpr::create(TM.getSymbol(LHS), MCSymbolRefExpr::VK_None, getContext()), MCSymbolRefExpr::create(TM.getSymbol(RHS), getContext()), getContext()); } void TargetLoweringObjectFileWasm::InitializeWasm() { StaticCtorSection = getContext().getWasmSection(".init_array", SectionKind::getData()); } MCSection *TargetLoweringObjectFileWasm::getStaticCtorSection( unsigned Priority, const MCSymbol *KeySym) const { return Priority == UINT16_MAX ? 
StaticCtorSection : getContext().getWasmSection(".init_array." + utostr(Priority), SectionKind::getData()); } MCSection *TargetLoweringObjectFileWasm::getStaticDtorSection( unsigned Priority, const MCSymbol *KeySym) const { llvm_unreachable("@llvm.global_dtors should have been lowered already"); return nullptr; } Index: vendor/llvm/dist-release_70/lib/MC/MCExpr.cpp =================================================================== --- vendor/llvm/dist-release_70/lib/MC/MCExpr.cpp (revision 341364) +++ vendor/llvm/dist-release_70/lib/MC/MCExpr.cpp (revision 341365) @@ -1,892 +1,897 @@ //===- MCExpr.cpp - Assembly Level Expression Implementation --------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "llvm/MC/MCExpr.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Config/llvm-config.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include #include using namespace llvm; #define DEBUG_TYPE "mcexpr" namespace { namespace stats { STATISTIC(MCExprEvaluate, "Number of MCExpr evaluations"); } // end namespace stats } // end anonymous namespace void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens) const { switch (getKind()) { case MCExpr::Target: return cast(this)->printImpl(OS, MAI); case MCExpr::Constant: OS << cast(*this).getValue(); return; case MCExpr::SymbolRef: { const MCSymbolRefExpr &SRE = cast(*this); const MCSymbol &Sym = SRE.getSymbol(); // Parenthesize names that start with $ so that they don't look like // absolute names. bool UseParens = !InParens && !Sym.getName().empty() && Sym.getName()[0] == '$'; if (UseParens) { OS << '('; Sym.print(OS, MAI); OS << ')'; } else Sym.print(OS, MAI); if (SRE.getKind() != MCSymbolRefExpr::VK_None) SRE.printVariantKind(OS); return; } case MCExpr::Unary: { const MCUnaryExpr &UE = cast(*this); switch (UE.getOpcode()) { case MCUnaryExpr::LNot: OS << '!'; break; case MCUnaryExpr::Minus: OS << '-'; break; case MCUnaryExpr::Not: OS << '~'; break; case MCUnaryExpr::Plus: OS << '+'; break; } bool Binary = UE.getSubExpr()->getKind() == MCExpr::Binary; if (Binary) OS << "("; UE.getSubExpr()->print(OS, MAI); if (Binary) OS << ")"; return; } case MCExpr::Binary: { const MCBinaryExpr &BE = cast(*this); // Only print parens around the LHS if it is non-trivial. if (isa(BE.getLHS()) || isa(BE.getLHS())) { BE.getLHS()->print(OS, MAI); } else { OS << '('; BE.getLHS()->print(OS, MAI); OS << ')'; } switch (BE.getOpcode()) { case MCBinaryExpr::Add: // Print "X-42" instead of "X+-42". 
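// Editor's illustrative sketch (standalone C++): the wasm getStaticCtorSection
// logic above keeps ".init_array" for the default priority (UINT16_MAX) and
// otherwise appends ".<priority>".
#include <iostream>
#include <string>

static std::string wasmCtorSection(unsigned Priority) {
  if (Priority == 65535u)               // UINT16_MAX, the default priority
    return ".init_array";
  return ".init_array." + std::to_string(Priority);
}

int main() {
  std::cout << wasmCtorSection(65535) << "\n"; // .init_array
  std::cout << wasmCtorSection(101) << "\n";   // .init_array.101
}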
if (const MCConstantExpr *RHSC = dyn_cast(BE.getRHS())) { if (RHSC->getValue() < 0) { OS << RHSC->getValue(); return; } } OS << '+'; break; case MCBinaryExpr::AShr: OS << ">>"; break; case MCBinaryExpr::And: OS << '&'; break; case MCBinaryExpr::Div: OS << '/'; break; case MCBinaryExpr::EQ: OS << "=="; break; case MCBinaryExpr::GT: OS << '>'; break; case MCBinaryExpr::GTE: OS << ">="; break; case MCBinaryExpr::LAnd: OS << "&&"; break; case MCBinaryExpr::LOr: OS << "||"; break; case MCBinaryExpr::LShr: OS << ">>"; break; case MCBinaryExpr::LT: OS << '<'; break; case MCBinaryExpr::LTE: OS << "<="; break; case MCBinaryExpr::Mod: OS << '%'; break; case MCBinaryExpr::Mul: OS << '*'; break; case MCBinaryExpr::NE: OS << "!="; break; case MCBinaryExpr::Or: OS << '|'; break; case MCBinaryExpr::Shl: OS << "<<"; break; case MCBinaryExpr::Sub: OS << '-'; break; case MCBinaryExpr::Xor: OS << '^'; break; } // Only print parens around the LHS if it is non-trivial. if (isa(BE.getRHS()) || isa(BE.getRHS())) { BE.getRHS()->print(OS, MAI); } else { OS << '('; BE.getRHS()->print(OS, MAI); OS << ')'; } return; } } llvm_unreachable("Invalid expression kind!"); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void MCExpr::dump() const { dbgs() << *this; dbgs() << '\n'; } #endif /* *** */ const MCBinaryExpr *MCBinaryExpr::create(Opcode Opc, const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc) { return new (Ctx) MCBinaryExpr(Opc, LHS, RHS, Loc); } const MCUnaryExpr *MCUnaryExpr::create(Opcode Opc, const MCExpr *Expr, MCContext &Ctx, SMLoc Loc) { return new (Ctx) MCUnaryExpr(Opc, Expr, Loc); } const MCConstantExpr *MCConstantExpr::create(int64_t Value, MCContext &Ctx) { return new (Ctx) MCConstantExpr(Value); } /* *** */ MCSymbolRefExpr::MCSymbolRefExpr(const MCSymbol *Symbol, VariantKind Kind, const MCAsmInfo *MAI, SMLoc Loc) : MCExpr(MCExpr::SymbolRef, Loc), Kind(Kind), UseParensForSymbolVariant(MAI->useParensForSymbolVariant()), HasSubsectionsViaSymbols(MAI->hasSubsectionsViaSymbols()), Symbol(Symbol) { assert(Symbol); } const MCSymbolRefExpr *MCSymbolRefExpr::create(const MCSymbol *Sym, VariantKind Kind, MCContext &Ctx, SMLoc Loc) { return new (Ctx) MCSymbolRefExpr(Sym, Kind, Ctx.getAsmInfo(), Loc); } const MCSymbolRefExpr *MCSymbolRefExpr::create(StringRef Name, VariantKind Kind, MCContext &Ctx) { return create(Ctx.getOrCreateSymbol(Name), Kind, Ctx); } StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { switch (Kind) { case VK_Invalid: return "<>"; case VK_None: return "<>"; case VK_DTPOFF: return "DTPOFF"; case VK_DTPREL: return "DTPREL"; case VK_GOT: return "GOT"; case VK_GOTOFF: return "GOTOFF"; case VK_GOTREL: return "GOTREL"; case VK_GOTPCREL: return "GOTPCREL"; case VK_GOTTPOFF: return "GOTTPOFF"; case VK_INDNTPOFF: return "INDNTPOFF"; case VK_NTPOFF: return "NTPOFF"; case VK_GOTNTPOFF: return "GOTNTPOFF"; case VK_PLT: return "PLT"; case VK_TLSGD: return "TLSGD"; case VK_TLSLD: return "TLSLD"; case VK_TLSLDM: return "TLSLDM"; case VK_TPOFF: return "TPOFF"; case VK_TPREL: return "TPREL"; case VK_TLSCALL: return "tlscall"; case VK_TLSDESC: return "tlsdesc"; case VK_TLVP: return "TLVP"; case VK_TLVPPAGE: return "TLVPPAGE"; case VK_TLVPPAGEOFF: return "TLVPPAGEOFF"; case VK_PAGE: return "PAGE"; case VK_PAGEOFF: return "PAGEOFF"; case VK_GOTPAGE: return "GOTPAGE"; case VK_GOTPAGEOFF: return "GOTPAGEOFF"; case VK_SECREL: return "SECREL32"; case VK_SIZE: return "SIZE"; case VK_WEAKREF: return "WEAKREF"; case VK_X86_ABS8: return "ABS8"; case VK_ARM_NONE: return 
"none"; case VK_ARM_GOT_PREL: return "GOT_PREL"; case VK_ARM_TARGET1: return "target1"; case VK_ARM_TARGET2: return "target2"; case VK_ARM_PREL31: return "prel31"; case VK_ARM_SBREL: return "sbrel"; case VK_ARM_TLSLDO: return "tlsldo"; case VK_ARM_TLSDESCSEQ: return "tlsdescseq"; case VK_AVR_NONE: return "none"; case VK_AVR_LO8: return "lo8"; case VK_AVR_HI8: return "hi8"; case VK_AVR_HLO8: return "hlo8"; case VK_AVR_DIFF8: return "diff8"; case VK_AVR_DIFF16: return "diff16"; case VK_AVR_DIFF32: return "diff32"; case VK_PPC_LO: return "l"; case VK_PPC_HI: return "h"; case VK_PPC_HA: return "ha"; case VK_PPC_HIGH: return "high"; case VK_PPC_HIGHA: return "higha"; case VK_PPC_HIGHER: return "higher"; case VK_PPC_HIGHERA: return "highera"; case VK_PPC_HIGHEST: return "highest"; case VK_PPC_HIGHESTA: return "highesta"; case VK_PPC_GOT_LO: return "got@l"; case VK_PPC_GOT_HI: return "got@h"; case VK_PPC_GOT_HA: return "got@ha"; case VK_PPC_TOCBASE: return "tocbase"; case VK_PPC_TOC: return "toc"; case VK_PPC_TOC_LO: return "toc@l"; case VK_PPC_TOC_HI: return "toc@h"; case VK_PPC_TOC_HA: return "toc@ha"; case VK_PPC_DTPMOD: return "dtpmod"; case VK_PPC_TPREL_LO: return "tprel@l"; case VK_PPC_TPREL_HI: return "tprel@h"; case VK_PPC_TPREL_HA: return "tprel@ha"; case VK_PPC_TPREL_HIGH: return "tprel@high"; case VK_PPC_TPREL_HIGHA: return "tprel@higha"; case VK_PPC_TPREL_HIGHER: return "tprel@higher"; case VK_PPC_TPREL_HIGHERA: return "tprel@highera"; case VK_PPC_TPREL_HIGHEST: return "tprel@highest"; case VK_PPC_TPREL_HIGHESTA: return "tprel@highesta"; case VK_PPC_DTPREL_LO: return "dtprel@l"; case VK_PPC_DTPREL_HI: return "dtprel@h"; case VK_PPC_DTPREL_HA: return "dtprel@ha"; case VK_PPC_DTPREL_HIGH: return "dtprel@high"; case VK_PPC_DTPREL_HIGHA: return "dtprel@higha"; case VK_PPC_DTPREL_HIGHER: return "dtprel@higher"; case VK_PPC_DTPREL_HIGHERA: return "dtprel@highera"; case VK_PPC_DTPREL_HIGHEST: return "dtprel@highest"; case VK_PPC_DTPREL_HIGHESTA: return "dtprel@highesta"; case VK_PPC_GOT_TPREL: return "got@tprel"; case VK_PPC_GOT_TPREL_LO: return "got@tprel@l"; case VK_PPC_GOT_TPREL_HI: return "got@tprel@h"; case VK_PPC_GOT_TPREL_HA: return "got@tprel@ha"; case VK_PPC_GOT_DTPREL: return "got@dtprel"; case VK_PPC_GOT_DTPREL_LO: return "got@dtprel@l"; case VK_PPC_GOT_DTPREL_HI: return "got@dtprel@h"; case VK_PPC_GOT_DTPREL_HA: return "got@dtprel@ha"; case VK_PPC_TLS: return "tls"; case VK_PPC_GOT_TLSGD: return "got@tlsgd"; case VK_PPC_GOT_TLSGD_LO: return "got@tlsgd@l"; case VK_PPC_GOT_TLSGD_HI: return "got@tlsgd@h"; case VK_PPC_GOT_TLSGD_HA: return "got@tlsgd@ha"; case VK_PPC_TLSGD: return "tlsgd"; case VK_PPC_GOT_TLSLD: return "got@tlsld"; case VK_PPC_GOT_TLSLD_LO: return "got@tlsld@l"; case VK_PPC_GOT_TLSLD_HI: return "got@tlsld@h"; case VK_PPC_GOT_TLSLD_HA: return "got@tlsld@ha"; case VK_PPC_TLSLD: return "tlsld"; case VK_PPC_LOCAL: return "local"; case VK_COFF_IMGREL32: return "IMGREL"; case VK_Hexagon_PCREL: return "PCREL"; case VK_Hexagon_LO16: return "LO16"; case VK_Hexagon_HI16: return "HI16"; case VK_Hexagon_GPREL: return "GPREL"; case VK_Hexagon_GD_GOT: return "GDGOT"; case VK_Hexagon_LD_GOT: return "LDGOT"; case VK_Hexagon_GD_PLT: return "GDPLT"; case VK_Hexagon_LD_PLT: return "LDPLT"; case VK_Hexagon_IE: return "IE"; case VK_Hexagon_IE_GOT: return "IEGOT"; case VK_WebAssembly_FUNCTION: return "FUNCTION"; case VK_WebAssembly_TYPEINDEX: return "TYPEINDEX"; case VK_AMDGPU_GOTPCREL32_LO: return "gotpcrel32@lo"; case VK_AMDGPU_GOTPCREL32_HI: return "gotpcrel32@hi"; case 
VK_AMDGPU_REL32_LO: return "rel32@lo"; case VK_AMDGPU_REL32_HI: return "rel32@hi"; case VK_AMDGPU_REL64: return "rel64"; } llvm_unreachable("Invalid variant kind"); } MCSymbolRefExpr::VariantKind MCSymbolRefExpr::getVariantKindForName(StringRef Name) { return StringSwitch(Name.lower()) .Case("dtprel", VK_DTPREL) .Case("dtpoff", VK_DTPOFF) .Case("got", VK_GOT) .Case("gotoff", VK_GOTOFF) .Case("gotrel", VK_GOTREL) .Case("gotpcrel", VK_GOTPCREL) .Case("gottpoff", VK_GOTTPOFF) .Case("indntpoff", VK_INDNTPOFF) .Case("ntpoff", VK_NTPOFF) .Case("gotntpoff", VK_GOTNTPOFF) .Case("plt", VK_PLT) .Case("tlscall", VK_TLSCALL) .Case("tlsdesc", VK_TLSDESC) .Case("tlsgd", VK_TLSGD) .Case("tlsld", VK_TLSLD) .Case("tlsldm", VK_TLSLDM) .Case("tpoff", VK_TPOFF) .Case("tprel", VK_TPREL) .Case("tlvp", VK_TLVP) .Case("tlvppage", VK_TLVPPAGE) .Case("tlvppageoff", VK_TLVPPAGEOFF) .Case("page", VK_PAGE) .Case("pageoff", VK_PAGEOFF) .Case("gotpage", VK_GOTPAGE) .Case("gotpageoff", VK_GOTPAGEOFF) .Case("imgrel", VK_COFF_IMGREL32) .Case("secrel32", VK_SECREL) .Case("size", VK_SIZE) .Case("abs8", VK_X86_ABS8) .Case("l", VK_PPC_LO) .Case("h", VK_PPC_HI) .Case("ha", VK_PPC_HA) .Case("high", VK_PPC_HIGH) .Case("higha", VK_PPC_HIGHA) .Case("higher", VK_PPC_HIGHER) .Case("highera", VK_PPC_HIGHERA) .Case("highest", VK_PPC_HIGHEST) .Case("highesta", VK_PPC_HIGHESTA) .Case("got@l", VK_PPC_GOT_LO) .Case("got@h", VK_PPC_GOT_HI) .Case("got@ha", VK_PPC_GOT_HA) .Case("local", VK_PPC_LOCAL) .Case("tocbase", VK_PPC_TOCBASE) .Case("toc", VK_PPC_TOC) .Case("toc@l", VK_PPC_TOC_LO) .Case("toc@h", VK_PPC_TOC_HI) .Case("toc@ha", VK_PPC_TOC_HA) .Case("tls", VK_PPC_TLS) .Case("dtpmod", VK_PPC_DTPMOD) .Case("tprel@l", VK_PPC_TPREL_LO) .Case("tprel@h", VK_PPC_TPREL_HI) .Case("tprel@ha", VK_PPC_TPREL_HA) .Case("tprel@high", VK_PPC_TPREL_HIGH) .Case("tprel@higha", VK_PPC_TPREL_HIGHA) .Case("tprel@higher", VK_PPC_TPREL_HIGHER) .Case("tprel@highera", VK_PPC_TPREL_HIGHERA) .Case("tprel@highest", VK_PPC_TPREL_HIGHEST) .Case("tprel@highesta", VK_PPC_TPREL_HIGHESTA) .Case("dtprel@l", VK_PPC_DTPREL_LO) .Case("dtprel@h", VK_PPC_DTPREL_HI) .Case("dtprel@ha", VK_PPC_DTPREL_HA) .Case("dtprel@high", VK_PPC_DTPREL_HIGH) .Case("dtprel@higha", VK_PPC_DTPREL_HIGHA) .Case("dtprel@higher", VK_PPC_DTPREL_HIGHER) .Case("dtprel@highera", VK_PPC_DTPREL_HIGHERA) .Case("dtprel@highest", VK_PPC_DTPREL_HIGHEST) .Case("dtprel@highesta", VK_PPC_DTPREL_HIGHESTA) .Case("got@tprel", VK_PPC_GOT_TPREL) .Case("got@tprel@l", VK_PPC_GOT_TPREL_LO) .Case("got@tprel@h", VK_PPC_GOT_TPREL_HI) .Case("got@tprel@ha", VK_PPC_GOT_TPREL_HA) .Case("got@dtprel", VK_PPC_GOT_DTPREL) .Case("got@dtprel@l", VK_PPC_GOT_DTPREL_LO) .Case("got@dtprel@h", VK_PPC_GOT_DTPREL_HI) .Case("got@dtprel@ha", VK_PPC_GOT_DTPREL_HA) .Case("got@tlsgd", VK_PPC_GOT_TLSGD) .Case("got@tlsgd@l", VK_PPC_GOT_TLSGD_LO) .Case("got@tlsgd@h", VK_PPC_GOT_TLSGD_HI) .Case("got@tlsgd@ha", VK_PPC_GOT_TLSGD_HA) .Case("got@tlsld", VK_PPC_GOT_TLSLD) .Case("got@tlsld@l", VK_PPC_GOT_TLSLD_LO) .Case("got@tlsld@h", VK_PPC_GOT_TLSLD_HI) .Case("got@tlsld@ha", VK_PPC_GOT_TLSLD_HA) .Case("gdgot", VK_Hexagon_GD_GOT) .Case("gdplt", VK_Hexagon_GD_PLT) .Case("iegot", VK_Hexagon_IE_GOT) .Case("ie", VK_Hexagon_IE) .Case("ldgot", VK_Hexagon_LD_GOT) .Case("ldplt", VK_Hexagon_LD_PLT) .Case("pcrel", VK_Hexagon_PCREL) .Case("none", VK_ARM_NONE) .Case("got_prel", VK_ARM_GOT_PREL) .Case("target1", VK_ARM_TARGET1) .Case("target2", VK_ARM_TARGET2) .Case("prel31", VK_ARM_PREL31) .Case("sbrel", VK_ARM_SBREL) .Case("tlsldo", VK_ARM_TLSLDO) .Case("lo8", 
                 VK_AVR_LO8)
    .Case("hi8", VK_AVR_HI8)
    .Case("hlo8", VK_AVR_HLO8)
    .Case("function", VK_WebAssembly_FUNCTION)
    .Case("typeindex", VK_WebAssembly_TYPEINDEX)
    .Case("gotpcrel32@lo", VK_AMDGPU_GOTPCREL32_LO)
    .Case("gotpcrel32@hi", VK_AMDGPU_GOTPCREL32_HI)
    .Case("rel32@lo", VK_AMDGPU_REL32_LO)
    .Case("rel32@hi", VK_AMDGPU_REL32_HI)
    .Case("rel64", VK_AMDGPU_REL64)
    .Default(VK_Invalid);
}

void MCSymbolRefExpr::printVariantKind(raw_ostream &OS) const {
  if (UseParensForSymbolVariant)
    OS << '(' << MCSymbolRefExpr::getVariantKindName(getKind()) << ')';
  else
    OS << '@' << MCSymbolRefExpr::getVariantKindName(getKind());
}

/* *** */

void MCTargetExpr::anchor() {}

/* *** */

bool MCExpr::evaluateAsAbsolute(int64_t &Res) const {
  return evaluateAsAbsolute(Res, nullptr, nullptr, nullptr);
}

bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAsmLayout &Layout) const {
  return evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, nullptr);
}

bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAsmLayout &Layout,
                                const SectionAddrMap &Addrs) const {
  return evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, &Addrs);
}

bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler &Asm) const {
  return evaluateAsAbsolute(Res, &Asm, nullptr, nullptr);
}

bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm) const {
  return evaluateAsAbsolute(Res, Asm, nullptr, nullptr);
}

bool MCExpr::evaluateKnownAbsolute(int64_t &Res,
                                   const MCAsmLayout &Layout) const {
  return evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, nullptr,
                            true);
}

bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
                                const MCAsmLayout *Layout,
                                const SectionAddrMap *Addrs) const {
  // FIXME: The use if InSet = Addrs is a hack. Setting InSet causes us
  // absolutize differences across sections and that is what the MachO writer
  // uses Addrs for.
  return evaluateAsAbsolute(Res, Asm, Layout, Addrs, Addrs);
}

bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
                                const MCAsmLayout *Layout,
                                const SectionAddrMap *Addrs, bool InSet) const {
  MCValue Value;

  // Fast path constants.
  if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(this)) {
    Res = CE->getValue();
    return true;
  }

  bool IsRelocatable =
      evaluateAsRelocatableImpl(Value, Asm, Layout, nullptr, Addrs, InSet);

  // Record the current value.
  Res = Value.getConstant();

  return IsRelocatable && Value.isAbsolute();
}

/// Helper method for \see EvaluateSymbolAdd().
static void AttemptToFoldSymbolOffsetDifference(
    const MCAssembler *Asm, const MCAsmLayout *Layout,
    const SectionAddrMap *Addrs, bool InSet, const MCSymbolRefExpr *&A,
    const MCSymbolRefExpr *&B, int64_t &Addend) {
  if (!A || !B)
    return;

  const MCSymbol &SA = A->getSymbol();
  const MCSymbol &SB = B->getSymbol();

  if (SA.isUndefined() || SB.isUndefined())
    return;

  if (!Asm->getWriter().isSymbolRefDifferenceFullyResolved(*Asm, A, B, InSet))
    return;

  if (SA.getFragment() == SB.getFragment() && !SA.isVariable() &&
      !SA.isUnset() && !SB.isVariable() && !SB.isUnset()) {
    Addend += (SA.getOffset() - SB.getOffset());

    // Pointers to Thumb symbols need to have their low-bit set to allow
    // for interworking.
    if (Asm->isThumbFunc(&SA))
      Addend |= 1;

+    // If symbol is labeled as micromips, we set low-bit to ensure
+    // correct offset in .gcc_except_table
+    if (Asm->getBackend().isMicroMips(&SA))
+      Addend |= 1;
+
    // Clear the symbol expr pointers to indicate we have folded these
    // operands.
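// Editor's illustrative sketch (standalone C++, not the MC code itself): when
// both symbols live in the same fragment their offset difference is folded
// into the addend, and for Thumb functions, or microMIPS symbols after the
// hunk above, the low bit is set so the folded value keeps the ISA bit (the
// comment cites .gcc_except_table offsets as the motivating case).
#include <cstdint>
#include <iostream>

static std::int64_t foldDifference(std::uint64_t OffsetA, std::uint64_t OffsetB,
                                   bool LowBitISA) {
  std::int64_t Addend = static_cast<std::int64_t>(OffsetA - OffsetB);
  if (LowBitISA)                        // Thumb or microMIPS symbol
    Addend |= 1;
  return Addend;
}

int main() {
  std::cout << foldDifference(0x40, 0x10, false) << "\n"; // 48
  std::cout << foldDifference(0x40, 0x10, true) << "\n";  // 49
}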
A = B = nullptr; return; } if (!Layout) return; const MCSection &SecA = *SA.getFragment()->getParent(); const MCSection &SecB = *SB.getFragment()->getParent(); if ((&SecA != &SecB) && !Addrs) return; // Eagerly evaluate. Addend += Layout->getSymbolOffset(A->getSymbol()) - Layout->getSymbolOffset(B->getSymbol()); if (Addrs && (&SecA != &SecB)) Addend += (Addrs->lookup(&SecA) - Addrs->lookup(&SecB)); // Pointers to Thumb symbols need to have their low-bit set to allow // for interworking. if (Asm->isThumbFunc(&SA)) Addend |= 1; // Clear the symbol expr pointers to indicate we have folded these // operands. A = B = nullptr; } /// Evaluate the result of an add between (conceptually) two MCValues. /// /// This routine conceptually attempts to construct an MCValue: /// Result = (Result_A - Result_B + Result_Cst) /// from two MCValue's LHS and RHS where /// Result = LHS + RHS /// and /// Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst). /// /// This routine attempts to aggresively fold the operands such that the result /// is representable in an MCValue, but may not always succeed. /// /// \returns True on success, false if the result is not representable in an /// MCValue. /// NOTE: It is really important to have both the Asm and Layout arguments. /// They might look redundant, but this function can be used before layout /// is done (see the object streamer for example) and having the Asm argument /// lets us avoid relaxations early. static bool EvaluateSymbolicAdd(const MCAssembler *Asm, const MCAsmLayout *Layout, const SectionAddrMap *Addrs, bool InSet, const MCValue &LHS, const MCSymbolRefExpr *RHS_A, const MCSymbolRefExpr *RHS_B, int64_t RHS_Cst, MCValue &Res) { // FIXME: This routine (and other evaluation parts) are *incredibly* sloppy // about dealing with modifiers. This will ultimately bite us, one day. const MCSymbolRefExpr *LHS_A = LHS.getSymA(); const MCSymbolRefExpr *LHS_B = LHS.getSymB(); int64_t LHS_Cst = LHS.getConstant(); // Fold the result constant immediately. int64_t Result_Cst = LHS_Cst + RHS_Cst; assert((!Layout || Asm) && "Must have an assembler object if layout is given!"); // If we have a layout, we can fold resolved differences. Do not do this if // the backend requires this to be emitted as individual relocations, unless // the InSet flag is set to get the current difference anyway (used for // example to calculate symbol sizes). if (Asm && (InSet || !Asm->getBackend().requiresDiffExpressionRelocations())) { // First, fold out any differences which are fully resolved. By // reassociating terms in // Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst). // we have the four possible differences: // (LHS_A - LHS_B), // (LHS_A - RHS_B), // (RHS_A - LHS_B), // (RHS_A - RHS_B). // Since we are attempting to be as aggressive as possible about folding, we // attempt to evaluate each possible alternative. AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, LHS_A, LHS_B, Result_Cst); AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, LHS_A, RHS_B, Result_Cst); AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, RHS_A, LHS_B, Result_Cst); AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, RHS_A, RHS_B, Result_Cst); } // We can't represent the addition or subtraction of two symbols. if ((LHS_A && RHS_A) || (LHS_B && RHS_B)) return false; // At this point, we have at most one additive symbol and one subtractive // symbol -- find them. const MCSymbolRefExpr *A = LHS_A ? 
LHS_A : RHS_A; const MCSymbolRefExpr *B = LHS_B ? LHS_B : RHS_B; Res = MCValue::get(A, B, Result_Cst); return true; } bool MCExpr::evaluateAsRelocatable(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const { MCAssembler *Assembler = Layout ? &Layout->getAssembler() : nullptr; return evaluateAsRelocatableImpl(Res, Assembler, Layout, Fixup, nullptr, false); } bool MCExpr::evaluateAsValue(MCValue &Res, const MCAsmLayout &Layout) const { MCAssembler *Assembler = &Layout.getAssembler(); return evaluateAsRelocatableImpl(Res, Assembler, &Layout, nullptr, nullptr, true); } static bool canExpand(const MCSymbol &Sym, bool InSet) { const MCExpr *Expr = Sym.getVariableValue(); const auto *Inner = dyn_cast(Expr); if (Inner) { if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF) return false; } if (InSet) return true; return !Sym.isInSection(); } bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm, const MCAsmLayout *Layout, const MCFixup *Fixup, const SectionAddrMap *Addrs, bool InSet) const { ++stats::MCExprEvaluate; switch (getKind()) { case Target: return cast(this)->evaluateAsRelocatableImpl(Res, Layout, Fixup); case Constant: Res = MCValue::get(cast(this)->getValue()); return true; case SymbolRef: { const MCSymbolRefExpr *SRE = cast(this); const MCSymbol &Sym = SRE->getSymbol(); // Evaluate recursively if this is a variable. if (Sym.isVariable() && SRE->getKind() == MCSymbolRefExpr::VK_None && canExpand(Sym, InSet)) { bool IsMachO = SRE->hasSubsectionsViaSymbols(); if (Sym.getVariableValue()->evaluateAsRelocatableImpl( Res, Asm, Layout, Fixup, Addrs, InSet || IsMachO)) { if (!IsMachO) return true; const MCSymbolRefExpr *A = Res.getSymA(); const MCSymbolRefExpr *B = Res.getSymB(); // FIXME: This is small hack. Given // a = b + 4 // .long a // the OS X assembler will completely drop the 4. We should probably // include it in the relocation or produce an error if that is not // possible. // Allow constant expressions. if (!A && !B) return true; // Allows aliases with zero offset. if (Res.getConstant() == 0 && (!A || !B)) return true; } } Res = MCValue::get(SRE, nullptr, 0); return true; } case Unary: { const MCUnaryExpr *AUE = cast(this); MCValue Value; if (!AUE->getSubExpr()->evaluateAsRelocatableImpl(Value, Asm, Layout, Fixup, Addrs, InSet)) return false; switch (AUE->getOpcode()) { case MCUnaryExpr::LNot: if (!Value.isAbsolute()) return false; Res = MCValue::get(!Value.getConstant()); break; case MCUnaryExpr::Minus: /// -(a - b + const) ==> (b - a - const) if (Value.getSymA() && !Value.getSymB()) return false; // The cast avoids undefined behavior if the constant is INT64_MIN. Res = MCValue::get(Value.getSymB(), Value.getSymA(), -(uint64_t)Value.getConstant()); break; case MCUnaryExpr::Not: if (!Value.isAbsolute()) return false; Res = MCValue::get(~Value.getConstant()); break; case MCUnaryExpr::Plus: Res = Value; break; } return true; } case Binary: { const MCBinaryExpr *ABE = cast(this); MCValue LHSValue, RHSValue; if (!ABE->getLHS()->evaluateAsRelocatableImpl(LHSValue, Asm, Layout, Fixup, Addrs, InSet) || !ABE->getRHS()->evaluateAsRelocatableImpl(RHSValue, Asm, Layout, Fixup, Addrs, InSet)) { // Check if both are Target Expressions, see if we can compare them. if (const MCTargetExpr *L = dyn_cast(ABE->getLHS())) if (const MCTargetExpr *R = cast(ABE->getRHS())) { switch (ABE->getOpcode()) { case MCBinaryExpr::EQ: Res = MCValue::get((L->isEqualTo(R)) ? -1 : 0); return true; case MCBinaryExpr::NE: Res = MCValue::get((R->isEqualTo(R)) ? 
0 : -1); return true; default: {} } } return false; } // We only support a few operations on non-constant expressions, handle // those first. if (!LHSValue.isAbsolute() || !RHSValue.isAbsolute()) { switch (ABE->getOpcode()) { default: return false; case MCBinaryExpr::Sub: // Negate RHS and add. // The cast avoids undefined behavior if the constant is INT64_MIN. return EvaluateSymbolicAdd(Asm, Layout, Addrs, InSet, LHSValue, RHSValue.getSymB(), RHSValue.getSymA(), -(uint64_t)RHSValue.getConstant(), Res); case MCBinaryExpr::Add: return EvaluateSymbolicAdd(Asm, Layout, Addrs, InSet, LHSValue, RHSValue.getSymA(), RHSValue.getSymB(), RHSValue.getConstant(), Res); } } // FIXME: We need target hooks for the evaluation. It may be limited in // width, and gas defines the result of comparisons differently from // Apple as. int64_t LHS = LHSValue.getConstant(), RHS = RHSValue.getConstant(); int64_t Result = 0; auto Op = ABE->getOpcode(); switch (Op) { case MCBinaryExpr::AShr: Result = LHS >> RHS; break; case MCBinaryExpr::Add: Result = LHS + RHS; break; case MCBinaryExpr::And: Result = LHS & RHS; break; case MCBinaryExpr::Div: case MCBinaryExpr::Mod: // Handle division by zero. gas just emits a warning and keeps going, // we try to be stricter. // FIXME: Currently the caller of this function has no way to understand // we're bailing out because of 'division by zero'. Therefore, it will // emit a 'expected relocatable expression' error. It would be nice to // change this code to emit a better diagnostic. if (RHS == 0) return false; if (ABE->getOpcode() == MCBinaryExpr::Div) Result = LHS / RHS; else Result = LHS % RHS; break; case MCBinaryExpr::EQ: Result = LHS == RHS; break; case MCBinaryExpr::GT: Result = LHS > RHS; break; case MCBinaryExpr::GTE: Result = LHS >= RHS; break; case MCBinaryExpr::LAnd: Result = LHS && RHS; break; case MCBinaryExpr::LOr: Result = LHS || RHS; break; case MCBinaryExpr::LShr: Result = uint64_t(LHS) >> uint64_t(RHS); break; case MCBinaryExpr::LT: Result = LHS < RHS; break; case MCBinaryExpr::LTE: Result = LHS <= RHS; break; case MCBinaryExpr::Mul: Result = LHS * RHS; break; case MCBinaryExpr::NE: Result = LHS != RHS; break; case MCBinaryExpr::Or: Result = LHS | RHS; break; case MCBinaryExpr::Shl: Result = uint64_t(LHS) << uint64_t(RHS); break; case MCBinaryExpr::Sub: Result = LHS - RHS; break; case MCBinaryExpr::Xor: Result = LHS ^ RHS; break; } switch (Op) { default: Res = MCValue::get(Result); break; case MCBinaryExpr::EQ: case MCBinaryExpr::GT: case MCBinaryExpr::GTE: case MCBinaryExpr::LT: case MCBinaryExpr::LTE: case MCBinaryExpr::NE: // A comparison operator returns a -1 if true and 0 if false. Res = MCValue::get(Result ? -1 : 0); break; } return true; } } llvm_unreachable("Invalid assembly expression kind!"); } MCFragment *MCExpr::findAssociatedFragment() const { switch (getKind()) { case Target: // We never look through target specific expressions. return cast(this)->findAssociatedFragment(); case Constant: return MCSymbol::AbsolutePseudoFragment; case SymbolRef: { const MCSymbolRefExpr *SRE = cast(this); const MCSymbol &Sym = SRE->getSymbol(); return Sym.getFragment(); } case Unary: return cast(this)->getSubExpr()->findAssociatedFragment(); case Binary: { const MCBinaryExpr *BE = cast(this); MCFragment *LHS_F = BE->getLHS()->findAssociatedFragment(); MCFragment *RHS_F = BE->getRHS()->findAssociatedFragment(); // If either is absolute, return the other. 
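// Editor's illustrative sketch (standalone C++): the constant folding above
// follows the gas convention that comparisons evaluate to -1 when true and 0
// when false, and distinguishes LShr (shift of the value reinterpreted as
// unsigned) from AShr (plain signed shift of the int64_t operands).
#include <cstdint>
#include <iostream>

static std::int64_t foldEQ(std::int64_t L, std::int64_t R) {
  return (L == R) ? -1 : 0;
}

static std::int64_t foldLShr(std::int64_t L, std::int64_t R) {
  return static_cast<std::int64_t>(static_cast<std::uint64_t>(L) >>
                                   static_cast<std::uint64_t>(R));
}

int main() {
  std::cout << foldEQ(4, 4) << "\n";      // -1
  std::cout << foldEQ(4, 5) << "\n";      // 0
  std::cout << foldLShr(-8, 1) << "\n";   // 9223372036854775804, not -4
}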
if (LHS_F == MCSymbol::AbsolutePseudoFragment) return RHS_F; if (RHS_F == MCSymbol::AbsolutePseudoFragment) return LHS_F; // Not always correct, but probably the best we can do without more context. if (BE->getOpcode() == MCBinaryExpr::Sub) return MCSymbol::AbsolutePseudoFragment; // Otherwise, return the first non-null fragment. return LHS_F ? LHS_F : RHS_F; } } llvm_unreachable("Invalid assembly expression kind!"); } Index: vendor/llvm/dist-release_70/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp =================================================================== --- vendor/llvm/dist-release_70/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp (revision 341364) +++ vendor/llvm/dist-release_70/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp (revision 341365) @@ -1,577 +1,586 @@ //===-- MipsAsmBackend.cpp - Mips Asm Backend ----------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the MipsAsmBackend class. // //===----------------------------------------------------------------------===// // #include "MCTargetDesc/MipsAsmBackend.h" #include "MCTargetDesc/MipsFixupKinds.h" #include "MCTargetDesc/MipsMCExpr.h" #include "MCTargetDesc/MipsMCTargetDesc.h" #include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; // Prepare value for the target space for it static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, MCContext &Ctx) { unsigned Kind = Fixup.getKind(); // Add/subtract and shift switch (Kind) { default: return 0; case FK_Data_2: case Mips::fixup_Mips_LO16: case Mips::fixup_Mips_GPREL16: case Mips::fixup_Mips_GPOFF_HI: case Mips::fixup_Mips_GPOFF_LO: case Mips::fixup_Mips_GOT_PAGE: case Mips::fixup_Mips_GOT_OFST: case Mips::fixup_Mips_GOT_DISP: case Mips::fixup_Mips_GOT_LO16: case Mips::fixup_Mips_CALL_LO16: case Mips::fixup_MICROMIPS_GPOFF_HI: case Mips::fixup_MICROMIPS_GPOFF_LO: case Mips::fixup_MICROMIPS_LO16: case Mips::fixup_MICROMIPS_GOT_PAGE: case Mips::fixup_MICROMIPS_GOT_OFST: case Mips::fixup_MICROMIPS_GOT_DISP: case Mips::fixup_MIPS_PCLO16: Value &= 0xffff; break; case FK_DTPRel_4: case FK_DTPRel_8: case FK_TPRel_4: case FK_TPRel_8: case FK_GPRel_4: case FK_Data_4: case FK_Data_8: case Mips::fixup_Mips_SUB: case Mips::fixup_MICROMIPS_SUB: break; case Mips::fixup_Mips_PC16: // The displacement is then divided by 4 to give us an 18 bit // address range. Forcing a signed division because Value can be negative. Value = (int64_t)Value / 4; // We now check if Value can be encoded as a 16-bit signed immediate. if (!isInt<16>(Value)) { Ctx.reportError(Fixup.getLoc(), "out of range PC16 fixup"); return 0; } break; case Mips::fixup_MIPS_PC19_S2: case Mips::fixup_MICROMIPS_PC19_S2: // Forcing a signed division because Value can be negative. 
Value = (int64_t)Value / 4; // We now check if Value can be encoded as a 19-bit signed immediate. if (!isInt<19>(Value)) { Ctx.reportError(Fixup.getLoc(), "out of range PC19 fixup"); return 0; } break; case Mips::fixup_Mips_26: // So far we are only using this type for jumps. // The displacement is then divided by 4 to give us an 28 bit // address range. Value >>= 2; break; case Mips::fixup_Mips_HI16: case Mips::fixup_Mips_GOT: case Mips::fixup_MICROMIPS_GOT16: case Mips::fixup_Mips_GOT_HI16: case Mips::fixup_Mips_CALL_HI16: case Mips::fixup_MICROMIPS_HI16: case Mips::fixup_MIPS_PCHI16: // Get the 2nd 16-bits. Also add 1 if bit 15 is 1. Value = ((Value + 0x8000) >> 16) & 0xffff; break; case Mips::fixup_Mips_HIGHER: case Mips::fixup_MICROMIPS_HIGHER: // Get the 3rd 16-bits. Value = ((Value + 0x80008000LL) >> 32) & 0xffff; break; case Mips::fixup_Mips_HIGHEST: case Mips::fixup_MICROMIPS_HIGHEST: // Get the 4th 16-bits. Value = ((Value + 0x800080008000LL) >> 48) & 0xffff; break; case Mips::fixup_MICROMIPS_26_S1: Value >>= 1; break; case Mips::fixup_MICROMIPS_PC7_S1: Value -= 4; // Forcing a signed division because Value can be negative. Value = (int64_t) Value / 2; // We now check if Value can be encoded as a 7-bit signed immediate. if (!isInt<7>(Value)) { Ctx.reportError(Fixup.getLoc(), "out of range PC7 fixup"); return 0; } break; case Mips::fixup_MICROMIPS_PC10_S1: Value -= 2; // Forcing a signed division because Value can be negative. Value = (int64_t) Value / 2; // We now check if Value can be encoded as a 10-bit signed immediate. if (!isInt<10>(Value)) { Ctx.reportError(Fixup.getLoc(), "out of range PC10 fixup"); return 0; } break; case Mips::fixup_MICROMIPS_PC16_S1: Value -= 4; // Forcing a signed division because Value can be negative. Value = (int64_t)Value / 2; // We now check if Value can be encoded as a 16-bit signed immediate. if (!isInt<16>(Value)) { Ctx.reportError(Fixup.getLoc(), "out of range PC16 fixup"); return 0; } break; case Mips::fixup_MIPS_PC18_S3: // Forcing a signed division because Value can be negative. Value = (int64_t)Value / 8; // We now check if Value can be encoded as a 18-bit signed immediate. if (!isInt<18>(Value)) { Ctx.reportError(Fixup.getLoc(), "out of range PC18 fixup"); return 0; } break; case Mips::fixup_MICROMIPS_PC18_S3: // Check alignment. if ((Value & 7)) { Ctx.reportError(Fixup.getLoc(), "out of range PC18 fixup"); } // Forcing a signed division because Value can be negative. Value = (int64_t)Value / 8; // We now check if Value can be encoded as a 18-bit signed immediate. if (!isInt<18>(Value)) { Ctx.reportError(Fixup.getLoc(), "out of range PC18 fixup"); return 0; } break; case Mips::fixup_MIPS_PC21_S2: // Forcing a signed division because Value can be negative. Value = (int64_t) Value / 4; // We now check if Value can be encoded as a 21-bit signed immediate. if (!isInt<21>(Value)) { Ctx.reportError(Fixup.getLoc(), "out of range PC21 fixup"); return 0; } break; case Mips::fixup_MIPS_PC26_S2: // Forcing a signed division because Value can be negative. Value = (int64_t) Value / 4; // We now check if Value can be encoded as a 26-bit signed immediate. if (!isInt<26>(Value)) { Ctx.reportError(Fixup.getLoc(), "out of range PC26 fixup"); return 0; } break; case Mips::fixup_MICROMIPS_PC26_S1: // Forcing a signed division because Value can be negative. Value = (int64_t)Value / 2; // We now check if Value can be encoded as a 26-bit signed immediate. 
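// Editor's illustrative sketch (standalone C++): the PC-relative cases above
// divide the byte displacement by the instruction granule (4 here), reject
// values that do not fit the signed field width, and rely on applyFixup's
// TargetSize mask to keep only the encoded bits. fitsSigned stands in for
// llvm's isInt<N>().
#include <cstdint>
#include <iostream>

static bool fitsSigned(std::int64_t V, unsigned Bits) {
  const std::int64_t Lo = -(std::int64_t(1) << (Bits - 1));
  const std::int64_t Hi = (std::int64_t(1) << (Bits - 1)) - 1;
  return V >= Lo && V <= Hi;
}

static bool adjustPC16(std::int64_t ByteDisp, std::uint64_t &Encoded) {
  std::int64_t Words = ByteDisp / 4;    // forced signed division, as above
  if (!fitsSigned(Words, 16))
    return false;                       // "out of range PC16 fixup"
  Encoded = static_cast<std::uint64_t>(Words) & 0xffff;
  return true;
}

int main() {
  std::uint64_t Enc = 0;
  std::cout << adjustPC16(-8, Enc) << " " << Enc << "\n"; // 1 65534
}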
if (!isInt<26>(Value)) { Ctx.reportFatalError(Fixup.getLoc(), "out of range PC26 fixup"); return 0; } break; case Mips::fixup_MICROMIPS_PC21_S1: // Forcing a signed division because Value can be negative. Value = (int64_t)Value / 2; // We now check if Value can be encoded as a 21-bit signed immediate. if (!isInt<21>(Value)) { Ctx.reportError(Fixup.getLoc(), "out of range PC21 fixup"); return 0; } break; } return Value; } std::unique_ptr MipsAsmBackend::createObjectTargetWriter() const { return createMipsELFObjectWriter(TheTriple, IsN32); } // Little-endian fixup data byte ordering: // mips32r2: a | b | x | x // microMIPS: x | x | a | b static bool needsMMLEByteOrder(unsigned Kind) { return Kind != Mips::fixup_MICROMIPS_PC10_S1 && Kind >= Mips::fixup_MICROMIPS_26_S1 && Kind < Mips::LastTargetFixupKind; } // Calculate index for microMIPS specific little endian byte order static unsigned calculateMMLEIndex(unsigned i) { assert(i <= 3 && "Index out of range!"); return (1 - i / 2) * 2 + i % 2; } /// ApplyFixup - Apply the \p Value for given \p Fixup into the provided /// data fragment, at the offset specified by the fixup and following the /// fixup kind as appropriate. void MipsAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, uint64_t Value, bool IsResolved, const MCSubtargetInfo *STI) const { MCFixupKind Kind = Fixup.getKind(); MCContext &Ctx = Asm.getContext(); Value = adjustFixupValue(Fixup, Value, Ctx); if (!Value) return; // Doesn't change encoding. // Where do we start in the object unsigned Offset = Fixup.getOffset(); // Number of bytes we need to fixup unsigned NumBytes = (getFixupKindInfo(Kind).TargetSize + 7) / 8; // Used to point to big endian bytes unsigned FullSize; switch ((unsigned)Kind) { case FK_Data_2: case Mips::fixup_Mips_16: case Mips::fixup_MICROMIPS_PC10_S1: FullSize = 2; break; case FK_Data_8: case Mips::fixup_Mips_64: FullSize = 8; break; case FK_Data_4: default: FullSize = 4; break; } // Grab current value, if any, from bits. uint64_t CurVal = 0; bool microMipsLEByteOrder = needsMMLEByteOrder((unsigned) Kind); for (unsigned i = 0; i != NumBytes; ++i) { unsigned Idx = Endian == support::little ? (microMipsLEByteOrder ? calculateMMLEIndex(i) : i) : (FullSize - 1 - i); CurVal |= (uint64_t)((uint8_t)Data[Offset + Idx]) << (i*8); } uint64_t Mask = ((uint64_t)(-1) >> (64 - getFixupKindInfo(Kind).TargetSize)); CurVal |= Value & Mask; // Write out the fixed up bytes back to the code/data bits. for (unsigned i = 0; i != NumBytes; ++i) { unsigned Idx = Endian == support::little ? (microMipsLEByteOrder ? 
calculateMMLEIndex(i) : i) : (FullSize - 1 - i); Data[Offset + Idx] = (uint8_t)((CurVal >> (i*8)) & 0xff); } } Optional MipsAsmBackend::getFixupKind(StringRef Name) const { return StringSwitch>(Name) .Case("R_MIPS_NONE", (MCFixupKind)Mips::fixup_Mips_NONE) .Case("R_MIPS_32", FK_Data_4) .Case("R_MIPS_GOT_PAGE", (MCFixupKind)Mips::fixup_Mips_GOT_PAGE) .Case("R_MIPS_CALL_HI16", (MCFixupKind)Mips::fixup_Mips_CALL_HI16) .Case("R_MIPS_CALL_LO16", (MCFixupKind)Mips::fixup_Mips_CALL_LO16) .Case("R_MIPS_CALL16", (MCFixupKind)Mips::fixup_Mips_CALL16) .Case("R_MIPS_GOT16", (MCFixupKind)Mips::fixup_Mips_GOT) .Case("R_MIPS_GOT_PAGE", (MCFixupKind)Mips::fixup_Mips_GOT_PAGE) .Case("R_MIPS_GOT_OFST", (MCFixupKind)Mips::fixup_Mips_GOT_OFST) .Case("R_MIPS_GOT_DISP", (MCFixupKind)Mips::fixup_Mips_GOT_DISP) .Case("R_MIPS_GOT_HI16", (MCFixupKind)Mips::fixup_Mips_GOT_HI16) .Case("R_MIPS_GOT_LO16", (MCFixupKind)Mips::fixup_Mips_GOT_LO16) .Case("R_MIPS_TLS_GOTTPREL", (MCFixupKind)Mips::fixup_Mips_GOTTPREL) .Case("R_MIPS_TLS_DTPREL_HI16", (MCFixupKind)Mips::fixup_Mips_DTPREL_HI) .Case("R_MIPS_TLS_DTPREL_LO16", (MCFixupKind)Mips::fixup_Mips_DTPREL_LO) .Case("R_MIPS_TLS_GD", (MCFixupKind)Mips::fixup_Mips_TLSGD) .Case("R_MIPS_TLS_LDM", (MCFixupKind)Mips::fixup_Mips_TLSLDM) .Case("R_MIPS_TLS_TPREL_HI16", (MCFixupKind)Mips::fixup_Mips_TPREL_HI) .Case("R_MIPS_TLS_TPREL_LO16", (MCFixupKind)Mips::fixup_Mips_TPREL_LO) .Case("R_MICROMIPS_CALL16", (MCFixupKind)Mips::fixup_MICROMIPS_CALL16) .Case("R_MICROMIPS_GOT_DISP", (MCFixupKind)Mips::fixup_MICROMIPS_GOT_DISP) .Case("R_MICROMIPS_GOT_PAGE", (MCFixupKind)Mips::fixup_MICROMIPS_GOT_PAGE) .Case("R_MICROMIPS_GOT_OFST", (MCFixupKind)Mips::fixup_MICROMIPS_GOT_OFST) .Case("R_MICROMIPS_GOT16", (MCFixupKind)Mips::fixup_MICROMIPS_GOT16) .Case("R_MICROMIPS_TLS_GOTTPREL", (MCFixupKind)Mips::fixup_MICROMIPS_GOTTPREL) .Case("R_MICROMIPS_TLS_DTPREL_HI16", (MCFixupKind)Mips::fixup_MICROMIPS_TLS_DTPREL_HI16) .Case("R_MICROMIPS_TLS_DTPREL_LO16", (MCFixupKind)Mips::fixup_MICROMIPS_TLS_DTPREL_LO16) .Case("R_MICROMIPS_TLS_GD", (MCFixupKind)Mips::fixup_MICROMIPS_TLS_GD) .Case("R_MICROMIPS_TLS_LDM", (MCFixupKind)Mips::fixup_MICROMIPS_TLS_LDM) .Case("R_MICROMIPS_TLS_TPREL_HI16", (MCFixupKind)Mips::fixup_MICROMIPS_TLS_TPREL_HI16) .Case("R_MICROMIPS_TLS_TPREL_LO16", (MCFixupKind)Mips::fixup_MICROMIPS_TLS_TPREL_LO16) .Default(MCAsmBackend::getFixupKind(Name)); } const MCFixupKindInfo &MipsAsmBackend:: getFixupKindInfo(MCFixupKind Kind) const { const static MCFixupKindInfo LittleEndianInfos[] = { // This table *must* be in same the order of fixup_* kinds in // MipsFixupKinds.h. 
// // name offset bits flags { "fixup_Mips_NONE", 0, 0, 0 }, { "fixup_Mips_16", 0, 16, 0 }, { "fixup_Mips_32", 0, 32, 0 }, { "fixup_Mips_REL32", 0, 32, 0 }, { "fixup_Mips_26", 0, 26, 0 }, { "fixup_Mips_HI16", 0, 16, 0 }, { "fixup_Mips_LO16", 0, 16, 0 }, { "fixup_Mips_GPREL16", 0, 16, 0 }, { "fixup_Mips_LITERAL", 0, 16, 0 }, { "fixup_Mips_GOT", 0, 16, 0 }, { "fixup_Mips_PC16", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_Mips_CALL16", 0, 16, 0 }, { "fixup_Mips_GPREL32", 0, 32, 0 }, { "fixup_Mips_SHIFT5", 6, 5, 0 }, { "fixup_Mips_SHIFT6", 6, 5, 0 }, { "fixup_Mips_64", 0, 64, 0 }, { "fixup_Mips_TLSGD", 0, 16, 0 }, { "fixup_Mips_GOTTPREL", 0, 16, 0 }, { "fixup_Mips_TPREL_HI", 0, 16, 0 }, { "fixup_Mips_TPREL_LO", 0, 16, 0 }, { "fixup_Mips_TLSLDM", 0, 16, 0 }, { "fixup_Mips_DTPREL_HI", 0, 16, 0 }, { "fixup_Mips_DTPREL_LO", 0, 16, 0 }, { "fixup_Mips_Branch_PCRel", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_Mips_GPOFF_HI", 0, 16, 0 }, { "fixup_MICROMIPS_GPOFF_HI",0, 16, 0 }, { "fixup_Mips_GPOFF_LO", 0, 16, 0 }, { "fixup_MICROMIPS_GPOFF_LO",0, 16, 0 }, { "fixup_Mips_GOT_PAGE", 0, 16, 0 }, { "fixup_Mips_GOT_OFST", 0, 16, 0 }, { "fixup_Mips_GOT_DISP", 0, 16, 0 }, { "fixup_Mips_HIGHER", 0, 16, 0 }, { "fixup_MICROMIPS_HIGHER", 0, 16, 0 }, { "fixup_Mips_HIGHEST", 0, 16, 0 }, { "fixup_MICROMIPS_HIGHEST", 0, 16, 0 }, { "fixup_Mips_GOT_HI16", 0, 16, 0 }, { "fixup_Mips_GOT_LO16", 0, 16, 0 }, { "fixup_Mips_CALL_HI16", 0, 16, 0 }, { "fixup_Mips_CALL_LO16", 0, 16, 0 }, { "fixup_Mips_PC18_S3", 0, 18, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MIPS_PC19_S2", 0, 19, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MIPS_PC21_S2", 0, 21, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MIPS_PC26_S2", 0, 26, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MIPS_PCHI16", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MIPS_PCLO16", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MICROMIPS_26_S1", 0, 26, 0 }, { "fixup_MICROMIPS_HI16", 0, 16, 0 }, { "fixup_MICROMIPS_LO16", 0, 16, 0 }, { "fixup_MICROMIPS_GOT16", 0, 16, 0 }, { "fixup_MICROMIPS_PC7_S1", 0, 7, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MICROMIPS_PC10_S1", 0, 10, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MICROMIPS_PC16_S1", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MICROMIPS_PC26_S1", 0, 26, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MICROMIPS_PC19_S2", 0, 19, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MICROMIPS_PC18_S3", 0, 18, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MICROMIPS_PC21_S1", 0, 21, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MICROMIPS_CALL16", 0, 16, 0 }, { "fixup_MICROMIPS_GOT_DISP", 0, 16, 0 }, { "fixup_MICROMIPS_GOT_PAGE", 0, 16, 0 }, { "fixup_MICROMIPS_GOT_OFST", 0, 16, 0 }, { "fixup_MICROMIPS_TLS_GD", 0, 16, 0 }, { "fixup_MICROMIPS_TLS_LDM", 0, 16, 0 }, { "fixup_MICROMIPS_TLS_DTPREL_HI16", 0, 16, 0 }, { "fixup_MICROMIPS_TLS_DTPREL_LO16", 0, 16, 0 }, { "fixup_MICROMIPS_GOTTPREL", 0, 16, 0 }, { "fixup_MICROMIPS_TLS_TPREL_HI16", 0, 16, 0 }, { "fixup_MICROMIPS_TLS_TPREL_LO16", 0, 16, 0 }, { "fixup_Mips_SUB", 0, 64, 0 }, { "fixup_MICROMIPS_SUB", 0, 64, 0 } }; static_assert(array_lengthof(LittleEndianInfos) == Mips::NumTargetFixupKinds, "Not all MIPS little endian fixup kinds added!"); const static MCFixupKindInfo BigEndianInfos[] = { // This table *must* be in same the order of fixup_* kinds in // MipsFixupKinds.h. 
// // name offset bits flags { "fixup_Mips_NONE", 0, 0, 0 }, { "fixup_Mips_16", 16, 16, 0 }, { "fixup_Mips_32", 0, 32, 0 }, { "fixup_Mips_REL32", 0, 32, 0 }, { "fixup_Mips_26", 6, 26, 0 }, { "fixup_Mips_HI16", 16, 16, 0 }, { "fixup_Mips_LO16", 16, 16, 0 }, { "fixup_Mips_GPREL16", 16, 16, 0 }, { "fixup_Mips_LITERAL", 16, 16, 0 }, { "fixup_Mips_GOT", 16, 16, 0 }, { "fixup_Mips_PC16", 16, 16, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_Mips_CALL16", 16, 16, 0 }, { "fixup_Mips_GPREL32", 0, 32, 0 }, { "fixup_Mips_SHIFT5", 21, 5, 0 }, { "fixup_Mips_SHIFT6", 21, 5, 0 }, { "fixup_Mips_64", 0, 64, 0 }, { "fixup_Mips_TLSGD", 16, 16, 0 }, { "fixup_Mips_GOTTPREL", 16, 16, 0 }, { "fixup_Mips_TPREL_HI", 16, 16, 0 }, { "fixup_Mips_TPREL_LO", 16, 16, 0 }, { "fixup_Mips_TLSLDM", 16, 16, 0 }, { "fixup_Mips_DTPREL_HI", 16, 16, 0 }, { "fixup_Mips_DTPREL_LO", 16, 16, 0 }, { "fixup_Mips_Branch_PCRel",16, 16, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_Mips_GPOFF_HI", 16, 16, 0 }, { "fixup_MICROMIPS_GPOFF_HI", 16, 16, 0 }, { "fixup_Mips_GPOFF_LO", 16, 16, 0 }, { "fixup_MICROMIPS_GPOFF_LO", 16, 16, 0 }, { "fixup_Mips_GOT_PAGE", 16, 16, 0 }, { "fixup_Mips_GOT_OFST", 16, 16, 0 }, { "fixup_Mips_GOT_DISP", 16, 16, 0 }, { "fixup_Mips_HIGHER", 16, 16, 0 }, { "fixup_MICROMIPS_HIGHER", 16, 16, 0 }, { "fixup_Mips_HIGHEST", 16, 16, 0 }, { "fixup_MICROMIPS_HIGHEST",16, 16, 0 }, { "fixup_Mips_GOT_HI16", 16, 16, 0 }, { "fixup_Mips_GOT_LO16", 16, 16, 0 }, { "fixup_Mips_CALL_HI16", 16, 16, 0 }, { "fixup_Mips_CALL_LO16", 16, 16, 0 }, { "fixup_Mips_PC18_S3", 14, 18, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MIPS_PC19_S2", 13, 19, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MIPS_PC21_S2", 11, 21, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MIPS_PC26_S2", 6, 26, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MIPS_PCHI16", 16, 16, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MIPS_PCLO16", 16, 16, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MICROMIPS_26_S1", 6, 26, 0 }, { "fixup_MICROMIPS_HI16", 16, 16, 0 }, { "fixup_MICROMIPS_LO16", 16, 16, 0 }, { "fixup_MICROMIPS_GOT16", 16, 16, 0 }, { "fixup_MICROMIPS_PC7_S1", 9, 7, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MICROMIPS_PC10_S1", 6, 10, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MICROMIPS_PC16_S1",16, 16, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MICROMIPS_PC26_S1", 6, 26, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MICROMIPS_PC19_S2",13, 19, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MICROMIPS_PC18_S3",14, 18, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MICROMIPS_PC21_S1",11, 21, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MICROMIPS_CALL16", 16, 16, 0 }, { "fixup_MICROMIPS_GOT_DISP", 16, 16, 0 }, { "fixup_MICROMIPS_GOT_PAGE", 16, 16, 0 }, { "fixup_MICROMIPS_GOT_OFST", 16, 16, 0 }, { "fixup_MICROMIPS_TLS_GD", 16, 16, 0 }, { "fixup_MICROMIPS_TLS_LDM", 16, 16, 0 }, { "fixup_MICROMIPS_TLS_DTPREL_HI16", 16, 16, 0 }, { "fixup_MICROMIPS_TLS_DTPREL_LO16", 16, 16, 0 }, { "fixup_MICROMIPS_GOTTPREL", 16, 16, 0 }, { "fixup_MICROMIPS_TLS_TPREL_HI16", 16, 16, 0 }, { "fixup_MICROMIPS_TLS_TPREL_LO16", 16, 16, 0 }, { "fixup_Mips_SUB", 0, 64, 0 }, { "fixup_MICROMIPS_SUB", 0, 64, 0 } }; static_assert(array_lengthof(BigEndianInfos) == Mips::NumTargetFixupKinds, "Not all MIPS big endian fixup kinds added!"); if (Kind < FirstTargetFixupKind) return MCAsmBackend::getFixupKindInfo(Kind); assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && "Invalid kind!"); if (Endian == support::little) return LittleEndianInfos[Kind - FirstTargetFixupKind]; return BigEndianInfos[Kind - FirstTargetFixupKind]; } /// WriteNopData - Write an 
(optimal) nop sequence of Count bytes
/// to the given output. If the target cannot generate such a sequence,
/// it should return an error.
///
/// \return - True on success.
bool MipsAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
  // Check for a less than instruction size number of bytes
  // FIXME: 16 bit instructions are not handled yet here.
  // We shouldn't be using a hard coded number for instruction size.

  // If the count is not 4-byte aligned, we must be writing data into the text
  // section (otherwise we have unaligned instructions, and thus have far
  // bigger problems), so just write zeros instead.
  OS.write_zeros(Count);
  return true;
}

bool MipsAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
                                           const MCFixup &Fixup,
                                           const MCValue &Target) {
  const unsigned FixupKind = Fixup.getKind();
  switch (FixupKind) {
  default:
    return false;
  // All these relocations require special processing
  // at linking time. Delegate this work to a linker.
  case Mips::fixup_Mips_CALL_HI16:
  case Mips::fixup_Mips_CALL_LO16:
  case Mips::fixup_Mips_CALL16:
  case Mips::fixup_Mips_GOT:
  case Mips::fixup_Mips_GOT_PAGE:
  case Mips::fixup_Mips_GOT_OFST:
  case Mips::fixup_Mips_GOT_DISP:
  case Mips::fixup_Mips_GOT_HI16:
  case Mips::fixup_Mips_GOT_LO16:
  case Mips::fixup_Mips_GOTTPREL:
  case Mips::fixup_Mips_DTPREL_HI:
  case Mips::fixup_Mips_DTPREL_LO:
  case Mips::fixup_Mips_TLSGD:
  case Mips::fixup_Mips_TLSLDM:
  case Mips::fixup_Mips_TPREL_HI:
  case Mips::fixup_Mips_TPREL_LO:
  case Mips::fixup_MICROMIPS_CALL16:
  case Mips::fixup_MICROMIPS_GOT_DISP:
  case Mips::fixup_MICROMIPS_GOT_PAGE:
  case Mips::fixup_MICROMIPS_GOT_OFST:
  case Mips::fixup_MICROMIPS_GOT16:
  case Mips::fixup_MICROMIPS_GOTTPREL:
  case Mips::fixup_MICROMIPS_TLS_DTPREL_HI16:
  case Mips::fixup_MICROMIPS_TLS_DTPREL_LO16:
  case Mips::fixup_MICROMIPS_TLS_GD:
  case Mips::fixup_MICROMIPS_TLS_LDM:
  case Mips::fixup_MICROMIPS_TLS_TPREL_HI16:
  case Mips::fixup_MICROMIPS_TLS_TPREL_LO16:
    return true;
  }
+}
+
+bool MipsAsmBackend::isMicroMips(const MCSymbol *Sym) const {
+  if (const auto *ElfSym = dyn_cast<const MCSymbolELF>(Sym)) {
+    if (ElfSym->getOther() & ELF::STO_MIPS_MICROMIPS)
+      return true;
+  }
+  return false;
}

MCAsmBackend *llvm::createMipsAsmBackend(const Target &T,
                                         const MCSubtargetInfo &STI,
                                         const MCRegisterInfo &MRI,
                                         const MCTargetOptions &Options) {
  return new MipsAsmBackend(T, MRI, STI.getTargetTriple(), STI.getCPU(),
                            Options.ABIName == "n32");
}
Index: vendor/llvm/dist-release_70/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
===================================================================
--- vendor/llvm/dist-release_70/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h (revision 341364)
+++ vendor/llvm/dist-release_70/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h (revision 341365)
@@ -1,97 +1,99 @@
//===-- MipsAsmBackend.h - Mips Asm Backend ------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the MipsAsmBackend class.
//
//===----------------------------------------------------------------------===//
//

#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSASMBACKEND_H
#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSASMBACKEND_H

#include "MCTargetDesc/MipsFixupKinds.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCAsmBackend.h"

namespace llvm {

class MCAssembler;
struct MCFixupKindInfo;
class MCObjectWriter;
class MCRegisterInfo;
+class MCSymbolELF;
class Target;

class MipsAsmBackend : public MCAsmBackend {
  Triple TheTriple;
  bool IsN32;

public:
  MipsAsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TT,
                 StringRef CPU, bool N32)
      : MCAsmBackend(TT.isLittleEndian() ? support::little : support::big),
        TheTriple(TT), IsN32(N32) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override;

  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                  const MCValue &Target, MutableArrayRef<char> Data,
                  uint64_t Value, bool IsResolved,
                  const MCSubtargetInfo *STI) const override;

  Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;

  unsigned getNumFixupKinds() const override {
    return Mips::NumTargetFixupKinds;
  }

  /// @name Target Relaxation Interfaces
  /// @{

  /// MayNeedRelaxation - Check whether the given instruction may need
  /// relaxation.
  ///
  /// \param Inst - The instruction to test.
  bool mayNeedRelaxation(const MCInst &Inst,
                         const MCSubtargetInfo &STI) const override {
    return false;
  }

  /// fixupNeedsRelaxation - Target specific predicate for whether a given
  /// fixup requires the associated instruction to be relaxed.
  bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
                            const MCRelaxableFragment *DF,
                            const MCAsmLayout &Layout) const override {
    // FIXME.
    llvm_unreachable("RelaxInstruction() unimplemented");
    return false;
  }

  /// RelaxInstruction - Relax the instruction in the given fragment
  /// to the next wider instruction.
  ///
  /// \param Inst - The instruction to relax, which may be the same
  /// as the output.
  /// \param [out] Res On return, the relaxed instruction.
  void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
                        MCInst &Res) const override {}

  /// @}

  bool writeNopData(raw_ostream &OS, uint64_t Count) const override;

  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
                             const MCValue &Target) override;

+  bool isMicroMips(const MCSymbol *Sym) const override;
}; // class MipsAsmBackend

} // namespace

#endif
Index: vendor/llvm/dist-release_70/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
===================================================================
--- vendor/llvm/dist-release_70/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp (revision 341364)
+++ vendor/llvm/dist-release_70/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp (revision 341365)
@@ -1,105 +1,122 @@
//===-------- MipsELFStreamer.cpp - ELF Object Output ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "MipsELFStreamer.h"
#include "MipsOptionRecord.h"
#include "MipsTargetStreamer.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

MipsELFStreamer::MipsELFStreamer(MCContext &Context,
                                 std::unique_ptr<MCAsmBackend> MAB,
                                 std::unique_ptr<MCObjectWriter> OW,
                                 std::unique_ptr<MCCodeEmitter> Emitter)
    : MCELFStreamer(Context, std::move(MAB), std::move(OW),
                    std::move(Emitter)) {
  RegInfoRecord = new MipsRegInfoRecord(this, Context);
  MipsOptionRecords.push_back(
      std::unique_ptr<MipsRegInfoRecord>(RegInfoRecord));
}

void MipsELFStreamer::EmitInstruction(const MCInst &Inst,
                                      const MCSubtargetInfo &STI, bool) {
  MCELFStreamer::EmitInstruction(Inst, STI);

  MCContext &Context = getContext();
  const MCRegisterInfo *MCRegInfo = Context.getRegisterInfo();

  for (unsigned OpIndex = 0; OpIndex < Inst.getNumOperands(); ++OpIndex) {
    const MCOperand &Op = Inst.getOperand(OpIndex);

    if (!Op.isReg())
      continue;

    unsigned Reg = Op.getReg();
    RegInfoRecord->SetPhysRegUsed(Reg, MCRegInfo);
  }

  createPendingLabelRelocs();
+}
+
+void MipsELFStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) {
+  Frame.Begin = getContext().createTempSymbol();
+  MCELFStreamer::EmitLabel(Frame.Begin);
+}
+
+MCSymbol *MipsELFStreamer::EmitCFILabel() {
+  MCSymbol *Label = getContext().createTempSymbol("cfi", true);
+  MCELFStreamer::EmitLabel(Label);
+  return Label;
+}
+
+void MipsELFStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
+  Frame.End = getContext().createTempSymbol();
+  MCELFStreamer::EmitLabel(Frame.End);
}

void MipsELFStreamer::createPendingLabelRelocs() {
  MipsTargetELFStreamer *ELFTargetStreamer =
      static_cast<MipsTargetELFStreamer *>(getTargetStreamer());

  // FIXME: Also mark labels when in MIPS16 mode.
  if (ELFTargetStreamer->isMicroMipsEnabled()) {
    for (auto *L : Labels) {
      auto *Label = cast<MCSymbolELF>(L);
      getAssembler().registerSymbol(*Label);
      Label->setOther(ELF::STO_MIPS_MICROMIPS);
    }
  }

  Labels.clear();
}

void MipsELFStreamer::EmitLabel(MCSymbol *Symbol, SMLoc Loc) {
  MCELFStreamer::EmitLabel(Symbol);
  Labels.push_back(Symbol);
}

void MipsELFStreamer::SwitchSection(MCSection *Section,
                                    const MCExpr *Subsection) {
  MCELFStreamer::SwitchSection(Section, Subsection);
  Labels.clear();
}

void MipsELFStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
                                    SMLoc Loc) {
  MCELFStreamer::EmitValueImpl(Value, Size, Loc);
  Labels.clear();
}

void MipsELFStreamer::EmitIntValue(uint64_t Value, unsigned Size) {
  MCELFStreamer::EmitIntValue(Value, Size);
  Labels.clear();
}

void MipsELFStreamer::EmitMipsOptionRecords() {
  for (const auto &I : MipsOptionRecords)
    I->EmitMipsOptionRecord();
}

MCELFStreamer *llvm::createMipsELFStreamer(
    MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
    std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter,
    bool RelaxAll) {
  return new MipsELFStreamer(Context, std::move(MAB), std::move(OW),
                             std::move(Emitter));
}
Index: vendor/llvm/dist-release_70/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
===================================================================
--- vendor/llvm/dist-release_70/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h (revision 341364)
+++ vendor/llvm/dist-release_70/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h (revision 341365)
@@ -1,77 +1,84 @@
//===- MipsELFStreamer.h - ELF Object Output --------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This is a custom MCELFStreamer which allows us to insert some hooks before
// emitting data into an actual object file.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSELFSTREAMER_H
#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSELFSTREAMER_H

#include "MipsOptionRecord.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCELFStreamer.h"
#include <memory>

namespace llvm {

class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCSubtargetInfo;
+struct MCDwarfFrameInfo;

class MipsELFStreamer : public MCELFStreamer {
  SmallVector<std::unique_ptr<MipsOptionRecord>, 8> MipsOptionRecords;
  MipsRegInfoRecord *RegInfoRecord;
  SmallVector<MCSymbol *, 4> Labels;

public:
  MipsELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
                  std::unique_ptr<MCObjectWriter> OW,
                  std::unique_ptr<MCCodeEmitter> Emitter);

  /// Overriding this function allows us to add arbitrary behaviour before the
  /// \p Inst is actually emitted. For example, we can inspect the operands and
  /// gather sufficient information that allows us to reason about the register
  /// usage for the translation unit.
  void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
                       bool = false) override;

  /// Overriding this function allows us to record all labels that should be
  /// marked as microMIPS. Based on this data marking is done in
  /// EmitInstruction.
  void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;

  /// Overriding this function allows us to dismiss all labels that are
  /// candidates for marking as microMIPS when .section directive is processed.
  void SwitchSection(MCSection *Section,
                     const MCExpr *Subsection = nullptr) override;

  /// Overriding these functions allows us to dismiss all labels that are
  /// candidates for marking as microMIPS when .word/.long/.4byte etc
  /// directives are emitted.
  void EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override;
  void EmitIntValue(uint64_t Value, unsigned Size) override;
+
+  // Overriding these functions allows us to avoid recording of these labels
+  // in EmitLabel and later marking them as microMIPS.
+  void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override;
+  void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override;
+  MCSymbol *EmitCFILabel() override;

  /// Emits all the option records stored up until the point it's called.
  void EmitMipsOptionRecords();

  /// Mark labels as microMIPS, if necessary for the subtarget.
  void createPendingLabelRelocs();
};

MCELFStreamer *createMipsELFStreamer(MCContext &Context,
                                     std::unique_ptr<MCAsmBackend> MAB,
                                     std::unique_ptr<MCObjectWriter> OW,
                                     std::unique_ptr<MCCodeEmitter> Emitter,
                                     bool RelaxAll);
} // end namespace llvm

#endif // LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSELFSTREAMER_H
Index: vendor/llvm/dist-release_70/lib/Target/Mips/MicroMips32r6InstrInfo.td
===================================================================
--- vendor/llvm/dist-release_70/lib/Target/Mips/MicroMips32r6InstrInfo.td (revision 341364)
+++ vendor/llvm/dist-release_70/lib/Target/Mips/MicroMips32r6InstrInfo.td (revision 341365)
@@ -1,1804 +1,1804 @@
//=- MicroMips32r6InstrInfo.td - MicroMips r6 Instruction Information -*- tablegen -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes microMIPSr6 instructions.
// //===----------------------------------------------------------------------===// def brtarget21_mm : Operand { let EncoderMethod = "getBranchTarget21OpValueMM"; let OperandType = "OPERAND_PCREL"; let DecoderMethod = "DecodeBranchTarget21MM"; let ParserMatchClass = MipsJumpTargetAsmOperand; } def brtarget26_mm : Operand { let EncoderMethod = "getBranchTarget26OpValueMM"; let OperandType = "OPERAND_PCREL"; let DecoderMethod = "DecodeBranchTarget26MM"; let ParserMatchClass = MipsJumpTargetAsmOperand; } def brtargetr6 : Operand { let EncoderMethod = "getBranchTargetOpValueMMR6"; let OperandType = "OPERAND_PCREL"; let DecoderMethod = "DecodeBranchTargetMM"; let ParserMatchClass = MipsJumpTargetAsmOperand; } def brtarget_lsl2_mm : Operand { let EncoderMethod = "getBranchTargetOpValueLsl2MMR6"; let OperandType = "OPERAND_PCREL"; // Instructions that use this operand have their decoder method // set with DecodeDisambiguates let DecoderMethod = ""; let ParserMatchClass = MipsJumpTargetAsmOperand; } //===----------------------------------------------------------------------===// // // Instruction Encodings // //===----------------------------------------------------------------------===// class ADD_MMR6_ENC : ARITH_FM_MMR6<"add", 0x110>; class ADDIU_MMR6_ENC : ADDI_FM_MMR6<"addiu", 0xc>; class ADDU_MMR6_ENC : ARITH_FM_MMR6<"addu", 0x150>; class ADDIUPC_MMR6_ENC : PCREL19_FM_MMR6<0b00>; class ALUIPC_MMR6_ENC : PCREL16_FM_MMR6<0b11111>; class AND_MMR6_ENC : ARITH_FM_MMR6<"and", 0x250>; class ANDI_MMR6_ENC : ADDI_FM_MMR6<"andi", 0x34>; class AUIPC_MMR6_ENC : PCREL16_FM_MMR6<0b11110>; class ALIGN_MMR6_ENC : POOL32A_ALIGN_FM_MMR6<0b011111>; class AUI_MMR6_ENC : AUI_FM_MMR6; class BALC_MMR6_ENC : BRANCH_OFF26_FM<0b101101>; class BC_MMR6_ENC : BRANCH_OFF26_FM<0b100101>; class BC16_MMR6_ENC : BC16_FM_MM16R6; class BEQZC16_MMR6_ENC : BEQZC_BNEZC_FM_MM16R6<0x23>; class BNEZC16_MMR6_ENC : BEQZC_BNEZC_FM_MM16R6<0x2b>; class BITSWAP_MMR6_ENC : POOL32A_BITSWAP_FM_MMR6<0b101100>; class BRK_MMR6_ENC : BREAK_MMR6_ENC<"break">; class BEQZC_MMR6_ENC : CMP_BRANCH_OFF21_FM_MMR6<"beqzc", 0b100000>; class BNEZC_MMR6_ENC : CMP_BRANCH_OFF21_FM_MMR6<"bnezc", 0b101000>; class BGEC_MMR6_ENC : CMP_BRANCH_2R_OFF16_FM_MMR6<"bgec", 0b111101>, DecodeDisambiguates<"POP75GroupBranchMMR6">; class BGEUC_MMR6_ENC : CMP_BRANCH_2R_OFF16_FM_MMR6<"bgeuc", 0b110000>, DecodeDisambiguates<"BlezGroupBranchMMR6">; class BLTC_MMR6_ENC : CMP_BRANCH_2R_OFF16_FM_MMR6<"bltc", 0b110101>, DecodeDisambiguates<"POP65GroupBranchMMR6">; class BLTUC_MMR6_ENC : CMP_BRANCH_2R_OFF16_FM_MMR6<"bltuc", 0b111000>, DecodeDisambiguates<"BgtzGroupBranchMMR6">; class BEQC_MMR6_ENC : CMP_BRANCH_2R_OFF16_FM_MMR6<"beqc", 0b011101>; class BNEC_MMR6_ENC : CMP_BRANCH_2R_OFF16_FM_MMR6<"bnec", 0b011111>; class BLTZC_MMR6_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM_MMR6<"bltzc", 0b110101>, DecodeDisambiguates<"POP65GroupBranchMMR6">; class BLEZC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<"blezc", 0b111101>, DecodeDisambiguates<"POP75GroupBranchMMR6">; class BGEZC_MMR6_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM_MMR6<"bgezc", 0b111101>, DecodeDisambiguates<"POP75GroupBranchMMR6">; class BGTZC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<"bgtzc", 0b110101>, DecodeDisambiguates<"POP65GroupBranchMMR6">; class BEQZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<"beqzalc", 0b011101>, DecodeDisambiguates<"POP35GroupBranchMMR6">; class BNEZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<"bnezalc", 0b011111>, DecodeDisambiguates<"POP37GroupBranchMMR6">; class BGTZALC_MMR6_ENC : 
CMP_BRANCH_1R_RT_OFF16_FM_MMR6<"bgtzalc", 0b111000>, MMDecodeDisambiguatedBy<"BgtzGroupBranchMMR6">; class BLTZALC_MMR6_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM_MMR6<"bltzalc", 0b111000>, MMDecodeDisambiguatedBy<"BgtzGroupBranchMMR6">; class BGEZALC_MMR6_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM_MMR6<"bgezalc", 0b110000>, MMDecodeDisambiguatedBy<"BlezGroupBranchMMR6">; class BLEZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<"blezalc", 0b110000>, MMDecodeDisambiguatedBy<"BlezGroupBranchMMR6">; class CACHE_MMR6_ENC : CACHE_PREF_FM_MMR6<0b001000, 0b0110>; class CLO_MMR6_ENC : POOL32A_2R_FM_MMR6<0b0100101100>; class CLZ_MMR6_ENC : SPECIAL_2R_FM_MMR6<0b010000>; class DIV_MMR6_ENC : ARITH_FM_MMR6<"div", 0x118>; class DIVU_MMR6_ENC : ARITH_FM_MMR6<"divu", 0x198>; class EHB_MMR6_ENC : BARRIER_MMR6_ENC<"ehb", 0x3>; class EI_MMR6_ENC : POOL32A_EIDI_MMR6_ENC<"ei", 0x15d>; class DI_MMR6_ENC : POOL32A_EIDI_MMR6_ENC<"di", 0b0100011101>; class ERET_MMR6_ENC : POOL32A_ERET_FM_MMR6<"eret", 0x3cd>; class DERET_MMR6_ENC : POOL32A_ERET_FM_MMR6<"eret", 0b1110001101>; class ERETNC_MMR6_ENC : ERETNC_FM_MMR6<"eretnc">; class GINVI_MMR6_ENC : POOL32A_GINV_FM_MMR6<"ginvi", 0b00>; class GINVT_MMR6_ENC : POOL32A_GINV_FM_MMR6<"ginvt", 0b10>; class JALRC16_MMR6_ENC : POOL16C_JALRC_FM_MM16R6<0xb>; class JIALC_MMR6_ENC : JMP_IDX_COMPACT_FM<0b100000>; class JIC_MMR6_ENC : JMP_IDX_COMPACT_FM<0b101000>; class JRC16_MMR6_ENC: POOL16C_JALRC_FM_MM16R6<0x3>; class JRCADDIUSP_MMR6_ENC : POOL16C_JRCADDIUSP_FM_MM16R6<0x13>; class LSA_MMR6_ENC : POOL32A_LSA_FM<0b001111>; class LWPC_MMR6_ENC : PCREL19_FM_MMR6<0b01>; class LWM16_MMR6_ENC : POOL16C_LWM_SWM_FM_MM16R6<0x2>; class MFC0_MMR6_ENC : POOL32A_MFTC0_FM_MMR6<"mfc0", 0b00011, 0b111100>; class MFC1_MMR6_ENC : POOL32F_MFTC1_FM_MMR6<"mfc1", 0b10000000>; class MFC2_MMR6_ENC : POOL32A_MFTC2_FM_MMR6<"mfc2", 0b0100110100>; class MFHC0_MMR6_ENC : POOL32A_MFTC0_FM_MMR6<"mfhc0", 0b00011, 0b110100>; class MFHC2_MMR6_ENC : POOL32A_MFTC2_FM_MMR6<"mfhc2", 0b1000110100>; class MOD_MMR6_ENC : ARITH_FM_MMR6<"mod", 0x158>; class MODU_MMR6_ENC : ARITH_FM_MMR6<"modu", 0x1d8>; class MUL_MMR6_ENC : ARITH_FM_MMR6<"mul", 0x18>; class MUH_MMR6_ENC : ARITH_FM_MMR6<"muh", 0x58>; class MULU_MMR6_ENC : ARITH_FM_MMR6<"mulu", 0x98>; class MUHU_MMR6_ENC : ARITH_FM_MMR6<"muhu", 0xd8>; class MTC0_MMR6_ENC : POOL32A_MFTC0_FM_MMR6<"mtc0", 0b01011, 0b111100>; class MTC1_MMR6_ENC : POOL32F_MFTC1_FM_MMR6<"mtc1", 0b10100000>; class MTC2_MMR6_ENC : POOL32A_MFTC2_FM_MMR6<"mtc2", 0b0101110100>; class MTHC0_MMR6_ENC : POOL32A_MFTC0_FM_MMR6<"mthc0", 0b01011, 0b110100>; class MTHC2_MMR6_ENC : POOL32A_MFTC2_FM_MMR6<"mthc2", 0b1001110100>; class NOR_MMR6_ENC : ARITH_FM_MMR6<"nor", 0x2d0>; class OR_MMR6_ENC : ARITH_FM_MMR6<"or", 0x290>; class ORI_MMR6_ENC : ADDI_FM_MMR6<"ori", 0x14>; class PREF_MMR6_ENC : CACHE_PREF_FM_MMR6<0b011000, 0b0010>; class SB16_MMR6_ENC : LOAD_STORE_FM_MM16<0x22>; class SELEQZ_MMR6_ENC : POOL32A_FM_MMR6<0b0101000000>; class SELNEZ_MMR6_ENC : POOL32A_FM_MMR6<0b0110000000>; class SH16_MMR6_ENC : LOAD_STORE_FM_MM16<0x2a>; class SLL_MMR6_ENC : SHIFT_MMR6_ENC<"sll", 0x00, 0b0>; class SUB_MMR6_ENC : ARITH_FM_MMR6<"sub", 0x190>; class SUBU_MMR6_ENC : ARITH_FM_MMR6<"subu", 0x1d0>; class SW_MMR6_ENC : SW32_FM_MMR6<"sw", 0x3e>; class SW16_MMR6_ENC : LOAD_STORE_FM_MM16<0x3a>; class SWM16_MMR6_ENC : POOL16C_LWM_SWM_FM_MM16R6<0xa>; class SWSP_MMR6_ENC : LOAD_STORE_SP_FM_MM16<0x32>; class WRPGPR_MMR6_ENC : POOL32A_WRPGPR_WSBH_FM_MMR6<"wrpgpr", 0x3c5>; class WSBH_MMR6_ENC : POOL32A_WRPGPR_WSBH_FM_MMR6<"wsbh", 0x1ec>; class 
LB_MMR6_ENC : LB32_FM_MMR6; class LBU_MMR6_ENC : LBU32_FM_MMR6; class PAUSE_MMR6_ENC : POOL32A_PAUSE_FM_MMR6<"pause", 0b00101>; class RDHWR_MMR6_ENC : POOL32A_RDHWR_FM_MMR6; class WAIT_MMR6_ENC : WAIT_FM_MM, MMR6Arch<"wait">; class SSNOP_MMR6_ENC : BARRIER_FM_MM<0x1>, MMR6Arch<"ssnop">; class SYNC_MMR6_ENC : POOL32A_SYNC_FM_MMR6; class SYNCI_MMR6_ENC : POOL32I_SYNCI_FM_MMR6, MMR6Arch<"synci">; class RDPGPR_MMR6_ENC : POOL32A_RDPGPR_FM_MMR6<0b1110000101>; class SDBBP_MMR6_ENC : SDBBP_FM_MM, MMR6Arch<"sdbbp">; class XOR_MMR6_ENC : ARITH_FM_MMR6<"xor", 0x310>; class XORI_MMR6_ENC : ADDI_FM_MMR6<"xori", 0x1c>; class ABS_S_MMR6_ENC : POOL32F_ABS_FM_MMR6<"abs.s", 0, 0b0001101>; class ABS_D_MMR6_ENC : POOL32F_ABS_FM_MMR6<"abs.d", 1, 0b0001101>; class FLOOR_L_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"floor.l.s", 0, 0b00001100>; class FLOOR_L_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"floor.l.d", 1, 0b00001100>; class FLOOR_W_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"floor.w.s", 0, 0b00101100>; class FLOOR_W_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"floor.w.d", 1, 0b00101100>; class CEIL_L_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"ceil.l.s", 0, 0b01001100>; class CEIL_L_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"ceil.l.d", 1, 0b01001100>; class CEIL_W_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"ceil.w.s", 0, 0b01101100>; class CEIL_W_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"ceil.w.d", 1, 0b01101100>; class TRUNC_L_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.l.s", 0, 0b10001100>; class TRUNC_L_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.l.d", 1, 0b10001100>; class TRUNC_W_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.w.s", 0, 0b10101100>; class TRUNC_W_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.w.d", 1, 0b10101100>; class SB_MMR6_ENC : SB32_SH32_STORE_FM_MMR6<0b000110>; class SH_MMR6_ENC : SB32_SH32_STORE_FM_MMR6<0b001110>; class LW_MMR6_ENC : LOAD_WORD_FM_MMR6; class LUI_MMR6_ENC : LOAD_UPPER_IMM_FM_MMR6; class JALRC_HB_MMR6_ENC : POOL32A_JALRC_FM_MMR6<"jalrc.hb", 0b0001111100>; class RINT_S_MMR6_ENC : POOL32F_RINT_FM_MMR6<"rint.s", 0>; class RINT_D_MMR6_ENC : POOL32F_RINT_FM_MMR6<"rint.d", 1>; class ROUND_L_S_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"round.l.s", 0, 0b11001100>; class ROUND_L_D_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"round.l.d", 1, 0b11001100>; class ROUND_W_S_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"round.w.s", 0, 0b11101100>; class ROUND_W_D_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"round.w.d", 1, 0b11101100>; class SEL_S_MMR6_ENC : POOL32F_SEL_FM_MMR6<"sel.s", 0, 0b010111000>; class SEL_D_MMR6_ENC : POOL32F_SEL_FM_MMR6<"sel.d", 1, 0b010111000>; class SELEQZ_S_MMR6_ENC : POOL32F_SEL_FM_MMR6<"seleqz.s", 0, 0b000111000>; class SELEQZ_D_MMR6_ENC : POOL32F_SEL_FM_MMR6<"seleqz.d", 1, 0b000111000>; class SELNEZ_S_MMR6_ENC : POOL32F_SEL_FM_MMR6<"selnez.s", 0, 0b001111000>; class SELNEZ_D_MMR6_ENC : POOL32F_SEL_FM_MMR6<"selnez.d", 1, 0b001111000>; class CLASS_S_MMR6_ENC : POOL32F_CLASS_FM_MMR6<"class.s", 0, 0b001100000>; class CLASS_D_MMR6_ENC : POOL32F_CLASS_FM_MMR6<"class.d", 1, 0b001100000>; class EXT_MMR6_ENC : POOL32A_EXT_INS_FM_MMR6<"ext", 0b101100>; class INS_MMR6_ENC : POOL32A_EXT_INS_FM_MMR6<"ins", 0b001100>; class JALRC_MMR6_ENC : POOL32A_JALRC_FM_MMR6<"jalrc", 0b0000111100>; class BOVC_MMR6_ENC : POP35_BOVC_FM_MMR6<"bovc">; class BNVC_MMR6_ENC : POP37_BNVC_FM_MMR6<"bnvc">; class ADDU16_MMR6_ENC : POOL16A_ADDU16_FM_MMR6; class AND16_MMR6_ENC : POOL16C_AND16_FM_MMR6; class ANDI16_MMR6_ENC : ANDI_FM_MM16<0b001011>; class NOT16_MMR6_ENC : POOL16C_NOT16_FM_MMR6; class OR16_MMR6_ENC : POOL16C_OR16_XOR16_FM_MMR6<0b1001>; class SLL16_MMR6_ENC : 
SHIFT_FM_MM16<0>; class SRL16_MMR6_ENC : SHIFT_FM_MM16<1>; class BREAK16_MMR6_ENC : POOL16C_BREAKPOINT_FM_MMR6<0b011011>; class LI16_MMR6_ENC : LI_FM_MM16; class MOVE16_MMR6_ENC : MOVE_FM_MM16<0b000011>; class MOVEP_MMR6_ENC : POOL16C_MOVEP16_FM_MMR6; class SDBBP16_MMR6_ENC : POOL16C_BREAKPOINT_FM_MMR6<0b111011>; class SUBU16_MMR6_ENC : POOL16A_SUBU16_FM_MMR6; class XOR16_MMR6_ENC : POOL16C_OR16_XOR16_FM_MMR6<0b1000>; class TLBINV_MMR6_ENC : POOL32A_TLBINV_FM_MMR6<"tlbinv", 0x10d>; class TLBINVF_MMR6_ENC : POOL32A_TLBINV_FM_MMR6<"tlbinvf", 0x14d>; class DVP_MMR6_ENC : POOL32A_DVPEVP_FM_MMR6<"dvp", 0b0001100101>; class EVP_MMR6_ENC : POOL32A_DVPEVP_FM_MMR6<"evp", 0b0011100101>; class BC1EQZC_MMR6_ENC : POOL32I_BRANCH_COP_1_2_FM_MMR6<"bc1eqzc", 0b01000>; class BC1NEZC_MMR6_ENC : POOL32I_BRANCH_COP_1_2_FM_MMR6<"bc1nezc", 0b01001>; class BC2EQZC_MMR6_ENC : POOL32I_BRANCH_COP_1_2_FM_MMR6<"bc2eqzc", 0b01010>; class BC2NEZC_MMR6_ENC : POOL32I_BRANCH_COP_1_2_FM_MMR6<"bc2nezc", 0b01011>; class LDC1_MMR6_ENC : LDWC1_SDWC1_FM_MMR6<"ldc1", 0b101111>; class SDC1_MMR6_ENC : LDWC1_SDWC1_FM_MMR6<"sdc1", 0b101110>; class LDC2_MMR6_ENC : POOL32B_LDWC2_SDWC2_FM_MMR6<"ldc2", 0b0010>; class SDC2_MMR6_ENC : POOL32B_LDWC2_SDWC2_FM_MMR6<"sdc2", 0b1010>; class LWC2_MMR6_ENC : POOL32B_LDWC2_SDWC2_FM_MMR6<"lwc2", 0b0000>; class SWC2_MMR6_ENC : POOL32B_LDWC2_SDWC2_FM_MMR6<"swc2", 0b1000>; class LL_MMR6_ENC : POOL32C_LL_E_SC_E_FM_MMR6<"ll", 0b0011, 0b000>; class SC_MMR6_ENC : POOL32C_LL_E_SC_E_FM_MMR6<"sc", 0b1011, 0b000>; /// Floating Point Instructions class FADD_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"add.s", 0, 0b00110000>; class FSUB_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"sub.s", 0, 0b01110000>; class FMUL_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"mul.s", 0, 0b10110000>; class FDIV_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"div.s", 0, 0b11110000>; class MADDF_S_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"maddf.s", 0, 0b110111000>; class MADDF_D_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"maddf.d", 1, 0b110111000>; class MSUBF_S_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"msubf.s", 0, 0b111111000>; class MSUBF_D_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"msubf.d", 1, 0b111111000>; class FMOV_S_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"mov.s", 0, 0b0000001>; class FNEG_S_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"neg.s", 0, 0b0101101>; class MAX_S_MMR6_ENC : POOL32F_MINMAX_FM<"max.s", 0, 0b000001011>; class MAX_D_MMR6_ENC : POOL32F_MINMAX_FM<"max.d", 1, 0b000001011>; class MAXA_S_MMR6_ENC : POOL32F_MINMAX_FM<"maxa.s", 0, 0b000101011>; class MAXA_D_MMR6_ENC : POOL32F_MINMAX_FM<"maxa.d", 1, 0b000101011>; class MIN_S_MMR6_ENC : POOL32F_MINMAX_FM<"min.s", 0, 0b000000011>; class MIN_D_MMR6_ENC : POOL32F_MINMAX_FM<"min.d", 1, 0b000000011>; class MINA_S_MMR6_ENC : POOL32F_MINMAX_FM<"mina.s", 0, 0b000100011>; class MINA_D_MMR6_ENC : POOL32F_MINMAX_FM<"mina.d", 1, 0b000100011>; class CVT_L_S_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.l.s", 0, 0b00000100>; class CVT_L_D_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.l.d", 1, 0b00000100>; class CVT_W_S_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.w.s", 0, 0b00100100>; class CVT_D_L_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.d.l", 2, 0b1001101>; class CVT_S_W_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.s.w", 1, 0b1101101>; class CVT_S_L_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.s.l", 2, 0b1101101>; //===----------------------------------------------------------------------===// // // Instruction Descriptions // //===----------------------------------------------------------------------===// class CMP_CBR_RT_Z_MMR6_DESC_BASE : BRANCH_DESC_BASE { dag InOperandList = (ins GPROpnd:$rt, opnd:$offset); dag 
OutOperandList = (outs); string AsmString = !strconcat(instr_asm, "\t$rt, $offset"); list Defs = [AT]; InstrItinClass Itinerary = II_BCCZC; } class BEQZALC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"beqzalc", brtarget_mm, GPR32Opnd> { list Defs = [RA]; } class BGEZALC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bgezalc", brtarget_mm, GPR32Opnd> { list Defs = [RA]; } class BGTZALC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bgtzalc", brtarget_mm, GPR32Opnd> { list Defs = [RA]; } class BLEZALC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"blezalc", brtarget_mm, GPR32Opnd> { list Defs = [RA]; } class BLTZALC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bltzalc", brtarget_mm, GPR32Opnd> { list Defs = [RA]; } class BNEZALC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bnezalc", brtarget_mm, GPR32Opnd> { list Defs = [RA]; } class BLTZC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bltzc", brtarget_lsl2_mm, GPR32Opnd>; class BLEZC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"blezc", brtarget_lsl2_mm, GPR32Opnd>; class BGEZC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bgezc", brtarget_lsl2_mm, GPR32Opnd>; class BGTZC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bgtzc", brtarget_lsl2_mm, GPR32Opnd>; class CMP_CBR_2R_MMR6_DESC_BASE : BRANCH_DESC_BASE { dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, opnd:$offset); dag OutOperandList = (outs); string AsmString = !strconcat(instr_asm, "\t$rs, $rt, $offset"); list Defs = [AT]; InstrItinClass Itinerary = II_BCCC; } class BGEC_MMR6_DESC : CMP_CBR_2R_MMR6_DESC_BASE<"bgec", brtarget_lsl2_mm, GPR32Opnd>; class BGEUC_MMR6_DESC : CMP_CBR_2R_MMR6_DESC_BASE<"bgeuc", brtarget_lsl2_mm, GPR32Opnd>; class BLTC_MMR6_DESC : CMP_CBR_2R_MMR6_DESC_BASE<"bltc", brtarget_lsl2_mm, GPR32Opnd>; class BLTUC_MMR6_DESC : CMP_CBR_2R_MMR6_DESC_BASE<"bltuc", brtarget_lsl2_mm, GPR32Opnd>; class BEQC_MMR6_DESC : CMP_CBR_2R_MMR6_DESC_BASE<"beqc", brtarget_lsl2_mm, GPR32Opnd>; class BNEC_MMR6_DESC : CMP_CBR_2R_MMR6_DESC_BASE<"bnec", brtarget_lsl2_mm, GPR32Opnd>; class ADD_MMR6_DESC : ArithLogicR<"add", GPR32Opnd, 1, II_ADD>; class ADDIU_MMR6_DESC : ArithLogicI<"addiu", simm16, GPR32Opnd, II_ADDIU, immSExt16, add>; class ADDU_MMR6_DESC : ArithLogicR<"addu", GPR32Opnd, 1, II_ADDU>; class MUL_MMR6_DESC : ArithLogicR<"mul", GPR32Opnd, 1, II_MUL, mul>; class MUH_MMR6_DESC : ArithLogicR<"muh", GPR32Opnd, 1, II_MUH, mulhs>; class MULU_MMR6_DESC : ArithLogicR<"mulu", GPR32Opnd, 1, II_MULU>; class MUHU_MMR6_DESC : ArithLogicR<"muhu", GPR32Opnd, 1, II_MUHU, mulhu>; class BC_MMR6_DESC_BASE : BRANCH_DESC_BASE, MMR6Arch { dag InOperandList = (ins opnd:$offset); dag OutOperandList = (outs); string AsmString = !strconcat(instr_asm, "\t$offset"); bit isBarrier = 1; InstrItinClass Itinerary = Itin; } class BALC_MMR6_DESC : BC_MMR6_DESC_BASE<"balc", brtarget26_mm, II_BALC> { bit isCall = 1; list Defs = [RA]; } class BC_MMR6_DESC : BC_MMR6_DESC_BASE<"bc", brtarget26_mm, II_BC> { list Pattern = [(br bb:$offset)]; } class BC16_MMR6_DESC : MicroMipsInst16<(outs), (ins brtarget10_mm:$offset), !strconcat("bc16", "\t$offset"), [], II_BC, FrmI>, MMR6Arch<"bc16"> { let isBranch = 1; let isTerminator = 1; let isBarrier = 1; let hasDelaySlot = 0; let AdditionalPredicates = [RelocPIC]; let Defs = [AT]; } class BEQZC_BNEZC_MM16R6_DESC_BASE : CBranchZeroMM, MMR6Arch { let isBranch = 1; let isTerminator = 1; let hasDelaySlot = 0; let Defs = [AT]; } class BEQZC16_MMR6_DESC : BEQZC_BNEZC_MM16R6_DESC_BASE<"beqzc16">; class BNEZC16_MMR6_DESC : BEQZC_BNEZC_MM16R6_DESC_BASE<"bnezc16">; class SUB_MMR6_DESC : ArithLogicR<"sub", GPR32Opnd, 0, II_SUB>; class 
SUBU_MMR6_DESC : ArithLogicR<"subu", GPR32Opnd, 0,II_SUBU>; class BITSWAP_MMR6_DESC_BASE : MMR6Arch { dag OutOperandList = (outs GPROpnd:$rd); dag InOperandList = (ins GPROpnd:$rt); string AsmString = !strconcat(instr_asm, "\t$rd, $rt"); list Pattern = []; InstrItinClass Itinerary = II_BITSWAP; } class BITSWAP_MMR6_DESC : BITSWAP_MMR6_DESC_BASE<"bitswap", GPR32Opnd>; class BRK_MMR6_DESC : BRK_FT<"break">; class CACHE_HINT_MMR6_DESC : MMR6Arch { dag OutOperandList = (outs); dag InOperandList = (ins MemOpnd:$addr, uimm5:$hint); string AsmString = !strconcat(instr_asm, "\t$hint, $addr"); list Pattern = []; string DecoderMethod = "DecodeCacheOpMM"; InstrItinClass Itinerary = Itin; } class CACHE_MMR6_DESC : CACHE_HINT_MMR6_DESC<"cache", mem_mm_12, GPR32Opnd, II_CACHE>; class PREF_MMR6_DESC : CACHE_HINT_MMR6_DESC<"pref", mem_mm_12, GPR32Opnd, II_PREF>; class LB_LBU_MMR6_DESC_BASE : MMR6Arch { dag OutOperandList = (outs GPROpnd:$rt); dag InOperandList = (ins MemOpnd:$addr); string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); string DecoderMethod = "DecodeLoadByte15"; bit mayLoad = 1; InstrItinClass Itinerary = Itin; } class LB_MMR6_DESC : LB_LBU_MMR6_DESC_BASE<"lb", mem_mm_16, GPR32Opnd, II_LB>; class LBU_MMR6_DESC : LB_LBU_MMR6_DESC_BASE<"lbu", mem_mm_16, GPR32Opnd, II_LBU>; class CLO_CLZ_MMR6_DESC_BASE : MMR6Arch { dag OutOperandList = (outs GPROpnd:$rt); dag InOperandList = (ins GPROpnd:$rs); string AsmString = !strconcat(instr_asm, "\t$rt, $rs"); InstrItinClass Itinerary = Itin; } class CLO_MMR6_DESC : CLO_CLZ_MMR6_DESC_BASE<"clo", GPR32Opnd, II_CLO>; class CLZ_MMR6_DESC : CLO_CLZ_MMR6_DESC_BASE<"clz", GPR32Opnd, II_CLZ>; class EHB_MMR6_DESC : Barrier<"ehb", II_EHB>; class EI_MMR6_DESC : DEI_FT<"ei", GPR32Opnd, II_EI>; class DI_MMR6_DESC : DEI_FT<"di", GPR32Opnd, II_DI>; class ERET_MMR6_DESC : ER_FT<"eret", II_ERET>; class DERET_MMR6_DESC : ER_FT<"deret", II_DERET>; class ERETNC_MMR6_DESC : ER_FT<"eretnc", II_ERETNC>; class JALRC16_MMR6_DESC_BASE : MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"), [(MipsJmpLink RO:$rs)], II_JALR, FrmR>, MMR6Arch { let isCall = 1; let hasDelaySlot = 0; let Defs = [RA]; } class JALRC16_MMR6_DESC : JALRC16_MMR6_DESC_BASE<"jalr", GPR32Opnd>; class JMP_MMR6_IDX_COMPACT_DESC_BASE : MMR6Arch { dag InOperandList = (ins GPROpnd:$rt, opnd:$offset); string AsmString = !strconcat(opstr, "\t$rt, $offset"); list Pattern = []; bit isTerminator = 1; bit hasDelaySlot = 0; InstrItinClass Itinerary = Itin; } class JIALC_MMR6_DESC : JMP_MMR6_IDX_COMPACT_DESC_BASE<"jialc", calloffset16, GPR32Opnd, II_JIALC> { bit isCall = 1; list Defs = [RA]; } class JIC_MMR6_DESC : JMP_MMR6_IDX_COMPACT_DESC_BASE<"jic", jmpoffset16, GPR32Opnd, II_JIC> { bit isBarrier = 1; list Defs = [AT]; } class JRC16_MMR6_DESC_BASE : MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"), [], II_JR, FrmR>, MMR6Arch { let hasDelaySlot = 0; let isBranch = 1; let isIndirectBranch = 1; } class JRC16_MMR6_DESC : JRC16_MMR6_DESC_BASE<"jrc16", GPR32Opnd>; class JRCADDIUSP_MMR6_DESC : MicroMipsInst16<(outs), (ins uimm5_lsl2:$imm), "jrcaddiusp\t$imm", [], II_JRADDIUSP, FrmR>, MMR6Arch<"jrcaddiusp"> { let hasDelaySlot = 0; let isTerminator = 1; let isBarrier = 1; let isBranch = 1; let isIndirectBranch = 1; } class ALIGN_MMR6_DESC_BASE : MMR6Arch { dag OutOperandList = (outs GPROpnd:$rd); dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, ImmOpnd:$bp); string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt, $bp"); list Pattern = []; InstrItinClass Itinerary = Itin; } class 
ALIGN_MMR6_DESC : ALIGN_MMR6_DESC_BASE<"align", GPR32Opnd, uimm2, II_ALIGN>; class AUI_MMR6_DESC_BASE : MMR6Arch { dag OutOperandList = (outs GPROpnd:$rt); dag InOperandList = (ins GPROpnd:$rs, uimm16:$imm); string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $imm"); list Pattern = []; InstrItinClass Itinerary = Itin; } class AUI_MMR6_DESC : AUI_MMR6_DESC_BASE<"aui", GPR32Opnd, II_AUI>; class ALUIPC_MMR6_DESC_BASE : MMR6Arch { dag OutOperandList = (outs GPROpnd:$rt); dag InOperandList = (ins simm16:$imm); string AsmString = !strconcat(instr_asm, "\t$rt, $imm"); list Pattern = []; InstrItinClass Itinerary = Itin; } class ALUIPC_MMR6_DESC : ALUIPC_MMR6_DESC_BASE<"aluipc", GPR32Opnd, II_ALUIPC>; class AUIPC_MMR6_DESC : ALUIPC_MMR6_DESC_BASE<"auipc", GPR32Opnd, II_AUIPC>; class LSA_MMR6_DESC_BASE : MMR6Arch { dag OutOperandList = (outs GPROpnd:$rd); dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, ImmOpnd:$imm2); string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $rd, $imm2"); list Pattern = []; InstrItinClass Itinerary = Itin; } class LSA_MMR6_DESC : LSA_MMR6_DESC_BASE<"lsa", GPR32Opnd, uimm2_plus1, II_LSA>; class PCREL_MMR6_DESC_BASE : MMR6Arch { dag OutOperandList = (outs GPROpnd:$rt); dag InOperandList = (ins ImmOpnd:$imm); string AsmString = !strconcat(instr_asm, "\t$rt, $imm"); list Pattern = []; InstrItinClass Itinerary = Itin; } class ADDIUPC_MMR6_DESC : PCREL_MMR6_DESC_BASE<"addiupc", GPR32Opnd, simm19_lsl2, II_ADDIUPC>; class LWPC_MMR6_DESC: PCREL_MMR6_DESC_BASE<"lwpc", GPR32Opnd, simm19_lsl2, II_LWPC>; class SELEQNE_Z_MMR6_DESC_BASE : MMR6Arch { dag OutOperandList = (outs GPROpnd:$rd); dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt); string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); list Pattern = []; InstrItinClass Itinerary = Itin; } class SELEQZ_MMR6_DESC : SELEQNE_Z_MMR6_DESC_BASE<"seleqz", GPR32Opnd, II_SELCCZ>; class SELNEZ_MMR6_DESC : SELEQNE_Z_MMR6_DESC_BASE<"selnez", GPR32Opnd, II_SELCCZ>; class PAUSE_MMR6_DESC : Barrier<"pause", II_PAUSE>; class RDHWR_MMR6_DESC : MMR6Arch<"rdhwr">, MipsR6Inst { dag OutOperandList = (outs GPR32Opnd:$rt); dag InOperandList = (ins HWRegsOpnd:$rs, uimm3:$sel); string AsmString = !strconcat("rdhwr", "\t$rt, $rs, $sel"); list Pattern = []; InstrItinClass Itinerary = II_RDHWR; Format Form = FrmR; } class WAIT_MMR6_DESC : WaitMM<"wait">; // FIXME: ssnop should not be defined for R6. Per MD000582 microMIPS32 6.03: // Assemblers targeting specifically Release 6 should reject the SSNOP // instruction with an error. class SSNOP_MMR6_DESC : Barrier<"ssnop", II_SSNOP>; class SLL_MMR6_DESC : shift_rotate_imm<"sll", uimm5, GPR32Opnd, II_SLL>; class DIVMOD_MMR6_DESC_BASE : MipsR6Inst { dag OutOperandList = (outs GPROpnd:$rd); dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt); string AsmString = !strconcat(opstr, "\t$rd, $rs, $rt"); list Pattern = [(set GPROpnd:$rd, (OpNode GPROpnd:$rs, GPROpnd:$rt))]; string BaseOpcode = opstr; Format f = FrmR; let isCommutable = 0; let isReMaterializable = 1; InstrItinClass Itinerary = Itin; // This instruction doesn't trap division by zero itself. We must insert // teq instructions as well. 
bit usesCustomInserter = 1; } class DIV_MMR6_DESC : DIVMOD_MMR6_DESC_BASE<"div", GPR32Opnd, II_DIV, sdiv>; class DIVU_MMR6_DESC : DIVMOD_MMR6_DESC_BASE<"divu", GPR32Opnd, II_DIVU, udiv>; class MOD_MMR6_DESC : DIVMOD_MMR6_DESC_BASE<"mod", GPR32Opnd, II_MOD, srem>; class MODU_MMR6_DESC : DIVMOD_MMR6_DESC_BASE<"modu", GPR32Opnd, II_MODU, urem>; class AND_MMR6_DESC : ArithLogicR<"and", GPR32Opnd, 1, II_AND, and>; class ANDI_MMR6_DESC : ArithLogicI<"andi", uimm16, GPR32Opnd, II_ANDI>; class NOR_MMR6_DESC : LogicNOR<"nor", GPR32Opnd>; class OR_MMR6_DESC : ArithLogicR<"or", GPR32Opnd, 1, II_OR, or>; class ORI_MMR6_DESC : ArithLogicI<"ori", uimm16, GPR32Opnd, II_ORI, immZExt16, or> { int AddedComplexity = 1; } class XOR_MMR6_DESC : ArithLogicR<"xor", GPR32Opnd, 1, II_XOR, xor>; class XORI_MMR6_DESC : ArithLogicI<"xori", uimm16, GPR32Opnd, II_XORI, immZExt16, xor>; class SW_MMR6_DESC : Store<"sw", GPR32Opnd> { InstrItinClass Itinerary = II_SW; } class WRPGPR_WSBH_MMR6_DESC_BASE { dag InOperandList = (ins RO:$rs); dag OutOperandList = (outs RO:$rt); string AsmString = !strconcat(instr_asm, "\t$rt, $rs"); list Pattern = []; Format f = FrmR; string BaseOpcode = instr_asm; bit hasSideEffects = 0; InstrItinClass Itinerary = Itin; } class WRPGPR_MMR6_DESC : WRPGPR_WSBH_MMR6_DESC_BASE<"wrpgpr", GPR32Opnd, II_WRPGPR>; class WSBH_MMR6_DESC : WRPGPR_WSBH_MMR6_DESC_BASE<"wsbh", GPR32Opnd, II_WSBH>; class MTC0_MMR6_DESC_BASE { dag InOperandList = (ins SrcRC:$rt, uimm3:$sel); dag OutOperandList = (outs DstRC:$rs); string AsmString = !strconcat(opstr, "\t$rt, $rs, $sel"); list Pattern = []; Format f = FrmFR; string BaseOpcode = opstr; InstrItinClass Itinerary = Itin; } class MTC1_MMR6_DESC_BASE< string opstr, RegisterOperand DstRC, RegisterOperand SrcRC, InstrItinClass Itin = NoItinerary, SDPatternOperator OpNode = null_frag> : MipsR6Inst { dag InOperandList = (ins SrcRC:$rt); dag OutOperandList = (outs DstRC:$fs); string AsmString = !strconcat(opstr, "\t$rt, $fs"); list Pattern = [(set DstRC:$fs, (OpNode SrcRC:$rt))]; Format f = FrmFR; InstrItinClass Itinerary = Itin; string BaseOpcode = opstr; } class MTC1_64_MMR6_DESC_BASE< string opstr, RegisterOperand DstRC, RegisterOperand SrcRC, InstrItinClass Itin = NoItinerary> : MipsR6Inst { dag InOperandList = (ins DstRC:$fs_in, SrcRC:$rt); dag OutOperandList = (outs DstRC:$fs); string AsmString = !strconcat(opstr, "\t$rt, $fs"); list Pattern = []; Format f = FrmFR; InstrItinClass Itinerary = Itin; string BaseOpcode = opstr; // $fs_in is part of a white lie to work around a widespread bug in the FPU // implementation. See expandBuildPairF64 for details. 
let Constraints = "$fs = $fs_in"; } class MTC2_MMR6_DESC_BASE { dag InOperandList = (ins SrcRC:$rt); dag OutOperandList = (outs DstRC:$impl); string AsmString = !strconcat(opstr, "\t$rt, $impl"); list Pattern = []; Format f = FrmFR; string BaseOpcode = opstr; InstrItinClass Itinerary = Itin; } class MTC0_MMR6_DESC : MTC0_MMR6_DESC_BASE<"mtc0", COP0Opnd, GPR32Opnd, II_MTC0>; class MTC1_MMR6_DESC : MTC1_MMR6_DESC_BASE<"mtc1", FGR32Opnd, GPR32Opnd, II_MTC1, bitconvert>, HARDFLOAT; class MTC2_MMR6_DESC : MTC2_MMR6_DESC_BASE<"mtc2", COP2Opnd, GPR32Opnd, II_MTC2>; class MTHC0_MMR6_DESC : MTC0_MMR6_DESC_BASE<"mthc0", COP0Opnd, GPR32Opnd, II_MTHC0>; class MTHC2_MMR6_DESC : MTC2_MMR6_DESC_BASE<"mthc2", COP2Opnd, GPR32Opnd, II_MTC2>; class MFC0_MMR6_DESC_BASE { dag InOperandList = (ins SrcRC:$rs, uimm3:$sel); dag OutOperandList = (outs DstRC:$rt); string AsmString = !strconcat(opstr, "\t$rt, $rs, $sel"); list Pattern = []; Format f = FrmFR; string BaseOpcode = opstr; InstrItinClass Itinerary = Itin; } class MFC1_MMR6_DESC_BASE : MipsR6Inst { dag InOperandList = (ins SrcRC:$fs); dag OutOperandList = (outs DstRC:$rt); string AsmString = !strconcat(opstr, "\t$rt, $fs"); list Pattern = [(set DstRC:$rt, (OpNode SrcRC:$fs))]; Format f = FrmFR; InstrItinClass Itinerary = Itin; string BaseOpcode = opstr; } class MFC2_MMR6_DESC_BASE { dag InOperandList = (ins SrcRC:$impl); dag OutOperandList = (outs DstRC:$rt); string AsmString = !strconcat(opstr, "\t$rt, $impl"); list Pattern = []; Format f = FrmFR; string BaseOpcode = opstr; InstrItinClass Itinerary = Itin; } class MFC0_MMR6_DESC : MFC0_MMR6_DESC_BASE<"mfc0", GPR32Opnd, COP0Opnd, II_MFC0>; class MFC1_MMR6_DESC : MFC1_MMR6_DESC_BASE<"mfc1", GPR32Opnd, FGR32Opnd, II_MFC1, bitconvert>, HARDFLOAT; class MFC2_MMR6_DESC : MFC2_MMR6_DESC_BASE<"mfc2", GPR32Opnd, COP2Opnd, II_MFC2>; class MFHC0_MMR6_DESC : MFC0_MMR6_DESC_BASE<"mfhc0", GPR32Opnd, COP0Opnd, II_MFHC0>; class MFHC2_MMR6_DESC : MFC2_MMR6_DESC_BASE<"mfhc2", GPR32Opnd, COP2Opnd, II_MFC2>; class LDC1_D64_MMR6_DESC : MipsR6Inst, HARDFLOAT, FGR_64 { dag InOperandList = (ins mem_mm_16:$addr); dag OutOperandList = (outs FGR64Opnd:$ft); string AsmString = !strconcat("ldc1", "\t$ft, $addr"); list Pattern = [(set FGR64Opnd:$ft, (load addrimm16:$addr))]; Format f = FrmFI; InstrItinClass Itinerary = II_LDC1; string BaseOpcode = "ldc1"; bit mayLoad = 1; let DecoderMethod = "DecodeFMemMMR2"; } class SDC1_D64_MMR6_DESC : MipsR6Inst, HARDFLOAT, FGR_64 { dag InOperandList = (ins FGR64Opnd:$ft, mem_mm_16:$addr); dag OutOperandList = (outs); string AsmString = !strconcat("sdc1", "\t$ft, $addr"); list Pattern = [(store FGR64Opnd:$ft, addrimm16:$addr)]; Format f = FrmFI; InstrItinClass Itinerary = II_SDC1; string BaseOpcode = "sdc1"; bit mayStore = 1; let DecoderMethod = "DecodeFMemMMR2"; } class LDC2_LWC2_MMR6_DESC_BASE { dag OutOperandList = (outs COP2Opnd:$rt); dag InOperandList = (ins mem_mm_11:$addr); string AsmString = !strconcat(opstr, "\t$rt, $addr"); list Pattern = [(set COP2Opnd:$rt, (load addrimm11:$addr))]; Format f = FrmFI; InstrItinClass Itinerary = itin; string BaseOpcode = opstr; bit mayLoad = 1; string DecoderMethod = "DecodeFMemCop2MMR6"; } class LDC2_MMR6_DESC : LDC2_LWC2_MMR6_DESC_BASE<"ldc2", II_LDC2>; class LWC2_MMR6_DESC : LDC2_LWC2_MMR6_DESC_BASE<"lwc2", II_LWC2>; class SDC2_SWC2_MMR6_DESC_BASE { dag OutOperandList = (outs); dag InOperandList = (ins COP2Opnd:$rt, mem_mm_11:$addr); string AsmString = !strconcat(opstr, "\t$rt, $addr"); list Pattern = [(store COP2Opnd:$rt, addrimm11:$addr)]; Format f = 
FrmFI; InstrItinClass Itinerary = itin; string BaseOpcode = opstr; bit mayStore = 1; string DecoderMethod = "DecodeFMemCop2MMR6"; } class SDC2_MMR6_DESC : SDC2_SWC2_MMR6_DESC_BASE<"sdc2", II_SDC2>; class SWC2_MMR6_DESC : SDC2_SWC2_MMR6_DESC_BASE<"swc2", II_SWC2>; class GINV_MMR6_DESC_BASE { dag InOperandList = (ins SrcRC:$rs, uimm2:$type); dag OutOperandList = (outs); string AsmString = !strconcat(opstr, "\t$rs, $type"); list Pattern = []; Format f = FrmFR; string BaseOpcode = opstr; InstrItinClass Itinerary = Itin; } class GINVI_MMR6_DESC : GINV_MMR6_DESC_BASE<"ginvi", GPR32Opnd, II_GINVI> { dag InOperandList = (ins GPR32Opnd:$rs); string AsmString = "ginvi\t$rs"; } class GINVT_MMR6_DESC : GINV_MMR6_DESC_BASE<"ginvt", GPR32Opnd, II_GINVT>; class SC_MMR6_DESC_BASE { dag OutOperandList = (outs GPR32Opnd:$dst); dag InOperandList = (ins GPR32Opnd:$rt, mem_mm_9:$addr); string AsmString = !strconcat(opstr, "\t$rt, $addr"); InstrItinClass Itinerary = itin; string BaseOpcode = opstr; bit mayStore = 1; string Constraints = "$rt = $dst"; string DecoderMethod = "DecodeMemMMImm9"; } class LL_MMR6_DESC_BASE { dag OutOperandList = (outs GPR32Opnd:$rt); dag InOperandList = (ins mem_mm_9:$addr); string AsmString = !strconcat(opstr, "\t$rt, $addr"); InstrItinClass Itinerary = itin; string BaseOpcode = opstr; bit mayLoad = 1; string DecoderMethod = "DecodeMemMMImm9"; } class SC_MMR6_DESC : SC_MMR6_DESC_BASE<"sc", II_SC>; class LL_MMR6_DESC : LL_MMR6_DESC_BASE<"ll", II_LL>; /// Floating Point Instructions class FARITH_MMR6_DESC_BASE : HARDFLOAT { dag OutOperandList = (outs RC:$fd); dag InOperandList = (ins RC:$ft, RC:$fs); string AsmString = !strconcat(instr_asm, "\t$fd, $fs, $ft"); list Pattern = [(set RC:$fd, (OpNode RC:$fs, RC:$ft))]; InstrItinClass Itinerary = Itin; bit isCommutable = isComm; } class FADD_S_MMR6_DESC : FARITH_MMR6_DESC_BASE<"add.s", FGR32Opnd, II_ADD_S, 1, fadd>; class FSUB_S_MMR6_DESC : FARITH_MMR6_DESC_BASE<"sub.s", FGR32Opnd, II_SUB_S, 0, fsub>; class FMUL_S_MMR6_DESC : FARITH_MMR6_DESC_BASE<"mul.s", FGR32Opnd, II_MUL_S, 1, fmul>; class FDIV_S_MMR6_DESC : FARITH_MMR6_DESC_BASE<"div.s", FGR32Opnd, II_DIV_S, 0, fdiv>; class MADDF_S_MMR6_DESC : COP1_4R_DESC_BASE<"maddf.s", FGR32Opnd, II_MADDF_S>, HARDFLOAT; class MADDF_D_MMR6_DESC : COP1_4R_DESC_BASE<"maddf.d", FGR64Opnd, II_MADDF_D>, HARDFLOAT; class MSUBF_S_MMR6_DESC : COP1_4R_DESC_BASE<"msubf.s", FGR32Opnd, II_MSUBF_S>, HARDFLOAT; class MSUBF_D_MMR6_DESC : COP1_4R_DESC_BASE<"msubf.d", FGR64Opnd, II_MSUBF_D>, HARDFLOAT; class FMOV_FNEG_MMR6_DESC_BASE : HARDFLOAT, NeverHasSideEffects { dag OutOperandList = (outs DstRC:$ft); dag InOperandList = (ins SrcRC:$fs); string AsmString = !strconcat(instr_asm, "\t$ft, $fs"); list Pattern = [(set DstRC:$ft, (OpNode SrcRC:$fs))]; InstrItinClass Itinerary = Itin; Format Form = FrmFR; } class FMOV_S_MMR6_DESC : FMOV_FNEG_MMR6_DESC_BASE<"mov.s", FGR32Opnd, FGR32Opnd, II_MOV_S>; class FNEG_S_MMR6_DESC : FMOV_FNEG_MMR6_DESC_BASE<"neg.s", FGR32Opnd, FGR32Opnd, II_NEG, fneg>; class MAX_S_MMR6_DESC : MAX_MIN_DESC_BASE<"max.s", FGR32Opnd, II_MAX_S>, HARDFLOAT; class MAX_D_MMR6_DESC : MAX_MIN_DESC_BASE<"max.d", FGR64Opnd, II_MAX_D>, HARDFLOAT; class MIN_S_MMR6_DESC : MAX_MIN_DESC_BASE<"min.s", FGR32Opnd, II_MIN_S>, HARDFLOAT; class MIN_D_MMR6_DESC : MAX_MIN_DESC_BASE<"min.d", FGR64Opnd, II_MIN_D>, HARDFLOAT; class MAXA_S_MMR6_DESC : MAX_MIN_DESC_BASE<"maxa.s", FGR32Opnd, II_MAXA_S>, HARDFLOAT; class MAXA_D_MMR6_DESC : MAX_MIN_DESC_BASE<"maxa.d", FGR64Opnd, II_MAXA_D>, HARDFLOAT; class MINA_S_MMR6_DESC : 
MAX_MIN_DESC_BASE<"mina.s", FGR32Opnd, II_MINA_S>, HARDFLOAT; class MINA_D_MMR6_DESC : MAX_MIN_DESC_BASE<"mina.d", FGR64Opnd, II_MINA_D>, HARDFLOAT; class CVT_MMR6_DESC_BASE< string instr_asm, RegisterOperand DstRC, RegisterOperand SrcRC, InstrItinClass Itin, SDPatternOperator OpNode = null_frag> : HARDFLOAT, NeverHasSideEffects { dag OutOperandList = (outs DstRC:$ft); dag InOperandList = (ins SrcRC:$fs); string AsmString = !strconcat(instr_asm, "\t$ft, $fs"); list Pattern = [(set DstRC:$ft, (OpNode SrcRC:$fs))]; InstrItinClass Itinerary = Itin; Format Form = FrmFR; } class CVT_L_S_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.l.s", FGR64Opnd, FGR32Opnd, II_CVT>; class CVT_L_D_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.l.d", FGR64Opnd, FGR64Opnd, II_CVT>; class CVT_W_S_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.w.s", FGR32Opnd, FGR32Opnd, II_CVT>; class CVT_D_L_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.d.l", FGR64Opnd, FGR64Opnd, II_CVT>, FGR_64; class CVT_S_W_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.s.w", FGR32Opnd, FGR32Opnd, II_CVT>; class CVT_S_L_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.s.l", FGR64Opnd, FGR32Opnd, II_CVT>, FGR_64; multiclass CMP_CC_MMR6 format, string Typestr, RegisterOperand FGROpnd, InstrItinClass Itin> { def CMP_AF_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.af.", Typestr), format, FIELD_CMP_COND_AF>, CMP_CONDN_DESC_BASE<"af", Typestr, FGROpnd, Itin>, HARDFLOAT, ISA_MICROMIPS32R6; def CMP_UN_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.un.", Typestr), format, FIELD_CMP_COND_UN>, CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd, Itin, setuo>, HARDFLOAT, ISA_MICROMIPS32R6; def CMP_EQ_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.eq.", Typestr), format, FIELD_CMP_COND_EQ>, CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd, Itin, setoeq>, HARDFLOAT, ISA_MICROMIPS32R6; def CMP_UEQ_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.ueq.", Typestr), format, FIELD_CMP_COND_UEQ>, CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd, Itin, setueq>, HARDFLOAT, ISA_MICROMIPS32R6; def CMP_LT_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.lt.", Typestr), format, FIELD_CMP_COND_LT>, CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd, Itin, setolt>, HARDFLOAT, ISA_MICROMIPS32R6; def CMP_ULT_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.ult.", Typestr), format, FIELD_CMP_COND_ULT>, CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd, Itin, setult>, HARDFLOAT, ISA_MICROMIPS32R6; def CMP_LE_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.le.", Typestr), format, FIELD_CMP_COND_LE>, CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd, Itin, setole>, HARDFLOAT, ISA_MICROMIPS32R6; def CMP_ULE_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.ule.", Typestr), format, FIELD_CMP_COND_ULE>, CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd, Itin, setule>, HARDFLOAT, ISA_MICROMIPS32R6; def CMP_SAF_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.saf.", Typestr), format, FIELD_CMP_COND_SAF>, CMP_CONDN_DESC_BASE<"saf", Typestr, FGROpnd, Itin>, HARDFLOAT, ISA_MICROMIPS32R6; def CMP_SUN_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.sun.", Typestr), format, FIELD_CMP_COND_SUN>, CMP_CONDN_DESC_BASE<"sun", Typestr, FGROpnd, Itin>, HARDFLOAT, ISA_MICROMIPS32R6; def CMP_SEQ_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.seq.", Typestr), format, FIELD_CMP_COND_SEQ>, CMP_CONDN_DESC_BASE<"seq", Typestr, FGROpnd, Itin>, HARDFLOAT, ISA_MICROMIPS32R6; def CMP_SUEQ_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.sueq.", Typestr), format, FIELD_CMP_COND_SUEQ>, CMP_CONDN_DESC_BASE<"sueq", Typestr, FGROpnd, Itin>, HARDFLOAT, ISA_MICROMIPS32R6; def 
CMP_SLT_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.slt.", Typestr), format, FIELD_CMP_COND_SLT>, CMP_CONDN_DESC_BASE<"slt", Typestr, FGROpnd, Itin>, HARDFLOAT, ISA_MICROMIPS32R6; def CMP_SULT_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.sult.", Typestr), format, FIELD_CMP_COND_SULT>, CMP_CONDN_DESC_BASE<"sult", Typestr, FGROpnd, Itin>, HARDFLOAT, ISA_MICROMIPS32R6; def CMP_SLE_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.sle.", Typestr), format, FIELD_CMP_COND_SLE>, CMP_CONDN_DESC_BASE<"sle", Typestr, FGROpnd, Itin>, HARDFLOAT, ISA_MICROMIPS32R6; def CMP_SULE_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.sule.", Typestr), format, FIELD_CMP_COND_SULE>, CMP_CONDN_DESC_BASE<"sule", Typestr, FGROpnd, Itin>, HARDFLOAT, ISA_MICROMIPS32R6; } class ABSS_FT_MMR6_DESC_BASE : HARDFLOAT, NeverHasSideEffects { dag OutOperandList = (outs DstRC:$ft); dag InOperandList = (ins SrcRC:$fs); string AsmString = !strconcat(instr_asm, "\t$ft, $fs"); list Pattern = [(set DstRC:$ft, (OpNode SrcRC:$fs))]; InstrItinClass Itinerary = Itin; Format Form = FrmFR; list EncodingPredicates = [HasStdEnc]; } class FLOOR_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"floor.l.s", FGR64Opnd, FGR32Opnd, II_FLOOR>; class FLOOR_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"floor.l.d", FGR64Opnd, FGR64Opnd, II_FLOOR>; class FLOOR_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"floor.w.s", FGR32Opnd, FGR32Opnd, II_FLOOR>; class FLOOR_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"floor.w.d", FGR32Opnd, AFGR64Opnd, II_FLOOR>; class CEIL_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"ceil.l.s", FGR64Opnd, FGR32Opnd, II_CEIL>; class CEIL_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"ceil.l.d", FGR64Opnd, FGR64Opnd, II_CEIL>; class CEIL_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>; class CEIL_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"ceil.w.d", FGR32Opnd, AFGR64Opnd, II_CEIL>; class TRUNC_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.l.s", FGR64Opnd, FGR32Opnd, II_TRUNC>; class TRUNC_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.l.d", FGR64Opnd, FGR64Opnd, II_TRUNC>; class TRUNC_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.w.s", FGR32Opnd, FGR32Opnd, II_TRUNC>; class TRUNC_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.w.d", FGR32Opnd, AFGR64Opnd, II_TRUNC>; class SQRT_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.s", FGR32Opnd, FGR32Opnd, II_SQRT_S, fsqrt>; class SQRT_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.d", AFGR64Opnd, AFGR64Opnd, II_SQRT_D, fsqrt>; class ROUND_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.l.s", FGR64Opnd, FGR32Opnd, II_ROUND>; class ROUND_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.l.d", FGR64Opnd, FGR64Opnd, II_ROUND>; class ROUND_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.w.s", FGR32Opnd, FGR32Opnd, II_ROUND>; class ROUND_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.w.d", FGR64Opnd, FGR64Opnd, II_ROUND>; class SEL_S_MMR6_DESC : COP1_SEL_DESC_BASE<"sel.s", FGR32Opnd, II_SEL_S>; class SEL_D_MMR6_DESC : COP1_SEL_D_DESC_BASE<"sel.d", FGR64Opnd, II_SEL_D>; class SELEQZ_S_MMR6_DESC : SELEQNEZ_DESC_BASE<"seleqz.s", FGR32Opnd, II_SELCCZ_S>; class SELEQZ_D_MMR6_DESC : SELEQNEZ_DESC_BASE<"seleqz.d", FGR64Opnd, II_SELCCZ_D>; class SELNEZ_S_MMR6_DESC : SELEQNEZ_DESC_BASE<"selnez.s", FGR32Opnd, II_SELCCZ_S>; class SELNEZ_D_MMR6_DESC : SELEQNEZ_DESC_BASE<"selnez.d", FGR64Opnd, II_SELCCZ_D>; class RINT_S_MMR6_DESC : CLASS_RINT_DESC_BASE<"rint.s", FGR32Opnd, II_RINT_S>; class RINT_D_MMR6_DESC : CLASS_RINT_DESC_BASE<"rint.d", FGR64Opnd, II_RINT_S>; class CLASS_S_MMR6_DESC : 
CLASS_RINT_DESC_BASE<"class.s", FGR32Opnd, II_CLASS_S>; class CLASS_D_MMR6_DESC : CLASS_RINT_DESC_BASE<"class.d", FGR64Opnd, II_CLASS_S>; class STORE_MMR6_DESC_BASE : Store, MMR6Arch { let DecoderMethod = "DecodeMemMMImm16"; InstrItinClass Itinerary = Itin; } class SB_MMR6_DESC : STORE_MMR6_DESC_BASE<"sb", GPR32Opnd, II_SB>; class SH_MMR6_DESC : STORE_MMR6_DESC_BASE<"sh", GPR32Opnd, II_SH>; class ADDU16_MMR6_DESC : ArithRMM16<"addu16", GPRMM16Opnd, 1, II_ADDU, add>, MMR6Arch<"addu16"> { int AddedComplexity = 1; } class AND16_MMR6_DESC : LogicRMM16<"and16", GPRMM16Opnd, II_AND>, MMR6Arch<"and16">; class ANDI16_MMR6_DESC : AndImmMM16<"andi16", GPRMM16Opnd, II_AND>, MMR6Arch<"andi16">; class NOT16_MMR6_DESC : NotMM16<"not16", GPRMM16Opnd>, MMR6Arch<"not16"> { int AddedComplexity = 1; } class OR16_MMR6_DESC : LogicRMM16<"or16", GPRMM16Opnd, II_OR>, MMR6Arch<"or16">; class SLL16_MMR6_DESC : ShiftIMM16<"sll16", uimm3_shift, GPRMM16Opnd, II_SLL>, MMR6Arch<"sll16">; class SRL16_MMR6_DESC : ShiftIMM16<"srl16", uimm3_shift, GPRMM16Opnd, II_SRL>, MMR6Arch<"srl16">; class BREAK16_MMR6_DESC : BrkSdbbp16MM<"break16", II_BREAK>, MMR6Arch<"break16">; class LI16_MMR6_DESC : LoadImmMM16<"li16", li16_imm, GPRMM16Opnd>, MMR6Arch<"li16">, IsAsCheapAsAMove; class MOVE16_MMR6_DESC : MoveMM16<"move16", GPR32Opnd>, MMR6Arch<"move16">; class MOVEP_MMR6_DESC : MovePMM16<"movep", GPRMM16OpndMoveP>, MMR6Arch<"movep">; class SDBBP16_MMR6_DESC : BrkSdbbp16MM<"sdbbp16", II_SDBBP>, MMR6Arch<"sdbbp16">; class SUBU16_MMR6_DESC : ArithRMM16<"subu16", GPRMM16Opnd, 0, II_SUBU, sub>, MMR6Arch<"subu16"> { int AddedComplexity = 1; } class XOR16_MMR6_DESC : LogicRMM16<"xor16", GPRMM16Opnd, II_XOR>, MMR6Arch<"xor16">; class LW_MMR6_DESC : MMR6Arch<"lw">, MipsR6Inst { dag OutOperandList = (outs GPR32Opnd:$rt); dag InOperandList = (ins mem:$addr); string AsmString = "lw\t$rt, $addr"; let DecoderMethod = "DecodeMemMMImm16"; let canFoldAsLoad = 1; let mayLoad = 1; list Pattern = [(set GPR32Opnd:$rt, (load addrDefault:$addr))]; InstrItinClass Itinerary = II_LW; } class LUI_MMR6_DESC : IsAsCheapAsAMove, MMR6Arch<"lui">, MipsR6Inst{ dag OutOperandList = (outs GPR32Opnd:$rt); dag InOperandList = (ins uimm16:$imm16); string AsmString = "lui\t$rt, $imm16"; list Pattern = []; bit hasSideEffects = 0; bit isReMaterializable = 1; InstrItinClass Itinerary = II_LUI; Format Form = FrmI; } class SYNC_MMR6_DESC : MMR6Arch<"sync">, MipsR6Inst { dag OutOperandList = (outs); dag InOperandList = (ins uimm5:$stype); string AsmString = !strconcat("sync", "\t$stype"); list Pattern = [(MipsSync immZExt5:$stype)]; InstrItinClass Itinerary = II_SYNC; bit HasSideEffects = 1; } class SYNCI_MMR6_DESC : SYNCI_FT<"synci", mem_mm_16> { let DecoderMethod = "DecodeSynciR6"; } class RDPGPR_MMR6_DESC : MMR6Arch<"rdpgpr">, MipsR6Inst { dag OutOperandList = (outs GPR32Opnd:$rt); dag InOperandList = (ins GPR32Opnd:$rd); string AsmString = !strconcat("rdpgpr", "\t$rt, $rd"); InstrItinClass Itinerary = II_RDPGPR; } class SDBBP_MMR6_DESC : MipsR6Inst { dag OutOperandList = (outs); dag InOperandList = (ins uimm20:$code_); string AsmString = !strconcat("sdbbp", "\t$code_"); list Pattern = []; InstrItinClass Itinerary = II_SDBBP; } class LWM16_MMR6_DESC : MicroMipsInst16<(outs reglist16:$rt), (ins mem_mm_4sp:$addr), !strconcat("lwm16", "\t$rt, $addr"), [], II_LWM, FrmI>, MMR6Arch<"lwm16"> { let DecoderMethod = "DecodeMemMMReglistImm4Lsl2"; let mayLoad = 1; ComplexPattern Addr = addr; } class SWM16_MMR6_DESC : MicroMipsInst16<(outs), (ins reglist16:$rt, mem_mm_4sp:$addr), 
!strconcat("swm16", "\t$rt, $addr"), [], II_SWM, FrmI>, MMR6Arch<"swm16"> { let DecoderMethod = "DecodeMemMMReglistImm4Lsl2"; let mayStore = 1; ComplexPattern Addr = addr; } class SB16_MMR6_DESC_BASE : MicroMipsInst16<(outs), (ins RTOpnd:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI>, MMR6Arch { let DecoderMethod = "DecodeMemMMImm4"; let mayStore = 1; } class SB16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sb16", GPRMM16OpndZero, GPRMM16Opnd, truncstorei8, II_SB, mem_mm_4>; class SH16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sh16", GPRMM16OpndZero, GPRMM16Opnd, truncstorei16, II_SH, mem_mm_4_lsl1>; class SW16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sw16", GPRMM16OpndZero, GPRMM16Opnd, store, II_SW, mem_mm_4_lsl2>; class SWSP_MMR6_DESC : MicroMipsInst16<(outs), (ins GPR32Opnd:$rt, mem_mm_sp_imm5_lsl2:$offset), !strconcat("sw", "\t$rt, $offset"), [], II_SW, FrmI>, MMR6Arch<"sw"> { let DecoderMethod = "DecodeMemMMSPImm5Lsl2"; let mayStore = 1; } class JALRC_HB_MMR6_DESC { dag OutOperandList = (outs GPR32Opnd:$rt); dag InOperandList = (ins GPR32Opnd:$rs); string AsmString = !strconcat("jalrc.hb", "\t$rt, $rs"); list Pattern = []; InstrItinClass Itinerary = II_JALR_HB; Format Form = FrmJ; bit isIndirectBranch = 1; bit hasDelaySlot = 0; } class TLBINV_MMR6_DESC_BASE { dag OutOperandList = (outs); dag InOperandList = (ins); string AsmString = opstr; list Pattern = []; InstrItinClass Itinerary = Itin; } class TLBINV_MMR6_DESC : TLBINV_MMR6_DESC_BASE<"tlbinv", II_TLBINV>; class TLBINVF_MMR6_DESC : TLBINV_MMR6_DESC_BASE<"tlbinvf", II_TLBINVF>; class DVPEVP_MMR6_DESC_BASE { dag OutOperandList = (outs GPR32Opnd:$rs); dag InOperandList = (ins); string AsmString = !strconcat(opstr, "\t$rs"); list Pattern = []; InstrItinClass Itinerary = Itin; bit hasUnModeledSideEffects = 1; } class DVP_MMR6_DESC : DVPEVP_MMR6_DESC_BASE<"dvp", II_DVP>; class EVP_MMR6_DESC : DVPEVP_MMR6_DESC_BASE<"evp", II_EVP>; class BEQZC_MMR6_DESC : CMP_CBR_EQNE_Z_DESC_BASE<"beqzc", brtarget21_mm, GPR32Opnd>, MMR6Arch<"beqzc">; class BNEZC_MMR6_DESC : CMP_CBR_EQNE_Z_DESC_BASE<"bnezc", brtarget21_mm, GPR32Opnd>, MMR6Arch<"bnezc">; class BRANCH_COP1_MMR6_DESC_BASE : InstSE<(outs), (ins FGR64Opnd:$rt, brtarget_mm:$offset), !strconcat(opstr, "\t$rt, $offset"), [], II_BC1CCZ, FrmI>, HARDFLOAT, BRANCH_DESC_BASE { list Defs = [AT]; } class BC1EQZC_MMR6_DESC : BRANCH_COP1_MMR6_DESC_BASE<"bc1eqzc">; class BC1NEZC_MMR6_DESC : BRANCH_COP1_MMR6_DESC_BASE<"bc1nezc">; class BRANCH_COP2_MMR6_DESC_BASE : BRANCH_DESC_BASE { dag InOperandList = (ins COP2Opnd:$rt, brtarget_mm:$offset); dag OutOperandList = (outs); string AsmString = !strconcat(opstr, "\t$rt, $offset"); list Defs = [AT]; InstrItinClass Itinerary = Itin; } class BC2EQZC_MMR6_DESC : BRANCH_COP2_MMR6_DESC_BASE<"bc2eqzc", II_BC2CCZ>; class BC2NEZC_MMR6_DESC : BRANCH_COP2_MMR6_DESC_BASE<"bc2nezc", II_BC2CCZ>; class EXT_MMR6_DESC { dag OutOperandList = (outs GPR32Opnd:$rt); dag InOperandList = (ins GPR32Opnd:$rs, uimm5:$pos, uimm5_plus1:$size); string AsmString = !strconcat("ext", "\t$rt, $rs, $pos, $size"); list Pattern = [(set GPR32Opnd:$rt, (MipsExt GPR32Opnd:$rs, imm:$pos, imm:$size))]; InstrItinClass Itinerary = II_EXT; Format Form = FrmR; string BaseOpcode = "ext"; } class INS_MMR6_DESC { dag OutOperandList = (outs GPR32Opnd:$rt); dag InOperandList = (ins GPR32Opnd:$rs, uimm5:$pos, uimm5_inssize_plus1:$size, GPR32Opnd:$src); string AsmString = !strconcat("ins", "\t$rt, $rs, $pos, $size"); list Pattern = [(set GPR32Opnd:$rt, (MipsIns GPR32Opnd:$rs, imm:$pos, imm:$size, GPR32Opnd:$src))]; 
InstrItinClass Itinerary = II_INS; Format Form = FrmR; string BaseOpcode = "ins"; string Constraints = "$src = $rt"; } class JALRC_MMR6_DESC { dag OutOperandList = (outs GPR32Opnd:$rt); dag InOperandList = (ins GPR32Opnd:$rs); string AsmString = !strconcat("jalrc", "\t$rt, $rs"); list Pattern = []; InstrItinClass Itinerary = II_JALRC; bit isCall = 1; bit hasDelaySlot = 0; list Defs = [RA]; } class BOVC_BNVC_MMR6_DESC_BASE : BRANCH_DESC_BASE { dag InOperandList = (ins GPROpnd:$rt, GPROpnd:$rs, opnd:$offset); dag OutOperandList = (outs); string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $offset"); list Defs = [AT]; InstrItinClass Itinerary = II_BCCC; } class BOVC_MMR6_DESC : BOVC_BNVC_MMR6_DESC_BASE<"bovc", brtargetr6, GPR32Opnd>; class BNVC_MMR6_DESC : BOVC_BNVC_MMR6_DESC_BASE<"bnvc", brtargetr6, GPR32Opnd>; //===----------------------------------------------------------------------===// // // Instruction Definitions // //===----------------------------------------------------------------------===// let DecoderNamespace = "MicroMipsR6" in { def ADD_MMR6 : StdMMR6Rel, ADD_MMR6_DESC, ADD_MMR6_ENC, ISA_MICROMIPS32R6; def ADDIU_MMR6 : StdMMR6Rel, ADDIU_MMR6_DESC, ADDIU_MMR6_ENC, ISA_MICROMIPS32R6; def ADDU_MMR6 : StdMMR6Rel, ADDU_MMR6_DESC, ADDU_MMR6_ENC, ISA_MICROMIPS32R6; def ADDIUPC_MMR6 : R6MMR6Rel, ADDIUPC_MMR6_ENC, ADDIUPC_MMR6_DESC, ISA_MICROMIPS32R6; def ALUIPC_MMR6 : R6MMR6Rel, ALUIPC_MMR6_ENC, ALUIPC_MMR6_DESC, ISA_MICROMIPS32R6; def AND_MMR6 : StdMMR6Rel, AND_MMR6_DESC, AND_MMR6_ENC, ISA_MICROMIPS32R6; def ANDI_MMR6 : StdMMR6Rel, ANDI_MMR6_DESC, ANDI_MMR6_ENC, ISA_MICROMIPS32R6; def AUIPC_MMR6 : R6MMR6Rel, AUIPC_MMR6_ENC, AUIPC_MMR6_DESC, ISA_MICROMIPS32R6; def ALIGN_MMR6 : R6MMR6Rel, ALIGN_MMR6_ENC, ALIGN_MMR6_DESC, ISA_MICROMIPS32R6; def AUI_MMR6 : R6MMR6Rel, AUI_MMR6_ENC, AUI_MMR6_DESC, ISA_MICROMIPS32R6; def BALC_MMR6 : R6MMR6Rel, BALC_MMR6_ENC, BALC_MMR6_DESC, ISA_MICROMIPS32R6; def BC_MMR6 : R6MMR6Rel, BC_MMR6_ENC, BC_MMR6_DESC, ISA_MICROMIPS32R6; def BC16_MMR6 : StdMMR6Rel, BC16_MMR6_DESC, BC16_MMR6_ENC, ISA_MICROMIPS32R6; def BEQZC_MMR6 : R6MMR6Rel, BEQZC_MMR6_ENC, BEQZC_MMR6_DESC, ISA_MICROMIPS32R6; def BEQZC16_MMR6 : StdMMR6Rel, BEQZC16_MMR6_DESC, BEQZC16_MMR6_ENC, ISA_MICROMIPS32R6; def BNEZC_MMR6 : R6MMR6Rel, BNEZC_MMR6_ENC, BNEZC_MMR6_DESC, ISA_MICROMIPS32R6; def BNEZC16_MMR6 : StdMMR6Rel, BNEZC16_MMR6_DESC, BNEZC16_MMR6_ENC, ISA_MICROMIPS32R6; def BITSWAP_MMR6 : R6MMR6Rel, BITSWAP_MMR6_ENC, BITSWAP_MMR6_DESC, ISA_MICROMIPS32R6; def BEQZALC_MMR6 : R6MMR6Rel, BEQZALC_MMR6_ENC, BEQZALC_MMR6_DESC, ISA_MICROMIPS32R6; def BNEZALC_MMR6 : R6MMR6Rel, BNEZALC_MMR6_ENC, BNEZALC_MMR6_DESC, ISA_MICROMIPS32R6; def BREAK_MMR6 : StdMMR6Rel, BRK_MMR6_DESC, BRK_MMR6_ENC, ISA_MICROMIPS32R6; def CACHE_MMR6 : R6MMR6Rel, CACHE_MMR6_ENC, CACHE_MMR6_DESC, ISA_MICROMIPS32R6; def CLO_MMR6 : R6MMR6Rel, CLO_MMR6_ENC, CLO_MMR6_DESC, ISA_MICROMIPS32R6; def CLZ_MMR6 : R6MMR6Rel, CLZ_MMR6_ENC, CLZ_MMR6_DESC, ISA_MICROMIPS32R6; def DIV_MMR6 : R6MMR6Rel, DIV_MMR6_DESC, DIV_MMR6_ENC, ISA_MICROMIPS32R6; def DIVU_MMR6 : R6MMR6Rel, DIVU_MMR6_DESC, DIVU_MMR6_ENC, ISA_MICROMIPS32R6; def EHB_MMR6 : StdMMR6Rel, EHB_MMR6_DESC, EHB_MMR6_ENC, ISA_MICROMIPS32R6; def EI_MMR6 : StdMMR6Rel, EI_MMR6_DESC, EI_MMR6_ENC, ISA_MICROMIPS32R6; def DI_MMR6 : StdMMR6Rel, DI_MMR6_DESC, DI_MMR6_ENC, ISA_MICROMIPS32R6; def ERET_MMR6 : StdMMR6Rel, ERET_MMR6_DESC, ERET_MMR6_ENC, ISA_MICROMIPS32R6; def DERET_MMR6 : StdMMR6Rel, DERET_MMR6_DESC, DERET_MMR6_ENC, ISA_MICROMIPS32R6; def ERETNC_MMR6 : R6MMR6Rel, ERETNC_MMR6_DESC, 
ERETNC_MMR6_ENC, ISA_MICROMIPS32R6; def GINVI_MMR6 : R6MMR6Rel, GINVI_MMR6_ENC, GINVI_MMR6_DESC, ISA_MICROMIPS32R6, ASE_GINV; def GINVT_MMR6 : R6MMR6Rel, GINVT_MMR6_ENC, GINVT_MMR6_DESC, ISA_MICROMIPS32R6, ASE_GINV; let FastISelShouldIgnore = 1 in def JALRC16_MMR6 : R6MMR6Rel, JALRC16_MMR6_DESC, JALRC16_MMR6_ENC, ISA_MICROMIPS32R6; def JIALC_MMR6 : R6MMR6Rel, JIALC_MMR6_ENC, JIALC_MMR6_DESC, ISA_MICROMIPS32R6; def JIC_MMR6 : R6MMR6Rel, JIC_MMR6_ENC, JIC_MMR6_DESC, ISA_MICROMIPS32R6; def JRC16_MMR6 : R6MMR6Rel, JRC16_MMR6_DESC, JRC16_MMR6_ENC, ISA_MICROMIPS32R6; def JRCADDIUSP_MMR6 : R6MMR6Rel, JRCADDIUSP_MMR6_DESC, JRCADDIUSP_MMR6_ENC, ISA_MICROMIPS32R6; def LSA_MMR6 : R6MMR6Rel, LSA_MMR6_ENC, LSA_MMR6_DESC, ISA_MICROMIPS32R6; def LWPC_MMR6 : R6MMR6Rel, LWPC_MMR6_ENC, LWPC_MMR6_DESC, ISA_MICROMIPS32R6; def LWM16_MMR6 : StdMMR6Rel, LWM16_MMR6_DESC, LWM16_MMR6_ENC, ISA_MICROMIPS32R6; def MTC0_MMR6 : StdMMR6Rel, MTC0_MMR6_ENC, MTC0_MMR6_DESC, ISA_MICROMIPS32R6; def MTC1_MMR6 : StdMMR6Rel, MTC1_MMR6_DESC, MTC1_MMR6_ENC, ISA_MICROMIPS32R6; def MTC2_MMR6 : StdMMR6Rel, MTC2_MMR6_ENC, MTC2_MMR6_DESC, ISA_MICROMIPS32R6; def MTHC0_MMR6 : R6MMR6Rel, MTHC0_MMR6_ENC, MTHC0_MMR6_DESC, ISA_MICROMIPS32R6; def MTHC2_MMR6 : StdMMR6Rel, MTHC2_MMR6_ENC, MTHC2_MMR6_DESC, ISA_MICROMIPS32R6; def MFC0_MMR6 : StdMMR6Rel, MFC0_MMR6_ENC, MFC0_MMR6_DESC, ISA_MICROMIPS32R6; def MFC1_MMR6 : StdMMR6Rel, MFC1_MMR6_DESC, MFC1_MMR6_ENC, ISA_MICROMIPS32R6; def MFC2_MMR6 : StdMMR6Rel, MFC2_MMR6_ENC, MFC2_MMR6_DESC, ISA_MICROMIPS32R6; def MFHC0_MMR6 : R6MMR6Rel, MFHC0_MMR6_ENC, MFHC0_MMR6_DESC, ISA_MICROMIPS32R6; def MFHC2_MMR6 : StdMMR6Rel, MFHC2_MMR6_ENC, MFHC2_MMR6_DESC, ISA_MICROMIPS32R6; def MOD_MMR6 : R6MMR6Rel, MOD_MMR6_DESC, MOD_MMR6_ENC, ISA_MICROMIPS32R6; def MODU_MMR6 : R6MMR6Rel, MODU_MMR6_DESC, MODU_MMR6_ENC, ISA_MICROMIPS32R6; def MUL_MMR6 : R6MMR6Rel, MUL_MMR6_DESC, MUL_MMR6_ENC, ISA_MICROMIPS32R6; def MUH_MMR6 : R6MMR6Rel, MUH_MMR6_DESC, MUH_MMR6_ENC, ISA_MICROMIPS32R6; def MULU_MMR6 : R6MMR6Rel, MULU_MMR6_DESC, MULU_MMR6_ENC, ISA_MICROMIPS32R6; def MUHU_MMR6 : R6MMR6Rel, MUHU_MMR6_DESC, MUHU_MMR6_ENC, ISA_MICROMIPS32R6; def NOR_MMR6 : StdMMR6Rel, NOR_MMR6_DESC, NOR_MMR6_ENC, ISA_MICROMIPS32R6; def OR_MMR6 : StdMMR6Rel, OR_MMR6_DESC, OR_MMR6_ENC, ISA_MICROMIPS32R6; def ORI_MMR6 : StdMMR6Rel, ORI_MMR6_DESC, ORI_MMR6_ENC, ISA_MICROMIPS32R6; def PREF_MMR6 : R6MMR6Rel, PREF_MMR6_ENC, PREF_MMR6_DESC, ISA_MICROMIPS32R6; def SB16_MMR6 : StdMMR6Rel, SB16_MMR6_DESC, SB16_MMR6_ENC, ISA_MICROMIPS32R6; def SELEQZ_MMR6 : R6MMR6Rel, SELEQZ_MMR6_ENC, SELEQZ_MMR6_DESC, ISA_MICROMIPS32R6; def SELNEZ_MMR6 : R6MMR6Rel, SELNEZ_MMR6_ENC, SELNEZ_MMR6_DESC, ISA_MICROMIPS32R6; def SH16_MMR6 : StdMMR6Rel, SH16_MMR6_DESC, SH16_MMR6_ENC, ISA_MICROMIPS32R6; def SLL_MMR6 : StdMMR6Rel, SLL_MMR6_DESC, SLL_MMR6_ENC, ISA_MICROMIPS32R6; def SUB_MMR6 : StdMMR6Rel, SUB_MMR6_DESC, SUB_MMR6_ENC, ISA_MICROMIPS32R6; def SUBU_MMR6 : StdMMR6Rel, SUBU_MMR6_DESC, SUBU_MMR6_ENC, ISA_MICROMIPS32R6; def SW16_MMR6 : StdMMR6Rel, SW16_MMR6_DESC, SW16_MMR6_ENC, ISA_MICROMIPS32R6; def SWM16_MMR6 : StdMMR6Rel, SWM16_MMR6_DESC, SWM16_MMR6_ENC, ISA_MICROMIPS32R6; def SWSP_MMR6 : StdMMR6Rel, SWSP_MMR6_DESC, SWSP_MMR6_ENC, ISA_MICROMIPS32R6; def WRPGPR_MMR6 : StdMMR6Rel, WRPGPR_MMR6_ENC, WRPGPR_MMR6_DESC, ISA_MICROMIPS32R6; def WSBH_MMR6 : StdMMR6Rel, WSBH_MMR6_ENC, WSBH_MMR6_DESC, ISA_MICROMIPS32R6; def LB_MMR6 : R6MMR6Rel, LB_MMR6_ENC, LB_MMR6_DESC, ISA_MICROMIPS32R6; def LBU_MMR6 : R6MMR6Rel, LBU_MMR6_ENC, LBU_MMR6_DESC, ISA_MICROMIPS32R6; def PAUSE_MMR6 : 
StdMMR6Rel, PAUSE_MMR6_DESC, PAUSE_MMR6_ENC, ISA_MICROMIPS32R6; def RDHWR_MMR6 : R6MMR6Rel, RDHWR_MMR6_DESC, RDHWR_MMR6_ENC, ISA_MICROMIPS32R6; def WAIT_MMR6 : StdMMR6Rel, WAIT_MMR6_DESC, WAIT_MMR6_ENC, ISA_MICROMIPS32R6; def SSNOP_MMR6 : StdMMR6Rel, SSNOP_MMR6_DESC, SSNOP_MMR6_ENC, ISA_MICROMIPS32R6; def SYNC_MMR6 : StdMMR6Rel, SYNC_MMR6_DESC, SYNC_MMR6_ENC, ISA_MICROMIPS32R6; def SYNCI_MMR6 : StdMMR6Rel, SYNCI_MMR6_DESC, SYNCI_MMR6_ENC, ISA_MICROMIPS32R6; def RDPGPR_MMR6 : R6MMR6Rel, RDPGPR_MMR6_DESC, RDPGPR_MMR6_ENC, ISA_MICROMIPS32R6; def SDBBP_MMR6 : R6MMR6Rel, SDBBP_MMR6_DESC, SDBBP_MMR6_ENC, ISA_MICROMIPS32R6; def XOR_MMR6 : StdMMR6Rel, XOR_MMR6_DESC, XOR_MMR6_ENC, ISA_MICROMIPS32R6; def XORI_MMR6 : StdMMR6Rel, XORI_MMR6_DESC, XORI_MMR6_ENC, ISA_MICROMIPS32R6; let DecoderMethod = "DecodeMemMMImm16" in { def SW_MMR6 : StdMMR6Rel, SW_MMR6_DESC, SW_MMR6_ENC, ISA_MICROMIPS32R6; } /// Floating Point Instructions def FADD_S_MMR6 : StdMMR6Rel, FADD_S_MMR6_ENC, FADD_S_MMR6_DESC, ISA_MICROMIPS32R6; def FSUB_S_MMR6 : StdMMR6Rel, FSUB_S_MMR6_ENC, FSUB_S_MMR6_DESC, ISA_MICROMIPS32R6; def FMUL_S_MMR6 : StdMMR6Rel, FMUL_S_MMR6_ENC, FMUL_S_MMR6_DESC, ISA_MICROMIPS32R6; def FDIV_S_MMR6 : StdMMR6Rel, FDIV_S_MMR6_ENC, FDIV_S_MMR6_DESC, ISA_MICROMIPS32R6; def MADDF_S_MMR6 : R6MMR6Rel, MADDF_S_MMR6_ENC, MADDF_S_MMR6_DESC, ISA_MICROMIPS32R6; def MADDF_D_MMR6 : R6MMR6Rel, MADDF_D_MMR6_ENC, MADDF_D_MMR6_DESC, ISA_MICROMIPS32R6; def MSUBF_S_MMR6 : R6MMR6Rel, MSUBF_S_MMR6_ENC, MSUBF_S_MMR6_DESC, ISA_MICROMIPS32R6; def MSUBF_D_MMR6 : R6MMR6Rel, MSUBF_D_MMR6_ENC, MSUBF_D_MMR6_DESC, ISA_MICROMIPS32R6; def FMOV_S_MMR6 : StdMMR6Rel, FMOV_S_MMR6_ENC, FMOV_S_MMR6_DESC, ISA_MICROMIPS32R6; def FNEG_S_MMR6 : StdMMR6Rel, FNEG_S_MMR6_ENC, FNEG_S_MMR6_DESC, ISA_MICROMIPS32R6; def MAX_S_MMR6 : R6MMR6Rel, MAX_S_MMR6_ENC, MAX_S_MMR6_DESC, ISA_MICROMIPS32R6; def MAX_D_MMR6 : R6MMR6Rel, MAX_D_MMR6_ENC, MAX_D_MMR6_DESC, ISA_MICROMIPS32R6; def MIN_S_MMR6 : R6MMR6Rel, MIN_S_MMR6_ENC, MIN_S_MMR6_DESC, ISA_MICROMIPS32R6; def MIN_D_MMR6 : R6MMR6Rel, MIN_D_MMR6_ENC, MIN_D_MMR6_DESC, ISA_MICROMIPS32R6; def MAXA_S_MMR6 : R6MMR6Rel, MAXA_S_MMR6_ENC, MAXA_S_MMR6_DESC, ISA_MICROMIPS32R6; def MAXA_D_MMR6 : R6MMR6Rel, MAXA_D_MMR6_ENC, MAXA_D_MMR6_DESC, ISA_MICROMIPS32R6; def MINA_S_MMR6 : R6MMR6Rel, MINA_S_MMR6_ENC, MINA_S_MMR6_DESC, ISA_MICROMIPS32R6; def MINA_D_MMR6 : R6MMR6Rel, MINA_D_MMR6_ENC, MINA_D_MMR6_DESC, ISA_MICROMIPS32R6; def CVT_L_S_MMR6 : StdMMR6Rel, CVT_L_S_MMR6_ENC, CVT_L_S_MMR6_DESC, ISA_MICROMIPS32R6; def CVT_L_D_MMR6 : StdMMR6Rel, CVT_L_D_MMR6_ENC, CVT_L_D_MMR6_DESC, ISA_MICROMIPS32R6; def CVT_W_S_MMR6 : StdMMR6Rel, CVT_W_S_MMR6_ENC, CVT_W_S_MMR6_DESC, ISA_MICROMIPS32R6; def CVT_D_L_MMR6 : StdMMR6Rel, CVT_D_L_MMR6_ENC, CVT_D_L_MMR6_DESC, ISA_MICROMIPS32R6; def CVT_S_W_MMR6 : StdMMR6Rel, CVT_S_W_MMR6_ENC, CVT_S_W_MMR6_DESC, ISA_MICROMIPS32R6; def CVT_S_L_MMR6 : StdMMR6Rel, CVT_S_L_MMR6_ENC, CVT_S_L_MMR6_DESC, ISA_MICROMIPS32R6; defm S_MMR6 : CMP_CC_MMR6<0b000101, "s", FGR32Opnd, II_CMP_CC_S>; defm D_MMR6 : CMP_CC_MMR6<0b010101, "d", FGR64Opnd, II_CMP_CC_D>; def FLOOR_L_S_MMR6 : StdMMR6Rel, FLOOR_L_S_MMR6_ENC, FLOOR_L_S_MMR6_DESC, ISA_MICROMIPS32R6; def FLOOR_L_D_MMR6 : StdMMR6Rel, FLOOR_L_D_MMR6_ENC, FLOOR_L_D_MMR6_DESC, ISA_MICROMIPS32R6; def FLOOR_W_S_MMR6 : StdMMR6Rel, FLOOR_W_S_MMR6_ENC, FLOOR_W_S_MMR6_DESC, ISA_MICROMIPS32R6; def FLOOR_W_D_MMR6 : StdMMR6Rel, FLOOR_W_D_MMR6_ENC, FLOOR_W_D_MMR6_DESC, ISA_MICROMIPS32R6; def CEIL_L_S_MMR6 : StdMMR6Rel, CEIL_L_S_MMR6_ENC, CEIL_L_S_MMR6_DESC, ISA_MICROMIPS32R6; def 
CEIL_L_D_MMR6 : StdMMR6Rel, CEIL_L_D_MMR6_ENC, CEIL_L_D_MMR6_DESC, ISA_MICROMIPS32R6; def CEIL_W_S_MMR6 : StdMMR6Rel, CEIL_W_S_MMR6_ENC, CEIL_W_S_MMR6_DESC, ISA_MICROMIPS32R6; def CEIL_W_D_MMR6 : StdMMR6Rel, CEIL_W_D_MMR6_ENC, CEIL_W_D_MMR6_DESC, ISA_MICROMIPS32R6; def TRUNC_L_S_MMR6 : StdMMR6Rel, TRUNC_L_S_MMR6_ENC, TRUNC_L_S_MMR6_DESC, ISA_MICROMIPS32R6; def TRUNC_L_D_MMR6 : StdMMR6Rel, TRUNC_L_D_MMR6_ENC, TRUNC_L_D_MMR6_DESC, ISA_MICROMIPS32R6; def TRUNC_W_S_MMR6 : StdMMR6Rel, TRUNC_W_S_MMR6_ENC, TRUNC_W_S_MMR6_DESC, ISA_MICROMIPS32R6; def TRUNC_W_D_MMR6 : StdMMR6Rel, TRUNC_W_D_MMR6_ENC, TRUNC_W_D_MMR6_DESC, ISA_MICROMIPS32R6; def SB_MMR6 : StdMMR6Rel, SB_MMR6_DESC, SB_MMR6_ENC, ISA_MICROMIPS32R6; def SH_MMR6 : StdMMR6Rel, SH_MMR6_DESC, SH_MMR6_ENC, ISA_MICROMIPS32R6; def LW_MMR6 : StdMMR6Rel, LW_MMR6_DESC, LW_MMR6_ENC, ISA_MICROMIPS32R6; def LUI_MMR6 : R6MMR6Rel, LUI_MMR6_DESC, LUI_MMR6_ENC, ISA_MICROMIPS32R6; def ADDU16_MMR6 : StdMMR6Rel, ADDU16_MMR6_DESC, ADDU16_MMR6_ENC, ISA_MICROMIPS32R6; def AND16_MMR6 : StdMMR6Rel, AND16_MMR6_DESC, AND16_MMR6_ENC, ISA_MICROMIPS32R6; def ANDI16_MMR6 : StdMMR6Rel, ANDI16_MMR6_DESC, ANDI16_MMR6_ENC, ISA_MICROMIPS32R6; def NOT16_MMR6 : StdMMR6Rel, NOT16_MMR6_DESC, NOT16_MMR6_ENC, ISA_MICROMIPS32R6; def OR16_MMR6 : StdMMR6Rel, OR16_MMR6_DESC, OR16_MMR6_ENC, ISA_MICROMIPS32R6; def SLL16_MMR6 : StdMMR6Rel, SLL16_MMR6_DESC, SLL16_MMR6_ENC, ISA_MICROMIPS32R6; def SRL16_MMR6 : StdMMR6Rel, SRL16_MMR6_DESC, SRL16_MMR6_ENC, ISA_MICROMIPS32R6; def BREAK16_MMR6 : StdMMR6Rel, BREAK16_MMR6_DESC, BREAK16_MMR6_ENC, ISA_MICROMIPS32R6; def LI16_MMR6 : StdMMR6Rel, LI16_MMR6_DESC, LI16_MMR6_ENC, ISA_MICROMIPS32R6; def MOVE16_MMR6 : StdMMR6Rel, MOVE16_MMR6_DESC, MOVE16_MMR6_ENC, ISA_MICROMIPS32R6; def MOVEP_MMR6 : StdMMR6Rel, MOVEP_MMR6_DESC, MOVEP_MMR6_ENC, ISA_MICROMIPS32R6; def SDBBP16_MMR6 : StdMMR6Rel, SDBBP16_MMR6_DESC, SDBBP16_MMR6_ENC, ISA_MICROMIPS32R6; def SUBU16_MMR6 : StdMMR6Rel, SUBU16_MMR6_DESC, SUBU16_MMR6_ENC, ISA_MICROMIPS32R6; def XOR16_MMR6 : StdMMR6Rel, XOR16_MMR6_DESC, XOR16_MMR6_ENC, ISA_MICROMIPS32R6; def JALRC_HB_MMR6 : R6MMR6Rel, JALRC_HB_MMR6_ENC, JALRC_HB_MMR6_DESC, ISA_MICROMIPS32R6; def EXT_MMR6 : StdMMR6Rel, EXT_MMR6_ENC, EXT_MMR6_DESC, ISA_MICROMIPS32R6; def INS_MMR6 : StdMMR6Rel, INS_MMR6_ENC, INS_MMR6_DESC, ISA_MICROMIPS32R6; def JALRC_MMR6 : R6MMR6Rel, JALRC_MMR6_ENC, JALRC_MMR6_DESC, ISA_MICROMIPS32R6; def RINT_S_MMR6 : StdMMR6Rel, RINT_S_MMR6_ENC, RINT_S_MMR6_DESC, ISA_MICROMIPS32R6; def RINT_D_MMR6 : StdMMR6Rel, RINT_D_MMR6_ENC, RINT_D_MMR6_DESC, ISA_MICROMIPS32R6; def ROUND_L_S_MMR6 : StdMMR6Rel, ROUND_L_S_MMR6_ENC, ROUND_L_S_MMR6_DESC, ISA_MICROMIPS32R6; def ROUND_L_D_MMR6 : StdMMR6Rel, ROUND_L_D_MMR6_ENC, ROUND_L_D_MMR6_DESC, ISA_MICROMIPS32R6; def ROUND_W_S_MMR6 : StdMMR6Rel, ROUND_W_S_MMR6_ENC, ROUND_W_S_MMR6_DESC, ISA_MICROMIPS32R6; def ROUND_W_D_MMR6 : StdMMR6Rel, ROUND_W_D_MMR6_ENC, ROUND_W_D_MMR6_DESC, ISA_MICROMIPS32R6; def SEL_S_MMR6 : R6MMR6Rel, SEL_S_MMR6_ENC, SEL_S_MMR6_DESC, ISA_MICROMIPS32R6; def SEL_D_MMR6 : R6MMR6Rel, SEL_D_MMR6_ENC, SEL_D_MMR6_DESC, ISA_MICROMIPS32R6; def SELEQZ_S_MMR6 : R6MMR6Rel, SELEQZ_S_MMR6_ENC, SELEQZ_S_MMR6_DESC, ISA_MICROMIPS32R6; def SELEQZ_D_MMR6 : R6MMR6Rel, SELEQZ_D_MMR6_ENC, SELEQZ_D_MMR6_DESC, ISA_MICROMIPS32R6; def SELNEZ_S_MMR6 : R6MMR6Rel, SELNEZ_S_MMR6_ENC, SELNEZ_S_MMR6_DESC, ISA_MICROMIPS32R6; def SELNEZ_D_MMR6 : R6MMR6Rel, SELNEZ_D_MMR6_ENC, SELNEZ_D_MMR6_DESC, ISA_MICROMIPS32R6; def CLASS_S_MMR6 : StdMMR6Rel, CLASS_S_MMR6_ENC, CLASS_S_MMR6_DESC, ISA_MICROMIPS32R6; def 
CLASS_D_MMR6 : StdMMR6Rel, CLASS_D_MMR6_ENC, CLASS_D_MMR6_DESC, ISA_MICROMIPS32R6; def TLBINV_MMR6 : StdMMR6Rel, TLBINV_MMR6_ENC, TLBINV_MMR6_DESC, ISA_MICROMIPS32R6; def TLBINVF_MMR6 : StdMMR6Rel, TLBINVF_MMR6_ENC, TLBINVF_MMR6_DESC, ISA_MICROMIPS32R6; def DVP_MMR6 : R6MMR6Rel, DVP_MMR6_ENC, DVP_MMR6_DESC, ISA_MICROMIPS32R6; def EVP_MMR6 : R6MMR6Rel, EVP_MMR6_ENC, EVP_MMR6_DESC, ISA_MICROMIPS32R6; def BC1EQZC_MMR6 : R6MMR6Rel, BC1EQZC_MMR6_DESC, BC1EQZC_MMR6_ENC, ISA_MICROMIPS32R6; def BC1NEZC_MMR6 : R6MMR6Rel, BC1NEZC_MMR6_DESC, BC1NEZC_MMR6_ENC, ISA_MICROMIPS32R6; def BC2EQZC_MMR6 : R6MMR6Rel, MipsR6Inst, BC2EQZC_MMR6_ENC, BC2EQZC_MMR6_DESC, ISA_MICROMIPS32R6; def BC2NEZC_MMR6 : R6MMR6Rel, MipsR6Inst, BC2NEZC_MMR6_ENC, BC2NEZC_MMR6_DESC, ISA_MICROMIPS32R6; let DecoderNamespace = "MicroMipsFP64" in { def LDC1_D64_MMR6 : StdMMR6Rel, LDC1_D64_MMR6_DESC, LDC1_MMR6_ENC, ISA_MICROMIPS32R6 { let BaseOpcode = "LDC164"; } def SDC1_D64_MMR6 : StdMMR6Rel, SDC1_D64_MMR6_DESC, SDC1_MMR6_ENC, ISA_MICROMIPS32R6; } def LDC2_MMR6 : StdMMR6Rel, LDC2_MMR6_ENC, LDC2_MMR6_DESC, ISA_MICROMIPS32R6; def SDC2_MMR6 : StdMMR6Rel, SDC2_MMR6_ENC, SDC2_MMR6_DESC, ISA_MICROMIPS32R6; def LWC2_MMR6 : StdMMR6Rel, LWC2_MMR6_ENC, LWC2_MMR6_DESC, ISA_MICROMIPS32R6; def SWC2_MMR6 : StdMMR6Rel, SWC2_MMR6_ENC, SWC2_MMR6_DESC, ISA_MICROMIPS32R6; def LL_MMR6 : R6MMR6Rel, LL_MMR6_ENC, LL_MMR6_DESC, ISA_MICROMIPS32R6; def SC_MMR6 : R6MMR6Rel, SC_MMR6_ENC, SC_MMR6_DESC, ISA_MICROMIPS32R6; } def BOVC_MMR6 : R6MMR6Rel, BOVC_MMR6_ENC, BOVC_MMR6_DESC, ISA_MICROMIPS32R6, MMDecodeDisambiguatedBy<"POP35GroupBranchMMR6">; def BNVC_MMR6 : R6MMR6Rel, BNVC_MMR6_ENC, BNVC_MMR6_DESC, ISA_MICROMIPS32R6, MMDecodeDisambiguatedBy<"POP37GroupBranchMMR6">; def BGEC_MMR6 : R6MMR6Rel, BGEC_MMR6_ENC, BGEC_MMR6_DESC, ISA_MICROMIPS32R6; def BGEUC_MMR6 : R6MMR6Rel, BGEUC_MMR6_ENC, BGEUC_MMR6_DESC, ISA_MICROMIPS32R6; def BLTC_MMR6 : R6MMR6Rel, BLTC_MMR6_ENC, BLTC_MMR6_DESC, ISA_MICROMIPS32R6; def BLTUC_MMR6 : R6MMR6Rel, BLTUC_MMR6_ENC, BLTUC_MMR6_DESC, ISA_MICROMIPS32R6; def BEQC_MMR6 : R6MMR6Rel, BEQC_MMR6_ENC, BEQC_MMR6_DESC, ISA_MICROMIPS32R6, DecodeDisambiguates<"POP35GroupBranchMMR6">; def BNEC_MMR6 : R6MMR6Rel, BNEC_MMR6_ENC, BNEC_MMR6_DESC, ISA_MICROMIPS32R6, DecodeDisambiguates<"POP37GroupBranchMMR6">; def BLTZC_MMR6 : R6MMR6Rel, BLTZC_MMR6_ENC, BLTZC_MMR6_DESC, ISA_MICROMIPS32R6; def BLEZC_MMR6 : R6MMR6Rel, BLEZC_MMR6_ENC, BLEZC_MMR6_DESC, ISA_MICROMIPS32R6; def BGEZC_MMR6 : R6MMR6Rel, BGEZC_MMR6_ENC, BGEZC_MMR6_DESC, ISA_MICROMIPS32R6; def BGTZC_MMR6 : R6MMR6Rel, BGTZC_MMR6_ENC, BGTZC_MMR6_DESC, ISA_MICROMIPS32R6; def BGEZALC_MMR6 : R6MMR6Rel, BGEZALC_MMR6_ENC, BGEZALC_MMR6_DESC, ISA_MICROMIPS32R6; def BGTZALC_MMR6 : R6MMR6Rel, BGTZALC_MMR6_ENC, BGTZALC_MMR6_DESC, ISA_MICROMIPS32R6; def BLEZALC_MMR6 : R6MMR6Rel, BLEZALC_MMR6_ENC, BLEZALC_MMR6_DESC, ISA_MICROMIPS32R6; def BLTZALC_MMR6 : R6MMR6Rel, BLTZALC_MMR6_ENC, BLTZALC_MMR6_DESC, ISA_MICROMIPS32R6; //===----------------------------------------------------------------------===// // // MicroMips instruction aliases // //===----------------------------------------------------------------------===// def : MipsInstAlias<"ei", (EI_MMR6 ZERO), 1>, ISA_MICROMIPS32R6; def : MipsInstAlias<"di", (DI_MMR6 ZERO), 1>, ISA_MICROMIPS32R6; def : MipsInstAlias<"nop", (SLL_MMR6 ZERO, ZERO, 0), 1>, ISA_MICROMIPS32R6; def B_MMR6_Pseudo : MipsAsmPseudoInst<(outs), (ins brtarget_mm:$offset), !strconcat("b", "\t$offset")> { string DecoderNamespace = "MicroMipsR6"; } def : MipsInstAlias<"sync", (SYNC_MMR6 0), 1>, 
ISA_MICROMIPS32R6;
def : MipsInstAlias<"sdbbp", (SDBBP_MMR6 0), 1>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"rdhwr $rt, $rs", (RDHWR_MMR6 GPR32Opnd:$rt, HWRegsOpnd:$rs, 0), 1>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"mtc0 $rt, $rs", (MTC0_MMR6 COP0Opnd:$rs, GPR32Opnd:$rt, 0), 0>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"mthc0 $rt, $rs", (MTHC0_MMR6 COP0Opnd:$rs, GPR32Opnd:$rt, 0), 0>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"mfc0 $rt, $rs", (MFC0_MMR6 GPR32Opnd:$rt, COP0Opnd:$rs, 0), 0>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"mfhc0 $rt, $rs", (MFHC0_MMR6 GPR32Opnd:$rt, COP0Opnd:$rs, 0), 0>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"jalrc.hb $rs", (JALRC_HB_MMR6 RA, GPR32Opnd:$rs), 1>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"jal $offset", (BALC_MMR6 brtarget26_mm:$offset), 0>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"dvp", (DVP_MMR6 ZERO), 0>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"evp", (EVP_MMR6 ZERO), 0>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"jalrc $rs", (JALRC_MMR6 RA, GPR32Opnd:$rs), 1>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"and $rs, $rt, $imm", (ANDI_MMR6 GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"and $rs, $imm", (ANDI_MMR6 GPR32Opnd:$rs, GPR32Opnd:$rs, uimm16:$imm), 0>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"or $rs, $rt, $imm", (ORI_MMR6 GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"or $rs, $imm", (ORI_MMR6 GPR32Opnd:$rs, GPR32Opnd:$rs, uimm16:$imm), 0>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"xor $rs, $rt, $imm", (XORI_MMR6 GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"xor $rs, $imm", (XORI_MMR6 GPR32Opnd:$rs, GPR32Opnd:$rs, uimm16:$imm), 0>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"not $rt, $rs", (NOR_MMR6 GPR32Opnd:$rt, GPR32Opnd:$rs, ZERO), 0>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"not $rt", (NOR_MMR6 GPR32Opnd:$rt, GPR32Opnd:$rt, ZERO), 0>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"lapc $rd, $imm", (ADDIUPC_MMR6 GPR32Opnd:$rd, simm19_lsl2:$imm)>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"neg $rt, $rs", (SUB_MMR6 GPR32Opnd:$rt, ZERO, GPR32Opnd:$rs), 1>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"neg $rt", (SUB_MMR6 GPR32Opnd:$rt, ZERO, GPR32Opnd:$rt), 1>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"negu $rt, $rs", (SUBU_MMR6 GPR32Opnd:$rt, ZERO, GPR32Opnd:$rs), 1>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"negu $rt", (SUBU_MMR6 GPR32Opnd:$rt, ZERO, GPR32Opnd:$rt), 1>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"beqz16 $rs, $offset", (BEQZC16_MMR6 GPRMM16Opnd:$rs, brtarget7_mm:$offset), 0>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"bnez16 $rs, $offset", (BNEZC16_MMR6 GPRMM16Opnd:$rs, brtarget7_mm:$offset), 0>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"b16 $offset", (BC16_MMR6 brtarget10_mm:$offset), 0>, ISA_MICROMIPS32R6;
//===----------------------------------------------------------------------===//
//
// MicroMips arbitrary patterns that map to one or more instructions
//
//===----------------------------------------------------------------------===//
def : MipsPat<(store GPRMM16:$src, addrimm4lsl2:$addr), (SW16_MMR6 GPRMM16:$src, addrimm4lsl2:$addr)>, ISA_MICROMIPS32R6;
def : MipsPat<(subc GPR32:$lhs, GPR32:$rhs), (SUBU_MMR6 GPR32:$lhs, GPR32:$rhs)>, ISA_MICROMIPS32R6;
def : MipsPat<(select i32:$cond, i32:$t, i32:$f), (OR_MM (SELNEZ_MMR6 i32:$t, i32:$cond), (SELEQZ_MMR6 i32:$f, i32:$cond))>, ISA_MICROMIPS32R6;
def : MipsPat<(select i32:$cond, i32:$t, immz), (SELNEZ_MMR6 i32:$t, i32:$cond)>, ISA_MICROMIPS32R6;
def : MipsPat<(select i32:$cond, immz, i32:$f), (SELEQZ_MMR6 i32:$f, i32:$cond)>, ISA_MICROMIPS32R6;
defm : SelectInt_Pats, ISA_MICROMIPS32R6;
defm S_MMR6 : Cmp_Pats, ISA_MICROMIPS32R6;
defm D_MMR6 : Cmp_Pats, ISA_MICROMIPS32R6;
def : MipsPat<(f32 fpimm0), (MTC1_MMR6 ZERO)>, ISA_MICROMIPS32R6;
-def : MipsPat<(f32 fpimm0neg), (FNEG_S_MMR6 (MTC1 ZERO))>, ISA_MICROMIPS32R6;
+def : MipsPat<(f32 fpimm0neg), (FNEG_S_MMR6 (MTC1_MMR6 ZERO))>, ISA_MICROMIPS32R6;
def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src), (TRUNC_W_D_MMR6 FGR64Opnd:$src)>, ISA_MICROMIPS32R6;
def : MipsPat<(and GPRMM16:$src, immZExtAndi16:$imm), (ANDI16_MMR6 GPRMM16:$src, immZExtAndi16:$imm)>, ISA_MICROMIPS32R6;
def : MipsPat<(and GPR32:$src, immZExt16:$imm), (ANDI_MMR6 GPR32:$src, immZExt16:$imm)>, ISA_MICROMIPS32R6;
def : MipsPat<(i32 immZExt16:$imm), (XORI_MMR6 ZERO, immZExt16:$imm)>, ISA_MICROMIPS32R6;
def : MipsPat<(not GPRMM16:$in), (NOT16_MMR6 GPRMM16:$in)>, ISA_MICROMIPS32R6;
def : MipsPat<(not GPR32:$in), (NOR_MMR6 GPR32Opnd:$in, ZERO)>, ISA_MICROMIPS32R6;
// Patterns for load with a reg+imm operand.
let AddedComplexity = 41 in {
  def : LoadRegImmPat, FGR_64, ISA_MICROMIPS32R6;
  def : StoreRegImmPat, FGR_64, ISA_MICROMIPS32R6;
}
def TAILCALL_MMR6 : TailCall, ISA_MICROMIPS32R6;
def TAILCALLREG_MMR6 : TailCallReg, ISA_MICROMIPS32R6;
def PseudoIndirectBranch_MMR6 : PseudoIndirectBranchBase, ISA_MICROMIPS32R6;
def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)), (TAILCALL_MMR6 tglobaladdr:$dst)>, ISA_MICROMIPS32R6;
def : MipsPat<(MipsTailCall (iPTR texternalsym:$dst)), (TAILCALL_MMR6 texternalsym:$dst)>, ISA_MICROMIPS32R6;
def : MipsPat<(brcond (i32 (setne GPR32:$lhs, 0)), bb:$dst), (BNEZC_MMR6 GPR32:$lhs, bb:$dst)>, ISA_MICROMIPS32R6;
def : MipsPat<(brcond (i32 (seteq GPR32:$lhs, 0)), bb:$dst), (BEQZC_MMR6 GPR32:$lhs, bb:$dst)>, ISA_MICROMIPS32R6;
def : MipsPat<(brcond (i32 (setge GPR32:$lhs, GPR32:$rhs)), bb:$dst), (BEQZC_MMR6 (SLT_MM GPR32:$lhs, GPR32:$rhs), bb:$dst)>, ISA_MICROMIPS32R6;
def : MipsPat<(brcond (i32 (setuge GPR32:$lhs, GPR32:$rhs)), bb:$dst), (BEQZC_MMR6 (SLTu_MM GPR32:$lhs, GPR32:$rhs), bb:$dst)>, ISA_MICROMIPS32R6;
def : MipsPat<(brcond (i32 (setge GPR32:$lhs, immSExt16:$rhs)), bb:$dst), (BEQZC_MMR6 (SLTi_MM GPR32:$lhs, immSExt16:$rhs), bb:$dst)>, ISA_MICROMIPS32R6;
def : MipsPat<(brcond (i32 (setuge GPR32:$lhs, immSExt16:$rhs)), bb:$dst), (BEQZC_MMR6 (SLTiu_MM GPR32:$lhs, immSExt16:$rhs), bb:$dst)>, ISA_MICROMIPS32R6;
def : MipsPat<(brcond (i32 (setgt GPR32:$lhs, immSExt16Plus1:$rhs)), bb:$dst), (BEQZC_MMR6 (SLTi_MM GPR32:$lhs, (Plus1 imm:$rhs)), bb:$dst)>, ISA_MICROMIPS32R6;
def : MipsPat<(brcond (i32 (setugt GPR32:$lhs, immSExt16Plus1:$rhs)), bb:$dst), (BEQZC_MMR6 (SLTiu_MM GPR32:$lhs, (Plus1 imm:$rhs)), bb:$dst)>, ISA_MICROMIPS32R6;
def : MipsPat<(brcond (i32 (setle GPR32:$lhs, GPR32:$rhs)), bb:$dst), (BEQZC_MMR6 (SLT_MM GPR32:$rhs, GPR32:$lhs), bb:$dst)>, ISA_MICROMIPS32R6;
def : MipsPat<(brcond (i32 (setule GPR32:$lhs, GPR32:$rhs)), bb:$dst), (BEQZC_MMR6 (SLTu_MM GPR32:$rhs, GPR32:$lhs), bb:$dst)>, ISA_MICROMIPS32R6;
def : MipsPat<(brcond GPR32:$cond, bb:$dst), (BNEZC_MMR6 GPR32:$cond, bb:$dst)>, ISA_MICROMIPS32R6;
Index: vendor/llvm/dist-release_70/lib/Target/Mips/Mips64InstrInfo.td
===================================================================
--- vendor/llvm/dist-release_70/lib/Target/Mips/Mips64InstrInfo.td (revision 341364)
+++ vendor/llvm/dist-release_70/lib/Target/Mips/Mips64InstrInfo.td (revision 341365)
@@ -1,1141 +1,1144 @@
//===- Mips64InstrInfo.td - Mips64 Instruction Information -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file describes Mips64 instructions. // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // Mips Operand, Complex Patterns and Transformations Definitions. //===----------------------------------------------------------------------===// // shamt must fit in 6 bits. def immZExt6 : ImmLeaf; // Node immediate fits as 10-bit sign extended on target immediate. // e.g. seqi, snei def immSExt10_64 : PatLeaf<(i64 imm), [{ return isInt<10>(N->getSExtValue()); }]>; def immZExt16_64 : PatLeaf<(i64 imm), [{ return isUInt<16>(N->getZExtValue()); }]>; def immZExt5_64 : ImmLeaf; // Transformation function: get log2 of low 32 bits of immediate def Log2LO : SDNodeXFormgetZExtValue())); }]>; // Transformation function: get log2 of high 32 bits of immediate def Log2HI : SDNodeXFormgetZExtValue() >> 32))); }]>; // Predicate: True if immediate is a power of 2 and fits 32 bits def PowerOf2LO : PatLeaf<(imm), [{ if (N->getValueType(0) == MVT::i64) { uint64_t Imm = N->getZExtValue(); return isPowerOf2_64(Imm) && (Imm & 0xffffffff) == Imm; } else return false; }]>; // Predicate: True if immediate is a power of 2 and exceeds 32 bits def PowerOf2HI : PatLeaf<(imm), [{ if (N->getValueType(0) == MVT::i64) { uint64_t Imm = N->getZExtValue(); return isPowerOf2_64(Imm) && (Imm & 0xffffffff00000000) == Imm; } else return false; }]>; def PowerOf2LO_i32 : PatLeaf<(imm), [{ if (N->getValueType(0) == MVT::i32) { uint64_t Imm = N->getZExtValue(); return isPowerOf2_32(Imm) && isUInt<32>(Imm); } else return false; }]>; def assertzext_lt_i32 : PatFrag<(ops node:$src), (assertzext node:$src), [{ return cast(N->getOperand(1))->getVT().bitsLT(MVT::i32); }]>; //===----------------------------------------------------------------------===// // Instructions specific format //===----------------------------------------------------------------------===// let usesCustomInserter = 1 in { def ATOMIC_LOAD_ADD_I64 : Atomic2Ops; def ATOMIC_LOAD_SUB_I64 : Atomic2Ops; def ATOMIC_LOAD_AND_I64 : Atomic2Ops; def ATOMIC_LOAD_OR_I64 : Atomic2Ops; def ATOMIC_LOAD_XOR_I64 : Atomic2Ops; def ATOMIC_LOAD_NAND_I64 : Atomic2Ops; def ATOMIC_SWAP_I64 : Atomic2Ops; def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap; } def ATOMIC_LOAD_ADD_I64_POSTRA : Atomic2OpsPostRA; def ATOMIC_LOAD_SUB_I64_POSTRA : Atomic2OpsPostRA; def ATOMIC_LOAD_AND_I64_POSTRA : Atomic2OpsPostRA; def ATOMIC_LOAD_OR_I64_POSTRA : Atomic2OpsPostRA; def ATOMIC_LOAD_XOR_I64_POSTRA : Atomic2OpsPostRA; def ATOMIC_LOAD_NAND_I64_POSTRA : Atomic2OpsPostRA; def ATOMIC_SWAP_I64_POSTRA : Atomic2OpsPostRA; def ATOMIC_CMP_SWAP_I64_POSTRA : AtomicCmpSwapPostRA; /// Pseudo instructions for loading and storing accumulator registers. 
let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in { def LOAD_ACC128 : Load<"", ACC128>; def STORE_ACC128 : Store<"", ACC128>; } //===----------------------------------------------------------------------===// // Instruction definition //===----------------------------------------------------------------------===// let DecoderNamespace = "Mips64" in { /// Arithmetic Instructions (ALU Immediate) def DADDi : ArithLogicI<"daddi", simm16_64, GPR64Opnd, II_DADDI>, ADDI_FM<0x18>, ISA_MIPS3_NOT_32R6_64R6; let AdditionalPredicates = [NotInMicroMips] in { def DADDiu : ArithLogicI<"daddiu", simm16_64, GPR64Opnd, II_DADDIU, immSExt16, add>, ADDI_FM<0x19>, IsAsCheapAsAMove, ISA_MIPS3; } let isCodeGenOnly = 1 in { def SLTi64 : SetCC_I<"slti", setlt, simm16_64, immSExt16, GPR64Opnd>, SLTI_FM<0xa>, GPR_64; def SLTiu64 : SetCC_I<"sltiu", setult, simm16_64, immSExt16, GPR64Opnd>, SLTI_FM<0xb>, GPR_64; def ANDi64 : ArithLogicI<"andi", uimm16_64, GPR64Opnd, II_AND, immZExt16, and>, ADDI_FM<0xc>, GPR_64; def ORi64 : ArithLogicI<"ori", uimm16_64, GPR64Opnd, II_OR, immZExt16, or>, ADDI_FM<0xd>, GPR_64; def XORi64 : ArithLogicI<"xori", uimm16_64, GPR64Opnd, II_XOR, immZExt16, xor>, ADDI_FM<0xe>, GPR_64; def LUi64 : LoadUpper<"lui", GPR64Opnd, uimm16_64_relaxed>, LUI_FM, GPR_64; } /// Arithmetic Instructions (3-Operand, R-Type) let AdditionalPredicates = [NotInMicroMips] in { def DADD : ArithLogicR<"dadd", GPR64Opnd, 1, II_DADD>, ADD_FM<0, 0x2c>, ISA_MIPS3; def DADDu : ArithLogicR<"daddu", GPR64Opnd, 1, II_DADDU, add>, ADD_FM<0, 0x2d>, ISA_MIPS3; def DSUBu : ArithLogicR<"dsubu", GPR64Opnd, 0, II_DSUBU, sub>, ADD_FM<0, 0x2f>, ISA_MIPS3; def DSUB : ArithLogicR<"dsub", GPR64Opnd, 0, II_DSUB>, ADD_FM<0, 0x2e>, ISA_MIPS3; } let isCodeGenOnly = 1 in { def SLT64 : SetCC_R<"slt", setlt, GPR64Opnd>, ADD_FM<0, 0x2a>, GPR_64; def SLTu64 : SetCC_R<"sltu", setult, GPR64Opnd>, ADD_FM<0, 0x2b>, GPR_64; def AND64 : ArithLogicR<"and", GPR64Opnd, 1, II_AND, and>, ADD_FM<0, 0x24>, GPR_64; def OR64 : ArithLogicR<"or", GPR64Opnd, 1, II_OR, or>, ADD_FM<0, 0x25>, GPR_64; def XOR64 : ArithLogicR<"xor", GPR64Opnd, 1, II_XOR, xor>, ADD_FM<0, 0x26>, GPR_64; def NOR64 : LogicNOR<"nor", GPR64Opnd>, ADD_FM<0, 0x27>, GPR_64; } /// Shift Instructions let AdditionalPredicates = [NotInMicroMips] in { def DSLL : shift_rotate_imm<"dsll", uimm6, GPR64Opnd, II_DSLL, shl, immZExt6>, SRA_FM<0x38, 0>, ISA_MIPS3; def DSRL : shift_rotate_imm<"dsrl", uimm6, GPR64Opnd, II_DSRL, srl, immZExt6>, SRA_FM<0x3a, 0>, ISA_MIPS3; def DSRA : shift_rotate_imm<"dsra", uimm6, GPR64Opnd, II_DSRA, sra, immZExt6>, SRA_FM<0x3b, 0>, ISA_MIPS3; def DSLLV : shift_rotate_reg<"dsllv", GPR64Opnd, II_DSLLV, shl>, SRLV_FM<0x14, 0>, ISA_MIPS3; def DSRAV : shift_rotate_reg<"dsrav", GPR64Opnd, II_DSRAV, sra>, SRLV_FM<0x17, 0>, ISA_MIPS3; def DSRLV : shift_rotate_reg<"dsrlv", GPR64Opnd, II_DSRLV, srl>, SRLV_FM<0x16, 0>, ISA_MIPS3; def DSLL32 : shift_rotate_imm<"dsll32", uimm5, GPR64Opnd, II_DSLL32>, SRA_FM<0x3c, 0>, ISA_MIPS3; def DSRL32 : shift_rotate_imm<"dsrl32", uimm5, GPR64Opnd, II_DSRL32>, SRA_FM<0x3e, 0>, ISA_MIPS3; def DSRA32 : shift_rotate_imm<"dsra32", uimm5, GPR64Opnd, II_DSRA32>, SRA_FM<0x3f, 0>, ISA_MIPS3; // Rotate Instructions def DROTR : shift_rotate_imm<"drotr", uimm6, GPR64Opnd, II_DROTR, rotr, immZExt6>, SRA_FM<0x3a, 1>, ISA_MIPS64R2; def DROTRV : shift_rotate_reg<"drotrv", GPR64Opnd, II_DROTRV, rotr>, SRLV_FM<0x16, 1>, ISA_MIPS64R2; def DROTR32 : shift_rotate_imm<"drotr32", uimm5, GPR64Opnd, II_DROTR32>, SRA_FM<0x3e, 1>, ISA_MIPS64R2; } /// Load and 
Store Instructions /// aligned let isCodeGenOnly = 1 in { def LB64 : Load<"lb", GPR64Opnd, sextloadi8, II_LB>, LW_FM<0x20>, GPR_64; def LBu64 : Load<"lbu", GPR64Opnd, zextloadi8, II_LBU>, LW_FM<0x24>, GPR_64; def LH64 : Load<"lh", GPR64Opnd, sextloadi16, II_LH>, LW_FM<0x21>, GPR_64; def LHu64 : Load<"lhu", GPR64Opnd, zextloadi16, II_LHU>, LW_FM<0x25>, GPR_64; def LW64 : Load<"lw", GPR64Opnd, sextloadi32, II_LW>, LW_FM<0x23>, GPR_64; def SB64 : Store<"sb", GPR64Opnd, truncstorei8, II_SB>, LW_FM<0x28>, GPR_64; def SH64 : Store<"sh", GPR64Opnd, truncstorei16, II_SH>, LW_FM<0x29>, GPR_64; def SW64 : Store<"sw", GPR64Opnd, truncstorei32, II_SW>, LW_FM<0x2b>, GPR_64; } let AdditionalPredicates = [NotInMicroMips] in { def LWu : MMRel, Load<"lwu", GPR64Opnd, zextloadi32, II_LWU>, LW_FM<0x27>, ISA_MIPS3; def LD : LoadMemory<"ld", GPR64Opnd, mem_simmptr, load, II_LD>, LW_FM<0x37>, ISA_MIPS3; def SD : StoreMemory<"sd", GPR64Opnd, mem_simmptr, store, II_SD>, LW_FM<0x3f>, ISA_MIPS3; } /// load/store left/right let isCodeGenOnly = 1 in { def LWL64 : LoadLeftRight<"lwl", MipsLWL, GPR64Opnd, II_LWL>, LW_FM<0x22>, GPR_64; def LWR64 : LoadLeftRight<"lwr", MipsLWR, GPR64Opnd, II_LWR>, LW_FM<0x26>, GPR_64; def SWL64 : StoreLeftRight<"swl", MipsSWL, GPR64Opnd, II_SWL>, LW_FM<0x2a>, GPR_64; def SWR64 : StoreLeftRight<"swr", MipsSWR, GPR64Opnd, II_SWR>, LW_FM<0x2e>, GPR_64; } def LDL : LoadLeftRight<"ldl", MipsLDL, GPR64Opnd, II_LDL>, LW_FM<0x1a>, ISA_MIPS3_NOT_32R6_64R6; def LDR : LoadLeftRight<"ldr", MipsLDR, GPR64Opnd, II_LDR>, LW_FM<0x1b>, ISA_MIPS3_NOT_32R6_64R6; def SDL : StoreLeftRight<"sdl", MipsSDL, GPR64Opnd, II_SDL>, LW_FM<0x2c>, ISA_MIPS3_NOT_32R6_64R6; def SDR : StoreLeftRight<"sdr", MipsSDR, GPR64Opnd, II_SDR>, LW_FM<0x2d>, ISA_MIPS3_NOT_32R6_64R6; /// Load-linked, Store-conditional let AdditionalPredicates = [NotInMicroMips] in { def LLD : LLBase<"lld", GPR64Opnd, mem_simmptr>, LW_FM<0x34>, ISA_MIPS3_NOT_32R6_64R6; } def SCD : SCBase<"scd", GPR64Opnd>, LW_FM<0x3c>, ISA_MIPS3_NOT_32R6_64R6; let AdditionalPredicates = [NotInMicroMips], DecoderNamespace = "Mips32_64_PTR64" in { def LL64 : LLBase<"ll", GPR32Opnd>, LW_FM<0x30>, PTR_64, ISA_MIPS2_NOT_32R6_64R6; def SC64 : SCBase<"sc", GPR32Opnd>, LW_FM<0x38>, PTR_64, ISA_MIPS2_NOT_32R6_64R6; def JR64 : IndirectBranch<"jr", GPR64Opnd>, MTLO_FM<8>, PTR_64; } def JALR64 : JumpLinkReg<"jalr", GPR64Opnd>, JALR_FM; /// Jump and Branch Instructions let isCodeGenOnly = 1 in { def BEQ64 : CBranch<"beq", brtarget, seteq, GPR64Opnd>, BEQ_FM<4>, GPR_64; def BNE64 : CBranch<"bne", brtarget, setne, GPR64Opnd>, BEQ_FM<5>, GPR_64; def BGEZ64 : CBranchZero<"bgez", brtarget, setge, GPR64Opnd>, BGEZ_FM<1, 1>, GPR_64; def BGTZ64 : CBranchZero<"bgtz", brtarget, setgt, GPR64Opnd>, BGEZ_FM<7, 0>, GPR_64; def BLEZ64 : CBranchZero<"blez", brtarget, setle, GPR64Opnd>, BGEZ_FM<6, 0>, GPR_64; def BLTZ64 : CBranchZero<"bltz", brtarget, setlt, GPR64Opnd>, BGEZ_FM<1, 0>, GPR_64; let AdditionalPredicates = [NoIndirectJumpGuards] in def JALR64Pseudo : JumpLinkRegPseudo; } let AdditionalPredicates = [NotInMicroMips], DecoderNamespace = "Mips64" in { def JR_HB64 : JR_HB_DESC, JR_HB_ENC, ISA_MIPS32_NOT_32R6_64R6; def JALR_HB64 : JALR_HB_DESC, JALR_HB_ENC, ISA_MIPS32R2; } def PseudoReturn64 : PseudoReturnBase; let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, NoIndirectJumpGuards] in { def TAILCALLREG64 : TailCallReg, ISA_MIPS3_NOT_32R6_64R6, PTR_64; def PseudoIndirectBranch64 : PseudoIndirectBranchBase, ISA_MIPS3_NOT_32R6_64R6; } let AdditionalPredicates = [NotInMips16Mode, 
NotInMicroMips, UseIndirectJumpsHazard] in { def TAILCALLREGHB64 : TailCallReg, ISA_MIPS32R2_NOT_32R6_64R6, PTR_64; def PseudoIndirectHazardBranch64 : PseudoIndirectBranchBase, ISA_MIPS32R2_NOT_32R6_64R6; } /// Multiply and Divide Instructions. let AdditionalPredicates = [NotInMicroMips] in { def DMULT : Mult<"dmult", II_DMULT, GPR64Opnd, [HI0_64, LO0_64]>, MULT_FM<0, 0x1c>, ISA_MIPS3_NOT_32R6_64R6; def DMULTu : Mult<"dmultu", II_DMULTU, GPR64Opnd, [HI0_64, LO0_64]>, MULT_FM<0, 0x1d>, ISA_MIPS3_NOT_32R6_64R6; } def PseudoDMULT : MultDivPseudo, ISA_MIPS3_NOT_32R6_64R6; def PseudoDMULTu : MultDivPseudo, ISA_MIPS3_NOT_32R6_64R6; let AdditionalPredicates = [NotInMicroMips] in { def DSDIV : Div<"ddiv", II_DDIV, GPR64Opnd, [HI0_64, LO0_64]>, MULT_FM<0, 0x1e>, ISA_MIPS3_NOT_32R6_64R6; def DUDIV : Div<"ddivu", II_DDIVU, GPR64Opnd, [HI0_64, LO0_64]>, MULT_FM<0, 0x1f>, ISA_MIPS3_NOT_32R6_64R6; } def PseudoDSDIV : MultDivPseudo, ISA_MIPS3_NOT_32R6_64R6; def PseudoDUDIV : MultDivPseudo, ISA_MIPS3_NOT_32R6_64R6; let isCodeGenOnly = 1 in { def MTHI64 : MoveToLOHI<"mthi", GPR64Opnd, [HI0_64]>, MTLO_FM<0x11>, ISA_MIPS3_NOT_32R6_64R6; def MTLO64 : MoveToLOHI<"mtlo", GPR64Opnd, [LO0_64]>, MTLO_FM<0x13>, ISA_MIPS3_NOT_32R6_64R6; def MFHI64 : MoveFromLOHI<"mfhi", GPR64Opnd, AC0_64>, MFLO_FM<0x10>, ISA_MIPS3_NOT_32R6_64R6; def MFLO64 : MoveFromLOHI<"mflo", GPR64Opnd, AC0_64>, MFLO_FM<0x12>, ISA_MIPS3_NOT_32R6_64R6; def PseudoMFHI64 : PseudoMFLOHI, ISA_MIPS3_NOT_32R6_64R6; def PseudoMFLO64 : PseudoMFLOHI, ISA_MIPS3_NOT_32R6_64R6; def PseudoMTLOHI64 : PseudoMTLOHI, ISA_MIPS3_NOT_32R6_64R6; /// Sign Ext In Register Instructions. def SEB64 : SignExtInReg<"seb", i8, GPR64Opnd, II_SEB>, SEB_FM<0x10, 0x20>, ISA_MIPS32R2; def SEH64 : SignExtInReg<"seh", i16, GPR64Opnd, II_SEH>, SEB_FM<0x18, 0x20>, ISA_MIPS32R2; } /// Count Leading let AdditionalPredicates = [NotInMicroMips] in { def DCLZ : CountLeading0<"dclz", GPR64Opnd, II_DCLZ>, CLO_FM<0x24>, ISA_MIPS64_NOT_64R6; def DCLO : CountLeading1<"dclo", GPR64Opnd, II_DCLO>, CLO_FM<0x25>, ISA_MIPS64_NOT_64R6; /// Double Word Swap Bytes/HalfWords def DSBH : SubwordSwap<"dsbh", GPR64Opnd, II_DSBH>, SEB_FM<2, 0x24>, ISA_MIPS64R2; def DSHD : SubwordSwap<"dshd", GPR64Opnd, II_DSHD>, SEB_FM<5, 0x24>, ISA_MIPS64R2; def LEA_ADDiu64 : EffectiveAddress<"daddiu", GPR64Opnd>, LW_FM<0x19>, GPR_64; } let isCodeGenOnly = 1 in def RDHWR64 : ReadHardware, RDHWR_FM, GPR_64; let AdditionalPredicates = [NotInMicroMips] in { // The 'pos + size' constraints for code generation are enforced by the // code that lowers into MipsISD::Ext. // For assembly parsing, we alias dextu and dextm to dext, and match by // operand were possible then check the 'pos + size' in MipsAsmParser. // We override the generated decoder to enforce that dext always comes out // for dextm and dextu like binutils. let DecoderMethod = "DecodeDEXT" in { def DEXT : ExtBase<"dext", GPR64Opnd, uimm5_report_uimm6, uimm5_plus1_report_uimm6, immZExt5, immZExt5Plus1, MipsExt>, EXT_FM<3>, ISA_MIPS64R2; def DEXTM : ExtBase<"dextm", GPR64Opnd, uimm5, uimm5_plus33, immZExt5, immZExt5Plus33, MipsExt>, EXT_FM<1>, ISA_MIPS64R2; def DEXTU : ExtBase<"dextu", GPR64Opnd, uimm5_plus32, uimm5_plus1, immZExt5Plus32, immZExt5Plus1, MipsExt>, EXT_FM<2>, ISA_MIPS64R2; } // The 'pos + size' constraints for code generation are enforced by the // code that lowers into MipsISD::Ins. // For assembly parsing, we alias dinsu and dinsm to dins, and match by // operand were possible then check the 'pos + size' in MipsAsmParser. 
// We override the generated decoder to enforce that dins always comes out // for dinsm and dinsu like binutils. let DecoderMethod = "DecodeDINS" in { def DINS : InsBase<"dins", GPR64Opnd, uimm6, uimm5_inssize_plus1, immZExt5, immZExt5Plus1>, EXT_FM<7>, ISA_MIPS64R2; def DINSU : InsBase<"dinsu", GPR64Opnd, uimm5_plus32, uimm5_inssize_plus1, immZExt5Plus32, immZExt5Plus1>, EXT_FM<6>, ISA_MIPS64R2; def DINSM : InsBase<"dinsm", GPR64Opnd, uimm5, uimm_range_2_64, immZExt5, immZExtRange2To64>, EXT_FM<5>, ISA_MIPS64R2; } } let isCodeGenOnly = 1, AdditionalPredicates = [NotInMicroMips] in { def DEXT64_32 : InstSE<(outs GPR64Opnd:$rt), (ins GPR32Opnd:$rs, uimm5_report_uimm6:$pos, uimm5_plus1:$size), "dext $rt, $rs, $pos, $size", [], II_EXT, FrmR, "dext">, EXT_FM<3>, ISA_MIPS64R2; } let isCodeGenOnly = 1, rs = 0, shamt = 0 in { def DSLL64_32 : FR<0x00, 0x3c, (outs GPR64:$rd), (ins GPR32:$rt), "dsll\t$rd, $rt, 32", [], II_DSLL>, GPR_64; let isMoveReg = 1 in { def SLL64_32 : FR<0x0, 0x00, (outs GPR64:$rd), (ins GPR32:$rt), "sll\t$rd, $rt, 0", [], II_SLL>, GPR_64; def SLL64_64 : FR<0x0, 0x00, (outs GPR64:$rd), (ins GPR64:$rt), "sll\t$rd, $rt, 0", [], II_SLL>, GPR_64; } } // We need the following pseudo instruction to avoid offset calculation for // long branches. See the comment in file MipsLongBranch.cpp for detailed // explanation. // Expands to: daddiu $dst, $src, %PART($tgt - $baltgt) // where %PART may be %hi or %lo, depending on the relocation kind // that $tgt is annotated with. def LONG_BRANCH_DADDiu : PseudoSE<(outs GPR64Opnd:$dst), (ins GPR64Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>, GPR_64; // Cavium Octeon cnMIPS instructions let DecoderNamespace = "CnMips", // FIXME: The lack of HasStdEnc is probably a bug EncodingPredicates = [] in { class Count1s: InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"), [(set RO:$rd, (ctpop RO:$rs))], II_POP, FrmR, opstr> { let TwoOperandAliasConstraint = "$rd = $rs"; } class ExtsCins: InstSE<(outs RO:$rt), (ins RO:$rs, uimm5:$pos, uimm5:$lenm1), !strconcat(opstr, "\t$rt, $rs, $pos, $lenm1"), [(set RO:$rt, (Op RO:$rs, PosImm:$pos, imm:$lenm1))], itin, FrmR, opstr> { let TwoOperandAliasConstraint = "$rt = $rs"; } class SetCC64_R : InstSE<(outs GPR64Opnd:$rd), (ins GPR64Opnd:$rs, GPR64Opnd:$rt), !strconcat(opstr, "\t$rd, $rs, $rt"), [(set GPR64Opnd:$rd, (zext (cond_op GPR64Opnd:$rs, GPR64Opnd:$rt)))], II_SEQ_SNE, FrmR, opstr> { let TwoOperandAliasConstraint = "$rd = $rs"; } class SetCC64_I: InstSE<(outs GPR64Opnd:$rt), (ins GPR64Opnd:$rs, simm10_64:$imm10), !strconcat(opstr, "\t$rt, $rs, $imm10"), [(set GPR64Opnd:$rt, (zext (cond_op GPR64Opnd:$rs, immSExt10_64:$imm10)))], II_SEQI_SNEI, FrmI, opstr> { let TwoOperandAliasConstraint = "$rt = $rs"; } class CBranchBitNum shift = 1> : InstSE<(outs), (ins RO:$rs, ImmOp:$p, opnd:$offset), !strconcat(opstr, "\t$rs, $p, $offset"), [(brcond (i32 (cond_op (and RO:$rs, (shl shift, immZExt5_64:$p)), 0)), bb:$offset)], II_BBIT, FrmI, opstr> { let isBranch = 1; let isTerminator = 1; let hasDelaySlot = 1; let Defs = [AT]; } class MFC2OP : InstSE<(outs RO:$rt, uimm16:$imm16), (ins), !strconcat(asmstr, "\t$rt, $imm16"), [], itin, FrmFR>; // Unsigned Byte Add def BADDu : ArithLogicR<"baddu", GPR64Opnd, 1, II_BADDU>, ADD_FM<0x1c, 0x28>, ASE_CNMIPS { let Pattern = [(set GPR64Opnd:$rd, (and (add GPR64Opnd:$rs, GPR64Opnd:$rt), 255))]; } // Branch on Bit Clear /+32 def BBIT0 : CBranchBitNum<"bbit0", brtarget, seteq, GPR64Opnd, uimm5_64_report_uimm6>, BBIT_FM<0x32>, ASE_CNMIPS; def BBIT032: 
CBranchBitNum<"bbit032", brtarget, seteq, GPR64Opnd, uimm5_64, 0x100000000>, BBIT_FM<0x36>, ASE_CNMIPS; // Branch on Bit Set /+32 def BBIT1 : CBranchBitNum<"bbit1", brtarget, setne, GPR64Opnd, uimm5_64_report_uimm6>, BBIT_FM<0x3a>, ASE_CNMIPS; def BBIT132: CBranchBitNum<"bbit132", brtarget, setne, GPR64Opnd, uimm5_64, 0x100000000>, BBIT_FM<0x3e>, ASE_CNMIPS; // Multiply Doubleword to GPR def DMUL : ArithLogicR<"dmul", GPR64Opnd, 1, II_DMUL, mul>, ADD_FM<0x1c, 0x03>, ASE_CNMIPS { let Defs = [HI0, LO0, P0, P1, P2]; } let AdditionalPredicates = [NotInMicroMips] in { // Extract a signed bit field /+32 def EXTS : ExtsCins<"exts", II_EXT, GPR64Opnd, immZExt5>, EXTS_FM<0x3a>, ASE_MIPS64_CNMIPS; def EXTS32: ExtsCins<"exts32", II_EXT, GPR64Opnd, immZExt5Plus32>, EXTS_FM<0x3b>, ASE_MIPS64_CNMIPS; // Clear and insert a bit field /+32 def CINS : ExtsCins<"cins", II_INS, GPR64Opnd, immZExt5, MipsCIns>, EXTS_FM<0x32>, ASE_MIPS64_CNMIPS; def CINS32: ExtsCins<"cins32", II_INS, GPR64Opnd, immZExt5Plus32, MipsCIns>, EXTS_FM<0x33>, ASE_MIPS64_CNMIPS; let isCodeGenOnly = 1 in { def CINS_i32 : ExtsCins<"cins", II_INS, GPR32Opnd, immZExt5, MipsCIns>, EXTS_FM<0x32>, ASE_MIPS64_CNMIPS; def CINS64_32 :InstSE<(outs GPR64Opnd:$rt), (ins GPR32Opnd:$rs, uimm5:$pos, uimm5:$lenm1), "cins\t$rt, $rs, $pos, $lenm1", [], II_INS, FrmR, "cins">, EXTS_FM<0x32>, ASE_MIPS64_CNMIPS; } } // Move to multiplier/product register def MTM0 : MoveToLOHI<"mtm0", GPR64Opnd, [MPL0, P0, P1, P2]>, MTMR_FM<0x08>, ASE_CNMIPS; def MTM1 : MoveToLOHI<"mtm1", GPR64Opnd, [MPL1, P0, P1, P2]>, MTMR_FM<0x0c>, ASE_CNMIPS; def MTM2 : MoveToLOHI<"mtm2", GPR64Opnd, [MPL2, P0, P1, P2]>, MTMR_FM<0x0d>, ASE_CNMIPS; def MTP0 : MoveToLOHI<"mtp0", GPR64Opnd, [P0]>, MTMR_FM<0x09>, ASE_CNMIPS; def MTP1 : MoveToLOHI<"mtp1", GPR64Opnd, [P1]>, MTMR_FM<0x0a>, ASE_CNMIPS; def MTP2 : MoveToLOHI<"mtp2", GPR64Opnd, [P2]>, MTMR_FM<0x0b>, ASE_CNMIPS; // Count Ones in a Word/Doubleword def POP : Count1s<"pop", GPR32Opnd>, POP_FM<0x2c>, ASE_CNMIPS; def DPOP : Count1s<"dpop", GPR64Opnd>, POP_FM<0x2d>, ASE_CNMIPS; // Set on equal/not equal def SEQ : SetCC64_R<"seq", seteq>, SEQ_FM<0x2a>, ASE_CNMIPS; def SEQi : SetCC64_I<"seqi", seteq>, SEQI_FM<0x2e>, ASE_CNMIPS; def SNE : SetCC64_R<"sne", setne>, SEQ_FM<0x2b>, ASE_CNMIPS; def SNEi : SetCC64_I<"snei", setne>, SEQI_FM<0x2f>, ASE_CNMIPS; // 192-bit x 64-bit Unsigned Multiply and Add def V3MULU: ArithLogicR<"v3mulu", GPR64Opnd, 0, II_DMUL>, ADD_FM<0x1c, 0x11>, ASE_CNMIPS { let Defs = [P0, P1, P2]; } // 64-bit Unsigned Multiply and Add Move def VMM0 : ArithLogicR<"vmm0", GPR64Opnd, 0, II_DMUL>, ADD_FM<0x1c, 0x10>, ASE_CNMIPS { let Defs = [MPL0, P0, P1, P2]; } // 64-bit Unsigned Multiply and Add def VMULU : ArithLogicR<"vmulu", GPR64Opnd, 0, II_DMUL>, ADD_FM<0x1c, 0x0f>, ASE_CNMIPS { let Defs = [MPL1, MPL2, P0, P1, P2]; } // Move between CPU and coprocessor registers def DMFC2_OCTEON : MFC2OP<"dmfc2", GPR64Opnd, II_DMFC2>, MFC2OP_FM<0x12, 1>, ASE_CNMIPS; def DMTC2_OCTEON : MFC2OP<"dmtc2", GPR64Opnd, II_DMTC2>, MFC2OP_FM<0x12, 5>, ASE_CNMIPS; } } /// Move between CPU and coprocessor registers let DecoderNamespace = "Mips64", Predicates = [HasMips64] in { def DMFC0 : MFC3OP<"dmfc0", GPR64Opnd, COP0Opnd, II_DMFC0>, MFC3OP_FM<0x10, 1, 0>, ISA_MIPS3; def DMTC0 : MTC3OP<"dmtc0", COP0Opnd, GPR64Opnd, II_DMTC0>, MFC3OP_FM<0x10, 5, 0>, ISA_MIPS3; def DMFC2 : MFC3OP<"dmfc2", GPR64Opnd, COP2Opnd, II_DMFC2>, MFC3OP_FM<0x12, 1, 0>, ISA_MIPS3; def DMTC2 : MTC3OP<"dmtc2", COP2Opnd, GPR64Opnd, II_DMTC2>, MFC3OP_FM<0x12, 5, 0>, ISA_MIPS3; } /// Move 
between CPU and guest coprocessor registers (Virtualization ASE) let DecoderNamespace = "Mips64" in { def DMFGC0 : MFC3OP<"dmfgc0", GPR64Opnd, COP0Opnd, II_DMFGC0>, MFC3OP_FM<0x10, 3, 1>, ISA_MIPS64R5, ASE_VIRT; def DMTGC0 : MTC3OP<"dmtgc0", COP0Opnd, GPR64Opnd, II_DMTGC0>, MFC3OP_FM<0x10, 3, 3>, ISA_MIPS64R5, ASE_VIRT; } let AdditionalPredicates = [UseIndirectJumpsHazard] in def JALRHB64Pseudo : JumpLinkRegPseudo; //===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions //===----------------------------------------------------------------------===// // Materialize i64 constants. defm : MaterializeImms, ISA_MIPS3, GPR_64; def : MipsPat<(i64 immZExt32Low16Zero:$imm), (DSLL (ORi64 ZERO_64, (HI16 imm:$imm)), 16)>, ISA_MIPS3, GPR_64; def : MipsPat<(i64 immZExt32:$imm), (ORi64 (DSLL (ORi64 ZERO_64, (HI16 imm:$imm)), 16), (LO16 imm:$imm))>, ISA_MIPS3, GPR_64; // extended loads def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64 addr:$src)>, ISA_MIPS3, GPR_64; def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64 addr:$src)>, ISA_MIPS3, GPR_64; def : MipsPat<(i64 (extloadi16 addr:$src)), (LH64 addr:$src)>, ISA_MIPS3, GPR_64; def : MipsPat<(i64 (extloadi32 addr:$src)), (LW64 addr:$src)>, ISA_MIPS3, GPR_64; // hi/lo relocs let AdditionalPredicates = [NotInMicroMips] in defm : MipsHiLoRelocs, ISA_MIPS3, GPR_64, SYM_32; def : MipsPat<(MipsGotHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>, ISA_MIPS3, GPR_64; def : MipsPat<(MipsGotHi texternalsym:$in), (LUi64 texternalsym:$in)>, ISA_MIPS3, GPR_64; def : MipsPat<(MipsTlsHi tglobaltlsaddr:$in), (LUi64 tglobaltlsaddr:$in)>, ISA_MIPS3, GPR_64; // highest/higher/hi/lo relocs let AdditionalPredicates = [NotInMicroMips] in { def : MipsPat<(MipsJmpLink (i64 texternalsym:$dst)), (JAL texternalsym:$dst)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(MipsHighest (i64 tglobaladdr:$in)), (LUi64 tglobaladdr:$in)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(MipsHighest (i64 tblockaddress:$in)), (LUi64 tblockaddress:$in)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(MipsHighest (i64 tjumptable:$in)), (LUi64 tjumptable:$in)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(MipsHighest (i64 tconstpool:$in)), (LUi64 tconstpool:$in)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(MipsHighest (i64 texternalsym:$in)), (LUi64 texternalsym:$in)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(MipsHigher (i64 tglobaladdr:$in)), (DADDiu ZERO_64, tglobaladdr:$in)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(MipsHigher (i64 tblockaddress:$in)), (DADDiu ZERO_64, tblockaddress:$in)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(MipsHigher (i64 tjumptable:$in)), (DADDiu ZERO_64, tjumptable:$in)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(MipsHigher (i64 tconstpool:$in)), (DADDiu ZERO_64, tconstpool:$in)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(MipsHigher (i64 texternalsym:$in)), (DADDiu ZERO_64, texternalsym:$in)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tglobaladdr:$lo))), (DADDiu GPR64:$hi, tglobaladdr:$lo)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tblockaddress:$lo))), (DADDiu GPR64:$hi, tblockaddress:$lo)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tjumptable:$lo))), (DADDiu GPR64:$hi, tjumptable:$lo)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tconstpool:$lo))), (DADDiu GPR64:$hi, tconstpool:$lo)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tglobaladdr:$lo))), (DADDiu 
GPR64:$hi, tglobaladdr:$lo)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tblockaddress:$lo))), (DADDiu GPR64:$hi, tblockaddress:$lo)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tjumptable:$lo))), (DADDiu GPR64:$hi, tjumptable:$lo)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tconstpool:$lo))), (DADDiu GPR64:$hi, tconstpool:$lo)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tglobaladdr:$lo))), (DADDiu GPR64:$hi, tglobaladdr:$lo)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tblockaddress:$lo))), (DADDiu GPR64:$hi, tblockaddress:$lo)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tjumptable:$lo))), (DADDiu GPR64:$hi, tjumptable:$lo)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tconstpool:$lo))), (DADDiu GPR64:$hi, tconstpool:$lo)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tglobaltlsaddr:$lo))), (DADDiu GPR64:$hi, tglobaltlsaddr:$lo)>, ISA_MIPS3, GPR_64, SYM_64; } // gp_rel relocs def : MipsPat<(add GPR64:$gp, (MipsGPRel tglobaladdr:$in)), (DADDiu GPR64:$gp, tglobaladdr:$in)>, ISA_MIPS3, ABI_N64; def : MipsPat<(add GPR64:$gp, (MipsGPRel tconstpool:$in)), (DADDiu GPR64:$gp, tconstpool:$in)>, ISA_MIPS3, ABI_N64; def : WrapperPat, ISA_MIPS3, GPR_64; def : WrapperPat, ISA_MIPS3, GPR_64; def : WrapperPat, ISA_MIPS3, GPR_64; def : WrapperPat, ISA_MIPS3, GPR_64; def : WrapperPat, ISA_MIPS3, GPR_64; def : WrapperPat, ISA_MIPS3, GPR_64; defm : BrcondPats, ISA_MIPS3, GPR_64; def : MipsPat<(brcond (i32 (setlt i64:$lhs, 1)), bb:$dst), (BLEZ64 i64:$lhs, bb:$dst)>, ISA_MIPS3, GPR_64; def : MipsPat<(brcond (i32 (setgt i64:$lhs, -1)), bb:$dst), (BGEZ64 i64:$lhs, bb:$dst)>, ISA_MIPS3, GPR_64; // setcc patterns let AdditionalPredicates = [NotInMicroMips] in { defm : SeteqPats, ISA_MIPS3, GPR_64; defm : SetlePats, ISA_MIPS3, GPR_64; defm : SetgtPats, ISA_MIPS3, GPR_64; defm : SetgePats, ISA_MIPS3, GPR_64; defm : SetgeImmPats, ISA_MIPS3, GPR_64; } // truncate def : MipsPat<(trunc (assertsext GPR64:$src)), (EXTRACT_SUBREG GPR64:$src, sub_32)>, ISA_MIPS3, GPR_64; // The forward compatibility strategy employed by MIPS requires us to treat // values as being sign extended to an infinite number of bits. This allows // existing software to run without modification on any future MIPS // implementation (e.g. 128-bit, or 1024-bit). Being compatible with this // strategy requires that truncation acts as a sign-extension for values being // fed into instructions operating on 32-bit values. Such instructions have // undefined results if this is not true. // For our case, this means that we can't issue an extract_subreg for nodes // such as (trunc:i32 (assertzext:i64 X, i32)), because the sign-bit of the // lower subreg would not be replicated into the upper half. 
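//
// A minimal host-side C++ sketch of the invariant described above (the
// helper name is illustrative only, not part of this backend): a 32-bit
// value held in a 64-bit register is in canonical form only when bits
// 63..32 replicate bit 31, which a zero-extended value with bit 31 set
// violates.
//
//   #include <cassert>
//   #include <cstdint>
//
//   // True when Reg is the canonical (sign-extended) 64-bit form of its
//   // low 32 bits.
//   static bool isCanonicalI32(uint64_t Reg) {
//     return Reg == static_cast<uint64_t>(
//                       static_cast<int64_t>(static_cast<int32_t>(Reg)));
//   }
//
//   int main() {
//     assert(isCanonicalI32(0xFFFFFFFFFFFFFFFFull));  // sign-extended: OK
//     assert(!isCanonicalI32(0x00000000FFFFFFFFull)); // zero-extended with
//                                                     // bit 31 set: not OK
//   }
//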
def : MipsPat<(trunc (assertzext_lt_i32 GPR64:$src)), (EXTRACT_SUBREG GPR64:$src, sub_32)>, ISA_MIPS3, GPR_64; def : MipsPat<(i32 (trunc GPR64:$src)), (SLL (EXTRACT_SUBREG GPR64:$src, sub_32), 0)>, ISA_MIPS3, GPR_64; // variable shift instructions patterns def : MipsPat<(shl GPR64:$rt, (i32 (trunc GPR64:$rs))), (DSLLV GPR64:$rt, (EXTRACT_SUBREG GPR64:$rs, sub_32))>, ISA_MIPS3, GPR_64; def : MipsPat<(srl GPR64:$rt, (i32 (trunc GPR64:$rs))), (DSRLV GPR64:$rt, (EXTRACT_SUBREG GPR64:$rs, sub_32))>, ISA_MIPS3, GPR_64; def : MipsPat<(sra GPR64:$rt, (i32 (trunc GPR64:$rs))), (DSRAV GPR64:$rt, (EXTRACT_SUBREG GPR64:$rs, sub_32))>, ISA_MIPS3, GPR_64; def : MipsPat<(rotr GPR64:$rt, (i32 (trunc GPR64:$rs))), (DROTRV GPR64:$rt, (EXTRACT_SUBREG GPR64:$rs, sub_32))>, ISA_MIPS3, GPR_64; // 32-to-64-bit extension def : MipsPat<(i64 (anyext GPR32:$src)), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>, ISA_MIPS3, GPR_64; def : MipsPat<(i64 (zext GPR32:$src)), (DSRL (DSLL64_32 GPR32:$src), 32)>, ISA_MIPS3, GPR_64; def : MipsPat<(i64 (sext GPR32:$src)), (SLL64_32 GPR32:$src)>, ISA_MIPS3, GPR_64; let AdditionalPredicates = [NotInMicroMips] in { def : MipsPat<(i64 (zext GPR32:$src)), (DEXT64_32 GPR32:$src, 0, 32)>, ISA_MIPS64R2, GPR_64; def : MipsPat<(i64 (zext (i32 (shl GPR32:$rt, immZExt5:$imm)))), (CINS64_32 GPR32:$rt, imm:$imm, (immZExt5To31 imm:$imm))>, ISA_MIPS64R2, GPR_64, ASE_MIPS64_CNMIPS; } // Sign extend in register def : MipsPat<(i64 (sext_inreg GPR64:$src, i32)), (SLL64_64 GPR64:$src)>, ISA_MIPS3, GPR_64; // bswap MipsPattern def : MipsPat<(bswap GPR64:$rt), (DSHD (DSBH GPR64:$rt))>, ISA_MIPS64R2; // Carry pattern let AdditionalPredicates = [NotInMicroMips] in { def : MipsPat<(subc GPR64:$lhs, GPR64:$rhs), (DSUBu GPR64:$lhs, GPR64:$rhs)>, ISA_MIPS3, GPR_64; def : MipsPat<(addc GPR64:$lhs, GPR64:$rhs), (DADDu GPR64:$lhs, GPR64:$rhs)>, ISA_MIPS3, ASE_NOT_DSP, GPR_64; def : MipsPat<(addc GPR64:$lhs, immSExt16:$imm), (DADDiu GPR64:$lhs, imm:$imm)>, ISA_MIPS3, ASE_NOT_DSP, GPR_64; } // Octeon bbit0/bbit1 MipsPattern def : MipsPat<(brcond (i32 (seteq (and i64:$lhs, PowerOf2LO:$mask), 0)), bb:$dst), (BBIT0 i64:$lhs, (Log2LO PowerOf2LO:$mask), bb:$dst)>, ISA_MIPS64R2, ASE_MIPS64_CNMIPS; def : MipsPat<(brcond (i32 (seteq (and i64:$lhs, PowerOf2HI:$mask), 0)), bb:$dst), (BBIT032 i64:$lhs, (Log2HI PowerOf2HI:$mask), bb:$dst)>, ISA_MIPS64R2, ASE_MIPS64_CNMIPS; def : MipsPat<(brcond (i32 (setne (and i64:$lhs, PowerOf2LO:$mask), 0)), bb:$dst), (BBIT1 i64:$lhs, (Log2LO PowerOf2LO:$mask), bb:$dst)>, ISA_MIPS64R2, ASE_MIPS64_CNMIPS; def : MipsPat<(brcond (i32 (setne (and i64:$lhs, PowerOf2HI:$mask), 0)), bb:$dst), (BBIT132 i64:$lhs, (Log2HI PowerOf2HI:$mask), bb:$dst)>, ISA_MIPS64R2, ASE_MIPS64_CNMIPS; def : MipsPat<(brcond (i32 (seteq (and i32:$lhs, PowerOf2LO_i32:$mask), 0)), bb:$dst), (BBIT0 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), i32:$lhs, sub_32), (Log2LO PowerOf2LO_i32:$mask), bb:$dst)>, ISA_MIPS64R2, ASE_MIPS64_CNMIPS; def : MipsPat<(brcond (i32 (setne (and i32:$lhs, PowerOf2LO_i32:$mask), 0)), bb:$dst), (BBIT1 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), i32:$lhs, sub_32), (Log2LO PowerOf2LO_i32:$mask), bb:$dst)>, ISA_MIPS64R2, ASE_MIPS64_CNMIPS; // Atomic load patterns. def : MipsPat<(atomic_load_8 addr:$a), (LB64 addr:$a)>, ISA_MIPS3, GPR_64; def : MipsPat<(atomic_load_16 addr:$a), (LH64 addr:$a)>, ISA_MIPS3, GPR_64; def : MipsPat<(atomic_load_32 addr:$a), (LW64 addr:$a)>, ISA_MIPS3, GPR_64; def : MipsPat<(atomic_load_64 addr:$a), (LD addr:$a)>, ISA_MIPS3, GPR_64; // Atomic store patterns. 
def : MipsPat<(atomic_store_8 addr:$a, GPR64:$v), (SB64 GPR64:$v, addr:$a)>, ISA_MIPS3, GPR_64; def : MipsPat<(atomic_store_16 addr:$a, GPR64:$v), (SH64 GPR64:$v, addr:$a)>, ISA_MIPS3, GPR_64; def : MipsPat<(atomic_store_32 addr:$a, GPR64:$v), (SW64 GPR64:$v, addr:$a)>, ISA_MIPS3, GPR_64; def : MipsPat<(atomic_store_64 addr:$a, GPR64:$v), (SD GPR64:$v, addr:$a)>, ISA_MIPS3, GPR_64; // Patterns used for matching away redundant sign extensions. // MIPS32 arithmetic instructions sign extend their result implicitly. def : MipsPat<(i64 (sext (i32 (add GPR32:$src, immSExt16:$imm16)))), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADDiu GPR32:$src, immSExt16:$imm16), sub_32)>; def : MipsPat<(i64 (sext (i32 (add GPR32:$src, GPR32:$src2)))), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADDu GPR32:$src, GPR32:$src2), sub_32)>; def : MipsPat<(i64 (sext (i32 (sub GPR32:$src, GPR32:$src2)))), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (SUBu GPR32:$src, GPR32:$src2), sub_32)>; def : MipsPat<(i64 (sext (i32 (mul GPR32:$src, GPR32:$src2)))), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - (MUL GPR32:$src, GPR32:$src2), sub_32)>; + (MUL GPR32:$src, GPR32:$src2), sub_32)>, ISA_MIPS3_NOT_32R6_64R6; def : MipsPat<(i64 (sext (i32 (MipsMFHI ACC64:$src)))), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (PseudoMFHI ACC64:$src), sub_32)>; def : MipsPat<(i64 (sext (i32 (MipsMFLO ACC64:$src)))), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (PseudoMFLO ACC64:$src), sub_32)>; def : MipsPat<(i64 (sext (i32 (shl GPR32:$src, immZExt5:$imm5)))), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (SLL GPR32:$src, immZExt5:$imm5), sub_32)>; def : MipsPat<(i64 (sext (i32 (shl GPR32:$src, GPR32:$src2)))), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (SLLV GPR32:$src, GPR32:$src2), sub_32)>; def : MipsPat<(i64 (sext (i32 (srl GPR32:$src, immZExt5:$imm5)))), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (SRL GPR32:$src, immZExt5:$imm5), sub_32)>; def : MipsPat<(i64 (sext (i32 (srl GPR32:$src, GPR32:$src2)))), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (SRLV GPR32:$src, GPR32:$src2), sub_32)>; def : MipsPat<(i64 (sext (i32 (sra GPR32:$src, immZExt5:$imm5)))), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (SRA GPR32:$src, immZExt5:$imm5), sub_32)>; def : MipsPat<(i64 (sext (i32 (sra GPR32:$src, GPR32:$src2)))), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (SRAV GPR32:$src, GPR32:$src2), sub_32)>; //===----------------------------------------------------------------------===// // Instruction aliases //===----------------------------------------------------------------------===// let AdditionalPredicates = [NotInMicroMips] in { def : MipsInstAlias<"move $dst, $src", (OR64 GPR64Opnd:$dst, GPR64Opnd:$src, ZERO_64), 1>, GPR_64; def : MipsInstAlias<"move $dst, $src", (DADDu GPR64Opnd:$dst, GPR64Opnd:$src, ZERO_64), 1>, GPR_64; def : MipsInstAlias<"dadd $rs, $rt, $imm", (DADDi GPR64Opnd:$rs, GPR64Opnd:$rt, simm16_64:$imm), 0>, ISA_MIPS3_NOT_32R6_64R6; def : MipsInstAlias<"dadd $rs, $imm", (DADDi GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm), 0>, ISA_MIPS3_NOT_32R6_64R6; def : MipsInstAlias<"daddu $rs, $rt, $imm", (DADDiu GPR64Opnd:$rs, GPR64Opnd:$rt, simm16_64:$imm), 0>, ISA_MIPS3; def : MipsInstAlias<"daddu $rs, $imm", (DADDiu GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm), 0>, ISA_MIPS3; defm : OneOrTwoOperandMacroImmediateAlias<"and", ANDi64, GPR64Opnd, imm64>, ISA_MIPS3, GPR_64; defm : OneOrTwoOperandMacroImmediateAlias<"or", ORi64, GPR64Opnd, imm64>, ISA_MIPS3, GPR_64; defm : OneOrTwoOperandMacroImmediateAlias<"xor", XORi64, GPR64Opnd, imm64>, ISA_MIPS3, GPR_64; } let AdditionalPredicates = [NotInMicroMips] in { def : 
MipsInstAlias<"dneg $rt, $rs", (DSUB GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rs), 1>, ISA_MIPS3; def : MipsInstAlias<"dneg $rt", (DSUB GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rt), 1>, ISA_MIPS3; def : MipsInstAlias<"dnegu $rt, $rs", (DSUBu GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rs), 1>, ISA_MIPS3; def : MipsInstAlias<"dnegu $rt", (DSUBu GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rt), 1>, ISA_MIPS3; } def : MipsInstAlias<"dsubi $rs, $rt, $imm", (DADDi GPR64Opnd:$rs, GPR64Opnd:$rt, InvertedImOperand64:$imm), 0>, ISA_MIPS3_NOT_32R6_64R6; def : MipsInstAlias<"dsubi $rs, $imm", (DADDi GPR64Opnd:$rs, GPR64Opnd:$rs, InvertedImOperand64:$imm), 0>, ISA_MIPS3_NOT_32R6_64R6; def : MipsInstAlias<"dsub $rs, $rt, $imm", (DADDi GPR64Opnd:$rs, GPR64Opnd:$rt, InvertedImOperand64:$imm), 0>, ISA_MIPS3_NOT_32R6_64R6; def : MipsInstAlias<"dsub $rs, $imm", (DADDi GPR64Opnd:$rs, GPR64Opnd:$rs, InvertedImOperand64:$imm), 0>, ISA_MIPS3_NOT_32R6_64R6; let AdditionalPredicates = [NotInMicroMips] in { def : MipsInstAlias<"dsubu $rt, $rs, $imm", (DADDiu GPR64Opnd:$rt, GPR64Opnd:$rs, InvertedImOperand64:$imm), 0>, ISA_MIPS3; def : MipsInstAlias<"dsubu $rs, $imm", (DADDiu GPR64Opnd:$rs, GPR64Opnd:$rs, InvertedImOperand64:$imm), 0>, ISA_MIPS3; } def : MipsInstAlias<"dsra $rd, $rt, $rs", (DSRAV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>, ISA_MIPS3; let AdditionalPredicates = [NotInMicroMips] in { def : MipsInstAlias<"dsll $rd, $rt, $rs", (DSLLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>, ISA_MIPS3; def : MipsInstAlias<"dsrl $rd, $rt, $rs", (DSRLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>, ISA_MIPS3; def : MipsInstAlias<"dsrl $rd, $rt", (DSRLV GPR64Opnd:$rd, GPR64Opnd:$rd, GPR32Opnd:$rt), 0>, ISA_MIPS3; def : MipsInstAlias<"dsll $rd, $rt", (DSLLV GPR64Opnd:$rd, GPR64Opnd:$rd, GPR32Opnd:$rt), 0>, ISA_MIPS3; def : MipsInstAlias<"dins $rt, $rs, $pos, $size", (DINSM GPR64Opnd:$rt, GPR64Opnd:$rs, uimm5:$pos, uimm_range_2_64:$size), 0>, ISA_MIPS64R2; def : MipsInstAlias<"dins $rt, $rs, $pos, $size", (DINSU GPR64Opnd:$rt, GPR64Opnd:$rs, uimm5_plus32:$pos, uimm5_plus1:$size), 0>, ISA_MIPS64R2; def : MipsInstAlias<"dext $rt, $rs, $pos, $size", (DEXTM GPR64Opnd:$rt, GPR64Opnd:$rs, uimm5:$pos, uimm5_plus33:$size), 0>, ISA_MIPS64R2; def : MipsInstAlias<"dext $rt, $rs, $pos, $size", (DEXTU GPR64Opnd:$rt, GPR64Opnd:$rs, uimm5_plus32:$pos, uimm5_plus1:$size), 0>, ISA_MIPS64R2; def : MipsInstAlias<"jalr.hb $rs", (JALR_HB64 RA_64, GPR64Opnd:$rs), 1>, ISA_MIPS64; // Two operand (implicit 0 selector) versions: def : MipsInstAlias<"dmtc0 $rt, $rd", (DMTC0 COP0Opnd:$rd, GPR64Opnd:$rt, 0), 0>; def : MipsInstAlias<"dmfc0 $rt, $rd", (DMFC0 GPR64Opnd:$rt, COP0Opnd:$rd, 0), 0>; def : MipsInstAlias<"dmfgc0 $rt, $rd", (DMFGC0 GPR64Opnd:$rt, COP0Opnd:$rd, 0), 0>, ISA_MIPS64R5, ASE_VIRT; def : MipsInstAlias<"dmtgc0 $rt, $rd", (DMTGC0 COP0Opnd:$rd, GPR64Opnd:$rt, 0), 0>, ISA_MIPS64R5, ASE_VIRT; } def : MipsInstAlias<"dmfc2 $rt, $rd", (DMFC2 GPR64Opnd:$rt, COP2Opnd:$rd, 0), 0>; def : MipsInstAlias<"dmtc2 $rt, $rd", (DMTC2 COP2Opnd:$rd, GPR64Opnd:$rt, 0), 0>; def : MipsInstAlias<"synciobdma", (SYNC 0x2), 0>, ASE_MIPS64_CNMIPS; def : MipsInstAlias<"syncs", (SYNC 0x6), 0>, ASE_MIPS64_CNMIPS; def : MipsInstAlias<"syncw", (SYNC 0x4), 0>, ASE_MIPS64_CNMIPS; def : MipsInstAlias<"syncws", (SYNC 0x5), 0>, ASE_MIPS64_CNMIPS; // cnMIPS Aliases. 
// bbit* with $p 32-63 converted to bbit*32 with $p 0-31 def : MipsInstAlias<"bbit0 $rs, $p, $offset", (BBIT032 GPR64Opnd:$rs, uimm5_plus32_normalize_64:$p, brtarget:$offset), 0>, ASE_CNMIPS; def : MipsInstAlias<"bbit1 $rs, $p, $offset", (BBIT132 GPR64Opnd:$rs, uimm5_plus32_normalize_64:$p, brtarget:$offset), 0>, ASE_CNMIPS; // exts with $pos 32-63 in converted to exts32 with $pos 0-31 def : MipsInstAlias<"exts $rt, $rs, $pos, $lenm1", (EXTS32 GPR64Opnd:$rt, GPR64Opnd:$rs, uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>, ASE_MIPS64_CNMIPS; def : MipsInstAlias<"exts $rt, $pos, $lenm1", (EXTS32 GPR64Opnd:$rt, GPR64Opnd:$rt, uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>, ASE_MIPS64_CNMIPS; // cins with $pos 32-63 in converted to cins32 with $pos 0-31 def : MipsInstAlias<"cins $rt, $rs, $pos, $lenm1", (CINS32 GPR64Opnd:$rt, GPR64Opnd:$rs, uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>, ASE_MIPS64_CNMIPS; def : MipsInstAlias<"cins $rt, $pos, $lenm1", (CINS32 GPR64Opnd:$rt, GPR64Opnd:$rt, uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>, ASE_MIPS64_CNMIPS; //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions //===----------------------------------------------------------------------===// class LoadImmediate64 : MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm64), !strconcat(instr_asm, "\t$rt, $imm64")> ; def LoadImm64 : LoadImmediate64<"dli", imm64, GPR64Opnd>; def LoadAddrReg64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rt), (ins mem:$addr), "dla\t$rt, $addr">; def LoadAddrImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rt), (ins imm64:$imm64), "dla\t$rt, $imm64">; def DMULImmMacro : MipsAsmPseudoInst<(outs), (ins GPR64Opnd:$rs, GPR64Opnd:$rt, simm32_relaxed:$imm), "dmul\t$rs, $rt, $imm">, ISA_MIPS3_NOT_32R6_64R6; def DMULOMacro : MipsAsmPseudoInst<(outs), (ins GPR64Opnd:$rs, GPR64Opnd:$rt, GPR64Opnd:$rd), "dmulo\t$rs, $rt, $rd">, ISA_MIPS3_NOT_32R6_64R6; def DMULOUMacro : MipsAsmPseudoInst<(outs), (ins GPR64Opnd:$rs, GPR64Opnd:$rt, GPR64Opnd:$rd), "dmulou\t$rs, $rt, $rd">, ISA_MIPS3_NOT_32R6_64R6; def DMULMacro : MipsAsmPseudoInst<(outs), (ins GPR64Opnd:$rs, GPR64Opnd:$rt, GPR64Opnd:$rd), "dmul\t$rs, $rt, $rd"> { let InsnPredicates = [HasMips3, NotMips64r6, NotCnMips]; } let AdditionalPredicates = [NotInMicroMips] in { def DSDivMacro : MipsAsmPseudoInst<(outs GPR64Opnd:$rd), (ins GPR64Opnd:$rs, GPR64Opnd:$rt), "ddiv\t$rd, $rs, $rt">, ISA_MIPS3_NOT_32R6_64R6; def DSDivIMacro : MipsAsmPseudoInst<(outs GPR64Opnd:$rd), (ins GPR64Opnd:$rs, imm64:$imm), "ddiv\t$rd, $rs, $imm">, ISA_MIPS3_NOT_32R6_64R6; def DUDivMacro : MipsAsmPseudoInst<(outs GPR64Opnd:$rd), (ins GPR64Opnd:$rs, GPR64Opnd:$rt), "ddivu\t$rd, $rs, $rt">, ISA_MIPS3_NOT_32R6_64R6; def DUDivIMacro : MipsAsmPseudoInst<(outs GPR64Opnd:$rd), (ins GPR64Opnd:$rs, imm64:$imm), "ddivu\t$rd, $rs, $imm">, ISA_MIPS3_NOT_32R6_64R6; // GAS expands 'div' and 'ddiv' differently when the destination // register is $zero and the instruction is in the two operand // form. 'ddiv' gets expanded, while 'div' is not expanded. def : MipsInstAlias<"ddiv $rs, $rt", (DSDivMacro GPR64Opnd:$rs, GPR64Opnd:$rs, GPR64Opnd:$rt), 0>, ISA_MIPS3_NOT_32R6_64R6; def : MipsInstAlias<"ddiv $rd, $imm", (DSDivIMacro GPR64Opnd:$rd, GPR64Opnd:$rd, imm64:$imm), 0>, ISA_MIPS3_NOT_32R6_64R6; // GAS expands 'divu' and 'ddivu' differently when the destination // register is $zero and the instruction is in the two operand // form. 'ddivu' gets expanded, while 'divu' is not expanded. 
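//
// A rough C++ model of what the ddiv/ddivu macros compute (a sketch only;
// the function names are illustrative): the three-operand form writes
// rs / rt into rd, and the two-operand aliases reuse the first register as
// both destination and dividend.
//
//   #include <cstdint>
//
//   static int64_t  ddiv (int64_t Rs,  int64_t Rt)  { return Rs / Rt; } // signed
//   static uint64_t ddivu(uint64_t Rs, uint64_t Rt) { return Rs / Rt; } // unsigned
//
//   // "ddiv $rs, $rt" folds to "ddiv $rs, $rs, $rt", i.e. rs = rs / rt.
//   static void ddiv_two_operand(int64_t &Rs, int64_t Rt) { Rs = ddiv(Rs, Rt); }
//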
def : MipsInstAlias<"ddivu $rt, $rs", (DUDivMacro GPR64Opnd:$rt, GPR64Opnd:$rt, GPR64Opnd:$rs), 0>, ISA_MIPS3_NOT_32R6_64R6; def : MipsInstAlias<"ddivu $rd, $imm", (DUDivIMacro GPR64Opnd:$rd, GPR64Opnd:$rd, imm64:$imm), 0>, ISA_MIPS3_NOT_32R6_64R6; def DSRemMacro : MipsAsmPseudoInst<(outs GPR64Opnd:$rd), (ins GPR64Opnd:$rs, GPR64Opnd:$rt), "drem\t$rd, $rs, $rt">, ISA_MIPS3_NOT_32R6_64R6; def DSRemIMacro : MipsAsmPseudoInst<(outs GPR64Opnd:$rd), (ins GPR64Opnd:$rs, simm32_relaxed:$imm), "drem\t$rd, $rs, $imm">, ISA_MIPS3_NOT_32R6_64R6; def DURemMacro : MipsAsmPseudoInst<(outs GPR64Opnd:$rd), (ins GPR64Opnd:$rs, GPR64Opnd:$rt), "dremu\t$rd, $rs, $rt">, ISA_MIPS3_NOT_32R6_64R6; def DURemIMacro : MipsAsmPseudoInst<(outs GPR64Opnd:$rd), (ins GPR64Opnd:$rs, simm32_relaxed:$imm), "dremu\t$rd, $rs, $imm">, ISA_MIPS3_NOT_32R6_64R6; def : MipsInstAlias<"drem $rt, $rs", (DSRemMacro GPR64Opnd:$rt, GPR64Opnd:$rt, GPR64Opnd:$rs), 0>, ISA_MIPS3_NOT_32R6_64R6; def : MipsInstAlias<"drem $rd, $imm", (DSRemIMacro GPR64Opnd:$rd, GPR64Opnd:$rd, simm32_relaxed:$imm), 0>, ISA_MIPS3_NOT_32R6_64R6; def : MipsInstAlias<"dremu $rt, $rs", (DURemMacro GPR64Opnd:$rt, GPR64Opnd:$rt, GPR64Opnd:$rs), 0>, ISA_MIPS3_NOT_32R6_64R6; def : MipsInstAlias<"dremu $rd, $imm", (DURemIMacro GPR64Opnd:$rd, GPR64Opnd:$rd, simm32_relaxed:$imm), 0>, ISA_MIPS3_NOT_32R6_64R6; } def NORImm64 : NORIMM_DESC_BASE, GPR_64; def : MipsInstAlias<"nor\t$rs, $imm", (NORImm64 GPR64Opnd:$rs, GPR64Opnd:$rs, imm64:$imm)>, GPR_64; def SLTImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rs), (ins GPR64Opnd:$rt, imm64:$imm), "slt\t$rs, $rt, $imm">, GPR_64; def : MipsInstAlias<"slt\t$rs, $imm", (SLTImm64 GPR64Opnd:$rs, GPR64Opnd:$rs, imm64:$imm)>, GPR_64; def SLTUImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rs), (ins GPR64Opnd:$rt, imm64:$imm), "sltu\t$rs, $rt, $imm">, GPR_64; def : MipsInstAlias<"sltu\t$rs, $imm", (SLTUImm64 GPR64Opnd:$rs, GPR64Opnd:$rs, imm64:$imm)>, GPR_64; + +def : MipsInstAlias<"rdhwr $rt, $rs", + (RDHWR64 GPR64Opnd:$rt, HWRegsOpnd:$rs, 0), 1>, GPR_64; Index: vendor/llvm/dist-release_70/lib/Target/Mips/Mips64r6InstrInfo.td =================================================================== --- vendor/llvm/dist-release_70/lib/Target/Mips/Mips64r6InstrInfo.td (revision 341364) +++ vendor/llvm/dist-release_70/lib/Target/Mips/Mips64r6InstrInfo.td (revision 341365) @@ -1,334 +1,337 @@ //=- Mips64r6InstrInfo.td - Mips64r6 Instruction Information -*- tablegen -*-=// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file describes Mips64r6 instructions. 
// //===----------------------------------------------------------------------===// // Notes about removals/changes from MIPS32r6: // Reencoded: dclo, dclz //===----------------------------------------------------------------------===// // // Instruction Encodings // //===----------------------------------------------------------------------===// class DALIGN_ENC : SPECIAL3_DALIGN_FM; class DAUI_ENC : DAUI_FM; class DAHI_ENC : REGIMM_FM; class DATI_ENC : REGIMM_FM; class DBITSWAP_ENC : SPECIAL3_2R_FM; class DCLO_R6_ENC : SPECIAL_2R_FM; class DCLZ_R6_ENC : SPECIAL_2R_FM; class DDIV_ENC : SPECIAL_3R_FM<0b00010, 0b011110>; class DDIVU_ENC : SPECIAL_3R_FM<0b00010, 0b011111>; class DLSA_R6_ENC : SPECIAL_LSA_FM; class DMOD_ENC : SPECIAL_3R_FM<0b00011, 0b011110>; class DMODU_ENC : SPECIAL_3R_FM<0b00011, 0b011111>; class DMUH_ENC : SPECIAL_3R_FM<0b00011, 0b011100>; class DMUHU_ENC : SPECIAL_3R_FM<0b00011, 0b011101>; class DMUL_R6_ENC : SPECIAL_3R_FM<0b00010, 0b011100>; class DMULU_ENC : SPECIAL_3R_FM<0b00010, 0b011101>; class LDPC_ENC : PCREL18_FM; class LLD_R6_ENC : SPECIAL3_LL_SC_FM; class SCD_R6_ENC : SPECIAL3_LL_SC_FM; class CRC32D_ENC : SPECIAL3_2R_SZ_CRC<3,0>; class CRC32CD_ENC : SPECIAL3_2R_SZ_CRC<3,1>; //===----------------------------------------------------------------------===// // // Instruction Descriptions // //===----------------------------------------------------------------------===// class AHI_ATI_DESC_BASE { dag OutOperandList = (outs GPROpnd:$rs); dag InOperandList = (ins GPROpnd:$rt, uimm16_altrelaxed:$imm); string AsmString = !strconcat(instr_asm, "\t$rs, $rt, $imm"); string Constraints = "$rs = $rt"; InstrItinClass Itinerary = itin; } class DALIGN_DESC : ALIGN_DESC_BASE<"dalign", GPR64Opnd, uimm3, II_DALIGN>; class DAHI_DESC : AHI_ATI_DESC_BASE<"dahi", GPR64Opnd, II_DAHI>; class DATI_DESC : AHI_ATI_DESC_BASE<"dati", GPR64Opnd, II_DATI>; class DAUI_DESC : AUI_DESC_BASE<"daui", GPR64Opnd, II_DAUI>; class DBITSWAP_DESC : BITSWAP_DESC_BASE<"dbitswap", GPR64Opnd, II_DBITSWAP>; class DCLO_R6_DESC : CLO_R6_DESC_BASE<"dclo", GPR64Opnd, II_DCLO>; class DCLZ_R6_DESC : CLZ_R6_DESC_BASE<"dclz", GPR64Opnd, II_DCLZ>; class DDIV_DESC : DIVMOD_DESC_BASE<"ddiv", GPR64Opnd, II_DDIV, sdiv>; class DDIVU_DESC : DIVMOD_DESC_BASE<"ddivu", GPR64Opnd, II_DDIVU, udiv>; class DLSA_R6_DESC : LSA_R6_DESC_BASE<"dlsa", GPR64Opnd, uimm2_plus1, II_DLSA>; class DMOD_DESC : DIVMOD_DESC_BASE<"dmod", GPR64Opnd, II_DMOD, srem>; class DMODU_DESC : DIVMOD_DESC_BASE<"dmodu", GPR64Opnd, II_DMODU, urem>; class DMUH_DESC : MUL_R6_DESC_BASE<"dmuh", GPR64Opnd, II_DMUH, mulhs>; class DMUHU_DESC : MUL_R6_DESC_BASE<"dmuhu", GPR64Opnd, II_DMUHU, mulhu>; class DMUL_R6_DESC : MUL_R6_DESC_BASE<"dmul", GPR64Opnd, II_DMUL, mul>; class DMULU_DESC : MUL_R6_DESC_BASE<"dmulu", GPR64Opnd, II_DMUL>; class LDPC_DESC : PCREL_DESC_BASE<"ldpc", GPR64Opnd, simm18_lsl3, II_LDPC>; class LLD_R6_DESC : LL_R6_DESC_BASE<"lld", GPR64Opnd, mem_simmptr, II_LLD>; class SCD_R6_DESC : SC_R6_DESC_BASE<"scd", GPR64Opnd, II_SCD>; class SELEQZ64_DESC : SELEQNE_Z_DESC_BASE<"seleqz", GPR64Opnd>; class SELNEZ64_DESC : SELEQNE_Z_DESC_BASE<"selnez", GPR64Opnd>; class BGEC64_DESC : CMP_BC_DESC_BASE<"bgec", brtarget, GPR64Opnd>; class BGEUC64_DESC : CMP_BC_DESC_BASE<"bgeuc", brtarget, GPR64Opnd>; class BEQC64_DESC : CMP_BC_DESC_BASE<"beqc", brtarget, GPR64Opnd>; class BNEC64_DESC : CMP_BC_DESC_BASE<"bnec", brtarget, GPR64Opnd>; class BLTC64_DESC : CMP_BC_DESC_BASE<"bltc", brtarget, GPR64Opnd>; class BLTUC64_DESC : CMP_BC_DESC_BASE<"bltuc", brtarget, GPR64Opnd>; 
class BLTZC64_DESC : CMP_CBR_RT_Z_DESC_BASE<"bltzc", brtarget, GPR64Opnd>; class BGEZC64_DESC : CMP_CBR_RT_Z_DESC_BASE<"bgezc", brtarget, GPR64Opnd>; class BLEZC64_DESC : CMP_CBR_RT_Z_DESC_BASE<"blezc", brtarget, GPR64Opnd>; class BGTZC64_DESC : CMP_CBR_RT_Z_DESC_BASE<"bgtzc", brtarget, GPR64Opnd>; class BEQZC64_DESC : CMP_CBR_EQNE_Z_DESC_BASE<"beqzc", brtarget21, GPR64Opnd>; class BNEZC64_DESC : CMP_CBR_EQNE_Z_DESC_BASE<"bnezc", brtarget21, GPR64Opnd>; class JIALC64_DESC : JMP_IDX_COMPACT_DESC_BASE<"jialc", calloffset16, GPR64Opnd, II_JIALC> { bit isCall = 1; list Defs = [RA]; } class JIC64_DESC : JMP_IDX_COMPACT_DESC_BASE<"jic", jmpoffset16, GPR64Opnd, II_JIC> { bit isBarrier = 1; bit isTerminator = 1; list Defs = [AT]; } class LL64_R6_DESC : LL_R6_DESC_BASE<"ll", GPR32Opnd, mem_simm9, II_LL>; class SC64_R6_DESC : SC_R6_DESC_BASE<"sc", GPR32Opnd, II_SC>; class JR_HB64_R6_DESC : JR_HB_DESC_BASE<"jr.hb", GPR64Opnd> { bit isBranch = 1; bit isIndirectBranch = 1; bit hasDelaySlot = 1; bit isTerminator=1; bit isBarrier=1; bit isCTI = 1; InstrItinClass Itinerary = II_JR_HB; } class CRC32D_DESC : CRC_DESC_BASE<"crc32d", GPR32Opnd, II_CRC32D>; class CRC32CD_DESC : CRC_DESC_BASE<"crc32cd", GPR32Opnd, II_CRC32CD>; //===----------------------------------------------------------------------===// // // Instruction Definitions // //===----------------------------------------------------------------------===// let AdditionalPredicates = [NotInMicroMips] in { let DecoderMethod = "DecodeDAHIDATI" in { def DATI : DATI_ENC, DATI_DESC, ISA_MIPS64R6; def DAHI : DAHI_ENC, DAHI_DESC, ISA_MIPS64R6; } def DAUI : DAUI_ENC, DAUI_DESC, ISA_MIPS64R6; def DALIGN : DALIGN_ENC, DALIGN_DESC, ISA_MIPS64R6; def DBITSWAP : DBITSWAP_ENC, DBITSWAP_DESC, ISA_MIPS64R6; def DCLO_R6 : DCLO_R6_ENC, DCLO_R6_DESC, ISA_MIPS64R6; def DCLZ_R6 : DCLZ_R6_ENC, DCLZ_R6_DESC, ISA_MIPS64R6; def DDIV : DDIV_ENC, DDIV_DESC, ISA_MIPS64R6; def DDIVU : DDIVU_ENC, DDIVU_DESC, ISA_MIPS64R6; def DMOD : DMOD_ENC, DMOD_DESC, ISA_MIPS64R6; def DMODU : DMODU_ENC, DMODU_DESC, ISA_MIPS64R6; def DLSA_R6 : DLSA_R6_ENC, DLSA_R6_DESC, ISA_MIPS64R6; def DMUH: DMUH_ENC, DMUH_DESC, ISA_MIPS64R6; def DMUHU: DMUHU_ENC, DMUHU_DESC, ISA_MIPS64R6; def DMUL_R6: DMUL_R6_ENC, DMUL_R6_DESC, ISA_MIPS64R6; def DMULU: DMULU_ENC, DMULU_DESC, ISA_MIPS64R6; def LLD_R6 : LLD_R6_ENC, LLD_R6_DESC, ISA_MIPS64R6; } def LDPC: LDPC_ENC, LDPC_DESC, ISA_MIPS64R6; def SCD_R6 : SCD_R6_ENC, SCD_R6_DESC, ISA_MIPS32R6; let DecoderNamespace = "Mips32r6_64r6_GP64" in { def SELEQZ64 : SELEQZ_ENC, SELEQZ64_DESC, ISA_MIPS32R6, GPR_64; def SELNEZ64 : SELNEZ_ENC, SELNEZ64_DESC, ISA_MIPS32R6, GPR_64; def JR_HB64_R6 : JR_HB_R6_ENC, JR_HB64_R6_DESC, ISA_MIPS32R6; } let AdditionalPredicates = [NotInMicroMips], DecoderNamespace = "Mips32r6_64r6_PTR64" in { def LL64_R6 : LL_R6_ENC, LL64_R6_DESC, PTR_64, ISA_MIPS64R6; def SC64_R6 : SC_R6_ENC, SC64_R6_DESC, PTR_64, ISA_MIPS64R6; } let DecoderNamespace = "Mips32r6_64r6_GP64" in { // Jump and Branch Instructions def JIALC64 : JIALC_ENC, JIALC64_DESC, ISA_MIPS64R6, GPR_64; def JIC64 : JIC_ENC, JIC64_DESC, ISA_MIPS64R6, GPR_64; def BEQC64 : BEQC_ENC, BEQC64_DESC, ISA_MIPS64R6, GPR_64; def BEQZC64 : BEQZC_ENC, BEQZC64_DESC, ISA_MIPS64R6, GPR_64; def BGEC64 : BGEC_ENC, BGEC64_DESC, ISA_MIPS64R6, GPR_64; def BGEUC64 : BGEUC_ENC, BGEUC64_DESC, ISA_MIPS64R6, GPR_64; def BGTZC64 : BGTZC_ENC, BGTZC64_DESC, ISA_MIPS64R6, GPR_64; def BLEZC64 : BLEZC_ENC, BLEZC64_DESC, ISA_MIPS64R6, GPR_64; def BLTC64 : BLTC_ENC, BLTC64_DESC, ISA_MIPS64R6, GPR_64; def BLTUC64 : 
BLTUC_ENC, BLTUC64_DESC, ISA_MIPS64R6, GPR_64; def BNEC64 : BNEC_ENC, BNEC64_DESC, ISA_MIPS64R6, GPR_64; def BNEZC64 : BNEZC_ENC, BNEZC64_DESC, ISA_MIPS64R6, GPR_64; } let DecoderNamespace = "Mips32r6_64r6_BranchZero" in { def BLTZC64 : BLTZC_ENC, BLTZC64_DESC, ISA_MIPS64R6, GPR_64; def BGEZC64 : BGEZC_ENC, BGEZC64_DESC, ISA_MIPS64R6, GPR_64; } let AdditionalPredicates = [NotInMicroMips] in { def CRC32D : R6MMR6Rel, CRC32D_ENC, CRC32D_DESC, ISA_MIPS64R6, ASE_CRC; def CRC32CD : R6MMR6Rel, CRC32CD_ENC, CRC32CD_DESC, ISA_MIPS64R6, ASE_CRC; } //===----------------------------------------------------------------------===// // // Instruction Aliases // //===----------------------------------------------------------------------===// def : MipsInstAlias<"jr $rs", (JALR64 ZERO_64, GPR64Opnd:$rs), 1>, ISA_MIPS64R6; def : MipsInstAlias<"jrc $rs", (JIC64 GPR64Opnd:$rs, 0), 1>, ISA_MIPS64R6; def : MipsInstAlias<"jalrc $rs", (JIALC64 GPR64Opnd:$rs, 0), 1>, ISA_MIPS64R6; //===----------------------------------------------------------------------===// // // Patterns and Pseudo Instructions // //===----------------------------------------------------------------------===// // i64 selects def : MipsPat<(select i64:$cond, i64:$t, i64:$f), (OR64 (SELNEZ64 i64:$t, i64:$cond), (SELEQZ64 i64:$f, i64:$cond))>, ISA_MIPS64R6; def : MipsPat<(select (i32 (seteq i64:$cond, immz)), i64:$t, i64:$f), (OR64 (SELEQZ64 i64:$t, i64:$cond), (SELNEZ64 i64:$f, i64:$cond))>, ISA_MIPS64R6; def : MipsPat<(select (i32 (setne i64:$cond, immz)), i64:$t, i64:$f), (OR64 (SELNEZ64 i64:$t, i64:$cond), (SELEQZ64 i64:$f, i64:$cond))>, ISA_MIPS64R6; def : MipsPat<(select (i32 (seteq i64:$cond, immZExt16_64:$imm)), i64:$t, i64:$f), (OR64 (SELEQZ64 i64:$t, (XORi64 i64:$cond, immZExt16_64:$imm)), (SELNEZ64 i64:$f, (XORi64 i64:$cond, immZExt16_64:$imm)))>, ISA_MIPS64R6; def : MipsPat<(select (i32 (setne i64:$cond, immZExt16_64:$imm)), i64:$t, i64:$f), (OR64 (SELNEZ64 i64:$t, (XORi64 i64:$cond, immZExt16_64:$imm)), (SELEQZ64 i64:$f, (XORi64 i64:$cond, immZExt16_64:$imm)))>, ISA_MIPS64R6; def : MipsPat< (select (i32 (setgt i64:$cond, immSExt16Plus1:$imm)), i64:$t, i64:$f), (OR64 (SELEQZ64 i64:$t, (SUBREG_TO_REG (i64 0), (SLTi64 i64:$cond, (Plus1 imm:$imm)), sub_32)), (SELNEZ64 i64:$f, (SUBREG_TO_REG (i64 0), (SLTi64 i64:$cond, (Plus1 imm:$imm)), sub_32)))>, ISA_MIPS64R6; def : MipsPat< (select (i32 (setugt i64:$cond, immSExt16Plus1:$imm)), i64:$t, i64:$f), (OR64 (SELEQZ64 i64:$t, (SUBREG_TO_REG (i64 0), (SLTiu64 i64:$cond, (Plus1 imm:$imm)), sub_32)), (SELNEZ64 i64:$f, (SUBREG_TO_REG (i64 0), (SLTiu64 i64:$cond, (Plus1 imm:$imm)), sub_32)))>, ISA_MIPS64R6; def : MipsPat<(select (i32 (setne i64:$cond, immz)), i64:$t, immz), (SELNEZ64 i64:$t, i64:$cond)>, ISA_MIPS64R6; def : MipsPat<(select (i32 (seteq i64:$cond, immz)), i64:$t, immz), (SELEQZ64 i64:$t, i64:$cond)>, ISA_MIPS64R6; def : MipsPat<(select (i32 (setne i64:$cond, immz)), immz, i64:$f), (SELEQZ64 i64:$f, i64:$cond)>, ISA_MIPS64R6; def : MipsPat<(select (i32 (seteq i64:$cond, immz)), immz, i64:$f), (SELNEZ64 i64:$f, i64:$cond)>, ISA_MIPS64R6; // i64 selects from an i32 comparison // One complicating factor here is that bits 32-63 of an i32 are undefined. // FIXME: Ideally, setcc would always produce an i64 on MIPS64 targets. // This would allow us to remove the sign-extensions here. 
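//
// A host-side sketch of how the selects below are composed (names are
// illustrative): selnez/seleqz pass their value operand through only when
// the condition is nonzero/zero, so OR-ing the two halves gives a
// branchless select. An i32 condition is first sign-extended (SLL64_32)
// because its upper 32 bits are undefined.
//
//   #include <cstdint>
//
//   static uint64_t selnez(uint64_t Val, uint64_t Cond) { return Cond ? Val : 0; }
//   static uint64_t seleqz(uint64_t Val, uint64_t Cond) { return Cond ? 0 : Val; }
//
//   static uint64_t select64(uint64_t Cond, uint64_t T, uint64_t F) {
//     return selnez(T, Cond) | seleqz(F, Cond);
//   }
//
//   static uint64_t select64_i32cond(uint32_t Cond32, uint64_t T, uint64_t F) {
//     // Normalize the condition the way SLL64_32 does before comparing.
//     uint64_t Cond = static_cast<uint64_t>(
//         static_cast<int64_t>(static_cast<int32_t>(Cond32)));
//     return select64(Cond, T, F);
//   }
//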
def : MipsPat<(select i32:$cond, i64:$t, i64:$f), (OR64 (SELNEZ64 i64:$t, (SLL64_32 i32:$cond)), (SELEQZ64 i64:$f, (SLL64_32 i32:$cond)))>, ISA_MIPS64R6; def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i64:$t, i64:$f), (OR64 (SELEQZ64 i64:$t, (SLL64_32 i32:$cond)), (SELNEZ64 i64:$f, (SLL64_32 i32:$cond)))>, ISA_MIPS64R6; def : MipsPat<(select (i32 (setne i32:$cond, immz)), i64:$t, i64:$f), (OR64 (SELNEZ64 i64:$t, (SLL64_32 i32:$cond)), (SELEQZ64 i64:$f, (SLL64_32 i32:$cond)))>, ISA_MIPS64R6; def : MipsPat<(select (i32 (seteq i32:$cond, immZExt16:$imm)), i64:$t, i64:$f), (OR64 (SELEQZ64 i64:$t, (SLL64_32 (XORi i32:$cond, immZExt16:$imm))), (SELNEZ64 i64:$f, (SLL64_32 (XORi i32:$cond, immZExt16:$imm))))>, ISA_MIPS64R6; def : MipsPat<(select (i32 (setne i32:$cond, immZExt16:$imm)), i64:$t, i64:$f), (OR64 (SELNEZ64 i64:$t, (SLL64_32 (XORi i32:$cond, immZExt16:$imm))), (SELEQZ64 i64:$f, (SLL64_32 (XORi i32:$cond, immZExt16:$imm))))>, ISA_MIPS64R6; def : MipsPat<(select i32:$cond, i64:$t, immz), (SELNEZ64 i64:$t, (SLL64_32 i32:$cond))>, ISA_MIPS64R6; def : MipsPat<(select (i32 (setne i32:$cond, immz)), i64:$t, immz), (SELNEZ64 i64:$t, (SLL64_32 i32:$cond))>, ISA_MIPS64R6; def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i64:$t, immz), (SELEQZ64 i64:$t, (SLL64_32 i32:$cond))>, ISA_MIPS64R6; def : MipsPat<(select i32:$cond, immz, i64:$f), (SELEQZ64 i64:$f, (SLL64_32 i32:$cond))>, ISA_MIPS64R6; def : MipsPat<(select (i32 (setne i32:$cond, immz)), immz, i64:$f), (SELEQZ64 i64:$f, (SLL64_32 i32:$cond))>, ISA_MIPS64R6; def : MipsPat<(select (i32 (seteq i32:$cond, immz)), immz, i64:$f), (SELNEZ64 i64:$f, (SLL64_32 i32:$cond))>, ISA_MIPS64R6; // Patterns used for matching away redundant sign extensions. // MIPS32 arithmetic instructions sign extend their result implicitly. 
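//
// An illustrative C++ sketch of why these extensions are redundant (the
// helper below is hypothetical): a 32-bit arithmetic instruction on MIPS64
// already leaves its result sign-extended across the full register, so
// matching (sext (i32 op)) only needs to wrap the 32-bit result with
// INSERT_SUBREG.
//
//   #include <cstdint>
//
//   static int64_t sext_of_mul(int32_t A, int32_t B) {
//     // What the 32-bit multiply produces (truncated to 32 bits, as the
//     // hardware does)...
//     int32_t Lo = static_cast<int32_t>(static_cast<int64_t>(A) * B);
//     // ...is already the value the explicit sign extension would compute.
//     return static_cast<int64_t>(Lo);
//   }
//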
+def : MipsPat<(i64 (sext (i32 (mul GPR32:$src, GPR32:$src2)))),
+              (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                             (MUL_R6 GPR32:$src, GPR32:$src2), sub_32)>, ISA_MIPS64R6;
def : MipsPat<(i64 (sext (i32 (sdiv GPR32:$src, GPR32:$src2)))),
              (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
                             (DIV GPR32:$src, GPR32:$src2), sub_32)>, ISA_MIPS64R6;
def : MipsPat<(i64 (sext (i32 (udiv GPR32:$src, GPR32:$src2)))),
              (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
                             (DIVU GPR32:$src, GPR32:$src2), sub_32)>, ISA_MIPS64R6;
def : MipsPat<(i64 (sext (i32 (srem GPR32:$src, GPR32:$src2)))),
              (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
                             (MOD GPR32:$src, GPR32:$src2), sub_32)>, ISA_MIPS64R6;
def : MipsPat<(i64 (sext (i32 (urem GPR32:$src, GPR32:$src2)))),
              (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
                             (MODU GPR32:$src, GPR32:$src2), sub_32)>, ISA_MIPS64R6;

// Pseudo instructions
let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
                            NoIndirectJumpGuards] in {
  def TAILCALL64R6REG : TailCallRegR6, ISA_MIPS64R6;
  def PseudoIndirectBranch64R6 : PseudoIndirectBranchBaseR6, ISA_MIPS64R6;
}
let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
                            UseIndirectJumpsHazard] in {
  def TAILCALLHB64R6REG : TailCallReg, ISA_MIPS64R6;
  def PseudoIndrectHazardBranch64R6 : PseudoIndirectBranchBase, ISA_MIPS64R6;
}
Index: vendor/llvm/dist-release_70/lib/Target/Mips/MipsFastISel.cpp
===================================================================
--- vendor/llvm/dist-release_70/lib/Target/Mips/MipsFastISel.cpp	(revision 341364)
+++ vendor/llvm/dist-release_70/lib/Target/Mips/MipsFastISel.cpp	(revision 341365)
@@ -1,2123 +1,2128 @@
//===- MipsFastISel.cpp - Mips FastISel implementation --------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the MIPS-specific support for the FastISel class.
/// Some of the target-specific code is generated by tablegen in the file
/// MipsGenFastISel.inc, which is #included here.
/// //===----------------------------------------------------------------------===// #include "MCTargetDesc/MipsABIInfo.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "MipsCCState.h" #include "MipsISelLowering.h" #include "MipsInstrInfo.h" #include "MipsMachineFunction.h" #include "MipsSubtarget.h" #include "MipsTargetMachine.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include #define DEBUG_TYPE "mips-fastisel" using namespace llvm; namespace { class MipsFastISel final : public FastISel { // All possible address modes. class Address { public: using BaseKind = enum { RegBase, FrameIndexBase }; private: BaseKind Kind = RegBase; union { unsigned Reg; int FI; } Base; int64_t Offset = 0; const GlobalValue *GV = nullptr; public: // Innocuous defaults for our address. Address() { Base.Reg = 0; } void setKind(BaseKind K) { Kind = K; } BaseKind getKind() const { return Kind; } bool isRegBase() const { return Kind == RegBase; } bool isFIBase() const { return Kind == FrameIndexBase; } void setReg(unsigned Reg) { assert(isRegBase() && "Invalid base register access!"); Base.Reg = Reg; } unsigned getReg() const { assert(isRegBase() && "Invalid base register access!"); return Base.Reg; } void setFI(unsigned FI) { assert(isFIBase() && "Invalid base frame index access!"); Base.FI = FI; } unsigned getFI() const { assert(isFIBase() && "Invalid base frame index access!"); return Base.FI; } void setOffset(int64_t Offset_) { Offset = Offset_; } int64_t getOffset() const { return Offset; } void setGlobalValue(const GlobalValue *G) { GV = G; } const GlobalValue *getGlobalValue() { return GV; } }; /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can /// make the right decision when generating code for different targets. const TargetMachine &TM; const MipsSubtarget *Subtarget; const TargetInstrInfo &TII; const TargetLowering &TLI; MipsFunctionInfo *MFI; // Convenience variables to avoid some queries. 
LLVMContext *Context; bool fastLowerArguments() override; bool fastLowerCall(CallLoweringInfo &CLI) override; bool fastLowerIntrinsicCall(const IntrinsicInst *II) override; bool UnsupportedFPMode; // To allow fast-isel to proceed and just not handle // floating point but not reject doing fast-isel in other // situations private: // Selection routines. bool selectLogicalOp(const Instruction *I); bool selectLoad(const Instruction *I); bool selectStore(const Instruction *I); bool selectBranch(const Instruction *I); bool selectSelect(const Instruction *I); bool selectCmp(const Instruction *I); bool selectFPExt(const Instruction *I); bool selectFPTrunc(const Instruction *I); bool selectFPToInt(const Instruction *I, bool IsSigned); bool selectRet(const Instruction *I); bool selectTrunc(const Instruction *I); bool selectIntExt(const Instruction *I); bool selectShift(const Instruction *I); bool selectDivRem(const Instruction *I, unsigned ISDOpcode); // Utility helper routines. bool isTypeLegal(Type *Ty, MVT &VT); bool isTypeSupported(Type *Ty, MVT &VT); bool isLoadTypeLegal(Type *Ty, MVT &VT); bool computeAddress(const Value *Obj, Address &Addr); bool computeCallAddress(const Value *V, Address &Addr); void simplifyAddress(Address &Addr); // Emit helper routines. bool emitCmp(unsigned DestReg, const CmpInst *CI); bool emitLoad(MVT VT, unsigned &ResultReg, Address &Addr, unsigned Alignment = 0); bool emitStore(MVT VT, unsigned SrcReg, Address Addr, MachineMemOperand *MMO = nullptr); bool emitStore(MVT VT, unsigned SrcReg, Address &Addr, unsigned Alignment = 0); unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt); bool emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg, bool IsZExt); bool emitIntZExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg); bool emitIntSExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg); bool emitIntSExt32r1(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg); bool emitIntSExt32r2(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg); unsigned getRegEnsuringSimpleIntegerWidening(const Value *, bool IsUnsigned); unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS, const Value *RHS); unsigned materializeFP(const ConstantFP *CFP, MVT VT); unsigned materializeGV(const GlobalValue *GV, MVT VT); unsigned materializeInt(const Constant *C, MVT VT); unsigned materialize32BitInt(int64_t Imm, const TargetRegisterClass *RC); unsigned materializeExternalCallSym(MCSymbol *Syn); MachineInstrBuilder emitInst(unsigned Opc) { return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)); } MachineInstrBuilder emitInst(unsigned Opc, unsigned DstReg) { return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DstReg); } MachineInstrBuilder emitInstStore(unsigned Opc, unsigned SrcReg, unsigned MemReg, int64_t MemOffset) { return emitInst(Opc).addReg(SrcReg).addReg(MemReg).addImm(MemOffset); } MachineInstrBuilder emitInstLoad(unsigned Opc, unsigned DstReg, unsigned MemReg, int64_t MemOffset) { return emitInst(Opc, DstReg).addReg(MemReg).addImm(MemOffset); } unsigned fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill); // for some reason, this default is not generated by tablegen // so we explicitly generate it here. 
unsigned fastEmitInst_riir(uint64_t inst, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t imm1, uint64_t imm2, unsigned Op3, bool Op3IsKill) { return 0; } // Call handling routines. private: CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const; bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl &ArgVTs, unsigned &NumBytes); bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes); const MipsABIInfo &getABI() const { return static_cast(TM).getABI(); } public: // Backend specific FastISel code. explicit MipsFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) : FastISel(funcInfo, libInfo), TM(funcInfo.MF->getTarget()), Subtarget(&funcInfo.MF->getSubtarget()), TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()) { MFI = funcInfo.MF->getInfo(); Context = &funcInfo.Fn->getContext(); UnsupportedFPMode = Subtarget->isFP64bit() || Subtarget->useSoftFloat(); } unsigned fastMaterializeAlloca(const AllocaInst *AI) override; unsigned fastMaterializeConstant(const Constant *C) override; bool fastSelectInstruction(const Instruction *I) override; #include "MipsGenFastISel.inc" }; } // end anonymous namespace static bool CC_Mips(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) LLVM_ATTRIBUTE_UNUSED; static bool CC_MipsO32_FP32(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { llvm_unreachable("should not be called"); } static bool CC_MipsO32_FP64(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { llvm_unreachable("should not be called"); } #include "MipsGenCallingConv.inc" CCAssignFn *MipsFastISel::CCAssignFnForCall(CallingConv::ID CC) const { return CC_MipsO32; } unsigned MipsFastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS, const Value *RHS) { // Canonicalize immediates to the RHS first. 
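  // AND, OR and XOR are commutative, so moving a constant operand to the
  // right means the code below only has to handle one shape: a ConstantInt
  // on the RHS is materialized directly, anything else goes through
  // getRegForValue().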
if (isa(LHS) && !isa(RHS)) std::swap(LHS, RHS); unsigned Opc; switch (ISDOpc) { case ISD::AND: Opc = Mips::AND; break; case ISD::OR: Opc = Mips::OR; break; case ISD::XOR: Opc = Mips::XOR; break; default: llvm_unreachable("unexpected opcode"); } unsigned LHSReg = getRegForValue(LHS); if (!LHSReg) return 0; unsigned RHSReg; if (const auto *C = dyn_cast(RHS)) RHSReg = materializeInt(C, MVT::i32); else RHSReg = getRegForValue(RHS); if (!RHSReg) return 0; unsigned ResultReg = createResultReg(&Mips::GPR32RegClass); if (!ResultReg) return 0; emitInst(Opc, ResultReg).addReg(LHSReg).addReg(RHSReg); return ResultReg; } unsigned MipsFastISel::fastMaterializeAlloca(const AllocaInst *AI) { assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i32 && "Alloca should always return a pointer."); DenseMap::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) { unsigned ResultReg = createResultReg(&Mips::GPR32RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::LEA_ADDiu), ResultReg) .addFrameIndex(SI->second) .addImm(0); return ResultReg; } return 0; } unsigned MipsFastISel::materializeInt(const Constant *C, MVT VT) { if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) return 0; const TargetRegisterClass *RC = &Mips::GPR32RegClass; const ConstantInt *CI = cast(C); return materialize32BitInt(CI->getZExtValue(), RC); } unsigned MipsFastISel::materialize32BitInt(int64_t Imm, const TargetRegisterClass *RC) { unsigned ResultReg = createResultReg(RC); if (isInt<16>(Imm)) { unsigned Opc = Mips::ADDiu; emitInst(Opc, ResultReg).addReg(Mips::ZERO).addImm(Imm); return ResultReg; } else if (isUInt<16>(Imm)) { emitInst(Mips::ORi, ResultReg).addReg(Mips::ZERO).addImm(Imm); return ResultReg; } unsigned Lo = Imm & 0xFFFF; unsigned Hi = (Imm >> 16) & 0xFFFF; if (Lo) { // Both Lo and Hi have nonzero bits. unsigned TmpReg = createResultReg(RC); emitInst(Mips::LUi, TmpReg).addImm(Hi); emitInst(Mips::ORi, ResultReg).addReg(TmpReg).addImm(Lo); } else { emitInst(Mips::LUi, ResultReg).addImm(Hi); } return ResultReg; } unsigned MipsFastISel::materializeFP(const ConstantFP *CFP, MVT VT) { if (UnsupportedFPMode) return 0; int64_t Imm = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); if (VT == MVT::f32) { const TargetRegisterClass *RC = &Mips::FGR32RegClass; unsigned DestReg = createResultReg(RC); unsigned TempReg = materialize32BitInt(Imm, &Mips::GPR32RegClass); emitInst(Mips::MTC1, DestReg).addReg(TempReg); return DestReg; } else if (VT == MVT::f64) { const TargetRegisterClass *RC = &Mips::AFGR64RegClass; unsigned DestReg = createResultReg(RC); unsigned TempReg1 = materialize32BitInt(Imm >> 32, &Mips::GPR32RegClass); unsigned TempReg2 = materialize32BitInt(Imm & 0xFFFFFFFF, &Mips::GPR32RegClass); emitInst(Mips::BuildPairF64, DestReg).addReg(TempReg2).addReg(TempReg1); return DestReg; } return 0; } unsigned MipsFastISel::materializeGV(const GlobalValue *GV, MVT VT) { // For now 32-bit only. if (VT != MVT::i32) return 0; const TargetRegisterClass *RC = &Mips::GPR32RegClass; unsigned DestReg = createResultReg(RC); const GlobalVariable *GVar = dyn_cast(GV); bool IsThreadLocal = GVar && GVar->isThreadLocal(); // TLS not supported at this time. 
if (IsThreadLocal) return 0; emitInst(Mips::LW, DestReg) .addReg(MFI->getGlobalBaseReg()) .addGlobalAddress(GV, 0, MipsII::MO_GOT); if ((GV->hasInternalLinkage() || (GV->hasLocalLinkage() && !isa(GV)))) { unsigned TempReg = createResultReg(RC); emitInst(Mips::ADDiu, TempReg) .addReg(DestReg) .addGlobalAddress(GV, 0, MipsII::MO_ABS_LO); DestReg = TempReg; } return DestReg; } unsigned MipsFastISel::materializeExternalCallSym(MCSymbol *Sym) { const TargetRegisterClass *RC = &Mips::GPR32RegClass; unsigned DestReg = createResultReg(RC); emitInst(Mips::LW, DestReg) .addReg(MFI->getGlobalBaseReg()) .addSym(Sym, MipsII::MO_GOT); return DestReg; } // Materialize a constant into a register, and return the register // number (or zero if we failed to handle it). unsigned MipsFastISel::fastMaterializeConstant(const Constant *C) { EVT CEVT = TLI.getValueType(DL, C->getType(), true); // Only handle simple types. if (!CEVT.isSimple()) return 0; MVT VT = CEVT.getSimpleVT(); if (const ConstantFP *CFP = dyn_cast(C)) return (UnsupportedFPMode) ? 0 : materializeFP(CFP, VT); else if (const GlobalValue *GV = dyn_cast(C)) return materializeGV(GV, VT); else if (isa(C)) return materializeInt(C, VT); return 0; } bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) { const User *U = nullptr; unsigned Opcode = Instruction::UserOp1; if (const Instruction *I = dyn_cast(Obj)) { // Don't walk into other basic blocks unless the object is an alloca from // another block, otherwise it may not have a virtual register assigned. if (FuncInfo.StaticAllocaMap.count(static_cast(Obj)) || FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { Opcode = I->getOpcode(); U = I; } } else if (const ConstantExpr *C = dyn_cast(Obj)) { Opcode = C->getOpcode(); U = C; } switch (Opcode) { default: break; case Instruction::BitCast: // Look through bitcasts. return computeAddress(U->getOperand(0), Addr); case Instruction::GetElementPtr: { Address SavedAddr = Addr; int64_t TmpOffset = Addr.getOffset(); // Iterate through the GEP folding the constants into offsets where // we can. gep_type_iterator GTI = gep_type_begin(U); for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i, ++GTI) { const Value *Op = *i; if (StructType *STy = GTI.getStructTypeOrNull()) { const StructLayout *SL = DL.getStructLayout(STy); unsigned Idx = cast(Op)->getZExtValue(); TmpOffset += SL->getElementOffset(Idx); } else { uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); while (true) { if (const ConstantInt *CI = dyn_cast(Op)) { // Constant-offset addressing. TmpOffset += CI->getSExtValue() * S; break; } if (canFoldAddIntoGEP(U, Op)) { // A compatible add with a constant operand. Fold the constant. ConstantInt *CI = cast(cast(Op)->getOperand(1)); TmpOffset += CI->getSExtValue() * S; // Iterate on the other operand. Op = cast(Op)->getOperand(0); continue; } // Unsupported goto unsupported_gep; } } } // Try to grab the base operand now. Addr.setOffset(TmpOffset); if (computeAddress(U->getOperand(0), Addr)) return true; // We failed, restore everything and try the other options. 
Addr = SavedAddr; unsupported_gep: break; } case Instruction::Alloca: { const AllocaInst *AI = cast(Obj); DenseMap::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) { Addr.setKind(Address::FrameIndexBase); Addr.setFI(SI->second); return true; } break; } } Addr.setReg(getRegForValue(Obj)); return Addr.getReg() != 0; } bool MipsFastISel::computeCallAddress(const Value *V, Address &Addr) { const User *U = nullptr; unsigned Opcode = Instruction::UserOp1; if (const auto *I = dyn_cast(V)) { // Check if the value is defined in the same basic block. This information // is crucial to know whether or not folding an operand is valid. if (I->getParent() == FuncInfo.MBB->getBasicBlock()) { Opcode = I->getOpcode(); U = I; } } else if (const auto *C = dyn_cast(V)) { Opcode = C->getOpcode(); U = C; } switch (Opcode) { default: break; case Instruction::BitCast: // Look past bitcasts if its operand is in the same BB. return computeCallAddress(U->getOperand(0), Addr); break; case Instruction::IntToPtr: // Look past no-op inttoptrs if its operand is in the same BB. if (TLI.getValueType(DL, U->getOperand(0)->getType()) == TLI.getPointerTy(DL)) return computeCallAddress(U->getOperand(0), Addr); break; case Instruction::PtrToInt: // Look past no-op ptrtoints if its operand is in the same BB. if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return computeCallAddress(U->getOperand(0), Addr); break; } if (const GlobalValue *GV = dyn_cast(V)) { Addr.setGlobalValue(GV); return true; } // If all else fails, try to materialize the value in a register. if (!Addr.getGlobalValue()) { Addr.setReg(getRegForValue(V)); return Addr.getReg() != 0; } return false; } bool MipsFastISel::isTypeLegal(Type *Ty, MVT &VT) { EVT evt = TLI.getValueType(DL, Ty, true); // Only handle simple types. if (evt == MVT::Other || !evt.isSimple()) return false; VT = evt.getSimpleVT(); // Handle all legal types, i.e. a register that will directly hold this // value. return TLI.isTypeLegal(VT); } bool MipsFastISel::isTypeSupported(Type *Ty, MVT &VT) { if (Ty->isVectorTy()) return false; if (isTypeLegal(Ty, VT)) return true; // If this is a type than can be sign or zero-extended to a basic operation // go ahead and accept it now. if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) return true; return false; } bool MipsFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) { if (isTypeLegal(Ty, VT)) return true; // We will extend this in a later patch: // If this is a type than can be sign or zero-extended to a basic operation // go ahead and accept it now. if (VT == MVT::i8 || VT == MVT::i16) return true; return false; } // Because of how EmitCmp is called with fast-isel, you can // end up with redundant "andi" instructions after the sequences emitted below. // We should try and solve this issue in the future. 
// bool MipsFastISel::emitCmp(unsigned ResultReg, const CmpInst *CI) { const Value *Left = CI->getOperand(0), *Right = CI->getOperand(1); bool IsUnsigned = CI->isUnsigned(); unsigned LeftReg = getRegEnsuringSimpleIntegerWidening(Left, IsUnsigned); if (LeftReg == 0) return false; unsigned RightReg = getRegEnsuringSimpleIntegerWidening(Right, IsUnsigned); if (RightReg == 0) return false; CmpInst::Predicate P = CI->getPredicate(); switch (P) { default: return false; case CmpInst::ICMP_EQ: { unsigned TempReg = createResultReg(&Mips::GPR32RegClass); emitInst(Mips::XOR, TempReg).addReg(LeftReg).addReg(RightReg); emitInst(Mips::SLTiu, ResultReg).addReg(TempReg).addImm(1); break; } case CmpInst::ICMP_NE: { unsigned TempReg = createResultReg(&Mips::GPR32RegClass); emitInst(Mips::XOR, TempReg).addReg(LeftReg).addReg(RightReg); emitInst(Mips::SLTu, ResultReg).addReg(Mips::ZERO).addReg(TempReg); break; } case CmpInst::ICMP_UGT: emitInst(Mips::SLTu, ResultReg).addReg(RightReg).addReg(LeftReg); break; case CmpInst::ICMP_ULT: emitInst(Mips::SLTu, ResultReg).addReg(LeftReg).addReg(RightReg); break; case CmpInst::ICMP_UGE: { unsigned TempReg = createResultReg(&Mips::GPR32RegClass); emitInst(Mips::SLTu, TempReg).addReg(LeftReg).addReg(RightReg); emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1); break; } case CmpInst::ICMP_ULE: { unsigned TempReg = createResultReg(&Mips::GPR32RegClass); emitInst(Mips::SLTu, TempReg).addReg(RightReg).addReg(LeftReg); emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1); break; } case CmpInst::ICMP_SGT: emitInst(Mips::SLT, ResultReg).addReg(RightReg).addReg(LeftReg); break; case CmpInst::ICMP_SLT: emitInst(Mips::SLT, ResultReg).addReg(LeftReg).addReg(RightReg); break; case CmpInst::ICMP_SGE: { unsigned TempReg = createResultReg(&Mips::GPR32RegClass); emitInst(Mips::SLT, TempReg).addReg(LeftReg).addReg(RightReg); emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1); break; } case CmpInst::ICMP_SLE: { unsigned TempReg = createResultReg(&Mips::GPR32RegClass); emitInst(Mips::SLT, TempReg).addReg(RightReg).addReg(LeftReg); emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1); break; } case CmpInst::FCMP_OEQ: case CmpInst::FCMP_UNE: case CmpInst::FCMP_OLT: case CmpInst::FCMP_OLE: case CmpInst::FCMP_OGT: case CmpInst::FCMP_OGE: { if (UnsupportedFPMode) return false; bool IsFloat = Left->getType()->isFloatTy(); bool IsDouble = Left->getType()->isDoubleTy(); if (!IsFloat && !IsDouble) return false; unsigned Opc, CondMovOpc; switch (P) { case CmpInst::FCMP_OEQ: Opc = IsFloat ? Mips::C_EQ_S : Mips::C_EQ_D32; CondMovOpc = Mips::MOVT_I; break; case CmpInst::FCMP_UNE: Opc = IsFloat ? Mips::C_EQ_S : Mips::C_EQ_D32; CondMovOpc = Mips::MOVF_I; break; case CmpInst::FCMP_OLT: Opc = IsFloat ? Mips::C_OLT_S : Mips::C_OLT_D32; CondMovOpc = Mips::MOVT_I; break; case CmpInst::FCMP_OLE: Opc = IsFloat ? Mips::C_OLE_S : Mips::C_OLE_D32; CondMovOpc = Mips::MOVT_I; break; case CmpInst::FCMP_OGT: Opc = IsFloat ? Mips::C_ULE_S : Mips::C_ULE_D32; CondMovOpc = Mips::MOVF_I; break; case CmpInst::FCMP_OGE: Opc = IsFloat ? 
Mips::C_ULT_S : Mips::C_ULT_D32; CondMovOpc = Mips::MOVF_I; break; default: llvm_unreachable("Only switching of a subset of CCs."); } unsigned RegWithZero = createResultReg(&Mips::GPR32RegClass); unsigned RegWithOne = createResultReg(&Mips::GPR32RegClass); emitInst(Mips::ADDiu, RegWithZero).addReg(Mips::ZERO).addImm(0); emitInst(Mips::ADDiu, RegWithOne).addReg(Mips::ZERO).addImm(1); emitInst(Opc).addReg(Mips::FCC0, RegState::Define).addReg(LeftReg) .addReg(RightReg); emitInst(CondMovOpc, ResultReg) .addReg(RegWithOne) .addReg(Mips::FCC0) .addReg(RegWithZero); break; } } return true; } bool MipsFastISel::emitLoad(MVT VT, unsigned &ResultReg, Address &Addr, unsigned Alignment) { // // more cases will be handled here in following patches. // unsigned Opc; switch (VT.SimpleTy) { case MVT::i32: ResultReg = createResultReg(&Mips::GPR32RegClass); Opc = Mips::LW; break; case MVT::i16: ResultReg = createResultReg(&Mips::GPR32RegClass); Opc = Mips::LHu; break; case MVT::i8: ResultReg = createResultReg(&Mips::GPR32RegClass); Opc = Mips::LBu; break; case MVT::f32: if (UnsupportedFPMode) return false; ResultReg = createResultReg(&Mips::FGR32RegClass); Opc = Mips::LWC1; break; case MVT::f64: if (UnsupportedFPMode) return false; ResultReg = createResultReg(&Mips::AFGR64RegClass); Opc = Mips::LDC1; break; default: return false; } if (Addr.isRegBase()) { simplifyAddress(Addr); emitInstLoad(Opc, ResultReg, Addr.getReg(), Addr.getOffset()); return true; } if (Addr.isFIBase()) { unsigned FI = Addr.getFI(); unsigned Align = 4; int64_t Offset = Addr.getOffset(); MachineFrameInfo &MFI = MF->getFrameInfo(); MachineMemOperand *MMO = MF->getMachineMemOperand( MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addFrameIndex(FI) .addImm(Offset) .addMemOperand(MMO); return true; } return false; } bool MipsFastISel::emitStore(MVT VT, unsigned SrcReg, Address &Addr, unsigned Alignment) { // // more cases will be handled here in following patches. 
// unsigned Opc; switch (VT.SimpleTy) { case MVT::i8: Opc = Mips::SB; break; case MVT::i16: Opc = Mips::SH; break; case MVT::i32: Opc = Mips::SW; break; case MVT::f32: if (UnsupportedFPMode) return false; Opc = Mips::SWC1; break; case MVT::f64: if (UnsupportedFPMode) return false; Opc = Mips::SDC1; break; default: return false; } if (Addr.isRegBase()) { simplifyAddress(Addr); emitInstStore(Opc, SrcReg, Addr.getReg(), Addr.getOffset()); return true; } if (Addr.isFIBase()) { unsigned FI = Addr.getFI(); unsigned Align = 4; int64_t Offset = Addr.getOffset(); MachineFrameInfo &MFI = MF->getFrameInfo(); MachineMemOperand *MMO = MF->getMachineMemOperand( MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) .addReg(SrcReg) .addFrameIndex(FI) .addImm(Offset) .addMemOperand(MMO); return true; } return false; } bool MipsFastISel::selectLogicalOp(const Instruction *I) { MVT VT; if (!isTypeSupported(I->getType(), VT)) return false; unsigned ResultReg; switch (I->getOpcode()) { default: llvm_unreachable("Unexpected instruction."); case Instruction::And: ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1)); break; case Instruction::Or: ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1)); break; case Instruction::Xor: ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1)); break; } if (!ResultReg) return false; updateValueMap(I, ResultReg); return true; } bool MipsFastISel::selectLoad(const Instruction *I) { // Atomic loads need special handling. if (cast(I)->isAtomic()) return false; // Verify we have a legal type before going any further. MVT VT; if (!isLoadTypeLegal(I->getType(), VT)) return false; // See if we can handle this address. Address Addr; if (!computeAddress(I->getOperand(0), Addr)) return false; unsigned ResultReg; if (!emitLoad(VT, ResultReg, Addr, cast(I)->getAlignment())) return false; updateValueMap(I, ResultReg); return true; } bool MipsFastISel::selectStore(const Instruction *I) { Value *Op0 = I->getOperand(0); unsigned SrcReg = 0; // Atomic stores need special handling. if (cast(I)->isAtomic()) return false; // Verify we have a legal type before going any further. MVT VT; if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT)) return false; // Get the value to be stored into a register. SrcReg = getRegForValue(Op0); if (SrcReg == 0) return false; // See if we can handle this address. Address Addr; if (!computeAddress(I->getOperand(1), Addr)) return false; if (!emitStore(VT, SrcReg, Addr, cast(I)->getAlignment())) return false; return true; } // This can cause a redundant sltiu to be generated. // FIXME: try and eliminate this in a future patch. bool MipsFastISel::selectBranch(const Instruction *I) { const BranchInst *BI = cast(I); MachineBasicBlock *BrBB = FuncInfo.MBB; // // TBB is the basic block for the case where the comparison is true. // FBB is the basic block for the case where the comparison is false. // if (cond) goto TBB // goto FBB // TBB: // MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; // For now, just try the simplest case where it's fed by a compare. 
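emitCmp (above) always materializes the comparison result as 0 or 1 in a GPR32, which is why the branch emitted just below can be a single BGTZ on that register. A standalone host-side model of one of those sequences (ICMP_ULE via SLTu then XORi); the function name is illustrative, not LLVM API:

// Replays the SLTu/XORi pair on plain 32-bit integers and checks that
// "result > 0" (what BGTZ tests) matches the IR condition.
#include <cassert>
#include <cstdint>
#include <initializer_list>

static uint32_t cmpULE(uint32_t LHS, uint32_t RHS) {
  uint32_t Tmp = (RHS < LHS) ? 1u : 0u; // SLTu Tmp, RHS, LHS
  return Tmp ^ 1u;                      // XORi Res, Tmp, 1
}

int main() {
  for (uint32_t L : {0u, 1u, 7u, 0xffffffffu})
    for (uint32_t R : {0u, 1u, 7u, 0xffffffffu}) {
      uint32_t Res = cmpULE(L, R);
      assert(Res == (L <= R ? 1u : 0u));
      // BGTZ takes the branch exactly when the IR condition holds.
      assert((static_cast<int32_t>(Res) > 0) == (L <= R));
    }
  return 0;
}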
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+    MVT CIMVT =
+        TLI.getValueType(DL, CI->getOperand(0)->getType(), true).getSimpleVT();
+    if (CIMVT == MVT::i1)
+      return false;
+
    unsigned CondReg = getRegForValue(CI);
    BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::BGTZ))
        .addReg(CondReg)
        .addMBB(TBB);
    finishCondBranch(BI->getParent(), TBB, FBB);
    return true;
  }
  return false;
}

bool MipsFastISel::selectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);
  unsigned ResultReg = createResultReg(&Mips::GPR32RegClass);
  if (!emitCmp(ResultReg, CI))
    return false;
  updateValueMap(I, ResultReg);
  return true;
}

// Attempt to fast-select a floating-point extend instruction.
bool MipsFastISel::selectFPExt(const Instruction *I) {
  if (UnsupportedFPMode)
    return false;
  Value *Src = I->getOperand(0);
  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);
  if (SrcVT != MVT::f32 || DestVT != MVT::f64)
    return false;
  unsigned SrcReg =
      getRegForValue(Src); // this must be a 32bit floating point register class
                           // maybe we should handle this differently
  if (!SrcReg)
    return false;
  unsigned DestReg = createResultReg(&Mips::AFGR64RegClass);
  emitInst(Mips::CVT_D32_S, DestReg).addReg(SrcReg);
  updateValueMap(I, DestReg);
  return true;
}

bool MipsFastISel::selectSelect(const Instruction *I) {
  assert(isa<SelectInst>(I) && "Expected a select instruction.");
  LLVM_DEBUG(dbgs() << "selectSelect\n");

  MVT VT;
  if (!isTypeSupported(I->getType(), VT) || UnsupportedFPMode) {
    LLVM_DEBUG(
        dbgs() << ".. .. gave up (!isTypeSupported || UnsupportedFPMode)\n");
    return false;
  }

  unsigned CondMovOpc;
  const TargetRegisterClass *RC;
  if (VT.isInteger() && !VT.isVector() && VT.getSizeInBits() <= 32) {
    CondMovOpc = Mips::MOVN_I_I;
    RC = &Mips::GPR32RegClass;
  } else if (VT == MVT::f32) {
    CondMovOpc = Mips::MOVN_I_S;
    RC = &Mips::FGR32RegClass;
  } else if (VT == MVT::f64) {
    CondMovOpc = Mips::MOVN_I_D32;
    RC = &Mips::AFGR64RegClass;
  } else
    return false;

  const SelectInst *SI = cast<SelectInst>(I);
  const Value *Cond = SI->getCondition();
  unsigned Src1Reg = getRegForValue(SI->getTrueValue());
  unsigned Src2Reg = getRegForValue(SI->getFalseValue());
  unsigned CondReg = getRegForValue(Cond);

  if (!Src1Reg || !Src2Reg || !CondReg)
    return false;

  unsigned ZExtCondReg = createResultReg(&Mips::GPR32RegClass);
  if (!ZExtCondReg)
    return false;

  if (!emitIntExt(MVT::i1, CondReg, MVT::i32, ZExtCondReg, true))
    return false;

  unsigned ResultReg = createResultReg(RC);
  unsigned TempReg = createResultReg(RC);

  if (!ResultReg || !TempReg)
    return false;

  emitInst(TargetOpcode::COPY, TempReg).addReg(Src2Reg);
  emitInst(CondMovOpc, ResultReg)
      .addReg(Src1Reg).addReg(ZExtCondReg).addReg(TempReg);
  updateValueMap(I, ResultReg);
  return true;
}

// Attempt to fast-select a floating-point truncate instruction.
bool MipsFastISel::selectFPTrunc(const Instruction *I) {
  if (UnsupportedFPMode)
    return false;
  Value *Src = I->getOperand(0);
  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  if (SrcVT != MVT::f64 || DestVT != MVT::f32)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  unsigned DestReg = createResultReg(&Mips::FGR32RegClass);
  if (!DestReg)
    return false;

  emitInst(Mips::CVT_S_D32, DestReg).addReg(SrcReg);
  updateValueMap(I, DestReg);
  return true;
}

// Attempt to fast-select a floating-point-to-integer conversion.
bool MipsFastISel::selectFPToInt(const Instruction *I, bool IsSigned) { if (UnsupportedFPMode) return false; MVT DstVT, SrcVT; if (!IsSigned) return false; // We don't handle this case yet. There is no native // instruction for this but it can be synthesized. Type *DstTy = I->getType(); if (!isTypeLegal(DstTy, DstVT)) return false; if (DstVT != MVT::i32) return false; Value *Src = I->getOperand(0); Type *SrcTy = Src->getType(); if (!isTypeLegal(SrcTy, SrcVT)) return false; if (SrcVT != MVT::f32 && SrcVT != MVT::f64) return false; unsigned SrcReg = getRegForValue(Src); if (SrcReg == 0) return false; // Determine the opcode for the conversion, which takes place // entirely within FPRs. unsigned DestReg = createResultReg(&Mips::GPR32RegClass); unsigned TempReg = createResultReg(&Mips::FGR32RegClass); unsigned Opc = (SrcVT == MVT::f32) ? Mips::TRUNC_W_S : Mips::TRUNC_W_D32; // Generate the convert. emitInst(Opc, TempReg).addReg(SrcReg); emitInst(Mips::MFC1, DestReg).addReg(TempReg); updateValueMap(I, DestReg); return true; } bool MipsFastISel::processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl &OutVTs, unsigned &NumBytes) { CallingConv::ID CC = CLI.CallConv; SmallVector ArgLocs; CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context); CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC)); // Get a count of how many bytes are to be pushed on the stack. NumBytes = CCInfo.getNextStackOffset(); // This is the minimum argument area used for A0-A3. if (NumBytes < 16) NumBytes = 16; emitInst(Mips::ADJCALLSTACKDOWN).addImm(16).addImm(0); // Process the args. MVT firstMVT; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; const Value *ArgVal = CLI.OutVals[VA.getValNo()]; MVT ArgVT = OutVTs[VA.getValNo()]; if (i == 0) { firstMVT = ArgVT; if (ArgVT == MVT::f32) { VA.convertToReg(Mips::F12); } else if (ArgVT == MVT::f64) { VA.convertToReg(Mips::D6); } } else if (i == 1) { if ((firstMVT == MVT::f32) || (firstMVT == MVT::f64)) { if (ArgVT == MVT::f32) { VA.convertToReg(Mips::F14); } else if (ArgVT == MVT::f64) { VA.convertToReg(Mips::D7); } } } if (((ArgVT == MVT::i32) || (ArgVT == MVT::f32) || (ArgVT == MVT::i16) || (ArgVT == MVT::i8)) && VA.isMemLoc()) { switch (VA.getLocMemOffset()) { case 0: VA.convertToReg(Mips::A0); break; case 4: VA.convertToReg(Mips::A1); break; case 8: VA.convertToReg(Mips::A2); break; case 12: VA.convertToReg(Mips::A3); break; default: break; } } unsigned ArgReg = getRegForValue(ArgVal); if (!ArgReg) return false; // Handle arg promotion: SExt, ZExt, AExt. switch (VA.getLocInfo()) { case CCValAssign::Full: break; case CCValAssign::AExt: case CCValAssign::SExt: { MVT DestVT = VA.getLocVT(); MVT SrcVT = ArgVT; ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false); if (!ArgReg) return false; break; } case CCValAssign::ZExt: { MVT DestVT = VA.getLocVT(); MVT SrcVT = ArgVT; ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true); if (!ArgReg) return false; break; } default: llvm_unreachable("Unknown arg promotion!"); } // Now copy/store arg to correct locations. if (VA.isRegLoc() && !VA.needsCustom()) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); CLI.OutRegs.push_back(VA.getLocReg()); } else if (VA.needsCustom()) { llvm_unreachable("Mips does not use custom args."); return false; } else { // // FIXME: This path will currently return false. It was copied // from the AArch64 port and should be essentially fine for Mips too. 
// The work to finish up this path will be done in a follow-on patch. // assert(VA.isMemLoc() && "Assuming store on stack."); // Don't emit stores for undef values. if (isa(ArgVal)) continue; // Need to store on the stack. // FIXME: This alignment is incorrect but this path is disabled // for now (will return false). We need to determine the right alignment // based on the normal alignment for the underlying machine type. // unsigned ArgSize = alignTo(ArgVT.getSizeInBits(), 4); unsigned BEAlign = 0; if (ArgSize < 8 && !Subtarget->isLittle()) BEAlign = 8 - ArgSize; Address Addr; Addr.setKind(Address::RegBase); Addr.setReg(Mips::SP); Addr.setOffset(VA.getLocMemOffset() + BEAlign); unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); (void)(MMO); // if (!emitStore(ArgVT, ArgReg, Addr, MMO)) return false; // can't store on the stack yet. } } return true; } bool MipsFastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes) { CallingConv::ID CC = CLI.CallConv; emitInst(Mips::ADJCALLSTACKUP).addImm(16).addImm(0); if (RetVT != MVT::isVoid) { SmallVector RVLocs; MipsCCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); CCInfo.AnalyzeCallResult(CLI.Ins, RetCC_Mips, CLI.RetTy, CLI.Symbol ? CLI.Symbol->getName().data() : nullptr); // Only handle a single return value. if (RVLocs.size() != 1) return false; // Copy all of the result registers out of their specified physreg. MVT CopyVT = RVLocs[0].getValVT(); // Special handling for extended integers. if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16) CopyVT = MVT::i32; unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT)); if (!ResultReg) return false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(RVLocs[0].getLocReg()); CLI.InRegs.push_back(RVLocs[0].getLocReg()); CLI.ResultReg = ResultReg; CLI.NumResultRegs = 1; } return true; } bool MipsFastISel::fastLowerArguments() { LLVM_DEBUG(dbgs() << "fastLowerArguments\n"); if (!FuncInfo.CanLowerReturn) { LLVM_DEBUG(dbgs() << ".. gave up (!CanLowerReturn)\n"); return false; } const Function *F = FuncInfo.Fn; if (F->isVarArg()) { LLVM_DEBUG(dbgs() << ".. gave up (varargs)\n"); return false; } CallingConv::ID CC = F->getCallingConv(); if (CC != CallingConv::C) { LLVM_DEBUG(dbgs() << ".. gave up (calling convention is not C)\n"); return false; } std::array GPR32ArgRegs = {{Mips::A0, Mips::A1, Mips::A2, Mips::A3}}; std::array FGR32ArgRegs = {{Mips::F12, Mips::F14}}; std::array AFGR64ArgRegs = {{Mips::D6, Mips::D7}}; auto NextGPR32 = GPR32ArgRegs.begin(); auto NextFGR32 = FGR32ArgRegs.begin(); auto NextAFGR64 = AFGR64ArgRegs.begin(); struct AllocatedReg { const TargetRegisterClass *RC; unsigned Reg; AllocatedReg(const TargetRegisterClass *RC, unsigned Reg) : RC(RC), Reg(Reg) {} }; // Only handle simple cases. i.e. All arguments are directly mapped to // registers of the appropriate type. SmallVector Allocation; for (const auto &FormalArg : F->args()) { if (FormalArg.hasAttribute(Attribute::InReg) || FormalArg.hasAttribute(Attribute::StructRet) || FormalArg.hasAttribute(Attribute::ByVal)) { LLVM_DEBUG(dbgs() << ".. gave up (inreg, structret, byval)\n"); return false; } Type *ArgTy = FormalArg.getType(); if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) { LLVM_DEBUG(dbgs() << ".. 
gave up (struct, array, or vector)\n"); return false; } EVT ArgVT = TLI.getValueType(DL, ArgTy); LLVM_DEBUG(dbgs() << ".. " << FormalArg.getArgNo() << ": " << ArgVT.getEVTString() << "\n"); if (!ArgVT.isSimple()) { LLVM_DEBUG(dbgs() << ".. .. gave up (not a simple type)\n"); return false; } switch (ArgVT.getSimpleVT().SimpleTy) { case MVT::i1: case MVT::i8: case MVT::i16: if (!FormalArg.hasAttribute(Attribute::SExt) && !FormalArg.hasAttribute(Attribute::ZExt)) { // It must be any extend, this shouldn't happen for clang-generated IR // so just fall back on SelectionDAG. LLVM_DEBUG(dbgs() << ".. .. gave up (i8/i16 arg is not extended)\n"); return false; } if (NextGPR32 == GPR32ArgRegs.end()) { LLVM_DEBUG(dbgs() << ".. .. gave up (ran out of GPR32 arguments)\n"); return false; } LLVM_DEBUG(dbgs() << ".. .. GPR32(" << *NextGPR32 << ")\n"); Allocation.emplace_back(&Mips::GPR32RegClass, *NextGPR32++); // Allocating any GPR32 prohibits further use of floating point arguments. NextFGR32 = FGR32ArgRegs.end(); NextAFGR64 = AFGR64ArgRegs.end(); break; case MVT::i32: if (FormalArg.hasAttribute(Attribute::ZExt)) { // The O32 ABI does not permit a zero-extended i32. LLVM_DEBUG(dbgs() << ".. .. gave up (i32 arg is zero extended)\n"); return false; } if (NextGPR32 == GPR32ArgRegs.end()) { LLVM_DEBUG(dbgs() << ".. .. gave up (ran out of GPR32 arguments)\n"); return false; } LLVM_DEBUG(dbgs() << ".. .. GPR32(" << *NextGPR32 << ")\n"); Allocation.emplace_back(&Mips::GPR32RegClass, *NextGPR32++); // Allocating any GPR32 prohibits further use of floating point arguments. NextFGR32 = FGR32ArgRegs.end(); NextAFGR64 = AFGR64ArgRegs.end(); break; case MVT::f32: if (UnsupportedFPMode) { LLVM_DEBUG(dbgs() << ".. .. gave up (UnsupportedFPMode)\n"); return false; } if (NextFGR32 == FGR32ArgRegs.end()) { LLVM_DEBUG(dbgs() << ".. .. gave up (ran out of FGR32 arguments)\n"); return false; } LLVM_DEBUG(dbgs() << ".. .. FGR32(" << *NextFGR32 << ")\n"); Allocation.emplace_back(&Mips::FGR32RegClass, *NextFGR32++); // Allocating an FGR32 also allocates the super-register AFGR64, and // ABI rules require us to skip the corresponding GPR32. if (NextGPR32 != GPR32ArgRegs.end()) NextGPR32++; if (NextAFGR64 != AFGR64ArgRegs.end()) NextAFGR64++; break; case MVT::f64: if (UnsupportedFPMode) { LLVM_DEBUG(dbgs() << ".. .. gave up (UnsupportedFPMode)\n"); return false; } if (NextAFGR64 == AFGR64ArgRegs.end()) { LLVM_DEBUG(dbgs() << ".. .. gave up (ran out of AFGR64 arguments)\n"); return false; } LLVM_DEBUG(dbgs() << ".. .. AFGR64(" << *NextAFGR64 << ")\n"); Allocation.emplace_back(&Mips::AFGR64RegClass, *NextAFGR64++); // Allocating an FGR32 also allocates the super-register AFGR64, and // ABI rules require us to skip the corresponding GPR32 pair. if (NextGPR32 != GPR32ArgRegs.end()) NextGPR32++; if (NextGPR32 != GPR32ArgRegs.end()) NextGPR32++; if (NextFGR32 != FGR32ArgRegs.end()) NextFGR32++; break; default: LLVM_DEBUG(dbgs() << ".. .. gave up (unknown type)\n"); return false; } } for (const auto &FormalArg : F->args()) { unsigned ArgNo = FormalArg.getArgNo(); unsigned SrcReg = Allocation[ArgNo].Reg; unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, Allocation[ArgNo].RC); // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. // Without this, EmitLiveInCopies may eliminate the livein if its only // use is a bitcast (which isn't turned into an instruction). 
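The register walk above encodes the O32 rules this fast path is willing to handle: F12/F14 and D6/D7 shadow the integer argument registers, and any integer argument shuts off further FP argument registers rather than falling back to GPR pairs. A standalone model of that walk (names and structure are illustrative, not the LLVM API):

// Sketch of the argument-register allocation performed by fastLowerArguments.
#include <cassert>
#include <string>
#include <vector>

enum class ArgTy { I32, F32, F64 };

static std::vector<std::string> allocateO32Args(const std::vector<ArgTy> &Args) {
  const char *GPR[] = {"A0", "A1", "A2", "A3"};
  const char *FGR[] = {"F12", "F14"};
  const char *AFGR[] = {"D6", "D7"};
  unsigned G = 0, F = 0, D = 0;
  std::vector<std::string> Out;
  for (ArgTy T : Args) {
    switch (T) {
    case ArgTy::I32:
      if (G == 4)
        return {}; // out of GPR32s: give up, as the code above does
      Out.push_back(GPR[G++]);
      F = 2;       // an integer argument disables the FP argument registers
      D = 2;
      break;
    case ArgTy::F32:
      if (F == 2)
        return {};
      Out.push_back(FGR[F++]);
      if (G < 4) ++G; // skip the shadowed GPR32
      if (D < 2) ++D; // F12/F14 alias halves of D6/D7
      break;
    case ArgTy::F64:
      if (D == 2)
        return {};
      Out.push_back(AFGR[D++]);
      if (G < 4) ++G; // skip the shadowed GPR32 pair
      if (G < 4) ++G;
      if (F < 2) ++F;
      break;
    }
  }
  return Out;
}

int main() {
  assert((allocateO32Args({ArgTy::F64, ArgTy::I32}) ==
          std::vector<std::string>{"D6", "A2"}));
  assert((allocateO32Args({ArgTy::F32, ArgTy::F32, ArgTy::I32}) ==
          std::vector<std::string>{"F12", "F14", "A2"}));
  assert(allocateO32Args({ArgTy::I32, ArgTy::F64}).empty()); // falls back to SDAG
  return 0;
}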
unsigned ResultReg = createResultReg(Allocation[ArgNo].RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) .addReg(DstReg, getKillRegState(true)); updateValueMap(&FormalArg, ResultReg); } // Calculate the size of the incoming arguments area. // We currently reject all the cases where this would be non-zero. unsigned IncomingArgSizeInBytes = 0; // Account for the reserved argument area on ABI's that have one (O32). // It seems strange to do this on the caller side but it's necessary in // SelectionDAG's implementation. IncomingArgSizeInBytes = std::min(getABI().GetCalleeAllocdArgSizeInBytes(CC), IncomingArgSizeInBytes); MF->getInfo()->setFormalArgInfo(IncomingArgSizeInBytes, false); return true; } bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) { CallingConv::ID CC = CLI.CallConv; bool IsTailCall = CLI.IsTailCall; bool IsVarArg = CLI.IsVarArg; const Value *Callee = CLI.Callee; MCSymbol *Symbol = CLI.Symbol; // Do not handle FastCC. if (CC == CallingConv::Fast) return false; // Allow SelectionDAG isel to handle tail calls. if (IsTailCall) return false; // Let SDISel handle vararg functions. if (IsVarArg) return false; // FIXME: Only handle *simple* calls for now. MVT RetVT; if (CLI.RetTy->isVoidTy()) RetVT = MVT::isVoid; else if (!isTypeSupported(CLI.RetTy, RetVT)) return false; for (auto Flag : CLI.OutFlags) if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal()) return false; // Set up the argument vectors. SmallVector OutVTs; OutVTs.reserve(CLI.OutVals.size()); for (auto *Val : CLI.OutVals) { MVT VT; if (!isTypeLegal(Val->getType(), VT) && !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) return false; // We don't handle vector parameters yet. if (VT.isVector() || VT.getSizeInBits() > 64) return false; OutVTs.push_back(VT); } Address Addr; if (!computeCallAddress(Callee, Addr)) return false; // Handle the arguments now that we've gotten them. unsigned NumBytes; if (!processCallArgs(CLI, OutVTs, NumBytes)) return false; if (!Addr.getGlobalValue()) return false; // Issue the call. unsigned DestAddress; if (Symbol) DestAddress = materializeExternalCallSym(Symbol); else DestAddress = materializeGV(Addr.getGlobalValue(), MVT::i32); emitInst(TargetOpcode::COPY, Mips::T9).addReg(DestAddress); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::JALR), Mips::RA).addReg(Mips::T9); // Add implicit physical register uses to the call. for (auto Reg : CLI.OutRegs) MIB.addReg(Reg, RegState::Implicit); // Add a register mask with the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); CLI.Call = MIB; // Finish off the call including any return values. 
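processCallArgs (above) never reserves fewer than 16 bytes of outgoing argument space, matching the O32 home area for A0-A3 that the ADJCALLSTACKDOWN/ADJCALLSTACKUP pair brackets. A trivial restatement of that clamp, with a hypothetical helper name:

// Sketch only: the minimum outgoing argument area used in processCallArgs.
#include <algorithm>
#include <cassert>

static unsigned outgoingArgBytes(unsigned StackBytesNeeded) {
  return std::max(StackBytesNeeded, 16u);
}

int main() {
  assert(outgoingArgBytes(0) == 16);  // register-only call still reserves 16
  assert(outgoingArgBytes(24) == 24); // larger areas are kept as computed
  return 0;
}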
return finishCall(CLI, RetVT, NumBytes); } bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { switch (II->getIntrinsicID()) { default: return false; case Intrinsic::bswap: { Type *RetTy = II->getCalledFunction()->getReturnType(); MVT VT; if (!isTypeSupported(RetTy, VT)) return false; unsigned SrcReg = getRegForValue(II->getOperand(0)); if (SrcReg == 0) return false; unsigned DestReg = createResultReg(&Mips::GPR32RegClass); if (DestReg == 0) return false; if (VT == MVT::i16) { if (Subtarget->hasMips32r2()) { emitInst(Mips::WSBH, DestReg).addReg(SrcReg); updateValueMap(II, DestReg); return true; } else { unsigned TempReg[3]; for (int i = 0; i < 3; i++) { TempReg[i] = createResultReg(&Mips::GPR32RegClass); if (TempReg[i] == 0) return false; } emitInst(Mips::SLL, TempReg[0]).addReg(SrcReg).addImm(8); emitInst(Mips::SRL, TempReg[1]).addReg(SrcReg).addImm(8); emitInst(Mips::OR, TempReg[2]).addReg(TempReg[0]).addReg(TempReg[1]); emitInst(Mips::ANDi, DestReg).addReg(TempReg[2]).addImm(0xFFFF); updateValueMap(II, DestReg); return true; } } else if (VT == MVT::i32) { if (Subtarget->hasMips32r2()) { unsigned TempReg = createResultReg(&Mips::GPR32RegClass); emitInst(Mips::WSBH, TempReg).addReg(SrcReg); emitInst(Mips::ROTR, DestReg).addReg(TempReg).addImm(16); updateValueMap(II, DestReg); return true; } else { unsigned TempReg[8]; for (int i = 0; i < 8; i++) { TempReg[i] = createResultReg(&Mips::GPR32RegClass); if (TempReg[i] == 0) return false; } emitInst(Mips::SRL, TempReg[0]).addReg(SrcReg).addImm(8); emitInst(Mips::SRL, TempReg[1]).addReg(SrcReg).addImm(24); emitInst(Mips::ANDi, TempReg[2]).addReg(TempReg[0]).addImm(0xFF00); emitInst(Mips::OR, TempReg[3]).addReg(TempReg[1]).addReg(TempReg[2]); emitInst(Mips::ANDi, TempReg[4]).addReg(SrcReg).addImm(0xFF00); emitInst(Mips::SLL, TempReg[5]).addReg(TempReg[4]).addImm(8); emitInst(Mips::SLL, TempReg[6]).addReg(SrcReg).addImm(24); emitInst(Mips::OR, TempReg[7]).addReg(TempReg[3]).addReg(TempReg[5]); emitInst(Mips::OR, DestReg).addReg(TempReg[6]).addReg(TempReg[7]); updateValueMap(II, DestReg); return true; } } return false; } case Intrinsic::memcpy: case Intrinsic::memmove: { const auto *MTI = cast(II); // Don't handle volatile. if (MTI->isVolatile()) return false; if (!MTI->getLength()->getType()->isIntegerTy(32)) return false; const char *IntrMemName = isa(II) ? "memcpy" : "memmove"; return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1); } case Intrinsic::memset: { const MemSetInst *MSI = cast(II); // Don't handle volatile. if (MSI->isVolatile()) return false; if (!MSI->getLength()->getType()->isIntegerTy(32)) return false; return lowerCallTo(II, "memset", II->getNumArgOperands() - 1); } } return false; } bool MipsFastISel::selectRet(const Instruction *I) { const Function &F = *I->getParent()->getParent(); const ReturnInst *Ret = cast(I); LLVM_DEBUG(dbgs() << "selectRet\n"); if (!FuncInfo.CanLowerReturn) return false; // Build a list of return value registers. SmallVector RetRegs; if (Ret->getNumOperands() > 0) { CallingConv::ID CC = F.getCallingConv(); // Do not handle FastCC. if (CC == CallingConv::Fast) return false; SmallVector Outs; GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL); // Analyze operands of the call, assigning locations to each operand. SmallVector ValLocs; MipsCCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); CCAssignFn *RetCC = RetCC_Mips; CCInfo.AnalyzeReturn(Outs, RetCC); // Only handle a single return value for now. 
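For subtargets without WSBH/ROTR, the bswap expansion in fastLowerIntrinsicCall above is synthesized from shifts, masks, and ORs; replaying the i32 sequence on host integers confirms it byte-swaps (the helper name is illustrative):

// Host-side replay of the pre-MIPS32r2 i32 bswap sequence emitted above.
#include <cassert>
#include <cstdint>

static uint32_t bswap32NoWSBH(uint32_t Src) {
  uint32_t T0 = Src >> 8;     // SRL  T0, Src, 8
  uint32_t T1 = Src >> 24;    // SRL  T1, Src, 24
  uint32_t T2 = T0 & 0xFF00;  // ANDi T2, T0, 0xFF00
  uint32_t T3 = T1 | T2;      // OR   T3, T1, T2
  uint32_t T4 = Src & 0xFF00; // ANDi T4, Src, 0xFF00
  uint32_t T5 = T4 << 8;      // SLL  T5, T4, 8
  uint32_t T6 = Src << 24;    // SLL  T6, Src, 24
  uint32_t T7 = T3 | T5;      // OR   T7, T3, T5
  return T6 | T7;             // OR   Dest, T6, T7
}

int main() {
  assert(bswap32NoWSBH(0xAABBCCDDu) == 0xDDCCBBAAu);
  assert(bswap32NoWSBH(0x00000001u) == 0x01000000u);
  return 0;
}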
if (ValLocs.size() != 1) return false; CCValAssign &VA = ValLocs[0]; const Value *RV = Ret->getOperand(0); // Don't bother handling odd stuff for now. if ((VA.getLocInfo() != CCValAssign::Full) && (VA.getLocInfo() != CCValAssign::BCvt)) return false; // Only handle register returns for now. if (!VA.isRegLoc()) return false; unsigned Reg = getRegForValue(RV); if (Reg == 0) return false; unsigned SrcReg = Reg + VA.getValNo(); unsigned DestReg = VA.getLocReg(); // Avoid a cross-class copy. This is very unlikely. if (!MRI.getRegClass(SrcReg)->contains(DestReg)) return false; EVT RVEVT = TLI.getValueType(DL, RV->getType()); if (!RVEVT.isSimple()) return false; if (RVEVT.isVector()) return false; MVT RVVT = RVEVT.getSimpleVT(); if (RVVT == MVT::f128) return false; // Do not handle FGR64 returns for now. if (RVVT == MVT::f64 && UnsupportedFPMode) { LLVM_DEBUG(dbgs() << ".. .. gave up (UnsupportedFPMode\n"); return false; } MVT DestVT = VA.getValVT(); // Special handling for extended integers. if (RVVT != DestVT) { if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) return false; if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) { bool IsZExt = Outs[0].Flags.isZExt(); SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt); if (SrcReg == 0) return false; } } // Make the copy. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg); // Add register to return instruction. RetRegs.push_back(VA.getLocReg()); } MachineInstrBuilder MIB = emitInst(Mips::RetRA); for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) MIB.addReg(RetRegs[i], RegState::Implicit); return true; } bool MipsFastISel::selectTrunc(const Instruction *I) { // The high bits for a type smaller than the register size are assumed to be // undefined. Value *Op = I->getOperand(0); EVT SrcVT, DestVT; SrcVT = TLI.getValueType(DL, Op->getType(), true); DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8) return false; if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1) return false; unsigned SrcReg = getRegForValue(Op); if (!SrcReg) return false; // Because the high bits are undefined, a truncate doesn't generate // any code. 
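Truncation can be a no-op because the narrow value simply occupies the low bits of a GPR32; the width is re-established on demand by the extension helpers defined below (SLL/SRA pairs when SEB/SEH are unavailable, ANDi masks for zero extension). A host-side model of those sequences, assuming two's-complement C++20 shift semantics; the names are illustrative:

// SLL/SRA pairs as in emitIntSExt32r1, ANDi masks as in emitIntZExt.
#include <cassert>
#include <cstdint>

static int32_t sext8(uint32_t V)  { return static_cast<int32_t>(V << 24) >> 24; }
static int32_t sext16(uint32_t V) { return static_cast<int32_t>(V << 16) >> 16; }
static uint32_t zext8(uint32_t V)  { return V & 0xff; }
static uint32_t zext16(uint32_t V) { return V & 0xffff; }

int main() {
  assert(sext8(0x000000f0u) == -16);   // 0xf0 read back as i8 is -16
  assert(sext16(0x0000ff80u) == -128); // 0xff80 read back as i16 is -128
  assert(zext8(0xffffff01u) == 0x01u);
  assert(zext16(0xdead1234u) == 0x1234u);
  return 0;
}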
updateValueMap(I, SrcReg); return true; } bool MipsFastISel::selectIntExt(const Instruction *I) { Type *DestTy = I->getType(); Value *Src = I->getOperand(0); Type *SrcTy = Src->getType(); bool isZExt = isa(I); unsigned SrcReg = getRegForValue(Src); if (!SrcReg) return false; EVT SrcEVT, DestEVT; SrcEVT = TLI.getValueType(DL, SrcTy, true); DestEVT = TLI.getValueType(DL, DestTy, true); if (!SrcEVT.isSimple()) return false; if (!DestEVT.isSimple()) return false; MVT SrcVT = SrcEVT.getSimpleVT(); MVT DestVT = DestEVT.getSimpleVT(); unsigned ResultReg = createResultReg(&Mips::GPR32RegClass); if (!emitIntExt(SrcVT, SrcReg, DestVT, ResultReg, isZExt)) return false; updateValueMap(I, ResultReg); return true; } bool MipsFastISel::emitIntSExt32r1(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg) { unsigned ShiftAmt; switch (SrcVT.SimpleTy) { default: return false; case MVT::i8: ShiftAmt = 24; break; case MVT::i16: ShiftAmt = 16; break; } unsigned TempReg = createResultReg(&Mips::GPR32RegClass); emitInst(Mips::SLL, TempReg).addReg(SrcReg).addImm(ShiftAmt); emitInst(Mips::SRA, DestReg).addReg(TempReg).addImm(ShiftAmt); return true; } bool MipsFastISel::emitIntSExt32r2(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg) { switch (SrcVT.SimpleTy) { default: return false; case MVT::i8: emitInst(Mips::SEB, DestReg).addReg(SrcReg); break; case MVT::i16: emitInst(Mips::SEH, DestReg).addReg(SrcReg); break; } return true; } bool MipsFastISel::emitIntSExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg) { if ((DestVT != MVT::i32) && (DestVT != MVT::i16)) return false; if (Subtarget->hasMips32r2()) return emitIntSExt32r2(SrcVT, SrcReg, DestVT, DestReg); return emitIntSExt32r1(SrcVT, SrcReg, DestVT, DestReg); } bool MipsFastISel::emitIntZExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg) { int64_t Imm; switch (SrcVT.SimpleTy) { default: return false; case MVT::i1: Imm = 1; break; case MVT::i8: Imm = 0xff; break; case MVT::i16: Imm = 0xffff; break; } emitInst(Mips::ANDi, DestReg).addReg(SrcReg).addImm(Imm); return true; } bool MipsFastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg, bool IsZExt) { // FastISel does not have plumbing to deal with extensions where the SrcVT or // DestVT are odd things, so test to make sure that they are both types we can // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise // bail out to SelectionDAG. if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && (DestVT != MVT::i32)) || ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && (SrcVT != MVT::i16))) return false; if (IsZExt) return emitIntZExt(SrcVT, SrcReg, DestVT, DestReg); return emitIntSExt(SrcVT, SrcReg, DestVT, DestReg); } unsigned MipsFastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt) { unsigned DestReg = createResultReg(&Mips::GPR32RegClass); bool Success = emitIntExt(SrcVT, SrcReg, DestVT, DestReg, isZExt); return Success ? 
DestReg : 0; } bool MipsFastISel::selectDivRem(const Instruction *I, unsigned ISDOpcode) { EVT DestEVT = TLI.getValueType(DL, I->getType(), true); if (!DestEVT.isSimple()) return false; MVT DestVT = DestEVT.getSimpleVT(); if (DestVT != MVT::i32) return false; unsigned DivOpc; switch (ISDOpcode) { default: return false; case ISD::SDIV: case ISD::SREM: DivOpc = Mips::SDIV; break; case ISD::UDIV: case ISD::UREM: DivOpc = Mips::UDIV; break; } unsigned Src0Reg = getRegForValue(I->getOperand(0)); unsigned Src1Reg = getRegForValue(I->getOperand(1)); if (!Src0Reg || !Src1Reg) return false; emitInst(DivOpc).addReg(Src0Reg).addReg(Src1Reg); emitInst(Mips::TEQ).addReg(Src1Reg).addReg(Mips::ZERO).addImm(7); unsigned ResultReg = createResultReg(&Mips::GPR32RegClass); if (!ResultReg) return false; unsigned MFOpc = (ISDOpcode == ISD::SREM || ISDOpcode == ISD::UREM) ? Mips::MFHI : Mips::MFLO; emitInst(MFOpc, ResultReg); updateValueMap(I, ResultReg); return true; } bool MipsFastISel::selectShift(const Instruction *I) { MVT RetVT; if (!isTypeSupported(I->getType(), RetVT)) return false; unsigned ResultReg = createResultReg(&Mips::GPR32RegClass); if (!ResultReg) return false; unsigned Opcode = I->getOpcode(); const Value *Op0 = I->getOperand(0); unsigned Op0Reg = getRegForValue(Op0); if (!Op0Reg) return false; // If AShr or LShr, then we need to make sure the operand0 is sign extended. if (Opcode == Instruction::AShr || Opcode == Instruction::LShr) { unsigned TempReg = createResultReg(&Mips::GPR32RegClass); if (!TempReg) return false; MVT Op0MVT = TLI.getValueType(DL, Op0->getType(), true).getSimpleVT(); bool IsZExt = Opcode == Instruction::LShr; if (!emitIntExt(Op0MVT, Op0Reg, MVT::i32, TempReg, IsZExt)) return false; Op0Reg = TempReg; } if (const auto *C = dyn_cast(I->getOperand(1))) { uint64_t ShiftVal = C->getZExtValue(); switch (Opcode) { default: llvm_unreachable("Unexpected instruction."); case Instruction::Shl: Opcode = Mips::SLL; break; case Instruction::AShr: Opcode = Mips::SRA; break; case Instruction::LShr: Opcode = Mips::SRL; break; } emitInst(Opcode, ResultReg).addReg(Op0Reg).addImm(ShiftVal); updateValueMap(I, ResultReg); return true; } unsigned Op1Reg = getRegForValue(I->getOperand(1)); if (!Op1Reg) return false; switch (Opcode) { default: llvm_unreachable("Unexpected instruction."); case Instruction::Shl: Opcode = Mips::SLLV; break; case Instruction::AShr: Opcode = Mips::SRAV; break; case Instruction::LShr: Opcode = Mips::SRLV; break; } emitInst(Opcode, ResultReg).addReg(Op0Reg).addReg(Op1Reg); updateValueMap(I, ResultReg); return true; } bool MipsFastISel::fastSelectInstruction(const Instruction *I) { switch (I->getOpcode()) { default: break; case Instruction::Load: return selectLoad(I); case Instruction::Store: return selectStore(I); case Instruction::SDiv: if (!selectBinaryOp(I, ISD::SDIV)) return selectDivRem(I, ISD::SDIV); return true; case Instruction::UDiv: if (!selectBinaryOp(I, ISD::UDIV)) return selectDivRem(I, ISD::UDIV); return true; case Instruction::SRem: if (!selectBinaryOp(I, ISD::SREM)) return selectDivRem(I, ISD::SREM); return true; case Instruction::URem: if (!selectBinaryOp(I, ISD::UREM)) return selectDivRem(I, ISD::UREM); return true; case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: return selectShift(I); case Instruction::And: case Instruction::Or: case Instruction::Xor: return selectLogicalOp(I); case Instruction::Br: return selectBranch(I); case Instruction::Ret: return selectRet(I); case Instruction::Trunc: return selectTrunc(I); case 
Instruction::ZExt: case Instruction::SExt: return selectIntExt(I); case Instruction::FPTrunc: return selectFPTrunc(I); case Instruction::FPExt: return selectFPExt(I); case Instruction::FPToSI: return selectFPToInt(I, /*isSigned*/ true); case Instruction::FPToUI: return selectFPToInt(I, /*isSigned*/ false); case Instruction::ICmp: case Instruction::FCmp: return selectCmp(I); case Instruction::Select: return selectSelect(I); } return false; } unsigned MipsFastISel::getRegEnsuringSimpleIntegerWidening(const Value *V, bool IsUnsigned) { unsigned VReg = getRegForValue(V); if (VReg == 0) return 0; MVT VMVT = TLI.getValueType(DL, V->getType(), true).getSimpleVT(); if (VMVT == MVT::i1) return 0; if ((VMVT == MVT::i8) || (VMVT == MVT::i16)) { unsigned TempReg = createResultReg(&Mips::GPR32RegClass); if (!emitIntExt(VMVT, VReg, MVT::i32, TempReg, IsUnsigned)) return 0; VReg = TempReg; } return VReg; } void MipsFastISel::simplifyAddress(Address &Addr) { if (!isInt<16>(Addr.getOffset())) { unsigned TempReg = materialize32BitInt(Addr.getOffset(), &Mips::GPR32RegClass); unsigned DestReg = createResultReg(&Mips::GPR32RegClass); emitInst(Mips::ADDu, DestReg).addReg(TempReg).addReg(Addr.getReg()); Addr.setReg(DestReg); Addr.setOffset(0); } } unsigned MipsFastISel::fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill) { // We treat the MUL instruction in a special way because it clobbers // the HI0 & LO0 registers. The TableGen definition of this instruction can // mark these registers only as implicitly defined. As a result, the // register allocator runs out of registers when this instruction is // followed by another instruction that defines the same registers too. // We can fix this by explicitly marking those registers as dead. if (MachineInstOpcode == Mips::MUL) { unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, getKillRegState(Op0IsKill)) .addReg(Op1, getKillRegState(Op1IsKill)) .addReg(Mips::HI0, RegState::ImplicitDefine | RegState::Dead) .addReg(Mips::LO0, RegState::ImplicitDefine | RegState::Dead); return ResultReg; } return FastISel::fastEmitInst_rr(MachineInstOpcode, RC, Op0, Op0IsKill, Op1, Op1IsKill); } namespace llvm { FastISel *Mips::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) { return new MipsFastISel(funcInfo, libInfo); } } // end namespace llvm Index: vendor/llvm/dist-release_70/lib/Target/Mips/MipsInstrFPU.td =================================================================== --- vendor/llvm/dist-release_70/lib/Target/Mips/MipsInstrFPU.td (revision 341364) +++ vendor/llvm/dist-release_70/lib/Target/Mips/MipsInstrFPU.td (revision 341365) @@ -1,936 +1,936 @@ //===-- MipsInstrFPU.td - Mips FPU Instruction Information -*- tablegen -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file describes the Mips FPU instruction set. 
// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // Floating Point Instructions // ------------------------ // * 64bit fp: // - 32 64-bit registers (default mode) // - 16 even 32-bit registers (32-bit compatible mode) for // single and double access. // * 32bit fp: // - 16 even 32-bit registers - single and double (aliased) // - 32 32-bit registers (within single-only mode) //===----------------------------------------------------------------------===// // Floating Point Compare and Branch def SDT_MipsFPBrcond : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisVT<1, i32>, SDTCisVT<2, OtherVT>]>; def SDT_MipsFPCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisFP<1>, SDTCisVT<2, i32>]>; def SDT_MipsCMovFP : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisVT<2, i32>, SDTCisSameAs<1, 3>]>; def SDT_MipsTruncIntFP : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>; def SDT_MipsBuildPairF64 : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; def SDT_MipsExtractElementF64 : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, f64>, SDTCisVT<2, i32>]>; def SDT_MipsMTC1_D64 : SDTypeProfile<1, 1, [SDTCisVT<0, f64>, SDTCisVT<1, i32>]>; def MipsFPCmp : SDNode<"MipsISD::FPCmp", SDT_MipsFPCmp, [SDNPOutGlue]>; def MipsCMovFP_T : SDNode<"MipsISD::CMovFP_T", SDT_MipsCMovFP, [SDNPInGlue]>; def MipsCMovFP_F : SDNode<"MipsISD::CMovFP_F", SDT_MipsCMovFP, [SDNPInGlue]>; def MipsFPBrcond : SDNode<"MipsISD::FPBrcond", SDT_MipsFPBrcond, [SDNPHasChain, SDNPOptInGlue]>; def MipsTruncIntFP : SDNode<"MipsISD::TruncIntFP", SDT_MipsTruncIntFP>; def MipsBuildPairF64 : SDNode<"MipsISD::BuildPairF64", SDT_MipsBuildPairF64>; def MipsExtractElementF64 : SDNode<"MipsISD::ExtractElementF64", SDT_MipsExtractElementF64>; def MipsMTC1_D64 : SDNode<"MipsISD::MTC1_D64", SDT_MipsMTC1_D64>; // Operand for printing out a condition code. let PrintMethod = "printFCCOperand", DecoderMethod = "DecodeCondCode" in def condcode : Operand; //===----------------------------------------------------------------------===// // Feature predicates. //===----------------------------------------------------------------------===// def IsFP64bit : Predicate<"Subtarget->isFP64bit()">, AssemblerPredicate<"FeatureFP64Bit">; def NotFP64bit : Predicate<"!Subtarget->isFP64bit()">, AssemblerPredicate<"!FeatureFP64Bit">; def IsSingleFloat : Predicate<"Subtarget->isSingleFloat()">, AssemblerPredicate<"FeatureSingleFloat">; def IsNotSingleFloat : Predicate<"!Subtarget->isSingleFloat()">, AssemblerPredicate<"!FeatureSingleFloat">; def IsNotSoftFloat : Predicate<"!Subtarget->useSoftFloat()">, AssemblerPredicate<"!FeatureSoftFloat">; //===----------------------------------------------------------------------===// // Mips FGR size adjectives. // They are mutually exclusive. //===----------------------------------------------------------------------===// class FGR_32 { list FGRPredicates = [NotFP64bit]; } class FGR_64 { list FGRPredicates = [IsFP64bit]; } class HARDFLOAT { list HardFloatPredicate = [IsNotSoftFloat]; } //===----------------------------------------------------------------------===// // FP immediate patterns. 
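The fpimm0 and fpimm0neg leaves defined just below distinguish +0.0 from -0.0 even though the two values compare equal; a small host-side reminder of that IEEE-754 property:

// +0.0 and -0.0 are equal under == but carry different sign bits.
#include <cassert>
#include <cmath>

int main() {
  double PosZero = +0.0, NegZero = -0.0;
  assert(PosZero == NegZero);
  assert(!std::signbit(PosZero) && std::signbit(NegZero));
  return 0;
}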
def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>; def fpimm0neg : PatLeaf<(fpimm), [{ return N->isExactlyValue(-0.0); }]>; //===----------------------------------------------------------------------===// // Instruction Class Templates // // A set of multiclasses is used to address the register usage. // // S32 - single precision in 16 32bit even fp registers // single precision in 32 32bit fp registers in SingleOnly mode // S64 - single precision in 32 64bit fp registers (In64BitMode) // D32 - double precision in 16 32bit even fp registers // D64 - double precision in 32 64bit fp registers (In64BitMode) // // Only S32 and D32 are supported right now. //===----------------------------------------------------------------------===// class ADDS_FT : InstSE<(outs RC:$fd), (ins RC:$fs, RC:$ft), !strconcat(opstr, "\t$fd, $fs, $ft"), [(set RC:$fd, (OpNode RC:$fs, RC:$ft))], Itin, FrmFR, opstr>, HARDFLOAT { let isCommutable = IsComm; } multiclass ADDS_M { def _D32 : MMRel, ADDS_FT, FGR_32; def _D64 : ADDS_FT, FGR_64 { string DecoderNamespace = "MipsFP64"; } } class ABSS_FT : InstSE<(outs DstRC:$fd), (ins SrcRC:$fs), !strconcat(opstr, "\t$fd, $fs"), [(set DstRC:$fd, (OpNode SrcRC:$fs))], Itin, FrmFR, opstr>, HARDFLOAT, NeverHasSideEffects; multiclass ABSS_M { def _D32 : MMRel, ABSS_FT, FGR_32; def _D64 : ABSS_FT, FGR_64 { string DecoderNamespace = "MipsFP64"; } } multiclass ROUND_M { def _D32 : MMRel, ABSS_FT, FGR_32; def _D64 : StdMMR6Rel, ABSS_FT, FGR_64 { let DecoderNamespace = "MipsFP64"; } } class MFC1_FT : InstSE<(outs DstRC:$rt), (ins SrcRC:$fs), !strconcat(opstr, "\t$rt, $fs"), [(set DstRC:$rt, (OpNode SrcRC:$fs))], Itin, FrmFR, opstr>, HARDFLOAT { let isMoveReg = 1; } class MTC1_FT : InstSE<(outs DstRC:$fs), (ins SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"), [(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR, opstr>, HARDFLOAT { let isMoveReg = 1; } class MTC1_64_FT : InstSE<(outs DstRC:$fs), (ins DstRC:$fs_in, SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"), [], Itin, FrmFR, opstr>, HARDFLOAT { // $fs_in is part of a white lie to work around a widespread bug in the FPU // implementation. See expandBuildPairF64 for details. 
let Constraints = "$fs = $fs_in"; } class LW_FT : InstSE<(outs RC:$rt), (ins MO:$addr), !strconcat(opstr, "\t$rt, $addr"), [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI, opstr>, HARDFLOAT { let DecoderMethod = "DecodeFMem"; let mayLoad = 1; } class SW_FT : InstSE<(outs), (ins RC:$rt, MO:$addr), !strconcat(opstr, "\t$rt, $addr"), [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI, opstr>, HARDFLOAT { let DecoderMethod = "DecodeFMem"; let mayStore = 1; } class MADDS_FT : InstSE<(outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft), !strconcat(opstr, "\t$fd, $fr, $fs, $ft"), [(set RC:$fd, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr))], Itin, FrmFR, opstr>, HARDFLOAT; class NMADDS_FT : InstSE<(outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft), !strconcat(opstr, "\t$fd, $fr, $fs, $ft"), [(set RC:$fd, (fsub fpimm0, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr)))], Itin, FrmFR, opstr>, HARDFLOAT; class LWXC1_FT : InstSE<(outs DRC:$fd), (ins PtrRC:$base, PtrRC:$index), !strconcat(opstr, "\t$fd, ${index}(${base})"), [(set DRC:$fd, (OpNode (add iPTR:$base, iPTR:$index)))], Itin, FrmFI, opstr>, HARDFLOAT { let AddedComplexity = 20; } class SWXC1_FT : InstSE<(outs), (ins DRC:$fs, PtrRC:$base, PtrRC:$index), !strconcat(opstr, "\t$fs, ${index}(${base})"), [(OpNode DRC:$fs, (add iPTR:$base, iPTR:$index))], Itin, FrmFI, opstr>, HARDFLOAT { let AddedComplexity = 20; } class BC1F_FT : InstSE<(outs), (ins FCCRegsOpnd:$fcc, opnd:$offset), !strconcat(opstr, "\t$fcc, $offset"), [(MipsFPBrcond Op, FCCRegsOpnd:$fcc, bb:$offset)], Itin, FrmFI, opstr>, HARDFLOAT { let isBranch = 1; let isTerminator = 1; let hasDelaySlot = 1; let Defs = [AT]; let hasFCCRegOperand = 1; } class BC1XL_FT : InstSE<(outs), (ins FCCRegsOpnd:$fcc, opnd:$offset), !strconcat(opstr, "\t$fcc, $offset"), [], Itin, FrmFI, opstr>, HARDFLOAT { let isBranch = 1; let isTerminator = 1; let hasDelaySlot = 1; let Defs = [AT]; let hasFCCRegOperand = 1; } class CEQS_FT : InstSE<(outs), (ins RC:$fs, RC:$ft, condcode:$cond), !strconcat("c.$cond.", typestr, "\t$fs, $ft"), [(OpNode RC:$fs, RC:$ft, imm:$cond)], Itin, FrmFR, !strconcat("c.$cond.", typestr)>, HARDFLOAT { let Defs = [FCC0]; let isCodeGenOnly = 1; let hasFCCRegOperand = 1; } // Note: MIPS-IV introduced $fcc1-$fcc7 and renamed FCSR31[23] $fcc0. Rather // duplicating the instruction definition for MIPS1 - MIPS3, we expand // c.cond.ft if necessary, and reject it after constructing the // instruction if the ISA doesn't support it. 
class C_COND_FT : InstSE<(outs FCCRegsOpnd:$fcc), (ins RC:$fs, RC:$ft), !strconcat("c.", CondStr, ".", Typestr, "\t$fcc, $fs, $ft"), [], itin, FrmFR>, HARDFLOAT { let isCompare = 1; let hasFCCRegOperand = 1; } multiclass C_COND_M fmt, InstrItinClass itin> { def C_F_#NAME : MMRel, C_COND_FT<"f", TypeStr, RC, itin>, C_COND_FM { let BaseOpcode = "c.f."#NAME; let isCommutable = 1; } def C_UN_#NAME : MMRel, C_COND_FT<"un", TypeStr, RC, itin>, C_COND_FM { let BaseOpcode = "c.un."#NAME; let isCommutable = 1; } def C_EQ_#NAME : MMRel, C_COND_FT<"eq", TypeStr, RC, itin>, C_COND_FM { let BaseOpcode = "c.eq."#NAME; let isCommutable = 1; } def C_UEQ_#NAME : MMRel, C_COND_FT<"ueq", TypeStr, RC, itin>, C_COND_FM { let BaseOpcode = "c.ueq."#NAME; let isCommutable = 1; } def C_OLT_#NAME : MMRel, C_COND_FT<"olt", TypeStr, RC, itin>, C_COND_FM { let BaseOpcode = "c.olt."#NAME; } def C_ULT_#NAME : MMRel, C_COND_FT<"ult", TypeStr, RC, itin>, C_COND_FM { let BaseOpcode = "c.ult."#NAME; } def C_OLE_#NAME : MMRel, C_COND_FT<"ole", TypeStr, RC, itin>, C_COND_FM { let BaseOpcode = "c.ole."#NAME; } def C_ULE_#NAME : MMRel, C_COND_FT<"ule", TypeStr, RC, itin>, C_COND_FM { let BaseOpcode = "c.ule."#NAME; } def C_SF_#NAME : MMRel, C_COND_FT<"sf", TypeStr, RC, itin>, C_COND_FM { let BaseOpcode = "c.sf."#NAME; let isCommutable = 1; } def C_NGLE_#NAME : MMRel, C_COND_FT<"ngle", TypeStr, RC, itin>, C_COND_FM { let BaseOpcode = "c.ngle."#NAME; } def C_SEQ_#NAME : MMRel, C_COND_FT<"seq", TypeStr, RC, itin>, C_COND_FM { let BaseOpcode = "c.seq."#NAME; let isCommutable = 1; } def C_NGL_#NAME : MMRel, C_COND_FT<"ngl", TypeStr, RC, itin>, C_COND_FM { let BaseOpcode = "c.ngl."#NAME; } def C_LT_#NAME : MMRel, C_COND_FT<"lt", TypeStr, RC, itin>, C_COND_FM { let BaseOpcode = "c.lt."#NAME; } def C_NGE_#NAME : MMRel, C_COND_FT<"nge", TypeStr, RC, itin>, C_COND_FM { let BaseOpcode = "c.nge."#NAME; } def C_LE_#NAME : MMRel, C_COND_FT<"le", TypeStr, RC, itin>, C_COND_FM { let BaseOpcode = "c.le."#NAME; } def C_NGT_#NAME : MMRel, C_COND_FT<"ngt", TypeStr, RC, itin>, C_COND_FM { let BaseOpcode = "c.ngt."#NAME; } } let AdditionalPredicates = [NotInMicroMips] in { defm S : C_COND_M<"s", FGR32Opnd, 16, II_C_CC_S>, ISA_MIPS1_NOT_32R6_64R6; defm D32 : C_COND_M<"d", AFGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6, FGR_32; let DecoderNamespace = "MipsFP64" in defm D64 : C_COND_M<"d", FGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6, FGR_64; } //===----------------------------------------------------------------------===// // Floating Point Instructions //===----------------------------------------------------------------------===// let AdditionalPredicates = [NotInMicroMips] in { def ROUND_W_S : MMRel, StdMMR6Rel, ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, II_ROUND>, ABSS_FM<0xc, 16>, ISA_MIPS2; defm ROUND_W : ROUND_M<"round.w.d", II_ROUND>, ABSS_FM<0xc, 17>, ISA_MIPS2; def TRUNC_W_S : MMRel, StdMMR6Rel, ABSS_FT<"trunc.w.s", FGR32Opnd, FGR32Opnd, II_TRUNC>, ABSS_FM<0xd, 16>, ISA_MIPS2; def CEIL_W_S : MMRel, StdMMR6Rel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>, ABSS_FM<0xe, 16>, ISA_MIPS2; def FLOOR_W_S : MMRel, StdMMR6Rel, ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, II_FLOOR>, ABSS_FM<0xf, 16>, ISA_MIPS2; def CVT_W_S : MMRel, ABSS_FT<"cvt.w.s", FGR32Opnd, FGR32Opnd, II_CVT>, ABSS_FM<0x24, 16>, ISA_MIPS1; defm TRUNC_W : ROUND_M<"trunc.w.d", II_TRUNC>, ABSS_FM<0xd, 17>, ISA_MIPS2; defm CEIL_W : ROUND_M<"ceil.w.d", II_CEIL>, ABSS_FM<0xe, 17>, ISA_MIPS2; defm FLOOR_W : ROUND_M<"floor.w.d", II_FLOOR>, ABSS_FM<0xf, 17>, ISA_MIPS2; defm 
CVT_W : ROUND_M<"cvt.w.d", II_CVT>, ABSS_FM<0x24, 17>, ISA_MIPS1; } let AdditionalPredicates = [NotInMicroMips] in { def RECIP_S : MMRel, ABSS_FT<"recip.s", FGR32Opnd, FGR32Opnd, II_RECIP_S>, ABSS_FM<0b010101, 0x10>, INSN_MIPS4_32R2; def RECIP_D32 : MMRel, ABSS_FT<"recip.d", AFGR64Opnd, AFGR64Opnd, II_RECIP_D>, ABSS_FM<0b010101, 0x11>, INSN_MIPS4_32R2, FGR_32 { let BaseOpcode = "RECIP_D32"; } let DecoderNamespace = "MipsFP64" in def RECIP_D64 : MMRel, ABSS_FT<"recip.d", FGR64Opnd, FGR64Opnd, II_RECIP_D>, ABSS_FM<0b010101, 0x11>, INSN_MIPS4_32R2, FGR_64; def RSQRT_S : MMRel, ABSS_FT<"rsqrt.s", FGR32Opnd, FGR32Opnd, II_RSQRT_S>, ABSS_FM<0b010110, 0x10>, INSN_MIPS4_32R2; def RSQRT_D32 : MMRel, ABSS_FT<"rsqrt.d", AFGR64Opnd, AFGR64Opnd, II_RSQRT_D>, ABSS_FM<0b010110, 0x11>, INSN_MIPS4_32R2, FGR_32 { let BaseOpcode = "RSQRT_D32"; } let DecoderNamespace = "MipsFP64" in def RSQRT_D64 : MMRel, ABSS_FT<"rsqrt.d", FGR64Opnd, FGR64Opnd, II_RSQRT_D>, ABSS_FM<0b010110, 0x11>, INSN_MIPS4_32R2, FGR_64; } let DecoderNamespace = "MipsFP64" in { let AdditionalPredicates = [NotInMicroMips] in { def ROUND_L_S : ABSS_FT<"round.l.s", FGR64Opnd, FGR32Opnd, II_ROUND>, ABSS_FM<0x8, 16>, ISA_MIPS2, FGR_64; def ROUND_L_D64 : ABSS_FT<"round.l.d", FGR64Opnd, FGR64Opnd, II_ROUND>, ABSS_FM<0x8, 17>, INSN_MIPS3_32, FGR_64; def TRUNC_L_S : ABSS_FT<"trunc.l.s", FGR64Opnd, FGR32Opnd, II_TRUNC>, ABSS_FM<0x9, 16>, ISA_MIPS2, FGR_64; def TRUNC_L_D64 : ABSS_FT<"trunc.l.d", FGR64Opnd, FGR64Opnd, II_TRUNC>, ABSS_FM<0x9, 17>, INSN_MIPS3_32, FGR_64; def CEIL_L_S : ABSS_FT<"ceil.l.s", FGR64Opnd, FGR32Opnd, II_CEIL>, ABSS_FM<0xa, 16>, ISA_MIPS2, FGR_64; def CEIL_L_D64 : ABSS_FT<"ceil.l.d", FGR64Opnd, FGR64Opnd, II_CEIL>, ABSS_FM<0xa, 17>, INSN_MIPS3_32, FGR_64; def FLOOR_L_S : ABSS_FT<"floor.l.s", FGR64Opnd, FGR32Opnd, II_FLOOR>, ABSS_FM<0xb, 16>, ISA_MIPS2, FGR_64; def FLOOR_L_D64 : ABSS_FT<"floor.l.d", FGR64Opnd, FGR64Opnd, II_FLOOR>, ABSS_FM<0xb, 17>, INSN_MIPS3_32, FGR_64; } } let AdditionalPredicates = [NotInMicroMips] in{ def CVT_S_W : MMRel, ABSS_FT<"cvt.s.w", FGR32Opnd, FGR32Opnd, II_CVT>, ABSS_FM<0x20, 20>, ISA_MIPS1; def CVT_L_S : MMRel, ABSS_FT<"cvt.l.s", FGR64Opnd, FGR32Opnd, II_CVT>, ABSS_FM<0x25, 16>, INSN_MIPS3_32R2; def CVT_L_D64: MMRel, ABSS_FT<"cvt.l.d", FGR64Opnd, FGR64Opnd, II_CVT>, ABSS_FM<0x25, 17>, INSN_MIPS3_32R2; } let AdditionalPredicates = [NotInMicroMips] in { def CVT_S_D32 : MMRel, ABSS_FT<"cvt.s.d", FGR32Opnd, AFGR64Opnd, II_CVT>, ABSS_FM<0x20, 17>, ISA_MIPS1, FGR_32; def CVT_D32_S : MMRel, ABSS_FT<"cvt.d.s", AFGR64Opnd, FGR32Opnd, II_CVT>, ABSS_FM<0x21, 16>, ISA_MIPS1, FGR_32; def CVT_D32_W : MMRel, ABSS_FT<"cvt.d.w", AFGR64Opnd, FGR32Opnd, II_CVT>, ABSS_FM<0x21, 20>, ISA_MIPS1, FGR_32; } let DecoderNamespace = "MipsFP64" in { let AdditionalPredicates = [NotInMicroMips] in { def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32Opnd, FGR64Opnd, II_CVT>, ABSS_FM<0x20, 21>, INSN_MIPS3_32R2, FGR_64; def CVT_S_D64 : ABSS_FT<"cvt.s.d", FGR32Opnd, FGR64Opnd, II_CVT>, ABSS_FM<0x20, 17>, ISA_MIPS1, FGR_64; def CVT_D64_W : ABSS_FT<"cvt.d.w", FGR64Opnd, FGR32Opnd, II_CVT>, ABSS_FM<0x21, 20>, ISA_MIPS1, FGR_64; def CVT_D64_S : ABSS_FT<"cvt.d.s", FGR64Opnd, FGR32Opnd, II_CVT>, ABSS_FM<0x21, 16>, ISA_MIPS1, FGR_64; def CVT_D64_L : ABSS_FT<"cvt.d.l", FGR64Opnd, FGR64Opnd, II_CVT>, ABSS_FM<0x21, 21>, INSN_MIPS3_32R2, FGR_64; } } let isPseudo = 1, isCodeGenOnly = 1 in { def PseudoCVT_S_W : ABSS_FT<"", FGR32Opnd, GPR32Opnd, II_CVT>; def PseudoCVT_D32_W : ABSS_FT<"", AFGR64Opnd, GPR32Opnd, II_CVT>; def PseudoCVT_S_L : ABSS_FT<"", 
FGR64Opnd, GPR64Opnd, II_CVT>; def PseudoCVT_D64_W : ABSS_FT<"", FGR64Opnd, GPR32Opnd, II_CVT>; def PseudoCVT_D64_L : ABSS_FT<"", FGR64Opnd, GPR64Opnd, II_CVT>; } let AdditionalPredicates = [NotInMicroMips] in { def FABS_S : MMRel, ABSS_FT<"abs.s", FGR32Opnd, FGR32Opnd, II_ABS, fabs>, ABSS_FM<0x5, 16>, ISA_MIPS1; defm FABS : ABSS_M<"abs.d", II_ABS, fabs>, ABSS_FM<0x5, 17>, ISA_MIPS1; } def FNEG_S : MMRel, ABSS_FT<"neg.s", FGR32Opnd, FGR32Opnd, II_NEG, fneg>, ABSS_FM<0x7, 16>, ISA_MIPS1; let AdditionalPredicates = [NotInMicroMips] in { defm FNEG : ABSS_M<"neg.d", II_NEG, fneg>, ABSS_FM<0x7, 17>, ISA_MIPS1; } let AdditionalPredicates = [NotInMicroMips] in { def FSQRT_S : MMRel, StdMMR6Rel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, II_SQRT_S, fsqrt>, ABSS_FM<0x4, 16>, ISA_MIPS2; defm FSQRT : ABSS_M<"sqrt.d", II_SQRT_D, fsqrt>, ABSS_FM<0x4, 17>, ISA_MIPS2; } // The odd-numbered registers are only referenced when doing loads, // stores, and moves between floating-point and integer registers. // When defining instructions, we reference all 32-bit registers, // regardless of register aliasing. /// Move Control Registers From/To CPU Registers let AdditionalPredicates = [NotInMicroMips] in { def CFC1 : MMRel, MFC1_FT<"cfc1", GPR32Opnd, CCROpnd, II_CFC1>, MFC1_FM<2>, ISA_MIPS1; def CTC1 : MMRel, MTC1_FT<"ctc1", CCROpnd, GPR32Opnd, II_CTC1>, MFC1_FM<6>, ISA_MIPS1; - def MFC1 : MMRel, MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd, II_MFC1, - bitconvert>, MFC1_FM<0>, ISA_MIPS1; + def MFC1 : MMRel, StdMMR6Rel, MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd, II_MFC1, + bitconvert>, MFC1_FM<0>, ISA_MIPS1; def MFC1_D64 : MFC1_FT<"mfc1", GPR32Opnd, FGR64Opnd, II_MFC1>, MFC1_FM<0>, ISA_MIPS1, FGR_64 { let DecoderNamespace = "MipsFP64"; } - def MTC1 : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, II_MTC1, - bitconvert>, MFC1_FM<4>, ISA_MIPS1; + def MTC1 : MMRel, StdMMR6Rel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, II_MTC1, + bitconvert>, MFC1_FM<4>, ISA_MIPS1; def MTC1_D64 : MTC1_FT<"mtc1", FGR64Opnd, GPR32Opnd, II_MTC1>, MFC1_FM<4>, ISA_MIPS1, FGR_64 { let DecoderNamespace = "MipsFP64"; } def MFHC1_D32 : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, AFGR64Opnd, II_MFHC1>, MFC1_FM<3>, ISA_MIPS32R2, FGR_32; def MFHC1_D64 : MFC1_FT<"mfhc1", GPR32Opnd, FGR64Opnd, II_MFHC1>, MFC1_FM<3>, ISA_MIPS32R2, FGR_64 { let DecoderNamespace = "MipsFP64"; } def MTHC1_D32 : MMRel, StdMMR6Rel, MTC1_64_FT<"mthc1", AFGR64Opnd, GPR32Opnd, II_MTHC1>, MFC1_FM<7>, ISA_MIPS32R2, FGR_32; def MTHC1_D64 : MTC1_64_FT<"mthc1", FGR64Opnd, GPR32Opnd, II_MTHC1>, MFC1_FM<7>, ISA_MIPS32R2, FGR_64 { let DecoderNamespace = "MipsFP64"; } def DMTC1 : MTC1_FT<"dmtc1", FGR64Opnd, GPR64Opnd, II_DMTC1, bitconvert>, MFC1_FM<5>, ISA_MIPS3; def DMFC1 : MFC1_FT<"dmfc1", GPR64Opnd, FGR64Opnd, II_DMFC1, bitconvert>, MFC1_FM<1>, ISA_MIPS3; let isMoveReg = 1 in { def FMOV_S : MMRel, ABSS_FT<"mov.s", FGR32Opnd, FGR32Opnd, II_MOV_S>, ABSS_FM<0x6, 16>, ISA_MIPS1; def FMOV_D32 : MMRel, ABSS_FT<"mov.d", AFGR64Opnd, AFGR64Opnd, II_MOV_D>, ABSS_FM<0x6, 17>, ISA_MIPS1, FGR_32; def FMOV_D64 : ABSS_FT<"mov.d", FGR64Opnd, FGR64Opnd, II_MOV_D>, ABSS_FM<0x6, 17>, ISA_MIPS1, FGR_64 { let DecoderNamespace = "MipsFP64"; } } // isMoveReg } /// Floating Point Memory Instructions let AdditionalPredicates = [NotInMicroMips] in { def LWC1 : MMRel, LW_FT<"lwc1", FGR32Opnd, mem_simm16, II_LWC1, load>, LW_FM<0x31>, ISA_MIPS1; def SWC1 : MMRel, SW_FT<"swc1", FGR32Opnd, mem_simm16, II_SWC1, store>, LW_FM<0x39>, ISA_MIPS1; } let DecoderNamespace = "MipsFP64", AdditionalPredicates = [NotInMicroMips] in { def LDC164 : 
StdMMR6Rel, LW_FT<"ldc1", FGR64Opnd, mem_simm16, II_LDC1, load>, LW_FM<0x35>, ISA_MIPS2, FGR_64 { let BaseOpcode = "LDC164"; } def SDC164 : StdMMR6Rel, SW_FT<"sdc1", FGR64Opnd, mem_simm16, II_SDC1, store>, LW_FM<0x3d>, ISA_MIPS2, FGR_64; } let AdditionalPredicates = [NotInMicroMips] in { def LDC1 : MMRel, StdMMR6Rel, LW_FT<"ldc1", AFGR64Opnd, mem_simm16, II_LDC1, load>, LW_FM<0x35>, ISA_MIPS2, FGR_32 { let BaseOpcode = "LDC132"; } def SDC1 : MMRel, SW_FT<"sdc1", AFGR64Opnd, mem_simm16, II_SDC1, store>, LW_FM<0x3d>, ISA_MIPS2, FGR_32; } // Indexed loads and stores. // Base register + offset register addressing mode (indicated by "x" in the // instruction mnemonic) is disallowed under NaCl. let AdditionalPredicates = [IsNotNaCl] in { def LWXC1 : MMRel, LWXC1_FT<"lwxc1", FGR32Opnd, II_LWXC1, load>, LWXC1_FM<0>, INSN_MIPS4_32R2_NOT_32R6_64R6; def SWXC1 : MMRel, SWXC1_FT<"swxc1", FGR32Opnd, II_SWXC1, store>, SWXC1_FM<8>, INSN_MIPS4_32R2_NOT_32R6_64R6; } let AdditionalPredicates = [NotInMicroMips, IsNotNaCl] in { def LDXC1 : LWXC1_FT<"ldxc1", AFGR64Opnd, II_LDXC1, load>, LWXC1_FM<1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; def SDXC1 : SWXC1_FT<"sdxc1", AFGR64Opnd, II_SDXC1, store>, SWXC1_FM<9>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; } let DecoderNamespace="MipsFP64" in { def LDXC164 : LWXC1_FT<"ldxc1", FGR64Opnd, II_LDXC1, load>, LWXC1_FM<1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; def SDXC164 : SWXC1_FT<"sdxc1", FGR64Opnd, II_SDXC1, store>, SWXC1_FM<9>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; } // Load/store doubleword indexed unaligned. // FIXME: This instruction should not be defined for FGR_32. let AdditionalPredicates = [IsNotNaCl, NotInMicroMips] in { def LUXC1 : MMRel, LWXC1_FT<"luxc1", AFGR64Opnd, II_LUXC1>, LWXC1_FM<0x5>, INSN_MIPS5_32R2_NOT_32R6_64R6, FGR_32; def SUXC1 : MMRel, SWXC1_FT<"suxc1", AFGR64Opnd, II_SUXC1>, SWXC1_FM<0xd>, INSN_MIPS5_32R2_NOT_32R6_64R6, FGR_32; } let AdditionalPredicates = [IsNotNaCl, NotInMicroMips], DecoderNamespace="MipsFP64" in { def LUXC164 : LWXC1_FT<"luxc1", FGR64Opnd, II_LUXC1>, LWXC1_FM<0x5>, INSN_MIPS5_32R2_NOT_32R6_64R6, FGR_64; def SUXC164 : SWXC1_FT<"suxc1", FGR64Opnd, II_SUXC1>, SWXC1_FM<0xd>, INSN_MIPS5_32R2_NOT_32R6_64R6, FGR_64; } /// Floating-point Aritmetic let AdditionalPredicates = [NotInMicroMips] in { def FADD_S : MMRel, ADDS_FT<"add.s", FGR32Opnd, II_ADD_S, 1, fadd>, ADDS_FM<0x00, 16>, ISA_MIPS1; defm FADD : ADDS_M<"add.d", II_ADD_D, 1, fadd>, ADDS_FM<0x00, 17>, ISA_MIPS1; def FDIV_S : MMRel, ADDS_FT<"div.s", FGR32Opnd, II_DIV_S, 0, fdiv>, ADDS_FM<0x03, 16>, ISA_MIPS1; defm FDIV : ADDS_M<"div.d", II_DIV_D, 0, fdiv>, ADDS_FM<0x03, 17>, ISA_MIPS1; def FMUL_S : MMRel, ADDS_FT<"mul.s", FGR32Opnd, II_MUL_S, 1, fmul>, ADDS_FM<0x02, 16>, ISA_MIPS1; defm FMUL : ADDS_M<"mul.d", II_MUL_D, 1, fmul>, ADDS_FM<0x02, 17>, ISA_MIPS1; def FSUB_S : MMRel, ADDS_FT<"sub.s", FGR32Opnd, II_SUB_S, 0, fsub>, ADDS_FM<0x01, 16>, ISA_MIPS1; defm FSUB : ADDS_M<"sub.d", II_SUB_D, 0, fsub>, ADDS_FM<0x01, 17>, ISA_MIPS1; } let AdditionalPredicates = [NotInMicroMips, HasMadd4] in { def MADD_S : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>, MADDS_FM<4, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6; def MSUB_S : MMRel, MADDS_FT<"msub.s", FGR32Opnd, II_MSUB_S, fsub>, MADDS_FM<5, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6; def MADD_D32 : MMRel, MADDS_FT<"madd.d", AFGR64Opnd, II_MADD_D, fadd>, MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; def MSUB_D32 : MMRel, MADDS_FT<"msub.d", AFGR64Opnd, II_MSUB_D, fsub>, MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; let 
DecoderNamespace = "MipsFP64" in { def MADD_D64 : MADDS_FT<"madd.d", FGR64Opnd, II_MADD_D, fadd>, MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; def MSUB_D64 : MADDS_FT<"msub.d", FGR64Opnd, II_MSUB_D, fsub>, MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; } } let AdditionalPredicates = [NoNaNsFPMath, HasMadd4, NotInMicroMips] in { def NMADD_S : MMRel, NMADDS_FT<"nmadd.s", FGR32Opnd, II_NMADD_S, fadd>, MADDS_FM<6, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6; def NMSUB_S : MMRel, NMADDS_FT<"nmsub.s", FGR32Opnd, II_NMSUB_S, fsub>, MADDS_FM<7, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6; def NMADD_D32 : MMRel, NMADDS_FT<"nmadd.d", AFGR64Opnd, II_NMADD_D, fadd>, MADDS_FM<6, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; def NMSUB_D32 : MMRel, NMADDS_FT<"nmsub.d", AFGR64Opnd, II_NMSUB_D, fsub>, MADDS_FM<7, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; let DecoderNamespace = "MipsFP64" in { def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64Opnd, II_NMADD_D, fadd>, MADDS_FM<6, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; def NMSUB_D64 : NMADDS_FT<"nmsub.d", FGR64Opnd, II_NMSUB_D, fsub>, MADDS_FM<7, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; } } //===----------------------------------------------------------------------===// // Floating Point Branch Codes //===----------------------------------------------------------------------===// // Mips branch codes. These correspond to condcode in MipsInstrInfo.h. // They must be kept in synch. def MIPS_BRANCH_F : PatLeaf<(i32 0)>; def MIPS_BRANCH_T : PatLeaf<(i32 1)>; let AdditionalPredicates = [NotInMicroMips] in { def BC1F : MMRel, BC1F_FT<"bc1f", brtarget, II_BC1F, MIPS_BRANCH_F>, BC1F_FM<0, 0>, ISA_MIPS1_NOT_32R6_64R6; def BC1FL : MMRel, BC1XL_FT<"bc1fl", brtarget, II_BC1FL>, BC1F_FM<1, 0>, ISA_MIPS2_NOT_32R6_64R6; def BC1T : MMRel, BC1F_FT<"bc1t", brtarget, II_BC1T, MIPS_BRANCH_T>, BC1F_FM<0, 1>, ISA_MIPS1_NOT_32R6_64R6; def BC1TL : MMRel, BC1XL_FT<"bc1tl", brtarget, II_BC1TL>, BC1F_FM<1, 1>, ISA_MIPS2_NOT_32R6_64R6; /// Floating Point Compare def FCMP_S32 : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>, CEQS_FM<16>, ISA_MIPS1_NOT_32R6_64R6 { // FIXME: This is a required to work around the fact that these instructions // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the // fcc register set is used directly. bits<3> fcc = 0; } def FCMP_D32 : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>, ISA_MIPS1_NOT_32R6_64R6, FGR_32 { // FIXME: This is a required to work around the fact that these instructions // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the // fcc register set is used directly. bits<3> fcc = 0; } } let DecoderNamespace = "MipsFP64" in def FCMP_D64 : CEQS_FT<"d", FGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>, ISA_MIPS1_NOT_32R6_64R6, FGR_64 { // FIXME: This is a required to work around the fact that thiese instructions // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the // fcc register set is used directly. bits<3> fcc = 0; } //===----------------------------------------------------------------------===// // Floating Point Pseudo-Instructions //===----------------------------------------------------------------------===// // This pseudo instr gets expanded into 2 mtc1 instrs after register // allocation. 
class BuildPairF64Base<RegisterOperand RO> :
  PseudoSE<(outs RO:$dst), (ins GPR32Opnd:$lo, GPR32Opnd:$hi),
           [(set RO:$dst, (MipsBuildPairF64 GPR32Opnd:$lo, GPR32Opnd:$hi))],
           II_MTC1>;

def BuildPairF64 : BuildPairF64Base<AFGR64Opnd>, FGR_32, HARDFLOAT;
def BuildPairF64_64 : BuildPairF64Base<FGR64Opnd>, FGR_64, HARDFLOAT;

// This pseudo instr gets expanded into 2 mfc1 instrs after register
// allocation.
// if n is 0, lower part of src is extracted.
// if n is 1, higher part of src is extracted.
// This node has associated scheduling information as the pre RA scheduler
// asserts otherwise.
class ExtractElementF64Base<RegisterOperand RO> :
  PseudoSE<(outs GPR32Opnd:$dst), (ins RO:$src, i32imm:$n),
           [(set GPR32Opnd:$dst, (MipsExtractElementF64 RO:$src, imm:$n))],
           II_MFC1>;

def ExtractElementF64 : ExtractElementF64Base<AFGR64Opnd>, FGR_32, HARDFLOAT;
def ExtractElementF64_64 : ExtractElementF64Base<FGR64Opnd>, FGR_64, HARDFLOAT;

def PseudoTRUNC_W_S : MipsAsmPseudoInst<(outs FGR32Opnd:$fd),
                                        (ins FGR32Opnd:$fs, GPR32Opnd:$rs),
                                        "trunc.w.s\t$fd, $fs, $rs">;

def PseudoTRUNC_W_D32 : MipsAsmPseudoInst<(outs FGR32Opnd:$fd),
                                          (ins AFGR64Opnd:$fs, GPR32Opnd:$rs),
                                          "trunc.w.d\t$fd, $fs, $rs">,
                        FGR_32, HARDFLOAT;

def PseudoTRUNC_W_D : MipsAsmPseudoInst<(outs FGR32Opnd:$fd),
                                        (ins FGR64Opnd:$fs, GPR32Opnd:$rs),
                                        "trunc.w.d\t$fd, $fs, $rs">,
                      FGR_64, HARDFLOAT;

def LoadImmSingleGPR : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
                                         (ins imm64:$fpimm),
                                         "li.s\t$rd, $fpimm">;

def LoadImmSingleFGR : MipsAsmPseudoInst<(outs StrictlyFGR32Opnd:$rd),
                                         (ins imm64:$fpimm),
                                         "li.s\t$rd, $fpimm">,
                       HARDFLOAT;

def LoadImmDoubleGPR : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
                                         (ins imm64:$fpimm),
                                         "li.d\t$rd, $fpimm">;

def LoadImmDoubleFGR_32 : MipsAsmPseudoInst<(outs StrictlyAFGR64Opnd:$rd),
                                            (ins imm64:$fpimm),
                                            "li.d\t$rd, $fpimm">,
                          FGR_32, HARDFLOAT;

def LoadImmDoubleFGR : MipsAsmPseudoInst<(outs StrictlyFGR64Opnd:$rd),
                                         (ins imm64:$fpimm),
                                         "li.d\t$rd, $fpimm">,
                       FGR_64, HARDFLOAT;

//===----------------------------------------------------------------------===//
// InstAliases.
//===----------------------------------------------------------------------===// def : MipsInstAlias <"s.s $fd, $addr", (SWC1 FGR32Opnd:$fd, mem_simm16:$addr), 0>, ISA_MIPS2, HARDFLOAT; def : MipsInstAlias <"s.d $fd, $addr", (SDC1 AFGR64Opnd:$fd, mem_simm16:$addr), 0>, FGR_32, ISA_MIPS2, HARDFLOAT; def : MipsInstAlias <"s.d $fd, $addr", (SDC164 FGR64Opnd:$fd, mem_simm16:$addr), 0>, FGR_64, ISA_MIPS2, HARDFLOAT; def : MipsInstAlias <"l.s $fd, $addr", (LWC1 FGR32Opnd:$fd, mem_simm16:$addr), 0>, ISA_MIPS2, HARDFLOAT; def : MipsInstAlias <"l.d $fd, $addr", (LDC1 AFGR64Opnd:$fd, mem_simm16:$addr), 0>, FGR_32, ISA_MIPS2, HARDFLOAT; def : MipsInstAlias <"l.d $fd, $addr", (LDC164 FGR64Opnd:$fd, mem_simm16:$addr), 0>, FGR_64, ISA_MIPS2, HARDFLOAT; multiclass C_COND_ALIASES { def : MipsInstAlias("C_F_"#NAME) FCC0, RC:$fs, RC:$ft), 1>; def : MipsInstAlias("C_UN_"#NAME) FCC0, RC:$fs, RC:$ft), 1>; def : MipsInstAlias("C_EQ_"#NAME) FCC0, RC:$fs, RC:$ft), 1>; def : MipsInstAlias("C_UEQ_"#NAME) FCC0, RC:$fs, RC:$ft), 1>; def : MipsInstAlias("C_OLT_"#NAME) FCC0, RC:$fs, RC:$ft), 1>; def : MipsInstAlias("C_ULT_"#NAME) FCC0, RC:$fs, RC:$ft), 1>; def : MipsInstAlias("C_OLE_"#NAME) FCC0, RC:$fs, RC:$ft), 1>; def : MipsInstAlias("C_ULE_"#NAME) FCC0, RC:$fs, RC:$ft), 1>; def : MipsInstAlias("C_SF_"#NAME) FCC0, RC:$fs, RC:$ft), 1>; def : MipsInstAlias("C_NGLE_"#NAME) FCC0, RC:$fs, RC:$ft), 1>; def : MipsInstAlias("C_SEQ_"#NAME) FCC0, RC:$fs, RC:$ft), 1>; def : MipsInstAlias("C_NGL_"#NAME) FCC0, RC:$fs, RC:$ft), 1>; def : MipsInstAlias("C_LT_"#NAME) FCC0, RC:$fs, RC:$ft), 1>; def : MipsInstAlias("C_NGE_"#NAME) FCC0, RC:$fs, RC:$ft), 1>; def : MipsInstAlias("C_LE_"#NAME) FCC0, RC:$fs, RC:$ft), 1>; def : MipsInstAlias("C_NGT_"#NAME) FCC0, RC:$fs, RC:$ft), 1>; } multiclass BC1_ALIASES { def : MipsInstAlias; def : MipsInstAlias; } let AdditionalPredicates = [NotInMicroMips] in { defm S : C_COND_ALIASES<"s", FGR32Opnd>, HARDFLOAT, ISA_MIPS1_NOT_32R6_64R6; defm D32 : C_COND_ALIASES<"d", AFGR64Opnd>, HARDFLOAT, ISA_MIPS1_NOT_32R6_64R6, FGR_32; defm D64 : C_COND_ALIASES<"d", FGR64Opnd>, HARDFLOAT, ISA_MIPS1_NOT_32R6_64R6, FGR_64; defm : BC1_ALIASES, ISA_MIPS1_NOT_32R6_64R6, HARDFLOAT; defm : BC1_ALIASES, ISA_MIPS2_NOT_32R6_64R6, HARDFLOAT; } //===----------------------------------------------------------------------===// // Floating Point Patterns //===----------------------------------------------------------------------===// def : MipsPat<(f32 fpimm0), (MTC1 ZERO)>, ISA_MIPS1; def : MipsPat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>, ISA_MIPS1; def : MipsPat<(f32 (sint_to_fp GPR32Opnd:$src)), (PseudoCVT_S_W GPR32Opnd:$src)>; def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src), (TRUNC_W_S FGR32Opnd:$src)>, ISA_MIPS1; def : MipsPat<(MipsMTC1_D64 GPR32Opnd:$src), (MTC1_D64 GPR32Opnd:$src)>, ISA_MIPS1, FGR_64; def : MipsPat<(f64 (sint_to_fp GPR32Opnd:$src)), (PseudoCVT_D32_W GPR32Opnd:$src)>, FGR_32; let AdditionalPredicates = [NotInMicroMips] in { def : MipsPat<(MipsTruncIntFP AFGR64Opnd:$src), (TRUNC_W_D32 AFGR64Opnd:$src)>, ISA_MIPS2, FGR_32; def : MipsPat<(f32 (fpround AFGR64Opnd:$src)), (CVT_S_D32 AFGR64Opnd:$src)>, ISA_MIPS1, FGR_32; def : MipsPat<(f64 (fpextend FGR32Opnd:$src)), (CVT_D32_S FGR32Opnd:$src)>, ISA_MIPS1, FGR_32; } def : MipsPat<(f64 fpimm0), (DMTC1 ZERO_64)>, ISA_MIPS3, GPR_64, FGR_64; def : MipsPat<(f64 fpimm0neg), (FNEG_D64 (DMTC1 ZERO_64))>, ISA_MIPS3, GPR_64, FGR_64; def : MipsPat<(f64 (sint_to_fp GPR32Opnd:$src)), (PseudoCVT_D64_W GPR32Opnd:$src)>, FGR_64; def : MipsPat<(f32 (sint_to_fp GPR64Opnd:$src)), 
(EXTRACT_SUBREG (PseudoCVT_S_L GPR64Opnd:$src), sub_lo)>, FGR_64; def : MipsPat<(f64 (sint_to_fp GPR64Opnd:$src)), (PseudoCVT_D64_L GPR64Opnd:$src)>, FGR_64; def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src), (TRUNC_W_D64 FGR64Opnd:$src)>, ISA_MIPS2, FGR_64; def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src), (TRUNC_L_S FGR32Opnd:$src)>, ISA_MIPS2, FGR_64; def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src), (TRUNC_L_D64 FGR64Opnd:$src)>, ISA_MIPS2, FGR_64; let AdditionalPredicates = [NotInMicroMips] in { def : MipsPat<(f32 (fpround FGR64Opnd:$src)), (CVT_S_D64 FGR64Opnd:$src)>, ISA_MIPS1, FGR_64; def : MipsPat<(f64 (fpextend FGR32Opnd:$src)), (CVT_D64_S FGR32Opnd:$src)>, ISA_MIPS1, FGR_64; } // To generate NMADD and NMSUB instructions when fneg node is present multiclass NMADD_NMSUB { def : MipsPat<(fneg (fadd (fmul RC:$fs, RC:$ft), RC:$fr)), (Nmadd RC:$fr, RC:$fs, RC:$ft)>; def : MipsPat<(fneg (fsub (fmul RC:$fs, RC:$ft), RC:$fr)), (Nmsub RC:$fr, RC:$fs, RC:$ft)>; } let AdditionalPredicates = [NoNaNsFPMath, HasMadd4, NotInMicroMips] in { defm : NMADD_NMSUB, INSN_MIPS4_32R2_NOT_32R6_64R6; defm : NMADD_NMSUB, FGR_32, INSN_MIPS4_32R2_NOT_32R6_64R6; defm : NMADD_NMSUB, FGR_64, INSN_MIPS4_32R2_NOT_32R6_64R6; } // Patterns for loads/stores with a reg+imm operand. let AdditionalPredicates = [NotInMicroMips] in { let AddedComplexity = 40 in { def : LoadRegImmPat, ISA_MIPS1; def : StoreRegImmPat, ISA_MIPS1; def : LoadRegImmPat, ISA_MIPS1, FGR_64; def : StoreRegImmPat, ISA_MIPS1, FGR_64; def : LoadRegImmPat, ISA_MIPS1, FGR_32; def : StoreRegImmPat, ISA_MIPS1, FGR_32; } } Index: vendor/llvm/dist-release_70/lib/Target/Mips/MipsSEFrameLowering.cpp =================================================================== --- vendor/llvm/dist-release_70/lib/Target/Mips/MipsSEFrameLowering.cpp (revision 341364) +++ vendor/llvm/dist-release_70/lib/Target/Mips/MipsSEFrameLowering.cpp (revision 341365) @@ -1,914 +1,921 @@ //===- MipsSEFrameLowering.cpp - Mips32/64 Frame Information --------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file contains the Mips32/64 implementation of TargetFrameLowering class. 
// //===----------------------------------------------------------------------===// #include "MipsSEFrameLowering.h" #include "MCTargetDesc/MipsABIInfo.h" #include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" #include "MipsSEInstrInfo.h" #include "MipsSubtarget.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MachineLocation.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include #include #include #include using namespace llvm; static std::pair getMFHiLoOpc(unsigned Src) { if (Mips::ACC64RegClass.contains(Src)) return std::make_pair((unsigned)Mips::PseudoMFHI, (unsigned)Mips::PseudoMFLO); if (Mips::ACC64DSPRegClass.contains(Src)) return std::make_pair((unsigned)Mips::MFHI_DSP, (unsigned)Mips::MFLO_DSP); if (Mips::ACC128RegClass.contains(Src)) return std::make_pair((unsigned)Mips::PseudoMFHI64, (unsigned)Mips::PseudoMFLO64); return std::make_pair(0, 0); } namespace { /// Helper class to expand pseudos. class ExpandPseudo { public: ExpandPseudo(MachineFunction &MF); bool expand(); private: using Iter = MachineBasicBlock::iterator; bool expandInstr(MachineBasicBlock &MBB, Iter I); void expandLoadCCond(MachineBasicBlock &MBB, Iter I); void expandStoreCCond(MachineBasicBlock &MBB, Iter I); void expandLoadACC(MachineBasicBlock &MBB, Iter I, unsigned RegSize); void expandStoreACC(MachineBasicBlock &MBB, Iter I, unsigned MFHiOpc, unsigned MFLoOpc, unsigned RegSize); bool expandCopy(MachineBasicBlock &MBB, Iter I); bool expandCopyACC(MachineBasicBlock &MBB, Iter I, unsigned MFHiOpc, unsigned MFLoOpc); bool expandBuildPairF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, bool FP64) const; bool expandExtractElementF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, bool FP64) const; MachineFunction &MF; MachineRegisterInfo &MRI; const MipsSubtarget &Subtarget; const MipsSEInstrInfo &TII; const MipsRegisterInfo &RegInfo; }; } // end anonymous namespace ExpandPseudo::ExpandPseudo(MachineFunction &MF_) : MF(MF_), MRI(MF.getRegInfo()), Subtarget(static_cast(MF.getSubtarget())), TII(*static_cast(Subtarget.getInstrInfo())), RegInfo(*Subtarget.getRegisterInfo()) {} bool ExpandPseudo::expand() { bool Expanded = false; for (auto &MBB : MF) { for (Iter I = MBB.begin(), End = MBB.end(); I != End;) Expanded |= expandInstr(MBB, I++); } return Expanded; } bool ExpandPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) { switch(I->getOpcode()) { case Mips::LOAD_CCOND_DSP: expandLoadCCond(MBB, I); break; case Mips::STORE_CCOND_DSP: expandStoreCCond(MBB, I); break; case Mips::LOAD_ACC64: case Mips::LOAD_ACC64DSP: expandLoadACC(MBB, I, 4); break; case Mips::LOAD_ACC128: expandLoadACC(MBB, I, 8); break; case Mips::STORE_ACC64: expandStoreACC(MBB, I, Mips::PseudoMFHI, Mips::PseudoMFLO, 4); break; case Mips::STORE_ACC64DSP: 
expandStoreACC(MBB, I, Mips::MFHI_DSP, Mips::MFLO_DSP, 4); break; case Mips::STORE_ACC128: expandStoreACC(MBB, I, Mips::PseudoMFHI64, Mips::PseudoMFLO64, 8); break; case Mips::BuildPairF64: if (expandBuildPairF64(MBB, I, false)) MBB.erase(I); return false; case Mips::BuildPairF64_64: if (expandBuildPairF64(MBB, I, true)) MBB.erase(I); return false; case Mips::ExtractElementF64: if (expandExtractElementF64(MBB, I, false)) MBB.erase(I); return false; case Mips::ExtractElementF64_64: if (expandExtractElementF64(MBB, I, true)) MBB.erase(I); return false; case TargetOpcode::COPY: if (!expandCopy(MBB, I)) return false; break; default: return false; } MBB.erase(I); return true; } void ExpandPseudo::expandLoadCCond(MachineBasicBlock &MBB, Iter I) { // load $vr, FI // copy ccond, $vr assert(I->getOperand(0).isReg() && I->getOperand(1).isFI()); const TargetRegisterClass *RC = RegInfo.intRegClass(4); unsigned VR = MRI.createVirtualRegister(RC); unsigned Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); TII.loadRegFromStack(MBB, I, VR, FI, RC, &RegInfo, 0); BuildMI(MBB, I, I->getDebugLoc(), TII.get(TargetOpcode::COPY), Dst) .addReg(VR, RegState::Kill); } void ExpandPseudo::expandStoreCCond(MachineBasicBlock &MBB, Iter I) { // copy $vr, ccond // store $vr, FI assert(I->getOperand(0).isReg() && I->getOperand(1).isFI()); const TargetRegisterClass *RC = RegInfo.intRegClass(4); unsigned VR = MRI.createVirtualRegister(RC); unsigned Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); BuildMI(MBB, I, I->getDebugLoc(), TII.get(TargetOpcode::COPY), VR) .addReg(Src, getKillRegState(I->getOperand(0).isKill())); TII.storeRegToStack(MBB, I, VR, true, FI, RC, &RegInfo, 0); } void ExpandPseudo::expandLoadACC(MachineBasicBlock &MBB, Iter I, unsigned RegSize) { // load $vr0, FI // copy lo, $vr0 // load $vr1, FI + 4 // copy hi, $vr1 assert(I->getOperand(0).isReg() && I->getOperand(1).isFI()); const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize); unsigned VR0 = MRI.createVirtualRegister(RC); unsigned VR1 = MRI.createVirtualRegister(RC); unsigned Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); unsigned Lo = RegInfo.getSubReg(Dst, Mips::sub_lo); unsigned Hi = RegInfo.getSubReg(Dst, Mips::sub_hi); DebugLoc DL = I->getDebugLoc(); const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY); TII.loadRegFromStack(MBB, I, VR0, FI, RC, &RegInfo, 0); BuildMI(MBB, I, DL, Desc, Lo).addReg(VR0, RegState::Kill); TII.loadRegFromStack(MBB, I, VR1, FI, RC, &RegInfo, RegSize); BuildMI(MBB, I, DL, Desc, Hi).addReg(VR1, RegState::Kill); } void ExpandPseudo::expandStoreACC(MachineBasicBlock &MBB, Iter I, unsigned MFHiOpc, unsigned MFLoOpc, unsigned RegSize) { // mflo $vr0, src // store $vr0, FI // mfhi $vr1, src // store $vr1, FI + 4 assert(I->getOperand(0).isReg() && I->getOperand(1).isFI()); const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize); unsigned VR0 = MRI.createVirtualRegister(RC); unsigned VR1 = MRI.createVirtualRegister(RC); unsigned Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); unsigned SrcKill = getKillRegState(I->getOperand(0).isKill()); DebugLoc DL = I->getDebugLoc(); BuildMI(MBB, I, DL, TII.get(MFLoOpc), VR0).addReg(Src); TII.storeRegToStack(MBB, I, VR0, true, FI, RC, &RegInfo, 0); BuildMI(MBB, I, DL, TII.get(MFHiOpc), VR1).addReg(Src, SrcKill); TII.storeRegToStack(MBB, I, VR1, true, FI, RC, &RegInfo, RegSize); } bool ExpandPseudo::expandCopy(MachineBasicBlock &MBB, Iter I) { unsigned Src = I->getOperand(1).getReg(); std::pair Opcodes = 
getMFHiLoOpc(Src); if (!Opcodes.first) return false; return expandCopyACC(MBB, I, Opcodes.first, Opcodes.second); } bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I, unsigned MFHiOpc, unsigned MFLoOpc) { // mflo $vr0, src // copy dst_lo, $vr0 // mfhi $vr1, src // copy dst_hi, $vr1 unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg(); const TargetRegisterClass *DstRC = RegInfo.getMinimalPhysRegClass(Dst); unsigned VRegSize = RegInfo.getRegSizeInBits(*DstRC) / 16; const TargetRegisterClass *RC = RegInfo.intRegClass(VRegSize); unsigned VR0 = MRI.createVirtualRegister(RC); unsigned VR1 = MRI.createVirtualRegister(RC); unsigned SrcKill = getKillRegState(I->getOperand(1).isKill()); unsigned DstLo = RegInfo.getSubReg(Dst, Mips::sub_lo); unsigned DstHi = RegInfo.getSubReg(Dst, Mips::sub_hi); DebugLoc DL = I->getDebugLoc(); BuildMI(MBB, I, DL, TII.get(MFLoOpc), VR0).addReg(Src); BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), DstLo) .addReg(VR0, RegState::Kill); BuildMI(MBB, I, DL, TII.get(MFHiOpc), VR1).addReg(Src, SrcKill); BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), DstHi) .addReg(VR1, RegState::Kill); return true; } /// This method expands the same instruction that MipsSEInstrInfo:: /// expandBuildPairF64 does, for the case when ABI is fpxx and mthc1 is not /// available and the case where the ABI is FP64A. It is implemented here /// because frame indexes are eliminated before MipsSEInstrInfo:: /// expandBuildPairF64 is called. bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, bool FP64) const { // For fpxx and when mthc1 is not available, use: // spill + reload via ldc1 // // The case where dmtc1 is available doesn't need to be handled here // because it never creates a BuildPairF64 node. // // The FP64A ABI (fp64 with nooddspreg) must also use a spill/reload sequence // for odd-numbered double precision values (because the lower 32-bits is // transferred with mtc1 which is redirected to the upper half of the even // register). Unfortunately, we have to make this decision before register // allocation so for now we use a spill/reload sequence for all // double-precision values in regardless of being an odd/even register. - if ((Subtarget.isABI_FPXX() && !Subtarget.hasMTHC1()) || - (FP64 && !Subtarget.useOddSPReg())) { + // + // For the cases that should be covered here MipsSEISelDAGToDAG adds $sp as + // implicit operand, so other passes (like ShrinkWrapping) are aware that + // stack is used. + if (I->getNumOperands() == 4 && I->getOperand(3).isReg() + && I->getOperand(3).getReg() == Mips::SP) { unsigned DstReg = I->getOperand(0).getReg(); unsigned LoReg = I->getOperand(1).getReg(); unsigned HiReg = I->getOperand(2).getReg(); // It should be impossible to have FGR64 on MIPS-II or MIPS32r1 (which are // the cases where mthc1 is not available). 64-bit architectures and // MIPS32r2 or later can use FGR64 though. assert(Subtarget.isGP64bit() || Subtarget.hasMTHC1() || !Subtarget.isFP64bit()); const TargetRegisterClass *RC = &Mips::GPR32RegClass; const TargetRegisterClass *RC2 = FP64 ? &Mips::FGR64RegClass : &Mips::AFGR64RegClass; // We re-use the same spill slot each time so that the stack frame doesn't // grow too much in functions with a large number of moves. 
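    // Sketch of the sequence built below (little-endian; register names are
    // illustrative):
    //   sw   $lo, 0(<spill slot>)
    //   sw   $hi, 4(<spill slot>)
    //   ldc1 $fd, 0(<spill slot>)
    // i.e. the GPR pair is stored as two words and reloaded as one 64-bit FPR.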
int FI = MF.getInfo()->getMoveF64ViaSpillFI(RC2); if (!Subtarget.isLittle()) std::swap(LoReg, HiReg); TII.storeRegToStack(MBB, I, LoReg, I->getOperand(1).isKill(), FI, RC, &RegInfo, 0); TII.storeRegToStack(MBB, I, HiReg, I->getOperand(2).isKill(), FI, RC, &RegInfo, 4); TII.loadRegFromStack(MBB, I, DstReg, FI, RC2, &RegInfo, 0); return true; } return false; } /// This method expands the same instruction that MipsSEInstrInfo:: /// expandExtractElementF64 does, for the case when ABI is fpxx and mfhc1 is not /// available and the case where the ABI is FP64A. It is implemented here /// because frame indexes are eliminated before MipsSEInstrInfo:: /// expandExtractElementF64 is called. bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, bool FP64) const { const MachineOperand &Op1 = I->getOperand(1); const MachineOperand &Op2 = I->getOperand(2); if ((Op1.isReg() && Op1.isUndef()) || (Op2.isReg() && Op2.isUndef())) { unsigned DstReg = I->getOperand(0).getReg(); BuildMI(MBB, I, I->getDebugLoc(), TII.get(Mips::IMPLICIT_DEF), DstReg); return true; } // For fpxx and when mfhc1 is not available, use: // spill + reload via ldc1 // // The case where dmfc1 is available doesn't need to be handled here // because it never creates a ExtractElementF64 node. // // The FP64A ABI (fp64 with nooddspreg) must also use a spill/reload sequence // for odd-numbered double precision values (because the lower 32-bits is // transferred with mfc1 which is redirected to the upper half of the even // register). Unfortunately, we have to make this decision before register // allocation so for now we use a spill/reload sequence for all // double-precision values in regardless of being an odd/even register. - - if ((Subtarget.isABI_FPXX() && !Subtarget.hasMTHC1()) || - (FP64 && !Subtarget.useOddSPReg())) { + // + // For the cases that should be covered here MipsSEISelDAGToDAG adds $sp as + // implicit operand, so other passes (like ShrinkWrapping) are aware that + // stack is used. + if (I->getNumOperands() == 4 && I->getOperand(3).isReg() + && I->getOperand(3).getReg() == Mips::SP) { unsigned DstReg = I->getOperand(0).getReg(); unsigned SrcReg = Op1.getReg(); unsigned N = Op2.getImm(); int64_t Offset = 4 * (Subtarget.isLittle() ? N : (1 - N)); // It should be impossible to have FGR64 on MIPS-II or MIPS32r1 (which are // the cases where mfhc1 is not available). 64-bit architectures and // MIPS32r2 or later can use FGR64 though. assert(Subtarget.isGP64bit() || Subtarget.hasMTHC1() || !Subtarget.isFP64bit()); const TargetRegisterClass *RC = FP64 ? &Mips::FGR64RegClass : &Mips::AFGR64RegClass; const TargetRegisterClass *RC2 = &Mips::GPR32RegClass; // We re-use the same spill slot each time so that the stack frame doesn't // grow too much in functions with a large number of moves. 
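    // Sketch of the sequence built below (names illustrative):
    //   sdc1 $fs, 0(<spill slot>)
    //   lw   $rd, (4 * N)(<spill slot>)   // N adjusted for endianness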
int FI = MF.getInfo()->getMoveF64ViaSpillFI(RC); TII.storeRegToStack(MBB, I, SrcReg, Op1.isKill(), FI, RC, &RegInfo, 0); TII.loadRegFromStack(MBB, I, DstReg, FI, RC2, &RegInfo, Offset); return true; } return false; } MipsSEFrameLowering::MipsSEFrameLowering(const MipsSubtarget &STI) : MipsFrameLowering(STI, STI.getStackAlignment()) {} void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineFrameInfo &MFI = MF.getFrameInfo(); MipsFunctionInfo *MipsFI = MF.getInfo(); const MipsSEInstrInfo &TII = *static_cast(STI.getInstrInfo()); const MipsRegisterInfo &RegInfo = *static_cast(STI.getRegisterInfo()); MachineBasicBlock::iterator MBBI = MBB.begin(); DebugLoc dl; MipsABIInfo ABI = STI.getABI(); unsigned SP = ABI.GetStackPtr(); unsigned FP = ABI.GetFramePtr(); unsigned ZERO = ABI.GetNullPtr(); unsigned MOVE = ABI.GetGPRMoveOp(); unsigned ADDiu = ABI.GetPtrAddiuOp(); unsigned AND = ABI.IsN64() ? Mips::AND64 : Mips::AND; const TargetRegisterClass *RC = ABI.ArePtrs64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; // First, compute final stack size. uint64_t StackSize = MFI.getStackSize(); // No need to allocate space on the stack. if (StackSize == 0 && !MFI.adjustsStack()) return; MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); // Adjust stack. TII.adjustStackPtr(SP, -StackSize, MBB, MBBI); // emit ".cfi_def_cfa_offset StackSize" unsigned CFIIndex = MF.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, -StackSize)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); if (MF.getFunction().hasFnAttribute("interrupt")) emitInterruptPrologueStub(MF, MBB); const std::vector &CSI = MFI.getCalleeSavedInfo(); if (!CSI.empty()) { // Find the instruction past the last instruction that saves a callee-saved // register to the stack. for (unsigned i = 0; i < CSI.size(); ++i) ++MBBI; // Iterate over list of callee-saved registers and emit .cfi_offset // directives. for (std::vector::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { int64_t Offset = MFI.getObjectOffset(I->getFrameIdx()); unsigned Reg = I->getReg(); // If Reg is a double precision register, emit two cfa_offsets, // one for each of the paired single precision registers. if (Mips::AFGR64RegClass.contains(Reg)) { unsigned Reg0 = MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_lo), true); unsigned Reg1 = MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_hi), true); if (!STI.isLittle()) std::swap(Reg0, Reg1); unsigned CFIIndex = MF.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg0, Offset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); CFIIndex = MF.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg1, Offset + 4)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } else if (Mips::FGR64RegClass.contains(Reg)) { unsigned Reg0 = MRI->getDwarfRegNum(Reg, true); unsigned Reg1 = MRI->getDwarfRegNum(Reg, true) + 1; if (!STI.isLittle()) std::swap(Reg0, Reg1); unsigned CFIIndex = MF.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg0, Offset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); CFIIndex = MF.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg1, Offset + 4)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } else { // Reg is either in GPR32 or FGR32. 
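        // A single .cfi_offset entry is enough here; only the paired 64-bit
        // FPR cases above need two entries, one per 32-bit half.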
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } } } if (MipsFI->callsEhReturn()) { // Insert instructions that spill eh data registers. for (int I = 0; I < 4; ++I) { if (!MBB.isLiveIn(ABI.GetEhDataReg(I))) MBB.addLiveIn(ABI.GetEhDataReg(I)); TII.storeRegToStackSlot(MBB, MBBI, ABI.GetEhDataReg(I), false, MipsFI->getEhDataRegFI(I), RC, &RegInfo); } // Emit .cfi_offset directives for eh data registers. for (int I = 0; I < 4; ++I) { int64_t Offset = MFI.getObjectOffset(MipsFI->getEhDataRegFI(I)); unsigned Reg = MRI->getDwarfRegNum(ABI.GetEhDataReg(I), true); unsigned CFIIndex = MF.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg, Offset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } } // if framepointer enabled, set it to point to the stack pointer. if (hasFP(MF)) { // Insert instruction "move $fp, $sp" at this location. BuildMI(MBB, MBBI, dl, TII.get(MOVE), FP).addReg(SP).addReg(ZERO) .setMIFlag(MachineInstr::FrameSetup); // emit ".cfi_def_cfa_register $fp" unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaRegister( nullptr, MRI->getDwarfRegNum(FP, true))); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); if (RegInfo.needsStackRealignment(MF)) { // addiu $Reg, $zero, -MaxAlignment // andi $sp, $sp, $Reg unsigned VR = MF.getRegInfo().createVirtualRegister(RC); assert(isInt<16>(MFI.getMaxAlignment()) && "Function's alignment size requirement is not supported."); int MaxAlign = -(int)MFI.getMaxAlignment(); BuildMI(MBB, MBBI, dl, TII.get(ADDiu), VR).addReg(ZERO) .addImm(MaxAlign); BuildMI(MBB, MBBI, dl, TII.get(AND), SP).addReg(SP).addReg(VR); if (hasBP(MF)) { // move $s7, $sp unsigned BP = STI.isABI_N64() ? Mips::S7_64 : Mips::S7; BuildMI(MBB, MBBI, dl, TII.get(MOVE), BP) .addReg(SP) .addReg(ZERO); } } } } void MipsSEFrameLowering::emitInterruptPrologueStub( MachineFunction &MF, MachineBasicBlock &MBB) const { MipsFunctionInfo *MipsFI = MF.getInfo(); MachineBasicBlock::iterator MBBI = MBB.begin(); DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); // Report an error the target doesn't support Mips32r2 or later. // The epilogue relies on the use of the "ehb" to clear execution // hazards. Pre R2 Mips relies on an implementation defined number // of "ssnop"s to clear the execution hazard. Support for ssnop hazard // clearing is not provided so reject that configuration. if (!STI.hasMips32r2()) report_fatal_error( "\"interrupt\" attribute is not supported on pre-MIPS32R2 or " "MIPS16 targets."); // The GP register contains the "user" value, so we cannot perform // any gp relative loads until we restore the "kernel" or "system" gp // value. Until support is written we shall only accept the static // relocation model. if ((STI.getRelocationModel() != Reloc::Static)) report_fatal_error("\"interrupt\" attribute is only supported for the " "static relocation model on MIPS at the present time."); if (!STI.isABI_O32() || STI.hasMips64()) report_fatal_error("\"interrupt\" attribute is only supported for the " "O32 ABI on MIPS32R2+ at the present time."); // Perform ISR handling like GCC StringRef IntKind = MF.getFunction().getFnAttribute("interrupt").getValueAsString(); const TargetRegisterClass *PtrRC = &Mips::GPR32RegClass; // EIC interrupt handling needs to read the Cause register to disable // interrupts. 
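  // The mfc0/ext pair below appears to pull the 6-bit RIPL field (Cause bits
  // 15:10) into $k0; it is later inserted into the IPL field of Status to
  // mask interrupts at or below the requested priority.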
if (IntKind == "eic") { // Coprocessor registers are always live per se. MBB.addLiveIn(Mips::COP013); BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MFC0), Mips::K0) .addReg(Mips::COP013) .addImm(0) .setMIFlag(MachineInstr::FrameSetup); BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::EXT), Mips::K0) .addReg(Mips::K0) .addImm(10) .addImm(6) .setMIFlag(MachineInstr::FrameSetup); } // Fetch and spill EPC MBB.addLiveIn(Mips::COP014); BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MFC0), Mips::K1) .addReg(Mips::COP014) .addImm(0) .setMIFlag(MachineInstr::FrameSetup); STI.getInstrInfo()->storeRegToStack(MBB, MBBI, Mips::K1, false, MipsFI->getISRRegFI(0), PtrRC, STI.getRegisterInfo(), 0); // Fetch and Spill Status MBB.addLiveIn(Mips::COP012); BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MFC0), Mips::K1) .addReg(Mips::COP012) .addImm(0) .setMIFlag(MachineInstr::FrameSetup); STI.getInstrInfo()->storeRegToStack(MBB, MBBI, Mips::K1, false, MipsFI->getISRRegFI(1), PtrRC, STI.getRegisterInfo(), 0); // Build the configuration for disabling lower priority interrupts. Non EIC // interrupts need to be masked off with zero, EIC from the Cause register. unsigned InsPosition = 8; unsigned InsSize = 0; unsigned SrcReg = Mips::ZERO; // If the interrupt we're tied to is the EIC, switch the source for the // masking off interrupts to the cause register. if (IntKind == "eic") { SrcReg = Mips::K0; InsPosition = 10; InsSize = 6; } else InsSize = StringSwitch(IntKind) .Case("sw0", 1) .Case("sw1", 2) .Case("hw0", 3) .Case("hw1", 4) .Case("hw2", 5) .Case("hw3", 6) .Case("hw4", 7) .Case("hw5", 8) .Default(0); assert(InsSize != 0 && "Unknown interrupt type!"); BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::INS), Mips::K1) .addReg(SrcReg) .addImm(InsPosition) .addImm(InsSize) .addReg(Mips::K1) .setMIFlag(MachineInstr::FrameSetup); // Mask off KSU, ERL, EXL BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::INS), Mips::K1) .addReg(Mips::ZERO) .addImm(1) .addImm(4) .addReg(Mips::K1) .setMIFlag(MachineInstr::FrameSetup); // Disable the FPU as we are not spilling those register sets. if (!STI.useSoftFloat()) BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::INS), Mips::K1) .addReg(Mips::ZERO) .addImm(29) .addImm(1) .addReg(Mips::K1) .setMIFlag(MachineInstr::FrameSetup); // Set the new status BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MTC0), Mips::COP012) .addReg(Mips::K1) .addImm(0) .setMIFlag(MachineInstr::FrameSetup); } void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); MachineFrameInfo &MFI = MF.getFrameInfo(); MipsFunctionInfo *MipsFI = MF.getInfo(); const MipsSEInstrInfo &TII = *static_cast(STI.getInstrInfo()); const MipsRegisterInfo &RegInfo = *static_cast(STI.getRegisterInfo()); DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); MipsABIInfo ABI = STI.getABI(); unsigned SP = ABI.GetStackPtr(); unsigned FP = ABI.GetFramePtr(); unsigned ZERO = ABI.GetNullPtr(); unsigned MOVE = ABI.GetGPRMoveOp(); // if framepointer enabled, restore the stack pointer. if (hasFP(MF)) { // Find the first instruction that restores a callee-saved register. MachineBasicBlock::iterator I = MBBI; for (unsigned i = 0; i < MFI.getCalleeSavedInfo().size(); ++i) --I; // Insert instruction "move $sp, $fp" at this location. BuildMI(MBB, I, DL, TII.get(MOVE), SP).addReg(FP).addReg(ZERO); } if (MipsFI->callsEhReturn()) { const TargetRegisterClass *RC = ABI.ArePtrs64bit() ? 
&Mips::GPR64RegClass : &Mips::GPR32RegClass; // Find first instruction that restores a callee-saved register. MachineBasicBlock::iterator I = MBBI; for (unsigned i = 0; i < MFI.getCalleeSavedInfo().size(); ++i) --I; // Insert instructions that restore eh data registers. for (int J = 0; J < 4; ++J) { TII.loadRegFromStackSlot(MBB, I, ABI.GetEhDataReg(J), MipsFI->getEhDataRegFI(J), RC, &RegInfo); } } if (MF.getFunction().hasFnAttribute("interrupt")) emitInterruptEpilogueStub(MF, MBB); // Get the number of bytes from FrameInfo uint64_t StackSize = MFI.getStackSize(); if (!StackSize) return; // Adjust stack. TII.adjustStackPtr(SP, StackSize, MBB, MBBI); } void MipsSEFrameLowering::emitInterruptEpilogueStub( MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); MipsFunctionInfo *MipsFI = MF.getInfo(); DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); // Perform ISR handling like GCC const TargetRegisterClass *PtrRC = &Mips::GPR32RegClass; // Disable Interrupts. BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::DI), Mips::ZERO); BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::EHB)); // Restore EPC STI.getInstrInfo()->loadRegFromStackSlot(MBB, MBBI, Mips::K1, MipsFI->getISRRegFI(0), PtrRC, STI.getRegisterInfo()); BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MTC0), Mips::COP014) .addReg(Mips::K1) .addImm(0); // Restore Status STI.getInstrInfo()->loadRegFromStackSlot(MBB, MBBI, Mips::K1, MipsFI->getISRRegFI(1), PtrRC, STI.getRegisterInfo()); BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MTC0), Mips::COP012) .addReg(Mips::K1) .addImm(0); } int MipsSEFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); MipsABIInfo ABI = STI.getABI(); if (MFI.isFixedObjectIndex(FI)) FrameReg = hasFP(MF) ? ABI.GetFramePtr() : ABI.GetStackPtr(); else FrameReg = hasBP(MF) ? ABI.GetBasePtr() : ABI.GetStackPtr(); return MFI.getObjectOffset(FI) + MFI.getStackSize() - getOffsetOfLocalArea() + MFI.getOffsetAdjustment(); } bool MipsSEFrameLowering:: spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, const TargetRegisterInfo *TRI) const { MachineFunction *MF = MBB.getParent(); const TargetInstrInfo &TII = *STI.getInstrInfo(); for (unsigned i = 0, e = CSI.size(); i != e; ++i) { // Add the callee-saved register as live-in. Do not add if the register is // RA and return address is taken, because it has already been added in // method MipsTargetLowering::lowerRETURNADDR. // It's killed at the spill, unless the register is RA and return address // is taken. unsigned Reg = CSI[i].getReg(); bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA || Reg == Mips::RA_64) && MF->getFrameInfo().isReturnAddressTaken(); if (!IsRAAndRetAddrIsTaken) MBB.addLiveIn(Reg); // ISRs require HI/LO to be spilled into kernel registers to be then // spilled to the stack frame. bool IsLOHI = (Reg == Mips::LO0 || Reg == Mips::LO0_64 || Reg == Mips::HI0 || Reg == Mips::HI0_64); const Function &Func = MBB.getParent()->getFunction(); if (IsLOHI && Func.hasFnAttribute("interrupt")) { DebugLoc DL = MI->getDebugLoc(); unsigned Op = 0; if (!STI.getABI().ArePtrs64bit()) { Op = (Reg == Mips::HI0) ? Mips::MFHI : Mips::MFLO; Reg = Mips::K0; } else { Op = (Reg == Mips::HI0) ? 
Mips::MFHI64 : Mips::MFLO64; Reg = Mips::K0_64; } BuildMI(MBB, MI, DL, TII.get(Op), Mips::K0) .setMIFlag(MachineInstr::FrameSetup); } // Insert the spill to the stack frame. bool IsKill = !IsRAAndRetAddrIsTaken; const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, CSI[i].getFrameIdx(), RC, TRI); } return true; } bool MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); // Reserve call frame if the size of the maximum call frame fits into 16-bit // immediate field and there are no variable sized objects on the stack. // Make sure the second register scavenger spill slot can be accessed with one // instruction. return isInt<16>(MFI.getMaxCallFrameSize() + getStackAlignment()) && !MFI.hasVarSizedObjects(); } /// Mark \p Reg and all registers aliasing it in the bitset. static void setAliasRegs(MachineFunction &MF, BitVector &SavedRegs, unsigned Reg) { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) SavedRegs.set(*AI); } void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const { TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); MipsFunctionInfo *MipsFI = MF.getInfo(); MipsABIInfo ABI = STI.getABI(); unsigned FP = ABI.GetFramePtr(); unsigned BP = ABI.IsN64() ? Mips::S7_64 : Mips::S7; // Mark $fp as used if function has dedicated frame pointer. if (hasFP(MF)) setAliasRegs(MF, SavedRegs, FP); // Mark $s7 as used if function has dedicated base pointer. if (hasBP(MF)) setAliasRegs(MF, SavedRegs, BP); // Create spill slots for eh data registers if function calls eh_return. if (MipsFI->callsEhReturn()) MipsFI->createEhDataRegsFI(); // Create spill slots for Coprocessor 0 registers if function is an ISR. if (MipsFI->isISR()) MipsFI->createISRRegFI(); // Expand pseudo instructions which load, store or copy accumulators. // Add an emergency spill slot if a pseudo was expanded. if (ExpandPseudo(MF).expand()) { // The spill slot should be half the size of the accumulator. If target have // general-purpose registers 64 bits wide, it should be 64-bit, otherwise // it should be 32-bit. const TargetRegisterClass &RC = STI.isGP64bit() ? Mips::GPR64RegClass : Mips::GPR32RegClass; int FI = MF.getFrameInfo().CreateStackObject(TRI->getSpillSize(RC), TRI->getSpillAlignment(RC), false); RS->addScavengingFrameIndex(FI); } // Set scavenging frame index if necessary. uint64_t MaxSPOffset = estimateStackSize(MF); // MSA has a minimum offset of 10 bits signed. If there is a variable // sized object on the stack, the estimation cannot account for it. if (isIntN(STI.hasMSA() ? 10 : 16, MaxSPOffset) && !MF.getFrameInfo().hasVarSizedObjects()) return; const TargetRegisterClass &RC = ABI.ArePtrs64bit() ? 
Mips::GPR64RegClass : Mips::GPR32RegClass; int FI = MF.getFrameInfo().CreateStackObject(TRI->getSpillSize(RC), TRI->getSpillAlignment(RC), false); RS->addScavengingFrameIndex(FI); } const MipsFrameLowering * llvm::createMipsSEFrameLowering(const MipsSubtarget &ST) { return new MipsSEFrameLowering(ST); } Index: vendor/llvm/dist-release_70/lib/Target/Mips/MipsSEISelDAGToDAG.cpp =================================================================== --- vendor/llvm/dist-release_70/lib/Target/Mips/MipsSEISelDAGToDAG.cpp (revision 341364) +++ vendor/llvm/dist-release_70/lib/Target/Mips/MipsSEISelDAGToDAG.cpp (revision 341365) @@ -1,1366 +1,1378 @@ //===-- MipsSEISelDAGToDAG.cpp - A Dag to Dag Inst Selector for MipsSE ----===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // Subclass of MipsDAGToDAGISel specialized for mips32/64. // //===----------------------------------------------------------------------===// #include "MipsSEISelDAGToDAG.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "Mips.h" #include "MipsAnalyzeImmediate.h" #include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; #define DEBUG_TYPE "mips-isel" bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { Subtarget = &static_cast(MF.getSubtarget()); if (Subtarget->inMips16Mode()) return false; return MipsDAGToDAGISel::runOnMachineFunction(MF); } void MipsSEDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); SelectionDAGISel::getAnalysisUsage(AU); } void MipsSEDAGToDAGISel::addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI, MachineFunction &MF) { MachineInstrBuilder MIB(MF, &MI); unsigned Mask = MI.getOperand(1).getImm(); unsigned Flag = IsDef ? RegState::ImplicitDefine : RegState::Implicit | RegState::Undef; if (Mask & 1) MIB.addReg(Mips::DSPPos, Flag); if (Mask & 2) MIB.addReg(Mips::DSPSCount, Flag); if (Mask & 4) MIB.addReg(Mips::DSPCarry, Flag); if (Mask & 8) MIB.addReg(Mips::DSPOutFlag, Flag); if (Mask & 16) MIB.addReg(Mips::DSPCCond, Flag); if (Mask & 32) MIB.addReg(Mips::DSPEFI, Flag); } unsigned MipsSEDAGToDAGISel::getMSACtrlReg(const SDValue RegIdx) const { switch (cast(RegIdx)->getZExtValue()) { default: llvm_unreachable("Could not map int to register"); case 0: return Mips::MSAIR; case 1: return Mips::MSACSR; case 2: return Mips::MSAAccess; case 3: return Mips::MSASave; case 4: return Mips::MSAModify; case 5: return Mips::MSARequest; case 6: return Mips::MSAMap; case 7: return Mips::MSAUnmap; } } bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr& MI) { unsigned DstReg = 0, ZeroReg = 0; // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0". 
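  // For example (illustrative), given
  //   addiu $v0, $zero, 0
  //   sw    $v0, 0($sp)
  // the store's use of $v0 is rewritten to use $zero directly, so the
  // materializing addiu can then be removed as dead by later cleanup.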
if ((MI.getOpcode() == Mips::ADDiu) && (MI.getOperand(1).getReg() == Mips::ZERO) && (MI.getOperand(2).isImm()) && (MI.getOperand(2).getImm() == 0)) { DstReg = MI.getOperand(0).getReg(); ZeroReg = Mips::ZERO; } else if ((MI.getOpcode() == Mips::DADDiu) && (MI.getOperand(1).getReg() == Mips::ZERO_64) && (MI.getOperand(2).isImm()) && (MI.getOperand(2).getImm() == 0)) { DstReg = MI.getOperand(0).getReg(); ZeroReg = Mips::ZERO_64; } if (!DstReg) return false; // Replace uses with ZeroReg. for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg), E = MRI->use_end(); U != E;) { MachineOperand &MO = *U; unsigned OpNo = U.getOperandNo(); MachineInstr *MI = MO.getParent(); ++U; // Do not replace if it is a phi's operand or is tied to def operand. if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo()) continue; // Also, we have to check that the register class of the operand // contains the zero register. if (!MRI->getRegClass(MO.getReg())->contains(ZeroReg)) continue; MO.setReg(ZeroReg); } return true; } void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { MipsFunctionInfo *MipsFI = MF.getInfo(); if (!MipsFI->globalBaseRegSet()) return; MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator I = MBB.begin(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); DebugLoc DL; unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg(); const TargetRegisterClass *RC; const MipsABIInfo &ABI = static_cast(TM).getABI(); RC = (ABI.IsN64()) ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; V0 = RegInfo.createVirtualRegister(RC); V1 = RegInfo.createVirtualRegister(RC); if (ABI.IsN64()) { MF.getRegInfo().addLiveIn(Mips::T9_64); MBB.addLiveIn(Mips::T9_64); // lui $v0, %hi(%neg(%gp_rel(fname))) // daddu $v1, $v0, $t9 // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) const GlobalValue *FName = &MF.getFunction(); BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0) .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0) .addReg(Mips::T9_64); BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1) .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); return; } if (!MF.getTarget().isPositionIndependent()) { // Set global register to __gnu_local_gp. // // lui $v0, %hi(__gnu_local_gp) // addiu $globalbasereg, $v0, %lo(__gnu_local_gp) BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI); BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0) .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO); return; } MF.getRegInfo().addLiveIn(Mips::T9); MBB.addLiveIn(Mips::T9); if (ABI.IsN32()) { // lui $v0, %hi(%neg(%gp_rel(fname))) // addu $v1, $v0, $t9 // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) const GlobalValue *FName = &MF.getFunction(); BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9); BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1) .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); return; } assert(ABI.IsO32()); // For O32 ABI, the following instruction sequence is emitted to initialize // the global base register: // // 0. lui $2, %hi(_gp_disp) // 1. addiu $2, $2, %lo(_gp_disp) // 2. addu $globalbasereg, $2, $t9 // // We emit only the last instruction here. 
// // GNU linker requires that the first two instructions appear at the beginning // of a function and no instructions be inserted before or between them. // The two instructions are emitted during lowering to MC layer in order to // avoid any reordering. // // Register $2 (Mips::V0) is added to the list of live-in registers to ensure // the value instruction 1 (addiu) defines is valid when instruction 2 (addu) // reads it. MF.getRegInfo().addLiveIn(Mips::V0); MBB.addLiveIn(Mips::V0); BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg) .addReg(Mips::V0).addReg(Mips::T9); } void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { initGlobalBaseReg(MF); MachineRegisterInfo *MRI = &MF.getRegInfo(); for (auto &MBB: MF) { for (auto &MI: MBB) { switch (MI.getOpcode()) { case Mips::RDDSP: addDSPCtrlRegOperands(false, MI, MF); break; case Mips::WRDSP: addDSPCtrlRegOperands(true, MI, MF); break; + case Mips::BuildPairF64_64: + case Mips::ExtractElementF64_64: + if (!Subtarget->useOddSPReg()) { + MI.addOperand(MachineOperand::CreateReg(Mips::SP, false, true)); + break; + } + // fallthrough + case Mips::BuildPairF64: + case Mips::ExtractElementF64: + if (Subtarget->isABI_FPXX() && !Subtarget->hasMTHC1()) + MI.addOperand(MachineOperand::CreateReg(Mips::SP, false, true)); + break; default: replaceUsesWithZeroReg(MRI, MI); } } } } void MipsSEDAGToDAGISel::selectAddE(SDNode *Node, const SDLoc &DL) const { SDValue InFlag = Node->getOperand(2); unsigned Opc = InFlag.getOpcode(); SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1); EVT VT = LHS.getValueType(); // In the base case, we can rely on the carry bit from the addsc // instruction. if (Opc == ISD::ADDC) { SDValue Ops[3] = {LHS, RHS, InFlag}; CurDAG->SelectNodeTo(Node, Mips::ADDWC, VT, MVT::Glue, Ops); return; } assert(Opc == ISD::ADDE && "ISD::ADDE not in a chain of ADDE nodes!"); // The more complex case is when there is a chain of ISD::ADDE nodes like: // (adde (adde (adde (addc a b) c) d) e). // // The addwc instruction does not write to the carry bit, instead it writes // to bit 20 of the dsp control register. To match this series of nodes, each // intermediate adde node must be expanded to write the carry bit before the // addition. // Start by reading the overflow field for addsc and moving the value to the // carry field. The usage of 1 here with MipsISD::RDDSP / Mips::WRDSP // corresponds to reading/writing the entire control register to/from a GPR. SDValue CstOne = CurDAG->getTargetConstant(1, DL, MVT::i32); SDValue OuFlag = CurDAG->getTargetConstant(20, DL, MVT::i32); SDNode *DSPCtrlField = CurDAG->getMachineNode(Mips::RDDSP, DL, MVT::i32, MVT::Glue, CstOne, InFlag); SDNode *Carry = CurDAG->getMachineNode( Mips::EXT, DL, MVT::i32, SDValue(DSPCtrlField, 0), OuFlag, CstOne); SDValue Ops[4] = {SDValue(DSPCtrlField, 0), CurDAG->getTargetConstant(6, DL, MVT::i32), CstOne, SDValue(Carry, 0)}; SDNode *DSPCFWithCarry = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, Ops); // My reading of the MIPS DSP 3.01 specification isn't as clear as I // would like about whether bit 20 always gets overwritten by addwc. // Hence take an extremely conservative view and presume it's sticky. We // therefore need to clear it. 
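  // In short (a sketch of the node sequence built here): rddsp reads
  // DSPControl, ext pulls out the addsc overflow bit (bit 20), one ins folds
  // it into the carry field and a second ins clears bit 20, wrdsp writes
  // DSPControl back, and the final addwc is selected against that glue.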
SDValue Zero = CurDAG->getRegister(Mips::ZERO, MVT::i32); SDValue InsOps[4] = {Zero, OuFlag, CstOne, SDValue(DSPCFWithCarry, 0)}; SDNode *DSPCtrlFinal = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, InsOps); SDNode *WrDSP = CurDAG->getMachineNode(Mips::WRDSP, DL, MVT::Glue, SDValue(DSPCtrlFinal, 0), CstOne); SDValue Operands[3] = {LHS, RHS, SDValue(WrDSP, 0)}; CurDAG->SelectNodeTo(Node, Mips::ADDWC, VT, MVT::Glue, Operands); } /// Match frameindex bool MipsSEDAGToDAGISel::selectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset) const { if (FrameIndexSDNode *FIN = dyn_cast(Addr)) { EVT ValTy = Addr.getValueType(); Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), ValTy); return true; } return false; } /// Match frameindex+offset and frameindex|offset bool MipsSEDAGToDAGISel::selectAddrFrameIndexOffset( SDValue Addr, SDValue &Base, SDValue &Offset, unsigned OffsetBits, unsigned ShiftAmount = 0) const { if (CurDAG->isBaseWithConstantOffset(Addr)) { ConstantSDNode *CN = dyn_cast(Addr.getOperand(1)); if (isIntN(OffsetBits + ShiftAmount, CN->getSExtValue())) { EVT ValTy = Addr.getValueType(); // If the first operand is a FI, get the TargetFI Node if (FrameIndexSDNode *FIN = dyn_cast(Addr.getOperand(0))) Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); else { Base = Addr.getOperand(0); // If base is a FI, additional offset calculation is done in // eliminateFrameIndex, otherwise we need to check the alignment if (OffsetToAlignment(CN->getZExtValue(), 1ull << ShiftAmount) != 0) return false; } Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Addr), ValTy); return true; } } return false; } /// ComplexPattern used on MipsInstrInfo /// Used on Mips Load/Store instructions bool MipsSEDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const { // if Address is FI, get the TargetFrameIndex. if (selectAddrFrameIndex(Addr, Base, Offset)) return true; // on PIC code Load GA if (Addr.getOpcode() == MipsISD::Wrapper) { Base = Addr.getOperand(0); Offset = Addr.getOperand(1); return true; } if (!TM.isPositionIndependent()) { if ((Addr.getOpcode() == ISD::TargetExternalSymbol || Addr.getOpcode() == ISD::TargetGlobalAddress)) return false; } // Addresses of the form FI+const or FI|const if (selectAddrFrameIndexOffset(Addr, Base, Offset, 16)) return true; // Operand is a result from an ADD. if (Addr.getOpcode() == ISD::ADD) { // When loading from constant pools, load the lower address part in // the instruction itself. 
Example, instead of: // lui $2, %hi($CPI1_0) // addiu $2, $2, %lo($CPI1_0) // lwc1 $f0, 0($2) // Generate: // lui $2, %hi($CPI1_0) // lwc1 $f0, %lo($CPI1_0)($2) if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { SDValue Opnd0 = Addr.getOperand(1).getOperand(0); if (isa(Opnd0) || isa(Opnd0) || isa(Opnd0)) { Base = Addr.getOperand(0); Offset = Opnd0; return true; } } } return false; } /// ComplexPattern used on MipsInstrInfo /// Used on Mips Load/Store instructions bool MipsSEDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const { Base = Addr; Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Addr.getValueType()); return true; } bool MipsSEDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const { return selectAddrRegImm(Addr, Base, Offset) || selectAddrDefault(Addr, Base, Offset); } bool MipsSEDAGToDAGISel::selectAddrRegImm9(SDValue Addr, SDValue &Base, SDValue &Offset) const { if (selectAddrFrameIndex(Addr, Base, Offset)) return true; if (selectAddrFrameIndexOffset(Addr, Base, Offset, 9)) return true; return false; } /// Used on microMIPS LWC2, LDC2, SWC2 and SDC2 instructions (11-bit offset) bool MipsSEDAGToDAGISel::selectAddrRegImm11(SDValue Addr, SDValue &Base, SDValue &Offset) const { if (selectAddrFrameIndex(Addr, Base, Offset)) return true; if (selectAddrFrameIndexOffset(Addr, Base, Offset, 11)) return true; return false; } /// Used on microMIPS Load/Store unaligned instructions (12-bit offset) bool MipsSEDAGToDAGISel::selectAddrRegImm12(SDValue Addr, SDValue &Base, SDValue &Offset) const { if (selectAddrFrameIndex(Addr, Base, Offset)) return true; if (selectAddrFrameIndexOffset(Addr, Base, Offset, 12)) return true; return false; } bool MipsSEDAGToDAGISel::selectAddrRegImm16(SDValue Addr, SDValue &Base, SDValue &Offset) const { if (selectAddrFrameIndex(Addr, Base, Offset)) return true; if (selectAddrFrameIndexOffset(Addr, Base, Offset, 16)) return true; return false; } bool MipsSEDAGToDAGISel::selectIntAddr11MM(SDValue Addr, SDValue &Base, SDValue &Offset) const { return selectAddrRegImm11(Addr, Base, Offset) || selectAddrDefault(Addr, Base, Offset); } bool MipsSEDAGToDAGISel::selectIntAddr12MM(SDValue Addr, SDValue &Base, SDValue &Offset) const { return selectAddrRegImm12(Addr, Base, Offset) || selectAddrDefault(Addr, Base, Offset); } bool MipsSEDAGToDAGISel::selectIntAddr16MM(SDValue Addr, SDValue &Base, SDValue &Offset) const { return selectAddrRegImm16(Addr, Base, Offset) || selectAddrDefault(Addr, Base, Offset); } bool MipsSEDAGToDAGISel::selectIntAddrLSL2MM(SDValue Addr, SDValue &Base, SDValue &Offset) const { if (selectAddrFrameIndexOffset(Addr, Base, Offset, 7)) { if (isa(Base)) return false; if (ConstantSDNode *CN = dyn_cast(Offset)) { unsigned CnstOff = CN->getZExtValue(); return (CnstOff == (CnstOff & 0x3c)); } return false; } // For all other cases where "lw" would be selected, don't select "lw16" // because it would result in additional instructions to prepare operands. 
if (selectAddrRegImm(Addr, Base, Offset)) return false; return selectAddrDefault(Addr, Base, Offset); } bool MipsSEDAGToDAGISel::selectIntAddrSImm10(SDValue Addr, SDValue &Base, SDValue &Offset) const { if (selectAddrFrameIndex(Addr, Base, Offset)) return true; if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10)) return true; return selectAddrDefault(Addr, Base, Offset); } bool MipsSEDAGToDAGISel::selectIntAddrSImm10Lsl1(SDValue Addr, SDValue &Base, SDValue &Offset) const { if (selectAddrFrameIndex(Addr, Base, Offset)) return true; if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 1)) return true; return selectAddrDefault(Addr, Base, Offset); } bool MipsSEDAGToDAGISel::selectIntAddrSImm10Lsl2(SDValue Addr, SDValue &Base, SDValue &Offset) const { if (selectAddrFrameIndex(Addr, Base, Offset)) return true; if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 2)) return true; return selectAddrDefault(Addr, Base, Offset); } bool MipsSEDAGToDAGISel::selectIntAddrSImm10Lsl3(SDValue Addr, SDValue &Base, SDValue &Offset) const { if (selectAddrFrameIndex(Addr, Base, Offset)) return true; if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 3)) return true; return selectAddrDefault(Addr, Base, Offset); } // Select constant vector splats. // // Returns true and sets Imm if: // * MSA is enabled // * N is a ISD::BUILD_VECTOR representing a constant splat bool MipsSEDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm, unsigned MinSizeInBits) const { if (!Subtarget->hasMSA()) return false; BuildVectorSDNode *Node = dyn_cast(N); if (!Node) return false; APInt SplatValue, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, MinSizeInBits, !Subtarget->isLittle())) return false; Imm = SplatValue; return true; } // Select constant vector splats. // // In addition to the requirements of selectVSplat(), this function returns // true and sets Imm if: // * The splat value is the same width as the elements of the vector // * The splat value fits in an integer with the specified signed-ness and // width. // // This function looks through ISD::BITCAST nodes. // TODO: This might not be appropriate for big-endian MSA since BITCAST is // sometimes a shuffle in big-endian mode. // // It's worth noting that this function is not used as part of the selection // of ldi.[bhwd] since it does not permit using the wrong-typed ldi.[bhwd] // instruction to achieve the desired bit pattern. ldi.[bhwd] is selected in // MipsSEDAGToDAGISel::selectNode. bool MipsSEDAGToDAGISel:: selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed, unsigned ImmBitSize) const { APInt ImmValue; EVT EltTy = N->getValueType(0).getVectorElementType(); if (N->getOpcode() == ISD::BITCAST) N = N->getOperand(0); if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ImmValue.getBitWidth() == EltTy.getSizeInBits()) { if (( Signed && ImmValue.isSignedIntN(ImmBitSize)) || (!Signed && ImmValue.isIntN(ImmBitSize))) { Imm = CurDAG->getTargetConstant(ImmValue, SDLoc(N), EltTy); return true; } } return false; } // Select constant vector splats. bool MipsSEDAGToDAGISel:: selectVSplatUimm1(SDValue N, SDValue &Imm) const { return selectVSplatCommon(N, Imm, false, 1); } bool MipsSEDAGToDAGISel:: selectVSplatUimm2(SDValue N, SDValue &Imm) const { return selectVSplatCommon(N, Imm, false, 2); } bool MipsSEDAGToDAGISel:: selectVSplatUimm3(SDValue N, SDValue &Imm) const { return selectVSplatCommon(N, Imm, false, 3); } // Select constant vector splats. 
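// Rough worked example of the rule implemented by selectVSplatCommon() above
// (values chosen purely for illustration): a v8i16 BUILD_VECTOR that splats
// the constant 7 has a 16-bit splat value with 3 active bits, so
// selectVSplatUimm3() accepts it, while a splat of -1 (0xFFFF) is rejected by
// the unsigned predicates but accepted by selectVSplatSimm5(), which treats
// it as a signed 5-bit value.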
bool MipsSEDAGToDAGISel:: selectVSplatUimm4(SDValue N, SDValue &Imm) const { return selectVSplatCommon(N, Imm, false, 4); } // Select constant vector splats. bool MipsSEDAGToDAGISel:: selectVSplatUimm5(SDValue N, SDValue &Imm) const { return selectVSplatCommon(N, Imm, false, 5); } // Select constant vector splats. bool MipsSEDAGToDAGISel:: selectVSplatUimm6(SDValue N, SDValue &Imm) const { return selectVSplatCommon(N, Imm, false, 6); } // Select constant vector splats. bool MipsSEDAGToDAGISel:: selectVSplatUimm8(SDValue N, SDValue &Imm) const { return selectVSplatCommon(N, Imm, false, 8); } // Select constant vector splats. bool MipsSEDAGToDAGISel:: selectVSplatSimm5(SDValue N, SDValue &Imm) const { return selectVSplatCommon(N, Imm, true, 5); } // Select constant vector splats whose value is a power of 2. // // In addition to the requirements of selectVSplat(), this function returns // true and sets Imm if: // * The splat value is the same width as the elements of the vector // * The splat value is a power of two. // // This function looks through ISD::BITCAST nodes. // TODO: This might not be appropriate for big-endian MSA since BITCAST is // sometimes a shuffle in big-endian mode. bool MipsSEDAGToDAGISel::selectVSplatUimmPow2(SDValue N, SDValue &Imm) const { APInt ImmValue; EVT EltTy = N->getValueType(0).getVectorElementType(); if (N->getOpcode() == ISD::BITCAST) N = N->getOperand(0); if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ImmValue.getBitWidth() == EltTy.getSizeInBits()) { int32_t Log2 = ImmValue.exactLogBase2(); if (Log2 != -1) { Imm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy); return true; } } return false; } // Select constant vector splats whose value only has a consecutive sequence // of left-most bits set (e.g. 0b11...1100...00). // // In addition to the requirements of selectVSplat(), this function returns // true and sets Imm if: // * The splat value is the same width as the elements of the vector // * The splat value is a consecutive sequence of left-most bits. // // This function looks through ISD::BITCAST nodes. // TODO: This might not be appropriate for big-endian MSA since BITCAST is // sometimes a shuffle in big-endian mode. bool MipsSEDAGToDAGISel::selectVSplatMaskL(SDValue N, SDValue &Imm) const { APInt ImmValue; EVT EltTy = N->getValueType(0).getVectorElementType(); if (N->getOpcode() == ISD::BITCAST) N = N->getOperand(0); if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ImmValue.getBitWidth() == EltTy.getSizeInBits()) { // Extract the run of set bits starting with bit zero from the bitwise // inverse of ImmValue, and test that the inverse of this is the same // as the original value. if (ImmValue == ~(~ImmValue & ~(~ImmValue + 1))) { Imm = CurDAG->getTargetConstant(ImmValue.countPopulation() - 1, SDLoc(N), EltTy); return true; } } return false; } // Select constant vector splats whose value only has a consecutive sequence // of right-most bits set (e.g. 0b00...0011...11). // // In addition to the requirements of selectVSplat(), this function returns // true and sets Imm if: // * The splat value is the same width as the elements of the vector // * The splat value is a consecutive sequence of right-most bits. // // This function looks through ISD::BITCAST nodes. // TODO: This might not be appropriate for big-endian MSA since BITCAST is // sometimes a shuffle in big-endian mode. 
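// Rough worked example of the bit test used below, on an 8-bit value chosen
// only for illustration: for ImmValue = 0b00000111, ImmValue + 1 = 0b00001000
// and ImmValue & ~(ImmValue + 1) == 0b00000111 == ImmValue, so the splat is a
// right-most run of set bits and Imm becomes countPopulation() - 1 == 2. For
// ImmValue = 0b00000110 the test fails because bit zero is clear.
// selectVSplatMaskL() above applies the same idea to ~ImmValue in order to
// recognise left-most runs such as 0b11100000.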
bool MipsSEDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const { APInt ImmValue; EVT EltTy = N->getValueType(0).getVectorElementType(); if (N->getOpcode() == ISD::BITCAST) N = N->getOperand(0); if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ImmValue.getBitWidth() == EltTy.getSizeInBits()) { // Extract the run of set bits starting with bit zero, and test that the // result is the same as the original value if (ImmValue == (ImmValue & ~(ImmValue + 1))) { Imm = CurDAG->getTargetConstant(ImmValue.countPopulation() - 1, SDLoc(N), EltTy); return true; } } return false; } bool MipsSEDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N, SDValue &Imm) const { APInt ImmValue; EVT EltTy = N->getValueType(0).getVectorElementType(); if (N->getOpcode() == ISD::BITCAST) N = N->getOperand(0); if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ImmValue.getBitWidth() == EltTy.getSizeInBits()) { int32_t Log2 = (~ImmValue).exactLogBase2(); if (Log2 != -1) { Imm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy); return true; } } return false; } bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) { unsigned Opcode = Node->getOpcode(); SDLoc DL(Node); /// // Instruction Selection not handled by the auto-generated // tablegen selection should be handled here. /// switch(Opcode) { default: break; case ISD::ADDE: { selectAddE(Node, DL); return true; } case ISD::ConstantFP: { ConstantFPSDNode *CN = dyn_cast(Node); if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { if (Subtarget->isGP64bit()) { SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, Mips::ZERO_64, MVT::i64); ReplaceNode(Node, CurDAG->getMachineNode(Mips::DMTC1, DL, MVT::f64, Zero)); } else if (Subtarget->isFP64bit()) { SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, Mips::ZERO, MVT::i32); ReplaceNode(Node, CurDAG->getMachineNode(Mips::BuildPairF64_64, DL, MVT::f64, Zero, Zero)); } else { SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, Mips::ZERO, MVT::i32); ReplaceNode(Node, CurDAG->getMachineNode(Mips::BuildPairF64, DL, MVT::f64, Zero, Zero)); } return true; } break; } case ISD::Constant: { const ConstantSDNode *CN = dyn_cast(Node); int64_t Imm = CN->getSExtValue(); unsigned Size = CN->getValueSizeInBits(0); if (isInt<32>(Imm)) break; MipsAnalyzeImmediate AnalyzeImm; const MipsAnalyzeImmediate::InstSeq &Seq = AnalyzeImm.Analyze(Imm, Size, false); MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); SDLoc DL(CN); SDNode *RegOpnd; SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), DL, MVT::i64); // The first instruction can be a LUi which is different from other // instructions (ADDiu, ORI and SLL) in that it does not have a register // operand. if (Inst->Opc == Mips::LUi64) RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd); else RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, CurDAG->getRegister(Mips::ZERO_64, MVT::i64), ImmOpnd); // The remaining instructions in the sequence are handled here. 
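// Illustrative sketch of what the loop below appends (the exact opcodes come
// from MipsAnalyzeImmediate and may differ from this example): for a constant
// such as 0x123456789ABC the analysis typically yields "lui 0x1234" (already
// materialized above) followed by an ori/shift/ori style tail, for example
// "ori 0x5678; dsll 16; ori 0x9abc", each step reusing the previous result.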
for (++Inst; Inst != Seq.end(); ++Inst) { ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), DL, MVT::i64); RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, SDValue(RegOpnd, 0), ImmOpnd); } ReplaceNode(Node, RegOpnd); return true; } case ISD::INTRINSIC_W_CHAIN: { switch (cast(Node->getOperand(1))->getZExtValue()) { default: break; case Intrinsic::mips_cfcmsa: { SDValue ChainIn = Node->getOperand(0); SDValue RegIdx = Node->getOperand(2); SDValue Reg = CurDAG->getCopyFromReg(ChainIn, DL, getMSACtrlReg(RegIdx), MVT::i32); ReplaceNode(Node, Reg.getNode()); return true; } } break; } case ISD::INTRINSIC_WO_CHAIN: { switch (cast(Node->getOperand(0))->getZExtValue()) { default: break; case Intrinsic::mips_move_v: // Like an assignment but will always produce a move.v even if // unnecessary. ReplaceNode(Node, CurDAG->getMachineNode(Mips::MOVE_V, DL, Node->getValueType(0), Node->getOperand(1))); return true; } break; } case ISD::INTRINSIC_VOID: { switch (cast(Node->getOperand(1))->getZExtValue()) { default: break; case Intrinsic::mips_ctcmsa: { SDValue ChainIn = Node->getOperand(0); SDValue RegIdx = Node->getOperand(2); SDValue Value = Node->getOperand(3); SDValue ChainOut = CurDAG->getCopyToReg(ChainIn, DL, getMSACtrlReg(RegIdx), Value); ReplaceNode(Node, ChainOut.getNode()); return true; } } break; } // Manually match MipsISD::Ins nodes to get the correct instruction. It has // to be done in this fashion so that we respect the differences between // dins and dinsm, as the difference is that the size operand has the range // 0 < size <= 32 for dins while dinsm has the range 2 <= size <= 64 which // means SelectionDAGISel would have to test all the operands at once to // match the instruction. case MipsISD::Ins: { // Sanity checking for the node operands. if (Node->getValueType(0) != MVT::i32 && Node->getValueType(0) != MVT::i64) return false; if (Node->getNumOperands() != 4) return false; if (Node->getOperand(1)->getOpcode() != ISD::Constant || Node->getOperand(2)->getOpcode() != ISD::Constant) return false; MVT ResTy = Node->getSimpleValueType(0); uint64_t Pos = Node->getConstantOperandVal(1); uint64_t Size = Node->getConstantOperandVal(2); // Size has to be >0 for 'ins', 'dins' and 'dinsu'. 
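// Worked examples of the opcode choice below (positions and sizes picked only
// for illustration): for a 64-bit result, Pos = 4 with Size = 20 stays within
// the low 32 bits and selects DINS; Pos = 4 with Size = 40 crosses bit 32 and
// selects DINSM; Pos = 40 with Size = 10 lies entirely in the upper word and
// selects DINSU. A 32-bit result can only select INS.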
if (!Size) return false; if (Pos + Size > 64) return false; if (ResTy != MVT::i32 && ResTy != MVT::i64) return false; unsigned Opcode = 0; if (ResTy == MVT::i32) { if (Pos + Size <= 32) Opcode = Mips::INS; } else { if (Pos + Size <= 32) Opcode = Mips::DINS; else if (Pos < 32 && 1 < Size) Opcode = Mips::DINSM; else Opcode = Mips::DINSU; } if (Opcode) { SDValue Ops[4] = { Node->getOperand(0), CurDAG->getTargetConstant(Pos, DL, MVT::i32), CurDAG->getTargetConstant(Size, DL, MVT::i32), Node->getOperand(3)}; ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, ResTy, Ops)); return true; } return false; } case MipsISD::ThreadPointer: { EVT PtrVT = getTargetLowering()->getPointerTy(CurDAG->getDataLayout()); unsigned RdhwrOpc, DestReg; if (PtrVT == MVT::i32) { RdhwrOpc = Mips::RDHWR; DestReg = Mips::V1; } else { RdhwrOpc = Mips::RDHWR64; DestReg = Mips::V1_64; } SDNode *Rdhwr = CurDAG->getMachineNode(RdhwrOpc, DL, Node->getValueType(0), CurDAG->getRegister(Mips::HWR29, MVT::i32), CurDAG->getTargetConstant(0, DL, MVT::i32)); SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, DestReg, SDValue(Rdhwr, 0)); SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT); ReplaceNode(Node, ResNode.getNode()); return true; } case ISD::BUILD_VECTOR: { // Select appropriate ldi.[bhwd] instructions for constant splats of // 128-bit when MSA is enabled. Fixup any register class mismatches that // occur as a result. // // This allows the compiler to use a wider range of immediates than would // otherwise be allowed. If, for example, v4i32 could only use ldi.h then // it would not be possible to load { 0x01010101, 0x01010101, 0x01010101, // 0x01010101 } without using a constant pool. This would be sub-optimal // when // 'ldi.b wd, 1' is capable of producing that bit-pattern in the // same set/ of registers. Similarly, ldi.h isn't capable of producing { // 0x00000000, 0x00000001, 0x00000000, 0x00000001 } but 'ldi.d wd, 1' can. const MipsABIInfo &ABI = static_cast(TM).getABI(); BuildVectorSDNode *BVN = cast(Node); APInt SplatValue, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; unsigned LdiOp; EVT ResVecTy = BVN->getValueType(0); EVT ViaVecTy; if (!Subtarget->hasMSA() || !BVN->getValueType(0).is128BitVector()) return false; if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 8, !Subtarget->isLittle())) return false; switch (SplatBitSize) { default: return false; case 8: LdiOp = Mips::LDI_B; ViaVecTy = MVT::v16i8; break; case 16: LdiOp = Mips::LDI_H; ViaVecTy = MVT::v8i16; break; case 32: LdiOp = Mips::LDI_W; ViaVecTy = MVT::v4i32; break; case 64: LdiOp = Mips::LDI_D; ViaVecTy = MVT::v2i64; break; } SDNode *Res; // If we have a signed 10 bit integer, we can splat it directly. // // If we have something bigger we can synthesize the value into a GPR and // splat from there. if (SplatValue.isSignedIntN(10)) { SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL, ViaVecTy.getVectorElementType()); Res = CurDAG->getMachineNode(LdiOp, DL, ViaVecTy, Imm); } else if (SplatValue.isSignedIntN(16) && ((ABI.IsO32() && SplatBitSize < 64) || (ABI.IsN32() || ABI.IsN64()))) { // Only handle signed 16 bit values when the element size is GPR width. // MIPS64 can handle all the cases but MIPS32 would need to handle // negative cases specifically here. Instead, handle those cases as // 64bit values. bool Is32BitSplat = ABI.IsO32() || SplatBitSize < 64; const unsigned ADDiuOp = Is32BitSplat ? Mips::ADDiu : Mips::DADDiu; const MVT SplatMVT = Is32BitSplat ? 
MVT::i32 : MVT::i64; SDValue ZeroVal = CurDAG->getRegister( Is32BitSplat ? Mips::ZERO : Mips::ZERO_64, SplatMVT); const unsigned FILLOp = SplatBitSize == 16 ? Mips::FILL_H : (SplatBitSize == 32 ? Mips::FILL_W : (SplatBitSize == 64 ? Mips::FILL_D : 0)); assert(FILLOp != 0 && "Unknown FILL Op for splat synthesis!"); assert((!ABI.IsO32() || (FILLOp != Mips::FILL_D)) && "Attempting to use fill.d on MIPS32!"); const unsigned Lo = SplatValue.getLoBits(16).getZExtValue(); SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, SplatMVT); Res = CurDAG->getMachineNode(ADDiuOp, DL, SplatMVT, ZeroVal, LoVal); Res = CurDAG->getMachineNode(FILLOp, DL, ViaVecTy, SDValue(Res, 0)); } else if (SplatValue.isSignedIntN(32) && SplatBitSize == 32) { // Only handle the cases where the splat size agrees with the size // of the SplatValue here. const unsigned Lo = SplatValue.getLoBits(16).getZExtValue(); const unsigned Hi = SplatValue.lshr(16).getLoBits(16).getZExtValue(); SDValue ZeroVal = CurDAG->getRegister(Mips::ZERO, MVT::i32); SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, MVT::i32); SDValue HiVal = CurDAG->getTargetConstant(Hi, DL, MVT::i32); if (Hi) Res = CurDAG->getMachineNode(Mips::LUi, DL, MVT::i32, HiVal); if (Lo) Res = CurDAG->getMachineNode(Mips::ORi, DL, MVT::i32, Hi ? SDValue(Res, 0) : ZeroVal, LoVal); assert((Hi || Lo) && "Zero case reached 32 bit case splat synthesis!"); Res = CurDAG->getMachineNode(Mips::FILL_W, DL, MVT::v4i32, SDValue(Res, 0)); } else if (SplatValue.isSignedIntN(32) && SplatBitSize == 64 && (ABI.IsN32() || ABI.IsN64())) { // N32 and N64 can perform some tricks that O32 can't for signed 32 bit // integers due to having 64bit registers. lui will cause the necessary // zero/sign extension. const unsigned Lo = SplatValue.getLoBits(16).getZExtValue(); const unsigned Hi = SplatValue.lshr(16).getLoBits(16).getZExtValue(); SDValue ZeroVal = CurDAG->getRegister(Mips::ZERO, MVT::i32); SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, MVT::i32); SDValue HiVal = CurDAG->getTargetConstant(Hi, DL, MVT::i32); if (Hi) Res = CurDAG->getMachineNode(Mips::LUi, DL, MVT::i32, HiVal); if (Lo) Res = CurDAG->getMachineNode(Mips::ORi, DL, MVT::i32, Hi ? SDValue(Res, 0) : ZeroVal, LoVal); Res = CurDAG->getMachineNode( Mips::SUBREG_TO_REG, DL, MVT::i64, CurDAG->getTargetConstant(((Hi >> 15) & 0x1), DL, MVT::i64), SDValue(Res, 0), CurDAG->getTargetConstant(Mips::sub_32, DL, MVT::i64)); Res = CurDAG->getMachineNode(Mips::FILL_D, DL, MVT::v2i64, SDValue(Res, 0)); } else if (SplatValue.isSignedIntN(64)) { // If we have a 64 bit Splat value, we perform a similar sequence to the // above: // // MIPS32: MIPS64: // lui $res, %highest(val) lui $res, %highest(val) // ori $res, $res, %higher(val) ori $res, $res, %higher(val) // lui $res2, %hi(val) lui $res2, %hi(val) // ori $res2, %res2, %lo(val) ori $res2, %res2, %lo(val) // $res3 = fill $res2 dinsu $res, $res2, 0, 32 // $res4 = insert.w $res3[1], $res fill.d $res // splat.d $res4, 0 // // The ability to use dinsu is guaranteed as MSA requires MIPSR5. This saves // having to materialize the value by shifts and ors. 
// // FIXME: Implement the preferred sequence for MIPS64R6: // // MIPS64R6: // ori $res, $zero, %lo(val) // daui $res, $res, %hi(val) // dahi $res, $res, %higher(val) // dati $res, $res, %highest(cal) // fill.d $res // const unsigned Lo = SplatValue.getLoBits(16).getZExtValue(); const unsigned Hi = SplatValue.lshr(16).getLoBits(16).getZExtValue(); const unsigned Higher = SplatValue.lshr(32).getLoBits(16).getZExtValue(); const unsigned Highest = SplatValue.lshr(48).getLoBits(16).getZExtValue(); SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, MVT::i32); SDValue HiVal = CurDAG->getTargetConstant(Hi, DL, MVT::i32); SDValue HigherVal = CurDAG->getTargetConstant(Higher, DL, MVT::i32); SDValue HighestVal = CurDAG->getTargetConstant(Highest, DL, MVT::i32); SDValue ZeroVal = CurDAG->getRegister(Mips::ZERO, MVT::i32); // Independent of whether we're targeting MIPS64 or not, the basic // operations are the same. Also, directly use the $zero register if // the 16 bit chunk is zero. // // For optimization purposes we always synthesize the splat value as // an i32 value, then if we're targetting MIPS64, use SUBREG_TO_REG // just before combining the values with dinsu to produce an i64. This // enables SelectionDAG to aggressively share components of splat values // where possible. // // FIXME: This is the general constant synthesis problem. This code // should be factored out into a class shared between all the // classes that need it. Specifically, for a splat size of 64 // bits that's a negative number we can do better than LUi/ORi // for the upper 32bits. if (Hi) Res = CurDAG->getMachineNode(Mips::LUi, DL, MVT::i32, HiVal); if (Lo) Res = CurDAG->getMachineNode(Mips::ORi, DL, MVT::i32, Hi ? SDValue(Res, 0) : ZeroVal, LoVal); SDNode *HiRes; if (Highest) HiRes = CurDAG->getMachineNode(Mips::LUi, DL, MVT::i32, HighestVal); if (Higher) HiRes = CurDAG->getMachineNode(Mips::ORi, DL, MVT::i32, Highest ? SDValue(HiRes, 0) : ZeroVal, HigherVal); if (ABI.IsO32()) { Res = CurDAG->getMachineNode(Mips::FILL_W, DL, MVT::v4i32, (Hi || Lo) ? SDValue(Res, 0) : ZeroVal); Res = CurDAG->getMachineNode( Mips::INSERT_W, DL, MVT::v4i32, SDValue(Res, 0), (Highest || Higher) ? 
SDValue(HiRes, 0) : ZeroVal, CurDAG->getTargetConstant(1, DL, MVT::i32)); const TargetLowering *TLI = getTargetLowering(); const TargetRegisterClass *RC = TLI->getRegClassFor(ViaVecTy.getSimpleVT()); Res = CurDAG->getMachineNode( Mips::COPY_TO_REGCLASS, DL, ViaVecTy, SDValue(Res, 0), CurDAG->getTargetConstant(RC->getID(), DL, MVT::i32)); Res = CurDAG->getMachineNode( Mips::SPLATI_D, DL, MVT::v2i64, SDValue(Res, 0), CurDAG->getTargetConstant(0, DL, MVT::i32)); } else if (ABI.IsN64() || ABI.IsN32()) { SDValue Zero64Val = CurDAG->getRegister(Mips::ZERO_64, MVT::i64); const bool HiResNonZero = Highest || Higher; const bool ResNonZero = Hi || Lo; if (HiResNonZero) HiRes = CurDAG->getMachineNode( Mips::SUBREG_TO_REG, DL, MVT::i64, CurDAG->getTargetConstant(((Highest >> 15) & 0x1), DL, MVT::i64), SDValue(HiRes, 0), CurDAG->getTargetConstant(Mips::sub_32, DL, MVT::i64)); if (ResNonZero) Res = CurDAG->getMachineNode( Mips::SUBREG_TO_REG, DL, MVT::i64, CurDAG->getTargetConstant(((Hi >> 15) & 0x1), DL, MVT::i64), SDValue(Res, 0), CurDAG->getTargetConstant(Mips::sub_32, DL, MVT::i64)); // We have 3 cases: // The HiRes is nonzero but Res is $zero => dsll32 HiRes, 0 // The Res is nonzero but HiRes is $zero => dinsu Res, $zero, 32, 32 // Both are non zero => dinsu Res, HiRes, 32, 32 // // The obvious "missing" case is when both are zero, but that case is // handled by the ldi case. if (ResNonZero) { IntegerType *Int32Ty = IntegerType::get(MF->getFunction().getContext(), 32); const ConstantInt *Const32 = ConstantInt::get(Int32Ty, 32); SDValue Ops[4] = {HiResNonZero ? SDValue(HiRes, 0) : Zero64Val, CurDAG->getConstant(*Const32, DL, MVT::i32), CurDAG->getConstant(*Const32, DL, MVT::i32), SDValue(Res, 0)}; Res = CurDAG->getMachineNode(Mips::DINSU, DL, MVT::i64, Ops); } else if (HiResNonZero) { Res = CurDAG->getMachineNode( Mips::DSLL32, DL, MVT::i64, SDValue(HiRes, 0), CurDAG->getTargetConstant(0, DL, MVT::i32)); } else llvm_unreachable( "Zero splat value handled by non-zero 64bit splat synthesis!"); Res = CurDAG->getMachineNode(Mips::FILL_D, DL, MVT::v2i64, SDValue(Res, 0)); } else llvm_unreachable("Unknown ABI in MipsISelDAGToDAG!"); } else return false; if (ResVecTy != ViaVecTy) { // If LdiOp is writing to a different register class to ResVecTy, then // fix it up here. This COPY_TO_REGCLASS should never cause a move.v // since the source and destination register sets contain the same // registers. const TargetLowering *TLI = getTargetLowering(); MVT ResVecTySimple = ResVecTy.getSimpleVT(); const TargetRegisterClass *RC = TLI->getRegClassFor(ResVecTySimple); Res = CurDAG->getMachineNode(Mips::COPY_TO_REGCLASS, DL, ResVecTy, SDValue(Res, 0), CurDAG->getTargetConstant(RC->getID(), DL, MVT::i32)); } ReplaceNode(Node, Res); return true; } } return false; } bool MipsSEDAGToDAGISel:: SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) { SDValue Base, Offset; switch(ConstraintID) { default: llvm_unreachable("Unexpected asm memory constraint"); // All memory constraints can at least accept raw pointers. 
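// Illustrative use (the inline asm below is an assumed example, not taken
// from this file): for something like
//   asm ("lw %0, %1" : "=r"(val) : "m"(*ptr));
// the 'm' operand is matched through selectAddrRegImm16() so the ordinary
// 16-bit base+offset form can be emitted directly, while the 'R' and 'ZC'
// cases below pick whichever offset width the subtarget actually supports.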
case InlineAsm::Constraint_i: OutOps.push_back(Op); OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); return false; case InlineAsm::Constraint_m: if (selectAddrRegImm16(Op, Base, Offset)) { OutOps.push_back(Base); OutOps.push_back(Offset); return false; } OutOps.push_back(Op); OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); return false; case InlineAsm::Constraint_R: // The 'R' constraint is supposed to be much more complicated than this. // However, it's becoming less useful due to architectural changes and // ought to be replaced by other constraints such as 'ZC'. // For now, support 9-bit signed offsets which is supportable by all // subtargets for all instructions. if (selectAddrRegImm9(Op, Base, Offset)) { OutOps.push_back(Base); OutOps.push_back(Offset); return false; } OutOps.push_back(Op); OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); return false; case InlineAsm::Constraint_ZC: // ZC matches whatever the pref, ll, and sc instructions can handle for the // given subtarget. if (Subtarget->inMicroMipsMode()) { // On microMIPS, they can handle 12-bit offsets. if (selectAddrRegImm12(Op, Base, Offset)) { OutOps.push_back(Base); OutOps.push_back(Offset); return false; } } else if (Subtarget->hasMips32r6()) { // On MIPS32r6/MIPS64r6, they can only handle 9-bit offsets. if (selectAddrRegImm9(Op, Base, Offset)) { OutOps.push_back(Base); OutOps.push_back(Offset); return false; } } else if (selectAddrRegImm16(Op, Base, Offset)) { // Prior to MIPS32r6/MIPS64r6, they can handle 16-bit offsets. OutOps.push_back(Base); OutOps.push_back(Offset); return false; } // In all cases, 0-bit offsets are acceptable. OutOps.push_back(Op); OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); return false; } return true; } FunctionPass *llvm::createMipsSEISelDag(MipsTargetMachine &TM, CodeGenOpt::Level OptLevel) { return new MipsSEDAGToDAGISel(TM, OptLevel); } Index: vendor/llvm/dist-release_70/lib/Target/Mips/MipsSEInstrInfo.cpp =================================================================== --- vendor/llvm/dist-release_70/lib/Target/Mips/MipsSEInstrInfo.cpp (revision 341364) +++ vendor/llvm/dist-release_70/lib/Target/Mips/MipsSEInstrInfo.cpp (revision 341365) @@ -1,895 +1,900 @@ //===-- MipsSEInstrInfo.cpp - Mips32/64 Instruction Information -----------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file contains the Mips32/64 implementation of the TargetInstrInfo class. // //===----------------------------------------------------------------------===// #include "MipsSEInstrInfo.h" #include "InstPrinter/MipsInstPrinter.h" #include "MipsAnalyzeImmediate.h" #include "MipsMachineFunction.h" #include "MipsTargetMachine.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; +static unsigned getUnconditionalBranch(const MipsSubtarget &STI) { + if (STI.inMicroMipsMode()) + return STI.isPositionIndependent() ? Mips::B_MM : Mips::J_MM; + return STI.isPositionIndependent() ? Mips::B : Mips::J; +} + MipsSEInstrInfo::MipsSEInstrInfo(const MipsSubtarget &STI) - : MipsInstrInfo(STI, STI.isPositionIndependent() ? 
Mips::B : Mips::J), - RI() {} + : MipsInstrInfo(STI, getUnconditionalBranch(STI)), RI() {} const MipsRegisterInfo &MipsSEInstrInfo::getRegisterInfo() const { return RI; } /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If /// not, return 0. This predicate must return 0 if the instruction has /// any side effects other than loading from the stack slot. unsigned MipsSEInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const { unsigned Opc = MI.getOpcode(); if ((Opc == Mips::LW) || (Opc == Mips::LD) || (Opc == Mips::LWC1) || (Opc == Mips::LDC1) || (Opc == Mips::LDC164)) { if ((MI.getOperand(1).isFI()) && // is a stack slot (MI.getOperand(2).isImm()) && // the imm is zero (isZeroImm(MI.getOperand(2)))) { FrameIndex = MI.getOperand(1).getIndex(); return MI.getOperand(0).getReg(); } } return 0; } /// isStoreToStackSlot - If the specified machine instruction is a direct /// store to a stack slot, return the virtual or physical register number of /// the source reg along with the FrameIndex of the loaded stack slot. If /// not, return 0. This predicate must return 0 if the instruction has /// any side effects other than storing to the stack slot. unsigned MipsSEInstrInfo::isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const { unsigned Opc = MI.getOpcode(); if ((Opc == Mips::SW) || (Opc == Mips::SD) || (Opc == Mips::SWC1) || (Opc == Mips::SDC1) || (Opc == Mips::SDC164)) { if ((MI.getOperand(1).isFI()) && // is a stack slot (MI.getOperand(2).isImm()) && // the imm is zero (isZeroImm(MI.getOperand(2)))) { FrameIndex = MI.getOperand(1).getIndex(); return MI.getOperand(0).getReg(); } } return 0; } void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { unsigned Opc = 0, ZeroReg = 0; bool isMicroMips = Subtarget.inMicroMipsMode(); if (Mips::GPR32RegClass.contains(DestReg)) { // Copy to CPU Reg. if (Mips::GPR32RegClass.contains(SrcReg)) { if (isMicroMips) Opc = Mips::MOVE16_MM; else Opc = Mips::OR, ZeroReg = Mips::ZERO; } else if (Mips::CCRRegClass.contains(SrcReg)) Opc = Mips::CFC1; else if (Mips::FGR32RegClass.contains(SrcReg)) Opc = Mips::MFC1; else if (Mips::HI32RegClass.contains(SrcReg)) { Opc = isMicroMips ? Mips::MFHI16_MM : Mips::MFHI; SrcReg = 0; } else if (Mips::LO32RegClass.contains(SrcReg)) { Opc = isMicroMips ? Mips::MFLO16_MM : Mips::MFLO; SrcReg = 0; } else if (Mips::HI32DSPRegClass.contains(SrcReg)) Opc = Mips::MFHI_DSP; else if (Mips::LO32DSPRegClass.contains(SrcReg)) Opc = Mips::MFLO_DSP; else if (Mips::DSPCCRegClass.contains(SrcReg)) { BuildMI(MBB, I, DL, get(Mips::RDDSP), DestReg).addImm(1 << 4) .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); return; } else if (Mips::MSACtrlRegClass.contains(SrcReg)) Opc = Mips::CFCMSA; } else if (Mips::GPR32RegClass.contains(SrcReg)) { // Copy from CPU Reg. 
if (Mips::CCRRegClass.contains(DestReg)) Opc = Mips::CTC1; else if (Mips::FGR32RegClass.contains(DestReg)) Opc = Mips::MTC1; else if (Mips::HI32RegClass.contains(DestReg)) Opc = Mips::MTHI, DestReg = 0; else if (Mips::LO32RegClass.contains(DestReg)) Opc = Mips::MTLO, DestReg = 0; else if (Mips::HI32DSPRegClass.contains(DestReg)) Opc = Mips::MTHI_DSP; else if (Mips::LO32DSPRegClass.contains(DestReg)) Opc = Mips::MTLO_DSP; else if (Mips::DSPCCRegClass.contains(DestReg)) { BuildMI(MBB, I, DL, get(Mips::WRDSP)) .addReg(SrcReg, getKillRegState(KillSrc)).addImm(1 << 4) .addReg(DestReg, RegState::ImplicitDefine); return; } else if (Mips::MSACtrlRegClass.contains(DestReg)) { BuildMI(MBB, I, DL, get(Mips::CTCMSA)) .addReg(DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); return; } } else if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) Opc = Mips::FMOV_S; else if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) Opc = Mips::FMOV_D32; else if (Mips::FGR64RegClass.contains(DestReg, SrcReg)) Opc = Mips::FMOV_D64; else if (Mips::GPR64RegClass.contains(DestReg)) { // Copy to CPU64 Reg. if (Mips::GPR64RegClass.contains(SrcReg)) Opc = Mips::OR64, ZeroReg = Mips::ZERO_64; else if (Mips::HI64RegClass.contains(SrcReg)) Opc = Mips::MFHI64, SrcReg = 0; else if (Mips::LO64RegClass.contains(SrcReg)) Opc = Mips::MFLO64, SrcReg = 0; else if (Mips::FGR64RegClass.contains(SrcReg)) Opc = Mips::DMFC1; } else if (Mips::GPR64RegClass.contains(SrcReg)) { // Copy from CPU64 Reg. if (Mips::HI64RegClass.contains(DestReg)) Opc = Mips::MTHI64, DestReg = 0; else if (Mips::LO64RegClass.contains(DestReg)) Opc = Mips::MTLO64, DestReg = 0; else if (Mips::FGR64RegClass.contains(DestReg)) Opc = Mips::DMTC1; } else if (Mips::MSA128BRegClass.contains(DestReg)) { // Copy to MSA reg if (Mips::MSA128BRegClass.contains(SrcReg)) Opc = Mips::MOVE_V; } assert(Opc && "Cannot copy registers"); MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc)); if (DestReg) MIB.addReg(DestReg, RegState::Define); if (SrcReg) MIB.addReg(SrcReg, getKillRegState(KillSrc)); if (ZeroReg) MIB.addReg(ZeroReg); } static bool isORCopyInst(const MachineInstr &MI) { switch (MI.getOpcode()) { default: break; case Mips::OR_MM: case Mips::OR: if (MI.getOperand(2).getReg() == Mips::ZERO) return true; break; case Mips::OR64: if (MI.getOperand(2).getReg() == Mips::ZERO_64) return true; break; } return false; } /// If @MI is WRDSP/RRDSP instruction return true with @isWrite set to true /// if it is WRDSP instruction. static bool isReadOrWriteToDSPReg(const MachineInstr &MI, bool &isWrite) { switch (MI.getOpcode()) { default: return false; case Mips::WRDSP: case Mips::WRDSP_MM: isWrite = true; break; case Mips::RDDSP: case Mips::RDDSP_MM: isWrite = false; break; } return true; } /// We check for the common case of 'or', as it's MIPS' preferred instruction /// for GPRs but we have to check the operands to ensure that is the case. /// Other move instructions for MIPS are directly identifiable. bool MipsSEInstrInfo::isCopyInstr(const MachineInstr &MI, const MachineOperand *&Src, const MachineOperand *&Dest) const { bool isDSPControlWrite = false; // Condition is made to match the creation of WRDSP/RDDSP copy instruction // from copyPhysReg function. 
if (isReadOrWriteToDSPReg(MI, isDSPControlWrite)) { if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != (1<<4)) return false; else if (isDSPControlWrite) { Src = &MI.getOperand(0); Dest = &MI.getOperand(2); } else { Dest = &MI.getOperand(0); Src = &MI.getOperand(2); } return true; } else if (MI.isMoveReg() || isORCopyInst(MI)) { Dest = &MI.getOperand(0); Src = &MI.getOperand(1); return true; } return false; } void MipsSEInstrInfo:: storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, int64_t Offset) const { DebugLoc DL; MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore); unsigned Opc = 0; if (Mips::GPR32RegClass.hasSubClassEq(RC)) Opc = Mips::SW; else if (Mips::GPR64RegClass.hasSubClassEq(RC)) Opc = Mips::SD; else if (Mips::ACC64RegClass.hasSubClassEq(RC)) Opc = Mips::STORE_ACC64; else if (Mips::ACC64DSPRegClass.hasSubClassEq(RC)) Opc = Mips::STORE_ACC64DSP; else if (Mips::ACC128RegClass.hasSubClassEq(RC)) Opc = Mips::STORE_ACC128; else if (Mips::DSPCCRegClass.hasSubClassEq(RC)) Opc = Mips::STORE_CCOND_DSP; else if (Mips::FGR32RegClass.hasSubClassEq(RC)) Opc = Mips::SWC1; else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) Opc = Mips::SDC1; else if (Mips::FGR64RegClass.hasSubClassEq(RC)) Opc = Mips::SDC164; else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8)) Opc = Mips::ST_B; else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16) || TRI->isTypeLegalForClass(*RC, MVT::v8f16)) Opc = Mips::ST_H; else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) || TRI->isTypeLegalForClass(*RC, MVT::v4f32)) Opc = Mips::ST_W; else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) || TRI->isTypeLegalForClass(*RC, MVT::v2f64)) Opc = Mips::ST_D; else if (Mips::LO32RegClass.hasSubClassEq(RC)) Opc = Mips::SW; else if (Mips::LO64RegClass.hasSubClassEq(RC)) Opc = Mips::SD; else if (Mips::HI32RegClass.hasSubClassEq(RC)) Opc = Mips::SW; else if (Mips::HI64RegClass.hasSubClassEq(RC)) Opc = Mips::SD; else if (Mips::DSPRRegClass.hasSubClassEq(RC)) Opc = Mips::SWDSP; // Hi, Lo are normally caller save but they are callee save // for interrupt handling. 
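// Illustrative expansion (register and offset shown only as an example): in a
// function carrying the "interrupt" attribute, spilling the HI accumulator
// becomes "mfhi $k0" followed by the ordinary "sw $k0, <offset>($sp)" store
// emitted below, with $k0 serving as the conventional scratch register.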
const Function &Func = MBB.getParent()->getFunction(); if (Func.hasFnAttribute("interrupt")) { if (Mips::HI32RegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(Mips::MFHI), Mips::K0); SrcReg = Mips::K0; } else if (Mips::HI64RegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(Mips::MFHI64), Mips::K0_64); SrcReg = Mips::K0_64; } else if (Mips::LO32RegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(Mips::MFLO), Mips::K0); SrcReg = Mips::K0; } else if (Mips::LO64RegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(Mips::MFLO64), Mips::K0_64); SrcReg = Mips::K0_64; } } assert(Opc && "Register class not handled!"); BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(Offset).addMemOperand(MMO); } void MipsSEInstrInfo:: loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, int64_t Offset) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); unsigned Opc = 0; const Function &Func = MBB.getParent()->getFunction(); bool ReqIndirectLoad = Func.hasFnAttribute("interrupt") && (DestReg == Mips::LO0 || DestReg == Mips::LO0_64 || DestReg == Mips::HI0 || DestReg == Mips::HI0_64); if (Mips::GPR32RegClass.hasSubClassEq(RC)) Opc = Mips::LW; else if (Mips::GPR64RegClass.hasSubClassEq(RC)) Opc = Mips::LD; else if (Mips::ACC64RegClass.hasSubClassEq(RC)) Opc = Mips::LOAD_ACC64; else if (Mips::ACC64DSPRegClass.hasSubClassEq(RC)) Opc = Mips::LOAD_ACC64DSP; else if (Mips::ACC128RegClass.hasSubClassEq(RC)) Opc = Mips::LOAD_ACC128; else if (Mips::DSPCCRegClass.hasSubClassEq(RC)) Opc = Mips::LOAD_CCOND_DSP; else if (Mips::FGR32RegClass.hasSubClassEq(RC)) Opc = Mips::LWC1; else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) Opc = Mips::LDC1; else if (Mips::FGR64RegClass.hasSubClassEq(RC)) Opc = Mips::LDC164; else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8)) Opc = Mips::LD_B; else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16) || TRI->isTypeLegalForClass(*RC, MVT::v8f16)) Opc = Mips::LD_H; else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) || TRI->isTypeLegalForClass(*RC, MVT::v4f32)) Opc = Mips::LD_W; else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) || TRI->isTypeLegalForClass(*RC, MVT::v2f64)) Opc = Mips::LD_D; else if (Mips::HI32RegClass.hasSubClassEq(RC)) Opc = Mips::LW; else if (Mips::HI64RegClass.hasSubClassEq(RC)) Opc = Mips::LD; else if (Mips::LO32RegClass.hasSubClassEq(RC)) Opc = Mips::LW; else if (Mips::LO64RegClass.hasSubClassEq(RC)) Opc = Mips::LD; else if (Mips::DSPRRegClass.hasSubClassEq(RC)) Opc = Mips::LWDSP; assert(Opc && "Register class not handled!"); if (!ReqIndirectLoad) BuildMI(MBB, I, DL, get(Opc), DestReg) .addFrameIndex(FI) .addImm(Offset) .addMemOperand(MMO); else { // Load HI/LO through K0. Notably the DestReg is encoded into the // instruction itself. 
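// Illustrative expansion (mnemonics shown only as an example): reloading HI0
// in an interrupt handler becomes "lw $k0, <offset>($sp)" followed by
// "mthi $k0"; with 64-bit pointers the same shape uses $k0_64 and the 64-bit
// load and mthi/mtlo variants.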
unsigned Reg = Mips::K0; unsigned LdOp = Mips::MTLO; if (DestReg == Mips::HI0) LdOp = Mips::MTHI; if (Subtarget.getABI().ArePtrs64bit()) { Reg = Mips::K0_64; if (DestReg == Mips::HI0_64) LdOp = Mips::MTHI64; else LdOp = Mips::MTLO64; } BuildMI(MBB, I, DL, get(Opc), Reg) .addFrameIndex(FI) .addImm(Offset) .addMemOperand(MMO); BuildMI(MBB, I, DL, get(LdOp)).addReg(Reg); } } bool MipsSEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MachineBasicBlock &MBB = *MI.getParent(); bool isMicroMips = Subtarget.inMicroMipsMode(); unsigned Opc; switch (MI.getDesc().getOpcode()) { default: return false; case Mips::RetRA: expandRetRA(MBB, MI); break; case Mips::ERet: expandERet(MBB, MI); break; case Mips::PseudoMFHI: Opc = isMicroMips ? Mips::MFHI16_MM : Mips::MFHI; expandPseudoMFHiLo(MBB, MI, Opc); break; case Mips::PseudoMFLO: Opc = isMicroMips ? Mips::MFLO16_MM : Mips::MFLO; expandPseudoMFHiLo(MBB, MI, Opc); break; case Mips::PseudoMFHI64: expandPseudoMFHiLo(MBB, MI, Mips::MFHI64); break; case Mips::PseudoMFLO64: expandPseudoMFHiLo(MBB, MI, Mips::MFLO64); break; case Mips::PseudoMTLOHI: expandPseudoMTLoHi(MBB, MI, Mips::MTLO, Mips::MTHI, false); break; case Mips::PseudoMTLOHI64: expandPseudoMTLoHi(MBB, MI, Mips::MTLO64, Mips::MTHI64, false); break; case Mips::PseudoMTLOHI_DSP: expandPseudoMTLoHi(MBB, MI, Mips::MTLO_DSP, Mips::MTHI_DSP, true); break; case Mips::PseudoCVT_S_W: expandCvtFPInt(MBB, MI, Mips::CVT_S_W, Mips::MTC1, false); break; case Mips::PseudoCVT_D32_W: Opc = isMicroMips ? Mips::CVT_D32_W_MM : Mips::CVT_D32_W; expandCvtFPInt(MBB, MI, Opc, Mips::MTC1, false); break; case Mips::PseudoCVT_S_L: expandCvtFPInt(MBB, MI, Mips::CVT_S_L, Mips::DMTC1, true); break; case Mips::PseudoCVT_D64_W: Opc = isMicroMips ? Mips::CVT_D64_W_MM : Mips::CVT_D64_W; expandCvtFPInt(MBB, MI, Opc, Mips::MTC1, true); break; case Mips::PseudoCVT_D64_L: expandCvtFPInt(MBB, MI, Mips::CVT_D64_L, Mips::DMTC1, true); break; case Mips::BuildPairF64: expandBuildPairF64(MBB, MI, isMicroMips, false); break; case Mips::BuildPairF64_64: expandBuildPairF64(MBB, MI, isMicroMips, true); break; case Mips::ExtractElementF64: expandExtractElementF64(MBB, MI, isMicroMips, false); break; case Mips::ExtractElementF64_64: expandExtractElementF64(MBB, MI, isMicroMips, true); break; case Mips::MIPSeh_return32: case Mips::MIPSeh_return64: expandEhReturn(MBB, MI); break; } MBB.erase(MI); return true; } /// getOppositeBranchOpc - Return the inverse of the specified /// opcode, e.g. turning BEQ to BNE. 
unsigned MipsSEInstrInfo::getOppositeBranchOpc(unsigned Opc) const { switch (Opc) { default: llvm_unreachable("Illegal opcode!"); case Mips::BEQ: return Mips::BNE; case Mips::BEQ_MM: return Mips::BNE_MM; case Mips::BNE: return Mips::BEQ; case Mips::BNE_MM: return Mips::BEQ_MM; case Mips::BGTZ: return Mips::BLEZ; case Mips::BGEZ: return Mips::BLTZ; case Mips::BLTZ: return Mips::BGEZ; case Mips::BLEZ: return Mips::BGTZ; case Mips::BGTZ_MM: return Mips::BLEZ_MM; case Mips::BGEZ_MM: return Mips::BLTZ_MM; case Mips::BLTZ_MM: return Mips::BGEZ_MM; case Mips::BLEZ_MM: return Mips::BGTZ_MM; case Mips::BEQ64: return Mips::BNE64; case Mips::BNE64: return Mips::BEQ64; case Mips::BGTZ64: return Mips::BLEZ64; case Mips::BGEZ64: return Mips::BLTZ64; case Mips::BLTZ64: return Mips::BGEZ64; case Mips::BLEZ64: return Mips::BGTZ64; case Mips::BC1T: return Mips::BC1F; case Mips::BC1F: return Mips::BC1T; case Mips::BC1T_MM: return Mips::BC1F_MM; case Mips::BC1F_MM: return Mips::BC1T_MM; case Mips::BEQZ16_MM: return Mips::BNEZ16_MM; case Mips::BNEZ16_MM: return Mips::BEQZ16_MM; case Mips::BEQZC_MM: return Mips::BNEZC_MM; case Mips::BNEZC_MM: return Mips::BEQZC_MM; case Mips::BEQZC: return Mips::BNEZC; case Mips::BNEZC: return Mips::BEQZC; case Mips::BLEZC: return Mips::BGTZC; case Mips::BGEZC: return Mips::BLTZC; case Mips::BGEC: return Mips::BLTC; case Mips::BGTZC: return Mips::BLEZC; case Mips::BLTZC: return Mips::BGEZC; case Mips::BLTC: return Mips::BGEC; case Mips::BGEUC: return Mips::BLTUC; case Mips::BLTUC: return Mips::BGEUC; case Mips::BEQC: return Mips::BNEC; case Mips::BNEC: return Mips::BEQC; case Mips::BC1EQZ: return Mips::BC1NEZ; case Mips::BC1NEZ: return Mips::BC1EQZ; case Mips::BEQZC_MMR6: return Mips::BNEZC_MMR6; case Mips::BNEZC_MMR6: return Mips::BEQZC_MMR6; case Mips::BLEZC_MMR6: return Mips::BGTZC_MMR6; case Mips::BGEZC_MMR6: return Mips::BLTZC_MMR6; case Mips::BGEC_MMR6: return Mips::BLTC_MMR6; case Mips::BGTZC_MMR6: return Mips::BLEZC_MMR6; case Mips::BLTZC_MMR6: return Mips::BGEZC_MMR6; case Mips::BLTC_MMR6: return Mips::BGEC_MMR6; case Mips::BGEUC_MMR6: return Mips::BLTUC_MMR6; case Mips::BLTUC_MMR6: return Mips::BGEUC_MMR6; case Mips::BEQC_MMR6: return Mips::BNEC_MMR6; case Mips::BNEC_MMR6: return Mips::BEQC_MMR6; case Mips::BC1EQZC_MMR6: return Mips::BC1NEZC_MMR6; case Mips::BC1NEZC_MMR6: return Mips::BC1EQZC_MMR6; case Mips::BEQZC64: return Mips::BNEZC64; case Mips::BNEZC64: return Mips::BEQZC64; case Mips::BEQC64: return Mips::BNEC64; case Mips::BNEC64: return Mips::BEQC64; case Mips::BGEC64: return Mips::BLTC64; case Mips::BGEUC64: return Mips::BLTUC64; case Mips::BLTC64: return Mips::BGEC64; case Mips::BLTUC64: return Mips::BGEUC64; case Mips::BGTZC64: return Mips::BLEZC64; case Mips::BGEZC64: return Mips::BLTZC64; case Mips::BLTZC64: return Mips::BGEZC64; case Mips::BLEZC64: return Mips::BGTZC64; case Mips::BBIT0: return Mips::BBIT1; case Mips::BBIT1: return Mips::BBIT0; case Mips::BBIT032: return Mips::BBIT132; case Mips::BBIT132: return Mips::BBIT032; case Mips::BZ_B: return Mips::BNZ_B; case Mips::BZ_H: return Mips::BNZ_H; case Mips::BZ_W: return Mips::BNZ_W; case Mips::BZ_D: return Mips::BNZ_D; case Mips::BZ_V: return Mips::BNZ_V; case Mips::BNZ_B: return Mips::BZ_B; case Mips::BNZ_H: return Mips::BZ_H; case Mips::BNZ_W: return Mips::BZ_W; case Mips::BNZ_D: return Mips::BZ_D; case Mips::BNZ_V: return Mips::BZ_V; } } /// Adjust SP by Amount bytes. 
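/// Illustrative examples of the two paths below (amounts chosen only for
/// exposition): adjusting by -32 fits in a signed 16-bit immediate and becomes
/// a single "addiu $sp, $sp, -32" (or its 64-bit pointer equivalent), whereas
/// adjusting by -70000 does not, so the amount is first synthesized into a
/// scratch register with loadImmediate() and then applied to $sp with the
/// ABI's addu/subu pointer op.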
void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { MipsABIInfo ABI = Subtarget.getABI(); DebugLoc DL; unsigned ADDiu = ABI.GetPtrAddiuOp(); if (Amount == 0) return; if (isInt<16>(Amount)) { // addi sp, sp, amount BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount); } else { // For numbers which are not 16bit integers we synthesize Amount inline // then add or subtract it from sp. unsigned Opc = ABI.GetPtrAdduOp(); if (Amount < 0) { Opc = ABI.GetPtrSubuOp(); Amount = -Amount; } unsigned Reg = loadImmediate(Amount, MBB, I, DL, nullptr); BuildMI(MBB, I, DL, get(Opc), SP).addReg(SP).addReg(Reg, RegState::Kill); } } /// This function generates the sequence of instructions needed to get the /// result of adding register REG and immediate IMM. unsigned MipsSEInstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB, MachineBasicBlock::iterator II, const DebugLoc &DL, unsigned *NewImm) const { MipsAnalyzeImmediate AnalyzeImm; const MipsSubtarget &STI = Subtarget; MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo(); unsigned Size = STI.isABI_N64() ? 64 : 32; unsigned LUi = STI.isABI_N64() ? Mips::LUi64 : Mips::LUi; unsigned ZEROReg = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; const TargetRegisterClass *RC = STI.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; bool LastInstrIsADDiu = NewImm; const MipsAnalyzeImmediate::InstSeq &Seq = AnalyzeImm.Analyze(Imm, Size, LastInstrIsADDiu); MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); assert(Seq.size() && (!LastInstrIsADDiu || (Seq.size() > 1))); // The first instruction can be a LUi, which is different from other // instructions (ADDiu, ORI and SLL) in that it does not have a register // operand. unsigned Reg = RegInfo.createVirtualRegister(RC); if (Inst->Opc == LUi) BuildMI(MBB, II, DL, get(LUi), Reg).addImm(SignExtend64<16>(Inst->ImmOpnd)); else BuildMI(MBB, II, DL, get(Inst->Opc), Reg).addReg(ZEROReg) .addImm(SignExtend64<16>(Inst->ImmOpnd)); // Build the remaining instructions in Seq. 
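// Illustrative sequence (the value is chosen only as an example): for
// Imm = 0x12345678 on a 32-bit ABI the analysis yields "lui $reg, 0x1234",
// emitted above, and the loop below then appends "ori $reg, $reg, 0x5678".
// When NewImm is requested, the trailing ADDiu is held back and its immediate
// is returned through *NewImm instead of being emitted.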
for (++Inst; Inst != Seq.end() - LastInstrIsADDiu; ++Inst) BuildMI(MBB, II, DL, get(Inst->Opc), Reg).addReg(Reg, RegState::Kill) .addImm(SignExtend64<16>(Inst->ImmOpnd)); if (LastInstrIsADDiu) *NewImm = Inst->ImmOpnd; return Reg; } unsigned MipsSEInstrInfo::getAnalyzableBrOpc(unsigned Opc) const { return (Opc == Mips::BEQ || Opc == Mips::BEQ_MM || Opc == Mips::BNE || Opc == Mips::BNE_MM || Opc == Mips::BGTZ || Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ || Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 || Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 || Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B || Opc == Mips::J || - Opc == Mips::B_MM || Opc == Mips::BEQZC_MM || + Opc == Mips::J_MM || Opc == Mips::B_MM || Opc == Mips::BEQZC_MM || Opc == Mips::BNEZC_MM || Opc == Mips::BEQC || Opc == Mips::BNEC || Opc == Mips::BLTC || Opc == Mips::BGEC || Opc == Mips::BLTUC || Opc == Mips::BGEUC || Opc == Mips::BGTZC || Opc == Mips::BLEZC || Opc == Mips::BGEZC || Opc == Mips::BLTZC || Opc == Mips::BEQZC || Opc == Mips::BNEZC || Opc == Mips::BEQZC64 || Opc == Mips::BNEZC64 || Opc == Mips::BEQC64 || Opc == Mips::BNEC64 || Opc == Mips::BGEC64 || Opc == Mips::BGEUC64 || Opc == Mips::BLTC64 || Opc == Mips::BLTUC64 || Opc == Mips::BGTZC64 || Opc == Mips::BGEZC64 || Opc == Mips::BLTZC64 || Opc == Mips::BLEZC64 || Opc == Mips::BC || Opc == Mips::BBIT0 || Opc == Mips::BBIT1 || Opc == Mips::BBIT032 || Opc == Mips::BBIT132 || Opc == Mips::BC_MMR6 || Opc == Mips::BEQC_MMR6 || Opc == Mips::BNEC_MMR6 || Opc == Mips::BLTC_MMR6 || Opc == Mips::BGEC_MMR6 || Opc == Mips::BLTUC_MMR6 || Opc == Mips::BGEUC_MMR6 || Opc == Mips::BGTZC_MMR6 || Opc == Mips::BLEZC_MMR6 || Opc == Mips::BGEZC_MMR6 || Opc == Mips::BLTZC_MMR6 || Opc == Mips::BEQZC_MMR6 || Opc == Mips::BNEZC_MMR6) ? Opc : 0; } void MipsSEInstrInfo::expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { MachineInstrBuilder MIB; if (Subtarget.isGP64bit()) MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn64)) .addReg(Mips::RA_64, RegState::Undef); else MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn)) .addReg(Mips::RA, RegState::Undef); // Retain any imp-use flags. 
for (auto & MO : I->operands()) { if (MO.isImplicit()) MIB.add(MO); } } void MipsSEInstrInfo::expandERet(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { BuildMI(MBB, I, I->getDebugLoc(), get(Mips::ERET)); } std::pair MipsSEInstrInfo::compareOpndSize(unsigned Opc, const MachineFunction &MF) const { const MCInstrDesc &Desc = get(Opc); assert(Desc.NumOperands == 2 && "Unary instruction expected."); const MipsRegisterInfo *RI = &getRegisterInfo(); unsigned DstRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 0, RI, MF)); unsigned SrcRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 1, RI, MF)); return std::make_pair(DstRegSize > SrcRegSize, DstRegSize < SrcRegSize); } void MipsSEInstrInfo::expandPseudoMFHiLo(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned NewOpc) const { BuildMI(MBB, I, I->getDebugLoc(), get(NewOpc), I->getOperand(0).getReg()); } void MipsSEInstrInfo::expandPseudoMTLoHi(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned LoOpc, unsigned HiOpc, bool HasExplicitDef) const { // Expand // lo_hi pseudomtlohi $gpr0, $gpr1 // to these two instructions: // mtlo $gpr0 // mthi $gpr1 DebugLoc DL = I->getDebugLoc(); const MachineOperand &SrcLo = I->getOperand(1), &SrcHi = I->getOperand(2); MachineInstrBuilder LoInst = BuildMI(MBB, I, DL, get(LoOpc)); MachineInstrBuilder HiInst = BuildMI(MBB, I, DL, get(HiOpc)); // Add lo/hi registers if the mtlo/hi instructions created have explicit // def registers. if (HasExplicitDef) { unsigned DstReg = I->getOperand(0).getReg(); unsigned DstLo = getRegisterInfo().getSubReg(DstReg, Mips::sub_lo); unsigned DstHi = getRegisterInfo().getSubReg(DstReg, Mips::sub_hi); LoInst.addReg(DstLo, RegState::Define); HiInst.addReg(DstHi, RegState::Define); } LoInst.addReg(SrcLo.getReg(), getKillRegState(SrcLo.isKill())); HiInst.addReg(SrcHi.getReg(), getKillRegState(SrcHi.isKill())); } void MipsSEInstrInfo::expandCvtFPInt(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned CvtOpc, unsigned MovOpc, bool IsI64) const { const MCInstrDesc &CvtDesc = get(CvtOpc), &MovDesc = get(MovOpc); const MachineOperand &Dst = I->getOperand(0), &Src = I->getOperand(1); unsigned DstReg = Dst.getReg(), SrcReg = Src.getReg(), TmpReg = DstReg; unsigned KillSrc = getKillRegState(Src.isKill()); DebugLoc DL = I->getDebugLoc(); bool DstIsLarger, SrcIsLarger; std::tie(DstIsLarger, SrcIsLarger) = compareOpndSize(CvtOpc, *MBB.getParent()); if (DstIsLarger) TmpReg = getRegisterInfo().getSubReg(DstReg, Mips::sub_lo); if (SrcIsLarger) DstReg = getRegisterInfo().getSubReg(DstReg, Mips::sub_lo); BuildMI(MBB, I, DL, MovDesc, TmpReg).addReg(SrcReg, KillSrc); BuildMI(MBB, I, DL, CvtDesc, DstReg).addReg(TmpReg, RegState::Kill); } void MipsSEInstrInfo::expandExtractElementF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, bool isMicroMips, bool FP64) const { unsigned DstReg = I->getOperand(0).getReg(); unsigned SrcReg = I->getOperand(1).getReg(); unsigned N = I->getOperand(2).getImm(); DebugLoc dl = I->getDebugLoc(); assert(N < 2 && "Invalid immediate"); unsigned SubIdx = N ? Mips::sub_hi : Mips::sub_lo; unsigned SubReg = getRegisterInfo().getSubReg(SrcReg, SubIdx); // FPXX on MIPS-II or MIPS32r1 should have been handled with a spill/reload // in MipsSEFrameLowering.cpp. assert(!(Subtarget.isABI_FPXX() && !Subtarget.hasMips32r2())); // FP64A (FP64 with nooddspreg) should have been handled with a spill/reload // in MipsSEFrameLowering.cpp. 
assert(!(Subtarget.isFP64bit() && !Subtarget.useOddSPReg())); if (SubIdx == Mips::sub_hi && Subtarget.hasMTHC1()) { // FIXME: Strictly speaking MFHC1 only reads the top 32-bits however, we // claim to read the whole 64-bits as part of a white lie used to // temporarily work around a widespread bug in the -mfp64 support. // The problem is that none of the 32-bit fpu ops mention the fact // that they clobber the upper 32-bits of the 64-bit FPR. Fixing that // requires a major overhaul of the FPU implementation which can't // be done right now due to time constraints. // MFHC1 is one of two instructions that are affected since they are // the only instructions that don't read the lower 32-bits. // We therefore pretend that it reads the bottom 32-bits to // artificially create a dependency and prevent the scheduler // changing the behaviour of the code. BuildMI(MBB, I, dl, get(isMicroMips ? (FP64 ? Mips::MFHC1_D64_MM : Mips::MFHC1_D32_MM) : (FP64 ? Mips::MFHC1_D64 : Mips::MFHC1_D32)), DstReg) .addReg(SrcReg); } else BuildMI(MBB, I, dl, get(Mips::MFC1), DstReg).addReg(SubReg); } void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, bool isMicroMips, bool FP64) const { unsigned DstReg = I->getOperand(0).getReg(); unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg(); const MCInstrDesc& Mtc1Tdd = get(Mips::MTC1); DebugLoc dl = I->getDebugLoc(); const TargetRegisterInfo &TRI = getRegisterInfo(); // When mthc1 is available, use: // mtc1 Lo, $fp // mthc1 Hi, $fp // // Otherwise, for O32 FPXX ABI: // spill + reload via ldc1 // This case is handled by the frame lowering code. // // Otherwise, for FP32: // mtc1 Lo, $fp // mtc1 Hi, $fp + 1 // // The case where dmtc1 is available doesn't need to be handled here // because it never creates a BuildPairF64 node. // FPXX on MIPS-II or MIPS32r1 should have been handled with a spill/reload // in MipsSEFrameLowering.cpp. assert(!(Subtarget.isABI_FPXX() && !Subtarget.hasMips32r2())); // FP64A (FP64 with nooddspreg) should have been handled with a spill/reload // in MipsSEFrameLowering.cpp. assert(!(Subtarget.isFP64bit() && !Subtarget.useOddSPReg())); BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_lo)) .addReg(LoReg); if (Subtarget.hasMTHC1()) { // FIXME: The .addReg(DstReg) is a white lie used to temporarily work // around a widespread bug in the -mfp64 support. // The problem is that none of the 32-bit fpu ops mention the fact // that they clobber the upper 32-bits of the 64-bit FPR. Fixing that // requires a major overhaul of the FPU implementation which can't // be done right now due to time constraints. // MTHC1 is one of two instructions that are affected since they are // the only instructions that don't read the lower 32-bits. // We therefore pretend that it reads the bottom 32-bits to // artificially create a dependency and prevent the scheduler // changing the behaviour of the code. BuildMI(MBB, I, dl, get(isMicroMips ? (FP64 ? Mips::MTHC1_D64_MM : Mips::MTHC1_D32_MM) : (FP64 ? Mips::MTHC1_D64 : Mips::MTHC1_D32)), DstReg) .addReg(DstReg) .addReg(HiReg); } else if (Subtarget.isABI_FPXX()) llvm_unreachable("BuildPairF64 not expanded in frame lowering code!"); else BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_hi)) .addReg(HiReg); } void MipsSEInstrInfo::expandEhReturn(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { // This pseudo instruction is generated as part of the lowering of // ISD::EH_RETURN. 
We convert it to a stack increment by OffsetReg, and // indirect jump to TargetReg MipsABIInfo ABI = Subtarget.getABI(); unsigned ADDU = ABI.GetPtrAdduOp(); unsigned SP = Subtarget.isGP64bit() ? Mips::SP_64 : Mips::SP; unsigned RA = Subtarget.isGP64bit() ? Mips::RA_64 : Mips::RA; unsigned T9 = Subtarget.isGP64bit() ? Mips::T9_64 : Mips::T9; unsigned ZERO = Subtarget.isGP64bit() ? Mips::ZERO_64 : Mips::ZERO; unsigned OffsetReg = I->getOperand(0).getReg(); unsigned TargetReg = I->getOperand(1).getReg(); // addu $ra, $v0, $zero // addu $sp, $sp, $v1 // jr $ra (via RetRA) const TargetMachine &TM = MBB.getParent()->getTarget(); if (TM.isPositionIndependent()) BuildMI(MBB, I, I->getDebugLoc(), get(ADDU), T9) .addReg(TargetReg) .addReg(ZERO); BuildMI(MBB, I, I->getDebugLoc(), get(ADDU), RA) .addReg(TargetReg) .addReg(ZERO); BuildMI(MBB, I, I->getDebugLoc(), get(ADDU), SP).addReg(SP).addReg(OffsetReg); expandRetRA(MBB, I); } const MipsInstrInfo *llvm::createMipsSEInstrInfo(const MipsSubtarget &STI) { return new MipsSEInstrInfo(STI); } Index: vendor/llvm/dist-release_70/lib/Target/PowerPC/P9InstrResources.td =================================================================== --- vendor/llvm/dist-release_70/lib/Target/PowerPC/P9InstrResources.td (revision 341364) +++ vendor/llvm/dist-release_70/lib/Target/PowerPC/P9InstrResources.td (revision 341365) @@ -1,1419 +1,1420 @@ //===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines the resources required by P9 instructions. This is part // of the P9 processor model used for instruction scheduling. This file should contain // all of the instructions that may be used on Power 9. This is not just // instructions that are new on Power 9 but also instructions that were // available on earlier architectures and are still used in Power 9. // // The makeup of the P9 CPU is modeled as follows: // - Each CPU is made up of two superslices. // - Each superslice is made up of two slices. Therefore, there are 4 slices // for each CPU. // - Up to 6 instructions can be dispatched to each CPU. Three per superslice. // - Each CPU has: // - One CY (Crypto) unit P9_CY_* // - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_* // - Two PM (Permute) units. One on each superslice. P9_PM_* // - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_* // - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_* // - Four DP (Floating Point) units. One on each slice. P9_DP_* // This also includes fixed point multiply add. // - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_* // - Four Load/Store Queues. P9_LS_* // - Each set of instructions will require a number of these resources. //===----------------------------------------------------------------------===// // Two cycle ALU vector operation that uses an entire superslice. // Uses both ALU units (the even ALUE and odd ALUO units), two pipelines // (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "VADDU(B|H|W|D)M$"), (instregex "VAND(C)?$"), (instregex "VEXTS(B|H|W)2(D|W)(s)?$"), (instregex "V_SET0(B|H)?$"), (instregex "VS(R|L)(B|H|W|D)$"), (instregex "VSUBU(B|H|W|D)M$"), (instregex "VPOPCNT(B|H)$"), (instregex "VRL(B|H|W|D)$"), (instregex "VSRA(B|H|W|D)$"), (instregex "XV(N)?ABS(D|S)P$"), (instregex "XVCPSGN(D|S)P$"), (instregex "XV(I|X)EXP(D|S)P$"), (instregex "VRL(D|W)(MI|NM)$"), (instregex "VMRG(E|O)W$"), MTVSRDD, VEQV, VNAND, VNEGD, VNEGW, VNOR, VOR, VORC, VSEL, VXOR, XVNEGDP, XVNEGSP, XXLAND, XXLANDC, XXLEQV, XXLNAND, XXLNOR, XXLOR, XXLORf, XXLORC, XXLXOR, XXLXORdpz, XXLXORspz, XXLXORz, XXSEL, XSABSQP, XSCPSGNQP, XSIEXPQP, XSNABSQP, XSNEGQP, XSXEXPQP )>; // Restricted Dispatch ALU operation for 3 cycles. The operation runs on a // slingle slice. However, since it is Restricted it requires all 3 dispatches // (DISP) for that superslice. def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "TABORT(D|W)C(I)?$"), (instregex "MTFSB(0|1)$"), (instregex "MFFSC(D)?RN(I)?$"), (instregex "CMPRB(8)?$"), (instregex "TD(I)?$"), (instregex "TW(I)?$"), (instregex "FCMPU(S|D)$"), (instregex "XSTSTDC(S|D)P$"), FTDIV, FTSQRT, CMPEQB )>; // Standard Dispatch ALU operation for 3 cycles. Only one slice used. def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C], (instrs (instregex "XSMAX(C|J)?DP$"), (instregex "XSMIN(C|J)?DP$"), (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"), (instregex "CNT(L|T)Z(D|W)(8)?(o)?$"), (instregex "POPCNT(D|W)$"), (instregex "CMPB(8)?$"), XSTDIVDP, XSTSQRTDP, XSXSIGDP, XSCVSPDPN, SETB, BPERMD )>; // Standard Dispatch ALU operation for 2 cycles. Only one slice used. def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C], (instrs (instregex "S(L|R)D$"), (instregex "SRAD(I)?$"), (instregex "EXTSWSLI$"), (instregex "MFV(S)?RD$"), (instregex "MTVSRD$"), (instregex "MTVSRW(A|Z)$"), (instregex "CMP(WI|LWI|W|LW)(8)?$"), (instregex "CMP(L)?D(I)?$"), (instregex "SUBF(I)?C(8)?$"), (instregex "ANDI(S)?o(8)?$"), (instregex "ADDC(8)?$"), (instregex "ADDIC(8)?(o)?$"), (instregex "ADD(8|4)(o)?$"), (instregex "ADD(E|ME|ZE)(8)?(o)?$"), (instregex "SUBF(E|ME|ZE)?(8)?(o)?$"), (instregex "NEG(8)?(o)?$"), (instregex "POPCNTB$"), (instregex "ADD(I|IS)?(8)?$"), (instregex "LI(S)?(8)?$"), (instregex "(X)?OR(I|IS)?(8)?(o)?$"), (instregex "NAND(8)?(o)?$"), (instregex "AND(C)?(8)?(o)?$"), (instregex "NOR(8)?(o)?$"), (instregex "OR(C)?(8)?(o)?$"), (instregex "EQV(8)?(o)?$"), (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(o)?$"), (instregex "ADD(4|8)(TLS)?(_)?$"), (instregex "NEG(8)?$"), (instregex "ADDI(S)?toc(HA|L)$"), COPY, MCRF, MCRXRX, XSNABSDP, XSXEXPDP, XSABSDP, XSNEGDP, XSCPSGNDP, MFVSRWZ, SRADI_32, RLDIC, RFEBB, LA, TBEGIN, TRECHKPT, NOP, WAIT )>; // Restricted Dispatch ALU operation for 2 cycles. The operation runs on a // slingle slice. However, since it is Restricted it requires all 3 dispatches // (DISP) for that superslice. 
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "RLDC(L|R)$"), (instregex "RLWIMI(8)?$"), (instregex "RLDIC(L|R)(_32)?(_64)?$"), (instregex "M(F|T)OCRF(8)?$"), (instregex "CR(6)?(UN)?SET$"), (instregex "CR(N)?(OR|AND)(C)?$"), (instregex "S(L|R)W(8)?$"), (instregex "RLW(INM|NM)(8)?$"), (instregex "F(N)?ABS(D|S)$"), (instregex "FNEG(D|S)$"), (instregex "FCPSGN(D|S)$"), (instregex "SRAW(I)?$"), (instregex "ISEL(8)?$"), RLDIMI, XSIEXPDP, FMR, CREQV, CRXOR, TRECLAIM, TSR, TABORT )>; // Three cycle ALU vector operation that uses an entire superslice. // Uses both ALU units (the even ALUE and odd ALUO units), two pipelines // (EXECE, EXECO) and all three dispatches (DISP) to the given superslice. def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "M(T|F)VSCR$"), (instregex "VCMPNEZ(B|H|W)$"), (instregex "VCMPEQU(B|H|W|D)$"), (instregex "VCMPNE(B|H|W)$"), (instregex "VABSDU(B|H|W)$"), (instregex "VADDU(B|H|W)S$"), (instregex "VAVG(S|U)(B|H|W)$"), (instregex "VCMP(EQ|GE|GT)FP(o)?$"), (instregex "VCMPBFP(o)?$"), (instregex "VC(L|T)Z(B|H|W|D)$"), (instregex "VADDS(B|H|W)S$"), (instregex "V(MIN|MAX)FP$"), (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"), VBPERMD, VADDCUW, VPOPCNTW, VPOPCNTD, VPRTYBD, VPRTYBW, VSHASIGMAD, VSHASIGMAW, VSUBSBS, VSUBSHS, VSUBSWS, VSUBUBS, VSUBUHS, VSUBUWS, VSUBCUW, VCMPGTSB, VCMPGTSBo, VCMPGTSD, VCMPGTSDo, VCMPGTSH, VCMPGTSHo, VCMPGTSW, VCMPGTSWo, VCMPGTUB, VCMPGTUBo, VCMPGTUD, VCMPGTUDo, VCMPGTUH, VCMPGTUHo, VCMPGTUW, VCMPGTUWo, VCMPNEBo, VCMPNEHo, VCMPNEWo, VCMPNEZBo, VCMPNEZHo, VCMPNEZWo, VCMPEQUBo, VCMPEQUDo, VCMPEQUHo, VCMPEQUWo, XVCMPEQDP, XVCMPEQDPo, XVCMPEQSP, XVCMPEQSPo, XVCMPGEDP, XVCMPGEDPo, XVCMPGESP, XVCMPGESPo, XVCMPGTDP, XVCMPGTDPo, XVCMPGTSP, XVCMPGTSPo, XVMAXDP, XVMAXSP, XVMINDP, XVMINSP, XVTDIVDP, XVTDIVSP, XVTSQRTDP, XVTSQRTSP, XVTSTDCDP, XVTSTDCSP, XVXSIGDP, XVXSIGSP )>; // 7 cycle DP vector operation that uses an entire superslice. // Uses both DP units (the even DPE and odd DPO units), two pipelines // (EXECE, EXECO) and all three dispatches (DISP) to the given superslice. def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], (instrs VADDFP, VCTSXS, VCTSXS_0, VCTUXS, VCTUXS_0, VEXPTEFP, VLOGEFP, VMADDFP, VMHADDSHS, VNMSUBFP, VREFP, VRFIM, VRFIN, VRFIP, VRFIZ, VRSQRTEFP, VSUBFP, XVADDDP, XVADDSP, XVCVDPSP, XVCVDPSXDS, XVCVDPSXWS, XVCVDPUXDS, XVCVDPUXWS, XVCVHPSP, XVCVSPDP, XVCVSPHP, XVCVSPSXDS, XVCVSPSXWS, XVCVSPUXDS, XVCVSPUXWS, XVCVSXDDP, XVCVSXDSP, XVCVSXWDP, XVCVSXWSP, XVCVUXDDP, XVCVUXDSP, XVCVUXWDP, XVCVUXWSP, XVMADDADP, XVMADDASP, XVMADDMDP, XVMADDMSP, XVMSUBADP, XVMSUBASP, XVMSUBMDP, XVMSUBMSP, XVMULDP, XVMULSP, XVNMADDADP, XVNMADDASP, XVNMADDMDP, XVNMADDMSP, XVNMSUBADP, XVNMSUBASP, XVNMSUBMDP, XVNMSUBMSP, XVRDPI, XVRDPIC, XVRDPIM, XVRDPIP, XVRDPIZ, XVREDP, XVRESP, XVRSPI, XVRSPIC, XVRSPIM, XVRSPIP, XVRSPIZ, XVRSQRTEDP, XVRSQRTESP, XVSUBDP, XVSUBSP, VCFSX, VCFSX_0, VCFUX, VCFUX_0, VMHRADDSHS, VMLADDUHM, VMSUMMBM, VMSUMSHM, VMSUMSHS, VMSUMUBM, VMSUMUHM, VMSUMUHS, VMULESB, VMULESH, VMULESW, VMULEUB, VMULEUH, VMULEUW, VMULOSB, VMULOSH, VMULOSW, VMULOUB, VMULOUH, VMULOUW, VMULUWM, VSUM2SWS, VSUM4SBS, VSUM4SHS, VSUM4UBS, VSUMSWS )>; // 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three // dispatch units for the superslice. 
def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "MADD(HD|HDU|LD)$"), (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?$") )>; // 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three // dispatch units for the superslice. def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], (instrs FRSP, (instregex "FRI(N|P|Z|M)(D|S)$"), (instregex "FRE(S)?$"), (instregex "FADD(S)?$"), (instregex "FMSUB(S)?$"), (instregex "FMADD(S)?$"), (instregex "FSUB(S)?$"), (instregex "FCFID(U)?(S)?$"), (instregex "FCTID(U)?(Z)?$"), (instregex "FCTIW(U)?(Z)?$"), (instregex "FRSQRTE(S)?$"), FNMADDS, FNMADD, FNMSUBS, FNMSUB, FSELD, FSELS, FMULS, FMUL, XSMADDADP, XSMADDASP, XSMADDMDP, XSMADDMSP, XSMSUBADP, XSMSUBASP, XSMSUBMDP, XSMSUBMSP, XSMULDP, XSMULSP, XSNMADDADP, XSNMADDASP, XSNMADDMDP, XSNMADDMSP, XSNMSUBADP, XSNMSUBASP, XSNMSUBMDP, XSNMSUBMSP )>; // 7 cycle Restricted DP operation and one 3 cycle ALU operation. // These operations can be done in parallel. // The DP is restricted so we need a full 5 dispatches. def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "FSEL(D|S)o$") )>; // 5 Cycle Restricted DP operation and one 2 cycle ALU operation. def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "MUL(H|L)(D|W)(U)?o$") )>; // 7 cycle Restricted DP operation and one 3 cycle ALU operation. // These operations must be done sequentially. // The DP is restricted so we need a full 5 dispatches. def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "FRI(N|P|Z|M)(D|S)o$"), (instregex "FRE(S)?o$"), (instregex "FADD(S)?o$"), (instregex "FSUB(S)?o$"), (instregex "F(N)?MSUB(S)?o$"), (instregex "F(N)?MADD(S)?o$"), (instregex "FCFID(U)?(S)?o$"), (instregex "FCTID(U)?(Z)?o$"), (instregex "FCTIW(U)?(Z)?o$"), (instregex "FMUL(S)?o$"), (instregex "FRSQRTE(S)?o$"), FRSPo )>; // 7 cycle DP operation. One DP unit, one EXEC pipeline and two dispatch units. def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C], (instrs XSADDDP, XSADDSP, XSCVDPHP, XSCVDPSP, XSCVDPSXDS, XSCVDPSXDSs, XSCVDPSXWS, XSCVDPUXDS, XSCVDPUXDSs, XSCVDPUXWS, XSCVDPSXWSs, XSCVDPUXWSs, XSCVHPDP, XSCVSPDP, XSCVSXDDP, XSCVSXDSP, XSCVUXDDP, XSCVUXDSP, XSRDPI, XSRDPIC, XSRDPIM, XSRDPIP, XSRDPIZ, XSREDP, XSRESP, XSRSQRTEDP, XSRSQRTESP, XSSUBDP, XSSUBSP, XSCVDPSPN, XSRSP )>; // Three Cycle PM operation. Only one PM unit per superslice so we use the whole // superslice. That includes both exec pipelines (EXECO, EXECE) and all three // dispatches. 
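Two composition rules recur in the combined SchedWrite classes: when a cracked instruction's pieces are independent they overlap and the class latency is the larger piece, and when one piece feeds the other the latencies add, as in P9_DPOpAndALU2Op_10C above (7 + 3). A minimal sketch of that arithmetic, with an invented helper name:

#include <algorithm>
#include <cassert>

// Latency of a cracked instruction whose two internal operations either
// overlap (independent) or run back to back (dependent).
unsigned combinedLatency(unsigned A, unsigned B, bool Dependent) {
  return Dependent ? A + B : std::max(A, B);
}

int main() {
  // 7-cycle restricted DP op paired with a 3-cycle ALU op:
  assert(combinedLatency(7, 3, /*Dependent=*/false) == 7);   // pieces run in parallel
  assert(combinedLatency(7, 3, /*Dependent=*/true) == 10);   // P9_DPOpAndALU2Op_10C
  // 5-cycle DP op feeding a 2-cycle ALU op: P9_DPOpAndALUOp_7C.
  assert(combinedLatency(5, 2, /*Dependent=*/true) == 7);
  return 0;
}

The same max-versus-sum rule accounts for the other ...OpAnd...Op classes later in the file.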
def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "LVS(L|R)$"), (instregex "VSPLTIS(W|H|B)$"), (instregex "VSPLT(W|H|B)(s)?$"), (instregex "V_SETALLONES(B|H)?$"), (instregex "VEXTRACTU(B|H|W)$"), (instregex "VINSERT(B|H|W|D)$"), MFVSRLD, MTVSRWS, VBPERMQ, VCLZLSBB, VCTZLSBB, VEXTRACTD, VEXTUBLX, VEXTUBRX, VEXTUHLX, VEXTUHRX, VEXTUWLX, VEXTUWRX, VGBBD, VMRGHB, VMRGHH, VMRGHW, VMRGLB, VMRGLH, VMRGLW, VPERM, VPERMR, VPERMXOR, VPKPX, VPKSDSS, VPKSDUS, VPKSHSS, VPKSHUS, VPKSWSS, VPKSWUS, VPKUDUM, VPKUDUS, VPKUHUM, VPKUHUS, VPKUWUM, VPKUWUS, VPRTYBQ, VSL, VSLDOI, VSLO, VSLV, VSR, VSRO, VSRV, VUPKHPX, VUPKHSB, VUPKHSH, VUPKHSW, VUPKLPX, VUPKLSB, VUPKLSH, VUPKLSW, XXBRD, XXBRH, XXBRQ, XXBRW, XXEXTRACTUW, XXINSERTW, XXMRGHW, XXMRGLW, XXPERM, XXPERMR, XXSLDWI, + XXSLDWIs, XXSPLTIB, XXSPLTW, XXSPLTWs, XXPERMDI, XXPERMDIs, VADDCUQ, VADDECUQ, VADDEUQM, VADDUQM, VMUL10CUQ, VMUL10ECUQ, VMUL10EUQ, VMUL10UQ, VSUBCUQ, VSUBECUQ, VSUBEUQM, VSUBUQM, XSCMPEXPQP, XSCMPOQP, XSCMPUQP, XSTSTDCQP, XSXSIGQP, BCDCFNo, BCDCFZo, BCDCPSGNo, BCDCTNo, BCDCTZo, BCDSETSGNo, BCDSo, BCDTRUNCo, BCDUSo, BCDUTRUNCo )>; // 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole // superslice. That includes both exec pipelines (EXECO, EXECE) and all three // dispatches. def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], (instrs BCDSRo, XSADDQP, XSADDQPO, XSCVDPQP, XSCVQPDP, XSCVQPDPO, XSCVQPSDZ, XSCVQPSWZ, XSCVQPUDZ, XSCVQPUWZ, XSCVSDQP, XSCVUDQP, XSRQPI, XSRQPIX, XSRQPXP, XSSUBQP, XSSUBQPO )>; // 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole // superslice. That includes both exec pipelines (EXECO, EXECE) and all three // dispatches. def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], (instrs BCDCTSQo )>; // 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole // superslice. That includes both exec pipelines (EXECO, EXECE) and all three // dispatches. def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], (instrs XSMADDQP, XSMADDQPO, XSMSUBQP, XSMSUBQPO, XSMULQP, XSMULQPO, XSNMADDQP, XSNMADDQPO, XSNMSUBQP, XSNMSUBQPO )>; // 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole // superslice. That includes both exec pipelines (EXECO, EXECE) and all three // dispatches. def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], (instrs BCDCFSQo )>; // 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole // superslice. That includes both exec pipelines (EXECO, EXECE) and all three // dispatches. def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], (instrs XSDIVQP, XSDIVQPO )>; // 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole // superslice. That includes both exec pipelines (EXECO, EXECE) and all three // dispatches. def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], (instrs XSSQRTQP, XSSQRTQPO )>; // 6 Cycle Load uses a single slice. def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C, DISP_1C], (instrs (instregex "LXVL(L)?") )>; // 5 Cycle Load uses a single slice. def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C], (instrs (instregex "LVE(B|H|W)X$"), (instregex "LVX(L)?"), (instregex "LXSI(B|H)ZX$"), LXSDX, LXVB16X, LXVD2X, LXVWSX, LXSIWZX, LXV, LXVX, LXSD, DFLOADf64, XFLOADf64, LIWZX )>; // 4 Cycle Load uses a single slice. 
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C], (instrs (instregex "DCB(F|T|ST)(EP)?$"), (instregex "DCBZ(L)?(EP)?$"), (instregex "DCBTST(EP)?$"), (instregex "CP_COPY(8)?$"), (instregex "CP_PASTE(8)?$"), (instregex "ICBI(EP)?$"), (instregex "ICBT(LS)?$"), (instregex "LBARX(L)?$"), (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"), (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"), (instregex "LH(A|B)RX(L)?(8)?$"), (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"), (instregex "LWARX(L)?$"), (instregex "LWBRX(8)?$"), (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"), CP_ABORT, DARN, EnforceIEIO, ISYNC, MSGSYNC, TLBSYNC, SYNC, LMW, LSWI )>; // 4 Cycle Restricted load uses a single slice but the dispatch for the whole // superslice. def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C], (instrs LFIWZX, LFDX, LFD )>; // Cracked Load Instructions. // Load instructions that can be done in parallel. def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs SLBIA, SLBIE, SLBMFEE, SLBMFEV, SLBMTE, TLBIEL )>; // Cracked Load Instruction. // Requires Load and ALU pieces totaling 6 cycles. The Load and ALU // operations can be run in parallel. def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "L(W|H)ZU(X)?(8)?$"), TEND )>; // Cracked Store Instruction // Consecutive Store and ALU instructions. The store is restricted and requires // three dispatches. def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "ST(B|H|W|D)CX$") )>; // Cracked Load Instruction. // Two consecutive load operations for a total of 8 cycles. def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs LDMX )>; // Cracked Load instruction. // Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU // operations cannot be done at the same time and so their latencies are added. def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "LHA(X)?(8)?$"), (instregex "CP_PASTE(8)?o$"), (instregex "LWA(X)?(_32)?$"), TCHECK )>; // Cracked Restricted Load instruction. // Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU // operations cannot be done at the same time and so their latencies are added. // Full 6 dispatches are required as this is both cracked and restricted. def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs LFIWAX )>; // Cracked Load instruction. // Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU // operations cannot be done at the same time and so their latencies are added. // Full 4 dispatches are required as this is a cracked instruction. def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs LXSIWAX, LIWAX )>; // Cracked Load instruction. // Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7 // cycles. The Load and ALU operations cannot be done at the same time and so // their latencies are added. // Full 6 dispatches are required as this is a restricted instruction. def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs LFSX, LFS )>; // Cracked Load instruction. // Requires consecutive Load and ALU pieces totaling 8 cycles. 
The Load and ALU // operations cannot be done at the same time and so their latencies are added. // Full 4 dispatches are required as this is a cracked instruction. def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs LXSSP, LXSSPX, XFLOADf32, DFLOADf32 )>; // Cracked 3-Way Load Instruction // Load with two ALU operations that depend on each other def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "LHAU(X)?(8)?$"), LWAUX )>; // Cracked Load that requires the PM resource. // Since the Load and the PM cannot be done at the same time the latencies are // added. Requires 8 cycles. // Since the PM requires the full superslice we need both EXECE, EXECO pipelines // as well as 3 dispatches for the PM. The Load requires the remaining 2 // dispatches. def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs LXVH8X, LXVDSX, LXVW4X )>; // Single slice Restricted store operation. The restricted operation requires // all three dispatches for the superslice. def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "STF(S|D|IWX|SX|DX)$"), (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"), (instregex "STW(8)?$"), (instregex "(D|X)FSTORE(f32|f64)$"), (instregex "ST(W|H|D)BRX$"), (instregex "ST(B|H|D)(8)?$"), (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"), STIWX, SLBIEG, STMW, STSWI, TLBIE )>; // Vector Store Instruction // Requires the whole superslice and therefore requires all three dispatches // as well as both the Even and Odd exec pipelines. def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "STVE(B|H|W)X$"), (instregex "STVX(L)?$"), (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$") )>; // 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole // superslice. That includes both exec pipelines (EXECO, EXECE) and all three // dispatches. def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "MTCTR(8)?(loop)?$"), (instregex "MTLR(8)?$") )>; // 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole // superslice. That includes both exec pipelines (EXECO, EXECE) and all three // dispatches. def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "M(T|F)VRSAVE(v)?$"), (instregex "M(T|F)PMR$"), (instregex "M(T|F)TB(8)?$"), (instregex "MF(SPR|CTR|LR)(8)?$"), (instregex "M(T|F)MSR(D)?$"), (instregex "MTSPR(8)?$") )>; // 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole // superslice. That includes both exec pipelines (EXECO, EXECE) and all three // dispatches. def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C], (instrs DIVW, DIVWU, MODSW )>; // 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole // superslice. That includes both exec pipelines (EXECO, EXECE) and all three // dispatches. def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C], (instrs DIVWE, DIVD, DIVWEU, DIVDU, MODSD, MODUD, MODUW )>; // 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole // superslice. That includes both exec pipelines (EXECO, EXECE) and all three // dispatches. 
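The dispatch counts in these entries can be reproduced mechanically from the comments: a normal single-slice piece is charged two DISP_1C slots, a restricted piece is charged all three slots of its superslice, and a cracked instruction is charged for every piece. A small sketch of that bookkeeping, using invented names and assuming exactly that 2/3-slot convention:

#include <cassert>
#include <vector>

// One internal piece of a (possibly cracked) instruction.
struct Piece {
  bool Restricted; // a restricted piece needs the superslice's 3 dispatch slots
};

// Total DISP_1C entries charged to an instruction.
unsigned dispatchSlots(const std::vector<Piece> &Pieces) {
  unsigned Slots = 0;
  for (const Piece &P : Pieces)
    Slots += P.Restricted ? 3 : 2;
  return Slots;
}

int main() {
  assert(dispatchSlots({{false}}) == 2);                  // plain single-slice op
  assert(dispatchSlots({{true}}) == 3);                   // restricted op
  assert(dispatchSlots({{false}, {false}}) == 4);         // cracked, e.g. the LXSIWAX entry
  assert(dispatchSlots({{true}, {true}}) == 6);           // cracked and restricted, e.g. LFIWAX
  assert(dispatchSlots({{true}, {true}, {false}}) == 8);  // three-piece entries such as LFSU below
  return 0;
}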
def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C], (instrs DIVDE, DIVDEU )>; // Cracked DIV and ALU operation. Requires one full slice for the ALU operation // and one full superslice for the DIV operation since there is only one DIV // per superslice. Latency of DIV plus ALU is 26. def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "DIVW(U)?(O)?o$") )>; // Cracked DIV and ALU operation. Requires one full slice for the ALU operation // and one full superslice for the DIV operation since there is only one DIV // per superslice. Latency of DIV plus ALU is 26. def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs DIVDo, DIVDUo, DIVWEo, DIVWEUo )>; // Cracked DIV and ALU operation. Requires one full slice for the ALU operation // and one full superslice for the DIV operation since there is only one DIV // per superslice. Latency of DIV plus ALU is 42. def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs DIVDEo, DIVDEUo )>; // CR access instructions in _BrMCR, IIC_BrMCRX. // Cracked, restricted, ALU operations. // Here the two ALU ops can actually be done in parallel and therefore the // latencies are not added together. Otherwise this is like having two // instructions running together on two pipelines and 6 dispatches. // ALU ops are 2 cycles each. def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs MTCRF, MTCRF8 )>; // Cracked ALU operations. // Here the two ALU ops can actually be done in parallel and therefore the // latencies are not added together. Otherwise this is like having two // instructions running together on two pipelines and 4 dispatches. // ALU ops are 2 cycles each. def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "ADDC(8)?o$"), (instregex "SUBFC(8)?o$") )>; // Cracked ALU operations. // Two ALU ops can be done in parallel. // One is three cycle ALU the ohter is a two cycle ALU. // One of the ALU ops is restricted the other is not so we have a total of // 5 dispatches. def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "F(N)?ABS(D|S)o$"), (instregex "FCPSGN(D|S)o$"), (instregex "FNEG(D|S)o$"), FMRo )>; // Cracked ALU operations. // Here the two ALU ops can actually be done in parallel and therefore the // latencies are not added together. Otherwise this is like having two // instructions running together on two pipelines and 4 dispatches. // ALU ops are 3 cycles each. def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs MCRFS )>; // Cracked Restricted ALU operations. // Here the two ALU ops can actually be done in parallel and therefore the // latencies are not added together. Otherwise this is like having two // instructions running together on two pipelines and 6 dispatches. // ALU ops are 3 cycles each. def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "MTFSF(b|o)?$"), (instregex "MTFSFI(o)?$") )>; // Cracked instruction made of two ALU ops. // The two ops cannot be done in parallel. // One of the ALU ops is restricted and takes 3 dispatches. 
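The three cracked DIV-and-ALU classes above pair the 16-, 24- and 40-cycle divides from the preceding entries with a dependent 2-cycle ALU piece for the record forms, giving the 18-, 26- and 42-cycle totals in the class names. The same sums, written out as a sketch with an invented helper:

#include <cassert>

// Combined latency of a divide feeding a 2-cycle ALU piece (record forms).
unsigned divPlusALU(unsigned DivCycles) { return DivCycles + 2; }

int main() {
  assert(divPlusALU(16) == 18); // P9_IntDivAndALUOp_18C_8, e.g. DIVWo
  assert(divPlusALU(24) == 26); // P9_IntDivAndALUOp_26C_8, e.g. DIVDo
  assert(divPlusALU(40) == 42); // P9_IntDivAndALUOp_42C_8, e.g. DIVDEo
  return 0;
}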
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "RLD(I)?C(R|L)o$"), (instregex "RLW(IMI|INM|NM)(8)?o$"), (instregex "SLW(8)?o$"), (instregex "SRAW(I)?o$"), (instregex "SRW(8)?o$"), RLDICL_32o, RLDIMIo )>; // Cracked instruction made of two ALU ops. // The two ops cannot be done in parallel. // Both of the ALU ops are restricted and take 3 dispatches. def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "MFFS(L|CE|o)?$") )>; // Cracked ALU instruction composed of three consecutive 2 cycle loads for a // total of 6 cycles. All of the ALU operations are also restricted so each // takes 3 dispatches for a total of 9. def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "MFCR(8)?$") )>; // Cracked instruction made of two ALU ops. // The two ops cannot be done in parallel. def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "EXTSWSLIo$"), (instregex "SRAD(I)?o$"), SLDo, SRDo, RLDICo )>; // 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches. def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], (instrs FDIV )>; // 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU. def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs FDIVo )>; // 36 Cycle DP Instruction. // Instruction can be done on a single slice. def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C], (instrs XSSQRTDP )>; // 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches. def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], (instrs FSQRT )>; // 36 Cycle DP Vector Instruction. def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], (instrs XVSQRTDP )>; // 27 Cycle DP Vector Instruction. def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], (instrs XVSQRTSP )>; // 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU. def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs FSQRTo )>; // 26 Cycle DP Instruction. def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C, DISP_1C], (instrs XSSQRTSP )>; // 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches. def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], (instrs FSQRTS )>; // 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU. def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs FSQRTSo )>; // 33 Cycle DP Instruction. Takes one slice and 2 dispatches. def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C], (instrs XSDIVDP )>; // 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches. def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], (instrs FDIVS )>; // 22 Cycle DP Instruction Restricted and Cracked with 2 Cycle ALU. def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs FDIVSo )>; // 22 Cycle DP Instruction. Takes one slice and 2 dispatches. def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C], (instrs XSDIVSP )>; // 24 Cycle DP Vector Instruction. 
Takes one full superslice. // Includes both EXECE, EXECO pipelines and all 3 dispatches for the given // superslice. def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], (instrs XVDIVSP )>; // 33 Cycle DP Vector Instruction. Takes one full superslice. // Includes both EXECE, EXECO pipelines and all 3 dispatches for the given // superslice. def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], (instrs XVDIVDP )>; // Instruction cracked into three pieces. One Load and two ALU operations. // The Load and one of the ALU ops cannot be run at the same time and so the // latencies are added together for 6 cycles. The remainaing ALU is 2 cycles. // Both the load and the ALU that depends on it are restricted and so they take // a total of 6 dispatches. The final 2 dispatches come from the second ALU op. // The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load. def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "LF(SU|SUX)$") )>; // Cracked instruction made up of a Store and an ALU. The ALU does not depend on // the store and so it can be run at the same time as the store. The store is // also restricted. def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "STF(S|D)U(X)?$"), (instregex "ST(B|H|W|D)U(X)?(8)?$") )>; // Cracked instruction made up of a Load and an ALU. The ALU does not depend on // the load and so it can be run at the same time as the load. def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "LBZU(X)?(8)?$"), (instregex "LDU(X)?$") )>; // Cracked instruction made up of a Load and an ALU. The ALU does not depend on // the load and so it can be run at the same time as the load. The load is also // restricted. 3 dispatches are from the restricted load while the other two // are from the ALU. The AGEN pipeline is from the load and the EXEC pipeline // is required for the ALU. def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "LF(DU|DUX)$") )>; // Crypto Instructions // 6 Cycle CY operation. Only one CY unit per CPU so we use a whole // superslice. That includes both exec pipelines (EXECO, EXECE) and all three // dispatches. def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "VPMSUM(B|H|W|D)$"), (instregex "V(N)?CIPHER(LAST)?$"), VSBOX )>; // Branch Instructions // Two Cycle Branch def : InstRW<[P9_BR_2C, DISP_1C, DISP_1C], (instrs (instregex "BCCCTR(L)?(8)?$"), (instregex "BCCL(A|R|RL)?$"), (instregex "BCCTR(L)?(8)?(n)?$"), (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"), (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"), (instregex "BL(_TLS)?$"), (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"), (instregex "BLA(8|8_NOP)?$"), (instregex "BLR(8|L)?$"), (instregex "TAILB(A)?(8)?$"), (instregex "TAILBCTR(8)?$"), (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"), (instregex "BCLR(L)?(n)?$"), (instregex "BCTR(L)?(8)?$"), B, BA, BC, BCC, BCCA, BCL, BCLalways, BCLn, BCTRL8_LDinto_toc, BCn, CTRL_DEP )>; // Five Cycle Branch with a 2 Cycle ALU Op // Operations must be done consecutively and not in parallel. 
def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs ADDPCIS )>; // Special Extracted Instructions For Atomics // Atomic Load def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "L(D|W)AT$") )>; // Atomic Store def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "ST(D|W)AT$") )>; // Signal Processing Engine (SPE) Instructions // These instructions are not supported on Power 9 def : InstRW<[], (instrs BRINC, EVABS, EVEQV, EVMRA, EVNAND, EVNEG, (instregex "EVADD(I)?W$"), (instregex "EVADD(SM|SS|UM|US)IAAW$"), (instregex "EVAND(C)?$"), (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"), (instregex "EVCNTL(S|Z)W$"), (instregex "EVDIVW(S|U)$"), (instregex "EVEXTS(B|H)$"), (instregex "EVLD(H|W|D)(X)?$"), (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"), (instregex "EVLWHE(X)?$"), (instregex "EVLWHO(S|U)(X)?$"), (instregex "EVLW(H|W)SPLAT(X)?$"), (instregex "EVMERGE(HI|LO|HILO|LOHI)$"), (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"), (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"), (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"), (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"), (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"), (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"), (instregex "EVMWHS(M|S)(F|FA|I|IA)$"), (instregex "EVMWHUMI(A)?$"), (instregex "EVMWLS(M|S)IA(A|N)W$"), (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"), (instregex "EVMWSM(F|I)(A|AA|AN)?$"), (instregex "EVMWSSF(A|AA|AN)?$"), (instregex "EVMWUMI(A|AA|AN)?$"), (instregex "EV(N|X)?OR(C)?$"), (instregex "EVR(LW|LWI|NDW)$"), (instregex "EVSLW(I)?$"), (instregex "EVSPLAT(F)?I$"), (instregex "EVSRW(I)?(S|U)$"), (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"), (instregex "EVSUBF(S|U)(M|S)IAAW$"), (instregex "EVSUB(I)?FW$") )> { let Unsupported = 1; } // General Instructions without scheduling support. def : InstRW<[], (instrs (instregex "(H)?RFI(D)?$"), (instregex "DSS(ALL)?$"), (instregex "DST(ST)?(T)?(64)?$"), (instregex "ICBL(C|Q)$"), (instregex "L(W|H|B)EPX$"), (instregex "ST(W|H|B)EPX$"), (instregex "(L|ST)FDEPX$"), (instregex "M(T|F)SR(IN)?$"), (instregex "M(T|F)DCR$"), (instregex "NOP_GT_PWR(6|7)$"), (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"), (instregex "WRTEE(I)?$"), ATTN, CLRBHRB, MFBHRBE, MBAR, MSYNC, SLBSYNC, NAP, STOP, TRAP, RFCI, RFDI, RFMCI, SC, DCBA, DCBI, DCCCI, ICCCI )> { let Unsupported = 1; } Index: vendor/llvm/dist-release_70/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- vendor/llvm/dist-release_70/lib/Target/PowerPC/PPCISelLowering.cpp (revision 341364) +++ vendor/llvm/dist-release_70/lib/Target/PowerPC/PPCISelLowering.cpp (revision 341365) @@ -1,14197 +1,14186 @@ //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the PPCISelLowering class. 
// //===----------------------------------------------------------------------===// #include "PPCISelLowering.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPC.h" #include "PPCCCState.h" #include "PPCCallingConv.h" #include "PPCFrameLowering.h" #include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" #include "PPCPerfectShuffle.h" #include "PPCRegisterInfo.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/Value.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include #include #include #include #include #include #include using namespace llvm; #define DEBUG_TYPE "ppc-lowering" static cl::opt DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); static cl::opt DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden); static cl::opt DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); static cl::opt DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden); static cl::opt EnableQuadPrecision("enable-ppc-quad-precision", cl::desc("enable quad precision float support on ppc"), cl::Hidden); STATISTIC(NumTailCalls, "Number of tail calls"); 
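The command-line switches and counters declared in this file use LLVM's cl::opt and STATISTIC facilities. The following self-contained fragment illustrates the pattern; the flag name, statistic name and helper function are invented for the example and are not part of the patch.

#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"

#define DEBUG_TYPE "ppc-lowering"

using namespace llvm;

// A hidden boolean flag, off by default, queried like a plain bool.
static cl::opt<bool>
    DisableExampleOpt("disable-ppc-example-opt",
                      cl::desc("disable an example optimization on PPC"),
                      cl::init(false), cl::Hidden);

// A counter that shows up under -stats, keyed by DEBUG_TYPE.
STATISTIC(NumExampleHits, "Number of times the example path was taken");

static bool exampleHook() {
  if (DisableExampleOpt)
    return false;
  ++NumExampleHits;
  return true;
}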
STATISTIC(NumSiblingCalls, "Number of sibling calls"); static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int); // FIXME: Remove this once the bug has been fixed! extern cl::opt ANDIGlueBug; PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { // Use _setjmp/_longjmp instead of setjmp/longjmp. setUseUnderscoreSetJmp(true); setUseUnderscoreLongJmp(true); // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all // arguments are at least 4/8 bytes aligned. bool isPPC64 = Subtarget.isPPC64(); setMinStackArgumentAlignment(isPPC64 ? 8:4); // Set up the register classes. addRegisterClass(MVT::i32, &PPC::GPRCRegClass); if (!useSoftFloat()) { if (hasSPE()) { addRegisterClass(MVT::f32, &PPC::SPE4RCRegClass); addRegisterClass(MVT::f64, &PPC::SPERCRegClass); } else { addRegisterClass(MVT::f32, &PPC::F4RCRegClass); addRegisterClass(MVT::f64, &PPC::F8RCRegClass); } } // Match BITREVERSE to customized fast code sequence in the td file. setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended. setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); // PowerPC has an i16 but no i8 (or i1) SEXTLOAD. for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); } setTruncStoreAction(MVT::f64, MVT::f32, Expand); // PowerPC has pre-inc load and store's. setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal); if (!Subtarget.hasSPE()) { setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal); } // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry. const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; for (MVT VT : ScalarIntVTs) { setOperationAction(ISD::ADDC, VT, Legal); setOperationAction(ISD::ADDE, VT, Legal); setOperationAction(ISD::SUBC, VT, Legal); setOperationAction(ISD::SUBE, VT, Legal); } if (Subtarget.useCRBits()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); if (isPPC64 || Subtarget.hasFPCVT()) { setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); AddPromotedToType (ISD::SINT_TO_FP, MVT::i1, isPPC64 ? MVT::i64 : MVT::i32); setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, isPPC64 ? MVT::i64 : MVT::i32); } else { setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom); } // PowerPC does not support direct load/store of condition registers. 
setOperationAction(ISD::LOAD, MVT::i1, Custom); setOperationAction(ISD::STORE, MVT::i1, Custom); // FIXME: Remove this once the ANDI glue bug is fixed: if (ANDIGlueBug) setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); setTruncStoreAction(VT, MVT::i1, Expand); } addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass); } // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on // PPC (the libcall is not available). setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom); // We do not currently implement these libm ops for PowerPC. setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand); setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand); setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand); setOperationAction(ISD::FRINT, MVT::ppcf128, Expand); setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand); setOperationAction(ISD::FREM, MVT::ppcf128, Expand); // PowerPC has no SREM/UREM instructions unless we are on P9 // On P9 we may use a hardware instruction to compute the remainder. // The instructions are not legalized directly because in the cases where the // result of both the remainder and the division is required it is more // efficient to compute the remainder from the result of the division rather // than use the remainder instruction. if (Subtarget.isISA3_0()) { setOperationAction(ISD::SREM, MVT::i32, Custom); setOperationAction(ISD::UREM, MVT::i32, Custom); setOperationAction(ISD::SREM, MVT::i64, Custom); setOperationAction(ISD::UREM, MVT::i64, Custom); } else { setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); setOperationAction(ISD::SREM, MVT::i64, Expand); setOperationAction(ISD::UREM, MVT::i64, Expand); } if (Subtarget.hasP9Vector()) { setOperationAction(ISD::ABS, MVT::v4i32, Legal); setOperationAction(ISD::ABS, MVT::v8i16, Legal); setOperationAction(ISD::ABS, MVT::v16i8, Legal); } // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM. 
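The reason SREM/UREM stay Custom rather than Legal on ISA 3.0 is the identity mentioned in the comment above: when the quotient is already being computed, the remainder is cheaper to derive with a multiply and a subtract than with a second modsw/modud. A plain C++ sketch of that identity (not the DAG combine itself):

#include <cassert>
#include <cstdint>

// When both q = a / b and r = a % b are needed, the remainder can be
// derived from the quotient: r = a - (a / b) * b.
int32_t remainderFromQuotient(int32_t A, int32_t B) {
  int32_t Q = A / B; // one divide (divw)
  return A - Q * B;  // mullw + subf instead of a separate modsw
}

int main() {
  assert(remainderFromQuotient(23, 5) == 23 % 5);
  assert(remainderFromQuotient(-23, 5) == -23 % 5); // both truncate toward zero
  return 0;
}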
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::UDIVREM, MVT::i32, Expand); setOperationAction(ISD::SDIVREM, MVT::i32, Expand); setOperationAction(ISD::UDIVREM, MVT::i64, Expand); setOperationAction(ISD::SDIVREM, MVT::i64, Expand); // We don't support sin/cos/sqrt/fmod/pow setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FREM , MVT::f64, Expand); setOperationAction(ISD::FPOW , MVT::f64, Expand); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); setOperationAction(ISD::FPOW , MVT::f32, Expand); if (Subtarget.hasSPE()) { setOperationAction(ISD::FMA , MVT::f64, Expand); setOperationAction(ISD::FMA , MVT::f32, Expand); } else { setOperationAction(ISD::FMA , MVT::f64, Legal); setOperationAction(ISD::FMA , MVT::f32, Legal); } setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); // If we're enabling GP optimizations, use hardware square root if (!Subtarget.hasFSQRT() && !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() && Subtarget.hasFRE())) setOperationAction(ISD::FSQRT, MVT::f64, Expand); if (!Subtarget.hasFSQRT() && !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() && Subtarget.hasFRES())) setOperationAction(ISD::FSQRT, MVT::f32, Expand); if (Subtarget.hasFCPSGN()) { setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal); } else { setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); } if (Subtarget.hasFPRND()) { setOperationAction(ISD::FFLOOR, MVT::f64, Legal); setOperationAction(ISD::FCEIL, MVT::f64, Legal); setOperationAction(ISD::FTRUNC, MVT::f64, Legal); setOperationAction(ISD::FROUND, MVT::f64, Legal); setOperationAction(ISD::FFLOOR, MVT::f32, Legal); setOperationAction(ISD::FCEIL, MVT::f32, Legal); setOperationAction(ISD::FTRUNC, MVT::f32, Legal); setOperationAction(ISD::FROUND, MVT::f32, Legal); } // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd // to speed up scalar BSWAP64. 
// CTPOP or CTTZ were introduced in P8/P9 respectively setOperationAction(ISD::BSWAP, MVT::i32 , Expand); if (Subtarget.isISA3_0()) { setOperationAction(ISD::BSWAP, MVT::i64 , Custom); setOperationAction(ISD::CTTZ , MVT::i32 , Legal); setOperationAction(ISD::CTTZ , MVT::i64 , Legal); } else { setOperationAction(ISD::BSWAP, MVT::i64 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i64 , Expand); } if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) { setOperationAction(ISD::CTPOP, MVT::i32 , Legal); setOperationAction(ISD::CTPOP, MVT::i64 , Legal); } else { setOperationAction(ISD::CTPOP, MVT::i32 , Expand); setOperationAction(ISD::CTPOP, MVT::i64 , Expand); } // PowerPC does not have ROTR setOperationAction(ISD::ROTR, MVT::i32 , Expand); setOperationAction(ISD::ROTR, MVT::i64 , Expand); if (!Subtarget.useCRBits()) { // PowerPC does not have Select setOperationAction(ISD::SELECT, MVT::i32, Expand); setOperationAction(ISD::SELECT, MVT::i64, Expand); setOperationAction(ISD::SELECT, MVT::f32, Expand); setOperationAction(ISD::SELECT, MVT::f64, Expand); } // PowerPC wants to turn select_cc of FP into fsel when possible. setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); // PowerPC wants to optimize integer setcc a bit if (!Subtarget.useCRBits()) setOperationAction(ISD::SETCC, MVT::i32, Custom); // PowerPC does not have BRCOND which requires SetCC if (!Subtarget.useCRBits()) setOperationAction(ISD::BRCOND, MVT::Other, Expand); setOperationAction(ISD::BR_JT, MVT::Other, Expand); if (Subtarget.hasSPE()) { // SPE has built-in conversions setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); } else { // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores. setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); // PowerPC does not have [U|S]INT_TO_FP setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); } if (Subtarget.hasDirectMove() && isPPC64) { setOperationAction(ISD::BITCAST, MVT::f32, Legal); setOperationAction(ISD::BITCAST, MVT::i32, Legal); setOperationAction(ISD::BITCAST, MVT::i64, Legal); setOperationAction(ISD::BITCAST, MVT::f64, Legal); } else { setOperationAction(ISD::BITCAST, MVT::f32, Expand); setOperationAction(ISD::BITCAST, MVT::i32, Expand); setOperationAction(ISD::BITCAST, MVT::i64, Expand); setOperationAction(ISD::BITCAST, MVT::f64, Expand); } // We cannot sextinreg(i1). Expand to shifts. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support // SjLj exception handling but a light-weight setjmp/longjmp replacement to // support continuation, user-level threading, and etc.. As a result, no // other SjLj exception interfaces are implemented and please don't build // your own exception handling based on them. // LLVM/Clang supports zero-cost DWARF exception handling. setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); // We want to legalize GlobalAddress and ConstantPool nodes into the // appropriate instructions to materialize the address. 
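One of the expansions requested above, SIGN_EXTEND_INREG on i1, is the classic shift pair: move the low bit up to the sign position, then arithmetic-shift it back down so it fills the register. A scalar sketch of the same trick, illustrative only:

#include <cassert>
#include <cstdint>

// Sign-extend the low bit of a 32-bit value using shifts only, i.e. the
// expansion used when sextinreg(i1) is not a native operation. Relies on
// two's-complement narrowing and an arithmetic right shift, as on PPC.
int32_t signExtendI1(uint32_t X) {
  return (int32_t)(X << 31) >> 31; // 0 -> 0, 1 -> -1 (all ones)
}

int main() {
  assert(signExtendI1(0) == 0);
  assert(signExtendI1(1) == -1);
  assert(signExtendI1(2) == 0); // only bit 0 participates
  return 0;
}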
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); setOperationAction(ISD::JumpTable, MVT::i32, Custom); setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); setOperationAction(ISD::BlockAddress, MVT::i64, Custom); setOperationAction(ISD::ConstantPool, MVT::i64, Custom); setOperationAction(ISD::JumpTable, MVT::i64, Custom); // TRAP is legal. setOperationAction(ISD::TRAP, MVT::Other, Legal); // TRAMPOLINE is custom lowered. setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); if (Subtarget.isSVR4ABI()) { if (isPPC64) { // VAARG always uses double-word chunks, so promote anything smaller. setOperationAction(ISD::VAARG, MVT::i1, Promote); AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64); setOperationAction(ISD::VAARG, MVT::i8, Promote); AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64); setOperationAction(ISD::VAARG, MVT::i16, Promote); AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64); setOperationAction(ISD::VAARG, MVT::i32, Promote); AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64); setOperationAction(ISD::VAARG, MVT::Other, Expand); } else { // VAARG is custom lowered with the 32-bit SVR4 ABI. setOperationAction(ISD::VAARG, MVT::Other, Custom); setOperationAction(ISD::VAARG, MVT::i64, Custom); } } else setOperationAction(ISD::VAARG, MVT::Other, Expand); if (Subtarget.isSVR4ABI() && !isPPC64) // VACOPY is custom lowered with the 32-bit SVR4 ABI. setOperationAction(ISD::VACOPY , MVT::Other, Custom); else setOperationAction(ISD::VACOPY , MVT::Other, Expand); // Use the default implementation. setOperationAction(ISD::VAEND , MVT::Other, Expand); setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom); setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom); setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom); setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); // To handle counter-based loop conditions. setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); // Comparisons that require checking two conditions. 
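The condition codes expanded just below (SETULT, SETUEQ, SETONE and the rest) are the ones that cannot be read from a single bit of the fcmpu result; each needs two ordinary ordered tests, which is what checking two conditions means here. A scalar sketch of two of the decompositions (not the DAG expansion itself):

#include <cassert>
#include <cmath>

// "Ordered and not equal": true only when neither operand is NaN and a != b.
bool setONE(double A, double B) { return (A < B) || (A > B); }

// "Unordered or equal": true when either operand is NaN, or when a == b.
bool setUEQ(double A, double B) { return !(A < B) && !(A > B); }

int main() {
  double NaN = std::nan("");
  assert(setONE(1.0, 2.0) && !setONE(1.0, 1.0) && !setONE(NaN, 1.0));
  assert(setUEQ(1.0, 1.0) && setUEQ(NaN, 1.0) && !setUEQ(1.0, 2.0));
  return 0;
}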
if (Subtarget.hasSPE()) { setCondCodeAction(ISD::SETO, MVT::f32, Expand); setCondCodeAction(ISD::SETO, MVT::f64, Expand); setCondCodeAction(ISD::SETUO, MVT::f32, Expand); setCondCodeAction(ISD::SETUO, MVT::f64, Expand); } setCondCodeAction(ISD::SETULT, MVT::f32, Expand); setCondCodeAction(ISD::SETULT, MVT::f64, Expand); setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); setCondCodeAction(ISD::SETUGT, MVT::f64, Expand); setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand); setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand); setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); setCondCodeAction(ISD::SETOLE, MVT::f64, Expand); setCondCodeAction(ISD::SETONE, MVT::f32, Expand); setCondCodeAction(ISD::SETONE, MVT::f64, Expand); if (Subtarget.has64BitSupport()) { // They also have instructions for converting between i64 and fp. setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); // This is just the low 32 bits of a (signed) fp->i64 conversion. // We cannot do this with Promote because i64 is not a legal type. setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); } else { // PowerPC does not have FP_TO_UINT on 32-bit implementations. if (Subtarget.hasSPE()) setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); else setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); } // With the instructions enabled under FPCVT, we can do everything. if (Subtarget.hasFPCVT()) { if (Subtarget.has64BitSupport()) { setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); } setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); } if (Subtarget.use64BitRegs()) { // 64-bit PowerPC implementations can support i64 types directly addRegisterClass(MVT::i64, &PPC::G8RCRegClass); // BUILD_PAIR can't be handled natively, and should be expanded to shl/or setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); // 64-bit PowerPC wants to expand i128 shifts itself. setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); } else { // 32-bit PowerPC wants to expand i64 shifts itself. setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); } if (Subtarget.hasAltivec()) { // First set operation action for all vector types to expand. Then we // will selectively turn on ones that can be effectively codegen'd. for (MVT VT : MVT::vector_valuetypes()) { // add/sub are legal for all supported vector VT's. 
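The i64 and i128 shifts marked Custom above are built from SHL_PARTS-style pieces: the wide value lives in two registers and the bits shifted out of the low half are ORed into the high half. A sketch of the left-shift case for amounts strictly between 0 and the word size (the full expansion also handles the boundary cases):

#include <cassert>
#include <cstdint>

// Shift a 128-bit value held as {Hi, Lo} left by Amt, with 0 < Amt < 64.
void shl128(uint64_t &Hi, uint64_t &Lo, unsigned Amt) {
  Hi = (Hi << Amt) | (Lo >> (64 - Amt)); // bits carried out of Lo enter Hi
  Lo = Lo << Amt;
}

int main() {
  uint64_t Hi = 0x1, Lo = 0x8000000000000000ULL;
  shl128(Hi, Lo, 1); // 0x1'8000...0 << 1 == 0x3'0000...0
  assert(Hi == 0x3 && Lo == 0);
  return 0;
}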
setOperationAction(ISD::ADD, VT, Legal); setOperationAction(ISD::SUB, VT, Legal); // Vector instructions introduced in P8 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) { setOperationAction(ISD::CTPOP, VT, Legal); setOperationAction(ISD::CTLZ, VT, Legal); } else { setOperationAction(ISD::CTPOP, VT, Expand); setOperationAction(ISD::CTLZ, VT, Expand); } // Vector instructions introduced in P9 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128)) setOperationAction(ISD::CTTZ, VT, Legal); else setOperationAction(ISD::CTTZ, VT, Expand); // We promote all shuffles to v16i8. setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote); AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8); // We promote all non-typed operations to v4i32. setOperationAction(ISD::AND , VT, Promote); AddPromotedToType (ISD::AND , VT, MVT::v4i32); setOperationAction(ISD::OR , VT, Promote); AddPromotedToType (ISD::OR , VT, MVT::v4i32); setOperationAction(ISD::XOR , VT, Promote); AddPromotedToType (ISD::XOR , VT, MVT::v4i32); setOperationAction(ISD::LOAD , VT, Promote); AddPromotedToType (ISD::LOAD , VT, MVT::v4i32); setOperationAction(ISD::SELECT, VT, Promote); AddPromotedToType (ISD::SELECT, VT, MVT::v4i32); setOperationAction(ISD::SELECT_CC, VT, Promote); AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32); setOperationAction(ISD::STORE, VT, Promote); AddPromotedToType (ISD::STORE, VT, MVT::v4i32); // No other operations are legal. setOperationAction(ISD::MUL , VT, Expand); setOperationAction(ISD::SDIV, VT, Expand); setOperationAction(ISD::SREM, VT, Expand); setOperationAction(ISD::UDIV, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FNEG, VT, Expand); setOperationAction(ISD::FSQRT, VT, Expand); setOperationAction(ISD::FLOG, VT, Expand); setOperationAction(ISD::FLOG10, VT, Expand); setOperationAction(ISD::FLOG2, VT, Expand); setOperationAction(ISD::FEXP, VT, Expand); setOperationAction(ISD::FEXP2, VT, Expand); setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FABS, VT, Expand); setOperationAction(ISD::FFLOOR, VT, Expand); setOperationAction(ISD::FCEIL, VT, Expand); setOperationAction(ISD::FTRUNC, VT, Expand); setOperationAction(ISD::FRINT, VT, Expand); setOperationAction(ISD::FNEARBYINT, VT, Expand); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); setOperationAction(ISD::BUILD_VECTOR, VT, Expand); setOperationAction(ISD::MULHU, VT, Expand); setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::UMUL_LOHI, VT, Expand); setOperationAction(ISD::SMUL_LOHI, VT, Expand); setOperationAction(ISD::UDIVREM, VT, Expand); setOperationAction(ISD::SDIVREM, VT, Expand); setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::BSWAP, VT, Expand); setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); setOperationAction(ISD::ROTL, VT, Expand); setOperationAction(ISD::ROTR, VT, Expand); for (MVT InnerVT : MVT::vector_valuetypes()) { setTruncStoreAction(VT, InnerVT, Expand); setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); } } // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle // with merges, splats, etc. 
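The blanket promotion of AND, OR, XOR, loads, stores and selects to v4i32 in the loop above is safe because those operations are type-agnostic at the bit level: reinterpreting the same 128 bits under a different lane width gives the same bytes. A small host-side illustration of that equivalence, illustrative only:

#include <cassert>
#include <cstdint>
#include <cstring>

// AND the same 128 bits once as 8 x u16 lanes and once as 4 x u32 lanes;
// the resulting byte patterns are identical, which is what makes the
// promotion of bitwise operations to a single representative type safe.
int main() {
  uint16_t A16[8] = {0xFFFF, 0x00FF, 0xF0F0, 0x1234, 0, 0xAAAA, 0x5555, 0x8001};
  uint16_t B16[8] = {0x0F0F, 0xFF00, 0xFFFF, 0xFF00, 0xFFFF, 0x5555, 0x5555, 0xFFFF};

  uint16_t R16[8];
  for (int I = 0; I < 8; ++I)
    R16[I] = A16[I] & B16[I];

  uint32_t A32[4], B32[4], R32[4];
  std::memcpy(A32, A16, 16);
  std::memcpy(B32, B16, 16);
  for (int I = 0; I < 4; ++I)
    R32[I] = A32[I] & B32[I];

  assert(std::memcmp(R16, R32, 16) == 0); // same bytes either way
  return 0;
}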
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); setOperationAction(ISD::AND , MVT::v4i32, Legal); setOperationAction(ISD::OR , MVT::v4i32, Legal); setOperationAction(ISD::XOR , MVT::v4i32, Legal); setOperationAction(ISD::LOAD , MVT::v4i32, Legal); setOperationAction(ISD::SELECT, MVT::v4i32, Subtarget.useCRBits() ? Legal : Expand); setOperationAction(ISD::STORE , MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass); addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass); addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass); addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass); setOperationAction(ISD::MUL, MVT::v4f32, Legal); setOperationAction(ISD::FMA, MVT::v4f32, Legal); if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) { setOperationAction(ISD::FDIV, MVT::v4f32, Legal); setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); } if (Subtarget.hasP8Altivec()) setOperationAction(ISD::MUL, MVT::v4i32, Legal); else setOperationAction(ISD::MUL, MVT::v4i32, Custom); setOperationAction(ISD::MUL, MVT::v8i16, Custom); setOperationAction(ISD::MUL, MVT::v16i8, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); // Altivec does not contain unordered floating-point compare instructions setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand); setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand); setCondCodeAction(ISD::SETO, MVT::v4f32, Expand); setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand); if (Subtarget.hasVSX()) { setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); if (Subtarget.hasP8Vector()) { setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); } if (Subtarget.hasDirectMove() && isPPC64) { setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal); } setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); setOperationAction(ISD::FROUND, MVT::v2f64, Legal); setOperationAction(ISD::FROUND, MVT::v4f32, Legal); setOperationAction(ISD::MUL, MVT::v2f64, Legal); setOperationAction(ISD::FMA, MVT::v2f64, Legal); setOperationAction(ISD::FDIV, MVT::v2f64, Legal); 
setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); setOperationAction(ISD::VSELECT, MVT::v8i16, Legal); setOperationAction(ISD::VSELECT, MVT::v4i32, Legal); setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); setOperationAction(ISD::VSELECT, MVT::v2f64, Legal); // Share the Altivec comparison restrictions. setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand); setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand); setCondCodeAction(ISD::SETO, MVT::v2f64, Expand); setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand); setOperationAction(ISD::LOAD, MVT::v2f64, Legal); setOperationAction(ISD::STORE, MVT::v2f64, Legal); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal); if (Subtarget.hasP8Vector()) addRegisterClass(MVT::f32, &PPC::VSSRCRegClass); addRegisterClass(MVT::f64, &PPC::VSFRCRegClass); addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass); addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass); addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass); if (Subtarget.hasP8Altivec()) { setOperationAction(ISD::SHL, MVT::v2i64, Legal); setOperationAction(ISD::SRA, MVT::v2i64, Legal); setOperationAction(ISD::SRL, MVT::v2i64, Legal); // 128 bit shifts can be accomplished via 3 instructions for SHL and // SRL, but not for SRA because of the instructions available: // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth // doing setOperationAction(ISD::SHL, MVT::v1i128, Expand); setOperationAction(ISD::SRL, MVT::v1i128, Expand); setOperationAction(ISD::SRA, MVT::v1i128, Expand); setOperationAction(ISD::SETCC, MVT::v2i64, Legal); } else { setOperationAction(ISD::SHL, MVT::v2i64, Expand); setOperationAction(ISD::SRA, MVT::v2i64, Expand); setOperationAction(ISD::SRL, MVT::v2i64, Expand); setOperationAction(ISD::SETCC, MVT::v2i64, Custom); // VSX v2i64 only supports non-arithmetic operations. setOperationAction(ISD::ADD, MVT::v2i64, Expand); setOperationAction(ISD::SUB, MVT::v2i64, Expand); } setOperationAction(ISD::LOAD, MVT::v2i64, Promote); AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64); setOperationAction(ISD::STORE, MVT::v2i64, Promote); AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); // Vector operation legalization checks the result type of // SIGN_EXTEND_INREG, overall legalization checks the inner type. 
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); setOperationAction(ISD::FNEG, MVT::v4f32, Legal); setOperationAction(ISD::FNEG, MVT::v2f64, Legal); setOperationAction(ISD::FABS, MVT::v4f32, Legal); setOperationAction(ISD::FABS, MVT::v2f64, Legal); if (Subtarget.hasDirectMove()) setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass); } if (Subtarget.hasP8Altivec()) { addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass); addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass); } if (Subtarget.hasP9Vector()) { setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); // 128 bit shifts can be accomplished via 3 instructions for SHL and // SRL, but not for SRA because of the instructions available: // VS{RL} and VS{RL}O. setOperationAction(ISD::SHL, MVT::v1i128, Legal); setOperationAction(ISD::SRL, MVT::v1i128, Legal); setOperationAction(ISD::SRA, MVT::v1i128, Expand); if (EnableQuadPrecision) { addRegisterClass(MVT::f128, &PPC::VRRCRegClass); setOperationAction(ISD::FADD, MVT::f128, Legal); setOperationAction(ISD::FSUB, MVT::f128, Legal); setOperationAction(ISD::FDIV, MVT::f128, Legal); setOperationAction(ISD::FMUL, MVT::f128, Legal); setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal); // No extending loads to f128 on PPC. for (MVT FPT : MVT::fp_valuetypes()) setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand); setOperationAction(ISD::FMA, MVT::f128, Legal); setCondCodeAction(ISD::SETULT, MVT::f128, Expand); setCondCodeAction(ISD::SETUGT, MVT::f128, Expand); setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand); setCondCodeAction(ISD::SETOGE, MVT::f128, Expand); setCondCodeAction(ISD::SETOLE, MVT::f128, Expand); setCondCodeAction(ISD::SETONE, MVT::f128, Expand); setOperationAction(ISD::FTRUNC, MVT::f128, Legal); setOperationAction(ISD::FRINT, MVT::f128, Legal); setOperationAction(ISD::FFLOOR, MVT::f128, Legal); setOperationAction(ISD::FCEIL, MVT::f128, Legal); setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal); setOperationAction(ISD::FROUND, MVT::f128, Legal); setOperationAction(ISD::SELECT, MVT::f128, Expand); setOperationAction(ISD::FP_ROUND, MVT::f64, Legal); setOperationAction(ISD::FP_ROUND, MVT::f32, Legal); setTruncStoreAction(MVT::f128, MVT::f64, Expand); setTruncStoreAction(MVT::f128, MVT::f32, Expand); setOperationAction(ISD::BITCAST, MVT::i128, Custom); // No implementation for these ops for PowerPC. 
setOperationAction(ISD::FSIN , MVT::f128, Expand); setOperationAction(ISD::FCOS , MVT::f128, Expand); setOperationAction(ISD::FPOW, MVT::f128, Expand); setOperationAction(ISD::FPOWI, MVT::f128, Expand); setOperationAction(ISD::FREM, MVT::f128, Expand); } } if (Subtarget.hasP9Altivec()) { setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); } } if (Subtarget.hasQPX()) { setOperationAction(ISD::FADD, MVT::v4f64, Legal); setOperationAction(ISD::FSUB, MVT::v4f64, Legal); setOperationAction(ISD::FMUL, MVT::v4f64, Legal); setOperationAction(ISD::FREM, MVT::v4f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal); setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand); setOperationAction(ISD::LOAD , MVT::v4f64, Custom); setOperationAction(ISD::STORE , MVT::v4f64, Custom); setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom); setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom); if (!Subtarget.useCRBits()) setOperationAction(ISD::SELECT, MVT::v4f64, Expand); setOperationAction(ISD::VSELECT, MVT::v4f64, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal); setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand); setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand); setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand); setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal); setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom); setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal); setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand); setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal); setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand); setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal); setOperationAction(ISD::FNEG , MVT::v4f64, Legal); setOperationAction(ISD::FABS , MVT::v4f64, Legal); setOperationAction(ISD::FSIN , MVT::v4f64, Expand); setOperationAction(ISD::FCOS , MVT::v4f64, Expand); setOperationAction(ISD::FPOW , MVT::v4f64, Expand); setOperationAction(ISD::FLOG , MVT::v4f64, Expand); setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand); setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand); setOperationAction(ISD::FEXP , MVT::v4f64, Expand); setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand); setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal); setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal); addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass); setOperationAction(ISD::FADD, MVT::v4f32, Legal); setOperationAction(ISD::FSUB, MVT::v4f32, Legal); setOperationAction(ISD::FMUL, MVT::v4f32, Legal); setOperationAction(ISD::FREM, MVT::v4f32, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal); setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand); setOperationAction(ISD::LOAD , MVT::v4f32, Custom); setOperationAction(ISD::STORE , MVT::v4f32, Custom); if (!Subtarget.useCRBits()) setOperationAction(ISD::SELECT, MVT::v4f32, Expand); setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal); setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand); setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand); setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand); setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, 
Legal); setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal); setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand); setOperationAction(ISD::FNEG , MVT::v4f32, Legal); setOperationAction(ISD::FABS , MVT::v4f32, Legal); setOperationAction(ISD::FSIN , MVT::v4f32, Expand); setOperationAction(ISD::FCOS , MVT::v4f32, Expand); setOperationAction(ISD::FPOW , MVT::v4f32, Expand); setOperationAction(ISD::FLOG , MVT::v4f32, Expand); setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand); setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand); setOperationAction(ISD::FEXP , MVT::v4f32, Expand); setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand); setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal); addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass); setOperationAction(ISD::AND , MVT::v4i1, Legal); setOperationAction(ISD::OR , MVT::v4i1, Legal); setOperationAction(ISD::XOR , MVT::v4i1, Legal); if (!Subtarget.useCRBits()) setOperationAction(ISD::SELECT, MVT::v4i1, Expand); setOperationAction(ISD::VSELECT, MVT::v4i1, Legal); setOperationAction(ISD::LOAD , MVT::v4i1, Custom); setOperationAction(ISD::STORE , MVT::v4i1, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand); setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand); setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand); setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom); addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass); setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal); setOperationAction(ISD::FCEIL, MVT::v4f64, Legal); setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal); setOperationAction(ISD::FROUND, MVT::v4f64, Legal); setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); setOperationAction(ISD::FROUND, MVT::v4f32, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand); // These need to set FE_INEXACT, and so cannot be vectorized here. setOperationAction(ISD::FRINT, MVT::v4f64, Expand); setOperationAction(ISD::FRINT, MVT::v4f32, Expand); if (TM.Options.UnsafeFPMath) { setOperationAction(ISD::FDIV, MVT::v4f64, Legal); setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); setOperationAction(ISD::FDIV, MVT::v4f32, Legal); setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); } else { setOperationAction(ISD::FDIV, MVT::v4f64, Expand); setOperationAction(ISD::FSQRT, MVT::v4f64, Expand); setOperationAction(ISD::FDIV, MVT::v4f32, Expand); setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); } } if (Subtarget.has64BitSupport()) setOperationAction(ISD::PREFETCH, MVT::Other, Legal); setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom); if (!isPPC64) { setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); } setBooleanContents(ZeroOrOneBooleanContent); if (Subtarget.hasAltivec()) { // Altivec instructions set fields to all zeros or all ones. 
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); } if (!isPPC64) { // These libcalls are not available in 32-bit. setLibcallName(RTLIB::SHL_I128, nullptr); setLibcallName(RTLIB::SRL_I128, nullptr); setLibcallName(RTLIB::SRA_I128, nullptr); } setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1); // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::BUILD_VECTOR); if (Subtarget.hasFPCVT()) setTargetDAGCombine(ISD::UINT_TO_FP); setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::BR_CC); if (Subtarget.useCRBits()) setTargetDAGCombine(ISD::BRCOND); setTargetDAGCombine(ISD::BSWAP); setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::ANY_EXTEND); if (Subtarget.useCRBits()) { setTargetDAGCombine(ISD::TRUNCATE); setTargetDAGCombine(ISD::SETCC); setTargetDAGCombine(ISD::SELECT_CC); } // Use reciprocal estimates. if (TM.Options.UnsafeFPMath) { setTargetDAGCombine(ISD::FDIV); setTargetDAGCombine(ISD::FSQRT); } // Darwin long double math library functions have $LDBL128 appended. if (Subtarget.isDarwin()) { setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128"); setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128"); setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128"); setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128"); setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128"); setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128"); setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128"); setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128"); setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128"); } if (EnableQuadPrecision) { setLibcallName(RTLIB::LOG_F128, "logf128"); setLibcallName(RTLIB::LOG2_F128, "log2f128"); setLibcallName(RTLIB::LOG10_F128, "log10f128"); setLibcallName(RTLIB::EXP_F128, "expf128"); setLibcallName(RTLIB::EXP2_F128, "exp2f128"); setLibcallName(RTLIB::SIN_F128, "sinf128"); setLibcallName(RTLIB::COS_F128, "cosf128"); setLibcallName(RTLIB::POW_F128, "powf128"); setLibcallName(RTLIB::FMIN_F128, "fminf128"); setLibcallName(RTLIB::FMAX_F128, "fmaxf128"); setLibcallName(RTLIB::POWI_F128, "__powikf2"); setLibcallName(RTLIB::REM_F128, "fmodf128"); } // With 32 condition bits, we don't need to sink (and duplicate) compares // aggressively in CodeGenPrep. if (Subtarget.useCRBits()) { setHasMultipleConditionRegisters(); setJumpIsExpensive(); } setMinFunctionAlignment(2); if (Subtarget.isDarwin()) setPrefFunctionAlignment(4); switch (Subtarget.getDarwinDirective()) { default: break; case PPC::DIR_970: case PPC::DIR_A2: case PPC::DIR_E500: case PPC::DIR_E500mc: case PPC::DIR_E5500: case PPC::DIR_PWR4: case PPC::DIR_PWR5: case PPC::DIR_PWR5X: case PPC::DIR_PWR6: case PPC::DIR_PWR6X: case PPC::DIR_PWR7: case PPC::DIR_PWR8: case PPC::DIR_PWR9: setPrefFunctionAlignment(4); setPrefLoopAlignment(4); break; } if (Subtarget.enableMachineScheduler()) setSchedulingPreference(Sched::Source); else setSchedulingPreference(Sched::Hybrid); computeRegisterProperties(STI.getRegisterInfo()); // The Freescale cores do better with aggressive inlining of memcpy and // friends. GCC uses same threshold of 128 bytes (= 32 word stores). 
  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;
  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
    MaxLoadsPerMemcmp = 128;
  } else {
    MaxLoadsPerMemcmp = 8;
    MaxLoadsPerMemcmpOptSize = 4;
  }
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      unsigned EltAlign = 0;
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  // Darwin passes everything on 4 byte boundary.
  if (Subtarget.isDarwin())
    return 4;

  // 16byte and wider vectors are passed on 16byte boundary.
  // The rest is 8 on PPC64 and 4 on PPC32 boundary.
  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ?
32 : 16); return Align; } unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, CallingConv:: ID CC, EVT VT) const { if (Subtarget.hasSPE() && VT == MVT::f64) return 2; return PPCTargetLowering::getNumRegisters(Context, VT); } MVT PPCTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv:: ID CC, EVT VT) const { if (Subtarget.hasSPE() && VT == MVT::f64) return MVT::i32; return PPCTargetLowering::getRegisterType(Context, VT); } bool PPCTargetLowering::useSoftFloat() const { return Subtarget.useSoftFloat(); } bool PPCTargetLowering::hasSPE() const { return Subtarget.hasSPE(); } const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((PPCISD::NodeType)Opcode) { case PPCISD::FIRST_NUMBER: break; case PPCISD::FSEL: return "PPCISD::FSEL"; case PPCISD::FCFID: return "PPCISD::FCFID"; case PPCISD::FCFIDU: return "PPCISD::FCFIDU"; case PPCISD::FCFIDS: return "PPCISD::FCFIDS"; case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS"; case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ"; case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ"; case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ"; case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ"; case PPCISD::FP_TO_UINT_IN_VSR: return "PPCISD::FP_TO_UINT_IN_VSR,"; case PPCISD::FP_TO_SINT_IN_VSR: return "PPCISD::FP_TO_SINT_IN_VSR"; case PPCISD::FRE: return "PPCISD::FRE"; case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE"; case PPCISD::STFIWX: return "PPCISD::STFIWX"; case PPCISD::VMADDFP: return "PPCISD::VMADDFP"; case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP"; case PPCISD::VPERM: return "PPCISD::VPERM"; case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; case PPCISD::VECINSERT: return "PPCISD::VECINSERT"; case PPCISD::XXREVERSE: return "PPCISD::XXREVERSE"; case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI"; case PPCISD::VECSHL: return "PPCISD::VECSHL"; case PPCISD::CMPB: return "PPCISD::CMPB"; case PPCISD::Hi: return "PPCISD::Hi"; case PPCISD::Lo: return "PPCISD::Lo"; case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8"; case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16"; case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET"; case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; case PPCISD::SRL: return "PPCISD::SRL"; case PPCISD::SRA: return "PPCISD::SRA"; case PPCISD::SHL: return "PPCISD::SHL"; case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE"; case PPCISD::CALL: return "PPCISD::CALL"; case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; case PPCISD::BCTRL: return "PPCISD::BCTRL"; case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC"; case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE"; case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; case PPCISD::MFOCRF: return "PPCISD::MFOCRF"; case PPCISD::MFVSR: return "PPCISD::MFVSR"; case PPCISD::MTVSRA: return "PPCISD::MTVSRA"; case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ"; case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP"; case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP"; case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT"; case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT"; case PPCISD::VCMP: return "PPCISD::VCMP"; case PPCISD::VCMPo: return "PPCISD::VCMPo"; case PPCISD::LBRX: return "PPCISD::LBRX"; case 
PPCISD::STBRX: return "PPCISD::STBRX"; case PPCISD::LFIWAX: return "PPCISD::LFIWAX"; case PPCISD::LFIWZX: return "PPCISD::LFIWZX"; case PPCISD::LXSIZX: return "PPCISD::LXSIZX"; case PPCISD::STXSIX: return "PPCISD::STXSIX"; case PPCISD::VEXTS: return "PPCISD::VEXTS"; case PPCISD::SExtVElems: return "PPCISD::SExtVElems"; case PPCISD::LXVD2X: return "PPCISD::LXVD2X"; case PPCISD::STXVD2X: return "PPCISD::STXVD2X"; case PPCISD::ST_VSR_SCAL_INT: return "PPCISD::ST_VSR_SCAL_INT"; case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; case PPCISD::BDNZ: return "PPCISD::BDNZ"; case PPCISD::BDZ: return "PPCISD::BDZ"; case PPCISD::MFFS: return "PPCISD::MFFS"; case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; case PPCISD::CR6SET: return "PPCISD::CR6SET"; case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT"; case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT"; case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L"; case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR"; case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR"; case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR"; case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; case PPCISD::SC: return "PPCISD::SC"; case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB"; case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE"; case PPCISD::RFEBB: return "PPCISD::RFEBB"; case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD"; case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN"; case PPCISD::QVFPERM: return "PPCISD::QVFPERM"; case PPCISD::QVGPCI: return "PPCISD::QVGPCI"; case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI"; case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI"; case PPCISD::QBFLT: return "PPCISD::QBFLT"; case PPCISD::QVLFSb: return "PPCISD::QVLFSb"; case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128"; } return nullptr; } EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C, EVT VT) const { if (!VT.isVector()) return Subtarget.useCRBits() ? MVT::i1 : MVT::i32; if (Subtarget.hasQPX()) return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements()); return VT.changeVectorElementTypeToInteger(); } bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const { assert(VT.isFloatingPoint() && "Non-floating-point FMA?"); return true; } //===----------------------------------------------------------------------===// // Node matching predicates, for use by the tblgen matching code. //===----------------------------------------------------------------------===// /// isFloatingPointZero - Return true if this is 0.0 or -0.0. static bool isFloatingPointZero(SDValue Op) { if (ConstantFPSDNode *CFP = dyn_cast(Op)) return CFP->getValueAPF().isZero(); else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { // Maybe this has already been legalized into the constant pool? 
if (ConstantPoolSDNode *CP = dyn_cast(Op.getOperand(1))) if (const ConstantFP *CFP = dyn_cast(CP->getConstVal())) return CFP->getValueAPF().isZero(); } return false; } /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return /// true if Op is undef or if it matches the specified value. static bool isConstantOrUndef(int Op, int Val) { return Op < 0 || Op == Val; } /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUHUM instruction. /// The ShuffleKind distinguishes between big-endian operations with /// two different inputs (0), either-endian operations with two identical /// inputs (1), and little-endian operations with two different inputs (2). /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { bool IsLE = DAG.getDataLayout().isLittleEndian(); if (ShuffleKind == 0) { if (IsLE) return false; for (unsigned i = 0; i != 16; ++i) if (!isConstantOrUndef(N->getMaskElt(i), i*2+1)) return false; } else if (ShuffleKind == 2) { if (!IsLE) return false; for (unsigned i = 0; i != 16; ++i) if (!isConstantOrUndef(N->getMaskElt(i), i*2)) return false; } else if (ShuffleKind == 1) { unsigned j = IsLE ? 0 : 1; for (unsigned i = 0; i != 8; ++i) if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)) return false; } return true; } /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUWUM instruction. /// The ShuffleKind distinguishes between big-endian operations with /// two different inputs (0), either-endian operations with two identical /// inputs (1), and little-endian operations with two different inputs (2). /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { bool IsLE = DAG.getDataLayout().isLittleEndian(); if (ShuffleKind == 0) { if (IsLE) return false; for (unsigned i = 0; i != 16; i += 2) if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) return false; } else if (ShuffleKind == 2) { if (!IsLE) return false; for (unsigned i = 0; i != 16; i += 2) if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+1)) return false; } else if (ShuffleKind == 1) { unsigned j = IsLE ? 0 : 2; for (unsigned i = 0; i != 8; i += 2) if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1)) return false; } return true; } /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the /// current subtarget. /// /// The ShuffleKind distinguishes between big-endian operations with /// two different inputs (0), either-endian operations with two identical /// inputs (1), and little-endian operations with two different inputs (2). /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { const PPCSubtarget& Subtarget = static_cast(DAG.getSubtarget()); if (!Subtarget.hasP8Vector()) return false; bool IsLE = DAG.getDataLayout().isLittleEndian(); if (ShuffleKind == 0) { if (IsLE) return false; for (unsigned i = 0; i != 16; i += 4) if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) || !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) || !isConstantOrUndef(N->getMaskElt(i+3), i*2+7)) return false; } else if (ShuffleKind == 2) { if (!IsLE) return false; for (unsigned i = 0; i != 16; i += 4) if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) || !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) || !isConstantOrUndef(N->getMaskElt(i+3), i*2+3)) return false; } else if (ShuffleKind == 1) { unsigned j = IsLE ? 0 : 4; for (unsigned i = 0; i != 8; i += 4) if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) || !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) || !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) || !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) || !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3)) return false; } return true; } /// isVMerge - Common function, used to match vmrg* shuffles. /// static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart) { if (N->getValueType(0) != MVT::v16i8) return false; assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && "Unsupported merge size!"); for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j), LHSStart+j+i*UnitSize) || !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j), RHSStart+j+i*UnitSize)) return false; } return true; } /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes). /// The ShuffleKind distinguishes between big-endian merges with two /// different inputs (0), either-endian merges with two identical inputs (1), /// and little-endian merges with two different inputs (2). For the latter, /// the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG) { if (DAG.getDataLayout().isLittleEndian()) { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 0, 0); else if (ShuffleKind == 2) // swapped return isVMerge(N, UnitSize, 0, 16); else return false; } else { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 8, 8); else if (ShuffleKind == 0) // normal return isVMerge(N, UnitSize, 8, 24); else return false; } } /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes). /// The ShuffleKind distinguishes between big-endian merges with two /// different inputs (0), either-endian merges with two identical inputs (1), /// and little-endian merges with two different inputs (2). For the latter, /// the input operands are swapped (see PPCInstrAltivec.td). 
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG) { if (DAG.getDataLayout().isLittleEndian()) { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 8, 8); else if (ShuffleKind == 2) // swapped return isVMerge(N, UnitSize, 8, 24); else return false; } else { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 0, 0); else if (ShuffleKind == 0) // normal return isVMerge(N, UnitSize, 0, 16); else return false; } } /** * Common function used to match vmrgew and vmrgow shuffles * * The indexOffset determines whether to look for even or odd words in * the shuffle mask. This is based on the of the endianness of the target * machine. * - Little Endian: * - Use offset of 0 to check for odd elements * - Use offset of 4 to check for even elements * - Big Endian: * - Use offset of 0 to check for even elements * - Use offset of 4 to check for odd elements * A detailed description of the vector element ordering for little endian and * big endian can be found at * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html * Targeting your applications - what little endian and big endian IBM XL C/C++ * compiler differences mean to you * * The mask to the shuffle vector instruction specifies the indices of the * elements from the two input vectors to place in the result. The elements are * numbered in array-access order, starting with the first vector. These vectors * are always of type v16i8, thus each vector will contain 16 elements of size * 8. More info on the shuffle vector can be found in the * http://llvm.org/docs/LangRef.html#shufflevector-instruction * Language Reference. * * The RHSStartValue indicates whether the same input vectors are used (unary) * or two different input vectors are used, based on the following: * - If the instruction uses the same vector for both inputs, the range of the * indices will be 0 to 15. In this case, the RHSStart value passed should * be 0. * - If the instruction has two different vectors then the range of the * indices will be 0 to 31. In this case, the RHSStart value passed should * be 16 (indices 0-15 specify elements in the first vector while indices 16 * to 31 specify elements in the second vector). * * \param[in] N The shuffle vector SD Node to analyze * \param[in] IndexOffset Specifies whether to look for even or odd elements * \param[in] RHSStartValue Specifies the starting index for the righthand input * vector to the shuffle_vector instruction * \return true iff this shuffle vector represents an even or odd word merge */ static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset, unsigned RHSStartValue) { if (N->getValueType(0) != MVT::v16i8) return false; for (unsigned i = 0; i < 2; ++i) for (unsigned j = 0; j < 4; ++j) if (!isConstantOrUndef(N->getMaskElt(i*4+j), i*RHSStartValue+j+IndexOffset) || !isConstantOrUndef(N->getMaskElt(i*4+j+8), i*RHSStartValue+j+IndexOffset+8)) return false; return true; } /** * Determine if the specified shuffle mask is suitable for the vmrgew or * vmrgow instructions. * * \param[in] N The shuffle vector SD Node to analyze * \param[in] CheckEven Check for an even merge (true) or an odd merge (false) * \param[in] ShuffleKind Identify the type of merge: * - 0 = big-endian merge with two different inputs; * - 1 = either-endian merge with two identical inputs; * - 2 = little-endian merge with two different inputs (inputs are swapped for * little-endian merges). 
* \param[in] DAG The current SelectionDAG * \return true iff this shuffle mask */ bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG) { if (DAG.getDataLayout().isLittleEndian()) { unsigned indexOffset = CheckEven ? 4 : 0; if (ShuffleKind == 1) // Unary return isVMerge(N, indexOffset, 0); else if (ShuffleKind == 2) // swapped return isVMerge(N, indexOffset, 16); else return false; } else { unsigned indexOffset = CheckEven ? 0 : 4; if (ShuffleKind == 1) // Unary return isVMerge(N, indexOffset, 0); else if (ShuffleKind == 0) // Normal return isVMerge(N, indexOffset, 16); else return false; } return false; } /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// amount, otherwise return -1. /// The ShuffleKind distinguishes between big-endian operations with two /// different inputs (0), either-endian operations with two identical inputs /// (1), and little-endian operations with two different inputs (2). For the /// latter, the input operands are swapped (see PPCInstrAltivec.td). int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { if (N->getValueType(0) != MVT::v16i8) return -1; ShuffleVectorSDNode *SVOp = cast(N); // Find the first non-undef value in the shuffle mask. unsigned i; for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i) /*search*/; if (i == 16) return -1; // all undef. // Otherwise, check to see if the rest of the elements are consecutively // numbered from this value. unsigned ShiftAmt = SVOp->getMaskElt(i); if (ShiftAmt < i) return -1; ShiftAmt -= i; bool isLE = DAG.getDataLayout().isLittleEndian(); if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) { // Check the rest of the elements to see if they are consecutive. for (++i; i != 16; ++i) if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) return -1; } else if (ShuffleKind == 1) { // Check the rest of the elements to see if they are consecutive. for (++i; i != 16; ++i) if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) return -1; } else return -1; if (isLE) ShiftAmt = 16 - ShiftAmt; return ShiftAmt; } /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a splat of a single element that is suitable for input to /// VSPLTB/VSPLTH/VSPLTW. bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { assert(N->getValueType(0) == MVT::v16i8 && (EltSize == 1 || EltSize == 2 || EltSize == 4)); // The consecutive indices need to specify an element, not part of two // different elements. So abandon ship early if this isn't the case. if (N->getMaskElt(0) % EltSize != 0) return false; // This is a splat operation if each element of the permute is the same, and // if the value doesn't reference the second vector. unsigned ElementBase = N->getMaskElt(0); // FIXME: Handle UNDEF elements too! if (ElementBase >= 16) return false; // Check that the indices are consecutive, in the case of a multi-byte element // splatted with a v16i8 mask. for (unsigned i = 1; i != EltSize; ++i) if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase)) return false; for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { if (N->getMaskElt(i) < 0) continue; for (unsigned j = 0; j != EltSize; ++j) if (N->getMaskElt(i+j) != N->getMaskElt(j)) return false; } return true; } /// Check that the mask is shuffling N byte elements. 
Within each N byte /// element of the mask, the indices could be either in increasing or /// decreasing order as long as they are consecutive. /// \param[in] N the shuffle vector SD Node to analyze /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/ /// Word/DoubleWord/QuadWord). /// \param[in] StepLen the delta indices number among the N byte element, if /// the mask is in increasing/decreasing order then it is 1/-1. /// \return true iff the mask is shuffling N byte elements. static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width, int StepLen) { assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) && "Unexpected element width."); assert((StepLen == 1 || StepLen == -1) && "Unexpected element width."); unsigned NumOfElem = 16 / Width; unsigned MaskVal[16]; // Width is never greater than 16 for (unsigned i = 0; i < NumOfElem; ++i) { MaskVal[0] = N->getMaskElt(i * Width); if ((StepLen == 1) && (MaskVal[0] % Width)) { return false; } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) { return false; } for (unsigned int j = 1; j < Width; ++j) { MaskVal[j] = N->getMaskElt(i * Width + j); if (MaskVal[j] != MaskVal[j-1] + StepLen) { return false; } } } return true; } bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE) { if (!isNByteElemShuffleMask(N, 4, 1)) return false; // Now we look at mask elements 0,4,8,12 unsigned M0 = N->getMaskElt(0) / 4; unsigned M1 = N->getMaskElt(4) / 4; unsigned M2 = N->getMaskElt(8) / 4; unsigned M3 = N->getMaskElt(12) / 4; unsigned LittleEndianShifts[] = { 2, 1, 0, 3 }; unsigned BigEndianShifts[] = { 3, 0, 1, 2 }; // Below, let H and L be arbitrary elements of the shuffle mask // where H is in the range [4,7] and L is in the range [0,3]. // H, 1, 2, 3 or L, 5, 6, 7 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) || (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) { ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3]; InsertAtByte = IsLE ? 12 : 0; Swap = M0 < 4; return true; } // 0, H, 2, 3 or 4, L, 6, 7 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) || (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) { ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3]; InsertAtByte = IsLE ? 8 : 4; Swap = M1 < 4; return true; } // 0, 1, H, 3 or 4, 5, L, 7 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) || (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) { ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3]; InsertAtByte = IsLE ? 4 : 8; Swap = M2 < 4; return true; } // 0, 1, 2, H or 4, 5, 6, L if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) || (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) { ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3]; InsertAtByte = IsLE ? 0 : 12; Swap = M3 < 4; return true; } // If both vector operands for the shuffle are the same vector, the mask will // contain only elements from the first one and the second one will be undef. if (N->getOperand(1).isUndef()) { ShiftElts = 0; Swap = true; unsigned XXINSERTWSrcElem = IsLE ? 2 : 1; if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) { InsertAtByte = IsLE ? 12 : 0; return true; } if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) { InsertAtByte = IsLE ? 8 : 4; return true; } if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) { InsertAtByte = IsLE ? 4 : 8; return true; } if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) { InsertAtByte = IsLE ? 
0 : 12; return true; } } return false; } bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE) { assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); // Ensure each byte index of the word is consecutive. if (!isNByteElemShuffleMask(N, 4, 1)) return false; // Now we look at mask elements 0,4,8,12, which are the beginning of words. unsigned M0 = N->getMaskElt(0) / 4; unsigned M1 = N->getMaskElt(4) / 4; unsigned M2 = N->getMaskElt(8) / 4; unsigned M3 = N->getMaskElt(12) / 4; // If both vector operands for the shuffle are the same vector, the mask will // contain only elements from the first one and the second one will be undef. if (N->getOperand(1).isUndef()) { assert(M0 < 4 && "Indexing into an undef vector?"); if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4) return false; ShiftElts = IsLE ? (4 - M0) % 4 : M0; Swap = false; return true; } // Ensure each word index of the ShuffleVector Mask is consecutive. if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8) return false; if (IsLE) { if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) { // Input vectors don't need to be swapped if the leading element // of the result is one of the 3 left elements of the second vector // (or if there is no shift to be done at all). Swap = false; ShiftElts = (8 - M0) % 8; } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) { // Input vectors need to be swapped if the leading element // of the result is one of the 3 left elements of the first vector // (or if we're shifting by 4 - thereby simply swapping the vectors). Swap = true; ShiftElts = (4 - M0) % 4; } return true; } else { // BE if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) { // Input vectors don't need to be swapped if the leading element // of the result is one of the 4 elements of the first vector. Swap = false; ShiftElts = M0; } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) { // Input vectors need to be swapped if the leading element // of the result is one of the 4 elements of the right vector. Swap = true; ShiftElts = M0 - 4; } return true; } } bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) { assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); if (!isNByteElemShuffleMask(N, Width, -1)) return false; for (int i = 0; i < 16; i += Width) if (N->getMaskElt(i) != i + Width - 1) return false; return true; } bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) { return isXXBRShuffleMaskHelper(N, 2); } bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) { return isXXBRShuffleMaskHelper(N, 4); } bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) { return isXXBRShuffleMaskHelper(N, 8); } bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) { return isXXBRShuffleMaskHelper(N, 16); } /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap /// if the inputs to the instruction should be swapped and set \p DM to the /// value for the immediate. /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI /// AND element 0 of the result comes from the first input (LE) or second input /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered. /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle /// mask. bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM, bool &Swap, bool IsLE) { assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); // Ensure each byte index of the double word is consecutive. 
if (!isNByteElemShuffleMask(N, 8, 1)) return false; unsigned M0 = N->getMaskElt(0) / 8; unsigned M1 = N->getMaskElt(8) / 8; assert(((M0 | M1) < 4) && "A mask element out of bounds?"); // If both vector operands for the shuffle are the same vector, the mask will // contain only elements from the first one and the second one will be undef. if (N->getOperand(1).isUndef()) { if ((M0 | M1) < 2) { DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1); Swap = false; return true; } else return false; } if (IsLE) { if (M0 > 1 && M1 < 2) { Swap = false; } else if (M0 < 2 && M1 > 1) { M0 = (M0 + 2) % 4; M1 = (M1 + 2) % 4; Swap = true; } else return false; // Note: if control flow comes here that means Swap is already set above DM = (((~M1) & 1) << 1) + ((~M0) & 1); return true; } else { // BE if (M0 < 2 && M1 > 1) { Swap = false; } else if (M0 > 1 && M1 < 2) { M0 = (M0 + 2) % 4; M1 = (M1 + 2) % 4; Swap = true; } else return false; // Note: if control flow comes here that means Swap is already set above DM = (M0 << 1) + (M1 & 1); return true; } } /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast(N); assert(isSplatShuffleMask(SVOp, EltSize)); if (DAG.getDataLayout().isLittleEndian()) return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize); else return SVOp->getMaskElt(0) / EltSize; } /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed /// by using a vspltis[bhw] instruction of the specified element size, return /// the constant being splatted. The ByteSize field indicates the number of /// bytes of each element [124] -> [bhw]. SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { SDValue OpVal(nullptr, 0); // If ByteSize of the splat is bigger than the element size of the // build_vector, then we have a case where we are checking for a splat where // multiple elements of the buildvector are folded together into a single // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8). unsigned EltSize = 16/N->getNumOperands(); if (EltSize < ByteSize) { unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval. SDValue UniquedVals[4]; assert(Multiple > 1 && Multiple <= 4 && "How can this happen?"); // See if all of the elements in the buildvector agree across. for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { if (N->getOperand(i).isUndef()) continue; // If the element isn't a constant, bail fully out. if (!isa(N->getOperand(i))) return SDValue(); if (!UniquedVals[i&(Multiple-1)].getNode()) UniquedVals[i&(Multiple-1)] = N->getOperand(i); else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) return SDValue(); // no match. } // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains // either constant or undef values that are identical for each chunk. See // if these chunks can form into a larger vspltis*. // Check to see if all of the leading entries are either 0 or -1. If // neither, then this won't fit into the immediate field. bool LeadingZero = true; bool LeadingOnes = true; for (unsigned i = 0; i != Multiple-1; ++i) { if (!UniquedVals[i].getNode()) continue; // Must have been undefs. LeadingZero &= isNullConstant(UniquedVals[i]); LeadingOnes &= isAllOnesConstant(UniquedVals[i]); } // Finally, check the least significant entry. 
    if (LeadingZero) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (Val < 16)                                  // 0,0,0,4 -> vspltisw(4)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }
    if (LeadingOnes) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).isUndef()) continue;
    if (!OpVal.getNode())
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat. The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, check if it consists
  // of a repeated bit pattern of size ByteSize.
  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
    return SDValue();

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it.
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
  return SDValue();
}

/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isQVALIGNIShuffleMask(SDNode *N) {
  EVT VT = N->getValueType(0);
  if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 4) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  // Check the rest of the elements to see if they are consecutive.
  for (++i; i != 4; ++i)
    if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
      return -1;

  return ShiftAmt;
}

//===----------------------------------------------------------------------===//
// Addressing Mode Selection
//===----------------------------------------------------------------------===//

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value. If so, this returns true and the
/// immediate.
bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
  if (N->getValueType(0) == MVT::i32)
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
  else
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}
bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}

/// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation. Returns false if it
/// can be more efficiently represented with [r+imm].
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
                                            SDValue &Index,
                                            SelectionDAG &DAG) const {
  int16_t imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false;    // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i can fold it if we can.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
    // disjoint.
    KnownBits LHSKnown, RHSKnown;
    DAG.computeKnownBits(N.getOperand(0), LHSKnown);

    if (LHSKnown.Zero.getBoolValue()) {
      DAG.computeKnownBits(N.getOperand(1), RHSKnown);
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}

// If we happen to be doing an i64 load or store into a stack slot that has
// less than a 4-byte alignment, then the frame-index elimination may need to
// use an indexed load or store instruction (because the offset may not be a
// multiple of 4). The extra register needed to hold the offset comes from the
// register scavenger, and it is possible that the scavenger will need to use
// an emergency spill slot. As a result, we need to make sure that a spill slot
// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
// stack slot.
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
  // FIXME: This does not handle the LWA case.
  if (VT != MVT::i64)
    return;

  // NOTE: We'll exclude negative FIs here, which come from argument
  // lowering, because there are no known test cases triggering this problem
  // using packed structures (or similar). We can remove this exclusion if
  // we find such a test case. The reason why this is so test-case driven is
  // because this entire 'fixup' is only to prevent crashes (from the
  // register scavenger) on not-really-valid inputs. For example, if we have:
  //   %a = alloca i1
  //   %b = bitcast i1* %a to i64*
  //   store i64* a, i64 b
  // then the store should really be marked as 'align 1', but is not. If it
  // were marked as 'align 1' then the indexed form would have been
  // instruction-selected initially, and the problem this 'fixup' is preventing
  // won't happen regardless.
  if (FrameIdx < 0)
    return;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  unsigned Align = MFI.getObjectAlignment(FrameIdx);
  if (Align >= 4)
    return;

  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setHasNonRISpills();
}

/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg.
If \p Alignment is non-zero, only accept /// displacements that are multiples of that value. bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, unsigned Alignment) const { // FIXME dl should come from parent load or store, not from address SDLoc dl(N); // If this can be more profitably realized as r+r, fail. if (SelectAddressRegReg(N, Disp, Base, DAG)) return false; if (N.getOpcode() == ISD::ADD) { int16_t imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && (!Alignment || (imm % Alignment) == 0)) { Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); if (FrameIndexSDNode *FI = dyn_cast(N.getOperand(0))) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); } else { Base = N.getOperand(0); } return true; // [r+i] } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { // Match LOAD (ADD (X, Lo(G))). assert(!cast(N.getOperand(1).getOperand(1))->getZExtValue() && "Cannot handle constant offsets yet!"); Disp = N.getOperand(1).getOperand(0); // The global address. assert(Disp.getOpcode() == ISD::TargetGlobalAddress || Disp.getOpcode() == ISD::TargetGlobalTLSAddress || Disp.getOpcode() == ISD::TargetConstantPool || Disp.getOpcode() == ISD::TargetJumpTable); Base = N.getOperand(0); return true; // [&g+r] } } else if (N.getOpcode() == ISD::OR) { int16_t imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && (!Alignment || (imm % Alignment) == 0)) { // If this is an or of disjoint bitfields, we can codegen this as an add // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. KnownBits LHSKnown; DAG.computeKnownBits(N.getOperand(0), LHSKnown); if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { // If all of the bits are known zero on the LHS or RHS, the add won't // carry. if (FrameIndexSDNode *FI = dyn_cast(N.getOperand(0))) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); } else { Base = N.getOperand(0); } Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); return true; } } } else if (ConstantSDNode *CN = dyn_cast(N)) { // Loading from a constant address. // If this address fits entirely in a 16-bit sext immediate field, codegen // this as "d, 0" int16_t Imm; if (isIntS16Immediate(CN, Imm) && (!Alignment || (Imm % Alignment) == 0)) { Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0)); Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); return true; } // Handle 32-bit sext immediates with LIS + addr mode. if ((CN->getValueType(0) == MVT::i32 || (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) && (!Alignment || (CN->getZExtValue() % Alignment) == 0)) { int Addr = (int)CN->getZExtValue(); // Otherwise, break this down into an LIS + disp. Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32); Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl, MVT::i32); unsigned Opc = CN->getValueType(0) == MVT::i32 ? 
PPC::LIS : PPC::LIS8; Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0); return true; } } Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout())); if (FrameIndexSDNode *FI = dyn_cast(N)) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); } else Base = N; return true; // [r+0] } /// SelectAddressRegRegOnly - Given the specified addressed, force it to be /// represented as an indexed [r+r] operation. bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const { // Check to see if we can easily represent this as an [r+r] address. This // will fail if it thinks that the address is more profitably represented as // reg+imm, e.g. where imm = 0. if (SelectAddressRegReg(N, Base, Index, DAG)) return true; // If the address is the result of an add, we will utilize the fact that the // address calculation includes an implicit add. However, we can reduce // register pressure if we do not materialize a constant just for use as the // index register. We only get rid of the add if it is not an add of a // value and a 16-bit signed constant and both have a single use. int16_t imm = 0; if (N.getOpcode() == ISD::ADD && (!isIntS16Immediate(N.getOperand(1), imm) || !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) { Base = N.getOperand(0); Index = N.getOperand(1); return true; } // Otherwise, do it the hard way, using R0 as the base register. Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, N.getValueType()); Index = N; return true; } /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const { if (DisablePPCPreinc) return false; bool isLoad = true; SDValue Ptr; EVT VT; unsigned Alignment; if (LoadSDNode *LD = dyn_cast(N)) { Ptr = LD->getBasePtr(); VT = LD->getMemoryVT(); Alignment = LD->getAlignment(); } else if (StoreSDNode *ST = dyn_cast(N)) { Ptr = ST->getBasePtr(); VT = ST->getMemoryVT(); Alignment = ST->getAlignment(); isLoad = false; } else return false; // PowerPC doesn't have preinc load/store instructions for vectors (except // for QPX, which does have preinc r+r forms). if (VT.isVector()) { if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) { return false; } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) { AM = ISD::PRE_INC; return true; } } if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) { // Common code will reject creating a pre-inc form if the base pointer // is a frame index, or if N is a store and the base pointer is either // the same as or a predecessor of the value being stored. Check for // those situations here, and try with swapped Base/Offset instead. bool Swap = false; if (isa(Base) || isa(Base)) Swap = true; else if (!isLoad) { SDValue Val = cast(N)->getValue(); if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode())) Swap = true; } if (Swap) std::swap(Base, Offset); AM = ISD::PRE_INC; return true; } // LDU/STU can only handle immediates that are a multiple of 4. if (VT != MVT::i64) { if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0)) return false; } else { // LDU/STU need an address with at least 4-byte alignment. 
if (Alignment < 4) return false; if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4)) return false; } if (LoadSDNode *LD = dyn_cast(N)) { // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of // sext i32 to i64 when addr mode is r+i. if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 && LD->getExtensionType() == ISD::SEXTLOAD && isa(Offset)) return false; } AM = ISD::PRE_INC; return true; } //===----------------------------------------------------------------------===// // LowerOperation implementation //===----------------------------------------------------------------------===// /// Return true if we should reference labels using a PICBase, set the HiOpFlags /// and LoOpFlags to the target MO flags. static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV = nullptr) { HiOpFlags = PPCII::MO_HA; LoOpFlags = PPCII::MO_LO; // Don't use the pic base if not in PIC relocation model. if (IsPIC) { HiOpFlags |= PPCII::MO_PIC_FLAG; LoOpFlags |= PPCII::MO_PIC_FLAG; } // If this is a reference to a global value that requires a non-lazy-ptr, make // sure that instruction lowering adds it. if (GV && Subtarget.hasLazyResolverStub(GV)) { HiOpFlags |= PPCII::MO_NLP_FLAG; LoOpFlags |= PPCII::MO_NLP_FLAG; if (GV->hasHiddenVisibility()) { HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; } } } static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG) { SDLoc DL(HiPart); EVT PtrVT = HiPart.getValueType(); SDValue Zero = DAG.getConstant(0, DL, PtrVT); SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero); SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero); // With PIC, the first instruction is actually "GR+hi(&G)". if (isPIC) Hi = DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi); // Generate non-pic code that has direct accesses to the constant pool. // The address of the global is just (hi(&g)+lo(&g)). return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo); } static void setUsesTOCBasePtr(MachineFunction &MF) { PPCFunctionInfo *FuncInfo = MF.getInfo(); FuncInfo->setUsesTOCBasePtr(); } static void setUsesTOCBasePtr(SelectionDAG &DAG) { setUsesTOCBasePtr(DAG.getMachineFunction()); } static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit, SDValue GA) { EVT VT = Is64Bit ? MVT::i64 : MVT::i32; SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); SDValue Ops[] = { GA, Reg }; return DAG.getMemIntrinsicNode( PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT, MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, MachineMemOperand::MOLoad); } SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); ConstantPoolSDNode *CP = cast(Op); const Constant *C = CP->getConstVal(); // 64-bit SVR4 ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. 
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); return getTOCEntry(DAG, SDLoc(CP), true, GA); } unsigned MOHiFlag, MOLoFlag; bool IsPIC = isPositionIndependent(); getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), PPCII::MO_PIC_FLAG); return getTOCEntry(DAG, SDLoc(CP), false, GA); } SDValue CPIHi = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag); SDValue CPILo = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag); return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG); } // For 64-bit PowerPC, prefer the more compact relative encodings. // This trades 32 bits per jump table entry for one or two instructions // on the jump site. unsigned PPCTargetLowering::getJumpTableEncoding() const { if (isJumpTableRelative()) return MachineJumpTableInfo::EK_LabelDifference32; return TargetLowering::getJumpTableEncoding(); } bool PPCTargetLowering::isJumpTableRelative() const { if (Subtarget.isPPC64()) return true; return TargetLowering::isJumpTableRelative(); } SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const { if (!Subtarget.isPPC64()) return TargetLowering::getPICJumpTableRelocBase(Table, DAG); switch (getTargetMachine().getCodeModel()) { case CodeModel::Small: case CodeModel::Medium: return TargetLowering::getPICJumpTableRelocBase(Table, DAG); default: return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(), getPointerTy(DAG.getDataLayout())); } } const MCExpr * PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const { if (!Subtarget.isPPC64()) return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); switch (getTargetMachine().getCodeModel()) { case CodeModel::Small: case CodeModel::Medium: return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); default: return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx); } } SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast(Op); // 64-bit SVR4 ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); return getTOCEntry(DAG, SDLoc(JT), true, GA); } unsigned MOHiFlag, MOLoFlag; bool IsPIC = isPositionIndependent(); getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, PPCII::MO_PIC_FLAG); return getTOCEntry(DAG, SDLoc(GA), false, GA); } SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag); return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG); } SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); BlockAddressSDNode *BASDN = cast(Op); const BlockAddress *BA = BASDN->getBlockAddress(); // 64-bit SVR4 ABI code is always position-independent. // The actual BlockAddress is stored in the TOC. 
if (Subtarget.isSVR4ABI() && isPositionIndependent()) { if (Subtarget.isPPC64()) setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); return getTOCEntry(DAG, SDLoc(BASDN), Subtarget.isPPC64(), GA); } unsigned MOHiFlag, MOLoFlag; bool IsPIC = isPositionIndependent(); getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag); SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag); return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG); } SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // FIXME: TLS addresses currently use medium model code sequences, // which is the most useful form. Eventually support for small and // large models could be added if users need it, at the cost of // additional complexity. GlobalAddressSDNode *GA = cast(Op); if (DAG.getTarget().useEmulatedTLS()) return LowerToTLSEmulatedModel(GA, DAG); SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); bool is64bit = Subtarget.isPPC64(); const Module *M = DAG.getMachineFunction().getFunction().getParent(); PICLevel::Level picLevel = M->getPICLevel(); TLSModel::Model Model = getTargetMachine().getTLSModel(GV); if (Model == TLSModel::LocalExec) { SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_HA); SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_LO); SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64) : DAG.getRegister(PPC::R2, MVT::i32); SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); } if (Model == TLSModel::InitialExec) { SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLS); SDValue GOTPtr; if (is64bit) { setUsesTOCBasePtr(DAG); SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA); } else GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT); SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr); return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS); } if (Model == TLSModel::GeneralDynamic) { SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); SDValue GOTPtr; if (is64bit) { setUsesTOCBasePtr(DAG); SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, GOTReg, TGA); } else { if (picLevel == PICLevel::SmallPIC) GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); else GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); } return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT, GOTPtr, TGA, TGA); } if (Model == TLSModel::LocalDynamic) { SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); SDValue GOTPtr; if (is64bit) { setUsesTOCBasePtr(DAG); SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, GOTReg, TGA); } else { if (picLevel == PICLevel::SmallPIC) GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); else GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); } SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl, PtrVT, GOTPtr, TGA, TGA); SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT, TLSAddr, TGA); return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); } llvm_unreachable("Unknown TLS model!"); } SDValue 
PPCTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); GlobalAddressSDNode *GSDN = cast(Op); SDLoc DL(GSDN); const GlobalValue *GV = GSDN->getGlobal(); // 64-bit SVR4 ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); return getTOCEntry(DAG, DL, true, GA); } unsigned MOHiFlag, MOLoFlag; bool IsPIC = isPositionIndependent(); getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV); if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), PPCII::MO_PIC_FLAG); return getTOCEntry(DAG, DL, false, GA); } SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag); SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag); SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG); // If the global reference is actually to a non-lazy-pointer, we have to do an // extra load to get the address of the global. if (MOHiFlag & PPCII::MO_NLP_FLAG) Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); return Ptr; } SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast(Op.getOperand(2))->get(); SDLoc dl(Op); if (Op.getValueType() == MVT::v2i64) { // When the operands themselves are v2i64 values, we need to do something // special because VSX has no underlying comparison operations for these. if (Op.getOperand(0).getValueType() == MVT::v2i64) { // Equality can be handled by casting to the legal type for Altivec // comparisons, everything else needs to be expanded. if (CC == ISD::SETEQ || CC == ISD::SETNE) { return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, DAG.getSetCC(dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)), DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)), CC)); } return SDValue(); } // We handle most of these in the usual way. return Op; } // If we're comparing for equality to zero, expose the fact that this is // implemented as a ctlz/srl pair on ppc, so that the dag combiner can // fold the new nodes. if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG)) return V; if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { // Leave comparisons against 0 and -1 alone for now, since they're usually // optimized. FIXME: revisit this when we can custom lower all setcc // optimizations. if (C->isAllOnesValue() || C->isNullValue()) return SDValue(); } // If we have an integer seteq/setne, turn it into a compare against zero // by xor'ing the rhs with the lhs, which is faster than setting a // condition register, reading it back out, and masking the correct bit. The // normal approach here uses sub to do this instead of xor. Using xor exposes // the result to other bit-twiddling opportunities. 
EVT LHSVT = Op.getOperand(0).getValueType(); if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { EVT VT = Op.getValueType(); SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0), Op.getOperand(1)); return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC); } return SDValue(); } SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); EVT VT = Node->getValueType(0); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue InChain = Node->getOperand(0); SDValue VAListPtr = Node->getOperand(1); const Value *SV = cast(Node->getOperand(2))->getValue(); SDLoc dl(Node); assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); // gpr_index SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, VAListPtr, MachinePointerInfo(SV), MVT::i8); InChain = GprIndex.getValue(1); if (VT == MVT::i64) { // Check if GprIndex is even SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, DAG.getConstant(1, dl, MVT::i32)); SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd, DAG.getConstant(0, dl, MVT::i32), ISD::SETNE); SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, DAG.getConstant(1, dl, MVT::i32)); // Align GprIndex to be even if it isn't GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne, GprIndex); } // fpr index is 1 byte after gpr SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, DAG.getConstant(1, dl, MVT::i32)); // fpr SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, FprPtr, MachinePointerInfo(SV), MVT::i8); InChain = FprIndex.getValue(1); SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, DAG.getConstant(8, dl, MVT::i32)); SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, DAG.getConstant(4, dl, MVT::i32)); // areas SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo()); InChain = OverflowArea.getValue(1); SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo()); InChain = RegSaveArea.getValue(1); // select overflow_area if index > 8 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, DAG.getConstant(8, dl, MVT::i32), ISD::SETLT); // adjustment constant gpr_index * 4/8 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, DAG.getConstant(VT.isInteger() ? 4 : 8, dl, MVT::i32)); // OurReg = RegSaveArea + RegConstant SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea, RegConstant); // Floating types are 32 bytes into RegSaveArea if (VT.isFloatingPoint()) OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg, DAG.getConstant(32, dl, MVT::i32)); // increase {f,g}pr_index by 1 (or 2 if VT is i64) SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl, MVT::i32)); InChain = DAG.getTruncStore(InChain, dl, IndexPlus1, VT.isInteger() ? VAListPtr : FprPtr, MachinePointerInfo(SV), MVT::i8); // determine if we should load from reg_save_area or overflow_area SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); // increase overflow_area by 4/8 if gpr/fpr > 8 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, DAG.getConstant(VT.isInteger() ? 
4 : 8, dl, MVT::i32)); OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, OverflowAreaPlusN); InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr, MachinePointerInfo(), MVT::i32); return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo()); } SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only"); // We have to copy the entire va_list struct: // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2), DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true, false, MachinePointerInfo(), MachinePointerInfo()); } SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { return Op.getOperand(0); } SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function SDValue Nest = Op.getOperand(3); // 'nest' parameter value SDLoc dl(Op); EVT PtrVT = getPointerTy(DAG.getDataLayout()); bool isPPC64 = (PtrVT == MVT::i64); Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Ty = IntPtrTy; Entry.Node = Trmp; Args.push_back(Entry); // TrampSize == (isPPC64 ? 48 : 40); Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl, isPPC64 ? MVT::i64 : MVT::i32); Args.push_back(Entry); Entry.Node = FPtr; Args.push_back(Entry); Entry.Node = Nest; Args.push_back(Entry); // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(Chain).setLibCallee( CallingConv::C, Type::getVoidTy(*DAG.getContext()), DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args)); std::pair CallResult = LowerCallTo(CLI); return CallResult.second; } SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); PPCFunctionInfo *FuncInfo = MF.getInfo(); EVT PtrVT = getPointerTy(MF.getDataLayout()); SDLoc dl(Op); if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), MachinePointerInfo(SV)); } // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. // We suppose the given va_list is already allocated. // // typedef struct { // char gpr; /* index into the array of 8 GPRs // * stored in the register save area // * gpr=0 corresponds to r3, // * gpr=1 to r4, etc. // */ // char fpr; /* index into the array of 8 FPRs // * stored in the register save area // * fpr=0 corresponds to f1, // * fpr=1 to f2, etc. 
// */ // char *overflow_arg_area; // /* location on stack that holds // * the next overflow argument // */ // char *reg_save_area; // /* where r3:r10 and f1:f8 (if saved) // * are stored // */ // } va_list[1]; SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32); SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32); SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), PtrVT); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); uint64_t FrameOffset = PtrVT.getSizeInBits()/8; SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT); uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1; SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT); uint64_t FPROffset = 1; SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT); const Value *SV = cast(Op.getOperand(2))->getValue(); // Store first byte : number of int regs SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1), MachinePointerInfo(SV), MVT::i8); uint64_t nextOffset = FPROffset; SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), ConstFPROffset); // Store second byte : number of float regs SDValue secondStore = DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, MachinePointerInfo(SV, nextOffset), MVT::i8); nextOffset += StackOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); // Store second word : arguments given on stack SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, MachinePointerInfo(SV, nextOffset)); nextOffset += FrameOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); // Store third word : arguments given in registers return DAG.getStore(thirdStore, dl, FR, nextPtr, MachinePointerInfo(SV, nextOffset)); } #include "PPCGenCallingConv.inc" // Function whose sole purpose is to kill compiler warnings // stemming from unused functions included from PPCGenCallingConv.inc. CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const { return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS; } bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { return true; } bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { static const MCPhysReg ArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; const unsigned NumArgRegs = array_lengthof(ArgRegs); unsigned RegNum = State.getFirstUnallocated(ArgRegs); // Skip one register if the first unallocated register has an even register // number and there are still argument registers available which have not been // allocated yet. RegNum is actually an index into ArgRegs, which means we // need to skip a register if RegNum is odd. if (RegNum != NumArgRegs && RegNum % 2 == 1) { State.AllocateReg(ArgRegs[RegNum]); } // Always return false here, as this function only makes sure that the first // unallocated register has an odd register number and does not actually // allocate a register for the current argument. 
return false; } bool llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { static const MCPhysReg ArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; const unsigned NumArgRegs = array_lengthof(ArgRegs); unsigned RegNum = State.getFirstUnallocated(ArgRegs); int RegsLeft = NumArgRegs - RegNum; // Skip if there is not enough registers left for long double type (4 gpr regs // in soft float mode) and put long double argument on the stack. if (RegNum != NumArgRegs && RegsLeft < 4) { for (int i = 0; i < RegsLeft; i++) { State.AllocateReg(ArgRegs[RegNum + i]); } } return false; } bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { static const MCPhysReg ArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 }; const unsigned NumArgRegs = array_lengthof(ArgRegs); unsigned RegNum = State.getFirstUnallocated(ArgRegs); // If there is only one Floating-point register left we need to put both f64 // values of a split ppc_fp128 value on the stack. if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) { State.AllocateReg(ArgRegs[RegNum]); } // Always return false here, as this function only makes sure that the two f64 // values a ppc_fp128 value is split into are both passed in registers or both // passed on the stack and does not actually allocate a register for the // current argument. return false; } /// FPR - The set of FP registers that should be allocated for arguments, /// on Darwin. static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13}; /// QFPR - The set of QPX registers that should be allocated for arguments. static const MCPhysReg QFPR[] = { PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13}; /// CalculateStackSlotSize - Calculates the size reserved for this argument on /// the stack. static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize) { unsigned ArgSize = ArgVT.getStoreSize(); if (Flags.isByVal()) ArgSize = Flags.getByValSize(); // Round up to multiples of the pointer size, except for array members, // which are always packed. if (!Flags.isInConsecutiveRegs()) ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; return ArgSize; } /// CalculateStackSlotAlignment - Calculates the alignment of this argument /// on the stack. static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize) { unsigned Align = PtrByteSize; // Altivec parameters are padded to a 16 byte boundary. if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || ArgVT == MVT::v1i128 || ArgVT == MVT::f128) Align = 16; // QPX vector types stored in double-precision are padded to a 32 byte // boundary. else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1) Align = 32; // ByVal parameters are aligned as requested. 
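// (For example, a byval aggregate carrying a requested 32-byte alignment
// raises Align to 32 below, since that exceeds the pointer size.)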
if (Flags.isByVal()) { unsigned BVAlign = Flags.getByValAlign(); if (BVAlign > PtrByteSize) { if (BVAlign % PtrByteSize != 0) llvm_unreachable( "ByVal alignment is not a multiple of the pointer size"); Align = BVAlign; } } // Array members are always packed to their original alignment. if (Flags.isInConsecutiveRegs()) { // If the array member was split into multiple registers, the first // needs to be aligned to the size of the full type. (Except for // ppcf128, which is only aligned as its f64 components.) if (Flags.isSplit() && OrigVT != MVT::ppcf128) Align = OrigVT.getStoreSize(); else Align = ArgVT.getStoreSize(); } return Align; } /// CalculateStackSlotUsed - Return whether this argument will use its /// stack slot (instead of being passed in registers). ArgOffset, /// AvailableFPRs, and AvailableVRs must hold the current argument /// position, and will be updated to account for this argument. static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs, bool HasQPX) { bool UseMemory = false; // Respect alignment of argument on the stack. unsigned Align = CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; // If there's no space left in the argument save area, we must // use memory (this check also catches zero-sized arguments). if (ArgOffset >= LinkageSize + ParamAreaSize) UseMemory = true; // Allocate argument on the stack. ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); if (Flags.isInConsecutiveRegsLast()) ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; // If we overran the argument save area, we must use memory // (this check catches arguments passed partially in memory) if (ArgOffset > LinkageSize + ParamAreaSize) UseMemory = true; // However, if the argument is actually passed in an FPR or a VR, // we don't use memory after all. if (!Flags.isByVal()) { if (ArgVT == MVT::f32 || ArgVT == MVT::f64 || // QPX registers overlap with the scalar FP registers. (HasQPX && (ArgVT == MVT::v4f32 || ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1))) if (AvailableFPRs > 0) { --AvailableFPRs; return false; } if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || ArgVT == MVT::v1i128 || ArgVT == MVT::f128) if (AvailableVRs > 0) { --AvailableVRs; return false; } } return UseMemory; } /// EnsureStackAlignment - Round stack frame size up from NumBytes to /// ensure minimum alignment required for target. 
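/// For example, with a 16-byte target stack alignment the mask is 15, and a
/// requested frame size of 100 bytes is rounded up to 112.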
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
                                     unsigned NumBytes) {
  unsigned TargetAlign = Lowering->getStackAlignment();
  unsigned AlignMask = TargetAlign - 1;
  NumBytes = (NumBytes + AlignMask) & ~AlignMask;
  return NumBytes;
}

SDValue PPCTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (Subtarget.isSVR4ABI()) {
    if (Subtarget.isPPC64())
      return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
                                         dl, DAG, InVals);
    else
      return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
                                         dl, DAG, InVals);
  } else {
    return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
                                       dl, DAG, InVals);
  }
}

SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  // 32-bit SVR4 ABI Stack Frame Layout:
  //              +-----------------------------------+
  //        +-->  |            Back chain             |
  //        |     +-----------------------------------+
  //        |     | Floating-point register save area |
  //        |     +-----------------------------------+
  //        |     |    General register save area     |
  //        |     +-----------------------------------+
  //        |     |          CR save word             |
  //        |     +-----------------------------------+
  //        |     |         VRSAVE save word          |
  //        |     +-----------------------------------+
  //        |     |         Alignment padding         |
  //        |     +-----------------------------------+
  //        |     |     Vector register save area     |
  //        |     +-----------------------------------+
  //        |     |       Local variable space        |
  //        |     +-----------------------------------+
  //        |     |        Parameter list area        |
  //        |     +-----------------------------------+
  //        |     |           LR save word            |
  //        |     +-----------------------------------+
  // SP-->  +---  |            Back chain             |
  //              +-----------------------------------+
  //
  // Specifications:
  //   System V Application Binary Interface PowerPC Processor Supplement
  //   AltiVec Technology Programming Interface Manual

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 4;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                    *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, PtrByteSize);
  if (useSoftFloat() || hasSPE())
    CCInfo.PreAnalyzeFormalArguments(Ins);

  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
  CCInfo.clearWasPPCF128();

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    // Arguments stored in registers.
if (VA.isRegLoc()) { const TargetRegisterClass *RC; EVT ValVT = VA.getValVT(); switch (ValVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("ValVT not supported by formal arguments Lowering"); case MVT::i1: case MVT::i32: RC = &PPC::GPRCRegClass; break; case MVT::f32: if (Subtarget.hasP8Vector()) RC = &PPC::VSSRCRegClass; else if (Subtarget.hasSPE()) RC = &PPC::SPE4RCRegClass; else RC = &PPC::F4RCRegClass; break; case MVT::f64: if (Subtarget.hasVSX()) RC = &PPC::VSFRCRegClass; else if (Subtarget.hasSPE()) RC = &PPC::SPERCRegClass; else RC = &PPC::F8RCRegClass; break; case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: RC = &PPC::VRRCRegClass; break; case MVT::v4f32: RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass; break; case MVT::v2f64: case MVT::v2i64: RC = &PPC::VRRCRegClass; break; case MVT::v4f64: RC = &PPC::QFRCRegClass; break; case MVT::v4i1: RC = &PPC::QBRCRegClass; break; } // Transform the arguments stored in physical registers into virtual ones. unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT == MVT::i1 ? MVT::i32 : ValVT); if (ValVT == MVT::i1) ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue); InVals.push_back(ArgValue); } else { // Argument stored in memory. assert(VA.isMemLoc()); unsigned ArgSize = VA.getLocVT().getStoreSize(); int FI = MFI.CreateFixedObject(ArgSize, VA.getLocMemOffset(), isImmutable); // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back( DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo())); } } // Assign locations to all of the incoming aggregate by value arguments. // Aggregates passed by value are stored in the local variable space of the // caller's stack frame, right above the parameter list area. SmallVector ByValArgLocs; CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), ByValArgLocs, *DAG.getContext()); // Reserve stack space for the allocations in CCInfo. CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal); // Area that is at least reserved in the caller of this function. unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); MinReservedArea = std::max(MinReservedArea, LinkageSize); // Set the size that is at least reserved in caller of this function. Tail // call optimized function's reserved stack space needs to be aligned so that // taking the difference between two stack areas will result in an aligned // stack. MinReservedArea = EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); FuncInfo->setMinReservedArea(MinReservedArea); SmallVector MemOps; // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { static const MCPhysReg GPArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); static const MCPhysReg FPArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 }; unsigned NumFPArgRegs = array_lengthof(FPArgRegs); if (useSoftFloat() || hasSPE()) NumFPArgRegs = 0; FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs)); FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs)); // Make room for NumGPArgRegs and NumFPArgRegs. 
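// (With hard float this is 8 GPRs * 4 bytes + 8 FPRs * 8 bytes = 96 bytes;
// under soft-float or SPE, NumFPArgRegs is zero and only 32 bytes are needed.)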
int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8; FuncInfo->setVarArgsStackOffset( MFI.CreateFixedObject(PtrVT.getSizeInBits()/8, CCInfo.getNextStackOffset(), true)); FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); // The fixed integer arguments of a variadic function are stored to the // VarArgsFrameIndex on the stack so that they may be loaded by // dereferencing the result of va_next. for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) { // Get an existing live-in vreg, or add a new one. unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]); if (!VReg) VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6 // is set. // The double arguments are stored to the VarArgsFrameIndex // on the stack. for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) { // Get an existing live-in vreg, or add a new one. unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]); if (!VReg) VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); MemOps.push_back(Store); // Increment the address by eight for the next argument to store SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } } if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return Chain; } // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG, SDValue ArgVal, const SDLoc &dl) const { if (Flags.isSExt()) ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, DAG.getValueType(ObjectVT)); else if (Flags.isZExt()) ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, DAG.getValueType(ObjectVT)); return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal); } SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { // TODO: add description of PPC stack frame format, or at least some docs. // bool isELFv2ABI = Subtarget.isELFv2ABI(); bool isLittleEndian = Subtarget.isLittleEndian(); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo(); assert(!(CallConv == CallingConv::Fast && isVarArg) && "fastcc not supported on varargs functions"); EVT PtrVT = getPointerTy(MF.getDataLayout()); // Potential tail calls could cause overwriting of argument stack slots. 
bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && (CallConv == CallingConv::Fast)); unsigned PtrByteSize = 8; unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); static const MCPhysReg GPR[] = { PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; const unsigned Num_GPR_Regs = array_lengthof(GPR); const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13; const unsigned Num_VR_Regs = array_lengthof(VR); const unsigned Num_QFPR_Regs = Num_FPR_Regs; // Do a first pass over the arguments to determine whether the ABI // guarantees that our caller has allocated the parameter save area // on its stack frame. In the ELFv1 ABI, this is always the case; // in the ELFv2 ABI, it is true if this is a vararg function or if // any parameter is located in a stack slot. bool HasParameterArea = !isELFv2ABI || isVarArg; unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize; unsigned NumBytes = LinkageSize; unsigned AvailableFPRs = Num_FPR_Regs; unsigned AvailableVRs = Num_VR_Regs; for (unsigned i = 0, e = Ins.size(); i != e; ++i) { if (Ins[i].Flags.isNest()) continue; if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags, PtrByteSize, LinkageSize, ParamAreaSize, NumBytes, AvailableFPRs, AvailableVRs, Subtarget.hasQPX())) HasParameterArea = true; } // Add DAG nodes to load the arguments or copy them out of registers. On // entry to a function on PPC, the arguments start after the linkage area, // although the first ones are often in registers. unsigned ArgOffset = LinkageSize; unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; unsigned &QFPR_idx = FPR_idx; SmallVector MemOps; Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin(); unsigned CurArgIdx = 0; for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { SDValue ArgVal; bool needsLoad = false; EVT ObjectVT = Ins[ArgNo].VT; EVT OrigVT = Ins[ArgNo].ArgVT; unsigned ObjSize = ObjectVT.getStoreSize(); unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; if (Ins[ArgNo].isOrigArg()) { std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); CurArgIdx = Ins[ArgNo].getOrigArgIndex(); } // We re-align the argument offset for each argument, except when using the // fast calling convention, when we need to make sure we do that only when // we'll actually use a stack slot. unsigned CurArgOffset, Align; auto ComputeArgOffset = [&]() { /* Respect alignment of argument on the stack. */ Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize); ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; CurArgOffset = ArgOffset; }; if (CallConv != CallingConv::Fast) { ComputeArgOffset(); /* Compute GPR index associated with argument offset. */ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; GPR_idx = std::min(GPR_idx, Num_GPR_Regs); } // FIXME the codegen can be much improved in some cases. // We do not have to keep everything in memory. if (Flags.isByVal()) { assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); if (CallConv == CallingConv::Fast) ComputeArgOffset(); // ObjSize is the true size, ArgSize rounded up to multiple of registers. ObjSize = Flags.getByValSize(); ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; // Empty aggregate parameters do not take up registers. Examples: // struct { } a; // union { } b; // int c[0]; // etc. 
However, we have to provide a place-holder in InVals, so // pretend we have an 8-byte item at the current address for that // purpose. if (!ObjSize) { int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(FIN); continue; } // Create a stack object covering all stack doublewords occupied // by the argument. If the argument is (fully or partially) on // the stack, or if the argument is fully in registers but the // caller has allocated the parameter save anyway, we can refer // directly to the caller's stack frame. Otherwise, create a // local copy in our own frame. int FI; if (HasParameterArea || ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize) FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true); else FI = MFI.CreateStackObject(ArgSize, Align, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); // Handle aggregates smaller than 8 bytes. if (ObjSize < PtrByteSize) { // The value of the object is its address, which differs from the // address of the enclosing doubleword on big-endian systems. SDValue Arg = FIN; if (!isLittleEndian) { SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT); Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff); } InVals.push_back(Arg); if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); FuncInfo->addLiveInAttr(VReg, Flags); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store; if (ObjSize==1 || ObjSize==2 || ObjSize==4) { EVT ObjType = (ObjSize == 1 ? MVT::i8 : (ObjSize == 2 ? MVT::i16 : MVT::i32)); Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, MachinePointerInfo(&*FuncArg), ObjType); } else { // For sizes that don't fit a truncating store (3, 5, 6, 7), // store the whole register as-is to the parameter save area // slot. Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(&*FuncArg)); } MemOps.push_back(Store); } // Whether we copied from a register or not, advance the offset // into the parameter save area by a full doubleword. ArgOffset += PtrByteSize; continue; } // The value of the object is its address, which is the address of // its first stack doubleword. InVals.push_back(FIN); // Store whatever pieces of the object are in registers to memory. for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { if (GPR_idx == Num_GPR_Regs) break; unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); FuncInfo->addLiveInAttr(VReg, Flags); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Addr = FIN; if (j) { SDValue Off = DAG.getConstant(j, dl, PtrVT); Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off); } SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr, MachinePointerInfo(&*FuncArg, j)); MemOps.push_back(Store); ++GPR_idx; } ArgOffset += ArgSize; continue; } switch (ObjectVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled argument type!"); case MVT::i1: case MVT::i32: case MVT::i64: if (Flags.isNest()) { // The 'nest' parameter, if any, is passed in R11. unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); break; } // These can be scalar arguments or elements of an integer array type // passed directly. Clang may use those instead of "byval" aggregate // types to avoid forcing arguments to memory unnecessarily. 
if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); FuncInfo->addLiveInAttr(VReg, Flags); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); } else { if (CallConv == CallingConv::Fast) ComputeArgOffset(); needsLoad = true; ArgSize = PtrByteSize; } if (CallConv != CallingConv::Fast || needsLoad) ArgOffset += 8; break; case MVT::f32: case MVT::f64: // These can be scalar arguments or elements of a float array type // passed directly. The latter are used to implement ELFv2 homogenous // float aggregates. if (FPR_idx != Num_FPR_Regs) { unsigned VReg; if (ObjectVT == MVT::f32) VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasP8Vector() ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass); else VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++FPR_idx; } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) { // FIXME: We may want to re-enable this for CallingConv::Fast on the P8 // once we support fp <-> gpr moves. // This can only ever happen in the presence of f32 array types, // since otherwise we never run out of FPRs before running out // of GPRs. unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); FuncInfo->addLiveInAttr(VReg, Flags); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::f32) { if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0)) ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal, DAG.getConstant(32, dl, MVT::i32)); ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); } ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal); } else { if (CallConv == CallingConv::Fast) ComputeArgOffset(); needsLoad = true; } // When passing an array of floats, the array occupies consecutive // space in the argument area; only round up to the next doubleword // at the end of the array. Otherwise, each float takes 8 bytes. if (CallConv != CallingConv::Fast || needsLoad) { ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize; ArgOffset += ArgSize; if (Flags.isInConsecutiveRegsLast()) ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; } break; case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: case MVT::v2f64: case MVT::v2i64: case MVT::v1i128: case MVT::f128: if (!Subtarget.hasQPX()) { // These can be scalar arguments or elements of a vector array type // passed directly. The latter are used to implement ELFv2 homogenous // vector aggregates. if (VR_idx != Num_VR_Regs) { unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++VR_idx; } else { if (CallConv == CallingConv::Fast) ComputeArgOffset(); needsLoad = true; } if (CallConv != CallingConv::Fast || needsLoad) ArgOffset += 16; break; } // not QPX assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 && "Invalid QPX parameter type"); /* fall through */ case MVT::v4f64: case MVT::v4i1: // QPX vectors are treated like their scalar floating-point subregisters // (except that they're larger). unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 
16 : 32; if (QFPR_idx != Num_QFPR_Regs) { const TargetRegisterClass *RC; switch (ObjectVT.getSimpleVT().SimpleTy) { case MVT::v4f64: RC = &PPC::QFRCRegClass; break; case MVT::v4f32: RC = &PPC::QSRCRegClass; break; default: RC = &PPC::QBRCRegClass; break; } unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++QFPR_idx; } else { if (CallConv == CallingConv::Fast) ComputeArgOffset(); needsLoad = true; } if (CallConv != CallingConv::Fast || needsLoad) ArgOffset += Sz; break; } // We need to load the argument to a virtual register if we determined // above that we ran out of physical registers of the appropriate type. if (needsLoad) { if (ObjSize < ArgSize && !isLittleEndian) CurArgOffset += ArgSize - ObjSize; int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo()); } InVals.push_back(ArgVal); } // Area that is at least reserved in the caller of this function. unsigned MinReservedArea; if (HasParameterArea) MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize); else MinReservedArea = LinkageSize; // Set the size that is at least reserved in caller of this function. Tail // call optimized functions' reserved stack space needs to be aligned so that // taking the difference between two stack areas will result in an aligned // stack. MinReservedArea = EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); FuncInfo->setMinReservedArea(MinReservedArea); // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { int Depth = ArgOffset; FuncInfo->setVarArgsFrameIndex( MFI.CreateFixedObject(PtrByteSize, Depth, true)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); // If this function is vararg, store any remaining integer argument regs // to their spots on the stack so that they may be loaded by dereferencing // the result of va_next. for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; GPR_idx < Num_GPR_Regs; ++GPR_idx) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } } if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return Chain; } SDValue PPCTargetLowering::LowerFormalArguments_Darwin( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { // TODO: add description of PPC stack frame format, or at least some docs. // MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo(); EVT PtrVT = getPointerTy(MF.getDataLayout()); bool isPPC64 = PtrVT == MVT::i64; // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && (CallConv == CallingConv::Fast)); unsigned PtrByteSize = isPPC64 ? 
8 : 4; unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); unsigned ArgOffset = LinkageSize; // Area that is at least reserved in caller of this function. unsigned MinReservedArea = ArgOffset; static const MCPhysReg GPR_32[] = { // 32-bit registers. PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; static const MCPhysReg GPR_64[] = { // 64-bit registers. PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; const unsigned Num_GPR_Regs = array_lengthof(GPR_32); const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13; const unsigned Num_VR_Regs = array_lengthof( VR); unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; // In 32-bit non-varargs functions, the stack space for vectors is after the // stack space for non-vectors. We do not use this space unless we have // too many vectors to fit in registers, something that only occurs in // constructed examples:), but we have to walk the arglist to figure // that out...for the pathological case, compute VecArgOffset as the // start of the vector parameter area. Computing VecArgOffset is the // entire point of the following loop. unsigned VecArgOffset = ArgOffset; if (!isVarArg && !isPPC64) { for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { EVT ObjectVT = Ins[ArgNo].VT; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; if (Flags.isByVal()) { // ObjSize is the true size, ArgSize rounded up to multiple of regs. unsigned ObjSize = Flags.getByValSize(); unsigned ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; VecArgOffset += ArgSize; continue; } switch(ObjectVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled argument type!"); case MVT::i1: case MVT::i32: case MVT::f32: VecArgOffset += 4; break; case MVT::i64: // PPC64 case MVT::f64: // FIXME: We are guaranteed to be !isPPC64 at this point. // Does MVT::i64 apply? VecArgOffset += 8; break; case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: // Nothing to do, we're only looking at Nonvector args here. break; } } } // We've found where the vector parameter area in memory is. Skip the // first 12 parameters; these don't use that memory. VecArgOffset = ((VecArgOffset+15)/16)*16; VecArgOffset += 12*16; // Add DAG nodes to load the arguments or copy them out of registers. On // entry to a function on PPC, the arguments start after the linkage area, // although the first ones are often in registers. SmallVector MemOps; unsigned nAltivecParamsAtEnd = 0; Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin(); unsigned CurArgIdx = 0; for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { SDValue ArgVal; bool needsLoad = false; EVT ObjectVT = Ins[ArgNo].VT; unsigned ObjSize = ObjectVT.getSizeInBits()/8; unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; if (Ins[ArgNo].isOrigArg()) { std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); CurArgIdx = Ins[ArgNo].getOrigArgIndex(); } unsigned CurArgOffset = ArgOffset; // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. 
if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { if (isVarArg || isPPC64) { MinReservedArea = ((MinReservedArea+15)/16)*16; MinReservedArea += CalculateStackSlotSize(ObjectVT, Flags, PtrByteSize); } else nAltivecParamsAtEnd++; } else // Calculate min reserved area. MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, Flags, PtrByteSize); // FIXME the codegen can be much improved in some cases. // We do not have to keep everything in memory. if (Flags.isByVal()) { assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); // ObjSize is the true size, ArgSize rounded up to multiple of registers. ObjSize = Flags.getByValSize(); ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; // Objects of size 1 and 2 are right justified, everything else is // left justified. This means the memory address is adjusted forwards. if (ObjSize==1 || ObjSize==2) { CurArgOffset = CurArgOffset + (4 - ObjSize); } // The value of the object is its address. int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(FIN); if (ObjSize==1 || ObjSize==2) { if (GPR_idx != Num_GPR_Regs) { unsigned VReg; if (isPPC64) VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); else VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16; SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(&*FuncArg), ObjType); MemOps.push_back(Store); ++GPR_idx; } ArgOffset += PtrByteSize; continue; } for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { // Store whatever pieces of the object are in registers // to memory. ArgOffset will be the address of the beginning // of the object. if (GPR_idx != Num_GPR_Regs) { unsigned VReg; if (isPPC64) VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); else VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(&*FuncArg, j)); MemOps.push_back(Store); ++GPR_idx; ArgOffset += PtrByteSize; } else { ArgOffset += ArgSize - (ArgOffset-CurArgOffset); break; } } continue; } switch (ObjectVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled argument type!"); case MVT::i1: case MVT::i32: if (!isPPC64) { if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); if (ObjectVT == MVT::i1) ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal); ++GPR_idx; } else { needsLoad = true; ArgSize = PtrByteSize; } // All int arguments reserve stack space in the Darwin ABI. ArgOffset += PtrByteSize; break; } LLVM_FALLTHROUGH; case MVT::i64: // PPC64 if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); ++GPR_idx; } else { needsLoad = true; ArgSize = PtrByteSize; } // All int arguments reserve stack space in the Darwin ABI. 
ArgOffset += 8; break; case MVT::f32: case MVT::f64: // Every 4 bytes of argument space consumes one of the GPRs available for // argument passing. if (GPR_idx != Num_GPR_Regs) { ++GPR_idx; if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) ++GPR_idx; } if (FPR_idx != Num_FPR_Regs) { unsigned VReg; if (ObjectVT == MVT::f32) VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); else VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++FPR_idx; } else { needsLoad = true; } // All FP arguments reserve stack space in the Darwin ABI. ArgOffset += isPPC64 ? 8 : ObjSize; break; case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: // Note that vector arguments in registers don't reserve stack space, // except in varargs functions. if (VR_idx != Num_VR_Regs) { unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); if (isVarArg) { while ((ArgOffset % 16) != 0) { ArgOffset += PtrByteSize; if (GPR_idx != Num_GPR_Regs) GPR_idx++; } ArgOffset += 16; GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? } ++VR_idx; } else { if (!isVarArg && !isPPC64) { // Vectors go after all the nonvectors. CurArgOffset = VecArgOffset; VecArgOffset += 16; } else { // Vectors are aligned. ArgOffset = ((ArgOffset+15)/16)*16; CurArgOffset = ArgOffset; ArgOffset += 16; } needsLoad = true; } break; } // We need to load the argument to a virtual register if we determined above // that we ran out of physical registers of the appropriate type. if (needsLoad) { int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset + (ArgSize - ObjSize), isImmutable); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo()); } InVals.push_back(ArgVal); } // Allow for Altivec parameters at the end, if needed. if (nAltivecParamsAtEnd) { MinReservedArea = ((MinReservedArea+15)/16)*16; MinReservedArea += 16*nAltivecParamsAtEnd; } // Area that is at least reserved in the caller of this function. MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize); // Set the size that is at least reserved in caller of this function. Tail // call optimized functions' reserved stack space needs to be aligned so that // taking the difference between two stack areas will result in an aligned // stack. MinReservedArea = EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); FuncInfo->setMinReservedArea(MinReservedArea); // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { int Depth = ArgOffset; FuncInfo->setVarArgsFrameIndex( MFI.CreateFixedObject(PtrVT.getSizeInBits()/8, Depth, true)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); // If this function is vararg, store any remaining integer argument regs // to their spots on the stack so that they may be loaded by dereferencing // the result of va_next. 
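  // Illustrative sketch (hypothetical C, only to motivate the spill loop that
  // follows): a variadic callee such as
  //   int sum(int n, ...) {
  //     va_list ap; va_start(ap, n);
  //     int s = 0;
  //     for (int i = 0; i < n; ++i) s += va_arg(ap, int);
  //     va_end(ap);
  //     return s;
  //   }
  // reads its unnamed arguments by walking memory from the va_start frame
  // index created above, so any arguments that arrived in the remaining GPRs
  // must first be stored to their home slots, one pointer-sized slot at a
  // time.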
for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { unsigned VReg; if (isPPC64) VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); else VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } } if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return Chain; } /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be /// adjusted to accommodate the arguments for the tailcall. static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, unsigned ParamSize) { if (!isTailCall) return 0; PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo(); unsigned CallerMinReservedArea = FI->getMinReservedArea(); int SPDiff = (int)CallerMinReservedArea - (int)ParamSize; // Remember only if the new adjustment is bigger. if (SPDiff < FI->getTailCallSPDelta()) FI->setTailCallSPDelta(SPDiff); return SPDiff; } static bool isFunctionGlobalAddress(SDValue Callee); static bool callsShareTOCBase(const Function *Caller, SDValue Callee, const TargetMachine &TM) { // If !G, Callee can be an external symbol. GlobalAddressSDNode *G = dyn_cast(Callee); if (!G) return false; // The medium and large code models are expected to provide a sufficiently // large TOC to provide all data addressing needs of a module with a // single TOC. Since each module will be addressed with a single TOC then we // only need to check that caller and callee don't cross dso boundaries. if (CodeModel::Medium == TM.getCodeModel() || CodeModel::Large == TM.getCodeModel()) return TM.shouldAssumeDSOLocal(*Caller->getParent(), G->getGlobal()); // Otherwise we need to ensure callee and caller are in the same section, // since the linker may allocate multiple TOCs, and we don't know which // sections will belong to the same TOC base. const GlobalValue *GV = G->getGlobal(); if (!GV->isStrongDefinitionForLinker()) return false; // Any explicitly-specified sections and section prefixes must also match. // Also, if we're using -ffunction-sections, then each function is always in // a different section (the same is true for COMDAT functions). if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() || GV->getSection() != Caller->getSection()) return false; if (const auto *F = dyn_cast(GV)) { if (F->getSectionPrefix() != Caller->getSectionPrefix()) return false; } // If the callee might be interposed, then we can't assume the ultimate call // target will be in the same section. Even in cases where we can assume that // interposition won't happen, in any case where the linker might insert a // stub to allow for interposition, we must generate code as though // interposition might occur. To understand why this matters, consider a // situation where: a -> b -> c where the arrows indicate calls. b and c are // in the same section, but a is in a different module (i.e. has a different // TOC base pointer). If the linker allows for interposition between b and c, // then it will generate a stub for the call edge between b and c which will // save the TOC pointer into the designated stack slot allocated by b. 
If we // return true here, and therefore allow a tail call between b and c, that // stack slot won't exist and the b -> c stub will end up saving b'c TOC base // pointer into the stack slot allocated by a (where the a -> b stub saved // a's TOC base pointer). If we're not considering a tail call, but rather, // whether a nop is needed after the call instruction in b, because the linker // will insert a stub, it might complain about a missing nop if we omit it // (although many don't complain in this case). if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV)) return false; return true; } static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl &Outs) { assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64()); const unsigned PtrByteSize = 8; const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); static const MCPhysReg GPR[] = { PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; const unsigned NumGPRs = array_lengthof(GPR); const unsigned NumFPRs = 13; const unsigned NumVRs = array_lengthof(VR); const unsigned ParamAreaSize = NumGPRs * PtrByteSize; unsigned NumBytes = LinkageSize; unsigned AvailableFPRs = NumFPRs; unsigned AvailableVRs = NumVRs; for (const ISD::OutputArg& Param : Outs) { if (Param.Flags.isNest()) continue; if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize, LinkageSize, ParamAreaSize, NumBytes, AvailableFPRs, AvailableVRs, Subtarget.hasQPX())) return true; } return false; } static bool hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS) { if (CS.arg_size() != CallerFn->arg_size()) return false; ImmutableCallSite::arg_iterator CalleeArgIter = CS.arg_begin(); ImmutableCallSite::arg_iterator CalleeArgEnd = CS.arg_end(); Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin(); for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) { const Value* CalleeArg = *CalleeArgIter; const Value* CallerArg = &(*CallerArgIter); if (CalleeArg == CallerArg) continue; // e.g. @caller([4 x i64] %a, [4 x i64] %b) { // tail call @callee([4 x i64] undef, [4 x i64] %b) // } // 1st argument of callee is undef and has the same type as caller. if (CalleeArg->getType() == CallerArg->getType() && isa(CalleeArg)) continue; return false; } return true; } // Returns true if TCO is possible between the callers and callees // calling conventions. static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC) { // Tail calls are possible with fastcc and ccc. auto isTailCallableCC = [] (CallingConv::ID CC){ return CC == CallingConv::C || CC == CallingConv::Fast; }; if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC)) return false; // We can safely tail call both fastcc and ccc callees from a c calling // convention caller. If the caller is fastcc, we may have less stack space // than a non-fastcc caller with the same signature so disable tail-calls in // that case. 
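  // Illustrative truth table for the check below (derived from the rule just
  // stated; "ccc" is CallingConv::C, "fastcc" is CallingConv::Fast):
  //   ccc    caller -> ccc    callee : tail call allowed
  //   ccc    caller -> fastcc callee : tail call allowed
  //   fastcc caller -> fastcc callee : tail call allowed
  //   fastcc caller -> ccc    callee : rejected (caller may have less stack)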
return CallerCC == CallingConv::C || CallerCC == CalleeCC; } bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4( SDValue Callee, CallingConv::ID CalleeCC, ImmutableCallSite CS, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &Ins, SelectionDAG& DAG) const { bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt; if (DisableSCO && !TailCallOpt) return false; // Variadic argument functions are not supported. if (isVarArg) return false; auto &Caller = DAG.getMachineFunction().getFunction(); // Check that the calling conventions are compatible for tco. if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC)) return false; // Caller contains any byval parameter is not supported. if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); })) return false; // Callee contains any byval parameter is not supported, too. // Note: This is a quick work around, because in some cases, e.g. // caller's stack size > callee's stack size, we are still able to apply // sibling call optimization. For example, gcc is able to do SCO for caller1 // in the following example, but not for caller2. // struct test { // long int a; // char ary[56]; // } gTest; // __attribute__((noinline)) int callee(struct test v, struct test *b) { // b->a = v.a; // return 0; // } // void caller1(struct test a, struct test c, struct test *b) { // callee(gTest, b); } // void caller2(struct test *b) { callee(gTest, b); } if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); })) return false; // If callee and caller use different calling conventions, we cannot pass // parameters on stack since offsets for the parameter area may be different. if (Caller.getCallingConv() != CalleeCC && needStackSlotPassParameters(Subtarget, Outs)) return false; // No TCO/SCO on indirect call because Caller have to restore its TOC if (!isFunctionGlobalAddress(Callee) && !isa(Callee)) return false; // If the caller and callee potentially have different TOC bases then we // cannot tail call since we need to restore the TOC pointer after the call. // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977 if (!callsShareTOCBase(&Caller, Callee, getTargetMachine())) return false; // TCO allows altering callee ABI, so we don't have to check further. if (CalleeCC == CallingConv::Fast && TailCallOpt) return true; if (DisableSCO) return false; // If callee use the same argument list that caller is using, then we can // apply SCO on this case. If it is not, then we need to check if callee needs // stack for passing arguments. if (!hasSameArgumentList(&Caller, CS) && needStackSlotPassParameters(Subtarget, Outs)) { return false; } return true; } /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call /// optimization should implement this function. bool PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, const SmallVectorImpl &Ins, SelectionDAG& DAG) const { if (!getTargetMachine().Options.GuaranteedTailCallOpt) return false; // Variable argument functions are not supported. if (isVarArg) return false; MachineFunction &MF = DAG.getMachineFunction(); CallingConv::ID CallerCC = MF.getFunction().getCallingConv(); if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { // Functions containing by val parameters are not supported. 
for (unsigned i = 0; i != Ins.size(); i++) { ISD::ArgFlagsTy Flags = Ins[i].Flags; if (Flags.isByVal()) return false; } // Non-PIC/GOT tail calls are supported. if (getTargetMachine().getRelocationModel() != Reloc::PIC_) return true; // At the moment we can only do local tail calls (in same module, hidden // or protected) if we are generating PIC. if (GlobalAddressSDNode *G = dyn_cast(Callee)) return G->getGlobal()->hasHiddenVisibility() || G->getGlobal()->hasProtectedVisibility(); } return false; } /// isCallCompatibleAddress - Return the immediate to use if the specified /// 32-bit value is representable in the immediate field of a BxA instruction. static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { ConstantSDNode *C = dyn_cast(Op); if (!C) return nullptr; int Addr = C->getZExtValue(); if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. SignExtend32<26>(Addr) != Addr) return nullptr; // Top 6 bits have to be sext of immediate. return DAG .getConstant( (int)C->getZExtValue() >> 2, SDLoc(Op), DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())) .getNode(); } namespace { struct TailCallArgumentInfo { SDValue Arg; SDValue FrameIdxOp; int FrameIdx = 0; TailCallArgumentInfo() = default; }; } // end anonymous namespace /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. static void StoreTailCallArgumentsToStackSlot( SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl &TailCallArgs, SmallVectorImpl &MemOpChains, const SDLoc &dl) { for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { SDValue Arg = TailCallArgs[i].Arg; SDValue FIN = TailCallArgs[i].FrameIdxOp; int FI = TailCallArgs[i].FrameIdx; // Store relative to framepointer. MemOpChains.push_back(DAG.getStore( Chain, dl, Arg, FIN, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI))); } } /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to /// the appropriate stack slot for the tail call optimized function call. static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl) { if (SPDiff) { // Calculate the new stack slot for the return address. MachineFunction &MF = DAG.getMachineFunction(); const PPCSubtarget &Subtarget = MF.getSubtarget(); const PPCFrameLowering *FL = Subtarget.getFrameLowering(); bool isPPC64 = Subtarget.isPPC64(); int SlotSize = isPPC64 ? 8 : 4; int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset(); int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize, NewRetAddrLoc, true); EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, MachinePointerInfo::getFixedStack(MF, NewRetAddr)); // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack // slot as the FP is never overwritten. if (Subtarget.isDarwinABI()) { int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset(); int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc, true); SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), NewFPIdx)); } } return Chain; } /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate /// the position of the argument. 
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl& TailCallArguments) { int Offset = ArgOffset + SPDiff; uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8; int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true); EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue FIN = DAG.getFrameIndex(FI, VT); TailCallArgumentInfo Info; Info.Arg = Arg; Info.FrameIdxOp = FIN; Info.FrameIdx = FI; TailCallArguments.push_back(Info); } /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address /// stack slot. Returns the chain as result and the loaded frame pointers in /// LROpOut/FPOpout. Used when tail calling. SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr( SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut, SDValue &FPOpOut, const SDLoc &dl) const { if (SPDiff) { // Load the LR and FP stack slot for later adjusting. EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; LROpOut = getReturnAddrFrameIndex(DAG); LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo()); Chain = SDValue(LROpOut.getNode(), 1); // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack // slot as the FP is never overwritten. if (Subtarget.isDarwinABI()) { FPOpOut = getFramePointerFrameIndex(DAG); FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo()); Chain = SDValue(FPOpOut.getNode(), 1); } } return Chain; } /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified /// by "Src" to address "Dst" of size "Size". Alignment information is /// specified by the specific parameter attribute. The copy will be passed as /// a byval function parameter. /// Sometimes what we are copying is the end of a larger object, the part that /// does not fit in registers. static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), false, false, false, MachinePointerInfo(), MachinePointerInfo()); } /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of /// tail calls. static void LowerMemOpCallTo( SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl &MemOpChains, SmallVectorImpl &TailCallArguments, const SDLoc &dl) { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); if (!isTailCall) { if (isVector) { SDValue StackPtr; if (isPPC64) StackPtr = DAG.getRegister(PPC::X1, MVT::i64); else StackPtr = DAG.getRegister(PPC::R1, MVT::i32); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, DAG.getConstant(ArgOffset, dl, PtrVT)); } MemOpChains.push_back( DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo())); // Calculate and remember argument location. } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, TailCallArguments); } static void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl &TailCallArguments) { // Emit a sequence of copyto/copyfrom virtual registers for arguments that // might overwrite each other in case of tail call optimization. 
SmallVector MemOpChains2; // Do not flag preceding copytoreg stuff together with the following stuff. InFlag = SDValue(); StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, MemOpChains2, dl); if (!MemOpChains2.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); // Store the return address to the appropriate stack slot. Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl); // Emit callseq_end just before tailcall node. Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), DAG.getIntPtrConstant(0, dl, true), InFlag, dl); InFlag = Chain.getValue(1); } // Is this global address that of a function that can be called by name? (as // opposed to something that must hold a descriptor for an indirect call). static bool isFunctionGlobalAddress(SDValue Callee) { if (GlobalAddressSDNode *G = dyn_cast(Callee)) { if (Callee.getOpcode() == ISD::GlobalTLSAddress || Callee.getOpcode() == ISD::TargetGlobalTLSAddress) return false; return G->getGlobal()->getValueType()->isFunctionTy(); } return false; } static unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall, bool isPatchPoint, bool hasNest, SmallVectorImpl> &RegsToPass, SmallVectorImpl &Ops, std::vector &NodeTys, ImmutableCallSite CS, const PPCSubtarget &Subtarget) { bool isPPC64 = Subtarget.isPPC64(); bool isSVR4ABI = Subtarget.isSVR4ABI(); bool isELFv2ABI = Subtarget.isELFv2ABI(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); NodeTys.push_back(MVT::Other); // Returns a chain NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. unsigned CallOpc = PPCISD::CALL; bool needIndirectCall = true; if (!isSVR4ABI || !isPPC64) if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { // If this is an absolute destination address, use the munged value. Callee = SDValue(Dest, 0); needIndirectCall = false; } // PC-relative references to external symbols should go through $stub, unless // we're building with the leopard linker or later, which automatically // synthesizes these stubs. const TargetMachine &TM = DAG.getTarget(); const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); const GlobalValue *GV = nullptr; if (auto *G = dyn_cast(Callee)) GV = G->getGlobal(); bool Local = TM.shouldAssumeDSOLocal(*Mod, GV); bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64; if (isFunctionGlobalAddress(Callee)) { GlobalAddressSDNode *G = cast(Callee); // A call to a TLS address is actually an indirect call to a // thread-specific pointer. unsigned OpFlags = 0; if (UsePlt) OpFlags = PPCII::MO_PLT; // If the callee is a GlobalAddress/ExternalSymbol node (quite common, // every direct call is) turn it into a TargetGlobalAddress / // TargetExternalSymbol node so that legalize doesn't hack it. Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, Callee.getValueType(), 0, OpFlags); needIndirectCall = false; } if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { unsigned char OpFlags = 0; if (UsePlt) OpFlags = PPCII::MO_PLT; Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), OpFlags); needIndirectCall = false; } if (isPatchPoint) { // We'll form an invalid direct call when lowering a patchpoint; the full // sequence for an indirect call is complicated, and many of the // instructions introduced might have side effects (and, thus, can't be // removed later). 
The call itself will be removed as soon as the // argument/return lowering is complete, so the fact that it has the wrong // kind of operands should not really matter. needIndirectCall = false; } if (needIndirectCall) { // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair // to do the call, we can't use PPCISD::CALL. SDValue MTCTROps[] = {Chain, Callee, InFlag}; if (isSVR4ABI && isPPC64 && !isELFv2ABI) { // Function pointers in the 64-bit SVR4 ABI do not point to the function // entry point, but to the function descriptor (the function entry point // address is part of the function descriptor though). // The function descriptor is a three doubleword structure with the // following fields: function entry point, TOC base address and // environment pointer. // Thus for a call through a function pointer, the following actions need // to be performed: // 1. Save the TOC of the caller in the TOC save area of its stack // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). // 2. Load the address of the function entry point from the function // descriptor. // 3. Load the TOC of the callee from the function descriptor into r2. // 4. Load the environment pointer from the function descriptor into // r11. // 5. Branch to the function entry point address. // 6. On return of the callee, the TOC of the caller needs to be // restored (this is done in FinishCall()). // // The loads are scheduled at the beginning of the call sequence, and the // register copies are flagged together to ensure that no other // operations can be scheduled in between. E.g. without flagging the // copies together, a TOC access in the caller could be scheduled between // the assignment of the callee TOC and the branch to the callee, which // results in the TOC access going through the TOC of the callee instead // of going through the TOC of the caller, which leads to incorrect code. // Load the address of the function entry point from the function // descriptor. SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1); if (LDChain.getValueType() == MVT::Glue) LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2); auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors() ? (MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant) : MachineMemOperand::MONone; MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr); SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI, /* Alignment = */ 8, MMOFlags); // Load environment pointer into r11. SDValue PtrOff = DAG.getIntPtrConstant(16, dl); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); SDValue LoadEnvPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16), /* Alignment = */ 8, MMOFlags); SDValue TOCOff = DAG.getIntPtrConstant(8, dl); SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff); SDValue TOCPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8), /* Alignment = */ 8, MMOFlags); setUsesTOCBasePtr(DAG); SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr, InFlag); Chain = TOCVal.getValue(0); InFlag = TOCVal.getValue(1); // If the function call has an explicit 'nest' parameter, it takes the // place of the environment pointer. 
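      // Illustrative sketch of the descriptor layout described above (the
      // struct and field names are invented for exposition; only the offsets
      // matter to the loads above):
      //   struct FunctionDescriptor {
      //     void *EntryPoint;     // +0,  LoadFuncPtr, moved to CTR
      //     void *TOCBase;        // +8,  TOCPtr, copied into r2
      //     void *EnvironmentPtr; // +16, LoadEnvPtr, copied into r11 below
      //   };                      //      unless a 'nest' parameter is used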
if (!hasNest) { SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, InFlag); Chain = EnvVal.getValue(0); InFlag = EnvVal.getValue(1); } MTCTROps[0] = Chain; MTCTROps[1] = LoadFuncPtr; MTCTROps[2] = InFlag; } Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2)); InFlag = Chain.getValue(1); NodeTys.clear(); NodeTys.push_back(MVT::Other); NodeTys.push_back(MVT::Glue); Ops.push_back(Chain); CallOpc = PPCISD::BCTRL; Callee.setNode(nullptr); // Add use of X11 (holding environment pointer) if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest) Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); // Add CTR register as callee so a bctr can be emitted later. if (isTailCall) Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT)); } // If this is a direct call, pass the chain and the callee. if (Callee.getNode()) { Ops.push_back(Chain); Ops.push_back(Callee); } // If this is a tail call add stack pointer delta. if (isTailCall) Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32)); // Add argument registers to the end of the list so that they are known live // into the call. for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live // into the call. if (isSVR4ABI && isPPC64 && !isPatchPoint) { setUsesTOCBasePtr(DAG); Ops.push_back(DAG.getRegister(PPC::X2, PtrVT)); } return CallOpc; } SDValue PPCTargetLowering::LowerCallResult( SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { SmallVector RVLocs; CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); CCRetInfo.AnalyzeCallResult( Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold) ? RetCC_PPC_Cold : RetCC_PPC); // Copy all of the result registers out of their specified physreg. 
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), InFlag); Chain = Val.getValue(1); InFlag = Val.getValue(2); switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::AExt: Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); break; case CCValAssign::ZExt: Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val, DAG.getValueType(VA.getValVT())); Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); break; case CCValAssign::SExt: Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val, DAG.getValueType(VA.getValVT())); Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); break; } InVals.push_back(Val); } return Chain; } SDValue PPCTargetLowering::FinishCall( CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg, bool isPatchPoint, bool hasNest, SelectionDAG &DAG, SmallVector, 8> &RegsToPass, SDValue InFlag, SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff, unsigned NumBytes, const SmallVectorImpl &Ins, SmallVectorImpl &InVals, ImmutableCallSite CS) const { std::vector NodeTys; SmallVector Ops; unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl, SPDiff, isTailCall, isPatchPoint, hasNest, RegsToPass, Ops, NodeTys, CS, Subtarget); // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); // When performing tail call optimization the callee pops its arguments off // the stack. Account for this here so these bytes can be pushed back on in // PPCFrameLowering::eliminateCallFramePseudoInstr. int BytesCalleePops = (CallConv == CallingConv::Fast && getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); if (InFlag.getNode()) Ops.push_back(InFlag); // Emit tail call. if (isTailCall) { assert(((Callee.getOpcode() == ISD::Register && cast(Callee)->getReg() == PPC::CTR) || Callee.getOpcode() == ISD::TargetExternalSymbol || Callee.getOpcode() == ISD::TargetGlobalAddress || isa(Callee)) && "Expecting an global address, external symbol, absolute value or register"); DAG.getMachineFunction().getFrameInfo().setHasTailCall(); return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops); } // Add a NOP immediately after the branch instruction when using the 64-bit // SVR4 ABI. At link time, if caller and callee are in a different module and // thus have a different TOC, the call will be replaced with a call to a stub // function which saves the current TOC, loads the TOC of the callee and // branches to the callee. The NOP will be replaced with a load instruction // which restores the TOC of the caller from the TOC save slot of the current // stack frame. If caller and callee belong to the same module (and have the // same TOC), the NOP will remain unchanged. 
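  // Illustrative sketch of the link-time rewrite described above (the stub
  // name is hypothetical; the TOC save slot is 24(r1) on ELFv2 and 40(r1) on
  // ELFv1, per the linkage-area layouts used elsewhere in this file):
  //   bl callee        becomes   bl <linker-generated stub>
  //   nop                        ld 2, 24(1)    ; reload caller's TOC
  // When caller and callee share a TOC base, the nop is left untouched.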
MachineFunction &MF = DAG.getMachineFunction(); if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() && !isPatchPoint) { if (CallOpc == PPCISD::BCTRL) { // This is a call through a function pointer. // Restore the caller TOC from the save area into R2. // See PrepareCall() for more information about calls through function // pointers in the 64-bit SVR4 ABI. // We are using a target-specific load with r2 hard coded, because the // result of a target-independent load would never go directly into r2, // since r2 is a reserved register (which prevents the register allocator // from allocating it), resulting in an additional register being // allocated and an unnecessary move instruction being generated. CallOpc = PPCISD::BCTRL_LOAD_TOC; EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff); // The address needs to go after the chain input but before the flag (or // any other variadic arguments). Ops.insert(std::next(Ops.begin()), AddTOC); } else if (CallOpc == PPCISD::CALL && !callsShareTOCBase(&MF.getFunction(), Callee, DAG.getTarget())) { // Otherwise insert NOP for non-local calls. CallOpc = PPCISD::CALL_NOP; } } Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), DAG.getIntPtrConstant(BytesCalleePops, dl, true), InFlag, dl); if (!Ins.empty()) InFlag = Chain.getValue(1); return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, InVals); } SDValue PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc &dl = CLI.DL; SmallVectorImpl &Outs = CLI.Outs; SmallVectorImpl &OutVals = CLI.OutVals; SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool isVarArg = CLI.IsVarArg; bool isPatchPoint = CLI.IsPatchPoint; ImmutableCallSite CS = CLI.CS; if (isTailCall) { if (Subtarget.useLongCalls() && !(CS && CS.isMustTailCall())) isTailCall = false; else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) isTailCall = IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS, isVarArg, Outs, Ins, DAG); else isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Ins, DAG); if (isTailCall) { ++NumTailCalls; if (!getTargetMachine().Options.GuaranteedTailCallOpt) ++NumSiblingCalls; assert(isa(Callee) && "Callee should be an llvm::Function object."); LLVM_DEBUG( const GlobalValue *GV = cast(Callee)->getGlobal(); const unsigned Width = 80 - strlen("TCO caller: ") - strlen(", callee linkage: 0, 0"); dbgs() << "TCO caller: " << left_justify(DAG.getMachineFunction().getName(), Width) << ", callee linkage: " << GV->getVisibility() << ", " << GV->getLinkage() << "\n"); } } if (!isTailCall && CS && CS.isMustTailCall()) report_fatal_error("failed to perform tail call elimination on a call " "site marked musttail"); // When long calls (i.e. indirect calls) are always used, calls are always // made via function pointer. If we have a function name, first translate it // into a pointer. 
if (Subtarget.useLongCalls() && isa(Callee) && !isTailCall) Callee = LowerGlobalAddress(Callee, DAG); if (Subtarget.isSVR4ABI()) { if (Subtarget.isPPC64()) return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, isTailCall, isPatchPoint, Outs, OutVals, Ins, dl, DAG, InVals, CS); else return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg, isTailCall, isPatchPoint, Outs, OutVals, Ins, dl, DAG, InVals, CS); } return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, isTailCall, isPatchPoint, Outs, OutVals, Ins, dl, DAG, InVals, CS); } SDValue PPCTargetLowering::LowerCall_32SVR4( SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, bool isPatchPoint, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals, ImmutableCallSite CS) const { // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description // of the 32-bit SVR4 ABI stack frame layout. assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold || CallConv == CallingConv::Fast) && "Unknown calling convention!"); unsigned PtrByteSize = 4; MachineFunction &MF = DAG.getMachineFunction(); // Mark this function as potentially containing a function that contains a // tail call. As a consequence the frame pointer will be used for dynamicalloc // and restoring the callers stack pointer in this functions epilog. This is // done because by tail calling the called function might overwrite the value // in this function's (MF) stack pointer stack slot 0(SP). if (getTargetMachine().Options.GuaranteedTailCallOpt && CallConv == CallingConv::Fast) MF.getInfo()->setHasFastCall(); // Count how many bytes are to be pushed on the stack, including the linkage // area, parameter list area and the part of the local variable space which // contains copies of aggregates which are passed by value. // Assign locations to all of the outgoing arguments. SmallVector ArgLocs; PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); // Reserve space for the linkage area on the stack. CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(), PtrByteSize); if (useSoftFloat()) CCInfo.PreAnalyzeCallOperands(Outs); if (isVarArg) { // Handle fixed and variable vector arguments differently. // Fixed vector arguments go into registers as long as registers are // available. Variable vector arguments always go into memory. unsigned NumArgs = Outs.size(); for (unsigned i = 0; i != NumArgs; ++i) { MVT ArgVT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; bool Result; if (Outs[i].IsFixed) { Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); } else { Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); } if (Result) { #ifndef NDEBUG errs() << "Call operand #" << i << " has unhandled type " << EVT(ArgVT).getEVTString() << "\n"; #endif llvm_unreachable(nullptr); } } } else { // All arguments are treated the same. CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4); } CCInfo.clearWasPPCF128(); // Assign locations to all of the outgoing aggregate by value arguments. SmallVector ByValArgLocs; CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext()); // Reserve stack space for the allocations in CCInfo. 
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal); // Size of the linkage area, parameter list area and the part of the local // space variable where copies of aggregates which are passed by value are // stored. unsigned NumBytes = CCByValInfo.getNextStackOffset(); // Calculate by how many bytes the stack has to be adjusted in case of tail // call optimization. int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); SDValue CallSeqStart = Chain; // Load the return address and frame pointer so it can be moved somewhere else // later. SDValue LROp, FPOp; Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); // Set up a copy of the stack pointer for use loading and storing any // arguments that may not fit in the registers available for argument // passing. SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32); SmallVector, 8> RegsToPass; SmallVector TailCallArguments; SmallVector MemOpChains; bool seenFloatArg = false; // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; if (Flags.isByVal()) { // Argument is an aggregate which is passed by value, thus we need to // create a copy of it in the local variable space of the current stack // frame (which is the stack frame of the caller) and pass the address of // this copy to the callee. assert((j < ByValArgLocs.size()) && "Index out of bounds!"); CCValAssign &ByValVA = ByValArgLocs[j++]; assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!"); // Memory reserved in the local variable space of the callers stack frame. unsigned LocMemOffset = ByValVA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), StackPtr, PtrOff); // Create a copy of the argument in the local area of the current // stack frame. SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, CallSeqStart.getNode()->getOperand(0), Flags, DAG, dl); // This must go outside the CALLSEQ_START..END. SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0, SDLoc(MemcpyCall)); DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode()); Chain = CallSeqStart = NewCallSeqStart; // Pass the address of the aggregate copy on the stack either in a // physical register or in the parameter list area of the current stack // frame to the callee. Arg = PtrOff; } if (VA.isRegLoc()) { if (Arg.getValueType() == MVT::i1) Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg); seenFloatArg |= VA.getLocVT().isFloatingPoint(); // Put argument in a physical register. RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else { // Put argument in the parameter list area of the current stack frame. assert(VA.isMemLoc()); unsigned LocMemOffset = VA.getLocMemOffset(); if (!isTailCall) { SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), StackPtr, PtrOff); MemOpChains.push_back( DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo())); } else { // Calculate and remember argument location. 
CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, TailCallArguments); } } } if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } // Set CR bit 6 to true if this is a vararg call with floating args passed in // registers. if (isVarArg) { SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, InFlag }; Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1)); InFlag = Chain.getValue(1); } if (isTailCall) PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, TailCallArguments); return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, /* unused except on PPC64 ELFv1 */ false, DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins, InVals, CS); } // Copy an argument into memory, being careful to do this outside the // call sequence for the call to which the argument belongs. SDValue PPCTargetLowering::createMemcpyOutsideCallSeq( SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl) const { SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, CallSeqStart.getNode()->getOperand(0), Flags, DAG, dl); // The MEMCPY must go outside the CALLSEQ_START..END. int64_t FrameSize = CallSeqStart.getConstantOperandVal(1); SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0, SDLoc(MemcpyCall)); DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode()); return NewCallSeqStart; } SDValue PPCTargetLowering::LowerCall_64SVR4( SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, bool isPatchPoint, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals, ImmutableCallSite CS) const { bool isELFv2ABI = Subtarget.isELFv2ABI(); bool isLittleEndian = Subtarget.isLittleEndian(); unsigned NumOps = Outs.size(); bool hasNest = false; bool IsSibCall = false; EVT PtrVT = getPointerTy(DAG.getDataLayout()); unsigned PtrByteSize = 8; MachineFunction &MF = DAG.getMachineFunction(); if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt) IsSibCall = true; // Mark this function as potentially containing a function that contains a // tail call. As a consequence the frame pointer will be used for dynamicalloc // and restoring the callers stack pointer in this functions epilog. This is // done because by tail calling the called function might overwrite the value // in this function's (MF) stack pointer stack slot 0(SP). if (getTargetMachine().Options.GuaranteedTailCallOpt && CallConv == CallingConv::Fast) MF.getInfo()->setHasFastCall(); assert(!(CallConv == CallingConv::Fast && isVarArg) && "fastcc not supported on varargs functions"); // Count how many bytes are to be pushed on the stack, including the linkage // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage // area is 32 bytes reserved space for [SP][CR][LR][TOC]. 
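  // Spelled out for illustration (getLinkageSize() below remains the source
  // of truth for the actual value):
  //   ELFv1 linkage area, 48 bytes: 0 back chain (SP), 8 CR save, 16 LR save,
  //                                 24 and 32 reserved, 40 TOC save
  //   ELFv2 linkage area, 32 bytes: 0 back chain (SP), 8 CR save, 16 LR save,
  //                                 24 TOC save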
unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); unsigned NumBytes = LinkageSize; unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; unsigned &QFPR_idx = FPR_idx; static const MCPhysReg GPR[] = { PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; const unsigned NumGPRs = array_lengthof(GPR); const unsigned NumFPRs = useSoftFloat() ? 0 : 13; const unsigned NumVRs = array_lengthof(VR); const unsigned NumQFPRs = NumFPRs; // On ELFv2, we can avoid allocating the parameter area if all the arguments // can be passed to the callee in registers. // For the fast calling convention, there is another check below. // Note: We should keep consistent with LowerFormalArguments_64SVR4() bool HasParameterArea = !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast; if (!HasParameterArea) { unsigned ParamAreaSize = NumGPRs * PtrByteSize; unsigned AvailableFPRs = NumFPRs; unsigned AvailableVRs = NumVRs; unsigned NumBytesTmp = NumBytes; for (unsigned i = 0; i != NumOps; ++i) { if (Outs[i].Flags.isNest()) continue; if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags, PtrByteSize, LinkageSize, ParamAreaSize, NumBytesTmp, AvailableFPRs, AvailableVRs, Subtarget.hasQPX())) HasParameterArea = true; } } // When using the fast calling convention, we don't provide backing for // arguments that will be in registers. unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0; // Avoid allocating parameter area for fastcc functions if all the arguments // can be passed in the registers. if (CallConv == CallingConv::Fast) HasParameterArea = false; // Add up all the space actually used. for (unsigned i = 0; i != NumOps; ++i) { ISD::ArgFlagsTy Flags = Outs[i].Flags; EVT ArgVT = Outs[i].VT; EVT OrigVT = Outs[i].ArgVT; if (Flags.isNest()) continue; if (CallConv == CallingConv::Fast) { if (Flags.isByVal()) { NumGPRsUsed += (Flags.getByValSize()+7)/8; if (NumGPRsUsed > NumGPRs) HasParameterArea = true; } else { switch (ArgVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unexpected ValueType for argument!"); case MVT::i1: case MVT::i32: case MVT::i64: if (++NumGPRsUsed <= NumGPRs) continue; break; case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: case MVT::v2f64: case MVT::v2i64: case MVT::v1i128: case MVT::f128: if (++NumVRsUsed <= NumVRs) continue; break; case MVT::v4f32: // When using QPX, this is handled like a FP register, otherwise, it // is an Altivec register. if (Subtarget.hasQPX()) { if (++NumFPRsUsed <= NumFPRs) continue; } else { if (++NumVRsUsed <= NumVRs) continue; } break; case MVT::f32: case MVT::f64: case MVT::v4f64: // QPX case MVT::v4i1: // QPX if (++NumFPRsUsed <= NumFPRs) continue; break; } HasParameterArea = true; } } /* Respect alignment of argument on the stack. */ unsigned Align = CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); NumBytes = ((NumBytes + Align - 1) / Align) * Align; NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); if (Flags.isInConsecutiveRegsLast()) NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; } unsigned NumBytesActuallyUsed = NumBytes; // In the old ELFv1 ABI, // the prolog code of the callee may store up to 8 GPR argument registers to // the stack, allowing va_start to index over them in memory if its varargs. 
// Because we cannot tell if this is needed on the caller side, we have to // conservatively assume that it is needed. As such, make sure we have at // least enough stack space for the caller to store the 8 GPRs. // In the ELFv2 ABI, we allocate the parameter area iff a callee // really requires memory operands, e.g. a vararg function. if (HasParameterArea) NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); else NumBytes = LinkageSize; // Tail call needs the stack to be aligned. if (getTargetMachine().Options.GuaranteedTailCallOpt && CallConv == CallingConv::Fast) NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes); int SPDiff = 0; // Calculate by how many bytes the stack has to be adjusted in case of tail // call optimization. if (!IsSibCall) SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); // To protect arguments on the stack from being clobbered in a tail call, // force all the loads to happen before doing any other lowering. if (isTailCall) Chain = DAG.getStackArgumentTokenFactor(Chain); // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass if (!IsSibCall) Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); SDValue CallSeqStart = Chain; // Load the return address and frame pointer so it can be move somewhere else // later. SDValue LROp, FPOp; Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); // Set up a copy of the stack pointer for use loading and storing any // arguments that may not fit in the registers available for argument // passing. SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64); // Figure out which arguments are going to go in registers, and which in // memory. Also, if this is a vararg function, floating point operations // must be stored to our stack, and loaded into integer regs as well, if // any integer regs are available for argument passing. unsigned ArgOffset = LinkageSize; SmallVector, 8> RegsToPass; SmallVector TailCallArguments; SmallVector MemOpChains; for (unsigned i = 0; i != NumOps; ++i) { SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; EVT ArgVT = Outs[i].VT; EVT OrigVT = Outs[i].ArgVT; // PtrOff will be used to store the current argument to the stack if a // register cannot be found for it. SDValue PtrOff; // We re-align the argument offset for each argument, except when using the // fast calling convention, when we need to make sure we do that only when // we'll actually use a stack slot. auto ComputePtrOff = [&]() { /* Respect alignment of argument on the stack. */ unsigned Align = CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); }; if (CallConv != CallingConv::Fast) { ComputePtrOff(); /* Compute GPR index associated with argument offset. */ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; GPR_idx = std::min(GPR_idx, NumGPRs); } // Promote integers to 64-bit values. if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) { // FIXME: Should this use ANY_EXTEND if neither sext nor zext? unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); } // FIXME memcpy is used way more than necessary. Correctness first. // Note: "by value" is code for passing a structure by value, not // basic types. 
if (Flags.isByVal()) { // Note: Size includes alignment padding, so // struct x { short a; char b; } // will have Size = 4. With #pragma pack(1), it will have Size = 3. // These are the proper values we need for right-justifying the // aggregate in a parameter register. unsigned Size = Flags.getByValSize(); // An empty aggregate parameter takes up no storage and no // registers. if (Size == 0) continue; if (CallConv == CallingConv::Fast) ComputePtrOff(); // All aggregates smaller than 8 bytes must be passed right-justified. if (Size==1 || Size==2 || Size==4) { EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32); if (GPR_idx != NumGPRs) { SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, MachinePointerInfo(), VT); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); ArgOffset += PtrByteSize; continue; } } if (GPR_idx == NumGPRs && Size < 8) { SDValue AddPtr = PtrOff; if (!isLittleEndian) { SDValue Const = DAG.getConstant(PtrByteSize - Size, dl, PtrOff.getValueType()); AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); } Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, CallSeqStart, Flags, DAG, dl); ArgOffset += PtrByteSize; continue; } // Copy entire object into memory. There are cases where gcc-generated // code assumes it is there, even if it could be put entirely into // registers. (This is not what the doc says.) // FIXME: The above statement is likely due to a misunderstanding of the // documents. All arguments must be copied into the parameter area BY // THE CALLEE in the event that the callee takes the address of any // formal argument. That has not yet been implemented. However, it is // reasonable to use the stack area as a staging area for the register // load. // Skip this for small aggregates, as we will use the same slot for a // right-justified copy, below. if (Size >= 8) Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, CallSeqStart, Flags, DAG, dl); // When a register is available, pass a small aggregate right-justified. if (Size < 8 && GPR_idx != NumGPRs) { // The easiest way to get this right-justified in a register // is to copy the structure into the rightmost portion of a // local variable slot, then load the whole slot into the // register. // FIXME: The memcpy seems to produce pretty awful code for // small aggregates, particularly for packed ones. // FIXME: It would be preferable to use the slot in the // parameter save area instead of a new local variable. SDValue AddPtr = PtrOff; if (!isLittleEndian) { SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType()); AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); } Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, CallSeqStart, Flags, DAG, dl); // Load the slot into the register. SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo()); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); // Done with this argument. ArgOffset += PtrByteSize; continue; } // For aggregates larger than PtrByteSize, copy the pieces of the // object that fit into registers from the parameter save area. for (unsigned j=0; j gpr moves. // In the non-vararg case, this can only ever happen in the // presence of f32 array types, since otherwise we never run // out of FPRs before running out of GPRs. SDValue ArgVal; // Double values are always passed in a single GPR. 
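// --- Editorial sketch (not part of the upstream sources) --------------------
// Host-side model of the by-value staging earlier in this block: a small
// aggregate (Size <= 8) is copied into the rightmost bytes of a doubleword
// slot on big-endian targets (AddPtr = PtrOff + (8 - Size)), so that a 64-bit
// load of the slot leaves the data in the low-order end of the GPR.  The
// function name and the in-memory model are mine.
#include <cstdint>
#include <cstring>

inline uint64_t gprImageForSmallAggregate(const void *Aggregate, unsigned Size,
                                          bool IsLittleEndian) {
  unsigned char Slot[8] = {};
  unsigned Offset = IsLittleEndian ? 0 : 8 - Size; // right-justify on BE
  std::memcpy(Slot + Offset, Aggregate, Size);
  uint64_t Image;
  std::memcpy(&Image, Slot, sizeof(Image));        // what the GPR load sees
  return Image;
}
// --- end of editorial sketch -------------------------------------------------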
if (Arg.getValueType() != MVT::f32) { ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); // Non-array float values are extended and passed in a GPR. } else if (!Flags.isInConsecutiveRegs()) { ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); // If we have an array of floats, we collect every odd element // together with its predecessor into one GPR. } else if (ArgOffset % PtrByteSize != 0) { SDValue Lo, Hi; Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]); Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); if (!isLittleEndian) std::swap(Lo, Hi); ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); // The final element, if even, goes into the first half of a GPR. } else if (Flags.isInConsecutiveRegsLast()) { ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); if (!isLittleEndian) ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal, DAG.getConstant(32, dl, MVT::i32)); // Non-final even elements are skipped; they will be handled // together the with subsequent argument on the next go-around. } else ArgVal = SDValue(); if (ArgVal.getNode()) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal)); } else { if (CallConv == CallingConv::Fast) ComputePtrOff(); // Single-precision floating-point values are mapped to the // second (rightmost) word of the stack doubleword. if (Arg.getValueType() == MVT::f32 && !isLittleEndian && !Flags.isInConsecutiveRegs()) { SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); } assert(HasParameterArea && "Parameter area must exist to pass an argument in memory."); LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, true, isTailCall, false, MemOpChains, TailCallArguments, dl); NeededLoad = true; } // When passing an array of floats, the array occupies consecutive // space in the argument area; only round up to the next doubleword // at the end of the array. Otherwise, each float takes 8 bytes. if (CallConv != CallingConv::Fast || NeededLoad) { ArgOffset += (Arg.getValueType() == MVT::f32 && Flags.isInConsecutiveRegs()) ? 4 : 8; if (Flags.isInConsecutiveRegsLast()) ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; } break; } case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: case MVT::v2f64: case MVT::v2i64: case MVT::v1i128: case MVT::f128: if (!Subtarget.hasQPX()) { // These can be scalar arguments or elements of a vector array type // passed directly. The latter are used to implement ELFv2 homogenous // vector aggregates. // For a varargs call, named arguments go into VRs or on the stack as // usual; unnamed arguments always go to the stack or the corresponding // GPRs when within range. For now, we always put the value in both // locations (or even all three). if (isVarArg) { assert(HasParameterArea && "Parameter area must exist if we have a varargs call."); // We could elide this store in the case where the object fits // entirely in R registers. Maybe later. 
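// --- Editorial sketch (not part of the upstream sources) --------------------
// How two consecutive f32 array elements come to share one 64-bit GPR,
// mirroring the Lo/Hi BUILD_PAIR and the endian swap above.  "Even" is the
// element at the lower (doubleword-aligned) offset, "odd" its successor; the
// helper name is mine.
#include <cstdint>
#include <cstring>
#include <utility>

inline uint64_t packFloatPair(float EvenElt, float OddElt, bool IsLittleEndian) {
  uint32_t Lo, Hi;
  std::memcpy(&Lo, &EvenElt, sizeof(Lo));
  std::memcpy(&Hi, &OddElt, sizeof(Hi));
  if (!IsLittleEndian)
    std::swap(Lo, Hi); // big-endian: the even element occupies the high half
  return (static_cast<uint64_t>(Hi) << 32) | Lo; // the GPR image
}
// --- end of editorial sketch -------------------------------------------------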
SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); MemOpChains.push_back(Store); if (VR_idx != NumVRs) { SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo()); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); } ArgOffset += 16; for (unsigned i=0; i<16; i+=PtrByteSize) { if (GPR_idx == NumGPRs) break; SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, DAG.getConstant(i, dl, PtrVT)); SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } break; } // Non-varargs Altivec params go into VRs or on the stack. if (VR_idx != NumVRs) { RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); } else { if (CallConv == CallingConv::Fast) ComputePtrOff(); assert(HasParameterArea && "Parameter area must exist to pass an argument in memory."); LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, true, isTailCall, true, MemOpChains, TailCallArguments, dl); if (CallConv == CallingConv::Fast) ArgOffset += 16; } if (CallConv != CallingConv::Fast) ArgOffset += 16; break; } // not QPX assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 && "Invalid QPX parameter type"); /* fall through */ case MVT::v4f64: case MVT::v4i1: { bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32; if (isVarArg) { assert(HasParameterArea && "Parameter area must exist if we have a varargs call."); // We could elide this store in the case where the object fits // entirely in R registers. Maybe later. SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); MemOpChains.push_back(Store); if (QFPR_idx != NumQFPRs) { SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store, PtrOff, MachinePointerInfo()); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load)); } ArgOffset += (IsF32 ? 16 : 32); for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) { if (GPR_idx == NumGPRs) break; SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, DAG.getConstant(i, dl, PtrVT)); SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } break; } // Non-varargs QPX params go into registers or on the stack. if (QFPR_idx != NumQFPRs) { RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg)); } else { if (CallConv == CallingConv::Fast) ComputePtrOff(); assert(HasParameterArea && "Parameter area must exist to pass an argument in memory."); LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, true, isTailCall, true, MemOpChains, TailCallArguments, dl); if (CallConv == CallingConv::Fast) ArgOffset += (IsF32 ? 16 : 32); } if (CallConv != CallingConv::Fast) ArgOffset += (IsF32 ? 16 : 32); break; } } } assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) && "mismatch in size of parameter area"); (void)NumBytesActuallyUsed; if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Check if this is an indirect call (MTCTR/BCTRL). // See PrepareCall() for more information about calls through function // pointers in the 64-bit SVR4 ABI. if (!isTailCall && !isPatchPoint && !isFunctionGlobalAddress(Callee) && !isa(Callee)) { // Load r2 into a virtual register and store it to the TOC save area. 
setUsesTOCBasePtr(DAG); SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); // TOC save area offset. unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); Chain = DAG.getStore( Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset)); // In the ELFv2 ABI, R12 must contain the address of an indirect callee. // This does not mean the MTCTR instruction must use R12; it's easier // to model this as an extra parameter, so do that. if (isELFv2ABI && !isPatchPoint) RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); } // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } if (isTailCall && !IsSibCall) PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, TailCallArguments); return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest, DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins, InVals, CS); } SDValue PPCTargetLowering::LowerCall_Darwin( SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, bool isPatchPoint, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals, ImmutableCallSite CS) const { unsigned NumOps = Outs.size(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); bool isPPC64 = PtrVT == MVT::i64; unsigned PtrByteSize = isPPC64 ? 8 : 4; MachineFunction &MF = DAG.getMachineFunction(); // Mark this function as potentially containing a function that contains a // tail call. As a consequence the frame pointer will be used for dynamicalloc // and restoring the callers stack pointer in this functions epilog. This is // done because by tail calling the called function might overwrite the value // in this function's (MF) stack pointer stack slot 0(SP). if (getTargetMachine().Options.GuaranteedTailCallOpt && CallConv == CallingConv::Fast) MF.getInfo()->setHasFastCall(); // Count how many bytes are to be pushed on the stack, including the linkage // area, and parameter passing area. We start with 24/48 bytes, which is // prereserved space for [SP][CR][LR][3 x unused]. unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); unsigned NumBytes = LinkageSize; // Add up all the space actually used. // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually // they all go in registers, but we must reserve stack space for them for // possible use by the caller. In varargs or 64-bit calls, parameters are // assigned stack space in order, with padding so Altivec parameters are // 16-byte aligned. unsigned nAltivecParamsAtEnd = 0; for (unsigned i = 0; i != NumOps; ++i) { ISD::ArgFlagsTy Flags = Outs[i].Flags; EVT ArgVT = Outs[i].VT; // Varargs Altivec parameters are padded to a 16 byte boundary. 
if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) { if (!isVarArg && !isPPC64) { // Non-varargs Altivec parameters go after all the non-Altivec // parameters; handle those later so we know how much padding we need. nAltivecParamsAtEnd++; continue; } // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. NumBytes = ((NumBytes+15)/16)*16; } NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); } // Allow for Altivec parameters at the end, if needed. if (nAltivecParamsAtEnd) { NumBytes = ((NumBytes+15)/16)*16; NumBytes += 16*nAltivecParamsAtEnd; } // The prolog code of the callee may store up to 8 GPR argument registers to // the stack, allowing va_start to index over them in memory if its varargs. // Because we cannot tell if this is needed on the caller side, we have to // conservatively assume that it is needed. As such, make sure we have at // least enough stack space for the caller to store the 8 GPRs. NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); // Tail call needs the stack to be aligned. if (getTargetMachine().Options.GuaranteedTailCallOpt && CallConv == CallingConv::Fast) NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes); // Calculate by how many bytes the stack has to be adjusted in case of tail // call optimization. int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); // To protect arguments on the stack from being clobbered in a tail call, // force all the loads to happen before doing any other lowering. if (isTailCall) Chain = DAG.getStackArgumentTokenFactor(Chain); // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); SDValue CallSeqStart = Chain; // Load the return address and frame pointer so it can be move somewhere else // later. SDValue LROp, FPOp; Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); // Set up a copy of the stack pointer for use loading and storing any // arguments that may not fit in the registers available for argument // passing. SDValue StackPtr; if (isPPC64) StackPtr = DAG.getRegister(PPC::X1, MVT::i64); else StackPtr = DAG.getRegister(PPC::R1, MVT::i32); // Figure out which arguments are going to go in registers, and which in // memory. Also, if this is a vararg function, floating point operations // must be stored to our stack, and loaded into integer regs as well, if // any integer regs are available for argument passing. unsigned ArgOffset = LinkageSize; unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; static const MCPhysReg GPR_32[] = { // 32-bit registers. PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; static const MCPhysReg GPR_64[] = { // 64-bit registers. PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; const unsigned NumGPRs = array_lengthof(GPR_32); const unsigned NumFPRs = 13; const unsigned NumVRs = array_lengthof(VR); const MCPhysReg *GPR = isPPC64 ? 
GPR_64 : GPR_32; SmallVector, 8> RegsToPass; SmallVector TailCallArguments; SmallVector MemOpChains; for (unsigned i = 0; i != NumOps; ++i) { SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; // PtrOff will be used to store the current argument to the stack if a // register cannot be found for it. SDValue PtrOff; PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); // On PPC64, promote integers to 64-bit values. if (isPPC64 && Arg.getValueType() == MVT::i32) { // FIXME: Should this use ANY_EXTEND if neither sext nor zext? unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); } // FIXME memcpy is used way more than necessary. Correctness first. // Note: "by value" is code for passing a structure by value, not // basic types. if (Flags.isByVal()) { unsigned Size = Flags.getByValSize(); // Very small objects are passed right-justified. Everything else is // passed left-justified. if (Size==1 || Size==2) { EVT VT = (Size==1) ? MVT::i8 : MVT::i16; if (GPR_idx != NumGPRs) { SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, MachinePointerInfo(), VT); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); ArgOffset += PtrByteSize; } else { SDValue Const = DAG.getConstant(PtrByteSize - Size, dl, PtrOff.getValueType()); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, CallSeqStart, Flags, DAG, dl); ArgOffset += PtrByteSize; } continue; } // Copy entire object into memory. There are cases where gcc-generated // code assumes it is there, even if it could be put entirely into // registers. (This is not what the doc says.) Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, CallSeqStart, Flags, DAG, dl); // For small aggregates (Darwin only) and aggregates >= PtrByteSize, // copy the pieces of the object that fit into registers from the // parameter save area. for (unsigned j=0; j NumVRs) { unsigned j = 0; // Offset is aligned; skip 1st 12 params which go in V registers. ArgOffset = ((ArgOffset+15)/16)*16; ArgOffset += 12*16; for (unsigned i = 0; i != NumOps; ++i) { SDValue Arg = OutVals[i]; EVT ArgType = Outs[i].VT; if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 || ArgType==MVT::v8i16 || ArgType==MVT::v16i8) { if (++j > NumVRs) { SDValue PtrOff; // We are emitting Altivec params in order. LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, isPPC64, isTailCall, true, MemOpChains, TailCallArguments, dl); ArgOffset += 16; } } } } if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // On Darwin, R12 must contain the address of an indirect callee. This does // not mean the MTCTR instruction must use R12; it's easier to model this as // an extra parameter, so do that. if (!isTailCall && !isFunctionGlobalAddress(Callee) && !isa(Callee) && !isBLACompatibleAddress(Callee, DAG)) RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 : PPC::R12), Callee)); // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. 
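// --- Editorial sketch (not part of the upstream sources) --------------------
// Simplified model of where the Darwin 32-bit, non-varargs lowering above
// places an Altivec parameter that does not fit in V2-V13: the area after the
// scalar parameters is rounded up to 16 bytes, its first 12 slots shadow the
// vector registers, and only the parameters with zero-based Index >= 12
// actually spill.  Function and parameter names are mine.
inline unsigned altivecSpillOffset(unsigned OffsetAfterScalars, unsigned Index) {
  unsigned Base = ((OffsetAfterScalars + 15) / 16) * 16; // align to 16 bytes
  return Base + Index * 16;                              // one 16-byte slot each
}
// --- end of editorial sketch -------------------------------------------------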
SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } if (isTailCall) PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, TailCallArguments); return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, /* unused except on PPC64 ELFv1 */ false, DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins, InVals, CS); } bool PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const { SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); return CCInfo.CheckReturn( Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold) ? RetCC_PPC_Cold : RetCC_PPC); } SDValue PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &dl, SelectionDAG &DAG) const { SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeReturn(Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold) ? RetCC_PPC_Cold : RetCC_PPC); SDValue Flag; SmallVector RetOps(1, Chain); // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); SDValue Arg = OutVals[i]; switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::AExt: Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::ZExt: Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::SExt: Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); break; } Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); if (I) { for (; *I; ++I) { if (PPC::G8RCRegClass.contains(*I)) RetOps.push_back(DAG.getRegister(*I, MVT::i64)); else if (PPC::F8RCRegClass.contains(*I)) RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64))); else if (PPC::CRRCRegClass.contains(*I)) RetOps.push_back(DAG.getRegister(*I, MVT::i1)); else if (PPC::VRRCRegClass.contains(*I)) RetOps.push_back(DAG.getRegister(*I, MVT::Other)); else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); } } RetOps[0] = Chain; // Update chain. // Add the flag if we have it. if (Flag.getNode()) RetOps.push_back(Flag); return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps); } SDValue PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); // Get the correct type for integers. EVT IntVT = Op.getValueType(); // Get the inputs. SDValue Chain = Op.getOperand(0); SDValue FPSIdx = getFramePointerFrameIndex(DAG); // Build a DYNAREAOFFSET node. SDValue Ops[2] = {Chain, FPSIdx}; SDVTList VTs = DAG.getVTList(IntVT); return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops); } SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const { // When we pop the dynamic allocation we need to restore the SP link. SDLoc dl(Op); // Get the correct type for pointers. 
EVT PtrVT = getPointerTy(DAG.getDataLayout()); // Construct the stack pointer operand. bool isPPC64 = Subtarget.isPPC64(); unsigned SP = isPPC64 ? PPC::X1 : PPC::R1; SDValue StackPtr = DAG.getRegister(SP, PtrVT); // Get the operands for the STACKRESTORE. SDValue Chain = Op.getOperand(0); SDValue SaveSP = Op.getOperand(1); // Load the old link SP. SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo()); // Restore the stack pointer. Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP); // Store the old link SP. return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo()); } SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool isPPC64 = Subtarget.isPPC64(); EVT PtrVT = getPointerTy(MF.getDataLayout()); // Get current frame pointer save index. The users of this index will be // primarily DYNALLOC instructions. PPCFunctionInfo *FI = MF.getInfo(); int RASI = FI->getReturnAddrSaveIndex(); // If the frame pointer save index hasn't been defined yet. if (!RASI) { // Find out what the fix offset of the frame pointer save area. int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset(); // Allocate the frame index for frame pointer save area. RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false); // Save the result. FI->setReturnAddrSaveIndex(RASI); } return DAG.getFrameIndex(RASI, PtrVT); } SDValue PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool isPPC64 = Subtarget.isPPC64(); EVT PtrVT = getPointerTy(MF.getDataLayout()); // Get current frame pointer save index. The users of this index will be // primarily DYNALLOC instructions. PPCFunctionInfo *FI = MF.getInfo(); int FPSI = FI->getFramePointerSaveIndex(); // If the frame pointer save index hasn't been defined yet. if (!FPSI) { // Find out what the fix offset of the frame pointer save area. int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset(); // Allocate the frame index for frame pointer save area. FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); // Save the result. FI->setFramePointerSaveIndex(FPSI); } return DAG.getFrameIndex(FPSI, PtrVT); } SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { // Get the inputs. SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); SDLoc dl(Op); // Get the correct type for pointers. EVT PtrVT = getPointerTy(DAG.getDataLayout()); // Negate the size. SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT, DAG.getConstant(0, dl, PtrVT), Size); // Construct a node for the frame pointer save index. SDValue FPSIdx = getFramePointerFrameIndex(DAG); // Build a DYNALLOC node. SDValue Ops[3] = { Chain, NegSize, FPSIdx }; SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other); return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops); } SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool isPPC64 = Subtarget.isPPC64(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 
8 : 4, 0, false); return DAG.getFrameIndex(FI, PtrVT); } SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), Op.getOperand(1)); } SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0), Op.getOperand(1)); } SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { if (Op.getValueType().isVector()) return LowerVectorLoad(Op, DAG); assert(Op.getValueType() == MVT::i1 && "Custom lowering only for i1 loads"); // First, load 8 bits into 32 bits, then truncate to 1 bit. SDLoc dl(Op); LoadSDNode *LD = cast(Op); SDValue Chain = LD->getChain(); SDValue BasePtr = LD->getBasePtr(); MachineMemOperand *MMO = LD->getMemOperand(); SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain, BasePtr, MVT::i8, MMO); SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD); SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) }; return DAG.getMergeValues(Ops, dl); } SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { if (Op.getOperand(1).getValueType().isVector()) return LowerVectorStore(Op, DAG); assert(Op.getOperand(1).getValueType() == MVT::i1 && "Custom lowering only for i1 stores"); // First, zero extend to 32 bits, then use a truncating store to 8 bits. SDLoc dl(Op); StoreSDNode *ST = cast(Op); SDValue Chain = ST->getChain(); SDValue BasePtr = ST->getBasePtr(); SDValue Value = ST->getValue(); MachineMemOperand *MMO = ST->getMemOperand(); Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()), Value); return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO); } // FIXME: Remove this once the ANDI glue bug is fixed: SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType() == MVT::i1 && "Custom lowering only for i1 results"); SDLoc DL(Op); return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1, Op.getOperand(0)); } /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when /// possible. SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // Not FP? Not a fsel. if (!Op.getOperand(0).getValueType().isFloatingPoint() || !Op.getOperand(2).getValueType().isFloatingPoint()) return Op; // We might be able to do better than this under some circumstances, but in // general, fsel-based lowering of select is a finite-math-only optimization. // For more information, see section F.3 of the 2.06 ISA specification. if (!DAG.getTarget().Options.NoInfsFPMath || !DAG.getTarget().Options.NoNaNsFPMath) return Op; // TODO: Propagate flags from the select rather than global settings. SDNodeFlags Flags; Flags.setNoInfs(true); Flags.setNoNaNs(true); ISD::CondCode CC = cast(Op.getOperand(4))->get(); EVT ResVT = Op.getValueType(); EVT CmpVT = Op.getOperand(0).getValueType(); SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); SDValue TV = Op.getOperand(2), FV = Op.getOperand(3); SDLoc dl(Op); // If the RHS of the comparison is a 0.0, we don't need to do the // subtraction at all. SDValue Sel1; if (isFloatingPointZero(RHS)) switch (CC) { default: break; // SETUO etc aren't handled by fsel. 
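// --- Editorial sketch (not part of the upstream sources) --------------------
// Scalar model of the fsel-based rewrite LowerSELECT_CC performs (the DAG
// cases continue below).  fsel selects on "operand >= 0.0", which is only a
// faithful comparison when NaNs and infinities are excluded -- hence the
// finite-math guard above.  Helper names are mine.
inline double fselModel(double Cmp, double TV, double FV) {
  return Cmp >= 0.0 ? TV : FV;
}
// select_cc(LHS >= RHS, TV, FV)  ==>  fsel(LHS - RHS, TV, FV)
inline double selectGE(double LHS, double RHS, double TV, double FV) {
  return fselModel(LHS - RHS, TV, FV);
}
// select_cc(LHS == RHS, TV, FV) needs two fsels: TV is chosen only when both
// LHS - RHS >= 0 and -(LHS - RHS) >= 0 hold.
inline double selectEQ(double LHS, double RHS, double TV, double FV) {
  double Sel1 = fselModel(LHS - RHS, TV, FV);
  return fselModel(-(LHS - RHS), Sel1, FV);
}
// --- end of editorial sketch -------------------------------------------------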
case ISD::SETNE: std::swap(TV, FV); LLVM_FALLTHROUGH; case ISD::SETEQ: if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); return DAG.getNode(PPCISD::FSEL, dl, ResVT, DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV); case ISD::SETULT: case ISD::SETLT: std::swap(TV, FV); // fsel is natively setge, swap operands for setlt LLVM_FALLTHROUGH; case ISD::SETOGE: case ISD::SETGE: if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); case ISD::SETUGT: case ISD::SETGT: std::swap(TV, FV); // fsel is natively setge, swap operands for setlt LLVM_FALLTHROUGH; case ISD::SETOLE: case ISD::SETLE: if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); return DAG.getNode(PPCISD::FSEL, dl, ResVT, DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV); } SDValue Cmp; switch (CC) { default: break; // SETUO etc aren't handled by fsel. case ISD::SETNE: std::swap(TV, FV); LLVM_FALLTHROUGH; case ISD::SETEQ: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); return DAG.getNode(PPCISD::FSEL, dl, ResVT, DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); case ISD::SETULT: case ISD::SETLT: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOGE: case ISD::SETGE: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); case ISD::SETUGT: case ISD::SETGT: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOLE: case ISD::SETLE: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); } return Op; } void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, SelectionDAG &DAG, const SDLoc &dl) const { assert(Op.getOperand(0).getValueType().isFloatingPoint()); SDValue Src = Op.getOperand(0); if (Src.getValueType() == MVT::f32) Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); SDValue Tmp; switch (Op.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); case MVT::i32: Tmp = DAG.getNode( Op.getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIWZ : (Subtarget.hasFPCVT() ? 
PPCISD::FCTIWUZ : PPCISD::FCTIDZ), dl, MVT::f64, Src); break; case MVT::i64: assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && "i64 FP_TO_UINT is supported only with FPCVT"); Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ, dl, MVT::f64, Src); break; } // Convert the FP value to an int value through memory. bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() && (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()); SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); int FI = cast(FIPtr)->getIndex(); MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); // Emit a store to the stack slot. SDValue Chain; if (i32Stack) { MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4); SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr }; Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO); } else Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI); // Result is a load from the stack slot. If loading 4 bytes, make sure to // add in a bias on big endian. if (Op.getValueType() == MVT::i32 && !i32Stack) { FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, DAG.getConstant(4, dl, FIPtr.getValueType())); MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4); } RLI.Chain = Chain; RLI.Ptr = FIPtr; RLI.MPI = MPI; } /// Custom lowers floating point to integer conversions to use /// the direct move instructions available in ISA 2.07 to avoid the /// need for load/store combinations. SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const { assert(Op.getOperand(0).getValueType().isFloatingPoint()); SDValue Src = Op.getOperand(0); if (Src.getValueType() == MVT::f32) Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); SDValue Tmp; switch (Op.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); case MVT::i32: Tmp = DAG.getNode( Op.getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIWZ : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), dl, MVT::f64, Src); Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp); break; case MVT::i64: assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && "i64 FP_TO_UINT is supported only with FPCVT"); Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ, dl, MVT::f64, Src); Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp); break; } return Tmp; } SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const { // FP to INT conversions are legal for f128. if (EnableQuadPrecision && (Op->getOperand(0).getValueType() == MVT::f128)) return Op; // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on // PPC (the libcall is not available). if (Op.getOperand(0).getValueType() == MVT::ppcf128) { if (Op.getValueType() == MVT::i32) { if (Op.getOpcode() == ISD::FP_TO_SINT) { SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(0), DAG.getIntPtrConstant(0, dl)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(0), DAG.getIntPtrConstant(1, dl)); // Add the two halves of the long double in round-to-zero mode. SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); // Now use a smaller FP_TO_SINT. 
return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); } if (Op.getOpcode() == ISD::FP_TO_UINT) { const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31)); SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128); // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X // FIXME: generated code sucks. // TODO: Are there fast-math-flags to propagate to this FSUB? SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Op.getOperand(0), Tmp); True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True); True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, DAG.getConstant(0x80000000, dl, MVT::i32)); SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op.getOperand(0)); return DAG.getSelectCC(dl, Op.getOperand(0), Tmp, True, False, ISD::SETGE); } } return SDValue(); } if (Subtarget.hasDirectMove() && Subtarget.isPPC64()) return LowerFP_TO_INTDirectMove(Op, DAG, dl); ReuseLoadInfo RLI; LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges); } // We're trying to insert a regular store, S, and then a load, L. If the // incoming value, O, is a load, we might just be able to have our load use the // address used by O. However, we don't know if anything else will store to // that address before we can load from it. To prevent this situation, we need // to insert our load, L, into the chain as a peer of O. To do this, we give L // the same chain operand as O, we create a token factor from the chain results // of O and L, and we replace all uses of O's chain result with that token // factor (see spliceIntoChain below for this last part). bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI, SelectionDAG &DAG, ISD::LoadExtType ET) const { SDLoc dl(Op); if (ET == ISD::NON_EXTLOAD && (Op.getOpcode() == ISD::FP_TO_UINT || Op.getOpcode() == ISD::FP_TO_SINT) && isOperationLegalOrCustom(Op.getOpcode(), Op.getOperand(0).getValueType())) { LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); return true; } LoadSDNode *LD = dyn_cast(Op); if (!LD || LD->getExtensionType() != ET || LD->isVolatile() || LD->isNonTemporal()) return false; if (LD->getMemoryVT() != MemVT) return false; RLI.Ptr = LD->getBasePtr(); if (LD->isIndexed() && !LD->getOffset().isUndef()) { assert(LD->getAddressingMode() == ISD::PRE_INC && "Non-pre-inc AM on PPC?"); RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr, LD->getOffset()); } RLI.Chain = LD->getChain(); RLI.MPI = LD->getPointerInfo(); RLI.IsDereferenceable = LD->isDereferenceable(); RLI.IsInvariant = LD->isInvariant(); RLI.Alignment = LD->getAlignment(); RLI.AAInfo = LD->getAAInfo(); RLI.Ranges = LD->getRanges(); RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1); return true; } // Given the head of the old chain, ResChain, insert a token factor containing // it and NewResChain, and make users of ResChain now be users of that token // factor. // TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead. 
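// --- Editorial sketch (not part of the upstream sources) --------------------
// The same "rebase at 2^31" idea the ppcf128 FP_TO_UINT expansion above uses,
// written for a plain double and a 32-bit result.  Assumes X is non-negative
// and representable in 32 bits; the function name is mine.
#include <cstdint>

inline uint32_t fpToUint32ViaSigned(double X) {
  const double TwoE31 = 2147483648.0;              // 2^31
  if (X >= TwoE31)                                 // too big for a signed cvt:
    return static_cast<uint32_t>(static_cast<int32_t>(X - TwoE31)) + 0x80000000u;
  return static_cast<uint32_t>(static_cast<int32_t>(X)); // fits as signed
}
// --- end of editorial sketch -------------------------------------------------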
void PPCTargetLowering::spliceIntoChain(SDValue ResChain, SDValue NewResChain, SelectionDAG &DAG) const { if (!ResChain) return; SDLoc dl(NewResChain); SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, NewResChain, DAG.getUNDEF(MVT::Other)); assert(TF.getNode() != NewResChain.getNode() && "A new TF really is required here"); DAG.ReplaceAllUsesOfValueWith(ResChain, TF); DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain); } /// Analyze profitability of direct move /// prefer float load to int load plus direct move /// when there is no integer use of int load bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const { SDNode *Origin = Op.getOperand(0).getNode(); if (Origin->getOpcode() != ISD::LOAD) return true; // If there is no LXSIBZX/LXSIHZX, like Power8, // prefer direct move if the memory size is 1 or 2 bytes. MachineMemOperand *MMO = cast(Origin)->getMemOperand(); if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2) return true; for (SDNode::use_iterator UI = Origin->use_begin(), UE = Origin->use_end(); UI != UE; ++UI) { // Only look at the users of the loaded value. if (UI.getUse().get().getResNo() != 0) continue; if (UI->getOpcode() != ISD::SINT_TO_FP && UI->getOpcode() != ISD::UINT_TO_FP) return true; } return false; } /// Custom lowers integer to floating point conversions to use /// the direct move instructions available in ISA 2.07 to avoid the /// need for load/store combinations. SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const { assert((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) && "Invalid floating point type as target of conversion"); assert(Subtarget.hasFPCVT() && "Int to FP conversions with direct moves require FPCVT"); SDValue FP; SDValue Src = Op.getOperand(0); bool SinglePrec = Op.getValueType() == MVT::f32; bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32; bool Signed = Op.getOpcode() == ISD::SINT_TO_FP; unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) : (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU); if (WordInt) { FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ, dl, MVT::f64, Src); FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); } else { FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src); FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); } return FP; } SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); // Conversions to f128 are legal. if (EnableQuadPrecision && (Op.getValueType() == MVT::f128)) return Op; if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) { if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64) return SDValue(); SDValue Value = Op.getOperand(0); // The values are now known to be -1 (false) or 1 (true). To convert this // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); if (Op.getValueType() != MVT::v4f64) Value = DAG.getNode(ISD::FP_ROUND, dl, Op.getValueType(), Value, DAG.getIntPtrConstant(1, dl)); return Value; } // Don't handle ppc_fp128 here; let it be lowered to a libcall. 
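// --- Editorial sketch (not part of the upstream sources) --------------------
// The single-fma mapping used by the QPX v4i1 path above: QPX boolean lanes
// hold -1.0 (false) / 1.0 (true), and fma(V, 0.5, 0.5) = (V + 1.0) * 0.5 turns
// that into the usual 0.0 / 1.0 encoding.  Checked per lane at compile time;
// the helper name is mine.
constexpr double qpxBoolToFP(double V) { return V * 0.5 + 0.5; }
static_assert(qpxBoolToFP(-1.0) == 0.0, "QPX false lane maps to 0.0");
static_assert(qpxBoolToFP( 1.0) == 1.0, "QPX true lane maps to 1.0");
// --- end of editorial sketch -------------------------------------------------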
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) return SDValue(); if (Op.getOperand(0).getValueType() == MVT::i1) return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0), DAG.getConstantFP(1.0, dl, Op.getValueType()), DAG.getConstantFP(0.0, dl, Op.getValueType())); // If we have direct moves, we can do all the conversion, skip the store/load // however, without FPCVT we can't do most conversions. if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) && Subtarget.isPPC64() && Subtarget.hasFPCVT()) return LowerINT_TO_FPDirectMove(Op, DAG, dl); assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && "UINT_TO_FP is supported only with FPCVT"); // If we have FCFIDS, then use it when converting to single-precision. // Otherwise, convert to double-precision and then round. unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS : PPCISD::FCFIDS) : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU : PPCISD::FCFID); MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? MVT::f32 : MVT::f64; if (Op.getOperand(0).getValueType() == MVT::i64) { SDValue SINT = Op.getOperand(0); // When converting to single-precision, we actually need to convert // to double-precision first and then round to single-precision. // To avoid double-rounding effects during that operation, we have // to prepare the input operand. Bits that might be truncated when // converting to double-precision are replaced by a bit that won't // be lost at this stage, but is below the single-precision rounding // position. // // However, if -enable-unsafe-fp-math is in effect, accept double // rounding to avoid the extra overhead. if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT() && !DAG.getTarget().Options.UnsafeFPMath) { // Twiddle input to make sure the low 11 bits are zero. (If this // is the case, we are guaranteed the value will fit into the 53 bit // mantissa of an IEEE double-precision value without rounding.) // If any of those low 11 bits were not zero originally, make sure // bit 12 (value 2048) is set instead, so that the final rounding // to single-precision gets the correct result. SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64, SINT, DAG.getConstant(2047, dl, MVT::i64)); Round = DAG.getNode(ISD::ADD, dl, MVT::i64, Round, DAG.getConstant(2047, dl, MVT::i64)); Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT); Round = DAG.getNode(ISD::AND, dl, MVT::i64, Round, DAG.getConstant(-2048, dl, MVT::i64)); // However, we cannot use that value unconditionally: if the magnitude // of the input value is small, the bit-twiddling we did above might // end up visibly changing the output. Fortunately, in that case, we // don't need to twiddle bits since the original input will convert // exactly to double-precision floating-point already. Therefore, // construct a conditional to use the original value if the top 11 // bits are all sign-bit copies, and use the rounded value computed // above otherwise. 
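// --- Editorial sketch (not part of the upstream sources) --------------------
// Scalar form of the preparation the comment above describes, for the
// no-FPCVT i64 -> f32 path (the DAG nodes follow below).  Forcing bit 11
// whenever any of the low 11 bits is set keeps the i64 -> f64 -> f32 double
// rounding from differing from a direct i64 -> f32 rounding.  Assumes
// arithmetic right shift for signed values; the function name is mine.
#include <cstdint>

inline int64_t prepareForSinglePrecision(int64_t SInt) {
  uint64_t V = static_cast<uint64_t>(SInt);
  uint64_t Round = (V & 2047) + 2047;   // carries into bit 11 iff any low bit set
  Round |= V;
  Round &= ~UINT64_C(2047);             // clear the low 11 bits
  // Small magnitudes convert to f64 exactly, so leave them untouched: keep the
  // original value when the top 11 bits are all copies of the sign bit.
  bool TopBitsAreSignCopies = static_cast<uint64_t>(SInt >> 53) + 1 <= 1;
  return TopBitsAreSignCopies ? SInt : static_cast<int64_t>(Round);
}
// --- end of editorial sketch -------------------------------------------------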
SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64, SINT, DAG.getConstant(53, dl, MVT::i32)); Cond = DAG.getNode(ISD::ADD, dl, MVT::i64, Cond, DAG.getConstant(1, dl, MVT::i64)); Cond = DAG.getSetCC(dl, MVT::i32, Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT); SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); } ReuseLoadInfo RLI; SDValue Bits; MachineFunction &MF = DAG.getMachineFunction(); if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) { Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges); spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); } else if (Subtarget.hasLFIWAX() && canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) { MachineMemOperand *MMO = MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, RLI.Alignment, RLI.AAInfo, RLI.Ranges); SDValue Ops[] = { RLI.Chain, RLI.Ptr }; Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl, DAG.getVTList(MVT::f64, MVT::Other), Ops, MVT::i32, MMO); spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); } else if (Subtarget.hasFPCVT() && canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) { MachineMemOperand *MMO = MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, RLI.Alignment, RLI.AAInfo, RLI.Ranges); SDValue Ops[] = { RLI.Chain, RLI.Ptr }; Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl, DAG.getVTList(MVT::f64, MVT::Other), Ops, MVT::i32, MMO); spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); } else if (((Subtarget.hasLFIWAX() && SINT.getOpcode() == ISD::SIGN_EXTEND) || (Subtarget.hasFPCVT() && SINT.getOpcode() == ISD::ZERO_EXTEND)) && SINT.getOperand(0).getValueType() == MVT::i32) { MachineFrameInfo &MFI = MF.getFrameInfo(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); int FrameIdx = MFI.CreateStackObject(4, 4, false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), FrameIdx)); assert(cast(Store)->getMemoryVT() == MVT::i32 && "Expected an i32 store"); RLI.Ptr = FIdx; RLI.Chain = Store; RLI.MPI = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); RLI.Alignment = 4; MachineMemOperand *MMO = MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, RLI.Alignment, RLI.AAInfo, RLI.Ranges); SDValue Ops[] = { RLI.Chain, RLI.Ptr }; Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ? PPCISD::LFIWZX : PPCISD::LFIWAX, dl, DAG.getVTList(MVT::f64, MVT::Other), Ops, MVT::i32, MMO); } else Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); return FP; } assert(Op.getOperand(0).getValueType() == MVT::i32 && "Unhandled INT_TO_FP type in custom expander!"); // Since we only generate this in 64-bit mode, we can take advantage of // 64-bit registers. In particular, sign extend the input value into the // 64-bit register with extsw, store the WHOLE 64-bit value into the stack // then lfd it and fcfid it. 
MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); EVT PtrVT = getPointerTy(MF.getDataLayout()); SDValue Ld; if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) { ReuseLoadInfo RLI; bool ReusingLoad; if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI, DAG))) { int FrameIdx = MFI.CreateStackObject(4, 4, false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), FrameIdx)); assert(cast(Store)->getMemoryVT() == MVT::i32 && "Expected an i32 store"); RLI.Ptr = FIdx; RLI.Chain = Store; RLI.MPI = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); RLI.Alignment = 4; } MachineMemOperand *MMO = MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, RLI.Alignment, RLI.AAInfo, RLI.Ranges); SDValue Ops[] = { RLI.Chain, RLI.Ptr }; Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::LFIWZX : PPCISD::LFIWAX, dl, DAG.getVTList(MVT::f64, MVT::Other), Ops, MVT::i32, MMO); if (ReusingLoad) spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG); } else { assert(Subtarget.isPPC64() && "i32->FP without LFIWAX supported only on PPC64"); int FrameIdx = MFI.CreateStackObject(8, 8, false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Op.getOperand(0)); // STD the extended value into the stack slot. SDValue Store = DAG.getStore( DAG.getEntryNode(), dl, Ext64, FIdx, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx)); // Load the value as a double. Ld = DAG.getLoad( MVT::f64, dl, Store, FIdx, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx)); } // FCFID it and return it. SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld); if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); return FP; } SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); /* The rounding mode is in bits 30:31 of FPSR, and has the following settings: 00 Round to nearest 01 Round to 0 10 Round to +inf 11 Round to -inf FLT_ROUNDS, on the other hand, expects the following: -1 Undefined 0 Round to 0 1 Round to nearest 2 Round to +inf 3 Round to -inf To perform the conversion, we do: ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1)) */ MachineFunction &MF = DAG.getMachineFunction(); EVT VT = Op.getValueType(); EVT PtrVT = getPointerTy(MF.getDataLayout()); // Save FP Control Word to register EVT NodeTys[] = { MVT::f64, // return register MVT::Glue // unused in this context }; SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None); // Save FP register to stack slot int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot, MachinePointerInfo()); // Load FP Control Word from low 32 bits of stack slot. 
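// --- Editorial sketch (not part of the upstream sources) --------------------
// The FPSCR -> FLT_ROUNDS mapping described in the comment above, checked at
// compile time; the DAG nodes below compute exactly this expression on the
// loaded control word.  The helper name is mine.
constexpr unsigned fltRoundsFromFPSCR(unsigned FPSCRLow2) {
  return (FPSCRLow2 & 0x3) ^ ((~FPSCRLow2 & 0x3) >> 1);
}
static_assert(fltRoundsFromFPSCR(0) == 1, "00: round to nearest  -> 1");
static_assert(fltRoundsFromFPSCR(1) == 0, "01: round toward zero -> 0");
static_assert(fltRoundsFromFPSCR(2) == 2, "10: round toward +inf -> 2");
static_assert(fltRoundsFromFPSCR(3) == 3, "11: round toward -inf -> 3");
// --- end of editorial sketch -------------------------------------------------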
SDValue Four = DAG.getConstant(4, dl, PtrVT); SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo()); // Transform as necessary SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, CWD, DAG.getConstant(3, dl, MVT::i32)); SDValue CWD2 = DAG.getNode(ISD::SRL, dl, MVT::i32, DAG.getNode(ISD::AND, dl, MVT::i32, DAG.getNode(ISD::XOR, dl, MVT::i32, CWD, DAG.getConstant(3, dl, MVT::i32)), DAG.getConstant(3, dl, MVT::i32)), DAG.getConstant(1, dl, MVT::i32)); SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2); return DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); } SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); SDLoc dl(Op); assert(Op.getNumOperands() == 3 && VT == Op.getOperand(1).getValueType() && "Unexpected SHL!"); // Expand into a bunch of logical ops. Note that these ops // depend on the PPC behavior for oversized shift amounts. SDValue Lo = Op.getOperand(0); SDValue Hi = Op.getOperand(1); SDValue Amt = Op.getOperand(2); EVT AmtVT = Amt.getValueType(); SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Amt); SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt); SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1); SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3); SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, DAG.getConstant(-BitWidth, dl, AmtVT)); SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5); SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); SDValue OutOps[] = { OutLo, OutHi }; return DAG.getMergeValues(OutOps, dl); } SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc dl(Op); unsigned BitWidth = VT.getSizeInBits(); assert(Op.getNumOperands() == 3 && VT == Op.getOperand(1).getValueType() && "Unexpected SRL!"); // Expand into a bunch of logical ops. Note that these ops // depend on the PPC behavior for oversized shift amounts. SDValue Lo = Op.getOperand(0); SDValue Hi = Op.getOperand(1); SDValue Amt = Op.getOperand(2); EVT AmtVT = Amt.getValueType(); SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Amt); SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, DAG.getConstant(-BitWidth, dl, AmtVT)); SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5); SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); SDValue OutOps[] = { OutLo, OutHi }; return DAG.getMergeValues(OutOps, dl); } SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); assert(Op.getNumOperands() == 3 && VT == Op.getOperand(1).getValueType() && "Unexpected SRA!"); // Expand into a bunch of logical ops, followed by a select_cc. 
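// --- Editorial sketch (not part of the upstream sources) --------------------
// The SHL_PARTS expansion above, written for a 128-bit value held in two
// 64-bit halves.  PPC's shift instructions yield 0 for amounts >= the register
// width, which the DAG expansion relies on; plain C++ leaves that undefined,
// so the helpers model it explicitly.  Valid for Amt in [0, 127]; names are
// mine.
#include <cstdint>

inline uint64_t shlPPC(uint64_t V, unsigned Amt) { return Amt >= 64 ? 0 : V << Amt; }
inline uint64_t srlPPC(uint64_t V, unsigned Amt) { return Amt >= 64 ? 0 : V >> Amt; }

inline void shl128(uint64_t Lo, uint64_t Hi, unsigned Amt,
                   uint64_t &OutLo, uint64_t &OutHi) {
  // The three terms cover both Amt < 64 and 64 <= Amt < 128 without a branch,
  // just like the Tmp2 | Tmp3 | Tmp6 combination in LowerSHL_PARTS.
  OutHi = shlPPC(Hi, Amt) | srlPPC(Lo, 64 - Amt) | shlPPC(Lo, Amt - 64);
  OutLo = shlPPC(Lo, Amt);
}
// --- end of editorial sketch -------------------------------------------------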
SDValue Lo = Op.getOperand(0); SDValue Hi = Op.getOperand(1); SDValue Amt = Op.getOperand(2); EVT AmtVT = Amt.getValueType(); SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Amt); SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, DAG.getConstant(-BitWidth, dl, AmtVT)); SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5); SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt); SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT), Tmp4, Tmp6, ISD::SETLE); SDValue OutOps[] = { OutLo, OutHi }; return DAG.getMergeValues(OutOps, dl); } //===----------------------------------------------------------------------===// // Vector related lowering. // /// BuildSplatI - Build a canonical splati of Val with an element size of /// SplatSize. Cast the result to VT. static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl) { assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); static const MVT VTys[] = { // canonical VT to use for each size. MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 }; EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. if (Val == -1) SplatSize = 1; EVT CanonicalVT = VTys[SplatSize-1]; // Build a canonical splat for this value. return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT)); } /// BuildIntrinsicOp - Return a unary operator intrinsic node with the /// specified intrinsic ID. static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT = MVT::Other) { if (DestVT == MVT::Other) DestVT = Op.getValueType(); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, DAG.getConstant(IID, dl, MVT::i32), Op); } /// BuildIntrinsicOp - Return a binary operator intrinsic node with the /// specified intrinsic ID. static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT = MVT::Other) { if (DestVT == MVT::Other) DestVT = LHS.getValueType(); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, DAG.getConstant(IID, dl, MVT::i32), LHS, RHS); } /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the /// specified intrinsic ID. static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, SDValue Op2, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT = MVT::Other) { if (DestVT == MVT::Other) DestVT = Op0.getValueType(); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2); } /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified /// amount. The result has the specified value type. static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl) { // Force LHS/RHS to be the right type. LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); int Ops[16]; for (unsigned i = 0; i != 16; ++i) Ops[i] = i + Amt; SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, T); } /// Do we have an efficient pattern in a .td file for this node? /// /// \param V - pointer to the BuildVectorSDNode being matched /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves? 
/// /// There are some patterns where it is beneficial to keep a BUILD_VECTOR /// node as a BUILD_VECTOR node rather than expanding it. The patterns where /// the opposite is true (expansion is beneficial) are: /// - The node builds a vector out of integers that are not 32 or 64-bits /// - The node builds a vector out of constants /// - The node is a "load-and-splat" /// In all other cases, we will choose to keep the BUILD_VECTOR. static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector) { EVT VecVT = V->getValueType(0); bool RightType = VecVT == MVT::v2f64 || (HasP8Vector && VecVT == MVT::v4f32) || (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32)); if (!RightType) return false; bool IsSplat = true; bool IsLoad = false; SDValue Op0 = V->getOperand(0); // This function is called in a block that confirms the node is not a constant // splat. So a constant BUILD_VECTOR here means the vector is built out of // different constants. if (V->isConstant()) return false; for (int i = 0, e = V->getNumOperands(); i < e; ++i) { if (V->getOperand(i).isUndef()) return false; // We want to expand nodes that represent load-and-splat even if the // loaded value is a floating point truncation or conversion to int. if (V->getOperand(i).getOpcode() == ISD::LOAD || (V->getOperand(i).getOpcode() == ISD::FP_ROUND && V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) || (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT && V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) || (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT && V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD)) IsLoad = true; // If the operands are different or the input is not a load and has more // uses than just this BV node, then it isn't a splat. if (V->getOperand(i) != Op0 || (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode()))) IsSplat = false; } return !(IsSplat && IsLoad); } // Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128. SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); SDValue Op0 = Op->getOperand(0); if (!EnableQuadPrecision || (Op.getValueType() != MVT::f128 ) || (Op0.getOpcode() != ISD::BUILD_PAIR) || (Op0.getOperand(0).getValueType() != MVT::i64) || (Op0.getOperand(1).getValueType() != MVT::i64)) return SDValue(); return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0), Op0.getOperand(1)); } // If this is a case we can't handle, return null and let the default // expansion code take care of it. If we CAN select this case, and if it // selects to a single instruction, return Op. Otherwise, if we can codegen // this case more efficiently than a constant pool load, lower it to the // sequence of ops that should be used. SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); BuildVectorSDNode *BVN = dyn_cast(Op.getNode()); assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) { // We first build an i32 vector, load it into a QPX register, // then convert it to a floating-point vector and compare it // to a zero vector to get the boolean result. 
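    // When every operand is a constant, the code below takes a shortcut and
    // materializes the mask straight from the constant pool: for example, the
    // operands <1, 0, undef, 1> become the v4f32 constant
    // <1.0, -1.0, undef, 1.0>, which QVLFSb then loads as the v4i1 result.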
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = MFI.CreateStackObject(16, 16, false); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); assert(BVN->getNumOperands() == 4 && "BUILD_VECTOR for v4i1 does not have 4 operands"); bool IsConst = true; for (unsigned i = 0; i < 4; ++i) { if (BVN->getOperand(i).isUndef()) continue; if (!isa(BVN->getOperand(i))) { IsConst = false; break; } } if (IsConst) { Constant *One = ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0); Constant *NegOne = ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0); Constant *CV[4]; for (unsigned i = 0; i < 4; ++i) { if (BVN->getOperand(i).isUndef()) CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext())); else if (isNullConstant(BVN->getOperand(i))) CV[i] = NegOne; else CV[i] = One; } Constant *CP = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()), 16 /* alignment */); SDValue Ops[] = {DAG.getEntryNode(), CPIdx}; SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other}); return DAG.getMemIntrinsicNode( PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); } SmallVector Stores; for (unsigned i = 0; i < 4; ++i) { if (BVN->getOperand(i).isUndef()) continue; unsigned Offset = 4*i; SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize(); if (StoreSize > 4) { Stores.push_back( DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx, PtrInfo.getWithOffset(Offset), MVT::i32)); } else { SDValue StoreValue = BVN->getOperand(i); if (StoreSize < 4) StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue); Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx, PtrInfo.getWithOffset(Offset))); } } SDValue StoreChain; if (!Stores.empty()) StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); else StoreChain = DAG.getEntryNode(); // Now load from v4i32 into the QPX register; this will extend it to // v4i64 but not yet convert it to a floating point. Nevertheless, this // is typed as v4f64 because the QPX register integer states are not // explicitly represented. SDValue Ops[] = {StoreChain, DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32), FIdx}; SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other}); SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, VTs, Ops, MVT::v4i32, PtrInfo); LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32), LoadedVect); SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64); return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ); } // All other QPX vectors are handled by generic code. if (Subtarget.hasQPX()) return SDValue(); // Check if this is a splat of a constant value. APInt APSplatBits, APSplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0, !Subtarget.isLittleEndian()) || SplatBitSize > 32) { // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be // lowered to VSX instructions under certain conditions. // Without VSX, there is no pattern more efficient than expanding the node. 
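    // (haveEfficientBuildVectorPattern keeps the BUILD_VECTOR only when the
    // element type is supported by the subtarget's direct-move/P8 vector
    // facilities and the node is neither built entirely from constants nor a
    // load-and-splat; see its definition above.)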
if (Subtarget.hasVSX() && haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(), Subtarget.hasP8Vector())) return Op; return SDValue(); } unsigned SplatBits = APSplatBits.getZExtValue(); unsigned SplatUndef = APSplatUndef.getZExtValue(); unsigned SplatSize = SplatBitSize / 8; // First, handle single instruction cases. // All zeros? if (SplatBits == 0) { // Canonicalize all zero vectors to be v4i32. if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { SDValue Z = DAG.getConstant(0, dl, MVT::v4i32); Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z); } return Op; } // We have XXSPLTIB for constant splats one byte wide if (Subtarget.hasP9Vector() && SplatSize == 1) { // This is a splat of 1-byte elements with some elements potentially undef. // Rather than trying to match undef in the SDAG patterns, ensure that all // elements are the same constant. if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) { SmallVector Ops(16, DAG.getConstant(SplatBits, dl, MVT::i32)); SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops); if (Op.getValueType() != MVT::v16i8) return DAG.getBitcast(Op.getValueType(), NewBV); return NewBV; } // BuildVectorSDNode::isConstantSplat() is actually pretty smart. It'll // detect that constant splats like v8i16: 0xABAB are really just splats // of a 1-byte constant. In this case, we need to convert the node to a // splat of v16i8 and a bitcast. if (Op.getValueType() != MVT::v16i8) return DAG.getBitcast(Op.getValueType(), DAG.getConstant(SplatBits, dl, MVT::v16i8)); return Op; } // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> (32-SplatBitSize)); if (SextVal >= -16 && SextVal <= 15) return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); // Two instruction sequences. // If this value is in the range [-32,30] and is even, use: // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2) // If this value is in the range [17,31] and is odd, use: // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16) // If this value is in the range [-31,-17] and is odd, use: // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16) // Note the last two are three-instruction sequences. if (SextVal >= -32 && SextVal <= 31) { // To avoid having these optimizations undone by constant folding, // we convert to a pseudo that will be expanded later into one of // the above forms. SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32); EVT VT = (SplatSize == 1 ? MVT::v16i8 : (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32)); SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32); SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); if (VT == Op.getValueType()) return RetVal; else return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal); } // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important // for fneg/fabs. if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { // Make -1 and vspltisw -1: SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl); // Make the VSLW intrinsic, computing 0x8000_0000. SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, OnesV, DAG, dl); // xor by OnesV to invert it. Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV); return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } // Check to see if this is a wide variety of vsplti*, binop self cases. 
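  // For example, a splat of the byte value 64 has no single vsplti encoding,
  // but it matches the "vsplti + shl self" case below:
  //   t = vspltisb(4);  vslb(t, t)   // each byte becomes 4 << 4 == 64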
static const signed char SplatCsts[] = { -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 }; for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) { // Indirect through the SplatCsts array so that we favor 'vsplti -1' for // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1' int i = SplatCsts[idx]; // Figure out what shift amount will be used by altivec if shifted by i in // this splat size. unsigned TypeShiftAmt = i & (SplatBitSize-1); // vsplti + shl self. if (SextVal == (int)((unsigned)i << TypeShiftAmt)) { SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); static const unsigned IIDs[] = { // Intrinsic to use for each size. Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, Intrinsic::ppc_altivec_vslw }; Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } // vsplti + srl self. if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); static const unsigned IIDs[] = { // Intrinsic to use for each size. Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, Intrinsic::ppc_altivec_vsrw }; Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } // vsplti + sra self. if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); static const unsigned IIDs[] = { // Intrinsic to use for each size. Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, Intrinsic::ppc_altivec_vsraw }; Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } // vsplti + rol self. if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); static const unsigned IIDs[] = { // Intrinsic to use for each size. Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, Intrinsic::ppc_altivec_vrlw }; Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } // t = vsplti c, result = vsldoi t, t, 1 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1; return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 2 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2; return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 3 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3; return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); } } return SDValue(); } /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit /// the specified operations to build the shuffle. 
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl) { unsigned OpNum = (PFEntry >> 26) & 0x0F; unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); enum { OP_COPY = 0, // Copy, used for things like to say it is <0,1,2,3> OP_VMRGHW, OP_VMRGLW, OP_VSPLTISW0, OP_VSPLTISW1, OP_VSPLTISW2, OP_VSPLTISW3, OP_VSLDOI4, OP_VSLDOI8, OP_VSLDOI12 }; if (OpNum == OP_COPY) { if (LHSID == (1*9+2)*9+3) return LHS; assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); return RHS; } SDValue OpLHS, OpRHS; OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); int ShufIdxs[16]; switch (OpNum) { default: llvm_unreachable("Unknown i32 permute!"); case OP_VMRGHW: ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; break; case OP_VMRGLW: ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; break; case OP_VSPLTISW0: for (unsigned i = 0; i != 16; ++i) ShufIdxs[i] = (i&3)+0; break; case OP_VSPLTISW1: for (unsigned i = 0; i != 16; ++i) ShufIdxs[i] = (i&3)+4; break; case OP_VSPLTISW2: for (unsigned i = 0; i != 16; ++i) ShufIdxs[i] = (i&3)+8; break; case OP_VSPLTISW3: for (unsigned i = 0; i != 16; ++i) ShufIdxs[i] = (i&3)+12; break; case OP_VSLDOI4: return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl); case OP_VSLDOI8: return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl); case OP_VSLDOI12: return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); } EVT VT = OpLHS.getValueType(); OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS); OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS); SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); return DAG.getNode(ISD::BITCAST, dl, VT, T); } /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled /// by the VINSERTB instruction introduced in ISA 3.0, else just return default /// SDValue. SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) const { const unsigned BytesInVector = 16; bool IsLE = Subtarget.isLittleEndian(); SDLoc dl(N); SDValue V1 = N->getOperand(0); SDValue V2 = N->getOperand(1); unsigned ShiftElts = 0, InsertAtByte = 0; bool Swap = false; // Shifts required to get the byte we want at element 7. unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9}; unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8}; ArrayRef Mask = N->getMask(); int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; // For each mask element, find out if we're just inserting something // from V2 into V1 or vice versa. // Possible permutations inserting an element from V2 into V1: // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 // ... 
  //  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [16,31].

  bool FoundCandidate = false;
  // If both vector operands for the shuffle are the same vector, the mask
  // will contain only elements from the first one and the second one will be
  // undef.
  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
  // Go through the mask of half-words to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < BytesInVector; ++i) {
    unsigned CurrentElement = Mask[i];
    // If 2nd operand is undefined, we should only look for element 7 in the
    // Mask.
    if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
      continue;

    bool OtherElementsInOrder = true;
    // Examine the other elements in the Mask to see if they're in original
    // order.
    for (unsigned j = 0; j < BytesInVector; ++j) {
      if (j == i)
        continue;
      // If CurrentElement is from V1 [0,15], then we expect the rest of the
      // Mask to be from V2 [16,31] and vice versa. Unless the 2nd operand is
      // undefined, in which case we always assume we're picking from the 1st
      // operand.
      int MaskOffset =
          (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
      if (Mask[j] != OriginalOrder[j] + MaskOffset) {
        OtherElementsInOrder = false;
        break;
      }
    }
    // If other elements are in original order, we record the number of shifts
    // we need to get the element we want into element 7. Also record which
    // byte in the vector we should insert into.
    if (OtherElementsInOrder) {
      // If 2nd operand is undefined, we assume no shifts and no swapping.
      if (V2.isUndef()) {
        ShiftElts = 0;
        Swap = false;
      } else {
        // Only need the last 4 bits for shifts because operands will be
        // swapped if CurrentElement is >= 2^4.
        ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
                         : BigEndianShifts[CurrentElement & 0xF];
        Swap = CurrentElement < BytesInVector;
      }
      InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
      FoundCandidate = true;
      break;
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTB,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  if (ShiftElts) {
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
                       DAG.getConstant(InsertAtByte, dl, MVT::i32));
  }
  return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
                     DAG.getConstant(InsertAtByte, dl, MVT::i32));
}

/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned NumHalfWords = 8;
  const unsigned BytesInVector = NumHalfWords * 2;
  // Check that the shuffle is on half-words.
  if (!isNByteElemShuffleMask(N, 2, 1))
    return SDValue();

  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the half-word we want at element 3.
  unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
  unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};

  uint32_t Mask = 0;
  uint32_t OriginalOrderLow = 0x1234567;
  uint32_t OriginalOrderHigh = 0x89ABCDEF;
  // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
  // 32-bit space, only need 4-bit nibbles per element.
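  // For example, the identity half-word mask <0,1,2,3,4,5,6,7> packs to
  // 0x01234567 (OriginalOrderLow) and <8,9,10,11,12,13,14,15> packs to
  // 0x89ABCDEF (OriginalOrderHigh), with element 0 in the most significant
  // nibble.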
for (unsigned i = 0; i < NumHalfWords; ++i) { unsigned MaskShift = (NumHalfWords - 1 - i) * 4; Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift); } // For each mask element, find out if we're just inserting something // from V2 into V1 or vice versa. Possible permutations inserting an element // from V2 into V1: // X, 1, 2, 3, 4, 5, 6, 7 // 0, X, 2, 3, 4, 5, 6, 7 // 0, 1, X, 3, 4, 5, 6, 7 // 0, 1, 2, X, 4, 5, 6, 7 // 0, 1, 2, 3, X, 5, 6, 7 // 0, 1, 2, 3, 4, X, 6, 7 // 0, 1, 2, 3, 4, 5, X, 7 // 0, 1, 2, 3, 4, 5, 6, X // Inserting from V1 into V2 will be similar, except mask range will be [8,15]. bool FoundCandidate = false; // Go through the mask of half-words to find an element that's being moved // from one vector to the other. for (unsigned i = 0; i < NumHalfWords; ++i) { unsigned MaskShift = (NumHalfWords - 1 - i) * 4; uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF; uint32_t MaskOtherElts = ~(0xF << MaskShift); uint32_t TargetOrder = 0x0; // If both vector operands for the shuffle are the same vector, the mask // will contain only elements from the first one and the second one will be // undef. if (V2.isUndef()) { ShiftElts = 0; unsigned VINSERTHSrcElem = IsLE ? 4 : 3; TargetOrder = OriginalOrderLow; Swap = false; // Skip if not the correct element or mask of other elements don't equal // to our expected order. if (MaskOneElt == VINSERTHSrcElem && (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) { InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2; FoundCandidate = true; break; } } else { // If both operands are defined. // Target order is [8,15] if the current mask is between [0,7]. TargetOrder = (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow; // Skip if mask of other elements don't equal our expected order. if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) { // We only need the last 3 bits for the number of shifts. ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7] : BigEndianShifts[MaskOneElt & 0x7]; InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2; Swap = MaskOneElt < NumHalfWords; FoundCandidate = true; break; } } } if (!FoundCandidate) return SDValue(); // Candidate found, construct the proper SDAG sequence with VINSERTH, // optionally with VECSHL if shift is required. if (Swap) std::swap(V1, V2); if (V2.isUndef()) V2 = V1; SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1); if (ShiftElts) { // Double ShiftElts because we're left shifting on v16i8 type. SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2, DAG.getConstant(2 * ShiftElts, dl, MVT::i32)); SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl); SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2, DAG.getConstant(InsertAtByte, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); } SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2); SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2, DAG.getConstant(InsertAtByte, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); } /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this /// is a shuffle we can handle in a single instruction, return it. Otherwise, /// return the code it can be lowered into. Worst case, it can always be /// lowered into a vperm. 
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); ShuffleVectorSDNode *SVOp = cast(Op); EVT VT = Op.getValueType(); bool isLittleEndian = Subtarget.isLittleEndian(); unsigned ShiftElts, InsertAtByte; bool Swap = false; if (Subtarget.hasP9Vector() && PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap, isLittleEndian)) { if (Swap) std::swap(V1, V2); SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2); if (ShiftElts) { SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2, DAG.getConstant(ShiftElts, dl, MVT::i32)); SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl, DAG.getConstant(InsertAtByte, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); } SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2, DAG.getConstant(InsertAtByte, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); } if (Subtarget.hasP9Altivec()) { SDValue NewISDNode; if ((NewISDNode = lowerToVINSERTH(SVOp, DAG))) return NewISDNode; if ((NewISDNode = lowerToVINSERTB(SVOp, DAG))) return NewISDNode; } if (Subtarget.hasVSX() && PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) { if (Swap) std::swap(V1, V2); SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2); SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2, DAG.getConstant(ShiftElts, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl); } if (Subtarget.hasVSX() && PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) { if (Swap) std::swap(V1, V2); SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1); SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2); SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2, DAG.getConstant(ShiftElts, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI); } if (Subtarget.hasP9Vector()) { if (PPC::isXXBRHShuffleMask(SVOp)) { SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1); SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord); } else if (PPC::isXXBRWShuffleMask(SVOp)) { SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord); } else if (PPC::isXXBRDShuffleMask(SVOp)) { SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1); SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord); } else if (PPC::isXXBRQShuffleMask(SVOp)) { SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1); SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord); } } if (Subtarget.hasVSX()) { if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); - // If the source for the shuffle is a scalar_to_vector that came from a - // 32-bit load, it will have used LXVWSX so we don't need to splat again. 
- if (Subtarget.hasP9Vector() && - ((isLittleEndian && SplatIdx == 3) || - (!isLittleEndian && SplatIdx == 0))) { - SDValue Src = V1.getOperand(0); - if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR && - Src.getOperand(0).getOpcode() == ISD::LOAD && - Src.getOperand(0).hasOneUse()) - return V1; - } SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv, DAG.getConstant(SplatIdx, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat); } // Left shifts of 8 bytes are actually swaps. Convert accordingly. if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) { SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1); SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap); } } if (Subtarget.hasQPX()) { if (VT.getVectorNumElements() != 4) return SDValue(); if (V2.isUndef()) V2 = V1; int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp); if (AlignIdx != -1) { return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2, DAG.getConstant(AlignIdx, dl, MVT::i32)); } else if (SVOp->isSplat()) { int SplatIdx = SVOp->getSplatIndex(); if (SplatIdx >= 4) { std::swap(V1, V2); SplatIdx -= 4; } return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1, DAG.getConstant(SplatIdx, dl, MVT::i32)); } // Lower this into a qvgpci/qvfperm pair. // Compute the qvgpci literal unsigned idx = 0; for (unsigned i = 0; i < 4; ++i) { int m = SVOp->getMaskElt(i); unsigned mm = m >= 0 ? (unsigned) m : i; idx |= mm << (3-i)*3; } SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64, DAG.getConstant(idx, dl, MVT::i32)); return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3); } // Cases that are handled by instructions that take permute immediates // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be // selected by the instruction selector. if (V2.isUndef()) { if (PPC::isSplatShuffleMask(SVOp, 1) || PPC::isSplatShuffleMask(SVOp, 2) || PPC::isSplatShuffleMask(SVOp, 4) || PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) || PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) || PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 || PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) || PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) || PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) || PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) || PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) || PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) || (Subtarget.hasP8Altivec() && ( PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) || PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) || PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) { return Op; } } // Altivec has a variety of "shuffle immediates" that take two vector inputs // and produce a fixed permutation. If any of these match, do not lower to // VPERM. unsigned int ShuffleKind = isLittleEndian ? 
2 : 0; if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) || PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) || PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 || PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) || PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) || PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) || PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) || PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) || PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) || (Subtarget.hasP8Altivec() && ( PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) || PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) || PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG)))) return Op; // Check to see if this is a shuffle of 4-byte values. If so, we can use our // perfect shuffle table to emit an optimal matching sequence. ArrayRef PermMask = SVOp->getMask(); unsigned PFIndexes[4]; bool isFourElementShuffle = true; for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number unsigned EltNo = 8; // Start out undef. for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. if (PermMask[i*4+j] < 0) continue; // Undef, ignore it. unsigned ByteSource = PermMask[i*4+j]; if ((ByteSource & 3) != j) { isFourElementShuffle = false; break; } if (EltNo == 8) { EltNo = ByteSource/4; } else if (EltNo != ByteSource/4) { isFourElementShuffle = false; break; } } PFIndexes[i] = EltNo; } // If this shuffle can be expressed as a shuffle of 4-byte elements, use the // perfect shuffle vector to determine if it is cost effective to do this as // discrete instructions, or whether we should use a vperm. // For now, we skip this for little endian until such time as we have a // little-endian perfect shuffle table. if (isFourElementShuffle && !isLittleEndian) { // Compute the index in the perfect shuffle table. unsigned PFTableIndex = PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; unsigned Cost = (PFEntry >> 30); // Determining when to avoid vperm is tricky. Many things affect the cost // of vperm, particularly how many times the perm mask needs to be computed. // For example, if the perm mask can be hoisted out of a loop or is already // used (perhaps because there are multiple permutes with the same shuffle // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of // the loop requires an extra register. // // As a compromise, we only emit discrete instructions if the shuffle can be // generated in 3 or fewer operations. When we have loop information // available, if this block is within a loop, we should avoid using vperm // for 3-operation perms and use a constant pool load instead. if (Cost < 3) return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); } // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant // vector that will get spilled to the constant pool. if (V2.isUndef()) V2 = V1; // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except // that it is in input element units, not in bytes. Convert now. // For little endian, the order of the input vectors is reversed, and // the permutation mask is complemented with respect to 31. This is // necessary to produce proper semantics with the big-endian-biased vperm // instruction. 
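  // Concretely: a mask entry that selects byte b (0-31) of the concatenated
  // inputs in big-endian numbering becomes 31 - b on little-endian targets,
  // and the operands are passed to VPERM as (V2, V1) rather than (V1, V2).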
EVT EltVT = V1.getValueType().getVectorElementType(); unsigned BytesPerElement = EltVT.getSizeInBits()/8; SmallVector ResultMask; for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; for (unsigned j = 0; j != BytesPerElement; ++j) if (isLittleEndian) ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j), dl, MVT::i32)); else ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl, MVT::i32)); } SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask); if (isLittleEndian) return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V2, V1, VPermMask); else return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask); } /// getVectorCompareInfo - Given an intrinsic, return false if it is not a /// vector comparison. If it is, return true and fill in Opc/isDot with /// information about the intrinsic. static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget) { unsigned IntrinsicID = cast(Intrin.getOperand(0))->getZExtValue(); CompareOpc = -1; isDot = false; switch (IntrinsicID) { default: return false; // Comparison predicates. case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = true; break; case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = true; break; case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = true; break; case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = true; break; case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = true; break; case Intrinsic::ppc_altivec_vcmpequd_p: if (Subtarget.hasP8Altivec()) { CompareOpc = 199; isDot = true; } else return false; break; case Intrinsic::ppc_altivec_vcmpneb_p: case Intrinsic::ppc_altivec_vcmpneh_p: case Intrinsic::ppc_altivec_vcmpnew_p: case Intrinsic::ppc_altivec_vcmpnezb_p: case Intrinsic::ppc_altivec_vcmpnezh_p: case Intrinsic::ppc_altivec_vcmpnezw_p: if (Subtarget.hasP9Altivec()) { switch (IntrinsicID) { default: llvm_unreachable("Unknown comparison intrinsic."); case Intrinsic::ppc_altivec_vcmpneb_p: CompareOpc = 7; break; case Intrinsic::ppc_altivec_vcmpneh_p: CompareOpc = 71; break; case Intrinsic::ppc_altivec_vcmpnew_p: CompareOpc = 135; break; case Intrinsic::ppc_altivec_vcmpnezb_p: CompareOpc = 263; break; case Intrinsic::ppc_altivec_vcmpnezh_p: CompareOpc = 327; break; case Intrinsic::ppc_altivec_vcmpnezw_p: CompareOpc = 391; break; } isDot = true; } else return false; break; case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = true; break; case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = true; break; case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = true; break; case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = true; break; case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = true; break; case Intrinsic::ppc_altivec_vcmpgtsd_p: if (Subtarget.hasP8Altivec()) { CompareOpc = 967; isDot = true; } else return false; break; case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = true; break; case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = true; break; case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = true; break; case Intrinsic::ppc_altivec_vcmpgtud_p: if (Subtarget.hasP8Altivec()) { CompareOpc = 711; isDot = true; } else return false; break; // VSX predicate comparisons use the same infrastructure case Intrinsic::ppc_vsx_xvcmpeqdp_p: case Intrinsic::ppc_vsx_xvcmpgedp_p: case 
Intrinsic::ppc_vsx_xvcmpgtdp_p: case Intrinsic::ppc_vsx_xvcmpeqsp_p: case Intrinsic::ppc_vsx_xvcmpgesp_p: case Intrinsic::ppc_vsx_xvcmpgtsp_p: if (Subtarget.hasVSX()) { switch (IntrinsicID) { case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc = 99; break; case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break; case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break; case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc = 67; break; case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc = 83; break; case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc = 75; break; } isDot = true; } else return false; break; // Normal Comparisons. case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; break; case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; break; case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; break; case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; break; case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; break; case Intrinsic::ppc_altivec_vcmpequd: if (Subtarget.hasP8Altivec()) CompareOpc = 199; else return false; break; case Intrinsic::ppc_altivec_vcmpneb: case Intrinsic::ppc_altivec_vcmpneh: case Intrinsic::ppc_altivec_vcmpnew: case Intrinsic::ppc_altivec_vcmpnezb: case Intrinsic::ppc_altivec_vcmpnezh: case Intrinsic::ppc_altivec_vcmpnezw: if (Subtarget.hasP9Altivec()) switch (IntrinsicID) { default: llvm_unreachable("Unknown comparison intrinsic."); case Intrinsic::ppc_altivec_vcmpneb: CompareOpc = 7; break; case Intrinsic::ppc_altivec_vcmpneh: CompareOpc = 71; break; case Intrinsic::ppc_altivec_vcmpnew: CompareOpc = 135; break; case Intrinsic::ppc_altivec_vcmpnezb: CompareOpc = 263; break; case Intrinsic::ppc_altivec_vcmpnezh: CompareOpc = 327; break; case Intrinsic::ppc_altivec_vcmpnezw: CompareOpc = 391; break; } else return false; break; case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; break; case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; break; case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; break; case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; break; case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; break; case Intrinsic::ppc_altivec_vcmpgtsd: if (Subtarget.hasP8Altivec()) CompareOpc = 967; else return false; break; case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; break; case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; break; case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; break; case Intrinsic::ppc_altivec_vcmpgtud: if (Subtarget.hasP8Altivec()) CompareOpc = 711; else return false; break; } return true; } /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom /// lower, do it, otherwise return null. SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned IntrinsicID = cast(Op.getOperand(0))->getZExtValue(); SDLoc dl(Op); if (IntrinsicID == Intrinsic::thread_pointer) { // Reads the thread pointer register, used for __builtin_thread_pointer. if (Subtarget.isPPC64()) return DAG.getRegister(PPC::X13, MVT::i64); return DAG.getRegister(PPC::R2, MVT::i32); } // We are looking for absolute values here. 
// The idea is to try to fit one of two patterns: // max (a, (0-a)) OR max ((0-a), a) if (Subtarget.hasP9Vector() && (IntrinsicID == Intrinsic::ppc_altivec_vmaxsw || IntrinsicID == Intrinsic::ppc_altivec_vmaxsh || IntrinsicID == Intrinsic::ppc_altivec_vmaxsb)) { SDValue V1 = Op.getOperand(1); SDValue V2 = Op.getOperand(2); if (V1.getSimpleValueType() == V2.getSimpleValueType() && (V1.getSimpleValueType() == MVT::v4i32 || V1.getSimpleValueType() == MVT::v8i16 || V1.getSimpleValueType() == MVT::v16i8)) { if ( V1.getOpcode() == ISD::SUB && ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) && V1.getOperand(1) == V2 ) { // Generate the abs instruction with the operands return DAG.getNode(ISD::ABS, dl, V2.getValueType(),V2); } if ( V2.getOpcode() == ISD::SUB && ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) && V2.getOperand(1) == V1 ) { // Generate the abs instruction with the operands return DAG.getNode(ISD::ABS, dl, V1.getValueType(),V1); } } } // If this is a lowered altivec predicate compare, CompareOpc is set to the // opcode number of the comparison. int CompareOpc; bool isDot; if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget)) return SDValue(); // Don't custom lower most intrinsics. // If this is a non-dot comparison, make the VCMP node and we are done. if (!isDot) { SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(), Op.getOperand(1), Op.getOperand(2), DAG.getConstant(CompareOpc, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp); } // Create the PPCISD altivec 'dot' comparison node. SDValue Ops[] = { Op.getOperand(2), // LHS Op.getOperand(3), // RHS DAG.getConstant(CompareOpc, dl, MVT::i32) }; EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue }; SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); // Now that we have the comparison, emit a copy from the CR to a GPR. // This is flagged to the above dot comparison. SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32, DAG.getRegister(PPC::CR6, MVT::i32), CompNode.getValue(1)); // Unpack the result based on how the target uses it. unsigned BitNo; // Bit # of CR6. bool InvertBit; // Invert result? switch (cast(Op.getOperand(1))->getZExtValue()) { default: // Can't happen, don't crash on invalid number though. case 0: // Return the value of the EQ bit of CR6. BitNo = 0; InvertBit = false; break; case 1: // Return the inverted value of the EQ bit of CR6. BitNo = 0; InvertBit = true; break; case 2: // Return the value of the LT bit of CR6. BitNo = 2; InvertBit = false; break; case 3: // Return the inverted value of the LT bit of CR6. BitNo = 2; InvertBit = true; break; } // Shift the bit into the low position. Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags, DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32)); // Isolate the bit. Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags, DAG.getConstant(1, dl, MVT::i32)); // If we are supposed to, toggle the bit. if (InvertBit) Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags, DAG.getConstant(1, dl, MVT::i32)); return Flags; } SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const { // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to // the beginning of the argument list. int ArgStart = isa(Op.getOperand(0)) ? 
                     0 : 1;
  SDLoc DL(Op);
  switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
  case Intrinsic::ppc_cfence: {
    assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
    assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
    return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
                                      DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
                                                  Op.getOperand(ArgStart + 1)),
                                      Op.getOperand(0)),
                   0);
  }
  default:
    break;
  }
  return SDValue();
}

SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const {
  // Check for a DIV with the same operands as this REM.
  for (auto UI : Op.getOperand(1)->uses()) {
    if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) ||
        (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV))
      if (UI->getOperand(0) == Op.getOperand(0) &&
          UI->getOperand(1) == Op.getOperand(1))
        return SDValue();
  }
  return Op;
}

// Lower scalar BSWAP64 to xxbrd.
SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // MTVSRDD
  Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
                   Op.getOperand(0));
  // XXBRD
  Op = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Op);
  // MFVSRD
  int VectorIndex = 0;
  if (Subtarget.isLittleEndian())
    VectorIndex = 1;
  Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
                   DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
  return Op;
}

// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
// compared to a value that is atomically loaded (atomic loads zero-extend).
SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
         "Expecting an atomic compare-and-swap here.");
  SDLoc dl(Op);
  auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
  EVT MemVT = AtomicNode->getMemoryVT();
  if (MemVT.getSizeInBits() >= 32)
    return Op;

  SDValue CmpOp = Op.getOperand(2);
  // If this is already correctly zero-extended, leave it alone.
  auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
  if (DAG.MaskedValueIsZero(CmpOp, HighBits))
    return Op;

  // Clear the high bits of the compare operand.
  unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
  SDValue NewCmpOp =
    DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
                DAG.getConstant(MaskVal, dl, MVT::i32));

  // Replace the existing compare operand with the properly zero-extended one.
  SmallVector<SDValue, 4> Ops;
  for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
    Ops.push_back(AtomicNode->getOperand(i));
  Ops[2] = NewCmpOp;
  MachineMemOperand *MMO = AtomicNode->getMemOperand();
  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
  auto NodeTy =
    (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
  return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
}

SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int
  // instructions), but for smaller types, we need to first extend up to v2i32
  // before going any farther.
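  // For example, a v2i64 sign_extend_inreg from v2i16 is lowered below as:
  // bitcast to v4i32, sign_extend_inreg from v4i16, bitcast back to v2i64,
  // and finally sign_extend_inreg from v2i32.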
if (Op.getValueType() == MVT::v2i64) { EVT ExtVT = cast(Op.getOperand(1))->getVT(); if (ExtVT != MVT::v2i32) { Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)); Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op, DAG.getValueType(EVT::getVectorVT(*DAG.getContext(), ExtVT.getVectorElementType(), 4))); Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op); Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op, DAG.getValueType(MVT::v2i32)); } return Op; } return SDValue(); } SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); // Create a stack slot that is 16-byte aligned. MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = MFI.CreateStackObject(16, 16, false); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); // Store the input value into Value#0 of the stack slot. SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, MachinePointerInfo()); // Load it out. return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo()); } SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Should only be called for ISD::INSERT_VECTOR_ELT"); ConstantSDNode *C = dyn_cast(Op.getOperand(2)); // We have legal lowering for constant indices but not for variable ones. if (!C) return SDValue(); EVT VT = Op.getValueType(); SDLoc dl(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types. if (VT == MVT::v8i16 || VT == MVT::v16i8) { SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2); unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8; unsigned InsertAtElement = C->getZExtValue(); unsigned InsertAtByte = InsertAtElement * BytesInEachElement; if (Subtarget.isLittleEndian()) { InsertAtByte = (16 - BytesInEachElement) - InsertAtByte; } return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz, DAG.getConstant(InsertAtByte, dl, MVT::i32)); } return Op; } SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); SDNode *N = Op.getNode(); assert(N->getOperand(0).getValueType() == MVT::v4i1 && "Unknown extract_vector_elt type"); SDValue Value = N->getOperand(0); // The first part of this is like the store lowering except that we don't // need to track the chain. // The values are now known to be -1 (false) or 1 (true). To convert this // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to // understand how to form the extending load. SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); // Now convert to an integer and store. 
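  // (Numerically, -1.0 * 0.5 + 0.5 == 0.0 and 1.0 * 0.5 + 0.5 == 1.0, so each
  // lane is exactly 0.0 or 1.0 before the unsigned conversion below.)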
Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32), Value); MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = MFI.CreateStackObject(16, 16, false); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SDValue StoreChain = DAG.getEntryNode(); SDValue Ops[] = {StoreChain, DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32), Value, FIdx}; SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other); StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, VTs, Ops, MVT::v4i32, PtrInfo); // Extract the value requested. unsigned Offset = 4*cast(N->getOperand(1))->getZExtValue(); SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); SDValue IntVal = DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset)); if (!Subtarget.useCRBits()) return IntVal; return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal); } /// Lowering for QPX v4i1 loads SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); LoadSDNode *LN = cast(Op.getNode()); SDValue LoadChain = LN->getChain(); SDValue BasePtr = LN->getBasePtr(); if (Op.getValueType() == MVT::v4f64 || Op.getValueType() == MVT::v4f32) { EVT MemVT = LN->getMemoryVT(); unsigned Alignment = LN->getAlignment(); // If this load is properly aligned, then it is legal. if (Alignment >= MemVT.getStoreSize()) return Op; EVT ScalarVT = Op.getValueType().getScalarType(), ScalarMemVT = MemVT.getScalarType(); unsigned Stride = ScalarMemVT.getStoreSize(); SDValue Vals[4], LoadChains[4]; for (unsigned Idx = 0; Idx < 4; ++Idx) { SDValue Load; if (ScalarVT != ScalarMemVT) Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain, BasePtr, LN->getPointerInfo().getWithOffset(Idx * Stride), ScalarMemVT, MinAlign(Alignment, Idx * Stride), LN->getMemOperand()->getFlags(), LN->getAAInfo()); else Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr, LN->getPointerInfo().getWithOffset(Idx * Stride), MinAlign(Alignment, Idx * Stride), LN->getMemOperand()->getFlags(), LN->getAAInfo()); if (Idx == 0 && LN->isIndexed()) { assert(LN->getAddressingMode() == ISD::PRE_INC && "Unknown addressing mode on vector load"); Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(), LN->getAddressingMode()); } Vals[Idx] = Load; LoadChains[Idx] = Load.getValue(1); BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getConstant(Stride, dl, BasePtr.getValueType())); } SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals); if (LN->isIndexed()) { SDValue RetOps[] = { Value, Vals[0].getValue(1), TF }; return DAG.getMergeValues(RetOps, dl); } SDValue RetOps[] = { Value, TF }; return DAG.getMergeValues(RetOps, dl); } assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower"); assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported"); // To lower v4i1 from a byte array, we load the byte elements of the // vector and then reuse the BUILD_VECTOR logic. 
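  // In other words, each of the four i1 elements is loaded individually with
  // an i8 extending load at byte offset i, and the results are reassembled
  // into a v4i1 BUILD_VECTOR, which the lowering above already knows how to
  // handle.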
SDValue VectElmts[4], VectElmtChains[4]; for (unsigned i = 0; i < 4; ++i) { SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); VectElmts[i] = DAG.getExtLoad( ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx, LN->getPointerInfo().getWithOffset(i), MVT::i8, /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo()); VectElmtChains[i] = VectElmts[i].getValue(1); } LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains); SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts); SDValue RVals[] = { Value, LoadChain }; return DAG.getMergeValues(RVals, dl); } /// Lowering for QPX v4i1 stores SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); StoreSDNode *SN = cast(Op.getNode()); SDValue StoreChain = SN->getChain(); SDValue BasePtr = SN->getBasePtr(); SDValue Value = SN->getValue(); if (Value.getValueType() == MVT::v4f64 || Value.getValueType() == MVT::v4f32) { EVT MemVT = SN->getMemoryVT(); unsigned Alignment = SN->getAlignment(); // If this store is properly aligned, then it is legal. if (Alignment >= MemVT.getStoreSize()) return Op; EVT ScalarVT = Value.getValueType().getScalarType(), ScalarMemVT = MemVT.getScalarType(); unsigned Stride = ScalarMemVT.getStoreSize(); SDValue Stores[4]; for (unsigned Idx = 0; Idx < 4; ++Idx) { SDValue Ex = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value, DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout()))); SDValue Store; if (ScalarVT != ScalarMemVT) Store = DAG.getTruncStore(StoreChain, dl, Ex, BasePtr, SN->getPointerInfo().getWithOffset(Idx * Stride), ScalarMemVT, MinAlign(Alignment, Idx * Stride), SN->getMemOperand()->getFlags(), SN->getAAInfo()); else Store = DAG.getStore(StoreChain, dl, Ex, BasePtr, SN->getPointerInfo().getWithOffset(Idx * Stride), MinAlign(Alignment, Idx * Stride), SN->getMemOperand()->getFlags(), SN->getAAInfo()); if (Idx == 0 && SN->isIndexed()) { assert(SN->getAddressingMode() == ISD::PRE_INC && "Unknown addressing mode on vector store"); Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(), SN->getAddressingMode()); } BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getConstant(Stride, dl, BasePtr.getValueType())); Stores[Idx] = Store; } SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); if (SN->isIndexed()) { SDValue RetOps[] = { TF, Stores[0].getValue(1) }; return DAG.getMergeValues(RetOps, dl); } return TF; } assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported"); assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower"); // The values are now known to be -1 (false) or 1 (true). To convert this // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to // understand how to form the extending load. SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64); Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); // Now convert to an integer and store. 
Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32), Value); MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = MFI.CreateStackObject(16, 16, false); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SDValue Ops[] = {StoreChain, DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32), Value, FIdx}; SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other); StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, VTs, Ops, MVT::v4i32, PtrInfo); // Move data into the byte array. SDValue Loads[4], LoadChains[4]; for (unsigned i = 0; i < 4; ++i) { unsigned Offset = 4*i; SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset)); LoadChains[i] = Loads[i].getValue(1); } StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); SDValue Stores[4]; for (unsigned i = 0; i < 4; ++i) { SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); Stores[i] = DAG.getTruncStore( StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i), MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(), SN->getAAInfo()); } StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); return StoreChain; } SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); if (Op.getValueType() == MVT::v4i32) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl); SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt. SDValue RHSSwap = // = vrlw RHS, 16 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); // Shrinkify inputs to v8i16. LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS); RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS); RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap); // Low parts multiplied together, generating 32-bit results (we ignore the // top parts). SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, LHS, RHS, DAG, dl, MVT::v4i32); SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); // Shift the high parts up 16 bits. HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, Neg16, DAG, dl); return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); } else if (Op.getValueType() == MVT::v8i16) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl); return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, LHS, RHS, Zero, DAG, dl); } else if (Op.getValueType() == MVT::v16i8) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); bool isLittleEndian = Subtarget.isLittleEndian(); // Multiply the even 8-bit parts, producing 16-bit sums. SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, LHS, RHS, DAG, dl, MVT::v8i16); EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts); // Multiply the odd 8-bit parts, producing 16-bit sums. 
SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, LHS, RHS, DAG, dl, MVT::v8i16); OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); // Merge the results together. Because vmuleub and vmuloub are // instructions with a big-endian bias, we must reverse the // element numbering and reverse the meaning of "odd" and "even" // when generating little endian code. int Ops[16]; for (unsigned i = 0; i != 8; ++i) { if (isLittleEndian) { Ops[i*2 ] = 2*i; Ops[i*2+1] = 2*i+16; } else { Ops[i*2 ] = 2*i+1; Ops[i*2+1] = 2*i+1+16; } } if (isLittleEndian) return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops); else return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); } else { llvm_unreachable("Unknown mul to lower!"); } } /// LowerOperation - Provide custom lowering hooks for some operations. /// SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Wasn't expecting to be able to lower this!"); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); // Variable argument lowering. case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::VAARG: return LowerVAARG(Op, DAG); case ISD::VACOPY: return LowerVACOPY(Op, DAG); case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::GET_DYNAMIC_AREA_OFFSET: return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG); // Exception handling lowering. case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG); case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op)); case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); // Lower 64-bit shifts. case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); // Vector-related lowering. case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); // For counter-based loop handling. case ISD::INTRINSIC_W_CHAIN: return SDValue(); case ISD::BITCAST: return LowerBITCAST(Op, DAG); // Frame & Return address. 
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG); case ISD::SREM: case ISD::UREM: return LowerREM(Op, DAG); case ISD::BSWAP: return LowerBSWAP(Op, DAG); case ISD::ATOMIC_CMP_SWAP: return LowerATOMIC_CMP_SWAP(Op, DAG); } } void PPCTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, SelectionDAG &DAG) const { SDLoc dl(N); switch (N->getOpcode()) { default: llvm_unreachable("Do not know how to custom type legalize this operation!"); case ISD::READCYCLECOUNTER: { SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0)); Results.push_back(RTB); Results.push_back(RTB.getValue(1)); Results.push_back(RTB.getValue(2)); break; } case ISD::INTRINSIC_W_CHAIN: { if (cast(N->getOperand(1))->getZExtValue() != Intrinsic::ppc_is_decremented_ctr_nonzero) break; assert(N->getValueType(0) == MVT::i1 && "Unexpected result type for CTR decrement intrinsic"); EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), N->getValueType(0)); SDVTList VTs = DAG.getVTList(SVT, MVT::Other); SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), N->getOperand(1)); Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt)); Results.push_back(NewInt.getValue(1)); break; } case ISD::VAARG: { if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64()) return; EVT VT = N->getValueType(0); if (VT == MVT::i64) { SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG); Results.push_back(NewNode); Results.push_back(NewNode.getValue(1)); } return; } case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: // LowerFP_TO_INT() can only handle f32 and f64. if (N->getOperand(0).getValueType() == MVT::ppcf128) return; Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); return; } } //===----------------------------------------------------------------------===// // Other Lowering Code //===----------------------------------------------------------------------===// static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); Function *Func = Intrinsic::getDeclaration(M, Id); return Builder.CreateCall(Func, {}); } // The mappings for emitLeading/TrailingFence is taken from // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const { if (Ord == AtomicOrdering::SequentiallyConsistent) return callIntrinsic(Builder, Intrinsic::ppc_sync); if (isReleaseOrStronger(Ord)) return callIntrinsic(Builder, Intrinsic::ppc_lwsync); return nullptr; } Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const { if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) { // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification. if (isa(Inst) && Subtarget.isPPC64()) return Builder.CreateCall( Intrinsic::getDeclaration( Builder.GetInsertBlock()->getParent()->getParent(), Intrinsic::ppc_cfence, {Inst->getType()}), {Inst}); // FIXME: Can use isync for rmw operation. 
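    // In outline, the fence mapping used by this pair of hooks is: a seq_cst
    // access gets a full sync in front of it (emitLeadingFence), anything
    // release-or-stronger gets a leading lwsync, and an acquire-or-stronger
    // operation containing an atomic load gets a trailing lwsync -- except
    // for plain atomic loads on 64-bit targets, which use the ppc_cfence
    // pseudo on the loaded value instead (see above).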
return callIntrinsic(Builder, Intrinsic::ppc_lwsync); } return nullptr; } MachineBasicBlock * PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const { // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. const TargetInstrInfo *TII = Subtarget.getInstrInfo(); auto LoadMnemonic = PPC::LDARX; auto StoreMnemonic = PPC::STDCX; switch (AtomicSize) { default: llvm_unreachable("Unexpected size of atomic entity"); case 1: LoadMnemonic = PPC::LBARX; StoreMnemonic = PPC::STBCX; assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); break; case 2: LoadMnemonic = PPC::LHARX; StoreMnemonic = PPC::STHCX; assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); break; case 4: LoadMnemonic = PPC::LWARX; StoreMnemonic = PPC::STWCX; break; case 8: LoadMnemonic = PPC::LDARX; StoreMnemonic = PPC::STDCX; break; } const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); MachineFunction::iterator It = ++BB->getIterator(); unsigned dest = MI.getOperand(0).getReg(); unsigned ptrA = MI.getOperand(1).getReg(); unsigned ptrB = MI.getOperand(2).getReg(); unsigned incr = MI.getOperand(3).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr; MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loopMBB); if (CmpOpcode) F->insert(It, loop2MBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned TmpReg = (!BinOpcode) ? incr : RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass); // thisMBB: // ... // fallthrough --> loopMBB BB->addSuccessor(loopMBB); // loopMBB: // l[wd]arx dest, ptr // add r0, dest, incr // st[wd]cx. r0, ptr // bne- loopMBB // fallthrough --> exitMBB // For max/min... // loopMBB: // l[wd]arx dest, ptr // cmpl?[wd] incr, dest // bgt exitMBB // loop2MBB: // st[wd]cx. dest, ptr // bne- loopMBB // fallthrough --> exitMBB BB = loopMBB; BuildMI(BB, dl, TII->get(LoadMnemonic), dest) .addReg(ptrA).addReg(ptrB); if (BinOpcode) BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); if (CmpOpcode) { // Signed comparisons of byte or halfword values must be sign-extended. if (CmpOpcode == PPC::CMPW && AtomicSize < 4) { unsigned ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH), ExtReg).addReg(dest); BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) .addReg(incr).addReg(ExtReg); } else BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) .addReg(incr).addReg(dest); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB); BB->addSuccessor(loop2MBB); BB->addSuccessor(exitMBB); BB = loop2MBB; } BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(TmpReg).addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); BB->addSuccessor(loopMBB); BB->addSuccessor(exitMBB); // exitMBB: // ... 
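  // Note that 'dest' still holds the value produced by the load-reserve, i.e.
  // the memory contents from before the update, which is what the atomic
  // read-modify-write pseudos return.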
BB = exitMBB; return BB; } MachineBasicBlock * PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, bool is8bit, // operation unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const { // If we support part-word atomic mnemonics, just use them if (Subtarget.hasPartwordAtomics()) return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode, CmpPred); // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. const TargetInstrInfo *TII = Subtarget.getInstrInfo(); // In 64 bit mode we have to use 64 bits for addresses, even though the // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address // registers without caring whether they're 32 or 64, but here we're // doing actual arithmetic on the addresses. bool is64bit = Subtarget.isPPC64(); bool isLittleEndian = Subtarget.isLittleEndian(); unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); MachineFunction::iterator It = ++BB->getIterator(); unsigned dest = MI.getOperand(0).getReg(); unsigned ptrA = MI.getOperand(1).getReg(); unsigned ptrB = MI.getOperand(2).getReg(); unsigned incr = MI.getOperand(3).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr; MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loopMBB); if (CmpOpcode) F->insert(It, loop2MBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; unsigned PtrReg = RegInfo.createVirtualRegister(RC); unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); unsigned ShiftReg = isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC); unsigned Incr2Reg = RegInfo.createVirtualRegister(RC); unsigned MaskReg = RegInfo.createVirtualRegister(RC); unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC); unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); unsigned Ptr1Reg; unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC); // thisMBB: // ... // fallthrough --> loopMBB BB->addSuccessor(loopMBB); // The 4-byte load must be aligned, while a char or short may be // anywhere in the word. Hence all this nasty bookkeeping code. // add ptr1, ptrA, ptrB [copy if ptrA==0] // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] // xori shift, shift1, 24 [16] // rlwinm ptr, ptr1, 0, 0, 29 // slw incr2, incr, shift // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] // slw mask, mask2, shift // loopMBB: // lwarx tmpDest, ptr // add tmp, tmpDest, incr2 // andc tmp2, tmpDest, mask // and tmp3, tmp, mask // or tmp4, tmp3, tmp2 // stwcx. tmp4, ptr // bne- loopMBB // fallthrough --> exitMBB // srw dest, tmpDest, shift if (ptrA != ZeroReg) { Ptr1Reg = RegInfo.createVirtualRegister(RC); BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) .addReg(ptrA).addReg(ptrB); } else { Ptr1Reg = ptrB; } BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) .addImm(3).addImm(27).addImm(is8bit ? 
28 : 27); if (!isLittleEndian) BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); if (is64bit) BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) .addReg(Ptr1Reg).addImm(0).addImm(61); else BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg) .addReg(incr).addReg(ShiftReg); if (is8bit) BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); else { BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535); } BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) .addReg(Mask2Reg).addReg(ShiftReg); BB = loopMBB; BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) .addReg(ZeroReg).addReg(PtrReg); if (BinOpcode) BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) .addReg(Incr2Reg).addReg(TmpDestReg); BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg) .addReg(TmpDestReg).addReg(MaskReg); BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg) .addReg(TmpReg).addReg(MaskReg); if (CmpOpcode) { // For unsigned comparisons, we can directly compare the shifted values. // For signed comparisons we shift and sign extend. unsigned SReg = RegInfo.createVirtualRegister(RC); BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), SReg) .addReg(TmpDestReg).addReg(MaskReg); unsigned ValueReg = SReg; unsigned CmpReg = Incr2Reg; if (CmpOpcode == PPC::CMPW) { ValueReg = RegInfo.createVirtualRegister(RC); BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg) .addReg(SReg).addReg(ShiftReg); unsigned ValueSReg = RegInfo.createVirtualRegister(RC); BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg) .addReg(ValueReg); ValueReg = ValueSReg; CmpReg = incr; } BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) .addReg(CmpReg).addReg(ValueReg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB); BB->addSuccessor(loop2MBB); BB->addSuccessor(exitMBB); BB = loop2MBB; } BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) .addReg(Tmp3Reg).addReg(Tmp2Reg); BuildMI(BB, dl, TII->get(PPC::STWCX)) .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); BB->addSuccessor(loopMBB); BB->addSuccessor(exitMBB); // exitMBB: // ... 
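  // The word loaded by the lwarx is still in TmpDestReg here; the srw emitted
  // at the head of exitMBB shifts the addressed byte/halfword back down to
  // bit 0 so that 'dest' receives the field's pre-update value.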
BB = exitMBB; BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg) .addReg(ShiftReg); return BB; } llvm::MachineBasicBlock * PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); const BasicBlock *BB = MBB->getBasicBlock(); MachineFunction::iterator I = ++MBB->getIterator(); // Memory Reference MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin(); MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end(); unsigned DstReg = MI.getOperand(0).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); unsigned mainDstReg = MRI.createVirtualRegister(RC); unsigned restoreDstReg = MRI.createVirtualRegister(RC); MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); // For v = setjmp(buf), we generate // // thisMBB: // SjLjSetup mainMBB // bl mainMBB // v_restore = 1 // b sinkMBB // // mainMBB: // buf[LabelOffset] = LR // v_main = 0 // // sinkMBB: // v = phi(main, restore) // MachineBasicBlock *thisMBB = MBB; MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); MF->insert(I, mainMBB); MF->insert(I, sinkMBB); MachineInstrBuilder MIB; // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)), MBB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); // Note that the structure of the jmp_buf used here is not compatible // with that used by libc, and is not designed to be. Specifically, it // stores only those 'reserved' registers that LLVM does not otherwise // understand how to spill. Also, by convention, by the time this // intrinsic is called, Clang has already stored the frame address in the // first slot of the buffer and stack address in the third. Following the // X86 target code, we'll store the jump address in the second slot. We also // need to save the TOC pointer (R2) to handle jumps between shared // libraries, and that will be stored in the fourth slot. The thread // identifier (R13) is not affected. // thisMBB: const int64_t LabelOffset = 1 * PVT.getStoreSize(); const int64_t TOCOffset = 3 * PVT.getStoreSize(); const int64_t BPOffset = 4 * PVT.getStoreSize(); // Prepare IP either in reg. const TargetRegisterClass *PtrRC = getRegClassFor(PVT); unsigned LabelReg = MRI.createVirtualRegister(PtrRC); unsigned BufReg = MI.getOperand(1).getReg(); if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) { setUsesTOCBasePtr(*MBB->getParent()); MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) .addReg(PPC::X2) .addImm(TOCOffset) .addReg(BufReg); MIB.setMemRefs(MMOBegin, MMOEnd); } // Naked functions never have a base pointer, and so we use r1. For all // other functions, this decision must be delayed until during PEI. unsigned BaseReg; if (MF->getFunction().hasFnAttribute(Attribute::Naked)) BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1; else BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP; MIB = BuildMI(*thisMBB, MI, DL, TII->get(Subtarget.isPPC64() ? 
PPC::STD : PPC::STW)) .addReg(BaseReg) .addImm(BPOffset) .addReg(BufReg); MIB.setMemRefs(MMOBegin, MMOEnd); // Setup MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); MIB.addRegMask(TRI->getNoPreservedMask()); BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)) .addMBB(mainMBB); MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB); thisMBB->addSuccessor(mainMBB, BranchProbability::getZero()); thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne()); // mainMBB: // mainDstReg = 0 MIB = BuildMI(mainMBB, DL, TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg); // Store IP if (Subtarget.isPPC64()) { MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) .addReg(LabelReg) .addImm(LabelOffset) .addReg(BufReg); } else { MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) .addReg(LabelReg) .addImm(LabelOffset) .addReg(BufReg); } MIB.setMemRefs(MMOBegin, MMOEnd); BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); mainMBB->addSuccessor(sinkMBB); // sinkMBB: BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(PPC::PHI), DstReg) .addReg(mainDstReg).addMBB(mainMBB) .addReg(restoreDstReg).addMBB(thisMBB); MI.eraseFromParent(); return sinkMBB; } MachineBasicBlock * PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); // Memory Reference MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin(); MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end(); MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); const TargetRegisterClass *RC = (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; unsigned Tmp = MRI.createVirtualRegister(RC); // Since FP is only updated here but NOT referenced, it's treated as GPR. unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; unsigned BP = (PVT == MVT::i64) ? PPC::X30 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29 : PPC::R30); MachineInstrBuilder MIB; const int64_t LabelOffset = 1 * PVT.getStoreSize(); const int64_t SPOffset = 2 * PVT.getStoreSize(); const int64_t TOCOffset = 3 * PVT.getStoreSize(); const int64_t BPOffset = 4 * PVT.getStoreSize(); unsigned BufReg = MI.getOperand(0).getReg(); // Reload FP (the jumped-to function may not have had a // frame pointer, and if so, then its r31 will be restored // as necessary). 
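  // The slot layout assumed below (in units of the pointer size) matches what
  // the setjmp side above stores: slot 0 = frame pointer, slot 1 = resume
  // address, slot 2 = stack pointer, slot 3 = TOC (r2, 64-bit SVR4 only) and
  // slot 4 = base pointer.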
if (PVT == MVT::i64) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP) .addImm(0) .addReg(BufReg); } else { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP) .addImm(0) .addReg(BufReg); } MIB.setMemRefs(MMOBegin, MMOEnd); // Reload IP if (PVT == MVT::i64) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) .addImm(LabelOffset) .addReg(BufReg); } else { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) .addImm(LabelOffset) .addReg(BufReg); } MIB.setMemRefs(MMOBegin, MMOEnd); // Reload SP if (PVT == MVT::i64) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) .addImm(SPOffset) .addReg(BufReg); } else { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) .addImm(SPOffset) .addReg(BufReg); } MIB.setMemRefs(MMOBegin, MMOEnd); // Reload BP if (PVT == MVT::i64) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP) .addImm(BPOffset) .addReg(BufReg); } else { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP) .addImm(BPOffset) .addReg(BufReg); } MIB.setMemRefs(MMOBegin, MMOEnd); // Reload TOC if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) { setUsesTOCBasePtr(*MBB->getParent()); MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) .addImm(TOCOffset) .addReg(BufReg); MIB.setMemRefs(MMOBegin, MMOEnd); } // Jump BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp); BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR)); MI.eraseFromParent(); return MBB; } MachineBasicBlock * PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { if (MI.getOpcode() == TargetOpcode::STACKMAP || MI.getOpcode() == TargetOpcode::PATCHPOINT) { if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() && MI.getOpcode() == TargetOpcode::PATCHPOINT) { // Call lowering should have added an r2 operand to indicate a dependence // on the TOC base pointer value. It can't however, because there is no // way to mark the dependence as implicit there, and so the stackmap code // will confuse it with a regular operand. Instead, add the dependence // here. setUsesTOCBasePtr(*BB->getParent()); MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true)); } return emitPatchPoint(MI, BB); } if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 || MI.getOpcode() == PPC::EH_SjLj_SetJmp64) { return emitEHSjLjSetJmp(MI, BB); } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 || MI.getOpcode() == PPC::EH_SjLj_LongJmp64) { return emitEHSjLjLongJmp(MI, BB); } const TargetInstrInfo *TII = Subtarget.getInstrInfo(); // To "insert" these instructions we actually have to insert their // control-flow patterns. 
const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = ++BB->getIterator(); MachineFunction *F = BB->getParent(); if (MI.getOpcode() == PPC::SELECT_CC_I4 || MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8) { SmallVector Cond; if (MI.getOpcode() == PPC::SELECT_CC_I4 || MI.getOpcode() == PPC::SELECT_CC_I8) Cond.push_back(MI.getOperand(4)); else Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); Cond.push_back(MI.getOperand(1)); DebugLoc dl = MI.getDebugLoc(); TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond, MI.getOperand(2).getReg(), MI.getOperand(3).getReg()); } else if (MI.getOpcode() == PPC::SELECT_CC_I4 || MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_CC_F4 || MI.getOpcode() == PPC::SELECT_CC_F8 || MI.getOpcode() == PPC::SELECT_CC_F16 || MI.getOpcode() == PPC::SELECT_CC_QFRC || MI.getOpcode() == PPC::SELECT_CC_QSRC || MI.getOpcode() == PPC::SELECT_CC_QBRC || MI.getOpcode() == PPC::SELECT_CC_VRRC || MI.getOpcode() == PPC::SELECT_CC_VSFRC || MI.getOpcode() == PPC::SELECT_CC_VSSRC || MI.getOpcode() == PPC::SELECT_CC_VSRC || MI.getOpcode() == PPC::SELECT_CC_SPE4 || MI.getOpcode() == PPC::SELECT_CC_SPE || MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 || MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 || MI.getOpcode() == PPC::SELECT_F16 || MI.getOpcode() == PPC::SELECT_QFRC || MI.getOpcode() == PPC::SELECT_QSRC || MI.getOpcode() == PPC::SELECT_QBRC || MI.getOpcode() == PPC::SELECT_SPE || MI.getOpcode() == PPC::SELECT_SPE4 || MI.getOpcode() == PPC::SELECT_VRRC || MI.getOpcode() == PPC::SELECT_VSFRC || MI.getOpcode() == PPC::SELECT_VSSRC || MI.getOpcode() == PPC::SELECT_VSRC) { // The incoming instruction knows the destination vreg to set, the // condition code register to branch on, the true/false values to // select between, and a branch opcode to use. // thisMBB: // ... // TrueVal = ... // cmpTY ccX, r1, r2 // bCC copy1MBB // fallthrough --> copy0MBB MachineBasicBlock *thisMBB = BB; MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); DebugLoc dl = MI.getDebugLoc(); F->insert(It, copy0MBB); F->insert(It, sinkMBB); // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(BB); // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 || MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 || MI.getOpcode() == PPC::SELECT_F16 || MI.getOpcode() == PPC::SELECT_SPE4 || MI.getOpcode() == PPC::SELECT_SPE || MI.getOpcode() == PPC::SELECT_QFRC || MI.getOpcode() == PPC::SELECT_QSRC || MI.getOpcode() == PPC::SELECT_QBRC || MI.getOpcode() == PPC::SELECT_VRRC || MI.getOpcode() == PPC::SELECT_VSFRC || MI.getOpcode() == PPC::SELECT_VSSRC || MI.getOpcode() == PPC::SELECT_VSRC) { BuildMI(BB, dl, TII->get(PPC::BC)) .addReg(MI.getOperand(1).getReg()) .addMBB(sinkMBB); } else { unsigned SelectPred = MI.getOperand(4).getImm(); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(SelectPred) .addReg(MI.getOperand(1).getReg()) .addMBB(sinkMBB); } // copy0MBB: // %FalseValue = ... 
// # fallthrough to sinkMBB BB = copy0MBB; // Update machine-CFG edges BB->addSuccessor(sinkMBB); // sinkMBB: // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = sinkMBB; BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg()) .addReg(MI.getOperand(3).getReg()) .addMBB(copy0MBB) .addReg(MI.getOperand(2).getReg()) .addMBB(thisMBB); } else if (MI.getOpcode() == PPC::ReadTB) { // To read the 64-bit time-base register on a 32-bit target, we read the // two halves. Should the counter have wrapped while it was being read, we // need to try again. // ... // readLoop: // mfspr Rx,TBU # load from TBU // mfspr Ry,TB # load from TB // mfspr Rz,TBU # load from TBU // cmpw crX,Rx,Rz # check if 'old'='new' // bne readLoop # branch if they're not equal // ... MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); DebugLoc dl = MI.getDebugLoc(); F->insert(It, readMBB); F->insert(It, sinkMBB); // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(BB); BB->addSuccessor(readMBB); BB = readMBB; MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); unsigned LoReg = MI.getOperand(0).getReg(); unsigned HiReg = MI.getOperand(1).getReg(); BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269); BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268); BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269); unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg) .addReg(HiReg).addReg(ReadAgainReg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB); BB->addSuccessor(readMBB); BB->addSuccessor(sinkMBB); } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::AND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::OR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) BB = 
EmitAtomicBinary(MI, BB, 8, PPC::XOR8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32) BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64) BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32) BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64) BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32) BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64) BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32) BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64) BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE); else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, 0); else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, 0); else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32) BB = EmitAtomicBinary(MI, BB, 4, 0); else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64) BB = EmitAtomicBinary(MI, BB, 8, 0); else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 || (Subtarget.hasPartwordAtomics() && MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) || (Subtarget.hasPartwordAtomics() && MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) { bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; auto LoadMnemonic = PPC::LDARX; auto 
StoreMnemonic = PPC::STDCX; switch (MI.getOpcode()) { default: llvm_unreachable("Compare and swap of unknown size"); case PPC::ATOMIC_CMP_SWAP_I8: LoadMnemonic = PPC::LBARX; StoreMnemonic = PPC::STBCX; assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); break; case PPC::ATOMIC_CMP_SWAP_I16: LoadMnemonic = PPC::LHARX; StoreMnemonic = PPC::STHCX; assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); break; case PPC::ATOMIC_CMP_SWAP_I32: LoadMnemonic = PPC::LWARX; StoreMnemonic = PPC::STWCX; break; case PPC::ATOMIC_CMP_SWAP_I64: LoadMnemonic = PPC::LDARX; StoreMnemonic = PPC::STDCX; break; } unsigned dest = MI.getOperand(0).getReg(); unsigned ptrA = MI.getOperand(1).getReg(); unsigned ptrB = MI.getOperand(2).getReg(); unsigned oldval = MI.getOperand(3).getReg(); unsigned newval = MI.getOperand(4).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loop1MBB); F->insert(It, loop2MBB); F->insert(It, midMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); // thisMBB: // ... // fallthrough --> loopMBB BB->addSuccessor(loop1MBB); // loop1MBB: // l[bhwd]arx dest, ptr // cmp[wd] dest, oldval // bne- midMBB // loop2MBB: // st[bhwd]cx. newval, ptr // bne- loopMBB // b exitBB // midMBB: // st[bhwd]cx. dest, ptr // exitBB: BB = loop1MBB; BuildMI(BB, dl, TII->get(LoadMnemonic), dest) .addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0) .addReg(oldval).addReg(dest); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); BB->addSuccessor(loop2MBB); BB->addSuccessor(midMBB); BB = loop2MBB; BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(newval).addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); BB->addSuccessor(loop1MBB); BB->addSuccessor(exitMBB); BB = midMBB; BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(dest).addReg(ptrA).addReg(ptrB); BB->addSuccessor(exitMBB); // exitMBB: // ... BB = exitMBB; } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 || MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) { // We must use 64-bit registers for addresses when targeting 64-bit, // since we're actually doing arithmetic on them. Other registers // can be 32-bit. 
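  // Sketch of the field arithmetic used for the partword compare-and-swap
  // below: for a byte at offset k within its naturally aligned word, the lane
  // is isolated with mask = 0xFF << shift (0xFFFF for halfwords), where shift
  // is 8*k on little-endian targets and is complemented via the xori on
  // big-endian ones. The loop compares (loaded & mask) against the shifted,
  // masked old value and, on a match, stores
  // (loaded & ~mask) | ((newval << shift) & mask).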
bool is64bit = Subtarget.isPPC64(); bool isLittleEndian = Subtarget.isLittleEndian(); bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; unsigned dest = MI.getOperand(0).getReg(); unsigned ptrA = MI.getOperand(1).getReg(); unsigned ptrB = MI.getOperand(2).getReg(); unsigned oldval = MI.getOperand(3).getReg(); unsigned newval = MI.getOperand(4).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loop1MBB); F->insert(It, loop2MBB); F->insert(It, midMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; unsigned PtrReg = RegInfo.createVirtualRegister(RC); unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); unsigned ShiftReg = isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC); unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC); unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC); unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC); unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC); unsigned MaskReg = RegInfo.createVirtualRegister(RC); unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); unsigned Ptr1Reg; unsigned TmpReg = RegInfo.createVirtualRegister(RC); unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; // thisMBB: // ... // fallthrough --> loopMBB BB->addSuccessor(loop1MBB); // The 4-byte load must be aligned, while a char or short may be // anywhere in the word. Hence all this nasty bookkeeping code. // add ptr1, ptrA, ptrB [copy if ptrA==0] // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] // xori shift, shift1, 24 [16] // rlwinm ptr, ptr1, 0, 0, 29 // slw newval2, newval, shift // slw oldval2, oldval,shift // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] // slw mask, mask2, shift // and newval3, newval2, mask // and oldval3, oldval2, mask // loop1MBB: // lwarx tmpDest, ptr // and tmp, tmpDest, mask // cmpw tmp, oldval3 // bne- midMBB // loop2MBB: // andc tmp2, tmpDest, mask // or tmp4, tmp2, newval3 // stwcx. tmp4, ptr // bne- loop1MBB // b exitBB // midMBB: // stwcx. tmpDest, ptr // exitBB: // srw dest, tmpDest, shift if (ptrA != ZeroReg) { Ptr1Reg = RegInfo.createVirtualRegister(RC); BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) .addReg(ptrA).addReg(ptrB); } else { Ptr1Reg = ptrB; } BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); if (!isLittleEndian) BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) .addReg(Shift1Reg).addImm(is8bit ? 
24 : 16); if (is64bit) BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) .addReg(Ptr1Reg).addImm(0).addImm(61); else BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg) .addReg(newval).addReg(ShiftReg); BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg) .addReg(oldval).addReg(ShiftReg); if (is8bit) BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); else { BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) .addReg(Mask3Reg).addImm(65535); } BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) .addReg(Mask2Reg).addReg(ShiftReg); BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg) .addReg(NewVal2Reg).addReg(MaskReg); BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg) .addReg(OldVal2Reg).addReg(MaskReg); BB = loop1MBB; BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) .addReg(ZeroReg).addReg(PtrReg); BuildMI(BB, dl, TII->get(PPC::AND),TmpReg) .addReg(TmpDestReg).addReg(MaskReg); BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0) .addReg(TmpReg).addReg(OldVal3Reg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); BB->addSuccessor(loop2MBB); BB->addSuccessor(midMBB); BB = loop2MBB; BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg) .addReg(TmpDestReg).addReg(MaskReg); BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg) .addReg(Tmp2Reg).addReg(NewVal3Reg); BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg) .addReg(ZeroReg).addReg(PtrReg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); BB->addSuccessor(loop1MBB); BB->addSuccessor(exitMBB); BB = midMBB; BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg) .addReg(ZeroReg).addReg(PtrReg); BB->addSuccessor(exitMBB); // exitMBB: // ... BB = exitMBB; BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) .addReg(ShiftReg); } else if (MI.getOpcode() == PPC::FADDrtz) { // This pseudo performs an FADD with rounding mode temporarily forced // to round-to-zero. We emit this via custom inserter since the FPSCR // is not modeled at the SelectionDAG level. unsigned Dest = MI.getOperand(0).getReg(); unsigned Src1 = MI.getOperand(1).getReg(); unsigned Src2 = MI.getOperand(2).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); // Save FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); // Set rounding mode to round-to-zero. BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31); BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30); // Perform addition. BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2); // Restore FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg); } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT || MI.getOpcode() == PPC::ANDIo_1_GT_BIT || MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 || MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) { unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 || MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) ? PPC::ANDIo8 : PPC::ANDIo; bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT || MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8); MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ? 
&PPC::GPRCRegClass : &PPC::G8RCRegClass); DebugLoc dl = MI.getDebugLoc(); BuildMI(*BB, MI, dl, TII->get(Opcode), Dest) .addReg(MI.getOperand(1).getReg()) .addImm(1); BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg()) .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT); } else if (MI.getOpcode() == PPC::TCHECK_RET) { DebugLoc Dl = MI.getDebugLoc(); MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg); return BB; } else { llvm_unreachable("Unexpected instr type to insert"); } MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; } //===----------------------------------------------------------------------===// // Target Optimization Hooks //===----------------------------------------------------------------------===// static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) { // For the estimates, convergence is quadratic, so we essentially double the // number of digits correct after every iteration. For both FRE and FRSQRTE, // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(), // this is 2^-14. IEEE float has 23 digits and double has 52 digits. int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3; if (VT.getScalarType() == MVT::f64) RefinementSteps++; return RefinementSteps; } SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const { EVT VT = Operand.getValueType(); if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || (VT == MVT::v4f32 && Subtarget.hasAltivec()) || (VT == MVT::v2f64 && Subtarget.hasVSX()) || (VT == MVT::v4f32 && Subtarget.hasQPX()) || (VT == MVT::v4f64 && Subtarget.hasQPX())) { if (RefinementSteps == ReciprocalEstimate::Unspecified) RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); UseOneConstNR = true; return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); } return SDValue(); } SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const { EVT VT = Operand.getValueType(); if ((VT == MVT::f32 && Subtarget.hasFRES()) || (VT == MVT::f64 && Subtarget.hasFRE()) || (VT == MVT::v4f32 && Subtarget.hasAltivec()) || (VT == MVT::v2f64 && Subtarget.hasVSX()) || (VT == MVT::v4f32 && Subtarget.hasQPX()) || (VT == MVT::v4f64 && Subtarget.hasQPX())) { if (RefinementSteps == ReciprocalEstimate::Unspecified) RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); } return SDValue(); } unsigned PPCTargetLowering::combineRepeatedFPDivisors() const { // Note: This functionality is used only when unsafe-fp-math is enabled, and // on cores with reciprocal estimates (which are used when unsafe-fp-math is // enabled for division), this functionality is redundant with the default // combiner logic (once the division -> reciprocal/multiply transformation // has taken place). As a result, this matters more for older cores than for // newer ones. // Combine multiple FDIVs with the same divisor into multiple FMULs by the // reciprocal if there are two or more FDIVs (for embedded cores with only // one FP pipeline) for three or more FDIVs (for generic OOO cores). 
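  // Concretely, with unsafe-fp-math a sequence such as a/b + c/b + d/b is
  // rewritten as r = 1.0/b; a*r + c*r + d*r once the number of divisions
  // sharing a divisor reaches the threshold returned here: 2 on the
  // single-FP-pipeline embedded cores listed below, 3 everywhere else.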
  switch (Subtarget.getDarwinDirective()) {
  default:
    return 3;
  case PPC::DIR_440:
  case PPC::DIR_A2:
  case PPC::DIR_E500:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
    return 2;
  }
}

// isConsecutiveLSLoc needs to work even if all adds have not yet been
// collapsed, and so we need to look through chains of them.
static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
                                      int64_t& Offset, SelectionDAG &DAG) {
  if (DAG.isBaseWithConstantOffset(Loc)) {
    Base = Loc.getOperand(0);
    Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
    // The base might itself be a base plus an offset, and if so, accumulate
    // that as well.
    getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
  }
}

static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
                               unsigned Bytes, int Dist, SelectionDAG &DAG) {
  if (VT.getSizeInBits() / 8 != Bytes)
    return false;

  SDValue BaseLoc = Base->getBasePtr();
  if (Loc.getOpcode() == ISD::FrameIndex) {
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS  = MFI.getObjectSize(FI);
    int BFS = MFI.getObjectSize(BFI);
    if (FS != BFS || FS != (int)Bytes) return false;
    return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
  }

  SDValue Base1 = Loc, Base2 = BaseLoc;
  int64_t Offset1 = 0, Offset2 = 0;
  getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
  getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
  if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
    return true;

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const GlobalValue *GV1 = nullptr;
  const GlobalValue *GV2 = nullptr;
  Offset1 = 0;
  Offset2 = 0;
  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
  if (isGA1 && isGA2 && GV1 == GV2)
    return Offset1 == (Offset2 + Dist*Bytes);

  return false;
}

// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
// not enforce equality of the chain operands.
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
                            unsigned Bytes, int Dist,
                            SelectionDAG &DAG) {
  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
    EVT VT = LS->getMemoryVT();
    SDValue Loc = LS->getBasePtr();
    return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
  }

  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    EVT VT;
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    default: return false;
    case Intrinsic::ppc_qpx_qvlfd:
    case Intrinsic::ppc_qpx_qvlfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfs:
    case Intrinsic::ppc_qpx_qvlfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcd:
    case Intrinsic::ppc_qpx_qvlfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcs:
    case Intrinsic::ppc_qpx_qvlfcsa:
      VT = MVT::v2f32;
      break;
    case Intrinsic::ppc_qpx_qvlfiwa:
    case Intrinsic::ppc_qpx_qvlfiwz:
    case Intrinsic::ppc_altivec_lvx:
    case Intrinsic::ppc_altivec_lvxl:
    case Intrinsic::ppc_vsx_lxvw4x:
    case Intrinsic::ppc_vsx_lxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
    case Intrinsic::ppc_vsx_lxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
  }

  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
    EVT VT;
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    default: return false;
    case Intrinsic::ppc_qpx_qvstfd:
    case Intrinsic::ppc_qpx_qvstfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfs:
    case Intrinsic::ppc_qpx_qvstfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcd:
    case Intrinsic::ppc_qpx_qvstfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcs:
    case Intrinsic::ppc_qpx_qvstfcsa:
      VT = MVT::v2f32;
      break;
    case Intrinsic::ppc_qpx_qvstfiw:
    case Intrinsic::ppc_qpx_qvstfiwa:
    case Intrinsic::ppc_altivec_stvx:
    case Intrinsic::ppc_altivec_stvxl:
    case Intrinsic::ppc_vsx_stxvw4x:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_vsx_stxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
  }

  return false;
}

// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
// token factors and other loads (but nothing else). As a result, a true result
// indicates that it is safe to create a new consecutive load adjacent to the
// load provided.
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
  SDValue Chain = LD->getChain();
  EVT VT = LD->getMemoryVT();

  SmallSet<SDNode *, 16> LoadRoots;
  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
  SmallSet<SDNode *, 16> Visited;

  // First, search up the chain, branching to follow all token-factor operands.
  // If we find a consecutive load, then we're done, otherwise, record all
  // nodes just above the top-level loads and token factors.
while (!Queue.empty()) { SDNode *ChainNext = Queue.pop_back_val(); if (!Visited.insert(ChainNext).second) continue; if (MemSDNode *ChainLD = dyn_cast(ChainNext)) { if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) return true; if (!Visited.count(ChainLD->getChain().getNode())) Queue.push_back(ChainLD->getChain().getNode()); } else if (ChainNext->getOpcode() == ISD::TokenFactor) { for (const SDUse &O : ChainNext->ops()) if (!Visited.count(O.getNode())) Queue.push_back(O.getNode()); } else LoadRoots.insert(ChainNext); } // Second, search down the chain, starting from the top-level nodes recorded // in the first phase. These top-level nodes are the nodes just above all // loads and token factors. Starting with their uses, recursively look though // all loads (just the chain uses) and token factors to find a consecutive // load. Visited.clear(); Queue.clear(); for (SmallSet::iterator I = LoadRoots.begin(), IE = LoadRoots.end(); I != IE; ++I) { Queue.push_back(*I); while (!Queue.empty()) { SDNode *LoadRoot = Queue.pop_back_val(); if (!Visited.insert(LoadRoot).second) continue; if (MemSDNode *ChainLD = dyn_cast(LoadRoot)) if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) return true; for (SDNode::use_iterator UI = LoadRoot->use_begin(), UE = LoadRoot->use_end(); UI != UE; ++UI) if (((isa(*UI) && cast(*UI)->getChain().getNode() == LoadRoot) || UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI)) Queue.push_back(*UI); } } return false; } /// This function is called when we have proved that a SETCC node can be replaced /// by subtraction (and other supporting instructions) so that the result of /// comparison is kept in a GPR instead of CR. This function is purely for /// codegen purposes and has some flags to guide the codegen process. static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG) { assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected."); // Zero extend the operands to the largest legal integer. Originally, they // must be of a strictly smaller size. auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0), DAG.getConstant(Size, DL, MVT::i32)); auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1), DAG.getConstant(Size, DL, MVT::i32)); // Swap if needed. Depends on the condition code. if (Swap) std::swap(Op0, Op1); // Subtract extended integers. auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1); // Move the sign bit to the least significant position and zero out the rest. // Now the least significant bit carries the result of original comparison. auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode, DAG.getConstant(Size - 1, DL, MVT::i32)); auto Final = Shifted; // Complement the result if needed. Based on the condition code. if (Complement) Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted, DAG.getConstant(1, DL, MVT::i64)); return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final); } SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const { assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected."); SelectionDAG &DAG = DCI.DAG; SDLoc DL(N); // Size of integers being compared has a critical role in the following // analysis, so we prefer to do this when all types are legal. 
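  // Worked example of the subtraction trick implemented by generateEquivalentSub
  // above: on a 64-bit target, for a SETULT of two i32 values a and b, both are
  // zero-extended to i64, so a < b exactly when a - b is negative as an i64;
  // shifting the difference right by 63 leaves that borrow bit as a 0/1 result
  // (e.g. a = 3, b = 5 gives 3 - 5 = -2, bit 63 set, result 1). SETUGT swaps
  // the operands, SETUGE complements the result, and SETULE does both.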
if (!DCI.isAfterLegalizeDAG()) return SDValue(); // If all users of SETCC extend its value to a legal integer type // then we replace SETCC with a subtraction for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE; ++UI) { if (UI->getOpcode() != ISD::ZERO_EXTEND) return SDValue(); } ISD::CondCode CC = cast(N->getOperand(2))->get(); auto OpSize = N->getOperand(0).getValueSizeInBits(); unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits(); if (OpSize < Size) { switch (CC) { default: break; case ISD::SETULT: return generateEquivalentSub(N, Size, false, false, DL, DAG); case ISD::SETULE: return generateEquivalentSub(N, Size, true, true, DL, DAG); case ISD::SETUGT: return generateEquivalentSub(N, Size, false, true, DL, DAG); case ISD::SETUGE: return generateEquivalentSub(N, Size, true, false, DL, DAG); } } return SDValue(); } SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits"); // If we're tracking CR bits, we need to be careful that we don't have: // trunc(binary-ops(zext(x), zext(y))) // or // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...) // such that we're unnecessarily moving things into GPRs when it would be // better to keep them in CR bits. // Note that trunc here can be an actual i1 trunc, or can be the effective // truncation that comes from a setcc or select_cc. if (N->getOpcode() == ISD::TRUNCATE && N->getValueType(0) != MVT::i1) return SDValue(); if (N->getOperand(0).getValueType() != MVT::i32 && N->getOperand(0).getValueType() != MVT::i64) return SDValue(); if (N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) { // If we're looking at a comparison, then we need to make sure that the // high bits (all except for the first) don't matter the result. ISD::CondCode CC = cast(N->getOperand( N->getOpcode() == ISD::SETCC ? 2 : 4))->get(); unsigned OpBits = N->getOperand(0).getValueSizeInBits(); if (ISD::isSignedIntSetCC(CC)) { if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits || DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits) return SDValue(); } else if (ISD::isUnsignedIntSetCC(CC)) { if (!DAG.MaskedValueIsZero(N->getOperand(0), APInt::getHighBitsSet(OpBits, OpBits-1)) || !DAG.MaskedValueIsZero(N->getOperand(1), APInt::getHighBitsSet(OpBits, OpBits-1))) return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI) : SDValue()); } else { // This is neither a signed nor an unsigned comparison, just make sure // that the high bits are equal. KnownBits Op1Known, Op2Known; DAG.computeKnownBits(N->getOperand(0), Op1Known); DAG.computeKnownBits(N->getOperand(1), Op2Known); // We don't really care about what is known about the first bit (if // anything), so clear it in all masks prior to comparing them. Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0); Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0); if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One) return SDValue(); } } // We now know that the higher-order bits are irrelevant, we just need to // make sure that all of the intermediate operations are bit operations, and // all inputs are extensions. 
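  // In other words, the combine is matching a cluster such as
  //   trunc(or(zext(a), and(zext(b), zext(c))))
  // and, once every leaf is an i1 extension or a constant, it rebuilds the
  // logical operations directly on i1 so the values can stay in CR bits
  // instead of being shuffled through GPRs.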
if (N->getOperand(0).getOpcode() != ISD::AND && N->getOperand(0).getOpcode() != ISD::OR && N->getOperand(0).getOpcode() != ISD::XOR && N->getOperand(0).getOpcode() != ISD::SELECT && N->getOperand(0).getOpcode() != ISD::SELECT_CC && N->getOperand(0).getOpcode() != ISD::TRUNCATE && N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND && N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND && N->getOperand(0).getOpcode() != ISD::ANY_EXTEND) return SDValue(); if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) && N->getOperand(1).getOpcode() != ISD::AND && N->getOperand(1).getOpcode() != ISD::OR && N->getOperand(1).getOpcode() != ISD::XOR && N->getOperand(1).getOpcode() != ISD::SELECT && N->getOperand(1).getOpcode() != ISD::SELECT_CC && N->getOperand(1).getOpcode() != ISD::TRUNCATE && N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND && N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND && N->getOperand(1).getOpcode() != ISD::ANY_EXTEND) return SDValue(); SmallVector Inputs; SmallVector BinOps, PromOps; SmallPtrSet Visited; for (unsigned i = 0; i < 2; ++i) { if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND || N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND || N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) && N->getOperand(i).getOperand(0).getValueType() == MVT::i1) || isa(N->getOperand(i))) Inputs.push_back(N->getOperand(i)); else BinOps.push_back(N->getOperand(i)); if (N->getOpcode() == ISD::TRUNCATE) break; } // Visit all inputs, collect all binary operations (and, or, xor and // select) that are all fed by extensions. while (!BinOps.empty()) { SDValue BinOp = BinOps.back(); BinOps.pop_back(); if (!Visited.insert(BinOp.getNode()).second) continue; PromOps.push_back(BinOp); for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { // The condition of the select is not promoted. if (BinOp.getOpcode() == ISD::SELECT && i == 0) continue; if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) continue; if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) && BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) || isa(BinOp.getOperand(i))) { Inputs.push_back(BinOp.getOperand(i)); } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || BinOp.getOperand(i).getOpcode() == ISD::OR || BinOp.getOperand(i).getOpcode() == ISD::XOR || BinOp.getOperand(i).getOpcode() == ISD::SELECT || BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC || BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) { BinOps.push_back(BinOp.getOperand(i)); } else { // We have an input that is not an extension or another binary // operation; we'll abort this transformation. return SDValue(); } } } // Make sure that this is a self-contained cluster of operations (which // is not quite the same thing as saying that everything has only one // use). for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { if (isa(Inputs[i])) continue; for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), UE = Inputs[i].getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User != N && !Visited.count(User)) return SDValue(); // Make sure that we're not going to promote the non-output-value // operand(s) or SELECT or SELECT_CC. 
// FIXME: Although we could sometimes handle this, and it does occur in // practice that one of the condition inputs to the select is also one of // the outputs, we currently can't deal with this. if (User->getOpcode() == ISD::SELECT) { if (User->getOperand(0) == Inputs[i]) return SDValue(); } else if (User->getOpcode() == ISD::SELECT_CC) { if (User->getOperand(0) == Inputs[i] || User->getOperand(1) == Inputs[i]) return SDValue(); } } } for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), UE = PromOps[i].getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User != N && !Visited.count(User)) return SDValue(); // Make sure that we're not going to promote the non-output-value // operand(s) or SELECT or SELECT_CC. // FIXME: Although we could sometimes handle this, and it does occur in // practice that one of the condition inputs to the select is also one of // the outputs, we currently can't deal with this. if (User->getOpcode() == ISD::SELECT) { if (User->getOperand(0) == PromOps[i]) return SDValue(); } else if (User->getOpcode() == ISD::SELECT_CC) { if (User->getOperand(0) == PromOps[i] || User->getOperand(1) == PromOps[i]) return SDValue(); } } } // Replace all inputs with the extension operand. for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { // Constants may have users outside the cluster of to-be-promoted nodes, // and so we need to replace those as we do the promotions. if (isa(Inputs[i])) continue; else DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); } std::list PromOpHandles; for (auto &PromOp : PromOps) PromOpHandles.emplace_back(PromOp); // Replace all operations (these are all the same, but have a different // (i1) return type). DAG.getNode will validate that the types of // a binary operator match, so go through the list in reverse so that // we've likely promoted both operands first. Any intermediate truncations or // extensions disappear. while (!PromOpHandles.empty()) { SDValue PromOp = PromOpHandles.back().getValue(); PromOpHandles.pop_back(); if (PromOp.getOpcode() == ISD::TRUNCATE || PromOp.getOpcode() == ISD::SIGN_EXTEND || PromOp.getOpcode() == ISD::ZERO_EXTEND || PromOp.getOpcode() == ISD::ANY_EXTEND) { if (!isa(PromOp.getOperand(0)) && PromOp.getOperand(0).getValueType() != MVT::i1) { // The operand is not yet ready (see comment below). PromOpHandles.emplace_front(PromOp); continue; } SDValue RepValue = PromOp.getOperand(0); if (isa(RepValue)) RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue); DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue); continue; } unsigned C; switch (PromOp.getOpcode()) { default: C = 0; break; case ISD::SELECT: C = 1; break; case ISD::SELECT_CC: C = 2; break; } if ((!isa(PromOp.getOperand(C)) && PromOp.getOperand(C).getValueType() != MVT::i1) || (!isa(PromOp.getOperand(C+1)) && PromOp.getOperand(C+1).getValueType() != MVT::i1)) { // The to-be-promoted operands of this node have not yet been // promoted (this should be rare because we're going through the // list backward, but if one of the operands has several users in // this cluster of to-be-promoted nodes, it is possible). PromOpHandles.emplace_front(PromOp); continue; } SmallVector Ops(PromOp.getNode()->op_begin(), PromOp.getNode()->op_end()); // If there are any constant inputs, make sure they're replaced now. 
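// --- Illustrative sketch (not part of the LLVM sources) ---------------------
// The promotion loop above walks PromOpHandles from the back and, when it
// meets a node whose operands have not been rewritten to i1 yet, pushes it to
// the front so it is retried once the rest of the cluster has been processed.
// Below is a tiny, self-contained model of that "pop from the back, defer to
// the front" pattern; the data and dependency rule are made up for the demo.
#include <cassert>
#include <list>
#include <set>

int main() {
  std::list<int> Work = {0, 2, 1};  // node 2 depends on 0 and 1 being done first
  std::set<int> Done;
  auto Ready = [&](int N) {
    return N != 2 || (Done.count(0) && Done.count(1));
  };

  while (!Work.empty()) {
    int N = Work.back();
    Work.pop_back();
    if (!Ready(N)) {       // operands not processed yet: retry after the others
      Work.push_front(N);
      continue;
    }
    Done.insert(N);
  }
  assert(Done.size() == 3);
  return 0;
}
// -----------------------------------------------------------------------------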
for (unsigned i = 0; i < 2; ++i) if (isa(Ops[C+i])) Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]); DAG.ReplaceAllUsesOfValueWith(PromOp, DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops)); } // Now we're left with the initial truncation itself. if (N->getOpcode() == ISD::TRUNCATE) return N->getOperand(0); // Otherwise, this is a comparison. The operands to be compared have just // changed type (to i1), but everything else is the same. return SDValue(N, 0); } SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); // If we're tracking CR bits, we need to be careful that we don't have: // zext(binary-ops(trunc(x), trunc(y))) // or // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...) // such that we're unnecessarily moving things into CR bits that can more // efficiently stay in GPRs. Note that if we're not certain that the high // bits are set as required by the final extension, we still may need to do // some masking to get the proper behavior. // This same functionality is important on PPC64 when dealing with // 32-to-64-bit extensions; these occur often when 32-bit values are used as // the return values of functions. Because it is so similar, it is handled // here as well. if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) return SDValue(); if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) || (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64()))) return SDValue(); if (N->getOperand(0).getOpcode() != ISD::AND && N->getOperand(0).getOpcode() != ISD::OR && N->getOperand(0).getOpcode() != ISD::XOR && N->getOperand(0).getOpcode() != ISD::SELECT && N->getOperand(0).getOpcode() != ISD::SELECT_CC) return SDValue(); SmallVector Inputs; SmallVector BinOps(1, N->getOperand(0)), PromOps; SmallPtrSet Visited; // Visit all inputs, collect all binary operations (and, or, xor and // select) that are all fed by truncations. while (!BinOps.empty()) { SDValue BinOp = BinOps.back(); BinOps.pop_back(); if (!Visited.insert(BinOp.getNode()).second) continue; PromOps.push_back(BinOp); for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { // The condition of the select is not promoted. if (BinOp.getOpcode() == ISD::SELECT && i == 0) continue; if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) continue; if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || isa(BinOp.getOperand(i))) { Inputs.push_back(BinOp.getOperand(i)); } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || BinOp.getOperand(i).getOpcode() == ISD::OR || BinOp.getOperand(i).getOpcode() == ISD::XOR || BinOp.getOperand(i).getOpcode() == ISD::SELECT || BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) { BinOps.push_back(BinOp.getOperand(i)); } else { // We have an input that is not a truncation or another binary // operation; we'll abort this transformation. return SDValue(); } } } // The operands of a select that must be truncated when the select is // promoted because the operand is actually part of the to-be-promoted set. DenseMap SelectTruncOp[2]; // Make sure that this is a self-contained cluster of operations (which // is not quite the same thing as saying that everything has only one // use). 
for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { if (isa(Inputs[i])) continue; for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), UE = Inputs[i].getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User != N && !Visited.count(User)) return SDValue(); // If we're going to promote the non-output-value operand(s) or SELECT or // SELECT_CC, record them for truncation. if (User->getOpcode() == ISD::SELECT) { if (User->getOperand(0) == Inputs[i]) SelectTruncOp[0].insert(std::make_pair(User, User->getOperand(0).getValueType())); } else if (User->getOpcode() == ISD::SELECT_CC) { if (User->getOperand(0) == Inputs[i]) SelectTruncOp[0].insert(std::make_pair(User, User->getOperand(0).getValueType())); if (User->getOperand(1) == Inputs[i]) SelectTruncOp[1].insert(std::make_pair(User, User->getOperand(1).getValueType())); } } } for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), UE = PromOps[i].getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User != N && !Visited.count(User)) return SDValue(); // If we're going to promote the non-output-value operand(s) or SELECT or // SELECT_CC, record them for truncation. if (User->getOpcode() == ISD::SELECT) { if (User->getOperand(0) == PromOps[i]) SelectTruncOp[0].insert(std::make_pair(User, User->getOperand(0).getValueType())); } else if (User->getOpcode() == ISD::SELECT_CC) { if (User->getOperand(0) == PromOps[i]) SelectTruncOp[0].insert(std::make_pair(User, User->getOperand(0).getValueType())); if (User->getOperand(1) == PromOps[i]) SelectTruncOp[1].insert(std::make_pair(User, User->getOperand(1).getValueType())); } } } unsigned PromBits = N->getOperand(0).getValueSizeInBits(); bool ReallyNeedsExt = false; if (N->getOpcode() != ISD::ANY_EXTEND) { // If all of the inputs are not already sign/zero extended, then // we'll still need to do that at the end. for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { if (isa(Inputs[i])) continue; unsigned OpBits = Inputs[i].getOperand(0).getValueSizeInBits(); assert(PromBits < OpBits && "Truncation not to a smaller bit count?"); if ((N->getOpcode() == ISD::ZERO_EXTEND && !DAG.MaskedValueIsZero(Inputs[i].getOperand(0), APInt::getHighBitsSet(OpBits, OpBits-PromBits))) || (N->getOpcode() == ISD::SIGN_EXTEND && DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) < (OpBits-(PromBits-1)))) { ReallyNeedsExt = true; break; } } } // Replace all inputs, either with the truncation operand, or a // truncation or extension to the final output type. for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { // Constant inputs need to be replaced with the to-be-promoted nodes that // use them because they might have users outside of the cluster of // promoted nodes. if (isa(Inputs[i])) continue; SDValue InSrc = Inputs[i].getOperand(0); if (Inputs[i].getValueType() == N->getValueType(0)) DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc); else if (N->getOpcode() == ISD::SIGN_EXTEND) DAG.ReplaceAllUsesOfValueWith(Inputs[i], DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0))); else if (N->getOpcode() == ISD::ZERO_EXTEND) DAG.ReplaceAllUsesOfValueWith(Inputs[i], DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0))); else DAG.ReplaceAllUsesOfValueWith(Inputs[i], DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0))); } std::list PromOpHandles; for (auto &PromOp : PromOps) PromOpHandles.emplace_back(PromOp); // Replace all operations (these are all the same, but have a different // (promoted) return type). 
DAG.getNode will validate that the types of // a binary operator match, so go through the list in reverse so that // we've likely promoted both operands first. while (!PromOpHandles.empty()) { SDValue PromOp = PromOpHandles.back().getValue(); PromOpHandles.pop_back(); unsigned C; switch (PromOp.getOpcode()) { default: C = 0; break; case ISD::SELECT: C = 1; break; case ISD::SELECT_CC: C = 2; break; } if ((!isa(PromOp.getOperand(C)) && PromOp.getOperand(C).getValueType() != N->getValueType(0)) || (!isa(PromOp.getOperand(C+1)) && PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) { // The to-be-promoted operands of this node have not yet been // promoted (this should be rare because we're going through the // list backward, but if one of the operands has several users in // this cluster of to-be-promoted nodes, it is possible). PromOpHandles.emplace_front(PromOp); continue; } // For SELECT and SELECT_CC nodes, we do a similar check for any // to-be-promoted comparison inputs. if (PromOp.getOpcode() == ISD::SELECT || PromOp.getOpcode() == ISD::SELECT_CC) { if ((SelectTruncOp[0].count(PromOp.getNode()) && PromOp.getOperand(0).getValueType() != N->getValueType(0)) || (SelectTruncOp[1].count(PromOp.getNode()) && PromOp.getOperand(1).getValueType() != N->getValueType(0))) { PromOpHandles.emplace_front(PromOp); continue; } } SmallVector Ops(PromOp.getNode()->op_begin(), PromOp.getNode()->op_end()); // If this node has constant inputs, then they'll need to be promoted here. for (unsigned i = 0; i < 2; ++i) { if (!isa(Ops[C+i])) continue; if (Ops[C+i].getValueType() == N->getValueType(0)) continue; if (N->getOpcode() == ISD::SIGN_EXTEND) Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); else if (N->getOpcode() == ISD::ZERO_EXTEND) Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); else Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); } // If we've promoted the comparison inputs of a SELECT or SELECT_CC, // truncate them again to the original value type. if (PromOp.getOpcode() == ISD::SELECT || PromOp.getOpcode() == ISD::SELECT_CC) { auto SI0 = SelectTruncOp[0].find(PromOp.getNode()); if (SI0 != SelectTruncOp[0].end()) Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]); auto SI1 = SelectTruncOp[1].find(PromOp.getNode()); if (SI1 != SelectTruncOp[1].end()) Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]); } DAG.ReplaceAllUsesOfValueWith(PromOp, DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops)); } // Now we're left with the initial extension itself. if (!ReallyNeedsExt) return N->getOperand(0); // To zero extend, just mask off everything except for the first bit (in the // i1 case). if (N->getOpcode() == ISD::ZERO_EXTEND) return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0), DAG.getConstant(APInt::getLowBitsSet( N->getValueSizeInBits(0), PromBits), dl, N->getValueType(0))); assert(N->getOpcode() == ISD::SIGN_EXTEND && "Invalid extension type"); EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout()); SDValue ShiftCst = DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy); return DAG.getNode( ISD::SRA, dl, N->getValueType(0), DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst), ShiftCst); } // Is this an extending load from an f32 to an f64? 
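// --- Illustrative sketch (not part of the LLVM sources) ---------------------
// The tail of DAGCombineExtBoolTrunc above re-establishes the extension only
// when ReallyNeedsExt is set: a zero extension becomes an AND with a low-bit
// mask, and a sign extension becomes a shift left followed by an arithmetic
// shift right by (result width - PromBits). The functions below are a
// standalone C++ restatement of those two tails; names are illustrative.
#include <cassert>
#include <cstdint>

static uint64_t zextTail(uint64_t V, unsigned PromBits) {
  uint64_t Mask = PromBits >= 64 ? ~0ULL : ((1ULL << PromBits) - 1);
  return V & Mask;                          // keep only the low PromBits
}

static int64_t sextTail(uint64_t V, unsigned PromBits) {
  unsigned Shift = 64 - PromBits;           // ShiftCst in the code above
  return (int64_t)(V << Shift) >> Shift;    // shl then sra replicates the sign bit
}

int main() {
  assert(zextTail(~0ULL, 1) == 1);          // i1 zero extension yields 0 or 1
  assert(sextTail(1, 1) == -1);             // i1 sign extension of 1 is all ones
  assert(sextTail(0x80, 8) == -128);        // i8 sign bit propagates upward
  assert(sextTail(0x7F, 8) == 127);
  return 0;
}
// -----------------------------------------------------------------------------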
static bool isFPExtLoad(SDValue Op) { if (LoadSDNode *LD = dyn_cast(Op.getNode())) return LD->getExtensionType() == ISD::EXTLOAD && Op.getValueType() == MVT::f64; return false; } /// Reduces the number of fp-to-int conversion when building a vector. /// /// If this vector is built out of floating to integer conversions, /// transform it to a vector built out of floating point values followed by a /// single floating to integer conversion of the vector. /// Namely (build_vector (fptosi $A), (fptosi $B), ...) /// becomes (fptosi (build_vector ($A, $B, ...))) SDValue PPCTargetLowering:: combineElementTruncationToVectorTruncation(SDNode *N, DAGCombinerInfo &DCI) const { assert(N->getOpcode() == ISD::BUILD_VECTOR && "Should be called with a BUILD_VECTOR node"); SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); SDValue FirstInput = N->getOperand(0); assert(FirstInput.getOpcode() == PPCISD::MFVSR && "The input operand must be an fp-to-int conversion."); // This combine happens after legalization so the fp_to_[su]i nodes are // already converted to PPCSISD nodes. unsigned FirstConversion = FirstInput.getOperand(0).getOpcode(); if (FirstConversion == PPCISD::FCTIDZ || FirstConversion == PPCISD::FCTIDUZ || FirstConversion == PPCISD::FCTIWZ || FirstConversion == PPCISD::FCTIWUZ) { bool IsSplat = true; bool Is32Bit = FirstConversion == PPCISD::FCTIWZ || FirstConversion == PPCISD::FCTIWUZ; EVT SrcVT = FirstInput.getOperand(0).getValueType(); SmallVector Ops; EVT TargetVT = N->getValueType(0); for (int i = 0, e = N->getNumOperands(); i < e; ++i) { SDValue NextOp = N->getOperand(i); if (NextOp.getOpcode() != PPCISD::MFVSR) return SDValue(); unsigned NextConversion = NextOp.getOperand(0).getOpcode(); if (NextConversion != FirstConversion) return SDValue(); // If we are converting to 32-bit integers, we need to add an FP_ROUND. // This is not valid if the input was originally double precision. It is // also not profitable to do unless this is an extending load in which // case doing this combine will allow us to combine consecutive loads. if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0))) return SDValue(); if (N->getOperand(i) != FirstInput) IsSplat = false; } // If this is a splat, we leave it as-is since there will be only a single // fp-to-int conversion followed by a splat of the integer. This is better // for 32-bit and smaller ints and neutral for 64-bit ints. if (IsSplat) return SDValue(); // Now that we know we have the right type of node, get its operands for (int i = 0, e = N->getNumOperands(); i < e; ++i) { SDValue In = N->getOperand(i).getOperand(0); if (Is32Bit) { // For 32-bit values, we need to add an FP_ROUND node (if we made it // here, we know that all inputs are extending loads so this is safe). if (In.isUndef()) Ops.push_back(DAG.getUNDEF(SrcVT)); else { SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0), DAG.getIntPtrConstant(1, dl)); Ops.push_back(Trunc); } } else Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0)); } unsigned Opcode; if (FirstConversion == PPCISD::FCTIDZ || FirstConversion == PPCISD::FCTIWZ) Opcode = ISD::FP_TO_SINT; else Opcode = ISD::FP_TO_UINT; EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32; SDValue BV = DAG.getBuildVector(NewVT, dl, Ops); return DAG.getNode(Opcode, dl, TargetVT, BV); } return SDValue(); } /// Reduce the number of loads when building a vector. /// /// Building a vector out of multiple loads can be converted to a load /// of the vector type if the loads are consecutive. 
If the loads are /// consecutive but in descending order, a shuffle is added at the end /// to reorder the vector. static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) { assert(N->getOpcode() == ISD::BUILD_VECTOR && "Should be called with a BUILD_VECTOR node"); SDLoc dl(N); bool InputsAreConsecutiveLoads = true; bool InputsAreReverseConsecutive = true; unsigned ElemSize = N->getValueType(0).getScalarSizeInBits() / 8; SDValue FirstInput = N->getOperand(0); bool IsRoundOfExtLoad = false; if (FirstInput.getOpcode() == ISD::FP_ROUND && FirstInput.getOperand(0).getOpcode() == ISD::LOAD) { LoadSDNode *LD = dyn_cast(FirstInput.getOperand(0)); IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD; } // Not a build vector of (possibly fp_rounded) loads. if (!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) return SDValue(); for (int i = 1, e = N->getNumOperands(); i < e; ++i) { // If any inputs are fp_round(extload), they all must be. if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND) return SDValue(); SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) : N->getOperand(i); if (NextInput.getOpcode() != ISD::LOAD) return SDValue(); SDValue PreviousInput = IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1); LoadSDNode *LD1 = dyn_cast(PreviousInput); LoadSDNode *LD2 = dyn_cast(NextInput); // If any inputs are fp_round(extload), they all must be. if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD) return SDValue(); if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG)) InputsAreConsecutiveLoads = false; if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG)) InputsAreReverseConsecutive = false; // Exit early if the loads are neither consecutive nor reverse consecutive. if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive) return SDValue(); } assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) && "The loads cannot be both consecutive and reverse consecutive."); SDValue FirstLoadOp = IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput; SDValue LastLoadOp = IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) : N->getOperand(N->getNumOperands()-1); LoadSDNode *LD1 = dyn_cast(FirstLoadOp); LoadSDNode *LDL = dyn_cast(LastLoadOp); if (InputsAreConsecutiveLoads) { assert(LD1 && "Input needs to be a LoadSDNode."); return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(), LD1->getBasePtr(), LD1->getPointerInfo(), LD1->getAlignment()); } if (InputsAreReverseConsecutive) { assert(LDL && "Input needs to be a LoadSDNode."); SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(), LDL->getBasePtr(), LDL->getPointerInfo(), LDL->getAlignment()); SmallVector Ops; for (int i = N->getNumOperands() - 1; i >= 0; i--) Ops.push_back(i); return DAG.getVectorShuffle(N->getValueType(0), dl, Load, DAG.getUNDEF(N->getValueType(0)), Ops); } return SDValue(); } // This function adds the required vector_shuffle needed to get // the elements of the vector extract in the correct position // as specified by the CorrectElems encoding. static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems) { SDLoc dl(N); unsigned NumElems = Input.getValueType().getVectorNumElements(); SmallVector ShuffleMask(NumElems, -1); // Knowing the element indices being extracted from the original // vector and the order in which they're being inserted, just put // them at element indices required for the instruction. 
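// --- Illustrative sketch (not part of the LLVM sources) ---------------------
// In the reverse-consecutive case of combineBVOfConsecutiveLoads above, one
// wide load is emitted from the lowest address and the lanes are put back in
// build_vector order with a shuffle whose mask counts down from
// NumOperands-1 to 0. The snippet below models that mask and its effect on a
// plain array; the data values are made up.
#include <cassert>
#include <vector>

int main() {
  const int NumElems = 4;
  std::vector<int> Mask;
  for (int i = NumElems - 1; i >= 0; --i)   // mirrors the Ops loop above
    Mask.push_back(i);

  // One wide load from the lowest address: lanes arrive in the reverse of the
  // order the build_vector wanted them in.
  int Loaded[NumElems] = {30, 20, 10, 0};
  int Result[NumElems];
  for (int i = 0; i < NumElems; ++i)        // apply the shuffle mask
    Result[i] = Loaded[Mask[i]];

  assert(Result[0] == 0 && Result[1] == 10 && Result[2] == 20 && Result[3] == 30);
  return 0;
}
// -----------------------------------------------------------------------------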
for (unsigned i = 0; i < N->getNumOperands(); i++) { if (DAG.getDataLayout().isLittleEndian()) ShuffleMask[CorrectElems & 0xF] = Elems & 0xF; else ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4; CorrectElems = CorrectElems >> 8; Elems = Elems >> 8; } SDValue Shuffle = DAG.getVectorShuffle(Input.getValueType(), dl, Input, DAG.getUNDEF(Input.getValueType()), ShuffleMask); EVT Ty = N->getValueType(0); SDValue BV = DAG.getNode(PPCISD::SExtVElems, dl, Ty, Shuffle); return BV; } // Look for build vector patterns where input operands come from sign // extended vector_extract elements of specific indices. If the correct indices // aren't used, add a vector shuffle to fix up the indices and create a new // PPCISD:SExtVElems node which selects the vector sign extend instructions // during instruction selection. static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) { // This array encodes the indices that the vector sign extend instructions // extract from when extending from one type to another for both BE and LE. // The right nibble of each byte corresponds to the LE incides. // and the left nibble of each byte corresponds to the BE incides. // For example: 0x3074B8FC byte->word // For LE: the allowed indices are: 0x0,0x4,0x8,0xC // For BE: the allowed indices are: 0x3,0x7,0xB,0xF // For example: 0x000070F8 byte->double word // For LE: the allowed indices are: 0x0,0x8 // For BE: the allowed indices are: 0x7,0xF uint64_t TargetElems[] = { 0x3074B8FC, // b->w 0x000070F8, // b->d 0x10325476, // h->w 0x00003074, // h->d 0x00001032, // w->d }; uint64_t Elems = 0; int Index; SDValue Input; auto isSExtOfVecExtract = [&](SDValue Op) -> bool { if (!Op) return false; if (Op.getOpcode() != ISD::SIGN_EXTEND && Op.getOpcode() != ISD::SIGN_EXTEND_INREG) return false; // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value // of the right width. SDValue Extract = Op.getOperand(0); if (Extract.getOpcode() == ISD::ANY_EXTEND) Extract = Extract.getOperand(0); if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT) return false; ConstantSDNode *ExtOp = dyn_cast(Extract.getOperand(1)); if (!ExtOp) return false; Index = ExtOp->getZExtValue(); if (Input && Input != Extract.getOperand(0)) return false; if (!Input) Input = Extract.getOperand(0); Elems = Elems << 8; Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4; Elems |= Index; return true; }; // If the build vector operands aren't sign extended vector extracts, // of the same input vector, then return. for (unsigned i = 0; i < N->getNumOperands(); i++) { if (!isSExtOfVecExtract(N->getOperand(i))) { return SDValue(); } } // If the vector extract indicies are not correct, add the appropriate // vector_shuffle. int TgtElemArrayIdx; int InputSize = Input.getValueType().getScalarSizeInBits(); int OutputSize = N->getValueType(0).getScalarSizeInBits(); if (InputSize + OutputSize == 40) TgtElemArrayIdx = 0; else if (InputSize + OutputSize == 72) TgtElemArrayIdx = 1; else if (InputSize + OutputSize == 48) TgtElemArrayIdx = 2; else if (InputSize + OutputSize == 80) TgtElemArrayIdx = 3; else if (InputSize + OutputSize == 96) TgtElemArrayIdx = 4; else return SDValue(); uint64_t CorrectElems = TargetElems[TgtElemArrayIdx]; CorrectElems = DAG.getDataLayout().isLittleEndian() ? CorrectElems & 0x0F0F0F0F0F0F0F0F : CorrectElems & 0xF0F0F0F0F0F0F0F0; if (Elems != CorrectElems) { return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems); } // Regular lowering will catch cases where a shuffle is not needed. 
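// --- Illustrative sketch (not part of the LLVM sources) ---------------------
// The packed tables above store one byte per build_vector operand, with the
// allowed extract index for little-endian targets in the low nibble and the
// big-endian index in the high nibble; the first operand sits in the most
// significant used byte. The decoder below is a hypothetical helper that
// unpacks such a constant and checks the byte->word entry quoted above.
#include <cassert>
#include <cstdint>
#include <vector>

static std::vector<unsigned> decode(uint64_t Encoded, unsigned NumOps,
                                    bool LittleEndian) {
  std::vector<unsigned> Indices;
  for (unsigned i = 0; i < NumOps; ++i) {
    unsigned Shift = 8 * (NumOps - 1 - i);          // first operand first
    uint8_t Byte = (Encoded >> Shift) & 0xFF;
    Indices.push_back(LittleEndian ? (Byte & 0xF) : (Byte >> 4));
  }
  return Indices;
}

int main() {
  // 0x3074B8FC is the byte->word entry: LE allows 0,4,8,0xC; BE allows 3,7,0xB,0xF.
  assert((decode(0x3074B8FC, 4, true) == std::vector<unsigned>{0x0, 0x4, 0x8, 0xC}));
  assert((decode(0x3074B8FC, 4, false) == std::vector<unsigned>{0x3, 0x7, 0xB, 0xF}));
  return 0;
}
// -----------------------------------------------------------------------------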
return SDValue(); } SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const { assert(N->getOpcode() == ISD::BUILD_VECTOR && "Should be called with a BUILD_VECTOR node"); SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); if (!Subtarget.hasVSX()) return SDValue(); // The target independent DAG combiner will leave a build_vector of // float-to-int conversions intact. We can generate MUCH better code for // a float-to-int conversion of a vector of floats. SDValue FirstInput = N->getOperand(0); if (FirstInput.getOpcode() == PPCISD::MFVSR) { SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI); if (Reduced) return Reduced; } // If we're building a vector out of consecutive loads, just load that // vector type. SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG); if (Reduced) return Reduced; // If we're building a vector out of extended elements from another vector // we have P9 vector integer extend instructions. The code assumes legal // input types (i.e. it can't handle things like v4i16) so do not run before // legalization. if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) { Reduced = combineBVOfVecSExt(N, DAG); if (Reduced) return Reduced; } if (N->getValueType(0) != MVT::v2f64) return SDValue(); // Looking for: // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1)) if (FirstInput.getOpcode() != ISD::SINT_TO_FP && FirstInput.getOpcode() != ISD::UINT_TO_FP) return SDValue(); if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP && N->getOperand(1).getOpcode() != ISD::UINT_TO_FP) return SDValue(); if (FirstInput.getOpcode() != N->getOperand(1).getOpcode()) return SDValue(); SDValue Ext1 = FirstInput.getOperand(0); SDValue Ext2 = N->getOperand(1).getOperand(0); if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT || Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT) return SDValue(); ConstantSDNode *Ext1Op = dyn_cast(Ext1.getOperand(1)); ConstantSDNode *Ext2Op = dyn_cast(Ext2.getOperand(1)); if (!Ext1Op || !Ext2Op) return SDValue(); if (Ext1.getValueType() != MVT::i32 || Ext2.getValueType() != MVT::i32) if (Ext1.getOperand(0) != Ext2.getOperand(0)) return SDValue(); int FirstElem = Ext1Op->getZExtValue(); int SecondElem = Ext2Op->getZExtValue(); int SubvecIdx; if (FirstElem == 0 && SecondElem == 1) SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0; else if (FirstElem == 2 && SecondElem == 3) SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1; else return SDValue(); SDValue SrcVec = Ext1.getOperand(0); auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ? PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP; return DAG.getNode(NodeType, dl, MVT::v2f64, SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl)); } SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const { assert((N->getOpcode() == ISD::SINT_TO_FP || N->getOpcode() == ISD::UINT_TO_FP) && "Need an int -> FP conversion node here"); if (useSoftFloat() || !Subtarget.has64BitSupport()) return SDValue(); SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); SDValue Op(N, 0); // Don't handle ppc_fp128 here or conversions that are out-of-range capable // from the hardware. 
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) return SDValue(); if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) || Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64)) return SDValue(); SDValue FirstOperand(Op.getOperand(0)); bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD && (FirstOperand.getValueType() == MVT::i8 || FirstOperand.getValueType() == MVT::i16); if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) { bool Signed = N->getOpcode() == ISD::SINT_TO_FP; bool DstDouble = Op.getValueType() == MVT::f64; unsigned ConvOp = Signed ? (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) : (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS); SDValue WidthConst = DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2, dl, false); LoadSDNode *LDN = cast(FirstOperand.getNode()); SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst }; SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl, DAG.getVTList(MVT::f64, MVT::Other), Ops, MVT::i8, LDN->getMemOperand()); // For signed conversion, we need to sign-extend the value in the VSR if (Signed) { SDValue ExtOps[] = { Ld, WidthConst }; SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps); return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext); } else return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld); } // For i32 intermediate values, unfortunately, the conversion functions // leave the upper 32 bits of the value are undefined. Within the set of // scalar instructions, we have no method for zero- or sign-extending the // value. Thus, we cannot handle i32 intermediate values here. if (Op.getOperand(0).getValueType() == MVT::i32) return SDValue(); assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && "UINT_TO_FP is supported only with FPCVT"); // If we have FCFIDS, then use it when converting to single-precision. // Otherwise, convert to double-precision and then round. unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS : PPCISD::FCFIDS) : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU : PPCISD::FCFID); MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? MVT::f32 : MVT::f64; // If we're converting from a float, to an int, and back to a float again, // then we don't need the store/load pair at all. if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT && Subtarget.hasFPCVT()) || (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) { SDValue Src = Op.getOperand(0).getOperand(0); if (Src.getValueType() == MVT::f32) { Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); DCI.AddToWorklist(Src.getNode()); } else if (Src.getValueType() != MVT::f64) { // Make sure that we don't pick up a ppc_fp128 source value. return SDValue(); } unsigned FCTOp = Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ; SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src); SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp); if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) { FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); DCI.AddToWorklist(FP.getNode()); } return FP; } return SDValue(); } // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for // builtins) into loads with swaps. 
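// --- Illustrative sketch (not part of the LLVM sources) ---------------------
// The last part of combineFPToIntToFP above folds a float -> int -> float
// round trip into register-to-register converts (fctid[u]z feeding fcfid*),
// avoiding the store/load pair the generic lowering would otherwise create.
// The plain C++ below only demonstrates the value-level behaviour being
// preserved; it is not the DAG transformation itself.
#include <cassert>
#include <cstdint>

int main() {
  double Src = 123456.75;
  int64_t AsInt = (int64_t)Src;   // models FCTIDZ: truncating convert to i64
  double Back = (double)AsInt;    // models FCFID: convert back to double
  assert(AsInt == 123456 && Back == 123456.0);
  return 0;
}
// -----------------------------------------------------------------------------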
SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); SDValue Chain; SDValue Base; MachineMemOperand *MMO; switch (N->getOpcode()) { default: llvm_unreachable("Unexpected opcode for little endian VSX load"); case ISD::LOAD: { LoadSDNode *LD = cast(N); Chain = LD->getChain(); Base = LD->getBasePtr(); MMO = LD->getMemOperand(); // If the MMO suggests this isn't a load of a full vector, leave // things alone. For a built-in, we have to make the change for // correctness, so if there is a size problem that will be a bug. if (MMO->getSize() < 16) return SDValue(); break; } case ISD::INTRINSIC_W_CHAIN: { MemIntrinsicSDNode *Intrin = cast(N); Chain = Intrin->getChain(); // Similarly to the store case below, Intrin->getBasePtr() doesn't get // us what we want. Get operand 2 instead. Base = Intrin->getOperand(2); MMO = Intrin->getMemOperand(); break; } } MVT VecTy = N->getValueType(0).getSimpleVT(); // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is // aligned and the type is a vector with elements up to 4 bytes if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16) && VecTy.getScalarSizeInBits() <= 32 ) { return SDValue(); } SDValue LoadOps[] = { Chain, Base }; SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl, DAG.getVTList(MVT::v2f64, MVT::Other), LoadOps, MVT::v2f64, MMO); DCI.AddToWorklist(Load.getNode()); Chain = Load.getValue(1); SDValue Swap = DAG.getNode( PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load); DCI.AddToWorklist(Swap.getNode()); // Add a bitcast if the resulting load type doesn't match v2f64. if (VecTy != MVT::v2f64) { SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap); DCI.AddToWorklist(N.getNode()); // Package {bitcast value, swap's chain} to match Load's shape. return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other), N, Swap.getValue(1)); } return Swap; } // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for // builtins) into stores with swaps. SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); SDValue Chain; SDValue Base; unsigned SrcOpnd; MachineMemOperand *MMO; switch (N->getOpcode()) { default: llvm_unreachable("Unexpected opcode for little endian VSX store"); case ISD::STORE: { StoreSDNode *ST = cast(N); Chain = ST->getChain(); Base = ST->getBasePtr(); MMO = ST->getMemOperand(); SrcOpnd = 1; // If the MMO suggests this isn't a store of a full vector, leave // things alone. For a built-in, we have to make the change for // correctness, so if there is a size problem that will be a bug. if (MMO->getSize() < 16) return SDValue(); break; } case ISD::INTRINSIC_VOID: { MemIntrinsicSDNode *Intrin = cast(N); Chain = Intrin->getChain(); // Intrin->getBasePtr() oddly does not get what we want. Base = Intrin->getOperand(3); MMO = Intrin->getMemOperand(); SrcOpnd = 2; break; } } SDValue Src = N->getOperand(SrcOpnd); MVT VecTy = Src.getValueType().getSimpleVT(); // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is // aligned and the type is a vector with elements up to 4 bytes if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16) && VecTy.getScalarSizeInBits() <= 32 ) { return SDValue(); } // All stores are done as v2f64 and possible bit cast. 
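// --- Illustrative sketch (not part of the LLVM sources) ---------------------
// expandVSXLoadForLE above expands a vector load into PPCISD::LXVD2X followed
// by PPCISD::XXSWAPD: lxvd2x fills the register in big-endian doubleword
// order, so on a little-endian target the two doublewords must be swapped to
// land in the expected lanes. The array model below is only meant to show why
// the swap restores the little-endian lane order.
#include <algorithm>
#include <cassert>

int main() {
  double Memory[2] = {1.0, 2.0};            // v2f64 in memory, LE lanes 0 and 1
  double Reg[2] = {Memory[1], Memory[0]};   // what lxvd2x produces on a LE machine
  std::swap(Reg[0], Reg[1]);                // xxswapd
  assert(Reg[0] == 1.0 && Reg[1] == 2.0);   // lanes now match the memory layout
  return 0;
}
// -----------------------------------------------------------------------------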
if (VecTy != MVT::v2f64) { Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src); DCI.AddToWorklist(Src.getNode()); } SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src); DCI.AddToWorklist(Swap.getNode()); Chain = Swap.getValue(1); SDValue StoreOps[] = { Chain, Swap, Base }; SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl, DAG.getVTList(MVT::Other), StoreOps, VecTy, MMO); DCI.AddToWorklist(Store.getNode()); return Store; } // Handle DAG combine for STORE (FP_TO_INT F). SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); unsigned Opcode = N->getOperand(1).getOpcode(); assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) && "Not a FP_TO_INT Instruction!"); SDValue Val = N->getOperand(1).getOperand(0); EVT Op1VT = N->getOperand(1).getValueType(); EVT ResVT = Val.getValueType(); // Floating point types smaller than 32 bits are not legal on Power. if (ResVT.getScalarSizeInBits() < 32) return SDValue(); // Only perform combine for conversion to i64/i32 or power9 i16/i8. bool ValidTypeForStoreFltAsInt = (Op1VT == MVT::i32 || Op1VT == MVT::i64 || (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8))); if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Altivec() || cast(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt) return SDValue(); // Extend f32 values to f64 if (ResVT.getScalarSizeInBits() == 32) { Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); DCI.AddToWorklist(Val.getNode()); } // Set signed or unsigned conversion opcode. unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ? PPCISD::FP_TO_SINT_IN_VSR : PPCISD::FP_TO_UINT_IN_VSR; Val = DAG.getNode(ConvOpcode, dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val); DCI.AddToWorklist(Val.getNode()); // Set number of bytes being converted. unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8; SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2), DAG.getIntPtrConstant(ByteSize, dl, false), DAG.getValueType(Op1VT) }; Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl, DAG.getVTList(MVT::Other), Ops, cast(N)->getMemoryVT(), cast(N)->getMemOperand()); DCI.AddToWorklist(Val.getNode()); return Val; } SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); switch (N->getOpcode()) { default: break; case ISD::SHL: return combineSHL(N, DCI); case ISD::SRA: return combineSRA(N, DCI); case ISD::SRL: return combineSRL(N, DCI); case PPCISD::SHL: if (isNullConstant(N->getOperand(0))) // 0 << V -> 0. return N->getOperand(0); break; case PPCISD::SRL: if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0. return N->getOperand(0); break; case PPCISD::SRA: if (ConstantSDNode *C = dyn_cast(N->getOperand(0))) { if (C->isNullValue() || // 0 >>s V -> 0. C->isAllOnesValue()) // -1 >>s V -> -1. return N->getOperand(0); } break; case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: return DAGCombineExtBoolTrunc(N, DCI); case ISD::TRUNCATE: case ISD::SETCC: case ISD::SELECT_CC: return DAGCombineTruncBoolExt(N, DCI); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return combineFPToIntToFP(N, DCI); case ISD::STORE: { EVT Op1VT = N->getOperand(1).getValueType(); unsigned Opcode = N->getOperand(1).getOpcode(); if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) { SDValue Val= combineStoreFPToInt(N, DCI); if (Val) return Val; } // Turn STORE (BSWAP) -> sthbrx/stwbrx. 
if (cast(N)->isUnindexed() && Opcode == ISD::BSWAP && N->getOperand(1).getNode()->hasOneUse() && (Op1VT == MVT::i32 || Op1VT == MVT::i16 || (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) { // STBRX can only handle simple types. EVT mVT = cast(N)->getMemoryVT(); if (mVT.isExtended()) break; SDValue BSwapOp = N->getOperand(1).getOperand(0); // Do an any-extend to 32-bits if this is a half-word input. if (BSwapOp.getValueType() == MVT::i16) BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp); // If the type of BSWAP operand is wider than stored memory width // it need to be shifted to the right side before STBRX. if (Op1VT.bitsGT(mVT)) { int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits(); BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp, DAG.getConstant(Shift, dl, MVT::i32)); // Need to truncate if this is a bswap of i64 stored as i32/i16. if (Op1VT == MVT::i64) BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp); } SDValue Ops[] = { N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT) }; return DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other), Ops, cast(N)->getMemoryVT(), cast(N)->getMemOperand()); } // STORE Constant:i32<0> -> STORE Constant:i64<0> // So it can increase the chance of CSE constant construction. if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() && isa(N->getOperand(1)) && Op1VT == MVT::i32) { // Need to sign-extended to 64-bits to handle negative values. EVT MemVT = cast(N)->getMemoryVT(); uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1), MemVT.getSizeInBits()); SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64); // DAG.getTruncStore() can't be used here because it doesn't accept // the general (base + offset) addressing mode. // So we use UpdateNodeOperands and setTruncatingStore instead. DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2), N->getOperand(3)); cast(N)->setTruncatingStore(true); return SDValue(N, 0); } // For little endian, VSX stores require generating xxswapd/lxvd2x. // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. if (Op1VT.isSimple()) { MVT StoreVT = Op1VT.getSimpleVT(); if (Subtarget.needsSwapsForVSXMemOps() && (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 || StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32)) return expandVSXStoreForLE(N, DCI); } break; } case ISD::LOAD: { LoadSDNode *LD = cast(N); EVT VT = LD->getValueType(0); // For little endian, VSX loads require generating lxvd2x/xxswapd. // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. if (VT.isSimple()) { MVT LoadVT = VT.getSimpleVT(); if (Subtarget.needsSwapsForVSXMemOps() && (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 || LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32)) return expandVSXLoadForLE(N, DCI); } // We sometimes end up with a 64-bit integer load, from which we extract // two single-precision floating-point numbers. This happens with // std::complex, and other similar structures, because of the way we // canonicalize structure copies. However, if we lack direct moves, // then the final bitcasts from the extracted integer values to the // floating-point numbers turn into store/load pairs. Even with direct moves, // just loading the two floating-point numbers is likely better. 
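// --- Illustrative sketch (not part of the LLVM sources) ---------------------
// In the STORE(BSWAP) combine above, when the bswap result is wider than the
// stored memory type the pre-swap value is shifted right by the width
// difference before the byte-reversing store is formed. The identity being
// used is low16(bswap32(x)) == bswap16(high16(x)); the helpers below are a
// plain C++ restatement of that identity, not the DAG code.
#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0xFF00u) | ((V << 8) & 0xFF0000u) | (V << 24);
}
static uint16_t bswap16(uint16_t V) { return (uint16_t)((V >> 8) | (V << 8)); }

int main() {
  uint32_t X = 0x12345678;
  uint16_t Original = (uint16_t)bswap32(X);          // i16 store of (bswap i32 X)
  uint16_t Combined = bswap16((uint16_t)(X >> 16));  // sthbrx of (X srl 16)
  assert(Original == 0x3412 && Combined == Original);
  return 0;
}
// -----------------------------------------------------------------------------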
auto ReplaceTwoFloatLoad = [&]() { if (VT != MVT::i64) return false; if (LD->getExtensionType() != ISD::NON_EXTLOAD || LD->isVolatile()) return false; // We're looking for a sequence like this: // t13: i64,ch = load t0, t6, undef:i64 // t16: i64 = srl t13, Constant:i32<32> // t17: i32 = truncate t16 // t18: f32 = bitcast t17 // t19: i32 = truncate t13 // t20: f32 = bitcast t19 if (!LD->hasNUsesOfValue(2, 0)) return false; auto UI = LD->use_begin(); while (UI.getUse().getResNo() != 0) ++UI; SDNode *Trunc = *UI++; while (UI.getUse().getResNo() != 0) ++UI; SDNode *RightShift = *UI; if (Trunc->getOpcode() != ISD::TRUNCATE) std::swap(Trunc, RightShift); if (Trunc->getOpcode() != ISD::TRUNCATE || Trunc->getValueType(0) != MVT::i32 || !Trunc->hasOneUse()) return false; if (RightShift->getOpcode() != ISD::SRL || !isa(RightShift->getOperand(1)) || RightShift->getConstantOperandVal(1) != 32 || !RightShift->hasOneUse()) return false; SDNode *Trunc2 = *RightShift->use_begin(); if (Trunc2->getOpcode() != ISD::TRUNCATE || Trunc2->getValueType(0) != MVT::i32 || !Trunc2->hasOneUse()) return false; SDNode *Bitcast = *Trunc->use_begin(); SDNode *Bitcast2 = *Trunc2->use_begin(); if (Bitcast->getOpcode() != ISD::BITCAST || Bitcast->getValueType(0) != MVT::f32) return false; if (Bitcast2->getOpcode() != ISD::BITCAST || Bitcast2->getValueType(0) != MVT::f32) return false; if (Subtarget.isLittleEndian()) std::swap(Bitcast, Bitcast2); // Bitcast has the second float (in memory-layout order) and Bitcast2 // has the first one. SDValue BasePtr = LD->getBasePtr(); if (LD->isIndexed()) { assert(LD->getAddressingMode() == ISD::PRE_INC && "Non-pre-inc AM on PPC?"); BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, LD->getOffset()); } auto MMOFlags = LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile; SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr, LD->getPointerInfo(), LD->getAlignment(), MMOFlags, LD->getAAInfo()); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(4, dl)); SDValue FloatLoad2 = DAG.getLoad( MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr, LD->getPointerInfo().getWithOffset(4), MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo()); if (LD->isIndexed()) { // Note that DAGCombine should re-form any pre-increment load(s) from // what is produced here if that makes sense. DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr); } DCI.CombineTo(Bitcast2, FloatLoad); DCI.CombineTo(Bitcast, FloatLoad2); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1), SDValue(FloatLoad2.getNode(), 1)); return true; }; if (ReplaceTwoFloatLoad()) return SDValue(N, 0); EVT MemVT = LD->getMemoryVT(); Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext()); unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy); if (LD->isUnindexed() && VT.isVector() && ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) && // P8 and later hardware should just use LOAD. !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v4f32)) || (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) && LD->getAlignment() >= ScalarABIAlignment)) && LD->getAlignment() < ABIAlignment) { // This is a type-legal unaligned Altivec or QPX load. 
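// --- Illustrative sketch (not part of the LLVM sources) ---------------------
// ReplaceTwoFloatLoad above recognizes an i64 load whose two 32-bit halves are
// truncated and bitcast to f32, and replaces it with two direct f32 loads.
// The host C++ below models the pattern being matched: which half holds which
// float depends on endianness, which is why the code swaps Bitcast/Bitcast2 on
// little-endian targets. Purely an illustration of the value equivalence.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  float Pair[2] = {1.5f, -2.25f};
  uint64_t Wide;
  static_assert(sizeof(Wide) == sizeof(Pair), "two packed f32 fit in an i64");
  std::memcpy(&Wide, Pair, sizeof(Wide));            // the original i64 load

  uint32_t LoBits = (uint32_t)Wide;                  // truncate
  uint32_t HiBits = (uint32_t)(Wide >> 32);          // srl 32 + truncate
  float Lo, Hi;
  std::memcpy(&Lo, &LoBits, 4);                      // bitcast i32 -> f32
  std::memcpy(&Hi, &HiBits, 4);

  // One half is Pair[0] and the other Pair[1]; the assignment depends on the
  // host's endianness.
  assert((Lo == Pair[0] && Hi == Pair[1]) || (Lo == Pair[1] && Hi == Pair[0]));
  return 0;
}
// -----------------------------------------------------------------------------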
SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); bool isLittleEndian = Subtarget.isLittleEndian(); // This implements the loading of unaligned vectors as described in // the venerable Apple Velocity Engine overview. Specifically: // https://developer.apple.com/hardwaredrivers/ve/alignment.html // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html // // The general idea is to expand a sequence of one or more unaligned // loads into an alignment-based permutation-control instruction (lvsl // or lvsr), a series of regular vector loads (which always truncate // their input address to an aligned address), and a series of // permutations. The results of these permutations are the requested // loaded values. The trick is that the last "extra" load is not taken // from the address you might suspect (sizeof(vector) bytes after the // last requested load), but rather sizeof(vector) - 1 bytes after the // last requested vector. The point of this is to avoid a page fault if // the base address happened to be aligned. This works because if the // base address is aligned, then adding less than a full vector length // will cause the last vector in the sequence to be (re)loaded. // Otherwise, the next vector will be fetched as you might suspect was // necessary. // We might be able to reuse the permutation generation from // a different base address offset from this one by an aligned amount. // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this // optimization later. Intrinsic::ID Intr, IntrLD, IntrPerm; MVT PermCntlTy, PermTy, LDTy; if (Subtarget.hasAltivec()) { Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr : Intrinsic::ppc_altivec_lvsl; IntrLD = Intrinsic::ppc_altivec_lvx; IntrPerm = Intrinsic::ppc_altivec_vperm; PermCntlTy = MVT::v16i8; PermTy = MVT::v4i32; LDTy = MVT::v4i32; } else { Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld : Intrinsic::ppc_qpx_qvlpcls; IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd : Intrinsic::ppc_qpx_qvlfs; IntrPerm = Intrinsic::ppc_qpx_qvfperm; PermCntlTy = MVT::v4f64; PermTy = MVT::v4f64; LDTy = MemVT.getSimpleVT(); } SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy); // Create the new MMO for the new base load. It is like the original MMO, // but represents an area in memory almost twice the vector size centered // on the original address. If the address is unaligned, we might start // reading up to (sizeof(vector)-1) bytes below the address of the // original unaligned load. MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *BaseMMO = MF.getMachineMemOperand(LD->getMemOperand(), -(long)MemVT.getStoreSize()+1, 2*MemVT.getStoreSize()-1); // Create the new base load. SDValue LDXIntID = DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout())); SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr }; SDValue BaseLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, DAG.getVTList(PermTy, MVT::Other), BaseLoadOps, LDTy, BaseMMO); // Note that the value of IncOffset (which is provided to the next // load's pointer info offset value, and thus used to calculate the // alignment), and the value of IncValue (which is actually used to // increment the pointer value) are different! This is because we // require the next load to appear to be aligned, even though it // is actually offset from the base pointer by a lesser amount. 
int IncOffset = VT.getSizeInBits() / 8; int IncValue = IncOffset; // Walk (both up and down) the chain looking for another load at the real // (aligned) offset (the alignment of the other load does not matter in // this case). If found, then do not use the offset reduction trick, as // that will prevent the loads from being later combined (as they would // otherwise be duplicates). if (!findConsecutiveLoad(LD, DAG)) --IncValue; SDValue Increment = DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout())); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); MachineMemOperand *ExtraMMO = MF.getMachineMemOperand(LD->getMemOperand(), 1, 2*MemVT.getStoreSize()-1); SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr }; SDValue ExtraLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, DAG.getVTList(PermTy, MVT::Other), ExtraLoadOps, LDTy, ExtraMMO); SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, BaseLoad.getValue(1), ExtraLoad.getValue(1)); // Because vperm has a big-endian bias, we must reverse the order // of the input vectors and complement the permute control vector // when generating little endian code. We have already handled the // latter by using lvsr instead of lvsl, so just reverse BaseLoad // and ExtraLoad here. SDValue Perm; if (isLittleEndian) Perm = BuildIntrinsicOp(IntrPerm, ExtraLoad, BaseLoad, PermCntl, DAG, dl); else Perm = BuildIntrinsicOp(IntrPerm, BaseLoad, ExtraLoad, PermCntl, DAG, dl); if (VT != PermTy) Perm = Subtarget.hasAltivec() ? DAG.getNode(ISD::BITCAST, dl, VT, Perm) : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX DAG.getTargetConstant(1, dl, MVT::i64)); // second argument is 1 because this rounding // is always exact. // The output of the permutation is our loaded result, the TokenFactor is // our new chain. DCI.CombineTo(N, Perm, TF); return SDValue(N, 0); } } break; case ISD::INTRINSIC_WO_CHAIN: { bool isLittleEndian = Subtarget.isLittleEndian(); unsigned IID = cast(N->getOperand(0))->getZExtValue(); Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr : Intrinsic::ppc_altivec_lvsl); if ((IID == Intr || IID == Intrinsic::ppc_qpx_qvlpcld || IID == Intrinsic::ppc_qpx_qvlpcls) && N->getOperand(1)->getOpcode() == ISD::ADD) { SDValue Add = N->getOperand(1); int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ? 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */; if (DAG.MaskedValueIsZero(Add->getOperand(1), APInt::getAllOnesValue(Bits /* alignment */) .zext(Add.getScalarValueSizeInBits()))) { SDNode *BasePtr = Add->getOperand(0).getNode(); for (SDNode::use_iterator UI = BasePtr->use_begin(), UE = BasePtr->use_end(); UI != UE; ++UI) { if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && cast(UI->getOperand(0))->getZExtValue() == IID) { // We've found another LVSL/LVSR, and this address is an aligned // multiple of that one. The results will be the same, so use the // one we've just found instead. 
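// --- Illustrative sketch (not part of the LLVM sources) ---------------------
// The addressing trick described above: lvx truncates its address to 16-byte
// alignment, so the "extra" load uses base + 15 (not base + 16) unless another
// load at the real offset already exists. If the base happens to be aligned,
// the extra load then re-reads the same 16-byte block instead of touching the
// next, possibly unmapped, page. Standalone model of the address arithmetic.
#include <cassert>
#include <cstdint>

static uint64_t lvxAddr(uint64_t Addr) { return Addr & ~UINT64_C(15); }

int main() {
  uint64_t Unaligned = 0x1003;
  assert(lvxAddr(Unaligned) == 0x1000);
  assert(lvxAddr(Unaligned + 15) == 0x1010);   // second block is really needed

  uint64_t Aligned = 0x1000;
  assert(lvxAddr(Aligned) == 0x1000);
  assert(lvxAddr(Aligned + 15) == 0x1000);     // same block, no page crossed
  assert(lvxAddr(Aligned + 16) == 0x1010);     // +16 could fault past the data
  return 0;
}
// -----------------------------------------------------------------------------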
return SDValue(*UI, 0); } } } if (isa(Add->getOperand(1))) { SDNode *BasePtr = Add->getOperand(0).getNode(); for (SDNode::use_iterator UI = BasePtr->use_begin(), UE = BasePtr->use_end(); UI != UE; ++UI) { if (UI->getOpcode() == ISD::ADD && isa(UI->getOperand(1)) && (cast(Add->getOperand(1))->getZExtValue() - cast(UI->getOperand(1))->getZExtValue()) % (1ULL << Bits) == 0) { SDNode *OtherAdd = *UI; for (SDNode::use_iterator VI = OtherAdd->use_begin(), VE = OtherAdd->use_end(); VI != VE; ++VI) { if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && cast(VI->getOperand(0))->getZExtValue() == IID) { return SDValue(*VI, 0); } } } } } } } break; case ISD::INTRINSIC_W_CHAIN: // For little endian, VSX loads require generating lxvd2x/xxswapd. // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. if (Subtarget.needsSwapsForVSXMemOps()) { switch (cast(N->getOperand(1))->getZExtValue()) { default: break; case Intrinsic::ppc_vsx_lxvw4x: case Intrinsic::ppc_vsx_lxvd2x: return expandVSXLoadForLE(N, DCI); } } break; case ISD::INTRINSIC_VOID: // For little endian, VSX stores require generating xxswapd/stxvd2x. // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. if (Subtarget.needsSwapsForVSXMemOps()) { switch (cast(N->getOperand(1))->getZExtValue()) { default: break; case Intrinsic::ppc_vsx_stxvw4x: case Intrinsic::ppc_vsx_stxvd2x: return expandVSXStoreForLE(N, DCI); } } break; case ISD::BSWAP: // Turn BSWAP (LOAD) -> lhbrx/lwbrx. if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && N->getOperand(0).hasOneUse() && (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 || (Subtarget.hasLDBRX() && Subtarget.isPPC64() && N->getValueType(0) == MVT::i64))) { SDValue Load = N->getOperand(0); LoadSDNode *LD = cast(Load); // Create the byte-swapping load. SDValue Ops[] = { LD->getChain(), // Chain LD->getBasePtr(), // Ptr DAG.getValueType(N->getValueType(0)) // VT }; SDValue BSLoad = DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, DAG.getVTList(N->getValueType(0) == MVT::i64 ? MVT::i64 : MVT::i32, MVT::Other), Ops, LD->getMemoryVT(), LD->getMemOperand()); // If this is an i16 load, insert the truncate. SDValue ResVal = BSLoad; if (N->getValueType(0) == MVT::i16) ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad); // First, combine the bswap away. This makes the value produced by the // load dead. DCI.CombineTo(N, ResVal); // Next, combine the load away, we give it a bogus result value but a real // chain result. The result value is dead because the bswap is dead. DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); // Return N so it doesn't get rechecked! return SDValue(N, 0); } break; case PPCISD::VCMP: // If a VCMPo node already exists with exactly the same operands as this // node, use its result instead of this node (VCMPo computes both a CR6 and // a normal output). // if (!N->getOperand(0).hasOneUse() && !N->getOperand(1).hasOneUse() && !N->getOperand(2).hasOneUse()) { // Scan all of the users of the LHS, looking for VCMPo's that match. SDNode *VCMPoNode = nullptr; SDNode *LHSN = N->getOperand(0).getNode(); for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); UI != E; ++UI) if (UI->getOpcode() == PPCISD::VCMPo && UI->getOperand(1) == N->getOperand(1) && UI->getOperand(2) == N->getOperand(2) && UI->getOperand(0) == N->getOperand(0)) { VCMPoNode = *UI; break; } // If there is no VCMPo node, or if the flag value has a single use, don't // transform this. 
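// --- Illustrative sketch (not part of the LLVM sources) ---------------------
// The INTRINSIC_WO_CHAIN combine above can reuse an existing lvsl/lvsr node
// because the permute control depends only on the low address bits: two
// addresses whose difference is a multiple of the alignment produce identical
// results. Small standalone model of that observation for the 16-byte case.
#include <cassert>
#include <cstdint>

static unsigned lvslShift(uint64_t Addr) { return Addr & 0xF; } // what lvsl keys on

int main() {
  uint64_t A = 0x2003;
  for (uint64_t Delta = 0; Delta < 256; Delta += 16)   // aligned multiples only
    assert(lvslShift(A) == lvslShift(A + Delta));
  assert(lvslShift(A) != lvslShift(A + 8));            // unaligned offset differs
  return 0;
}
// -----------------------------------------------------------------------------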
if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1)) break; // Look at the (necessarily single) use of the flag value. If it has a // chain, this transformation is more complex. Note that multiple things // could use the value result, which we should ignore. SDNode *FlagUser = nullptr; for (SDNode::use_iterator UI = VCMPoNode->use_begin(); FlagUser == nullptr; ++UI) { assert(UI != VCMPoNode->use_end() && "Didn't find user!"); SDNode *User = *UI; for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { if (User->getOperand(i) == SDValue(VCMPoNode, 1)) { FlagUser = User; break; } } } // If the user is a MFOCRF instruction, we know this is safe. // Otherwise we give up for right now. if (FlagUser->getOpcode() == PPCISD::MFOCRF) return SDValue(VCMPoNode, 0); } break; case ISD::BRCOND: { SDValue Cond = N->getOperand(1); SDValue Target = N->getOperand(2); if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN && cast(Cond.getOperand(1))->getZExtValue() == Intrinsic::ppc_is_decremented_ctr_nonzero) { // We now need to make the intrinsic dead (it cannot be instruction // selected). DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0)); assert(Cond.getNode()->hasOneUse() && "Counter decrement has more than one use"); return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other, N->getOperand(0), Target); } } break; case ISD::BR_CC: { // If this is a branch on an altivec predicate comparison, lower this so // that we don't have to do a MFOCRF: instead, branch directly on CR6. This // lowering is done pre-legalize, because the legalizer lowers the predicate // compare down to code that is difficult to reassemble. ISD::CondCode CC = cast(N->getOperand(1))->get(); SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); // Sometimes the promoted value of the intrinsic is ANDed by some non-zero // value. If so, pass-through the AND to get to the intrinsic. if (LHS.getOpcode() == ISD::AND && LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN && cast(LHS.getOperand(0).getOperand(1))->getZExtValue() == Intrinsic::ppc_is_decremented_ctr_nonzero && isa(LHS.getOperand(1)) && !isNullConstant(LHS.getOperand(1))) LHS = LHS.getOperand(0); if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && cast(LHS.getOperand(1))->getZExtValue() == Intrinsic::ppc_is_decremented_ctr_nonzero && isa(RHS)) { assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Counter decrement comparison is not EQ or NE"); unsigned Val = cast(RHS)->getZExtValue(); bool isBDNZ = (CC == ISD::SETEQ && Val) || (CC == ISD::SETNE && !Val); // We now need to make the intrinsic dead (it cannot be instruction // selected). DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0)); assert(LHS.getNode()->hasOneUse() && "Counter decrement has more than one use"); return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other, N->getOperand(0), N->getOperand(4)); } int CompareOpc; bool isDot; if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && isa(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) { assert(isDot && "Can't compare against a vector result!"); // If this is a comparison against something other than 0/1, then we know // that the condition is never/always true. unsigned Val = cast(RHS)->getZExtValue(); if (Val != 0 && Val != 1) { if (CC == ISD::SETEQ) // Cond never true, remove branch. return N->getOperand(0); // Always !=, turn it into an unconditional branch. 
return DAG.getNode(ISD::BR, dl, MVT::Other, N->getOperand(0), N->getOperand(4)); } bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); // Create the PPCISD altivec 'dot' comparison node. SDValue Ops[] = { LHS.getOperand(2), // LHS of compare LHS.getOperand(3), // RHS of compare DAG.getConstant(CompareOpc, dl, MVT::i32) }; EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); // Unpack the result based on how the target uses it. PPC::Predicate CompOpc; switch (cast(LHS.getOperand(1))->getZExtValue()) { default: // Can't happen, don't crash on invalid number though. case 0: // Branch on the value of the EQ bit of CR6. CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE; break; case 1: // Branch on the inverted value of the EQ bit of CR6. CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ; break; case 2: // Branch on the value of the LT bit of CR6. CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE; break; case 3: // Branch on the inverted value of the LT bit of CR6. CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT; break; } return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0), DAG.getConstant(CompOpc, dl, MVT::i32), DAG.getRegister(PPC::CR6, MVT::i32), N->getOperand(4), CompNode.getValue(1)); } break; } case ISD::BUILD_VECTOR: return DAGCombineBuildVector(N, DCI); } return SDValue(); } SDValue PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl &Created) const { // fold (sdiv X, pow2) EVT VT = N->getValueType(0); if (VT == MVT::i64 && !Subtarget.isPPC64()) return SDValue(); if ((VT != MVT::i32 && VT != MVT::i64) || !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2())) return SDValue(); SDLoc DL(N); SDValue N0 = N->getOperand(0); bool IsNegPow2 = (-Divisor).isPowerOf2(); unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros(); SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT); SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt); Created.push_back(Op.getNode()); if (IsNegPow2) { Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op); Created.push_back(Op.getNode()); } return Op; } //===----------------------------------------------------------------------===// // Inline Assembly Support //===----------------------------------------------------------------------===// void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { Known.resetAll(); switch (Op.getOpcode()) { default: break; case PPCISD::LBRX: { // lhbrx is known to have the top bits cleared out. 
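BuildSDIVPow2 above folds a signed divide by a power of two into PPCISD::SRA_ADDZE, an arithmetic shift right whose carry-out is added back so the quotient truncates toward zero (with an extra subtract-from-zero when the divisor is negative). The following is a minimal standalone sketch of the underlying arithmetic, not part of this patch; sdivByPow2 is an illustrative name and the shift of a negative value assumes the usual arithmetic-shift behaviour (guaranteed since C++20).

#include <cassert>
#include <cstdint>

// Signed division by 2^Lg2 that rounds toward zero using only a shift plus a
// bias, which is conceptually what srawi/sradi followed by addze computes.
// For negative dividends the low Lg2 bits are folded back in so the
// truncation matches C's '/' semantics.
int64_t sdivByPow2(int64_t X, unsigned Lg2) {
  int64_t Bias = (X >> 63) & ((int64_t(1) << Lg2) - 1); // 0 when X >= 0
  return (X + Bias) >> Lg2;
}

int main() {
  const int64_t Vals[] = {-17, -16, -1, 0, 1, 15, 16, 17};
  for (int64_t X : Vals)
    assert(sdivByPow2(X, 4) == X / 16);
  return 0;
}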
if (cast(Op.getOperand(2))->getVT() == MVT::i16) Known.Zero = 0xFFFF0000; break; } case ISD::INTRINSIC_WO_CHAIN: { switch (cast(Op.getOperand(0))->getZExtValue()) { default: break; case Intrinsic::ppc_altivec_vcmpbfp_p: case Intrinsic::ppc_altivec_vcmpeqfp_p: case Intrinsic::ppc_altivec_vcmpequb_p: case Intrinsic::ppc_altivec_vcmpequh_p: case Intrinsic::ppc_altivec_vcmpequw_p: case Intrinsic::ppc_altivec_vcmpequd_p: case Intrinsic::ppc_altivec_vcmpgefp_p: case Intrinsic::ppc_altivec_vcmpgtfp_p: case Intrinsic::ppc_altivec_vcmpgtsb_p: case Intrinsic::ppc_altivec_vcmpgtsh_p: case Intrinsic::ppc_altivec_vcmpgtsw_p: case Intrinsic::ppc_altivec_vcmpgtsd_p: case Intrinsic::ppc_altivec_vcmpgtub_p: case Intrinsic::ppc_altivec_vcmpgtuh_p: case Intrinsic::ppc_altivec_vcmpgtuw_p: case Intrinsic::ppc_altivec_vcmpgtud_p: Known.Zero = ~1U; // All bits but the low one are known to be zero. break; } } } } unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { switch (Subtarget.getDarwinDirective()) { default: break; case PPC::DIR_970: case PPC::DIR_PWR4: case PPC::DIR_PWR5: case PPC::DIR_PWR5X: case PPC::DIR_PWR6: case PPC::DIR_PWR6X: case PPC::DIR_PWR7: case PPC::DIR_PWR8: case PPC::DIR_PWR9: { if (!ML) break; const PPCInstrInfo *TII = Subtarget.getInstrInfo(); // For small loops (between 5 and 8 instructions), align to a 32-byte // boundary so that the entire loop fits in one instruction-cache line. uint64_t LoopSize = 0; for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I) for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) { LoopSize += TII->getInstSizeInBytes(*J); if (LoopSize > 32) break; } if (LoopSize > 16 && LoopSize <= 32) return 5; break; } } return TargetLowering::getPrefLoopAlignment(ML); } /// getConstraintType - Given a constraint, return the type of /// constraint it is for this target. PPCTargetLowering::ConstraintType PPCTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default: break; case 'b': case 'r': case 'f': case 'd': case 'v': case 'y': return C_RegisterClass; case 'Z': // FIXME: While Z does indicate a memory constraint, it specifically // indicates an r+r address (used in conjunction with the 'y' modifier // in the replacement string). Currently, we're forcing the base // register to be r0 in the asm printer (which is interpreted as zero) // and forming the complete address in the second register. This is // suboptimal. return C_Memory; } } else if (Constraint == "wc") { // individual CR bits. return C_RegisterClass; } else if (Constraint == "wa" || Constraint == "wd" || Constraint == "wf" || Constraint == "ws") { return C_RegisterClass; // VSX registers. } return TargetLowering::getConstraintType(Constraint); } /// Examine constraint type and operand type and determine a weight value. /// This object must already have been set up with the operand type /// and the current alternative constraint selected. TargetLowering::ConstraintWeight PPCTargetLowering::getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const { ConstraintWeight weight = CW_Invalid; Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); // Look at the constraint type. if (StringRef(constraint) == "wc" && type->isIntegerTy(1)) return CW_Register; // an individual CR bit. 
else if ((StringRef(constraint) == "wa" || StringRef(constraint) == "wd" || StringRef(constraint) == "wf") && type->isVectorTy()) return CW_Register; else if (StringRef(constraint) == "ws" && type->isDoubleTy()) return CW_Register; switch (*constraint) { default: weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); break; case 'b': if (type->isIntegerTy()) weight = CW_Register; break; case 'f': if (type->isFloatTy()) weight = CW_Register; break; case 'd': if (type->isDoubleTy()) weight = CW_Register; break; case 'v': if (type->isVectorTy()) weight = CW_Register; break; case 'y': weight = CW_Register; break; case 'Z': weight = CW_Memory; break; } return weight; } std::pair PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { // GCC RS6000 Constraint Letters switch (Constraint[0]) { case 'b': // R1-R31 if (VT == MVT::i64 && Subtarget.isPPC64()) return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); case 'r': // R0-R31 if (VT == MVT::i64 && Subtarget.isPPC64()) return std::make_pair(0U, &PPC::G8RCRegClass); return std::make_pair(0U, &PPC::GPRCRegClass); // 'd' and 'f' constraints are both defined to be "the floating point // registers", where one is for 32-bit and the other for 64-bit. We don't // really care overly much here so just give them all the same reg classes. case 'd': case 'f': if (Subtarget.hasSPE()) { if (VT == MVT::f32 || VT == MVT::i32) return std::make_pair(0U, &PPC::SPE4RCRegClass); if (VT == MVT::f64 || VT == MVT::i64) return std::make_pair(0U, &PPC::SPERCRegClass); } else { if (VT == MVT::f32 || VT == MVT::i32) return std::make_pair(0U, &PPC::F4RCRegClass); if (VT == MVT::f64 || VT == MVT::i64) return std::make_pair(0U, &PPC::F8RCRegClass); if (VT == MVT::v4f64 && Subtarget.hasQPX()) return std::make_pair(0U, &PPC::QFRCRegClass); if (VT == MVT::v4f32 && Subtarget.hasQPX()) return std::make_pair(0U, &PPC::QSRCRegClass); } break; case 'v': if (VT == MVT::v4f64 && Subtarget.hasQPX()) return std::make_pair(0U, &PPC::QFRCRegClass); if (VT == MVT::v4f32 && Subtarget.hasQPX()) return std::make_pair(0U, &PPC::QSRCRegClass); if (Subtarget.hasAltivec()) return std::make_pair(0U, &PPC::VRRCRegClass); break; case 'y': // crrc return std::make_pair(0U, &PPC::CRRCRegClass); } } else if (Constraint == "wc" && Subtarget.useCRBits()) { // An individual CR bit. return std::make_pair(0U, &PPC::CRBITRCRegClass); } else if ((Constraint == "wa" || Constraint == "wd" || Constraint == "wf") && Subtarget.hasVSX()) { return std::make_pair(0U, &PPC::VSRCRegClass); } else if (Constraint == "ws" && Subtarget.hasVSX()) { if (VT == MVT::f32 && Subtarget.hasP8Vector()) return std::make_pair(0U, &PPC::VSSRCRegClass); else return std::make_pair(0U, &PPC::VSFRCRegClass); } std::pair R = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers // (which we call X[0-9]+). If a 64-bit value has been requested, and a // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent // register. // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use // the AsmName field from *RegisterInfo.td, then this would not be necessary. 
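The comments above describe upgrading a selected 32-bit GPR to its 64-bit parent register (r3 becomes x3, and so on) when a 64-bit value is requested on PPC64; the code that follows does this with TRI->getMatchingSuperReg and PPC::sub_32. A toy, name-level sketch of that mapping, illustrative only and not LLVM API:

#include <cassert>
#include <string>

// Map a 32-bit GPR name "rN" to the name of its 64-bit parent ("xN" here),
// leaving every other operand untouched.  Purely a model of the upgrade.
std::string upgradeToPPC64Reg(const std::string &Name) {
  if (Name.size() >= 2 && Name[0] == 'r')
    return "x" + Name.substr(1);
  return Name; // not a GPR; leave it alone
}

int main() {
  assert(upgradeToPPC64Reg("r3") == "x3");
  assert(upgradeToPPC64Reg("f1") == "f1"); // FPRs are unaffected
  return 0;
}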
if (R.first && VT == MVT::i64 && Subtarget.isPPC64() && PPC::GPRCRegClass.contains(R.first)) return std::make_pair(TRI->getMatchingSuperReg(R.first, PPC::sub_32, &PPC::G8RCRegClass), &PPC::G8RCRegClass); // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same. if (!R.second && StringRef("{cc}").equals_lower(Constraint)) { R.first = PPC::CR0; R.second = &PPC::CRRCRegClass; } return R; } /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector&Ops, SelectionDAG &DAG) const { SDValue Result; // Only support length 1 constraints. if (Constraint.length() > 1) return; char Letter = Constraint[0]; switch (Letter) { default: break; case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': { ConstantSDNode *CST = dyn_cast(Op); if (!CST) return; // Must be an immediate to match. SDLoc dl(Op); int64_t Value = CST->getSExtValue(); EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative // numbers are printed as such. switch (Letter) { default: llvm_unreachable("Unknown constraint letter!"); case 'I': // "I" is a signed 16-bit constant. if (isInt<16>(Value)) Result = DAG.getTargetConstant(Value, dl, TCVT); break; case 'J': // "J" is a constant with only the high-order 16 bits nonzero. if (isShiftedUInt<16, 16>(Value)) Result = DAG.getTargetConstant(Value, dl, TCVT); break; case 'L': // "L" is a signed 16-bit constant shifted left 16 bits. if (isShiftedInt<16, 16>(Value)) Result = DAG.getTargetConstant(Value, dl, TCVT); break; case 'K': // "K" is a constant with only the low-order 16 bits nonzero. if (isUInt<16>(Value)) Result = DAG.getTargetConstant(Value, dl, TCVT); break; case 'M': // "M" is a constant that is greater than 31. if (Value > 31) Result = DAG.getTargetConstant(Value, dl, TCVT); break; case 'N': // "N" is a positive constant that is an exact power of two. if (Value > 0 && isPowerOf2_64(Value)) Result = DAG.getTargetConstant(Value, dl, TCVT); break; case 'O': // "O" is the constant zero. if (Value == 0) Result = DAG.getTargetConstant(Value, dl, TCVT); break; case 'P': // "P" is a constant whose negation is a signed 16-bit constant. if (isInt<16>(-Value)) Result = DAG.getTargetConstant(Value, dl, TCVT); break; } break; } } if (Result.getNode()) { Ops.push_back(Result); return; } // Handle standard constraint letters. TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } // isLegalAddressingMode - Return true if the addressing mode represented // by AM is legal for this target, for a load/store of the specified type. bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const { // PPC does not allow r+i addressing modes for vectors! if (Ty->isVectorTy() && AM.BaseOffs != 0) return false; // PPC allows a sign-extended 16-bit immediate field. if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) return false; // No global is ever allowed as a base. if (AM.BaseGV) return false; // PPC only support r+r, switch (AM.Scale) { case 0: // "r+i" or just "i", depending on HasBaseReg. break; case 1: if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. return false; // Otherwise we have r+r or r+i. break; case 2: if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. return false; // Allow 2*r as r+r. break; default: // No other scales are supported. 
    return false;
  }

  return true;
}

SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  // Make sure the function does not optimize away the store of the RA to
  // the stack.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setLRStoreRequired();
  bool isPPC64 = Subtarget.isPPC64();
  auto PtrVT = getPointerTy(MF.getDataLayout());

  if (Depth > 0) {
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset =
        DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
                        isPPC64 ? MVT::i64 : MVT::i32);
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Just load the return address off the stack.
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
                     MachinePointerInfo());
}

SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;

  // Naked functions never have a frame pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned FrameReg;
  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
    FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
  else
    FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;

  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
                                         PtrVT);
  while (Depth--)
    FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
                            FrameAddr, MachinePointerInfo());
  return FrameAddr;
}

// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
                                              SelectionDAG &DAG) const {
  bool isPPC64 = Subtarget.isPPC64();
  bool isDarwinABI = Subtarget.isDarwinABI();

  if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
      (!isPPC64 && VT != MVT::i32))
    report_fatal_error("Invalid register global variable type");

  bool is64Bit = isPPC64 && VT == MVT::i64;
  unsigned Reg = StringSwitch<unsigned>(RegName)
                   .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
                   .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
                   .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
                                  (is64Bit ? PPC::X13 : PPC::R13))
                   .Default(0);

  if (Reg)
    return Reg;
  report_fatal_error("Invalid register name global variable");
}

bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The PowerPC target isn't yet aware of offsets.
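isLegalAddressingMode above, and the isLegalICmpImmediate/isLegalAddImmediate hooks further down, revolve around the signed 16-bit immediate field of PPC D-form instructions. Below is a standalone sketch of the strict encodable range (the hook above screens base offsets with a slightly wider bound before the remaining legality checks); fitsDFormDisplacement is an illustrative name, not LLVM API.

#include <cassert>
#include <cstdint>

// D-form memory accesses encode a signed 16-bit displacement, so an offset
// from the base register is only directly encodable in [-32768, 32767].
bool fitsDFormDisplacement(int64_t Offset) {
  return Offset >= -(int64_t(1) << 15) && Offset < (int64_t(1) << 15);
}

int main() {
  assert(fitsDFormDisplacement(0));
  assert(fitsDFormDisplacement(-32768));
  assert(fitsDFormDisplacement(32767));
  assert(!fitsDFormDisplacement(32768));
  assert(!fitsDFormDisplacement(-32769));
  return 0;
}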
return false; } bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const { switch (Intrinsic) { case Intrinsic::ppc_qpx_qvlfd: case Intrinsic::ppc_qpx_qvlfs: case Intrinsic::ppc_qpx_qvlfcd: case Intrinsic::ppc_qpx_qvlfcs: case Intrinsic::ppc_qpx_qvlfiwa: case Intrinsic::ppc_qpx_qvlfiwz: case Intrinsic::ppc_altivec_lvx: case Intrinsic::ppc_altivec_lvxl: case Intrinsic::ppc_altivec_lvebx: case Intrinsic::ppc_altivec_lvehx: case Intrinsic::ppc_altivec_lvewx: case Intrinsic::ppc_vsx_lxvd2x: case Intrinsic::ppc_vsx_lxvw4x: { EVT VT; switch (Intrinsic) { case Intrinsic::ppc_altivec_lvebx: VT = MVT::i8; break; case Intrinsic::ppc_altivec_lvehx: VT = MVT::i16; break; case Intrinsic::ppc_altivec_lvewx: VT = MVT::i32; break; case Intrinsic::ppc_vsx_lxvd2x: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvlfd: VT = MVT::v4f64; break; case Intrinsic::ppc_qpx_qvlfs: VT = MVT::v4f32; break; case Intrinsic::ppc_qpx_qvlfcd: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvlfcs: VT = MVT::v2f32; break; default: VT = MVT::v4i32; break; } Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = VT; Info.ptrVal = I.getArgOperand(0); Info.offset = -VT.getStoreSize()+1; Info.size = 2*VT.getStoreSize()-1; Info.align = 1; Info.flags = MachineMemOperand::MOLoad; return true; } case Intrinsic::ppc_qpx_qvlfda: case Intrinsic::ppc_qpx_qvlfsa: case Intrinsic::ppc_qpx_qvlfcda: case Intrinsic::ppc_qpx_qvlfcsa: case Intrinsic::ppc_qpx_qvlfiwaa: case Intrinsic::ppc_qpx_qvlfiwza: { EVT VT; switch (Intrinsic) { case Intrinsic::ppc_qpx_qvlfda: VT = MVT::v4f64; break; case Intrinsic::ppc_qpx_qvlfsa: VT = MVT::v4f32; break; case Intrinsic::ppc_qpx_qvlfcda: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvlfcsa: VT = MVT::v2f32; break; default: VT = MVT::v4i32; break; } Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = VT; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.size = VT.getStoreSize(); Info.align = 1; Info.flags = MachineMemOperand::MOLoad; return true; } case Intrinsic::ppc_qpx_qvstfd: case Intrinsic::ppc_qpx_qvstfs: case Intrinsic::ppc_qpx_qvstfcd: case Intrinsic::ppc_qpx_qvstfcs: case Intrinsic::ppc_qpx_qvstfiw: case Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: case Intrinsic::ppc_altivec_stvebx: case Intrinsic::ppc_altivec_stvehx: case Intrinsic::ppc_altivec_stvewx: case Intrinsic::ppc_vsx_stxvd2x: case Intrinsic::ppc_vsx_stxvw4x: { EVT VT; switch (Intrinsic) { case Intrinsic::ppc_altivec_stvebx: VT = MVT::i8; break; case Intrinsic::ppc_altivec_stvehx: VT = MVT::i16; break; case Intrinsic::ppc_altivec_stvewx: VT = MVT::i32; break; case Intrinsic::ppc_vsx_stxvd2x: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvstfd: VT = MVT::v4f64; break; case Intrinsic::ppc_qpx_qvstfs: VT = MVT::v4f32; break; case Intrinsic::ppc_qpx_qvstfcd: VT = MVT::v2f64; break; case Intrinsic::ppc_qpx_qvstfcs: VT = MVT::v2f32; break; default: VT = MVT::v4i32; break; } Info.opc = ISD::INTRINSIC_VOID; Info.memVT = VT; Info.ptrVal = I.getArgOperand(1); Info.offset = -VT.getStoreSize()+1; Info.size = 2*VT.getStoreSize()-1; Info.align = 1; Info.flags = MachineMemOperand::MOStore; return true; } case Intrinsic::ppc_qpx_qvstfda: case Intrinsic::ppc_qpx_qvstfsa: case Intrinsic::ppc_qpx_qvstfcda: case Intrinsic::ppc_qpx_qvstfcsa: case Intrinsic::ppc_qpx_qvstfiwa: { EVT VT; switch (Intrinsic) { case Intrinsic::ppc_qpx_qvstfda: VT = MVT::v4f64; break; case Intrinsic::ppc_qpx_qvstfsa: VT = MVT::v4f32; break; case Intrinsic::ppc_qpx_qvstfcda: VT = MVT::v2f64; 
break; case Intrinsic::ppc_qpx_qvstfcsa: VT = MVT::v2f32; break; default: VT = MVT::v4i32; break; } Info.opc = ISD::INTRINSIC_VOID; Info.memVT = VT; Info.ptrVal = I.getArgOperand(1); Info.offset = 0; Info.size = VT.getStoreSize(); Info.align = 1; Info.flags = MachineMemOperand::MOStore; return true; } default: break; } return false; } /// getOptimalMemOpType - Returns the target specific optimal type for load /// and store operations as a result of memset, memcpy, and memmove /// lowering. If DstAlign is zero that means it's safe to destination /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, /// probably because the source does not need to be loaded. If 'IsMemset' is /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy /// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { if (getTargetMachine().getOptLevel() != CodeGenOpt::None) { const Function &F = MF.getFunction(); // When expanding a memset, require at least two QPX instructions to cover // the cost of loading the value to be stored from the constant pool. if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) && (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) && !F.hasFnAttribute(Attribute::NoImplicitFloat)) { return MVT::v4f64; } // We should use Altivec/VSX loads and stores when available. For unaligned // addresses, unaligned VSX loads are only fast starting with the P8. if (Subtarget.hasAltivec() && Size >= 16 && (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) || ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector()))) return MVT::v4i32; } if (Subtarget.isPPC64()) { return MVT::i64; } return MVT::i32; } /// Returns true if it is beneficial to convert a load of a constant /// to just the constant itself. bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); return !(BitSize == 0 || BitSize > 64); } bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); return NumBits1 == 64 && NumBits2 == 32; } bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { if (!VT1.isInteger() || !VT2.isInteger()) return false; unsigned NumBits1 = VT1.getSizeInBits(); unsigned NumBits2 = VT2.getSizeInBits(); return NumBits1 == 64 && NumBits2 == 32; } bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { // Generally speaking, zexts are not free, but they are free when they can be // folded with other operations. if (LoadSDNode *LD = dyn_cast(Val)) { EVT MemVT = LD->getMemoryVT(); if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 || (Subtarget.isPPC64() && MemVT == MVT::i32)) && (LD->getExtensionType() == ISD::NON_EXTLOAD || LD->getExtensionType() == ISD::ZEXTLOAD)) return true; } // FIXME: Add other cases... // - 32-bit shifts with a zext to i64 // - zext after ctlz, bswap, etc. 
// - zext after and by a constant mask return TargetLowering::isZExtFree(Val, VT2); } bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const { assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() && "invalid fpext types"); // Extending to float128 is not free. if (DestVT == MVT::f128) return false; return true; } bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const { return isInt<16>(Imm) || isUInt<16>(Imm); } bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const { return isInt<16>(Imm) || isUInt<16>(Imm); } bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, unsigned, bool *Fast) const { if (DisablePPCUnaligned) return false; // PowerPC supports unaligned memory access for simple non-vector types. // Although accessing unaligned addresses is not as efficient as accessing // aligned addresses, it is generally more efficient than manual expansion, // and generally only traps for software emulation when crossing page // boundaries. if (!VT.isSimple()) return false; if (VT.getSimpleVT().isVector()) { if (Subtarget.hasVSX()) { if (VT != MVT::v2f64 && VT != MVT::v2i64 && VT != MVT::v4f32 && VT != MVT::v4i32) return false; } else { return false; } } if (VT == MVT::ppcf128) return false; if (Fast) *Fast = true; return true; } bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { VT = VT.getScalarType(); if (!VT.isSimple()) return false; switch (VT.getSimpleVT().SimpleTy) { case MVT::f32: case MVT::f64: return true; case MVT::f128: return (EnableQuadPrecision && Subtarget.hasP9Vector()); default: break; } return false; } const MCPhysReg * PPCTargetLowering::getScratchRegisters(CallingConv::ID) const { // LR is a callee-save register, but we must treat it as clobbered by any call // site. Hence we include LR in the scratch registers, which are in turn added // as implicit-defs for stackmaps and patchpoints. The same reasoning applies // to CTR, which is used by any indirect call. static const MCPhysReg ScratchRegs[] = { PPC::X12, PPC::LR8, PPC::CTR8, 0 }; return ScratchRegs; } unsigned PPCTargetLowering::getExceptionPointerRegister( const Constant *PersonalityFn) const { return Subtarget.isPPC64() ? PPC::X3 : PPC::R3; } unsigned PPCTargetLowering::getExceptionSelectorRegister( const Constant *PersonalityFn) const { return Subtarget.isPPC64() ? PPC::X4 : PPC::R4; } bool PPCTargetLowering::shouldExpandBuildVectorWithShuffles( EVT VT , unsigned DefinedValues) const { if (VT == MVT::v2i64) return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves if (Subtarget.hasVSX() || Subtarget.hasQPX()) return true; return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); } Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { if (DisableILPPref || Subtarget.enableMachineScheduler()) return TargetLowering::getSchedulingPreference(N); return Sched::ILP; } // Create a fast isel object. 
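getOptimalMemOpType above picks v4f64, v4i32, i64 or i32 as the chunk type for expanded memcpy/memset depending on size, alignment and subtarget features. The following is a much-simplified standalone model of that decision, covering only the Altivec/GPR cases; names and parameters are illustrative, and this is a sketch of the policy rather than LLVM API.

#include <cassert>
#include <cstdint>

// Prefer 16-byte vector chunks when the operation is large enough and both
// sides are 16-byte aligned (or the target tolerates unaligned vector
// access, as with VSX on P8); otherwise fall back to the widest GPR.
// An alignment of 0 is treated as "unconstrained", as in the hook above.
unsigned chooseMemOpChunkBytes(uint64_t Size, unsigned DstAlign,
                               unsigned SrcAlign, bool HasVectors,
                               bool UnalignedVectorsFast, bool IsPPC64) {
  bool AlignedForVectors =
      (SrcAlign == 0 || SrcAlign >= 16) && (DstAlign == 0 || DstAlign >= 16);
  if (HasVectors && Size >= 16 && (AlignedForVectors || UnalignedVectorsFast))
    return 16; // e.g. v4i32 loads/stores
  return IsPPC64 ? 8 : 4; // i64 or i32 chunks
}

int main() {
  assert(chooseMemOpChunkBytes(64, 16, 16, true, false, true) == 16);
  assert(chooseMemOpChunkBytes(64, 8, 8, true, false, true) == 8);
  assert(chooseMemOpChunkBytes(8, 0, 0, true, true, false) == 4);
  return 0;
}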
FastISel * PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const { return PPC::createFastISel(FuncInfo, LibInfo); } void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { if (Subtarget.isDarwinABI()) return; if (!Subtarget.isPPC64()) return; // Update IsSplitCSR in PPCFunctionInfo PPCFunctionInfo *PFI = Entry->getParent()->getInfo(); PFI->setIsSplitCSR(true); } void PPCTargetLowering::insertCopiesSplitCSR( MachineBasicBlock *Entry, const SmallVectorImpl &Exits) const { const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); if (!IStart) return; const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); MachineBasicBlock::iterator MBBI = Entry->begin(); for (const MCPhysReg *I = IStart; *I; ++I) { const TargetRegisterClass *RC = nullptr; if (PPC::G8RCRegClass.contains(*I)) RC = &PPC::G8RCRegClass; else if (PPC::F8RCRegClass.contains(*I)) RC = &PPC::F8RCRegClass; else if (PPC::CRRCRegClass.contains(*I)) RC = &PPC::CRRCRegClass; else if (PPC::VRRCRegClass.contains(*I)) RC = &PPC::VRRCRegClass; else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); unsigned NewVR = MRI->createVirtualRegister(RC); // Create copy from CSR to a virtual register. // FIXME: this currently does not emit CFI pseudo-instructions, it works // fine for CXX_FAST_TLS since the C++-style TLS access functions should be // nounwind. If we want to generalize this later, we may need to emit // CFI pseudo-instructions. assert(Entry->getParent()->getFunction().hasFnAttribute( Attribute::NoUnwind) && "Function should be nounwind in insertCopiesSplitCSR!"); Entry->addLiveIn(*I); BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(*I); // Insert the copy-back instructions right before the terminator for (auto *Exit : Exits) BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), TII->get(TargetOpcode::COPY), *I) .addReg(NewVR); } } // Override to enable LOAD_STACK_GUARD lowering on Linux. bool PPCTargetLowering::useLoadStackGuardNode() const { if (!Subtarget.isTargetLinux()) return TargetLowering::useLoadStackGuardNode(); return true; } // Override to disable global variable loading on Linux. void PPCTargetLowering::insertSSPDeclarations(Module &M) const { if (!Subtarget.isTargetLinux()) return TargetLowering::insertSSPDeclarations(M); } bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { if (!VT.isSimple() || !Subtarget.hasVSX()) return false; switch(VT.getSimpleVT().SimpleTy) { default: // For FP types that are currently not supported by PPC backend, return // false. Examples: f16, f80. 
    return false;
  case MVT::f32:
  case MVT::f64:
  case MVT::ppcf128:
    return Imm.isPosZero();
  }
}

// For vector shift operation op, fold
// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
                                  SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();
  unsigned Opcode = N->getOpcode();
  unsigned TargetOpcode;

  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected shift operation");
  case ISD::SHL:
    TargetOpcode = PPCISD::SHL;
    break;
  case ISD::SRL:
    TargetOpcode = PPCISD::SRL;
    break;
  case ISD::SRA:
    TargetOpcode = PPCISD::SRA;
    break;
  }

  if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
      N1->getOpcode() == ISD::AND)
    if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
      if (Mask->getZExtValue() == OpSizeInBits - 1)
        return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));

  return SDValue();
}

SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
    return Value;
  return SDValue();
}

SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
    return Value;
  return SDValue();
}

SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
    return Value;
  return SDValue();
}

bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
  if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())
    return false;

  // If not a tail call then no need to proceed.
  if (!CI->isTailCall())
    return false;

  // If tail calls are disabled for the caller then we are done.
  const Function *Caller = CI->getParent()->getParent();
  auto Attr = Caller->getFnAttribute("disable-tail-calls");
  if (Attr.getValueAsString() == "true")
    return false;

  // If sibling calls have been disabled and tail-calls aren't guaranteed
  // there is no reason to duplicate.
  auto &TM = getTargetMachine();
  if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
    return false;

  // Can't tail call a function called indirectly, or if it has variadic args.
  const Function *Callee = CI->getCalledFunction();
  if (!Callee || Callee->isVarArg())
    return false;

  // Make sure the callee and caller calling conventions are eligible for tco.
  if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
                                           CI->getCallingConv()))
    return false;

  // If the function is local then we have a good chance at tail-calling it
  return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
}

bool PPCTargetLowering::
isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
  const Value *Mask = AndI.getOperand(1);
  // If the mask is suitable for andi. or andis. we should sink the and.
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
    // Can't handle constants wider than 64-bits.
    if (CI->getBitWidth() > 64)
      return false;
    int64_t ConstVal = CI->getZExtValue();
    return isUInt<16>(ConstVal) ||
      (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
  }

  // For non-constant masks, we can always use the record-form and.
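isMaskAndCmp0FoldingBeneficial above sinks the and next to its compare with zero whenever the mask can be encoded directly by a record-form andi. (low 16 bits) or andis. (bits 16 to 31). Below is a standalone restatement of that encodability test with an illustrative name; it mirrors the check above but is not LLVM API.

#include <cassert>
#include <cstdint>

// andi. takes a 16-bit unsigned immediate; andis. takes the same immediate
// shifted left by 16.  A mask is directly encodable when it fits one form.
bool maskFitsRecordFormAnd(uint64_t Mask) {
  bool FitsAndi = (Mask >> 16) == 0;                          // low 16 bits only
  bool FitsAndis = (Mask & 0xFFFF) == 0 && (Mask >> 32) == 0; // bits 16..31 only
  return FitsAndi || FitsAndis;
}

int main() {
  assert(maskFitsRecordFormAnd(0x00FF));       // andi.
  assert(maskFitsRecordFormAnd(0x00FF0000));   // andis.
  assert(!maskFitsRecordFormAnd(0x1FFFF));     // straddles the boundary
  assert(!maskFitsRecordFormAnd(0x100000000)); // too wide
  return 0;
}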
return true; } Index: vendor/llvm/dist-release_70/lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- vendor/llvm/dist-release_70/lib/Target/PowerPC/PPCInstrVSX.td (revision 341364) +++ vendor/llvm/dist-release_70/lib/Target/PowerPC/PPCInstrVSX.td (revision 341365) @@ -1,3934 +1,4007 @@ //===- PPCInstrVSX.td - The PowerPC VSX Extension --*- tablegen -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file describes the VSX extension to the PowerPC instruction set. // //===----------------------------------------------------------------------===// // *********************************** NOTE *********************************** // ** For POWER8 Little Endian, the VSX swap optimization relies on knowing ** // ** which VMX and VSX instructions are lane-sensitive and which are not. ** // ** A lane-sensitive instruction relies, implicitly or explicitly, on ** // ** whether lanes are numbered from left to right. An instruction like ** // ** VADDFP is not lane-sensitive, because each lane of the result vector ** // ** relies only on the corresponding lane of the source vectors. However, ** // ** an instruction like VMULESB is lane-sensitive, because "even" and ** // ** "odd" lanes are different for big-endian and little-endian numbering. ** // ** ** // ** When adding new VMX and VSX instructions, please consider whether they ** // ** are lane-sensitive. If so, they must be added to a switch statement ** // ** in PPCVSXSwapRemoval::gatherVectorInstructions(). ** // **************************************************************************** def PPCRegVSRCAsmOperand : AsmOperandClass { let Name = "RegVSRC"; let PredicateMethod = "isVSRegNumber"; } def vsrc : RegisterOperand { let ParserMatchClass = PPCRegVSRCAsmOperand; } def PPCRegVSFRCAsmOperand : AsmOperandClass { let Name = "RegVSFRC"; let PredicateMethod = "isVSRegNumber"; } def vsfrc : RegisterOperand { let ParserMatchClass = PPCRegVSFRCAsmOperand; } def PPCRegVSSRCAsmOperand : AsmOperandClass { let Name = "RegVSSRC"; let PredicateMethod = "isVSRegNumber"; } def vssrc : RegisterOperand { let ParserMatchClass = PPCRegVSSRCAsmOperand; } def PPCRegSPILLTOVSRRCAsmOperand : AsmOperandClass { let Name = "RegSPILLTOVSRRC"; let PredicateMethod = "isVSRegNumber"; } def spilltovsrrc : RegisterOperand { let ParserMatchClass = PPCRegSPILLTOVSRRCAsmOperand; } // Little-endian-specific nodes. 
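The NOTE at the top of this file states that the little-endian VSX swap optimization must know which instructions are lane-sensitive. The toy C++ model below is a conceptual sketch, not anything in this patch: an element-wise add gives the same answer under either lane numbering, while an operation defined in terms of "even" lanes (as VMULESB is, modelled here with four int lanes) changes meaning when lanes are numbered from the other end.

#include <array>
#include <cassert>

// Element-wise add: result lane i depends only on lane i of each input, so
// it does not matter whether lanes are numbered left-to-right (BE) or
// right-to-left (LE).
std::array<int, 4> addLanes(std::array<int, 4> A, std::array<int, 4> B) {
  std::array<int, 4> R{};
  for (int i = 0; i < 4; ++i)
    R[i] = A[i] + B[i];
  return R;
}

// "Multiply even lanes": which elements count as even depends on how lanes
// are numbered, so BE and LE disagree; that is what makes VMULESB-style
// operations lane-sensitive for the swap optimization.
std::array<int, 2> mulEvenLanes(std::array<int, 4> A, std::array<int, 4> B,
                                bool LanesNumberedFromLeft) {
  std::array<int, 2> R{};
  for (int i = 0; i < 2; ++i) {
    int Lane = LanesNumberedFromLeft ? 2 * i : 3 - 2 * i;
    R[i] = A[Lane] * B[Lane];
  }
  return R;
}

int main() {
  std::array<int, 4> A{1, 2, 3, 4}, B{10, 20, 30, 40};
  assert(addLanes(A, B) == (std::array<int, 4>{11, 22, 33, 44}));
  // Same register contents, different lane numbering, different "even" lanes.
  assert(mulEvenLanes(A, B, /*BE*/ true) == (std::array<int, 2>{10, 90}));
  assert(mulEvenLanes(A, B, /*LE*/ false) == (std::array<int, 2>{160, 40}));
  return 0;
}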
def SDT_PPClxvd2x : SDTypeProfile<1, 1, [ SDTCisVT<0, v2f64>, SDTCisPtrTy<1> ]>; def SDT_PPCstxvd2x : SDTypeProfile<0, 2, [ SDTCisVT<0, v2f64>, SDTCisPtrTy<1> ]>; def SDT_PPCxxswapd : SDTypeProfile<1, 1, [ SDTCisSameAs<0, 1> ]>; def SDTVecConv : SDTypeProfile<1, 2, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2> ]>; def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x, [SDNPHasChain, SDNPMayStore]>; def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>; def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>; def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>; def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>; def PPCsvec2fp : SDNode<"PPCISD::SINT_VEC_TO_FP", SDTVecConv, []>; def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>; def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>; multiclass XX3Form_Rcr opcode, bits<7> xo, string asmbase, string asmstr, InstrItinClass itin, Intrinsic Int, ValueType OutTy, ValueType InTy> { let BaseName = asmbase in { def NAME : XX3Form_Rc; let Defs = [CR6] in def o : XX3Form_Rc, isDOT; } } // Instruction form with a single input register for instructions such as // XXPERMDI. The reason for defining this is that specifying multiple chained // operands (such as loads) to an instruction will perform both chained // operations rather than coalescing them into a single register - even though // the source memory location is the same. This simply forces the instruction // to use the same register for both inputs. // For example, an output DAG such as this: // (XXPERMDI (LXSIBZX xoaddr:$src), (LXSIBZX xoaddr:$src ), 0)) // would result in two load instructions emitted and used as separate inputs // to the XXPERMDI instruction. class XX3Form_2s opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list pattern> : XX3Form_2 { let XB = XA; } def HasVSX : Predicate<"PPCSubTarget->hasVSX()">; def IsLittleEndian : Predicate<"PPCSubTarget->isLittleEndian()">; def IsBigEndian : Predicate<"!PPCSubTarget->isLittleEndian()">; def HasOnlySwappingMemOps : Predicate<"!PPCSubTarget->hasP9Vector()">; let Predicates = [HasVSX] in { let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. let UseVSXReg = 1 in { let hasSideEffects = 0 in { // VSX instructions don't have side effects. 
let Uses = [RM] in { // Load indexed instructions let mayLoad = 1, mayStore = 0 in { let CodeSize = 3 in def LXSDX : XX1Form_memOp<31, 588, (outs vsfrc:$XT), (ins memrr:$src), "lxsdx $XT, $src", IIC_LdStLFD, []>; // Pseudo instruction XFLOADf64 will be expanded to LXSDX or LFDX later let isPseudo = 1, CodeSize = 3 in def XFLOADf64 : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src), "#XFLOADf64", [(set f64:$XT, (load xoaddr:$src))]>; let Predicates = [HasVSX, HasOnlySwappingMemOps] in def LXVD2X : XX1Form_memOp<31, 844, (outs vsrc:$XT), (ins memrr:$src), "lxvd2x $XT, $src", IIC_LdStLFD, [(set v2f64:$XT, (int_ppc_vsx_lxvd2x xoaddr:$src))]>; def LXVDSX : XX1Form_memOp<31, 332, (outs vsrc:$XT), (ins memrr:$src), "lxvdsx $XT, $src", IIC_LdStLFD, []>; let Predicates = [HasVSX, HasOnlySwappingMemOps] in def LXVW4X : XX1Form_memOp<31, 780, (outs vsrc:$XT), (ins memrr:$src), "lxvw4x $XT, $src", IIC_LdStLFD, []>; } // mayLoad // Store indexed instructions let mayStore = 1, mayLoad = 0 in { let CodeSize = 3 in def STXSDX : XX1Form_memOp<31, 716, (outs), (ins vsfrc:$XT, memrr:$dst), "stxsdx $XT, $dst", IIC_LdStSTFD, []>; // Pseudo instruction XFSTOREf64 will be expanded to STXSDX or STFDX later let isPseudo = 1, CodeSize = 3 in def XFSTOREf64 : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst), "#XFSTOREf64", [(store f64:$XT, xoaddr:$dst)]>; let Predicates = [HasVSX, HasOnlySwappingMemOps] in { // The behaviour of this instruction is endianness-specific so we provide no // pattern to match it without considering endianness. def STXVD2X : XX1Form_memOp<31, 972, (outs), (ins vsrc:$XT, memrr:$dst), "stxvd2x $XT, $dst", IIC_LdStSTFD, []>; def STXVW4X : XX1Form_memOp<31, 908, (outs), (ins vsrc:$XT, memrr:$dst), "stxvw4x $XT, $dst", IIC_LdStSTFD, []>; } } // mayStore // Add/Mul Instructions let isCommutable = 1 in { def XSADDDP : XX3Form<60, 32, (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), "xsadddp $XT, $XA, $XB", IIC_VecFP, [(set f64:$XT, (fadd f64:$XA, f64:$XB))]>; def XSMULDP : XX3Form<60, 48, (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), "xsmuldp $XT, $XA, $XB", IIC_VecFP, [(set f64:$XT, (fmul f64:$XA, f64:$XB))]>; def XVADDDP : XX3Form<60, 96, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvadddp $XT, $XA, $XB", IIC_VecFP, [(set v2f64:$XT, (fadd v2f64:$XA, v2f64:$XB))]>; def XVADDSP : XX3Form<60, 64, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvaddsp $XT, $XA, $XB", IIC_VecFP, [(set v4f32:$XT, (fadd v4f32:$XA, v4f32:$XB))]>; def XVMULDP : XX3Form<60, 112, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvmuldp $XT, $XA, $XB", IIC_VecFP, [(set v2f64:$XT, (fmul v2f64:$XA, v2f64:$XB))]>; def XVMULSP : XX3Form<60, 80, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvmulsp $XT, $XA, $XB", IIC_VecFP, [(set v4f32:$XT, (fmul v4f32:$XA, v4f32:$XB))]>; } // Subtract Instructions def XSSUBDP : XX3Form<60, 40, (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), "xssubdp $XT, $XA, $XB", IIC_VecFP, [(set f64:$XT, (fsub f64:$XA, f64:$XB))]>; def XVSUBDP : XX3Form<60, 104, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvsubdp $XT, $XA, $XB", IIC_VecFP, [(set v2f64:$XT, (fsub v2f64:$XA, v2f64:$XB))]>; def XVSUBSP : XX3Form<60, 72, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvsubsp $XT, $XA, $XB", IIC_VecFP, [(set v4f32:$XT, (fsub v4f32:$XA, v4f32:$XB))]>; // FMA Instructions let BaseName = "XSMADDADP" in { let isCommutable = 1 in def XSMADDADP : XX3Form<60, 33, (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), "xsmaddadp $XT, $XA, $XB", IIC_VecFP, [(set f64:$XT, (fma f64:$XA, f64:$XB, f64:$XTi))]>, RegConstraint<"$XTi 
= $XT">, NoEncode<"$XTi">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XSMADDMDP : XX3Form<60, 41, (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), "xsmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; } let BaseName = "XSMSUBADP" in { let isCommutable = 1 in def XSMSUBADP : XX3Form<60, 49, (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), "xsmsubadp $XT, $XA, $XB", IIC_VecFP, [(set f64:$XT, (fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XSMSUBMDP : XX3Form<60, 57, (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), "xsmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; } let BaseName = "XSNMADDADP" in { let isCommutable = 1 in def XSNMADDADP : XX3Form<60, 161, (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), "xsnmaddadp $XT, $XA, $XB", IIC_VecFP, [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, f64:$XTi)))]>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XSNMADDMDP : XX3Form<60, 169, (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), "xsnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; } let BaseName = "XSNMSUBADP" in { let isCommutable = 1 in def XSNMSUBADP : XX3Form<60, 177, (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), "xsnmsubadp $XT, $XA, $XB", IIC_VecFP, [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XSNMSUBMDP : XX3Form<60, 185, (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), "xsnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; } let BaseName = "XVMADDADP" in { let isCommutable = 1 in def XVMADDADP : XX3Form<60, 97, (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvmaddadp $XT, $XA, $XB", IIC_VecFP, [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi))]>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XVMADDMDP : XX3Form<60, 105, (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; } let BaseName = "XVMADDASP" in { let isCommutable = 1 in def XVMADDASP : XX3Form<60, 65, (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvmaddasp $XT, $XA, $XB", IIC_VecFP, [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi))]>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XVMADDMSP : XX3Form<60, 73, (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; } let BaseName = "XVMSUBADP" in { let isCommutable = 1 in def XVMSUBADP : XX3Form<60, 113, (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvmsubadp $XT, $XA, $XB", IIC_VecFP, [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi)))]>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XVMSUBMDP : XX3Form<60, 121, (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; } let BaseName = "XVMSUBASP" in { let isCommutable = 1 in def XVMSUBASP : XX3Form<60, 81, (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), 
"xvmsubasp $XT, $XA, $XB", IIC_VecFP, [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi)))]>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XVMSUBMSP : XX3Form<60, 89, (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; } let BaseName = "XVNMADDADP" in { let isCommutable = 1 in def XVNMADDADP : XX3Form<60, 225, (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvnmaddadp $XT, $XA, $XB", IIC_VecFP, [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi)))]>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XVNMADDMDP : XX3Form<60, 233, (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; } let BaseName = "XVNMADDASP" in { let isCommutable = 1 in def XVNMADDASP : XX3Form<60, 193, (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvnmaddasp $XT, $XA, $XB", IIC_VecFP, [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi)))]>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XVNMADDMSP : XX3Form<60, 201, (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; } let BaseName = "XVNMSUBADP" in { let isCommutable = 1 in def XVNMSUBADP : XX3Form<60, 241, (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvnmsubadp $XT, $XA, $XB", IIC_VecFP, [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi))))]>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XVNMSUBMDP : XX3Form<60, 249, (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; } let BaseName = "XVNMSUBASP" in { let isCommutable = 1 in def XVNMSUBASP : XX3Form<60, 209, (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvnmsubasp $XT, $XA, $XB", IIC_VecFP, [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi))))]>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XVNMSUBMSP : XX3Form<60, 217, (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; } // Division Instructions def XSDIVDP : XX3Form<60, 56, (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), "xsdivdp $XT, $XA, $XB", IIC_FPDivD, [(set f64:$XT, (fdiv f64:$XA, f64:$XB))]>; def XSSQRTDP : XX2Form<60, 75, (outs vsfrc:$XT), (ins vsfrc:$XB), "xssqrtdp $XT, $XB", IIC_FPSqrtD, [(set f64:$XT, (fsqrt f64:$XB))]>; def XSREDP : XX2Form<60, 90, (outs vsfrc:$XT), (ins vsfrc:$XB), "xsredp $XT, $XB", IIC_VecFP, [(set f64:$XT, (PPCfre f64:$XB))]>; def XSRSQRTEDP : XX2Form<60, 74, (outs vsfrc:$XT), (ins vsfrc:$XB), "xsrsqrtedp $XT, $XB", IIC_VecFP, [(set f64:$XT, (PPCfrsqrte f64:$XB))]>; def XSTDIVDP : XX3Form_1<60, 61, (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), "xstdivdp $crD, $XA, $XB", IIC_FPCompare, []>; def XSTSQRTDP : XX2Form_1<60, 106, (outs crrc:$crD), (ins vsfrc:$XB), "xstsqrtdp $crD, $XB", IIC_FPCompare, []>; def XVDIVDP : XX3Form<60, 120, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvdivdp $XT, $XA, $XB", IIC_FPDivD, [(set v2f64:$XT, (fdiv v2f64:$XA, v2f64:$XB))]>; def XVDIVSP : XX3Form<60, 88, (outs vsrc:$XT), (ins 
vsrc:$XA, vsrc:$XB), "xvdivsp $XT, $XA, $XB", IIC_FPDivS, [(set v4f32:$XT, (fdiv v4f32:$XA, v4f32:$XB))]>; def XVSQRTDP : XX2Form<60, 203, (outs vsrc:$XT), (ins vsrc:$XB), "xvsqrtdp $XT, $XB", IIC_FPSqrtD, [(set v2f64:$XT, (fsqrt v2f64:$XB))]>; def XVSQRTSP : XX2Form<60, 139, (outs vsrc:$XT), (ins vsrc:$XB), "xvsqrtsp $XT, $XB", IIC_FPSqrtS, [(set v4f32:$XT, (fsqrt v4f32:$XB))]>; def XVTDIVDP : XX3Form_1<60, 125, (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB), "xvtdivdp $crD, $XA, $XB", IIC_FPCompare, []>; def XVTDIVSP : XX3Form_1<60, 93, (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB), "xvtdivsp $crD, $XA, $XB", IIC_FPCompare, []>; def XVTSQRTDP : XX2Form_1<60, 234, (outs crrc:$crD), (ins vsrc:$XB), "xvtsqrtdp $crD, $XB", IIC_FPCompare, []>; def XVTSQRTSP : XX2Form_1<60, 170, (outs crrc:$crD), (ins vsrc:$XB), "xvtsqrtsp $crD, $XB", IIC_FPCompare, []>; def XVREDP : XX2Form<60, 218, (outs vsrc:$XT), (ins vsrc:$XB), "xvredp $XT, $XB", IIC_VecFP, [(set v2f64:$XT, (PPCfre v2f64:$XB))]>; def XVRESP : XX2Form<60, 154, (outs vsrc:$XT), (ins vsrc:$XB), "xvresp $XT, $XB", IIC_VecFP, [(set v4f32:$XT, (PPCfre v4f32:$XB))]>; def XVRSQRTEDP : XX2Form<60, 202, (outs vsrc:$XT), (ins vsrc:$XB), "xvrsqrtedp $XT, $XB", IIC_VecFP, [(set v2f64:$XT, (PPCfrsqrte v2f64:$XB))]>; def XVRSQRTESP : XX2Form<60, 138, (outs vsrc:$XT), (ins vsrc:$XB), "xvrsqrtesp $XT, $XB", IIC_VecFP, [(set v4f32:$XT, (PPCfrsqrte v4f32:$XB))]>; // Compare Instructions def XSCMPODP : XX3Form_1<60, 43, (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), "xscmpodp $crD, $XA, $XB", IIC_FPCompare, []>; def XSCMPUDP : XX3Form_1<60, 35, (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>; defm XVCMPEQDP : XX3Form_Rcr<60, 99, "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare, int_ppc_vsx_xvcmpeqdp, v2i64, v2f64>; defm XVCMPEQSP : XX3Form_Rcr<60, 67, "xvcmpeqsp", "$XT, $XA, $XB", IIC_VecFPCompare, int_ppc_vsx_xvcmpeqsp, v4i32, v4f32>; defm XVCMPGEDP : XX3Form_Rcr<60, 115, "xvcmpgedp", "$XT, $XA, $XB", IIC_VecFPCompare, int_ppc_vsx_xvcmpgedp, v2i64, v2f64>; defm XVCMPGESP : XX3Form_Rcr<60, 83, "xvcmpgesp", "$XT, $XA, $XB", IIC_VecFPCompare, int_ppc_vsx_xvcmpgesp, v4i32, v4f32>; defm XVCMPGTDP : XX3Form_Rcr<60, 107, "xvcmpgtdp", "$XT, $XA, $XB", IIC_VecFPCompare, int_ppc_vsx_xvcmpgtdp, v2i64, v2f64>; defm XVCMPGTSP : XX3Form_Rcr<60, 75, "xvcmpgtsp", "$XT, $XA, $XB", IIC_VecFPCompare, int_ppc_vsx_xvcmpgtsp, v4i32, v4f32>; // Move Instructions def XSABSDP : XX2Form<60, 345, (outs vsfrc:$XT), (ins vsfrc:$XB), "xsabsdp $XT, $XB", IIC_VecFP, [(set f64:$XT, (fabs f64:$XB))]>; def XSNABSDP : XX2Form<60, 361, (outs vsfrc:$XT), (ins vsfrc:$XB), "xsnabsdp $XT, $XB", IIC_VecFP, [(set f64:$XT, (fneg (fabs f64:$XB)))]>; def XSNEGDP : XX2Form<60, 377, (outs vsfrc:$XT), (ins vsfrc:$XB), "xsnegdp $XT, $XB", IIC_VecFP, [(set f64:$XT, (fneg f64:$XB))]>; def XSCPSGNDP : XX3Form<60, 176, (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), "xscpsgndp $XT, $XA, $XB", IIC_VecFP, [(set f64:$XT, (fcopysign f64:$XB, f64:$XA))]>; def XVABSDP : XX2Form<60, 473, (outs vsrc:$XT), (ins vsrc:$XB), "xvabsdp $XT, $XB", IIC_VecFP, [(set v2f64:$XT, (fabs v2f64:$XB))]>; def XVABSSP : XX2Form<60, 409, (outs vsrc:$XT), (ins vsrc:$XB), "xvabssp $XT, $XB", IIC_VecFP, [(set v4f32:$XT, (fabs v4f32:$XB))]>; def XVCPSGNDP : XX3Form<60, 240, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvcpsgndp $XT, $XA, $XB", IIC_VecFP, [(set v2f64:$XT, (fcopysign v2f64:$XB, v2f64:$XA))]>; def XVCPSGNSP : XX3Form<60, 208, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvcpsgnsp $XT, $XA, $XB", 
IIC_VecFP, [(set v4f32:$XT, (fcopysign v4f32:$XB, v4f32:$XA))]>; def XVNABSDP : XX2Form<60, 489, (outs vsrc:$XT), (ins vsrc:$XB), "xvnabsdp $XT, $XB", IIC_VecFP, [(set v2f64:$XT, (fneg (fabs v2f64:$XB)))]>; def XVNABSSP : XX2Form<60, 425, (outs vsrc:$XT), (ins vsrc:$XB), "xvnabssp $XT, $XB", IIC_VecFP, [(set v4f32:$XT, (fneg (fabs v4f32:$XB)))]>; def XVNEGDP : XX2Form<60, 505, (outs vsrc:$XT), (ins vsrc:$XB), "xvnegdp $XT, $XB", IIC_VecFP, [(set v2f64:$XT, (fneg v2f64:$XB))]>; def XVNEGSP : XX2Form<60, 441, (outs vsrc:$XT), (ins vsrc:$XB), "xvnegsp $XT, $XB", IIC_VecFP, [(set v4f32:$XT, (fneg v4f32:$XB))]>; // Conversion Instructions def XSCVDPSP : XX2Form<60, 265, (outs vsfrc:$XT), (ins vsfrc:$XB), "xscvdpsp $XT, $XB", IIC_VecFP, []>; def XSCVDPSXDS : XX2Form<60, 344, (outs vsfrc:$XT), (ins vsfrc:$XB), "xscvdpsxds $XT, $XB", IIC_VecFP, [(set f64:$XT, (PPCfctidz f64:$XB))]>; let isCodeGenOnly = 1 in def XSCVDPSXDSs : XX2Form<60, 344, (outs vssrc:$XT), (ins vssrc:$XB), "xscvdpsxds $XT, $XB", IIC_VecFP, [(set f32:$XT, (PPCfctidz f32:$XB))]>; def XSCVDPSXWS : XX2Form<60, 88, (outs vsfrc:$XT), (ins vsfrc:$XB), "xscvdpsxws $XT, $XB", IIC_VecFP, [(set f64:$XT, (PPCfctiwz f64:$XB))]>; let isCodeGenOnly = 1 in def XSCVDPSXWSs : XX2Form<60, 88, (outs vssrc:$XT), (ins vssrc:$XB), "xscvdpsxws $XT, $XB", IIC_VecFP, [(set f32:$XT, (PPCfctiwz f32:$XB))]>; def XSCVDPUXDS : XX2Form<60, 328, (outs vsfrc:$XT), (ins vsfrc:$XB), "xscvdpuxds $XT, $XB", IIC_VecFP, [(set f64:$XT, (PPCfctiduz f64:$XB))]>; let isCodeGenOnly = 1 in def XSCVDPUXDSs : XX2Form<60, 328, (outs vssrc:$XT), (ins vssrc:$XB), "xscvdpuxds $XT, $XB", IIC_VecFP, [(set f32:$XT, (PPCfctiduz f32:$XB))]>; def XSCVDPUXWS : XX2Form<60, 72, (outs vsfrc:$XT), (ins vsfrc:$XB), "xscvdpuxws $XT, $XB", IIC_VecFP, [(set f64:$XT, (PPCfctiwuz f64:$XB))]>; let isCodeGenOnly = 1 in def XSCVDPUXWSs : XX2Form<60, 72, (outs vssrc:$XT), (ins vssrc:$XB), "xscvdpuxws $XT, $XB", IIC_VecFP, [(set f32:$XT, (PPCfctiwuz f32:$XB))]>; def XSCVSPDP : XX2Form<60, 329, (outs vsfrc:$XT), (ins vsfrc:$XB), "xscvspdp $XT, $XB", IIC_VecFP, []>; def XSCVSXDDP : XX2Form<60, 376, (outs vsfrc:$XT), (ins vsfrc:$XB), "xscvsxddp $XT, $XB", IIC_VecFP, [(set f64:$XT, (PPCfcfid f64:$XB))]>; def XSCVUXDDP : XX2Form<60, 360, (outs vsfrc:$XT), (ins vsfrc:$XB), "xscvuxddp $XT, $XB", IIC_VecFP, [(set f64:$XT, (PPCfcfidu f64:$XB))]>; def XVCVDPSP : XX2Form<60, 393, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvdpsp $XT, $XB", IIC_VecFP, [(set v4f32:$XT, (int_ppc_vsx_xvcvdpsp v2f64:$XB))]>; def XVCVDPSXDS : XX2Form<60, 472, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvdpsxds $XT, $XB", IIC_VecFP, [(set v2i64:$XT, (fp_to_sint v2f64:$XB))]>; def XVCVDPSXWS : XX2Form<60, 216, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvdpsxws $XT, $XB", IIC_VecFP, [(set v4i32:$XT, (int_ppc_vsx_xvcvdpsxws v2f64:$XB))]>; def XVCVDPUXDS : XX2Form<60, 456, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvdpuxds $XT, $XB", IIC_VecFP, [(set v2i64:$XT, (fp_to_uint v2f64:$XB))]>; def XVCVDPUXWS : XX2Form<60, 200, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvdpuxws $XT, $XB", IIC_VecFP, [(set v4i32:$XT, (int_ppc_vsx_xvcvdpuxws v2f64:$XB))]>; def XVCVSPDP : XX2Form<60, 457, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvspdp $XT, $XB", IIC_VecFP, [(set v2f64:$XT, (int_ppc_vsx_xvcvspdp v4f32:$XB))]>; def XVCVSPSXDS : XX2Form<60, 408, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvspsxds $XT, $XB", IIC_VecFP, []>; def XVCVSPSXWS : XX2Form<60, 152, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvspsxws $XT, $XB", IIC_VecFP, [(set v4i32:$XT, (fp_to_sint v4f32:$XB))]>; def XVCVSPUXDS : XX2Form<60, 
392, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvspuxds $XT, $XB", IIC_VecFP, []>; def XVCVSPUXWS : XX2Form<60, 136, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvspuxws $XT, $XB", IIC_VecFP, [(set v4i32:$XT, (fp_to_uint v4f32:$XB))]>; def XVCVSXDDP : XX2Form<60, 504, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvsxddp $XT, $XB", IIC_VecFP, [(set v2f64:$XT, (sint_to_fp v2i64:$XB))]>; def XVCVSXDSP : XX2Form<60, 440, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvsxdsp $XT, $XB", IIC_VecFP, [(set v4f32:$XT, (int_ppc_vsx_xvcvsxdsp v2i64:$XB))]>; def XVCVSXWDP : XX2Form<60, 248, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvsxwdp $XT, $XB", IIC_VecFP, [(set v2f64:$XT, (int_ppc_vsx_xvcvsxwdp v4i32:$XB))]>; def XVCVSXWSP : XX2Form<60, 184, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvsxwsp $XT, $XB", IIC_VecFP, [(set v4f32:$XT, (sint_to_fp v4i32:$XB))]>; def XVCVUXDDP : XX2Form<60, 488, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvuxddp $XT, $XB", IIC_VecFP, [(set v2f64:$XT, (uint_to_fp v2i64:$XB))]>; def XVCVUXDSP : XX2Form<60, 424, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvuxdsp $XT, $XB", IIC_VecFP, [(set v4f32:$XT, (int_ppc_vsx_xvcvuxdsp v2i64:$XB))]>; def XVCVUXWDP : XX2Form<60, 232, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvuxwdp $XT, $XB", IIC_VecFP, [(set v2f64:$XT, (int_ppc_vsx_xvcvuxwdp v4i32:$XB))]>; def XVCVUXWSP : XX2Form<60, 168, (outs vsrc:$XT), (ins vsrc:$XB), "xvcvuxwsp $XT, $XB", IIC_VecFP, [(set v4f32:$XT, (uint_to_fp v4i32:$XB))]>; // Rounding Instructions def XSRDPI : XX2Form<60, 73, (outs vsfrc:$XT), (ins vsfrc:$XB), "xsrdpi $XT, $XB", IIC_VecFP, [(set f64:$XT, (fround f64:$XB))]>; def XSRDPIC : XX2Form<60, 107, (outs vsfrc:$XT), (ins vsfrc:$XB), "xsrdpic $XT, $XB", IIC_VecFP, [(set f64:$XT, (fnearbyint f64:$XB))]>; def XSRDPIM : XX2Form<60, 121, (outs vsfrc:$XT), (ins vsfrc:$XB), "xsrdpim $XT, $XB", IIC_VecFP, [(set f64:$XT, (ffloor f64:$XB))]>; def XSRDPIP : XX2Form<60, 105, (outs vsfrc:$XT), (ins vsfrc:$XB), "xsrdpip $XT, $XB", IIC_VecFP, [(set f64:$XT, (fceil f64:$XB))]>; def XSRDPIZ : XX2Form<60, 89, (outs vsfrc:$XT), (ins vsfrc:$XB), "xsrdpiz $XT, $XB", IIC_VecFP, [(set f64:$XT, (ftrunc f64:$XB))]>; def XVRDPI : XX2Form<60, 201, (outs vsrc:$XT), (ins vsrc:$XB), "xvrdpi $XT, $XB", IIC_VecFP, [(set v2f64:$XT, (fround v2f64:$XB))]>; def XVRDPIC : XX2Form<60, 235, (outs vsrc:$XT), (ins vsrc:$XB), "xvrdpic $XT, $XB", IIC_VecFP, [(set v2f64:$XT, (fnearbyint v2f64:$XB))]>; def XVRDPIM : XX2Form<60, 249, (outs vsrc:$XT), (ins vsrc:$XB), "xvrdpim $XT, $XB", IIC_VecFP, [(set v2f64:$XT, (ffloor v2f64:$XB))]>; def XVRDPIP : XX2Form<60, 233, (outs vsrc:$XT), (ins vsrc:$XB), "xvrdpip $XT, $XB", IIC_VecFP, [(set v2f64:$XT, (fceil v2f64:$XB))]>; def XVRDPIZ : XX2Form<60, 217, (outs vsrc:$XT), (ins vsrc:$XB), "xvrdpiz $XT, $XB", IIC_VecFP, [(set v2f64:$XT, (ftrunc v2f64:$XB))]>; def XVRSPI : XX2Form<60, 137, (outs vsrc:$XT), (ins vsrc:$XB), "xvrspi $XT, $XB", IIC_VecFP, [(set v4f32:$XT, (fround v4f32:$XB))]>; def XVRSPIC : XX2Form<60, 171, (outs vsrc:$XT), (ins vsrc:$XB), "xvrspic $XT, $XB", IIC_VecFP, [(set v4f32:$XT, (fnearbyint v4f32:$XB))]>; def XVRSPIM : XX2Form<60, 185, (outs vsrc:$XT), (ins vsrc:$XB), "xvrspim $XT, $XB", IIC_VecFP, [(set v4f32:$XT, (ffloor v4f32:$XB))]>; def XVRSPIP : XX2Form<60, 169, (outs vsrc:$XT), (ins vsrc:$XB), "xvrspip $XT, $XB", IIC_VecFP, [(set v4f32:$XT, (fceil v4f32:$XB))]>; def XVRSPIZ : XX2Form<60, 153, (outs vsrc:$XT), (ins vsrc:$XB), "xvrspiz $XT, $XB", IIC_VecFP, [(set v4f32:$XT, (ftrunc v4f32:$XB))]>; // Max/Min Instructions let isCommutable = 1 in { def XSMAXDP : XX3Form<60, 160, (outs vsfrc:$XT), (ins 
vsfrc:$XA, vsfrc:$XB), "xsmaxdp $XT, $XA, $XB", IIC_VecFP, [(set vsfrc:$XT, (int_ppc_vsx_xsmaxdp vsfrc:$XA, vsfrc:$XB))]>; def XSMINDP : XX3Form<60, 168, (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), "xsmindp $XT, $XA, $XB", IIC_VecFP, [(set vsfrc:$XT, (int_ppc_vsx_xsmindp vsfrc:$XA, vsfrc:$XB))]>; def XVMAXDP : XX3Form<60, 224, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvmaxdp $XT, $XA, $XB", IIC_VecFP, [(set vsrc:$XT, (int_ppc_vsx_xvmaxdp vsrc:$XA, vsrc:$XB))]>; def XVMINDP : XX3Form<60, 232, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvmindp $XT, $XA, $XB", IIC_VecFP, [(set vsrc:$XT, (int_ppc_vsx_xvmindp vsrc:$XA, vsrc:$XB))]>; def XVMAXSP : XX3Form<60, 192, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvmaxsp $XT, $XA, $XB", IIC_VecFP, [(set vsrc:$XT, (int_ppc_vsx_xvmaxsp vsrc:$XA, vsrc:$XB))]>; def XVMINSP : XX3Form<60, 200, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvminsp $XT, $XA, $XB", IIC_VecFP, [(set vsrc:$XT, (int_ppc_vsx_xvminsp vsrc:$XA, vsrc:$XB))]>; } // isCommutable } // Uses = [RM] // Logical Instructions let isCommutable = 1 in def XXLAND : XX3Form<60, 130, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xxland $XT, $XA, $XB", IIC_VecGeneral, [(set v4i32:$XT, (and v4i32:$XA, v4i32:$XB))]>; def XXLANDC : XX3Form<60, 138, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xxlandc $XT, $XA, $XB", IIC_VecGeneral, [(set v4i32:$XT, (and v4i32:$XA, (vnot_ppc v4i32:$XB)))]>; let isCommutable = 1 in { def XXLNOR : XX3Form<60, 162, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xxlnor $XT, $XA, $XB", IIC_VecGeneral, [(set v4i32:$XT, (vnot_ppc (or v4i32:$XA, v4i32:$XB)))]>; def XXLOR : XX3Form<60, 146, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xxlor $XT, $XA, $XB", IIC_VecGeneral, [(set v4i32:$XT, (or v4i32:$XA, v4i32:$XB))]>; let isCodeGenOnly = 1 in def XXLORf: XX3Form<60, 146, (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), "xxlor $XT, $XA, $XB", IIC_VecGeneral, []>; def XXLXOR : XX3Form<60, 154, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xxlxor $XT, $XA, $XB", IIC_VecGeneral, [(set v4i32:$XT, (xor v4i32:$XA, v4i32:$XB))]>; } // isCommutable let isCodeGenOnly = 1 in def XXLXORz : XX3Form_Zero<60, 154, (outs vsrc:$XT), (ins), "xxlxor $XT, $XT, $XT", IIC_VecGeneral, [(set v4i32:$XT, (v4i32 immAllZerosV))]>; let isCodeGenOnly = 1 in { def XXLXORdpz : XX3Form_SetZero<60, 154, (outs vsfrc:$XT), (ins), "xxlxor $XT, $XT, $XT", IIC_VecGeneral, [(set f64:$XT, (fpimm0))]>; def XXLXORspz : XX3Form_SetZero<60, 154, (outs vssrc:$XT), (ins), "xxlxor $XT, $XT, $XT", IIC_VecGeneral, [(set f32:$XT, (fpimm0))]>; } // Permutation Instructions def XXMRGHW : XX3Form<60, 18, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xxmrghw $XT, $XA, $XB", IIC_VecPerm, []>; def XXMRGLW : XX3Form<60, 50, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xxmrglw $XT, $XA, $XB", IIC_VecPerm, []>; def XXPERMDI : XX3Form_2<60, 10, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM), "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, [(set v2i64:$XT, (PPCxxpermdi v2i64:$XA, v2i64:$XB, imm32SExt16:$DM))]>; let isCodeGenOnly = 1 in def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$DM), "xxpermdi $XT, $XA, $XA, $DM", IIC_VecPerm, []>; def XXSEL : XX4Form<60, 3, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC), "xxsel $XT, $XA, $XB, $XC", IIC_VecPerm, []>; def XXSLDWI : XX3Form_2<60, 2, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$SHW), "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm, [(set v4i32:$XT, (PPCvecshl v4i32:$XA, v4i32:$XB, imm32SExt16:$SHW))]>; + + let isCodeGenOnly = 1 in + def XXSLDWIs : XX3Form_2s<60, 2, 
+ (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$SHW), + "xxsldwi $XT, $XA, $XA, $SHW", IIC_VecPerm, []>; + def XXSPLTW : XX2Form_2<60, 164, (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM), "xxspltw $XT, $XB, $UIM", IIC_VecPerm, [(set v4i32:$XT, (PPCxxsplt v4i32:$XB, imm32SExt16:$UIM))]>; let isCodeGenOnly = 1 in def XXSPLTWs : XX2Form_2<60, 164, (outs vsrc:$XT), (ins vfrc:$XB, u2imm:$UIM), "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>; + } // hasSideEffects } // UseVSXReg = 1 // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after // instruction selection into a branch sequence. let usesCustomInserter = 1, // Expanded after instruction selection. PPC970_Single = 1 in { def SELECT_CC_VSRC: Pseudo<(outs vsrc:$dst), (ins crrc:$cond, vsrc:$T, vsrc:$F, i32imm:$BROPC), "#SELECT_CC_VSRC", []>; def SELECT_VSRC: Pseudo<(outs vsrc:$dst), (ins crbitrc:$cond, vsrc:$T, vsrc:$F), "#SELECT_VSRC", [(set v2f64:$dst, (select i1:$cond, v2f64:$T, v2f64:$F))]>; def SELECT_CC_VSFRC: Pseudo<(outs f8rc:$dst), (ins crrc:$cond, f8rc:$T, f8rc:$F, i32imm:$BROPC), "#SELECT_CC_VSFRC", []>; def SELECT_VSFRC: Pseudo<(outs f8rc:$dst), (ins crbitrc:$cond, f8rc:$T, f8rc:$F), "#SELECT_VSFRC", [(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>; def SELECT_CC_VSSRC: Pseudo<(outs f4rc:$dst), (ins crrc:$cond, f4rc:$T, f4rc:$F, i32imm:$BROPC), "#SELECT_CC_VSSRC", []>; def SELECT_VSSRC: Pseudo<(outs f4rc:$dst), (ins crbitrc:$cond, f4rc:$T, f4rc:$F), "#SELECT_VSSRC", [(set f32:$dst, (select i1:$cond, f32:$T, f32:$F))]>; } // usesCustomInserter } // AddedComplexity def : InstAlias<"xvmovdp $XT, $XB", (XVCPSGNDP vsrc:$XT, vsrc:$XB, vsrc:$XB)>; def : InstAlias<"xvmovsp $XT, $XB", (XVCPSGNSP vsrc:$XT, vsrc:$XB, vsrc:$XB)>; def : InstAlias<"xxspltd $XT, $XB, 0", (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>; def : InstAlias<"xxspltd $XT, $XB, 1", (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>; def : InstAlias<"xxmrghd $XT, $XA, $XB", (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 0)>; def : InstAlias<"xxmrgld $XT, $XA, $XB", (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>; def : InstAlias<"xxswapd $XT, $XB", (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>; def : InstAlias<"xxspltd $XT, $XB, 0", (XXPERMDIs vsrc:$XT, vsfrc:$XB, 0)>; def : InstAlias<"xxspltd $XT, $XB, 1", (XXPERMDIs vsrc:$XT, vsfrc:$XB, 3)>; def : InstAlias<"xxswapd $XT, $XB", (XXPERMDIs vsrc:$XT, vsfrc:$XB, 2)>; let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. 
def : Pat<(v4i32 (vnot_ppc v4i32:$A)), (v4i32 (XXLNOR $A, $A))>; let Predicates = [IsBigEndian] in { def : Pat<(v2f64 (scalar_to_vector f64:$A)), (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>; def : Pat<(f64 (extractelt v2f64:$S, 0)), (f64 (EXTRACT_SUBREG $S, sub_64))>; def : Pat<(f64 (extractelt v2f64:$S, 1)), (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>; } let Predicates = [IsLittleEndian] in { def : Pat<(v2f64 (scalar_to_vector f64:$A)), (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64), (SUBREG_TO_REG (i64 1), $A, sub_64), 0))>; def : Pat<(f64 (extractelt v2f64:$S, 0)), (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>; def : Pat<(f64 (extractelt v2f64:$S, 1)), (f64 (EXTRACT_SUBREG $S, sub_64))>; } // Additional fnmsub patterns: -a*c + b == -(a*c - b) def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B), (XSNMSUBADP $B, $C, $A)>; def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B), (XSNMSUBADP $B, $C, $A)>; def : Pat<(fma (fneg v2f64:$A), v2f64:$C, v2f64:$B), (XVNMSUBADP $B, $C, $A)>; def : Pat<(fma v2f64:$A, (fneg v2f64:$C), v2f64:$B), (XVNMSUBADP $B, $C, $A)>; def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B), (XVNMSUBASP $B, $C, $A)>; def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B), (XVNMSUBASP $B, $C, $A)>; def : Pat<(v2f64 (bitconvert v4f32:$A)), (COPY_TO_REGCLASS $A, VSRC)>; def : Pat<(v2f64 (bitconvert v4i32:$A)), (COPY_TO_REGCLASS $A, VSRC)>; def : Pat<(v2f64 (bitconvert v8i16:$A)), (COPY_TO_REGCLASS $A, VSRC)>; def : Pat<(v2f64 (bitconvert v16i8:$A)), (COPY_TO_REGCLASS $A, VSRC)>; def : Pat<(v4f32 (bitconvert v2f64:$A)), (COPY_TO_REGCLASS $A, VRRC)>; def : Pat<(v4i32 (bitconvert v2f64:$A)), (COPY_TO_REGCLASS $A, VRRC)>; def : Pat<(v8i16 (bitconvert v2f64:$A)), (COPY_TO_REGCLASS $A, VRRC)>; def : Pat<(v16i8 (bitconvert v2f64:$A)), (COPY_TO_REGCLASS $A, VRRC)>; def : Pat<(v2i64 (bitconvert v4f32:$A)), (COPY_TO_REGCLASS $A, VSRC)>; def : Pat<(v2i64 (bitconvert v4i32:$A)), (COPY_TO_REGCLASS $A, VSRC)>; def : Pat<(v2i64 (bitconvert v8i16:$A)), (COPY_TO_REGCLASS $A, VSRC)>; def : Pat<(v2i64 (bitconvert v16i8:$A)), (COPY_TO_REGCLASS $A, VSRC)>; def : Pat<(v4f32 (bitconvert v2i64:$A)), (COPY_TO_REGCLASS $A, VRRC)>; def : Pat<(v4i32 (bitconvert v2i64:$A)), (COPY_TO_REGCLASS $A, VRRC)>; def : Pat<(v8i16 (bitconvert v2i64:$A)), (COPY_TO_REGCLASS $A, VRRC)>; def : Pat<(v16i8 (bitconvert v2i64:$A)), (COPY_TO_REGCLASS $A, VRRC)>; def : Pat<(v2f64 (bitconvert v2i64:$A)), (COPY_TO_REGCLASS $A, VRRC)>; def : Pat<(v2i64 (bitconvert v2f64:$A)), (COPY_TO_REGCLASS $A, VRRC)>; def : Pat<(v2f64 (bitconvert v1i128:$A)), (COPY_TO_REGCLASS $A, VRRC)>; def : Pat<(v1i128 (bitconvert v2f64:$A)), (COPY_TO_REGCLASS $A, VRRC)>; // sign extension patterns // To extend "in place" from v2i32 to v2i64, we have input data like: // | undef | i32 | undef | i32 | // but xvcvsxwdp expects the input in big-Endian format: // | i32 | undef | i32 | undef | // so we need to shift everything to the left by one i32 (word) before // the conversion. def : Pat<(sext_inreg v2i64:$C, v2i32), (XVCVDPSXDS (XVCVSXWDP (XXSLDWI $C, $C, 1)))>; def : Pat<(v2f64 (sint_to_fp (sext_inreg v2i64:$C, v2i32))), (XVCVSXWDP (XXSLDWI $C, $C, 1))>; def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 0)), (v2f64 (XVCVSXWDP (v2i64 (XXMRGHW $C, $C))))>; def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 1)), (v2f64 (XVCVSXWDP (v2i64 (XXMRGLW $C, $C))))>; def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 0)), (v2f64 (XVCVUXWDP (v2i64 (XXMRGHW $C, $C))))>; def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)), (v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>; // Loads. 
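// Note (informal): lxvd2x/stxvd2x and lxvw4x/stxvw4x access storage in
// big-endian element order, so on little-endian subtargets the in-register
// image is element-swapped and compensating swaps are cleaned up later by the
// VSX swap-removal pass. That is why plain vector loads and stores are mapped
// directly to these instructions only under the IsBigEndian predicate below.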
let Predicates = [HasVSX, HasOnlySwappingMemOps] in { def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>; // Stores. def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>; def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; } let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in { def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>; def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVW4X xoaddr:$src)>; def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; def : Pat<(store v4i32:$XT, xoaddr:$dst), (STXVW4X $XT, xoaddr:$dst)>; def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>; } // Permutes. def : Pat<(v2f64 (PPCxxswapd v2f64:$src)), (XXPERMDI $src, $src, 2)>; def : Pat<(v2i64 (PPCxxswapd v2i64:$src)), (XXPERMDI $src, $src, 2)>; def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>; def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>; def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>; // PPCvecshl XT, XA, XA, 2 can be selected to both XXSLDWI XT,XA,XA,2 and // XXSWAPD XT,XA (i.e. XXPERMDI XT,XA,XA,2), the later one is more profitable. def : Pat<(v4i32 (PPCvecshl v4i32:$src, v4i32:$src, 2)), (XXPERMDI $src, $src, 2)>; // Selects. def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)), (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULT)), (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLE)), (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULE)), (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETEQ)), (SELECT_VSRC (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGE)), (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGE)), (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGT)), (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGT)), (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETNE)), (SELECT_VSRC (CRXOR $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)), (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)), (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)), (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)), (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, 
f64:$tval, f64:$fval, SETEQ)), (SELECT_VSFRC (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)), (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)), (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)), (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)), (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)), (SELECT_VSFRC (CRXOR $lhs, $rhs), $tval, $fval)>; // Divides. def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B), (XVDIVSP $A, $B)>; def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B), (XVDIVDP $A, $B)>; // Reciprocal estimate def : Pat<(int_ppc_vsx_xvresp v4f32:$A), (XVRESP $A)>; def : Pat<(int_ppc_vsx_xvredp v2f64:$A), (XVREDP $A)>; // Recip. square root estimate def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A), (XVRSQRTESP $A)>; def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A), (XVRSQRTEDP $A)>; let Predicates = [IsLittleEndian] in { def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), (f64 (XSCVSXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>; def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), (f64 (XSCVUXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>; } // IsLittleEndian let Predicates = [IsBigEndian] in { def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), (f64 (XSCVSXDDP (COPY_TO_REGCLASS $S, VSFRC)))>; def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), (f64 (XSCVUXDDP (COPY_TO_REGCLASS $S, VSFRC)))>; def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; } // IsBigEndian } // AddedComplexity } // HasVSX def ScalarLoads { dag Li8 = (i32 (extloadi8 xoaddr:$src)); dag ZELi8 = (i32 (zextloadi8 xoaddr:$src)); dag ZELi8i64 = (i64 (zextloadi8 xoaddr:$src)); dag SELi8 = (i32 (sext_inreg (extloadi8 xoaddr:$src), i8)); dag SELi8i64 = (i64 (sext_inreg (extloadi8 xoaddr:$src), i8)); dag Li16 = (i32 (extloadi16 xoaddr:$src)); dag ZELi16 = (i32 (zextloadi16 xoaddr:$src)); dag ZELi16i64 = (i64 (zextloadi16 xoaddr:$src)); dag SELi16 = (i32 (sextloadi16 xoaddr:$src)); dag SELi16i64 = (i64 (sextloadi16 xoaddr:$src)); dag Li32 = (i32 (load xoaddr:$src)); } // The following VSX instructions were introduced in Power ISA 2.07 /* FIXME: if the operands are v2i64, these patterns will not match. we should define new patterns or otherwise match the same patterns when the elements are larger than i32. */ def HasP8Vector : Predicate<"PPCSubTarget->hasP8Vector()">; def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">; def NoP9Vector : Predicate<"!PPCSubTarget->hasP9Vector()">; let Predicates = [HasP8Vector] in { let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. 
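// Additional VSX vector logical operations (eqv, nand, orc) introduced in
// ISA 2.07.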
let isCommutable = 1, UseVSXReg = 1 in { def XXLEQV : XX3Form<60, 186, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xxleqv $XT, $XA, $XB", IIC_VecGeneral, [(set v4i32:$XT, (vnot_ppc (xor v4i32:$XA, v4i32:$XB)))]>; def XXLNAND : XX3Form<60, 178, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xxlnand $XT, $XA, $XB", IIC_VecGeneral, [(set v4i32:$XT, (vnot_ppc (and v4i32:$XA, v4i32:$XB)))]>; } // isCommutable, UseVSXReg def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B), (XXLEQV $A, $B)>; let UseVSXReg = 1 in { def XXLORC : XX3Form<60, 170, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xxlorc $XT, $XA, $XB", IIC_VecGeneral, [(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>; // VSX scalar loads introduced in ISA 2.07 let mayLoad = 1, mayStore = 0 in { let CodeSize = 3 in def LXSSPX : XX1Form_memOp<31, 524, (outs vssrc:$XT), (ins memrr:$src), "lxsspx $XT, $src", IIC_LdStLFD, []>; def LXSIWAX : XX1Form_memOp<31, 76, (outs vsfrc:$XT), (ins memrr:$src), "lxsiwax $XT, $src", IIC_LdStLFD, []>; def LXSIWZX : XX1Form_memOp<31, 12, (outs vsfrc:$XT), (ins memrr:$src), "lxsiwzx $XT, $src", IIC_LdStLFD, []>; // Please note let isPseudo = 1 is not part of class Pseudo<>. Missing it // would cause these Pseudos are not expanded in expandPostRAPseudos() let isPseudo = 1 in { // Pseudo instruction XFLOADf32 will be expanded to LXSSPX or LFSX later let CodeSize = 3 in def XFLOADf32 : PseudoXFormMemOp<(outs vssrc:$XT), (ins memrr:$src), "#XFLOADf32", [(set f32:$XT, (load xoaddr:$src))]>; // Pseudo instruction LIWAX will be expanded to LXSIWAX or LFIWAX later def LIWAX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src), "#LIWAX", [(set f64:$XT, (PPClfiwax xoaddr:$src))]>; // Pseudo instruction LIWZX will be expanded to LXSIWZX or LFIWZX later def LIWZX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src), "#LIWZX", [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>; } } // mayLoad // VSX scalar stores introduced in ISA 2.07 let mayStore = 1, mayLoad = 0 in { let CodeSize = 3 in def STXSSPX : XX1Form_memOp<31, 652, (outs), (ins vssrc:$XT, memrr:$dst), "stxsspx $XT, $dst", IIC_LdStSTFD, []>; def STXSIWX : XX1Form_memOp<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst), "stxsiwx $XT, $dst", IIC_LdStSTFD, []>; // Please note let isPseudo = 1 is not part of class Pseudo<>. 
Missing it // would cause these Pseudos are not expanded in expandPostRAPseudos() let isPseudo = 1 in { // Pseudo instruction XFSTOREf32 will be expanded to STXSSPX or STFSX later let CodeSize = 3 in def XFSTOREf32 : PseudoXFormMemOp<(outs), (ins vssrc:$XT, memrr:$dst), "#XFSTOREf32", [(store f32:$XT, xoaddr:$dst)]>; // Pseudo instruction STIWX will be expanded to STXSIWX or STFIWX later def STIWX : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst), "#STIWX", [(PPCstfiwx f64:$XT, xoaddr:$dst)]>; } } // mayStore } // UseVSXReg = 1 def : Pat<(f64 (extloadf32 xoaddr:$src)), (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$src), VSFRC)>; def : Pat<(f32 (fpround (f64 (extloadf32 xoaddr:$src)))), (f32 (XFLOADf32 xoaddr:$src))>; def : Pat<(f64 (fpextend f32:$src)), (COPY_TO_REGCLASS $src, VSFRC)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)), (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)), (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)), (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)), (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)), (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)), (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)), (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)), (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)), (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)), (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>; let UseVSXReg = 1 in { // VSX Elementary Scalar FP arithmetic (SP) let isCommutable = 1 in { def XSADDSP : XX3Form<60, 0, (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), "xsaddsp $XT, $XA, $XB", IIC_VecFP, [(set f32:$XT, (fadd f32:$XA, f32:$XB))]>; def XSMULSP : XX3Form<60, 16, (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), "xsmulsp $XT, $XA, $XB", IIC_VecFP, [(set f32:$XT, (fmul f32:$XA, f32:$XB))]>; } // isCommutable def XSDIVSP : XX3Form<60, 24, (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), "xsdivsp $XT, $XA, $XB", IIC_FPDivS, [(set f32:$XT, (fdiv f32:$XA, f32:$XB))]>; def XSRESP : XX2Form<60, 26, (outs vssrc:$XT), (ins vssrc:$XB), "xsresp $XT, $XB", IIC_VecFP, [(set f32:$XT, (PPCfre f32:$XB))]>; def XSRSP : XX2Form<60, 281, (outs vssrc:$XT), (ins vsfrc:$XB), "xsrsp $XT, $XB", IIC_VecFP, []>; def XSSQRTSP : XX2Form<60, 11, (outs vssrc:$XT), (ins vssrc:$XB), "xssqrtsp $XT, $XB", IIC_FPSqrtS, [(set f32:$XT, (fsqrt f32:$XB))]>; def XSRSQRTESP : XX2Form<60, 10, (outs vssrc:$XT), (ins vssrc:$XB), "xsrsqrtesp $XT, $XB", IIC_VecFP, [(set f32:$XT, (PPCfrsqrte f32:$XB))]>; def XSSUBSP : XX3Form<60, 8, (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), "xssubsp $XT, $XA, $XB", IIC_VecFP, [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>; // FMA Instructions let BaseName = "XSMADDASP" in { let isCommutable = 1 in def XSMADDASP : XX3Form<60, 1, (outs vssrc:$XT), (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), "xsmaddasp $XT, $XA, $XB", IIC_VecFP, [(set f32:$XT, (fma f32:$XA, f32:$XB, f32:$XTi))]>, RegConstraint<"$XTi = 
$XT">, NoEncode<"$XTi">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XSMADDMSP : XX3Form<60, 9, (outs vssrc:$XT), (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), "xsmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; } let BaseName = "XSMSUBASP" in { let isCommutable = 1 in def XSMSUBASP : XX3Form<60, 17, (outs vssrc:$XT), (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), "xsmsubasp $XT, $XA, $XB", IIC_VecFP, [(set f32:$XT, (fma f32:$XA, f32:$XB, (fneg f32:$XTi)))]>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XSMSUBMSP : XX3Form<60, 25, (outs vssrc:$XT), (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), "xsmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; } let BaseName = "XSNMADDASP" in { let isCommutable = 1 in def XSNMADDASP : XX3Form<60, 129, (outs vssrc:$XT), (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), "xsnmaddasp $XT, $XA, $XB", IIC_VecFP, [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB, f32:$XTi)))]>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XSNMADDMSP : XX3Form<60, 137, (outs vssrc:$XT), (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), "xsnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; } let BaseName = "XSNMSUBASP" in { let isCommutable = 1 in def XSNMSUBASP : XX3Form<60, 145, (outs vssrc:$XT), (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), "xsnmsubasp $XT, $XA, $XB", IIC_VecFP, [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB, (fneg f32:$XTi))))]>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XSNMSUBMSP : XX3Form<60, 153, (outs vssrc:$XT), (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), "xsnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; } // Single Precision Conversions (FP <-> INT) def XSCVSXDSP : XX2Form<60, 312, (outs vssrc:$XT), (ins vsfrc:$XB), "xscvsxdsp $XT, $XB", IIC_VecFP, [(set f32:$XT, (PPCfcfids f64:$XB))]>; def XSCVUXDSP : XX2Form<60, 296, (outs vssrc:$XT), (ins vsfrc:$XB), "xscvuxdsp $XT, $XB", IIC_VecFP, [(set f32:$XT, (PPCfcfidus f64:$XB))]>; // Conversions between vector and scalar single precision def XSCVDPSPN : XX2Form<60, 267, (outs vsrc:$XT), (ins vssrc:$XB), "xscvdpspn $XT, $XB", IIC_VecFP, []>; def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB), "xscvspdpn $XT, $XB", IIC_VecFP, []>; } // UseVSXReg = 1 let Predicates = [IsLittleEndian] in { def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))), (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))), (f32 (XSCVSXDSP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>; def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))), (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))), (f32 (XSCVUXDSP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>; } let Predicates = [IsBigEndian] in { def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))), (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S, VSFRC)))>; def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))), (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))), (f32 (XSCVUXDSP 
(COPY_TO_REGCLASS $S, VSFRC)))>; def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))), (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; } - def : Pat<(v4i32 (scalar_to_vector ScalarLoads.Li32)), - (v4i32 (XXSPLTWs (LIWAX xoaddr:$src), 1))>; // Instructions for converting float to i64 feeding a store. let Predicates = [NoP9Vector] in { def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 8), (STXSDX (XSCVDPSXDS f64:$src), xoaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 8), (STXSDX (XSCVDPUXDS f64:$src), xoaddr:$dst)>; } // Instructions for converting float to i32 feeding a store. def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 4), (STIWX (XSCVDPSXWS f64:$src), xoaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 4), (STIWX (XSCVDPUXWS f64:$src), xoaddr:$dst)>; } // AddedComplexity = 400 } // HasP8Vector let UseVSXReg = 1, AddedComplexity = 400 in { let Predicates = [HasDirectMove] in { // VSX direct move instructions def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT), "mfvsrd $rA, $XT", IIC_VecGeneral, [(set i64:$rA, (PPCmfvsr f64:$XT))]>, Requires<[In64BitMode]>; let isCodeGenOnly = 1 in def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vrrc:$XT), "mfvsrd $rA, $XT", IIC_VecGeneral, []>, Requires<[In64BitMode]>; def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT), "mfvsrwz $rA, $XT", IIC_VecGeneral, [(set i32:$rA, (PPCmfvsr f64:$XT))]>; def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA), "mtvsrd $XT, $rA", IIC_VecGeneral, [(set f64:$XT, (PPCmtvsra i64:$rA))]>, Requires<[In64BitMode]>; def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA), "mtvsrwa $XT, $rA", IIC_VecGeneral, [(set f64:$XT, (PPCmtvsra i32:$rA))]>; def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA), "mtvsrwz $XT, $rA", IIC_VecGeneral, [(set f64:$XT, (PPCmtvsrz i32:$rA))]>; } // HasDirectMove let Predicates = [IsISA3_0, HasDirectMove] in { def MTVSRWS: XX1_RS6_RD5_XO<31, 403, (outs vsrc:$XT), (ins gprc:$rA), "mtvsrws $XT, $rA", IIC_VecGeneral, []>; def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT), (ins g8rc_nox0:$rA, g8rc:$rB), "mtvsrdd $XT, $rA, $rB", IIC_VecGeneral, []>, Requires<[In64BitMode]>; def MFVSRLD: XX1_RS6_RD5_XO<31, 307, (outs g8rc:$rA), (ins vsrc:$XT), "mfvsrld $rA, $XT", IIC_VecGeneral, []>, Requires<[In64BitMode]>; } // IsISA3_0, HasDirectMove } // UseVSXReg = 1 // We want to parse this from asm, but we don't want to emit this as it would // be emitted with a VSX reg. So leave Emit = 0 here. def : InstAlias<"mfvrd $rA, $XT", (MFVRD g8rc:$rA, vrrc:$XT), 0>; def : InstAlias<"mffprd $rA, $src", (MFVSRD g8rc:$rA, f8rc:$src)>; /* Direct moves of various widths from GPR's into VSR's. Each move lines the value up into element 0 (both BE and LE). Namely, entities smaller than a doubleword are shifted left and moved for BE. For LE, they're moved, then swapped to go into the least significant element of the VSR. 
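For example, BE_WORD_0 below lines a 32-bit value up into doubleword element 0
by rotating it into the high half of a GPR (RLDICR ..., 32, 31) and then moving
it with mtvsrd, whereas the LE_WORD_* dags perform the move first and then swap
the doublewords with an xxpermdi whose DM field is 2.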
*/ def MovesToVSR { dag BE_BYTE_0 = (MTVSRD (RLDICR (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 56, 7)); dag BE_HALF_0 = (MTVSRD (RLDICR (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 48, 15)); dag BE_WORD_0 = (MTVSRD (RLDICR (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 32, 31)); dag BE_DWORD_0 = (MTVSRD $A); dag LE_MTVSRW = (MTVSRD (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32)); dag LE_WORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), LE_MTVSRW, sub_64)); dag LE_WORD_0 = (XXPERMDI LE_WORD_1, LE_WORD_1, 2); dag LE_DWORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), BE_DWORD_0, sub_64)); dag LE_DWORD_0 = (XXPERMDI LE_DWORD_1, LE_DWORD_1, 2); } /* Patterns for extracting elements out of vectors. Integer elements are extracted using direct move operations. Patterns for extracting elements whose indices are not available at compile time are also provided with various _VARIABLE_ patterns. The numbering for the DAG's is for LE, but when used on BE, the correct LE element can just be used (i.e. LE_BYTE_2 == BE_BYTE_13). */ def VectorExtractions { // Doubleword extraction dag LE_DWORD_0 = (MFVSRD (EXTRACT_SUBREG (XXPERMDI (COPY_TO_REGCLASS $S, VSRC), (COPY_TO_REGCLASS $S, VSRC), 2), sub_64)); dag LE_DWORD_1 = (MFVSRD (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64)); // Word extraction dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64)); dag LE_WORD_1 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 1), sub_64)); dag LE_WORD_2 = (MFVSRWZ (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64)); dag LE_WORD_3 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 3), sub_64)); // Halfword extraction dag LE_HALF_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 48), sub_32)); dag LE_HALF_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 48), sub_32)); dag LE_HALF_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 48), sub_32)); dag LE_HALF_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 48), sub_32)); dag LE_HALF_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 48), sub_32)); dag LE_HALF_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 48), sub_32)); dag LE_HALF_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 48), sub_32)); dag LE_HALF_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 48), sub_32)); // Byte extraction dag LE_BYTE_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 56), sub_32)); dag LE_BYTE_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 56, 56), sub_32)); dag LE_BYTE_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 56), sub_32)); dag LE_BYTE_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 40, 56), sub_32)); dag LE_BYTE_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 56), sub_32)); dag LE_BYTE_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 24, 56), sub_32)); dag LE_BYTE_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 56), sub_32)); dag LE_BYTE_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 8, 56), sub_32)); dag LE_BYTE_8 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 56), sub_32)); dag LE_BYTE_9 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 56, 56), sub_32)); dag LE_BYTE_10 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 56), sub_32)); dag LE_BYTE_11 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 40, 56), sub_32)); dag LE_BYTE_12 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 56), sub_32)); dag LE_BYTE_13 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 24, 56), sub_32)); dag LE_BYTE_14 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 56), sub_32)); dag LE_BYTE_15 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 8, 56), sub_32)); /* Variable element number (BE and LE patterns 
must be specified separately) This is a rather involved process. Conceptually, this is how the move is accomplished: 1. Identify which doubleword contains the element 2. Shift in the VMX register so that the correct doubleword is correctly lined up for the MFVSRD 3. Perform the move so that the element (along with some extra stuff) is in the GPR 4. Right shift within the GPR so that the element is right-justified Of course, the index is an element number which has a different meaning on LE/BE so the patterns have to be specified separately. Note: The final result will be the element right-justified with high order bits being arbitrarily defined (namely, whatever was in the vector register to the left of the value originally). */ /* LE variable byte Number 1. above: - For elements 0-7, we shift left by 8 bytes since they're on the right - For elements 8-15, we need not shift (shift left by zero bytes) This is accomplished by inverting the bits of the index and AND-ing with 0x8 (i.e. clearing all bits of the index and inverting bit 60). */ dag LE_VBYTE_PERM_VEC = (v16i8 (LVSL ZERO8, (ANDC8 (LI8 8), $Idx))); // Number 2. above: // - Now that we set up the shift amount, we shift in the VMX register dag LE_VBYTE_PERMUTE = (v16i8 (VPERM $S, $S, LE_VBYTE_PERM_VEC)); // Number 3. above: // - The doubleword containing our element is moved to a GPR dag LE_MV_VBYTE = (MFVSRD (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS LE_VBYTE_PERMUTE, VSRC)), sub_64)); /* Number 4. above: - Truncate the element number to the range 0-7 (8-15 are symmetrical and out of range values are truncated accordingly) - Multiply by 8 as we need to shift right by the number of bits, not bytes - Shift right in the GPR by the calculated value */ dag LE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 7), $Idx), 3, 60), sub_32); dag LE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD LE_MV_VBYTE, LE_VBYTE_SHIFT), sub_32); /* LE variable halfword Number 1. above: - For elements 0-3, we shift left by 8 since they're on the right - For elements 4-7, we need not shift (shift left by zero bytes) Similarly to the byte pattern, we invert the bits of the index, but we AND with 0x4 (i.e. clear all bits of the index and invert bit 61). Of course, the shift is still by 8 bytes, so we must multiply by 2. */ dag LE_VHALF_PERM_VEC = (v16i8 (LVSL ZERO8, (RLDICR (ANDC8 (LI8 4), $Idx), 1, 62))); // Number 2. above: // - Now that we set up the shift amount, we shift in the VMX register dag LE_VHALF_PERMUTE = (v16i8 (VPERM $S, $S, LE_VHALF_PERM_VEC)); // Number 3. above: // - The doubleword containing our element is moved to a GPR dag LE_MV_VHALF = (MFVSRD (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS LE_VHALF_PERMUTE, VSRC)), sub_64)); /* Number 4. above: - Truncate the element number to the range 0-3 (4-7 are symmetrical and out of range values are truncated accordingly) - Multiply by 16 as we need to shift right by the number of bits - Shift right in the GPR by the calculated value */ dag LE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 3), $Idx), 4, 59), sub_32); dag LE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD LE_MV_VHALF, LE_VHALF_SHIFT), sub_32); /* LE variable word Number 1. above: - For elements 0-1, we shift left by 8 since they're on the right - For elements 2-3, we need not shift */ dag LE_VWORD_PERM_VEC = (v16i8 (LVSL ZERO8, (RLDICR (ANDC8 (LI8 2), $Idx), 2, 61))); // Number 2. above: // - Now that we set up the shift amount, we shift in the VMX register dag LE_VWORD_PERMUTE = (v16i8 (VPERM $S, $S, LE_VWORD_PERM_VEC)); // Number 3. 
above: // - The doubleword containing our element is moved to a GPR dag LE_MV_VWORD = (MFVSRD (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS LE_VWORD_PERMUTE, VSRC)), sub_64)); /* Number 4. above: - Truncate the element number to the range 0-1 (2-3 are symmetrical and out of range values are truncated accordingly) - Multiply by 32 as we need to shift right by the number of bits - Shift right in the GPR by the calculated value */ dag LE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 1), $Idx), 5, 58), sub_32); dag LE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD LE_MV_VWORD, LE_VWORD_SHIFT), sub_32); /* LE variable doubleword Number 1. above: - For element 0, we shift left by 8 since it's on the right - For element 1, we need not shift */ dag LE_VDWORD_PERM_VEC = (v16i8 (LVSL ZERO8, (RLDICR (ANDC8 (LI8 1), $Idx), 3, 60))); // Number 2. above: // - Now that we set up the shift amount, we shift in the VMX register dag LE_VDWORD_PERMUTE = (v16i8 (VPERM $S, $S, LE_VDWORD_PERM_VEC)); // Number 3. above: // - The doubleword containing our element is moved to a GPR // - Number 4. is not needed for the doubleword as the value is 64-bits dag LE_VARIABLE_DWORD = (MFVSRD (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS LE_VDWORD_PERMUTE, VSRC)), sub_64)); /* LE variable float - Shift the vector to line up the desired element to BE Word 0 - Convert 32-bit float to a 64-bit single precision float */ dag LE_VFLOAT_PERM_VEC = (v16i8 (LVSL ZERO8, (RLDICR (XOR8 (LI8 3), $Idx), 2, 61))); dag LE_VFLOAT_PERMUTE = (VPERM $S, $S, LE_VFLOAT_PERM_VEC); dag LE_VARIABLE_FLOAT = (XSCVSPDPN LE_VFLOAT_PERMUTE); /* LE variable double Same as the LE doubleword except there is no move. */ dag LE_VDOUBLE_PERMUTE = (v16i8 (VPERM (v16i8 (COPY_TO_REGCLASS $S, VRRC)), (v16i8 (COPY_TO_REGCLASS $S, VRRC)), LE_VDWORD_PERM_VEC)); dag LE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS LE_VDOUBLE_PERMUTE, VSRC); /* BE variable byte The algorithm here is the same as the LE variable byte except: - The shift in the VMX register is by 0/8 for opposite element numbers so we simply AND the element number with 0x8 - The order of elements after the move to GPR is reversed, so we invert the bits of the index prior to truncating to the range 0-7 */ dag BE_VBYTE_PERM_VEC = (v16i8 (LVSL ZERO8, (ANDIo8 $Idx, 8))); dag BE_VBYTE_PERMUTE = (v16i8 (VPERM $S, $S, BE_VBYTE_PERM_VEC)); dag BE_MV_VBYTE = (MFVSRD (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS BE_VBYTE_PERMUTE, VSRC)), sub_64)); dag BE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 7), $Idx), 3, 60), sub_32); dag BE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD BE_MV_VBYTE, BE_VBYTE_SHIFT), sub_32); /* BE variable halfword The algorithm here is the same as the LE variable halfword except: - The shift in the VMX register is by 0/8 for opposite element numbers so we simply AND the element number with 0x4 and multiply by 2 - The order of elements after the move to GPR is reversed, so we invert the bits of the index prior to truncating to the range 0-3 */ dag BE_VHALF_PERM_VEC = (v16i8 (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 4), 1, 62))); dag BE_VHALF_PERMUTE = (v16i8 (VPERM $S, $S, BE_VHALF_PERM_VEC)); dag BE_MV_VHALF = (MFVSRD (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS BE_VHALF_PERMUTE, VSRC)), sub_64)); dag BE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 3), $Idx), 4, 59), sub_32); dag BE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD BE_MV_VHALF, BE_VHALF_SHIFT), sub_32); /* BE variable word The algorithm is the same as the LE variable word except: - The shift in the VMX register happens for opposite element numbers - The order of elements 
after the move to GPR is reversed, so we invert the bits of the index prior to truncating to the range 0-1 */ dag BE_VWORD_PERM_VEC = (v16i8 (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 2), 2, 61))); dag BE_VWORD_PERMUTE = (v16i8 (VPERM $S, $S, BE_VWORD_PERM_VEC)); dag BE_MV_VWORD = (MFVSRD (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS BE_VWORD_PERMUTE, VSRC)), sub_64)); dag BE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 1), $Idx), 5, 58), sub_32); dag BE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD BE_MV_VWORD, BE_VWORD_SHIFT), sub_32); /* BE variable doubleword Same as the LE doubleword except we shift in the VMX register for opposite element indices. */ dag BE_VDWORD_PERM_VEC = (v16i8 (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 1), 3, 60))); dag BE_VDWORD_PERMUTE = (v16i8 (VPERM $S, $S, BE_VDWORD_PERM_VEC)); dag BE_VARIABLE_DWORD = (MFVSRD (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS BE_VDWORD_PERMUTE, VSRC)), sub_64)); /* BE variable float - Shift the vector to line up the desired element to BE Word 0 - Convert 32-bit float to a 64-bit single precision float */ dag BE_VFLOAT_PERM_VEC = (v16i8 (LVSL ZERO8, (RLDICR $Idx, 2, 61))); dag BE_VFLOAT_PERMUTE = (VPERM $S, $S, BE_VFLOAT_PERM_VEC); dag BE_VARIABLE_FLOAT = (XSCVSPDPN BE_VFLOAT_PERMUTE); /* BE variable double Same as the BE doubleword except there is no move. */ dag BE_VDOUBLE_PERMUTE = (v16i8 (VPERM (v16i8 (COPY_TO_REGCLASS $S, VRRC)), (v16i8 (COPY_TO_REGCLASS $S, VRRC)), BE_VDWORD_PERM_VEC)); dag BE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_VDOUBLE_PERMUTE, VSRC); } def NoP9Altivec : Predicate<"!PPCSubTarget->hasP9Altivec()">; let AddedComplexity = 400 in { // v4f32 scalar <-> vector conversions (BE) let Predicates = [IsBigEndian, HasP8Vector] in { def : Pat<(v4f32 (scalar_to_vector f32:$A)), (v4f32 (XSCVDPSPN $A))>; def : Pat<(f32 (vector_extract v4f32:$S, 0)), (f32 (XSCVSPDPN $S))>; def : Pat<(f32 (vector_extract v4f32:$S, 1)), (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>; def : Pat<(f32 (vector_extract v4f32:$S, 2)), (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>; def : Pat<(f32 (vector_extract v4f32:$S, 3)), (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>; def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)), (f32 VectorExtractions.BE_VARIABLE_FLOAT)>; } // IsBigEndian, HasP8Vector // Variable index vector_extract for v2f64 does not require P8Vector let Predicates = [IsBigEndian, HasVSX] in def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)), (f64 VectorExtractions.BE_VARIABLE_DOUBLE)>; let Predicates = [IsBigEndian, HasDirectMove] in { // v16i8 scalar <-> vector conversions (BE) def : Pat<(v16i8 (scalar_to_vector i32:$A)), (v16i8 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_BYTE_0, sub_64))>; def : Pat<(v8i16 (scalar_to_vector i32:$A)), (v8i16 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_HALF_0, sub_64))>; def : Pat<(v4i32 (scalar_to_vector i32:$A)), (v4i32 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_WORD_0, sub_64))>; def : Pat<(v2i64 (scalar_to_vector i64:$A)), (v2i64 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_DWORD_0, sub_64))>; // v2i64 scalar <-> vector conversions (BE) def : Pat<(i64 (vector_extract v2i64:$S, 0)), (i64 VectorExtractions.LE_DWORD_1)>; def : Pat<(i64 (vector_extract v2i64:$S, 1)), (i64 VectorExtractions.LE_DWORD_0)>; def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)), (i64 VectorExtractions.BE_VARIABLE_DWORD)>; } // IsBigEndian, HasDirectMove let Predicates = [IsBigEndian, HasDirectMove, NoP9Altivec] in { def : Pat<(i32 (vector_extract v16i8:$S, 0)), (i32 VectorExtractions.LE_BYTE_15)>; def : Pat<(i32 (vector_extract v16i8:$S, 1)), (i32 VectorExtractions.LE_BYTE_14)>; def : Pat<(i32 
(vector_extract v16i8:$S, 2)), (i32 VectorExtractions.LE_BYTE_13)>; def : Pat<(i32 (vector_extract v16i8:$S, 3)), (i32 VectorExtractions.LE_BYTE_12)>; def : Pat<(i32 (vector_extract v16i8:$S, 4)), (i32 VectorExtractions.LE_BYTE_11)>; def : Pat<(i32 (vector_extract v16i8:$S, 5)), (i32 VectorExtractions.LE_BYTE_10)>; def : Pat<(i32 (vector_extract v16i8:$S, 6)), (i32 VectorExtractions.LE_BYTE_9)>; def : Pat<(i32 (vector_extract v16i8:$S, 7)), (i32 VectorExtractions.LE_BYTE_8)>; def : Pat<(i32 (vector_extract v16i8:$S, 8)), (i32 VectorExtractions.LE_BYTE_7)>; def : Pat<(i32 (vector_extract v16i8:$S, 9)), (i32 VectorExtractions.LE_BYTE_6)>; def : Pat<(i32 (vector_extract v16i8:$S, 10)), (i32 VectorExtractions.LE_BYTE_5)>; def : Pat<(i32 (vector_extract v16i8:$S, 11)), (i32 VectorExtractions.LE_BYTE_4)>; def : Pat<(i32 (vector_extract v16i8:$S, 12)), (i32 VectorExtractions.LE_BYTE_3)>; def : Pat<(i32 (vector_extract v16i8:$S, 13)), (i32 VectorExtractions.LE_BYTE_2)>; def : Pat<(i32 (vector_extract v16i8:$S, 14)), (i32 VectorExtractions.LE_BYTE_1)>; def : Pat<(i32 (vector_extract v16i8:$S, 15)), (i32 VectorExtractions.LE_BYTE_0)>; def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), (i32 VectorExtractions.BE_VARIABLE_BYTE)>; // v8i16 scalar <-> vector conversions (BE) def : Pat<(i32 (vector_extract v8i16:$S, 0)), (i32 VectorExtractions.LE_HALF_7)>; def : Pat<(i32 (vector_extract v8i16:$S, 1)), (i32 VectorExtractions.LE_HALF_6)>; def : Pat<(i32 (vector_extract v8i16:$S, 2)), (i32 VectorExtractions.LE_HALF_5)>; def : Pat<(i32 (vector_extract v8i16:$S, 3)), (i32 VectorExtractions.LE_HALF_4)>; def : Pat<(i32 (vector_extract v8i16:$S, 4)), (i32 VectorExtractions.LE_HALF_3)>; def : Pat<(i32 (vector_extract v8i16:$S, 5)), (i32 VectorExtractions.LE_HALF_2)>; def : Pat<(i32 (vector_extract v8i16:$S, 6)), (i32 VectorExtractions.LE_HALF_1)>; def : Pat<(i32 (vector_extract v8i16:$S, 7)), (i32 VectorExtractions.LE_HALF_0)>; def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), (i32 VectorExtractions.BE_VARIABLE_HALF)>; // v4i32 scalar <-> vector conversions (BE) def : Pat<(i32 (vector_extract v4i32:$S, 0)), (i32 VectorExtractions.LE_WORD_3)>; def : Pat<(i32 (vector_extract v4i32:$S, 1)), (i32 VectorExtractions.LE_WORD_2)>; def : Pat<(i32 (vector_extract v4i32:$S, 2)), (i32 VectorExtractions.LE_WORD_1)>; def : Pat<(i32 (vector_extract v4i32:$S, 3)), (i32 VectorExtractions.LE_WORD_0)>; def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), (i32 VectorExtractions.BE_VARIABLE_WORD)>; } // IsBigEndian, HasDirectMove, NoP9Altivec // v4f32 scalar <-> vector conversions (LE) let Predicates = [IsLittleEndian, HasP8Vector] in { def : Pat<(v4f32 (scalar_to_vector f32:$A)), (v4f32 (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1))>; def : Pat<(f32 (vector_extract v4f32:$S, 0)), (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>; def : Pat<(f32 (vector_extract v4f32:$S, 1)), (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>; def : Pat<(f32 (vector_extract v4f32:$S, 2)), (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>; def : Pat<(f32 (vector_extract v4f32:$S, 3)), (f32 (XSCVSPDPN $S))>; def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)), (f32 VectorExtractions.LE_VARIABLE_FLOAT)>; } // IsLittleEndian, HasP8Vector // Variable index vector_extract for v2f64 does not require P8Vector let Predicates = [IsLittleEndian, HasVSX] in def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)), (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>; def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X 
xoaddr:$src)>; // Variable index unsigned vector_extract on Power9 let Predicates = [HasP9Altivec, IsLittleEndian] in { def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))), (VEXTUBRX $Idx, $S)>; def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))), (VEXTUHRX (RLWINM8 $Idx, 1, 28, 30), $S)>; def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))), (VEXTUHRX (LI8 0), $S)>; def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))), (VEXTUHRX (LI8 2), $S)>; def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))), (VEXTUHRX (LI8 4), $S)>; def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))), (VEXTUHRX (LI8 6), $S)>; def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))), (VEXTUHRX (LI8 8), $S)>; def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))), (VEXTUHRX (LI8 10), $S)>; def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))), (VEXTUHRX (LI8 12), $S)>; def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))), (VEXTUHRX (LI8 14), $S)>; def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))), (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S)>; def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))), (VEXTUWRX (LI8 0), $S)>; def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))), (VEXTUWRX (LI8 4), $S)>; // For extracting LE word 2, MFVSRWZ is better than VEXTUWRX def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (i32 VectorExtractions.LE_WORD_2), sub_32)>; def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))), (VEXTUWRX (LI8 12), $S)>; def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))), (EXTSW (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S))>; def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))), (EXTSW (VEXTUWRX (LI8 0), $S))>; def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))), (EXTSW (VEXTUWRX (LI8 4), $S))>; // For extracting LE word 2, MFVSRWZ is better than VEXTUWRX def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))), (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (i32 VectorExtractions.LE_WORD_2), sub_32))>; def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))), (EXTSW (VEXTUWRX (LI8 12), $S))>; def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), (i32 (EXTRACT_SUBREG (VEXTUBRX $Idx, $S), sub_32))>; def : Pat<(i32 (vector_extract v16i8:$S, 0)), (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 0), $S), sub_32))>; def : Pat<(i32 (vector_extract v16i8:$S, 1)), (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 1), $S), sub_32))>; def : Pat<(i32 (vector_extract v16i8:$S, 2)), (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 2), $S), sub_32))>; def : Pat<(i32 (vector_extract v16i8:$S, 3)), (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 3), $S), sub_32))>; def : Pat<(i32 (vector_extract v16i8:$S, 4)), (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 4), $S), sub_32))>; def : Pat<(i32 (vector_extract v16i8:$S, 5)), (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 5), $S), sub_32))>; def : Pat<(i32 (vector_extract v16i8:$S, 6)), (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 6), $S), sub_32))>; def : Pat<(i32 (vector_extract v16i8:$S, 7)), (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 7), $S), sub_32))>; def : Pat<(i32 (vector_extract v16i8:$S, 8)), (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 8), $S), sub_32))>; def : Pat<(i32 (vector_extract v16i8:$S, 9)), (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 9), $S), sub_32))>; def : Pat<(i32 (vector_extract v16i8:$S, 10)), (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 10), $S), sub_32))>; def : Pat<(i32 (vector_extract v16i8:$S, 11)), (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 11), $S), sub_32))>; def : Pat<(i32 (vector_extract v16i8:$S, 
12)), (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 12), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 13)), (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 13), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 14)), (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 14), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 15)), (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 15), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), (i32 (EXTRACT_SUBREG (VEXTUHRX (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v8i16:$S, 0)), (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 0), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v8i16:$S, 1)), (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 2), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v8i16:$S, 2)), (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 4), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v8i16:$S, 3)), (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 6), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v8i16:$S, 4)), (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 8), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v8i16:$S, 5)), (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 10), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v8i16:$S, 6)), (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 12), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v8i16:$S, 7)), (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 14), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), (i32 (EXTRACT_SUBREG (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v4i32:$S, 0)), (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 0), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v4i32:$S, 1)), (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 4), $S), sub_32))>;
  // For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
  def : Pat<(i32 (vector_extract v4i32:$S, 2)), (i32 VectorExtractions.LE_WORD_2)>;
  def : Pat<(i32 (vector_extract v4i32:$S, 3)), (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 12), $S), sub_32))>;
}

let Predicates = [HasP9Altivec, IsBigEndian] in {
  def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))), (VEXTUBLX $Idx, $S)>;
  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))), (VEXTUHLX (RLWINM8 $Idx, 1, 28, 30), $S)>;
  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))), (VEXTUHLX (LI8 0), $S)>;
  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))), (VEXTUHLX (LI8 2), $S)>;
  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))), (VEXTUHLX (LI8 4), $S)>;
  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))), (VEXTUHLX (LI8 6), $S)>;
  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))), (VEXTUHLX (LI8 8), $S)>;
  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))), (VEXTUHLX (LI8 10), $S)>;
  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))), (VEXTUHLX (LI8 12), $S)>;
  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))), (VEXTUHLX (LI8 14), $S)>;
  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))), (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S)>;
  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))), (VEXTUWLX (LI8 0), $S)>;
  // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (i32 VectorExtractions.LE_WORD_2), sub_32)>;
  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))), (VEXTUWLX (LI8 8), $S)>;
  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))), (VEXTUWLX (LI8 12), $S)>;
  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))), (EXTSW (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S))>;
  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))), (EXTSW (VEXTUWLX (LI8 0), $S))>;
  // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))), (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (i32 VectorExtractions.LE_WORD_2), sub_32))>;
  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))), (EXTSW (VEXTUWLX (LI8 8), $S))>;
  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))), (EXTSW (VEXTUWLX (LI8 12), $S))>;
  def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), (i32 (EXTRACT_SUBREG (VEXTUBLX $Idx, $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 0)), (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 0), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 1)), (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 1), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 2)), (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 2), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 3)), (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 3), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 4)), (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 4), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 5)), (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 5), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 6)), (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 6), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 7)), (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 7), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 8)), (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 8), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 9)), (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 9), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 10)), (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 10), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 11)), (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 11), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 12)), (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 12), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 13)), (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 13), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 14)), (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 14), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 15)), (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 15), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), (i32 (EXTRACT_SUBREG (VEXTUHLX (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v8i16:$S, 0)), (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 0), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v8i16:$S, 1)), (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 2), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v8i16:$S, 2)), (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 4), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v8i16:$S, 3)), (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 6), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v8i16:$S, 4)), (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 8), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v8i16:$S, 5)), (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 10), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v8i16:$S, 6)), (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 12), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v8i16:$S, 7)), (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 14), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), (i32 (EXTRACT_SUBREG (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v4i32:$S, 0)), (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 0), $S), sub_32))>;
  // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
  def : Pat<(i32 (vector_extract v4i32:$S, 1)), (i32 VectorExtractions.LE_WORD_2)>;
  def : Pat<(i32 
(vector_extract v4i32:$S, 2)), (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 8), $S), sub_32))>; def : Pat<(i32 (vector_extract v4i32:$S, 3)), (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 12), $S), sub_32))>; } let Predicates = [IsLittleEndian, HasDirectMove] in { // v16i8 scalar <-> vector conversions (LE) def : Pat<(v16i8 (scalar_to_vector i32:$A)), (v16i8 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>; def : Pat<(v8i16 (scalar_to_vector i32:$A)), (v8i16 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>; def : Pat<(v4i32 (scalar_to_vector i32:$A)), (v4i32 MovesToVSR.LE_WORD_0)>; def : Pat<(v2i64 (scalar_to_vector i64:$A)), (v2i64 MovesToVSR.LE_DWORD_0)>; // v2i64 scalar <-> vector conversions (LE) def : Pat<(i64 (vector_extract v2i64:$S, 0)), (i64 VectorExtractions.LE_DWORD_0)>; def : Pat<(i64 (vector_extract v2i64:$S, 1)), (i64 VectorExtractions.LE_DWORD_1)>; def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)), (i64 VectorExtractions.LE_VARIABLE_DWORD)>; } // IsLittleEndian, HasDirectMove let Predicates = [IsLittleEndian, HasDirectMove, NoP9Altivec] in { def : Pat<(i32 (vector_extract v16i8:$S, 0)), (i32 VectorExtractions.LE_BYTE_0)>; def : Pat<(i32 (vector_extract v16i8:$S, 1)), (i32 VectorExtractions.LE_BYTE_1)>; def : Pat<(i32 (vector_extract v16i8:$S, 2)), (i32 VectorExtractions.LE_BYTE_2)>; def : Pat<(i32 (vector_extract v16i8:$S, 3)), (i32 VectorExtractions.LE_BYTE_3)>; def : Pat<(i32 (vector_extract v16i8:$S, 4)), (i32 VectorExtractions.LE_BYTE_4)>; def : Pat<(i32 (vector_extract v16i8:$S, 5)), (i32 VectorExtractions.LE_BYTE_5)>; def : Pat<(i32 (vector_extract v16i8:$S, 6)), (i32 VectorExtractions.LE_BYTE_6)>; def : Pat<(i32 (vector_extract v16i8:$S, 7)), (i32 VectorExtractions.LE_BYTE_7)>; def : Pat<(i32 (vector_extract v16i8:$S, 8)), (i32 VectorExtractions.LE_BYTE_8)>; def : Pat<(i32 (vector_extract v16i8:$S, 9)), (i32 VectorExtractions.LE_BYTE_9)>; def : Pat<(i32 (vector_extract v16i8:$S, 10)), (i32 VectorExtractions.LE_BYTE_10)>; def : Pat<(i32 (vector_extract v16i8:$S, 11)), (i32 VectorExtractions.LE_BYTE_11)>; def : Pat<(i32 (vector_extract v16i8:$S, 12)), (i32 VectorExtractions.LE_BYTE_12)>; def : Pat<(i32 (vector_extract v16i8:$S, 13)), (i32 VectorExtractions.LE_BYTE_13)>; def : Pat<(i32 (vector_extract v16i8:$S, 14)), (i32 VectorExtractions.LE_BYTE_14)>; def : Pat<(i32 (vector_extract v16i8:$S, 15)), (i32 VectorExtractions.LE_BYTE_15)>; def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), (i32 VectorExtractions.LE_VARIABLE_BYTE)>; // v8i16 scalar <-> vector conversions (LE) def : Pat<(i32 (vector_extract v8i16:$S, 0)), (i32 VectorExtractions.LE_HALF_0)>; def : Pat<(i32 (vector_extract v8i16:$S, 1)), (i32 VectorExtractions.LE_HALF_1)>; def : Pat<(i32 (vector_extract v8i16:$S, 2)), (i32 VectorExtractions.LE_HALF_2)>; def : Pat<(i32 (vector_extract v8i16:$S, 3)), (i32 VectorExtractions.LE_HALF_3)>; def : Pat<(i32 (vector_extract v8i16:$S, 4)), (i32 VectorExtractions.LE_HALF_4)>; def : Pat<(i32 (vector_extract v8i16:$S, 5)), (i32 VectorExtractions.LE_HALF_5)>; def : Pat<(i32 (vector_extract v8i16:$S, 6)), (i32 VectorExtractions.LE_HALF_6)>; def : Pat<(i32 (vector_extract v8i16:$S, 7)), (i32 VectorExtractions.LE_HALF_7)>; def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), (i32 VectorExtractions.LE_VARIABLE_HALF)>; // v4i32 scalar <-> vector conversions (LE) def : Pat<(i32 (vector_extract v4i32:$S, 0)), (i32 VectorExtractions.LE_WORD_0)>; def : Pat<(i32 (vector_extract v4i32:$S, 1)), (i32 VectorExtractions.LE_WORD_1)>; def : Pat<(i32 (vector_extract v4i32:$S, 2)), (i32 
VectorExtractions.LE_WORD_2)>; def : Pat<(i32 (vector_extract v4i32:$S, 3)), (i32 VectorExtractions.LE_WORD_3)>; def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), (i32 VectorExtractions.LE_VARIABLE_WORD)>; } // IsLittleEndian, HasDirectMove, NoP9Altivec let Predicates = [HasDirectMove, HasVSX] in { // bitconvert f32 -> i32 // (convert to 32-bit fp single, shift right 1 word, move to GPR) def : Pat<(i32 (bitconvert f32:$S)), (i32 (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI (XSCVDPSPN $S), (XSCVDPSPN $S), 3), sub_64)))>; // bitconvert i32 -> f32 // (move to FPR, shift left 1 word, convert to 64-bit fp single) def : Pat<(f32 (bitconvert i32:$A)), (f32 (XSCVSPDPN (XXSLDWI MovesToVSR.LE_WORD_1, MovesToVSR.LE_WORD_1, 1)))>; // bitconvert f64 -> i64 // (move to GPR, nothing else needed) def : Pat<(i64 (bitconvert f64:$S)), (i64 (MFVSRD $S))>; // bitconvert i64 -> f64 // (move to FPR, nothing else needed) def : Pat<(f64 (bitconvert i64:$S)), (f64 (MTVSRD $S))>; } // Materialize a zero-vector of long long def : Pat<(v2i64 immAllZerosV), (v2i64 (XXLXORz))>; } def AlignValues { dag F32_TO_BE_WORD1 = (v4f32 (XXSLDWI (XSCVDPSPN $B), (XSCVDPSPN $B), 3)); dag I32_TO_BE_WORD1 = (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC); } // The following VSX instructions were introduced in Power ISA 3.0 def HasP9Vector : Predicate<"PPCSubTarget->hasP9Vector()">; let AddedComplexity = 400, Predicates = [HasP9Vector] in { // [PO VRT XO VRB XO /] class X_VT5_XO5_VB5 opcode, bits<5> xo2, bits<10> xo, string opc, list pattern> : X_RD5_XO5_RS5; // [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /] class X_VT5_XO5_VB5_Ro opcode, bits<5> xo2, bits<10> xo, string opc, list pattern> : X_VT5_XO5_VB5, isDOT; // [PO VRT XO VRB XO /], but the VRB is only used the left 64 bits (or less), // So we use different operand class for VRB class X_VT5_XO5_VB5_TyVB opcode, bits<5> xo2, bits<10> xo, string opc, RegisterOperand vbtype, list pattern> : X_RD5_XO5_RS5; // [PO VRT XO VRB XO /] class X_VT5_XO5_VB5_VSFR opcode, bits<5> xo2, bits<10> xo, string opc, list pattern> : X_RD5_XO5_RS5; // [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /] class X_VT5_XO5_VB5_VSFR_Ro opcode, bits<5> xo2, bits<10> xo, string opc, list pattern> : X_VT5_XO5_VB5_VSFR, isDOT; let UseVSXReg = 1 in { // [PO T XO B XO BX /] class XX2_RT5_XO5_XB6 opcode, bits<5> xo2, bits<9> xo, string opc, list pattern> : XX2_RD5_XO5_RS6; // [PO T XO B XO BX TX] class XX2_XT6_XO5_XB6 opcode, bits<5> xo2, bits<9> xo, string opc, RegisterOperand vtype, list pattern> : XX2_RD6_XO5_RS6; // [PO T A B XO AX BX TX], src and dest register use different operand class class XX3_XT5_XA5_XB5 opcode, bits<8> xo, string opc, RegisterOperand xty, RegisterOperand aty, RegisterOperand bty, InstrItinClass itin, list pattern> : XX3Form; } // UseVSXReg = 1 // [PO VRT VRA VRB XO /] class X_VT5_VA5_VB5 opcode, bits<10> xo, string opc, list pattern> : XForm_1; // [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /] class X_VT5_VA5_VB5_Ro opcode, bits<10> xo, string opc, list pattern> : X_VT5_VA5_VB5, isDOT; // [PO VRT VRA VRB XO /] class X_VT5_VA5_VB5_FMA opcode, bits<10> xo, string opc, list pattern> : XForm_1, RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">; // [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /] class X_VT5_VA5_VB5_FMA_Ro opcode, bits<10> xo, string opc, list pattern> : X_VT5_VA5_VB5_FMA, isDOT; //===--------------------------------------------------------------------===// // Quad-Precision Scalar Move Instructions: // Copy Sign def 
XSCPSGNQP : X_VT5_VA5_VB5<63, 100, "xscpsgnqp", [(set f128:$vT, (fcopysign f128:$vB, f128:$vA))]>; // Absolute/Negative-Absolute/Negate def XSABSQP : X_VT5_XO5_VB5<63, 0, 804, "xsabsqp", [(set f128:$vT, (fabs f128:$vB))]>; def XSNABSQP : X_VT5_XO5_VB5<63, 8, 804, "xsnabsqp", [(set f128:$vT, (fneg (fabs f128:$vB)))]>; def XSNEGQP : X_VT5_XO5_VB5<63, 16, 804, "xsnegqp", [(set f128:$vT, (fneg f128:$vB))]>; //===--------------------------------------------------------------------===// // Quad-Precision Scalar Floating-Point Arithmetic Instructions: // Add/Divide/Multiply/Subtract let isCommutable = 1 in { def XSADDQP : X_VT5_VA5_VB5 <63, 4, "xsaddqp", [(set f128:$vT, (fadd f128:$vA, f128:$vB))]>; def XSADDQPO : X_VT5_VA5_VB5_Ro<63, 4, "xsaddqpo", [(set f128:$vT, (int_ppc_addf128_round_to_odd f128:$vA, f128:$vB))]>; def XSMULQP : X_VT5_VA5_VB5 <63, 36, "xsmulqp", [(set f128:$vT, (fmul f128:$vA, f128:$vB))]>; def XSMULQPO : X_VT5_VA5_VB5_Ro<63, 36, "xsmulqpo", [(set f128:$vT, (int_ppc_mulf128_round_to_odd f128:$vA, f128:$vB))]>; } def XSSUBQP : X_VT5_VA5_VB5 <63, 516, "xssubqp" , [(set f128:$vT, (fsub f128:$vA, f128:$vB))]>; def XSSUBQPO : X_VT5_VA5_VB5_Ro<63, 516, "xssubqpo", [(set f128:$vT, (int_ppc_subf128_round_to_odd f128:$vA, f128:$vB))]>; def XSDIVQP : X_VT5_VA5_VB5 <63, 548, "xsdivqp", [(set f128:$vT, (fdiv f128:$vA, f128:$vB))]>; def XSDIVQPO : X_VT5_VA5_VB5_Ro<63, 548, "xsdivqpo", [(set f128:$vT, (int_ppc_divf128_round_to_odd f128:$vA, f128:$vB))]>; // Square-Root def XSSQRTQP : X_VT5_XO5_VB5 <63, 27, 804, "xssqrtqp", [(set f128:$vT, (fsqrt f128:$vB))]>; def XSSQRTQPO : X_VT5_XO5_VB5_Ro<63, 27, 804, "xssqrtqpo", [(set f128:$vT, (int_ppc_sqrtf128_round_to_odd f128:$vB))]>; // (Negative) Multiply-{Add/Subtract} def XSMADDQP : X_VT5_VA5_VB5_FMA <63, 388, "xsmaddqp", [(set f128:$vT, (fma f128:$vA, f128:$vB, f128:$vTi))]>; def XSMADDQPO : X_VT5_VA5_VB5_FMA_Ro<63, 388, "xsmaddqpo", [(set f128:$vT, (int_ppc_fmaf128_round_to_odd f128:$vA,f128:$vB,f128:$vTi))]>; def XSMSUBQP : X_VT5_VA5_VB5_FMA <63, 420, "xsmsubqp" , [(set f128:$vT, (fma f128:$vA, f128:$vB, (fneg f128:$vTi)))]>; def XSMSUBQPO : X_VT5_VA5_VB5_FMA_Ro<63, 420, "xsmsubqpo" , [(set f128:$vT, (int_ppc_fmaf128_round_to_odd f128:$vA, f128:$vB, (fneg f128:$vTi)))]>; def XSNMADDQP : X_VT5_VA5_VB5_FMA <63, 452, "xsnmaddqp", [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, f128:$vTi)))]>; def XSNMADDQPO: X_VT5_VA5_VB5_FMA_Ro<63, 452, "xsnmaddqpo", [(set f128:$vT, (fneg (int_ppc_fmaf128_round_to_odd f128:$vA, f128:$vB, f128:$vTi)))]>; def XSNMSUBQP : X_VT5_VA5_VB5_FMA <63, 484, "xsnmsubqp", [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, (fneg f128:$vTi))))]>; def XSNMSUBQPO: X_VT5_VA5_VB5_FMA_Ro<63, 484, "xsnmsubqpo", [(set f128:$vT, (fneg (int_ppc_fmaf128_round_to_odd f128:$vA, f128:$vB, (fneg f128:$vTi))))]>; // Additional fnmsub patterns: -a*c + b == -(a*c - b) def : Pat<(fma (fneg f128:$A), f128:$C, f128:$B), (XSNMSUBQP $B, $C, $A)>; def : Pat<(fma f128:$A, (fneg f128:$C), f128:$B), (XSNMSUBQP $B, $C, $A)>; //===--------------------------------------------------------------------===// // Quad/Double-Precision Compare Instructions: // [PO BF // VRA VRB XO /] class X_BF3_VA5_VB5 opcode, bits<10> xo, string opc, list pattern> : XForm_17 { let Pattern = pattern; } // QP Compare Ordered/Unordered def XSCMPOQP : X_BF3_VA5_VB5<63, 132, "xscmpoqp", []>; def XSCMPUQP : X_BF3_VA5_VB5<63, 644, "xscmpuqp", []>; // DP/QP Compare Exponents def XSCMPEXPDP : XX3Form_1<60, 59, (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), "xscmpexpdp $crD, $XA, $XB", 
IIC_FPCompare, []>, UseVSXReg; def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>; // DP Compare ==, >=, >, != // Use vsrc for XT, because the entire register of XT is set. // XT.dword[1] = 0x0000_0000_0000_0000 def XSCMPEQDP : XX3_XT5_XA5_XB5<60, 3, "xscmpeqdp", vsrc, vsfrc, vsfrc, IIC_FPCompare, []>; def XSCMPGEDP : XX3_XT5_XA5_XB5<60, 19, "xscmpgedp", vsrc, vsfrc, vsfrc, IIC_FPCompare, []>; def XSCMPGTDP : XX3_XT5_XA5_XB5<60, 11, "xscmpgtdp", vsrc, vsfrc, vsfrc, IIC_FPCompare, []>; //===--------------------------------------------------------------------===// // Quad-Precision Floating-Point Conversion Instructions: // Convert DP -> QP def XSCVDPQP : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vfrc, [(set f128:$vT, (fpextend f64:$vB))]>; // Round & Convert QP -> DP (dword[1] is set to zero) def XSCVQPDP : X_VT5_XO5_VB5_VSFR<63, 20, 836, "xscvqpdp" , []>; def XSCVQPDPO : X_VT5_XO5_VB5_VSFR_Ro<63, 20, 836, "xscvqpdpo", [(set f64:$vT, (int_ppc_truncf128_round_to_odd f128:$vB))]>; // Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero) def XSCVQPSDZ : X_VT5_XO5_VB5<63, 25, 836, "xscvqpsdz", []>; def XSCVQPSWZ : X_VT5_XO5_VB5<63, 9, 836, "xscvqpswz", []>; def XSCVQPUDZ : X_VT5_XO5_VB5<63, 17, 836, "xscvqpudz", []>; def XSCVQPUWZ : X_VT5_XO5_VB5<63, 1, 836, "xscvqpuwz", []>; // Convert (Un)Signed DWord -> QP. def XSCVSDQP : X_VT5_XO5_VB5_TyVB<63, 10, 836, "xscvsdqp", vfrc, []>; def : Pat<(f128 (sint_to_fp i64:$src)), (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>; def : Pat<(f128 (sint_to_fp (i64 (PPCmfvsr f64:$src)))), (f128 (XSCVSDQP $src))>; def : Pat<(f128 (sint_to_fp (i32 (PPCmfvsr f64:$src)))), (f128 (XSCVSDQP (VEXTSW2Ds $src)))>; def XSCVUDQP : X_VT5_XO5_VB5_TyVB<63, 2, 836, "xscvudqp", vfrc, []>; def : Pat<(f128 (uint_to_fp i64:$src)), (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>; def : Pat<(f128 (uint_to_fp (i64 (PPCmfvsr f64:$src)))), (f128 (XSCVUDQP $src))>; // Convert (Un)Signed Word -> QP. def : Pat<(f128 (sint_to_fp i32:$src)), (f128 (XSCVSDQP (MTVSRWA $src)))>; def : Pat<(f128 (sint_to_fp (i32 (load xoaddr:$src)))), (f128 (XSCVSDQP (LIWAX xoaddr:$src)))>; def : Pat<(f128 (uint_to_fp i32:$src)), (f128 (XSCVUDQP (MTVSRWZ $src)))>; def : Pat<(f128 (uint_to_fp (i32 (load xoaddr:$src)))), (f128 (XSCVUDQP (LIWZX xoaddr:$src)))>; let UseVSXReg = 1 in { //===--------------------------------------------------------------------===// // Round to Floating-Point Integer Instructions // (Round &) Convert DP <-> HP // Note! xscvdphp's src and dest register both use the left 64 bits, so we use // vsfrc for src and dest register. xscvhpdp's src only use the left 16 bits, // but we still use vsfrc for it. def XSCVDPHP : XX2_XT6_XO5_XB6<60, 17, 347, "xscvdphp", vsfrc, []>; def XSCVHPDP : XX2_XT6_XO5_XB6<60, 16, 347, "xscvhpdp", vsfrc, []>; // Vector HP -> SP def XVCVHPSP : XX2_XT6_XO5_XB6<60, 24, 475, "xvcvhpsp", vsrc, []>; def XVCVSPHP : XX2_XT6_XO5_XB6<60, 25, 475, "xvcvsphp", vsrc, [(set v4f32:$XT, (int_ppc_vsx_xvcvsphp v4f32:$XB))]>; } // UseVSXReg = 1 // Pattern for matching Vector HP -> Vector SP intrinsic. Defined as a // separate pattern so that it can convert the input register class from // VRRC(v8i16) to VSRC. 
def : Pat<(v4f32 (int_ppc_vsx_xvcvhpsp v8i16:$A)), (v4f32 (XVCVHPSP (COPY_TO_REGCLASS $A, VSRC)))>; class Z23_VT5_R1_VB5_RMC2_EX1 opcode, bits<8> xo, bit ex, string opc, list pattern> : Z23Form_8 { let RC = ex; } // Round to Quad-Precision Integer [with Inexact] def XSRQPI : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 0, "xsrqpi" , []>; def XSRQPIX : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 1, "xsrqpix", []>; // Use current rounding mode def : Pat<(f128 (fnearbyint f128:$vB)), (f128 (XSRQPI 0, $vB, 3))>; // Round to nearest, ties away from zero def : Pat<(f128 (fround f128:$vB)), (f128 (XSRQPI 0, $vB, 0))>; // Round towards Zero def : Pat<(f128 (ftrunc f128:$vB)), (f128 (XSRQPI 1, $vB, 1))>; // Round towards +Inf def : Pat<(f128 (fceil f128:$vB)), (f128 (XSRQPI 1, $vB, 2))>; // Round towards -Inf def : Pat<(f128 (ffloor f128:$vB)), (f128 (XSRQPI 1, $vB, 3))>; // Use current rounding mode, [with Inexact] def : Pat<(f128 (frint f128:$vB)), (f128 (XSRQPIX 0, $vB, 3))>; // Round Quad-Precision to Double-Extended Precision (fp80) def XSRQPXP : Z23_VT5_R1_VB5_RMC2_EX1<63, 37, 0, "xsrqpxp", []>; //===--------------------------------------------------------------------===// // Insert/Extract Instructions // Insert Exponent DP/QP // XT NOTE: XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU def XSIEXPDP : XX1Form <60, 918, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB), "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>, UseVSXReg; // vB NOTE: only vB.dword[0] is used, that's why we don't use // X_VT5_VA5_VB5 form def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB), "xsiexpqp $vT, $vA, $vB", IIC_VecFP, []>; // Extract Exponent/Significand DP/QP def XSXEXPDP : XX2_RT5_XO5_XB6<60, 0, 347, "xsxexpdp", []>; def XSXSIGDP : XX2_RT5_XO5_XB6<60, 1, 347, "xsxsigdp", []>; def XSXEXPQP : X_VT5_XO5_VB5 <63, 2, 804, "xsxexpqp", []>; def XSXSIGQP : X_VT5_XO5_VB5 <63, 18, 804, "xsxsigqp", []>; // Vector Insert Word let UseVSXReg = 1 in { // XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB. 
def XXINSERTW : XX2_RD6_UIM5_RS6<60, 181, (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XB, u4imm:$UIM), "xxinsertw $XT, $XB, $UIM", IIC_VecFP, [(set v4i32:$XT, (PPCvecinsert v4i32:$XTi, v4i32:$XB, imm32SExt16:$UIM))]>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">; // Vector Extract Unsigned Word def XXEXTRACTUW : XX2_RD6_UIM5_RS6<60, 165, (outs vsfrc:$XT), (ins vsrc:$XB, u4imm:$UIMM), "xxextractuw $XT, $XB, $UIMM", IIC_VecFP, []>; } // UseVSXReg = 1 // Vector Insert Exponent DP/SP def XVIEXPDP : XX3_XT5_XA5_XB5<60, 248, "xviexpdp", vsrc, vsrc, vsrc, IIC_VecFP, [(set v2f64: $XT,(int_ppc_vsx_xviexpdp v2i64:$XA, v2i64:$XB))]>; def XVIEXPSP : XX3_XT5_XA5_XB5<60, 216, "xviexpsp", vsrc, vsrc, vsrc, IIC_VecFP, [(set v4f32: $XT,(int_ppc_vsx_xviexpsp v4i32:$XA, v4i32:$XB))]>; // Vector Extract Exponent/Significand DP/SP def XVXEXPDP : XX2_XT6_XO5_XB6<60, 0, 475, "xvxexpdp", vsrc, [(set v2i64: $XT, (int_ppc_vsx_xvxexpdp v2f64:$XB))]>; def XVXEXPSP : XX2_XT6_XO5_XB6<60, 8, 475, "xvxexpsp", vsrc, [(set v4i32: $XT, (int_ppc_vsx_xvxexpsp v4f32:$XB))]>; def XVXSIGDP : XX2_XT6_XO5_XB6<60, 1, 475, "xvxsigdp", vsrc, [(set v2i64: $XT, (int_ppc_vsx_xvxsigdp v2f64:$XB))]>; def XVXSIGSP : XX2_XT6_XO5_XB6<60, 9, 475, "xvxsigsp", vsrc, [(set v4i32: $XT, (int_ppc_vsx_xvxsigsp v4f32:$XB))]>; let AddedComplexity = 400, Predicates = [HasP9Vector] in { // Extra patterns expanding to vector Extract Word/Insert Word def : Pat<(v4i32 (int_ppc_vsx_xxinsertw v4i32:$A, v2i64:$B, imm:$IMM)), (v4i32 (XXINSERTW $A, $B, imm:$IMM))>; def : Pat<(v2i64 (int_ppc_vsx_xxextractuw v2i64:$A, imm:$IMM)), (v2i64 (COPY_TO_REGCLASS (XXEXTRACTUW $A, imm:$IMM), VSRC))>; } // AddedComplexity = 400, HasP9Vector //===--------------------------------------------------------------------===// // Test Data Class SP/DP/QP let UseVSXReg = 1 in { def XSTSTDCSP : XX2_BF3_DCMX7_RS6<60, 298, (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB), "xststdcsp $BF, $XB, $DCMX", IIC_VecFP, []>; def XSTSTDCDP : XX2_BF3_DCMX7_RS6<60, 362, (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB), "xststdcdp $BF, $XB, $DCMX", IIC_VecFP, []>; } // UseVSXReg = 1 def XSTSTDCQP : X_BF3_DCMX7_RS5 <63, 708, (outs crrc:$BF), (ins u7imm:$DCMX, vrrc:$vB), "xststdcqp $BF, $vB, $DCMX", IIC_VecFP, []>; // Vector Test Data Class SP/DP let UseVSXReg = 1 in { def XVTSTDCSP : XX2_RD6_DCMX7_RS6<60, 13, 5, (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB), "xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP, [(set v4i32: $XT, (int_ppc_vsx_xvtstdcsp v4f32:$XB, imm:$DCMX))]>; def XVTSTDCDP : XX2_RD6_DCMX7_RS6<60, 15, 5, (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB), "xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP, [(set v2i64: $XT, (int_ppc_vsx_xvtstdcdp v2f64:$XB, imm:$DCMX))]>; } // UseVSXReg = 1 //===--------------------------------------------------------------------===// // Maximum/Minimum Type-C/Type-J DP // XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU, so we use vsrc for XT def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsrc, vsfrc, vsfrc, IIC_VecFP, []>; def XSMAXJDP : XX3_XT5_XA5_XB5<60, 144, "xsmaxjdp", vsrc, vsfrc, vsfrc, IIC_VecFP, []>; def XSMINCDP : XX3_XT5_XA5_XB5<60, 136, "xsmincdp", vsrc, vsfrc, vsfrc, IIC_VecFP, []>; def XSMINJDP : XX3_XT5_XA5_XB5<60, 152, "xsminjdp", vsrc, vsfrc, vsfrc, IIC_VecFP, []>; //===--------------------------------------------------------------------===// // Vector Byte-Reverse H/W/D/Q Word def XXBRH : XX2_XT6_XO5_XB6<60, 7, 475, "xxbrh", vsrc, []>; def XXBRW : XX2_XT6_XO5_XB6<60, 15, 475, "xxbrw", vsrc, []>; def XXBRD : XX2_XT6_XO5_XB6<60, 23, 475, "xxbrd", vsrc, []>; def XXBRQ : 
XX2_XT6_XO5_XB6<60, 31, 475, "xxbrq", vsrc, []>; // Vector Reverse def : Pat<(v8i16 (PPCxxreverse v8i16 :$A)), (v8i16 (COPY_TO_REGCLASS (XXBRH (COPY_TO_REGCLASS $A, VSRC)), VRRC))>; def : Pat<(v4i32 (PPCxxreverse v4i32 :$A)), (v4i32 (XXBRW $A))>; def : Pat<(v2i64 (PPCxxreverse v2i64 :$A)), (v2i64 (XXBRD $A))>; def : Pat<(v1i128 (PPCxxreverse v1i128 :$A)), (v1i128 (COPY_TO_REGCLASS (XXBRQ (COPY_TO_REGCLASS $A, VSRC)), VRRC))>; // Vector Permute def XXPERM : XX3_XT5_XA5_XB5<60, 26, "xxperm" , vsrc, vsrc, vsrc, IIC_VecPerm, []>; def XXPERMR : XX3_XT5_XA5_XB5<60, 58, "xxpermr", vsrc, vsrc, vsrc, IIC_VecPerm, []>; // Vector Splat Immediate Byte def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8), "xxspltib $XT, $IMM8", IIC_VecPerm, []>, UseVSXReg; //===--------------------------------------------------------------------===// // Vector/Scalar Load/Store Instructions // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging. let mayLoad = 1, mayStore = 0 in { // Load Vector def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src), "lxv $XT, $src", IIC_LdStLFD, []>, UseVSXReg; // Load DWord def LXSD : DSForm_1<57, 2, (outs vfrc:$vD), (ins memrix:$src), "lxsd $vD, $src", IIC_LdStLFD, []>; // Load SP from src, convert it to DP, and place in dword[0] def LXSSP : DSForm_1<57, 3, (outs vfrc:$vD), (ins memrix:$src), "lxssp $vD, $src", IIC_LdStLFD, []>; // [PO T RA RB XO TX] almost equal to [PO S RA RB XO SX], but has different // "out" and "in" dag class X_XT6_RA5_RB5 opcode, bits<10> xo, string opc, RegisterOperand vtype, list pattern> : XX1Form_memOp, UseVSXReg; // Load as Integer Byte/Halfword & Zero Indexed def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc, [(set f64:$XT, (PPClxsizx xoaddr:$src, 1))]>; def LXSIHZX : X_XT6_RA5_RB5<31, 813, "lxsihzx", vsfrc, [(set f64:$XT, (PPClxsizx xoaddr:$src, 2))]>; // Load Vector Halfword*8/Byte*16 Indexed def LXVH8X : X_XT6_RA5_RB5<31, 812, "lxvh8x" , vsrc, []>; def LXVB16X : X_XT6_RA5_RB5<31, 876, "lxvb16x", vsrc, []>; // Load Vector Indexed def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc, [(set v2f64:$XT, (load xaddr:$src))]>; // Load Vector (Left-justified) with Length def LXVL : XX1Form_memOp<31, 269, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB), "lxvl $XT, $src, $rB", IIC_LdStLoad, [(set v4i32:$XT, (int_ppc_vsx_lxvl addr:$src, i64:$rB))]>, UseVSXReg; def LXVLL : XX1Form_memOp<31,301, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB), "lxvll $XT, $src, $rB", IIC_LdStLoad, [(set v4i32:$XT, (int_ppc_vsx_lxvll addr:$src, i64:$rB))]>, UseVSXReg; // Load Vector Word & Splat Indexed def LXVWSX : X_XT6_RA5_RB5<31, 364, "lxvwsx" , vsrc, []>; } // mayLoad // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging. 
let mayStore = 1, mayLoad = 0 in { // Store Vector def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst), "stxv $XT, $dst", IIC_LdStSTFD, []>, UseVSXReg; // Store DWord def STXSD : DSForm_1<61, 2, (outs), (ins vfrc:$vS, memrix:$dst), "stxsd $vS, $dst", IIC_LdStSTFD, []>; // Convert DP of dword[0] to SP, and Store to dst def STXSSP : DSForm_1<61, 3, (outs), (ins vfrc:$vS, memrix:$dst), "stxssp $vS, $dst", IIC_LdStSTFD, []>; // [PO S RA RB XO SX] class X_XS6_RA5_RB5 opcode, bits<10> xo, string opc, RegisterOperand vtype, list pattern> : XX1Form_memOp, UseVSXReg; // Store as Integer Byte/Halfword Indexed def STXSIBX : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsfrc, [(PPCstxsix f64:$XT, xoaddr:$dst, 1)]>; def STXSIHX : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsfrc, [(PPCstxsix f64:$XT, xoaddr:$dst, 2)]>; let isCodeGenOnly = 1 in { def STXSIBXv : X_XS6_RA5_RB5<31, 909, "stxsibx" , vrrc, []>; def STXSIHXv : X_XS6_RA5_RB5<31, 941, "stxsihx" , vrrc, []>; } // Store Vector Halfword*8/Byte*16 Indexed def STXVH8X : X_XS6_RA5_RB5<31, 940, "stxvh8x" , vsrc, []>; def STXVB16X : X_XS6_RA5_RB5<31, 1004, "stxvb16x", vsrc, []>; // Store Vector Indexed def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc, [(store v2f64:$XT, xaddr:$dst)]>; // Store Vector (Left-justified) with Length def STXVL : XX1Form_memOp<31, 397, (outs), (ins vsrc:$XT, memr:$dst, g8rc:$rB), "stxvl $XT, $dst, $rB", IIC_LdStLoad, [(int_ppc_vsx_stxvl v4i32:$XT, addr:$dst, i64:$rB)]>, UseVSXReg; def STXVLL : XX1Form_memOp<31, 429, (outs), (ins vsrc:$XT, memr:$dst, g8rc:$rB), "stxvll $XT, $dst, $rB", IIC_LdStLoad, [(int_ppc_vsx_stxvll v4i32:$XT, addr:$dst, i64:$rB)]>, UseVSXReg; } // mayStore let Predicates = [IsLittleEndian] in { def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))), (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>; def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))), (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>; def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))), (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>; def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))), (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>; def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))), (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>; def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))), (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>; def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))), (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>; def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))), (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>; } let Predicates = [IsBigEndian] in { def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))), (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>; def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))), (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>; def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))), (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>; def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))), (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>; def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))), (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>; def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))), (f64 (COPY_TO_REGCLASS (XVCVSXWDP 
(XXSPLTW $A, 1)), VSFRC))>; def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))), (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>; def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))), (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>; } // Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead // of f64 def : Pat<(v8i16 (PPCmtvsrz i32:$A)), (v8i16 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>; def : Pat<(v16i8 (PPCmtvsrz i32:$A)), (v16i8 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>; // Patterns for which instructions from ISA 3.0 are a better match let Predicates = [IsLittleEndian, HasP9Vector] in { def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))), (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>; def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))), (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>; def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))), (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>; def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))), (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>; def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))), (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>; def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))), (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>; def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))), (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>; def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))), (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>; def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)), (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>; def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)), (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>; def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)), (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>; def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)), (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>; def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>; def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>; def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>; def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>; } // IsLittleEndian, HasP9Vector let Predicates = [IsBigEndian, HasP9Vector] in { def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))), (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>; def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))), (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>; def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))), (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>; def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))), (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>; def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))), (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>; def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))), (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>; def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))), (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>; def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 
3)))))), (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>; def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)), (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>; def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)), (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>; def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)), (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>; def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)), (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>; def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>; def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>; def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>; def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>; } // IsLittleEndian, HasP9Vector // D-Form Load/Store def : Pat<(v4i32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>; def : Pat<(v4f32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>; def : Pat<(v2i64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>; def : Pat<(v2f64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>; def : Pat<(f128 (quadwOffsetLoad iqaddr:$src)), (COPY_TO_REGCLASS (LXV memrix16:$src), VRRC)>; def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iqaddr:$src)), (LXV memrix16:$src)>; def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iqaddr:$src)), (LXV memrix16:$src)>; def : Pat<(quadwOffsetStore v4f32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>; def : Pat<(quadwOffsetStore v4i32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>; def : Pat<(quadwOffsetStore v2f64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>; def : Pat<(quadwOffsetStore f128:$rS, iqaddr:$dst), (STXV (COPY_TO_REGCLASS $rS, VSRC), memrix16:$dst)>; def : Pat<(quadwOffsetStore v2i64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>; def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>; def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>; def : Pat<(v2f64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; def : Pat<(v2i64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; def : Pat<(v4f32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; def : Pat<(v4i32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>; def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>; def : Pat<(f128 (nonQuadwOffsetLoad xoaddr:$src)), (COPY_TO_REGCLASS (LXVX xoaddr:$src), VRRC)>; def : Pat<(nonQuadwOffsetStore f128:$rS, xoaddr:$dst), (STXVX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>; def : Pat<(nonQuadwOffsetStore v2f64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; def : Pat<(nonQuadwOffsetStore v2i64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; def : Pat<(nonQuadwOffsetStore v4f32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; def : Pat<(nonQuadwOffsetStore v4i32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; - def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), - (v4i32 (LXVWSX xoaddr:$src))>; - def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), - (v4f32 (LXVWSX xoaddr:$src))>; - def : Pat<(v4f32 (scalar_to_vector - (f32 (fpround (f64 (extloadf32 
xoaddr:$src)))))), - (v4f32 (LXVWSX xoaddr:$src))>; + let AddedComplexity = 400 in { + // LIWAX - This instruction is used for sign extending i32 -> i64. + // LIWZX - This instruction will be emitted for i32, f32, and when + // zero-extending i32 to i64 (zext i32 -> i64). + let Predicates = [IsLittleEndian] in { + + def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))), + (v2i64 (XXPERMDIs + (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC), 2))>; + + def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))), + (v2i64 (XXPERMDIs + (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>; + + def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), + (v4i32 (XXPERMDIs + (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>; + + def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), + (v4f32 (XXPERMDIs + (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>; + } + + let Predicates = [IsBigEndian] in { + def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC))>; + + def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC))>; + + def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), + (v4i32 (XXSLDWIs + (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>; + + def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), + (v4f32 (XXSLDWIs + (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>; + } + + } + // Build vectors from i8 loads def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)), (v16i8 (VSPLTBs 7, (LXSIBZX xoaddr:$src)))>; def : Pat<(v8i16 (scalar_to_vector ScalarLoads.ZELi8)), (v8i16 (VSPLTHs 3, (LXSIBZX xoaddr:$src)))>; def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi8)), (v4i32 (XXSPLTWs (LXSIBZX xoaddr:$src), 1))>; def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi8i64)), (v2i64 (XXPERMDIs (LXSIBZX xoaddr:$src), 0))>; def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi8)), (v4i32 (XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1))>; def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi8i64)), (v2i64 (XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0))>; // Build vectors from i16 loads def : Pat<(v8i16 (scalar_to_vector ScalarLoads.Li16)), (v8i16 (VSPLTHs 3, (LXSIHZX xoaddr:$src)))>; def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi16)), (v4i32 (XXSPLTWs (LXSIHZX xoaddr:$src), 1))>; def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi16i64)), (v2i64 (XXPERMDIs (LXSIHZX xoaddr:$src), 0))>; def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi16)), (v4i32 (XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1))>; def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)), (v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>; let Predicates = [IsBigEndian, HasP9Vector] in { // Scalar stores of i8 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 9)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 11)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 13)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 
14)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 15)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst), (STXSIBXv $S, xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 1)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 3)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 5)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 7)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>; // Scalar stores of i16 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst), (STXSIHXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst), (STXSIHXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst), (STXSIHXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst), (STXSIHXv $S, xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst), (STXSIHXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst), (STXSIHXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst), (STXSIHXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst), (STXSIHXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>; } // IsBigEndian, HasP9Vector let Predicates = [IsLittleEndian, HasP9Vector] in { // Scalar stores of i8 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 7)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 5)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 3)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 1)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst), (STXSIBXv $S, xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 
(vector_extract v16i8:$S, 9)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 15)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 13)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 11)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>; def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst), (STXSIBXv (v16i8 (VSLDOI $S, $S, 9)), xoaddr:$dst)>; // Scalar stores of i16 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst), (STXSIHXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst), (STXSIHXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst), (STXSIHXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst), (STXSIHXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst), (STXSIHXv $S, xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst), (STXSIHXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst), (STXSIHXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst), (STXSIHXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>; } // IsLittleEndian, HasP9Vector // Vector sign extensions def : Pat<(f64 (PPCVexts f64:$A, 1)), (f64 (COPY_TO_REGCLASS (VEXTSB2Ds $A), VSFRC))>; def : Pat<(f64 (PPCVexts f64:$A, 2)), (f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>; let isPseudo = 1 in { def DFLOADf32 : Pseudo<(outs vssrc:$XT), (ins memrix:$src), "#DFLOADf32", [(set f32:$XT, (load ixaddr:$src))]>; def DFLOADf64 : Pseudo<(outs vsfrc:$XT), (ins memrix:$src), "#DFLOADf64", [(set f64:$XT, (load ixaddr:$src))]>; def DFSTOREf32 : Pseudo<(outs), (ins vssrc:$XT, memrix:$dst), "#DFSTOREf32", [(store f32:$XT, ixaddr:$dst)]>; def DFSTOREf64 : Pseudo<(outs), (ins vsfrc:$XT, memrix:$dst), "#DFSTOREf64", [(store f64:$XT, ixaddr:$dst)]>; } def : Pat<(f64 (extloadf32 ixaddr:$src)), (COPY_TO_REGCLASS (DFLOADf32 ixaddr:$src), VSFRC)>; def : Pat<(f32 (fpround (f64 (extloadf32 ixaddr:$src)))), (f32 (DFLOADf32 ixaddr:$src))>; + + let AddedComplexity = 400 in { + // The following pseudoinstructions are used to ensure the utilization + // of all 64 VSX registers. 
+ let Predicates = [IsLittleEndian, HasP9Vector] in { + def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))), + (v2i64 (XXPERMDIs + (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>; + def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))), + (v2i64 (XXPERMDIs + (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>; + + def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))), + (v2f64 (XXPERMDIs + (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>; + def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))), + (v2f64 (XXPERMDIs + (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>; + } + + let Predicates = [IsBigEndian, HasP9Vector] in { + def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>; + def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>; + + def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))), + (v2f64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>; + def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))), + (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>; + } + } + let Predicates = [IsBigEndian, HasP9Vector] in { // (Un)Signed DWord vector extract -> QP def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))), (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>; def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))), (f128 (XSCVSDQP (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>; def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))), (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>; def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))), (f128 (XSCVUDQP (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>; // (Un)Signed Word vector extract -> QP def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 1)))), (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>; foreach Idx = [0,2,3] in { def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, Idx)))), (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D (VSPLTW Idx, $src)), sub_64)))>; } foreach Idx = 0-3 in { def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, Idx)))), (f128 (XSCVUDQP (XXEXTRACTUW $src, !shl(Idx, 2))))>; } // (Un)Signed HWord vector extract -> QP foreach Idx = 0-7 in { def : Pat<(f128 (sint_to_fp (i32 (sext_inreg (vector_extract v8i16:$src, Idx), i16)))), (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSH2D (VEXTRACTUH !add(Idx, Idx), $src)), sub_64)))>; // The SDAG adds the `and` since an `i16` is being extracted as an `i32`. 
def : Pat<(f128 (uint_to_fp (and (i32 (vector_extract v8i16:$src, Idx)), 65535))), (f128 (XSCVUDQP (EXTRACT_SUBREG (VEXTRACTUH !add(Idx, Idx), $src), sub_64)))>; } // (Un)Signed Byte vector extract -> QP foreach Idx = 0-15 in { def : Pat<(f128 (sint_to_fp (i32 (sext_inreg (vector_extract v16i8:$src, Idx), i8)))), (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSB2D (VEXTRACTUB Idx, $src)), sub_64)))>; def : Pat<(f128 (uint_to_fp (and (i32 (vector_extract v16i8:$src, Idx)), 255))), (f128 (XSCVUDQP (EXTRACT_SUBREG (VEXTRACTUB Idx, $src), sub_64)))>; } // Unsiged int in vsx register -> QP def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))), (f128 (XSCVUDQP (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 4)))>; } // IsBigEndian, HasP9Vector let Predicates = [IsLittleEndian, HasP9Vector] in { // (Un)Signed DWord vector extract -> QP def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))), (f128 (XSCVSDQP (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>; def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))), (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>; def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))), (f128 (XSCVUDQP (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>; def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))), (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>; // (Un)Signed Word vector extract -> QP foreach Idx = [[0,3],[1,2],[3,0]] in { def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))), (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D (VSPLTW !head(!tail(Idx)), $src)), sub_64)))>; } def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 2)))), (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>; foreach Idx = [[0,12],[1,8],[2,4],[3,0]] in { def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))), (f128 (XSCVUDQP (XXEXTRACTUW $src, !head(!tail(Idx)))))>; } // (Un)Signed HWord vector extract -> QP // The Nested foreach lists identifies the vector element and corresponding // register byte location. 
foreach Idx = [[0,14],[1,12],[2,10],[3,8],[4,6],[5,4],[6,2],[7,0]] in { def : Pat<(f128 (sint_to_fp (i32 (sext_inreg (vector_extract v8i16:$src, !head(Idx)), i16)))), (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSH2D (VEXTRACTUH !head(!tail(Idx)), $src)), sub_64)))>; def : Pat<(f128 (uint_to_fp (and (i32 (vector_extract v8i16:$src, !head(Idx))), 65535))), (f128 (XSCVUDQP (EXTRACT_SUBREG (VEXTRACTUH !head(!tail(Idx)), $src), sub_64)))>; } // (Un)Signed Byte vector extract -> QP foreach Idx = [[0,15],[1,14],[2,13],[3,12],[4,11],[5,10],[6,9],[7,8],[8,7], [9,6],[10,5],[11,4],[12,3],[13,2],[14,1],[15,0]] in { def : Pat<(f128 (sint_to_fp (i32 (sext_inreg (vector_extract v16i8:$src, !head(Idx)), i8)))), (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSB2D (VEXTRACTUB !head(!tail(Idx)), $src)), sub_64)))>; def : Pat<(f128 (uint_to_fp (and (i32 (vector_extract v16i8:$src, !head(Idx))), 255))), (f128 (XSCVUDQP (EXTRACT_SUBREG (VEXTRACTUB !head(!tail(Idx)), $src), sub_64)))>; } // Unsiged int in vsx register -> QP def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))), (f128 (XSCVUDQP (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 8)))>; } // IsLittleEndian, HasP9Vector // Convert (Un)Signed DWord in memory -> QP def : Pat<(f128 (sint_to_fp (i64 (load xaddr:$src)))), (f128 (XSCVSDQP (LXSDX xaddr:$src)))>; def : Pat<(f128 (sint_to_fp (i64 (load ixaddr:$src)))), (f128 (XSCVSDQP (LXSD ixaddr:$src)))>; def : Pat<(f128 (uint_to_fp (i64 (load xaddr:$src)))), (f128 (XSCVUDQP (LXSDX xaddr:$src)))>; def : Pat<(f128 (uint_to_fp (i64 (load ixaddr:$src)))), (f128 (XSCVUDQP (LXSD ixaddr:$src)))>; // Convert Unsigned HWord in memory -> QP def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi16)), (f128 (XSCVUDQP (LXSIHZX xaddr:$src)))>; // Convert Unsigned Byte in memory -> QP def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi8)), (f128 (XSCVUDQP (LXSIBZX xoaddr:$src)))>; // Truncate & Convert QP -> (Un)Signed (D)Word. def : Pat<(i64 (fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>; def : Pat<(i64 (fp_to_uint f128:$src)), (i64 (MFVRD (XSCVQPUDZ $src)))>; def : Pat<(i32 (fp_to_sint f128:$src)), (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC)))>; def : Pat<(i32 (fp_to_uint f128:$src)), (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC)))>; // Instructions for store(fptosi). // The 8-byte version is repeated here due to availability of D-Form STXSD. 
def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xaddr:$dst, 8), (STXSDX (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), xaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), ixaddr:$dst, 8), (STXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), ixaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 4), (STXSIWX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 2), (STXSIHX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 1), (STXSIBX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddr:$dst, 8), (STXSDX (XSCVDPSXDS f64:$src), xaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), ixaddr:$dst, 8), (STXSD (XSCVDPSXDS f64:$src), ixaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 2), (STXSIHX (XSCVDPSXWS f64:$src), xoaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 1), (STXSIBX (XSCVDPSXWS f64:$src), xoaddr:$dst)>; // Instructions for store(fptoui). def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xaddr:$dst, 8), (STXSDX (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), xaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), ixaddr:$dst, 8), (STXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), ixaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 4), (STXSIWX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 2), (STXSIHX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 1), (STXSIBX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddr:$dst, 8), (STXSDX (XSCVDPUXDS f64:$src), xaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), ixaddr:$dst, 8), (STXSD (XSCVDPUXDS f64:$src), ixaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 2), (STXSIHX (XSCVDPUXWS f64:$src), xoaddr:$dst)>; def : Pat<(PPCstore_scal_int_from_vsr (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 1), (STXSIBX (XSCVDPUXWS f64:$src), xoaddr:$dst)>; // Round & Convert QP -> DP/SP def : Pat<(f64 (fpround f128:$src)), (f64 (XSCVQPDP $src))>; def : Pat<(f32 (fpround f128:$src)), (f32 (XSRSP (XSCVQPDPO $src)))>; // Convert SP -> QP def : Pat<(f128 (fpextend f32:$src)), (f128 (XSCVDPQP (COPY_TO_REGCLASS $src, VFRC)))>; } // end HasP9Vector, AddedComplexity let AddedComplexity = 400 in { let Predicates = [IsISA3_0, HasP9Vector, HasDirectMove, IsBigEndian] in { def : Pat<(f128 (PPCbuild_fp128 i64:$rB, i64:$rA)), (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>; } let Predicates = [IsISA3_0, HasP9Vector, HasDirectMove, IsLittleEndian] in { def : Pat<(f128 (PPCbuild_fp128 i64:$rA, i64:$rB)), (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>; } } let 
Predicates = [HasP9Vector] in { let isPseudo = 1 in { let mayStore = 1 in { def SPILLTOVSR_STX : PseudoXFormMemOp<(outs), (ins spilltovsrrc:$XT, memrr:$dst), "#SPILLTOVSR_STX", []>; def SPILLTOVSR_ST : Pseudo<(outs), (ins spilltovsrrc:$XT, memrix:$dst), "#SPILLTOVSR_ST", []>; } let mayLoad = 1 in { def SPILLTOVSR_LDX : PseudoXFormMemOp<(outs spilltovsrrc:$XT), (ins memrr:$src), "#SPILLTOVSR_LDX", []>; def SPILLTOVSR_LD : Pseudo<(outs spilltovsrrc:$XT), (ins memrix:$src), "#SPILLTOVSR_LD", []>; } } } // Integer extend helper dags 32 -> 64 def AnyExts { dag A = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32); dag B = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $B, sub_32); dag C = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $C, sub_32); dag D = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $D, sub_32); } def DblToFlt { dag A0 = (f32 (fpround (f64 (extractelt v2f64:$A, 0)))); dag A1 = (f32 (fpround (f64 (extractelt v2f64:$A, 1)))); dag B0 = (f32 (fpround (f64 (extractelt v2f64:$B, 0)))); dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1)))); } def ExtDbl { dag A0S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$A, 0)))))); dag A1S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$A, 1)))))); dag B0S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$B, 0)))))); dag B1S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$B, 1)))))); dag A0U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$A, 0)))))); dag A1U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$A, 1)))))); dag B0U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$B, 0)))))); dag B1U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$B, 1)))))); } def ByteToWord { dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8)); dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8)); dag LE_A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 8)), i8)); dag LE_A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 12)), i8)); dag BE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 3)), i8)); dag BE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 7)), i8)); dag BE_A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 11)), i8)); dag BE_A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 15)), i8)); } def ByteToDWord { dag LE_A0 = (i64 (sext_inreg (i64 (anyext (i32 (vector_extract v16i8:$A, 0)))), i8)); dag LE_A1 = (i64 (sext_inreg (i64 (anyext (i32 (vector_extract v16i8:$A, 8)))), i8)); dag BE_A0 = (i64 (sext_inreg (i64 (anyext (i32 (vector_extract v16i8:$A, 7)))), i8)); dag BE_A1 = (i64 (sext_inreg (i64 (anyext (i32 (vector_extract v16i8:$A, 15)))), i8)); } def HWordToWord { dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 0)), i16)); dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 2)), i16)); dag LE_A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 4)), i16)); dag LE_A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 6)), i16)); dag BE_A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 1)), i16)); dag BE_A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 3)), i16)); dag BE_A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 5)), i16)); dag BE_A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 7)), i16)); } def HWordToDWord { dag LE_A0 = (i64 (sext_inreg (i64 (anyext (i32 (vector_extract v8i16:$A, 0)))), i16)); dag LE_A1 = (i64 (sext_inreg (i64 (anyext (i32 (vector_extract v8i16:$A, 4)))), i16)); dag BE_A0 = (i64 (sext_inreg (i64 (anyext (i32 (vector_extract v8i16:$A, 3)))), i16)); dag BE_A1 = (i64 (sext_inreg (i64 (anyext (i32 
(vector_extract v8i16:$A, 7)))), i16)); } def WordToDWord { dag LE_A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 0)))); dag LE_A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 2)))); dag BE_A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 1)))); dag BE_A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 3)))); } def FltToIntLoad { dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (extloadf32 xoaddr:$A))))); } def FltToUIntLoad { dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (extloadf32 xoaddr:$A))))); } def FltToLongLoad { dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A))))); } def FltToLongLoadP9 { dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 ixaddr:$A))))); } def FltToULongLoad { dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A))))); } def FltToULongLoadP9 { dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 ixaddr:$A))))); } def FltToLong { dag A = (i64 (PPCmfvsr (f64 (PPCfctidz (fpextend f32:$A))))); } def FltToULong { dag A = (i64 (PPCmfvsr (f64 (PPCfctiduz (fpextend f32:$A))))); } def DblToInt { dag A = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$A)))); dag B = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$B)))); dag C = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$C)))); dag D = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$D)))); } def DblToUInt { dag A = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$A)))); dag B = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$B)))); dag C = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$C)))); dag D = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$D)))); } def DblToLong { dag A = (i64 (PPCmfvsr (f64 (PPCfctidz f64:$A)))); } def DblToULong { dag A = (i64 (PPCmfvsr (f64 (PPCfctiduz f64:$A)))); } def DblToIntLoad { dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A))))); } def DblToIntLoadP9 { dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load ixaddr:$A))))); } def DblToUIntLoad { dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A))))); } def DblToUIntLoadP9 { dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load ixaddr:$A))))); } def DblToLongLoad { dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A))))); } def DblToULongLoad { dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (load xoaddr:$A))))); } // FP merge dags (for f32 -> v4f32) def MrgFP { dag AC = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $A, VSRC), (COPY_TO_REGCLASS $C, VSRC), 0)); dag BD = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $B, VSRC), (COPY_TO_REGCLASS $D, VSRC), 0)); dag ABhToFlt = (XVCVDPSP (XXPERMDI $A, $B, 0)); dag ABlToFlt = (XVCVDPSP (XXPERMDI $A, $B, 3)); dag BAhToFlt = (XVCVDPSP (XXPERMDI $B, $A, 0)); dag BAlToFlt = (XVCVDPSP (XXPERMDI $B, $A, 3)); } // Word-element merge dags - conversions from f64 to i32 merged into vectors. def MrgWords { // For big endian, we merge low and hi doublewords (A, B). dag A0B0 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 0)); dag A1B1 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 3)); dag CVA1B1S = (v4i32 (XVCVDPSXWS A1B1)); dag CVA0B0S = (v4i32 (XVCVDPSXWS A0B0)); dag CVA1B1U = (v4i32 (XVCVDPUXWS A1B1)); dag CVA0B0U = (v4i32 (XVCVDPUXWS A0B0)); // For little endian, we merge low and hi doublewords (B, A). dag B1A1 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 0)); dag B0A0 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 3)); dag CVB1A1S = (v4i32 (XVCVDPSXWS B1A1)); dag CVB0A0S = (v4i32 (XVCVDPSXWS B0A0)); dag CVB1A1U = (v4i32 (XVCVDPUXWS B1A1)); dag CVB0A0U = (v4i32 (XVCVDPUXWS B0A0)); // For big endian, we merge hi doublewords of (A, C) and (B, D), convert // then merge. 
dag AC = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$A, VSRC), (COPY_TO_REGCLASS f64:$C, VSRC), 0)); dag BD = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$B, VSRC), (COPY_TO_REGCLASS f64:$D, VSRC), 0)); dag CVACS = (v4i32 (XVCVDPSXWS AC)); dag CVBDS = (v4i32 (XVCVDPSXWS BD)); dag CVACU = (v4i32 (XVCVDPUXWS AC)); dag CVBDU = (v4i32 (XVCVDPUXWS BD)); // For little endian, we merge hi doublewords of (D, B) and (C, A), convert // then merge. dag DB = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$D, VSRC), (COPY_TO_REGCLASS f64:$B, VSRC), 0)); dag CA = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$C, VSRC), (COPY_TO_REGCLASS f64:$A, VSRC), 0)); dag CVDBS = (v4i32 (XVCVDPSXWS DB)); dag CVCAS = (v4i32 (XVCVDPSXWS CA)); dag CVDBU = (v4i32 (XVCVDPUXWS DB)); dag CVCAU = (v4i32 (XVCVDPUXWS CA)); } // Patterns for BUILD_VECTOR nodes. let AddedComplexity = 400 in { let Predicates = [HasVSX] in { // Build vectors of floating point converted to i32. def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.A, DblToInt.A, DblToInt.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS $A), VSRC), 1))>; def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.A, DblToUInt.A, DblToUInt.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS $A), VSRC), 1))>; def : Pat<(v2i64 (build_vector DblToLong.A, DblToLong.A)), (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC), (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC), 0))>; def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)), (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), 0))>; def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>; def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>; def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)), (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>; // Build vectors of floating point converted to i64. def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)), (v2i64 (XXPERMDIs (COPY_TO_REGCLASS (XSCVDPSXDSs $A), VSFRC), 0))>; def : Pat<(v2i64 (build_vector FltToULong.A, FltToULong.A)), (v2i64 (XXPERMDIs (COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>; def : Pat<(v2i64 (scalar_to_vector DblToLongLoad.A)), (v2i64 (XVCVDPSXDS (LXVDSX xoaddr:$A)))>; def : Pat<(v2i64 (scalar_to_vector DblToULongLoad.A)), (v2i64 (XVCVDPUXDS (LXVDSX xoaddr:$A)))>; } let Predicates = [HasVSX, NoP9Vector] in { // Load-and-splat with fp-to-int conversion (using X-Form VSX/FP loads). 
def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>; def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>; def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)), (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A), VSFRC)), 0))>; def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)), (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A), VSFRC)), 0))>; } // Big endian, available on all targets with VSX let Predicates = [IsBigEndian, HasVSX] in { def : Pat<(v2f64 (build_vector f64:$A, f64:$B)), (v2f64 (XXPERMDI (COPY_TO_REGCLASS $A, VSRC), (COPY_TO_REGCLASS $B, VSRC), 0))>; def : Pat<(v4f32 (build_vector f32:$A, f32:$B, f32:$C, f32:$D)), (VMRGEW MrgFP.AC, MrgFP.BD)>; def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1, DblToFlt.B0, DblToFlt.B1)), (v4f32 (VMRGEW MrgFP.ABhToFlt, MrgFP.ABlToFlt))>; // Convert 4 doubles to a vector of ints. def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B, DblToInt.C, DblToInt.D)), (v4i32 (VMRGEW MrgWords.CVACS, MrgWords.CVBDS))>; def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B, DblToUInt.C, DblToUInt.D)), (v4i32 (VMRGEW MrgWords.CVACU, MrgWords.CVBDU))>; def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S, ExtDbl.B0S, ExtDbl.B1S)), (v4i32 (VMRGEW MrgWords.CVA0B0S, MrgWords.CVA1B1S))>; def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U, ExtDbl.B0U, ExtDbl.B1U)), (v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>; } let Predicates = [IsLittleEndian, HasVSX] in { // Little endian, available on all targets with VSX def : Pat<(v2f64 (build_vector f64:$A, f64:$B)), (v2f64 (XXPERMDI (COPY_TO_REGCLASS $B, VSRC), (COPY_TO_REGCLASS $A, VSRC), 0))>; def : Pat<(v4f32 (build_vector f32:$D, f32:$C, f32:$B, f32:$A)), (VMRGEW MrgFP.AC, MrgFP.BD)>; def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1, DblToFlt.B0, DblToFlt.B1)), (v4f32 (VMRGEW MrgFP.BAhToFlt, MrgFP.BAlToFlt))>; // Convert 4 doubles to a vector of ints. def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B, DblToInt.C, DblToInt.D)), (v4i32 (VMRGEW MrgWords.CVDBS, MrgWords.CVCAS))>; def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B, DblToUInt.C, DblToUInt.D)), (v4i32 (VMRGEW MrgWords.CVDBU, MrgWords.CVCAU))>; def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S, ExtDbl.B0S, ExtDbl.B1S)), (v4i32 (VMRGEW MrgWords.CVB1A1S, MrgWords.CVB0A0S))>; def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U, ExtDbl.B0U, ExtDbl.B1U)), (v4i32 (VMRGEW MrgWords.CVB1A1U, MrgWords.CVB0A0U))>; } let Predicates = [HasDirectMove] in { // Endianness-neutral constant splat on P8 and newer targets. The reason // for this pattern is that on targets with direct moves, we don't expand // BUILD_VECTOR nodes for v4i32. def : Pat<(v4i32 (build_vector immSExt5NonZero:$A, immSExt5NonZero:$A, immSExt5NonZero:$A, immSExt5NonZero:$A)), (v4i32 (VSPLTISW imm:$A))>; } let Predicates = [IsBigEndian, HasDirectMove, NoP9Vector] in { // Big endian integer vectors using direct moves. 
def : Pat<(v2i64 (build_vector i64:$A, i64:$B)), (v2i64 (XXPERMDI (COPY_TO_REGCLASS (MTVSRD $A), VSRC), (COPY_TO_REGCLASS (MTVSRD $B), VSRC), 0))>; def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), (VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), (COPY_TO_REGCLASS (MTVSRWZ $C), VSRC), 0), (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC), (COPY_TO_REGCLASS (MTVSRWZ $D), VSRC), 0))>; def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>; } let Predicates = [IsLittleEndian, HasDirectMove, NoP9Vector] in { // Little endian integer vectors using direct moves. def : Pat<(v2i64 (build_vector i64:$A, i64:$B)), (v2i64 (XXPERMDI (COPY_TO_REGCLASS (MTVSRD $B), VSRC), (COPY_TO_REGCLASS (MTVSRD $A), VSRC), 0))>; def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), (VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $D), VSRC), (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC), 0), (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $C), VSRC), (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 0))>; def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>; } let Predicates = [HasP9Vector] in { // Endianness-neutral patterns for const splats with ISA 3.0 instructions. def : Pat<(v4i32 (scalar_to_vector i32:$A)), (v4i32 (MTVSRWS $A))>; def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), (v4i32 (MTVSRWS $A))>; def : Pat<(v16i8 (build_vector immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A)), (v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>; def : Pat<(v16i8 immAllOnesV), (v16i8 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>; def : Pat<(v8i16 immAllOnesV), (v8i16 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>; def : Pat<(v4i32 immAllOnesV), (v4i32 (XXSPLTIB 255))>; def : Pat<(v2i64 immAllOnesV), (v2i64 (XXSPLTIB 255))>; def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)), (v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>; def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)), (v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>; def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>; def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>; def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)), (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS (DFLOADf32 ixaddr:$A), VSFRC)), 0))>; def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)), (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (DFLOADf32 ixaddr:$A), VSFRC)), 0))>; } let Predicates = [IsISA3_0, HasDirectMove, IsBigEndian] in { def : Pat<(i64 (extractelt v2i64:$A, 1)), (i64 (MFVSRLD $A))>; // Better way to build integer vectors if we have MTVSRDD. Big endian. def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)), (v2i64 (MTVSRDD $rB, $rA))>; def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), (VMRGOW (v4i32 (COPY_TO_REGCLASS (MTVSRDD AnyExts.A, AnyExts.C), VSRC)), (v4i32 (COPY_TO_REGCLASS (MTVSRDD AnyExts.B, AnyExts.D), VSRC)))>; } let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in { def : Pat<(i64 (extractelt v2i64:$A, 0)), (i64 (MFVSRLD $A))>; // Better way to build integer vectors if we have MTVSRDD. Little endian. 
def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)), (v2i64 (MTVSRDD $rB, $rA))>; def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), (VMRGOW (v4i32 (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC)), (v4i32 (COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC)))>; } // P9 Altivec instructions that can be used to build vectors. // Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete // with complexities of existing build vector patterns in this file. let Predicates = [HasP9Altivec, IsLittleEndian] in { def : Pat<(v2i64 (build_vector WordToDWord.LE_A0, WordToDWord.LE_A1)), (v2i64 (VEXTSW2D $A))>; def : Pat<(v2i64 (build_vector HWordToDWord.LE_A0, HWordToDWord.LE_A1)), (v2i64 (VEXTSH2D $A))>; def : Pat<(v4i32 (build_vector HWordToWord.LE_A0, HWordToWord.LE_A1, HWordToWord.LE_A2, HWordToWord.LE_A3)), (v4i32 (VEXTSH2W $A))>; def : Pat<(v4i32 (build_vector ByteToWord.LE_A0, ByteToWord.LE_A1, ByteToWord.LE_A2, ByteToWord.LE_A3)), (v4i32 (VEXTSB2W $A))>; def : Pat<(v2i64 (build_vector ByteToDWord.LE_A0, ByteToDWord.LE_A1)), (v2i64 (VEXTSB2D $A))>; } let Predicates = [HasP9Altivec, IsBigEndian] in { def : Pat<(v2i64 (build_vector WordToDWord.BE_A0, WordToDWord.BE_A1)), (v2i64 (VEXTSW2D $A))>; def : Pat<(v2i64 (build_vector HWordToDWord.BE_A0, HWordToDWord.BE_A1)), (v2i64 (VEXTSH2D $A))>; def : Pat<(v4i32 (build_vector HWordToWord.BE_A0, HWordToWord.BE_A1, HWordToWord.BE_A2, HWordToWord.BE_A3)), (v4i32 (VEXTSH2W $A))>; def : Pat<(v4i32 (build_vector ByteToWord.BE_A0, ByteToWord.BE_A1, ByteToWord.BE_A2, ByteToWord.BE_A3)), (v4i32 (VEXTSB2W $A))>; def : Pat<(v2i64 (build_vector ByteToDWord.BE_A0, ByteToDWord.BE_A1)), (v2i64 (VEXTSB2D $A))>; } let Predicates = [HasP9Altivec] in { def: Pat<(v2i64 (PPCSExtVElems v16i8:$A)), (v2i64 (VEXTSB2D $A))>; def: Pat<(v2i64 (PPCSExtVElems v8i16:$A)), (v2i64 (VEXTSH2D $A))>; def: Pat<(v2i64 (PPCSExtVElems v4i32:$A)), (v2i64 (VEXTSW2D $A))>; def: Pat<(v4i32 (PPCSExtVElems v16i8:$A)), (v4i32 (VEXTSB2W $A))>; def: Pat<(v4i32 (PPCSExtVElems v8i16:$A)), (v4i32 (VEXTSH2W $A))>; } } + Index: vendor/llvm/dist-release_70/lib/Transforms/Utils/LCSSA.cpp =================================================================== --- vendor/llvm/dist-release_70/lib/Transforms/Utils/LCSSA.cpp (revision 341364) +++ vendor/llvm/dist-release_70/lib/Transforms/Utils/LCSSA.cpp (revision 341365) @@ -1,450 +1,466 @@ //===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This pass transforms loops by placing phi nodes at the end of the loops for // all values that are live across the loop boundary. For example, it turns // the left into the right code: // // for (...) for (...) // if (c) if (c) // X1 = ... X1 = ... // else else // X2 = ... X2 = ... // X3 = phi(X1, X2) X3 = phi(X1, X2) // ... = X3 + 4 X4 = phi(X3) // ... = X4 + 4 // // This is still valid LLVM; the extra phi nodes are purely redundant, and will // be trivially eliminated by InstCombine. The major benefit of this // transformation is that it makes many other loop optimizations, such as // LoopUnswitching, simpler. 
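The LCSSA.cpp file comment above describes the invariant this pass establishes and why the redundant exit phis make later loop passes simpler. As a minimal sketch of how a client might drive the utilities defined in this file — using only entry points that appear in this patch (formLCSSA and Loop::isLCSSAForm), assuming formLCSSA is exposed through llvm/Transforms/Utils/LoopUtils.h (which LCSSA.cpp itself includes), and with the helper name below purely illustrative:

  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/IR/Dominators.h"
  #include "llvm/Transforms/Utils/LoopUtils.h" // assumed to declare formLCSSA
  #include <cassert>

  // Put a single loop into LCSSA form and check the invariant afterwards.
  // DT, LI and (optionally) SE are the analysis results normally obtained
  // from the pass manager.
  static bool putLoopIntoLCSSA(llvm::Loop &L, llvm::DominatorTree &DT,
                               llvm::LoopInfo &LI, llvm::ScalarEvolution *SE) {
    // formLCSSA inserts the ".lcssa" phis for every value live out of L.
    bool Changed = llvm::formLCSSA(L, DT, &LI, SE);
    // The transform must leave the loop satisfying the LCSSA property.
    assert(L.isLCSSAForm(DT) && "loop not in LCSSA form after formLCSSA");
    return Changed;
  }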
// //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/LCSSA.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PredIteratorCache.h" #include "llvm/Pass.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; #define DEBUG_TYPE "lcssa" STATISTIC(NumLCSSA, "Number of live out of a loop variables"); #ifdef EXPENSIVE_CHECKS static bool VerifyLoopLCSSA = true; #else static bool VerifyLoopLCSSA = false; #endif static cl::opt VerifyLoopLCSSAFlag("verify-loop-lcssa", cl::location(VerifyLoopLCSSA), cl::Hidden, cl::desc("Verify loop lcssa form (time consuming)")); /// Return true if the specified block is in the list. static bool isExitBlock(BasicBlock *BB, const SmallVectorImpl &ExitBlocks) { return is_contained(ExitBlocks, BB); } /// For every instruction from the worklist, check to see if it has any uses /// that are outside the current loop. If so, insert LCSSA PHI nodes and /// rewrite the uses. bool llvm::formLCSSAForInstructions(SmallVectorImpl &Worklist, DominatorTree &DT, LoopInfo &LI) { SmallVector UsesToRewrite; SmallSetVector PHIsToRemove; PredIteratorCache PredCache; bool Changed = false; // Cache the Loop ExitBlocks across this loop. We expect to get a lot of // instructions within the same loops, computing the exit blocks is // expensive, and we're not mutating the loop structure. SmallDenseMap> LoopExitBlocks; while (!Worklist.empty()) { UsesToRewrite.clear(); Instruction *I = Worklist.pop_back_val(); assert(!I->getType()->isTokenTy() && "Tokens shouldn't be in the worklist"); BasicBlock *InstBB = I->getParent(); Loop *L = LI.getLoopFor(InstBB); assert(L && "Instruction belongs to a BB that's not part of a loop"); if (!LoopExitBlocks.count(L)) L->getExitBlocks(LoopExitBlocks[L]); assert(LoopExitBlocks.count(L)); const SmallVectorImpl &ExitBlocks = LoopExitBlocks[L]; if (ExitBlocks.empty()) continue; for (Use &U : I->uses()) { Instruction *User = cast(U.getUser()); BasicBlock *UserBB = User->getParent(); if (auto *PN = dyn_cast(User)) UserBB = PN->getIncomingBlock(U); if (InstBB != UserBB && !L->contains(UserBB)) UsesToRewrite.push_back(&U); } // If there are no uses outside the loop, exit with no change. if (UsesToRewrite.empty()) continue; ++NumLCSSA; // We are applying the transformation // Invoke instructions are special in that their result value is not // available along their unwind edge. The code below tests to see whether // DomBB dominates the value, so adjust DomBB to the normal destination // block, which is effectively where the value is first usable. 
BasicBlock *DomBB = InstBB; if (auto *Inv = dyn_cast(I)) DomBB = Inv->getNormalDest(); DomTreeNode *DomNode = DT.getNode(DomBB); SmallVector AddedPHIs; SmallVector PostProcessPHIs; SmallVector InsertedPHIs; SSAUpdater SSAUpdate(&InsertedPHIs); SSAUpdate.Initialize(I->getType(), I->getName()); // Insert the LCSSA phi's into all of the exit blocks dominated by the // value, and add them to the Phi's map. for (BasicBlock *ExitBB : ExitBlocks) { if (!DT.dominates(DomNode, DT.getNode(ExitBB))) continue; // If we already inserted something for this BB, don't reprocess it. if (SSAUpdate.HasValueForBlock(ExitBB)) continue; PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB), I->getName() + ".lcssa", &ExitBB->front()); // Get the debug location from the original instruction. PN->setDebugLoc(I->getDebugLoc()); // Add inputs from inside the loop for this PHI. for (BasicBlock *Pred : PredCache.get(ExitBB)) { PN->addIncoming(I, Pred); // If the exit block has a predecessor not within the loop, arrange for // the incoming value use corresponding to that predecessor to be // rewritten in terms of a different LCSSA PHI. if (!L->contains(Pred)) UsesToRewrite.push_back( &PN->getOperandUse(PN->getOperandNumForIncomingValue( PN->getNumIncomingValues() - 1))); } AddedPHIs.push_back(PN); // Remember that this phi makes the value alive in this block. SSAUpdate.AddAvailableValue(ExitBB, PN); // LoopSimplify might fail to simplify some loops (e.g. when indirect // branches are involved). In such situations, it might happen that an // exit for Loop L1 is the header of a disjoint Loop L2. Thus, when we // create PHIs in such an exit block, we are also inserting PHIs into L2's // header. This could break LCSSA form for L2 because these inserted PHIs // can also have uses outside of L2. Remember all PHIs in such situation // as to revisit than later on. FIXME: Remove this if indirectbr support // into LoopSimplify gets improved. if (auto *OtherLoop = LI.getLoopFor(ExitBB)) if (!L->contains(OtherLoop)) PostProcessPHIs.push_back(PN); } // Rewrite all uses outside the loop in terms of the new PHIs we just // inserted. for (Use *UseToRewrite : UsesToRewrite) { // If this use is in an exit block, rewrite to use the newly inserted PHI. // This is required for correctness because SSAUpdate doesn't handle uses // in the same block. It assumes the PHI we inserted is at the end of the // block. Instruction *User = cast(UseToRewrite->getUser()); BasicBlock *UserBB = User->getParent(); if (auto *PN = dyn_cast(User)) UserBB = PN->getIncomingBlock(*UseToRewrite); if (isa(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) { // Tell the VHs that the uses changed. This updates SCEV's caches. if (UseToRewrite->get()->hasValueHandle()) ValueHandleBase::ValueIsRAUWd(*UseToRewrite, &UserBB->front()); UseToRewrite->set(&UserBB->front()); continue; } // Otherwise, do full PHI insertion. SSAUpdate.RewriteUse(*UseToRewrite); + } + + SmallVector DbgValues; + llvm::findDbgValues(DbgValues, I); + + // Update pre-existing debug value uses that reside outside the loop. + auto &Ctx = I->getContext(); + for (auto DVI : DbgValues) { + BasicBlock *UserBB = DVI->getParent(); + if (InstBB == UserBB || L->contains(UserBB)) + continue; + // We currently only handle debug values residing in blocks where we have + // inserted a PHI instruction. + if (Value *V = SSAUpdate.FindValueForBlock(UserBB)) + DVI->setOperand(0, MetadataAsValue::get(Ctx, ValueAsMetadata::get(V))); } // SSAUpdater might have inserted phi-nodes inside other loops. 
We'll need // to post-process them to keep LCSSA form. for (PHINode *InsertedPN : InsertedPHIs) { if (auto *OtherLoop = LI.getLoopFor(InsertedPN->getParent())) if (!L->contains(OtherLoop)) PostProcessPHIs.push_back(InsertedPN); } // Post process PHI instructions that were inserted into another disjoint // loop and update their exits properly. for (auto *PostProcessPN : PostProcessPHIs) if (!PostProcessPN->use_empty()) Worklist.push_back(PostProcessPN); // Keep track of PHI nodes that we want to remove because they did not have // any uses rewritten. If the new PHI is used, store it so that we can // try to propagate dbg.value intrinsics to it. SmallVector NeedDbgValues; for (PHINode *PN : AddedPHIs) if (PN->use_empty()) PHIsToRemove.insert(PN); else NeedDbgValues.push_back(PN); insertDebugValuesForPHIs(InstBB, NeedDbgValues); Changed = true; } // Remove PHI nodes that did not have any uses rewritten. We need to redo the // use_empty() check here, because even if the PHI node wasn't used when added // to PHIsToRemove, later added PHI nodes can be using it. This cleanup is // not guaranteed to handle trees/cycles of PHI nodes that only are used by // each other. Such situations has only been noticed when the input IR // contains unreachable code, and leaving some extra redundant PHI nodes in // such situations is considered a minor problem. for (PHINode *PN : PHIsToRemove) if (PN->use_empty()) PN->eraseFromParent(); return Changed; } // Compute the set of BasicBlocks in the loop `L` dominating at least one exit. static void computeBlocksDominatingExits( Loop &L, DominatorTree &DT, SmallVector &ExitBlocks, SmallSetVector &BlocksDominatingExits) { SmallVector BBWorklist; // We start from the exit blocks, as every block trivially dominates itself // (not strictly). for (BasicBlock *BB : ExitBlocks) BBWorklist.push_back(BB); while (!BBWorklist.empty()) { BasicBlock *BB = BBWorklist.pop_back_val(); // Check if this is a loop header. If this is the case, we're done. if (L.getHeader() == BB) continue; // Otherwise, add its immediate predecessor in the dominator tree to the // worklist, unless we visited it already. BasicBlock *IDomBB = DT.getNode(BB)->getIDom()->getBlock(); // Exit blocks can have an immediate dominator not beloinging to the // loop. For an exit block to be immediately dominated by another block // outside the loop, it implies not all paths from that dominator, to the // exit block, go through the loop. // Example: // // |---- A // | | // | B<-- // | | | // |---> C -- // | // D // // C is the exit block of the loop and it's immediately dominated by A, // which doesn't belong to the loop. if (!L.contains(IDomBB)) continue; if (BlocksDominatingExits.insert(IDomBB)) BBWorklist.push_back(IDomBB); } } bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE) { bool Changed = false; SmallVector ExitBlocks; L.getExitBlocks(ExitBlocks); if (ExitBlocks.empty()) return false; SmallSetVector BlocksDominatingExits; // We want to avoid use-scanning leveraging dominance informations. // If a block doesn't dominate any of the loop exits, the none of the values // defined in the loop can be used outside. // We compute the set of blocks fullfilling the conditions in advance // walking the dominator tree upwards until we hit a loop header. computeBlocksDominatingExits(L, DT, ExitBlocks, BlocksDominatingExits); SmallVector Worklist; // Look at all the instructions in the loop, checking to see if they have uses // outside the loop. 
If so, put them into the worklist to rewrite those uses. for (BasicBlock *BB : BlocksDominatingExits) { for (Instruction &I : *BB) { // Reject two common cases fast: instructions with no uses (like stores) // and instructions with one use that is in the same block as this. if (I.use_empty() || (I.hasOneUse() && I.user_back()->getParent() == BB && !isa(I.user_back()))) continue; // Tokens cannot be used in PHI nodes, so we skip over them. // We can run into tokens which are live out of a loop with catchswitch // instructions in Windows EH if the catchswitch has one catchpad which // is inside the loop and another which is not. if (I.getType()->isTokenTy()) continue; Worklist.push_back(&I); } } Changed = formLCSSAForInstructions(Worklist, DT, *LI); // If we modified the code, remove any caches about the loop from SCEV to // avoid dangling entries. // FIXME: This is a big hammer, can we clear the cache more selectively? if (SE && Changed) SE->forgetLoop(&L); assert(L.isLCSSAForm(DT)); return Changed; } /// Process a loop nest depth first. bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE) { bool Changed = false; // Recurse depth-first through inner loops. for (Loop *SubLoop : L.getSubLoops()) Changed |= formLCSSARecursively(*SubLoop, DT, LI, SE); Changed |= formLCSSA(L, DT, LI, SE); return Changed; } /// Process all loops in the function, inner-most out. static bool formLCSSAOnAllLoops(LoopInfo *LI, DominatorTree &DT, ScalarEvolution *SE) { bool Changed = false; for (auto &L : *LI) Changed |= formLCSSARecursively(*L, DT, LI, SE); return Changed; } namespace { struct LCSSAWrapperPass : public FunctionPass { static char ID; // Pass identification, replacement for typeid LCSSAWrapperPass() : FunctionPass(ID) { initializeLCSSAWrapperPassPass(*PassRegistry::getPassRegistry()); } // Cached analysis information for the current function. DominatorTree *DT; LoopInfo *LI; ScalarEvolution *SE; bool runOnFunction(Function &F) override; void verifyAnalysis() const override { // This check is very expensive. On the loop intensive compiles it may cause // up to 10x slowdown. Currently it's disabled by default. LPPassManager // always does limited form of the LCSSA verification. Similar reasoning // was used for the LoopInfo verifier. if (VerifyLoopLCSSA) { assert(all_of(*LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT, *LI); }) && "LCSSA form is broken!"); } }; /// This transformation requires natural loop information & requires that /// loop preheaders be inserted into the CFG. It maintains both of these, /// as well as the CFG. It also requires dominator information. void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired(); AU.addRequired(); AU.addPreservedID(LoopSimplifyID); AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); // This is needed to perform LCSSA verification inside LPPassManager AU.addRequired(); AU.addPreserved(); } }; } char LCSSAWrapperPass::ID = 0; INITIALIZE_PASS_BEGIN(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LCSSAVerificationPass) INITIALIZE_PASS_END(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass", false, false) Pass *llvm::createLCSSAPass() { return new LCSSAWrapperPass(); } char &llvm::LCSSAID = LCSSAWrapperPass::ID; /// Transform \p F into loop-closed SSA form. 
bool LCSSAWrapperPass::runOnFunction(Function &F) { LI = &getAnalysis().getLoopInfo(); DT = &getAnalysis().getDomTree(); auto *SEWP = getAnalysisIfAvailable(); SE = SEWP ? &SEWP->getSE() : nullptr; return formLCSSAOnAllLoops(LI, *DT, SE); } PreservedAnalyses LCSSAPass::run(Function &F, FunctionAnalysisManager &AM) { auto &LI = AM.getResult(F); auto &DT = AM.getResult(F); auto *SE = AM.getCachedResult(F); if (!formLCSSAOnAllLoops(&LI, DT, SE)) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserveSet(); PA.preserve(); PA.preserve(); PA.preserve(); PA.preserve(); return PA; } Index: vendor/llvm/dist-release_70/lib/Transforms/Utils/SSAUpdater.cpp =================================================================== --- vendor/llvm/dist-release_70/lib/Transforms/Utils/SSAUpdater.cpp (revision 341364) +++ vendor/llvm/dist-release_70/lib/Transforms/Utils/SSAUpdater.cpp (revision 341365) @@ -1,492 +1,497 @@ //===- SSAUpdater.cpp - Unstructured SSA Update Tool ----------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the SSAUpdater class. // //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/Use.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/SSAUpdaterImpl.h" #include #include using namespace llvm; #define DEBUG_TYPE "ssaupdater" using AvailableValsTy = DenseMap; static AvailableValsTy &getAvailableVals(void *AV) { return *static_cast(AV); } SSAUpdater::SSAUpdater(SmallVectorImpl *NewPHI) : InsertedPHIs(NewPHI) {} SSAUpdater::~SSAUpdater() { delete static_cast(AV); } void SSAUpdater::Initialize(Type *Ty, StringRef Name) { if (!AV) AV = new AvailableValsTy(); else getAvailableVals(AV).clear(); ProtoType = Ty; ProtoName = Name; } bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const { return getAvailableVals(AV).count(BB); } +Value *SSAUpdater::FindValueForBlock(BasicBlock *BB) const { + AvailableValsTy::iterator AVI = getAvailableVals(AV).find(BB); + return (AVI != getAvailableVals(AV).end()) ? AVI->second : nullptr; +} + void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { assert(ProtoType && "Need to initialize SSAUpdater"); assert(ProtoType == V->getType() && "All rewritten values must have the same type"); getAvailableVals(AV)[BB] = V; } static bool IsEquivalentPHI(PHINode *PHI, SmallDenseMap &ValueMapping) { unsigned PHINumValues = PHI->getNumIncomingValues(); if (PHINumValues != ValueMapping.size()) return false; // Scan the phi to see if it matches. 
for (unsigned i = 0, e = PHINumValues; i != e; ++i) if (ValueMapping[PHI->getIncomingBlock(i)] != PHI->getIncomingValue(i)) { return false; } return true; } Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) { Value *Res = GetValueAtEndOfBlockInternal(BB); return Res; } Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // If there is no definition of the renamed variable in this block, just use // GetValueAtEndOfBlock to do our work. if (!HasValueForBlock(BB)) return GetValueAtEndOfBlock(BB); // Otherwise, we have the hard case. Get the live-in values for each // predecessor. SmallVector, 8> PredValues; Value *SingularValue = nullptr; // We can get our predecessor info by walking the pred_iterator list, but it // is relatively slow. If we already have PHI nodes in this block, walk one // of them to get the predecessor list instead. if (PHINode *SomePhi = dyn_cast(BB->begin())) { for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) { BasicBlock *PredBB = SomePhi->getIncomingBlock(i); Value *PredVal = GetValueAtEndOfBlock(PredBB); PredValues.push_back(std::make_pair(PredBB, PredVal)); // Compute SingularValue. if (i == 0) SingularValue = PredVal; else if (PredVal != SingularValue) SingularValue = nullptr; } } else { bool isFirstPred = true; for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { BasicBlock *PredBB = *PI; Value *PredVal = GetValueAtEndOfBlock(PredBB); PredValues.push_back(std::make_pair(PredBB, PredVal)); // Compute SingularValue. if (isFirstPred) { SingularValue = PredVal; isFirstPred = false; } else if (PredVal != SingularValue) SingularValue = nullptr; } } // If there are no predecessors, just return undef. if (PredValues.empty()) return UndefValue::get(ProtoType); // Otherwise, if all the merged values are the same, just use it. if (SingularValue) return SingularValue; // Otherwise, we do need a PHI: check to see if we already have one available // in this block that produces the right value. if (isa(BB->begin())) { SmallDenseMap ValueMapping(PredValues.begin(), PredValues.end()); for (PHINode &SomePHI : BB->phis()) { if (IsEquivalentPHI(&SomePHI, ValueMapping)) return &SomePHI; } } // Ok, we have no way out, insert a new one now. PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(), ProtoName, &BB->front()); // Fill in all the predecessors of the PHI. for (const auto &PredValue : PredValues) InsertedPHI->addIncoming(PredValue.second, PredValue.first); // See if the PHI node can be merged to a single value. This can happen in // loop cases when we get a PHI of itself and one other value. if (Value *V = SimplifyInstruction(InsertedPHI, BB->getModule()->getDataLayout())) { InsertedPHI->eraseFromParent(); return V; } // Set the DebugLoc of the inserted PHI, if available. DebugLoc DL; if (const Instruction *I = BB->getFirstNonPHI()) DL = I->getDebugLoc(); InsertedPHI->setDebugLoc(DL); // If the client wants to know about all new instructions, tell it. if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); LLVM_DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); return InsertedPHI; } void SSAUpdater::RewriteUse(Use &U) { Instruction *User = cast(U.getUser()); Value *V; if (PHINode *UserPN = dyn_cast(User)) V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U)); else V = GetValueInMiddleOfBlock(User->getParent()); // Notify that users of the existing value that it is being replaced. 
Value *OldVal = U.get(); if (OldVal != V && OldVal->hasValueHandle()) ValueHandleBase::ValueIsRAUWd(OldVal, V); U.set(V); } void SSAUpdater::RewriteUseAfterInsertions(Use &U) { Instruction *User = cast(U.getUser()); Value *V; if (PHINode *UserPN = dyn_cast(User)) V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U)); else V = GetValueAtEndOfBlock(User->getParent()); U.set(V); } namespace llvm { template<> class SSAUpdaterTraits { public: using BlkT = BasicBlock; using ValT = Value *; using PhiT = PHINode; using BlkSucc_iterator = succ_iterator; static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); } static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); } class PHI_iterator { private: PHINode *PHI; unsigned idx; public: explicit PHI_iterator(PHINode *P) // begin iterator : PHI(P), idx(0) {} PHI_iterator(PHINode *P, bool) // end iterator : PHI(P), idx(PHI->getNumIncomingValues()) {} PHI_iterator &operator++() { ++idx; return *this; } bool operator==(const PHI_iterator& x) const { return idx == x.idx; } bool operator!=(const PHI_iterator& x) const { return !operator==(x); } Value *getIncomingValue() { return PHI->getIncomingValue(idx); } BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); } }; static PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); } static PHI_iterator PHI_end(PhiT *PHI) { return PHI_iterator(PHI, true); } /// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds /// vector, set Info->NumPreds, and allocate space in Info->Preds. static void FindPredecessorBlocks(BasicBlock *BB, SmallVectorImpl *Preds) { // We can get our predecessor info by walking the pred_iterator list, // but it is relatively slow. If we already have PHI nodes in this // block, walk one of them to get the predecessor list instead. if (PHINode *SomePhi = dyn_cast(BB->begin())) { Preds->append(SomePhi->block_begin(), SomePhi->block_end()); } else { for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) Preds->push_back(*PI); } } /// GetUndefVal - Get an undefined value of the same type as the value /// being handled. static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) { return UndefValue::get(Updater->ProtoType); } /// CreateEmptyPHI - Create a new PHI instruction in the specified block. /// Reserve space for the operands but do not fill them in yet. static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds, SSAUpdater *Updater) { PHINode *PHI = PHINode::Create(Updater->ProtoType, NumPreds, Updater->ProtoName, &BB->front()); return PHI; } /// AddPHIOperand - Add the specified value as an operand of the PHI for /// the specified predecessor block. static void AddPHIOperand(PHINode *PHI, Value *Val, BasicBlock *Pred) { PHI->addIncoming(Val, Pred); } /// InstrIsPHI - Check if an instruction is a PHI. /// static PHINode *InstrIsPHI(Instruction *I) { return dyn_cast(I); } /// ValueIsPHI - Check if a value is a PHI. static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) { return dyn_cast(Val); } /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source /// operands, i.e., it was just added. static PHINode *ValueIsNewPHI(Value *Val, SSAUpdater *Updater) { PHINode *PHI = ValueIsPHI(Val, Updater); if (PHI && PHI->getNumIncomingValues() == 0) return PHI; return nullptr; } /// GetPHIValue - For the specified PHI instruction, return the value /// that it defines. 
static Value *GetPHIValue(PHINode *PHI) { return PHI; } }; } // end namespace llvm /// Check to see if AvailableVals has an entry for the specified BB and if so, /// return it. If not, construct SSA form by first calculating the required /// placement of PHIs and then inserting new PHIs where needed. Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { AvailableValsTy &AvailableVals = getAvailableVals(AV); if (Value *V = AvailableVals[BB]) return V; SSAUpdaterImpl Impl(this, &AvailableVals, InsertedPHIs); return Impl.GetValue(BB); } //===----------------------------------------------------------------------===// // LoadAndStorePromoter Implementation //===----------------------------------------------------------------------===// LoadAndStorePromoter:: LoadAndStorePromoter(ArrayRef Insts, SSAUpdater &S, StringRef BaseName) : SSA(S) { if (Insts.empty()) return; const Value *SomeVal; if (const LoadInst *LI = dyn_cast(Insts[0])) SomeVal = LI; else SomeVal = cast(Insts[0])->getOperand(0); if (BaseName.empty()) BaseName = SomeVal->getName(); SSA.Initialize(SomeVal->getType(), BaseName); } void LoadAndStorePromoter:: run(const SmallVectorImpl &Insts) const { // First step: bucket up uses of the alloca by the block they occur in. // This is important because we have to handle multiple defs/uses in a block // ourselves: SSAUpdater is purely for cross-block references. DenseMap> UsesByBlock; for (Instruction *User : Insts) UsesByBlock[User->getParent()].push_back(User); // Okay, now we can iterate over all the blocks in the function with uses, // processing them. Keep track of which loads are loading a live-in value. // Walk the uses in the use-list order to be determinstic. SmallVector LiveInLoads; DenseMap ReplacedLoads; for (Instruction *User : Insts) { BasicBlock *BB = User->getParent(); TinyPtrVector &BlockUses = UsesByBlock[BB]; // If this block has already been processed, ignore this repeat use. if (BlockUses.empty()) continue; // Okay, this is the first use in the block. If this block just has a // single user in it, we can rewrite it trivially. if (BlockUses.size() == 1) { // If it is a store, it is a trivial def of the value in the block. if (StoreInst *SI = dyn_cast(User)) { updateDebugInfo(SI); SSA.AddAvailableValue(BB, SI->getOperand(0)); } else // Otherwise it is a load, queue it to rewrite as a live-in load. LiveInLoads.push_back(cast(User)); BlockUses.clear(); continue; } // Otherwise, check to see if this block is all loads. bool HasStore = false; for (Instruction *I : BlockUses) { if (isa(I)) { HasStore = true; break; } } // If so, we can queue them all as live in loads. We don't have an // efficient way to tell which on is first in the block and don't want to // scan large blocks, so just add all loads as live ins. if (!HasStore) { for (Instruction *I : BlockUses) LiveInLoads.push_back(cast(I)); BlockUses.clear(); continue; } // Otherwise, we have mixed loads and stores (or just a bunch of stores). // Since SSAUpdater is purely for cross-block values, we need to determine // the order of these instructions in the block. If the first use in the // block is a load, then it uses the live in value. The last store defines // the live out value. We handle this by doing a linear scan of the block. Value *StoredValue = nullptr; for (Instruction &I : *BB) { if (LoadInst *L = dyn_cast(&I)) { // If this is a load from an unrelated pointer, ignore it. 
if (!isInstInList(L, Insts)) continue; // If we haven't seen a store yet, this is a live in use, otherwise // use the stored value. if (StoredValue) { replaceLoadWithValue(L, StoredValue); L->replaceAllUsesWith(StoredValue); ReplacedLoads[L] = StoredValue; } else { LiveInLoads.push_back(L); } continue; } if (StoreInst *SI = dyn_cast(&I)) { // If this is a store to an unrelated pointer, ignore it. if (!isInstInList(SI, Insts)) continue; updateDebugInfo(SI); // Remember that this is the active value in the block. StoredValue = SI->getOperand(0); } } // The last stored value that happened is the live-out for the block. assert(StoredValue && "Already checked that there is a store in block"); SSA.AddAvailableValue(BB, StoredValue); BlockUses.clear(); } // Okay, now we rewrite all loads that use live-in values in the loop, // inserting PHI nodes as necessary. for (LoadInst *ALoad : LiveInLoads) { Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent()); replaceLoadWithValue(ALoad, NewVal); // Avoid assertions in unreachable code. if (NewVal == ALoad) NewVal = UndefValue::get(NewVal->getType()); ALoad->replaceAllUsesWith(NewVal); ReplacedLoads[ALoad] = NewVal; } // Allow the client to do stuff before we start nuking things. doExtraRewritesBeforeFinalDeletion(); // Now that everything is rewritten, delete the old instructions from the // function. They should all be dead now. for (Instruction *User : Insts) { // If this is a load that still has uses, then the load must have been added // as a live value in the SSAUpdate data structure for a block (e.g. because // the loaded value was stored later). In this case, we need to recursively // propagate the updates until we get to the real value. if (!User->use_empty()) { Value *NewVal = ReplacedLoads[User]; assert(NewVal && "not a replaced load?"); // Propagate down to the ultimate replacee. The intermediately loads // could theoretically already have been deleted, so we don't want to // dereference the Value*'s. 
DenseMap::iterator RLI = ReplacedLoads.find(NewVal); while (RLI != ReplacedLoads.end()) { NewVal = RLI->second; RLI = ReplacedLoads.find(NewVal); } replaceLoadWithValue(cast(User), NewVal); User->replaceAllUsesWith(NewVal); } instructionDeleted(User); User->eraseFromParent(); } } bool LoadAndStorePromoter::isInstInList(Instruction *I, const SmallVectorImpl &Insts) const { return is_contained(Insts, I); } Index: vendor/llvm/dist-release_70/test/CodeGen/Mips/Fast-ISel/bricmpi1.ll =================================================================== --- vendor/llvm/dist-release_70/test/CodeGen/Mips/Fast-ISel/bricmpi1.ll (nonexistent) +++ vendor/llvm/dist-release_70/test/CodeGen/Mips/Fast-ISel/bricmpi1.ll (revision 341365) @@ -0,0 +1,189 @@ +; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel=true -mcpu=mips32r2 \ +; RUN: < %s -verify-machineinstrs | FileCheck %s + +define void @testeq(i32, i32) { +; CHECK-LABEL: testeq: +; CHECK: andi $[[REG0:[0-9]+]], $4, 1 +; CHECK: andi $[[REG1:[0-9]+]], $5, 1 +; CHECK: beq $[[REG0]], $[[REG1]], + %3 = trunc i32 %0 to i1 + %4 = trunc i32 %1 to i1 + %5 = icmp eq i1 %3, %4 + br i1 %5, label %end, label %trap +trap: + call void @llvm.trap() + br label %end +end: + ret void +} + + +define void @testne(i32, i32) { +; CHECK-LABEL: testne: +; CHECK: andi $[[REG0:[0-9]+]], $4, 1 +; CHECK: andi $[[REG1:[0-9]+]], $5, 1 +; CHECK: bne $[[REG0]], $[[REG1]], + %3 = trunc i32 %0 to i1 + %4 = trunc i32 %1 to i1 + %5 = icmp ne i1 %3, %4 + br i1 %5, label %end, label %trap +trap: + call void @llvm.trap() + br label %end +end: + ret void +} + + +define void @testugt(i32, i32) { +; CHECK-LABEL: testugt: +; CHECK: andi $[[REG0:[0-9]+]], $4, 1 +; CHECK: andi $[[REG1:[0-9]+]], $5, 1 +; CHECK: sltu $[[REG2:[0-9]+]], $[[REG1]], $[[REG0]] +; CHECK: bnez $[[REG2]], + %3 = trunc i32 %0 to i1 + %4 = trunc i32 %1 to i1 + %5 = icmp ugt i1 %3, %4 + br i1 %5, label %end, label %trap +trap: + call void @llvm.trap() + br label %end +end: + ret void +} + + +define void @testuge(i32, i32) { +; CHECK-LABEL: testuge: +; CHECK: andi $[[REG0:[0-9]+]], $4, 1 +; CHECK: andi $[[REG1:[0-9]+]], $5, 1 +; CHECK: sltu $[[REG2:[0-9]+]], $[[REG0]], $[[REG1]] +; CHECK: beqz $[[REG2]], + %3 = trunc i32 %0 to i1 + %4 = trunc i32 %1 to i1 + %5 = icmp uge i1 %3, %4 + br i1 %5, label %end, label %trap +trap: + call void @llvm.trap() + br label %end +end: + ret void +} + + +define void @testult(i32, i32) { +; CHECK-LABEL: testult: +; CHECK: andi $[[REG0:[0-9]+]], $4, 1 +; CHECK: andi $[[REG1:[0-9]+]], $5, 1 +; CHECK: sltu $[[REG2:[0-9]+]], $[[REG0]], $[[REG1]] +; CHECK: bnez $[[REG2]], + %3 = trunc i32 %0 to i1 + %4 = trunc i32 %1 to i1 + %5 = icmp ult i1 %3, %4 + br i1 %5, label %end, label %trap +trap: + call void @llvm.trap() + br label %end +end: + ret void +} + + +define void @testule(i32, i32) { +; CHECK: andi $[[REG0:[0-9]+]], $4, 1 +; CHECK: andi $[[REG1:[0-9]+]], $5, 1 +; CHECK: sltu $[[REG2:[0-9]+]], $[[REG1]], $[[REG0]] +; CHECK: beqz $[[REG2]], + %3 = trunc i32 %0 to i1 + %4 = trunc i32 %1 to i1 + %5 = icmp ule i1 %3, %4 + br i1 %5, label %end, label %trap +trap: + call void @llvm.trap() + br label %end +end: + ret void +} + + +define void @testsgt(i32, i32) { +; CHECK-LABEL: testsgt: +; CHECK: andi $[[REG0:[0-9]+]], $4, 1 +; CHECK: negu $[[REG0]], $[[REG0]] +; CHECK: andi $[[REG1:[0-9]+]], $5, 1 +; CHECK: negu $[[REG1]], $[[REG1]] +; CHECK: slt $[[REG2:[0-9]+]], $[[REG1]], $[[REG0]] +; CHECK: bnez $[[REG2]], + %3 = trunc i32 %0 to i1 + %4 = trunc i32 %1 to i1 + %5 = icmp sgt i1 %3, %4 + br i1 
%5, label %end, label %trap +trap: + call void @llvm.trap() + br label %end +end: + ret void +} + + +define void @testsge(i32, i32) { +; CHECK-LABEL: testsge: +; CHECK: andi $[[REG0:[0-9]+]], $4, 1 +; CHECK: negu $[[REG0]], $[[REG0]] +; CHECK: andi $[[REG1:[0-9]+]], $5, 1 +; CHECK: negu $[[REG1]], $[[REG1]] +; CHECK: slt $[[REG2:[0-9]+]], $[[REG0]], $[[REG1]] +; CHECK: beqz $[[REG2]], + %3 = trunc i32 %0 to i1 + %4 = trunc i32 %1 to i1 + %5 = icmp sge i1 %3, %4 + br i1 %5, label %end, label %trap +trap: + call void @llvm.trap() + br label %end +end: + ret void +} + + +define void @testslt(i32, i32) { +; CHECK-LABEL: testslt: +; CHECK: andi $[[REG0:[0-9]+]], $4, 1 +; CHECK: negu $[[REG0]], $[[REG0]] +; CHECK: andi $[[REG1:[0-9]+]], $5, 1 +; CHECK: negu $[[REG1]], $[[REG1]] +; CHECK: slt $[[REG2:[0-9]+]], $[[REG0]], $[[REG1]] +; CHECK: bnez $[[REG2]], + %3 = trunc i32 %0 to i1 + %4 = trunc i32 %1 to i1 + %5 = icmp slt i1 %3, %4 + br i1 %5, label %end, label %trap +trap: + call void @llvm.trap() + br label %end +end: + ret void +} + + +define void @testsle(i32, i32) { +; CHECK-LABEL: testsle: +; CHECK: andi $[[REG0:[0-9]+]], $4, 1 +; CHECK: negu $[[REG0]], $[[REG0]] +; CHECK: andi $[[REG1:[0-9]+]], $5, 1 +; CHECK: negu $[[REG1]], $[[REG1]] +; CHECK: slt $[[REG2:[0-9]+]], $[[REG1]], $[[REG0]] +; CHECK: beqz $[[REG2]], + %3 = trunc i32 %0 to i1 + %4 = trunc i32 %1 to i1 + %5 = icmp sle i1 %3, %4 + br i1 %5, label %end, label %trap +trap: + call void @llvm.trap() + br label %end +end: + ret void +} + + +declare void @llvm.trap() Index: vendor/llvm/dist-release_70/test/CodeGen/Mips/buildpairf64-extractelementf64-implicit-sp.ll =================================================================== --- vendor/llvm/dist-release_70/test/CodeGen/Mips/buildpairf64-extractelementf64-implicit-sp.ll (nonexistent) +++ vendor/llvm/dist-release_70/test/CodeGen/Mips/buildpairf64-extractelementf64-implicit-sp.ll (revision 341365) @@ -0,0 +1,32 @@ +; RUN: llc -o - %s -mtriple=mips-unknown-linux-gnu \ +; RUN: -mcpu=mips32 -mattr=+fpxx \ +; RUN: -stop-after=expand-isel-pseudos | \ +; RUN: FileCheck %s -check-prefix=FPXX-IMPLICIT-SP + +; RUN: llc -o - %s -mtriple=mips-unknown-linux-gnu \ +; RUN: -mcpu=mips32r6 -mattr=+fp64,+nooddspreg \ +; RUN: -stop-after=expand-isel-pseudos | \ +; RUN: FileCheck %s -check-prefix=FP64-IMPLICIT-SP + +; RUN: llc -o - %s -mtriple=mips-unknown-linux-gnu \ +; RUN: -mcpu=mips32r2 -mattr=+fpxx \ +; RUN: -stop-after=expand-isel-pseudos | \ +; RUN: FileCheck %s -check-prefix=NO-IMPLICIT-SP + +define double @foo2(i32 signext %v1, double %d1) { +entry: +; FPXX-IMPLICIT-SP: BuildPairF64 %{{[0-9]+}}, %{{[0-9]+}}, implicit $sp +; FPXX-IMPLICIT-SP: ExtractElementF64 killed %{{[0-9]+}}, 1, implicit $sp +; FP64-IMPLICIT-SP: BuildPairF64_64 %{{[0-9]+}}, %{{[0-9]+}}, implicit $sp +; FP64-IMPLICIT-SP: ExtractElementF64_64 killed %{{[0-9]+}}, 1, implicit $sp +; NO-IMPLICIT-SP: BuildPairF64 %{{[0-9]+}}, %{{[0-9]+}} +; NO-IMPLICIT-SP-NOT: BuildPairF64 %{{[0-9]+}}, %{{[0-9]+}}, implicit $sp +; NO-IMPLICIT-SP: ExtractElementF64 killed %{{[0-9]+}}, 1 +; NO-IMPLICIT-SP-NOT: ExtractElementF64 killed %{{[0-9]+}}, 1, implicit $sp + %conv = fptrunc double %d1 to float + %0 = tail call float @llvm.copysign.f32(float 1.000000e+00, float %conv) + %conv1 = fpext float %0 to double + ret double %conv1 +} + +declare float @llvm.copysign.f32(float, float) Index: vendor/llvm/dist-release_70/test/CodeGen/Mips/longbranch.ll =================================================================== --- 
vendor/llvm/dist-release_70/test/CodeGen/Mips/longbranch.ll (revision 341364) +++ vendor/llvm/dist-release_70/test/CodeGen/Mips/longbranch.ll (revision 341365) @@ -1,325 +1,322 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; Except for the NACL version which isn't parsed by update_llc_test_checks.py ; RUN: llc -mtriple=mipsel-unknown-linux-gnu -O3 -relocation-model=pic < %s \ ; RUN: | FileCheck %s -check-prefix=NOLONGBRANCH ; RUN: llc -mtriple=mipsel-unknown-linux-gnu -force-mips-long-branch -O3 -relocation-model=pic < %s \ ; RUN: | FileCheck %s -check-prefix=O32-PIC ; RUN: llc -mtriple=mipsel-unknown-linux-gnu -force-mips-long-branch -O3 -relocation-model=static < %s \ ; RUN: | FileCheck %s -check-prefix=O32-STATIC ; RUN: llc -mtriple=mipsel-unknown-linux-gnu -mcpu=mips32r6 -force-mips-long-branch -O3 \ ; RUN: -relocation-model=pic < %s | FileCheck %s -check-prefix=O32-R6-PIC ; RUN: llc -mtriple=mips64el-unknown-linux-gnu -mcpu=mips4 -target-abi=n64 -force-mips-long-branch -O3 -relocation-model=pic \ ; RUN: < %s | FileCheck %s -check-prefix=MIPS4 ; RUN: llc -mtriple=mips64el-unknown-linux-gnu -mcpu=mips64 -target-abi=n64 -force-mips-long-branch -O3 -relocation-model=pic \ ; RUN: < %s | FileCheck %s -check-prefix=MIPS64 ; RUN: llc -mtriple=mips64el-unknown-linux-gnu -mcpu=mips64r6 -target-abi=n64 -force-mips-long-branch -O3 \ ; RUN: -relocation-model=pic < %s | FileCheck %s -check-prefix=N64-R6 ; RUN: llc -mtriple=mipsel-unknown-linux-gnu -mcpu=mips32r2 -mattr=micromips \ ; RUN: -force-mips-long-branch -O3 -relocation-model=pic < %s | FileCheck %s -check-prefix=MICROMIPS ; RUN: llc -mtriple=mipsel-unknown-linux-gnu -mcpu=mips32r2 -mattr=micromips \ ; RUN: -force-mips-long-branch -O3 -relocation-model=static < %s | FileCheck %s -check-prefix=MICROMIPSSTATIC ; RUN: llc -mtriple=mipsel-unknown-linux-gnu -mcpu=mips32r6 -mattr=micromips \ ; RUN: -force-mips-long-branch -O3 -relocation-model=static < %s | FileCheck %s -check-prefix=MICROMIPSR6STATIC ; RUN: llc -mtriple=mipsel-unknown-linux-gnu -mcpu=mips32r6 -mattr=micromips \ ; RUN: -force-mips-long-branch -O3 -relocation-model=pic < %s | FileCheck %s -check-prefix=MICROMIPSR6PIC ; RUN: llc -mtriple=mipsel-none-nacl -force-mips-long-branch -O3 -relocation-model=pic < %s \ ; RUN: | FileCheck %s -check-prefix=NACL @x = external global i32 define void @test1(i32 signext %s) { ; NOLONGBRANCH-LABEL: test1: ; NOLONGBRANCH: # %bb.0: # %entry ; NOLONGBRANCH-NEXT: lui $2, %hi(_gp_disp) ; NOLONGBRANCH-NEXT: addiu $2, $2, %lo(_gp_disp) ; NOLONGBRANCH-NEXT: beqz $4, $BB0_2 ; NOLONGBRANCH-NEXT: addu $2, $2, $25 ; NOLONGBRANCH-NEXT: # %bb.1: # %then ; NOLONGBRANCH-NEXT: lw $1, %got(x)($2) ; NOLONGBRANCH-NEXT: addiu $2, $zero, 1 ; NOLONGBRANCH-NEXT: sw $2, 0($1) ; NOLONGBRANCH-NEXT: $BB0_2: # %end ; NOLONGBRANCH-NEXT: jr $ra ; NOLONGBRANCH-NEXT: nop ; ; O32-PIC-LABEL: test1: ; O32-PIC: # %bb.0: # %entry ; O32-PIC-NEXT: lui $2, %hi(_gp_disp) ; O32-PIC-NEXT: addiu $2, $2, %lo(_gp_disp) ; O32-PIC-NEXT: bnez $4, $BB0_3 ; O32-PIC-NEXT: addu $2, $2, $25 ; O32-PIC-NEXT: # %bb.1: # %entry ; O32-PIC-NEXT: addiu $sp, $sp, -8 ; O32-PIC-NEXT: sw $ra, 0($sp) ; O32-PIC-NEXT: lui $1, %hi(($BB0_4)-($BB0_2)) ; O32-PIC-NEXT: bal $BB0_2 ; O32-PIC-NEXT: addiu $1, $1, %lo(($BB0_4)-($BB0_2)) ; O32-PIC-NEXT: $BB0_2: # %entry ; O32-PIC-NEXT: addu $1, $ra, $1 ; O32-PIC-NEXT: lw $ra, 0($sp) ; O32-PIC-NEXT: jr $1 ; O32-PIC-NEXT: addiu $sp, $sp, 8 ; O32-PIC-NEXT: $BB0_3: # %then ; O32-PIC-NEXT: lw $1, %got(x)($2) ; O32-PIC-NEXT: addiu $2, $zero, 1 ; 
O32-PIC-NEXT: sw $2, 0($1) ; O32-PIC-NEXT: $BB0_4: # %end ; O32-PIC-NEXT: jr $ra ; O32-PIC-NEXT: nop ; ; O32-STATIC-LABEL: test1: ; O32-STATIC: # %bb.0: # %entry ; O32-STATIC-NEXT: bnez $4, $BB0_2 ; O32-STATIC-NEXT: nop ; O32-STATIC-NEXT: # %bb.1: # %entry ; O32-STATIC-NEXT: j $BB0_3 ; O32-STATIC-NEXT: nop ; O32-STATIC-NEXT: $BB0_2: # %then ; O32-STATIC-NEXT: lui $1, %hi(x) ; O32-STATIC-NEXT: addiu $2, $zero, 1 ; O32-STATIC-NEXT: sw $2, %lo(x)($1) ; O32-STATIC-NEXT: $BB0_3: # %end ; O32-STATIC-NEXT: jr $ra ; O32-STATIC-NEXT: nop ; ; O32-R6-PIC-LABEL: test1: ; O32-R6-PIC: # %bb.0: # %entry ; O32-R6-PIC-NEXT: lui $2, %hi(_gp_disp) ; O32-R6-PIC-NEXT: addiu $2, $2, %lo(_gp_disp) ; O32-R6-PIC-NEXT: bnez $4, $BB0_3 ; O32-R6-PIC-NEXT: addu $2, $2, $25 ; O32-R6-PIC-NEXT: # %bb.1: # %entry ; O32-R6-PIC-NEXT: addiu $sp, $sp, -8 ; O32-R6-PIC-NEXT: sw $ra, 0($sp) ; O32-R6-PIC-NEXT: lui $1, %hi(($BB0_4)-($BB0_2)) ; O32-R6-PIC-NEXT: addiu $1, $1, %lo(($BB0_4)-($BB0_2)) ; O32-R6-PIC-NEXT: balc $BB0_2 ; O32-R6-PIC-NEXT: $BB0_2: # %entry ; O32-R6-PIC-NEXT: addu $1, $ra, $1 ; O32-R6-PIC-NEXT: lw $ra, 0($sp) ; O32-R6-PIC-NEXT: addiu $sp, $sp, 8 ; O32-R6-PIC-NEXT: jrc $1 ; O32-R6-PIC-NEXT: $BB0_3: # %then ; O32-R6-PIC-NEXT: lw $1, %got(x)($2) ; O32-R6-PIC-NEXT: addiu $2, $zero, 1 ; O32-R6-PIC-NEXT: sw $2, 0($1) ; O32-R6-PIC-NEXT: $BB0_4: # %end ; O32-R6-PIC-NEXT: jrc $ra ; ; O32-R6-STATIC-LABEL: test1: ; O32-R6-STATIC: # %bb.0: # %entry ; O32-R6-STATIC-NEXT: bnezc $4, $BB0_2 ; O32-R6-STATIC-NEXT: nop ; O32-R6-STATIC-NEXT: # %bb.1: # %entry ; O32-R6-STATIC-NEXT: bc $BB0_3 ; O32-R6-STATIC-NEXT: $BB0_2: # %then ; O32-R6-STATIC-NEXT: lui $1, %hi(x) ; O32-R6-STATIC-NEXT: addiu $2, $zero, 1 ; O32-R6-STATIC-NEXT: sw $2, %lo(x)($1) ; O32-R6-STATIC-NEXT: $BB0_3: # %end ; O32-R6-STATIC-NEXT: jrc $ra ; ; MIPS4-LABEL: test1: ; MIPS4: # %bb.0: # %entry ; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(test1))) ; MIPS4-NEXT: bnez $4, .LBB0_3 ; MIPS4-NEXT: daddu $2, $1, $25 ; MIPS4-NEXT: # %bb.1: # %entry ; MIPS4-NEXT: daddiu $sp, $sp, -16 ; MIPS4-NEXT: sd $ra, 0($sp) ; MIPS4-NEXT: daddiu $1, $zero, %hi(.LBB0_4-.LBB0_2) ; MIPS4-NEXT: dsll $1, $1, 16 ; MIPS4-NEXT: bal .LBB0_2 ; MIPS4-NEXT: daddiu $1, $1, %lo(.LBB0_4-.LBB0_2) ; MIPS4-NEXT: .LBB0_2: # %entry ; MIPS4-NEXT: daddu $1, $ra, $1 ; MIPS4-NEXT: ld $ra, 0($sp) ; MIPS4-NEXT: jr $1 ; MIPS4-NEXT: daddiu $sp, $sp, 16 ; MIPS4-NEXT: .LBB0_3: # %then ; MIPS4-NEXT: daddiu $1, $2, %lo(%neg(%gp_rel(test1))) ; MIPS4-NEXT: addiu $2, $zero, 1 ; MIPS4-NEXT: ld $1, %got_disp(x)($1) ; MIPS4-NEXT: sw $2, 0($1) ; MIPS4-NEXT: .LBB0_4: # %end ; MIPS4-NEXT: jr $ra ; MIPS4-NEXT: nop ; ; MIPS64-LABEL: test1: ; MIPS64: # %bb.0: # %entry ; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(test1))) ; MIPS64-NEXT: bnez $4, .LBB0_3 ; MIPS64-NEXT: daddu $2, $1, $25 ; MIPS64-NEXT: # %bb.1: # %entry ; MIPS64-NEXT: daddiu $sp, $sp, -16 ; MIPS64-NEXT: sd $ra, 0($sp) ; MIPS64-NEXT: daddiu $1, $zero, %hi(.LBB0_4-.LBB0_2) ; MIPS64-NEXT: dsll $1, $1, 16 ; MIPS64-NEXT: bal .LBB0_2 ; MIPS64-NEXT: daddiu $1, $1, %lo(.LBB0_4-.LBB0_2) ; MIPS64-NEXT: .LBB0_2: # %entry ; MIPS64-NEXT: daddu $1, $ra, $1 ; MIPS64-NEXT: ld $ra, 0($sp) ; MIPS64-NEXT: jr $1 ; MIPS64-NEXT: daddiu $sp, $sp, 16 ; MIPS64-NEXT: .LBB0_3: # %then ; MIPS64-NEXT: daddiu $1, $2, %lo(%neg(%gp_rel(test1))) ; MIPS64-NEXT: addiu $2, $zero, 1 ; MIPS64-NEXT: ld $1, %got_disp(x)($1) ; MIPS64-NEXT: sw $2, 0($1) ; MIPS64-NEXT: .LBB0_4: # %end ; MIPS64-NEXT: jr $ra ; MIPS64-NEXT: nop ; ; N64-R6-LABEL: test1: ; N64-R6: # %bb.0: # %entry ; N64-R6-NEXT: lui $1, 
%hi(%neg(%gp_rel(test1))) ; N64-R6-NEXT: bnez $4, .LBB0_3 ; N64-R6-NEXT: daddu $2, $1, $25 ; N64-R6-NEXT: # %bb.1: # %entry ; N64-R6-NEXT: daddiu $sp, $sp, -16 ; N64-R6-NEXT: sd $ra, 0($sp) ; N64-R6-NEXT: daddiu $1, $zero, %hi(.LBB0_4-.LBB0_2) ; N64-R6-NEXT: dsll $1, $1, 16 ; N64-R6-NEXT: daddiu $1, $1, %lo(.LBB0_4-.LBB0_2) ; N64-R6-NEXT: balc .LBB0_2 ; N64-R6-NEXT: .LBB0_2: # %entry ; N64-R6-NEXT: daddu $1, $ra, $1 ; N64-R6-NEXT: ld $ra, 0($sp) ; N64-R6-NEXT: daddiu $sp, $sp, 16 ; N64-R6-NEXT: jrc $1 ; N64-R6-NEXT: .LBB0_3: # %then ; N64-R6-NEXT: daddiu $1, $2, %lo(%neg(%gp_rel(test1))) ; N64-R6-NEXT: addiu $2, $zero, 1 ; N64-R6-NEXT: ld $1, %got_disp(x)($1) ; N64-R6-NEXT: sw $2, 0($1) ; N64-R6-NEXT: .LBB0_4: # %end ; N64-R6-NEXT: jrc $ra ; ; MICROMIPS-LABEL: test1: ; MICROMIPS: # %bb.0: # %entry ; MICROMIPS-NEXT: lui $2, %hi(_gp_disp) ; MICROMIPS-NEXT: addiu $2, $2, %lo(_gp_disp) ; MICROMIPS-NEXT: bnez $4, $BB0_3 ; MICROMIPS-NEXT: addu $2, $2, $25 ; MICROMIPS-NEXT: # %bb.1: # %entry ; MICROMIPS-NEXT: addiu $sp, $sp, -8 ; MICROMIPS-NEXT: sw $ra, 0($sp) ; MICROMIPS-NEXT: lui $1, %hi(($BB0_4)-($BB0_2)) ; MICROMIPS-NEXT: bal $BB0_2 ; MICROMIPS-NEXT: addiu $1, $1, %lo(($BB0_4)-($BB0_2)) ; MICROMIPS-NEXT: $BB0_2: # %entry ; MICROMIPS-NEXT: addu $1, $ra, $1 ; MICROMIPS-NEXT: lw $ra, 0($sp) ; MICROMIPS-NEXT: jr $1 ; MICROMIPS-NEXT: addiu $sp, $sp, 8 ; MICROMIPS-NEXT: $BB0_3: # %then ; MICROMIPS-NEXT: lw $2, %got(x)($2) ; MICROMIPS-NEXT: li16 $3, 1 ; MICROMIPS-NEXT: sw16 $3, 0($2) ; MICROMIPS-NEXT: $BB0_4: # %end ; MICROMIPS-NEXT: jrc $ra ; ; MICROMIPSSTATIC-LABEL: test1: ; MICROMIPSSTATIC: # %bb.0: # %entry ; MICROMIPSSTATIC-NEXT: bnezc $4, $BB0_2 ; MICROMIPSSTATIC-NEXT: # %bb.1: # %entry -; MICROMIPSSTATIC-NEXT: j $BB0_4 -; MICROMIPSSTATIC-NEXT: nop -; MICROMIPSSTATIC-NEXT: $BB0_2: # %entry ; MICROMIPSSTATIC-NEXT: j $BB0_3 ; MICROMIPSSTATIC-NEXT: nop -; MICROMIPSSTATIC-NEXT: $BB0_3: # %then +; MICROMIPSSTATIC-NEXT: $BB0_2: # %then ; MICROMIPSSTATIC-NEXT: lui $1, %hi(x) ; MICROMIPSSTATIC-NEXT: li16 $2, 1 ; MICROMIPSSTATIC-NEXT: sw $2, %lo(x)($1) -; MICROMIPSSTATIC-NEXT: $BB0_4: # %end +; MICROMIPSSTATIC-NEXT: $BB0_3: # %end ; MICROMIPSSTATIC-NEXT: jrc $ra ; ; MICROMIPSR6STATIC-LABEL: test1: ; MICROMIPSR6STATIC: # %bb.0: # %entry ; MICROMIPSR6STATIC-NEXT: bnezc $4, $BB0_2 ; MICROMIPSR6STATIC-NEXT: # %bb.1: # %entry ; MICROMIPSR6STATIC-NEXT: bc $BB0_3 ; MICROMIPSR6STATIC-NEXT: $BB0_2: # %then ; MICROMIPSR6STATIC-NEXT: lui $1, %hi(x) ; MICROMIPSR6STATIC-NEXT: li16 $2, 1 ; MICROMIPSR6STATIC-NEXT: sw $2, %lo(x)($1) ; MICROMIPSR6STATIC-NEXT: $BB0_3: # %end ; MICROMIPSR6STATIC-NEXT: jrc $ra ; ; MICROMIPSR6PIC-LABEL: test1: ; MICROMIPSR6PIC: # %bb.0: # %entry ; MICROMIPSR6PIC-NEXT: lui $2, %hi(_gp_disp) ; MICROMIPSR6PIC-NEXT: addiu $2, $2, %lo(_gp_disp) ; MICROMIPSR6PIC-NEXT: addu $2, $2, $25 ; MICROMIPSR6PIC-NEXT: bnezc $4, $BB0_3 ; MICROMIPSR6PIC-NEXT: # %bb.1: # %entry ; MICROMIPSR6PIC-NEXT: addiu $sp, $sp, -8 ; MICROMIPSR6PIC-NEXT: sw $ra, 0($sp) ; MICROMIPSR6PIC-NEXT: lui $1, %hi(($BB0_4)-($BB0_2)) ; MICROMIPSR6PIC-NEXT: addiu $1, $1, %lo(($BB0_4)-($BB0_2)) ; MICROMIPSR6PIC-NEXT: balc $BB0_2 ; MICROMIPSR6PIC-NEXT: $BB0_2: # %entry ; MICROMIPSR6PIC-NEXT: addu $1, $ra, $1 ; MICROMIPSR6PIC-NEXT: lw $ra, 0($sp) ; MICROMIPSR6PIC-NEXT: addiu $sp, $sp, 8 ; MICROMIPSR6PIC-NEXT: jic $1, 0 ; MICROMIPSR6PIC-NEXT: $BB0_3: # %then ; MICROMIPSR6PIC-NEXT: lw $2, %got(x)($2) ; MICROMIPSR6PIC-NEXT: li16 $3, 1 ; MICROMIPSR6PIC-NEXT: sw16 $3, 0($2) ; MICROMIPSR6PIC-NEXT: $BB0_4: # %end ; MICROMIPSR6PIC-NEXT: jrc $ra 
; NACL-LABEL: test1: ; NACL: # %bb.0: ; NACL-NEXT: lui $2, %hi(_gp_disp) ; NACL-NEXT: addiu $2, $2, %lo(_gp_disp) ; NACL-NEXT: bnez $4, $BB0_3 ; NACL-NEXT: addu $2, $2, $25 ; NACL-NEXT: # %bb.1: ; NACL-NEXT: addiu $sp, $sp, -8 ; NACL-NEXT: sw $ra, 0($sp) ; NACL-NEXT: lui $1, %hi(($BB0_4)-($BB0_2)) ; NACL-NEXT: bal $BB0_2 ; NACL-NEXT: addiu $1, $1, %lo(($BB0_4)-($BB0_2)) ; NACL-NEXT: $BB0_2: ; NACL-NEXT: addu $1, $ra, $1 ; NACL-NEXT: lw $ra, 0($sp) ; NACL-NEXT: addiu $sp, $sp, 8 ; NACL-NEXT: jr $1 ; NACL-NEXT: nop ; NACL-NEXT: $BB0_3: ; NACL-NEXT: lw $1, %got(x)($2) ; NACL-NEXT: addiu $2, $zero, 1 ; NACL-NEXT: sw $2, 0($1) ; NACL-NEXT: .p2align 4 ; NACL-NEXT: $BB0_4: ; NACL-NEXT: jr $ra ; NACL-NEXT: nop ; Check the NaCl version. Check that sp change is not in the branch delay slot ; of "jr $1" instruction. Check that target of indirect branch "jr $1" is ; bundle aligned. entry: %cmp = icmp eq i32 %s, 0 br i1 %cmp, label %end, label %then then: store i32 1, i32* @x, align 4 br label %end end: ret void } Index: vendor/llvm/dist-release_70/test/CodeGen/Mips/micromips-b-range.ll =================================================================== --- vendor/llvm/dist-release_70/test/CodeGen/Mips/micromips-b-range.ll (nonexistent) +++ vendor/llvm/dist-release_70/test/CodeGen/Mips/micromips-b-range.ll (revision 341365) @@ -0,0 +1,98 @@ +; RUN: llc -march=mips -relocation-model=pic -mattr=+micromips \ +; RUN: -filetype=obj -o - %s | llvm-objdump -d - | FileCheck %s + +; CHECK-LABEL: foo: +; CHECK-NEXT: 0: 41 a2 00 00 lui $2, 0 +; CHECK-NEXT: 4: 30 42 00 00 addiu $2, $2, 0 +; CHECK-NEXT: 8: 03 22 11 50 addu $2, $2, $25 +; CHECK-NEXT: c: fc 42 00 00 lw $2, 0($2) +; CHECK-NEXT: 10: 69 20 lw16 $2, 0($2) +; CHECK-NEXT: 12: 40 c2 00 14 bgtz $2, 44 +; CHECK-NEXT: 16: 00 00 00 00 nop +; CHECK-NEXT: 1a: 33 bd ff f8 addiu $sp, $sp, -8 +; CHECK-NEXT: 1e: fb fd 00 00 sw $ra, 0($sp) +; CHECK-NEXT: 22: 41 a1 00 01 lui $1, 1 +; CHECK-NEXT: 26: 40 60 00 02 bal 8 +; CHECK-NEXT: 2a: 30 21 04 68 addiu $1, $1, 1128 +; CHECK-NEXT: 2e: 00 3f 09 50 addu $1, $ra, $1 +; CHECK-NEXT: 32: ff fd 00 00 lw $ra, 0($sp) +; CHECK-NEXT: 36: 00 01 0f 3c jr $1 +; CHECK-NEXT: 3a: 33 bd 00 08 addiu $sp, $sp, 8 +; CHECK-NEXT: 3e: 94 00 00 02 b 8 +; CHECK-NEXT: 42: 00 00 00 00 nop +; CHECK-NEXT: 46: 30 20 4e 1f addiu $1, $zero, 19999 +; CHECK-NEXT: 4a: b4 22 00 14 bne $2, $1, 44 +; CHECK-NEXT: 4e: 00 00 00 00 nop +; CHECK-NEXT: 52: 33 bd ff f8 addiu $sp, $sp, -8 +; CHECK-NEXT: 56: fb fd 00 00 sw $ra, 0($sp) +; CHECK-NEXT: 5a: 41 a1 00 01 lui $1, 1 +; CHECK-NEXT: 5e: 40 60 00 02 bal 8 +; CHECK-NEXT: 62: 30 21 04 5c addiu $1, $1, 1116 +; CHECK-NEXT: 66: 00 3f 09 50 addu $1, $ra, $1 +; CHECK-NEXT: 6a: ff fd 00 00 lw $ra, 0($sp) +; CHECK-NEXT: 6e: 00 01 0f 3c jr $1 +; CHECK-NEXT: 72: 33 bd 00 08 addiu $sp, $sp, 8 +; CHECK-NEXT: 76: 30 20 27 0f addiu $1, $zero, 9999 +; CHECK-NEXT: 7a: 94 22 00 14 beq $2, $1, 44 +; CHECK-NEXT: 7e: 00 00 00 00 nop +; CHECK-NEXT: 82: 33 bd ff f8 addiu $sp, $sp, -8 +; CHECK-NEXT: 86: fb fd 00 00 sw $ra, 0($sp) +; CHECK-NEXT: 8a: 41 a1 00 01 lui $1, 1 +; CHECK-NEXT: 8e: 40 60 00 02 bal 8 +; CHECK-NEXT: 92: 30 21 04 2c addiu $1, $1, 1068 +; CHECK-NEXT: 96: 00 3f 09 50 addu $1, $ra, $1 +; CHECK-NEXT: 9a: ff fd 00 00 lw $ra, 0($sp) +; CHECK-NEXT: 9e: 00 01 0f 3c jr $1 +; CHECK-NEXT: a2: 33 bd 00 08 addiu $sp, $sp, 8 + +; CHECK: 10466: 00 00 00 00 nop +; CHECK-NEXT: 1046a: 94 00 00 02 b 8 +; CHECK-NEXT: 1046e: 00 00 00 00 nop +; CHECK-NEXT: 10472: 33 bd ff f8 addiu $sp, $sp, -8 +; CHECK-NEXT: 10476: fb fd 00 00 sw $ra, 
0($sp) +; CHECK-NEXT: 1047a: 41 a1 00 01 lui $1, 1 +; CHECK-NEXT: 1047e: 40 60 00 02 bal 8 +; CHECK-NEXT: 10482: 30 21 04 00 addiu $1, $1, 1024 +; CHECK-NEXT: 10486: 00 3f 09 50 addu $1, $ra, $1 +; CHECK-NEXT: 1048a: ff fd 00 00 lw $ra, 0($sp) +; CHECK-NEXT: 1048e: 00 01 0f 3c jr $1 +; CHECK-NEXT: 10492: 33 bd 00 08 addiu $sp, $sp, 8 +; CHECK-NEXT: 10496: 94 00 00 02 b 8 + +@x = external global i32, align 4 + +define void @foo() { + %1 = load i32, i32* @x, align 4 + %2 = icmp sgt i32 %1, 0 + br i1 %2, label %la, label %lf + +la: + switch i32 %1, label %le [ + i32 9999, label %lb + i32 19999, label %lc + ] + +lb: + tail call void asm sideeffect ".space 0", ""() + br label %le + +lc: + tail call void asm sideeffect ".space 0", ""() + br label %le + +le: + tail call void asm sideeffect ".space 66500", ""() + br label %lg + +lf: + tail call void asm sideeffect ".space 0", ""() + br label %lg + +lg: + tail call void asm sideeffect ".space 0", ""() + br label %li + +li: + tail call void asm sideeffect ".space 0", ""() + ret void +} Index: vendor/llvm/dist-release_70/test/CodeGen/Mips/micromips-gcc-except-table.ll =================================================================== --- vendor/llvm/dist-release_70/test/CodeGen/Mips/micromips-gcc-except-table.ll (nonexistent) +++ vendor/llvm/dist-release_70/test/CodeGen/Mips/micromips-gcc-except-table.ll (revision 341365) @@ -0,0 +1,37 @@ +; RUN: llc -mtriple=mips-linux-gnu -mcpu=mips32r2 -mattr=+micromips -O3 -filetype=obj < %s | llvm-objdump -s -j .gcc_except_table - | FileCheck %s + +; CHECK: Contents of section .gcc_except_table: +; CHECK-NEXT: 0000 ff9b1501 0c011100 00110e1f 011f1800 +; CHECK-NEXT: 0010 00010000 00000000 + +@_ZTIi = external constant i8* + +define dso_local i32 @main() local_unnamed_addr norecurse personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + %exception.i = tail call i8* @__cxa_allocate_exception(i32 4) nounwind + %0 = bitcast i8* %exception.i to i32* + store i32 5, i32* %0, align 16 + invoke void @__cxa_throw(i8* %exception.i, i8* bitcast (i8** @_ZTIi to i8*), i8* null) noreturn + to label %.noexc unwind label %return + +.noexc: + unreachable + +return: + %1 = landingpad { i8*, i32 } + catch i8* null + %2 = extractvalue { i8*, i32 } %1, 0 + %3 = tail call i8* @__cxa_begin_catch(i8* %2) nounwind + tail call void @__cxa_end_catch() + ret i32 0 +} + +declare i32 @__gxx_personality_v0(...) 
+ +declare i8* @__cxa_begin_catch(i8*) local_unnamed_addr + +declare void @__cxa_end_catch() local_unnamed_addr + +declare i8* @__cxa_allocate_exception(i32) local_unnamed_addr + +declare void @__cxa_throw(i8*, i8*, i8*) local_unnamed_addr Index: vendor/llvm/dist-release_70/test/CodeGen/Mips/micromips-mtc-mfc.ll =================================================================== --- vendor/llvm/dist-release_70/test/CodeGen/Mips/micromips-mtc-mfc.ll (nonexistent) +++ vendor/llvm/dist-release_70/test/CodeGen/Mips/micromips-mtc-mfc.ll (revision 341365) @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=mips -mcpu=mips32r2 -mattr=+micromips \ +; RUN: -show-mc-encoding < %s | FileCheck --check-prefix=MM2 %s +; RUN: llc -mtriple=mips -mcpu=mips32r6 -mattr=+micromips \ +; RUN: -show-mc-encoding < %s | FileCheck --check-prefix=MM6 %s + +define double @foo(double %a, double %b) { +; MM2-LABEL: foo: +; MM2: # %bb.0: # %entry +; MM2-NEXT: mov.d $f0, $f12 # encoding: [0x54,0x0c,0x20,0x7b] +; MM2-NEXT: mtc1 $zero, $f2 # encoding: [0x54,0x02,0x28,0x3b] +; MM2-NEXT: mthc1 $zero, $f2 # encoding: [0x54,0x02,0x38,0x3b] +; MM2-NEXT: c.ule.d $f12, $f2 # encoding: [0x54,0x4c,0x05,0xfc] +; MM2-NEXT: bc1t $BB0_2 # encoding: [0x43,0xa0,A,A] +; MM2-NEXT: # fixup A - offset: 0, value: ($BB0_2), kind: fixup_MICROMIPS_PC16_S1 +; MM2-NEXT: nop # encoding: [0x00,0x00,0x00,0x00] +; MM2-NEXT: # %bb.1: # %entry +; MM2-NEXT: j $BB0_2 # encoding: [0b110101AA,A,A,A] +; MM2-NEXT: # fixup A - offset: 0, value: ($BB0_2), kind: fixup_MICROMIPS_26_S1 +; MM2-NEXT: nop # encoding: [0x00,0x00,0x00,0x00] +; MM2-NEXT: $BB0_2: # %return +; MM2-NEXT: jrc $ra # encoding: [0x45,0xbf] +; +; MM6-LABEL: foo: +; MM6: # %bb.0: # %entry +; MM6-NEXT: mov.d $f0, $f12 # encoding: [0x46,0x20,0x60,0x06] +; MM6-NEXT: mtc1 $zero, $f1 # encoding: [0x54,0x01,0x28,0x3b] +; MM6-NEXT: mthc1 $zero, $f1 # encoding: [0x54,0x01,0x38,0x3b] +; MM6-NEXT: cmp.ule.d $f1, $f12, $f1 # encoding: [0x54,0x2c,0x09,0xd5] +; MM6-NEXT: mfc1 $2, $f1 # encoding: [0x54,0x41,0x20,0x3b] +; MM6-NEXT: andi16 $2, $2, 1 # encoding: [0x2d,0x21] +; MM6-NEXT: jrc $ra # encoding: [0x45,0xbf] +entry: + %cmp = fcmp ogt double %a, 0.000000e+00 + br i1 %cmp, label %if.end, label %if.else + +if.else: + br label %return + +if.end: + %mul = fmul double %a, 2.000000e+00 + br label %return + +return: + ret double %a +} + +define double @bar(double %x, double %y) { +; MM2-LABEL: bar: +; MM2: # %bb.0: # %entry +; MM2-NEXT: mov.d $f0, $f14 # encoding: [0x54,0x0e,0x20,0x7b] +; MM2-NEXT: c.olt.d $f12, $f14 # encoding: [0x55,0xcc,0x05,0x3c] +; MM2-NEXT: jr $ra # encoding: [0x00,0x1f,0x0f,0x3c] +; MM2-NEXT: movt.d $f0, $f12, $fcc0 # encoding: [0x54,0x0c,0x02,0x60] +; +; MM6-LABEL: bar: +; MM6: # %bb.0: # %entry +; MM6-NEXT: cmp.lt.d $f0, $f12, $f14 # encoding: [0x55,0xcc,0x01,0x15] +; MM6-NEXT: mfc1 $1, $f0 # encoding: [0x54,0x20,0x20,0x3b] +; MM6-NEXT: mtc1 $1, $f0 # encoding: [0x44,0x81,0x00,0x00] +; MM6-NEXT: sel.d $f0, $f14, $f12 # encoding: [0x55,0x8e,0x02,0xb8] +; MM6-NEXT: jrc $ra # encoding: [0x45,0xbf] +; FIXME: mtc1 is encoded as a regular non-microMIPS instruction +entry: + %z = fcmp olt double %x, %y + %r = select i1 %z, double %x, double %y + ret double %r +} Index: vendor/llvm/dist-release_70/test/CodeGen/Mips/shrink-wrap-buildpairf64-extractelementf64.mir =================================================================== --- vendor/llvm/dist-release_70/test/CodeGen/Mips/shrink-wrap-buildpairf64-extractelementf64.mir 
(nonexistent) +++ vendor/llvm/dist-release_70/test/CodeGen/Mips/shrink-wrap-buildpairf64-extractelementf64.mir (revision 341365) @@ -0,0 +1,150 @@ +# RUN: llc -o - %s -mtriple=mips-unknown-linux-gnu -enable-shrink-wrap=true \ +# RUN: -start-before=shrink-wrap -stop-after=prologepilog | FileCheck %s + +--- | + declare void @foo() + define void @testBuildPairF64() { + ret void + } + define void @testBuildPairF64_64() { + ret void + } + define void @testBuildPairF64implicitSp() { + ret void + } + define void @testBuildPairF64_64implicitSp() { + ret void + } + define void @testExtractElementF64() { + ret void + } + define void @testExtractElementF64_64() { + ret void + } + define void @testExtractElementF64implicitSp() { + ret void + } + define void @testExtractElementF64_64implicitSp() { + ret void + } +... +--- +name: testBuildPairF64 +# CHECK-LABEL: name: testBuildPairF64 +# CHECK: bb.0 +# CHECK-NEXT: successors +# CHECK-NEXT: {{[[:space:]]$}} +# CHECK-NEXT: BuildPairF64 +body: | + bb.0: + $d0 = BuildPairF64 $zero, $zero + bb.1: + JAL @foo, implicit-def $ra + bb.2: + RetRA +... +--- +name: testBuildPairF64_64 +# CHECK-LABEL: name: testBuildPairF64_64 +# CHECK: bb.0 +# CHECK-NEXT: successors +# CHECK-NEXT: {{[[:space:]]$}} +# CHECK-NEXT: BuildPairF64_64 +body: | + bb.0: + $d0_64 = BuildPairF64_64 $zero, $zero + bb.1: + JAL @foo, implicit-def $ra + bb.2: + RetRA +... +--- +name: testBuildPairF64implicitSp +# CHECK-LABEL: name: testBuildPairF64implicitSp +# CHECK: bb.0 +# CHECK-NEXT: successors +# CHECK-NEXT: {{[[:space:]]$}} +# CHECK-NEXT: $sp = ADDiu $sp, -{{[0-9]+}} +body: | + bb.0: + $d0 = BuildPairF64 $zero, $zero, implicit $sp + bb.1: + JAL @foo, implicit-def $ra + bb.2: + RetRA +... +--- +name: testBuildPairF64_64implicitSp +# CHECK-LABEL: name: testBuildPairF64_64implicitSp +# CHECK: bb.0 +# CHECK-NEXT: successors +# CHECK-NEXT: {{[[:space:]]$}} +# CHECK-NEXT: $sp = ADDiu $sp, -{{[0-9]+}} +body: | + bb.0: + $d0_64 = BuildPairF64_64 $zero, $zero, implicit $sp + bb.1: + JAL @foo, implicit-def $ra + bb.2: + RetRA +... +--- +name: testExtractElementF64 +# CHECK-LABEL: name: testExtractElementF64 +# CHECK: bb.0 +# CHECK-NEXT: successors +# CHECK-NEXT: {{[[:space:]]$}} +# CHECK-NEXT: ExtractElementF64 +body: | + bb.0: + $at = ExtractElementF64 $d6, 1 + bb.1: + JAL @foo, implicit-def $ra + bb.2: + RetRA +... +--- +name: testExtractElementF64_64 +# CHECK-LABEL: name: testExtractElementF64_64 +# CHECK: bb.0 +# CHECK-NEXT: successors +# CHECK-NEXT: {{[[:space:]]$}} +# CHECK-NEXT: ExtractElementF64_64 +body: | + bb.0: + $at = ExtractElementF64_64 $d12_64, 1 + bb.1: + JAL @foo, implicit-def $ra + bb.2: + RetRA +... +--- +name: testExtractElementF64implicitSp +# CHECK-LABEL: name: testExtractElementF64implicitSp +# CHECK: bb.0 +# CHECK-NEXT: successors +# CHECK-NEXT: {{[[:space:]]$}} +# CHECK-NEXT: $sp = ADDiu $sp, -{{[0-9]+}} +body: | + bb.0: + $at = ExtractElementF64 $d6, 1, implicit $sp + bb.1: + JAL @foo, implicit-def $ra + bb.2: + RetRA +... +--- +name: testExtractElementF64_64implicitSp +# CHECK-LABEL: name: testExtractElementF64_64implicitSp +# CHECK: bb.0 +# CHECK-NEXT: successors +# CHECK-NEXT: {{[[:space:]]$}} +# CHECK-NEXT: $sp = ADDiu $sp, -{{[0-9]+}} +body: | + bb.0: + $at = ExtractElementF64_64 $d12_64, 1, implicit $sp + bb.1: + JAL @foo, implicit-def $ra + bb.2: + RetRA +... 
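An aside on the two implicit-$sp tests in this change: under FPXX (and FP64 with +nooddspreg) the Mips BuildPairF64/ExtractElementF64 pseudos carry an implicit $sp operand, apparently because their expansion may need to go through a stack slot, which is why the MIR test above expects the $sp adjustment to remain in bb.0. A minimal IR sketch that should exercise the same path, assuming the llc invocation already used by buildpairf64-extractelementf64-implicit-sp.ll above; the reduced function below is illustrative only and is not part of the patch:

; Illustrative sketch -- a reduced variant of foo2 from the FPXX test above,
; with the unused i32 argument dropped. When built with the RUN line from
; that test, e.g.
;   llc -mtriple=mips-unknown-linux-gnu -mcpu=mips32 -mattr=+fpxx \
;       -stop-after=expand-isel-pseudos -o - reduced.ll
; the copysign round-trip should produce BuildPairF64/ExtractElementF64
; instructions carrying ", implicit $sp", as checked by FPXX-IMPLICIT-SP.
define double @copysign_roundtrip(double %d1) {
entry:
  %conv = fptrunc double %d1 to float
  %cs = tail call float @llvm.copysign.f32(float 1.000000e+00, float %conv)
  %conv1 = fpext float %cs to double
  ret double %conv1
}

declare float @llvm.copysign.f32(float, float)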
Index: vendor/llvm/dist-release_70/test/CodeGen/Mips/tls.ll =================================================================== --- vendor/llvm/dist-release_70/test/CodeGen/Mips/tls.ll (revision 341364) +++ vendor/llvm/dist-release_70/test/CodeGen/Mips/tls.ll (revision 341365) @@ -1,157 +1,157 @@ ; RUN: llc -mtriple=mipsel-- -disable-mips-delay-filler \ ; RUN: -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC32 ; RUN: llc -mtriple=mips64el-- -disable-mips-delay-filler \ ; RUN: -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC64 ; RUN: llc -mtriple=mipsel-- -mattr=+micromips -disable-mips-delay-filler \ ; RUN: -relocation-model=pic < %s | FileCheck %s -check-prefix=MM ; RUN: llc -mtriple=mipsel-- -disable-mips-delay-filler \ ; RUN: -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC32 ; RUN: llc -mtriple=mips64el-- -disable-mips-delay-filler \ ; RUN: -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC64 ; RUN: llc -mtriple=mipsel-- -disable-mips-delay-filler -mips-fix-global-base-reg=false \ ; RUN: -relocation-model=static < %s | FileCheck %s -check-prefix=STATICGP32 ; RUN: llc -mtriple=mips64el-- -disable-mips-delay-filler -mips-fix-global-base-reg=false \ ; RUN: -relocation-model=static < %s | FileCheck %s -check-prefix=STATICGP64 @t1 = thread_local global i32 0, align 4 define i32 @f1() nounwind { entry: %tmp = load i32, i32* @t1, align 4 ret i32 %tmp ; PIC32-LABEL: f1: ; PIC32-DAG: addu $[[R0:[a-z0-9]+]], $2, $25 ; PIC32-DAG: addiu $4, $[[R0]], %tlsgd(t1) ; PIC32-DAG: lw $25, %call16(__tls_get_addr)($[[R0]]) ; PIC32-DAG: jalr $25 ; PIC32-DAG: lw $2, 0($2) ; PIC64-LABEL: f1: ; PIC64-DAG: daddiu $[[R0:[a-z0-9]+]], $1, %lo(%neg(%gp_rel(f1))) ; PIC64-DAG: daddiu $4, $[[R0]], %tlsgd(t1) ; PIC64-DAG: ld $25, %call16(__tls_get_addr)($[[R0]]) ; PIC64-DAG: jalr $25 ; PIC64-DAG: lw $2, 0($2) ; MM-LABEL: f1: ; MM-DAG: addu $[[R0:[a-z0-9]+]], $2, $25 ; MM-DAG: addiu $4, $[[R0]], %tlsgd(t1) ; MM-DAG: lw $25, %call16(__tls_get_addr)($[[R0]]) ; MM-DAG: move $gp, $2 ; MM-DAG: jalr $25 ; MM-DAG: lw16 $2, 0($2) ; STATIC32-LABEL: f1: ; STATIC32: lui $[[R0:[0-9]+]], %tprel_hi(t1) ; STATIC32: addiu $[[R1:[0-9]+]], $[[R0]], %tprel_lo(t1) -; STATIC32: rdhwr $3, $29 +; STATIC32: rdhwr $3, $29{{$}} ; STATIC32: addu $[[R2:[0-9]+]], $3, $[[R1]] ; STATIC32: lw $2, 0($[[R2]]) ; STATIC64-LABEL: f1: ; STATIC64: lui $[[R0:[0-9]+]], %tprel_hi(t1) ; STATIC64: daddiu $[[R1:[0-9]+]], $[[R0]], %tprel_lo(t1) -; STATIC64: rdhwr $3, $29, 0 +; STATIC64: rdhwr $3, $29{{$}} ; STATIC64: daddu $[[R2:[0-9]+]], $3, $[[R0]] ; STATIC64: lw $2, 0($[[R2]]) } @t2 = external thread_local global i32 define i32 @f2() nounwind { entry: %tmp = load i32, i32* @t2, align 4 ret i32 %tmp ; PIC32-LABEL: f2: ; PIC32-DAG: addu $[[R0:[a-z0-9]+]], $2, $25 ; PIC32-DAG: addiu $4, $[[R0]], %tlsgd(t2) ; PIC32-DAG: lw $25, %call16(__tls_get_addr)($[[R0]]) ; PIC32-DAG: jalr $25 ; PIC32-DAG: lw $2, 0($2) ; PIC64-LABEL: f2: ; PIC64-DAG: daddiu $[[R0:[a-z0-9]+]], $1, %lo(%neg(%gp_rel(f2))) ; PIC64-DAG: daddiu $4, $[[R0]], %tlsgd(t2) ; PIC64-DAG: ld $25, %call16(__tls_get_addr)($[[R0]]) ; PIC64-DAG: jalr $25 ; PIC64-DAG: lw $2, 0($2) ; MM-LABEL: f2: ; MM-DAG: addu $[[R0:[a-z0-9]+]], $2, $25 ; MM-DAG: lw $25, %call16(__tls_get_addr)($[[R0]]) ; MM-DAG: addiu $4, $[[R0]], %tlsgd(t2) ; MM-DAG: jalr $25 ; MM-DAG: lw16 $2, 0($2) ; STATICGP32-LABEL: f2: ; STATICGP32: lui $[[R0:[0-9]+]], %hi(__gnu_local_gp) ; STATICGP32: addiu $[[GP:[0-9]+]], $[[R0]], %lo(__gnu_local_gp) ; STATICGP32: lw ${{[0-9]+}}, 
%gottprel(t2)($[[GP]]) ; STATICGP64-LABEL: f2: ; STATICGP64: lui $[[R0:[0-9]+]], %hi(%neg(%gp_rel(f2))) ; STATICGP64: daddiu $[[GP:[0-9]+]], $[[R0]], %lo(%neg(%gp_rel(f2))) ; STATICGP64: ld $1, %gottprel(t2)($[[GP]]) ; STATIC32-LABEL: f2: ; STATIC32: lui $[[R0:[0-9]+]], %hi(__gnu_local_gp) ; STATIC32: addiu $[[GP:[0-9]+]], $[[R0]], %lo(__gnu_local_gp) -; STATIC32: rdhwr $3, $29 +; STATIC32: rdhwr $3, $29{{$}} ; STATIC32: lw $[[R0:[0-9]+]], %gottprel(t2)($[[GP]]) ; STATIC32: addu $[[R1:[0-9]+]], $3, $[[R0]] ; STATIC32: lw $2, 0($[[R1]]) ; STATIC64-LABEL: f2: ; STATIC64: lui $[[R0:[0-9]+]], %hi(%neg(%gp_rel(f2))) ; STATIC64: daddiu $[[GP:[0-9]+]], $[[R0]], %lo(%neg(%gp_rel(f2))) -; STATIC64: rdhwr $3, $29 +; STATIC64: rdhwr $3, $29{{$}} ; STATIC64: ld $[[R0:[0-9]+]], %gottprel(t2)($[[GP]]) ; STATIC64: daddu $[[R1:[0-9]+]], $3, $[[R0]] ; STATIC64: lw $2, 0($[[R1]]) } @f3.i = internal thread_local unnamed_addr global i32 1, align 4 define i32 @f3() nounwind { entry: ; PIC32-LABEL: f3: ; PIC32: addu $[[R0:[a-z0-9]+]], $2, $25 ; PIC32: addiu $4, $[[R0]], %tlsldm(f3.i) ; PIC32: lw $25, %call16(__tls_get_addr)($[[R0]]) ; PIC32: jalr $25 ; PIC32: lui $[[R0:[0-9]+]], %dtprel_hi(f3.i) ; PIC32: addu $[[R1:[0-9]+]], $[[R0]], $2 ; PIC32: lw $[[R3:[0-9]+]], %dtprel_lo(f3.i)($[[R1]]) ; PIC32: addiu $[[R3]], $[[R3]], 1 ; PIC32: sw $[[R3]], %dtprel_lo(f3.i)($[[R1]]) ; PIC64-LABEL: f3: ; PIC64: lui $[[R0:[a-z0-9]+]], %hi(%neg(%gp_rel(f3))) ; PIC64: daddu $[[R0]], $[[R0]], $25 ; PIC64: daddiu $[[R1:[a-z0-9]+]], $[[R0]], %lo(%neg(%gp_rel(f3))) ; PIC64: daddiu $4, $[[R1]], %tlsldm(f3.i) ; PIC64: ld $25, %call16(__tls_get_addr)($[[R1]]) ; PIC64: jalr $25 ; PIC64: lui $[[R0:[0-9]+]], %dtprel_hi(f3.i) ; PIC64: daddu $[[R1:[0-9]+]], $[[R0]], $2 ; PIC64: lw $[[R2:[0-9]+]], %dtprel_lo(f3.i)($[[R1]]) ; PIC64: addiu $[[R2]], $[[R2]], 1 ; PIC64: sw $[[R2]], %dtprel_lo(f3.i)($[[R1]]) ; MM-LABEL: f3: ; MM: addiu $4, ${{[a-z0-9]+}}, %tlsldm(f3.i) ; MM: jalr $25 ; MM: lui $[[R0:[0-9]+]], %dtprel_hi(f3.i) ; MM: addu16 $[[R1:[0-9]+]], $[[R0]], $2 ; MM: lw ${{[0-9]+}}, %dtprel_lo(f3.i)($[[R1]]) %0 = load i32, i32* @f3.i, align 4 %inc = add nsw i32 %0, 1 store i32 %inc, i32* @f3.i, align 4 ret i32 %inc } Index: vendor/llvm/dist-release_70/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll =================================================================== --- vendor/llvm/dist-release_70/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll (revision 341364) +++ vendor/llvm/dist-release_70/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll (revision 341365) @@ -1,45 +1,56 @@ ; RUN: llc < %s -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \ -; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-P8 +; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \ +; RUN: | FileCheck %s --check-prefix=CHECK-P8 ; RUN: llc < %s -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \ -; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-P9 +; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \ +; RUN: | FileCheck %s --check-prefix=CHECK-P9 @a = external local_unnamed_addr global <4 x i32>, align 16 @pb = external local_unnamed_addr global float*, align 8 define void @testExpandPostRAPseudo(i32* nocapture readonly %ptr) { -; CHECK-P8-LABEL: testExpandPostRAPseudo: -; CHECK-P8: lxsiwax 34, 0, 3 -; CHECK-P8-NEXT: xxspltw 34, 34, 1 -; CHECK-P8-NEXT: stvx 2, 0, 4 -; CHECK-P8: #APP -; CHECK-P8-NEXT: #Clobber Rigisters -; CHECK-P8-NEXT: #NO_APP -; CHECK-P8-NEXT: lis 4, 1024 -; CHECK-P8-NEXT: lfiwax 0, 0, 3 -; CHECK-P8: 
stfsx 0, 3, 4 -; CHECK-P8-NEXT: blr - -; CHECK-P9-LABEL: testExpandPostRAPseudo: -; CHECK-P9: lxvwsx 0, 0, 3 -; CHECK-P9: stxvx 0, 0, 4 -; CHECK-P9: #APP -; CHECK-P9-NEXT: #Clobber Rigisters -; CHECK-P9-NEXT: #NO_APP -; CHECK-P9-NEXT: lis 4, 1024 -; CHECK-P9-NEXT: lfiwax 0, 0, 3 -; CHECK-P9: stfsx 0, 3, 4 -; CHECK-P9-NEXT: blr - +; CHECK-P8-LABEL: testExpandPostRAPseudo: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8: lfiwzx f0, 0, r3 +; CHECK-P8: ld r4, .LC0@toc@l(r4) +; CHECK-P8: xxpermdi vs0, f0, f0, 2 +; CHECK-P8: xxspltw v2, vs0, 3 +; CHECK-P8: stvx v2, 0, r4 +; CHECK-P8: lis r4, 1024 +; CHECK-P8: lfiwax f0, 0, r3 +; CHECK-P8: addis r3, r2, .LC1@toc@ha +; CHECK-P8: ld r3, .LC1@toc@l(r3) +; CHECK-P8: xscvsxdsp f0, f0 +; CHECK-P8: ld r3, 0(r3) +; CHECK-P8: stfsx f0, r3, r4 +; CHECK-P8: blr +; +; CHECK-P9-LABEL: testExpandPostRAPseudo: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9: lfiwzx f0, 0, r3 +; CHECK-P9: addis r4, r2, .LC0@toc@ha +; CHECK-P9: ld r4, .LC0@toc@l(r4) +; CHECK-P9: xxpermdi vs0, f0, f0, 2 +; CHECK-P9: xxspltw vs0, vs0, 3 +; CHECK-P9: stxvx vs0, 0, r4 +; CHECK-P9: lis r4, 1024 +; CHECK-P9: lfiwax f0, 0, r3 +; CHECK-P9: addis r3, r2, .LC1@toc@ha +; CHECK-P9: ld r3, .LC1@toc@l(r3) +; CHECK-P9: xscvsxdsp f0, f0 +; CHECK-P9: ld r3, 0(r3) +; CHECK-P9: stfsx f0, r3, r4 +; CHECK-P9: blr entry: %0 = load i32, i32* %ptr, align 4 %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer store <4 x i32> %splat.splat, <4 x i32>* @a, align 16 tail call void asm sideeffect "#Clobber Rigisters", "~{f0},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"() %1 = load i32, i32* %ptr, align 4 %conv = sitofp i32 %1 to float %2 = load float*, float** @pb, align 8 %add.ptr = getelementptr inbounds float, float* %2, i64 16777216 store float %conv, float* %add.ptr, align 4 ret void } Index: vendor/llvm/dist-release_70/test/CodeGen/PowerPC/build-vector-tests.ll =================================================================== --- vendor/llvm/dist-release_70/test/CodeGen/PowerPC/build-vector-tests.ll (revision 341364) +++ vendor/llvm/dist-release_70/test/CodeGen/PowerPC/build-vector-tests.ll (revision 341365) @@ -1,4807 +1,4819 @@ ; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ ; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck -allow-deprecated-dag-overlap %s \ ; RUN: -check-prefix=P9BE -implicit-check-not frsp ; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck -allow-deprecated-dag-overlap %s \ ; RUN: -check-prefix=P9LE -implicit-check-not frsp ; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ ; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck -allow-deprecated-dag-overlap %s \ ; RUN: -check-prefix=P8BE -implicit-check-not frsp ; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck -allow-deprecated-dag-overlap %s \ ; RUN: -check-prefix=P8LE -implicit-check-not frsp ; This test case comes from the following C test case (included as it may be ; slightly more readable than the LLVM IR. ;/* This test case provides various ways of building vectors to ensure we ; produce optimal code for all cases. 
The cases are (for each type): ; - All zeros ; - All ones ; - Splat of a constant ; - From different values already in registers ; - From different constants ; - From different values in memory ; - Splat of a value in register ; - Splat of a value in memory ; - Inserting element into existing vector ; - Inserting element from existing vector into existing vector ; ; With conversions (float <-> int) ; - Splat of a constant ; - From different values already in registers ; - From different constants ; - From different values in memory ; - Splat of a value in register ; - Splat of a value in memory ; - Inserting element into existing vector ; - Inserting element from existing vector into existing vector ;*/ ; ;/*=================================== int ===================================*/ ;// P8: xxlxor // ;// P9: xxlxor // ;vector int allZeroi() { // ; return (vector int)0; // ;} // ;// P8: vspltisb -1 // ;// P9: xxspltisb 255 // ;vector int allOnei() { // ; return (vector int)-1; // ;} // ;// P8: vspltisw 1 // ;// P9: vspltisw 1 // ;vector int spltConst1i() { // ; return (vector int)1; // ;} // ;// P8: vspltisw -15; vsrw // ;// P9: vspltisw -15; vsrw // ;vector int spltConst16ki() { // ; return (vector int)((1<<15) - 1); // ;} // ;// P8: vspltisw -16; vsrw // ;// P9: vspltisw -16; vsrw // ;vector int spltConst32ki() { // ; return (vector int)((1<<16) - 1); // ;} // ;// P8: 4 x mtvsrwz, 2 x xxmrgh, vmrgow // ;// P9: 2 x mtvsrdd, vmrgow // ;vector int fromRegsi(int a, int b, int c, int d) { // ; return (vector int){ a, b, c, d }; // ;} // ;// P8: lxvd2x, xxswapd // ;// P9: lxvx (or even lxv) // ;vector int fromDiffConstsi() { // ; return (vector int) { 242, -113, 889, 19 }; // ;} // ;// P8: lxvd2x, xxswapd // ;// P9: lxvx // ;vector int fromDiffMemConsAi(int *arr) { // ; return (vector int) { arr[0], arr[1], arr[2], arr[3] }; // ;} // ;// P8: 2 x lxvd2x, 2 x xxswapd, vperm // ;// P9: 2 x lxvx, vperm // ;vector int fromDiffMemConsDi(int *arr) { // ; return (vector int) { arr[3], arr[2], arr[1], arr[0] }; // ;} // ;// P8: sldi 2, lxvd2x, xxswapd // ;// P9: sldi 2, lxvx // ;vector int fromDiffMemVarAi(int *arr, int elem) { // ; return (vector int) { arr[elem], arr[elem+1], arr[elem+2], arr[elem+3] }; // ;} // ;// P8: sldi 2, 2 x lxvd2x, 2 x xxswapd, vperm // ;// P9: sldi 2, 2 x lxvx, vperm // ;vector int fromDiffMemVarDi(int *arr, int elem) { // ; return (vector int) { arr[elem], arr[elem-1], arr[elem-2], arr[elem-3] }; // ;} // ;// P8: 4 x lwz, 4 x mtvsrwz, 2 x xxmrghd, vmrgow // ;// P9: 4 x lwz, 2 x mtvsrdd, vmrgow // ;vector int fromRandMemConsi(int *arr) { // ; return (vector int) { arr[4], arr[18], arr[2], arr[88] }; // ;} // ;// P8: sldi 2, 4 x lwz, 4 x mtvsrwz, 2 x xxmrghd, vmrgow // ;// P9: sldi 2, add, 4 x lwz, 2 x mtvsrdd, vmrgow // ;vector int fromRandMemVari(int *arr, int elem) { // ; return (vector int) { arr[elem+4], arr[elem+1], arr[elem+2], arr[elem+8] };// ;} // ;// P8: mtvsrwz, xxspltw // ;// P9: mtvsrws // ;vector int spltRegVali(int val) { // ; return (vector int) val; // ;} // -;// P8: lxsiwax, xxspltw // -;// P9: lxvwsx // +;// P8: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw // +;// P9: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw // ;vector int spltMemVali(int *ptr) { // ; return (vector int)*ptr; // ;} // ;// P8: vspltisw // ;// P9: vspltisw // ;vector int spltCnstConvftoi() { // ; return (vector int) 4.74f; // ;} // ;// P8: 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // ;// P9: 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // ;vector int 
fromRegsConvftoi(float a, float b, float c, float d) { // ; return (vector int) { a, b, c, d }; // ;} // ;// P8: lxvd2x, xxswapd // ;// P9: lxvx (even lxv) // ;vector int fromDiffConstsConvftoi() { // ; return (vector int) { 24.46f, 234.f, 988.19f, 422.39f }; // ;} // ;// P8: lxvd2x, xxswapd, xvcvspsxws // ;// P9: lxvx, xvcvspsxws // ;vector int fromDiffMemConsAConvftoi(float *ptr) { // ; return (vector int) { ptr[0], ptr[1], ptr[2], ptr[3] }; // ;} // ;// P8: 2 x lxvd2x, 2 x xxswapd, vperm, xvcvspsxws // ;// P9: 2 x lxvx, vperm, xvcvspsxws // ;vector int fromDiffMemConsDConvftoi(float *ptr) { // ; return (vector int) { ptr[3], ptr[2], ptr[1], ptr[0] }; // ;} // ;// P8: 4 x lxsspx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // ;// P9: 4 x lxssp, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // ;// Note: if the consecutive loads learns to handle pre-inc, this can be: // ;// sldi 2, load, xvcvspuxws // ;vector int fromDiffMemVarAConvftoi(float *arr, int elem) { // ; return (vector int) { arr[elem], arr[elem+1], arr[elem+2], arr[elem+3] }; // ;} // ;// P8: 4 x lxsspx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // ;// P9: 4 x lxssp, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // ;// Note: if the consecutive loads learns to handle pre-inc, this can be: // ;// sldi 2, 2 x load, vperm, xvcvspuxws // ;vector int fromDiffMemVarDConvftoi(float *arr, int elem) { // ; return (vector int) { arr[elem], arr[elem-1], arr[elem-2], arr[elem-3] }; // ;} // ;// P8: xscvdpsxws, xxspltw // ;// P9: xscvdpsxws, xxspltw // ;vector int spltRegValConvftoi(float val) { // ; return (vector int) val; // ;} // ;// P8: lxsspx, xscvdpsxws, xxspltw // ;// P9: lxvwsx, xvcvspsxws // ;vector int spltMemValConvftoi(float *ptr) { // ; return (vector int)*ptr; // ;} // ;// P8: vspltisw // ;// P9: vspltisw // ;vector int spltCnstConvdtoi() { // ; return (vector int) 4.74; // ;} // ;// P8: 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // ;// P9: 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // ;vector int fromRegsConvdtoi(double a, double b, double c, double d) { // ; return (vector int) { a, b, c, d }; // ;} // ;// P8: lxvd2x, xxswapd // ;// P9: lxvx (even lxv) // ;vector int fromDiffConstsConvdtoi() { // ; return (vector int) { 24.46, 234., 988.19, 422.39 }; // ;} // ;// P8: 2 x lxvd2x, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvspsxws, vmrgew // ;// P9: 2 x lxvx, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvspsxws, vmrgew // ;vector int fromDiffMemConsAConvdtoi(double *ptr) { // ; return (vector int) { ptr[0], ptr[1], ptr[2], ptr[3] }; // ;} // ;// P8: 4 x lxsdx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // ;// P9: 4 x lfd, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // ;vector int fromDiffMemConsDConvdtoi(double *ptr) { // ; return (vector int) { ptr[3], ptr[2], ptr[1], ptr[0] }; // ;} // ;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // ;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // ;vector int fromDiffMemVarAConvdtoi(double *arr, int elem) { // ; return (vector int) { arr[elem], arr[elem+1], arr[elem+2], arr[elem+3] }; // ;} // ;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // ;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // ;vector int fromDiffMemVarDConvdtoi(double *arr, int elem) { // ; return (vector int) { arr[elem], arr[elem-1], arr[elem-2], arr[elem-3] }; // ;} // ;// P8: xscvdpsxws, xxspltw // ;// P9: xscvdpsxws, xxspltw // ;vector int spltRegValConvdtoi(double val) { // ; return (vector int) val; // ;} // ;// P8: lxsdx, xscvdpsxws, xxspltw // ;// P9: lxssp, xscvdpsxws, xxspltw // ;vector int spltMemValConvdtoi(double *ptr) { // ; 
return (vector int)*ptr; // ;} // ;/*=================================== int ===================================*/ ;/*=============================== unsigned int ==============================*/ ;// P8: xxlxor // ;// P9: xxlxor // ;vector unsigned int allZeroui() { // ; return (vector unsigned int)0; // ;} // ;// P8: vspltisb -1 // ;// P9: xxspltisb 255 // ;vector unsigned int allOneui() { // ; return (vector unsigned int)-1; // ;} // ;// P8: vspltisw 1 // ;// P9: vspltisw 1 // ;vector unsigned int spltConst1ui() { // ; return (vector unsigned int)1; // ;} // ;// P8: vspltisw -15; vsrw // ;// P9: vspltisw -15; vsrw // ;vector unsigned int spltConst16kui() { // ; return (vector unsigned int)((1<<15) - 1); // ;} // ;// P8: vspltisw -16; vsrw // ;// P9: vspltisw -16; vsrw // ;vector unsigned int spltConst32kui() { // ; return (vector unsigned int)((1<<16) - 1); // ;} // ;// P8: 4 x mtvsrwz, 2 x xxmrghd, vmrgow // ;// P9: 2 x mtvsrdd, vmrgow // ;vector unsigned int fromRegsui(unsigned int a, unsigned int b, // ; unsigned int c, unsigned int d) { // ; return (vector unsigned int){ a, b, c, d }; // ;} // ;// P8: lxvd2x, xxswapd // ;// P9: lxvx (or even lxv) // ;vector unsigned int fromDiffConstsui() { // ; return (vector unsigned int) { 242, -113, 889, 19 }; // ;} // ;// P8: lxvd2x, xxswapd // ;// P9: lxvx // ;vector unsigned int fromDiffMemConsAui(unsigned int *arr) { // ; return (vector unsigned int) { arr[0], arr[1], arr[2], arr[3] }; // ;} // ;// P8: 2 x lxvd2x, 2 x xxswapd, vperm // ;// P9: 2 x lxvx, vperm // ;vector unsigned int fromDiffMemConsDui(unsigned int *arr) { // ; return (vector unsigned int) { arr[3], arr[2], arr[1], arr[0] }; // ;} // ;// P8: sldi 2, lxvd2x, xxswapd // ;// P9: sldi 2, lxvx // ;vector unsigned int fromDiffMemVarAui(unsigned int *arr, int elem) { // ; return (vector unsigned int) { arr[elem], arr[elem+1], // ; arr[elem+2], arr[elem+3] }; // ;} // ;// P8: sldi 2, 2 x lxvd2x, 2 x xxswapd, vperm // ;// P9: sldi 2, 2 x lxvx, vperm // ;vector unsigned int fromDiffMemVarDui(unsigned int *arr, int elem) { // ; return (vector unsigned int) { arr[elem], arr[elem-1], // ; arr[elem-2], arr[elem-3] }; // ;} // ;// P8: 4 x lwz, 4 x mtvsrwz, 2 x xxmrghd, vmrgow // ;// P9: 4 x lwz, 2 x mtvsrdd, vmrgow // ;vector unsigned int fromRandMemConsui(unsigned int *arr) { // ; return (vector unsigned int) { arr[4], arr[18], arr[2], arr[88] }; // ;} // ;// P8: sldi 2, 4 x lwz, 4 x mtvsrwz, 2 x xxmrghd, vmrgow // ;// P9: sldi 2, add, 4 x lwz, 2 x mtvsrdd, vmrgow // ;vector unsigned int fromRandMemVarui(unsigned int *arr, int elem) { // ; return (vector unsigned int) { arr[elem+4], arr[elem+1], // ; arr[elem+2], arr[elem+8] }; // ;} // ;// P8: mtvsrwz, xxspltw // ;// P9: mtvsrws // ;vector unsigned int spltRegValui(unsigned int val) { // ; return (vector unsigned int) val; // ;} // -;// P8: lxsiwax, xxspltw // -;// P9: lxvwsx // +;// P8: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw // +;// P9: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw // ;vector unsigned int spltMemValui(unsigned int *ptr) { // ; return (vector unsigned int)*ptr; // ;} // ;// P8: vspltisw // ;// P9: vspltisw // ;vector unsigned int spltCnstConvftoui() { // ; return (vector unsigned int) 4.74f; // ;} // ;// P8: 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // ;// P9: 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // ;vector unsigned int fromRegsConvftoui(float a, float b, float c, float d) { // ; return (vector unsigned int) { a, b, c, d }; // ;} // ;// P8: lxvd2x, xxswapd // ;// P9: lxvx (even lxv) // 
;vector unsigned int fromDiffConstsConvftoui() { // ; return (vector unsigned int) { 24.46f, 234.f, 988.19f, 422.39f }; // ;} // ;// P8: lxvd2x, xxswapd, xvcvspuxws // ;// P9: lxvx, xvcvspuxws // ;vector unsigned int fromDiffMemConsAConvftoui(float *ptr) { // ; return (vector unsigned int) { ptr[0], ptr[1], ptr[2], ptr[3] }; // ;} // ;// P8: 2 x lxvd2x, 2 x xxswapd, vperm, xvcvspuxws // ;// P9: 2 x lxvx, vperm, xvcvspuxws // ;vector unsigned int fromDiffMemConsDConvftoui(float *ptr) { // ; return (vector unsigned int) { ptr[3], ptr[2], ptr[1], ptr[0] }; // ;} // ;// P8: lfsux, 3 x lxsspx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // ;// P9: lfsux, 3 x lfs, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // ;// Note: if the consecutive loads learns to handle pre-inc, this can be: // ;// sldi 2, load, xvcvspuxws // ;vector unsigned int fromDiffMemVarAConvftoui(float *arr, int elem) { // ; return (vector unsigned int) { arr[elem], arr[elem+1], // ; arr[elem+2], arr[elem+3] }; // ;} // ;// P8: lfsux, 3 x lxsspx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // ;// P9: lfsux, 3 x lfs, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // ;// Note: if the consecutive loads learns to handle pre-inc, this can be: // ;// sldi 2, 2 x load, vperm, xvcvspuxws // ;vector unsigned int fromDiffMemVarDConvftoui(float *arr, int elem) { // ; return (vector unsigned int) { arr[elem], arr[elem-1], // ; arr[elem-2], arr[elem-3] }; // ;} // ;// P8: xscvdpuxws, xxspltw // ;// P9: xscvdpuxws, xxspltw // ;vector unsigned int spltRegValConvftoui(float val) { // ; return (vector unsigned int) val; // ;} // ;// P8: lxsspx, xscvdpuxws, xxspltw // ;// P9: lxvwsx, xvcvspuxws // ;vector unsigned int spltMemValConvftoui(float *ptr) { // ; return (vector unsigned int)*ptr; // ;} // ;// P8: vspltisw // ;// P9: vspltisw // ;vector unsigned int spltCnstConvdtoui() { // ; return (vector unsigned int) 4.74; // ;} // ;// P8: 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // ;// P9: 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // ;vector unsigned int fromRegsConvdtoui(double a, double b, // ; double c, double d) { // ; return (vector unsigned int) { a, b, c, d }; // ;} // ;// P8: lxvd2x, xxswapd // ;// P9: lxvx (even lxv) // ;vector unsigned int fromDiffConstsConvdtoui() { // ; return (vector unsigned int) { 24.46, 234., 988.19, 422.39 }; // ;} // ;// P8: 2 x lxvd2x, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvspuxws, vmrgew // ;// P9: 2 x lxvx, xxmrgld, xxmrghd, 2 x xvcvspuxws, vmrgew // ;vector unsigned int fromDiffMemConsAConvdtoui(double *ptr) { // ; return (vector unsigned int) { ptr[0], ptr[1], ptr[2], ptr[3] }; // ;} // ;// P8: 4 x lxsdx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // ;// P9: 4 x lfd, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // ;vector unsigned int fromDiffMemConsDConvdtoui(double *ptr) { // ; return (vector unsigned int) { ptr[3], ptr[2], ptr[1], ptr[0] }; // ;} // ;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // ;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // ;vector unsigned int fromDiffMemVarAConvdtoui(double *arr, int elem) { // ; return (vector unsigned int) { arr[elem], arr[elem+1], // ; arr[elem+2], arr[elem+3] }; // ;} // ;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // ;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // ;vector unsigned int fromDiffMemVarDConvdtoui(double *arr, int elem) { // ; return (vector unsigned int) { arr[elem], arr[elem-1], // ; arr[elem-2], arr[elem-3] }; // ;} // ;// P8: xscvdpuxws, xxspltw // ;// P9: xscvdpuxws, xxspltw // ;vector unsigned int spltRegValConvdtoui(double val) { // ; return 
(vector unsigned int) val; // ;} // ;// P8: lxsspx, xscvdpuxws, xxspltw // ;// P9: lfd, xscvdpuxws, xxspltw // ;vector unsigned int spltMemValConvdtoui(double *ptr) { // ; return (vector unsigned int)*ptr; // ;} // ;/*=============================== unsigned int ==============================*/ ;/*=============================== long long =================================*/ ;// P8: xxlxor // ;// P9: xxlxor // ;vector long long allZeroll() { // ; return (vector long long)0; // ;} // ;// P8: vspltisb -1 // ;// P9: xxspltisb 255 // ;vector long long allOnell() { // ; return (vector long long)-1; // ;} // ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) // ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) // ;vector long long spltConst1ll() { // ; return (vector long long)1; // ;} // ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) // ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) // ;vector long long spltConst16kll() { // ; return (vector long long)((1<<15) - 1); // ;} // ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) // ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) // ;vector long long spltConst32kll() { // ; return (vector long long)((1<<16) - 1); // ;} // ;// P8: 2 x mtvsrd, xxmrghd // ;// P9: mtvsrdd // ;vector long long fromRegsll(long long a, long long b) { // ; return (vector long long){ a, b }; // ;} // ;// P8: lxvd2x, xxswapd // ;// P9: lxvx (or even lxv) // ;vector long long fromDiffConstsll() { // ; return (vector long long) { 242, -113 }; // ;} // ;// P8: lxvd2x, xxswapd // ;// P9: lxvx // ;vector long long fromDiffMemConsAll(long long *arr) { // ; return (vector long long) { arr[0], arr[1] }; // ;} // ;// P8: lxvd2x // ;// P9: lxvx, xxswapd (maybe just use lxvd2x) // ;vector long long fromDiffMemConsDll(long long *arr) { // ; return (vector long long) { arr[3], arr[2] }; // ;} // ;// P8: sldi 3, lxvd2x, xxswapd // ;// P9: sldi 3, lxvx // ;vector long long fromDiffMemVarAll(long long *arr, int elem) { // ; return (vector long long) { arr[elem], arr[elem+1] }; // ;} // ;// P8: sldi 3, lxvd2x // ;// P9: sldi 3, lxvx, xxswapd (maybe just use lxvd2x) // ;vector long long fromDiffMemVarDll(long long *arr, int elem) { // ; return (vector long long) { arr[elem], arr[elem-1] }; // ;} // ;// P8: 2 x ld, 2 x mtvsrd, xxmrghd // ;// P9: 2 x ld, mtvsrdd // ;vector long long fromRandMemConsll(long long *arr) { // ; return (vector long long) { arr[4], arr[18] }; // ;} // ;// P8: sldi 3, add, 2 x ld, 2 x mtvsrd, xxmrghd // ;// P9: sldi 3, add, 2 x ld, mtvsrdd // ;vector long long fromRandMemVarll(long long *arr, int elem) { // ; return (vector long long) { arr[elem+4], arr[elem+1] }; // ;} // ;// P8: mtvsrd, xxspltd // ;// P9: mtvsrdd // ;vector long long spltRegValll(long long val) { // ; return (vector long long) val; // ;} // ;// P8: lxvdsx // ;// P9: lxvdsx // ;vector long long spltMemValll(long long *ptr) { // ; return (vector long long)*ptr; // ;} // ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) // ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) // ;vector long long spltCnstConvftoll() { // ; return (vector long long) 4.74f; // ;} // ;// P8: xxmrghd, xvcvdpsxds // ;// P9: xxmrghd, xvcvdpsxds // ;vector long long fromRegsConvftoll(float a, float b) { // ; return (vector long long) { a, b }; // ;} // ;// P8: lxvd2x, xxswapd // ;// P9: lxvx (even lxv) // ;vector long long fromDiffConstsConvftoll() { // ; return (vector 
long long) { 24.46f, 234.f }; // ;} // ;// P8: 2 x lxsspx, xxmrghd, xvcvdpsxds // ;// P9: 2 x lxssp, xxmrghd, xvcvdpsxds // ;vector long long fromDiffMemConsAConvftoll(float *ptr) { // ; return (vector long long) { ptr[0], ptr[1] }; // ;} // ;// P8: 2 x lxsspx, xxmrghd, xvcvdpsxds // ;// P9: 2 x lxssp, xxmrghd, xvcvdpsxds // ;vector long long fromDiffMemConsDConvftoll(float *ptr) { // ; return (vector long long) { ptr[3], ptr[2] }; // ;} // ;// P8: sldi 2, lfsux, lxsspx, xxmrghd, xvcvdpsxds // ;// P9: sldi 2, lfsux, lfs, xxmrghd, xvcvdpsxds // ;vector long long fromDiffMemVarAConvftoll(float *arr, int elem) { // ; return (vector long long) { arr[elem], arr[elem+1] }; // ;} // ;// P8: sldi 2, lfsux, lxsspx, xxmrghd, xvcvdpsxds // ;// P9: sldi 2, lfsux, lfs, xxmrghd, xvcvdpsxds // ;vector long long fromDiffMemVarDConvftoll(float *arr, int elem) { // ; return (vector long long) { arr[elem], arr[elem-1] }; // ;} // ;// P8: xscvdpsxds, xxspltd // ;// P9: xscvdpsxds, xxspltd // ;vector long long spltRegValConvftoll(float val) { // ; return (vector long long) val; // ;} // ;// P8: lxsspx, xscvdpsxds, xxspltd // ;// P9: lfs, xscvdpsxds, xxspltd // ;vector long long spltMemValConvftoll(float *ptr) { // ; return (vector long long)*ptr; // ;} // ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) // ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) // ;vector long long spltCnstConvdtoll() { // ; return (vector long long) 4.74; // ;} // ;// P8: xxmrghd, xvcvdpsxds // ;// P9: xxmrghd, xvcvdpsxds // ;vector long long fromRegsConvdtoll(double a, double b) { // ; return (vector long long) { a, b }; // ;} // ;// P8: lxvd2x, xxswapd // ;// P9: lxvx (even lxv) // ;vector long long fromDiffConstsConvdtoll() { // ; return (vector long long) { 24.46, 234. 
}; // ;} // ;// P8: lxvd2x, xxswapd, xvcvdpsxds // ;// P9: lxvx, xvcvdpsxds // ;vector long long fromDiffMemConsAConvdtoll(double *ptr) { // ; return (vector long long) { ptr[0], ptr[1] }; // ;} // ;// P8: lxvd2x, xvcvdpsxds // ;// P9: lxvx, xxswapd, xvcvdpsxds // ;vector long long fromDiffMemConsDConvdtoll(double *ptr) { // ; return (vector long long) { ptr[3], ptr[2] }; // ;} // ;// P8: sldi 3, lxvd2x, xxswapd, xvcvdpsxds // ;// P9: sldi 3, lxvx, xvcvdpsxds // ;vector long long fromDiffMemVarAConvdtoll(double *arr, int elem) { // ; return (vector long long) { arr[elem], arr[elem+1] }; // ;} // ;// P8: sldi 3, lxvd2x, xvcvdpsxds // ;// P9: sldi 3, lxvx, xxswapd, xvcvdpsxds // ;vector long long fromDiffMemVarDConvdtoll(double *arr, int elem) { // ; return (vector long long) { arr[elem], arr[elem-1] }; // ;} // ;// P8: xscvdpsxds, xxspltd // ;// P9: xscvdpsxds, xxspltd // ;vector long long spltRegValConvdtoll(double val) { // ; return (vector long long) val; // ;} // ;// P8: lxvdsx, xvcvdpsxds // ;// P9: lxvdsx, xvcvdpsxds // ;vector long long spltMemValConvdtoll(double *ptr) { // ; return (vector long long)*ptr; // ;} // ;/*=============================== long long =================================*/ ;/*========================== unsigned long long =============================*/ ;// P8: xxlxor // ;// P9: xxlxor // ;vector unsigned long long allZeroull() { // ; return (vector unsigned long long)0; // ;} // ;// P8: vspltisb -1 // ;// P9: xxspltisb 255 // ;vector unsigned long long allOneull() { // ; return (vector unsigned long long)-1; // ;} // ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) // ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) // ;vector unsigned long long spltConst1ull() { // ; return (vector unsigned long long)1; // ;} // ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) // ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) // ;vector unsigned long long spltConst16kull() { // ; return (vector unsigned long long)((1<<15) - 1); // ;} // ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) // ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) // ;vector unsigned long long spltConst32kull() { // ; return (vector unsigned long long)((1<<16) - 1); // ;} // ;// P8: 2 x mtvsrd, xxmrghd // ;// P9: mtvsrdd // ;vector unsigned long long fromRegsull(unsigned long long a, // ; unsigned long long b) { // ; return (vector unsigned long long){ a, b }; // ;} // ;// P8: lxvd2x, xxswapd // ;// P9: lxvx (or even lxv) // ;vector unsigned long long fromDiffConstsull() { // ; return (vector unsigned long long) { 242, -113 }; // ;} // ;// P8: lxvd2x, xxswapd // ;// P9: lxvx // ;vector unsigned long long fromDiffMemConsAull(unsigned long long *arr) { // ; return (vector unsigned long long) { arr[0], arr[1] }; // ;} // ;// P8: lxvd2x // ;// P9: lxvx, xxswapd (maybe just use lxvd2x) // ;vector unsigned long long fromDiffMemConsDull(unsigned long long *arr) { // ; return (vector unsigned long long) { arr[3], arr[2] }; // ;} // ;// P8: sldi 3, lxvd2x, xxswapd // ;// P9: sldi 3, lxvx // ;vector unsigned long long fromDiffMemVarAull(unsigned long long *arr, // ; int elem) { // ; return (vector unsigned long long) { arr[elem], arr[elem+1] }; // ;} // ;// P8: sldi 3, lxvd2x // ;// P9: sldi 3, lxvx, xxswapd (maybe just use lxvd2x) // ;vector unsigned long long fromDiffMemVarDull(unsigned long long *arr, // ; int elem) { // ; return (vector unsigned long long) { arr[elem], arr[elem-1] 
}; // ;} // ;// P8: 2 x ld, 2 x mtvsrd, xxmrghd // ;// P9: 2 x ld, mtvsrdd // ;vector unsigned long long fromRandMemConsull(unsigned long long *arr) { // ; return (vector unsigned long long) { arr[4], arr[18] }; // ;} // ;// P8: sldi 3, add, 2 x ld, 2 x mtvsrd, xxmrghd // ;// P9: sldi 3, add, 2 x ld, mtvsrdd // ;vector unsigned long long fromRandMemVarull(unsigned long long *arr, // ; int elem) { // ; return (vector unsigned long long) { arr[elem+4], arr[elem+1] }; // ;} // ;// P8: mtvsrd, xxspltd // ;// P9: mtvsrdd // ;vector unsigned long long spltRegValull(unsigned long long val) { // ; return (vector unsigned long long) val; // ;} // ;// P8: lxvdsx // ;// P9: lxvdsx // ;vector unsigned long long spltMemValull(unsigned long long *ptr) { // ; return (vector unsigned long long)*ptr; // ;} // ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) // ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) // ;vector unsigned long long spltCnstConvftoull() { // ; return (vector unsigned long long) 4.74f; // ;} // ;// P8: xxmrghd, xvcvdpuxds // ;// P9: xxmrghd, xvcvdpuxds // ;vector unsigned long long fromRegsConvftoull(float a, float b) { // ; return (vector unsigned long long) { a, b }; // ;} // ;// P8: lxvd2x, xxswapd // ;// P9: lxvx (even lxv) // ;vector unsigned long long fromDiffConstsConvftoull() { // ; return (vector unsigned long long) { 24.46f, 234.f }; // ;} // ;// P8: 2 x lxsspx, xxmrghd, xvcvdpuxds // ;// P9: 2 x lxssp, xxmrghd, xvcvdpuxds // ;vector unsigned long long fromDiffMemConsAConvftoull(float *ptr) { // ; return (vector unsigned long long) { ptr[0], ptr[1] }; // ;} // ;// P8: 2 x lxsspx, xxmrghd, xvcvdpuxds // ;// P9: 2 x lxssp, xxmrghd, xvcvdpuxds // ;vector unsigned long long fromDiffMemConsDConvftoull(float *ptr) { // ; return (vector unsigned long long) { ptr[3], ptr[2] }; // ;} // ;// P8: sldi 2, lfsux, lxsspx, xxmrghd, xvcvdpuxds // ;// P9: sldi 2, lfsux, lfs, xxmrghd, xvcvdpuxds // ;vector unsigned long long fromDiffMemVarAConvftoull(float *arr, int elem) { // ; return (vector unsigned long long) { arr[elem], arr[elem+1] }; // ;} // ;// P8: sldi 2, lfsux, lxsspx, xxmrghd, xvcvdpuxds // ;// P9: sldi 2, lfsux, lfs, xxmrghd, xvcvdpuxds // ;vector unsigned long long fromDiffMemVarDConvftoull(float *arr, int elem) { // ; return (vector unsigned long long) { arr[elem], arr[elem-1] }; // ;} // ;// P8: xscvdpuxds, xxspltd // ;// P9: xscvdpuxds, xxspltd // ;vector unsigned long long spltRegValConvftoull(float val) { // ; return (vector unsigned long long) val; // ;} // ;// P8: lxsspx, xscvdpuxds, xxspltd // ;// P9: lfs, xscvdpuxds, xxspltd // ;vector unsigned long long spltMemValConvftoull(float *ptr) { // ; return (vector unsigned long long)*ptr; // ;} // ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) // ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) // ;vector unsigned long long spltCnstConvdtoull() { // ; return (vector unsigned long long) 4.74; // ;} // ;// P8: xxmrghd, xvcvdpuxds // ;// P9: xxmrghd, xvcvdpuxds // ;vector unsigned long long fromRegsConvdtoull(double a, double b) { // ; return (vector unsigned long long) { a, b }; // ;} // ;// P8: lxvd2x, xxswapd // ;// P9: lxvx (even lxv) // ;vector unsigned long long fromDiffConstsConvdtoull() { // ; return (vector unsigned long long) { 24.46, 234. 
}; //
;} //
;// P8: lxvd2x, xxswapd, xvcvdpuxds //
;// P9: lxvx, xvcvdpuxds //
;vector unsigned long long fromDiffMemConsAConvdtoull(double *ptr) { //
;  return (vector unsigned long long) { ptr[0], ptr[1] }; //
;} //
;// P8: lxvd2x, xvcvdpuxds //
;// P9: lxvx, xxswapd, xvcvdpuxds //
;vector unsigned long long fromDiffMemConsDConvdtoull(double *ptr) { //
;  return (vector unsigned long long) { ptr[3], ptr[2] }; //
;} //
;// P8: sldi 3, lxvd2x, xxswapd, xvcvdpuxds //
;// P9: sldi 3, lxvx, xvcvdpuxds //
;vector unsigned long long fromDiffMemVarAConvdtoull(double *arr, int elem) { //
;  return (vector unsigned long long) { arr[elem], arr[elem+1] }; //
;} //
;// P8: sldi 3, lxvd2x, xvcvdpuxds //
;// P9: sldi 3, lxvx, xxswapd, xvcvdpuxds //
;vector unsigned long long fromDiffMemVarDConvdtoull(double *arr, int elem) { //
;  return (vector unsigned long long) { arr[elem], arr[elem-1] }; //
;} //
;// P8: xscvdpuxds, xxspltd //
;// P9: xscvdpuxds, xxspltd //
;vector unsigned long long spltRegValConvdtoull(double val) { //
;  return (vector unsigned long long) val; //
;} //
;// P8: lxvdsx, xvcvdpuxds //
;// P9: lxvdsx, xvcvdpuxds //
;vector unsigned long long spltMemValConvdtoull(double *ptr) { //
;  return (vector unsigned long long)*ptr; //
;} //
;/*========================== unsigned long long ==============================*/

; Function Attrs: norecurse nounwind readnone
define <4 x i32> @allZeroi() {
entry:
  ret <4 x i32> zeroinitializer
; P9BE-LABEL: allZeroi
; P9LE-LABEL: allZeroi
; P8BE-LABEL: allZeroi
; P8LE-LABEL: allZeroi
; P9BE: xxlxor v2, v2, v2
; P9BE: blr
; P9LE: xxlxor v2, v2, v2
; P9LE: blr
; P8BE: xxlxor v2, v2, v2
; P8BE: blr
; P8LE: xxlxor v2, v2, v2
; P8LE: blr
}

; Function Attrs: norecurse nounwind readnone
define <4 x i32> @allOnei() {
entry:
  ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
; P9BE-LABEL: allOnei
; P9LE-LABEL: allOnei
; P8BE-LABEL: allOnei
; P8LE-LABEL: allOnei
; P9BE: xxspltib v2, 255
; P9BE: blr
; P9LE: xxspltib v2, 255
; P9LE: blr
; P8BE: vspltisb v2, -1
; P8BE: blr
; P8LE: vspltisb v2, -1
; P8LE: blr
}

; Function Attrs: norecurse nounwind readnone
define <4 x i32> @spltConst1i() {
entry:
  ret <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; P9BE-LABEL: spltConst1i
; P9LE-LABEL: spltConst1i
; P8BE-LABEL: spltConst1i
; P8LE-LABEL: spltConst1i
; P9BE: vspltisw v2, 1
; P9BE: blr
; P9LE: vspltisw v2, 1
; P9LE: blr
; P8BE: vspltisw v2, 1
; P8BE: blr
; P8LE: vspltisw v2, 1
; P8LE: blr
}

; Function Attrs: norecurse nounwind readnone
define <4 x i32> @spltConst16ki() {
entry:
  ret <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
; P9BE-LABEL: spltConst16ki
; P9LE-LABEL: spltConst16ki
; P8BE-LABEL: spltConst16ki
; P8LE-LABEL: spltConst16ki
; P9BE: vspltisw v2, -15
; P9BE: vsrw v2, v2, v2
; P9BE: blr
; P9LE: vspltisw v2, -15
; P9LE: vsrw v2, v2, v2
; P9LE: blr
; P8BE: vspltisw v2, -15
; P8BE: vsrw v2, v2, v2
; P8BE: blr
; P8LE: vspltisw v2, -15
; P8LE: vsrw v2, v2, v2
; P8LE: blr
}

; Function Attrs: norecurse nounwind readnone
define <4 x i32> @spltConst32ki() {
entry:
  ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
; P9BE-LABEL: spltConst32ki
; P9LE-LABEL: spltConst32ki
; P8BE-LABEL: spltConst32ki
; P8LE-LABEL: spltConst32ki
; P9BE: vspltisw v2, -16
; P9BE: vsrw v2, v2, v2
; P9BE: blr
; P9LE: vspltisw v2, -16
; P9LE: vsrw v2, v2, v2
; P9LE: blr
; P8BE: vspltisw v2, -16
; P8BE: vsrw v2, v2, v2
; P8BE: blr
; P8LE: vspltisw v2, -16
; P8LE: vsrw v2, v2, v2
; P8LE: blr
}

; Function Attrs: norecurse nounwind readnone
define <4 x i32> @fromRegsi(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d) {
entry:
  %vecinit = insertelement <4 x i32> undef, i32 %a, i32 0
  %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b,
i32 1 %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %c, i32 2 %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %d, i32 3 ret <4 x i32> %vecinit3 ; P9BE-LABEL: fromRegsi ; P9LE-LABEL: fromRegsi ; P8BE-LABEL: fromRegsi ; P8LE-LABEL: fromRegsi ; P9BE-DAG: mtvsrdd [[REG1:v[0-9]+]], r3, r5 ; P9BE-DAG: mtvsrdd [[REG2:v[0-9]+]], r4, r6 ; P9BE: vmrgow v2, [[REG1]], [[REG2]] ; P9BE: blr ; P9LE-DAG: mtvsrdd [[REG1:v[0-9]+]], r5, r3 ; P9LE-DAG: mtvsrdd [[REG2:v[0-9]+]], r6, r4 ; P9LE: vmrgow v2, [[REG2]], [[REG1]] ; P9LE: blr ; P8BE-DAG: mtvsrwz {{[vf]}}[[REG1:[0-9]+]], r3 ; P8BE-DAG: mtvsrwz {{[vf]}}[[REG2:[0-9]+]], r4 ; P8BE-DAG: mtvsrwz {{[vf]}}[[REG3:[0-9]+]], r5 ; P8BE-DAG: mtvsrwz {{[vf]}}[[REG4:[0-9]+]], r6 ; P8BE-DAG: xxmrghd [[REG5:v[0-9]+]], {{[v][s]*}}[[REG1]], {{[v][s]*}}[[REG3]] ; P8BE-DAG: xxmrghd [[REG6:v[0-9]+]], {{[v][s]*}}[[REG2]], {{[v][s]*}}[[REG4]] ; P8BE: vmrgow v2, [[REG5]], [[REG6]] ; P8LE-DAG: mtvsrwz {{[vf]}}[[REG1:[0-9]+]], r3 ; P8LE-DAG: mtvsrwz {{[vf]}}[[REG2:[0-9]+]], r4 ; P8LE-DAG: mtvsrwz {{[vf]}}[[REG3:[0-9]+]], r5 ; P8LE-DAG: mtvsrwz {{[vf]}}[[REG4:[0-9]+]], r6 ; P8LE: xxmrghd [[REG5:v[0-9]+]], {{[v][s]*}}[[REG3]], {{[v][s]*}}[[REG1]] ; P8LE: xxmrghd [[REG6:v[0-9]+]], {{[v][s]*}}[[REG4]], {{[v][s]*}}[[REG2]] ; P8LE: vmrgow v2, [[REG6]], [[REG5]] } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @fromDiffConstsi() { entry: ret <4 x i32> ; P9BE-LABEL: fromDiffConstsi ; P9LE-LABEL: fromDiffConstsi ; P8BE-LABEL: fromDiffConstsi ; P8LE-LABEL: fromDiffConstsi ; P9BE: lxv ; P9BE: blr ; P9LE: lxv ; P9LE: blr ; P8BE: lxvw4x ; P8BE: blr ; P8LE: lvx ; P8LE-NOT: xxswapd ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemConsAi(i32* nocapture readonly %arr) { entry: %0 = load i32, i32* %arr, align 4 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0 %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 1 %1 = load i32, i32* %arrayidx1, align 4 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %1, i32 1 %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 2 %2 = load i32, i32* %arrayidx3, align 4 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %2, i32 2 %arrayidx5 = getelementptr inbounds i32, i32* %arr, i64 3 %3 = load i32, i32* %arrayidx5, align 4 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3 ret <4 x i32> %vecinit6 ; P9BE-LABEL: fromDiffMemConsAi ; P9LE-LABEL: fromDiffMemConsAi ; P8BE-LABEL: fromDiffMemConsAi ; P8LE-LABEL: fromDiffMemConsAi ; P9BE: lxv ; P9BE: blr ; P9LE: lxv ; P9LE: blr ; P8BE: lxvw4x ; P8BE: blr ; P8LE: lxvd2x ; P8LE: xxswapd ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemConsDi(i32* nocapture readonly %arr) { entry: %arrayidx = getelementptr inbounds i32, i32* %arr, i64 3 %0 = load i32, i32* %arrayidx, align 4 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0 %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 2 %1 = load i32, i32* %arrayidx1, align 4 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %1, i32 1 %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 1 %2 = load i32, i32* %arrayidx3, align 4 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %2, i32 2 %3 = load i32, i32* %arr, align 4 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3 ret <4 x i32> %vecinit6 ; P9BE-LABEL: fromDiffMemConsDi ; P9LE-LABEL: fromDiffMemConsDi ; P8BE-LABEL: fromDiffMemConsDi ; P8LE-LABEL: fromDiffMemConsDi ; P9BE: lxv ; P9BE: lxv ; P9BE: vperm ; P9BE: blr ; P9LE: lxv ; P9LE: lxv ; P9LE: vperm ; P9LE: blr ; P8BE: lxvw4x 
; P8BE: lxvw4x ; P8BE: vperm ; P8BE: blr ; P8LE: lxvd2x ; P8LE-DAG: lvx ; P8LE: xxswapd ; P8LE: vperm ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemVarAi(i32* nocapture readonly %arr, i32 signext %elem) { entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom %0 = load i32, i32* %arrayidx, align 4 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0 %add = add nsw i32 %elem, 1 %idxprom1 = sext i32 %add to i64 %arrayidx2 = getelementptr inbounds i32, i32* %arr, i64 %idxprom1 %1 = load i32, i32* %arrayidx2, align 4 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %1, i32 1 %add4 = add nsw i32 %elem, 2 %idxprom5 = sext i32 %add4 to i64 %arrayidx6 = getelementptr inbounds i32, i32* %arr, i64 %idxprom5 %2 = load i32, i32* %arrayidx6, align 4 %vecinit7 = insertelement <4 x i32> %vecinit3, i32 %2, i32 2 %add8 = add nsw i32 %elem, 3 %idxprom9 = sext i32 %add8 to i64 %arrayidx10 = getelementptr inbounds i32, i32* %arr, i64 %idxprom9 %3 = load i32, i32* %arrayidx10, align 4 %vecinit11 = insertelement <4 x i32> %vecinit7, i32 %3, i32 3 ret <4 x i32> %vecinit11 ; P9BE-LABEL: fromDiffMemVarAi ; P9LE-LABEL: fromDiffMemVarAi ; P8BE-LABEL: fromDiffMemVarAi ; P8LE-LABEL: fromDiffMemVarAi ; P9BE: sldi r4, r4, 2 ; P9BE: lxvx v2, r3, r4 ; P9BE: blr ; P9LE: sldi r4, r4, 2 ; P9LE: lxvx v2, r3, r4 ; P9LE: blr ; P8BE: sldi r4, r4, 2 ; P8BE: lxvw4x {{[vs0-9]+}}, r3, r4 ; P8BE: blr ; P8LE: sldi r4, r4, 2 ; P8LE: lxvd2x {{[vs0-9]+}}, r3, r4 ; P8LE: xxswapd ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemVarDi(i32* nocapture readonly %arr, i32 signext %elem) { entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom %0 = load i32, i32* %arrayidx, align 4 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0 %sub = add nsw i32 %elem, -1 %idxprom1 = sext i32 %sub to i64 %arrayidx2 = getelementptr inbounds i32, i32* %arr, i64 %idxprom1 %1 = load i32, i32* %arrayidx2, align 4 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %1, i32 1 %sub4 = add nsw i32 %elem, -2 %idxprom5 = sext i32 %sub4 to i64 %arrayidx6 = getelementptr inbounds i32, i32* %arr, i64 %idxprom5 %2 = load i32, i32* %arrayidx6, align 4 %vecinit7 = insertelement <4 x i32> %vecinit3, i32 %2, i32 2 %sub8 = add nsw i32 %elem, -3 %idxprom9 = sext i32 %sub8 to i64 %arrayidx10 = getelementptr inbounds i32, i32* %arr, i64 %idxprom9 %3 = load i32, i32* %arrayidx10, align 4 %vecinit11 = insertelement <4 x i32> %vecinit7, i32 %3, i32 3 ret <4 x i32> %vecinit11 ; P9BE-LABEL: fromDiffMemVarDi ; P9LE-LABEL: fromDiffMemVarDi ; P8BE-LABEL: fromDiffMemVarDi ; P8LE-LABEL: fromDiffMemVarDi ; P9BE: sldi {{r[0-9]+}}, r4, 2 ; P9BE-DAG: lxvx {{v[0-9]+}} ; P9BE-DAG: lxvx ; P9BE: vperm ; P9BE: blr ; P9LE: sldi {{r[0-9]+}}, r4, 2 ; P9LE-DAG: lxvx {{v[0-9]+}} ; P9LE-DAG: lxvx ; P9LE: vperm ; P9LE: blr ; P8BE: sldi {{r[0-9]+}}, r4, 2 ; P8BE-DAG: lxvw4x {{v[0-9]+}}, 0, r3 ; P8BE-DAG: lxvw4x ; P8BE: vperm ; P8BE: blr ; P8LE: sldi {{r[0-9]+}}, r4, 2 ; P8LE-DAG: lxvd2x ; P8LE-DAG: lxvd2x ; P8LE: xxswapd ; P8LE: vperm ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromRandMemConsi(i32* nocapture readonly %arr) { entry: %arrayidx = getelementptr inbounds i32, i32* %arr, i64 4 %0 = load i32, i32* %arrayidx, align 4 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0 %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 18 %1 = load i32, i32* %arrayidx1, align 4 %vecinit2 
= insertelement <4 x i32> %vecinit, i32 %1, i32 1 %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 2 %2 = load i32, i32* %arrayidx3, align 4 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %2, i32 2 %arrayidx5 = getelementptr inbounds i32, i32* %arr, i64 88 %3 = load i32, i32* %arrayidx5, align 4 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3 ret <4 x i32> %vecinit6 ; P9BE-LABEL: fromRandMemConsi ; P9LE-LABEL: fromRandMemConsi ; P8BE-LABEL: fromRandMemConsi ; P8LE-LABEL: fromRandMemConsi ; P9BE: lwz ; P9BE: lwz ; P9BE: lwz ; P9BE: lwz ; P9BE: mtvsrdd ; P9BE: mtvsrdd ; P9BE: vmrgow ; P9LE: lwz ; P9LE: lwz ; P9LE: lwz ; P9LE: lwz ; P9LE: mtvsrdd ; P9LE: mtvsrdd ; P9LE: vmrgow ; P8BE: lwz ; P8BE: lwz ; P8BE: lwz ; P8BE: lwz ; P8BE: mtvsrwz ; P8BE: mtvsrwz ; P8BE: mtvsrwz ; P8BE: mtvsrwz ; P8BE: xxmrghd ; P8BE: xxmrghd ; P8BE: vmrgow ; P8LE: lwz ; P8LE: lwz ; P8LE: lwz ; P8LE: lwz ; P8LE: mtvsrwz ; P8LE: mtvsrwz ; P8LE: mtvsrwz ; P8LE: mtvsrwz ; P8LE: xxmrghd ; P8LE: xxmrghd ; P8LE: vmrgow } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromRandMemVari(i32* nocapture readonly %arr, i32 signext %elem) { entry: %add = add nsw i32 %elem, 4 %idxprom = sext i32 %add to i64 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom %0 = load i32, i32* %arrayidx, align 4 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0 %add1 = add nsw i32 %elem, 1 %idxprom2 = sext i32 %add1 to i64 %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 %idxprom2 %1 = load i32, i32* %arrayidx3, align 4 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %1, i32 1 %add5 = add nsw i32 %elem, 2 %idxprom6 = sext i32 %add5 to i64 %arrayidx7 = getelementptr inbounds i32, i32* %arr, i64 %idxprom6 %2 = load i32, i32* %arrayidx7, align 4 %vecinit8 = insertelement <4 x i32> %vecinit4, i32 %2, i32 2 %add9 = add nsw i32 %elem, 8 %idxprom10 = sext i32 %add9 to i64 %arrayidx11 = getelementptr inbounds i32, i32* %arr, i64 %idxprom10 %3 = load i32, i32* %arrayidx11, align 4 %vecinit12 = insertelement <4 x i32> %vecinit8, i32 %3, i32 3 ret <4 x i32> %vecinit12 ; P9BE-LABEL: fromRandMemVari ; P9LE-LABEL: fromRandMemVari ; P8BE-LABEL: fromRandMemVari ; P8LE-LABEL: fromRandMemVari ; P9BE: sldi r4, r4, 2 ; P9BE: lwz ; P9BE: lwz ; P9BE: lwz ; P9BE: lwz ; P9BE: mtvsrdd ; P9BE: mtvsrdd ; P9BE: vmrgow ; P9LE: sldi r4, r4, 2 ; P9LE: lwz ; P9LE: lwz ; P9LE: lwz ; P9LE: lwz ; P9LE: mtvsrdd ; P9LE: mtvsrdd ; P9LE: vmrgow ; P8BE: sldi r4, r4, 2 ; P8BE: lwz ; P8BE: lwz ; P8BE: lwz ; P8BE: lwz ; P8BE: mtvsrwz ; P8BE: mtvsrwz ; P8BE: mtvsrwz ; P8BE: mtvsrwz ; P8BE: xxmrghd ; P8BE: xxmrghd ; P8BE: vmrgow ; P8LE: sldi r4, r4, 2 ; P8LE: lwz ; P8LE: lwz ; P8LE: lwz ; P8LE: lwz ; P8LE: mtvsrwz ; P8LE: mtvsrwz ; P8LE: mtvsrwz ; P8LE: mtvsrwz ; P8LE: xxmrghd ; P8LE: xxmrghd ; P8LE: vmrgow } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltRegVali(i32 signext %val) { entry: %splat.splatinsert = insertelement <4 x i32> undef, i32 %val, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat.splat ; P9BE-LABEL: spltRegVali ; P9LE-LABEL: spltRegVali ; P8BE-LABEL: spltRegVali ; P8LE-LABEL: spltRegVali ; P9BE: mtvsrws v2, r3 ; P9BE: blr ; P9LE: mtvsrws v2, r3 ; P9LE: blr ; P8BE: mtvsrwz {{[vsf0-9]+}}, r3 ; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1 ; P8BE: blr ; P8LE: mtvsrwz {{[vsf0-9]+}}, r3 ; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1 ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @spltMemVali(i32* 
nocapture readonly %ptr) { entry: %0 = load i32, i32* %ptr, align 4 %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat.splat ; P9BE-LABEL: spltMemVali ; P9LE-LABEL: spltMemVali ; P8BE-LABEL: spltMemVali ; P8LE-LABEL: spltMemVali -; P9BE: lxvwsx v2, 0, r3 +; P9BE: lfiwzx f0, 0, r3 +; P9BE: xxsldwi vs0, f0, f0, 1 +; P9BE: xxspltw v2, vs0, 0 ; P9BE: blr -; P9LE: lxvwsx v2, 0, r3 +; P9LE: lfiwzx f0, 0, r3 +; P9LE: xxpermdi vs0, f0, f0, 2 +; P9LE: xxspltw v2, vs0, 3 ; P9LE: blr -; P8BE: lxsiwax {{[vsf0-9]+}}, 0, r3 -; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1 +; P8BE: lfiwzx f0, 0, r3 +; P8BE: xxsldwi vs0, f0, f0, 1 +; P8BE: xxspltw v2, vs0, 0 ; P8BE: blr -; P8LE: lxsiwax {{[vsf0-9]+}}, 0, r3 -; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1 +; P8LE: lfiwzx f0, 0, r3 +; P8LE: xxpermdi vs0, f0, f0, 2 +; P8LE: xxspltw v2, vs0, 3 ; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltCnstConvftoi() { entry: ret <4 x i32> ; P9BE-LABEL: spltCnstConvftoi ; P9LE-LABEL: spltCnstConvftoi ; P8BE-LABEL: spltCnstConvftoi ; P8LE-LABEL: spltCnstConvftoi ; P9BE: vspltisw v2, 4 ; P9BE: blr ; P9LE: vspltisw v2, 4 ; P9LE: blr ; P8BE: vspltisw v2, 4 ; P8BE: blr ; P8LE: vspltisw v2, 4 ; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @fromRegsConvftoi(float %a, float %b, float %c, float %d) { entry: %conv = fptosi float %a to i32 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 %conv1 = fptosi float %b to i32 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %conv1, i32 1 %conv3 = fptosi float %c to i32 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %conv3, i32 2 %conv5 = fptosi float %d to i32 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %conv5, i32 3 ret <4 x i32> %vecinit6 ; P9BE-LABEL: fromRegsConvftoi ; P9LE-LABEL: fromRegsConvftoi ; P8BE-LABEL: fromRegsConvftoi ; P8LE-LABEL: fromRegsConvftoi ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 ; P9BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] ; P9BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P9BE: vmrgew v2, [[REG3]], [[REG4]] ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 ; P9LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] ; P9LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P9LE: vmrgew v2, [[REG4]], [[REG3]] ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 ; P8BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] ; P8BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P8BE: vmrgew v2, [[REG3]], [[REG4]] ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 ; P8LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] ; P8LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P8LE: vmrgew v2, [[REG4]], [[REG3]] } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @fromDiffConstsConvftoi() { entry: ret <4 x i32> ; P9BE-LABEL: fromDiffConstsConvftoi ; P9LE-LABEL: fromDiffConstsConvftoi ; P8BE-LABEL: fromDiffConstsConvftoi ; P8LE-LABEL: fromDiffConstsConvftoi ; P9BE: lxv ; P9BE: blr ; P9LE: lxv ; P9LE: blr ; P8BE: lxvw4x ; P8BE: blr ; P8LE: lvx ; P8LE-NOT: xxswapd ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemConsAConvftoi(float* nocapture 
readonly %ptr) { entry: %0 = bitcast float* %ptr to <4 x float>* %1 = load <4 x float>, <4 x float>* %0, align 4 %2 = fptosi <4 x float> %1 to <4 x i32> ret <4 x i32> %2 ; P9BE-LABEL: fromDiffMemConsAConvftoi ; P9LE-LABEL: fromDiffMemConsAConvftoi ; P8BE-LABEL: fromDiffMemConsAConvftoi ; P8LE-LABEL: fromDiffMemConsAConvftoi ; P9BE: lxv [[REG1:[vs0-9]+]], 0(r3) ; P9BE: xvcvspsxws v2, [[REG1]] ; P9BE: blr ; P9LE: lxv [[REG1:[vs0-9]+]], 0(r3) ; P9LE: xvcvspsxws v2, [[REG1]] ; P9LE: blr ; P8BE: lxvw4x [[REG1:[vs0-9]+]], 0, r3 ; P8BE: xvcvspsxws v2, [[REG1]] ; P8BE: blr ; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3 ; P8LE: xxswapd ; P8LE: xvcvspsxws v2, v2 ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemConsDConvftoi(float* nocapture readonly %ptr) { entry: %arrayidx = getelementptr inbounds float, float* %ptr, i64 3 %0 = load float, float* %arrayidx, align 4 %conv = fptosi float %0 to i32 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 2 %1 = load float, float* %arrayidx1, align 4 %conv2 = fptosi float %1 to i32 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1 %arrayidx4 = getelementptr inbounds float, float* %ptr, i64 1 %2 = load float, float* %arrayidx4, align 4 %conv5 = fptosi float %2 to i32 %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2 %3 = load float, float* %ptr, align 4 %conv8 = fptosi float %3 to i32 %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3 ret <4 x i32> %vecinit9 ; P9BE-LABEL: fromDiffMemConsDConvftoi ; P9LE-LABEL: fromDiffMemConsDConvftoi ; P8BE-LABEL: fromDiffMemConsDConvftoi ; P8LE-LABEL: fromDiffMemConsDConvftoi ; P9BE: lxv ; P9BE: lxv ; P9BE: vperm ; P9BE: xvcvspsxws ; P9BE: blr ; P9LE: lxv ; P9LE: lxv ; P9LE: vperm ; P9LE: xvcvspsxws ; P9LE: blr ; P8BE: lxvw4x ; P8BE: lxvw4x ; P8BE: vperm ; P8BE: xvcvspsxws ; P8BE: blr ; P8LE: lxvd2x ; P8LE-DAG: lvx ; P8LE: xxswapd ; P8LE: vperm ; P8LE: xvcvspsxws ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemVarAConvftoi(float* nocapture readonly %arr, i32 signext %elem) { entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom %0 = load float, float* %arrayidx, align 4 %conv = fptosi float %0 to i32 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 %add = add nsw i32 %elem, 1 %idxprom1 = sext i32 %add to i64 %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1 %1 = load float, float* %arrayidx2, align 4 %conv3 = fptosi float %1 to i32 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1 %add5 = add nsw i32 %elem, 2 %idxprom6 = sext i32 %add5 to i64 %arrayidx7 = getelementptr inbounds float, float* %arr, i64 %idxprom6 %2 = load float, float* %arrayidx7, align 4 %conv8 = fptosi float %2 to i32 %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2 %add10 = add nsw i32 %elem, 3 %idxprom11 = sext i32 %add10 to i64 %arrayidx12 = getelementptr inbounds float, float* %arr, i64 %idxprom11 %3 = load float, float* %arrayidx12, align 4 %conv13 = fptosi float %3 to i32 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3 ret <4 x i32> %vecinit14 ; P9BE-LABEL: fromDiffMemVarAConvftoi ; P9LE-LABEL: fromDiffMemVarAConvftoi ; P8BE-LABEL: fromDiffMemVarAConvftoi ; P8LE-LABEL: fromDiffMemVarAConvftoi ; FIXME: implement finding consecutive loads with pre-inc ; P9BE: lfsux ; P9LE: lfsux ; P8BE: lfsux ; P8LE: lfsux } ; Function Attrs: norecurse 
nounwind readonly define <4 x i32> @fromDiffMemVarDConvftoi(float* nocapture readonly %arr, i32 signext %elem) { entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom %0 = load float, float* %arrayidx, align 4 %conv = fptosi float %0 to i32 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 %sub = add nsw i32 %elem, -1 %idxprom1 = sext i32 %sub to i64 %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1 %1 = load float, float* %arrayidx2, align 4 %conv3 = fptosi float %1 to i32 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1 %sub5 = add nsw i32 %elem, -2 %idxprom6 = sext i32 %sub5 to i64 %arrayidx7 = getelementptr inbounds float, float* %arr, i64 %idxprom6 %2 = load float, float* %arrayidx7, align 4 %conv8 = fptosi float %2 to i32 %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2 %sub10 = add nsw i32 %elem, -3 %idxprom11 = sext i32 %sub10 to i64 %arrayidx12 = getelementptr inbounds float, float* %arr, i64 %idxprom11 %3 = load float, float* %arrayidx12, align 4 %conv13 = fptosi float %3 to i32 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3 ret <4 x i32> %vecinit14 ; P9BE-LABEL: fromDiffMemVarDConvftoi ; P9LE-LABEL: fromDiffMemVarDConvftoi ; P8BE-LABEL: fromDiffMemVarDConvftoi ; P8LE-LABEL: fromDiffMemVarDConvftoi ; FIXME: implement finding consecutive loads with pre-inc ; P9BE: lfsux ; P9LE: lfsux ; P8BE: lfsux ; P8LE: lfsux } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltRegValConvftoi(float %val) { entry: %conv = fptosi float %val to i32 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat.splat ; P9BE-LABEL: spltRegValConvftoi ; P9LE-LABEL: spltRegValConvftoi ; P8BE-LABEL: spltRegValConvftoi ; P8LE-LABEL: spltRegValConvftoi ; P9BE: xscvdpsxws f[[REG1:[0-9]+]], f1 ; P9BE: xxspltw v2, vs[[REG1]], 1 ; P9BE: blr ; P9LE: xscvdpsxws f[[REG1:[0-9]+]], f1 ; P9LE: xxspltw v2, vs[[REG1]], 1 ; P9LE: blr ; P8BE: xscvdpsxws f[[REG1:[0-9]+]], f1 ; P8BE: xxspltw v2, vs[[REG1]], 1 ; P8BE: blr ; P8LE: xscvdpsxws f[[REG1:[0-9]+]], f1 ; P8LE: xxspltw v2, vs[[REG1]], 1 ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @spltMemValConvftoi(float* nocapture readonly %ptr) { entry: %0 = load float, float* %ptr, align 4 %conv = fptosi float %0 to i32 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat.splat ; P9BE-LABEL: spltMemValConvftoi ; P9LE-LABEL: spltMemValConvftoi ; P8BE-LABEL: spltMemValConvftoi ; P8LE-LABEL: spltMemValConvftoi ; P9BE: lxvwsx [[REG1:[vs0-9]+]], 0, r3 ; P9BE: xvcvspsxws v2, [[REG1]] ; P9LE: [[REG1:[vs0-9]+]], 0, r3 ; P9LE: xvcvspsxws v2, [[REG1]] ; P8BE: lfsx [[REG1:f[0-9]+]], 0, r3 ; P8BE: xscvdpsxws f[[REG2:[0-9]+]], [[REG1]] ; P8BE: xxspltw v2, vs[[REG2]], 1 ; P8LE: lfsx [[REG1:f[0-9]+]], 0, r3 ; P8LE: xscvdpsxws f[[REG2:[vs0-9]+]], [[REG1]] ; P8LE: xxspltw v2, vs[[REG2]], 1 } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltCnstConvdtoi() { entry: ret <4 x i32> ; P9BE-LABEL: spltCnstConvdtoi ; P9LE-LABEL: spltCnstConvdtoi ; P8BE-LABEL: spltCnstConvdtoi ; P8LE-LABEL: spltCnstConvdtoi ; P9BE: vspltisw v2, 4 ; P9BE: blr ; P9LE: vspltisw v2, 4 ; P9LE: blr ; P8BE: vspltisw v2, 4 ; P8BE: blr ; P8LE: vspltisw v2, 4 ; P8LE: 
blr } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) { entry: %conv = fptosi double %a to i32 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 %conv1 = fptosi double %b to i32 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %conv1, i32 1 %conv3 = fptosi double %c to i32 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %conv3, i32 2 %conv5 = fptosi double %d to i32 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %conv5, i32 3 ret <4 x i32> %vecinit6 ; P9BE-LABEL: fromRegsConvdtoi ; P9LE-LABEL: fromRegsConvdtoi ; P8BE-LABEL: fromRegsConvdtoi ; P8LE-LABEL: fromRegsConvdtoi ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 ; P9BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] ; P9BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P9BE: vmrgew v2, [[REG3]], [[REG4]] ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 ; P9LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] ; P9LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P9LE: vmrgew v2, [[REG4]], [[REG3]] ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 ; P8BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] ; P8BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P8BE: vmrgew v2, [[REG3]], [[REG4]] ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 ; P8LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] ; P8LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P8LE: vmrgew v2, [[REG4]], [[REG3]] } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @fromDiffConstsConvdtoi() { entry: ret <4 x i32> ; P9BE-LABEL: fromDiffConstsConvdtoi ; P9LE-LABEL: fromDiffConstsConvdtoi ; P8BE-LABEL: fromDiffConstsConvdtoi ; P8LE-LABEL: fromDiffConstsConvdtoi ; P9BE: lxv ; P9BE: blr ; P9LE: lxv ; P9LE: blr ; P8BE: lxvw4x ; P8BE: blr ; P8LE: lvx ; P8LE-NOT: xxswapd ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemConsAConvdtoi(double* nocapture readonly %ptr) { entry: %0 = bitcast double* %ptr to <2 x double>* %1 = load <2 x double>, <2 x double>* %0, align 8 %2 = fptosi <2 x double> %1 to <2 x i32> %arrayidx4 = getelementptr inbounds double, double* %ptr, i64 2 %3 = bitcast double* %arrayidx4 to <2 x double>* %4 = load <2 x double>, <2 x double>* %3, align 8 %5 = fptosi <2 x double> %4 to <2 x i32> %vecinit9 = shufflevector <2 x i32> %2, <2 x i32> %5, <4 x i32> ret <4 x i32> %vecinit9 ; P9BE-LABEL: fromDiffMemConsAConvdtoi ; P9LE-LABEL: fromDiffMemConsAConvdtoi ; P8BE-LABEL: fromDiffMemConsAConvdtoi ; P8LE-LABEL: fromDiffMemConsAConvdtoi ; P9BE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3) ; P9BE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3) ; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]] ; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]] ; P9BE-DAG: xvcvdpsxws [[REG5:[vs0-9]+]], [[REG3]] ; P9BE-DAG: xvcvdpsxws [[REG6:[vs0-9]+]], [[REG4]] ; P9BE: vmrgew v2, [[REG6]], [[REG5]] ; P9LE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3) ; P9LE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3) ; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]] ; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]] ; P9LE-DAG: xvcvdpsxws [[REG5:[vs0-9]+]], [[REG3]] ; P9LE-DAG: xvcvdpsxws [[REG6:[vs0-9]+]], [[REG4]] ; P9LE: vmrgew v2, [[REG6]], [[REG5]] ; P8BE: lxvd2x [[REG1:[vs0-9]+]], 0, r3 ; 
P8BE: lxvd2x [[REG2:[vs0-9]+]], r3, r4 ; P8BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]] ; P8BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]] ; P8BE-DAG: xvcvdpsxws [[REG5:[vs0-9]+]], [[REG3]] ; P8BE-DAG: xvcvdpsxws [[REG6:[vs0-9]+]], [[REG4]] ; P8BE: vmrgew v2, [[REG6]], [[REG5]] ; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3 ; P8LE: lxvd2x [[REG2:[vs0-9]+]], r3, r4 ; P8LE-DAG: xxswapd [[REG3:[vs0-9]+]], [[REG1]] ; P8LE-DAG: xxswapd [[REG4:[vs0-9]+]], [[REG2]] ; P8LE-DAG: xxmrgld [[REG5:[vs0-9]+]], [[REG4]], [[REG3]] ; P8LE-DAG: xxmrghd [[REG6:[vs0-9]+]], [[REG4]], [[REG3]] ; P8LE-DAG: xvcvdpsxws [[REG7:[vs0-9]+]], [[REG5]] ; P8LE-DAG: xvcvdpsxws [[REG8:[vs0-9]+]], [[REG6]] ; P8LE: vmrgew v2, [[REG8]], [[REG7]] } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemConsDConvdtoi(double* nocapture readonly %ptr) { entry: %arrayidx = getelementptr inbounds double, double* %ptr, i64 3 %0 = load double, double* %arrayidx, align 8 %conv = fptosi double %0 to i32 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 %arrayidx1 = getelementptr inbounds double, double* %ptr, i64 2 %1 = load double, double* %arrayidx1, align 8 %conv2 = fptosi double %1 to i32 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1 %arrayidx4 = getelementptr inbounds double, double* %ptr, i64 1 %2 = load double, double* %arrayidx4, align 8 %conv5 = fptosi double %2 to i32 %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2 %3 = load double, double* %ptr, align 8 %conv8 = fptosi double %3 to i32 %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3 ret <4 x i32> %vecinit9 ; P9BE-LABEL: fromDiffMemConsDConvdtoi ; P9LE-LABEL: fromDiffMemConsDConvdtoi ; P8BE-LABEL: fromDiffMemConsDConvdtoi ; P8LE-LABEL: fromDiffMemConsDConvdtoi ; P9BE: lfd ; P9BE: lfd ; P9BE: lfd ; P9BE: lfd ; P9BE: xxmrghd ; P9BE: xxmrghd ; P9BE: xvcvdpsxws ; P9BE: xvcvdpsxws ; P9BE: vmrgew v2 ; P9LE: lfd ; P9LE: lfd ; P9LE: lfd ; P9LE: lfd ; P9LE: xxmrghd ; P9LE: xxmrghd ; P9LE: xvcvdpsxws ; P9LE: xvcvdpsxws ; P9LE: vmrgew v2 ; P8BE: lfdx ; P8BE: lfd ; P8BE: lfd ; P8BE: lfd ; P8BE: xxmrghd ; P8BE: xxmrghd ; P8BE: xvcvdpsxws ; P8BE: xvcvdpsxws ; P8BE: vmrgew v2 ; P8LE: lfdx ; P8LE: lfd ; P8LE: lfd ; P8LE: lfd ; P8LE: xxmrghd ; P8LE: xxmrghd ; P8LE: xvcvdpsxws ; P8LE: xvcvdpsxws ; P8LE: vmrgew v2 } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemVarAConvdtoi(double* nocapture readonly %arr, i32 signext %elem) { entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom %0 = load double, double* %arrayidx, align 8 %conv = fptosi double %0 to i32 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 %add = add nsw i32 %elem, 1 %idxprom1 = sext i32 %add to i64 %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1 %1 = load double, double* %arrayidx2, align 8 %conv3 = fptosi double %1 to i32 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1 %add5 = add nsw i32 %elem, 2 %idxprom6 = sext i32 %add5 to i64 %arrayidx7 = getelementptr inbounds double, double* %arr, i64 %idxprom6 %2 = load double, double* %arrayidx7, align 8 %conv8 = fptosi double %2 to i32 %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2 %add10 = add nsw i32 %elem, 3 %idxprom11 = sext i32 %add10 to i64 %arrayidx12 = getelementptr inbounds double, double* %arr, i64 %idxprom11 %3 = load double, double* %arrayidx12, align 8 %conv13 = fptosi double %3 to i32 %vecinit14 = insertelement <4 x i32> %vecinit9, 
i32 %conv13, i32 3 ret <4 x i32> %vecinit14 ; P9BE-LABEL: fromDiffMemVarAConvdtoi ; P9LE-LABEL: fromDiffMemVarAConvdtoi ; P8BE-LABEL: fromDiffMemVarAConvdtoi ; P8LE-LABEL: fromDiffMemVarAConvdtoi ; P9BE: lfdux ; P9BE: lfd ; P9BE: lfd ; P9BE: lfd ; P9BE: xxmrghd ; P9BE: xxmrghd ; P9BE: xvcvdpsxws ; P9BE: xvcvdpsxws ; P9BE: vmrgew v2 ; P9LE: lfdux ; P9LE: lfd ; P9LE: lfd ; P9LE: lfd ; P9LE: xxmrghd ; P9LE: xxmrghd ; P9LE: xvcvdpsxws ; P9LE: xvcvdpsxws ; P9LE: vmrgew v2 ; P8BE: lfdux ; P8BE: lfd ; P8BE: lfd ; P8BE: lfd ; P8BE: xxmrghd ; P8BE: xxmrghd ; P8BE: xvcvdpsxws ; P8BE: xvcvdpsxws ; P8BE: vmrgew v2 ; P8LE: lfdux ; P8LE: lfd ; P8LE: lfd ; P8LE: lfd ; P8LE: xxmrghd ; P8LE: xxmrghd ; P8LE: xvcvdpsxws ; P8LE: xvcvdpsxws ; P8LE: vmrgew v2 } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemVarDConvdtoi(double* nocapture readonly %arr, i32 signext %elem) { entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom %0 = load double, double* %arrayidx, align 8 %conv = fptosi double %0 to i32 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 %sub = add nsw i32 %elem, -1 %idxprom1 = sext i32 %sub to i64 %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1 %1 = load double, double* %arrayidx2, align 8 %conv3 = fptosi double %1 to i32 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1 %sub5 = add nsw i32 %elem, -2 %idxprom6 = sext i32 %sub5 to i64 %arrayidx7 = getelementptr inbounds double, double* %arr, i64 %idxprom6 %2 = load double, double* %arrayidx7, align 8 %conv8 = fptosi double %2 to i32 %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2 %sub10 = add nsw i32 %elem, -3 %idxprom11 = sext i32 %sub10 to i64 %arrayidx12 = getelementptr inbounds double, double* %arr, i64 %idxprom11 %3 = load double, double* %arrayidx12, align 8 %conv13 = fptosi double %3 to i32 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3 ret <4 x i32> %vecinit14 ; P9BE-LABEL: fromDiffMemVarDConvdtoi ; P9LE-LABEL: fromDiffMemVarDConvdtoi ; P8BE-LABEL: fromDiffMemVarDConvdtoi ; P8LE-LABEL: fromDiffMemVarDConvdtoi ; P9BE: lfdux ; P9BE: lfd ; P9BE: lfd ; P9BE: lfd ; P9BE: xxmrghd ; P9BE: xxmrghd ; P9BE: xvcvdpsxws ; P9BE: xvcvdpsxws ; P9BE: vmrgew v2 ; P9LE: lfdux ; P9LE: lfd ; P9LE: lfd ; P9LE: lfd ; P9LE: xxmrghd ; P9LE: xxmrghd ; P9LE: xvcvdpsxws ; P9LE: xvcvdpsxws ; P9LE: vmrgew v2 ; P8BE: lfdux ; P8BE: lfd ; P8BE: lfd ; P8BE: lfd ; P8BE: xxmrghd ; P8BE: xxmrghd ; P8BE: xvcvdpsxws ; P8BE: xvcvdpsxws ; P8BE: vmrgew v2 ; P8LE: lfdux ; P8LE: lfd ; P8LE: lfd ; P8LE: lfd ; P8LE: xxmrghd ; P8LE: xxmrghd ; P8LE: xvcvdpsxws ; P8LE: xvcvdpsxws ; P8LE: vmrgew v2 } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltRegValConvdtoi(double %val) { entry: %conv = fptosi double %val to i32 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat.splat ; P9BE-LABEL: spltRegValConvdtoi ; P9LE-LABEL: spltRegValConvdtoi ; P8BE-LABEL: spltRegValConvdtoi ; P8LE-LABEL: spltRegValConvdtoi ; P9BE: xscvdpsxws ; P9BE: xxspltw ; P9BE: blr ; P9LE: xscvdpsxws ; P9LE: xxspltw ; P9LE: blr ; P8BE: xscvdpsxws ; P8BE: xxspltw ; P8BE: blr ; P8LE: xscvdpsxws ; P8LE: xxspltw ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @spltMemValConvdtoi(double* nocapture readonly %ptr) { entry: %0 = load double, double* %ptr, align 8 %conv = fptosi 
double %0 to i32
  %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
  %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat.splat
; P9BE-LABEL: spltMemValConvdtoi
; P9LE-LABEL: spltMemValConvdtoi
; P8BE-LABEL: spltMemValConvdtoi
; P8LE-LABEL: spltMemValConvdtoi
; P9BE: lfd
; P9BE: xscvdpsxws
; P9BE: xxspltw
; P9BE: blr
; P9LE: lfd
; P9LE: xscvdpsxws
; P9LE: xxspltw
; P9LE: blr
; P8BE: lfdx
; P8BE: xscvdpsxws
; P8BE: xxspltw
; P8BE: blr
; P8LE: lfdx
; P8LE: xscvdpsxws
; P8LE: xxspltw
; P8LE: blr
}

; Function Attrs: norecurse nounwind readnone
define <4 x i32> @allZeroui() {
entry:
  ret <4 x i32> zeroinitializer
; P9BE-LABEL: allZeroui
; P9LE-LABEL: allZeroui
; P8BE-LABEL: allZeroui
; P8LE-LABEL: allZeroui
; P9BE: xxlxor v2, v2, v2
; P9BE: blr
; P9LE: xxlxor v2, v2, v2
; P9LE: blr
; P8BE: xxlxor v2, v2, v2
; P8BE: blr
; P8LE: xxlxor v2, v2, v2
; P8LE: blr
}

; Function Attrs: norecurse nounwind readnone
define <4 x i32> @allOneui() {
entry:
  ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
; P9BE-LABEL: allOneui
; P9LE-LABEL: allOneui
; P8BE-LABEL: allOneui
; P8LE-LABEL: allOneui
; P9BE: xxspltib v2, 255
; P9BE: blr
; P9LE: xxspltib v2, 255
; P9LE: blr
; P8BE: vspltisb v2, -1
; P8BE: blr
; P8LE: vspltisb v2, -1
; P8LE: blr
}

; Function Attrs: norecurse nounwind readnone
define <4 x i32> @spltConst1ui() {
entry:
  ret <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; P9BE-LABEL: spltConst1ui
; P9LE-LABEL: spltConst1ui
; P8BE-LABEL: spltConst1ui
; P8LE-LABEL: spltConst1ui
; P9BE: vspltisw v2, 1
; P9BE: blr
; P9LE: vspltisw v2, 1
; P9LE: blr
; P8BE: vspltisw v2, 1
; P8BE: blr
; P8LE: vspltisw v2, 1
; P8LE: blr
}

; Function Attrs: norecurse nounwind readnone
define <4 x i32> @spltConst16kui() {
entry:
  ret <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
; P9BE-LABEL: spltConst16kui
; P9LE-LABEL: spltConst16kui
; P8BE-LABEL: spltConst16kui
; P8LE-LABEL: spltConst16kui
; P9BE: vspltisw v2, -15
; P9BE: vsrw v2, v2, v2
; P9BE: blr
; P9LE: vspltisw v2, -15
; P9LE: vsrw v2, v2, v2
; P9LE: blr
; P8BE: vspltisw v2, -15
; P8BE: vsrw v2, v2, v2
; P8BE: blr
; P8LE: vspltisw v2, -15
; P8LE: vsrw v2, v2, v2
; P8LE: blr
}

; Function Attrs: norecurse nounwind readnone
define <4 x i32> @spltConst32kui() {
entry:
  ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
; P9BE-LABEL: spltConst32kui
; P9LE-LABEL: spltConst32kui
; P8BE-LABEL: spltConst32kui
; P8LE-LABEL: spltConst32kui
; P9BE: vspltisw v2, -16
; P9BE: vsrw v2, v2, v2
; P9BE: blr
; P9LE: vspltisw v2, -16
; P9LE: vsrw v2, v2, v2
; P9LE: blr
; P8BE: vspltisw v2, -16
; P8BE: vsrw v2, v2, v2
; P8BE: blr
; P8LE: vspltisw v2, -16
; P8LE: vsrw v2, v2, v2
; P8LE: blr
}

; Function Attrs: norecurse nounwind readnone
define <4 x i32> @fromRegsui(i32 zeroext %a, i32 zeroext %b, i32 zeroext %c, i32 zeroext %d) {
entry:
  %vecinit = insertelement <4 x i32> undef, i32 %a, i32 0
  %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1
  %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %c, i32 2
  %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %d, i32 3
  ret <4 x i32> %vecinit3
; P9BE-LABEL: fromRegsui
; P9LE-LABEL: fromRegsui
; P8BE-LABEL: fromRegsui
; P8LE-LABEL: fromRegsui
; P9BE-DAG: mtvsrdd [[REG1:v[0-9]+]], r3, r5
; P9BE-DAG: mtvsrdd [[REG2:v[0-9]+]], r4, r6
; P9BE: vmrgow v2, [[REG1]], [[REG2]]
; P9BE: blr
; P9LE-DAG: mtvsrdd [[REG1:v[0-9]+]], r5, r3
; P9LE-DAG: mtvsrdd [[REG2:v[0-9]+]], r6, r4
; P9LE: vmrgow v2, [[REG2]], [[REG1]]
; P9LE: blr
; P8BE-DAG: mtvsrwz {{[vf]}}[[REG1:[0-9]+]], r3
; P8BE-DAG: mtvsrwz {{[vf]}}[[REG2:[0-9]+]], r4
; P8BE-DAG: mtvsrwz {{[vf]}}[[REG3:[0-9]+]], r5
; P8BE-DAG: mtvsrwz {{[vf]}}[[REG4:[0-9]+]],
r6 ; P8BE-DAG: xxmrghd [[REG5:v[0-9]+]], {{[v][s]*}}[[REG1]], {{[v][s]*}}[[REG3]] ; P8BE-DAG: xxmrghd [[REG6:v[0-9]+]], {{[v][s]*}}[[REG2]], {{[v][s]*}}[[REG4]] ; P8BE: vmrgow v2, [[REG5]], [[REG6]] ; P8LE-DAG: mtvsrwz {{[vf]}}[[REG1:[0-9]+]], r3 ; P8LE-DAG: mtvsrwz {{[vf]}}[[REG2:[0-9]+]], r4 ; P8LE-DAG: mtvsrwz {{[vf]}}[[REG3:[0-9]+]], r5 ; P8LE-DAG: mtvsrwz {{[vf]}}[[REG4:[0-9]+]], r6 ; P8LE: xxmrghd [[REG5:v[0-9]+]], {{[v][s]*}}[[REG3]], {{[v][s]*}}[[REG1]] ; P8LE: xxmrghd [[REG6:v[0-9]+]], {{[v][s]*}}[[REG4]], {{[v][s]*}}[[REG2]] ; P8LE: vmrgow v2, [[REG6]], [[REG5]] } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @fromDiffConstsui() { entry: ret <4 x i32> ; P9BE-LABEL: fromDiffConstsui ; P9LE-LABEL: fromDiffConstsui ; P8BE-LABEL: fromDiffConstsui ; P8LE-LABEL: fromDiffConstsui ; P9BE: lxv ; P9BE: blr ; P9LE: lxv ; P9LE: blr ; P8BE: lxvw4x ; P8BE: blr ; P8LE: lvx ; P8LE-NOT: xxswapd ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemConsAui(i32* nocapture readonly %arr) { entry: %0 = load i32, i32* %arr, align 4 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0 %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 1 %1 = load i32, i32* %arrayidx1, align 4 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %1, i32 1 %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 2 %2 = load i32, i32* %arrayidx3, align 4 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %2, i32 2 %arrayidx5 = getelementptr inbounds i32, i32* %arr, i64 3 %3 = load i32, i32* %arrayidx5, align 4 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3 ret <4 x i32> %vecinit6 ; P9BE-LABEL: fromDiffMemConsAui ; P9LE-LABEL: fromDiffMemConsAui ; P8BE-LABEL: fromDiffMemConsAui ; P8LE-LABEL: fromDiffMemConsAui ; P9BE: lxv ; P9BE: blr ; P9LE: lxv ; P9LE: blr ; P8BE: lxvw4x ; P8BE: blr ; P8LE: lxvd2x ; P8LE: xxswapd ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemConsDui(i32* nocapture readonly %arr) { entry: %arrayidx = getelementptr inbounds i32, i32* %arr, i64 3 %0 = load i32, i32* %arrayidx, align 4 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0 %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 2 %1 = load i32, i32* %arrayidx1, align 4 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %1, i32 1 %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 1 %2 = load i32, i32* %arrayidx3, align 4 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %2, i32 2 %3 = load i32, i32* %arr, align 4 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3 ret <4 x i32> %vecinit6 ; P9BE-LABEL: fromDiffMemConsDui ; P9LE-LABEL: fromDiffMemConsDui ; P8BE-LABEL: fromDiffMemConsDui ; P8LE-LABEL: fromDiffMemConsDui ; P9BE: lxv ; P9BE: lxv ; P9BE: vperm ; P9BE: blr ; P9LE: lxv ; P9LE: lxv ; P9LE: vperm ; P9LE: blr ; P8BE: lxvw4x ; P8BE: lxvw4x ; P8BE: vperm ; P8BE: blr ; P8LE: lxvd2x ; P8LE-DAG: lvx ; P8LE-NOT: xxswapd ; P8LE: xxswapd ; P8LE: vperm ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemVarAui(i32* nocapture readonly %arr, i32 signext %elem) { entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom %0 = load i32, i32* %arrayidx, align 4 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0 %add = add nsw i32 %elem, 1 %idxprom1 = sext i32 %add to i64 %arrayidx2 = getelementptr inbounds i32, i32* %arr, i64 %idxprom1 %1 = load i32, i32* %arrayidx2, align 4 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %1, i32 1 %add4 
= add nsw i32 %elem, 2 %idxprom5 = sext i32 %add4 to i64 %arrayidx6 = getelementptr inbounds i32, i32* %arr, i64 %idxprom5 %2 = load i32, i32* %arrayidx6, align 4 %vecinit7 = insertelement <4 x i32> %vecinit3, i32 %2, i32 2 %add8 = add nsw i32 %elem, 3 %idxprom9 = sext i32 %add8 to i64 %arrayidx10 = getelementptr inbounds i32, i32* %arr, i64 %idxprom9 %3 = load i32, i32* %arrayidx10, align 4 %vecinit11 = insertelement <4 x i32> %vecinit7, i32 %3, i32 3 ret <4 x i32> %vecinit11 ; P9BE-LABEL: fromDiffMemVarAui ; P9LE-LABEL: fromDiffMemVarAui ; P8BE-LABEL: fromDiffMemVarAui ; P8LE-LABEL: fromDiffMemVarAui ; P9BE: sldi r4, r4, 2 ; P9BE: lxvx v2, r3, r4 ; P9BE: blr ; P9LE: sldi r4, r4, 2 ; P9LE: lxvx v2, r3, r4 ; P9LE: blr ; P8BE: sldi r4, r4, 2 ; P8BE: lxvw4x {{[vs0-9]+}}, r3, r4 ; P8BE: blr ; P8LE: sldi r4, r4, 2 ; P8LE: lxvd2x {{[vs0-9]+}}, r3, r4 ; P8LE: xxswapd ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemVarDui(i32* nocapture readonly %arr, i32 signext %elem) { entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom %0 = load i32, i32* %arrayidx, align 4 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0 %sub = add nsw i32 %elem, -1 %idxprom1 = sext i32 %sub to i64 %arrayidx2 = getelementptr inbounds i32, i32* %arr, i64 %idxprom1 %1 = load i32, i32* %arrayidx2, align 4 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %1, i32 1 %sub4 = add nsw i32 %elem, -2 %idxprom5 = sext i32 %sub4 to i64 %arrayidx6 = getelementptr inbounds i32, i32* %arr, i64 %idxprom5 %2 = load i32, i32* %arrayidx6, align 4 %vecinit7 = insertelement <4 x i32> %vecinit3, i32 %2, i32 2 %sub8 = add nsw i32 %elem, -3 %idxprom9 = sext i32 %sub8 to i64 %arrayidx10 = getelementptr inbounds i32, i32* %arr, i64 %idxprom9 %3 = load i32, i32* %arrayidx10, align 4 %vecinit11 = insertelement <4 x i32> %vecinit7, i32 %3, i32 3 ret <4 x i32> %vecinit11 ; P9BE-LABEL: fromDiffMemVarDui ; P9LE-LABEL: fromDiffMemVarDui ; P8BE-LABEL: fromDiffMemVarDui ; P8LE-LABEL: fromDiffMemVarDui ; P9BE-DAG: sldi {{r[0-9]+}}, r4, 2 ; P9BE-DAG: addi r3, r3, -12 ; P9BE-DAG: lxvx {{v[0-9]+}}, 0, r3 ; P9BE-DAG: lxvx ; P9BE: vperm ; P9BE: blr ; P9LE-DAG: sldi {{r[0-9]+}}, r4, 2 ; P9LE-DAG: addi r3, r3, -12 ; P9LE-DAG: lxvx {{v[0-9]+}}, 0, r3 ; P9LE-DAG: lxv ; P9LE: vperm ; P9LE: blr ; P8BE-DAG: sldi {{r[0-9]+}}, r4, 2 ; P8BE-DAG: lxvw4x {{v[0-9]+}}, 0, r3 ; P8BE-DAG: lxvw4x ; P8BE: vperm ; P8BE: blr ; P8LE-DAG: sldi {{r[0-9]+}}, r4, 2 ; P8LE-DAG: lvx ; P8LE-DAG: lvx ; P8LE: vperm ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromRandMemConsui(i32* nocapture readonly %arr) { entry: %arrayidx = getelementptr inbounds i32, i32* %arr, i64 4 %0 = load i32, i32* %arrayidx, align 4 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0 %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 18 %1 = load i32, i32* %arrayidx1, align 4 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %1, i32 1 %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 2 %2 = load i32, i32* %arrayidx3, align 4 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %2, i32 2 %arrayidx5 = getelementptr inbounds i32, i32* %arr, i64 88 %3 = load i32, i32* %arrayidx5, align 4 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3 ret <4 x i32> %vecinit6 ; P9BE-LABEL: fromRandMemConsui ; P9LE-LABEL: fromRandMemConsui ; P8BE-LABEL: fromRandMemConsui ; P8LE-LABEL: fromRandMemConsui ; P9BE: lwz ; P9BE: lwz ; P9BE: lwz ; P9BE: lwz ; P9BE: mtvsrdd ; P9BE: mtvsrdd ; 
P9BE: vmrgow ; P9LE: lwz ; P9LE: lwz ; P9LE: lwz ; P9LE: lwz ; P9LE: mtvsrdd ; P9LE: mtvsrdd ; P9LE: vmrgow ; P8BE: lwz ; P8BE: lwz ; P8BE: lwz ; P8BE: lwz ; P8BE: mtvsrwz ; P8BE: mtvsrwz ; P8BE: mtvsrwz ; P8BE: mtvsrwz ; P8BE: xxmrghd ; P8BE: xxmrghd ; P8BE: vmrgow ; P8LE: lwz ; P8LE: lwz ; P8LE: lwz ; P8LE: lwz ; P8LE: mtvsrwz ; P8LE: mtvsrwz ; P8LE: mtvsrwz ; P8LE: mtvsrwz ; P8LE: xxmrghd ; P8LE: xxmrghd ; P8LE: vmrgow } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromRandMemVarui(i32* nocapture readonly %arr, i32 signext %elem) { entry: %add = add nsw i32 %elem, 4 %idxprom = sext i32 %add to i64 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom %0 = load i32, i32* %arrayidx, align 4 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0 %add1 = add nsw i32 %elem, 1 %idxprom2 = sext i32 %add1 to i64 %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 %idxprom2 %1 = load i32, i32* %arrayidx3, align 4 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %1, i32 1 %add5 = add nsw i32 %elem, 2 %idxprom6 = sext i32 %add5 to i64 %arrayidx7 = getelementptr inbounds i32, i32* %arr, i64 %idxprom6 %2 = load i32, i32* %arrayidx7, align 4 %vecinit8 = insertelement <4 x i32> %vecinit4, i32 %2, i32 2 %add9 = add nsw i32 %elem, 8 %idxprom10 = sext i32 %add9 to i64 %arrayidx11 = getelementptr inbounds i32, i32* %arr, i64 %idxprom10 %3 = load i32, i32* %arrayidx11, align 4 %vecinit12 = insertelement <4 x i32> %vecinit8, i32 %3, i32 3 ret <4 x i32> %vecinit12 ; P9BE-LABEL: fromRandMemVarui ; P9LE-LABEL: fromRandMemVarui ; P8BE-LABEL: fromRandMemVarui ; P8LE-LABEL: fromRandMemVarui ; P9BE: sldi r4, r4, 2 ; P9BE: lwz ; P9BE: lwz ; P9BE: lwz ; P9BE: lwz ; P9BE: mtvsrdd ; P9BE: mtvsrdd ; P9BE: vmrgow ; P9LE: sldi r4, r4, 2 ; P9LE: lwz ; P9LE: lwz ; P9LE: lwz ; P9LE: lwz ; P9LE: mtvsrdd ; P9LE: mtvsrdd ; P9LE: vmrgow ; P8BE: sldi r4, r4, 2 ; P8BE: lwz ; P8BE: lwz ; P8BE: lwz ; P8BE: lwz ; P8BE: mtvsrwz ; P8BE: mtvsrwz ; P8BE: mtvsrwz ; P8BE: mtvsrwz ; P8BE: xxmrghd ; P8BE: xxmrghd ; P8BE: vmrgow ; P8LE: sldi r4, r4, 2 ; P8LE: lwz ; P8LE: lwz ; P8LE: lwz ; P8LE: lwz ; P8LE: mtvsrwz ; P8LE: mtvsrwz ; P8LE: mtvsrwz ; P8LE: mtvsrwz ; P8LE: xxmrghd ; P8LE: xxmrghd ; P8LE: vmrgow } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltRegValui(i32 zeroext %val) { entry: %splat.splatinsert = insertelement <4 x i32> undef, i32 %val, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat.splat ; P9BE-LABEL: spltRegValui ; P9LE-LABEL: spltRegValui ; P8BE-LABEL: spltRegValui ; P8LE-LABEL: spltRegValui ; P9BE: mtvsrws v2, r3 ; P9BE: blr ; P9LE: mtvsrws v2, r3 ; P9LE: blr ; P8BE: mtvsrwz {{[vsf0-9]+}}, r3 ; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1 ; P8BE: blr ; P8LE: mtvsrwz {{[vsf0-9]+}}, r3 ; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1 ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @spltMemValui(i32* nocapture readonly %ptr) { entry: %0 = load i32, i32* %ptr, align 4 %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat.splat ; P9BE-LABEL: spltMemValui ; P9LE-LABEL: spltMemValui ; P8BE-LABEL: spltMemValui ; P8LE-LABEL: spltMemValui -; P9BE: lxvwsx v2, 0, r3 +; P9BE: lfiwzx f0, 0, r3 +; P9BE: xxsldwi vs0, f0, f0, 1 +; P9BE: xxspltw v2, vs0, 0 ; P9BE: blr -; P9LE: lxvwsx v2, 0, r3 +; P9LE: lfiwzx f0, 0, r3 +; P9LE: xxpermdi vs0, f0, f0, 2 +; P9LE: 
xxspltw v2, vs0, 3 ; P9LE: blr -; P8BE: lxsiwax {{[vsf0-9]+}}, 0, r3 -; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1 +; P8BE: lfiwzx f0, 0, r3 +; P8BE: xxsldwi vs0, f0, f0, 1 +; P8BE: xxspltw v2, vs0, 0 ; P8BE: blr -; P8LE: lxsiwax {{[vsf0-9]+}}, 0, r3 -; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1 +; P8LE: lfiwzx f0, 0, r3 +; P8LE: xxpermdi vs0, f0, f0, 2 +; P8LE: xxspltw v2, vs0, 3 ; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltCnstConvftoui() { entry: ret <4 x i32> ; P9BE-LABEL: spltCnstConvftoui ; P9LE-LABEL: spltCnstConvftoui ; P8BE-LABEL: spltCnstConvftoui ; P8LE-LABEL: spltCnstConvftoui ; P9BE: vspltisw v2, 4 ; P9BE: blr ; P9LE: vspltisw v2, 4 ; P9LE: blr ; P8BE: vspltisw v2, 4 ; P8BE: blr ; P8LE: vspltisw v2, 4 ; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @fromRegsConvftoui(float %a, float %b, float %c, float %d) { entry: %conv = fptoui float %a to i32 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 %conv1 = fptoui float %b to i32 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %conv1, i32 1 %conv3 = fptoui float %c to i32 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %conv3, i32 2 %conv5 = fptoui float %d to i32 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %conv5, i32 3 ret <4 x i32> %vecinit6 ; P9BE-LABEL: fromRegsConvftoui ; P9LE-LABEL: fromRegsConvftoui ; P8BE-LABEL: fromRegsConvftoui ; P8LE-LABEL: fromRegsConvftoui ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 ; P9BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] ; P9BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P9BE: vmrgew v2, [[REG3]], [[REG4]] ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 ; P9LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] ; P9LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P9LE: vmrgew v2, [[REG4]], [[REG3]] ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 ; P8BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] ; P8BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P8BE: vmrgew v2, [[REG3]], [[REG4]] ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 ; P8LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] ; P8LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P8LE: vmrgew v2, [[REG4]], [[REG3]] } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @fromDiffConstsConvftoui() { entry: ret <4 x i32> ; P9BE-LABEL: fromDiffConstsConvftoui ; P9LE-LABEL: fromDiffConstsConvftoui ; P8BE-LABEL: fromDiffConstsConvftoui ; P8LE-LABEL: fromDiffConstsConvftoui ; P9BE: lxv ; P9BE: blr ; P9LE: lxv ; P9LE: blr ; P8BE: lxvw4x ; P8BE: blr ; P8LE: lvx ; P8LE-NOT: xxswapd ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemConsAConvftoui(float* nocapture readonly %ptr) { entry: %0 = bitcast float* %ptr to <4 x float>* %1 = load <4 x float>, <4 x float>* %0, align 4 %2 = fptoui <4 x float> %1 to <4 x i32> ret <4 x i32> %2 ; P9BE-LABEL: fromDiffMemConsAConvftoui ; P9LE-LABEL: fromDiffMemConsAConvftoui ; P8BE-LABEL: fromDiffMemConsAConvftoui ; P8LE-LABEL: fromDiffMemConsAConvftoui ; P9BE: lxv [[REG1:[vs0-9]+]], 0(r3) ; P9BE: xvcvspuxws v2, [[REG1]] ; P9BE: blr ; P9LE: lxv [[REG1:[vs0-9]+]], 0(r3) ; P9LE: xvcvspuxws v2, [[REG1]] ; P9LE: blr ; P8BE: lxvw4x [[REG1:[vs0-9]+]], 0, r3 ; P8BE: xvcvspuxws v2, [[REG1]] ; P8BE: 
blr ; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3 ; P8LE: xxswapd v2, [[REG1]] ; P8LE: xvcvspuxws v2, v2 ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemConsDConvftoui(float* nocapture readonly %ptr) { entry: %arrayidx = getelementptr inbounds float, float* %ptr, i64 3 %0 = load float, float* %arrayidx, align 4 %conv = fptoui float %0 to i32 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 2 %1 = load float, float* %arrayidx1, align 4 %conv2 = fptoui float %1 to i32 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1 %arrayidx4 = getelementptr inbounds float, float* %ptr, i64 1 %2 = load float, float* %arrayidx4, align 4 %conv5 = fptoui float %2 to i32 %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2 %3 = load float, float* %ptr, align 4 %conv8 = fptoui float %3 to i32 %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3 ret <4 x i32> %vecinit9 ; P9BE-LABEL: fromDiffMemConsDConvftoui ; P9LE-LABEL: fromDiffMemConsDConvftoui ; P8BE-LABEL: fromDiffMemConsDConvftoui ; P8LE-LABEL: fromDiffMemConsDConvftoui ; P9BE: lxv ; P9BE: lxv ; P9BE: vperm ; P9BE: xvcvspuxws ; P9BE: blr ; P9LE: lxv ; P9LE: lxv ; P9LE: vperm ; P9LE: xvcvspuxws ; P9LE: blr ; P8BE: lxvw4x ; P8BE: lxvw4x ; P8BE: vperm ; P8BE: xvcvspuxws ; P8BE: blr ; P8LE-DAG: lxvd2x ; P8LE-DAG: lvx ; P8LE: xxswapd ; P8LE: vperm ; P8LE: xvcvspuxws ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemVarAConvftoui(float* nocapture readonly %arr, i32 signext %elem) { entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom %0 = load float, float* %arrayidx, align 4 %conv = fptoui float %0 to i32 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 %add = add nsw i32 %elem, 1 %idxprom1 = sext i32 %add to i64 %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1 %1 = load float, float* %arrayidx2, align 4 %conv3 = fptoui float %1 to i32 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1 %add5 = add nsw i32 %elem, 2 %idxprom6 = sext i32 %add5 to i64 %arrayidx7 = getelementptr inbounds float, float* %arr, i64 %idxprom6 %2 = load float, float* %arrayidx7, align 4 %conv8 = fptoui float %2 to i32 %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2 %add10 = add nsw i32 %elem, 3 %idxprom11 = sext i32 %add10 to i64 %arrayidx12 = getelementptr inbounds float, float* %arr, i64 %idxprom11 %3 = load float, float* %arrayidx12, align 4 %conv13 = fptoui float %3 to i32 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3 ret <4 x i32> %vecinit14 ; P9BE-LABEL: fromDiffMemVarAConvftoui ; P9LE-LABEL: fromDiffMemVarAConvftoui ; P8BE-LABEL: fromDiffMemVarAConvftoui ; P8LE-LABEL: fromDiffMemVarAConvftoui ; FIXME: implement finding consecutive loads with pre-inc ; P9BE: lfsux ; P9LE: lfsux ; P8BE: lfsux ; P8LE: lfsux } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemVarDConvftoui(float* nocapture readonly %arr, i32 signext %elem) { entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom %0 = load float, float* %arrayidx, align 4 %conv = fptoui float %0 to i32 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 %sub = add nsw i32 %elem, -1 %idxprom1 = sext i32 %sub to i64 %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1 %1 = load float, float* %arrayidx2, align 4 %conv3 = 
fptoui float %1 to i32 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1 %sub5 = add nsw i32 %elem, -2 %idxprom6 = sext i32 %sub5 to i64 %arrayidx7 = getelementptr inbounds float, float* %arr, i64 %idxprom6 %2 = load float, float* %arrayidx7, align 4 %conv8 = fptoui float %2 to i32 %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2 %sub10 = add nsw i32 %elem, -3 %idxprom11 = sext i32 %sub10 to i64 %arrayidx12 = getelementptr inbounds float, float* %arr, i64 %idxprom11 %3 = load float, float* %arrayidx12, align 4 %conv13 = fptoui float %3 to i32 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3 ret <4 x i32> %vecinit14 ; P9BE-LABEL: fromDiffMemVarDConvftoui ; P9LE-LABEL: fromDiffMemVarDConvftoui ; P8BE-LABEL: fromDiffMemVarDConvftoui ; P8LE-LABEL: fromDiffMemVarDConvftoui ; FIXME: implement finding consecutive loads with pre-inc ; P9BE: lfsux ; P9LE: lfsux ; P8BE: lfsux ; P8LE: lfsux } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltRegValConvftoui(float %val) { entry: %conv = fptoui float %val to i32 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat.splat ; P9BE-LABEL: spltRegValConvftoui ; P9LE-LABEL: spltRegValConvftoui ; P8BE-LABEL: spltRegValConvftoui ; P8LE-LABEL: spltRegValConvftoui ; P9BE: xscvdpuxws f[[REG1:[0-9]+]], f1 ; P9BE: xxspltw v2, vs[[REG1]], 1 ; P9BE: blr ; P9LE: xscvdpuxws f[[REG1:[0-9]+]], f1 ; P9LE: xxspltw v2, vs[[REG1]], 1 ; P9LE: blr ; P8BE: xscvdpuxws f[[REG1:[0-9]+]], f1 ; P8BE: xxspltw v2, vs[[REG1]], 1 ; P8BE: blr ; P8LE: xscvdpuxws f[[REG1:[0-9]+]], f1 ; P8LE: xxspltw v2, vs[[REG1]], 1 ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @spltMemValConvftoui(float* nocapture readonly %ptr) { entry: %0 = load float, float* %ptr, align 4 %conv = fptoui float %0 to i32 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat.splat ; P9BE-LABEL: spltMemValConvftoui ; P9LE-LABEL: spltMemValConvftoui ; P8BE-LABEL: spltMemValConvftoui ; P8LE-LABEL: spltMemValConvftoui ; P9BE: lxvwsx [[REG1:[vs0-9]+]], 0, r3 ; P9BE: xvcvspuxws v2, [[REG1]] ; P9LE: [[REG1:[vs0-9]+]], 0, r3 ; P9LE: xvcvspuxws v2, [[REG1]] ; P8BE: lfsx [[REG1:f[0-9]+]], 0, r3 ; P8BE: xscvdpuxws f[[REG2:[0-9]+]], [[REG1]] ; P8BE: xxspltw v2, vs[[REG2]], 1 ; P8LE: lfsx [[REG1:f[0-9]+]], 0, r3 ; P8LE: xscvdpuxws f[[REG2:[vs0-9]+]], [[REG1]] ; P8LE: xxspltw v2, vs[[REG2]], 1 } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltCnstConvdtoui() { entry: ret <4 x i32> ; P9BE-LABEL: spltCnstConvdtoui ; P9LE-LABEL: spltCnstConvdtoui ; P8BE-LABEL: spltCnstConvdtoui ; P8LE-LABEL: spltCnstConvdtoui ; P9BE: vspltisw v2, 4 ; P9BE: blr ; P9LE: vspltisw v2, 4 ; P9LE: blr ; P8BE: vspltisw v2, 4 ; P8BE: blr ; P8LE: vspltisw v2, 4 ; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d) { entry: %conv = fptoui double %a to i32 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 %conv1 = fptoui double %b to i32 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %conv1, i32 1 %conv3 = fptoui double %c to i32 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %conv3, i32 2 %conv5 = fptoui double %d to i32 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %conv5, 
i32 3 ret <4 x i32> %vecinit6 ; P9BE-LABEL: fromRegsConvdtoui ; P9LE-LABEL: fromRegsConvdtoui ; P8BE-LABEL: fromRegsConvdtoui ; P8LE-LABEL: fromRegsConvdtoui ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 ; P9BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] ; P9BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P9BE: vmrgew v2, [[REG3]], [[REG4]] ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 ; P9LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] ; P9LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P9LE: vmrgew v2, [[REG4]], [[REG3]] ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 ; P8BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] ; P8BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P8BE: vmrgew v2, [[REG3]], [[REG4]] ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 ; P8LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] ; P8LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P8LE: vmrgew v2, [[REG4]], [[REG3]] } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @fromDiffConstsConvdtoui() { entry: ret <4 x i32> ; P9BE-LABEL: fromDiffConstsConvdtoui ; P9LE-LABEL: fromDiffConstsConvdtoui ; P8BE-LABEL: fromDiffConstsConvdtoui ; P8LE-LABEL: fromDiffConstsConvdtoui ; P9BE: lxv ; P9BE: blr ; P9LE: lxv ; P9LE: blr ; P8BE: lxvw4x ; P8BE: blr ; P8LE: lvx ; P8LE-NOT: xxswapd ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemConsAConvdtoui(double* nocapture readonly %ptr) { entry: %0 = bitcast double* %ptr to <2 x double>* %1 = load <2 x double>, <2 x double>* %0, align 8 %2 = fptoui <2 x double> %1 to <2 x i32> %arrayidx4 = getelementptr inbounds double, double* %ptr, i64 2 %3 = bitcast double* %arrayidx4 to <2 x double>* %4 = load <2 x double>, <2 x double>* %3, align 8 %5 = fptoui <2 x double> %4 to <2 x i32> %vecinit9 = shufflevector <2 x i32> %2, <2 x i32> %5, <4 x i32> ret <4 x i32> %vecinit9 ; P9BE-LABEL: fromDiffMemConsAConvdtoui ; P9LE-LABEL: fromDiffMemConsAConvdtoui ; P8BE-LABEL: fromDiffMemConsAConvdtoui ; P8LE-LABEL: fromDiffMemConsAConvdtoui ; P9BE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3) ; P9BE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3) ; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]] ; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]] ; P9BE-DAG: xvcvdpuxws [[REG5:[vs0-9]+]], [[REG3]] ; P9BE-DAG: xvcvdpuxws [[REG6:[vs0-9]+]], [[REG4]] ; P9BE: vmrgew v2, [[REG6]], [[REG5]] ; P9LE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3) ; P9LE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3) ; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]] ; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]] ; P9LE-DAG: xvcvdpuxws [[REG5:[vs0-9]+]], [[REG3]] ; P9LE-DAG: xvcvdpuxws [[REG6:[vs0-9]+]], [[REG4]] ; P9LE: vmrgew v2, [[REG6]], [[REG5]] ; P8BE: lxvd2x [[REG1:[vs0-9]+]], 0, r3 ; P8BE: lxvd2x [[REG2:[vs0-9]+]], r3, r4 ; P8BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]] ; P8BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]] ; P8BE-DAG: xvcvdpuxws [[REG5:[vs0-9]+]], [[REG3]] ; P8BE-DAG: xvcvdpuxws [[REG6:[vs0-9]+]], [[REG4]] ; P8BE: vmrgew v2, [[REG6]], [[REG5]] ; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3 ; P8LE: lxvd2x [[REG2:[vs0-9]+]], r3, r4 ; P8LE-DAG: xxswapd [[REG3:[vs0-9]+]], [[REG1]] ; P8LE-DAG: xxswapd [[REG4:[vs0-9]+]], [[REG2]] ; P8LE-DAG: xxmrgld 
[[REG5:[vs0-9]+]], [[REG4]], [[REG3]] ; P8LE-DAG: xxmrghd [[REG6:[vs0-9]+]], [[REG4]], [[REG3]] ; P8LE-DAG: xvcvdpuxws [[REG7:[vs0-9]+]], [[REG5]] ; P8LE-DAG: xvcvdpuxws [[REG8:[vs0-9]+]], [[REG6]] ; P8LE: vmrgew v2, [[REG8]], [[REG7]] } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemConsDConvdtoui(double* nocapture readonly %ptr) { entry: %arrayidx = getelementptr inbounds double, double* %ptr, i64 3 %0 = load double, double* %arrayidx, align 8 %conv = fptoui double %0 to i32 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 %arrayidx1 = getelementptr inbounds double, double* %ptr, i64 2 %1 = load double, double* %arrayidx1, align 8 %conv2 = fptoui double %1 to i32 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1 %arrayidx4 = getelementptr inbounds double, double* %ptr, i64 1 %2 = load double, double* %arrayidx4, align 8 %conv5 = fptoui double %2 to i32 %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2 %3 = load double, double* %ptr, align 8 %conv8 = fptoui double %3 to i32 %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3 ret <4 x i32> %vecinit9 ; P9BE-LABEL: fromDiffMemConsDConvdtoui ; P9LE-LABEL: fromDiffMemConsDConvdtoui ; P8BE-LABEL: fromDiffMemConsDConvdtoui ; P8LE-LABEL: fromDiffMemConsDConvdtoui ; P9BE: lfd ; P9BE: lfd ; P9BE: lfd ; P9BE: lfd ; P9BE: xxmrghd ; P9BE: xxmrghd ; P9BE: xvcvdpuxws ; P9BE: xvcvdpuxws ; P9BE: vmrgew v2 ; P9LE: lfd ; P9LE: lfd ; P9LE: lfd ; P9LE: lfd ; P9LE: xxmrghd ; P9LE: xxmrghd ; P9LE: xvcvdpuxws ; P9LE: xvcvdpuxws ; P9LE: vmrgew v2 ; P8BE: lfdx ; P8BE: lfd ; P8BE: lfd ; P8BE: lfd ; P8BE: xxmrghd ; P8BE: xxmrghd ; P8BE: xvcvdpuxws ; P8BE: xvcvdpuxws ; P8BE: vmrgew v2 ; P8LE: lfdx ; P8LE: lfd ; P8LE: lfd ; P8LE: lfd ; P8LE: xxmrghd ; P8LE: xxmrghd ; P8LE: xvcvdpuxws ; P8LE: xvcvdpuxws ; P8LE: vmrgew v2 } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemVarAConvdtoui(double* nocapture readonly %arr, i32 signext %elem) { entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom %0 = load double, double* %arrayidx, align 8 %conv = fptoui double %0 to i32 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 %add = add nsw i32 %elem, 1 %idxprom1 = sext i32 %add to i64 %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1 %1 = load double, double* %arrayidx2, align 8 %conv3 = fptoui double %1 to i32 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1 %add5 = add nsw i32 %elem, 2 %idxprom6 = sext i32 %add5 to i64 %arrayidx7 = getelementptr inbounds double, double* %arr, i64 %idxprom6 %2 = load double, double* %arrayidx7, align 8 %conv8 = fptoui double %2 to i32 %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2 %add10 = add nsw i32 %elem, 3 %idxprom11 = sext i32 %add10 to i64 %arrayidx12 = getelementptr inbounds double, double* %arr, i64 %idxprom11 %3 = load double, double* %arrayidx12, align 8 %conv13 = fptoui double %3 to i32 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3 ret <4 x i32> %vecinit14 ; P9BE-LABEL: fromDiffMemVarAConvdtoui ; P9LE-LABEL: fromDiffMemVarAConvdtoui ; P8BE-LABEL: fromDiffMemVarAConvdtoui ; P8LE-LABEL: fromDiffMemVarAConvdtoui ; P9BE: lfdux ; P9BE: lfd ; P9BE: lfd ; P9BE: lfd ; P9BE: xxmrghd ; P9BE: xxmrghd ; P9BE: xvcvdpuxws ; P9BE: xvcvdpuxws ; P9BE: vmrgew v2 ; P9LE: lfdux ; P9LE: lfd ; P9LE: lfd ; P9LE: lfd ; P9LE: xxmrghd ; P9LE: xxmrghd ; P9LE: xvcvdpuxws ; P9LE: xvcvdpuxws ; P9LE: vmrgew v2 ; P8BE: 
lfdux ; P8BE: lfd ; P8BE: lfd ; P8BE: lfd ; P8BE: xxmrghd ; P8BE: xxmrghd ; P8BE: xvcvdpuxws ; P8BE: xvcvdpuxws ; P8BE: vmrgew v2 ; P8LE: lfdux ; P8LE: lfd ; P8LE: lfd ; P8LE: lfd ; P8LE: xxmrghd ; P8LE: xxmrghd ; P8LE: xvcvdpuxws ; P8LE: xvcvdpuxws ; P8LE: vmrgew v2 } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @fromDiffMemVarDConvdtoui(double* nocapture readonly %arr, i32 signext %elem) { entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom %0 = load double, double* %arrayidx, align 8 %conv = fptoui double %0 to i32 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 %sub = add nsw i32 %elem, -1 %idxprom1 = sext i32 %sub to i64 %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1 %1 = load double, double* %arrayidx2, align 8 %conv3 = fptoui double %1 to i32 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1 %sub5 = add nsw i32 %elem, -2 %idxprom6 = sext i32 %sub5 to i64 %arrayidx7 = getelementptr inbounds double, double* %arr, i64 %idxprom6 %2 = load double, double* %arrayidx7, align 8 %conv8 = fptoui double %2 to i32 %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2 %sub10 = add nsw i32 %elem, -3 %idxprom11 = sext i32 %sub10 to i64 %arrayidx12 = getelementptr inbounds double, double* %arr, i64 %idxprom11 %3 = load double, double* %arrayidx12, align 8 %conv13 = fptoui double %3 to i32 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3 ret <4 x i32> %vecinit14 ; P9BE-LABEL: fromDiffMemVarDConvdtoui ; P9LE-LABEL: fromDiffMemVarDConvdtoui ; P8BE-LABEL: fromDiffMemVarDConvdtoui ; P8LE-LABEL: fromDiffMemVarDConvdtoui ; P9BE: lfdux ; P9BE: lfd ; P9BE: lfd ; P9BE: lfd ; P9BE: xxmrghd ; P9BE: xxmrghd ; P9BE: xvcvdpuxws ; P9BE: xvcvdpuxws ; P9BE: vmrgew v2 ; P9LE: lfdux ; P9LE: lfd ; P9LE: lfd ; P9LE: lfd ; P9LE: xxmrghd ; P9LE: xxmrghd ; P9LE: xvcvdpuxws ; P9LE: xvcvdpuxws ; P9LE: vmrgew v2 ; P8BE: lfdux ; P8BE: lfd ; P8BE: lfd ; P8BE: lfd ; P8BE: xxmrghd ; P8BE: xxmrghd ; P8BE: xvcvdpuxws ; P8BE: xvcvdpuxws ; P8BE: vmrgew v2 ; P8LE: lfdux ; P8LE: lfd ; P8LE: lfd ; P8LE: lfd ; P8LE: xxmrghd ; P8LE: xxmrghd ; P8LE: xvcvdpuxws ; P8LE: xvcvdpuxws ; P8LE: vmrgew v2 } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @spltRegValConvdtoui(double %val) { entry: %conv = fptoui double %val to i32 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat.splat ; P9BE-LABEL: spltRegValConvdtoui ; P9LE-LABEL: spltRegValConvdtoui ; P8BE-LABEL: spltRegValConvdtoui ; P8LE-LABEL: spltRegValConvdtoui ; P9BE: xscvdpuxws ; P9BE: xxspltw ; P9BE: blr ; P9LE: xscvdpuxws ; P9LE: xxspltw ; P9LE: blr ; P8BE: xscvdpuxws ; P8BE: xxspltw ; P8BE: blr ; P8LE: xscvdpuxws ; P8LE: xxspltw ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <4 x i32> @spltMemValConvdtoui(double* nocapture readonly %ptr) { entry: %0 = load double, double* %ptr, align 8 %conv = fptoui double %0 to i32 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat.splat ; P9BE-LABEL: spltMemValConvdtoui ; P9LE-LABEL: spltMemValConvdtoui ; P8BE-LABEL: spltMemValConvdtoui ; P8LE-LABEL: spltMemValConvdtoui ; P9BE: lfd ; P9BE: xscvdpuxws ; P9BE: xxspltw ; P9BE: blr ; P9LE: lfd ; P9LE: xscvdpuxws ; P9LE: xxspltw ; P9LE: blr 
; P8BE: lfdx ; P8BE: xscvdpuxws ; P8BE: xxspltw ; P8BE: blr ; P8LE: lfdx ; P8LE: xscvdpuxws ; P8LE: xxspltw ; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @allZeroll() { entry: ret <2 x i64> zeroinitializer ; P9BE-LABEL: allZeroll ; P9LE-LABEL: allZeroll ; P8BE-LABEL: allZeroll ; P8LE-LABEL: allZeroll ; P9BE: xxlxor v2, v2, v2 ; P9BE: blr ; P9LE: xxlxor v2, v2, v2 ; P9LE: blr ; P8BE: xxlxor v2, v2, v2 ; P8BE: blr ; P8LE: xxlxor v2, v2, v2 ; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @allOnell() { entry: ret <2 x i64> ; P9BE-LABEL: allOnell ; P9LE-LABEL: allOnell ; P8BE-LABEL: allOnell ; P8LE-LABEL: allOnell ; P9BE: xxspltib v2, 255 ; P9BE: blr ; P9LE: xxspltib v2, 255 ; P9LE: blr ; P8BE: vspltisb v2, -1 ; P8BE: blr ; P8LE: vspltisb v2, -1 ; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltConst1ll() { entry: ret <2 x i64> ; P9BE-LABEL: spltConst1ll ; P9LE-LABEL: spltConst1ll ; P8BE-LABEL: spltConst1ll ; P8LE-LABEL: spltConst1ll ; P9BE: lxv ; P9BE: blr ; P9LE: lxv ; P9LE: blr ; P8BE: lxvd2x ; P8BE: blr ; P8LE: lxvd2x ; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltConst16kll() { entry: ret <2 x i64> ; P9BE-LABEL: spltConst16kll ; P9LE-LABEL: spltConst16kll ; P8BE-LABEL: spltConst16kll ; P8LE-LABEL: spltConst16kll ; P9BE: lxv ; P9BE: blr ; P9LE: lxv ; P9LE: blr ; P8BE: lxvd2x ; P8BE: blr ; P8LE: lxvd2x ; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltConst32kll() { entry: ret <2 x i64> ; P9BE-LABEL: spltConst32kll ; P9LE-LABEL: spltConst32kll ; P8BE-LABEL: spltConst32kll ; P8LE-LABEL: spltConst32kll ; P9BE: lxv ; P9BE: blr ; P9LE: lxv ; P9LE: blr ; P8BE: lxvd2x ; P8BE: blr ; P8LE: lxvd2x ; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @fromRegsll(i64 %a, i64 %b) { entry: %vecinit = insertelement <2 x i64> undef, i64 %a, i32 0 %vecinit1 = insertelement <2 x i64> %vecinit, i64 %b, i32 1 ret <2 x i64> %vecinit1 ; P9BE-LABEL: fromRegsll ; P9LE-LABEL: fromRegsll ; P8BE-LABEL: fromRegsll ; P8LE-LABEL: fromRegsll ; P9BE: mtvsrdd v2, r3, r4 ; P9BE: blr ; P9LE: mtvsrdd v2, r4, r3 ; P9LE: blr ; P8BE-DAG: mtvsrd {{[vsf0-9]+}}, r3 ; P8BE-DAG: mtvsrd {{[vsf0-9]+}}, r4 ; P8BE: xxmrghd v2 ; P8BE: blr ; P8LE-DAG: mtvsrd {{[vsf0-9]+}}, r3 ; P8LE-DAG: mtvsrd {{[vsf0-9]+}}, r4 ; P8LE: xxmrghd v2 ; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @fromDiffConstsll() { entry: ret <2 x i64> ; P9BE-LABEL: fromDiffConstsll ; P9LE-LABEL: fromDiffConstsll ; P8BE-LABEL: fromDiffConstsll ; P8LE-LABEL: fromDiffConstsll ; P9BE: lxv ; P9BE: blr ; P9LE: lxv ; P9LE: blr ; P8BE: lxvd2x ; P8BE: blr ; P8LE: lxvd2x ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemConsAll(i64* nocapture readonly %arr) { entry: %0 = load i64, i64* %arr, align 8 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0 %arrayidx1 = getelementptr inbounds i64, i64* %arr, i64 1 %1 = load i64, i64* %arrayidx1, align 8 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 ret <2 x i64> %vecinit2 ; P9BE-LABEL: fromDiffMemConsAll ; P9LE-LABEL: fromDiffMemConsAll ; P8BE-LABEL: fromDiffMemConsAll ; P8LE-LABEL: fromDiffMemConsAll ; P9BE: lxv v2 ; P9BE: blr ; P9LE: lxv v2 ; P9LE: blr ; P8BE: lxvd2x v2 ; P8BE: blr ; P8LE: lxvd2x ; P8LE: xxswapd v2 ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemConsDll(i64* nocapture readonly %arr) { entry: %arrayidx = 
getelementptr inbounds i64, i64* %arr, i64 3 %0 = load i64, i64* %arrayidx, align 8 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0 %arrayidx1 = getelementptr inbounds i64, i64* %arr, i64 2 %1 = load i64, i64* %arrayidx1, align 8 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 ret <2 x i64> %vecinit2 ; P9BE-LABEL: fromDiffMemConsDll ; P9LE-LABEL: fromDiffMemConsDll ; P8BE-LABEL: fromDiffMemConsDll ; P8LE-LABEL: fromDiffMemConsDll ; P9BE: lxv v2 ; P9BE: blr ; P9LE: lxv ; P9LE: xxswapd v2 ; P9LE: blr ; P8BE: lxvd2x ; P8BE: xxswapd v2 ; P8BE-NEXT: blr ; P8LE: lxvd2x v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemVarAll(i64* nocapture readonly %arr, i32 signext %elem) { entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom %0 = load i64, i64* %arrayidx, align 8 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0 %add = add nsw i32 %elem, 1 %idxprom1 = sext i32 %add to i64 %arrayidx2 = getelementptr inbounds i64, i64* %arr, i64 %idxprom1 %1 = load i64, i64* %arrayidx2, align 8 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 ret <2 x i64> %vecinit3 ; P9BE-LABEL: fromDiffMemVarAll ; P9LE-LABEL: fromDiffMemVarAll ; P8BE-LABEL: fromDiffMemVarAll ; P8LE-LABEL: fromDiffMemVarAll ; P9BE: sldi ; P9BE: lxvx v2 ; P9BE-NEXT: blr ; P9LE: sldi ; P9LE: lxvx v2 ; P9LE-NEXT: blr ; P8BE: sldi ; P8BE: lxvd2x v2 ; P8BE-NEXT: blr ; P8LE: sldi ; P8LE: lxvd2x ; P8LE: xxswapd v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemVarDll(i64* nocapture readonly %arr, i32 signext %elem) { entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom %0 = load i64, i64* %arrayidx, align 8 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0 %sub = add nsw i32 %elem, -1 %idxprom1 = sext i32 %sub to i64 %arrayidx2 = getelementptr inbounds i64, i64* %arr, i64 %idxprom1 %1 = load i64, i64* %arrayidx2, align 8 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 ret <2 x i64> %vecinit3 ; P9BE-LABEL: fromDiffMemVarDll ; P9LE-LABEL: fromDiffMemVarDll ; P8BE-LABEL: fromDiffMemVarDll ; P8LE-LABEL: fromDiffMemVarDll ; P9BE: sldi ; P9BE: lxv ; P9BE: xxswapd v2 ; P9BE-NEXT: blr ; P9LE: sldi ; P9LE: lxv ; P9LE: xxswapd v2 ; P9LE-NEXT: blr ; P8BE: sldi ; P8BE: lxvd2x ; P8BE: xxswapd v2 ; P8BE-NEXT: blr ; P8LE: sldi ; P8LE: lxvd2x v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromRandMemConsll(i64* nocapture readonly %arr) { entry: %arrayidx = getelementptr inbounds i64, i64* %arr, i64 4 %0 = load i64, i64* %arrayidx, align 8 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0 %arrayidx1 = getelementptr inbounds i64, i64* %arr, i64 18 %1 = load i64, i64* %arrayidx1, align 8 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 ret <2 x i64> %vecinit2 ; P9BE-LABEL: fromRandMemConsll ; P9LE-LABEL: fromRandMemConsll ; P8BE-LABEL: fromRandMemConsll ; P8LE-LABEL: fromRandMemConsll ; P9BE: ld ; P9BE: ld ; P9BE: mtvsrdd v2 ; P9BE-NEXT: blr ; P9LE: ld ; P9LE: ld ; P9LE: mtvsrdd v2 ; P9LE-NEXT: blr ; P8BE: ld ; P8BE: ld ; P8BE-DAG: mtvsrd ; P8BE-DAG: mtvsrd ; P8BE: xxmrghd v2 ; P8BE-NEXT: blr ; P8LE: ld ; P8LE: ld ; P8LE-DAG: mtvsrd ; P8LE-DAG: mtvsrd ; P8LE: xxmrghd v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromRandMemVarll(i64* nocapture readonly %arr, i32 signext %elem) { entry: %add = add nsw i32 %elem, 4 %idxprom = sext 
i32 %add to i64 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom %0 = load i64, i64* %arrayidx, align 8 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0 %add1 = add nsw i32 %elem, 1 %idxprom2 = sext i32 %add1 to i64 %arrayidx3 = getelementptr inbounds i64, i64* %arr, i64 %idxprom2 %1 = load i64, i64* %arrayidx3, align 8 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 ret <2 x i64> %vecinit4 ; P9BE-LABEL: fromRandMemVarll ; P9LE-LABEL: fromRandMemVarll ; P8BE-LABEL: fromRandMemVarll ; P8LE-LABEL: fromRandMemVarll ; P9BE: sldi ; P9BE: ld ; P9BE: ld ; P9BE: mtvsrdd v2 ; P9BE-NEXT: blr ; P9LE: sldi ; P9LE: ld ; P9LE: ld ; P9LE: mtvsrdd v2 ; P9LE-NEXT: blr ; P8BE: sldi ; P8BE: ld ; P8BE: ld ; P8BE: mtvsrd ; P8BE: mtvsrd ; P8BE: xxmrghd v2 ; P8BE-NEXT: blr ; P8LE: sldi ; P8LE: ld ; P8LE: ld ; P8LE: mtvsrd ; P8LE: mtvsrd ; P8LE: xxmrghd v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltRegValll(i64 %val) { entry: %splat.splatinsert = insertelement <2 x i64> undef, i64 %val, i32 0 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat.splat ; P9BE-LABEL: spltRegValll ; P9LE-LABEL: spltRegValll ; P8BE-LABEL: spltRegValll ; P8LE-LABEL: spltRegValll ; P9BE: mtvsrdd v2, r3, r3 ; P9BE-NEXT: blr ; P9LE: mtvsrdd v2, r3, r3 ; P9LE-NEXT: blr ; P8BE: mtvsrd {{[vsf]+}}[[REG1:[0-9]+]], r3 ; P8BE: xxspltd v2, {{[vsf]+}}[[REG1]], 0 ; P8BE-NEXT: blr ; P8LE: mtvsrd {{[vsf]+}}[[REG1:[0-9]+]], r3 ; P8LE: xxspltd v2, {{[vsf]+}}[[REG1]], 0 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @spltMemValll(i64* nocapture readonly %ptr) { entry: %0 = load i64, i64* %ptr, align 8 %splat.splatinsert = insertelement <2 x i64> undef, i64 %0, i32 0 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat.splat ; P9BE-LABEL: spltMemValll ; P9LE-LABEL: spltMemValll ; P8BE-LABEL: spltMemValll ; P8LE-LABEL: spltMemValll ; P9BE: lxvdsx v2 ; P9BE-NEXT: blr ; P9LE: lxvdsx v2 ; P9LE-NEXT: blr ; P8BE: lxvdsx v2 ; P8BE-NEXT: blr ; P8LE: lxvdsx v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltCnstConvftoll() { entry: ret <2 x i64> ; P9BE-LABEL: spltCnstConvftoll ; P9LE-LABEL: spltCnstConvftoll ; P8BE-LABEL: spltCnstConvftoll ; P8LE-LABEL: spltCnstConvftoll ; P9BE: lxv ; P9BE: blr ; P9LE: lxv ; P9LE: blr ; P8BE: lxvd2x ; P8BE: blr ; P8LE: lxvd2x ; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @fromRegsConvftoll(float %a, float %b) { entry: %conv = fptosi float %a to i64 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 %conv1 = fptosi float %b to i64 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %conv1, i32 1 ret <2 x i64> %vecinit2 ; P9BE-LABEL: fromRegsConvftoll ; P9LE-LABEL: fromRegsConvftoll ; P8BE-LABEL: fromRegsConvftoll ; P8LE-LABEL: fromRegsConvftoll ; P9BE: xxmrghd ; P9BE: xvcvdpsxds v2 ; P9BE-NEXT: blr ; P9LE: xxmrghd ; P9LE: xvcvdpsxds v2 ; P9LE-NEXT: blr ; P8BE: xxmrghd ; P8BE: xvcvdpsxds v2 ; P8BE-NEXT: blr ; P8LE: xxmrghd ; P8LE: xvcvdpsxds v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @fromDiffConstsConvftoll() { entry: ret <2 x i64> ; P9BE-LABEL: fromDiffConstsConvftoll ; P9LE-LABEL: fromDiffConstsConvftoll ; P8BE-LABEL: fromDiffConstsConvftoll ; P8LE-LABEL: fromDiffConstsConvftoll ; P9BE: lxvx v2 ; P9BE: blr ; P9LE: lxvx v2 ; P9LE: blr ; P8BE: lxvd2x v2 ; P8BE: blr ; 
P8LE: lxvd2x ; P8LE: xxswapd v2 ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemConsAConvftoll(float* nocapture readonly %ptr) { entry: %0 = load float, float* %ptr, align 4 %conv = fptosi float %0 to i64 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 1 %1 = load float, float* %arrayidx1, align 4 %conv2 = fptosi float %1 to i64 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 ret <2 x i64> %vecinit3 ; P9BE-LABEL: fromDiffMemConsAConvftoll ; P9LE-LABEL: fromDiffMemConsAConvftoll ; P8BE-LABEL: fromDiffMemConsAConvftoll ; P8LE-LABEL: fromDiffMemConsAConvftoll ; P9BE: lfs ; P9BE: lfs ; P9BE: xxmrghd ; P9BE-NEXT: xvcvdpsxds v2 ; P9BE-NEXT: blr ; P9LE: lfs ; P9LE: lfs ; P9LE: xxmrghd ; P9LE-NEXT: xvcvdpsxds v2 ; P9LE-NEXT: blr ; P8BE: lfs ; P8BE: lfs ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpsxds v2 ; P8BE-NEXT: blr ; P8LE: lfs ; P8LE: lfs ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpsxds v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemConsDConvftoll(float* nocapture readonly %ptr) { entry: %arrayidx = getelementptr inbounds float, float* %ptr, i64 3 %0 = load float, float* %arrayidx, align 4 %conv = fptosi float %0 to i64 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 2 %1 = load float, float* %arrayidx1, align 4 %conv2 = fptosi float %1 to i64 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 ret <2 x i64> %vecinit3 ; P9BE-LABEL: fromDiffMemConsDConvftoll ; P9LE-LABEL: fromDiffMemConsDConvftoll ; P8BE-LABEL: fromDiffMemConsDConvftoll ; P8LE-LABEL: fromDiffMemConsDConvftoll ; P9BE: lfs ; P9BE: lfs ; P9BE: xxmrghd ; P9BE-NEXT: xvcvdpsxds v2 ; P9BE-NEXT: blr ; P9LE: lfs ; P9LE: lfs ; P9LE: xxmrghd ; P9LE-NEXT: xvcvdpsxds v2 ; P9LE-NEXT: blr ; P8BE: lfs ; P8BE: lfs ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpsxds v2 ; P8BE-NEXT: blr ; P8LE: lfs ; P8LE: lfs ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpsxds v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemVarAConvftoll(float* nocapture readonly %arr, i32 signext %elem) { entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom %0 = load float, float* %arrayidx, align 4 %conv = fptosi float %0 to i64 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 %add = add nsw i32 %elem, 1 %idxprom1 = sext i32 %add to i64 %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1 %1 = load float, float* %arrayidx2, align 4 %conv3 = fptosi float %1 to i64 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1 ret <2 x i64> %vecinit4 ; P9BE-LABEL: fromDiffMemVarAConvftoll ; P9LE-LABEL: fromDiffMemVarAConvftoll ; P8BE-LABEL: fromDiffMemVarAConvftoll ; P8LE-LABEL: fromDiffMemVarAConvftoll ; P9BE: sldi ; P9BE: lfsux ; P9BE: lfs ; P9BE: xxmrghd ; P9BE-NEXT: xvcvdpsxds v2 ; P9BE-NEXT: blr ; P9LE: sldi ; P9LE: lfsux ; P9LE: lfs ; P9LE: xxmrghd ; P9LE-NEXT: xvcvdpsxds v2 ; P9LE-NEXT: blr ; P8BE: sldi ; P8BE: lfsux ; P8BE: lfs ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpsxds v2 ; P8BE-NEXT: blr ; P8LE: sldi ; P8LE: lfsux ; P8LE: lfs ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpsxds v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemVarDConvftoll(float* nocapture readonly %arr, i32 signext %elem) { entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds float, float* %arr, i64 
%idxprom %0 = load float, float* %arrayidx, align 4 %conv = fptosi float %0 to i64 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 %sub = add nsw i32 %elem, -1 %idxprom1 = sext i32 %sub to i64 %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1 %1 = load float, float* %arrayidx2, align 4 %conv3 = fptosi float %1 to i64 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1 ret <2 x i64> %vecinit4 ; P9BE-LABEL: fromDiffMemVarDConvftoll ; P9LE-LABEL: fromDiffMemVarDConvftoll ; P8BE-LABEL: fromDiffMemVarDConvftoll ; P8LE-LABEL: fromDiffMemVarDConvftoll ; P9BE: sldi ; P9BE: lfsux ; P9BE: lfs ; P9BE: xxmrghd ; P9BE-NEXT: xvcvdpsxds v2 ; P9BE-NEXT: blr ; P9LE: sldi ; P9LE: lfsux ; P9LE: lfs ; P9LE: xxmrghd ; P9LE-NEXT: xvcvdpsxds v2 ; P9LE-NEXT: blr ; P8BE: sldi ; P8BE: lfsux ; P8BE: lfs ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpsxds v2 ; P8BE-NEXT: blr ; P8LE: sldi ; P8LE: lfsux ; P8LE: lfs ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpsxds v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltRegValConvftoll(float %val) { entry: %conv = fptosi float %val to i64 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat.splat ; P9BE-LABEL: spltRegValConvftoll ; P9LE-LABEL: spltRegValConvftoll ; P8BE-LABEL: spltRegValConvftoll ; P8LE-LABEL: spltRegValConvftoll ; P9BE: xscvdpsxds ; P9BE-NEXT: xxspltd v2 ; P9BE-NEXT: blr ; P9LE: xscvdpsxds ; P9LE-NEXT: xxspltd v2 ; P9LE-NEXT: blr ; P8BE: xscvdpsxds ; P8BE-NEXT: xxspltd v2 ; P8BE-NEXT: blr ; P8LE: xscvdpsxds ; P8LE-NEXT: xxspltd v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @spltMemValConvftoll(float* nocapture readonly %ptr) { entry: %0 = load float, float* %ptr, align 4 %conv = fptosi float %0 to i64 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat.splat ; P9BE-LABEL: spltMemValConvftoll ; P9LE-LABEL: spltMemValConvftoll ; P8BE-LABEL: spltMemValConvftoll ; P8LE-LABEL: spltMemValConvftoll ; P9BE: lfs ; P9BE-NEXT: xscvdpsxds ; P9BE-NEXT: xxspltd v2 ; P9BE-NEXT: blr ; P9LE: lfs ; P9LE-NEXT: xscvdpsxds ; P9LE-NEXT: xxspltd v2 ; P9LE-NEXT: blr ; P8BE: lfs ; P8BE-NEXT: xscvdpsxds ; P8BE-NEXT: xxspltd v2 ; P8BE-NEXT: blr ; P8LE: lfs ; P8LE-NEXT: xscvdpsxds ; P8LE-NEXT: xxspltd v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltCnstConvdtoll() { entry: ret <2 x i64> ; P9BE-LABEL: spltCnstConvdtoll ; P9LE-LABEL: spltCnstConvdtoll ; P8BE-LABEL: spltCnstConvdtoll ; P8LE-LABEL: spltCnstConvdtoll ; P9BE: lxv ; P9BE: blr ; P9LE: lxv ; P9LE: blr ; P8BE: lxvd2x ; P8BE: blr ; P8LE: lxvd2x ; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @fromRegsConvdtoll(double %a, double %b) { entry: %conv = fptosi double %a to i64 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 %conv1 = fptosi double %b to i64 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %conv1, i32 1 ret <2 x i64> %vecinit2 ; P9BE-LABEL: fromRegsConvdtoll ; P9LE-LABEL: fromRegsConvdtoll ; P8BE-LABEL: fromRegsConvdtoll ; P8LE-LABEL: fromRegsConvdtoll ; P9BE: xxmrghd ; P9BE-NEXT: xvcvdpsxds ; P9BE-NEXT: blr ; P9LE: xxmrghd ; P9LE-NEXT: xvcvdpsxds ; P9LE-NEXT: blr ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpsxds ; P8BE-NEXT: blr ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpsxds ; 
P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @fromDiffConstsConvdtoll() { entry: ret <2 x i64> ; P9BE-LABEL: fromDiffConstsConvdtoll ; P9LE-LABEL: fromDiffConstsConvdtoll ; P8BE-LABEL: fromDiffConstsConvdtoll ; P8LE-LABEL: fromDiffConstsConvdtoll ; P9BE: lxv ; P9BE: blr ; P9LE: lxv ; P9LE: blr ; P8BE: lxvd2x ; P8BE: blr ; P8LE: lxvd2x ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemConsAConvdtoll(double* nocapture readonly %ptr) { entry: %0 = bitcast double* %ptr to <2 x double>* %1 = load <2 x double>, <2 x double>* %0, align 8 %2 = fptosi <2 x double> %1 to <2 x i64> ret <2 x i64> %2 ; P9BE-LABEL: fromDiffMemConsAConvdtoll ; P9LE-LABEL: fromDiffMemConsAConvdtoll ; P8BE-LABEL: fromDiffMemConsAConvdtoll ; P8LE-LABEL: fromDiffMemConsAConvdtoll ; P9BE: lxv ; P9BE-NEXT: xvcvdpsxds v2 ; P9BE-NEXT: blr ; P9LE: lxv ; P9LE-NEXT: xvcvdpsxds v2 ; P9LE-NEXT: blr ; P8BE: lxvd2x ; P8BE-NEXT: xvcvdpsxds v2 ; P8BE-NEXT: blr ; P8LE: lxvd2x ; P8LE: xxswapd ; P8LE-NEXT: xvcvdpsxds v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemConsDConvdtoll(double* nocapture readonly %ptr) { entry: %arrayidx = getelementptr inbounds double, double* %ptr, i64 3 %0 = load double, double* %arrayidx, align 8 %conv = fptosi double %0 to i64 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 %arrayidx1 = getelementptr inbounds double, double* %ptr, i64 2 %1 = load double, double* %arrayidx1, align 8 %conv2 = fptosi double %1 to i64 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 ret <2 x i64> %vecinit3 ; P9BE-LABEL: fromDiffMemConsDConvdtoll ; P9LE-LABEL: fromDiffMemConsDConvdtoll ; P8BE-LABEL: fromDiffMemConsDConvdtoll ; P8LE-LABEL: fromDiffMemConsDConvdtoll ; P9BE: lxv ; P9BE-NEXT: xxswapd ; P9BE-NEXT: xvcvdpsxds v2 ; P9BE-NEXT: blr ; P9LE: lxv ; P9LE-NEXT: xxswapd ; P9LE-NEXT: xvcvdpsxds v2 ; P9LE-NEXT: blr ; P8BE: lxvd2x ; P8BE-NEXT: xxswapd ; P8BE-NEXT: xvcvdpsxds v2 ; P8BE-NEXT: blr ; P8LE: lxvd2x ; P8LE-NEXT: xvcvdpsxds v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemVarAConvdtoll(double* nocapture readonly %arr, i32 signext %elem) { entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom %0 = load double, double* %arrayidx, align 8 %conv = fptosi double %0 to i64 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 %add = add nsw i32 %elem, 1 %idxprom1 = sext i32 %add to i64 %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1 %1 = load double, double* %arrayidx2, align 8 %conv3 = fptosi double %1 to i64 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1 ret <2 x i64> %vecinit4 ; P9BE-LABEL: fromDiffMemVarAConvdtoll ; P9LE-LABEL: fromDiffMemVarAConvdtoll ; P8BE-LABEL: fromDiffMemVarAConvdtoll ; P8LE-LABEL: fromDiffMemVarAConvdtoll ; P9BE: sldi ; P9BE: lxvx ; P9BE-NEXT: xvcvdpsxds v2 ; P9BE-NEXT: blr ; P9LE: sldi ; P9LE: lxvx ; P9LE-NEXT: xvcvdpsxds v2 ; P9LE-NEXT: blr ; P8BE: sldi ; P8BE: lxvd2x ; P8BE-NEXT: xvcvdpsxds v2 ; P8BE-NEXT: blr ; P8LE: sldi ; P8LE: lxvd2x ; P8LE-NEXT: xxswapd ; P8LE-NEXT: xvcvdpsxds v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemVarDConvdtoll(double* nocapture readonly %arr, i32 signext %elem) { entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom %0 = load double, double* %arrayidx, align 8 %conv = 
fptosi double %0 to i64 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 %sub = add nsw i32 %elem, -1 %idxprom1 = sext i32 %sub to i64 %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1 %1 = load double, double* %arrayidx2, align 8 %conv3 = fptosi double %1 to i64 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1 ret <2 x i64> %vecinit4 ; P9BE-LABEL: fromDiffMemVarDConvdtoll ; P9LE-LABEL: fromDiffMemVarDConvdtoll ; P8BE-LABEL: fromDiffMemVarDConvdtoll ; P8LE-LABEL: fromDiffMemVarDConvdtoll ; P9BE: sldi ; P9BE: lxv ; P9BE-NEXT: xxswapd ; P9BE-NEXT: xvcvdpsxds v2 ; P9BE-NEXT: blr ; P9LE: sldi ; P9LE: lxv ; P9LE-NEXT: xxswapd ; P9LE-NEXT: xvcvdpsxds v2 ; P9LE-NEXT: blr ; P8BE: sldi ; P8BE: lxvd2x ; P8BE-NEXT: xxswapd ; P8BE-NEXT: xvcvdpsxds v2 ; P8BE-NEXT: blr ; P8LE: sldi ; P8LE: lxvd2x ; P8LE-NEXT: xvcvdpsxds v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltRegValConvdtoll(double %val) { entry: %conv = fptosi double %val to i64 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat.splat ; P9BE-LABEL: spltRegValConvdtoll ; P9LE-LABEL: spltRegValConvdtoll ; P8BE-LABEL: spltRegValConvdtoll ; P8LE-LABEL: spltRegValConvdtoll ; P9BE: xscvdpsxds ; P9BE-NEXT: xxspltd v2 ; P9BE-NEXT: blr ; P9LE: xscvdpsxds ; P9LE-NEXT: xxspltd v2 ; P9LE-NEXT: blr ; P8BE: xscvdpsxds ; P8BE-NEXT: xxspltd v2 ; P8BE-NEXT: blr ; P8LE: xscvdpsxds ; P8LE-NEXT: xxspltd v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @spltMemValConvdtoll(double* nocapture readonly %ptr) { entry: %0 = load double, double* %ptr, align 8 %conv = fptosi double %0 to i64 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat.splat ; P9BE-LABEL: spltMemValConvdtoll ; P9LE-LABEL: spltMemValConvdtoll ; P8BE-LABEL: spltMemValConvdtoll ; P8LE-LABEL: spltMemValConvdtoll ; P9BE: lxvdsx ; P9BE-NEXT: xvcvdpsxds ; P9BE-NEXT: blr ; P9LE: lxvdsx ; P9LE-NEXT: xvcvdpsxds ; P9LE-NEXT: blr ; P8BE: lxvdsx ; P8BE-NEXT: xvcvdpsxds ; P8BE-NEXT: blr ; P8LE: lxvdsx ; P8LE-NEXT: xvcvdpsxds ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @allZeroull() { entry: ret <2 x i64> zeroinitializer ; P9BE-LABEL: allZeroull ; P9LE-LABEL: allZeroull ; P8BE-LABEL: allZeroull ; P8LE-LABEL: allZeroull ; P9BE: xxlxor v2, v2, v2 ; P9BE: blr ; P9LE: xxlxor v2, v2, v2 ; P9LE: blr ; P8BE: xxlxor v2, v2, v2 ; P8BE: blr ; P8LE: xxlxor v2, v2, v2 ; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @allOneull() { entry: ret <2 x i64> ; P9BE-LABEL: allOneull ; P9LE-LABEL: allOneull ; P8BE-LABEL: allOneull ; P8LE-LABEL: allOneull ; P9BE: xxspltib v2, 255 ; P9BE: blr ; P9LE: xxspltib v2, 255 ; P9LE: blr ; P8BE: vspltisb v2, -1 ; P8BE: blr ; P8LE: vspltisb v2, -1 ; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltConst1ull() { entry: ret <2 x i64> ; P9BE-LABEL: spltConst1ull ; P9LE-LABEL: spltConst1ull ; P8BE-LABEL: spltConst1ull ; P8LE-LABEL: spltConst1ull ; P9BE: lxv ; P9BE: blr ; P9LE: lxv ; P9LE: blr ; P8BE: lxvd2x ; P8BE: blr ; P8LE: lxvd2x ; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltConst16kull() { entry: ret <2 x i64> ; P9BE-LABEL: spltConst16kull ; P9LE-LABEL: 
spltConst16kull ; P8BE-LABEL: spltConst16kull ; P8LE-LABEL: spltConst16kull ; P9BE: lxv ; P9BE: blr ; P9LE: lxv ; P9LE: blr ; P8BE: lxvd2x ; P8BE: blr ; P8LE: lxvd2x ; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltConst32kull() { entry: ret <2 x i64> ; P9BE-LABEL: spltConst32kull ; P9LE-LABEL: spltConst32kull ; P8BE-LABEL: spltConst32kull ; P8LE-LABEL: spltConst32kull ; P9BE: lxv ; P9BE: blr ; P9LE: lxv ; P9LE: blr ; P8BE: lxvd2x ; P8BE: blr ; P8LE: lxvd2x ; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @fromRegsull(i64 %a, i64 %b) { entry: %vecinit = insertelement <2 x i64> undef, i64 %a, i32 0 %vecinit1 = insertelement <2 x i64> %vecinit, i64 %b, i32 1 ret <2 x i64> %vecinit1 ; P9BE-LABEL: fromRegsull ; P9LE-LABEL: fromRegsull ; P8BE-LABEL: fromRegsull ; P8LE-LABEL: fromRegsull ; P9BE: mtvsrdd v2, r3, r4 ; P9BE: blr ; P9LE: mtvsrdd v2, r4, r3 ; P9LE: blr ; P8BE-DAG: mtvsrd {{[vsf0-9]+}}, r3 ; P8BE-DAG: mtvsrd {{[vsf0-9]+}}, r4 ; P8BE: xxmrghd v2 ; P8BE: blr ; P8LE-DAG: mtvsrd {{[vsf0-9]+}}, r3 ; P8LE-DAG: mtvsrd {{[vsf0-9]+}}, r4 ; P8LE: xxmrghd v2 ; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @fromDiffConstsull() { entry: ret <2 x i64> ; P9BE-LABEL: fromDiffConstsull ; P9LE-LABEL: fromDiffConstsull ; P8BE-LABEL: fromDiffConstsull ; P8LE-LABEL: fromDiffConstsull ; P9BE: lxv ; P9BE: blr ; P9LE: lxv ; P9LE: blr ; P8BE: lxvd2x ; P8BE: blr ; P8LE: lxvd2x ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemConsAull(i64* nocapture readonly %arr) { entry: %0 = load i64, i64* %arr, align 8 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0 %arrayidx1 = getelementptr inbounds i64, i64* %arr, i64 1 %1 = load i64, i64* %arrayidx1, align 8 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 ret <2 x i64> %vecinit2 ; P9BE-LABEL: fromDiffMemConsAull ; P9LE-LABEL: fromDiffMemConsAull ; P8BE-LABEL: fromDiffMemConsAull ; P8LE-LABEL: fromDiffMemConsAull ; P9BE: lxv v2 ; P9BE: blr ; P9LE: lxv v2 ; P9LE: blr ; P8BE: lxvd2x v2 ; P8BE: blr ; P8LE: lxvd2x ; P8LE: xxswapd v2 ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemConsDull(i64* nocapture readonly %arr) { entry: %arrayidx = getelementptr inbounds i64, i64* %arr, i64 3 %0 = load i64, i64* %arrayidx, align 8 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0 %arrayidx1 = getelementptr inbounds i64, i64* %arr, i64 2 %1 = load i64, i64* %arrayidx1, align 8 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 ret <2 x i64> %vecinit2 ; P9BE-LABEL: fromDiffMemConsDull ; P9LE-LABEL: fromDiffMemConsDull ; P8BE-LABEL: fromDiffMemConsDull ; P8LE-LABEL: fromDiffMemConsDull ; P9BE: lxv v2 ; P9BE: blr ; P9LE: lxv ; P9LE: xxswapd v2 ; P9LE: blr ; P8BE: lxvd2x ; P8BE: xxswapd v2 ; P8BE-NEXT: blr ; P8LE: lxvd2x v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemVarAull(i64* nocapture readonly %arr, i32 signext %elem) { entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom %0 = load i64, i64* %arrayidx, align 8 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0 %add = add nsw i32 %elem, 1 %idxprom1 = sext i32 %add to i64 %arrayidx2 = getelementptr inbounds i64, i64* %arr, i64 %idxprom1 %1 = load i64, i64* %arrayidx2, align 8 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 ret <2 x i64> %vecinit3 ; P9BE-LABEL: fromDiffMemVarAull ; P9LE-LABEL: fromDiffMemVarAull ; 
P8BE-LABEL: fromDiffMemVarAull ; P8LE-LABEL: fromDiffMemVarAull ; P9BE: sldi ; P9BE: lxvx v2 ; P9BE-NEXT: blr ; P9LE: sldi ; P9LE: lxvx v2 ; P9LE-NEXT: blr ; P8BE: sldi ; P8BE: lxvd2x v2 ; P8BE-NEXT: blr ; P8LE: sldi ; P8LE: lxvd2x ; P8LE: xxswapd v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemVarDull(i64* nocapture readonly %arr, i32 signext %elem) { entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom %0 = load i64, i64* %arrayidx, align 8 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0 %sub = add nsw i32 %elem, -1 %idxprom1 = sext i32 %sub to i64 %arrayidx2 = getelementptr inbounds i64, i64* %arr, i64 %idxprom1 %1 = load i64, i64* %arrayidx2, align 8 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 ret <2 x i64> %vecinit3 ; P9BE-LABEL: fromDiffMemVarDull ; P9LE-LABEL: fromDiffMemVarDull ; P8BE-LABEL: fromDiffMemVarDull ; P8LE-LABEL: fromDiffMemVarDull ; P9BE: sldi ; P9BE: lxv ; P9BE: xxswapd v2 ; P9BE-NEXT: blr ; P9LE: sldi ; P9LE: lxv ; P9LE: xxswapd v2 ; P9LE-NEXT: blr ; P8BE: sldi ; P8BE: lxvd2x ; P8BE: xxswapd v2 ; P8BE-NEXT: blr ; P8LE: sldi ; P8LE: lxvd2x v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromRandMemConsull(i64* nocapture readonly %arr) { entry: %arrayidx = getelementptr inbounds i64, i64* %arr, i64 4 %0 = load i64, i64* %arrayidx, align 8 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0 %arrayidx1 = getelementptr inbounds i64, i64* %arr, i64 18 %1 = load i64, i64* %arrayidx1, align 8 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 ret <2 x i64> %vecinit2 ; P9BE-LABEL: fromRandMemConsull ; P9LE-LABEL: fromRandMemConsull ; P8BE-LABEL: fromRandMemConsull ; P8LE-LABEL: fromRandMemConsull ; P9BE: ld ; P9BE: ld ; P9BE: mtvsrdd v2 ; P9BE-NEXT: blr ; P9LE: ld ; P9LE: ld ; P9LE: mtvsrdd v2 ; P9LE-NEXT: blr ; P8BE: ld ; P8BE: ld ; P8BE-DAG: mtvsrd ; P8BE-DAG: mtvsrd ; P8BE: xxmrghd v2 ; P8BE-NEXT: blr ; P8LE: ld ; P8LE: ld ; P8LE-DAG: mtvsrd ; P8LE-DAG: mtvsrd ; P8LE: xxmrghd v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromRandMemVarull(i64* nocapture readonly %arr, i32 signext %elem) { entry: %add = add nsw i32 %elem, 4 %idxprom = sext i32 %add to i64 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom %0 = load i64, i64* %arrayidx, align 8 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0 %add1 = add nsw i32 %elem, 1 %idxprom2 = sext i32 %add1 to i64 %arrayidx3 = getelementptr inbounds i64, i64* %arr, i64 %idxprom2 %1 = load i64, i64* %arrayidx3, align 8 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %1, i32 1 ret <2 x i64> %vecinit4 ; P9BE-LABEL: fromRandMemVarull ; P9LE-LABEL: fromRandMemVarull ; P8BE-LABEL: fromRandMemVarull ; P8LE-LABEL: fromRandMemVarull ; P9BE: sldi ; P9BE: ld ; P9BE: ld ; P9BE: mtvsrdd v2 ; P9BE-NEXT: blr ; P9LE: sldi ; P9LE: ld ; P9LE: ld ; P9LE: mtvsrdd v2 ; P9LE-NEXT: blr ; P8BE: sldi ; P8BE: ld ; P8BE: ld ; P8BE: mtvsrd ; P8BE: mtvsrd ; P8BE: xxmrghd v2 ; P8BE-NEXT: blr ; P8LE: sldi ; P8LE: ld ; P8LE: ld ; P8LE: mtvsrd ; P8LE: mtvsrd ; P8LE: xxmrghd v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltRegValull(i64 %val) { entry: %splat.splatinsert = insertelement <2 x i64> undef, i64 %val, i32 0 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat.splat ; P9BE-LABEL: spltRegValull ; P9LE-LABEL: 
spltRegValull ; P8BE-LABEL: spltRegValull ; P8LE-LABEL: spltRegValull ; P9BE: mtvsrdd v2, r3, r3 ; P9BE-NEXT: blr ; P9LE: mtvsrdd v2, r3, r3 ; P9LE-NEXT: blr ; P8BE: mtvsrd {{[vsf]+}}[[REG1:[0-9]+]], r3 ; P8BE: xxspltd v2, {{[vsf]+}}[[REG1]], 0 ; P8BE-NEXT: blr ; P8LE: mtvsrd {{[vsf]+}}[[REG1:[0-9]+]], r3 ; P8LE: xxspltd v2, {{[vsf]+}}[[REG1]], 0 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @spltMemValull(i64* nocapture readonly %ptr) { entry: %0 = load i64, i64* %ptr, align 8 %splat.splatinsert = insertelement <2 x i64> undef, i64 %0, i32 0 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat.splat ; P9BE-LABEL: spltMemValull ; P9LE-LABEL: spltMemValull ; P8BE-LABEL: spltMemValull ; P8LE-LABEL: spltMemValull ; P9BE: lxvdsx v2 ; P9BE-NEXT: blr ; P9LE: lxvdsx v2 ; P9LE-NEXT: blr ; P8BE: lxvdsx v2 ; P8BE-NEXT: blr ; P8LE: lxvdsx v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltCnstConvftoull() { entry: ret <2 x i64> ; P9BE-LABEL: spltCnstConvftoull ; P9LE-LABEL: spltCnstConvftoull ; P8BE-LABEL: spltCnstConvftoull ; P8LE-LABEL: spltCnstConvftoull ; P9BE: lxv ; P9BE: blr ; P9LE: lxv ; P9LE: blr ; P8BE: lxvd2x ; P8BE: blr ; P8LE: lxvd2x ; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @fromRegsConvftoull(float %a, float %b) { entry: %conv = fptoui float %a to i64 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 %conv1 = fptoui float %b to i64 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %conv1, i32 1 ret <2 x i64> %vecinit2 ; P9BE-LABEL: fromRegsConvftoull ; P9LE-LABEL: fromRegsConvftoull ; P8BE-LABEL: fromRegsConvftoull ; P8LE-LABEL: fromRegsConvftoull ; P9BE: xxmrghd ; P9BE: xvcvdpuxds v2 ; P9BE-NEXT: blr ; P9LE: xxmrghd ; P9LE: xvcvdpuxds v2 ; P9LE-NEXT: blr ; P8BE: xxmrghd ; P8BE: xvcvdpuxds v2 ; P8BE-NEXT: blr ; P8LE: xxmrghd ; P8LE: xvcvdpuxds v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @fromDiffConstsConvftoull() { entry: ret <2 x i64> ; P9BE-LABEL: fromDiffConstsConvftoull ; P9LE-LABEL: fromDiffConstsConvftoull ; P8BE-LABEL: fromDiffConstsConvftoull ; P8LE-LABEL: fromDiffConstsConvftoull ; P9BE: lxvx v2 ; P9BE: blr ; P9LE: lxvx v2 ; P9LE: blr ; P8BE: lxvd2x v2 ; P8BE: blr ; P8LE: lxvd2x ; P8LE: xxswapd v2 ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemConsAConvftoull(float* nocapture readonly %ptr) { entry: %0 = load float, float* %ptr, align 4 %conv = fptoui float %0 to i64 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 1 %1 = load float, float* %arrayidx1, align 4 %conv2 = fptoui float %1 to i64 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 ret <2 x i64> %vecinit3 ; P9BE-LABEL: fromDiffMemConsAConvftoull ; P9LE-LABEL: fromDiffMemConsAConvftoull ; P8BE-LABEL: fromDiffMemConsAConvftoull ; P8LE-LABEL: fromDiffMemConsAConvftoull ; P9BE: lfs ; P9BE: lfs ; P9BE: xxmrghd ; P9BE-NEXT: xvcvdpuxds v2 ; P9BE-NEXT: blr ; P9LE: lfs ; P9LE: lfs ; P9LE: xxmrghd ; P9LE-NEXT: xvcvdpuxds v2 ; P9LE-NEXT: blr ; P8BE: lfs ; P8BE: lfs ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpuxds v2 ; P8BE-NEXT: blr ; P8LE: lfs ; P8LE: lfs ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpuxds v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemConsDConvftoull(float* nocapture readonly %ptr) { entry: %arrayidx = getelementptr inbounds float, 
float* %ptr, i64 3 %0 = load float, float* %arrayidx, align 4 %conv = fptoui float %0 to i64 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 2 %1 = load float, float* %arrayidx1, align 4 %conv2 = fptoui float %1 to i64 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 ret <2 x i64> %vecinit3 ; P9BE-LABEL: fromDiffMemConsDConvftoull ; P9LE-LABEL: fromDiffMemConsDConvftoull ; P8BE-LABEL: fromDiffMemConsDConvftoull ; P8LE-LABEL: fromDiffMemConsDConvftoull ; P9BE: lfs ; P9BE: lfs ; P9BE: xxmrghd ; P9BE-NEXT: xvcvdpuxds v2 ; P9BE-NEXT: blr ; P9LE: lfs ; P9LE: lfs ; P9LE: xxmrghd ; P9LE-NEXT: xvcvdpuxds v2 ; P9LE-NEXT: blr ; P8BE: lfs ; P8BE: lfs ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpuxds v2 ; P8BE-NEXT: blr ; P8LE: lfs ; P8LE: lfs ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpuxds v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemVarAConvftoull(float* nocapture readonly %arr, i32 signext %elem) { entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom %0 = load float, float* %arrayidx, align 4 %conv = fptoui float %0 to i64 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 %add = add nsw i32 %elem, 1 %idxprom1 = sext i32 %add to i64 %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1 %1 = load float, float* %arrayidx2, align 4 %conv3 = fptoui float %1 to i64 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1 ret <2 x i64> %vecinit4 ; P9BE-LABEL: fromDiffMemVarAConvftoull ; P9LE-LABEL: fromDiffMemVarAConvftoull ; P8BE-LABEL: fromDiffMemVarAConvftoull ; P8LE-LABEL: fromDiffMemVarAConvftoull ; P9BE: sldi ; P9BE: lfsux ; P9BE: lfs ; P9BE: xxmrghd ; P9BE-NEXT: xvcvdpuxds v2 ; P9BE-NEXT: blr ; P9LE: sldi ; P9LE: lfsux ; P9LE: lfs ; P9LE: xxmrghd ; P9LE-NEXT: xvcvdpuxds v2 ; P9LE-NEXT: blr ; P8BE: sldi ; P8BE: lfsux ; P8BE: lfs ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpuxds v2 ; P8BE-NEXT: blr ; P8LE: sldi ; P8LE: lfsux ; P8LE: lfs ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpuxds v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemVarDConvftoull(float* nocapture readonly %arr, i32 signext %elem) { entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom %0 = load float, float* %arrayidx, align 4 %conv = fptoui float %0 to i64 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 %sub = add nsw i32 %elem, -1 %idxprom1 = sext i32 %sub to i64 %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1 %1 = load float, float* %arrayidx2, align 4 %conv3 = fptoui float %1 to i64 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1 ret <2 x i64> %vecinit4 ; P9BE-LABEL: fromDiffMemVarDConvftoull ; P9LE-LABEL: fromDiffMemVarDConvftoull ; P8BE-LABEL: fromDiffMemVarDConvftoull ; P8LE-LABEL: fromDiffMemVarDConvftoull ; P9BE: sldi ; P9BE: lfsux ; P9BE: lfs ; P9BE: xxmrghd ; P9BE-NEXT: xvcvdpuxds v2 ; P9BE-NEXT: blr ; P9LE: sldi ; P9LE: lfsux ; P9LE: lfs ; P9LE: xxmrghd ; P9LE-NEXT: xvcvdpuxds v2 ; P9LE-NEXT: blr ; P8BE: sldi ; P8BE: lfsux ; P8BE: lfs ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpuxds v2 ; P8BE-NEXT: blr ; P8LE: sldi ; P8LE: lfsux ; P8LE: lfs ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpuxds v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltRegValConvftoull(float %val) { entry: %conv = fptoui float %val to i64 %splat.splatinsert = insertelement <2 x i64> undef, i64 
%conv, i32 0 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat.splat ; P9BE-LABEL: spltRegValConvftoull ; P9LE-LABEL: spltRegValConvftoull ; P8BE-LABEL: spltRegValConvftoull ; P8LE-LABEL: spltRegValConvftoull ; P9BE: xscvdpuxds ; P9BE-NEXT: xxspltd v2 ; P9BE-NEXT: blr ; P9LE: xscvdpuxds ; P9LE-NEXT: xxspltd v2 ; P9LE-NEXT: blr ; P8BE: xscvdpuxds ; P8BE-NEXT: xxspltd v2 ; P8BE-NEXT: blr ; P8LE: xscvdpuxds ; P8LE-NEXT: xxspltd v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @spltMemValConvftoull(float* nocapture readonly %ptr) { entry: %0 = load float, float* %ptr, align 4 %conv = fptoui float %0 to i64 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat.splat ; P9BE-LABEL: spltMemValConvftoull ; P9LE-LABEL: spltMemValConvftoull ; P8BE-LABEL: spltMemValConvftoull ; P8LE-LABEL: spltMemValConvftoull ; P9BE: lfs ; P9BE-NEXT: xscvdpuxds ; P9BE-NEXT: xxspltd v2 ; P9BE-NEXT: blr ; P9LE: lfs ; P9LE-NEXT: xscvdpuxds ; P9LE-NEXT: xxspltd v2 ; P9LE-NEXT: blr ; P8BE: lfs ; P8BE-NEXT: xscvdpuxds ; P8BE-NEXT: xxspltd v2 ; P8BE-NEXT: blr ; P8LE: lfs ; P8LE-NEXT: xscvdpuxds ; P8LE-NEXT: xxspltd v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltCnstConvdtoull() { entry: ret <2 x i64> ; P9BE-LABEL: spltCnstConvdtoull ; P9LE-LABEL: spltCnstConvdtoull ; P8BE-LABEL: spltCnstConvdtoull ; P8LE-LABEL: spltCnstConvdtoull ; P9BE: lxv ; P9BE: blr ; P9LE: lxv ; P9LE: blr ; P8BE: lxvd2x ; P8BE: blr ; P8LE: lxvd2x ; P8LE: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @fromRegsConvdtoull(double %a, double %b) { entry: %conv = fptoui double %a to i64 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 %conv1 = fptoui double %b to i64 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %conv1, i32 1 ret <2 x i64> %vecinit2 ; P9BE-LABEL: fromRegsConvdtoull ; P9LE-LABEL: fromRegsConvdtoull ; P8BE-LABEL: fromRegsConvdtoull ; P8LE-LABEL: fromRegsConvdtoull ; P9BE: xxmrghd ; P9BE-NEXT: xvcvdpuxds ; P9BE-NEXT: blr ; P9LE: xxmrghd ; P9LE-NEXT: xvcvdpuxds ; P9LE-NEXT: blr ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpuxds ; P8BE-NEXT: blr ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpuxds ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @fromDiffConstsConvdtoull() { entry: ret <2 x i64> ; P9BE-LABEL: fromDiffConstsConvdtoull ; P9LE-LABEL: fromDiffConstsConvdtoull ; P8BE-LABEL: fromDiffConstsConvdtoull ; P8LE-LABEL: fromDiffConstsConvdtoull ; P9BE: lxv ; P9BE: blr ; P9LE: lxv ; P9LE: blr ; P8BE: lxvd2x ; P8BE: blr ; P8LE: lxvd2x ; P8LE: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemConsAConvdtoull(double* nocapture readonly %ptr) { entry: %0 = bitcast double* %ptr to <2 x double>* %1 = load <2 x double>, <2 x double>* %0, align 8 %2 = fptoui <2 x double> %1 to <2 x i64> ret <2 x i64> %2 ; P9BE-LABEL: fromDiffMemConsAConvdtoull ; P9LE-LABEL: fromDiffMemConsAConvdtoull ; P8BE-LABEL: fromDiffMemConsAConvdtoull ; P8LE-LABEL: fromDiffMemConsAConvdtoull ; P9BE: lxv ; P9BE-NEXT: xvcvdpuxds v2 ; P9BE-NEXT: blr ; P9LE: lxv ; P9LE-NEXT: xvcvdpuxds v2 ; P9LE-NEXT: blr ; P8BE: lxvd2x ; P8BE-NEXT: xvcvdpuxds v2 ; P8BE-NEXT: blr ; P8LE: lxvd2x ; P8LE: xxswapd ; P8LE-NEXT: xvcvdpuxds v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> 
@fromDiffMemConsDConvdtoull(double* nocapture readonly %ptr) { entry: %arrayidx = getelementptr inbounds double, double* %ptr, i64 3 %0 = load double, double* %arrayidx, align 8 %conv = fptoui double %0 to i64 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 %arrayidx1 = getelementptr inbounds double, double* %ptr, i64 2 %1 = load double, double* %arrayidx1, align 8 %conv2 = fptoui double %1 to i64 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 ret <2 x i64> %vecinit3 ; P9BE-LABEL: fromDiffMemConsDConvdtoull ; P9LE-LABEL: fromDiffMemConsDConvdtoull ; P8BE-LABEL: fromDiffMemConsDConvdtoull ; P8LE-LABEL: fromDiffMemConsDConvdtoull ; P9BE: lxv ; P9BE-NEXT: xxswapd ; P9BE-NEXT: xvcvdpuxds v2 ; P9BE-NEXT: blr ; P9LE: lxv ; P9LE-NEXT: xxswapd ; P9LE-NEXT: xvcvdpuxds v2 ; P9LE-NEXT: blr ; P8BE: lxvd2x ; P8BE-NEXT: xxswapd ; P8BE-NEXT: xvcvdpuxds v2 ; P8BE-NEXT: blr ; P8LE: lxvd2x ; P8LE-NEXT: xvcvdpuxds v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemVarAConvdtoull(double* nocapture readonly %arr, i32 signext %elem) { entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom %0 = load double, double* %arrayidx, align 8 %conv = fptoui double %0 to i64 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 %add = add nsw i32 %elem, 1 %idxprom1 = sext i32 %add to i64 %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1 %1 = load double, double* %arrayidx2, align 8 %conv3 = fptoui double %1 to i64 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1 ret <2 x i64> %vecinit4 ; P9BE-LABEL: fromDiffMemVarAConvdtoull ; P9LE-LABEL: fromDiffMemVarAConvdtoull ; P8BE-LABEL: fromDiffMemVarAConvdtoull ; P8LE-LABEL: fromDiffMemVarAConvdtoull ; P9BE: sldi ; P9BE: lxvx ; P9BE-NEXT: xvcvdpuxds v2 ; P9BE-NEXT: blr ; P9LE: sldi ; P9LE: lxvx ; P9LE-NEXT: xvcvdpuxds v2 ; P9LE-NEXT: blr ; P8BE: sldi ; P8BE: lxvd2x ; P8BE-NEXT: xvcvdpuxds v2 ; P8BE-NEXT: blr ; P8LE: sldi ; P8LE: lxvd2x ; P8LE-NEXT: xxswapd ; P8LE-NEXT: xvcvdpuxds v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readonly define <2 x i64> @fromDiffMemVarDConvdtoull(double* nocapture readonly %arr, i32 signext %elem) { entry: %idxprom = sext i32 %elem to i64 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom %0 = load double, double* %arrayidx, align 8 %conv = fptoui double %0 to i64 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 %sub = add nsw i32 %elem, -1 %idxprom1 = sext i32 %sub to i64 %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1 %1 = load double, double* %arrayidx2, align 8 %conv3 = fptoui double %1 to i64 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1 ret <2 x i64> %vecinit4 ; P9BE-LABEL: fromDiffMemVarDConvdtoull ; P9LE-LABEL: fromDiffMemVarDConvdtoull ; P8BE-LABEL: fromDiffMemVarDConvdtoull ; P8LE-LABEL: fromDiffMemVarDConvdtoull ; P9BE: sldi ; P9BE: lxv ; P9BE-NEXT: xxswapd ; P9BE-NEXT: xvcvdpuxds v2 ; P9BE-NEXT: blr ; P9LE: sldi ; P9LE: lxv ; P9LE-NEXT: xxswapd ; P9LE-NEXT: xvcvdpuxds v2 ; P9LE-NEXT: blr ; P8BE: sldi ; P8BE: lxvd2x ; P8BE-NEXT: xxswapd ; P8BE-NEXT: xvcvdpuxds v2 ; P8BE-NEXT: blr ; P8LE: sldi ; P8LE: lxvd2x ; P8LE-NEXT: xvcvdpuxds v2 ; P8LE-NEXT: blr } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @spltRegValConvdtoull(double %val) { entry: %conv = fptoui double %val to i64 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 %splat.splat = shufflevector 
<2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %splat.splat
; P9BE-LABEL: spltRegValConvdtoull
; P9LE-LABEL: spltRegValConvdtoull
; P8BE-LABEL: spltRegValConvdtoull
; P8LE-LABEL: spltRegValConvdtoull
; P9BE: xscvdpuxds
; P9BE-NEXT: xxspltd v2
; P9BE-NEXT: blr
; P9LE: xscvdpuxds
; P9LE-NEXT: xxspltd v2
; P9LE-NEXT: blr
; P8BE: xscvdpuxds
; P8BE-NEXT: xxspltd v2
; P8BE-NEXT: blr
; P8LE: xscvdpuxds
; P8LE-NEXT: xxspltd v2
; P8LE-NEXT: blr
}

; Function Attrs: norecurse nounwind readonly
define <2 x i64> @spltMemValConvdtoull(double* nocapture readonly %ptr) {
entry:
%0 = load double, double* %ptr, align 8
%conv = fptoui double %0 to i64
%splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
%splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %splat.splat
; P9BE-LABEL: spltMemValConvdtoull
; P9LE-LABEL: spltMemValConvdtoull
; P8BE-LABEL: spltMemValConvdtoull
; P8LE-LABEL: spltMemValConvdtoull
; P9BE: lxvdsx
; P9BE-NEXT: xvcvdpuxds
; P9BE-NEXT: blr
; P9LE: lxvdsx
; P9LE-NEXT: xvcvdpuxds
; P9LE-NEXT: blr
; P8BE: lxvdsx
; P8BE-NEXT: xvcvdpuxds
; P8BE-NEXT: blr
; P8LE: lxvdsx
; P8LE-NEXT: xvcvdpuxds
; P8LE-NEXT: blr
}
Index: vendor/llvm/dist-release_70/test/CodeGen/PowerPC/load-v4i8-improved.ll
===================================================================
--- vendor/llvm/dist-release_70/test/CodeGen/PowerPC/load-v4i8-improved.ll (revision 341364)
+++ vendor/llvm/dist-release_70/test/CodeGen/PowerPC/load-v4i8-improved.ll (revision 341365)
@@ -1,15 +1,27 @@
-; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck \
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s \
+; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names | FileCheck --check-prefix=CHECK-LE \
; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s
-; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck \
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s \
+; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names | FileCheck \
; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s
define <16 x i8> @test(i32* %s, i32* %t) {
+; CHECK-LE-LABEL: test:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: lfiwzx f0, 0, r3
+; CHECK-LE-NEXT: xxpermdi vs0, f0, f0, 2
+; CHECK-LE-NEXT: xxspltw v2, vs0, 3
+; CHECK-LE-NEXT: blr
+
+; CHECK-LABEL: test:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfiwzx f0, 0, r3
+; CHECK-NEXT: xxsldwi vs0, f0, f0, 1
+; CHECK-NEXT: xxspltw v2, vs0, 0
+; CHECK-NEXT: blr
entry:
%0 = bitcast i32* %s to <4 x i8>*
%1 = load <4 x i8>, <4 x i8>* %0, align 4
%2 = shufflevector <4 x i8> %1, <4 x i8> undef, <16 x i32>
ret <16 x i8> %2
-; CHECK-LABEL: test
-; CHECK: lxsiwax 34, 0, 3
-; CHECK: xxspltw 34, 34, 1
}
Index: vendor/llvm/dist-release_70/test/CodeGen/PowerPC/power9-moves-and-splats.ll
===================================================================
--- vendor/llvm/dist-release_70/test/CodeGen/PowerPC/power9-moves-and-splats.ll (revision 341364)
+++ vendor/llvm/dist-release_70/test/CodeGen/PowerPC/power9-moves-and-splats.ll (revision 341365)
@@ -1,178 +1,270 @@
-; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
-; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s \
-; RUN: --check-prefix=CHECK-BE
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
+; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu -ppc-vsr-nums-as-vr \
+; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s --check-prefix=CHECK-BE
@Globi = external global i32, align 4
@Globf = external global float, align 4
define <2 x i64> @test1(i64 %a, i64 %b) {
+; CHECK-LABEL: test1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: mtvsrdd v2, r4, r3
+; CHECK-NEXT: blr
+
+; CHECK-BE-LABEL: test1:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: mtvsrdd v2, r3, r4
+; CHECK-BE-NEXT: blr
entry:
; The FIXME below is due to the lowering for BUILD_VECTOR needing a re-vamp
; which will happen in a subsequent patch.
-; CHECK-LABEL: test1
-; CHECK: mtvsrdd 34, 4, 3
-; CHECK-BE-LABEL: test1
-; CHECK-BE: mtvsrdd 34, 3, 4
%vecins = insertelement <2 x i64> undef, i64 %a, i32 0
%vecins1 = insertelement <2 x i64> %vecins, i64 %b, i32 1
ret <2 x i64> %vecins1
}
define i64 @test2(<2 x i64> %a) {
+; CHECK-LABEL: test2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: mfvsrld r3, v2
+; CHECK-NEXT: blr
+
+; CHECK-BE-LABEL: test2:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: mfvsrd r3, v2
+; CHECK-BE-NEXT: blr
entry:
-; CHECK-LABEL: test2
-; CHECK: mfvsrld 3, 34
%0 = extractelement <2 x i64> %a, i32 0
ret i64 %0
}
define i64 @test3(<2 x i64> %a) {
+; CHECK-LABEL: test3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: mfvsrd r3, v2
+; CHECK-NEXT: blr
+
+; CHECK-BE-LABEL: test3:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: mfvsrld r3, v2
+; CHECK-BE-NEXT: blr
entry:
-; CHECK-BE-LABEL: test3
-; CHECK-BE: mfvsrld 3, 34
%0 = extractelement <2 x i64> %a, i32 1
ret i64 %0
}
define <4 x i32> @test4(i32* nocapture readonly %in) {
+; CHECK-LABEL: test4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfiwzx f0, 0, r3
+; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
+; CHECK-NEXT: xxspltw v2, vs0, 3
+; CHECK-NEXT: blr
+
+; CHECK-BE-LABEL: test4:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lfiwzx f0, 0, r3
+; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1
+; CHECK-BE-NEXT: xxspltw v2, vs0, 0
+; CHECK-BE-NEXT: blr
entry:
-; CHECK-LABEL: test4
-; CHECK: lxvwsx 34, 0, 3
-; CHECK-NOT: xxspltw
-; CHECK-BE-LABEL: test4
-; CHECK-BE: lxvwsx 34, 0, 3
-; CHECK-BE-NOT: xxspltw
%0 = load i32, i32* %in, align 4
%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %splat.splat
}
define <4 x float> @test5(float* nocapture readonly %in) {
+; CHECK-LABEL: test5:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfiwzx f0, 0, r3
+; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
+; CHECK-NEXT: xxspltw v2, vs0, 3
+; CHECK-NEXT: blr
+
+; CHECK-BE-LABEL: test5:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lfiwzx f0, 0, r3
+; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1
+; CHECK-BE-NEXT: xxspltw v2, vs0, 0
+; CHECK-BE-NEXT: blr
entry:
-; CHECK-LABEL: test5
-; CHECK: lxvwsx 34, 0, 3
-; CHECK-NOT: xxspltw
-; CHECK-BE-LABEL: test5
-; CHECK-BE: lxvwsx 34, 0, 3
-; CHECK-BE-NOT: xxspltw
%0 = load float, float* %in, align 4
%splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0
%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %splat.splat
}
define <4 x i32> @test6() {
+; CHECK-LABEL: test6:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT: lfiwzx f0, 0, r3
+; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
+; CHECK-NEXT: xxspltw v2, vs0, 3
+; CHECK-NEXT: blr
+
+; CHECK-BE-LABEL: test6:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: addis r3, r2, .LC0@toc@ha
+; CHECK-BE-NEXT: ld r3, .LC0@toc@l(r3)
+; CHECK-BE-NEXT: lfiwzx f0, 0, r3
+; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1
+; CHECK-BE-NEXT: xxspltw v2, vs0, 0
+; CHECK-BE-NEXT: blr
entry:
-; CHECK-LABEL: test6
-; CHECK: addis
-; CHECK: ld [[TOC:[0-9]+]], .LC0
-; CHECK: lxvwsx 34, 0, 3
-; CHECK-NOT: xxspltw
-; CHECK-BE-LABEL: test6
-; CHECK-BE: addis
-; CHECK-BE: ld [[TOC:[0-9]+]], .LC0
-; CHECK-BE: lxvwsx 34, 0, 3
-; CHECK-BE-NOT: xxspltw
%0 = load i32, i32* @Globi, align 4
%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %splat.splat
}
define <4 x float> @test7() {
+; CHECK-LABEL: test7:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addis r3, r2, .LC1@toc@ha
+; CHECK-NEXT: ld r3, .LC1@toc@l(r3)
+; CHECK-NEXT: lfiwzx f0, 0, r3
+; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
+; CHECK-NEXT: xxspltw v2, vs0, 3
+; CHECK-NEXT: blr
+
+; CHECK-BE-LABEL: test7:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: addis r3, r2, .LC1@toc@ha
+; CHECK-BE-NEXT: ld r3, .LC1@toc@l(r3)
+; CHECK-BE-NEXT: lfiwzx f0, 0, r3
+; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1
+; CHECK-BE-NEXT: xxspltw v2, vs0, 0
+; CHECK-BE-NEXT: blr
entry:
-; CHECK-LABEL: test7
-; CHECK: addis
-; CHECK: ld [[TOC:[0-9]+]], .LC1
-; CHECK: lxvwsx 34, 0, 3
-; CHECK-NOT: xxspltw
-; CHECK-BE-LABEL: test7
-; CHECK-BE: addis
-; CHECK-BE: ld [[TOC:[0-9]+]], .LC1
-; CHECK-BE: lxvwsx 34, 0, 3
-; CHECK-BE-NOT: xxspltw
%0 = load float, float* @Globf, align 4
%splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0
%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %splat.splat
}
define <16 x i8> @test8() {
+; CHECK-LABEL: test8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxlxor v2, v2, v2
+; CHECK-NEXT: blr
+
+; CHECK-BE-LABEL: test8:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: xxlxor v2, v2, v2
+; CHECK-BE-NEXT: blr
entry:
-; CHECK-LABEL: test8
-; CHECK: xxlxor 34, 34, 34
-; CHECK-BE-LABEL: test8
-; CHECK-BE: xxlxor 34, 34, 34
ret <16 x i8> zeroinitializer
}
define <16 x i8> @test9() {
+; CHECK-LABEL: test9:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxspltib v2, 1
+; CHECK-NEXT: blr
+
+; CHECK-BE-LABEL: test9:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: xxspltib v2, 1
+; CHECK-BE-NEXT: blr
entry:
-; CHECK-LABEL: test9
-; CHECK: xxspltib 34, 1
-; CHECK-BE-LABEL: test9
-; CHECK-BE: xxspltib 34, 1
ret <16 x i8>
}
define <16 x i8> @test10() {
+; CHECK-LABEL: test10:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxspltib v2, 127
+; CHECK-NEXT: blr
+
+; CHECK-BE-LABEL: test10:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: xxspltib v2, 127
+; CHECK-BE-NEXT: blr
entry:
-; CHECK-LABEL: test10
-; CHECK: xxspltib 34, 127
-; CHECK-BE-LABEL: test10
-; CHECK-BE: xxspltib 34, 127
ret <16 x i8>
}
define <16 x i8> @test11() {
+; CHECK-LABEL: test11:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxspltib v2, 128
+; CHECK-NEXT: blr
+
+; CHECK-BE-LABEL: test11:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: xxspltib v2, 128
+; CHECK-BE-NEXT: blr
entry:
-; CHECK-LABEL: test11
-; CHECK: xxspltib 34, 128
-; CHECK-BE-LABEL: test11
-; CHECK-BE: xxspltib 34, 128
ret <16 x i8>
}
define <16 x i8> @test12() {
+; CHECK-LABEL: test12:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxspltib v2, 255
+; CHECK-NEXT: blr
+
+; CHECK-BE-LABEL: test12:
+; CHECK-BE: # %bb.0: # %entry
+;
CHECK-BE-NEXT: xxspltib v2, 255 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test12 -; CHECK: xxspltib 34, 255 -; CHECK-BE-LABEL: test12 -; CHECK-BE: xxspltib 34, 255 ret <16 x i8> } define <16 x i8> @test13() { +; CHECK-LABEL: test13: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v2, 129 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test13: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxspltib v2, 129 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test13 -; CHECK: xxspltib 34, 129 -; CHECK-BE-LABEL: test13 -; CHECK-BE: xxspltib 34, 129 ret <16 x i8> } define <16 x i8> @test13E127() { +; CHECK-LABEL: test13E127: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v2, 200 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test13E127: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxspltib v2, 200 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test13E127 -; CHECK: xxspltib 34, 200 -; CHECK-BE-LABEL: test13E127 -; CHECK-BE: xxspltib 34, 200 ret <16 x i8> } define <4 x i32> @test14(<4 x i32> %a, i32* nocapture readonly %b) { +; CHECK-LABEL: test14: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lwz r3, 0(r5) +; CHECK-NEXT: mtvsrws v2, r3 +; CHECK-NEXT: addi r3, r3, 5 +; CHECK-NEXT: stw r3, 0(r5) +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test14: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lwz r3, 0(r5) +; CHECK-BE-NEXT: mtvsrws v2, r3 +; CHECK-BE-NEXT: addi r3, r3, 5 +; CHECK-BE-NEXT: stw r3, 0(r5) +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test14 -; CHECK: lwz [[LD:[0-9]+]], -; CHECK: mtvsrws 34, [[LD]] -; CHECK-BE-LABEL: test14 -; CHECK-BE: lwz [[LD:[0-9]+]], -; CHECK-BE: mtvsrws 34, [[LD]] %0 = load i32, i32* %b, align 4 %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer %1 = add i32 %0, 5 store i32 %1, i32* %b, align 4 ret <4 x i32> %splat.splat } Index: vendor/llvm/dist-release_70/test/CodeGen/PowerPC/pr38087.ll =================================================================== --- vendor/llvm/dist-release_70/test/CodeGen/PowerPC/pr38087.ll (revision 341364) +++ vendor/llvm/dist-release_70/test/CodeGen/PowerPC/pr38087.ll (revision 341365) @@ -1,56 +1,55 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr \ ; RUN: -mtriple=powerpc64le-unknown-unknown -ppc-asm-full-reg-names < %s | \ ; RUN: FileCheck %s ; Function Attrs: nounwind readnone speculatable declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #0 ; Function Attrs: nounwind readnone speculatable declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #0 define void @draw_llvm_vs_variant0() { ; CHECK-LABEL: draw_llvm_vs_variant0: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ldx r3, 0, r3 -; CHECK-NEXT: mtvsrd f0, r3 -; CHECK-NEXT: xxswapd v2, vs0 +; CHECK-NEXT: lfd f0, 0(r3) +; CHECK-NEXT: xxpermdi v2, f0, f0, 2 ; CHECK-NEXT: vmrglh v2, v2, v2 ; CHECK-NEXT: vextsh2w v2, v2 ; CHECK-NEXT: xvcvsxwsp vs0, v2 ; CHECK-NEXT: xxspltw vs0, vs0, 2 ; CHECK-NEXT: xvmaddasp vs0, vs0, vs0 ; CHECK-NEXT: stxvx vs0, 0, r3 ; CHECK-NEXT: blr entry: %.size = load i32, i32* undef %0 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %.size, i32 7) %1 = extractvalue { i32, i1 } %0, 0 %2 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %1, i32 0) %3 = extractvalue { i32, i1 } %2, 0 %4 = select i1 false, i32 0, i32 %3 %5 = xor i1 false, true %6 = sext i1 %5 to i32 %7 = load <4 x i16>, <4 x i16>* undef, align 2 %8 = 
extractelement <4 x i16> %7, i32 0 %9 = sext i16 %8 to i32 %10 = insertelement <4 x i32> undef, i32 %9, i32 0 %11 = extractelement <4 x i16> %7, i32 1 %12 = sext i16 %11 to i32 %13 = insertelement <4 x i32> %10, i32 %12, i32 1 %14 = extractelement <4 x i16> %7, i32 2 %15 = sext i16 %14 to i32 %16 = insertelement <4 x i32> %13, i32 %15, i32 2 %17 = extractelement <4 x i16> %7, i32 3 %18 = sext i16 %17 to i32 %19 = insertelement <4 x i32> %16, i32 %18, i32 3 %20 = sitofp <4 x i32> %19 to <4 x float> %21 = insertelement <4 x i32> undef, i32 %6, i32 0 %22 = shufflevector <4 x i32> %21, <4 x i32> undef, <4 x i32> zeroinitializer %23 = bitcast <4 x float> %20 to <4 x i32> %24 = and <4 x i32> %23, %22 %25 = bitcast <4 x i32> %24 to <4 x float> %26 = shufflevector <4 x float> %25, <4 x float> undef, <4 x i32> %27 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> %26) store <4 x float> %27, <4 x float>* undef ret void } Index: vendor/llvm/dist-release_70/test/CodeGen/PowerPC/qpx-load-splat.ll =================================================================== --- vendor/llvm/dist-release_70/test/CodeGen/PowerPC/qpx-load-splat.ll (revision 341364) +++ vendor/llvm/dist-release_70/test/CodeGen/PowerPC/qpx-load-splat.ll (revision 341365) @@ -1,75 +1,81 @@ -; RUN: llc -verify-machineinstrs < %s | FileCheck %s -target datalayout = "E-m:e-i64:64-n32:64" -target triple = "powerpc64-bgq-linux" +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -verify-machineinstrs < %s | FileCheck %s ; Function Attrs: norecurse nounwind readonly define <4 x double> @foo(double* nocapture readonly %a) #0 { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvdsx v2, 0, r3 +; CHECK-NEXT: vmr v3, v2 +; CHECK-NEXT: blr entry: %0 = load double, double* %a, align 8 %vecinit.i = insertelement <4 x double> undef, double %0, i32 0 %shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer ret <4 x double> %shuffle.i - -; CHECK-LABEL: @foo -; CHECK: lfd 1, 0(3) -; CHECK: blr } define <4 x double> @foox(double* nocapture readonly %a, i64 %idx) #0 { +; CHECK-LABEL: foox: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sldi r4, r4, 3 +; CHECK-NEXT: lxvdsx v2, r3, r4 +; CHECK-NEXT: vmr v3, v2 +; CHECK-NEXT: blr entry: %p = getelementptr double, double* %a, i64 %idx %0 = load double, double* %p, align 8 %vecinit.i = insertelement <4 x double> undef, double %0, i32 0 %shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer ret <4 x double> %shuffle.i - -; CHECK-LABEL: @foox -; CHECK: sldi [[REG1:[0-9]+]], 4, 3 -; CHECK: lfdx 1, 3, [[REG1]] -; CHECK: blr } define <4 x double> @fooxu(double* nocapture readonly %a, i64 %idx, double** %pptr) #0 { +; CHECK-LABEL: fooxu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sldi r4, r4, 3 +; CHECK-NEXT: lfdux f0, r3, r4 +; CHECK-NEXT: xxspltd v2, vs0, 0 +; CHECK-NEXT: std r3, 0(r5) +; CHECK-NEXT: vmr v3, v2 +; CHECK-NEXT: blr entry: %p = getelementptr double, double* %a, i64 %idx %0 = load double, double* %p, align 8 %vecinit.i = insertelement <4 x double> undef, double %0, i32 0 %shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer store double* %p, double** %pptr, align 8 ret <4 x double> %shuffle.i - -; CHECK-LABEL: @foox -; CHECK: sldi [[REG1:[0-9]+]], 4, 3 -; CHECK: lfdux 1, 3, [[REG1]] -; CHECK: std 3, 0(5) -; CHECK: blr } define <4 x float> @foof(float* nocapture readonly %a) 
#0 { +; CHECK-LABEL: foof: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lfiwzx f0, 0, r3 +; CHECK-NEXT: xxpermdi vs0, f0, f0, 2 +; CHECK-NEXT: xxspltw v2, vs0, 3 +; CHECK-NEXT: blr entry: %0 = load float, float* %a, align 4 %vecinit.i = insertelement <4 x float> undef, float %0, i32 0 %shuffle.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer ret <4 x float> %shuffle.i - -; CHECK-LABEL: @foof -; CHECK: lfs 1, 0(3) -; CHECK: blr } define <4 x float> @foofx(float* nocapture readonly %a, i64 %idx) #0 { +; CHECK-LABEL: foofx: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sldi r4, r4, 2 +; CHECK-NEXT: lfiwzx f0, r3, r4 +; CHECK-NEXT: xxpermdi vs0, f0, f0, 2 +; CHECK-NEXT: xxspltw v2, vs0, 3 +; CHECK-NEXT: blr entry: %p = getelementptr float, float* %a, i64 %idx %0 = load float, float* %p, align 4 %vecinit.i = insertelement <4 x float> undef, float %0, i32 0 %shuffle.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer ret <4 x float> %shuffle.i - -; CHECK-LABEL: @foofx -; CHECK: sldi [[REG1:[0-9]+]], 4, 2 -; CHECK: lfsx 1, 3, [[REG1]] -; CHECK: blr } -attributes #0 = { norecurse nounwind readonly "target-cpu"="a2q" "target-features"="+qpx,-altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-vsx" } Index: vendor/llvm/dist-release_70/test/CodeGen/PowerPC/scalar_vector_test_1.ll =================================================================== --- vendor/llvm/dist-release_70/test/CodeGen/PowerPC/scalar_vector_test_1.ll (nonexistent) +++ vendor/llvm/dist-release_70/test/CodeGen/PowerPC/scalar_vector_test_1.ll (revision 341365) @@ -0,0 +1,292 @@ +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test1(i64* nocapture readonly %int64, <2 x i64> %vec) { +; P9LE-LABEL: s2v_test1: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 0(r3) +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test1: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 0(r3) +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr +entry: + %0 = load i64, i64* %int64, align 8 + %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test2(i64* nocapture readonly %int64, <2 x i64> %vec) { +; P9LE-LABEL: s2v_test2: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 8(r3) +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test2: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 8(r3) +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i64, i64* %int64, i64 1 + %0 = load i64, i64* %arrayidx, align 8 + %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0 + ret <2 x i64> %vecins +} + +; 
Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test3(i64* nocapture readonly %int64, <2 x i64> %vec, i32 signext %Idx) { +; P9LE-LABEL: s2v_test3: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r7, 3 +; P9LE-NEXT: lfdx f0, r3, r4 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test3 +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r7, 3 +; P9BE-NEXT: lfdx f0, r3, r4 +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr +entry: + %idxprom = sext i32 %Idx to i64 + %arrayidx = getelementptr inbounds i64, i64* %int64, i64 %idxprom + %0 = load i64, i64* %arrayidx, align 8 + %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test4(i64* nocapture readonly %int64, <2 x i64> %vec) { +; P9LE-LABEL: s2v_test4: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 8(r3) +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test4: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 8(r3) +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i64, i64* %int64, i64 1 + %0 = load i64, i64* %arrayidx, align 8 + %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test5(<2 x i64> %vec, i64* nocapture readonly %ptr1) { +; P9LE-LABEL: s2v_test5: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 0(r5) +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test5: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 0(r5) +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr +entry: + %0 = load i64, i64* %ptr1, align 8 + %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x double> @s2v_test_f1(double* nocapture readonly %f64, <2 x double> %vec) { +; P9LE-LABEL: s2v_test_f1: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 0(r3) +; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f1: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 0(r3) +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f1: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfdx f0, 0, r3 +; P8LE-NEXT: xxspltd vs0, vs0, 0 +; P8LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f1: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfdx f0, 0, r3 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %0 = load double, double* %f64, align 8 + %vecins = insertelement <2 x double> %vec, double %0, i32 0 + ret <2 x double> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x double> @s2v_test_f2(double* nocapture readonly %f64, <2 x double> %vec) { +; P9LE-LABEL: s2v_test_f2: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 8(r3) +; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f2: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 8(r3) +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f2: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 8 +; P8LE-NEXT: lfdx f0, 0, r3 +; P8LE-NEXT: xxspltd vs0, vs0, 0 +; P8LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f2: 
+; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addi r3, r3, 8 +; P8BE-NEXT: lfdx f0, 0, r3 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds double, double* %f64, i64 1 + %0 = load double, double* %arrayidx, align 8 + %vecins = insertelement <2 x double> %vec, double %0, i32 0 + ret <2 x double> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x double> @s2v_test_f3(double* nocapture readonly %f64, <2 x double> %vec, i32 signext %Idx) { +; P9LE-LABEL: s2v_test_f3: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r7, 3 +; P9LE-NEXT: lfdx f0, r3, r4 +; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f3: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r7, 3 +; P9BE-NEXT: lfdx f0, r3, r4 +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f3: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r7, 3 +; P8LE-NEXT: lfdx f0, r3, r4 +; P8LE-NEXT: xxspltd vs0, vs0, 0 +; P8LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f3: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r7, 3 +; P8BE-NEXT: lfdx f0, r3, r4 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %idxprom = sext i32 %Idx to i64 + %arrayidx = getelementptr inbounds double, double* %f64, i64 %idxprom + %0 = load double, double* %arrayidx, align 8 + %vecins = insertelement <2 x double> %vec, double %0, i32 0 + ret <2 x double> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x double> @s2v_test_f4(double* nocapture readonly %f64, <2 x double> %vec) { +; P9LE-LABEL: s2v_test_f4: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 8(r3) +; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f4: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 8(r3) +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f4: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 8 +; P8LE-NEXT: lfdx f0, 0, r3 +; P8LE-NEXT: xxspltd vs0, vs0, 0 +; P8LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f4: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addi r3, r3, 8 +; P8BE-NEXT: lfdx f0, 0, r3 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds double, double* %f64, i64 1 + %0 = load double, double* %arrayidx, align 8 + %vecins = insertelement <2 x double> %vec, double %0, i32 0 + ret <2 x double> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x double> @s2v_test_f5(<2 x double> %vec, double* nocapture readonly %ptr1) { +; P9LE-LABEL: s2v_test_f5: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 0(r5) +; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f5: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 0(r5) +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f5: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfdx f0, 0, r5 +; P8LE-NEXT: xxspltd vs0, vs0, 0 +; P8LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f5: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfdx f0, 0, r5 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %0 = load double, double* %ptr1, align 8 + %vecins = insertelement <2 x double> %vec, double %0, i32 0 + ret <2 x double> %vecins +} + Index: 
vendor/llvm/dist-release_70/test/CodeGen/PowerPC/scalar_vector_test_2.ll =================================================================== --- vendor/llvm/dist-release_70/test/CodeGen/PowerPC/scalar_vector_test_2.ll (nonexistent) +++ vendor/llvm/dist-release_70/test/CodeGen/PowerPC/scalar_vector_test_2.ll (revision 341365) @@ -0,0 +1,118 @@ +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE + +define void @test_liwzx1(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) { +; P9LE-LABEL: test_liwzx1: +; P9LE: # %bb.0: +; P9LE-NEXT: lfiwzx f0, 0, r3 +; P9LE-NEXT: lfiwzx f1, 0, r4 +; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P9LE-NEXT: xxpermdi vs1, f1, f1, 2 +; P9LE-NEXT: xvaddsp vs0, vs0, vs1 +; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; P9LE-NEXT: xscvspdpn f0, vs0 +; P9LE-NEXT: stfs f0, 0(r5) +; P9LE-NEXT: blr + +; P9BE-LABEL: test_liwzx1: +; P9BE: # %bb.0: +; P9BE-NEXT: lfiwzx f0, 0, r3 +; P9BE-NEXT: lfiwzx f1, 0, r4 +; P9BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P9BE-NEXT: xxsldwi vs1, f1, f1, 1 +; P9BE-NEXT: xvaddsp vs0, vs0, vs1 +; P9BE-NEXT: xscvspdpn f0, vs0 +; P9BE-NEXT: stfs f0, 0(r5) +; P9BE-NEXT: blr + +; P8LE-LABEL: test_liwzx1: +; P8LE: # %bb.0: +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: lfiwzx f1, 0, r4 +; P8LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P8LE-NEXT: xxpermdi vs1, f1, f1, 2 +; P8LE-NEXT: xvaddsp vs0, vs0, vs1 +; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; P8LE-NEXT: xscvspdpn f0, vs0 +; P8LE-NEXT: stfsx f0, 0, r5 +; P8LE-NEXT: blr + +; P8BE-LABEL: test_liwzx1: +; P8BE: # %bb.0: +; P8BE-NEXT: lfiwzx f0, 0, r3 +; P8BE-NEXT: lfiwzx f1, 0, r4 +; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P8BE-NEXT: xxsldwi vs1, f1, f1, 1 +; P8BE-NEXT: xvaddsp vs0, vs0, vs1 +; P8BE-NEXT: xscvspdpn f0, vs0 +; P8BE-NEXT: stfsx f0, 0, r5 +; P8BE-NEXT: blr + %a = load <1 x float>, <1 x float>* %A + %b = load <1 x float>, <1 x float>* %B + %X = fadd <1 x float> %a, %b + store <1 x float> %X, <1 x float>* %C + ret void +} + +define <1 x float>* @test_liwzx2(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) { +; P9LE-LABEL: test_liwzx2: +; P9LE: # %bb.0: +; P9LE-NEXT: lfiwzx f0, 0, r3 +; P9LE-NEXT: lfiwzx f1, 0, r4 +; P9LE-NEXT: mr r3, r5 +; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P9LE-NEXT: xxpermdi vs1, f1, f1, 2 +; P9LE-NEXT: xvsubsp vs0, vs0, vs1 +; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; P9LE-NEXT: xscvspdpn f0, vs0 +; P9LE-NEXT: stfs f0, 0(r5) +; P9LE-NEXT: blr + +; P9BE-LABEL: test_liwzx2: +; P9BE: # %bb.0: +; P9BE-NEXT: lfiwzx f0, 0, r3 +; P9BE-NEXT: lfiwzx f1, 0, r4 +; P9BE-NEXT: mr r3, r5 +; P9BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P9BE-NEXT: xxsldwi vs1, f1, f1, 1 +; P9BE-NEXT: xvsubsp vs0, vs0, vs1 +; P9BE-NEXT: xscvspdpn f0, vs0 +; P9BE-NEXT: stfs f0, 0(r5) +; P9BE-NEXT: blr + +; P8LE-LABEL: test_liwzx2: +; P8LE: # %bb.0: +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: lfiwzx f1, 0, r4 +; P8LE-NEXT: mr r3, r5 +; P8LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P8LE-NEXT: xxpermdi vs1, 
f1, f1, 2 +; P8LE-NEXT: xvsubsp vs0, vs0, vs1 +; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; P8LE-NEXT: xscvspdpn f0, vs0 +; P8LE-NEXT: stfsx f0, 0, r5 +; P8LE-NEXT: blr + +; P8BE-LABEL: test_liwzx2: +; P8BE: # %bb.0: +; P8BE-NEXT: lfiwzx f0, 0, r3 +; P8BE-NEXT: lfiwzx f1, 0, r4 +; P8BE-NEXT: mr r3, r5 +; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P8BE-NEXT: xxsldwi vs1, f1, f1, 1 +; P8BE-NEXT: xvsubsp vs0, vs0, vs1 +; P8BE-NEXT: xscvspdpn f0, vs0 +; P8BE-NEXT: stfsx f0, 0, r5 +; P8BE-NEXT: blr + %a = load <1 x float>, <1 x float>* %A + %b = load <1 x float>, <1 x float>* %B + %X = fsub <1 x float> %a, %b + store <1 x float> %X, <1 x float>* %C + ret <1 x float>* %C +} Index: vendor/llvm/dist-release_70/test/CodeGen/PowerPC/scalar_vector_test_3.ll =================================================================== --- vendor/llvm/dist-release_70/test/CodeGen/PowerPC/scalar_vector_test_3.ll (nonexistent) +++ vendor/llvm/dist-release_70/test/CodeGen/PowerPC/scalar_vector_test_3.ll (revision 341365) @@ -0,0 +1,265 @@ +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test1(i32* nocapture readonly %int32, <2 x i64> %vec) { +; P9LE-LABEL: s2v_test1: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfiwax f0, 0, r3 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test1: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfiwax f0, 0, r3 +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test1: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwax f0, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: xxpermdi v2, v2, v3, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test1: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfiwax f0, 0, r3 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %0 = load i32, i32* %int32, align 4 + %conv = sext i32 %0 to i64 + %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test2(i32* nocapture readonly %int32, <2 x i64> %vec) { +; P9LE-LABEL: s2v_test2: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addi r3, r3, 4 +; P9LE-NEXT: lfiwax f0, 0, r3 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test2: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addi r3, r3, 4 +; P9BE-NEXT: lfiwax f0, 0, r3 +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test2: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 4 +; P8LE-NEXT: lfiwax f0, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: xxpermdi v2, v2, v3, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test2: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addi r3, r3, 4 +; P8BE-NEXT: lfiwax f0, 0, r3 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + 
%arrayidx = getelementptr inbounds i32, i32* %int32, i64 1 + %0 = load i32, i32* %arrayidx, align 4 + %conv = sext i32 %0 to i64 + %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test3(i32* nocapture readonly %int32, <2 x i64> %vec, i32 signext %Idx) { +; P9LE-LABEL: s2v_test3: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r7, 2 +; P9LE-NEXT: lfiwax f0, r3, r4 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test3: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r7, 2 +; P9BE-NEXT: lfiwax f0, r3, r4 +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test3: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r7, 2 +; P8LE-NEXT: lfiwax f0, r3, r4 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: xxpermdi v2, v2, v3, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test3: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r7, 2 +; P8BE-NEXT: lfiwax f0, r3, r4 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %idxprom = sext i32 %Idx to i64 + %arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %conv = sext i32 %0 to i64 + %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test4(i32* nocapture readonly %int32, <2 x i64> %vec) { +; P9LE-LABEL: s2v_test4: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addi r3, r3, 4 +; P9LE-NEXT: lfiwax f0, 0, r3 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test4: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addi r3, r3, 4 +; P9BE-NEXT: lfiwax f0, 0, r3 +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test4: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 4 +; P8LE-NEXT: lfiwax f0, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: xxpermdi v2, v2, v3, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test4: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addi r3, r3, 4 +; P8BE-NEXT: lfiwax f0, 0, r3 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1 + %0 = load i32, i32* %arrayidx, align 4 + %conv = sext i32 %0 to i64 + %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test5(<2 x i64> %vec, i32* nocapture readonly %ptr1) { +; P9LE-LABEL: s2v_test5: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfiwax f0, 0, r5 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test5: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfiwax f0, 0, r5 +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test5: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwax f0, 0, r5 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: xxpermdi v2, v2, v3, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test5: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfiwax f0, 0, r5 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %0 = load i32, i32* %ptr1, align 4 + %conv = sext i32 %0 to i64 + %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test6(i32* nocapture readonly %ptr) { +; P9LE-LABEL: s2v_test6: 
+; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfiwax f0, 0, r3 +; P9LE-NEXT: xxpermdi v2, f0, f0, 2 +; P9LE-NEXT: xxspltd v2, v2, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test6: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfiwax f0, 0, r3 +; P9BE-NEXT: xxspltd v2, vs0, 0 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test6: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwax f0, 0, r3 +; P8LE-NEXT: xxpermdi v2, f0, f0, 2 +; P8LE-NEXT: xxspltd v2, v2, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test6: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfiwax f0, 0, r3 +; P8BE-NEXT: xxspltd v2, vs0, 0 +; P8BE-NEXT: blr +entry: + %0 = load i32, i32* %ptr, align 4 + %conv = sext i32 %0 to i64 + %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test7(i32* nocapture readonly %ptr) { +; P9LE-LABEL: s2v_test7: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfiwax f0, 0, r3 +; P9LE-NEXT: xxpermdi v2, f0, f0, 2 +; P9LE-NEXT: xxspltd v2, v2, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test7: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfiwax f0, 0, r3 +; P9BE-NEXT: xxspltd v2, vs0, 0 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test7: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwax f0, 0, r3 +; P8LE-NEXT: xxpermdi v2, f0, f0, 2 +; P8LE-NEXT: xxspltd v2, v2, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test7: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfiwax f0, 0, r3 +; P8BE-NEXT: xxspltd v2, vs0, 0 +; P8BE-NEXT: blr +entry: + %0 = load i32, i32* %ptr, align 4 + %conv = sext i32 %0 to i64 + %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +} + Index: vendor/llvm/dist-release_70/test/CodeGen/PowerPC/scalar_vector_test_4.ll =================================================================== --- vendor/llvm/dist-release_70/test/CodeGen/PowerPC/scalar_vector_test_4.ll (nonexistent) +++ vendor/llvm/dist-release_70/test/CodeGen/PowerPC/scalar_vector_test_4.ll (revision 341365) @@ -0,0 +1,341 @@ +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec) { +; P8LE-LABEL: s2v_test1: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: addis r4, r2, .LCPI0_0@toc@ha +; P8LE-NEXT: addi r3, r4, .LCPI0_0@toc@l +; P8LE-NEXT: lvx v4, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vperm v2, v3, v2, v4 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test1: +; P8BE: # %bb.0: # %entry +; P8BE: lfiwzx f0, 0, r3 +; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P8BE: xxsldwi vs0, v2, vs0, 1 +; P8BE: xxsldwi v2, vs0, vs0, 3 +; 
P8BE-NEXT: blr +entry: + %0 = load i32, i32* %int32, align 4 + %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0 + ret <4 x i32> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @s2v_test2(i32* nocapture readonly %int32, <4 x i32> %vec) { +; P8LE-LABEL: s2v_test2: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 4 +; P8LE-NEXT: addis r4, r2, .LCPI1_0@toc@ha +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: addi r3, r4, .LCPI1_0@toc@l +; P8LE-NEXT: lvx v4, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vperm v2, v3, v2, v4 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test2: +; P8BE: # %bb.0: # %entry +; P8BE: addi r3, r3, 4 +; P8BE: lfiwzx f0, 0, r3 +; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P8BE: xxsldwi vs0, v2, vs0, 1 +; P8BE: xxsldwi v2, vs0, vs0, 3 +; P8BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1 + %0 = load i32, i32* %arrayidx, align 4 + %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0 + ret <4 x i32> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @s2v_test3(i32* nocapture readonly %int32, <4 x i32> %vec, i32 signext %Idx) { +; P8LE-LABEL: s2v_test3: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r5, r7, 2 +; P8LE-NEXT: addis r4, r2, .LCPI2_0@toc@ha +; P8LE-NEXT: lfiwzx f0, r3, r5 +; P8LE-NEXT: addi r3, r4, .LCPI2_0@toc@l +; P8LE-NEXT: lvx v4, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vperm v2, v3, v2, v4 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test3: +; P8BE: # %bb.0: # %entry +; P8BE: sldi r4, r7, 2 +; P8BE: lfiwzx f0, r3, r4 +; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P8BE: xxsldwi vs0, v2, vs0, 1 +; P8BE: xxsldwi v2, vs0, vs0, 3 +; P8BE-NEXT: blr +entry: + %idxprom = sext i32 %Idx to i64 + %arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0 + ret <4 x i32> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @s2v_test4(i32* nocapture readonly %int32, <4 x i32> %vec) { +; P8LE-LABEL: s2v_test4: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 4 +; P8LE-NEXT: addis r4, r2, .LCPI3_0@toc@ha +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: addi r3, r4, .LCPI3_0@toc@l +; P8LE-NEXT: lvx v4, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vperm v2, v3, v2, v4 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test4: +; P8BE: # %bb.0: # %entry +; P8BE: addi r3, r3, 4 +; P8BE: lfiwzx f0, 0, r3 +; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P8BE: xxsldwi vs0, v2, vs0, 1 +; P8BE: xxsldwi v2, vs0, vs0, 3 +; P8BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1 + %0 = load i32, i32* %arrayidx, align 4 + %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0 + ret <4 x i32> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @s2v_test5(<4 x i32> %vec, i32* nocapture readonly %ptr1) { +; P8LE-LABEL: s2v_test5: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwzx f0, 0, r5 +; P8LE-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; P8LE-NEXT: addi r3, r3, .LCPI4_0@toc@l +; P8LE-NEXT: lvx v4, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vperm v2, v3, v2, v4 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test5: +; P8BE: # %bb.0: # %entry +; P8BE: lfiwzx f0, 0, r5 +; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P8BE: xxsldwi vs0, v2, vs0, 1 +; P8BE: xxsldwi v2, vs0, vs0, 3 +; P8BE-NEXT: blr +entry: + %0 = load i32, i32* %ptr1, align 4 + %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0 + ret <4 x i32> %vecins +} + +; 
Function Attrs: norecurse nounwind readonly +define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec) { +; P8LE-LABEL: s2v_test_f1: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: addis r4, r2, .LCPI5_0@toc@ha +; P8LE-NEXT: addi r3, r4, .LCPI5_0@toc@l +; P8LE-NEXT: lvx v4, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vperm v2, v3, v2, v4 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f1: +; P8BE: # %bb.0: # %entry +; P8BE: lfiwzx f0, 0, r3 +; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P8BE: xxsldwi vs0, v2, vs0, 1 +; P8BE: xxsldwi v2, vs0, vs0, 3 +; P8BE-NEXT: blr +entry: + %0 = load float, float* %f64, align 4 + %vecins = insertelement <4 x float> %vec, float %0, i32 0 + ret <4 x float> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec) { +; P9LE-LABEL: s2v_test_f2: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addi r3, r3, 4 +; P9LE-NEXT: xxspltw v2, v2, 2 +; P9LE-NEXT: lfiwzx f0, 0, r3 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f2: +; P9BE: # %bb.0: # %entry +; P9BE: addi r3, r3, 4 +; P9BE: xxspltw v2, v2, 1 +; P9BE: lfiwzx f0, 0, r3 +; P9BE-NEXT: xxsldwi v3, f0, f0, 1 +; P9BE: vmrghw v2, v3, v2 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f2: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 4 +; P8LE-NEXT: xxspltw v2, v2, 2 +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f2: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addi r3, r3, 4 +; P8BE-NEXT: xxspltw v2, v2, 1 +; P8BE-NEXT: lfiwzx f0, 0, r3 +; P8BE-NEXT: xxsldwi v3, f0, f0, 1 +; P8BE-NEXT: vmrghw v2, v3, v2 +; P8BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds float, float* %f64, i64 1 + %0 = load float, float* %arrayidx, align 8 + %vecins = insertelement <2 x float> %vec, float %0, i32 0 + ret <2 x float> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec, i32 signext %Idx) { +; P9LE-LABEL: s2v_test_f3: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r7, 2 +; P9LE-NEXT: xxspltw v2, v2, 2 +; P9LE-NEXT: lfiwzx f0, r3, r4 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f3: +; P9BE: # %bb.0: # %entry +; P9BE: sldi r4, r7, 2 +; P9BE: xxspltw v2, v2, 1 +; P9BE: lfiwzx f0, r3, r4 +; P9BE-NEXT: xxsldwi v3, f0, f0, 1 +; P9BE: vmrghw v2, v3, v2 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f3: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r7, 2 +; P8LE-NEXT: xxspltw v2, v2, 2 +; P8LE-NEXT: lfiwzx f0, r3, r4 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f3: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r7, 2 +; P8BE-NEXT: xxspltw v2, v2, 1 +; P8BE-NEXT: lfiwzx f0, r3, r4 +; P8BE-NEXT: xxsldwi v3, f0, f0, 1 +; P8BE-NEXT: vmrghw v2, v3, v2 +; P8BE-NEXT: blr +entry: + %idxprom = sext i32 %Idx to i64 + %arrayidx = getelementptr inbounds float, float* %f64, i64 %idxprom + %0 = load float, float* %arrayidx, align 8 + %vecins = insertelement <2 x float> %vec, float %0, i32 0 + ret <2 x float> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec) { +; P9LE-LABEL: s2v_test_f4: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addi r3, 
r3, 4 +; P9LE-NEXT: xxspltw v2, v2, 2 +; P9LE-NEXT: lfiwzx f0, 0, r3 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f4: +; P9BE: # %bb.0: # %entry +; P9BE: addi r3, r3, 4 +; P9BE: xxspltw v2, v2, 1 +; P9BE: lfiwzx f0, 0, r3 +; P9BE-NEXT: xxsldwi v3, f0, f0, 1 +; P9BE: vmrghw v2, v3, v2 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f4: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 4 +; P8LE-NEXT: xxspltw v2, v2, 2 +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f4: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addi r3, r3, 4 +; P8BE-NEXT: xxspltw v2, v2, 1 +; P8BE-NEXT: lfiwzx f0, 0, r3 +; P8BE-NEXT: xxsldwi v3, f0, f0, 1 +; P8BE-NEXT: vmrghw v2, v3, v2 +; P8BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds float, float* %f64, i64 1 + %0 = load float, float* %arrayidx, align 8 + %vecins = insertelement <2 x float> %vec, float %0, i32 0 + ret <2 x float> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr1) { +; P9LE-LABEL: s2v_test_f5: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfiwzx f0, 0, r5 +; P9LE-NEXT: xxspltw v2, v2, 2 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f5: +; P9BE: # %bb.0: # %entry +; P9BE: lfiwzx f0, 0, r5 +; P9BE: xxspltw v2, v2, 1 +; P9BE-NEXT: xxsldwi v3, f0, f0, 1 +; P9BE: vmrghw v2, v3, v2 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f5: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwzx f0, 0, r5 +; P8LE-NEXT: xxspltw v2, v2, 2 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f5: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfiwzx f0, 0, r5 +; P8BE-NEXT: xxspltw v2, v2, 1 +; P8BE-NEXT: xxsldwi v3, f0, f0, 1 +; P8BE-NEXT: vmrghw v2, v3, v2 +; P8BE-NEXT: blr +entry: + %0 = load float, float* %ptr1, align 8 + %vecins = insertelement <2 x float> %vec, float %0, i32 0 + ret <2 x float> %vecins +} + Index: vendor/llvm/dist-release_70/test/CodeGen/PowerPC/swaps-le-6.ll =================================================================== --- vendor/llvm/dist-release_70/test/CodeGen/PowerPC/swaps-le-6.ll (revision 341364) +++ vendor/llvm/dist-release_70/test/CodeGen/PowerPC/swaps-le-6.ll (revision 341365) @@ -1,64 +1,89 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr8 \ -; RUN: -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -O3 < %s | FileCheck %s ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-P9 \ +; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \ +; RUN: < %s | FileCheck %s --check-prefix=CHECK-P9 \ ; RUN: --implicit-check-not xxswapd ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \ -; RUN: -verify-machineinstrs -mattr=-power9-vector < %s | FileCheck %s +; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \ +; RUN: -mattr=-power9-vector < %s | FileCheck %s ; These tests verify that VSX swap optimization works when loading a scalar ; into a vector register. 
@x = global <2 x double> , align 16 @z = global <2 x double> , align 16 @y = global double 1.780000e+00, align 8 define void @bar0() { +; CHECK-LABEL: bar0: +; CHECK: # %bb.0: # %entry +; CHECK: addis r3, r2, .LC0@toc@ha +; CHECK: addis r4, r2, .LC1@toc@ha +; CHECK: ld r3, .LC0@toc@l(r3) +; CHECK: addis r3, r2, .LC2@toc@ha +; CHECK: ld r3, .LC2@toc@l(r3) +; CHECK: xxpermdi vs0, vs0, vs1, 1 +; CHECK: stxvd2x vs0, 0, r3 +; CHECK: blr +; +; CHECK-P9-LABEL: bar0: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9: addis r3, r2, .LC0@toc@ha +; CHECK-P9: addis r4, r2, .LC1@toc@ha +; CHECK-P9: ld r3, .LC0@toc@l(r3) +; CHECK-P9: ld r4, .LC1@toc@l(r4) +; CHECK-P9: lfd f0, 0(r3) +; CHECK-P9: lxvx vs1, 0, r4 +; CHECK-P9: addis r3, r2, .LC2@toc@ha +; CHECK-P9: ld r3, .LC2@toc@l(r3) +; CHECK-P9: xxpermdi vs0, f0, f0, 2 +; CHECK-P9: xxpermdi vs0, vs1, vs0, 1 +; CHECK-P9: stxvx vs0, 0, r3 +; CHECK-P9: blr entry: %0 = load <2 x double>, <2 x double>* @x, align 16 %1 = load double, double* @y, align 8 %vecins = insertelement <2 x double> %0, double %1, i32 0 store <2 x double> %vecins, <2 x double>* @z, align 16 ret void } -; CHECK-LABEL: @bar0 -; CHECK-DAG: lxvd2x [[REG1:[0-9]+]] -; CHECK-DAG: lfdx [[REG2:[0-9]+]] -; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0 -; CHECK: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1 -; CHECK: stxvd2x [[REG5]] - -; CHECK-P9-LABEL: @bar0 -; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]] -; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3) -; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0 -; CHECK-P9: xxpermdi [[REG5:[0-9]+]], [[REG1]], [[REG4]], 1 -; CHECK-P9: stxvx [[REG5]] - define void @bar1() { +; CHECK-LABEL: bar1: +; CHECK: # %bb.0: # %entry +; CHECK: addis r3, r2, .LC0@toc@ha +; CHECK: addis r4, r2, .LC1@toc@ha +; CHECK: ld r3, .LC0@toc@l(r3) +; CHECK: addis r3, r2, .LC2@toc@ha +; CHECK: ld r3, .LC2@toc@l(r3) +; CHECK: xxmrghd vs0, vs1, vs0 +; CHECK: stxvd2x vs0, 0, r3 +; CHECK: blr +; +; CHECK-P9-LABEL: bar1: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9: addis r3, r2, .LC0@toc@ha +; CHECK-P9: addis r4, r2, .LC1@toc@ha +; CHECK-P9: ld r3, .LC0@toc@l(r3) +; CHECK-P9: ld r4, .LC1@toc@l(r4) +; CHECK-P9: lfd f0, 0(r3) +; CHECK-P9: lxvx vs1, 0, r4 +; CHECK-P9: addis r3, r2, .LC2@toc@ha +; CHECK-P9: ld r3, .LC2@toc@l(r3) +; CHECK-P9: xxpermdi vs0, f0, f0, 2 +; CHECK-P9: xxmrgld vs0, vs0, vs1 +; CHECK-P9: stxvx vs0, 0, r3 +; CHECK-P9: blr entry: %0 = load <2 x double>, <2 x double>* @x, align 16 %1 = load double, double* @y, align 8 %vecins = insertelement <2 x double> %0, double %1, i32 1 store <2 x double> %vecins, <2 x double>* @z, align 16 ret void } - -; CHECK-LABEL: @bar1 -; CHECK-DAG: lxvd2x [[REG1:[0-9]+]] -; CHECK-DAG: lfdx [[REG2:[0-9]+]] -; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0 -; CHECK: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]] -; CHECK: stxvd2x [[REG5]] - -; CHECK-P9-LABEL: @bar1 -; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]] -; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3) -; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0 -; CHECK-P9: xxmrgld [[REG5:[0-9]+]], [[REG4]], [[REG1]] -; CHECK-P9: stxvx [[REG5]] Index: vendor/llvm/dist-release_70/test/CodeGen/PowerPC/vsx_insert_extract_le.ll =================================================================== --- vendor/llvm/dist-release_70/test/CodeGen/PowerPC/vsx_insert_extract_le.ll (revision 341364) +++ vendor/llvm/dist-release_70/test/CodeGen/PowerPC/vsx_insert_extract_le.ll (revision 341365) @@ -1,74 +1,125 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx \ -; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc 
-verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \ +; RUN: | FileCheck %s -; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector \ -; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \ +; RUN: | FileCheck --check-prefix=CHECK-P9-VECTOR %s -; RUN: llc -verify-machineinstrs -mcpu=pwr9 \ +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s \ ; RUN: --check-prefix=CHECK-P9 --implicit-check-not xxswapd define <2 x double> @testi0(<2 x double>* %p1, double* %p2) { +; CHECK-LABEL: testi0: +; CHECK: # %bb.0: +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: lfdx f1, 0, r4 +; CHECK-NEXT: xxswapd vs0, vs0 +; CHECK-NEXT: xxspltd vs1, vs1, 0 +; CHECK-NEXT: xxpermdi v2, vs0, vs1, 1 +; CHECK-NEXT: blr +; +; CHECK-P9-VECTOR-LABEL: testi0: +; CHECK-P9-VECTOR: # %bb.0: +; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P9-VECTOR-NEXT: lfdx f1, 0, r4 +; CHECK-P9-VECTOR-NEXT: xxspltd vs1, vs1, 0 +; CHECK-P9-VECTOR-NEXT: xxswapd vs0, vs0 +; CHECK-P9-VECTOR-NEXT: xxpermdi v2, vs0, vs1, 1 +; CHECK-P9-VECTOR-NEXT: blr +; +; CHECK-P9-LABEL: testi0: +; CHECK-P9: # %bb.0: +; CHECK-P9-NEXT: lfd f0, 0(r4) +; CHECK-P9-NEXT: lxv vs1, 0(r3) +; CHECK-P9-NEXT: xxpermdi vs0, f0, f0, 2 +; CHECK-P9-NEXT: xxpermdi v2, vs1, vs0, 1 +; CHECK-P9-NEXT: blr %v = load <2 x double>, <2 x double>* %p1 %s = load double, double* %p2 %r = insertelement <2 x double> %v, double %s, i32 0 ret <2 x double> %r -; CHECK-LABEL: testi0 -; CHECK: lxvd2x 0, 0, 3 -; CHECK: lfdx 1, 0, 4 -; CHECK-DAG: xxspltd 1, 1, 0 -; CHECK-DAG: xxswapd 0, 0 -; CHECK: xxpermdi 34, 0, 1, 1 -; CHECK-P9-LABEL: testi0 -; CHECK-P9: lfd [[REG1:[0-9]+]], 0(4) -; CHECK-P9: lxv [[REG2:[0-9]+]], 0(3) -; CHECK-P9: xxspltd [[REG3:[0-9]+]], [[REG1]], 0 -; CHECK-P9: xxpermdi 34, [[REG2]], [[REG3]], 1 } define <2 x double> @testi1(<2 x double>* %p1, double* %p2) { +; CHECK-LABEL: testi1: +; CHECK: # %bb.0: +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: lfdx f1, 0, r4 +; CHECK-NEXT: xxswapd vs0, vs0 +; CHECK-NEXT: xxspltd vs1, vs1, 0 +; CHECK-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-NEXT: blr +; +; CHECK-P9-VECTOR-LABEL: testi1: +; CHECK-P9-VECTOR: # %bb.0: +; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P9-VECTOR-NEXT: lfdx f1, 0, r4 +; CHECK-P9-VECTOR-NEXT: xxspltd vs1, vs1, 0 +; CHECK-P9-VECTOR-NEXT: xxswapd vs0, vs0 +; CHECK-P9-VECTOR-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-P9-VECTOR-NEXT: blr +; +; CHECK-P9-LABEL: testi1: +; CHECK-P9: # %bb.0: +; CHECK-P9-NEXT: lfd f0, 0(r4) +; CHECK-P9-NEXT: lxv vs1, 0(r3) +; CHECK-P9-NEXT: xxpermdi vs0, f0, f0, 2 +; CHECK-P9-NEXT: xxmrgld v2, vs0, vs1 +; CHECK-P9-NEXT: blr %v = load <2 x double>, <2 x double>* %p1 %s = load double, double* %p2 %r = insertelement <2 x double> %v, double %s, i32 1 ret <2 x double> %r -; CHECK-LABEL: testi1 -; CHECK: lxvd2x 0, 0, 3 -; CHECK: lfdx 1, 0, 4 -; CHECK-DAG: xxspltd 1, 1, 0 -; CHECK-DAG: xxswapd 0, 0 -; CHECK: xxmrgld 34, 1, 0 -; CHECK-P9-LABEL: testi1 -; CHECK-P9: lfd [[REG1:[0-9]+]], 0(4) -; CHECK-P9: lxv [[REG2:[0-9]+]], 0(3) -; CHECK-P9: xxspltd [[REG3:[0-9]+]], [[REG1]], 0 -; CHECK-P9: xxmrgld 34, [[REG3]], [[REG2]] } define double @teste0(<2 x double>* %p1) { +; CHECK-LABEL: teste0: +; CHECK: # %bb.0: +; CHECK-NEXT: 
lxvd2x vs1, 0, r3 +; CHECK: blr +; +; CHECK-P9-VECTOR-LABEL: teste0: +; CHECK-P9-VECTOR: # %bb.0: +; CHECK-P9-VECTOR-NEXT: lxvd2x vs1, 0, r3 +; CHECK-P9-VECTOR: blr +; +; CHECK-P9-LABEL: teste0: +; CHECK-P9: # %bb.0: +; CHECK-P9-NEXT: lfd f1, 0(r3) +; CHECK-P9-NEXT: blr %v = load <2 x double>, <2 x double>* %p1 %r = extractelement <2 x double> %v, i32 0 ret double %r -; CHECK-LABEL: teste0 -; CHECK: lxvd2x 1, 0, 3 -; CHECK-P9-LABEL: teste0 -; CHECK-P9: lfd 1, 0(3) } define double @teste1(<2 x double>* %p1) { +; CHECK-LABEL: teste1: +; CHECK: # %bb.0: +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: xxswapd vs1, vs0 +; CHECK: blr +; +; CHECK-P9-VECTOR-LABEL: teste1: +; CHECK-P9-VECTOR: # %bb.0: +; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P9-VECTOR-NEXT: xxswapd vs1, vs0 +; CHECK-P9-VECTOR: blr +; +; CHECK-P9-LABEL: teste1: +; CHECK-P9: # %bb.0: +; CHECK-P9-NEXT: lfd f1, 8(r3) +; CHECK-P9-NEXT: blr %v = load <2 x double>, <2 x double>* %p1 %r = extractelement <2 x double> %v, i32 1 ret double %r -; CHECK-LABEL: teste1 -; CHECK: lxvd2x 0, 0, 3 -; CHECK: xxswapd 1, 0 -; CHECK-P9-LABEL: teste1 -; CHECK-P9: lfd 1, 8(3) } Index: vendor/llvm/dist-release_70/test/CodeGen/X86/mingw-comdats.ll =================================================================== --- vendor/llvm/dist-release_70/test/CodeGen/X86/mingw-comdats.ll (revision 341364) +++ vendor/llvm/dist-release_70/test/CodeGen/X86/mingw-comdats.ll (revision 341365) @@ -1,70 +1,87 @@ -; RUN: llc -mtriple=x86_64-windows-itanium < %s | FileCheck %s -; RUN: llc -mtriple=x86_64-windows-msvc < %s | FileCheck %s -; RUN: llc -mtriple=x86_64-w64-windows-gnu < %s | FileCheck %s --check-prefix=GNU -; RUN: llc -mtriple=i686-w64-windows-gnu < %s | FileCheck %s --check-prefix=GNU32 -; RUN: llc -mtriple=x86_64-w64-windows-gnu < %s -filetype=obj | llvm-objdump - -headers | FileCheck %s --check-prefix=GNUOBJ +; RUN: llc -function-sections -mtriple=x86_64-windows-itanium < %s | FileCheck %s +; RUN: llc -function-sections -mtriple=x86_64-windows-msvc < %s | FileCheck %s +; RUN: llc -function-sections -mtriple=x86_64-w64-windows-gnu < %s | FileCheck %s --check-prefix=GNU +; RUN: llc -function-sections -mtriple=i686-w64-windows-gnu < %s | FileCheck %s --check-prefix=GNU32 +; RUN: llc -function-sections -mtriple=x86_64-w64-windows-gnu < %s -filetype=obj | llvm-objdump - -headers | FileCheck %s --check-prefix=GNUOBJ ; GCC and MSVC handle comdats completely differently. Make sure we do the right ; thing for each. 
-; Generated with this C++ source:
+; Modeled on this C++ source, with additional modifications for
+; -ffunction-sections:
 ; int bar(int);
 ; __declspec(selectany) int gv = 42;
 ; inline int foo(int x) { return bar(x) + gv; }
 ; int main() { return foo(1); }
 $_Z3fooi = comdat any
 $gv = comdat any
 @gv = weak_odr dso_local global i32 42, comdat, align 4
 ; Function Attrs: norecurse uwtable
 define dso_local i32 @main() #0 {
 entry:
   %call = tail call i32 @_Z3fooi(i32 1)
   ret i32 %call
 }
+; CHECK: .section .text,"xr",one_only,main
 ; CHECK: main:
+; GNU: .section .text$main,"xr",one_only,main
 ; GNU: main:
+; GNU32: .section .text$main,"xr",one_only,_main
+; GNU32: _main:
+define dso_local x86_fastcallcc i32 @fastcall(i32 %x, i32 %y) {
+  %rv = add i32 %x, %y
+  ret i32 %rv
+}
+
+; CHECK: .section .text,"xr",one_only,fastcall
+; CHECK: fastcall:
+; GNU: .section .text$fastcall,"xr",one_only,fastcall
+; GNU: fastcall:
+; GNU32: .section .text$fastcall,"xr",one_only,@fastcall@8
+; GNU32: @fastcall@8:
+
 ; Function Attrs: inlinehint uwtable
 define linkonce_odr dso_local i32 @_Z3fooi(i32 %x) #1 comdat {
 entry:
   %call = tail call i32 @_Z3bari(i32 %x)
   %0 = load i32, i32* @gv, align 4
   %add = add nsw i32 %0, %call
   ret i32 %add
 }
 ; CHECK: .section .text,"xr",discard,_Z3fooi
 ; CHECK: _Z3fooi:
 ; CHECK: .section .data,"dw",discard,gv
 ; CHECK: gv:
 ; CHECK: .long 42
 ; GNU: .section .text$_Z3fooi,"xr",discard,_Z3fooi
 ; GNU: _Z3fooi:
 ; GNU: .section .data$gv,"dw",discard,gv
 ; GNU: gv:
 ; GNU: .long 42
-; GNU32: .section .text$__Z3fooi,"xr",discard,__Z3fooi
+; GNU32: .section .text$_Z3fooi,"xr",discard,__Z3fooi
 ; GNU32: __Z3fooi:
-; GNU32: .section .data$_gv,"dw",discard,_gv
+; GNU32: .section .data$gv,"dw",discard,_gv
 ; GNU32: _gv:
 ; GNU32: .long 42
 ; Make sure the assembler puts the .xdata and .pdata in sections with the right
 ; names.
 ; GNUOBJ: .text$_Z3fooi
 ; GNUOBJ: .xdata$_Z3fooi
 ; GNUOBJ: .data$gv
 ; GNUOBJ: .pdata$_Z3fooi
 declare dso_local i32 @_Z3bari(i32)
 attributes #0 = { norecurse uwtable }
 attributes #1 = { inlinehint uwtable }
Index: vendor/llvm/dist-release_70/test/DebugInfo/Mips/eh_frame.ll
===================================================================
--- vendor/llvm/dist-release_70/test/DebugInfo/Mips/eh_frame.ll (nonexistent)
+++ vendor/llvm/dist-release_70/test/DebugInfo/Mips/eh_frame.ll (revision 341365)
@@ -0,0 +1,38 @@
+; RUN: llc -mtriple mips-unknown-linux-gnu -mattr=+micromips -O3 -filetype=obj -o - %s | llvm-readelf -r | FileCheck %s
+
+; CHECK: .rel.eh_frame
+; CHECK: DW.ref.__gxx_personality_v0
+; CHECK-NEXT: .text
+; CHECK-NEXT: .gcc_except_table
+
+@_ZTIi = external constant i8*
+
+define dso_local i32 @main() local_unnamed_addr personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  %exception.i = tail call i8* @__cxa_allocate_exception(i32 4) nounwind
+  %0 = bitcast i8* %exception.i to i32*
+  store i32 5, i32* %0, align 16
+  invoke void @__cxa_throw(i8* %exception.i, i8* bitcast (i8** @_ZTIi to i8*), i8* null) noreturn
+          to label %.noexc unwind label %return
+
+.noexc:
+  unreachable
+
+return:
+  %1 = landingpad { i8*, i32 }
+          catch i8* null
+  %2 = extractvalue { i8*, i32 } %1, 0
+  %3 = tail call i8* @__cxa_begin_catch(i8* %2) nounwind
+  tail call void @__cxa_end_catch()
+  ret i32 0
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i8* @__cxa_begin_catch(i8*) local_unnamed_addr
+
+declare void @__cxa_end_catch() local_unnamed_addr
+
+declare i8* @__cxa_allocate_exception(i32) local_unnamed_addr
+
+declare void @__cxa_throw(i8*, i8*, i8*) local_unnamed_addr
Index: vendor/llvm/dist-release_70/test/Transforms/LCSSA/rewrite-existing-dbg-values.ll
===================================================================
--- vendor/llvm/dist-release_70/test/Transforms/LCSSA/rewrite-existing-dbg-values.ll (nonexistent)
+++ vendor/llvm/dist-release_70/test/Transforms/LCSSA/rewrite-existing-dbg-values.ll (revision 341365)
@@ -0,0 +1,69 @@
+; RUN: opt -S -lcssa < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Reproducer for PR39019.
+;
+; Verify that the llvm.dbg.value in the %for.cond.cleanup2 block is rewritten
+; to use the PHI node for %add that is created by LCSSA.
+
+; CHECK-LABEL: for.cond.cleanup2:
+; CHECK-NEXT: [[PN:%[^ ]*]] = phi i32 [ %add.lcssa, %for.cond.cleanup1 ]
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[PN]], metadata [[VAR:![0-9]+]], metadata !DIExpression())
+; CHECK-NEXT: call void @bar(i32 [[PN]])
+
+; CHECK-LABEL: for.body:
+; CHECK: %add = add nsw i32 0, 2
+; CHECK: call void @llvm.dbg.value(metadata i32 %add, metadata [[VAR]], metadata !DIExpression())
+
+; CHECK: [[VAR]] = !DILocalVariable(name: "sum",
+
+; Function Attrs: nounwind
+define void @foo() #0 !dbg !6 {
+entry:
+  br label %for.cond.preheader, !dbg !12
+
+for.cond.preheader:                               ; preds = %for.cond.cleanup1, %entry
+  br label %for.body, !dbg !12
+
+for.cond.cleanup2:                                ; preds = %for.cond.cleanup1
+  call void @llvm.dbg.value(metadata i32 %add, metadata !9, metadata !DIExpression()), !dbg !12
+  tail call void @bar(i32 %add) #0, !dbg !12
+  ret void, !dbg !12
+
+for.cond.cleanup1:                                ; preds = %for.body
+  br i1 false, label %for.cond.preheader, label %for.cond.cleanup2, !dbg !12
+
+for.body:                                         ; preds = %for.body, %for.cond.preheader
+  %add = add nsw i32 0, 2, !dbg !12
+  call void @llvm.dbg.value(metadata i32 %add, metadata !9, metadata !DIExpression()), !dbg !12
+  br i1 false, label %for.body, label %for.cond.cleanup1, !dbg !12
+}
+
+; Function Attrs: nounwind
+declare void @bar(i32) #0
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.value(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone speculatable }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 8.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !2)
+!1 = !DIFile(filename: "foo.c", directory: "/")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{!"clang version 8.0.0"}
+!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 10, type: !7, isLocal: false, isDefinition: true, scopeLine: 10, isOptimized: true, unit: !0, retainedNodes: !8)
+!7 = !DISubroutineType(types: !2)
+!8 = !{!9}
+!9 = !DILocalVariable(name: "sum", scope: !10, file: !1, line: 11, type: !11)
+!10 = !DILexicalBlockFile(scope: !6, file: !1, discriminator: 0)
+!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!12 = !DILocation(line: 0, scope: !10)
Index: vendor/llvm/dist-release_70/tools/llvm-exegesis/lib/CMakeLists.txt
===================================================================
--- vendor/llvm/dist-release_70/tools/llvm-exegesis/lib/CMakeLists.txt (revision 341364)
+++ vendor/llvm/dist-release_70/tools/llvm-exegesis/lib/CMakeLists.txt (revision 341365)
@@ -1,46 +1,50 @@
+set(TARGETS_TO_APPEND "")
+
 if (LLVM_TARGETS_TO_BUILD MATCHES "X86")
   add_subdirectory(X86)
-  set(LLVM_EXEGESIS_TARGETS "${LLVM_EXEGESIS_TARGETS} X86" PARENT_SCOPE)
+  set(TARGETS_TO_APPEND "${TARGETS_TO_APPEND} X86")
 endif()
 
 if (LLVM_TARGETS_TO_BUILD MATCHES "AArch64")
   add_subdirectory(AArch64)
-  set(LLVM_EXEGESIS_TARGETS "${LLVM_EXEGESIS_TARGETS} AArch64" PARENT_SCOPE)
+  set(TARGETS_TO_APPEND "${TARGETS_TO_APPEND} AArch64")
 endif()
+
+set(LLVM_EXEGESIS_TARGETS "${LLVM_EXEGESIS_TARGETS} ${TARGETS_TO_APPEND}" PARENT_SCOPE)
 
 add_library(LLVMExegesis
   STATIC
   Analysis.cpp
   Assembler.cpp
   BenchmarkResult.cpp
   BenchmarkRunner.cpp
   Clustering.cpp
   Latency.cpp
   LlvmState.cpp
   MCInstrDescView.cpp
   PerfHelper.cpp
   RegisterAliasing.cpp
   Target.cpp
   Uops.cpp
   )
 
 llvm_update_compile_flags(LLVMExegesis)
 llvm_map_components_to_libnames(libs
   Analysis
   CodeGen
   Core
   ExecutionEngine
   GlobalISel
   MC
   MCDisassembler
   MCJIT
   Object
   ObjectYAML
   Support
   )
 
 if(LLVM_ENABLE_LIBPFM AND HAVE_LIBPFM)
   list(APPEND libs pfm)
 endif()
 target_link_libraries(LLVMExegesis ${libs})
 set_target_properties(LLVMExegesis PROPERTIES FOLDER "Libraries")