Index: projects/clang380-import/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
===================================================================
--- projects/clang380-import/contrib/llvm/lib/CodeGen/RegAllocFast.cpp (revision 296010)
+++ projects/clang380-import/contrib/llvm/lib/CodeGen/RegAllocFast.cpp (revision 296011)
@@ -1,1109 +1,1111 @@
//===-- RegAllocFast.cpp - A fast register allocator for debug code -------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This register allocator allocates registers to a basic block at a time,
// attempting to keep values in registers and reusing registers as appropriate.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;

#define DEBUG_TYPE "regalloc"

STATISTIC(NumStores, "Number of stores added");
STATISTIC(NumLoads , "Number of loads added");
STATISTIC(NumCopies, "Number of copies coalesced");

static RegisterRegAlloc
  fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator);

namespace {
  class RAFast : public MachineFunctionPass {
  public:
    static char ID;
    RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1),
               isBulkSpilling(false) {}

  private:
    MachineFunction *MF;
    MachineRegisterInfo *MRI;
    const TargetRegisterInfo *TRI;
    const TargetInstrInfo *TII;
    RegisterClassInfo RegClassInfo;

    // Basic block currently being allocated.
    MachineBasicBlock *MBB;

    // StackSlotForVirtReg - Maps virtual regs to the frame index where these
    // values are spilled.
    IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;

    // Everything we know about a live virtual register.
    struct LiveReg {
      MachineInstr *LastUse;    // Last instr to use reg.
      unsigned VirtReg;         // Virtual register number.
      unsigned PhysReg;         // Currently held here.
      unsigned short LastOpNum; // OpNum on LastUse.
      bool Dirty;               // Register needs spill.

      explicit LiveReg(unsigned v)
        : LastUse(nullptr), VirtReg(v), PhysReg(0), LastOpNum(0), Dirty(false){}

      unsigned getSparseSetIndex() const {
        return TargetRegisterInfo::virtReg2Index(VirtReg);
      }
    };

    typedef SparseSet<LiveReg> LiveRegMap;

    // LiveVirtRegs - This map contains entries for each virtual register
    // that is currently available in a physical register.
    LiveRegMap LiveVirtRegs;

    DenseMap<unsigned, SmallVector<MachineInstr *, 4>> LiveDbgValueMap;

    // RegState - Track the state of a physical register.
    enum RegState {
      // A disabled register is not available for allocation, but an alias may
      // be in use. A register can only be moved out of the disabled state if
      // all aliases are disabled.
      regDisabled,

      // A free register is not currently in use and can be allocated
      // immediately without checking aliases.
      regFree,

      // A reserved register has been assigned explicitly (e.g., setting up a
      // call parameter), and it remains reserved until it is used.
      regReserved

      // A register state may also be a virtual register number, indicating
      // that the physical register is currently allocated to a virtual
      // register. In that case, LiveVirtRegs contains the inverse mapping.
    };

    // PhysRegState - One of the RegState enums, or a virtreg.
    std::vector<unsigned> PhysRegState;

    // Set of register units.
    typedef SparseSet<unsigned> UsedInInstrSet;

    // Set of register units that are used in the current instruction, and so
    // cannot be allocated.
    UsedInInstrSet UsedInInstr;

    // Mark a physreg as used in this instruction.
    void markRegUsedInInstr(unsigned PhysReg) {
      for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
        UsedInInstr.insert(*Units);
    }

    // Check if a physreg or any of its aliases are used in this instruction.
    bool isRegUsedInInstr(unsigned PhysReg) const {
      for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
        if (UsedInInstr.count(*Units))
          return true;
      return false;
    }

    // SkippedInstrs - Descriptors of instructions whose clobber list was
    // ignored because all registers were spilled. It is still necessary to
    // mark all the clobbered registers as used by the function.
    SmallPtrSet<const MCInstrDesc*, 4> SkippedInstrs;

    // isBulkSpilling - This flag is set when LiveRegMap will be cleared
    // completely after spilling all live registers. LiveRegMap entries should
    // not be erased.
    bool isBulkSpilling;

    enum : unsigned {
      spillClean = 1,
      spillDirty = 100,
      spillImpossible = ~0u
    };
  public:
    const char *getPassName() const override {
      return "Fast Register Allocator";
    }

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.setPreservesCFG();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

  private:
    bool runOnMachineFunction(MachineFunction &Fn) override;
    void AllocateBasicBlock();
    void handleThroughOperands(MachineInstr *MI,
                               SmallVectorImpl<unsigned> &VirtDead);
    int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
    bool isLastUseOfLocalReg(MachineOperand&);

    void addKillFlag(const LiveReg&);
    void killVirtReg(LiveRegMap::iterator);
    void killVirtReg(unsigned VirtReg);
    void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator);
    void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg);

    void usePhysReg(MachineOperand&);
    void definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState);
    unsigned calcSpillCost(unsigned PhysReg) const;
    void assignVirtToPhysReg(LiveReg&, unsigned PhysReg);
    LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) {
      return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
    }
    LiveRegMap::const_iterator findLiveVirtReg(unsigned VirtReg) const {
      return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
    }
    LiveRegMap::iterator assignVirtToPhysReg(unsigned VReg, unsigned PhysReg);
    LiveRegMap::iterator allocVirtReg(MachineInstr *MI, LiveRegMap::iterator,
                                      unsigned Hint);
    LiveRegMap::iterator defineVirtReg(MachineInstr *MI, unsigned OpNum,
                                       unsigned VirtReg, unsigned Hint);
    LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum,
                                       unsigned VirtReg, unsigned Hint);
    void spillAll(MachineBasicBlock::iterator MI);
    bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg);
  };
  char RAFast::ID = 0;
}

/// getStackSpaceFor - This allocates space for the specified virtual register
/// to be held on the stack.
int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { // Find the location Reg would belong... int SS = StackSlotForVirtReg[VirtReg]; if (SS != -1) return SS; // Already has space allocated? // Allocate a new stack object for this spill location... int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), RC->getAlignment()); // Assign the slot. StackSlotForVirtReg[VirtReg] = FrameIdx; return FrameIdx; } /// isLastUseOfLocalReg - Return true if MO is the only remaining reference to /// its virtual register, and it is guaranteed to be a block-local register. /// bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) { // If the register has ever been spilled or reloaded, we conservatively assume // it is a global register used in multiple blocks. if (StackSlotForVirtReg[MO.getReg()] != -1) return false; // Check that the use/def chain has exactly one operand - MO. MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg()); if (&*I != &MO) return false; return ++I == MRI->reg_nodbg_end(); } /// addKillFlag - Set kill flags on last use of a virtual register. void RAFast::addKillFlag(const LiveReg &LR) { if (!LR.LastUse) return; MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum); if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) { if (MO.getReg() == LR.PhysReg) MO.setIsKill(); else LR.LastUse->addRegisterKilled(LR.PhysReg, TRI, true); } } /// killVirtReg - Mark virtreg as no longer available. void RAFast::killVirtReg(LiveRegMap::iterator LRI) { addKillFlag(*LRI); assert(PhysRegState[LRI->PhysReg] == LRI->VirtReg && "Broken RegState mapping"); PhysRegState[LRI->PhysReg] = regFree; // Erase from LiveVirtRegs unless we're spilling in bulk. if (!isBulkSpilling) LiveVirtRegs.erase(LRI); } /// killVirtReg - Mark virtreg as no longer available. void RAFast::killVirtReg(unsigned VirtReg) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "killVirtReg needs a virtual register"); LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); if (LRI != LiveVirtRegs.end()) killVirtReg(LRI); } /// spillVirtReg - This method spills the value specified by VirtReg into the /// corresponding stack slot if needed. void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Spilling a physical register is illegal!"); LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register"); spillVirtReg(MI, LRI); } /// spillVirtReg - Do the actual work of spilling. void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator LRI) { LiveReg &LR = *LRI; assert(PhysRegState[LR.PhysReg] == LRI->VirtReg && "Broken RegState mapping"); if (LR.Dirty) { // If this physreg is used by the instruction, we want to kill it on the // instruction, not on the spill. bool SpillKill = LR.LastUse != MI; LR.Dirty = false; DEBUG(dbgs() << "Spilling " << PrintReg(LRI->VirtReg, TRI) << " in " << PrintReg(LR.PhysReg, TRI)); const TargetRegisterClass *RC = MRI->getRegClass(LRI->VirtReg); int FI = getStackSpaceFor(LRI->VirtReg, RC); DEBUG(dbgs() << " to stack slot #" << FI << "\n"); TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI); ++NumStores; // Update statistics // If this register is used by DBG_VALUE then insert new DBG_VALUE to // identify spilled location as the place to find corresponding variable's // value. 
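    // Roughly speaking (register and slot numbers only illustrative), a
    //   DBG_VALUE %vreg5, !"x", !DIExpression()
    // tracking a value that has just been spilled to frame index #2 is
    // followed by a new
    //   DBG_VALUE <fi#2>, 0, !"x", !DIExpression()
    // built below from the frame index, the indirect offset, and the original
    // variable/expression metadata.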
SmallVectorImpl &LRIDbgValues = LiveDbgValueMap[LRI->VirtReg]; for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) { MachineInstr *DBG = LRIDbgValues[li]; const MDNode *Var = DBG->getDebugVariable(); const MDNode *Expr = DBG->getDebugExpression(); bool IsIndirect = DBG->isIndirectDebugValue(); uint64_t Offset = IsIndirect ? DBG->getOperand(1).getImm() : 0; DebugLoc DL = DBG->getDebugLoc(); assert(cast(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); MachineInstr *NewDV = BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::DBG_VALUE)) .addFrameIndex(FI) .addImm(Offset) .addMetadata(Var) .addMetadata(Expr); assert(NewDV->getParent() == MBB && "dangling parent pointer"); (void)NewDV; DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV); } // Now this register is spilled there is should not be any DBG_VALUE // pointing to this register because they are all pointing to spilled value // now. LRIDbgValues.clear(); if (SpillKill) LR.LastUse = nullptr; // Don't kill register again } killVirtReg(LRI); } /// spillAll - Spill all dirty virtregs without killing them. void RAFast::spillAll(MachineBasicBlock::iterator MI) { if (LiveVirtRegs.empty()) return; isBulkSpilling = true; // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order // of spilling here is deterministic, if arbitrary. for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end(); i != e; ++i) spillVirtReg(MI, i); LiveVirtRegs.clear(); isBulkSpilling = false; } /// usePhysReg - Handle the direct use of a physical register. /// Check that the register is not used by a virtreg. /// Kill the physreg, marking it free. /// This may add implicit kills to MO->getParent() and invalidate MO. void RAFast::usePhysReg(MachineOperand &MO) { unsigned PhysReg = MO.getReg(); assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Bad usePhysReg operand"); markRegUsedInInstr(PhysReg); switch (PhysRegState[PhysReg]) { case regDisabled: break; case regReserved: PhysRegState[PhysReg] = regFree; // Fall through case regFree: MO.setIsKill(); return; default: // The physreg was allocated to a virtual register. That means the value we // wanted has been clobbered. llvm_unreachable("Instruction uses an allocated register"); } // Maybe a superregister is reserved? for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { unsigned Alias = *AI; switch (PhysRegState[Alias]) { case regDisabled: break; case regReserved: // Either PhysReg is a subregister of Alias and we mark the // whole register as free, or PhysReg is the superregister of // Alias and we mark all the aliases as disabled before freeing // PhysReg. // In the latter case, since PhysReg was disabled, this means that // its value is defined only by physical sub-registers. This check // is performed by the assert of the default case in this loop. // Note: The value of the superregister may only be partial // defined, that is why regDisabled is a valid state for aliases. assert((TRI->isSuperRegister(PhysReg, Alias) || TRI->isSuperRegister(Alias, PhysReg)) && "Instruction is not using a subregister of a reserved register"); // Fall through. case regFree: if (TRI->isSuperRegister(PhysReg, Alias)) { // Leave the superregister in the working set. PhysRegState[Alias] = regFree; MO.getParent()->addRegisterKilled(Alias, TRI, true); return; } // Some other alias was in the working set - clear it. 
PhysRegState[Alias] = regDisabled; break; default: llvm_unreachable("Instruction uses an alias of an allocated register"); } } // All aliases are disabled, bring register into working set. PhysRegState[PhysReg] = regFree; MO.setIsKill(); } /// definePhysReg - Mark PhysReg as reserved or free after spilling any /// virtregs. This is very similar to defineVirtReg except the physreg is /// reserved instead of allocated. void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState) { markRegUsedInInstr(PhysReg); switch (unsigned VirtReg = PhysRegState[PhysReg]) { case regDisabled: break; default: spillVirtReg(MI, VirtReg); // Fall through. case regFree: case regReserved: PhysRegState[PhysReg] = NewState; return; } // This is a disabled register, disable all aliases. PhysRegState[PhysReg] = NewState; for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { unsigned Alias = *AI; switch (unsigned VirtReg = PhysRegState[Alias]) { case regDisabled: break; default: spillVirtReg(MI, VirtReg); // Fall through. case regFree: case regReserved: PhysRegState[Alias] = regDisabled; if (TRI->isSuperRegister(PhysReg, Alias)) return; break; } } } // calcSpillCost - Return the cost of spilling clearing out PhysReg and // aliases so it is free for allocation. // Returns 0 when PhysReg is free or disabled with all aliases disabled - it // can be allocated directly. // Returns spillImpossible when PhysReg or an alias can't be spilled. unsigned RAFast::calcSpillCost(unsigned PhysReg) const { if (isRegUsedInInstr(PhysReg)) { DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n"); return spillImpossible; } switch (unsigned VirtReg = PhysRegState[PhysReg]) { case regDisabled: break; case regFree: return 0; case regReserved: DEBUG(dbgs() << PrintReg(VirtReg, TRI) << " corresponding " << PrintReg(PhysReg, TRI) << " is reserved already.\n"); return spillImpossible; default: { LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); return I->Dirty ? spillDirty : spillClean; } } // This is a disabled register, add up cost of aliases. DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n"); unsigned Cost = 0; for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { unsigned Alias = *AI; switch (unsigned VirtReg = PhysRegState[Alias]) { case regDisabled: break; case regFree: ++Cost; break; case regReserved: return spillImpossible; default: { LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); Cost += I->Dirty ? spillDirty : spillClean; break; } } } return Cost; } /// assignVirtToPhysReg - This method updates local state so that we know /// that PhysReg is the proper container for VirtReg now. The physical /// register must not be used for anything else when this is called. /// void RAFast::assignVirtToPhysReg(LiveReg &LR, unsigned PhysReg) { DEBUG(dbgs() << "Assigning " << PrintReg(LR.VirtReg, TRI) << " to " << PrintReg(PhysReg, TRI) << "\n"); PhysRegState[PhysReg] = LR.VirtReg; assert(!LR.PhysReg && "Already assigned a physreg"); LR.PhysReg = PhysReg; } RAFast::LiveRegMap::iterator RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) { LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); assert(LRI != LiveVirtRegs.end() && "VirtReg disappeared"); assignVirtToPhysReg(*LRI, PhysReg); return LRI; } /// allocVirtReg - Allocate a physical register for VirtReg. 
RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI, LiveRegMap::iterator LRI, unsigned Hint) { const unsigned VirtReg = LRI->VirtReg; assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Can only allocate virtual registers"); const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); // Ignore invalid hints. if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) || !RC->contains(Hint) || !MRI->isAllocatable(Hint))) Hint = 0; // Take hint when possible. if (Hint) { // Ignore the hint if we would have to spill a dirty register. unsigned Cost = calcSpillCost(Hint); if (Cost < spillDirty) { if (Cost) definePhysReg(MI, Hint, regFree); // definePhysReg may kill virtual registers and modify LiveVirtRegs. // That invalidates LRI, so run a new lookup for VirtReg. return assignVirtToPhysReg(VirtReg, Hint); } } ArrayRef AO = RegClassInfo.getOrder(RC); // First try to find a completely free register. for (ArrayRef::iterator I = AO.begin(), E = AO.end(); I != E; ++I){ unsigned PhysReg = *I; if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) { assignVirtToPhysReg(*LRI, PhysReg); return LRI; } } DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from " << TRI->getRegClassName(RC) << "\n"); unsigned BestReg = 0, BestCost = spillImpossible; for (ArrayRef::iterator I = AO.begin(), E = AO.end(); I != E; ++I){ unsigned Cost = calcSpillCost(*I); DEBUG(dbgs() << "\tRegister: " << PrintReg(*I, TRI) << "\n"); DEBUG(dbgs() << "\tCost: " << Cost << "\n"); DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n"); // Cost is 0 when all aliases are already disabled. if (Cost == 0) { assignVirtToPhysReg(*LRI, *I); return LRI; } if (Cost < BestCost) BestReg = *I, BestCost = Cost; } if (BestReg) { definePhysReg(MI, BestReg, regFree); // definePhysReg may kill virtual registers and modify LiveVirtRegs. // That invalidates LRI, so run a new lookup for VirtReg. return assignVirtToPhysReg(VirtReg, BestReg); } // Nothing we can do. Report an error and keep going with a bad allocation. if (MI->isInlineAsm()) MI->emitError("inline assembly requires more registers than available"); else MI->emitError("ran out of registers during register allocation"); definePhysReg(MI, *AO.begin(), regFree); return assignVirtToPhysReg(VirtReg, *AO.begin()); } /// defineVirtReg - Allocate a register for VirtReg and mark it as dirty. RAFast::LiveRegMap::iterator RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, unsigned VirtReg, unsigned Hint) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator LRI; bool New; std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); if (New) { // If there is no hint, peek at the only use of this register. if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) && MRI->hasOneNonDBGUse(VirtReg)) { const MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(VirtReg); // It's a copy, use the destination register as a hint. if (UseMI.isCopyLike()) Hint = UseMI.getOperand(0).getReg(); } LRI = allocVirtReg(MI, LRI, Hint); } else if (LRI->LastUse) { // Redefining a live register - kill at the last use, unless it is this // instruction defining VirtReg multiple times. if (LRI->LastUse != MI || LRI->LastUse->getOperand(LRI->LastOpNum).isUse()) addKillFlag(*LRI); } assert(LRI->PhysReg && "Register not assigned"); LRI->LastUse = MI; LRI->LastOpNum = OpNum; LRI->Dirty = true; markRegUsedInInstr(LRI->PhysReg); return LRI; } /// reloadVirtReg - Make sure VirtReg is available in a physreg and return it. 
RAFast::LiveRegMap::iterator RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, unsigned VirtReg, unsigned Hint) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator LRI; bool New; std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); MachineOperand &MO = MI->getOperand(OpNum); if (New) { LRI = allocVirtReg(MI, LRI, Hint); const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); DEBUG(dbgs() << "Reloading " << PrintReg(VirtReg, TRI) << " into " << PrintReg(LRI->PhysReg, TRI) << "\n"); TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, RC, TRI); ++NumLoads; } else if (LRI->Dirty) { if (isLastUseOfLocalReg(MO)) { DEBUG(dbgs() << "Killing last use: " << MO << "\n"); if (MO.isUse()) MO.setIsKill(); else MO.setIsDead(); } else if (MO.isKill()) { DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n"); MO.setIsKill(false); } else if (MO.isDead()) { DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n"); MO.setIsDead(false); } } else if (MO.isKill()) { // We must remove kill flags from uses of reloaded registers because the // register would be killed immediately, and there might be a second use: // %foo = OR %x, %x // This would cause a second reload of %x into a different register. DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n"); MO.setIsKill(false); } else if (MO.isDead()) { DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n"); MO.setIsDead(false); } assert(LRI->PhysReg && "Register not assigned"); LRI->LastUse = MI; LRI->LastOpNum = OpNum; markRegUsedInInstr(LRI->PhysReg); return LRI; } // setPhysReg - Change operand OpNum in MI the refer the PhysReg, considering // subregs. This may invalidate any operand pointers. // Return true if the operand kills its register. bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) { MachineOperand &MO = MI->getOperand(OpNum); bool Dead = MO.isDead(); if (!MO.getSubReg()) { MO.setReg(PhysReg); return MO.isKill() || Dead; } // Handle subregister index. MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : 0); MO.setSubReg(0); // A kill flag implies killing the full register. Add corresponding super // register kill. if (MO.isKill()) { MI->addRegisterKilled(PhysReg, TRI, true); return true; } // A of a sub-register requires an implicit def of the full // register. if (MO.isDef() && MO.isUndef()) MI->addRegisterDefined(PhysReg, TRI); return Dead; } // Handle special instruction operand like early clobbers and tied ops when // there are additional physreg defines. void RAFast::handleThroughOperands(MachineInstr *MI, SmallVectorImpl &VirtDead) { DEBUG(dbgs() << "Scanning for through registers:"); SmallSet ThroughRegs; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) || (MO.getSubReg() && MI->readsVirtualRegister(Reg))) { if (ThroughRegs.insert(Reg).second) DEBUG(dbgs() << ' ' << PrintReg(Reg)); } } // If any physreg defines collide with preallocated through registers, // we must spill and reallocate. 
DEBUG(dbgs() << "\nChecking for physdef collisions.\n"); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; markRegUsedInInstr(Reg); for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { if (ThroughRegs.count(PhysRegState[*AI])) definePhysReg(MI, *AI, regFree); } } SmallVector PartialDefs; DEBUG(dbgs() << "Allocating tied uses.\n"); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (MO.isUse()) { unsigned DefIdx = 0; if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue; DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand " << DefIdx << ".\n"); LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0); unsigned PhysReg = LRI->PhysReg; setPhysReg(MI, i, PhysReg); // Note: we don't update the def operand yet. That would cause the normal // def-scan to attempt spilling. } else if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) { DEBUG(dbgs() << "Partial redefine: " << MO << "\n"); // Reload the register, but don't assign to the operand just yet. // That would confuse the later phys-def processing pass. LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0); PartialDefs.push_back(LRI->PhysReg); } } DEBUG(dbgs() << "Allocating early clobbers.\n"); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (!MO.isEarlyClobber()) continue; // Note: defineVirtReg may invalidate MO. LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0); unsigned PhysReg = LRI->PhysReg; if (setPhysReg(MI, i, PhysReg)) VirtDead.push_back(Reg); } // Restore UsedInInstr to a state usable for allocating normal virtual uses. UsedInInstr.clear(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI) << " as used in instr\n"); markRegUsedInInstr(Reg); } // Also mark PartialDefs as used to avoid reallocation. for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i) markRegUsedInInstr(PartialDefs[i]); } void RAFast::AllocateBasicBlock() { DEBUG(dbgs() << "\nAllocating " << *MBB); PhysRegState.assign(TRI->getNumRegs(), regDisabled); assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?"); MachineBasicBlock::iterator MII = MBB->begin(); // Add live-in registers as live. for (const auto &LI : MBB->liveins()) if (MRI->isAllocatable(LI.PhysReg)) definePhysReg(MII, LI.PhysReg, regReserved); SmallVector VirtDead; SmallVector Coalesced; // Otherwise, sequentially allocate each instruction in the MBB. 
while (MII != MBB->end()) { MachineInstr *MI = MII++; const MCInstrDesc &MCID = MI->getDesc(); DEBUG({ dbgs() << "\n>> " << *MI << "Regs:"; for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) { if (PhysRegState[Reg] == regDisabled) continue; dbgs() << " " << TRI->getName(Reg); switch(PhysRegState[Reg]) { case regFree: break; case regReserved: dbgs() << "*"; break; default: { dbgs() << '=' << PrintReg(PhysRegState[Reg]); LiveRegMap::iterator I = findLiveVirtReg(PhysRegState[Reg]); assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); if (I->Dirty) dbgs() << "*"; assert(I->PhysReg == Reg && "Bad inverse map"); break; } } } dbgs() << '\n'; // Check that LiveVirtRegs is the inverse. for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end(); i != e; ++i) { assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) && "Bad map key"); assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) && "Bad map value"); assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map"); } }); // Debug values are not allowed to change codegen in any way. if (MI->isDebugValue()) { bool ScanDbgValue = true; while (ScanDbgValue) { ScanDbgValue = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; LiveRegMap::iterator LRI = findLiveVirtReg(Reg); if (LRI != LiveVirtRegs.end()) setPhysReg(MI, i, LRI->PhysReg); else { int SS = StackSlotForVirtReg[Reg]; if (SS == -1) { // We can't allocate a physreg for a DebugValue, sorry! DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); MO.setReg(0); } else { // Modify DBG_VALUE now that the value is in a spill slot. bool IsIndirect = MI->isIndirectDebugValue(); uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; const MDNode *Var = MI->getDebugVariable(); const MDNode *Expr = MI->getDebugExpression(); DebugLoc DL = MI->getDebugLoc(); MachineBasicBlock *MBB = MI->getParent(); assert( cast(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); MachineInstr *NewDV = BuildMI(*MBB, MBB->erase(MI), DL, TII->get(TargetOpcode::DBG_VALUE)) .addFrameIndex(SS) .addImm(Offset) .addMetadata(Var) .addMetadata(Expr); DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *NewDV); // Scan NewDV operands from the beginning. MI = NewDV; ScanDbgValue = true; break; } } LiveDbgValueMap[Reg].push_back(MI); } } // Next instruction. continue; } // If this is a copy, we may be able to coalesce. unsigned CopySrc = 0, CopyDst = 0, CopySrcSub = 0, CopyDstSub = 0; if (MI->isCopy()) { CopyDst = MI->getOperand(0).getReg(); CopySrc = MI->getOperand(1).getReg(); CopyDstSub = MI->getOperand(0).getSubReg(); CopySrcSub = MI->getOperand(1).getSubReg(); } // Track registers used by instruction. UsedInInstr.clear(); // First scan. // Mark physreg uses and early clobbers as used. // Find the end of the virtreg operands unsigned VirtOpEnd = 0; bool hasTiedOps = false; bool hasEarlyClobbers = false; bool hasPartialRedefs = false; bool hasPhysDefs = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); // Make sure MRI knows about registers clobbered by regmasks. 
if (MO.isRegMask()) { MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); continue; } if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) { VirtOpEnd = i+1; if (MO.isUse()) { hasTiedOps = hasTiedOps || MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1; } else { if (MO.isEarlyClobber()) hasEarlyClobbers = true; if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) hasPartialRedefs = true; } continue; } if (!MRI->isAllocatable(Reg)) continue; if (MO.isUse()) { usePhysReg(MO); } else if (MO.isEarlyClobber()) { definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ? regFree : regReserved); hasEarlyClobbers = true; } else hasPhysDefs = true; } // The instruction may have virtual register operands that must be allocated // the same register at use-time and def-time: early clobbers and tied // operands. If there are also physical defs, these registers must avoid // both physical defs and uses, making them more constrained than normal // operands. // Similarly, if there are multiple defs and tied operands, we must make // sure the same register is allocated to uses and defs. // We didn't detect inline asm tied operands above, so just make this extra // pass for all inline asm. if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs || (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) { handleThroughOperands(MI, VirtDead); // Don't attempt coalescing when we have funny stuff going on. CopyDst = 0; // Pretend we have early clobbers so the use operands get marked below. // This is not necessary for the common case of a single tied use. hasEarlyClobbers = true; } // Second scan. // Allocate virtreg uses. for (unsigned i = 0; i != VirtOpEnd; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (MO.isUse()) { LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst); unsigned PhysReg = LRI->PhysReg; CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0; if (setPhysReg(MI, i, PhysReg)) killVirtReg(LRI); } } // Track registers defined by instruction - early clobbers and tied uses at // this point. UsedInInstr.clear(); if (hasEarlyClobbers) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; // Look for physreg defs and tied uses. if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue; markRegUsedInInstr(Reg); } } unsigned DefOpEnd = MI->getNumOperands(); if (MI->isCall()) { - // Spill all virtregs before a call. This serves two purposes: 1. If an + // Spill all virtregs before a call. This serves one purpose: If an // exception is thrown, the landing pad is going to expect to find - // registers in their spill slots, and 2. we don't have to wade through - // all the operands on the call instruction. - DefOpEnd = VirtOpEnd; + // registers in their spill slots. + // Note: although this is appealing to just consider all definitions + // as call-clobbered, this is not correct because some of those + // definitions may be used later on and we do not want to reuse + // those for virtual registers in between. DEBUG(dbgs() << " Spilling remaining registers before call.\n"); spillAll(MI); // The imp-defs are skipped below, but we still need to mark those // registers as used by the function. SkippedInstrs.insert(&MCID); } // Third scan. 
// Allocate defs and collect dead defs. for (unsigned i = 0; i != DefOpEnd; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber()) continue; unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { if (!MRI->isAllocatable(Reg)) continue; definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved); continue; } LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, CopySrc); unsigned PhysReg = LRI->PhysReg; if (setPhysReg(MI, i, PhysReg)) { VirtDead.push_back(Reg); CopyDst = 0; // cancel coalescing; } else CopyDst = (CopyDst == Reg || CopyDst == PhysReg) ? PhysReg : 0; } // Kill dead defs after the scan to ensure that multiple defs of the same // register are allocated identically. We didn't need to do this for uses // because we are crerating our own kill flags, and they are always at the // last use. for (unsigned i = 0, e = VirtDead.size(); i != e; ++i) killVirtReg(VirtDead[i]); VirtDead.clear(); if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) { DEBUG(dbgs() << "-- coalescing: " << *MI); Coalesced.push_back(MI); } else { DEBUG(dbgs() << "<< " << *MI); } } // Spill all physical registers holding virtual registers now. DEBUG(dbgs() << "Spilling live registers at end of block.\n"); spillAll(MBB->getFirstTerminator()); // Erase all the coalesced copies. We are delaying it until now because // LiveVirtRegs might refer to the instrs. for (unsigned i = 0, e = Coalesced.size(); i != e; ++i) MBB->erase(Coalesced[i]); NumCopies += Coalesced.size(); DEBUG(MBB->dump()); } /// runOnMachineFunction - Register allocate the whole function /// bool RAFast::runOnMachineFunction(MachineFunction &Fn) { DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n" << "********** Function: " << Fn.getName() << '\n'); MF = &Fn; MRI = &MF->getRegInfo(); TRI = MF->getSubtarget().getRegisterInfo(); TII = MF->getSubtarget().getInstrInfo(); MRI->freezeReservedRegs(Fn); RegClassInfo.runOnMachineFunction(Fn); UsedInInstr.clear(); UsedInInstr.setUniverse(TRI->getNumRegUnits()); assert(!MRI->isSSA() && "regalloc requires leaving SSA"); // initialize the virtual->physical register map to have a 'null' // mapping for all virtual registers StackSlotForVirtReg.resize(MRI->getNumVirtRegs()); LiveVirtRegs.setUniverse(MRI->getNumVirtRegs()); // Loop over all of the basic blocks, eliminating virtual register references for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end(); MBBi != MBBe; ++MBBi) { MBB = &*MBBi; AllocateBasicBlock(); } // All machine operands and other references to virtual registers have been // replaced. Remove the virtual registers. MRI->clearVirtRegs(); SkippedInstrs.clear(); StackSlotForVirtReg.clear(); LiveDbgValueMap.clear(); return true; } FunctionPass *llvm::createFastRegisterAllocator() { return new RAFast(); } Index: projects/clang380-import/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp =================================================================== --- projects/clang380-import/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (revision 296010) +++ projects/clang380-import/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (revision 296011) @@ -1,1012 +1,1024 @@ //===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// // // This file contains the AArch64 implementation of TargetFrameLowering class. // // On AArch64, stack frames are structured as follows: // // The stack grows downward. // // All of the individual frame areas on the frame below are optional, i.e. it's // possible to create a function so that the particular area isn't present // in the frame. // // At function entry, the "frame" looks as follows: // // | | Higher address // |-----------------------------------| // | | // | arguments passed on the stack | // | | // |-----------------------------------| <- sp // | | Lower address // // // After the prologue has run, the frame has the following general structure. // Note that this doesn't depict the case where a red-zone is used. Also, // technically the last frame area (VLAs) doesn't get created until in the // main function body, after the prologue is run. However, it's depicted here // for completeness. // // | | Higher address // |-----------------------------------| // | | // | arguments passed on the stack | // | | // |-----------------------------------| // | | // | prev_fp, prev_lr | // | (a.k.a. "frame record") | // |-----------------------------------| <- fp(=x29) // | | // | other callee-saved registers | // | | // |-----------------------------------| // |.empty.space.to.make.part.below....| // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at // |.the.standard.16-byte.alignment....| compile time; if present) // |-----------------------------------| // | | // | local variables of fixed size | // | including spill slots | // |-----------------------------------| <- bp(not defined by ABI, // |.variable-sized.local.variables....| LLVM chooses X19) // |.(VLAs)............................| (size of this area is unknown at // |...................................| compile time) // |-----------------------------------| <- sp // | | Lower address // // // To access the data in a frame, at-compile time, a constant offset must be // computable from one of the pointers (fp, bp, sp) to access it. The size // of the areas with a dotted background cannot be computed at compile-time // if they are present, making it required to have all three of fp, bp and // sp to be set up to be able to access all contents in the frame areas, // assuming all of the frame areas are non-empty. // // For most functions, some of the frame areas are empty. For those functions, // it may not be necessary to set up fp or bp: // * A base pointer is definitely needed when there are both VLAs and local // variables with more-than-default alignment requirements. // * A frame pointer is definitely needed when there are local variables with // more-than-default alignment requirements. // // In some cases when a base pointer is not strictly needed, it is generated // anyway when offsets from the frame pointer to access local variables become // so large that the offset can't be encoded in the immediate fields of loads // or stores. // // FIXME: also explain the redzone concept. // FIXME: also explain the concept of reserved call frames. 
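// As an illustration (register choices as described above, offsets purely
// hypothetical): in a function that has both VLAs and over-aligned locals, an
// incoming stack argument is reached at a positive offset from fp, a
// fixed-size local through the base pointer bp (x19), and memory in the VLA
// area only through a pointer produced when the VLA was allocated, while sp,
// which moves as VLAs are created, is mainly useful for the outgoing
// arguments of calls.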
// //===----------------------------------------------------------------------===// #include "AArch64FrameLowering.h" #include "AArch64InstrInfo.h" #include "AArch64MachineFunctionInfo.h" #include "AArch64Subtarget.h" #include "AArch64TargetMachine.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; #define DEBUG_TYPE "frame-info" static cl::opt EnableRedZone("aarch64-redzone", cl::desc("enable use of redzone on AArch64"), cl::init(false), cl::Hidden); STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { if (!EnableRedZone) return false; // Don't use the red zone if the function explicitly asks us not to. // This is typically used for kernel code. if (MF.getFunction()->hasFnAttribute(Attribute::NoRedZone)) return false; const MachineFrameInfo *MFI = MF.getFrameInfo(); const AArch64FunctionInfo *AFI = MF.getInfo(); unsigned NumBytes = AFI->getLocalStackSize(); // Note: currently hasFP() is always true for hasCalls(), but that's an // implementation detail of the current code, not a strict requirement, // so stay safe here and check both. if (MFI->hasCalls() || hasFP(MF) || NumBytes > 128) return false; return true; } /// hasFP - Return true if the specified function should have a dedicated frame /// pointer register. bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); return (MFI->hasCalls() || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || MFI->hasStackMap() || MFI->hasPatchPoint() || RegInfo->needsStackRealignment(MF)); } /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is /// not required, we reserve argument space for call sites in the function /// immediately on entry to the current function. This eliminates the need for /// add/sub sp brackets around call sites. Returns true if the call frame is /// included as part of the stack frame. bool AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { return !MF.getFrameInfo()->hasVarSizedObjects(); } void AArch64FrameLowering::eliminateCallFramePseudoInstr( MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { const AArch64InstrInfo *TII = static_cast(MF.getSubtarget().getInstrInfo()); DebugLoc DL = I->getDebugLoc(); unsigned Opc = I->getOpcode(); bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode(); uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0; const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); if (!TFI->hasReservedCallFrame(MF)) { unsigned Align = getStackAlignment(); int64_t Amount = I->getOperand(0).getImm(); Amount = RoundUpToAlignment(Amount, Align); if (!IsDestroy) Amount = -Amount; // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it // doesn't have to pop anything), then the first operand will be zero too so // this adjustment is a no-op. 
if (CalleePopAmount == 0) { // FIXME: in-function stack adjustment for calls is limited to 24-bits // because there's no guaranteed temporary register available. // // ADD/SUB (immediate) has only LSL #0 and LSL #12 available. // 1) For offset <= 12-bit, we use LSL #0 // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses // LSL #0, and the other uses LSL #12. // // Mostly call frames will be allocated at the start of a function so // this is OK, but it is a limitation that needs dealing with. assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large"); emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII); } } else if (CalleePopAmount != 0) { // If the calling convention demands that the callee pops arguments from the // stack, we want to add it back if we have a reserved call frame. assert(CalleePopAmount < 0xffffff && "call frame too large"); emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount, TII); } MBB.erase(I); } void AArch64FrameLowering::emitCalleeSavedFrameMoves( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned FramePtr) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); DebugLoc DL = MBB.findDebugLoc(MBBI); // Add callee saved registers to move list. const std::vector &CSI = MFI->getCalleeSavedInfo(); if (CSI.empty()) return; const DataLayout &TD = MF.getDataLayout(); bool HasFP = hasFP(MF); // Calculate amount of bytes used for return address storing. int stackGrowth = -TD.getPointerSize(0); // Calculate offsets. int64_t saveAreaOffset = (HasFP ? 2 : 1) * stackGrowth; unsigned TotalSkipped = 0; for (const auto &Info : CSI) { unsigned Reg = Info.getReg(); int64_t Offset = MFI->getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea() + saveAreaOffset; // Don't output a new CFI directive if we're re-saving the frame pointer or // link register. This happens when the PrologEpilogInserter has inserted an // extra "STP" of the frame pointer and link register -- the "emitPrologue" // method automatically generates the directives when frame pointers are // used. If we generate CFI directives for the extra "STP"s, the linker will // lose track of the correct values for the frame pointer and link register. if (HasFP && (FramePtr == Reg || Reg == AArch64::LR)) { TotalSkipped += stackGrowth; continue; } unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( nullptr, DwarfReg, Offset - TotalSkipped)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } } /// Get FPOffset by analyzing the first instruction. static int getFPOffsetInPrologue(MachineInstr *MBBI) { // First instruction must a) allocate the stack and b) have an immediate // that is a multiple of -2. assert(((MBBI->getOpcode() == AArch64::STPXpre || MBBI->getOpcode() == AArch64::STPDpre) && MBBI->getOperand(3).getReg() == AArch64::SP && MBBI->getOperand(4).getImm() < 0 && (MBBI->getOperand(4).getImm() & 1) == 0)); // Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space // required for the callee saved register area we get the frame pointer // by addding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8. 
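  // For example (sizes hypothetical): a 32-byte callee-save area is allocated
  // by an STPXpre whose scaled immediate is -4, giving
  // FPOffset = -(-4 + 2) * 8 = 16, i.e. the frame pointer is set up 16 bytes
  // above the adjusted sp and therefore 16 bytes below the incoming sp.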
int FPOffset = -(MBBI->getOperand(4).getImm() + 2) * 8; assert(FPOffset >= 0 && "Bad Framepointer Offset"); return FPOffset; } static bool isCSSave(MachineInstr *MBBI) { return MBBI->getOpcode() == AArch64::STPXi || MBBI->getOpcode() == AArch64::STPDi || MBBI->getOpcode() == AArch64::STPXpre || MBBI->getOpcode() == AArch64::STPDpre; } +bool AArch64FrameLowering::canUseAsPrologue( + const MachineBasicBlock &MBB) const { + const MachineFunction *MF = MBB.getParent(); + const AArch64Subtarget &Subtarget = MF->getSubtarget(); + const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + + // Don't need a scratch register if we're not going to re-align the stack. + // Otherwise, we may need a scratch register to be available and we do not + // support that for now. + return !RegInfo->needsStackRealignment(*MF); +} + void AArch64FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *Fn = MF.getFunction(); const AArch64Subtarget &Subtarget = MF.getSubtarget(); const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineModuleInfo &MMI = MF.getMMI(); AArch64FunctionInfo *AFI = MF.getInfo(); bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry(); bool HasFP = hasFP(MF); // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. DebugLoc DL; // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; int NumBytes = (int)MFI->getStackSize(); if (!AFI->hasStackFrame()) { assert(!HasFP && "unexpected function without stack frame but with FP"); // All of the stack allocation is for locals. AFI->setLocalStackSize(NumBytes); // Label used to tie together the PROLOG_LABEL and the MachineMoves. MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); // REDZONE: If the stack size is less than 128 bytes, we don't need // to actually allocate. if (NumBytes && !canUseRedZone(MF)) { emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, MachineInstr::FrameSetup); // Encode the stack size of the leaf function. unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } else if (NumBytes) { ++NumRedZoneFunctions; } return; } // Only set up FP if we actually need to. int FPOffset = 0; if (HasFP) FPOffset = getFPOffsetInPrologue(MBBI); // Move past the saves of the callee-saved registers. while (isCSSave(MBBI)) { ++MBBI; NumBytes -= 16; } assert(NumBytes >= 0 && "Negative stack allocation size!?"); if (HasFP) { // Issue sub fp, sp, FPOffset or // mov fp,sp when FPOffset is zero. // Note: All stores of callee-saved registers are marked as "FrameSetup". // This code marks the instruction(s) that set the FP also. emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII, MachineInstr::FrameSetup); } // All of the remaining stack allocations are for locals. AFI->setLocalStackSize(NumBytes); // Allocate space for the rest of the frame. 
const unsigned Alignment = MFI->getMaxAlignment(); const bool NeedsRealignment = RegInfo->needsStackRealignment(MF); unsigned scratchSPReg = AArch64::SP; if (NumBytes && NeedsRealignment) { // Use the first callee-saved register as a scratch register. scratchSPReg = AArch64::X9; } // If we're a leaf function, try using the red zone. if (NumBytes && !canUseRedZone(MF)) // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have // the correct value here, as NumBytes also includes padding bytes, // which shouldn't be counted here. emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII, MachineInstr::FrameSetup); if (NumBytes && NeedsRealignment) { const unsigned NrBitsToZero = countTrailingZeros(Alignment); assert(NrBitsToZero > 1); assert(scratchSPReg != AArch64::SP); // SUB X9, SP, NumBytes // -- X9 is temporary register, so shouldn't contain any live data here, // -- free to use. This is already produced by emitFrameOffset above. // AND SP, X9, 0b11111...0000 // The logical immediates have a non-trivial encoding. The following // formula computes the encoded immediate with all ones but // NrBitsToZero zero bits as least significant bits. uint32_t andMaskEncoded = (1 <<12) // = N | ((64-NrBitsToZero) << 6) // immr | ((64-NrBitsToZero-1) << 0) // imms ; BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP) .addReg(scratchSPReg, RegState::Kill) .addImm(andMaskEncoded); } // If we need a base pointer, set it up here. It's whatever the value of the // stack pointer is at this point. Any variable size objects will be allocated // after this, so we can still use the base pointer to reference locals. // // FIXME: Clarify FrameSetup flags here. // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is // needed. if (RegInfo->hasBasePointer(MF)) { TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP, false); } if (needsFrameMoves) { const DataLayout &TD = MF.getDataLayout(); const int StackGrowth = -TD.getPointerSize(0); unsigned FramePtr = RegInfo->getFrameRegister(MF); // An example of the prologue: // // .globl __foo // .align 2 // __foo: // Ltmp0: // .cfi_startproc // .cfi_personality 155, ___gxx_personality_v0 // Leh_func_begin: // .cfi_lsda 16, Lexception33 // // stp xa,bx, [sp, -#offset]! // ... // stp x28, x27, [sp, #offset-32] // stp fp, lr, [sp, #offset-16] // add fp, sp, #offset - 16 // sub sp, sp, #1360 // // The Stack: // +-------------------------------------------+ // 10000 | ........ | ........ | ........ | ........ | // 10004 | ........ | ........ | ........ | ........ | // +-------------------------------------------+ // 10008 | ........ | ........ | ........ | ........ | // 1000c | ........ | ........ | ........ | ........ | // +===========================================+ // 10010 | X28 Register | // 10014 | X28 Register | // +-------------------------------------------+ // 10018 | X27 Register | // 1001c | X27 Register | // +===========================================+ // 10020 | Frame Pointer | // 10024 | Frame Pointer | // +-------------------------------------------+ // 10028 | Link Register | // 1002c | Link Register | // +===========================================+ // 10030 | ........ | ........ | ........ | ........ | // 10034 | ........ | ........ | ........ | ........ | // +-------------------------------------------+ // 10038 | ........ | ........ | ........ | ........ | // 1003c | ........ | ........ | ........ | ........ 
| // +-------------------------------------------+ // // [sp] = 10030 :: >>initial value<< // sp = 10020 :: stp fp, lr, [sp, #-16]! // fp = sp == 10020 :: mov fp, sp // [sp] == 10020 :: stp x28, x27, [sp, #-16]! // sp == 10010 :: >>final value<< // // The frame pointer (w29) points to address 10020. If we use an offset of // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24 // for w27, and -32 for w28: // // Ltmp1: // .cfi_def_cfa w29, 16 // Ltmp2: // .cfi_offset w30, -8 // Ltmp3: // .cfi_offset w29, -16 // Ltmp4: // .cfi_offset w27, -24 // Ltmp5: // .cfi_offset w28, -32 if (HasFP) { // Define the current CFA rule to use the provided FP. unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true); unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); // Record the location of the stored LR unsigned LR = RegInfo->getDwarfRegNum(AArch64::LR, true); CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, LR, StackGrowth)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); // Record the location of the stored FP CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } else { // Encode the stack size of the leaf function. unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize())); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } // Now emit the moves for whatever callee saved regs we have. emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr); } } static bool isCalleeSavedRegister(unsigned Reg, const MCPhysReg *CSRegs) { for (unsigned i = 0; CSRegs[i]; ++i) if (Reg == CSRegs[i]) return true; return false; } /// Checks whether the given instruction restores callee save registers /// and if so returns how many. static unsigned getNumCSRestores(MachineInstr &MI, const MCPhysReg *CSRegs) { unsigned RtIdx = 0; switch (MI.getOpcode()) { case AArch64::LDPXpost: case AArch64::LDPDpost: RtIdx = 1; // FALLTHROUGH case AArch64::LDPXi: case AArch64::LDPDi: if (!isCalleeSavedRegister(MI.getOperand(RtIdx).getReg(), CSRegs) || !isCalleeSavedRegister(MI.getOperand(RtIdx + 1).getReg(), CSRegs) || MI.getOperand(RtIdx + 2).getReg() != AArch64::SP) return 0; return 2; } return 0; } void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); MachineFrameInfo *MFI = MF.getFrameInfo(); const AArch64Subtarget &Subtarget = MF.getSubtarget(); const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL; bool IsTailCallReturn = false; if (MBB.end() != MBBI) { DL = MBBI->getDebugLoc(); unsigned RetOpcode = MBBI->getOpcode(); IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri; } int NumBytes = MFI->getStackSize(); const AArch64FunctionInfo *AFI = MF.getInfo(); // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. 
if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; // Initial and residual are named for consistency with the prologue. Note that // in the epilogue, the residual adjustment is executed first. uint64_t ArgumentPopSize = 0; if (IsTailCallReturn) { MachineOperand &StackAdjust = MBBI->getOperand(1); // For a tail-call in a callee-pops-arguments environment, some or all of // the stack may actually be in use for the call's arguments, this is // calculated during LowerCall and consumed here... ArgumentPopSize = StackAdjust.getImm(); } else { // ... otherwise the amount to pop is *all* of the argument space, // conveniently stored in the MachineFunctionInfo by // LowerFormalArguments. This will, of course, be zero for the C calling // convention. ArgumentPopSize = AFI->getArgumentStackToRestore(); } // The stack frame should be like below, // // ---------------------- --- // | | | // | BytesInStackArgArea| CalleeArgStackSize // | (NumReusableBytes) | (of tail call) // | | --- // | | | // ---------------------| --- | // | | | | // | CalleeSavedReg | | | // | (NumRestores * 8) | | | // | | | | // ---------------------| | NumBytes // | | StackSize (StackAdjustUp) // | LocalStackSize | | | // | (covering callee | | | // | args) | | | // | | | | // ---------------------- --- --- // // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize // = StackSize + ArgumentPopSize // // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps // it as the 2nd argument of AArch64ISD::TC_RETURN. NumBytes += ArgumentPopSize; unsigned NumRestores = 0; // Move past the restores of the callee-saved registers. MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator(); const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); MachineBasicBlock::iterator Begin = MBB.begin(); while (LastPopI != Begin) { --LastPopI; unsigned Restores = getNumCSRestores(*LastPopI, CSRegs); NumRestores += Restores; if (Restores == 0) { ++LastPopI; break; } } NumBytes -= NumRestores * 8; assert(NumBytes >= 0 && "Negative stack allocation size!?"); if (!hasFP(MF)) { // If this was a redzone leaf function, we don't need to restore the // stack pointer. if (!canUseRedZone(MF)) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII); return; } // Restore the original stack pointer. // FIXME: Rather than doing the math here, we should instead just use // non-post-indexed loads for the restores if we aren't actually going to // be able to save any instructions. if (NumBytes || MFI->hasVarSizedObjects()) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, -(NumRestores - 2) * 8, TII, MachineInstr::NoFlags); } /// getFrameIndexReference - Provide a base+offset reference to an FI slot for /// debug info. It's the same as what we use for resolving the code-gen /// references for now. FIXME: This can go wrong when references are /// SP-relative and simple call frames aren't used. 
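As an arithmetic check of the FP-relative restore above, a minimal sketch using the example frame from the prologue comment (frame pointer at 0x10020, four callee-saved restores), where the stack pointer must end up back at 0x10010.

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t FP = 0x10020;   // frame record address from the example
  const int NumRestores = 4;     // fp, lr, x28, x27
  // emitFrameOffset(..., SP, FP, -(NumRestores - 2) * 8, ...) amounts to:
  const uint64_t SP = FP - (NumRestores - 2) * 8;
  assert(SP == 0x10010);         // just below the last callee-saved pair
  return 0;
}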
int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { return resolveFrameIndexReference(MF, FI, FrameReg); } int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg, bool PreferFP) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const AArch64RegisterInfo *RegInfo = static_cast( MF.getSubtarget().getRegisterInfo()); const AArch64FunctionInfo *AFI = MF.getInfo(); int FPOffset = MFI->getObjectOffset(FI) + 16; int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); bool isFixed = MFI->isFixedObjectIndex(FI); // Use frame pointer to reference fixed objects. Use it for locals if // there are VLAs or a dynamically realigned SP (and thus the SP isn't // reliable as a base). Make sure useFPForScavengingIndex() does the // right thing for the emergency spill slot. bool UseFP = false; if (AFI->hasStackFrame()) { // Note: Keeping the following as multiple 'if' statements rather than // merging to a single expression for readability. // // Argument access should always use the FP. if (isFixed) { UseFP = hasFP(MF); } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) && !RegInfo->needsStackRealignment(MF)) { // Use SP or FP, whichever gives us the best chance of the offset // being in range for direct access. If the FPOffset is positive, // that'll always be best, as the SP will be even further away. // If the FPOffset is negative, we have to keep in mind that the // available offset range for negative offsets is smaller than for // positive ones. If we have variable sized objects, we're stuck with // using the FP regardless, though, as the SP offset is unknown // and we don't have a base pointer available. If an offset is // available via the FP and the SP, use whichever is closest. if (PreferFP || MFI->hasVarSizedObjects() || FPOffset >= 0 || (FPOffset >= -256 && Offset > -FPOffset)) UseFP = true; } } assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) && "In the presence of dynamic stack pointer realignment, " "non-argument objects cannot be accessed through the frame pointer"); if (UseFP) { FrameReg = RegInfo->getFrameRegister(MF); return FPOffset; } // Use the base pointer if we have one. if (RegInfo->hasBasePointer(MF)) FrameReg = RegInfo->getBaseRegister(); else { FrameReg = AArch64::SP; // If we're using the red zone for this function, the SP won't actually // be adjusted, so the offsets will be negative. They're also all // within range of the signed 9-bit immediate instructions. if (canUseRedZone(MF)) Offset -= AFI->getLocalStackSize(); } return Offset; } static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) { if (Reg != AArch64::LR) return getKillRegState(true); // LR maybe referred to later by an @llvm.returnaddress intrinsic. 
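A minimal sketch of the FP-versus-SP decision made above for non-fixed objects, assuming a frame pointer is available and there is no base pointer or stack realignment; the helper name and the example offsets are illustrative only.

#include <cassert>

// FPOffset is the object's offset from the frame pointer; SPOffset is its
// offset from the stack pointer (ObjectOffset + StackSize).
static bool useFPForLocal(int FPOffset, int SPOffset, bool PreferFP,
                          bool HasVarSizedObjects) {
  // Positive FP offsets are always closer than SP; small negative FP offsets
  // are taken only when they are still nearer than the SP-relative offset.
  return PreferFP || HasVarSizedObjects || FPOffset >= 0 ||
         (FPOffset >= -256 && SPOffset > -FPOffset);
}

int main() {
  // An object 8 bytes below FP that sits 120 bytes above SP: use FP.
  assert(useFPForLocal(-8, 120, false, false));
  // A local far below FP is left to the SP-relative path.
  assert(!useFPForLocal(-512, 40, false, false));
  return 0;
}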
bool LRLiveIn = MF.getRegInfo().isLiveIn(AArch64::LR); bool LRKill = !(LRLiveIn && MF.getFrameInfo()->isReturnAddressTaken()); return getKillRegState(LRKill); } bool AArch64FrameLowering::spillCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); unsigned Count = CSI.size(); DebugLoc DL; assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); for (unsigned i = 0; i < Count; i += 2) { unsigned idx = Count - i - 2; unsigned Reg1 = CSI[idx].getReg(); unsigned Reg2 = CSI[idx + 1].getReg(); // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI // list to come in sorted by frame index so that we can issue the store // pair instructions directly. Assert if we see anything otherwise. // // The order of the registers in the list is controlled by // getCalleeSavedRegs(), so they will always be in-order, as well. assert(CSI[idx].getFrameIdx() + 1 == CSI[idx + 1].getFrameIdx() && "Out of order callee saved regs!"); unsigned StrOpc; assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); assert((i & 1) == 0 && "Odd index for callee-saved reg spill!"); // Issue sequence of non-sp increment and pi sp spills for cs regs. The // first spill is a pre-increment that allocates the stack. // For example: // stp x22, x21, [sp, #-48]! // addImm(-6) // stp x20, x19, [sp, #16] // addImm(+2) // stp fp, lr, [sp, #32] // addImm(+4) // Rationale: This sequence saves uop updates compared to a sequence of // pre-increment spills like stp xi,xj,[sp,#-16]! // Note: Similar rational and sequence for restores in epilog. if (AArch64::GPR64RegClass.contains(Reg1)) { assert(AArch64::GPR64RegClass.contains(Reg2) && "Expected GPR64 callee-saved register pair!"); // For first spill use pre-increment store. if (i == 0) StrOpc = AArch64::STPXpre; else StrOpc = AArch64::STPXi; } else if (AArch64::FPR64RegClass.contains(Reg1)) { assert(AArch64::FPR64RegClass.contains(Reg2) && "Expected FPR64 callee-saved register pair!"); // For first spill use pre-increment store. if (i == 0) StrOpc = AArch64::STPDpre; else StrOpc = AArch64::STPDi; } else llvm_unreachable("Unexpected callee saved register!"); DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", " << TRI->getName(Reg2) << ") -> fi#(" << CSI[idx].getFrameIdx() << ", " << CSI[idx + 1].getFrameIdx() << ")\n"); // Compute offset: i = 0 => offset = -Count; // i = 2 => offset = -(Count - 2) + Count = 2 = i; etc. const int Offset = (i == 0) ? 
-Count : i; assert((Offset >= -64 && Offset <= 63) && "Offset out of bounds for STP immediate"); MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc)); if (StrOpc == AArch64::STPDpre || StrOpc == AArch64::STPXpre) MIB.addReg(AArch64::SP, RegState::Define); MBB.addLiveIn(Reg1); MBB.addLiveIn(Reg2); MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)) .addReg(Reg1, getPrologueDeath(MF, Reg1)) .addReg(AArch64::SP) .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit .setMIFlag(MachineInstr::FrameSetup); } return true; } bool AArch64FrameLowering::restoreCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); unsigned Count = CSI.size(); DebugLoc DL; assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); if (MI != MBB.end()) DL = MI->getDebugLoc(); for (unsigned i = 0; i < Count; i += 2) { unsigned Reg1 = CSI[i].getReg(); unsigned Reg2 = CSI[i + 1].getReg(); // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI // list to come in sorted by frame index so that we can issue the store // pair instructions directly. Assert if we see anything otherwise. assert(CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx() && "Out of order callee saved regs!"); // Issue sequence of non-sp increment and sp-pi restores for cs regs. Only // the last load is sp-pi post-increment and de-allocates the stack: // For example: // ldp fp, lr, [sp, #32] // addImm(+4) // ldp x20, x19, [sp, #16] // addImm(+2) // ldp x22, x21, [sp], #48 // addImm(+6) // Note: see comment in spillCalleeSavedRegisters() unsigned LdrOpc; assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); assert((i & 1) == 0 && "Odd index for callee-saved reg spill!"); if (AArch64::GPR64RegClass.contains(Reg1)) { assert(AArch64::GPR64RegClass.contains(Reg2) && "Expected GPR64 callee-saved register pair!"); if (i == Count - 2) LdrOpc = AArch64::LDPXpost; else LdrOpc = AArch64::LDPXi; } else if (AArch64::FPR64RegClass.contains(Reg1)) { assert(AArch64::FPR64RegClass.contains(Reg2) && "Expected FPR64 callee-saved register pair!"); if (i == Count - 2) LdrOpc = AArch64::LDPDpost; else LdrOpc = AArch64::LDPDi; } else llvm_unreachable("Unexpected callee saved register!"); DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", " << TRI->getName(Reg2) << ") -> fi#(" << CSI[i].getFrameIdx() << ", " << CSI[i + 1].getFrameIdx() << ")\n"); // Compute offset: i = 0 => offset = Count - 2; i = 2 => offset = Count - 4; // etc. const int Offset = (i == Count - 2) ? Count : Count - i - 2; assert((Offset >= -64 && Offset <= 63) && "Offset out of bounds for LDP immediate"); MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc)); if (LdrOpc == AArch64::LDPXpost || LdrOpc == AArch64::LDPDpost) MIB.addReg(AArch64::SP, RegState::Define); MIB.addReg(Reg2, getDefRegState(true)) .addReg(Reg1, getDefRegState(true)) .addReg(AArch64::SP) .addImm(Offset); // [sp], #offset * 8 or [sp, #offset * 8] // where the factor * 8 is implicit } return true; } void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const { // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. 
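A worked check of the pair spill and restore offsets computed above, assuming three callee-saved pairs (Count == 6), matching the stp/ldp sequences given in the comments; offsets are in units of 8 bytes.

#include <cassert>

int main() {
  const int Count = 6; // three register pairs
  // Spill: the first store is pre-incrementing and allocates the whole area.
  for (int i = 0; i < Count; i += 2) {
    const int Offset = (i == 0) ? -Count : i;
    assert(Offset == (i == 0 ? -6 : i)); // -6, +2, +4
  }
  // Restore: the last load is post-incrementing and deallocates the area.
  for (int i = 0; i < Count; i += 2) {
    const int Offset = (i == Count - 2) ? Count : Count - i - 2;
    assert(Offset == (i == Count - 2 ? 6 : 4 - i)); // +4, +2, +6
  }
  return 0;
}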
if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); const AArch64RegisterInfo *RegInfo = static_cast( MF.getSubtarget().getRegisterInfo()); AArch64FunctionInfo *AFI = MF.getInfo(); SmallVector UnspilledCSGPRs; SmallVector UnspilledCSFPRs; // The frame record needs to be created by saving the appropriate registers if (hasFP(MF)) { SavedRegs.set(AArch64::FP); SavedRegs.set(AArch64::LR); } // Spill the BasePtr if it's used. Do this first thing so that the // getCalleeSavedRegs() below will get the right answer. if (RegInfo->hasBasePointer(MF)) SavedRegs.set(RegInfo->getBaseRegister()); if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF)) SavedRegs.set(AArch64::X9); // If any callee-saved registers are used, the frame cannot be eliminated. unsigned NumGPRSpilled = 0; unsigned NumFPRSpilled = 0; bool ExtraCSSpill = false; bool CanEliminateFrame = true; DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:"); const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); // Check pairs of consecutive callee-saved registers. for (unsigned i = 0; CSRegs[i]; i += 2) { assert(CSRegs[i + 1] && "Odd number of callee-saved registers!"); const unsigned OddReg = CSRegs[i]; const unsigned EvenReg = CSRegs[i + 1]; assert((AArch64::GPR64RegClass.contains(OddReg) && AArch64::GPR64RegClass.contains(EvenReg)) ^ (AArch64::FPR64RegClass.contains(OddReg) && AArch64::FPR64RegClass.contains(EvenReg)) && "Register class mismatch!"); const bool OddRegUsed = SavedRegs.test(OddReg); const bool EvenRegUsed = SavedRegs.test(EvenReg); // Early exit if none of the registers in the register pair is actually // used. if (!OddRegUsed && !EvenRegUsed) { if (AArch64::GPR64RegClass.contains(OddReg)) { UnspilledCSGPRs.push_back(OddReg); UnspilledCSGPRs.push_back(EvenReg); } else { UnspilledCSFPRs.push_back(OddReg); UnspilledCSFPRs.push_back(EvenReg); } continue; } unsigned Reg = AArch64::NoRegister; // If only one of the registers of the register pair is used, make sure to // mark the other one as used as well. if (OddRegUsed ^ EvenRegUsed) { // Find out which register is the additional spill. Reg = OddRegUsed ? EvenReg : OddReg; SavedRegs.set(Reg); } DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo)); DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo)); assert(((OddReg == AArch64::LR && EvenReg == AArch64::FP) || (RegInfo->getEncodingValue(OddReg) + 1 == RegInfo->getEncodingValue(EvenReg))) && "Register pair of non-adjacent registers!"); if (AArch64::GPR64RegClass.contains(OddReg)) { NumGPRSpilled += 2; // If it's not a reserved register, we can use it in lieu of an // emergency spill slot for the register scavenger. // FIXME: It would be better to instead keep looking and choose another // unspilled register that isn't reserved, if there is one. if (Reg != AArch64::NoRegister && !RegInfo->isReservedReg(MF, Reg)) ExtraCSSpill = true; } else NumFPRSpilled += 2; CanEliminateFrame = false; } // FIXME: Set BigStack if any stack slot references may be out of range. // For now, just conservatively guestimate based on unscaled indexing // range. We'll end up allocating an unnecessary spill slot a lot, but // realistically that's not a big deal at this stage of the game. // The CSR spill slots have not been allocated yet, so estimateStackSize // won't include them. 
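A minimal sketch of the pair-completion rule applied above: when exactly one register of a callee-saved pair is live, its partner is saved as well so the pair can still be spilled with a single store-pair instruction; the helper name and register numbers are illustrative only.

#include <cassert>

// Returns the extra register that must also be saved, or 0 if the pair
// needs no adjustment (both used, or neither used).
static unsigned extraSpillForPair(bool OddRegUsed, bool EvenRegUsed,
                                  unsigned OddReg, unsigned EvenReg) {
  if (OddRegUsed ^ EvenRegUsed)
    return OddRegUsed ? EvenReg : OddReg;
  return 0;
}

int main() {
  // The first register of the pair is live, the second is not:
  // the second one must be saved as well.
  assert(extraSpillForPair(true, false, /*OddReg=*/20, /*EvenReg=*/19) == 19);
  return 0;
}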
MachineFrameInfo *MFI = MF.getFrameInfo(); unsigned CFSize = MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled); DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n"); bool BigStack = (CFSize >= 256); if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) AFI->setHasStackFrame(true); // Estimate if we might need to scavenge a register at some point in order // to materialize a stack offset. If so, either spill one additional // callee-saved register or reserve a special spill slot to facilitate // register scavenging. If we already spilled an extra callee-saved register // above to keep the number of spills even, we don't need to do anything else // here. if (BigStack && !ExtraCSSpill) { // If we're adding a register to spill here, we have to add two of them // to keep the number of regs to spill even. assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!"); unsigned Count = 0; while (!UnspilledCSGPRs.empty() && Count < 2) { unsigned Reg = UnspilledCSGPRs.back(); UnspilledCSGPRs.pop_back(); DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo) << " to get a scratch register.\n"); SavedRegs.set(Reg); ExtraCSSpill = true; ++Count; } // If we didn't find an extra callee-saved register to spill, create // an emergency spill slot. if (!ExtraCSSpill) { const TargetRegisterClass *RC = &AArch64::GPR64RegClass; int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false); RS->addScavengingFrameIndex(FI); DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI << " as the emergency spill slot.\n"); } } } Index: projects/clang380-import/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h =================================================================== --- projects/clang380-import/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h (revision 296010) +++ projects/clang380-import/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h (revision 296011) @@ -1,72 +1,74 @@ //==-- AArch64FrameLowering.h - TargetFrameLowering for AArch64 --*- C++ -*-==// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // // //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H #define LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H #include "llvm/Target/TargetFrameLowering.h" namespace llvm { class AArch64FrameLowering : public TargetFrameLowering { public: explicit AArch64FrameLowering() : TargetFrameLowering(StackGrowsDown, 16, 0, 16, true /*StackRealignable*/) {} void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned FramePtr) const; void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override; /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. 
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + bool canUseAsPrologue(const MachineBasicBlock &MBB) const override; + int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override; int resolveFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg, bool PreferFP = false) const; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, const TargetRegisterInfo *TRI) const override; bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, const TargetRegisterInfo *TRI) const override; /// \brief Can this function use the red zone for local allocations. bool canUseRedZone(const MachineFunction &MF) const; bool hasFP(const MachineFunction &MF) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; /// Returns true if the target will correctly handle shrink wrapping. bool enableShrinkWrapping(const MachineFunction &MF) const override { return true; } }; } // End llvm namespace #endif Index: projects/clang380-import/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp =================================================================== --- projects/clang380-import/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp (revision 296010) +++ projects/clang380-import/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp (revision 296011) @@ -1,1780 +1,1903 @@ //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file contains the PPC implementation of TargetFrameLowering class. // //===----------------------------------------------------------------------===// #include "PPCFrameLowering.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/IR/Function.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; /// VRRegNo - Map from a numbered VR register to its enum value. /// static const MCPhysReg VRRegNo[] = { PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 }; static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) { if (STI.isDarwinABI()) return STI.isPPC64() ? 16 : 8; // SVR4 ABI: return STI.isPPC64() ? 16 : 4; } static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) { return STI.isELFv2ABI() ? 
24 : 40; } static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) { // For the Darwin ABI: // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area // for saving the frame pointer (if needed.) While the published ABI has // not used this slot since at least MacOSX 10.2, there is older code // around that does use it, and that needs to continue to work. if (STI.isDarwinABI()) return STI.isPPC64() ? -8U : -4U; // SVR4 ABI: First slot in the general register save area. return STI.isPPC64() ? -8U : -4U; } static unsigned computeLinkageSize(const PPCSubtarget &STI) { if (STI.isDarwinABI() || STI.isPPC64()) return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4); // SVR4 ABI: return 8; } static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { if (STI.isDarwinABI()) return STI.isPPC64() ? -16U : -8U; // SVR4 ABI: First slot in the general register save area. return STI.isPPC64() ? -16U : (STI.getTargetMachine().getRelocationModel() == Reloc::PIC_) ? -12U : -8U; } PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, STI.getPlatformStackAlignment(), 0), Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)), TOCSaveOffset(computeTOCSaveOffset(Subtarget)), FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)), LinkageSize(computeLinkageSize(Subtarget)), BasePointerSaveOffset(computeBasePointerSaveOffset(STI)) {} // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( unsigned &NumEntries) const { if (Subtarget.isDarwinABI()) { NumEntries = 1; if (Subtarget.isPPC64()) { static const SpillSlot darwin64Offsets = {PPC::X31, -8}; return &darwin64Offsets; } else { static const SpillSlot darwinOffsets = {PPC::R31, -4}; return &darwinOffsets; } } // Early exit if not using the SVR4 ABI. if (!Subtarget.isSVR4ABI()) { NumEntries = 0; return nullptr; } // Note that the offsets here overlap, but this is fixed up in // processFunctionBeforeFrameFinalized. static const SpillSlot Offsets[] = { // Floating-point register save area offsets. {PPC::F31, -8}, {PPC::F30, -16}, {PPC::F29, -24}, {PPC::F28, -32}, {PPC::F27, -40}, {PPC::F26, -48}, {PPC::F25, -56}, {PPC::F24, -64}, {PPC::F23, -72}, {PPC::F22, -80}, {PPC::F21, -88}, {PPC::F20, -96}, {PPC::F19, -104}, {PPC::F18, -112}, {PPC::F17, -120}, {PPC::F16, -128}, {PPC::F15, -136}, {PPC::F14, -144}, // General register save area offsets. {PPC::R31, -4}, {PPC::R30, -8}, {PPC::R29, -12}, {PPC::R28, -16}, {PPC::R27, -20}, {PPC::R26, -24}, {PPC::R25, -28}, {PPC::R24, -32}, {PPC::R23, -36}, {PPC::R22, -40}, {PPC::R21, -44}, {PPC::R20, -48}, {PPC::R19, -52}, {PPC::R18, -56}, {PPC::R17, -60}, {PPC::R16, -64}, {PPC::R15, -68}, {PPC::R14, -72}, // CR save area offset. We map each of the nonvolatile CR fields // to the slot for CR2, which is the first of the nonvolatile CR // fields to be assigned, so that we only allocate one save slot. // See PPCRegisterInfo::hasReservedSpillSlot() for more information. {PPC::CR2, -4}, // VRSAVE save area offset. {PPC::VRSAVE, -4}, // Vector register save area {PPC::V31, -16}, {PPC::V30, -32}, {PPC::V29, -48}, {PPC::V28, -64}, {PPC::V27, -80}, {PPC::V26, -96}, {PPC::V25, -112}, {PPC::V24, -128}, {PPC::V23, -144}, {PPC::V22, -160}, {PPC::V21, -176}, {PPC::V20, -192}}; static const SpillSlot Offsets64[] = { // Floating-point register save area offsets. 
{PPC::F31, -8}, {PPC::F30, -16}, {PPC::F29, -24}, {PPC::F28, -32}, {PPC::F27, -40}, {PPC::F26, -48}, {PPC::F25, -56}, {PPC::F24, -64}, {PPC::F23, -72}, {PPC::F22, -80}, {PPC::F21, -88}, {PPC::F20, -96}, {PPC::F19, -104}, {PPC::F18, -112}, {PPC::F17, -120}, {PPC::F16, -128}, {PPC::F15, -136}, {PPC::F14, -144}, // General register save area offsets. {PPC::X31, -8}, {PPC::X30, -16}, {PPC::X29, -24}, {PPC::X28, -32}, {PPC::X27, -40}, {PPC::X26, -48}, {PPC::X25, -56}, {PPC::X24, -64}, {PPC::X23, -72}, {PPC::X22, -80}, {PPC::X21, -88}, {PPC::X20, -96}, {PPC::X19, -104}, {PPC::X18, -112}, {PPC::X17, -120}, {PPC::X16, -128}, {PPC::X15, -136}, {PPC::X14, -144}, // VRSAVE save area offset. {PPC::VRSAVE, -4}, // Vector register save area {PPC::V31, -16}, {PPC::V30, -32}, {PPC::V29, -48}, {PPC::V28, -64}, {PPC::V27, -80}, {PPC::V26, -96}, {PPC::V25, -112}, {PPC::V24, -128}, {PPC::V23, -144}, {PPC::V22, -160}, {PPC::V21, -176}, {PPC::V20, -192}}; if (Subtarget.isPPC64()) { NumEntries = array_lengthof(Offsets64); return Offsets64; } else { NumEntries = array_lengthof(Offsets); return Offsets; } } /// RemoveVRSaveCode - We have found that this function does not need any code /// to manipulate the VRSAVE register, even though it uses vector registers. /// This can happen when the only registers used are known to be live in or out /// of the function. Remove all of the VRSAVE related code from the function. /// FIXME: The removal of the code results in a compile failure at -O0 when the /// function contains a function call, as the GPR containing original VRSAVE /// contents is spilled and reloaded around the call. Without the prolog code, /// the spill instruction refers to an undefined register. This code needs /// to account for all uses of that GPR. static void RemoveVRSaveCode(MachineInstr *MI) { MachineBasicBlock *Entry = MI->getParent(); MachineFunction *MF = Entry->getParent(); // We know that the MTVRSAVE instruction immediately follows MI. Remove it. MachineBasicBlock::iterator MBBI = MI; ++MBBI; assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE); MBBI->eraseFromParent(); bool RemovedAllMTVRSAVEs = true; // See if we can find and remove the MTVRSAVE instruction from all of the // epilog blocks. for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { // If last instruction is a return instruction, add an epilogue if (I->isReturnBlock()) { bool FoundIt = false; for (MBBI = I->end(); MBBI != I->begin(); ) { --MBBI; if (MBBI->getOpcode() == PPC::MTVRSAVE) { MBBI->eraseFromParent(); // remove it. FoundIt = true; break; } } RemovedAllMTVRSAVEs &= FoundIt; } } // If we found and removed all MTVRSAVE instructions, remove the read of // VRSAVE as well. if (RemovedAllMTVRSAVEs) { MBBI = MI; assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?"); --MBBI; assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?"); MBBI->eraseFromParent(); } // Finally, nuke the UPDATE_VRSAVE. MI->eraseFromParent(); } // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the // instruction selector. Based on the vector registers that have been used, // transform this into the appropriate ORI instruction. 
static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { MachineFunction *MF = MI->getParent()->getParent(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); DebugLoc dl = MI->getDebugLoc(); const MachineRegisterInfo &MRI = MF->getRegInfo(); unsigned UsedRegMask = 0; for (unsigned i = 0; i != 32; ++i) if (MRI.isPhysRegModified(VRRegNo[i])) UsedRegMask |= 1 << (31-i); // Live in and live out values already must be in the mask, so don't bother // marking them. for (MachineRegisterInfo::livein_iterator I = MF->getRegInfo().livein_begin(), E = MF->getRegInfo().livein_end(); I != E; ++I) { unsigned RegNo = TRI->getEncodingValue(I->first); if (VRRegNo[RegNo] == I->first) // If this really is a vector reg. UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. } // Live out registers appear as use operands on return instructions. for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end(); UsedRegMask != 0 && BI != BE; ++BI) { const MachineBasicBlock &MBB = *BI; if (!MBB.isReturnBlock()) continue; const MachineInstr &Ret = MBB.back(); for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) { const MachineOperand &MO = Ret.getOperand(I); if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg())) continue; unsigned RegNo = TRI->getEncodingValue(MO.getReg()); UsedRegMask &= ~(1 << (31-RegNo)); } } // If no registers are used, turn this into a copy. if (UsedRegMask == 0) { // Remove all VRSAVE code. RemoveVRSaveCode(MI); return; } unsigned SrcReg = MI->getOperand(1).getReg(); unsigned DstReg = MI->getOperand(0).getReg(); if ((UsedRegMask & 0xFFFF) == UsedRegMask) { if (DstReg != SrcReg) BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg) .addReg(SrcReg) .addImm(UsedRegMask); else BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg) .addReg(SrcReg, RegState::Kill) .addImm(UsedRegMask); } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) { if (DstReg != SrcReg) BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) .addReg(SrcReg) .addImm(UsedRegMask >> 16); else BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) .addReg(SrcReg, RegState::Kill) .addImm(UsedRegMask >> 16); } else { if (DstReg != SrcReg) BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) .addReg(SrcReg) .addImm(UsedRegMask >> 16); else BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) .addReg(SrcReg, RegState::Kill) .addImm(UsedRegMask >> 16); BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg) .addReg(DstReg, RegState::Kill) .addImm(UsedRegMask & 0xFFFF); } // Remove the old UPDATE_VRSAVE instruction. MI->eraseFromParent(); } static bool spillsCR(const MachineFunction &MF) { const PPCFunctionInfo *FuncInfo = MF.getInfo(); return FuncInfo->isCRSpilled(); } static bool spillsVRSAVE(const MachineFunction &MF) { const PPCFunctionInfo *FuncInfo = MF.getInfo(); return FuncInfo->isVRSAVESpilled(); } static bool hasSpills(const MachineFunction &MF) { const PPCFunctionInfo *FuncInfo = MF.getInfo(); return FuncInfo->hasSpills(); } static bool hasNonRISpills(const MachineFunction &MF) { const PPCFunctionInfo *FuncInfo = MF.getInfo(); return FuncInfo->hasNonRISpills(); } /// MustSaveLR - Return true if this function requires that we save the LR /// register onto the stack in the prolog and restore it in the epilog of the /// function. 
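An illustrative check of how the VRSAVE mask built above is materialized when its set bits span both 16-bit halves, requiring an ORIS of the high half followed by an ORI of the low half; the mask value is an assumed example.

#include <cassert>
#include <cstdint>

int main() {
  // Bit (31 - i) is set for each used vector register Vi:
  // here V0, V15, and V28..V31.
  const uint32_t UsedRegMask = 0x8001000Fu;
  // Neither half alone covers the mask, so both instructions are needed.
  assert((UsedRegMask & 0xFFFFu) != UsedRegMask);
  assert((UsedRegMask & 0xFFFF0000u) != UsedRegMask);
  const uint32_t OrisImm = UsedRegMask >> 16;    // 0x8001
  const uint32_t OriImm  = UsedRegMask & 0xFFFF; // 0x000F
  assert(((OrisImm << 16) | OriImm) == UsedRegMask);
  return 0;
}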
static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { const PPCFunctionInfo *MFI = MF.getInfo(); // We need a save/restore of LR if there is any def of LR (which is // defined by calls, including the PIC setup sequence), or if there is // some use of the LR stack slot (e.g. for builtin_return_address). // (LR comes in 32 and 64 bit versions.) MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); } /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, bool UpdateMF, bool UseEstimate) const { MachineFrameInfo *MFI = MF.getFrameInfo(); // Get the number of bytes to allocate from the FrameInfo unsigned FrameSize = UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize(); // Get stack alignments. The frame must be aligned to the greatest of these: unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI unsigned MaxAlign = MFI->getMaxAlignment(); // algmt required by data in frame unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1; const PPCRegisterInfo *RegInfo = static_cast(Subtarget.getRegisterInfo()); // If we are a leaf function, and use up to 224 bytes of stack space, // don't have a frame pointer, calls, or dynamic alloca then we do not need // to adjust the stack pointer (we fit in the Red Zone). // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate // stackless code if all local vars are reg-allocated. bool DisableRedZone = MF.getFunction()->hasFnAttribute(Attribute::NoRedZone); unsigned LR = RegInfo->getRARegister(); if (!DisableRedZone && (Subtarget.isPPC64() || // 32-bit SVR4, no stack- !Subtarget.isSVR4ABI() || // allocated locals. FrameSize == 0) && FrameSize <= 224 && // Fits in red zone. !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. !MustSaveLR(MF, LR) && !RegInfo->hasBasePointer(MF)) { // No special alignment. // No need for frame if (UpdateMF) MFI->setStackSize(0); return 0; } // Get the maximum call frame size of all the calls. unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); // Maximum call frame needs to be at least big enough for linkage area. unsigned minCallFrameSize = getLinkageSize(); maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); // If we have dynamic alloca then maxCallFrameSize needs to be aligned so // that allocations will be aligned. if (MFI->hasVarSizedObjects()) maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; // Update maximum call frame size. if (UpdateMF) MFI->setMaxCallFrameSize(maxCallFrameSize); // Include call frame size in total. FrameSize += maxCallFrameSize; // Make sure the frame is aligned. FrameSize = (FrameSize + AlignMask) & ~AlignMask; // Update frame info. if (UpdateMF) MFI->setStackSize(FrameSize); return FrameSize; } // hasFP - Return true if the specified function actually has a dedicated frame // pointer register. bool PPCFrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); // FIXME: This is pretty much broken by design: hasFP() might be called really // early, before the stack layout was calculated and thus hasFP() might return // true or false here depending on the time of call. return (MFI->getStackSize()) && needsFP(MF); } // needsFP - Return true if the specified function should have a dedicated frame // pointer register. 
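A minimal sketch of the frame rounding performed above: the frame is aligned up to the larger of the ABI stack alignment and the largest object alignment in the frame; the helper name and the example sizes are illustrative only.

#include <cassert>

static unsigned alignFrameSize(unsigned FrameSize, unsigned TargetAlign,
                               unsigned MaxAlign) {
  const unsigned AlignMask =
      (MaxAlign > TargetAlign ? MaxAlign : TargetAlign) - 1;
  return (FrameSize + AlignMask) & ~AlignMask;
}

int main() {
  assert(alignFrameSize(40, 16, 1) == 48);  // the ABI alignment dominates
  assert(alignFrameSize(40, 16, 64) == 64); // a 64-byte-aligned object dominates
  return 0;
}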
This is true if the function has variable sized allocas or // if frame pointer elimination is disabled. bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); // Naked functions have no stack frame pushed, so we don't have a frame // pointer. if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) return false; return MF.getTarget().Options.DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() || MFI->hasStackMap() || MFI->hasPatchPoint() || (MF.getTarget().Options.GuaranteedTailCallOpt && MF.getInfo()->hasFastCall()); } void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { bool is31 = needsFP(MF); unsigned FPReg = is31 ? PPC::R31 : PPC::R1; unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; const PPCRegisterInfo *RegInfo = static_cast(Subtarget.getRegisterInfo()); bool HasBP = RegInfo->hasBasePointer(MF); unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FPReg; for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) { --MBBI; for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { MachineOperand &MO = MBBI->getOperand(I); if (!MO.isReg()) continue; switch (MO.getReg()) { case PPC::FP: MO.setReg(FPReg); break; case PPC::FP8: MO.setReg(FP8Reg); break; case PPC::BP: MO.setReg(BPReg); break; case PPC::BP8: MO.setReg(BP8Reg); break; } } } } -bool PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, - bool UseAtEnd, - unsigned *ScratchRegister) const { +/* This function will do the following: + - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12 + respectively (defaults recommended by the ABI) and return true + - If MBB is not an entry block, initialize the register scavenger and look + for available registers. + - If the defaults (R0/R12) are available, return true + - If TwoUniqueRegsRequired is set to true, it looks for two unique + registers. Otherwise, look for a single available register. + - If the required registers are found, set SR1 and SR2 and return true. + - If the required registers are not found, set SR2 or both SR1 and SR2 to + PPC::NoRegister and return false. + + Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired + is not set, this function will attempt to find two different registers, but + still return true if only one register is available (and set SR1 == SR2). +*/ +bool +PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, + bool UseAtEnd, + bool TwoUniqueRegsRequired, + unsigned *SR1, + unsigned *SR2) const { RegScavenger RS; - unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0; + unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0; + unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12; - if (ScratchRegister) - *ScratchRegister = R0; + // Set the defaults for the two scratch registers. + if (SR1) + *SR1 = R0; - // If MBB is an entry or exit block, use R0 as the scratch register + if (SR2) { + assert (SR1 && "Asking for the second scratch register but not the first?"); + *SR2 = R12; + } + + // If MBB is an entry or exit block, use R0 and R12 as the scratch registers. 
if ((UseAtEnd && MBB->isReturnBlock()) || (!UseAtEnd && (&MBB->getParent()->front() == MBB))) return true; RS.enterBasicBlock(MBB); if (UseAtEnd && !MBB->empty()) { - // The scratch register will be used at the end of the block, so must consider - // all registers used within the block + // The scratch register will be used at the end of the block, so must + // consider all registers used within the block MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator(); // If no terminator, back iterator up to previous instruction. if (MBBI == MBB->end()) MBBI = std::prev(MBBI); if (MBBI != MBB->begin()) RS.forward(MBBI); } - - if (!RS.isRegUsed(R0)) + + // If the two registers are available, we're all good. + // Note that we only return here if both R0 and R12 are available because + // although the function may not require two unique registers, it may benefit + // from having two so we should try to provide them. + if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12)) return true; - unsigned Reg = RS.FindUnusedReg(Subtarget.isPPC64() ? &PPC::G8RCRegClass - : &PPC::GPRCRegClass); - - // Make sure the register scavenger was able to find an available register - // If not, use R0 but return false to indicate no register was available and - // R0 must be used (as recommended by the ABI) - if (Reg == 0) - return false; + // Get the list of callee-saved registers for the target. + const PPCRegisterInfo *RegInfo = + static_cast(Subtarget.getRegisterInfo()); + const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent()); - if (ScratchRegister) - *ScratchRegister = Reg; + // Get all the available registers in the block. + BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass : + &PPC::GPRCRegClass); + // We shouldn't use callee-saved registers as scratch registers as they may be + // available when looking for a candidate block for shrink wrapping but not + // available when the actual prologue/epilogue is being emitted because they + // were added as live-in to the prologue block by PrologueEpilogueInserter. + for (int i = 0; CSRegs[i]; ++i) + BV.reset(CSRegs[i]); + + // Set the first scratch register to the first available one. + if (SR1) { + int FirstScratchReg = BV.find_first(); + *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg; + } + + // If there is another one available, set the second scratch register to that. + // Otherwise, set it to either PPC::NoRegister if this function requires two + // or to whatever SR1 is set to if this function doesn't require two. + if (SR2) { + int SecondScratchReg = BV.find_next(*SR1); + if (SecondScratchReg != -1) + *SR2 = SecondScratchReg; + else + *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1; + } + + // Now that we've done our best to provide both registers, double check + // whether we were unable to provide enough. + if (BV.count() < (TwoUniqueRegsRequired ? 2 : 1)) + return false; + return true; } +// We need a scratch register for spilling LR and for spilling CR. By default, +// we use two scratch registers to hide latency. However, if only one scratch +// register is available, we can adjust for that by not overlapping the spill +// code. However, if we need to realign the stack (i.e. have a base pointer) +// and the stack frame is large, we need two scratch registers. 
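An illustrative sketch of the selection policy added above, restated over a plain list of candidate registers rather than a BitVector; 0 stands in for PPC::NoRegister and the helper name is assumed.

#include <cassert>
#include <vector>

static bool pickScratchRegs(const std::vector<unsigned> &Available,
                            bool TwoUniqueRegsRequired,
                            unsigned &SR1, unsigned &SR2) {
  SR1 = Available.empty() ? 0 : Available[0];
  if (Available.size() > 1)
    SR2 = Available[1];
  else
    SR2 = TwoUniqueRegsRequired ? 0 : SR1; // share SR1 when one is enough
  return Available.size() >= (TwoUniqueRegsRequired ? 2u : 1u);
}

int main() {
  unsigned SR1 = 0, SR2 = 0;
  // One candidate and two not strictly required: both outputs fall back to
  // the same register and the call still succeeds.
  const bool OK = pickScratchRegs({3}, /*TwoUniqueRegsRequired=*/false, SR1, SR2);
  assert(OK && SR1 == 3 && SR2 == 3);
  return 0;
}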
+bool +PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { + const PPCRegisterInfo *RegInfo = + static_cast(Subtarget.getRegisterInfo()); + MachineFunction &MF = *(MBB->getParent()); + bool HasBP = RegInfo->hasBasePointer(MF); + unsigned FrameSize = determineFrameLayout(MF, false); + int NegFrameSize = -FrameSize; + bool IsLargeFrame = !isInt<16>(NegFrameSize); + MachineFrameInfo *MFI = MF.getFrameInfo(); + unsigned MaxAlign = MFI->getMaxAlignment(); + + return IsLargeFrame && HasBP && MaxAlign > 1; +} + bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { MachineBasicBlock *TmpMBB = const_cast(&MBB); - return findScratchRegister(TmpMBB, false, nullptr); + return findScratchRegister(TmpMBB, false, + twoUniqueScratchRegsRequired(TmpMBB)); } bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { MachineBasicBlock *TmpMBB = const_cast(&MBB); - return findScratchRegister(TmpMBB, true, nullptr); + return findScratchRegister(TmpMBB, true); } void PPCFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); const PPCInstrInfo &TII = *static_cast(Subtarget.getInstrInfo()); const PPCRegisterInfo *RegInfo = static_cast(Subtarget.getRegisterInfo()); MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); DebugLoc dl; bool needsCFI = MMI.hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry(); // Get processor type. bool isPPC64 = Subtarget.isPPC64(); // Get the ABI. bool isSVR4ABI = Subtarget.isSVR4ABI(); bool isELFv2ABI = Subtarget.isELFv2ABI(); assert((Subtarget.isDarwinABI() || isSVR4ABI) && "Currently only Darwin and SVR4 ABIs are supported for PowerPC."); // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, // process it. if (!isSVR4ABI) for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { HandleVRSaveUpdate(MBBI, TII); break; } } // Move MBBI back to the beginning of the prologue block. MBBI = MBB.begin(); // Work out frame sizes. unsigned FrameSize = determineFrameLayout(MF); int NegFrameSize = -FrameSize; if (!isInt<32>(NegFrameSize)) llvm_unreachable("Unhandled stack size!"); if (MFI->isFrameAddressTaken()) replaceFPWithRealFP(MF); // Check if the link register (LR) must be saved. PPCFunctionInfo *FI = MF.getInfo(); bool MustSaveLR = FI->mustSaveLR(); const SmallVectorImpl &MustSaveCRs = FI->getMustSaveCRs(); + bool MustSaveCR = !MustSaveCRs.empty(); // Do we have a frame pointer and/or base pointer for this function? bool HasFP = hasFP(MF); bool HasBP = RegInfo->hasBasePointer(MF); unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; unsigned BPReg = RegInfo->getBaseRegister(MF); unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR; unsigned ScratchReg = 0; unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 : PPC::MFLR ); const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD : PPC::STW ); const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU : PPC::STWU ); const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX); const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS ); const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? 
PPC::ORI8 : PPC::ORI ); const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR ); const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 : PPC::SUBFC); const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 : PPC::SUBFIC); // Regarding this assert: Even though LR is saved in the caller's frame (i.e., // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no // Red Zone, an asynchronous event (a form of "callee") could claim a frame & // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); - findScratchRegister(&MBB, false, &ScratchReg); - assert(ScratchReg && "No scratch register!"); - + // Using the same bool variable as below to supress compiler warnings. + bool SingleScratchReg = + findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB), + &ScratchReg, &TempReg); + assert(SingleScratchReg && + "Required number of registers not available in this block"); + + SingleScratchReg = ScratchReg == TempReg; + int LROffset = getReturnSaveOffset(); int FPOffset = 0; if (HasFP) { if (isSVR4ABI) { MachineFrameInfo *FFI = MF.getFrameInfo(); int FPIndex = FI->getFramePointerSaveIndex(); assert(FPIndex && "No Frame Pointer Save Slot!"); FPOffset = FFI->getObjectOffset(FPIndex); } else { FPOffset = getFramePointerSaveOffset(); } } int BPOffset = 0; if (HasBP) { if (isSVR4ABI) { MachineFrameInfo *FFI = MF.getFrameInfo(); int BPIndex = FI->getBasePointerSaveIndex(); assert(BPIndex && "No Base Pointer Save Slot!"); BPOffset = FFI->getObjectOffset(BPIndex); } else { BPOffset = getBasePointerSaveOffset(); } } int PBPOffset = 0; if (FI->usesPICBase()) { MachineFrameInfo *FFI = MF.getFrameInfo(); int PBPIndex = FI->getPICBasePointerSaveIndex(); assert(PBPIndex && "No PIC Base Pointer Save Slot!"); PBPOffset = FFI->getObjectOffset(PBPIndex); } // Get stack alignments. unsigned MaxAlign = MFI->getMaxAlignment(); if (HasBP && MaxAlign > 1) assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && "Invalid alignment!"); // Frames of 32KB & larger require special handling because they cannot be // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. bool isLargeFrame = !isInt<16>(NegFrameSize); + assert((isPPC64 || !MustSaveCR) && + "Prologue CR saving supported only in 64-bit mode"); + + // If we need to spill the CR and the LR but we don't have two separate + // registers available, we must spill them one at a time + if (MustSaveCR && SingleScratchReg && MustSaveLR) { + // FIXME: In the ELFv2 ABI, we are not required to save all CR fields. + // If only one or two CR fields are clobbered, it could be more + // efficient to use mfocrf to selectively save just those fields. 
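A minimal sketch of the ordering decision introduced above: with a single scratch register, the CR save (mfcr plus store) must complete before that register is reused to move the LR, whereas with two scratch registers the two saves can be interleaved; the helper name is illustrative only.

#include <cassert>

static bool mustSerializeCRSave(bool MustSaveCR, bool MustSaveLR,
                                unsigned ScratchReg, unsigned TempReg) {
  const bool SingleScratchReg = (ScratchReg == TempReg);
  return MustSaveCR && MustSaveLR && SingleScratchReg;
}

int main() {
  // The same physical register doing double duty forces the serialized form.
  assert(mustSerializeCRSave(true, true, /*ScratchReg=*/12, /*TempReg=*/12));
  assert(!mustSerializeCRSave(true, true, /*ScratchReg=*/0, /*TempReg=*/12));
  return 0;
}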
+ MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), TempReg); + for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) + MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill); + BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) + .addReg(TempReg, getKillRegState(true)) + .addImm(8) + .addReg(SPReg); + } + if (MustSaveLR) BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); - assert((isPPC64 || MustSaveCRs.empty()) && - "Prologue CR saving supported only in 64-bit mode"); - - if (!MustSaveCRs.empty()) { // will only occur for PPC64 + if (MustSaveCR && + !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64 // FIXME: In the ELFv2 ABI, we are not required to save all CR fields. // If only one or two CR fields are clobbered, it could be more // efficient to use mfocrf to selectively save just those fields. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), TempReg); for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill); } if (HasFP) // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe. BuildMI(MBB, MBBI, dl, StoreInst) .addReg(FPReg) .addImm(FPOffset) .addReg(SPReg); if (FI->usesPICBase()) // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe. BuildMI(MBB, MBBI, dl, StoreInst) .addReg(PPC::R30) .addImm(PBPOffset) .addReg(SPReg); if (HasBP) // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe. BuildMI(MBB, MBBI, dl, StoreInst) .addReg(BPReg) .addImm(BPOffset) .addReg(SPReg); if (MustSaveLR) // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe. BuildMI(MBB, MBBI, dl, StoreInst) .addReg(ScratchReg) .addImm(LROffset) .addReg(SPReg); - if (!MustSaveCRs.empty()) // will only occur for PPC64 + if (MustSaveCR && + !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64 BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) .addReg(TempReg, getKillRegState(true)) .addImm(8) .addReg(SPReg); // Skip the rest if this is a leaf function & all spills fit in the Red Zone. if (!FrameSize) return; // Adjust stack pointer: r1 += NegFrameSize. // If there is a preferred stack alignment, align R1 now if (HasBP) { // Save a copy of r1 as the base pointer. BuildMI(MBB, MBBI, dl, OrInst, BPReg) .addReg(SPReg) .addReg(SPReg); } + // This condition must be kept in sync with canUseAsPrologue. if (HasBP && MaxAlign > 1) { if (isPPC64) BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) .addReg(SPReg) .addImm(0) .addImm(64 - Log2_32(MaxAlign)); else // PPC32... 
BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg) .addReg(SPReg) .addImm(0) .addImm(32 - Log2_32(MaxAlign)) .addImm(31); if (!isLargeFrame) { BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg) .addReg(ScratchReg, RegState::Kill) .addImm(NegFrameSize); } else { + assert(!SingleScratchReg && "Only a single scratch reg available"); BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) .addImm(NegFrameSize >> 16); BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) .addReg(TempReg, RegState::Kill) .addImm(NegFrameSize & 0xFFFF); BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) .addReg(ScratchReg, RegState::Kill) .addReg(TempReg, RegState::Kill); } BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) .addReg(SPReg, RegState::Kill) .addReg(SPReg) .addReg(ScratchReg); } else if (!isLargeFrame) { BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg) .addReg(SPReg) .addImm(NegFrameSize) .addReg(SPReg); } else { BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) .addImm(NegFrameSize >> 16); BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) .addReg(ScratchReg, RegState::Kill) .addImm(NegFrameSize & 0xFFFF); BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) .addReg(SPReg, RegState::Kill) .addReg(SPReg) .addReg(ScratchReg); } // Add Call Frame Information for the instructions we generated above. if (needsCFI) { unsigned CFIIndex; if (HasBP) { // Define CFA in terms of BP. Do this in preference to using FP/SP, // because if the stack needed aligning then CFA won't be at a fixed // offset from FP/SP. unsigned Reg = MRI->getDwarfRegNum(BPReg, true); CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); } else { // Adjust the definition of CFA to account for the change in SP. assert(NegFrameSize); CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize)); } BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); if (HasFP) { // Describe where FP was saved, at a fixed offset from CFA. unsigned Reg = MRI->getDwarfRegNum(FPReg, true); CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg, FPOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } if (FI->usesPICBase()) { // Describe where FP was saved, at a fixed offset from CFA. unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true); CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } if (HasBP) { // Describe where BP was saved, at a fixed offset from CFA. unsigned Reg = MRI->getDwarfRegNum(BPReg, true); CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg, BPOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } if (MustSaveLR) { // Describe where LR was saved, at a fixed offset from CFA. unsigned Reg = MRI->getDwarfRegNum(LRReg, true); CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg, LROffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } } // If there is a frame pointer, copy R1 into R31 if (HasFP) { BuildMI(MBB, MBBI, dl, OrInst, FPReg) .addReg(SPReg) .addReg(SPReg); if (!HasBP && needsCFI) { // Change the definition of CFA from SP+offset to FP+offset, because SP // will change at every alloca. 
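An illustrative check of the large-frame path above: a negative frame size that does not fit a signed 16-bit immediate is rebuilt from a LIS of the high half and an ORI of the low half before the indexed store-with-update; the example size is assumed.

#include <cassert>
#include <cstdint>

int main() {
  const int32_t NegFrameSize = -40000;          // frame of 40000 bytes
  assert(NegFrameSize < INT16_MIN);             // too large for STWU/STDU
  const uint32_t Bits = (uint32_t)NegFrameSize;
  const uint16_t Hi = Bits >> 16;               // LIS immediate
  const uint16_t Lo = Bits & 0xFFFF;            // ORI immediate
  assert((int32_t)(((uint32_t)Hi << 16) | Lo) == NegFrameSize);
  return 0;
}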
unsigned Reg = MRI->getDwarfRegNum(FPReg, true); unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } } if (needsCFI) { // Describe where callee saved registers were saved, at fixed offsets from // CFA. const std::vector &CSI = MFI->getCalleeSavedInfo(); for (unsigned I = 0, E = CSI.size(); I != E; ++I) { unsigned Reg = CSI[I].getReg(); if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just // subregisters of CR2. We just need to emit a move of CR2. if (PPC::CRBITRCRegClass.contains(Reg)) continue; // For SVR4, don't emit a move for the CR spill slot if we haven't // spilled CRs. if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) - && MustSaveCRs.empty()) + && !MustSaveCR) continue; // For 64-bit SVR4 when we have spilled CRs, the spill location // is SP+8, not a frame-relative slot. if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for // the whole CR word. In the ELFv2 ABI, every CR that was // actually saved gets its own CFI record. unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2; unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(CRReg, true), 8)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); continue; } int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } } } void PPCFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); DebugLoc dl; if (MBBI != MBB.end()) dl = MBBI->getDebugLoc(); const PPCInstrInfo &TII = *static_cast(Subtarget.getInstrInfo()); const PPCRegisterInfo *RegInfo = static_cast(Subtarget.getRegisterInfo()); // Get alignment info so we know how to restore the SP. const MachineFrameInfo *MFI = MF.getFrameInfo(); // Get the number of bytes allocated from the FrameInfo. int FrameSize = MFI->getStackSize(); // Get processor type. bool isPPC64 = Subtarget.isPPC64(); // Get the ABI. bool isSVR4ABI = Subtarget.isSVR4ABI(); // Check if the link register (LR) has been saved. PPCFunctionInfo *FI = MF.getInfo(); bool MustSaveLR = FI->mustSaveLR(); const SmallVectorImpl &MustSaveCRs = FI->getMustSaveCRs(); + bool MustSaveCR = !MustSaveCRs.empty(); // Do we have a frame pointer and/or base pointer for this function? bool HasFP = hasFP(MF); bool HasBP = RegInfo->hasBasePointer(MF); unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; unsigned BPReg = RegInfo->getBaseRegister(MF); unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; unsigned ScratchReg = 0; unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8 : PPC::MTLR ); const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD : PPC::LWZ ); const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8 : PPC::LIS ); const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8 : PPC::ORI ); const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8 : PPC::ADDI ); const MCInstrDesc& AddInst = TII.get( isPPC64 ? 
PPC::ADD8 : PPC::ADD4 ); - + int LROffset = getReturnSaveOffset(); int FPOffset = 0; - findScratchRegister(&MBB, true, &ScratchReg); - assert(ScratchReg && "No scratch register!"); - + // Using the same bool variable as below to supress compiler warnings. + bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg, + &TempReg); + assert(SingleScratchReg && + "Could not find an available scratch register"); + + SingleScratchReg = ScratchReg == TempReg; + if (HasFP) { if (isSVR4ABI) { MachineFrameInfo *FFI = MF.getFrameInfo(); int FPIndex = FI->getFramePointerSaveIndex(); assert(FPIndex && "No Frame Pointer Save Slot!"); FPOffset = FFI->getObjectOffset(FPIndex); } else { FPOffset = getFramePointerSaveOffset(); } } int BPOffset = 0; if (HasBP) { if (isSVR4ABI) { MachineFrameInfo *FFI = MF.getFrameInfo(); int BPIndex = FI->getBasePointerSaveIndex(); assert(BPIndex && "No Base Pointer Save Slot!"); BPOffset = FFI->getObjectOffset(BPIndex); } else { BPOffset = getBasePointerSaveOffset(); } } int PBPOffset = 0; if (FI->usesPICBase()) { MachineFrameInfo *FFI = MF.getFrameInfo(); int PBPIndex = FI->getPICBasePointerSaveIndex(); assert(PBPIndex && "No PIC Base Pointer Save Slot!"); PBPOffset = FFI->getObjectOffset(PBPIndex); } bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn()); if (IsReturnBlock) { unsigned RetOpcode = MBBI->getOpcode(); bool UsesTCRet = RetOpcode == PPC::TCRETURNri || RetOpcode == PPC::TCRETURNdi || RetOpcode == PPC::TCRETURNai || RetOpcode == PPC::TCRETURNri8 || RetOpcode == PPC::TCRETURNdi8 || RetOpcode == PPC::TCRETURNai8; if (UsesTCRet) { int MaxTCRetDelta = FI->getTailCallSPDelta(); MachineOperand &StackAdjust = MBBI->getOperand(1); assert(StackAdjust.isImm() && "Expecting immediate value."); // Adjust stack pointer. int StackAdj = StackAdjust.getImm(); int Delta = StackAdj - MaxTCRetDelta; assert((Delta >= 0) && "Delta must be positive"); if (MaxTCRetDelta>0) FrameSize += (StackAdj +Delta); else FrameSize += StackAdj; } } // Frames of 32KB & larger require special handling because they cannot be // indexed into with a simple LD/LWZ immediate offset operand. bool isLargeFrame = !isInt<16>(FrameSize); if (FrameSize) { // In the prologue, the loaded (or persistent) stack pointer value is offset // by the STDU/STDUX/STWU/STWUX instruction. Add this offset back now. // If this function contained a fastcc call and GuaranteedTailCallOpt is // enabled (=> hasFastCall()==true) the fastcc call might contain a tail // call which invalidates the stack pointer value in SP(0). So we use the // value of R31 in this case. if (FI->hasFastCall()) { assert(HasFP && "Expecting a valid frame pointer."); if (!isLargeFrame) { BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) .addReg(FPReg).addImm(FrameSize); } else { BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) .addImm(FrameSize >> 16); BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) .addReg(ScratchReg, RegState::Kill) .addImm(FrameSize & 0xFFFF); BuildMI(MBB, MBBI, dl, AddInst) .addReg(SPReg) .addReg(FPReg) .addReg(ScratchReg); } } else if (!isLargeFrame && !HasBP && !MFI->hasVarSizedObjects()) { BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) .addReg(SPReg) .addImm(FrameSize); } else { BuildMI(MBB, MBBI, dl, LoadInst, SPReg) .addImm(0) .addReg(SPReg); } } + assert((isPPC64 || !MustSaveCR) && + "Epilogue CR restoring supported only in 64-bit mode"); + + // If we need to save both the LR and the CR and we only have one available + // scratch register, we must do them one at a time. 
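// --- Illustrative sketch (editor-added, not part of this patch) --------------
// isLargeFrame above is !isInt<16>(FrameSize): any frame outside the signed
// 16-bit immediate range of LD/LWZ takes the multi-instruction path. A minimal
// stand-in for that range check:
#include <cassert>
#include <cstdint>

bool fitsInSignedImm16(int64_t V) { return V >= -32768 && V <= 32767; }

int main() {
  assert(fitsInSignedImm16(32767));   // largest simple-offset frame
  assert(!fitsInSignedImm16(32768));  // "32KB & larger" needs special handling
  return 0;
}
// --- end of sketch ------------------------------------------------------------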
+ if (MustSaveCR && SingleScratchReg && MustSaveLR) { + BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) + .addImm(8) + .addReg(SPReg); + for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) + BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) + .addReg(TempReg, getKillRegState(i == e-1)); + } + if (MustSaveLR) BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) .addImm(LROffset) .addReg(SPReg); - assert((isPPC64 || MustSaveCRs.empty()) && - "Epilogue CR restoring supported only in 64-bit mode"); - - if (!MustSaveCRs.empty()) // will only occur for PPC64 + if (MustSaveCR && + !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64 BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) .addImm(8) .addReg(SPReg); if (HasFP) BuildMI(MBB, MBBI, dl, LoadInst, FPReg) .addImm(FPOffset) .addReg(SPReg); if (FI->usesPICBase()) // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe. BuildMI(MBB, MBBI, dl, LoadInst) .addReg(PPC::R30) .addImm(PBPOffset) .addReg(SPReg); if (HasBP) BuildMI(MBB, MBBI, dl, LoadInst, BPReg) .addImm(BPOffset) .addReg(SPReg); - if (!MustSaveCRs.empty()) // will only occur for PPC64 + if (MustSaveCR && + !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) .addReg(TempReg, getKillRegState(i == e-1)); if (MustSaveLR) BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg); // Callee pop calling convention. Pop parameter/linkage area. Used for tail // call optimization if (IsReturnBlock) { unsigned RetOpcode = MBBI->getOpcode(); if (MF.getTarget().Options.GuaranteedTailCallOpt && (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) && MF.getFunction()->getCallingConv() == CallingConv::Fast) { PPCFunctionInfo *FI = MF.getInfo(); unsigned CallerAllocatedAmt = FI->getMinReservedArea(); if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) .addReg(SPReg).addImm(CallerAllocatedAmt); } else { BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) .addImm(CallerAllocatedAmt >> 16); BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) .addReg(ScratchReg, RegState::Kill) .addImm(CallerAllocatedAmt & 0xFFFF); BuildMI(MBB, MBBI, dl, AddInst) .addReg(SPReg) .addReg(FPReg) .addReg(ScratchReg); } } else if (RetOpcode == PPC::TCRETURNdi) { MBBI = MBB.getLastNonDebugInstr(); MachineOperand &JumpTarget = MBBI->getOperand(0); BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); } else if (RetOpcode == PPC::TCRETURNri) { MBBI = MBB.getLastNonDebugInstr(); assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR)); } else if (RetOpcode == PPC::TCRETURNai) { MBBI = MBB.getLastNonDebugInstr(); MachineOperand &JumpTarget = MBBI->getOperand(0); BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm()); } else if (RetOpcode == PPC::TCRETURNdi8) { MBBI = MBB.getLastNonDebugInstr(); MachineOperand &JumpTarget = MBBI->getOperand(0); BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). 
addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); } else if (RetOpcode == PPC::TCRETURNri8) { MBBI = MBB.getLastNonDebugInstr(); assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8)); } else if (RetOpcode == PPC::TCRETURNai8) { MBBI = MBB.getLastNonDebugInstr(); MachineOperand &JumpTarget = MBBI->getOperand(0); BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm()); } } } void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const { TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); const PPCRegisterInfo *RegInfo = static_cast(Subtarget.getRegisterInfo()); // Save and clear the LR state. PPCFunctionInfo *FI = MF.getInfo(); unsigned LR = RegInfo->getRARegister(); FI->setMustSaveLR(MustSaveLR(MF, LR)); SavedRegs.reset(LR); // Save R31 if necessary int FPSI = FI->getFramePointerSaveIndex(); bool isPPC64 = Subtarget.isPPC64(); bool isDarwinABI = Subtarget.isDarwinABI(); MachineFrameInfo *MFI = MF.getFrameInfo(); // If the frame pointer save index hasn't been defined yet. if (!FPSI && needsFP(MF)) { // Find out what the fix offset of the frame pointer save area. int FPOffset = getFramePointerSaveOffset(); // Allocate the frame index for frame pointer save area. FPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); // Save the result. FI->setFramePointerSaveIndex(FPSI); } int BPSI = FI->getBasePointerSaveIndex(); if (!BPSI && RegInfo->hasBasePointer(MF)) { int BPOffset = getBasePointerSaveOffset(); // Allocate the frame index for the base pointer save area. BPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, BPOffset, true); // Save the result. FI->setBasePointerSaveIndex(BPSI); } // Reserve stack space for the PIC Base register (R30). // Only used in SVR4 32-bit. if (FI->usesPICBase()) { int PBPSI = MFI->CreateFixedObject(4, -8, true); FI->setPICBasePointerSaveIndex(PBPSI); } // Reserve stack space to move the linkage area to in case of a tail call. int TCSPDelta = 0; if (MF.getTarget().Options.GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) { MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); } // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the // function uses CR 2, 3, or 4. if (!isPPC64 && !isDarwinABI && (SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) || SavedRegs.test(PPC::CR4))) { int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); FI->setCRSpillFrameIndex(FrameIdx); } } void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const { // Early exit if not using the SVR4 ABI. if (!Subtarget.isSVR4ABI()) { addScavengingSpillSlot(MF, RS); return; } // Get callee saved register information. MachineFrameInfo *FFI = MF.getFrameInfo(); const std::vector &CSI = FFI->getCalleeSavedInfo(); // Early exit if no callee saved registers are modified! 
if (CSI.empty() && !needsFP(MF)) { addScavengingSpillSlot(MF, RS); return; } unsigned MinGPR = PPC::R31; unsigned MinG8R = PPC::X31; unsigned MinFPR = PPC::F31; unsigned MinVR = PPC::V31; bool HasGPSaveArea = false; bool HasG8SaveArea = false; bool HasFPSaveArea = false; bool HasVRSAVESaveArea = false; bool HasVRSaveArea = false; SmallVector GPRegs; SmallVector G8Regs; SmallVector FPRegs; SmallVector VRegs; for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); if (PPC::GPRCRegClass.contains(Reg)) { HasGPSaveArea = true; GPRegs.push_back(CSI[i]); if (Reg < MinGPR) { MinGPR = Reg; } } else if (PPC::G8RCRegClass.contains(Reg)) { HasG8SaveArea = true; G8Regs.push_back(CSI[i]); if (Reg < MinG8R) { MinG8R = Reg; } } else if (PPC::F8RCRegClass.contains(Reg)) { HasFPSaveArea = true; FPRegs.push_back(CSI[i]); if (Reg < MinFPR) { MinFPR = Reg; } } else if (PPC::CRBITRCRegClass.contains(Reg) || PPC::CRRCRegClass.contains(Reg)) { ; // do nothing, as we already know whether CRs are spilled } else if (PPC::VRSAVERCRegClass.contains(Reg)) { HasVRSAVESaveArea = true; } else if (PPC::VRRCRegClass.contains(Reg)) { HasVRSaveArea = true; VRegs.push_back(CSI[i]); if (Reg < MinVR) { MinVR = Reg; } } else { llvm_unreachable("Unknown RegisterClass!"); } } PPCFunctionInfo *PFI = MF.getInfo(); const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); int64_t LowerBound = 0; // Take into account stack space reserved for tail calls. int TCSPDelta = 0; if (MF.getTarget().Options.GuaranteedTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { LowerBound = TCSPDelta; } // The Floating-point register save area is right below the back chain word // of the previous stack frame. if (HasFPSaveArea) { for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) { int FI = FPRegs[i].getFrameIdx(); FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8; } // Check whether the frame pointer register is allocated. If so, make sure it // is spilled to the correct offset. if (needsFP(MF)) { HasGPSaveArea = true; int FI = PFI->getFramePointerSaveIndex(); assert(FI && "No Frame Pointer Save Slot!"); FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } if (PFI->usesPICBase()) { HasGPSaveArea = true; int FI = PFI->getPICBasePointerSaveIndex(); assert(FI && "No PIC Base Pointer Save Slot!"); FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } const PPCRegisterInfo *RegInfo = static_cast(Subtarget.getRegisterInfo()); if (RegInfo->hasBasePointer(MF)) { HasGPSaveArea = true; int FI = PFI->getBasePointerSaveIndex(); assert(FI && "No Base Pointer Save Slot!"); FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } // General register save area starts right below the Floating-point // register save area. if (HasGPSaveArea || HasG8SaveArea) { // Move general register save area spill slots down, taking into account // the size of the Floating-point register save area. for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { int FI = GPRegs[i].getFrameIdx(); FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } // Move general register save area spill slots down, taking into account // the size of the Floating-point register save area. 
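// --- Illustrative sketch (editor-added, not part of this patch) --------------
// The FPR save area size computed above is (31 - encoding(MinFPR) + 1) * 8:
// every FPR from the lowest-numbered saved register up to F31 gets an 8-byte
// slot. Assuming F14 is the lowest saved FPR (a hypothetical example):
#include <cassert>

int main() {
  unsigned MinFPREncoding = 14;                     // hypothetical: F14..F31 saved
  unsigned SaveAreaBytes = (31 - MinFPREncoding + 1) * 8;
  assert(SaveAreaBytes == 18 * 8);                  // 18 registers, 144 bytes
  return 0;
}
// --- end of sketch ------------------------------------------------------------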
for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) { int FI = G8Regs[i].getFrameIdx(); FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } unsigned MinReg = std::min(TRI->getEncodingValue(MinGPR), TRI->getEncodingValue(MinG8R)); if (Subtarget.isPPC64()) { LowerBound -= (31 - MinReg + 1) * 8; } else { LowerBound -= (31 - MinReg + 1) * 4; } } // For 32-bit only, the CR save area is below the general register // save area. For 64-bit SVR4, the CR save area is addressed relative // to the stack pointer and hence does not need an adjustment here. // Only CR2 (the first nonvolatile spilled) has an associated frame // index so that we have a single uniform save area. if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) { // Adjust the frame index of the CR spill slot. for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2) // Leave Darwin logic as-is. || (!Subtarget.isSVR4ABI() && (PPC::CRBITRCRegClass.contains(Reg) || PPC::CRRCRegClass.contains(Reg)))) { int FI = CSI[i].getFrameIdx(); FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } } LowerBound -= 4; // The CR save area is always 4 bytes long. } if (HasVRSAVESaveArea) { // FIXME SVR4: Is it actually possible to have multiple elements in CSI // which have the VRSAVE register class? // Adjust the frame index of the VRSAVE spill slot. for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); if (PPC::VRSAVERCRegClass.contains(Reg)) { int FI = CSI[i].getFrameIdx(); FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } } LowerBound -= 4; // The VRSAVE save area is always 4 bytes long. } if (HasVRSaveArea) { // Insert alignment padding, we need 16-byte alignment. LowerBound = (LowerBound - 15) & ~(15); for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { int FI = VRegs[i].getFrameIdx(); FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } } addScavengingSpillSlot(MF, RS); } void PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const { // Reserve a slot closest to SP or frame pointer if we have a dynalloc or // a large stack, which will require scavenging a register to materialize a // large offset. // We need to have a scavenger spill slot for spills if the frame size is // large. In case there is no free register for large-offset addressing, // this slot is used for the necessary emergency spill. Also, we need the // slot for dynamic stack allocations. // The scavenger might be invoked if the frame offset does not fit into // the 16-bit immediate. We don't know the complete frame size here // because we've not yet computed callee-saved register spills or the // needed alignment padding. unsigned StackSize = determineFrameLayout(MF, false, true); MachineFrameInfo *MFI = MF.getFrameInfo(); if (MFI->hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC; RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false)); // Might we have over-aligned allocas? bool HasAlVars = MFI->hasVarSizedObjects() && MFI->getMaxAlignment() > getStackAlignment(); // These kinds of spills might need two registers. 
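// --- Illustrative sketch (editor-added, not part of this patch) --------------
// The VR save area padding above uses (LowerBound - 15) & ~15 on a negative,
// downward-growing offset: the result is a 16-byte-aligned value at or below
// the current offset. A quick check of that arithmetic:
#include <cassert>
#include <cstdint>

int64_t alignDown16(int64_t X) { return (X - 15) & ~int64_t(15); }

int main() {
  int64_t LowerBound = -100;                 // hypothetical running offset
  int64_t Aligned = alignDown16(LowerBound);
  assert(Aligned == -128 && Aligned % 16 == 0 && Aligned <= LowerBound);
  return 0;
}
// --- end of sketch ------------------------------------------------------------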
if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars) RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false)); } } bool PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, const TargetRegisterInfo *TRI) const { // Currently, this function only handles SVR4 32- and 64-bit ABIs. // Return false otherwise to maintain pre-existing behavior. if (!Subtarget.isSVR4ABI()) return false; MachineFunction *MF = MBB.getParent(); const PPCInstrInfo &TII = *static_cast(Subtarget.getInstrInfo()); DebugLoc DL; bool CRSpilled = false; MachineInstrBuilder CRMIB; for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); // Only Darwin actually uses the VRSAVE register, but it can still appear // here if, for example, @llvm.eh.unwind.init() is used. If we're not on // Darwin, ignore it. if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI()) continue; // CR2 through CR4 are the nonvolatile CR fields. bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; // Add the callee-saved register as live-in; it's killed at the spill. MBB.addLiveIn(Reg); if (CRSpilled && IsCRField) { CRMIB.addReg(Reg, RegState::ImplicitKill); continue; } // Insert the spill to the stack frame. if (IsCRField) { PPCFunctionInfo *FuncInfo = MF->getInfo(); if (Subtarget.isPPC64()) { // The actual spill will happen at the start of the prologue. FuncInfo->addMustSaveCR(Reg); } else { CRSpilled = true; FuncInfo->setSpillsCR(); // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12) .addReg(Reg, RegState::ImplicitKill); MBB.insert(MI, CRMIB); MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) .addReg(PPC::R12, getKillRegState(true)), CSI[i].getFrameIdx())); } } else { const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RC, TRI); } } return true; } static void restoreCRs(bool isPPC64, bool is31, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, unsigned CSIIndex) { MachineFunction *MF = MBB.getParent(); const PPCInstrInfo &TII = *MF->getSubtarget().getInstrInfo(); DebugLoc DL; unsigned RestoreOp, MoveReg; if (isPPC64) // This is handled during epilogue generation. return; else { // 32-bit: FP-relative MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), PPC::R12), CSI[CSIIndex].getFrameIdx())); RestoreOp = PPC::MTOCRF; MoveReg = PPC::R12; } if (CR2Spilled) MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); if (CR3Spilled) MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) .addReg(MoveReg, getKillRegState(!CR4Spilled))); if (CR4Spilled) MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) .addReg(MoveReg, getKillRegState(true))); } void PPCFrameLowering:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); if (MF.getTarget().Options.GuaranteedTailCallOpt && I->getOpcode() == PPC::ADJCALLSTACKUP) { // Add (actually subtract) back the amount the callee popped on return. 
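// --- Illustrative sketch (editor-added, not part of this patch) --------------
// The MTOCRF sequences above reuse one temporary register for several CR
// fields and mark it killed only on its final use (getKillRegState on the last
// element). A simplified stand-in for that "kill on last use" pattern:
#include <cassert>
#include <vector>

int main() {
  std::vector<unsigned> CRs = {2, 3, 4};     // CR fields restored from one temp
  std::vector<bool> KillFlags;
  for (unsigned I = 0, E = CRs.size(); I != E; ++I)
    KillFlags.push_back(I == E - 1);         // only the final use kills the temp
  assert((KillFlags == std::vector<bool>{false, false, true}));
  return 0;
}
// --- end of sketch ------------------------------------------------------------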
if (int CalleeAmt = I->getOperand(1).getImm()) { bool is64Bit = Subtarget.isPPC64(); CalleeAmt *= -1; unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; MachineInstr *MI = I; DebugLoc dl = MI->getDebugLoc(); if (isInt<16>(CalleeAmt)) { BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) .addReg(StackReg, RegState::Kill) .addImm(CalleeAmt); } else { MachineBasicBlock::iterator MBBI = I; BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) .addImm(CalleeAmt >> 16); BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) .addReg(TmpReg, RegState::Kill) .addImm(CalleeAmt & 0xFFFF); BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) .addReg(StackReg, RegState::Kill) .addReg(TmpReg); } } } // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. MBB.erase(I); } bool PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, const TargetRegisterInfo *TRI) const { // Currently, this function only handles SVR4 32- and 64-bit ABIs. // Return false otherwise to maintain pre-existing behavior. if (!Subtarget.isSVR4ABI()) return false; MachineFunction *MF = MBB.getParent(); const PPCInstrInfo &TII = *static_cast(Subtarget.getInstrInfo()); bool CR2Spilled = false; bool CR3Spilled = false; bool CR4Spilled = false; unsigned CSIIndex = 0; // Initialize insertion-point logic; we will be restoring in reverse // order of spill. MachineBasicBlock::iterator I = MI, BeforeI = I; bool AtStart = I == MBB.begin(); if (!AtStart) --BeforeI; for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); // Only Darwin actually uses the VRSAVE register, but it can still appear // here if, for example, @llvm.eh.unwind.init() is used. If we're not on // Darwin, ignore it. if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI()) continue; if (Reg == PPC::CR2) { CR2Spilled = true; // The spill slot is associated only with CR2, which is the // first nonvolatile spilled. Save it here. CSIIndex = i; continue; } else if (Reg == PPC::CR3) { CR3Spilled = true; continue; } else if (Reg == PPC::CR4) { CR4Spilled = true; continue; } else { // When we first encounter a non-CR register after seeing at // least one CR register, restore all spilled CRs together. if ((CR2Spilled || CR3Spilled || CR4Spilled) && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) { bool is31 = needsFP(*MF); restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex); CR2Spilled = CR3Spilled = CR4Spilled = false; } // Default behavior for non-CR saves. const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); assert(I != MBB.begin() && "loadRegFromStackSlot didn't insert any code!"); } // Insert in reverse order. if (AtStart) I = MBB.begin(); else { I = BeforeI; ++I; } } // If we haven't yet spilled the CRs, do so now. 
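// --- Illustrative sketch (editor-added, not part of this patch) --------------
// restoreCalleeSavedRegisters above keeps re-anchoring its insertion point
// (BeforeI/AtStart) so that restores inserted before the return end up in
// reverse order of the spill list. A toy version of that dance with std::list,
// mirroring the AtStart case:
#include <cassert>
#include <list>

int main() {
  std::list<int> Block = {100};            // pretend 100 is the return instruction
  auto I = Block.begin();                  // restore point (before the terminator)
  bool AtStart = (I == Block.begin());
  auto BeforeI = I;
  if (!AtStart) --BeforeI;
  for (int Reg : {1, 2, 3}) {              // registers in spill order
    Block.insert(I, Reg);                  // emit restore before the current point
    if (AtStart) I = Block.begin();        // re-anchor at the newly inserted restore
    else { I = BeforeI; ++I; }
  }
  assert((Block == std::list<int>{3, 2, 1, 100}));  // restores in reverse order
  return 0;
}
// --- end of sketch ------------------------------------------------------------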
if (CR2Spilled || CR3Spilled || CR4Spilled) { bool is31 = needsFP(*MF); restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex); } return true; } bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { return (MF.getSubtarget().isSVR4ABI() && MF.getSubtarget().isPPC64()); } Index: projects/clang380-import/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h =================================================================== --- projects/clang380-import/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h (revision 296010) +++ projects/clang380-import/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h (revision 296011) @@ -1,129 +1,142 @@ //===-- PPCFrameLowering.h - Define frame lowering for PowerPC --*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H #define LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H #include "PPC.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" namespace llvm { class PPCSubtarget; class PPCFrameLowering: public TargetFrameLowering { const PPCSubtarget &Subtarget; const unsigned ReturnSaveOffset; const unsigned TOCSaveOffset; const unsigned FramePointerSaveOffset; const unsigned LinkageSize; const unsigned BasePointerSaveOffset; /** - * \brief Find a register that can be used in function prologue and epilogue + * \brief Find register[s] that can be used in function prologue and epilogue * - * Find a register that can be use as the scratch register in function + * Find register[s] that can be use as scratch register[s] in function * prologue and epilogue to save various registers (Link Register, Base - * Pointer, etc.). Prefer R0, if it is available. If it is not available, - * then choose a different register. + * Pointer, etc.). Prefer R0/R12, if available. Otherwise choose whatever + * register[s] are available. * - * This method will return true if an available register was found (including - * R0). If no available registers are found, the method returns false and sets - * ScratchRegister to R0, as per the recommendation in the ABI. + * This method will return true if it is able to find enough unique scratch + * registers (1 or 2 depending on the requirement). If it is unable to find + * enough available registers in the block, it will return false and set + * any passed output parameter that corresponds to a required unique register + * to PPC::NoRegister. * * \param[in] MBB The machine basic block to find an available register for * \param[in] UseAtEnd Specify whether the scratch register will be used at * the end of the basic block (i.e., will the scratch * register kill a register defined in the basic block) - * \param[out] ScratchRegister The scratch register to use - * \return true if a scratch register was found. false of a scratch register - * was not found and R0 is being used as the default. + * \param[in] TwoUniqueRegsRequired Specify whether this basic block will + * require two unique scratch registers. + * \param[out] SR1 The scratch register to use + * \param[out] SR2 The second scratch register. 
If this pointer is not null + * the function will attempt to set it to an available + * register regardless of whether there is a hard requirement + * for two unique scratch registers. + * \return true if the required number of registers was found. + * false if the required number of scratch register weren't available. + * If either output parameter refers to a required scratch register + * that isn't available, it will be set to an invalid value. */ bool findScratchRegister(MachineBasicBlock *MBB, bool UseAtEnd, - unsigned *ScratchRegister) const; + bool TwoUniqueRegsRequired = false, + unsigned *SR1 = nullptr, + unsigned *SR2 = nullptr) const; + bool twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const; public: PPCFrameLowering(const PPCSubtarget &STI); unsigned determineFrameLayout(MachineFunction &MF, bool UpdateMF = true, bool UseEstimate = false) const; /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; bool hasFP(const MachineFunction &MF) const override; bool needsFP(const MachineFunction &MF) const; void replaceFPWithRealFP(MachineFunction &MF) const; void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS = nullptr) const override; void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS = nullptr) const override; void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, const TargetRegisterInfo *TRI) const override; void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override; bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, const TargetRegisterInfo *TRI) const override; /// targetHandlesStackFrameRounding - Returns true if the target is /// responsible for rounding up the stack frame (probably at emitPrologue /// time). bool targetHandlesStackFrameRounding() const override { return true; } /// getReturnSaveOffset - Return the previous frame offset to save the /// return address. unsigned getReturnSaveOffset() const { return ReturnSaveOffset; } /// getTOCSaveOffset - Return the previous frame offset to save the /// TOC register -- 64-bit SVR4 ABI only. unsigned getTOCSaveOffset() const { return TOCSaveOffset; } /// getFramePointerSaveOffset - Return the previous frame offset to save the /// frame pointer. unsigned getFramePointerSaveOffset() const { return FramePointerSaveOffset; } /// getBasePointerSaveOffset - Return the previous frame offset to save the /// base pointer. unsigned getBasePointerSaveOffset() const { return BasePointerSaveOffset; } /// getLinkageSize - Return the size of the PowerPC ABI linkage area. /// unsigned getLinkageSize() const { return LinkageSize; } const SpillSlot * getCalleeSavedSpillSlots(unsigned &NumEntries) const override; bool enableShrinkWrapping(const MachineFunction &MF) const override; /// Methods used by shrink wrapping to determine if MBB can be used for the /// function prologue/epilogue. 
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override; bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override; }; } // End llvm namespace #endif Index: projects/clang380-import/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- projects/clang380-import/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp (revision 296010) +++ projects/clang380-import/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp (revision 296011) @@ -1,28922 +1,28956 @@ //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines the interfaces that X86 uses to lower LLVM code into a // selection DAG. // //===----------------------------------------------------------------------===// #include "X86ISelLowering.h" #include "Utils/X86ShuffleDecode.h" #include "X86CallingConv.h" #include "X86FrameLowering.h" #include "X86InstrBuilder.h" #include "X86MachineFunctionInfo.h" #include "X86ShuffleDecodeConstantPool.h" #include "X86TargetMachine.h" #include "X86TargetObjectFile.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOptions.h" #include "X86IntrinsicsInfo.h" #include #include #include using namespace llvm; #define DEBUG_TYPE "x86-isel" STATISTIC(NumTailCalls, "Number of tail calls"); static cl::opt ExperimentalVectorWideningLegalization( "x86-experimental-vector-widening-legalization", cl::init(false), cl::desc("Enable an experimental vector type legalization through widening " "rather than promotion."), cl::Hidden); X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, const X86Subtarget &STI) : TargetLowering(TM), Subtarget(&STI) { X86ScalarSSEf64 = Subtarget->hasSSE2(); X86ScalarSSEf32 = Subtarget->hasSSE1(); MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize()); // Set up the TargetLowering object. // X86 is weird. It always uses i8 for shift amounts and setcc results. setBooleanContents(ZeroOrOneBooleanContent); // X86-SSE is even stranger. It uses -1 or 0 for vector masks. setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // For 64-bit, since we have so many registers, use the ILP scheduler. 
// For 32-bit, use the register pressure specific scheduling. // For Atom, always use ILP scheduling. if (Subtarget->isAtom()) setSchedulingPreference(Sched::ILP); else if (Subtarget->is64Bit()) setSchedulingPreference(Sched::ILP); else setSchedulingPreference(Sched::RegPressure); const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister()); // Bypass expensive divides on Atom when compiling with O2. if (TM.getOptLevel() >= CodeGenOpt::Default) { if (Subtarget->hasSlowDivide32()) addBypassSlowDiv(32, 8); if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit()) addBypassSlowDiv(64, 16); } if (Subtarget->isTargetKnownWindowsMSVC()) { // Setup Windows compiler runtime calls. setLibcallName(RTLIB::SDIV_I64, "_alldiv"); setLibcallName(RTLIB::UDIV_I64, "_aulldiv"); setLibcallName(RTLIB::SREM_I64, "_allrem"); setLibcallName(RTLIB::UREM_I64, "_aullrem"); setLibcallName(RTLIB::MUL_I64, "_allmul"); setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall); setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall); setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall); setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall); setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall); } if (Subtarget->isTargetDarwin()) { // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp. setUseUnderscoreSetJmp(false); setUseUnderscoreLongJmp(false); } else if (Subtarget->isTargetWindowsGNU()) { // MS runtime is weird: it exports _setjmp, but longjmp! setUseUnderscoreSetJmp(true); setUseUnderscoreLongJmp(false); } else { setUseUnderscoreSetJmp(true); setUseUnderscoreLongJmp(true); } // Set up the register classes. addRegisterClass(MVT::i8, &X86::GR8RegClass); addRegisterClass(MVT::i16, &X86::GR16RegClass); addRegisterClass(MVT::i32, &X86::GR32RegClass); if (Subtarget->is64Bit()) addRegisterClass(MVT::i64, &X86::GR64RegClass); for (MVT VT : MVT::integer_valuetypes()) setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); // We don't accept any truncstore of integer registers. setTruncStoreAction(MVT::i64, MVT::i32, Expand); setTruncStoreAction(MVT::i64, MVT::i16, Expand); setTruncStoreAction(MVT::i64, MVT::i8 , Expand); setTruncStoreAction(MVT::i32, MVT::i16, Expand); setTruncStoreAction(MVT::i32, MVT::i8 , Expand); setTruncStoreAction(MVT::i16, MVT::i8, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); // SETOEQ and SETUNE require checking two conditions. setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand); setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand); setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand); setCondCodeAction(ISD::SETUNE, MVT::f32, Expand); setCondCodeAction(ISD::SETUNE, MVT::f64, Expand); setCondCodeAction(ISD::SETUNE, MVT::f80, Expand); // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this // operation. setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote); setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote); setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote); if (Subtarget->is64Bit()) { if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) // f32/f64 are legal, f80 is custom. setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); else setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); } else if (!Subtarget->useSoftFloat()) { // We have an algorithm for SSE2->double, and we turn this into a // 64-bit FILD followed by conditional FADD for other targets. 
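// --- Illustrative sketch (editor-added, not part of this patch) --------------
// Rough shape of the "64-bit FILD followed by conditional FADD" idea mentioned
// above, not the exact DAG lowering: convert as signed i64, then add 2^64 back
// when the top bit was set (two's-complement wrap of the cast assumed).
#include <cassert>
#include <cstdint>

double u64ToDouble(uint64_t U) {
  double D = static_cast<double>(static_cast<int64_t>(U)); // signed-style convert
  if (static_cast<int64_t>(U) < 0)                         // top bit was set
    D += 18446744073709551616.0;                           // += 2^64
  return D;
}

int main() {
  assert(u64ToDouble(42) == 42.0);
  assert(u64ToDouble(0x8000000000000000ull) == 9223372036854775808.0);
  return 0;
}
// --- end of sketch ------------------------------------------------------------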
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); // We have an algorithm for SSE2, and we turn this into a 64-bit // FILD or VCVTUSI2SS/SD for other targets. setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); } // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have // this operation. setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote); if (!Subtarget->useSoftFloat()) { // SSE has no i16 to fp conversion, only i32 if (X86ScalarSSEf32) { setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); // f32 and f64 cases are Legal, f80 case is not setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom); } else { setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom); setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom); } } else { setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote); } // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have // this operation. setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote); setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote); if (!Subtarget->useSoftFloat()) { // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64 // are Legal, f80 is custom lowered. setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom); setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom); if (X86ScalarSSEf32) { setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); // f32 and f64 cases are Legal, f80 case is not setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); } else { setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom); setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); } } else { setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand); setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand); } // Handle FP_TO_UINT by promoting the destination to a larger signed // conversion. setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote); setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote); setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote); if (Subtarget->is64Bit()) { if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) { // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80. setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom); setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom); } else { setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand); } } else if (!Subtarget->useSoftFloat()) { // Since AVX is a superset of SSE3, only check for SSE here. if (Subtarget->hasSSE1() && !Subtarget->hasSSE3()) // Expand FP_TO_UINT into a select. // FIXME: We would like to use a Custom expander here eventually to do // the optimal thing for SSE vs. the default expansion in the legalizer. setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand); else // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom. // With SSE3 we can use fisttpll to convert to a signed i64; without // SSE, we're stuck with a fistpll. setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom); setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom); } // TODO: when we have SSE, these could be more efficient, by using movd/movq. 
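// --- Illustrative sketch (editor-added, not part of this patch) --------------
// The FP_TO_UINT handling above promotes to a larger *signed* conversion (e.g.
// fisttpll producing an i64) and then truncates. A stand-alone check of why
// that works for values that fit u32 but not i32:
#include <cassert>
#include <cstdint>

uint32_t fpToU32ViaSigned(double D) {
  return static_cast<uint32_t>(static_cast<int64_t>(D)); // wider signed detour
}

int main() {
  assert(fpToU32ViaSigned(3000000000.0) == 3000000000u); // > INT32_MAX, < UINT32_MAX
  assert(fpToU32ViaSigned(7.0) == 7u);
  return 0;
}
// --- end of sketch ------------------------------------------------------------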
if (!X86ScalarSSEf64) { setOperationAction(ISD::BITCAST , MVT::f32 , Expand); setOperationAction(ISD::BITCAST , MVT::i32 , Expand); if (Subtarget->is64Bit()) { setOperationAction(ISD::BITCAST , MVT::f64 , Expand); // Without SSE, i64->f64 goes through memory. setOperationAction(ISD::BITCAST , MVT::i64 , Expand); } } else if (!Subtarget->is64Bit()) setOperationAction(ISD::BITCAST , MVT::i64 , Custom); // Scalar integer divide and remainder are lowered to use operations that // produce two results, to match the available instructions. This exposes // the two-result form to trivial CSE, which is able to combine x/y and x%y // into a single instruction. // // Scalar integer multiply-high is also lowered to use two-result // operations, to match the available instructions. However, plain multiply // (low) operations are left as Legal, as there are single-result // instructions for this in x86. Using the two-result multiply instructions // when both high and low results are needed must be arranged by dagcombine. for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::MULHU, VT, Expand); setOperationAction(ISD::SDIV, VT, Expand); setOperationAction(ISD::UDIV, VT, Expand); setOperationAction(ISD::SREM, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); // Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences. setOperationAction(ISD::ADDC, VT, Custom); setOperationAction(ISD::ADDE, VT, Custom); setOperationAction(ISD::SUBC, VT, Custom); setOperationAction(ISD::SUBE, VT, Custom); } setOperationAction(ISD::BR_JT , MVT::Other, Expand); setOperationAction(ISD::BRCOND , MVT::Other, Custom); setOperationAction(ISD::BR_CC , MVT::f32, Expand); setOperationAction(ISD::BR_CC , MVT::f64, Expand); setOperationAction(ISD::BR_CC , MVT::f80, Expand); setOperationAction(ISD::BR_CC , MVT::f128, Expand); setOperationAction(ISD::BR_CC , MVT::i8, Expand); setOperationAction(ISD::BR_CC , MVT::i16, Expand); setOperationAction(ISD::BR_CC , MVT::i32, Expand); setOperationAction(ISD::BR_CC , MVT::i64, Expand); setOperationAction(ISD::SELECT_CC , MVT::f32, Expand); setOperationAction(ISD::SELECT_CC , MVT::f64, Expand); setOperationAction(ISD::SELECT_CC , MVT::f80, Expand); setOperationAction(ISD::SELECT_CC , MVT::f128, Expand); setOperationAction(ISD::SELECT_CC , MVT::i8, Expand); setOperationAction(ISD::SELECT_CC , MVT::i16, Expand); setOperationAction(ISD::SELECT_CC , MVT::i32, Expand); setOperationAction(ISD::SELECT_CC , MVT::i64, Expand); if (Subtarget->is64Bit()) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand); if (Subtarget->is32Bit() && Subtarget->isTargetKnownWindowsMSVC()) { // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)` // is. We should promote the value to 64-bits to solve this. // This is what the CRT headers do - `fmodf` is an inline header // function casting to f64 and calling `fmod`. setOperationAction(ISD::FREM , MVT::f32 , Promote); } else { setOperationAction(ISD::FREM , MVT::f32 , Expand); } setOperationAction(ISD::FREM , MVT::f64 , Expand); setOperationAction(ISD::FREM , MVT::f80 , Expand); setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); // Promote the i8 variants and force them on up to i32 which has a shorter // encoding. 
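// --- Illustrative sketch (editor-added, not part of this patch) --------------
// One common way the i8 promotion noted above plays out: do the count at i32
// width (shorter encoding, real instruction) and adjust by the 24 extra zero
// bits the zero-extension introduced. Hedged as a general technique, not the
// exact legalizer output.
#include <cassert>
#include <cstdint>

int clz32(uint32_t X) {                       // simple portable i32 CTLZ
  int N = 0;
  for (uint32_t Mask = 0x80000000u; Mask && !(X & Mask); Mask >>= 1)
    ++N;
  return N;
}

int clz8(uint8_t X) { return clz32(X) - 24; } // i8 CTLZ via the i32 operation

int main() {
  assert(clz8(0x80) == 0);
  assert(clz8(0x01) == 7);
  assert(clz8(0x00) == 8);
  return 0;
}
// --- end of sketch ------------------------------------------------------------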
setOperationAction(ISD::CTTZ , MVT::i8 , Promote); AddPromotedToType (ISD::CTTZ , MVT::i8 , MVT::i32); setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i8 , Promote); AddPromotedToType (ISD::CTTZ_ZERO_UNDEF , MVT::i8 , MVT::i32); if (Subtarget->hasBMI()) { setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Expand); if (Subtarget->is64Bit()) setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); } else { setOperationAction(ISD::CTTZ , MVT::i16 , Custom); setOperationAction(ISD::CTTZ , MVT::i32 , Custom); if (Subtarget->is64Bit()) setOperationAction(ISD::CTTZ , MVT::i64 , Custom); } if (Subtarget->hasLZCNT()) { // When promoting the i8 variants, force them to i32 for a shorter // encoding. setOperationAction(ISD::CTLZ , MVT::i8 , Promote); AddPromotedToType (ISD::CTLZ , MVT::i8 , MVT::i32); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Promote); AddPromotedToType (ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Expand); if (Subtarget->is64Bit()) setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); } else { setOperationAction(ISD::CTLZ , MVT::i8 , Custom); setOperationAction(ISD::CTLZ , MVT::i16 , Custom); setOperationAction(ISD::CTLZ , MVT::i32 , Custom); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom); if (Subtarget->is64Bit()) { setOperationAction(ISD::CTLZ , MVT::i64 , Custom); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom); } } // Special handling for half-precision floating point conversions. // If we don't have F16C support, then lower half float conversions // into library calls. if (Subtarget->useSoftFloat() || !Subtarget->hasF16C()) { setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); } // There's never any support for operations beyond MVT::f32. setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand); setTruncStoreAction(MVT::f32, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f80, MVT::f16, Expand); if (Subtarget->hasPOPCNT()) { setOperationAction(ISD::CTPOP , MVT::i8 , Promote); } else { setOperationAction(ISD::CTPOP , MVT::i8 , Expand); setOperationAction(ISD::CTPOP , MVT::i16 , Expand); setOperationAction(ISD::CTPOP , MVT::i32 , Expand); if (Subtarget->is64Bit()) setOperationAction(ISD::CTPOP , MVT::i64 , Expand); } setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); if (!Subtarget->hasMOVBE()) setOperationAction(ISD::BSWAP , MVT::i16 , Expand); // These should be promoted to a larger select which is supported. setOperationAction(ISD::SELECT , MVT::i1 , Promote); // X86 wants to expand cmov itself. 
setOperationAction(ISD::SELECT , MVT::i8 , Custom); setOperationAction(ISD::SELECT , MVT::i16 , Custom); setOperationAction(ISD::SELECT , MVT::i32 , Custom); setOperationAction(ISD::SELECT , MVT::f32 , Custom); setOperationAction(ISD::SELECT , MVT::f64 , Custom); setOperationAction(ISD::SELECT , MVT::f80 , Custom); setOperationAction(ISD::SELECT , MVT::f128 , Custom); setOperationAction(ISD::SETCC , MVT::i8 , Custom); setOperationAction(ISD::SETCC , MVT::i16 , Custom); setOperationAction(ISD::SETCC , MVT::i32 , Custom); setOperationAction(ISD::SETCC , MVT::f32 , Custom); setOperationAction(ISD::SETCC , MVT::f64 , Custom); setOperationAction(ISD::SETCC , MVT::f80 , Custom); setOperationAction(ISD::SETCC , MVT::f128 , Custom); setOperationAction(ISD::SETCCE , MVT::i8 , Custom); setOperationAction(ISD::SETCCE , MVT::i16 , Custom); setOperationAction(ISD::SETCCE , MVT::i32 , Custom); if (Subtarget->is64Bit()) { setOperationAction(ISD::SELECT , MVT::i64 , Custom); setOperationAction(ISD::SETCC , MVT::i64 , Custom); setOperationAction(ISD::SETCCE , MVT::i64 , Custom); } setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support // SjLj exception handling but a light-weight setjmp/longjmp replacement to // support continuation, user-level threading, and etc.. As a result, no // other SjLj exception interfaces are implemented and please don't build // your own exception handling based on them. // LLVM/Clang supports zero-cost DWARF exception handling. setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); // Darwin ABI issue. setOperationAction(ISD::ConstantPool , MVT::i32 , Custom); setOperationAction(ISD::JumpTable , MVT::i32 , Custom); setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom); if (Subtarget->is64Bit()) setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom); setOperationAction(ISD::BlockAddress , MVT::i32 , Custom); if (Subtarget->is64Bit()) { setOperationAction(ISD::ConstantPool , MVT::i64 , Custom); setOperationAction(ISD::JumpTable , MVT::i64 , Custom); setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom); setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom); setOperationAction(ISD::BlockAddress , MVT::i64 , Custom); } // 64-bit addm sub, shl, sra, srl (iff 32-bit x86) setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom); setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom); setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom); if (Subtarget->is64Bit()) { setOperationAction(ISD::SHL_PARTS , MVT::i64 , Custom); setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom); setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom); } if (Subtarget->hasSSE1()) setOperationAction(ISD::PREFETCH , MVT::Other, Legal); setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom); // Expand certain atomics for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom); setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); setOperationAction(ISD::ATOMIC_STORE, VT, Custom); } if (Subtarget->hasCmpxchg16b()) { setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom); } // FIXME - use subtarget debug flags if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() && !Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) { 
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); } setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom); setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom); setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); setOperationAction(ISD::TRAP, MVT::Other, Legal); setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); setOperationAction(ISD::VAEND , MVT::Other, Expand); if (Subtarget->is64Bit()) { setOperationAction(ISD::VAARG , MVT::Other, Custom); setOperationAction(ISD::VACOPY , MVT::Other, Custom); } else { // TargetInfo::CharPtrBuiltinVaList setOperationAction(ISD::VAARG , MVT::Other, Expand); setOperationAction(ISD::VACOPY , MVT::Other, Expand); } setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom); // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering. setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom); setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom); if (!Subtarget->useSoftFloat() && X86ScalarSSEf64) { // f32 and f64 use SSE. // Set up the FP register classes. addRegisterClass(MVT::f32, &X86::FR32RegClass); addRegisterClass(MVT::f64, &X86::FR64RegClass); // Use ANDPD to simulate FABS. setOperationAction(ISD::FABS , MVT::f64, Custom); setOperationAction(ISD::FABS , MVT::f32, Custom); // Use XORP to simulate FNEG. setOperationAction(ISD::FNEG , MVT::f64, Custom); setOperationAction(ISD::FNEG , MVT::f32, Custom); // Use ANDPD and ORPD to simulate FCOPYSIGN. setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); // Lower this to FGETSIGNx86 plus an AND. setOperationAction(ISD::FGETSIGN, MVT::i64, Custom); setOperationAction(ISD::FGETSIGN, MVT::i32, Custom); // We don't support sin/cos/fmod setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); // Expand FP immediates into loads from the stack, except for the special // cases we handle. addLegalFPImmediate(APFloat(+0.0)); // xorpd addLegalFPImmediate(APFloat(+0.0f)); // xorps } else if (!Subtarget->useSoftFloat() && X86ScalarSSEf32) { // Use SSE for f32, x87 for f64. // Set up the FP register classes. addRegisterClass(MVT::f32, &X86::FR32RegClass); addRegisterClass(MVT::f64, &X86::RFP64RegClass); // Use ANDPS to simulate FABS. setOperationAction(ISD::FABS , MVT::f32, Custom); // Use XORP to simulate FNEG. setOperationAction(ISD::FNEG , MVT::f32, Custom); setOperationAction(ISD::UNDEF, MVT::f64, Expand); // Use ANDPS and ORPS to simulate FCOPYSIGN. setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); // We don't support sin/cos/fmod setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); // Special cases we handle for FP constants. 
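// --- Illustrative sketch (editor-added, not part of this patch) --------------
// "Use ANDPD to simulate FABS" above means clearing the sign bit with a
// constant mask. The same bit manipulation in portable scalar code:
#include <cassert>
#include <cstdint>
#include <cstring>

double fabsViaMask(double X) {
  uint64_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits));
  Bits &= ~(1ull << 63);            // the mask an ANDPD constant would apply
  std::memcpy(&X, &Bits, sizeof(X));
  return X;
}

int main() {
  assert(fabsViaMask(-2.5) == 2.5);
  assert(fabsViaMask(3.0) == 3.0);
  return 0;
}
// --- end of sketch ------------------------------------------------------------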
addLegalFPImmediate(APFloat(+0.0f)); // xorps addLegalFPImmediate(APFloat(+0.0)); // FLD0 addLegalFPImmediate(APFloat(+1.0)); // FLD1 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS if (!TM.Options.UnsafeFPMath) { setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); } } else if (!Subtarget->useSoftFloat()) { // f32 and f64 in x87. // Set up the FP register classes. addRegisterClass(MVT::f64, &X86::RFP64RegClass); addRegisterClass(MVT::f32, &X86::RFP32RegClass); setOperationAction(ISD::UNDEF, MVT::f64, Expand); setOperationAction(ISD::UNDEF, MVT::f32, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); if (!TM.Options.UnsafeFPMath) { setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); } addLegalFPImmediate(APFloat(+0.0)); // FLD0 addLegalFPImmediate(APFloat(+1.0)); // FLD1 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS addLegalFPImmediate(APFloat(+0.0f)); // FLD0 addLegalFPImmediate(APFloat(+1.0f)); // FLD1 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS } // We don't support FMA. setOperationAction(ISD::FMA, MVT::f64, Expand); setOperationAction(ISD::FMA, MVT::f32, Expand); // Long double always uses X87, except f128 in MMX. if (!Subtarget->useSoftFloat()) { if (Subtarget->is64Bit() && Subtarget->hasMMX()) { addRegisterClass(MVT::f128, &X86::FR128RegClass); ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat); setOperationAction(ISD::FABS , MVT::f128, Custom); setOperationAction(ISD::FNEG , MVT::f128, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom); } addRegisterClass(MVT::f80, &X86::RFP80RegClass); setOperationAction(ISD::UNDEF, MVT::f80, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); { APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended); addLegalFPImmediate(TmpFlt); // FLD0 TmpFlt.changeSign(); addLegalFPImmediate(TmpFlt); // FLD0/FCHS bool ignored; APFloat TmpFlt2(+1.0); TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven, &ignored); addLegalFPImmediate(TmpFlt2); // FLD1 TmpFlt2.changeSign(); addLegalFPImmediate(TmpFlt2); // FLD1/FCHS } if (!TM.Options.UnsafeFPMath) { setOperationAction(ISD::FSIN , MVT::f80, Expand); setOperationAction(ISD::FCOS , MVT::f80, Expand); setOperationAction(ISD::FSINCOS, MVT::f80, Expand); } setOperationAction(ISD::FFLOOR, MVT::f80, Expand); setOperationAction(ISD::FCEIL, MVT::f80, Expand); setOperationAction(ISD::FTRUNC, MVT::f80, Expand); setOperationAction(ISD::FRINT, MVT::f80, Expand); setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand); setOperationAction(ISD::FMA, MVT::f80, Expand); } // Always use a library call for pow. 
setOperationAction(ISD::FPOW , MVT::f32 , Expand); setOperationAction(ISD::FPOW , MVT::f64 , Expand); setOperationAction(ISD::FPOW , MVT::f80 , Expand); setOperationAction(ISD::FLOG, MVT::f80, Expand); setOperationAction(ISD::FLOG2, MVT::f80, Expand); setOperationAction(ISD::FLOG10, MVT::f80, Expand); setOperationAction(ISD::FEXP, MVT::f80, Expand); setOperationAction(ISD::FEXP2, MVT::f80, Expand); setOperationAction(ISD::FMINNUM, MVT::f80, Expand); setOperationAction(ISD::FMAXNUM, MVT::f80, Expand); // First set operation action for all vector types to either promote // (for widening) or expand (for scalarization). Then we will selectively // turn on ones that can be effectively codegen'd. for (MVT VT : MVT::vector_valuetypes()) { setOperationAction(ISD::ADD , VT, Expand); setOperationAction(ISD::SUB , VT, Expand); setOperationAction(ISD::FADD, VT, Expand); setOperationAction(ISD::FNEG, VT, Expand); setOperationAction(ISD::FSUB, VT, Expand); setOperationAction(ISD::MUL , VT, Expand); setOperationAction(ISD::FMUL, VT, Expand); setOperationAction(ISD::SDIV, VT, Expand); setOperationAction(ISD::UDIV, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); setOperationAction(ISD::SREM, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::LOAD, VT, Expand); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand); setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand); setOperationAction(ISD::FABS, VT, Expand); setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FSINCOS, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FSINCOS, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FMA, VT, Expand); setOperationAction(ISD::FPOWI, VT, Expand); setOperationAction(ISD::FSQRT, VT, Expand); setOperationAction(ISD::FCOPYSIGN, VT, Expand); setOperationAction(ISD::FFLOOR, VT, Expand); setOperationAction(ISD::FCEIL, VT, Expand); setOperationAction(ISD::FTRUNC, VT, Expand); setOperationAction(ISD::FRINT, VT, Expand); setOperationAction(ISD::FNEARBYINT, VT, Expand); setOperationAction(ISD::SMUL_LOHI, VT, Expand); setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::UMUL_LOHI, VT, Expand); setOperationAction(ISD::MULHU, VT, Expand); setOperationAction(ISD::SDIVREM, VT, Expand); setOperationAction(ISD::UDIVREM, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::CTPOP, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTLZ, VT, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::SHL, VT, Expand); setOperationAction(ISD::SRA, VT, Expand); setOperationAction(ISD::SRL, VT, Expand); setOperationAction(ISD::ROTL, VT, Expand); setOperationAction(ISD::ROTR, VT, Expand); setOperationAction(ISD::BSWAP, VT, Expand); setOperationAction(ISD::SETCC, VT, Expand); setOperationAction(ISD::FLOG, VT, Expand); setOperationAction(ISD::FLOG2, VT, Expand); setOperationAction(ISD::FLOG10, VT, Expand); setOperationAction(ISD::FEXP, VT, Expand); setOperationAction(ISD::FEXP2, VT, Expand); setOperationAction(ISD::FP_TO_UINT, VT, Expand); setOperationAction(ISD::FP_TO_SINT, VT, Expand); setOperationAction(ISD::UINT_TO_FP, VT, Expand); setOperationAction(ISD::SINT_TO_FP, VT, Expand); 
setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand); setOperationAction(ISD::TRUNCATE, VT, Expand); setOperationAction(ISD::SIGN_EXTEND, VT, Expand); setOperationAction(ISD::ZERO_EXTEND, VT, Expand); setOperationAction(ISD::ANY_EXTEND, VT, Expand); setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::SELECT_CC, VT, Expand); for (MVT InnerVT : MVT::vector_valuetypes()) { setTruncStoreAction(InnerVT, VT, Expand); setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand); setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand); // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like // types, we have to deal with them whether we ask for Expansion or not. // Setting Expand causes its own optimisation problems though, so leave // them legal. if (VT.getVectorElementType() == MVT::i1) setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand); // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are // split/scalarized right now. if (VT.getVectorElementType() == MVT::f16) setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand); } } // FIXME: In order to prevent SSE instructions being expanded to MMX ones // with -msoft-float, disable use of MMX as well. if (!Subtarget->useSoftFloat() && Subtarget->hasMMX()) { addRegisterClass(MVT::x86mmx, &X86::VR64RegClass); // No operations on x86mmx supported, everything uses intrinsics. } // MMX-sized vectors (other than x86mmx) are expected to be expanded // into smaller operations. for (MVT MMXTy : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64}) { setOperationAction(ISD::MULHS, MMXTy, Expand); setOperationAction(ISD::AND, MMXTy, Expand); setOperationAction(ISD::OR, MMXTy, Expand); setOperationAction(ISD::XOR, MMXTy, Expand); setOperationAction(ISD::SCALAR_TO_VECTOR, MMXTy, Expand); setOperationAction(ISD::SELECT, MMXTy, Expand); setOperationAction(ISD::BITCAST, MMXTy, Expand); } setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand); if (!Subtarget->useSoftFloat() && Subtarget->hasSSE1()) { addRegisterClass(MVT::v4f32, &X86::VR128RegClass); setOperationAction(ISD::FADD, MVT::v4f32, Legal); setOperationAction(ISD::FSUB, MVT::v4f32, Legal); setOperationAction(ISD::FMUL, MVT::v4f32, Legal); setOperationAction(ISD::FDIV, MVT::v4f32, Legal); setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); setOperationAction(ISD::FNEG, MVT::v4f32, Custom); setOperationAction(ISD::FABS, MVT::v4f32, Custom); setOperationAction(ISD::LOAD, MVT::v4f32, Legal); setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); setOperationAction(ISD::VSELECT, MVT::v4f32, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); setOperationAction(ISD::SELECT, MVT::v4f32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); } if (!Subtarget->useSoftFloat() && Subtarget->hasSSE2()) { addRegisterClass(MVT::v2f64, &X86::VR128RegClass); // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM // registers cannot be used even for integer operations. 
addRegisterClass(MVT::v16i8, &X86::VR128RegClass); addRegisterClass(MVT::v8i16, &X86::VR128RegClass); addRegisterClass(MVT::v4i32, &X86::VR128RegClass); addRegisterClass(MVT::v2i64, &X86::VR128RegClass); setOperationAction(ISD::ADD, MVT::v16i8, Legal); setOperationAction(ISD::ADD, MVT::v8i16, Legal); setOperationAction(ISD::ADD, MVT::v4i32, Legal); setOperationAction(ISD::ADD, MVT::v2i64, Legal); setOperationAction(ISD::MUL, MVT::v16i8, Custom); setOperationAction(ISD::MUL, MVT::v4i32, Custom); setOperationAction(ISD::MUL, MVT::v2i64, Custom); setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom); setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom); setOperationAction(ISD::MULHU, MVT::v8i16, Legal); setOperationAction(ISD::MULHS, MVT::v8i16, Legal); setOperationAction(ISD::SUB, MVT::v16i8, Legal); setOperationAction(ISD::SUB, MVT::v8i16, Legal); setOperationAction(ISD::SUB, MVT::v4i32, Legal); setOperationAction(ISD::SUB, MVT::v2i64, Legal); setOperationAction(ISD::MUL, MVT::v8i16, Legal); setOperationAction(ISD::FADD, MVT::v2f64, Legal); setOperationAction(ISD::FSUB, MVT::v2f64, Legal); setOperationAction(ISD::FMUL, MVT::v2f64, Legal); setOperationAction(ISD::FDIV, MVT::v2f64, Legal); setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); setOperationAction(ISD::FNEG, MVT::v2f64, Custom); setOperationAction(ISD::FABS, MVT::v2f64, Custom); setOperationAction(ISD::SMAX, MVT::v8i16, Legal); setOperationAction(ISD::UMAX, MVT::v16i8, Legal); setOperationAction(ISD::SMIN, MVT::v8i16, Legal); setOperationAction(ISD::UMIN, MVT::v16i8, Legal); setOperationAction(ISD::SETCC, MVT::v2i64, Custom); setOperationAction(ISD::SETCC, MVT::v16i8, Custom); setOperationAction(ISD::SETCC, MVT::v8i16, Custom); setOperationAction(ISD::SETCC, MVT::v4i32, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); setOperationAction(ISD::CTPOP, MVT::v16i8, Custom); setOperationAction(ISD::CTPOP, MVT::v8i16, Custom); setOperationAction(ISD::CTPOP, MVT::v4i32, Custom); setOperationAction(ISD::CTPOP, MVT::v2i64, Custom); setOperationAction(ISD::CTTZ, MVT::v16i8, Custom); setOperationAction(ISD::CTTZ, MVT::v8i16, Custom); setOperationAction(ISD::CTTZ, MVT::v4i32, Custom); // ISD::CTTZ v2i64 - scalarization is faster. setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom); // ISD::CTTZ_ZERO_UNDEF v2i64 - scalarization is faster. // Custom lower build_vector, vector_shuffle, and extract_vector_elt. for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::VSELECT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); } // We support custom legalizing of sext and anyext loads for specific // memory vector types which we can load as a scalar (or sequence of // scalars) and extend in-register to a legal 128-bit vector type. For sext // loads these must work with a single scalar load. 
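// For example, the kind of pattern the Custom entries below aim to catch
// (illustrative sketch only):
//   %v = load <4 x i8>, <4 x i8>* %p
//   %e = sext <4 x i8> %v to <4 x i32>
// which can be done as one 32-bit scalar load followed by an in-register
// sign extension, rather than four scalarized loads.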
for (MVT VT : MVT::integer_vector_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom); setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom); setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom); setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom); setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom); setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom); setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom); } setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom); setOperationAction(ISD::VSELECT, MVT::v2f64, Custom); setOperationAction(ISD::VSELECT, MVT::v2i64, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); if (Subtarget->is64Bit()) { setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom); } // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { setOperationAction(ISD::AND, VT, Promote); AddPromotedToType (ISD::AND, VT, MVT::v2i64); setOperationAction(ISD::OR, VT, Promote); AddPromotedToType (ISD::OR, VT, MVT::v2i64); setOperationAction(ISD::XOR, VT, Promote); AddPromotedToType (ISD::XOR, VT, MVT::v2i64); setOperationAction(ISD::LOAD, VT, Promote); AddPromotedToType (ISD::LOAD, VT, MVT::v2i64); setOperationAction(ISD::SELECT, VT, Promote); AddPromotedToType (ISD::SELECT, VT, MVT::v2i64); } // Custom lower v2i64 and v2f64 selects. setOperationAction(ISD::LOAD, MVT::v2f64, Legal); setOperationAction(ISD::LOAD, MVT::v2i64, Legal); setOperationAction(ISD::SELECT, MVT::v2f64, Custom); setOperationAction(ISD::SELECT, MVT::v2i64, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); // As there is no 64-bit GPR available, we need to build a special custom // sequence to convert from v2i32 to v2f32.
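// (Sketch of the case being handled: on a 32-bit target a
//   %f = uitofp <2 x i32> %v to <2 x float>
// cannot simply widen through a 64-bit GPR, so the Custom hook assembles the
// conversion from vector operations instead.)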
if (!Subtarget->is64Bit()) setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom); setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom); for (MVT VT : MVT::fp_vector_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal); setOperationAction(ISD::BITCAST, MVT::v2i32, Custom); setOperationAction(ISD::BITCAST, MVT::v4i16, Custom); setOperationAction(ISD::BITCAST, MVT::v8i8, Custom); } if (!Subtarget->useSoftFloat() && Subtarget->hasSSE41()) { for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) { setOperationAction(ISD::FFLOOR, RoundedTy, Legal); setOperationAction(ISD::FCEIL, RoundedTy, Legal); setOperationAction(ISD::FTRUNC, RoundedTy, Legal); setOperationAction(ISD::FRINT, RoundedTy, Legal); setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal); } setOperationAction(ISD::SMAX, MVT::v16i8, Legal); setOperationAction(ISD::SMAX, MVT::v4i32, Legal); setOperationAction(ISD::UMAX, MVT::v8i16, Legal); setOperationAction(ISD::UMAX, MVT::v4i32, Legal); setOperationAction(ISD::SMIN, MVT::v16i8, Legal); setOperationAction(ISD::SMIN, MVT::v4i32, Legal); setOperationAction(ISD::UMIN, MVT::v8i16, Legal); setOperationAction(ISD::UMIN, MVT::v4i32, Legal); // FIXME: Do we need to handle scalar-to-vector here? setOperationAction(ISD::MUL, MVT::v4i32, Legal); // We directly match byte blends in the backend as they match the VSELECT // condition form. setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); // SSE41 brings specific instructions for doing vector sign extend even in // cases where we don't have SRA. for (MVT VT : MVT::integer_vector_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom); } // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, MVT::v8i8, Legal); setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Legal); setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i8, Legal); setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i16, Legal); setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i16, Legal); setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i32, Legal); setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i16, MVT::v8i8, Legal); setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Legal); setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i8, Legal); setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i16, Legal); setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i16, Legal); setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i32, Legal); // i8 and i16 vectors are custom because the source register and source // memory operand types are not the same width. f32 vectors are // custom since the immediate controlling the insert encodes additional // information. setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); // FIXME: these should be Legal, but that's only for the case where // the index is constant. For now custom expand to deal with that.
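// (Background for the FIXME above, informally: pinsrq/pextrq encode the
// element index as an immediate, so only the constant-index insert/extract
// could be made Legal; the Custom hook below also has to cope with a
// variable index, typically by going through memory.)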
if (Subtarget->is64Bit()) { setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom); } } if (Subtarget->hasSSE2()) { setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom); setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom); setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom); setOperationAction(ISD::SRL, MVT::v8i16, Custom); setOperationAction(ISD::SRL, MVT::v16i8, Custom); setOperationAction(ISD::SHL, MVT::v8i16, Custom); setOperationAction(ISD::SHL, MVT::v16i8, Custom); setOperationAction(ISD::SRA, MVT::v8i16, Custom); setOperationAction(ISD::SRA, MVT::v16i8, Custom); // In the customized shift lowering, the legal cases in AVX2 will be // recognized. setOperationAction(ISD::SRL, MVT::v2i64, Custom); setOperationAction(ISD::SRL, MVT::v4i32, Custom); setOperationAction(ISD::SHL, MVT::v2i64, Custom); setOperationAction(ISD::SHL, MVT::v4i32, Custom); setOperationAction(ISD::SRA, MVT::v2i64, Custom); setOperationAction(ISD::SRA, MVT::v4i32, Custom); } if (Subtarget->hasXOP()) { setOperationAction(ISD::ROTL, MVT::v16i8, Custom); setOperationAction(ISD::ROTL, MVT::v8i16, Custom); setOperationAction(ISD::ROTL, MVT::v4i32, Custom); setOperationAction(ISD::ROTL, MVT::v2i64, Custom); setOperationAction(ISD::ROTL, MVT::v32i8, Custom); setOperationAction(ISD::ROTL, MVT::v16i16, Custom); setOperationAction(ISD::ROTL, MVT::v8i32, Custom); setOperationAction(ISD::ROTL, MVT::v4i64, Custom); } if (!Subtarget->useSoftFloat() && Subtarget->hasFp256()) { addRegisterClass(MVT::v32i8, &X86::VR256RegClass); addRegisterClass(MVT::v16i16, &X86::VR256RegClass); addRegisterClass(MVT::v8i32, &X86::VR256RegClass); addRegisterClass(MVT::v8f32, &X86::VR256RegClass); addRegisterClass(MVT::v4i64, &X86::VR256RegClass); addRegisterClass(MVT::v4f64, &X86::VR256RegClass); setOperationAction(ISD::LOAD, MVT::v8f32, Legal); setOperationAction(ISD::LOAD, MVT::v4f64, Legal); setOperationAction(ISD::LOAD, MVT::v4i64, Legal); setOperationAction(ISD::FADD, MVT::v8f32, Legal); setOperationAction(ISD::FSUB, MVT::v8f32, Legal); setOperationAction(ISD::FMUL, MVT::v8f32, Legal); setOperationAction(ISD::FDIV, MVT::v8f32, Legal); setOperationAction(ISD::FSQRT, MVT::v8f32, Legal); setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal); setOperationAction(ISD::FCEIL, MVT::v8f32, Legal); setOperationAction(ISD::FTRUNC, MVT::v8f32, Legal); setOperationAction(ISD::FRINT, MVT::v8f32, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v8f32, Legal); setOperationAction(ISD::FNEG, MVT::v8f32, Custom); setOperationAction(ISD::FABS, MVT::v8f32, Custom); setOperationAction(ISD::FADD, MVT::v4f64, Legal); setOperationAction(ISD::FSUB, MVT::v4f64, Legal); setOperationAction(ISD::FMUL, MVT::v4f64, Legal); setOperationAction(ISD::FDIV, MVT::v4f64, Legal); setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal); setOperationAction(ISD::FCEIL, MVT::v4f64, Legal); setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal); setOperationAction(ISD::FRINT, MVT::v4f64, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Legal); setOperationAction(ISD::FNEG, MVT::v4f64, Custom); setOperationAction(ISD::FABS, MVT::v4f64, Custom); // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted // even though v8i16 is a legal type. 
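// (In other words: there is no direct v8f32 -> v8i16 conversion, so Promote
// widens the integer result -- the conversion is done as v8f32 -> v8i32 and
// then truncated. A rough sketch of the equivalent IR:
//   %w = fptosi <8 x float> %x to <8 x i32>
//   %r = trunc <8 x i32> %w to <8 x i16>  )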
setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Promote); setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Promote); setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote); setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom); for (MVT VT : MVT::fp_vector_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal); setOperationAction(ISD::SRL, MVT::v16i16, Custom); setOperationAction(ISD::SRL, MVT::v32i8, Custom); setOperationAction(ISD::SHL, MVT::v16i16, Custom); setOperationAction(ISD::SHL, MVT::v32i8, Custom); setOperationAction(ISD::SRA, MVT::v16i16, Custom); setOperationAction(ISD::SRA, MVT::v32i8, Custom); setOperationAction(ISD::SETCC, MVT::v32i8, Custom); setOperationAction(ISD::SETCC, MVT::v16i16, Custom); setOperationAction(ISD::SETCC, MVT::v8i32, Custom); setOperationAction(ISD::SETCC, MVT::v4i64, Custom); setOperationAction(ISD::SELECT, MVT::v4f64, Custom); setOperationAction(ISD::SELECT, MVT::v4i64, Custom); setOperationAction(ISD::SELECT, MVT::v8f32, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom); setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom); setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom); setOperationAction(ISD::ANY_EXTEND, MVT::v16i16, Custom); setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom); setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom); setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom); setOperationAction(ISD::CTPOP, MVT::v32i8, Custom); setOperationAction(ISD::CTPOP, MVT::v16i16, Custom); setOperationAction(ISD::CTPOP, MVT::v8i32, Custom); setOperationAction(ISD::CTPOP, MVT::v4i64, Custom); setOperationAction(ISD::CTTZ, MVT::v32i8, Custom); setOperationAction(ISD::CTTZ, MVT::v16i16, Custom); setOperationAction(ISD::CTTZ, MVT::v8i32, Custom); setOperationAction(ISD::CTTZ, MVT::v4i64, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v32i8, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i16, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom); if (Subtarget->hasAnyFMA()) { setOperationAction(ISD::FMA, MVT::v8f32, Legal); setOperationAction(ISD::FMA, MVT::v4f64, Legal); setOperationAction(ISD::FMA, MVT::v4f32, Legal); setOperationAction(ISD::FMA, MVT::v2f64, Legal); setOperationAction(ISD::FMA, MVT::f32, Legal); setOperationAction(ISD::FMA, MVT::f64, Legal); } if (Subtarget->hasInt256()) { setOperationAction(ISD::ADD, MVT::v4i64, Legal); setOperationAction(ISD::ADD, MVT::v8i32, Legal); setOperationAction(ISD::ADD, MVT::v16i16, Legal); setOperationAction(ISD::ADD, MVT::v32i8, Legal); setOperationAction(ISD::SUB, MVT::v4i64, Legal); setOperationAction(ISD::SUB, MVT::v8i32, Legal); setOperationAction(ISD::SUB, MVT::v16i16, Legal); setOperationAction(ISD::SUB, MVT::v32i8, Legal); setOperationAction(ISD::MUL, MVT::v4i64, Custom); setOperationAction(ISD::MUL, MVT::v8i32, Legal); setOperationAction(ISD::MUL, MVT::v16i16, Legal); setOperationAction(ISD::MUL, MVT::v32i8, Custom); setOperationAction(ISD::UMUL_LOHI, 
MVT::v8i32, Custom); setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom); setOperationAction(ISD::MULHU, MVT::v16i16, Legal); setOperationAction(ISD::MULHS, MVT::v16i16, Legal); setOperationAction(ISD::SMAX, MVT::v32i8, Legal); setOperationAction(ISD::SMAX, MVT::v16i16, Legal); setOperationAction(ISD::SMAX, MVT::v8i32, Legal); setOperationAction(ISD::UMAX, MVT::v32i8, Legal); setOperationAction(ISD::UMAX, MVT::v16i16, Legal); setOperationAction(ISD::UMAX, MVT::v8i32, Legal); setOperationAction(ISD::SMIN, MVT::v32i8, Legal); setOperationAction(ISD::SMIN, MVT::v16i16, Legal); setOperationAction(ISD::SMIN, MVT::v8i32, Legal); setOperationAction(ISD::UMIN, MVT::v32i8, Legal); setOperationAction(ISD::UMIN, MVT::v16i16, Legal); setOperationAction(ISD::UMIN, MVT::v8i32, Legal); // The custom lowering for UINT_TO_FP for v8i32 becomes interesting // when we have a 256bit-wide blend with immediate. setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom); // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, MVT::v16i8, Legal); setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i8, Legal); setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i8, Legal); setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i16, Legal); setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i16, Legal); setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i32, Legal); setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i16, MVT::v16i8, Legal); setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i8, Legal); setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i8, Legal); setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i16, Legal); setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i16, Legal); setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i32, Legal); } else { setOperationAction(ISD::ADD, MVT::v4i64, Custom); setOperationAction(ISD::ADD, MVT::v8i32, Custom); setOperationAction(ISD::ADD, MVT::v16i16, Custom); setOperationAction(ISD::ADD, MVT::v32i8, Custom); setOperationAction(ISD::SUB, MVT::v4i64, Custom); setOperationAction(ISD::SUB, MVT::v8i32, Custom); setOperationAction(ISD::SUB, MVT::v16i16, Custom); setOperationAction(ISD::SUB, MVT::v32i8, Custom); setOperationAction(ISD::MUL, MVT::v4i64, Custom); setOperationAction(ISD::MUL, MVT::v8i32, Custom); setOperationAction(ISD::MUL, MVT::v16i16, Custom); setOperationAction(ISD::MUL, MVT::v32i8, Custom); setOperationAction(ISD::SMAX, MVT::v32i8, Custom); setOperationAction(ISD::SMAX, MVT::v16i16, Custom); setOperationAction(ISD::SMAX, MVT::v8i32, Custom); setOperationAction(ISD::UMAX, MVT::v32i8, Custom); setOperationAction(ISD::UMAX, MVT::v16i16, Custom); setOperationAction(ISD::UMAX, MVT::v8i32, Custom); setOperationAction(ISD::SMIN, MVT::v32i8, Custom); setOperationAction(ISD::SMIN, MVT::v16i16, Custom); setOperationAction(ISD::SMIN, MVT::v8i32, Custom); setOperationAction(ISD::UMIN, MVT::v32i8, Custom); setOperationAction(ISD::UMIN, MVT::v16i16, Custom); setOperationAction(ISD::UMIN, MVT::v8i32, Custom); } // In the customized shift lowering, the legal cases in AVX2 will be // recognized. setOperationAction(ISD::SRL, MVT::v4i64, Custom); setOperationAction(ISD::SRL, MVT::v8i32, Custom); setOperationAction(ISD::SHL, MVT::v4i64, Custom); setOperationAction(ISD::SHL, MVT::v8i32, Custom); setOperationAction(ISD::SRA, MVT::v4i64, Custom); setOperationAction(ISD::SRA, MVT::v8i32, Custom); // Custom lower several nodes for 256-bit types. 
for (MVT VT : MVT::vector_valuetypes()) { if (VT.getScalarSizeInBits() >= 32) { setOperationAction(ISD::MLOAD, VT, Legal); setOperationAction(ISD::MSTORE, VT, Legal); } // Extract subvector is special because the value type // (result) is 128-bit but the source is 256-bit wide. if (VT.is128BitVector()) { setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); } // Do not attempt to custom lower other non-256-bit vectors if (!VT.is256BitVector()) continue; setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::VSELECT, VT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); } if (Subtarget->hasInt256()) setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64. for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) { setOperationAction(ISD::AND, VT, Promote); AddPromotedToType (ISD::AND, VT, MVT::v4i64); setOperationAction(ISD::OR, VT, Promote); AddPromotedToType (ISD::OR, VT, MVT::v4i64); setOperationAction(ISD::XOR, VT, Promote); AddPromotedToType (ISD::XOR, VT, MVT::v4i64); setOperationAction(ISD::LOAD, VT, Promote); AddPromotedToType (ISD::LOAD, VT, MVT::v4i64); setOperationAction(ISD::SELECT, VT, Promote); AddPromotedToType (ISD::SELECT, VT, MVT::v4i64); } } if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) { addRegisterClass(MVT::v16i32, &X86::VR512RegClass); addRegisterClass(MVT::v16f32, &X86::VR512RegClass); addRegisterClass(MVT::v8i64, &X86::VR512RegClass); addRegisterClass(MVT::v8f64, &X86::VR512RegClass); addRegisterClass(MVT::i1, &X86::VK1RegClass); addRegisterClass(MVT::v8i1, &X86::VK8RegClass); addRegisterClass(MVT::v16i1, &X86::VK16RegClass); for (MVT VT : MVT::fp_vector_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal); setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i8, Legal); setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i8, Legal); setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i16, Legal); setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i16, Legal); setLoadExtAction(ISD::ZEXTLOAD, MVT::v32i16, MVT::v32i8, Legal); setLoadExtAction(ISD::SEXTLOAD, MVT::v32i16, MVT::v32i8, Legal); setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i8, Legal); setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i8, Legal); setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i16, Legal); setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i16, Legal); setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i32, Legal); setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i32, Legal); setOperationAction(ISD::BR_CC, MVT::i1, Expand); setOperationAction(ISD::SETCC, MVT::i1, Custom); setOperationAction(ISD::SETCCE, MVT::i1, Custom); setOperationAction(ISD::SELECT_CC, MVT::i1, Expand); setOperationAction(ISD::XOR, MVT::i1, Legal); setOperationAction(ISD::OR, MVT::i1, Legal); setOperationAction(ISD::AND, MVT::i1, Legal); setOperationAction(ISD::SUB, MVT::i1, Custom); setOperationAction(ISD::ADD, MVT::i1, Custom); setOperationAction(ISD::MUL, MVT::i1, Custom); setOperationAction(ISD::LOAD, MVT::v16f32, Legal); setOperationAction(ISD::LOAD, MVT::v8f64, Legal); setOperationAction(ISD::LOAD, MVT::v8i64, Legal); setOperationAction(ISD::LOAD, MVT::v16i32, Legal); setOperationAction(ISD::LOAD, MVT::v16i1, 
Legal); setOperationAction(ISD::FADD, MVT::v16f32, Legal); setOperationAction(ISD::FSUB, MVT::v16f32, Legal); setOperationAction(ISD::FMUL, MVT::v16f32, Legal); setOperationAction(ISD::FDIV, MVT::v16f32, Legal); setOperationAction(ISD::FSQRT, MVT::v16f32, Legal); setOperationAction(ISD::FNEG, MVT::v16f32, Custom); setOperationAction(ISD::FABS, MVT::v16f32, Custom); setOperationAction(ISD::FADD, MVT::v8f64, Legal); setOperationAction(ISD::FSUB, MVT::v8f64, Legal); setOperationAction(ISD::FMUL, MVT::v8f64, Legal); setOperationAction(ISD::FDIV, MVT::v8f64, Legal); setOperationAction(ISD::FSQRT, MVT::v8f64, Legal); setOperationAction(ISD::FNEG, MVT::v8f64, Custom); setOperationAction(ISD::FABS, MVT::v8f64, Custom); setOperationAction(ISD::FMA, MVT::v8f64, Legal); setOperationAction(ISD::FMA, MVT::v16f32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Promote); setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Promote); setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Custom); setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal); setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal); setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal); setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal); setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal); setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal); setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal); if (Subtarget->hasVLX()){ setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal); setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal); setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal); setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal); setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal); setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal); setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal); setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal); setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal); } else { setOperationAction(ISD::MLOAD, MVT::v8i32, Custom); setOperationAction(ISD::MLOAD, MVT::v8f32, Custom); setOperationAction(ISD::MSTORE, MVT::v8i32, Custom); setOperationAction(ISD::MSTORE, MVT::v8f32, Custom); } setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom); setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i1, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i1, Custom); if (Subtarget->hasDQI()) { setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal); if (Subtarget->hasVLX()) { setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); 
setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v4i64, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v4i64, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); } } if (Subtarget->hasVLX()) { setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); } setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom); setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom); if (Subtarget->hasDQI()) { setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom); } setOperationAction(ISD::FFLOOR, MVT::v16f32, Legal); setOperationAction(ISD::FFLOOR, MVT::v8f64, Legal); setOperationAction(ISD::FCEIL, MVT::v16f32, Legal); setOperationAction(ISD::FCEIL, MVT::v8f64, Legal); setOperationAction(ISD::FTRUNC, MVT::v16f32, Legal); setOperationAction(ISD::FTRUNC, MVT::v8f64, Legal); setOperationAction(ISD::FRINT, MVT::v16f32, Legal); setOperationAction(ISD::FRINT, MVT::v8f64, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v16f32, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v8f64, Legal); setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom); setOperationAction(ISD::SETCC, MVT::v16i1, Custom); setOperationAction(ISD::SETCC, MVT::v8i1, Custom); setOperationAction(ISD::MUL, MVT::v8i64, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i1, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i1, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v16i1, Custom); setOperationAction(ISD::SELECT, MVT::v8f64, Custom); setOperationAction(ISD::SELECT, MVT::v8i64, Custom); setOperationAction(ISD::SELECT, MVT::v16f32, Custom); setOperationAction(ISD::SELECT, MVT::v16i1, Custom); setOperationAction(ISD::SELECT, MVT::v8i1, Custom); setOperationAction(ISD::SMAX, MVT::v16i32, Legal); setOperationAction(ISD::SMAX, MVT::v8i64, Legal); setOperationAction(ISD::UMAX, MVT::v16i32, Legal); setOperationAction(ISD::UMAX, MVT::v8i64, Legal); 
setOperationAction(ISD::SMIN, MVT::v16i32, Legal); setOperationAction(ISD::SMIN, MVT::v8i64, Legal); setOperationAction(ISD::UMIN, MVT::v16i32, Legal); setOperationAction(ISD::UMIN, MVT::v8i64, Legal); setOperationAction(ISD::ADD, MVT::v8i64, Legal); setOperationAction(ISD::ADD, MVT::v16i32, Legal); setOperationAction(ISD::SUB, MVT::v8i64, Legal); setOperationAction(ISD::SUB, MVT::v16i32, Legal); setOperationAction(ISD::MUL, MVT::v16i32, Legal); setOperationAction(ISD::SRL, MVT::v8i64, Custom); setOperationAction(ISD::SRL, MVT::v16i32, Custom); setOperationAction(ISD::SHL, MVT::v8i64, Custom); setOperationAction(ISD::SHL, MVT::v16i32, Custom); setOperationAction(ISD::SRA, MVT::v8i64, Custom); setOperationAction(ISD::SRA, MVT::v16i32, Custom); setOperationAction(ISD::AND, MVT::v8i64, Legal); setOperationAction(ISD::OR, MVT::v8i64, Legal); setOperationAction(ISD::XOR, MVT::v8i64, Legal); setOperationAction(ISD::AND, MVT::v16i32, Legal); setOperationAction(ISD::OR, MVT::v16i32, Legal); setOperationAction(ISD::XOR, MVT::v16i32, Legal); if (Subtarget->hasCDI()) { setOperationAction(ISD::CTLZ, MVT::v8i64, Legal); setOperationAction(ISD::CTLZ, MVT::v16i32, Legal); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i64, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i32, Expand); setOperationAction(ISD::CTLZ, MVT::v8i16, Custom); setOperationAction(ISD::CTLZ, MVT::v16i8, Custom); setOperationAction(ISD::CTLZ, MVT::v16i16, Custom); setOperationAction(ISD::CTLZ, MVT::v32i8, Custom); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i16, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i8, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i16, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v32i8, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i64, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i32, Custom); if (Subtarget->hasVLX()) { setOperationAction(ISD::CTLZ, MVT::v4i64, Legal); setOperationAction(ISD::CTLZ, MVT::v8i32, Legal); setOperationAction(ISD::CTLZ, MVT::v2i64, Legal); setOperationAction(ISD::CTLZ, MVT::v4i32, Legal); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i64, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i32, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v2i64, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i32, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom); } else { setOperationAction(ISD::CTLZ, MVT::v4i64, Custom); setOperationAction(ISD::CTLZ, MVT::v8i32, Custom); setOperationAction(ISD::CTLZ, MVT::v2i64, Custom); setOperationAction(ISD::CTLZ, MVT::v4i32, Custom); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i64, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i32, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v2i64, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i32, Expand); } } // Subtarget->hasCDI() if (Subtarget->hasDQI()) { setOperationAction(ISD::MUL, MVT::v2i64, Legal); setOperationAction(ISD::MUL, MVT::v4i64, Legal); setOperationAction(ISD::MUL, MVT::v8i64, Legal); } // Custom lower several nodes. 
for (MVT VT : MVT::vector_valuetypes()) { unsigned EltSize = VT.getVectorElementType().getSizeInBits(); if (EltSize == 1) { setOperationAction(ISD::AND, VT, Legal); setOperationAction(ISD::OR, VT, Legal); setOperationAction(ISD::XOR, VT, Legal); } if ((VT.is128BitVector() || VT.is256BitVector()) && EltSize >= 32) { setOperationAction(ISD::MGATHER, VT, Custom); setOperationAction(ISD::MSCATTER, VT, Custom); } // Extract subvector is special because the value type // (result) is 256/128-bit but the source is 512-bit wide. if (VT.is128BitVector() || VT.is256BitVector()) { setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); } if (VT.getVectorElementType() == MVT::i1) setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); // Do not attempt to custom lower other non-512-bit vectors if (!VT.is512BitVector()) continue; if (EltSize >= 32) { setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VSELECT, VT, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); setOperationAction(ISD::MLOAD, VT, Legal); setOperationAction(ISD::MSTORE, VT, Legal); setOperationAction(ISD::MGATHER, VT, Legal); setOperationAction(ISD::MSCATTER, VT, Custom); } } for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) { setOperationAction(ISD::SELECT, VT, Promote); AddPromotedToType (ISD::SELECT, VT, MVT::v8i64); } }// has AVX-512 if (!Subtarget->useSoftFloat() && Subtarget->hasBWI()) { addRegisterClass(MVT::v32i16, &X86::VR512RegClass); addRegisterClass(MVT::v64i8, &X86::VR512RegClass); addRegisterClass(MVT::v32i1, &X86::VK32RegClass); addRegisterClass(MVT::v64i1, &X86::VK64RegClass); setOperationAction(ISD::LOAD, MVT::v32i16, Legal); setOperationAction(ISD::LOAD, MVT::v64i8, Legal); setOperationAction(ISD::SETCC, MVT::v32i1, Custom); setOperationAction(ISD::SETCC, MVT::v64i1, Custom); setOperationAction(ISD::ADD, MVT::v32i16, Legal); setOperationAction(ISD::ADD, MVT::v64i8, Legal); setOperationAction(ISD::SUB, MVT::v32i16, Legal); setOperationAction(ISD::SUB, MVT::v64i8, Legal); setOperationAction(ISD::MUL, MVT::v32i16, Legal); setOperationAction(ISD::MULHS, MVT::v32i16, Legal); setOperationAction(ISD::MULHU, MVT::v32i16, Legal); setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom); setOperationAction(ISD::SELECT, MVT::v32i1, Custom); setOperationAction(ISD::SELECT, MVT::v64i1, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom); 
setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom); setOperationAction(ISD::VSELECT, MVT::v32i16, Legal); setOperationAction(ISD::VSELECT, MVT::v64i8, Legal); setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i1, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i1, Custom); setOperationAction(ISD::SMAX, MVT::v64i8, Legal); setOperationAction(ISD::SMAX, MVT::v32i16, Legal); setOperationAction(ISD::UMAX, MVT::v64i8, Legal); setOperationAction(ISD::UMAX, MVT::v32i16, Legal); setOperationAction(ISD::SMIN, MVT::v64i8, Legal); setOperationAction(ISD::SMIN, MVT::v32i16, Legal); setOperationAction(ISD::UMIN, MVT::v64i8, Legal); setOperationAction(ISD::UMIN, MVT::v32i16, Legal); setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal); setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal); if (Subtarget->hasVLX()) setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); if (Subtarget->hasCDI()) { setOperationAction(ISD::CTLZ, MVT::v32i16, Custom); setOperationAction(ISD::CTLZ, MVT::v64i8, Custom); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v32i16, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v64i8, Expand); } for (auto VT : { MVT::v64i8, MVT::v32i16 }) { setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VSELECT, VT, Legal); setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); setOperationAction(ISD::AND, VT, Promote); AddPromotedToType (ISD::AND, VT, MVT::v8i64); setOperationAction(ISD::OR, VT, Promote); AddPromotedToType (ISD::OR, VT, MVT::v8i64); setOperationAction(ISD::XOR, VT, Promote); AddPromotedToType (ISD::XOR, VT, MVT::v8i64); } } if (!Subtarget->useSoftFloat() && Subtarget->hasVLX()) { addRegisterClass(MVT::v4i1, &X86::VK4RegClass); addRegisterClass(MVT::v2i1, &X86::VK2RegClass); setOperationAction(ISD::SETCC, MVT::v4i1, Custom); setOperationAction(ISD::SETCC, MVT::v2i1, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom); setOperationAction(ISD::SELECT, MVT::v4i1, Custom); setOperationAction(ISD::SELECT, MVT::v2i1, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2i1, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i1, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i1, Custom); setOperationAction(ISD::AND, MVT::v8i32, Legal); setOperationAction(ISD::OR, MVT::v8i32, Legal); setOperationAction(ISD::XOR, MVT::v8i32, Legal); setOperationAction(ISD::AND, MVT::v4i32, Legal); setOperationAction(ISD::OR, MVT::v4i32, Legal); setOperationAction(ISD::XOR, MVT::v4i32, Legal); setOperationAction(ISD::SRA, MVT::v2i64, Custom); setOperationAction(ISD::SRA, MVT::v4i64, Custom); setOperationAction(ISD::SMAX, MVT::v2i64, Legal); setOperationAction(ISD::SMAX, MVT::v4i64, Legal); setOperationAction(ISD::UMAX, MVT::v2i64, Legal); setOperationAction(ISD::UMAX, MVT::v4i64, Legal); setOperationAction(ISD::SMIN, 
MVT::v2i64, Legal); setOperationAction(ISD::SMIN, MVT::v4i64, Legal); setOperationAction(ISD::UMIN, MVT::v2i64, Legal); setOperationAction(ISD::UMIN, MVT::v4i64, Legal); } // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); if (!Subtarget->is64Bit()) { setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); } // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't // handle type legalization for these operations here. // // FIXME: We really should do custom legalization for addition and // subtraction on x86-32 once PR3203 is fixed. We really can't do much better // than generic legalization for 64-bit multiplication-with-overflow, though. for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { if (VT == MVT::i64 && !Subtarget->is64Bit()) continue; // Add/Sub/Mul with overflow operations are custom lowered. setOperationAction(ISD::SADDO, VT, Custom); setOperationAction(ISD::UADDO, VT, Custom); setOperationAction(ISD::SSUBO, VT, Custom); setOperationAction(ISD::USUBO, VT, Custom); setOperationAction(ISD::SMULO, VT, Custom); setOperationAction(ISD::UMULO, VT, Custom); } if (!Subtarget->is64Bit()) { // These libcalls are not available in 32-bit. setLibcallName(RTLIB::SHL_I128, nullptr); setLibcallName(RTLIB::SRL_I128, nullptr); setLibcallName(RTLIB::SRA_I128, nullptr); } // Combine sin / cos into one node or libcall if possible. if (Subtarget->hasSinCos()) { setLibcallName(RTLIB::SINCOS_F32, "sincosf"); setLibcallName(RTLIB::SINCOS_F64, "sincos"); if (Subtarget->isTargetDarwin()) { // For MacOSX, we don't want the normal expansion of a libcall to sincos. // We want to issue a libcall to __sincos_stret to avoid memory traffic. 
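// (Sketch: the custom FSINCOS lowering below turns a combined sin/cos pair
// into a single __sincos_stret-style call that returns both results in
// registers, roughly
//   {s, c} = __sincos_stret(x);
// instead of sincos(x, &s, &c), which would force the results through
// memory.)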
setOperationAction(ISD::FSINCOS, MVT::f64, Custom); setOperationAction(ISD::FSINCOS, MVT::f32, Custom); } } if (Subtarget->isTargetWin64()) { setOperationAction(ISD::SDIV, MVT::i128, Custom); setOperationAction(ISD::UDIV, MVT::i128, Custom); setOperationAction(ISD::SREM, MVT::i128, Custom); setOperationAction(ISD::UREM, MVT::i128, Custom); setOperationAction(ISD::SDIVREM, MVT::i128, Custom); setOperationAction(ISD::UDIVREM, MVT::i128, Custom); } // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::BITCAST); setTargetDAGCombine(ISD::VSELECT); setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::FADD); setTargetDAGCombine(ISD::FSUB); setTargetDAGCombine(ISD::FNEG); setTargetDAGCombine(ISD::FMA); setTargetDAGCombine(ISD::FMINNUM); setTargetDAGCombine(ISD::FMAXNUM); setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::MLOAD); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::MSTORE); setTargetDAGCombine(ISD::TRUNCATE); setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::ANY_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::UINT_TO_FP); setTargetDAGCombine(ISD::SETCC); setTargetDAGCombine(ISD::BUILD_VECTOR); setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::XOR); setTargetDAGCombine(ISD::MSCATTER); setTargetDAGCombine(ISD::MGATHER); computeRegisterProperties(Subtarget->getRegisterInfo()); MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores MaxStoresPerMemsetOptSize = 8; MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores MaxStoresPerMemcpyOptSize = 4; MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores MaxStoresPerMemmoveOptSize = 4; setPrefLoopAlignment(4); // 2^4 bytes. // A predictable cmov does not hurt on an in-order CPU. // FIXME: Use a CPU attribute to trigger this, not a CPU model. PredictableSelectIsExpensive = !Subtarget->isAtom(); EnableExtLdPromotion = true; setPrefFunctionAlignment(4); // 2^4 bytes. verifyIntrinsicTables(); } // This has so far only been implemented for 64-bit MachO. bool X86TargetLowering::useLoadStackGuardNode() const { return Subtarget->isTargetMachO() && Subtarget->is64Bit(); } TargetLoweringBase::LegalizeTypeAction X86TargetLowering::getPreferredVectorAction(EVT VT) const { if (ExperimentalVectorWideningLegalization && VT.getVectorNumElements() != 1 && VT.getVectorElementType().getSimpleVT() != MVT::i1) return TypeWidenVector; return TargetLoweringBase::getPreferredVectorAction(VT); } EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const { if (!VT.isVector()) return Subtarget->hasAVX512() ? 
MVT::i1: MVT::i8; if (VT.isSimple()) { MVT VVT = VT.getSimpleVT(); const unsigned NumElts = VVT.getVectorNumElements(); const MVT EltVT = VVT.getVectorElementType(); if (VVT.is512BitVector()) { if (Subtarget->hasAVX512()) if (EltVT == MVT::i32 || EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64) switch(NumElts) { case 8: return MVT::v8i1; case 16: return MVT::v16i1; } if (Subtarget->hasBWI()) if (EltVT == MVT::i8 || EltVT == MVT::i16) switch(NumElts) { case 32: return MVT::v32i1; case 64: return MVT::v64i1; } } if (VVT.is256BitVector() || VVT.is128BitVector()) { if (Subtarget->hasVLX()) if (EltVT == MVT::i32 || EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64) switch(NumElts) { case 2: return MVT::v2i1; case 4: return MVT::v4i1; case 8: return MVT::v8i1; } if (Subtarget->hasBWI() && Subtarget->hasVLX()) if (EltVT == MVT::i8 || EltVT == MVT::i16) switch(NumElts) { case 8: return MVT::v8i1; case 16: return MVT::v16i1; case 32: return MVT::v32i1; } } } return VT.changeVectorElementTypeToInteger(); } /// Helper for getByValTypeAlignment to determine /// the desired ByVal argument alignment. static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) { if (MaxAlign == 16) return; if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { if (VTy->getBitWidth() == 128) MaxAlign = 16; } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { unsigned EltAlign = 0; getMaxByValAlign(ATy->getElementType(), EltAlign); if (EltAlign > MaxAlign) MaxAlign = EltAlign; } else if (StructType *STy = dyn_cast<StructType>(Ty)) { for (auto *EltTy : STy->elements()) { unsigned EltAlign = 0; getMaxByValAlign(EltTy, EltAlign); if (EltAlign > MaxAlign) MaxAlign = EltAlign; if (MaxAlign == 16) break; } } } /// Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. For X86, aggregates /// that contain SSE vectors are placed at 16-byte boundaries while the rest /// are at 4-byte boundaries. unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty, const DataLayout &DL) const { if (Subtarget->is64Bit()) { // Max of 8 and alignment of type. unsigned TyAlign = DL.getABITypeAlignment(Ty); if (TyAlign > 8) return TyAlign; return 8; } unsigned Align = 4; if (Subtarget->hasSSE1()) getMaxByValAlign(Ty, Align); return Align; } /// Returns the target specific optimal type for load /// and store operations as a result of memset, memcpy, and memmove /// lowering. If DstAlign is zero that means it's safe to assume the destination /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against the alignment requirement, /// probably because the source does not need to be loaded. If 'IsMemset' is /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy /// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. EVT X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { const Function *F = MF.getFunction(); if ((!IsMemset || ZeroMemset) && !F->hasFnAttribute(Attribute::NoImplicitFloat)) { if (Size >= 16 && (!Subtarget->isUnalignedMem16Slow() || ((DstAlign == 0 || DstAlign >= 16) && (SrcAlign == 0 || SrcAlign >= 16)))) { if (Size >= 32) { // FIXME: Check if unaligned 32-byte accesses are slow.
if (Subtarget->hasInt256()) return MVT::v8i32; if (Subtarget->hasFp256()) return MVT::v8f32; } if (Subtarget->hasSSE2()) return MVT::v4i32; if (Subtarget->hasSSE1()) return MVT::v4f32; } else if (!MemcpyStrSrc && Size >= 8 && !Subtarget->is64Bit() && Subtarget->hasSSE2()) { // Do not use f64 to lower memcpy if source is string constant. It's // better to use i32 to avoid the loads. return MVT::f64; } } // This is a compromise. If we reach here, unaligned accesses may be slow on // this target. However, creating smaller, aligned accesses could be even // slower and would certainly be a lot more code. if (Subtarget->is64Bit() && Size >= 8) return MVT::i64; return MVT::i32; } bool X86TargetLowering::isSafeMemOpType(MVT VT) const { if (VT == MVT::f32) return X86ScalarSSEf32; else if (VT == MVT::f64) return X86ScalarSSEf64; return true; } bool X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, unsigned, bool *Fast) const { if (Fast) { switch (VT.getSizeInBits()) { default: // 8-byte and under are always assumed to be fast. *Fast = true; break; case 128: *Fast = !Subtarget->isUnalignedMem16Slow(); break; case 256: *Fast = !Subtarget->isUnalignedMem32Slow(); break; // TODO: What about AVX-512 (512-bit) accesses? } } // Misaligned accesses of any size are always allowed. return true; } /// Return the entry encoding for a jump table in the /// current function. The returned value is a member of the /// MachineJumpTableInfo::JTEntryKind enum. unsigned X86TargetLowering::getJumpTableEncoding() const { // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF // symbol. if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && Subtarget->isPICStyleGOT()) return MachineJumpTableInfo::EK_Custom32; // Otherwise, use the normal jump table encoding heuristics. return TargetLowering::getJumpTableEncoding(); } bool X86TargetLowering::useSoftFloat() const { return Subtarget->useSoftFloat(); } const MCExpr * X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid,MCContext &Ctx) const{ assert(MBB->getParent()->getTarget().getRelocationModel() == Reloc::PIC_ && Subtarget->isPICStyleGOT()); // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF // entries. return MCSymbolRefExpr::create(MBB->getSymbol(), MCSymbolRefExpr::VK_GOTOFF, Ctx); } /// Returns relocation base for the given PIC jumptable. SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const { if (!Subtarget->is64Bit()) // This doesn't have SDLoc associated with it, but is not really the // same as a Register. return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy(DAG.getDataLayout())); return Table; } /// This returns the relocation base for the given PIC jumptable, /// the same as getPICJumpTableRelocBase, but as an MCExpr. const MCExpr *X86TargetLowering:: getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const { // X86-64 uses RIP relative addressing based on the jump table label. if (Subtarget->isPICStyleRIPRel()) return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); // Otherwise, the reference is relative to the PIC base. 
return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx); } std::pair X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const { const TargetRegisterClass *RRC = nullptr; uint8_t Cost = 1; switch (VT.SimpleTy) { default: return TargetLowering::findRepresentativeClass(TRI, VT); case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: RRC = Subtarget->is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass; break; case MVT::x86mmx: RRC = &X86::VR64RegClass; break; case MVT::f32: case MVT::f64: case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: case MVT::v4f32: case MVT::v2f64: case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32: case MVT::v4f64: RRC = &X86::VR128RegClass; break; } return std::make_pair(RRC, Cost); } bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const { if (!Subtarget->isTargetLinux()) return false; if (Subtarget->is64Bit()) { // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs: Offset = 0x28; if (getTargetMachine().getCodeModel() == CodeModel::Kernel) AddressSpace = 256; else AddressSpace = 257; } else { // %gs:0x14 on i386 Offset = 0x14; AddressSpace = 256; } return true; } Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const { if (!Subtarget->isTargetAndroid()) return TargetLowering::getSafeStackPointerLocation(IRB); // Android provides a fixed TLS slot for the SafeStack pointer. See the // definition of TLS_SLOT_SAFESTACK in // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h unsigned AddressSpace, Offset; if (Subtarget->is64Bit()) { // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs: Offset = 0x48; if (getTargetMachine().getCodeModel() == CodeModel::Kernel) AddressSpace = 256; else AddressSpace = 257; } else { // %gs:0x24 on i386 Offset = 0x24; AddressSpace = 256; } return ConstantExpr::getIntToPtr( ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset), Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace)); } bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { assert(SrcAS != DestAS && "Expected different address spaces!"); return SrcAS < 256 && DestAS < 256; } //===----------------------------------------------------------------------===// // Return Value Calling Convention Implementation //===----------------------------------------------------------------------===// #include "X86GenCallingConv.inc" bool X86TargetLowering::CanLowerReturn( CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const { SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); return CCInfo.CheckReturn(Outs, RetCC_X86); } const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const { static const MCPhysReg ScratchRegs[] = { X86::R11, 0 }; return ScratchRegs; } SDValue X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, SDLoc dl, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo(); if (CallConv == CallingConv::X86_INTR && !Outs.empty()) report_fatal_error("X86 interrupts may not return any value"); SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext()); CCInfo.AnalyzeReturn(Outs, RetCC_X86); SDValue Flag; SmallVector RetOps; 
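// [Editor's note: illustrative sketch, not part of the original LLVM source.]
// The segment/offset pairs chosen by getStackCookieLocation() above, written
// as a small stand-alone helper.  In the X86 backend's convention address
// space 256 corresponds to %gs and 257 to %fs.
#if 0
struct TLSSlot { unsigned AddressSpace; unsigned Offset; };

static TLSSlot linuxStackCookieSlot(bool Is64Bit, bool KernelCodeModel) {
  if (Is64Bit)                        // %fs:0x28, or %gs:0x28 for the kernel
    return {KernelCodeModel ? 256u : 257u, 0x28u};
  return {256u, 0x14u};               // %gs:0x14 on i386
}
#endif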
RetOps.push_back(Chain); // Operand #0 = Chain (updated below) // Operand #1 = Bytes To Pop RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl, MVT::i16)); // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); SDValue ValToCopy = OutVals[i]; EVT ValVT = ValToCopy.getValueType(); // Promote values to the appropriate types. if (VA.getLocInfo() == CCValAssign::SExt) ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy); else if (VA.getLocInfo() == CCValAssign::ZExt) ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy); else if (VA.getLocInfo() == CCValAssign::AExt) { if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1) ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy); else ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy); } else if (VA.getLocInfo() == CCValAssign::BCvt) ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy); assert(VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."); // If this is x86-64, and we disabled SSE, we can't return FP values, // or SSE or MMX vectors. if ((ValVT == MVT::f32 || ValVT == MVT::f64 || VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) && (Subtarget->is64Bit() && !Subtarget->hasSSE1())) { report_fatal_error("SSE register return with SSE disabled"); } // Likewise we can't return F64 values with SSE1 only. gcc does so, but // llvm-gcc has never done it right and no one has noticed, so this // should be OK for now. if (ValVT == MVT::f64 && (Subtarget->is64Bit() && !Subtarget->hasSSE2())) report_fatal_error("SSE2 register return with SSE2 disabled"); // Returns in ST0/ST1 are handled specially: these are pushed as operands to // the RET instruction and handled by the FP Stackifier. if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) { // If this is a copy from an xmm register to ST(0), use an FPExtend to // change the value to the FP stack register class. if (isScalarFPTypeInSSEReg(VA.getValVT())) ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy); RetOps.push_back(ValToCopy); // Don't emit a copytoreg. continue; } // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64 // which is returned in RAX / RDX. if (Subtarget->is64Bit()) { if (ValVT == MVT::x86mmx) { if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) { ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy); ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, ValToCopy); // If we don't have SSE2 available, convert to v4f32 so the generated // register is legal. if (!Subtarget->hasSSE2()) ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy); } } } Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } // All x86 ABIs require that for returning structs by value we copy // the sret argument into %rax/%eax (depending on ABI) for the return. // We saved the argument into a virtual register in the entry block, // so now we copy the value out and into %rax/%eax. // // Checking Function.hasStructRetAttr() here is insufficient because the IR // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is // false, then an sret argument may be implicitly inserted in the SelDAG. In // either case FuncInfo->setSRetReturnReg() will have been called. 
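// [Editor's note: illustrative sketch, not part of the original LLVM source.]
// The return-value promotion rules applied in the loop above, reduced to a
// small function.  LocInfo/ExtKind are simplified stand-ins for
// CCValAssign::LocInfo and the ISD extension opcodes.
#if 0
enum class LocInfo { Full, SExt, ZExt, AExt, BCvt };
enum class ExtKind { None, SignExtend, ZeroExtend, AnyExtend, Bitcast };

static ExtKind returnValuePromotion(LocInfo LI, bool IsVectorOfI1) {
  switch (LI) {
  case LocInfo::SExt: return ExtKind::SignExtend;
  case LocInfo::ZExt: return ExtKind::ZeroExtend;
  case LocInfo::AExt:
    // v*i1 values are sign-extended rather than any-extended.
    return IsVectorOfI1 ? ExtKind::SignExtend : ExtKind::AnyExtend;
  case LocInfo::BCvt: return ExtKind::Bitcast;
  case LocInfo::Full: return ExtKind::None;
  }
  return ExtKind::None;
}
#endif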
if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) { SDValue Val = DAG.getCopyFromReg(Chain, dl, SRetReg, getPointerTy(MF.getDataLayout())); unsigned RetValReg = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ? X86::RAX : X86::EAX; Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag); Flag = Chain.getValue(1); // RAX/EAX now acts like a return value. RetOps.push_back( DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout()))); } const X86RegisterInfo *TRI = Subtarget->getRegisterInfo(); const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); if (I) { for (; *I; ++I) { if (X86::GR64RegClass.contains(*I)) RetOps.push_back(DAG.getRegister(*I, MVT::i64)); else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); } } RetOps[0] = Chain; // Update chain. // Add the flag if we have it. if (Flag.getNode()) RetOps.push_back(Flag); X86ISD::NodeType opcode = X86ISD::RET_FLAG; if (CallConv == CallingConv::X86_INTR) opcode = X86ISD::IRET; return DAG.getNode(opcode, dl, MVT::Other, RetOps); } bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { if (N->getNumValues() != 1) return false; if (!N->hasNUsesOfValue(1, 0)) return false; SDValue TCChain = Chain; SDNode *Copy = *N->use_begin(); if (Copy->getOpcode() == ISD::CopyToReg) { // If the copy has a glue operand, we conservatively assume it isn't safe to // perform a tail call. if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) return false; TCChain = Copy->getOperand(0); } else if (Copy->getOpcode() != ISD::FP_EXTEND) return false; bool HasRet = false; for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); UI != UE; ++UI) { if (UI->getOpcode() != X86ISD::RET_FLAG) return false; // If we are returning more than one value, we can definitely // not make a tail call see PR19530 if (UI->getNumOperands() > 4) return false; if (UI->getNumOperands() == 4 && UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue) return false; HasRet = true; } if (!HasRet) return false; Chain = TCChain; return true; } EVT X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT, ISD::NodeType ExtendKind) const { MVT ReturnMVT; // TODO: Is this also valid on 32-bit? if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND) ReturnMVT = MVT::i8; else ReturnMVT = MVT::i32; EVT MinVT = getRegisterType(Context, ReturnMVT); return VT.bitsLT(MinVT) ? MinVT : VT; } /// Lower the result values of a call into the /// appropriate copies out of appropriate physical registers. /// SDValue X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { // Assign locations to each value returned by this call. SmallVector RVLocs; bool Is64Bit = Subtarget->is64Bit(); CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC_X86); // Copy all of the result registers out of their specified physreg. 
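// [Editor's note: illustrative sketch, not part of the original LLVM source.]
// The minimum-width rule used by getTypeForExtArgOrReturn() above, expressed
// in plain bit widths instead of MVTs.
#if 0
static unsigned extArgOrReturnBits(unsigned ValueBits, bool Is64Bit,
                                   bool IsZeroExtend) {
  // A zero-extended i1 return on x86-64 only needs 8 bits; everything else is
  // widened to at least 32 bits.
  unsigned MinBits = (Is64Bit && ValueBits == 1 && IsZeroExtend) ? 8 : 32;
  return ValueBits < MinBits ? MinBits : ValueBits;
}
#endif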
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { CCValAssign &VA = RVLocs[i]; EVT CopyVT = VA.getLocVT(); // If this is x86-64, and we disabled SSE, we can't return FP values if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) && ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) { report_fatal_error("SSE register return with SSE disabled"); } // If we prefer to use the value in xmm registers, copy it out as f80 and // use a truncate to move it from fp stack reg to xmm reg. bool RoundAfterCopy = false; if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) && isScalarFPTypeInSSEReg(VA.getValVT())) { CopyVT = MVT::f80; RoundAfterCopy = (CopyVT != VA.getLocVT()); } Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag).getValue(1); SDValue Val = Chain.getValue(0); if (RoundAfterCopy) Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val, // This truncation won't change the value. DAG.getIntPtrConstant(1, dl)); if (VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1) Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); InFlag = Chain.getValue(2); InVals.push_back(Val); } return Chain; } //===----------------------------------------------------------------------===// // C & StdCall & Fast Calling Convention implementation //===----------------------------------------------------------------------===// // StdCall calling convention seems to be standard for many Windows' API // routines and around. It differs from C calling convention just a little: // callee should clean up the stack, not caller. Symbols should be also // decorated in some fancy way :) It doesn't support any vector arguments. // For info on fast calling convention see Fast Calling Convention (tail call) // implementation LowerX86_32FastCCCallTo. /// CallIsStructReturn - Determines whether a call uses struct return /// semantics. enum StructReturnType { NotStructReturn, RegStructReturn, StackStructReturn }; static StructReturnType callIsStructReturn(const SmallVectorImpl &Outs, bool IsMCU) { if (Outs.empty()) return NotStructReturn; const ISD::ArgFlagsTy &Flags = Outs[0].Flags; if (!Flags.isSRet()) return NotStructReturn; if (Flags.isInReg() || IsMCU) return RegStructReturn; return StackStructReturn; } /// Determines whether a function uses struct return semantics. static StructReturnType argsAreStructReturn(const SmallVectorImpl &Ins, bool IsMCU) { if (Ins.empty()) return NotStructReturn; const ISD::ArgFlagsTy &Flags = Ins[0].Flags; if (!Flags.isSRet()) return NotStructReturn; if (Flags.isInReg() || IsMCU) return RegStructReturn; return StackStructReturn; } /// Make a copy of an aggregate at address specified by "Src" to address /// "Dst" with size and alignment information specified by the specific /// parameter attribute. The copy will be passed as a byval function parameter. static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, SDLoc dl) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), /*isVolatile*/false, /*AlwaysInline=*/true, /*isTailCall*/false, MachinePointerInfo(), MachinePointerInfo()); } /// Return true if the calling convention is one that we can guarantee TCO for. 
static bool canGuaranteeTCO(CallingConv::ID CC) { return (CC == CallingConv::Fast || CC == CallingConv::GHC || CC == CallingConv::HiPE || CC == CallingConv::HHVM); } /// Return true if we might ever do TCO for calls with this calling convention. static bool mayTailCallThisCC(CallingConv::ID CC) { switch (CC) { // C calling conventions: case CallingConv::C: case CallingConv::X86_64_Win64: case CallingConv::X86_64_SysV: // Callee pop conventions: case CallingConv::X86_ThisCall: case CallingConv::X86_StdCall: case CallingConv::X86_VectorCall: case CallingConv::X86_FastCall: return true; default: return canGuaranteeTCO(CC); } } /// Return true if the function is being made into a tailcall target by /// changing its ABI. static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) { return GuaranteedTailCallOpt && canGuaranteeTCO(CC); } bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { auto Attr = CI->getParent()->getParent()->getFnAttribute("disable-tail-calls"); if (!CI->isTailCall() || Attr.getValueAsString() == "true") return false; CallSite CS(CI); CallingConv::ID CalleeCC = CS.getCallingConv(); if (!mayTailCallThisCC(CalleeCC)) return false; return true; } SDValue X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, const SmallVectorImpl &Ins, SDLoc dl, SelectionDAG &DAG, const CCValAssign &VA, MachineFrameInfo *MFI, unsigned i) const { // Create the nodes corresponding to a load from this parameter slot. ISD::ArgFlagsTy Flags = Ins[i].Flags; bool AlwaysUseMutable = shouldGuaranteeTCO( CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt); bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); EVT ValVT; // If value is passed by pointer we have address passed instead of the value // itself. bool ExtendedInMem = VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1; if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem) ValVT = VA.getLocVT(); else ValVT = VA.getValVT(); // Calculate SP offset of interrupt parameter, re-arrange the slot normally // taken by a return address. int Offset = 0; if (CallConv == CallingConv::X86_INTR) { const X86Subtarget& Subtarget = static_cast(DAG.getSubtarget()); // X86 interrupts may take one or two arguments. // On the stack there will be no return address as in regular call. // Offset of last argument need to be set to -4/-8 bytes. // Where offset of the first argument out of two, should be set to 0 bytes. Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1); } // FIXME: For now, all byval parameter objects are marked mutable. This can be // changed with more analysis. // In case of tail call optimization mark all arguments mutable. Since they // could be overwritten by lowering of arguments in case of a tail call. if (Flags.isByVal()) { unsigned Bytes = Flags.getByValSize(); if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects. int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable); // Adjust SP offset of interrupt parameter. if (CallConv == CallingConv::X86_INTR) { MFI->setObjectOffset(FI, Offset); } return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); } else { int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, VA.getLocMemOffset(), isImmutable); // Adjust SP offset of interrupt parameter. 
if (CallConv == CallingConv::X86_INTR) { MFI->setObjectOffset(FI, Offset); } SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue Val = DAG.getLoad( ValVT, dl, Chain, FIN, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false, false, false, 0); return ExtendedInMem ? DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) : Val; } } // FIXME: Get this from tablegen. static ArrayRef get64BitArgumentGPRs(CallingConv::ID CallConv, const X86Subtarget *Subtarget) { assert(Subtarget->is64Bit()); if (Subtarget->isCallingConvWin64(CallConv)) { static const MCPhysReg GPR64ArgRegsWin64[] = { X86::RCX, X86::RDX, X86::R8, X86::R9 }; return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64)); } static const MCPhysReg GPR64ArgRegs64Bit[] = { X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 }; return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit)); } // FIXME: Get this from tablegen. static ArrayRef get64BitArgumentXMMs(MachineFunction &MF, CallingConv::ID CallConv, const X86Subtarget *Subtarget) { assert(Subtarget->is64Bit()); if (Subtarget->isCallingConvWin64(CallConv)) { // The XMM registers which might contain var arg parameters are shadowed // in their paired GPR. So we only need to save the GPR to their home // slots. // TODO: __vectorcall will change this. return None; } const Function *Fn = MF.getFunction(); bool NoImplicitFloatOps = Fn->hasFnAttribute(Attribute::NoImplicitFloat); bool isSoftFloat = Subtarget->useSoftFloat(); assert(!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"); if (isSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1()) // Kernel mode asks for SSE to be disabled, so there are no XMM argument // registers. return None; static const MCPhysReg XMMArgRegs64Bit[] = { X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 }; return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit)); } SDValue X86TargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo(); const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); const Function* Fn = MF.getFunction(); if (Fn->hasExternalLinkage() && Subtarget->isTargetCygMing() && Fn->getName() == "main") FuncInfo->setForceFramePointer(true); MachineFrameInfo *MFI = MF.getFrameInfo(); bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isCallingConvWin64(CallConv); assert(!(isVarArg && canGuaranteeTCO(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"); if (CallConv == CallingConv::X86_INTR) { bool isLegal = Ins.size() == 1 || (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) || (!Is64Bit && Ins[1].VT == MVT::i32))); if (!isLegal) report_fatal_error("X86 interrupts may take one or two arguments"); } // Assign locations to all of the incoming arguments. SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); // Allocate shadow area for Win64 if (IsWin64) CCInfo.AllocateStack(32, 8); CCInfo.AnalyzeFormalArguments(Ins, CC_X86); unsigned LastVal = ~0U; SDValue ArgValue; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; // TODO: If an arg is passed in two places (e.g. reg and stack), skip later // places. 
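// [Editor's note: illustrative sketch, not part of the original LLVM source.]
// The integer argument register sequences returned by get64BitArgumentGPRs()
// above, written out as plain name tables for reference.
#if 0
static const char *const Win64ArgGPRs[] = {"rcx", "rdx", "r8", "r9"};
static const char *const SysVArgGPRs[]  = {"rdi", "rsi", "rdx", "rcx",
                                           "r8",  "r9"};
// On SysV, xmm0..xmm7 may additionally carry vector/FP arguments; on Win64
// the XMM argument registers are shadowed by their paired GPRs, so only the
// GPR home slots need to be saved for varargs.
#endif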
assert(VA.getValNo() != LastVal && "Don't support value assigned to multiple locs yet"); (void)LastVal; LastVal = VA.getValNo(); if (VA.isRegLoc()) { EVT RegVT = VA.getLocVT(); const TargetRegisterClass *RC; if (RegVT == MVT::i32) RC = &X86::GR32RegClass; else if (Is64Bit && RegVT == MVT::i64) RC = &X86::GR64RegClass; else if (RegVT == MVT::f32) RC = &X86::FR32RegClass; else if (RegVT == MVT::f64) RC = &X86::FR64RegClass; else if (RegVT == MVT::f128) RC = &X86::FR128RegClass; else if (RegVT.is512BitVector()) RC = &X86::VR512RegClass; else if (RegVT.is256BitVector()) RC = &X86::VR256RegClass; else if (RegVT.is128BitVector()) RC = &X86::VR128RegClass; else if (RegVT == MVT::x86mmx) RC = &X86::VR64RegClass; else if (RegVT == MVT::i1) RC = &X86::VK1RegClass; else if (RegVT == MVT::v8i1) RC = &X86::VK8RegClass; else if (RegVT == MVT::v16i1) RC = &X86::VK16RegClass; else if (RegVT == MVT::v32i1) RC = &X86::VK32RegClass; else if (RegVT == MVT::v64i1) RC = &X86::VK64RegClass; else llvm_unreachable("Unknown argument type!"); unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); // If this is an 8 or 16-bit value, it is really passed promoted to 32 // bits. Insert an assert[sz]ext to capture this, then truncate to the // right size. if (VA.getLocInfo() == CCValAssign::SExt) ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); else if (VA.getLocInfo() == CCValAssign::ZExt) ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); else if (VA.getLocInfo() == CCValAssign::BCvt) ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue); if (VA.isExtInLoc()) { // Handle MMX values passed in XMM regs. if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1) ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue); else ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); } } else { assert(VA.isMemLoc()); ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i); } // If value is passed via pointer - do a load. if (VA.getLocInfo() == CCValAssign::Indirect) ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo(), false, false, false, 0); InVals.push_back(ArgValue); } for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { // All x86 ABIs require that for returning structs by value we copy the // sret argument into %rax/%eax (depending on ABI) for the return. Save // the argument into a virtual register so that we can access it from the // return points. if (Ins[i].Flags.isSRet()) { unsigned Reg = FuncInfo->getSRetReturnReg(); if (!Reg) { MVT PtrTy = getPointerTy(DAG.getDataLayout()); Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); FuncInfo->setSRetReturnReg(Reg); } SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]); Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); break; } } unsigned StackSize = CCInfo.getNextStackOffset(); // Align stack specially for tail calls. if (shouldGuaranteeTCO(CallConv, MF.getTarget().Options.GuaranteedTailCallOpt)) StackSize = GetAlignedArgumentStackSize(StackSize, DAG); // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. We // can skip this if there are no va_start calls. 
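// [Editor's note: illustrative sketch, not part of the original LLVM source.]
// The RegVT -> register class selection in the if/else chain above, condensed
// into a switch over a simplified kind enum (invented for illustration).
#if 0
enum class ArgVT { I32, I64, F32, F64, F128, Vec128, Vec256, Vec512,
                   MMX, I1, MaskV8, MaskV16, MaskV32, MaskV64 };

static const char *formalArgRegClass(ArgVT VT) {
  switch (VT) {
  case ArgVT::I32:     return "GR32";
  case ArgVT::I64:     return "GR64";   // only reached on 64-bit targets
  case ArgVT::F32:     return "FR32";
  case ArgVT::F64:     return "FR64";
  case ArgVT::F128:    return "FR128";
  case ArgVT::Vec512:  return "VR512";
  case ArgVT::Vec256:  return "VR256";
  case ArgVT::Vec128:  return "VR128";
  case ArgVT::MMX:     return "VR64";
  case ArgVT::I1:      return "VK1";    // AVX-512 mask register classes
  case ArgVT::MaskV8:  return "VK8";
  case ArgVT::MaskV16: return "VK16";
  case ArgVT::MaskV32: return "VK32";
  case ArgVT::MaskV64: return "VK64";
  }
  return "<unknown>";  // the real code hits llvm_unreachable here
}
#endif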
if (MFI->hasVAStart() && (Is64Bit || (CallConv != CallingConv::X86_FastCall && CallConv != CallingConv::X86_ThisCall))) { FuncInfo->setVarArgsFrameIndex( MFI->CreateFixedObject(1, StackSize, true)); } // Figure out if XMM registers are in use. assert(!(Subtarget->useSoftFloat() && Fn->hasFnAttribute(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"); // 64-bit calling conventions support varargs and register parameters, so we // have to do extra work to spill them in the prologue. if (Is64Bit && isVarArg && MFI->hasVAStart()) { // Find the first unallocated argument registers. ArrayRef ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget); ArrayRef ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget); unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs); unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs); assert(!(NumXMMRegs && !Subtarget->hasSSE1()) && "SSE register cannot be used when SSE is disabled!"); // Gather all the live in physical registers. SmallVector LiveGPRs; SmallVector LiveXMMRegs; SDValue ALVal; for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) { unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass); LiveGPRs.push_back( DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64)); } if (!ArgXMMs.empty()) { unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass); ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8); for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) { unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass); LiveXMMRegs.push_back( DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32)); } } if (IsWin64) { // Get to the caller-allocated home save location. Add 8 to account // for the return address. int HomeOffset = TFI.getOffsetOfLocalArea() + 8; FuncInfo->setRegSaveFrameIndex( MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false)); // Fixup to set vararg frame on shadow area (4 x i64). if (NumIntRegs < 4) FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex()); } else { // For X86-64, if there are vararg parameters that are passed via // registers, then we must store them to their spots on the stack so // they may be loaded by deferencing the result of va_next. FuncInfo->setVarArgsGPOffset(NumIntRegs * 8); FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16); FuncInfo->setRegSaveFrameIndex(MFI->CreateStackObject( ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false)); } // Store the integer parameter registers. SmallVector MemOps; SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), getPointerTy(DAG.getDataLayout())); unsigned Offset = FuncInfo->getVarArgsGPOffset(); for (SDValue Val : LiveGPRs) { SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), RSFIN, DAG.getIntPtrConstant(Offset, dl)); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(), Offset), false, false, 0); MemOps.push_back(Store); Offset += 8; } if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) { // Now store the XMM (fp + vector) parameter registers. 
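// [Editor's note: illustrative sketch, not part of the original LLVM source.]
// The SysV x86-64 register save area bookkeeping set up above, as plain
// arithmetic.  NumIntRegs/NumXMMRegs are the argument registers already
// consumed by named parameters; the rest must be spilled for va_start.
#if 0
struct RegSaveArea {
  unsigned GPOffset;   // FuncInfo->getVarArgsGPOffset()
  unsigned FPOffset;   // FuncInfo->getVarArgsFPOffset()
  unsigned TotalBytes; // size of the stack object holding the save area
};

static RegSaveArea sysvRegSaveArea(unsigned NumIntRegs, unsigned NumXMMRegs,
                                   unsigned NumArgGPRs /*6*/,
                                   unsigned NumArgXMMs /*8*/) {
  RegSaveArea A;
  A.GPOffset   = NumIntRegs * 8;                    // next free GPR slot
  A.FPOffset   = NumArgGPRs * 8 + NumXMMRegs * 16;  // next free XMM slot
  A.TotalBytes = NumArgGPRs * 8 + NumArgXMMs * 16;  // 6*8 + 8*16 = 176 bytes
  return A;
}
#endif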
SmallVector SaveXMMOps; SaveXMMOps.push_back(Chain); SaveXMMOps.push_back(ALVal); SaveXMMOps.push_back(DAG.getIntPtrConstant( FuncInfo->getRegSaveFrameIndex(), dl)); SaveXMMOps.push_back(DAG.getIntPtrConstant( FuncInfo->getVarArgsFPOffset(), dl)); SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(), LiveXMMRegs.end()); MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl, MVT::Other, SaveXMMOps)); } if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); } if (isVarArg && MFI->hasMustTailInVarArgFunc()) { // Find the largest legal vector type. MVT VecVT = MVT::Other; // FIXME: Only some x86_32 calling conventions support AVX512. if (Subtarget->hasAVX512() && (Is64Bit || (CallConv == CallingConv::X86_VectorCall || CallConv == CallingConv::Intel_OCL_BI))) VecVT = MVT::v16f32; else if (Subtarget->hasAVX()) VecVT = MVT::v8f32; else if (Subtarget->hasSSE2()) VecVT = MVT::v4f32; // We forward some GPRs and some vector types. SmallVector RegParmTypes; MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32; RegParmTypes.push_back(IntVT); if (VecVT != MVT::Other) RegParmTypes.push_back(VecVT); // Compute the set of forwarded registers. The rest are scratch. SmallVectorImpl &Forwards = FuncInfo->getForwardedMustTailRegParms(); CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86); // Conservatively forward AL on x86_64, since it might be used for varargs. if (Is64Bit && !CCInfo.isAllocated(X86::AL)) { unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass); Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8)); } // Copy all forwards from physical to virtual registers. for (ForwardedRegister &F : Forwards) { // FIXME: Can we use a less constrained schedule? SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT); F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT)); Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal); } } // Some CCs need callee pop. if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, MF.getTarget().Options.GuaranteedTailCallOpt)) { FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything. } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) { // X86 interrupts must pop the error code if present FuncInfo->setBytesToPopOnReturn(Is64Bit ? 8 : 4); } else { FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. // If this is an sret function, the return should pop the hidden pointer. if (!Is64Bit && !canGuaranteeTCO(CallConv) && !Subtarget->getTargetTriple().isOSMSVCRT() && argsAreStructReturn(Ins, Subtarget->isTargetMCU()) == StackStructReturn) FuncInfo->setBytesToPopOnReturn(4); } if (!Is64Bit) { // RegSaveFrameIndex is X86-64 only. FuncInfo->setRegSaveFrameIndex(0xAAAAAAA); if (CallConv == CallingConv::X86_FastCall || CallConv == CallingConv::X86_ThisCall) // fastcc functions can't have varargs. FuncInfo->setVarArgsFrameIndex(0xAAAAAAA); } FuncInfo->setArgumentStackSize(StackSize); if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) { EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn()); if (Personality == EHPersonality::CoreCLR) { assert(Is64Bit); // TODO: Add a mechanism to frame lowering that will allow us to indicate // that we'd prefer this slot be allocated towards the bottom of the frame // (i.e. near the stack pointer after allocating the frame). 
Every // funclet needs a copy of this slot in its (mostly empty) frame, and the // offset from the bottom of this and each funclet's frame must be the // same, so the size of funclets' (mostly empty) frames is dictated by // how far this slot is from the bottom (since they allocate just enough // space to accomodate holding this slot at the correct offset). int PSPSymFI = MFI->CreateStackObject(8, 8, /*isSS=*/false); EHInfo->PSPSymFrameIdx = PSPSymFI; } } return Chain; } SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, SDLoc dl, SelectionDAG &DAG, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const { unsigned LocMemOffset = VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), StackPtr, PtrOff); if (Flags.isByVal()) return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); return DAG.getStore( Chain, dl, Arg, PtrOff, MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset), false, false, 0); } /// Emit a load of return address if tail call /// optimization is performed and it is required. SDValue X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, bool Is64Bit, int FPDiff, SDLoc dl) const { // Adjust the Return address stack slot. EVT VT = getPointerTy(DAG.getDataLayout()); OutRetAddr = getReturnAddressFrameIndex(DAG); // Load the "old" Return address. OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(), false, false, false, 0); return SDValue(OutRetAddr.getNode(), 1); } /// Emit a store of the return address if tail call /// optimization is performed and it is required (FPDiff!=0). static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue RetAddrFrIdx, EVT PtrVT, unsigned SlotSize, int FPDiff, SDLoc dl) { // Store the return address to the appropriate stack slot. if (!FPDiff) return Chain; // Calculate the new stack slot for the return address. int NewReturnAddrFI = MF.getFrameInfo()->CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize, false); SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT); Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), NewReturnAddrFI), false, false, 0); return Chain; } /// Returns a vector_shuffle mask for an movs{s|d}, movd /// operation of specified width. 
static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1, SDValue V2) { unsigned NumElems = VT.getVectorNumElements(); SmallVector Mask; Mask.push_back(NumElems); for (unsigned i = 1; i != NumElems; ++i) Mask.push_back(i); return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); } SDValue X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc &dl = CLI.DL; SmallVectorImpl &Outs = CLI.Outs; SmallVectorImpl &OutVals = CLI.OutVals; SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; CallingConv::ID CallConv = CLI.CallConv; bool &isTailCall = CLI.IsTailCall; bool isVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isCallingConvWin64(CallConv); StructReturnType SR = callIsStructReturn(Outs, Subtarget->isTargetMCU()); bool IsSibcall = false; X86MachineFunctionInfo *X86Info = MF.getInfo(); auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls"); if (CallConv == CallingConv::X86_INTR) report_fatal_error("X86 interrupts may not be called directly"); if (Attr.getValueAsString() == "true") isTailCall = false; if (Subtarget->isPICStyleGOT() && !MF.getTarget().Options.GuaranteedTailCallOpt) { // If we are using a GOT, disable tail calls to external symbols with // default visibility. Tail calling such a symbol requires using a GOT // relocation, which forces early binding of the symbol. This breaks code // that require lazy function symbol resolution. Using musttail or // GuaranteedTailCallOpt will override this. GlobalAddressSDNode *G = dyn_cast(Callee); if (!G || (!G->getGlobal()->hasLocalLinkage() && G->getGlobal()->hasDefaultVisibility())) isTailCall = false; } bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall(); if (IsMustTail) { // Force this to be a tail call. The verifier rules are enough to ensure // that we can lower this successfully without moving the return address // around. isTailCall = true; } else if (isTailCall) { // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, SR != NotStructReturn, MF.getFunction()->hasStructRetAttr(), CLI.RetTy, Outs, OutVals, Ins, DAG); // Sibcalls are automatically detected tailcalls which do not require // ABI changes. if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall) IsSibcall = true; if (isTailCall) ++NumTailCalls; } assert(!(isVarArg && canGuaranteeTCO(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"); // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); // Allocate shadow area for Win64 if (IsWin64) CCInfo.AllocateStack(32, 8); CCInfo.AnalyzeCallOperands(Outs, CC_X86); // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); if (IsSibcall) // This is a sibcall. The memory operands are available in caller's // own caller's stack. NumBytes = 0; else if (MF.getTarget().Options.GuaranteedTailCallOpt && canGuaranteeTCO(CallConv)) NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); int FPDiff = 0; if (isTailCall && !IsSibcall && !IsMustTail) { // Lower arguments at fp - stackoffset + fpdiff. 
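// [Editor's note: illustrative sketch, not part of the original LLVM source.]
// The shuffle mask built by getMOVL() above: lane 0 is taken from the second
// vector (indices >= NumElems refer to V2), the remaining lanes from the
// first, which is the MOVSS/MOVSD pattern.
#if 0
#include <vector>

static std::vector<int> movlMask(unsigned NumElems) {
  std::vector<int> Mask;
  Mask.push_back((int)NumElems);        // lane 0 <- V2[0]
  for (unsigned i = 1; i != NumElems; ++i)
    Mask.push_back((int)i);             // lane i <- V1[i]
  return Mask;
}
// movlMask(4) == {4, 1, 2, 3}
#endif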
unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn(); FPDiff = NumBytesCallerPushed - NumBytes; // Set the delta of movement of the returnaddr stackslot. // But only set if delta is greater than previous delta. if (FPDiff < X86Info->getTCReturnAddrDelta()) X86Info->setTCReturnAddrDelta(FPDiff); } unsigned NumBytesToPush = NumBytes; unsigned NumBytesToPop = NumBytes; // If we have an inalloca argument, all stack space has already been allocated // for us and be right at the top of the stack. We don't support multiple // arguments passed in memory when using inalloca. if (!Outs.empty() && Outs.back().Flags.isInAlloca()) { NumBytesToPush = 0; if (!ArgLocs.back().isMemLoc()) report_fatal_error("cannot use inalloca attribute on a register " "parameter"); if (ArgLocs.back().getLocMemOffset() != 0) report_fatal_error("any parameter with the inalloca attribute must be " "the only memory argument"); } if (!IsSibcall) Chain = DAG.getCALLSEQ_START( Chain, DAG.getIntPtrConstant(NumBytesToPush, dl, true), dl); SDValue RetAddrFrIdx; // Load return address for tail calls. if (isTailCall && FPDiff) Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, Is64Bit, FPDiff, dl); SmallVector, 8> RegsToPass; SmallVector MemOpChains; SDValue StackPtr; // Walk the register/memloc assignments, inserting copies/loads. In the case // of tail call optimization arguments are handle later. const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { // Skip inalloca arguments, they have already been written. ISD::ArgFlagsTy Flags = Outs[i].Flags; if (Flags.isInAlloca()) continue; CCValAssign &VA = ArgLocs[i]; EVT RegVT = VA.getLocVT(); SDValue Arg = OutVals[i]; bool isByVal = Flags.isByVal(); // Promote the value if needed. switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::SExt: Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg); break; case CCValAssign::ZExt: Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg); break; case CCValAssign::AExt: if (Arg.getValueType().isVector() && Arg.getValueType().getVectorElementType() == MVT::i1) Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg); else if (RegVT.is128BitVector()) { // Special case: passing MMX values in XMM registers. Arg = DAG.getBitcast(MVT::i64, Arg); Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg); } else Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg); break; case CCValAssign::BCvt: Arg = DAG.getBitcast(RegVT, Arg); break; case CCValAssign::Indirect: { // Store the argument. SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT()); int FI = cast(SpillSlot)->getIndex(); Chain = DAG.getStore( Chain, dl, Arg, SpillSlot, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false, false, 0); Arg = SpillSlot; break; } } if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); if (isVarArg && IsWin64) { // Win64 ABI requires argument XMM reg to be copied to the corresponding // shadow reg if callee is a varargs function. 
unsigned ShadowReg = 0; switch (VA.getLocReg()) { case X86::XMM0: ShadowReg = X86::RCX; break; case X86::XMM1: ShadowReg = X86::RDX; break; case X86::XMM2: ShadowReg = X86::R8; break; case X86::XMM3: ShadowReg = X86::R9; break; } if (ShadowReg) RegsToPass.push_back(std::make_pair(ShadowReg, Arg)); } } else if (!IsSibcall && (!isTailCall || isByVal)) { assert(VA.isMemLoc()); if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), getPointerTy(DAG.getDataLayout())); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, dl, DAG, VA, Flags)); } } if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); if (Subtarget->isPICStyleGOT()) { // ELF / PIC requires GOT in the EBX register before function calls via PLT // GOT pointer. if (!isTailCall) { RegsToPass.push_back(std::make_pair( unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy(DAG.getDataLayout())))); } else { // If we are tail calling and generating PIC/GOT style code load the // address of the callee into ECX. The value in ecx is used as target of // the tail jump. This is done to circumvent the ebx/callee-saved problem // for tail calls on PIC/GOT architectures. Normally we would just put the // address of GOT into ebx and then call target@PLT. But for tail calls // ebx would be restored (since ebx is callee saved) before jumping to the // target@PLT. // Note: The actual moving to ECX is done further down. GlobalAddressSDNode *G = dyn_cast(Callee); if (G && !G->getGlobal()->hasLocalLinkage() && G->getGlobal()->hasDefaultVisibility()) Callee = LowerGlobalAddress(Callee, DAG); else if (isa(Callee)) Callee = LowerExternalSymbol(Callee, DAG); } } if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) { // From AMD64 ABI document: // For calls that may call functions that use varargs or stdargs // (prototype-less calls or calls to functions containing ellipsis (...) in // the declaration) %al is used as hidden argument to specify the number // of SSE registers used. The contents of %al do not need to match exactly // the number of registers, but must be an ubound on the number of SSE // registers used and is in the range 0 - 8 inclusive. // Count the number of XMM registers allocated. static const MCPhysReg XMMArgRegs[] = { X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 }; unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs); assert((Subtarget->hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"); RegsToPass.push_back(std::make_pair(unsigned(X86::AL), DAG.getConstant(NumXMMRegs, dl, MVT::i8))); } if (isVarArg && IsMustTail) { const auto &Forwards = X86Info->getForwardedMustTailRegParms(); for (const auto &F : Forwards) { SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT); RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val)); } } // For tail calls lower the arguments to the 'real' stack slots. Sibcalls // don't need this because the eligibility check rejects calls that require // shuffling arguments passed in memory. if (!IsSibcall && isTailCall) { // Force all the incoming stack arguments to be loaded from the stack // before any new outgoing arguments are stored to the stack, because the // outgoing stack slots may alias the incoming argument stack slots, and // the alias isn't otherwise explicit. 
This is slightly more conservative // than necessary, because it means that each store effectively depends // on every argument instead of just those arguments it would clobber. SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain); SmallVector MemOpChains2; SDValue FIN; int FI = 0; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; if (VA.isRegLoc()) continue; assert(VA.isMemLoc()); SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; // Skip inalloca arguments. They don't require any work. if (Flags.isInAlloca()) continue; // Create frame index. int32_t Offset = VA.getLocMemOffset()+FPDiff; uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8; FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); if (Flags.isByVal()) { // Copy relative to framepointer. SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl); if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), getPointerTy(DAG.getDataLayout())); Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), StackPtr, Source); MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, ArgChain, Flags, DAG, dl)); } else { // Store relative to framepointer. MemOpChains2.push_back(DAG.getStore( ArgChain, dl, Arg, FIN, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false, false, 0)); } } if (!MemOpChains2.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); // Store the return address to the appropriate stack slot. Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, getPointerTy(DAG.getDataLayout()), RegInfo->getSlotSize(), FPDiff, dl); } // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into registers. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } if (DAG.getTarget().getCodeModel() == CodeModel::Large) { assert(Is64Bit && "Large code model is only legal in 64-bit mode."); // In the 64-bit large code model, we have to make all calls // through a register, since the call instruction's 32-bit // pc-relative offset may not be large enough to hold the whole // address. } else if (Callee->getOpcode() == ISD::GlobalAddress) { // If the callee is a GlobalAddress node (quite common, every direct call // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack // it. GlobalAddressSDNode* G = cast(Callee); // We should use extra load for direct calls to dllimported functions in // non-JIT mode. const GlobalValue *GV = G->getGlobal(); if (!GV->hasDLLImportStorageClass()) { unsigned char OpFlags = 0; bool ExtraLoad = false; unsigned WrapperKind = ISD::DELETED_NODE; // On ELF targets, in both X86-64 and X86-32 mode, direct calls to // external symbols most go through the PLT in PIC mode. If the symbol // has hidden or protected visibility, or if it is static or local, then // we don't need to use the PLT - we can directly call it. 
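// [Editor's note: illustrative sketch, not part of the original LLVM source.]
// The fixed stack slot created above for each tail-call argument that has to
// be relocated into the caller's frame.  FPDiff is the delta between the
// caller's incoming argument area and the callee's outgoing one.
#if 0
struct TailCallArgSlot { int Offset; unsigned SizeInBytes; };

static TailCallArgSlot tailCallArgSlot(unsigned LocMemOffset, int FPDiff,
                                       unsigned LocSizeInBits) {
  TailCallArgSlot S;
  S.Offset      = (int)LocMemOffset + FPDiff;   // slot in the caller's frame
  S.SizeInBytes = (LocSizeInBits + 7) / 8;      // round bits up to bytes
  return S;
}
#endif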
if (Subtarget->isTargetELF() && DAG.getTarget().getRelocationModel() == Reloc::PIC_ && GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) { OpFlags = X86II::MO_PLT; } else if (Subtarget->isPICStyleStubAny() && !GV->isStrongDefinitionForLinker() && (!Subtarget->getTargetTriple().isMacOSX() || Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) { // PC-relative references to external symbols should go through $stub, // unless we're building with the leopard linker or later, which // automatically synthesizes these stubs. OpFlags = X86II::MO_DARWIN_STUB; } else if (Subtarget->isPICStyleRIPRel() && isa(GV) && cast(GV)->hasFnAttribute(Attribute::NonLazyBind)) { // If the function is marked as non-lazy, generate an indirect call // which loads from the GOT directly. This avoids runtime overhead // at the cost of eager binding (and one extra byte of encoding). OpFlags = X86II::MO_GOTPCREL; WrapperKind = X86ISD::WrapperRIP; ExtraLoad = true; } Callee = DAG.getTargetGlobalAddress( GV, dl, getPointerTy(DAG.getDataLayout()), G->getOffset(), OpFlags); // Add a wrapper if needed. if (WrapperKind != ISD::DELETED_NODE) Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(DAG.getDataLayout()), Callee); // Add extra indirection if needed. if (ExtraLoad) Callee = DAG.getLoad( getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee, MachinePointerInfo::getGOT(DAG.getMachineFunction()), false, false, false, 0); } } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { unsigned char OpFlags = 0; // On ELF targets, in either X86-64 or X86-32 mode, direct calls to // external symbols should go through the PLT. if (Subtarget->isTargetELF() && DAG.getTarget().getRelocationModel() == Reloc::PIC_) { OpFlags = X86II::MO_PLT; } else if (Subtarget->isPICStyleStubAny() && (!Subtarget->getTargetTriple().isMacOSX() || Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) { // PC-relative references to external symbols should go through $stub, // unless we're building with the leopard linker or later, which // automatically synthesizes these stubs. OpFlags = X86II::MO_DARWIN_STUB; } Callee = DAG.getTargetExternalSymbol( S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags); } else if (Subtarget->isTarget64BitILP32() && Callee->getValueType(0) == MVT::i32) { // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee); } // Returns a chain & a flag for retval copy to use. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SmallVector Ops; if (!IsSibcall && isTailCall) { Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytesToPop, dl, true), DAG.getIntPtrConstant(0, dl, true), InFlag, dl); InFlag = Chain.getValue(1); } Ops.push_back(Chain); Ops.push_back(Callee); if (isTailCall) Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32)); // Add argument registers to the end of the list so that they are known live // into the call. for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. const uint32_t *Mask = RegInfo->getCallPreservedMask(MF, CallConv); assert(Mask && "Missing call preserved mask for calling convention"); // If this is an invoke in a 32-bit function using a funclet-based // personality, assume the function clobbers all registers. If an exception // is thrown, the runtime will not restore CSRs. 
// FIXME: Model this more precisely so that we can register allocate across // the normal edge and spill and fill across the exceptional edge. if (!Is64Bit && CLI.CS && CLI.CS->isInvoke()) { const Function *CallerFn = MF.getFunction(); EHPersonality Pers = CallerFn->hasPersonalityFn() ? classifyEHPersonality(CallerFn->getPersonalityFn()) : EHPersonality::Unknown; if (isFuncletEHPersonality(Pers)) Mask = RegInfo->getNoPreservedMask(); } Ops.push_back(DAG.getRegisterMask(Mask)); if (InFlag.getNode()) Ops.push_back(InFlag); if (isTailCall) { // We used to do: //// If this is the first return lowered for this function, add the regs //// to the liveout set for the function. // This isn't right, although it's probably harmless on x86; liveouts // should be computed from returns not tail calls. Consider a void // function making a tail call to a function returning int. MF.getFrameInfo()->setHasTailCall(); return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops); } Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops); InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. unsigned NumBytesForCalleeToPop; if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, DAG.getTarget().Options.GuaranteedTailCallOpt)) NumBytesForCalleeToPop = NumBytes; // Callee pops everything else if (!Is64Bit && !canGuaranteeTCO(CallConv) && !Subtarget->getTargetTriple().isOSMSVCRT() && SR == StackStructReturn) // If this is a call to a struct-return function, the callee // pops the hidden struct pointer, so we have to push it back. // This is common for Darwin/X86, Linux & Mingw32 targets. // For MSVC Win32 targets, the caller pops the hidden struct pointer. NumBytesForCalleeToPop = 4; else NumBytesForCalleeToPop = 0; // Callee pops nothing. // Returns a flag for retval copy to use. if (!IsSibcall) { Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytesToPop, dl, true), DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl, true), InFlag, dl); InFlag = Chain.getValue(1); } // Handle result values, copying them out of physregs into vregs that we // return. return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, InVals); } //===----------------------------------------------------------------------===// // Fast Calling Convention (tail call) implementation //===----------------------------------------------------------------------===// // Like std call, callee cleans arguments, convention except that ECX is // reserved for storing the tail called function address. Only 2 registers are // free for argument passing (inreg). Tail call optimization is performed // provided: // * tailcallopt is enabled // * caller/callee are fastcc // On X86_64 architecture with GOT-style position independent code only local // (within module) calls are supported at the moment. // To keep the stack aligned according to platform abi the function // GetAlignedArgumentStackSize ensures that argument delta is always multiples // of stack alignment. (Dynamic linkers need this - darwin's dyld for example) // If a tail called function callee has more arguments than the caller the // caller needs to make sure that there is room to move the RETADDR to. This is // achieved by reserving an area the size of the argument delta right after the // original RETADDR, but before the saved framepointer or the spilled registers // e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4) // stack layout: // arg1 // arg2 // RETADDR // [ new RETADDR // move area ] // (possible EBP) // ESI // EDI // local1 .. 
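// [Editor's note: illustrative sketch, not part of the original LLVM source.]
// The "bytes for the callee to pop" decision made when building CALLSEQ_END
// in LowerCall() above, reduced to its three cases.  CalleePopCC stands for
// X86::isCalleePop(...) and GuaranteedTCO_CC for canGuaranteeTCO(CallConv).
#if 0
static unsigned bytesForCalleeToPop(bool CalleePopCC, unsigned NumBytes,
                                    bool Is64Bit, bool GuaranteedTCO_CC,
                                    bool IsMSVCRT, bool IsStackStructReturn) {
  if (CalleePopCC)
    return NumBytes;   // stdcall/thiscall-style: the callee pops everything
  if (!Is64Bit && !GuaranteedTCO_CC && !IsMSVCRT && IsStackStructReturn)
    return 4;          // callee pops the hidden sret pointer
  return 0;            // caller cleans up
}
#endif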
/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align
/// requirement.
unsigned
X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
                                               SelectionDAG& DAG) const {
  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
  const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
  unsigned StackAlignment = TFI.getStackAlignment();
  uint64_t AlignMask = StackAlignment - 1;
  int64_t Offset = StackSize;
  unsigned SlotSize = RegInfo->getSlotSize();
  if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
    // Number smaller than 12 so just add the difference.
    Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
  } else {
    // Mask out lower bits, add stackalignment once plus the 12 bytes.
    Offset = ((~AlignMask) & Offset) + StackAlignment +
      (StackAlignment-SlotSize);
  }
  return Offset;
}

/// Return true if the given stack call argument is already available in the
/// same position (relatively) of the caller's incoming argument stack.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
                         MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
                         const X86InstrInfo *TII) {
  unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
  int FI = INT_MAX;
  if (Arg.getOpcode() == ISD::CopyFromReg) {
    unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
    if (!TargetRegisterInfo::isVirtualRegister(VR))
      return false;
    MachineInstr *Def = MRI->getVRegDef(VR);
    if (!Def)
      return false;
    if (!Flags.isByVal()) {
      if (!TII->isLoadFromStackSlot(Def, FI))
        return false;
    } else {
      unsigned Opcode = Def->getOpcode();
      if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
           Opcode == X86::LEA64_32r) &&
          Def->getOperand(1).isFI()) {
        FI = Def->getOperand(1).getIndex();
        Bytes = Flags.getByValSize();
      } else
        return false;
    }
  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
    if (Flags.isByVal())
      // ByVal argument is passed in as a pointer but it's now being
      // dereferenced. e.g.
      // define @foo(%struct.X* %A) {
      //   tail call @bar(%struct.X* byval %A)
      // }
      return false;
    SDValue Ptr = Ld->getBasePtr();
    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
    if (!FINode)
      return false;
    FI = FINode->getIndex();
  } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
    FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
    FI = FINode->getIndex();
    Bytes = Flags.getByValSize();
  } else
    return false;

  assert(FI != INT_MAX);
  if (!MFI->isFixedObjectIndex(FI))
    return false;
  return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
}

/// Check whether the call is eligible for tail call optimization. Targets
/// that want to do tail call optimization should implement this function.
bool X86TargetLowering::IsEligibleForTailCallOptimization(
    SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
    bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
  if (!mayTailCallThisCC(CalleeCC))
    return false;

  // If -tailcallopt is specified, make fastcc functions tail-callable.
  MachineFunction &MF = DAG.getMachineFunction();
  const Function *CallerF = MF.getFunction();

  // If the function return type is x86_fp80 and the callee return type is not,
  // then the FP_EXTEND of the call result is not a nop. It's not safe to
  // perform a tailcall optimization here.
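// [Editor's note: illustrative sketch, not part of the original LLVM source.]
// The arithmetic in GetAlignedArgumentStackSize() above.  With a 16-byte
// stack alignment and a 4-byte slot it produces sizes of the form 16n + 12,
// so the stack is 16-byte aligned once the return address has been pushed.
#if 0
#include <cstdint>

static int64_t alignedArgumentStackSize(int64_t StackSize,
                                        unsigned StackAlignment /*16*/,
                                        unsigned SlotSize /*4 or 8*/) {
  uint64_t AlignMask = StackAlignment - 1;
  int64_t Offset = StackSize;
  if ((Offset & AlignMask) <= (StackAlignment - SlotSize))
    Offset += (StackAlignment - SlotSize) - (Offset & AlignMask);
  else
    Offset = ((~AlignMask) & Offset) + StackAlignment +
             (StackAlignment - SlotSize);
  return Offset;
}
// alignedArgumentStackSize(5, 16, 4)  == 12
// alignedArgumentStackSize(13, 16, 4) == 28   (16 + 12)
#endif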
if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty()) return false; CallingConv::ID CallerCC = CallerF->getCallingConv(); bool CCMatch = CallerCC == CalleeCC; bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC); bool IsCallerWin64 = Subtarget->isCallingConvWin64(CallerCC); // Win64 functions have extra shadow space for argument homing. Don't do the // sibcall if the caller and callee have mismatched expectations for this // space. if (IsCalleeWin64 != IsCallerWin64) return false; if (DAG.getTarget().Options.GuaranteedTailCallOpt) { if (canGuaranteeTCO(CalleeCC) && CCMatch) return true; return false; } // Look for obvious safe cases to perform tail call optimization that do not // require ABI changes. This is what gcc calls sibcall. // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to // emit a special epilogue. const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); if (RegInfo->needsStackRealignment(MF)) return false; // Also avoid sibcall optimization if either caller or callee uses struct // return semantics. if (isCalleeStructRet || isCallerStructRet) return false; // Do not sibcall optimize vararg calls unless all arguments are passed via // registers. if (isVarArg && !Outs.empty()) { // Optimizing for varargs on Win64 is unlikely to be safe without // additional testing. if (IsCalleeWin64 || IsCallerWin64) return false; SmallVector ArgLocs; CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeCallOperands(Outs, CC_X86); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) if (!ArgLocs[i].isRegLoc()) return false; } // If the call result is in ST0 / ST1, it needs to be popped off the x87 // stack. Therefore, if it's not used by the call it is not safe to optimize // this into a sibcall. bool Unused = false; for (unsigned i = 0, e = Ins.size(); i != e; ++i) { if (!Ins[i].Used) { Unused = true; break; } } if (Unused) { SmallVector RVLocs; CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC_X86); for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { CCValAssign &VA = RVLocs[i]; if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) return false; } } // If the calling conventions do not match, then we'd better make sure the // results are returned in the same way as what the caller expects. if (!CCMatch) { SmallVector RVLocs1; CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1, *DAG.getContext()); CCInfo1.AnalyzeCallResult(Ins, RetCC_X86); SmallVector RVLocs2; CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2, *DAG.getContext()); CCInfo2.AnalyzeCallResult(Ins, RetCC_X86); if (RVLocs1.size() != RVLocs2.size()) return false; for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) return false; if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) return false; if (RVLocs1[i].isRegLoc()) { if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) return false; } else { if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) return false; } } } unsigned StackArgsSize = 0; // If the callee takes no arguments then go on to check the results of the // call. if (!Outs.empty()) { // Check if stack adjustment is needed. For now, do not do this if any // argument is passed on the stack. 
SmallVector ArgLocs; CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); // Allocate shadow area for Win64 if (IsCalleeWin64) CCInfo.AllocateStack(32, 8); CCInfo.AnalyzeCallOperands(Outs, CC_X86); StackArgsSize = CCInfo.getNextStackOffset(); if (CCInfo.getNextStackOffset()) { // Check if the arguments are already laid out in the right way as // the caller's fixed stack objects. MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo(); const X86InstrInfo *TII = Subtarget->getInstrInfo(); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; if (VA.getLocInfo() == CCValAssign::Indirect) return false; if (!VA.isRegLoc()) { if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI, TII)) return false; } } } // If the tailcall address may be in a register, then make sure it's // possible to register allocate for it. In 32-bit, the call address can // only target EAX, EDX, or ECX since the tail call must be scheduled after // callee-saved registers are restored. These happen to be the same // registers used to pass 'inreg' arguments so watch out for those. if (!Subtarget->is64Bit() && ((!isa(Callee) && !isa(Callee)) || DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { unsigned NumInRegs = 0; // In PIC we need an extra register to formulate the address computation // for the callee. unsigned MaxInRegs = (DAG.getTarget().getRelocationModel() == Reloc::PIC_) ? 2 : 3; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; if (!VA.isRegLoc()) continue; unsigned Reg = VA.getLocReg(); switch (Reg) { default: break; case X86::EAX: case X86::EDX: case X86::ECX: if (++NumInRegs == MaxInRegs) return false; break; } } } } bool CalleeWillPop = X86::isCalleePop(CalleeCC, Subtarget->is64Bit(), isVarArg, MF.getTarget().Options.GuaranteedTailCallOpt); if (unsigned BytesToPop = MF.getInfo()->getBytesToPopOnReturn()) { // If we have bytes to pop, the callee must pop them. bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize; if (!CalleePopMatches) return false; } else if (CalleeWillPop && StackArgsSize > 0) { // If we don't have bytes to pop, make sure the callee doesn't pop any. 
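    // For example (illustrative): a cdecl caller that pops nothing cannot
    // sibcall an X86_StdCall callee with stack arguments, because the callee
    // would pop bytes that the caller's own caller does not expect to be
    // removed.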
return false; } return true; } FastISel * X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const { return X86::createFastISel(funcInfo, libInfo); } //===----------------------------------------------------------------------===// // Other Lowering Hooks //===----------------------------------------------------------------------===// static bool MayFoldLoad(SDValue Op) { return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode()); } static bool MayFoldIntoStore(SDValue Op) { return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin()); } static bool isTargetShuffle(unsigned Opcode) { switch(Opcode) { default: return false; case X86ISD::BLENDI: case X86ISD::PSHUFB: case X86ISD::PSHUFD: case X86ISD::PSHUFHW: case X86ISD::PSHUFLW: case X86ISD::SHUFP: case X86ISD::INSERTPS: case X86ISD::PALIGNR: case X86ISD::MOVLHPS: case X86ISD::MOVLHPD: case X86ISD::MOVHLPS: case X86ISD::MOVLPS: case X86ISD::MOVLPD: case X86ISD::MOVSHDUP: case X86ISD::MOVSLDUP: case X86ISD::MOVDDUP: case X86ISD::MOVSS: case X86ISD::MOVSD: case X86ISD::UNPCKL: case X86ISD::UNPCKH: case X86ISD::VPERMILPI: case X86ISD::VPERM2X128: case X86ISD::VPERMI: case X86ISD::VPERMV: case X86ISD::VPERMV3: return true; } } static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, MVT VT, SDValue V1, unsigned TargetMask, SelectionDAG &DAG) { switch(Opc) { default: llvm_unreachable("Unknown x86 shuffle node"); case X86ISD::PSHUFD: case X86ISD::PSHUFHW: case X86ISD::PSHUFLW: case X86ISD::VPERMILPI: case X86ISD::VPERMI: return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, dl, MVT::i8)); } } static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG) { switch(Opc) { default: llvm_unreachable("Unknown x86 shuffle node"); case X86ISD::MOVLHPS: case X86ISD::MOVLHPD: case X86ISD::MOVHLPS: case X86ISD::MOVLPS: case X86ISD::MOVLPD: case X86ISD::MOVSS: case X86ISD::MOVSD: case X86ISD::UNPCKL: case X86ISD::UNPCKH: return DAG.getNode(Opc, dl, VT, V1, V2); } } SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); X86MachineFunctionInfo *FuncInfo = MF.getInfo(); int ReturnAddrIndex = FuncInfo->getRAIndex(); if (ReturnAddrIndex == 0) { // Set up a frame object for the return address. unsigned SlotSize = RegInfo->getSlotSize(); ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -(int64_t)SlotSize, false); FuncInfo->setRAIndex(ReturnAddrIndex); } return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout())); } bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, bool hasSymbolicDisplacement) { // Offset should fit into 32 bit immediate field. if (!isInt<32>(Offset)) return false; // If we don't have a symbolic displacement - we don't have any extra // restrictions. if (!hasSymbolicDisplacement) return true; // FIXME: Some tweaks might be needed for medium code model. if (M != CodeModel::Small && M != CodeModel::Kernel) return false; // For small code model we assume that latest object is 16MB before end of 31 // bits boundary. We may also accept pretty large negative constants knowing // that all objects are in the positive half of address space. if (M == CodeModel::Small && Offset < 16*1024*1024) return true; // For kernel code model we know that all object resist in the negative half // of 32bits address space. 
We may not accept negative offsets, since they may // be just off and we may accept pretty large positive ones. if (M == CodeModel::Kernel && Offset >= 0) return true; return false; } /// Determines whether the callee is required to pop its own arguments. /// Callee pop is necessary to support tail calls. bool X86::isCalleePop(CallingConv::ID CallingConv, bool is64Bit, bool IsVarArg, bool GuaranteeTCO) { // If GuaranteeTCO is true, we force some calls to be callee pop so that we // can guarantee TCO. if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO)) return true; switch (CallingConv) { default: return false; case CallingConv::X86_StdCall: case CallingConv::X86_FastCall: case CallingConv::X86_ThisCall: case CallingConv::X86_VectorCall: return !is64Bit; } } /// \brief Return true if the condition is an unsigned comparison operation. static bool isX86CCUnsigned(unsigned X86CC) { switch (X86CC) { default: llvm_unreachable("Invalid integer condition!"); case X86::COND_E: return true; case X86::COND_G: return false; case X86::COND_GE: return false; case X86::COND_L: return false; case X86::COND_LE: return false; case X86::COND_NE: return true; case X86::COND_B: return true; case X86::COND_A: return true; case X86::COND_BE: return true; case X86::COND_AE: return true; } } static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) { switch (SetCCOpcode) { default: llvm_unreachable("Invalid integer condition!"); case ISD::SETEQ: return X86::COND_E; case ISD::SETGT: return X86::COND_G; case ISD::SETGE: return X86::COND_GE; case ISD::SETLT: return X86::COND_L; case ISD::SETLE: return X86::COND_LE; case ISD::SETNE: return X86::COND_NE; case ISD::SETULT: return X86::COND_B; case ISD::SETUGT: return X86::COND_A; case ISD::SETULE: return X86::COND_BE; case ISD::SETUGE: return X86::COND_AE; } } /// Do a one-to-one translation of a ISD::CondCode to the X86-specific /// condition code, returning the condition code and the LHS/RHS of the /// comparison to make. static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, SDLoc DL, bool isFP, SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) { if (!isFP) { if (ConstantSDNode *RHSC = dyn_cast(RHS)) { if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) { // X > -1 -> X == 0, jump !sign. RHS = DAG.getConstant(0, DL, RHS.getValueType()); return X86::COND_NS; } if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) { // X < 0 -> X == 0, jump on sign. return X86::COND_S; } if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) { // X < 1 -> X <= 0 RHS = DAG.getConstant(0, DL, RHS.getValueType()); return X86::COND_LE; } } return TranslateIntegerX86CC(SetCCOpcode); } // First determine if it is required or is profitable to flip the operands. // If LHS is a foldable load, but RHS is not, flip the condition. 
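  // Illustrative example: for (setolt (load %p), %x) the operands are swapped
  // to (setogt %x, (load %p)), so the load can later be folded as the memory
  // operand of the UCOMIS/CMP that implements the compare.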
if (ISD::isNON_EXTLoad(LHS.getNode()) && !ISD::isNON_EXTLoad(RHS.getNode())) { SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode); std::swap(LHS, RHS); } switch (SetCCOpcode) { default: break; case ISD::SETOLT: case ISD::SETOLE: case ISD::SETUGT: case ISD::SETUGE: std::swap(LHS, RHS); break; } // On a floating point condition, the flags are set as follows: // ZF PF CF op // 0 | 0 | 0 | X > Y // 0 | 0 | 1 | X < Y // 1 | 0 | 0 | X == Y // 1 | 1 | 1 | unordered switch (SetCCOpcode) { default: llvm_unreachable("Condcode should be pre-legalized away"); case ISD::SETUEQ: case ISD::SETEQ: return X86::COND_E; case ISD::SETOLT: // flipped case ISD::SETOGT: case ISD::SETGT: return X86::COND_A; case ISD::SETOLE: // flipped case ISD::SETOGE: case ISD::SETGE: return X86::COND_AE; case ISD::SETUGT: // flipped case ISD::SETULT: case ISD::SETLT: return X86::COND_B; case ISD::SETUGE: // flipped case ISD::SETULE: case ISD::SETLE: return X86::COND_BE; case ISD::SETONE: case ISD::SETNE: return X86::COND_NE; case ISD::SETUO: return X86::COND_P; case ISD::SETO: return X86::COND_NP; case ISD::SETOEQ: case ISD::SETUNE: return X86::COND_INVALID; } } /// Is there a floating point cmov for the specific X86 condition code? /// Current x86 isa includes the following FP cmov instructions: /// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu. static bool hasFPCMov(unsigned X86CC) { switch (X86CC) { default: return false; case X86::COND_B: case X86::COND_BE: case X86::COND_E: case X86::COND_P: case X86::COND_A: case X86::COND_AE: case X86::COND_NE: case X86::COND_NP: return true; } } bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic); if (!IntrData) return false; switch (IntrData->Type) { case LOADA: case LOADU: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(I.getType()); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.align = (IntrData->Type == LOADA ? Info.memVT.getSizeInBits()/8 : 1); Info.vol = false; Info.readMem = true; Info.writeMem = false; return true; } default: break; } return false; } /// Returns true if the target can instruction select the /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) { if (Imm.bitwiseIsEqual(LegalFPImmediates[i])) return true; } return false; } bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const { // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF // relocation target a movq or addq instruction: don't let the load shrink. SDValue BasePtr = cast(Load)->getBasePtr(); if (BasePtr.getOpcode() == X86ISD::WrapperRIP) if (const auto *GA = dyn_cast(BasePtr.getOperand(0))) return GA->getTargetFlags() != X86II::MO_GOTTPOFF; return true; } /// \brief Returns true if it is beneficial to convert a load of a constant /// to just the constant itself. 
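/// For example (illustrative): a 64-bit constant such as 0x1234567890abcdef
/// can be rematerialized with a single MOV64ri, which is generally cheaper
/// than keeping it in the constant pool and reloading it.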
bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                          Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0 || BitSize > 64)
    return false;
  return true;
}

bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT,
                                                unsigned Index) const {
  if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
    return false;

  return (Index == 0 || Index == ResVT.getVectorNumElements());
}

bool X86TargetLowering::isCheapToSpeculateCttz() const {
  // Speculate cttz only if we can directly use TZCNT.
  return Subtarget->hasBMI();
}

bool X86TargetLowering::isCheapToSpeculateCtlz() const {
  // Speculate ctlz only if we can directly use LZCNT.
  return Subtarget->hasLZCNT();
}

/// Return true if every element in Mask, beginning
/// from position Pos and ending in Pos+Size is undef.
static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
  for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
    if (0 <= Mask[i])
      return false;
  return true;
}

/// Return true if Val is undef or if its value falls within the
/// specified half-open range [Low, Hi).
static bool isUndefOrInRange(int Val, int Low, int Hi) {
  return (Val < 0) || (Val >= Low && Val < Hi);
}

/// Val is either less than zero (undef) or equal to the specified value.
static bool isUndefOrEqual(int Val, int CmpVal) {
  return (Val < 0 || Val == CmpVal);
}

/// Return true if every element in Mask, beginning
/// from position Pos and ending in Pos+Size, is undef or falls within the
/// sequential range [Low, Low+Size).
static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
                                       unsigned Pos, unsigned Size, int Low) {
  for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
    if (!isUndefOrEqual(Mask[i], Low))
      return false;
  return true;
}

/// Return true if the specified EXTRACT_SUBVECTOR operand specifies a vector
/// extract that is suitable for instructions that extract 128 or 256 bit
/// vectors.
static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) {
  assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
  if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
    return false;

  // The index should be aligned on a vecWidth-bit boundary.
  uint64_t Index =
    cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();

  MVT VT = N->getSimpleValueType(0);
  unsigned ElSize = VT.getVectorElementType().getSizeInBits();
  bool Result = (Index * ElSize) % vecWidth == 0;
  return Result;
}

/// Return true if the specified INSERT_SUBVECTOR
/// operand specifies a subvector insert that is suitable for input to
/// insertion of 128 or 256-bit subvectors.
static bool isVINSERTIndex(SDNode *N, unsigned vecWidth) {
  assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
  if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
    return false;
  // The index should be aligned on a vecWidth-bit boundary.
  uint64_t Index =
    cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();

  MVT VT = N->getSimpleValueType(0);
  unsigned ElSize = VT.getVectorElementType().getSizeInBits();
  bool Result = (Index * ElSize) % vecWidth == 0;
  return Result;
}

bool X86::isVINSERT128Index(SDNode *N) {
  return isVINSERTIndex(N, 128);
}

bool X86::isVINSERT256Index(SDNode *N) {
  return isVINSERTIndex(N, 256);
}

bool X86::isVEXTRACT128Index(SDNode *N) {
  return isVEXTRACTIndex(N, 128);
}

bool X86::isVEXTRACT256Index(SDNode *N) {
  return isVEXTRACTIndex(N, 256);
}

static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) {
  assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
  assert(isa<ConstantSDNode>(N->getOperand(1).getNode()) &&
         "Illegal extract subvector for VEXTRACT");

  uint64_t Index =
    cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();

  MVT VecVT = N->getOperand(0).getSimpleValueType();
  MVT ElVT = VecVT.getVectorElementType();

  unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
  return Index / NumElemsPerChunk;
}

static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) {
  assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
  assert(isa<ConstantSDNode>(N->getOperand(2).getNode()) &&
         "Illegal insert subvector for VINSERT");

  uint64_t Index =
    cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();

  MVT VecVT = N->getSimpleValueType(0);
  MVT ElVT = VecVT.getVectorElementType();

  unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
  return Index / NumElemsPerChunk;
}

/// Return the appropriate immediate to extract the specified
/// EXTRACT_SUBVECTOR index with VEXTRACTF128 and VEXTRACTI128 instructions.
unsigned X86::getExtractVEXTRACT128Immediate(SDNode *N) {
  return getExtractVEXTRACTImmediate(N, 128);
}

/// Return the appropriate immediate to extract the specified
/// EXTRACT_SUBVECTOR index with VEXTRACTF64x4 and VEXTRACTI64x4 instructions.
unsigned X86::getExtractVEXTRACT256Immediate(SDNode *N) {
  return getExtractVEXTRACTImmediate(N, 256);
}

/// Return the appropriate immediate to insert at the specified
/// INSERT_SUBVECTOR index with VINSERTF128 and VINSERTI128 instructions.
unsigned X86::getInsertVINSERT128Immediate(SDNode *N) {
  return getInsertVINSERTImmediate(N, 128);
}

/// Return the appropriate immediate to insert at the specified
/// INSERT_SUBVECTOR index with VINSERTF64x4 and VINSERTI64x4 instructions.
unsigned X86::getInsertVINSERT256Immediate(SDNode *N) {
  return getInsertVINSERTImmediate(N, 256);
}

/// Returns true if Elt is a constant zero or a floating point constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
  return isNullConstant(Elt) || isNullFPConstant(Elt);
}

// Build a vector of constants.
// Use an UNDEF node if MaskElt == -1.
// Split 64-bit constants in the 32-bit mode.
static SDValue getConstVector(ArrayRef<int> Values, MVT VT,
                              SelectionDAG &DAG,
                              SDLoc dl, bool IsMask = false) {

  SmallVector<SDValue, 32> Ops;
  bool Split = false;

  MVT ConstVecVT = VT;
  unsigned NumElts = VT.getVectorNumElements();
  bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
  if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
    ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
    Split = true;
  }

  MVT EltVT = ConstVecVT.getVectorElementType();
  for (unsigned i = 0; i < NumElts; ++i) {
    bool IsUndef = Values[i] < 0 && IsMask;
    SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
      DAG.getConstant(Values[i], dl, EltVT);
    Ops.push_back(OpNode);
    if (Split)
      Ops.push_back(IsUndef ?
DAG.getUNDEF(EltVT) : DAG.getConstant(0, dl, EltVT)); } SDValue ConstsNode = DAG.getNode(ISD::BUILD_VECTOR, dl, ConstVecVT, Ops); if (Split) ConstsNode = DAG.getBitcast(VT, ConstsNode); return ConstsNode; } /// Returns a vector of specified type with all zero elements. static SDValue getZeroVector(MVT VT, const X86Subtarget *Subtarget, SelectionDAG &DAG, SDLoc dl) { assert(VT.isVector() && "Expected a vector type"); // Always build SSE zero vectors as <4 x i32> bitcasted // to their dest type. This ensures they get CSE'd. SDValue Vec; if (VT.is128BitVector()) { // SSE if (Subtarget->hasSSE2()) { // SSE2 SDValue Cst = DAG.getConstant(0, dl, MVT::i32); Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); } else { // SSE1 SDValue Cst = DAG.getConstantFP(+0.0, dl, MVT::f32); Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst); } } else if (VT.is256BitVector()) { // AVX if (Subtarget->hasInt256()) { // AVX2 SDValue Cst = DAG.getConstant(0, dl, MVT::i32); SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops); } else { // 256-bit logic and arithmetic instructions in AVX are all // floating-point, no support for integer ops. Emit fp zeroed vectors. SDValue Cst = DAG.getConstantFP(+0.0, dl, MVT::f32); SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops); } } else if (VT.is512BitVector()) { // AVX-512 SDValue Cst = DAG.getConstant(0, dl, MVT::i32); SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops); } else if (VT.getVectorElementType() == MVT::i1) { assert((Subtarget->hasBWI() || VT.getVectorNumElements() <= 16) && "Unexpected vector type"); assert((Subtarget->hasVLX() || VT.getVectorNumElements() >= 8) && "Unexpected vector type"); SDValue Cst = DAG.getConstant(0, dl, MVT::i1); SmallVector Ops(VT.getVectorNumElements(), Cst); return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } else llvm_unreachable("Unexpected vector type"); return DAG.getBitcast(VT, Vec); } static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl, unsigned vectorWidth) { assert((vectorWidth == 128 || vectorWidth == 256) && "Unsupported vector width"); EVT VT = Vec.getValueType(); EVT ElVT = VT.getVectorElementType(); unsigned Factor = VT.getSizeInBits()/vectorWidth; EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT, VT.getVectorNumElements()/Factor); // Extract from UNDEF is UNDEF. if (Vec.getOpcode() == ISD::UNDEF) return DAG.getUNDEF(ResultVT); // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits(); assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"); // This is the index of the first element of the vectorWidth-bit chunk // we want. Since ElemsPerChunk is a power of 2 just need to clear bits. IdxVal &= ~(ElemsPerChunk - 1); // If the input is a buildvector just emit a smaller one. if (Vec.getOpcode() == ISD::BUILD_VECTOR) return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT, makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk)); SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx); } /// Generate a DAG to grab 128-bits from a vector > 128 bits. 
This /// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128 /// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4 /// instructions or a simple subregister reference. Idx is an index in the /// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes /// lowering EXTRACT_VECTOR_ELT operations easier. static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl) { assert((Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && "Unexpected vector size!"); return ExtractSubVector(Vec, IdxVal, DAG, dl, 128); } /// Generate a DAG to grab 256-bits from a 512-bit vector. static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl) { assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!"); return ExtractSubVector(Vec, IdxVal, DAG, dl, 256); } static SDValue InsertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl, unsigned vectorWidth) { assert((vectorWidth == 128 || vectorWidth == 256) && "Unsupported vector width"); // Inserting UNDEF is Result if (Vec.getOpcode() == ISD::UNDEF) return Result; EVT VT = Vec.getValueType(); EVT ElVT = VT.getVectorElementType(); EVT ResultVT = Result.getValueType(); // Insert the relevant vectorWidth bits. unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits(); assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"); // This is the index of the first element of the vectorWidth-bit chunk // we want. Since ElemsPerChunk is a power of 2 just need to clear bits. IdxVal &= ~(ElemsPerChunk - 1); SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx); } /// Generate a DAG to put 128-bits into a vector > 128 bits. This /// sets things up to match to an AVX VINSERTF128/VINSERTI128 or /// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a /// simple superregister reference. Idx is an index in the 128 bits /// we want. It need not be aligned to a 128-bit boundary. That makes /// lowering INSERT_VECTOR_ELT operations easier. static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl) { assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!"); // For insertion into the zero index (low half) of a 256-bit vector, it is // more efficient to generate a blend with immediate instead of an insert*128. // We are still creating an INSERT_SUBVECTOR below with an undef node to // extend the subvector to the size of the result vector. Make sure that // we are not recursing on that node by checking for undef here. if (IdxVal == 0 && Result.getValueType().is256BitVector() && Result.getOpcode() != ISD::UNDEF) { EVT ResultVT = Result.getValueType(); SDValue ZeroIndex = DAG.getIntPtrConstant(0, dl); SDValue Undef = DAG.getUNDEF(ResultVT); SDValue Vec256 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Undef, Vec, ZeroIndex); // The blend instruction, and therefore its mask, depend on the data type. MVT ScalarType = ResultVT.getVectorElementType().getSimpleVT(); if (ScalarType.isFloatingPoint()) { // Choose either vblendps (float) or vblendpd (double). unsigned ScalarSize = ScalarType.getSizeInBits(); assert((ScalarSize == 64 || ScalarSize == 32) && "Unknown float type"); unsigned MaskVal = (ScalarSize == 64) ? 
0x03 : 0x0f; SDValue Mask = DAG.getConstant(MaskVal, dl, MVT::i8); return DAG.getNode(X86ISD::BLENDI, dl, ResultVT, Result, Vec256, Mask); } const X86Subtarget &Subtarget = static_cast(DAG.getSubtarget()); // AVX2 is needed for 256-bit integer blend support. // Integers must be cast to 32-bit because there is only vpblendd; // vpblendw can't be used for this because it has a handicapped mask. // If we don't have AVX2, then cast to float. Using a wrong domain blend // is still more efficient than using the wrong domain vinsertf128 that // will be created by InsertSubVector(). MVT CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32; SDValue Mask = DAG.getConstant(0x0f, dl, MVT::i8); Result = DAG.getBitcast(CastVT, Result); Vec256 = DAG.getBitcast(CastVT, Vec256); Vec256 = DAG.getNode(X86ISD::BLENDI, dl, CastVT, Result, Vec256, Mask); return DAG.getBitcast(ResultVT, Vec256); } return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128); } static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl) { assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!"); return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256); } /// Insert i1-subvector to i1-vector. static SDValue Insert1BitVector(SDValue Op, SelectionDAG &DAG) { SDLoc dl(Op); SDValue Vec = Op.getOperand(0); SDValue SubVec = Op.getOperand(1); SDValue Idx = Op.getOperand(2); if (!isa(Idx)) return SDValue(); unsigned IdxVal = cast(Idx)->getZExtValue(); if (IdxVal == 0 && Vec.isUndef()) // the operation is legal return Op; MVT OpVT = Op.getSimpleValueType(); MVT SubVecVT = SubVec.getSimpleValueType(); unsigned NumElems = OpVT.getVectorNumElements(); unsigned SubVecNumElems = SubVecVT.getVectorNumElements(); assert(IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"); // There are 3 possible cases: // 1. Subvector should be inserted in the lower part (IdxVal == 0) // 2. Subvector should be inserted in the upper part // (IdxVal + SubVecNumElems == NumElems) // 3. Subvector should be inserted in the middle (for example v2i1 // to v16i1, index 2) SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); SDValue Undef = DAG.getUNDEF(OpVT); SDValue WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef, SubVec, ZeroIdx); if (Vec.isUndef()) return DAG.getNode(X86ISD::VSHLI, dl, OpVT, WideSubVec, DAG.getConstant(IdxVal, dl, MVT::i8)); if (ISD::isBuildVectorAllZeros(Vec.getNode())) { unsigned ShiftLeft = NumElems - SubVecNumElems; unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal; WideSubVec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, WideSubVec, DAG.getConstant(ShiftLeft, dl, MVT::i8)); return ShiftRight ? 
DAG.getNode(X86ISD::VSRLI, dl, OpVT, WideSubVec, DAG.getConstant(ShiftRight, dl, MVT::i8)) : WideSubVec; } if (IdxVal == 0) { // Zero lower bits of the Vec SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8); Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits); Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits); // Merge them together return DAG.getNode(ISD::OR, dl, OpVT, Vec, WideSubVec); } // Simple case when we put subvector in the upper part if (IdxVal + SubVecNumElems == NumElems) { // Zero upper bits of the Vec WideSubVec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, DAG.getConstant(IdxVal, dl, MVT::i8)); SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8); Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits); Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits); return DAG.getNode(ISD::OR, dl, OpVT, Vec, WideSubVec); } // Subvector should be inserted in the middle - use shuffle SmallVector Mask; for (unsigned i = 0; i < NumElems; ++i) Mask.push_back(i >= IdxVal && i < IdxVal + SubVecNumElems ? i : i + NumElems); return DAG.getVectorShuffle(OpVT, dl, WideSubVec, Vec, Mask); } /// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128 /// instructions. This is used because creating CONCAT_VECTOR nodes of /// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower /// large BUILD_VECTORS. static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT, unsigned NumElems, SelectionDAG &DAG, SDLoc dl) { SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl); return Insert128BitVector(V, V2, NumElems/2, DAG, dl); } static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT, unsigned NumElems, SelectionDAG &DAG, SDLoc dl) { SDValue V = Insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl); return Insert256BitVector(V, V2, NumElems/2, DAG, dl); } /// Returns a vector of specified type with all bits set. /// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with /// no AVX2 supprt, use two <4 x i32> inserted in a <8 x i32> appropriately. /// Then bitcast to their original type, ensuring they get CSE'd. static SDValue getOnesVector(EVT VT, const X86Subtarget *Subtarget, SelectionDAG &DAG, SDLoc dl) { assert(VT.isVector() && "Expected a vector type"); SDValue Cst = DAG.getConstant(~0U, dl, MVT::i32); SDValue Vec; if (VT.is512BitVector()) { SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops); } else if (VT.is256BitVector()) { if (Subtarget->hasInt256()) { // AVX2 SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops); } else { // AVX Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl); } } else if (VT.is128BitVector()) { Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); } else llvm_unreachable("Unexpected vector type"); return DAG.getBitcast(VT, Vec); } /// Returns a vector_shuffle node for an unpackl operation. static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1, SDValue V2) { unsigned NumElems = VT.getVectorNumElements(); SmallVector Mask; for (unsigned i = 0, e = NumElems/2; i != e; ++i) { Mask.push_back(i); Mask.push_back(i + NumElems); } return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); } /// Returns a vector_shuffle node for an unpackh operation. 
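/// For example (illustrative), for v4i32 the generated mask is <2, 6, 3, 7>,
/// i.e. the high halves of the two inputs interleaved.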
static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1, SDValue V2) { unsigned NumElems = VT.getVectorNumElements(); SmallVector Mask; for (unsigned i = 0, Half = NumElems/2; i != Half; ++i) { Mask.push_back(i + Half); Mask.push_back(i + NumElems + Half); } return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); } /// Return a vector_shuffle of the specified vector of zero or undef vector. /// This produces a shuffle where the low element of V2 is swizzled into the /// zero/undef vector, landing at element Idx. /// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3). static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, bool IsZero, const X86Subtarget *Subtarget, SelectionDAG &DAG) { MVT VT = V2.getSimpleValueType(); SDValue V1 = IsZero ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT); unsigned NumElems = VT.getVectorNumElements(); SmallVector MaskVec; for (unsigned i = 0; i != NumElems; ++i) // If this is the insertion idx, put the low elt of V2 here. MaskVec.push_back(i == Idx ? NumElems : i); return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, &MaskVec[0]); } /// Calculates the shuffle mask corresponding to the target-specific opcode. /// Returns true if the Mask could be calculated. Sets IsUnary to true if only /// uses one source. Note that this will set IsUnary for shuffles which use a /// single input multiple times, and in those cases it will /// adjust the mask to only have indices within that single input. static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, SmallVectorImpl &Mask, bool &IsUnary) { unsigned NumElems = VT.getVectorNumElements(); SDValue ImmN; IsUnary = false; bool IsFakeUnary = false; switch(N->getOpcode()) { case X86ISD::BLENDI: ImmN = N->getOperand(N->getNumOperands()-1); DecodeBLENDMask(VT, cast(ImmN)->getZExtValue(), Mask); break; case X86ISD::SHUFP: ImmN = N->getOperand(N->getNumOperands()-1); DecodeSHUFPMask(VT, cast(ImmN)->getZExtValue(), Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::INSERTPS: ImmN = N->getOperand(N->getNumOperands()-1); DecodeINSERTPSMask(cast(ImmN)->getZExtValue(), Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::UNPCKH: DecodeUNPCKHMask(VT, Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::UNPCKL: DecodeUNPCKLMask(VT, Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::MOVHLPS: DecodeMOVHLPSMask(NumElems, Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::MOVLHPS: DecodeMOVLHPSMask(NumElems, Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::PALIGNR: ImmN = N->getOperand(N->getNumOperands()-1); DecodePALIGNRMask(VT, cast(ImmN)->getZExtValue(), Mask); break; case X86ISD::PSHUFD: case X86ISD::VPERMILPI: ImmN = N->getOperand(N->getNumOperands()-1); DecodePSHUFMask(VT, cast(ImmN)->getZExtValue(), Mask); IsUnary = true; break; case X86ISD::PSHUFHW: ImmN = N->getOperand(N->getNumOperands()-1); DecodePSHUFHWMask(VT, cast(ImmN)->getZExtValue(), Mask); IsUnary = true; break; case X86ISD::PSHUFLW: ImmN = N->getOperand(N->getNumOperands()-1); DecodePSHUFLWMask(VT, cast(ImmN)->getZExtValue(), Mask); IsUnary = true; break; case X86ISD::PSHUFB: { IsUnary = true; SDValue MaskNode = N->getOperand(1); while (MaskNode->getOpcode() == ISD::BITCAST) MaskNode = MaskNode->getOperand(0); if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) { // If 
we have a build-vector, then things are easy. MVT VT = MaskNode.getSimpleValueType(); assert(VT.isVector() && "Can't produce a non-vector with a build_vector!"); if (!VT.isInteger()) return false; int NumBytesPerElement = VT.getVectorElementType().getSizeInBits() / 8; SmallVector RawMask; for (int i = 0, e = MaskNode->getNumOperands(); i < e; ++i) { SDValue Op = MaskNode->getOperand(i); if (Op->getOpcode() == ISD::UNDEF) { RawMask.push_back((uint64_t)SM_SentinelUndef); continue; } auto *CN = dyn_cast(Op.getNode()); if (!CN) return false; APInt MaskElement = CN->getAPIntValue(); // We now have to decode the element which could be any integer size and // extract each byte of it. for (int j = 0; j < NumBytesPerElement; ++j) { // Note that this is x86 and so always little endian: the low byte is // the first byte of the mask. RawMask.push_back(MaskElement.getLoBits(8).getZExtValue()); MaskElement = MaskElement.lshr(8); } } DecodePSHUFBMask(RawMask, Mask); break; } auto *MaskLoad = dyn_cast(MaskNode); if (!MaskLoad) return false; SDValue Ptr = MaskLoad->getBasePtr(); if (Ptr->getOpcode() == X86ISD::Wrapper || Ptr->getOpcode() == X86ISD::WrapperRIP) Ptr = Ptr->getOperand(0); auto *MaskCP = dyn_cast(Ptr); if (!MaskCP || MaskCP->isMachineConstantPoolEntry()) return false; if (auto *C = dyn_cast(MaskCP->getConstVal())) { DecodePSHUFBMask(C, Mask); break; } return false; } case X86ISD::VPERMI: ImmN = N->getOperand(N->getNumOperands()-1); DecodeVPERMMask(cast(ImmN)->getZExtValue(), Mask); IsUnary = true; break; case X86ISD::MOVSS: case X86ISD::MOVSD: DecodeScalarMoveMask(VT, /* IsLoad */ false, Mask); break; case X86ISD::VPERM2X128: ImmN = N->getOperand(N->getNumOperands()-1); DecodeVPERM2X128Mask(VT, cast(ImmN)->getZExtValue(), Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::MOVSLDUP: DecodeMOVSLDUPMask(VT, Mask); IsUnary = true; break; case X86ISD::MOVSHDUP: DecodeMOVSHDUPMask(VT, Mask); IsUnary = true; break; case X86ISD::MOVDDUP: DecodeMOVDDUPMask(VT, Mask); IsUnary = true; break; case X86ISD::MOVLHPD: case X86ISD::MOVLPD: case X86ISD::MOVLPS: // Not yet implemented return false; case X86ISD::VPERMV: { IsUnary = true; SDValue MaskNode = N->getOperand(0); while (MaskNode->getOpcode() == ISD::BITCAST) MaskNode = MaskNode->getOperand(0); unsigned MaskLoBits = Log2_64(VT.getVectorNumElements()); SmallVector RawMask; if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) { // If we have a build-vector, then things are easy. 
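      // Summary of the loop below: each build_vector operand supplies one mask
      // element; undef operands become SM_SentinelUndef, and constant operands
      // are truncated to the low log2(NumElts) bits of their value.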
assert(MaskNode.getSimpleValueType().isInteger() && MaskNode.getSimpleValueType().getVectorNumElements() == VT.getVectorNumElements()); for (unsigned i = 0; i < MaskNode->getNumOperands(); ++i) { SDValue Op = MaskNode->getOperand(i); if (Op->getOpcode() == ISD::UNDEF) RawMask.push_back((uint64_t)SM_SentinelUndef); else if (isa(Op)) { APInt MaskElement = cast(Op)->getAPIntValue(); RawMask.push_back(MaskElement.getLoBits(MaskLoBits).getZExtValue()); } else return false; } DecodeVPERMVMask(RawMask, Mask); break; } if (MaskNode->getOpcode() == X86ISD::VBROADCAST) { unsigned NumEltsInMask = MaskNode->getNumOperands(); MaskNode = MaskNode->getOperand(0); if (auto *CN = dyn_cast(MaskNode)) { APInt MaskEltValue = CN->getAPIntValue(); for (unsigned i = 0; i < NumEltsInMask; ++i) RawMask.push_back(MaskEltValue.getLoBits(MaskLoBits).getZExtValue()); DecodeVPERMVMask(RawMask, Mask); break; } // It may be a scalar load } auto *MaskLoad = dyn_cast(MaskNode); if (!MaskLoad) return false; SDValue Ptr = MaskLoad->getBasePtr(); if (Ptr->getOpcode() == X86ISD::Wrapper || Ptr->getOpcode() == X86ISD::WrapperRIP) Ptr = Ptr->getOperand(0); auto *MaskCP = dyn_cast(Ptr); if (!MaskCP || MaskCP->isMachineConstantPoolEntry()) return false; if (auto *C = dyn_cast(MaskCP->getConstVal())) { DecodeVPERMVMask(C, VT, Mask); break; } return false; } case X86ISD::VPERMV3: { IsUnary = false; SDValue MaskNode = N->getOperand(1); while (MaskNode->getOpcode() == ISD::BITCAST) MaskNode = MaskNode->getOperand(1); if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) { // If we have a build-vector, then things are easy. assert(MaskNode.getSimpleValueType().isInteger() && MaskNode.getSimpleValueType().getVectorNumElements() == VT.getVectorNumElements()); SmallVector RawMask; unsigned MaskLoBits = Log2_64(VT.getVectorNumElements()*2); for (unsigned i = 0; i < MaskNode->getNumOperands(); ++i) { SDValue Op = MaskNode->getOperand(i); if (Op->getOpcode() == ISD::UNDEF) RawMask.push_back((uint64_t)SM_SentinelUndef); else { auto *CN = dyn_cast(Op.getNode()); if (!CN) return false; APInt MaskElement = CN->getAPIntValue(); RawMask.push_back(MaskElement.getLoBits(MaskLoBits).getZExtValue()); } } DecodeVPERMV3Mask(RawMask, Mask); break; } auto *MaskLoad = dyn_cast(MaskNode); if (!MaskLoad) return false; SDValue Ptr = MaskLoad->getBasePtr(); if (Ptr->getOpcode() == X86ISD::Wrapper || Ptr->getOpcode() == X86ISD::WrapperRIP) Ptr = Ptr->getOperand(0); auto *MaskCP = dyn_cast(Ptr); if (!MaskCP || MaskCP->isMachineConstantPoolEntry()) return false; if (auto *C = dyn_cast(MaskCP->getConstVal())) { DecodeVPERMV3Mask(C, VT, Mask); break; } return false; } default: llvm_unreachable("unknown target shuffle node"); } // Empty mask indicates the decode failed. if (Mask.empty()) return false; // Check if we're getting a shuffle mask with zero'd elements. if (!AllowSentinelZero) if (std::any_of(Mask.begin(), Mask.end(), [](int M){ return M == SM_SentinelZero; })) return false; // If we have a fake unary shuffle, the shuffle mask is spread across two // inputs that are actually the same node. Re-map the mask to always point // into the first input. if (IsFakeUnary) for (int &M : Mask) if (M >= (int)Mask.size()) M -= Mask.size(); return true; } /// Returns the scalar element that will make up the ith /// element of the result of the vector shuffle. static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, unsigned Depth) { if (Depth == 6) return SDValue(); // Limit search depth. 
SDValue V = SDValue(N, 0); EVT VT = V.getValueType(); unsigned Opcode = V.getOpcode(); // Recurse into ISD::VECTOR_SHUFFLE node to find scalars. if (const ShuffleVectorSDNode *SV = dyn_cast(N)) { int Elt = SV->getMaskElt(Index); if (Elt < 0) return DAG.getUNDEF(VT.getVectorElementType()); unsigned NumElems = VT.getVectorNumElements(); SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0) : SV->getOperand(1); return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1); } // Recurse into target specific vector shuffles to find scalars. if (isTargetShuffle(Opcode)) { MVT ShufVT = V.getSimpleValueType(); int NumElems = (int)ShufVT.getVectorNumElements(); SmallVector ShuffleMask; bool IsUnary; if (!getTargetShuffleMask(N, ShufVT, false, ShuffleMask, IsUnary)) return SDValue(); int Elt = ShuffleMask[Index]; if (Elt == SM_SentinelUndef) return DAG.getUNDEF(ShufVT.getVectorElementType()); assert(0 <= Elt && Elt < (2*NumElems) && "Shuffle index out of range"); SDValue NewV = (Elt < NumElems) ? N->getOperand(0) : N->getOperand(1); return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1); } // Actual nodes that may contain scalar elements if (Opcode == ISD::BITCAST) { V = V.getOperand(0); EVT SrcVT = V.getValueType(); unsigned NumElems = VT.getVectorNumElements(); if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems) return SDValue(); } if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) return (Index == 0) ? V.getOperand(0) : DAG.getUNDEF(VT.getVectorElementType()); if (V.getOpcode() == ISD::BUILD_VECTOR) return V.getOperand(Index); return SDValue(); } /// Custom lower build_vector of v16i8. static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, const X86Subtarget* Subtarget, const TargetLowering &TLI) { if (NumNonZero > 8) return SDValue(); SDLoc dl(Op); SDValue V; bool First = true; // SSE4.1 - use PINSRB to insert each byte directly. if (Subtarget->hasSSE41()) { for (unsigned i = 0; i < 16; ++i) { bool isNonZero = (NonZeros & (1 << i)) != 0; if (isNonZero) { if (First) { if (NumZero) V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl); else V = DAG.getUNDEF(MVT::v16i8); First = false; } V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i8, V, Op.getOperand(i), DAG.getIntPtrConstant(i, dl)); } } return V; } // Pre-SSE4.1 - merge byte pairs and insert with PINSRW. for (unsigned i = 0; i < 16; ++i) { bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; if (ThisIsNonZero && First) { if (NumZero) V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl); else V = DAG.getUNDEF(MVT::v8i16); First = false; } if ((i & 1) != 0) { SDValue ThisElt, LastElt; bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0; if (LastIsNonZero) { LastElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i-1)); } if (ThisIsNonZero) { ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i)); ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16, ThisElt, DAG.getConstant(8, dl, MVT::i8)); if (LastIsNonZero) ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt); } else ThisElt = LastElt; if (ThisElt.getNode()) V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt, DAG.getIntPtrConstant(i/2, dl)); } } return DAG.getBitcast(MVT::v16i8, V); } /// Custom lower build_vector of v8i16. 
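/// Illustrative strategy: each non-zero i16 element is inserted into a zero
/// or undef v8i16 with INSERT_VECTOR_ELT, which typically selects to PINSRW.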
static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, const X86Subtarget* Subtarget, const TargetLowering &TLI) { if (NumNonZero > 4) return SDValue(); SDLoc dl(Op); SDValue V; bool First = true; for (unsigned i = 0; i < 8; ++i) { bool isNonZero = (NonZeros & (1 << i)) != 0; if (isNonZero) { if (First) { if (NumZero) V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl); else V = DAG.getUNDEF(MVT::v8i16); First = false; } V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, Op.getOperand(i), DAG.getIntPtrConstant(i, dl)); } } return V; } /// Custom lower build_vector of v4i32 or v4f32. static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG, const X86Subtarget *Subtarget, const TargetLowering &TLI) { // Find all zeroable elements. std::bitset<4> Zeroable; for (int i=0; i < 4; ++i) { SDValue Elt = Op->getOperand(i); Zeroable[i] = (Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt)); } assert(Zeroable.size() - Zeroable.count() > 1 && "We expect at least two non-zero elements!"); // We only know how to deal with build_vector nodes where elements are either // zeroable or extract_vector_elt with constant index. SDValue FirstNonZero; unsigned FirstNonZeroIdx; for (unsigned i=0; i < 4; ++i) { if (Zeroable[i]) continue; SDValue Elt = Op->getOperand(i); if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || !isa(Elt.getOperand(1))) return SDValue(); // Make sure that this node is extracting from a 128-bit vector. MVT VT = Elt.getOperand(0).getSimpleValueType(); if (!VT.is128BitVector()) return SDValue(); if (!FirstNonZero.getNode()) { FirstNonZero = Elt; FirstNonZeroIdx = i; } } assert(FirstNonZero.getNode() && "Unexpected build vector of all zeros!"); SDValue V1 = FirstNonZero.getOperand(0); MVT VT = V1.getSimpleValueType(); // See if this build_vector can be lowered as a blend with zero. SDValue Elt; unsigned EltMaskIdx, EltIdx; int Mask[4]; for (EltIdx = 0; EltIdx < 4; ++EltIdx) { if (Zeroable[EltIdx]) { // The zero vector will be on the right hand side. Mask[EltIdx] = EltIdx+4; continue; } Elt = Op->getOperand(EltIdx); // By construction, Elt is a EXTRACT_VECTOR_ELT with constant index. EltMaskIdx = cast(Elt.getOperand(1))->getZExtValue(); if (Elt.getOperand(0) != V1 || EltMaskIdx != EltIdx) break; Mask[EltIdx] = EltIdx; } if (EltIdx == 4) { // Let the shuffle legalizer deal with blend operations. SDValue VZero = getZeroVector(VT, Subtarget, DAG, SDLoc(Op)); if (V1.getSimpleValueType() != VT) V1 = DAG.getNode(ISD::BITCAST, SDLoc(V1), VT, V1); return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZero, &Mask[0]); } // See if we can lower this build_vector to a INSERTPS. if (!Subtarget->hasSSE41()) return SDValue(); SDValue V2 = Elt.getOperand(0); if (Elt == FirstNonZero && EltIdx == FirstNonZeroIdx) V1 = SDValue(); bool CanFold = true; for (unsigned i = EltIdx + 1; i < 4 && CanFold; ++i) { if (Zeroable[i]) continue; SDValue Current = Op->getOperand(i); SDValue SrcVector = Current->getOperand(0); if (!V1.getNode()) V1 = SrcVector; CanFold = SrcVector == V1 && cast(Current.getOperand(1))->getZExtValue() == i; } if (!CanFold) return SDValue(); assert(V1.getNode() && "Expected at least two non-zero elements!"); if (V1.getSimpleValueType() != MVT::v4f32) V1 = DAG.getNode(ISD::BITCAST, SDLoc(V1), MVT::v4f32, V1); if (V2.getSimpleValueType() != MVT::v4f32) V2 = DAG.getNode(ISD::BITCAST, SDLoc(V2), MVT::v4f32, V2); // Ok, we can emit an INSERTPS instruction. 
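  // INSERTPS immediate layout (for reference): bits [7:6] select the source
  // element, bits [5:4] select the destination element, and bits [3:0] are
  // the zero mask, matching the shifts used below.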
unsigned ZMask = Zeroable.to_ulong(); unsigned InsertPSMask = EltMaskIdx << 6 | EltIdx << 4 | ZMask; assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!"); SDLoc DL(Op); SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2, DAG.getIntPtrConstant(InsertPSMask, DL)); return DAG.getBitcast(VT, Result); } /// Return a vector logical shift node. static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits, SelectionDAG &DAG, const TargetLowering &TLI, SDLoc dl) { assert(VT.is128BitVector() && "Unknown type for VShift"); MVT ShVT = MVT::v2i64; unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ; SrcOp = DAG.getBitcast(ShVT, SrcOp); MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(DAG.getDataLayout(), VT); assert(NumBits % 8 == 0 && "Only support byte sized shifts"); SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, ScalarShiftTy); return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal)); } static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) { // Check if the scalar load can be widened into a vector load. And if // the address is "base + cst" see if the cst can be "absorbed" into // the shuffle mask. if (LoadSDNode *LD = dyn_cast(SrcOp)) { SDValue Ptr = LD->getBasePtr(); if (!ISD::isNormalLoad(LD) || LD->isVolatile()) return SDValue(); EVT PVT = LD->getValueType(0); if (PVT != MVT::i32 && PVT != MVT::f32) return SDValue(); int FI = -1; int64_t Offset = 0; if (FrameIndexSDNode *FINode = dyn_cast(Ptr)) { FI = FINode->getIndex(); Offset = 0; } else if (DAG.isBaseWithConstantOffset(Ptr) && isa(Ptr.getOperand(0))) { FI = cast(Ptr.getOperand(0))->getIndex(); Offset = Ptr.getConstantOperandVal(1); Ptr = Ptr.getOperand(0); } else { return SDValue(); } // FIXME: 256-bit vector instructions don't require a strict alignment, // improve this code to support it better. unsigned RequiredAlign = VT.getSizeInBits()/8; SDValue Chain = LD->getChain(); // Make sure the stack object alignment is at least 16 or 32. MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) { if (MFI->isFixedObjectIndex(FI)) { // Can't change the alignment. FIXME: It's possible to compute // the exact stack offset and reference FI + adjust offset instead. // If someone *really* cares about this. That's the way to implement it. return SDValue(); } else { MFI->setObjectAlignment(FI, RequiredAlign); } } // (Offset % 16 or 32) must be multiple of 4. Then address is then // Ptr + (Offset & ~15). if (Offset < 0) return SDValue(); if ((Offset % RequiredAlign) & 3) return SDValue(); int64_t StartOffset = Offset & ~int64_t(RequiredAlign - 1); if (StartOffset) { SDLoc DL(Ptr); Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, DAG.getConstant(StartOffset, DL, Ptr.getValueType())); } int EltNo = (Offset - StartOffset) >> 2; unsigned NumElems = VT.getVectorNumElements(); EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems); SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(StartOffset), false, false, false, 0); SmallVector Mask(NumElems, EltNo); return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &Mask[0]); } return SDValue(); } /// Given the initializing elements 'Elts' of a vector of type 'VT', see if the /// elements can be replaced by a single large load which has the same value as /// a build_vector or insert_subvector whose loaded operands are 'Elts'. 
/// /// Example: -> zextload a /// /// FIXME: we'd also like to handle the case where the last elements are zero /// rather than undef via VZEXT_LOAD, but we do not detect that case today. /// There's even a handy isZeroNode for that purpose. static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, SDLoc &DL, SelectionDAG &DAG, bool isAfterLegalize) { unsigned NumElems = Elts.size(); LoadSDNode *LDBase = nullptr; unsigned LastLoadedElt = -1U; // For each element in the initializer, see if we've found a load or an undef. // If we don't find an initial load element, or later load elements are // non-consecutive, bail out. for (unsigned i = 0; i < NumElems; ++i) { SDValue Elt = Elts[i]; // Look through a bitcast. if (Elt.getNode() && Elt.getOpcode() == ISD::BITCAST) Elt = Elt.getOperand(0); if (!Elt.getNode() || (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode()))) return SDValue(); if (!LDBase) { if (Elt.getNode()->getOpcode() == ISD::UNDEF) return SDValue(); LDBase = cast(Elt.getNode()); LastLoadedElt = i; continue; } if (Elt.getOpcode() == ISD::UNDEF) continue; LoadSDNode *LD = cast(Elt); EVT LdVT = Elt.getValueType(); // Each loaded element must be the correct fractional portion of the // requested vector load. if (LdVT.getSizeInBits() != VT.getSizeInBits() / NumElems) return SDValue(); if (!DAG.isConsecutiveLoad(LD, LDBase, LdVT.getSizeInBits() / 8, i)) return SDValue(); LastLoadedElt = i; } // If we have found an entire vector of loads and undefs, then return a large // load of the entire vector width starting at the base pointer. If we found // consecutive loads for the low half, generate a vzext_load node. if (LastLoadedElt == NumElems - 1) { assert(LDBase && "Did not find base load for merging consecutive loads"); EVT EltVT = LDBase->getValueType(0); // Ensure that the input vector size for the merged loads matches the // cumulative size of the input elements. if (VT.getSizeInBits() != EltVT.getSizeInBits() * NumElems) return SDValue(); if (isAfterLegalize && !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, VT)) return SDValue(); SDValue NewLd = SDValue(); NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(), LDBase->getPointerInfo(), LDBase->isVolatile(), LDBase->isNonTemporal(), LDBase->isInvariant(), LDBase->getAlignment()); if (LDBase->hasAnyUseOfValue(1)) { SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, SDValue(LDBase, 1), SDValue(NewLd.getNode(), 1)); DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain); DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1), SDValue(NewLd.getNode(), 1)); } return NewLd; } //TODO: The code below fires only for for loading the low v2i32 / v2f32 //of a v4i32 / v4f32. It's probably worth generalizing. EVT EltVT = VT.getVectorElementType(); if (NumElems == 4 && LastLoadedElt == 1 && (EltVT.getSizeInBits() == 32) && DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) { SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() }; SDValue ResNode = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, MVT::i64, LDBase->getPointerInfo(), LDBase->getAlignment(), false/*isVolatile*/, true/*ReadMem*/, false/*WriteMem*/); // Make sure the newly-created LOAD is in the same position as LDBase in // terms of dependency. We create a TokenFactor for LDBase and ResNode, and // update uses of LDBase's output chain to use the TokenFactor. 
if (LDBase->hasAnyUseOfValue(1)) { SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, SDValue(LDBase, 1), SDValue(ResNode.getNode(), 1)); DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain); DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1), SDValue(ResNode.getNode(), 1)); } return DAG.getBitcast(VT, ResNode); } return SDValue(); } /// LowerVectorBroadcast - Attempt to use the vbroadcast instruction /// to generate a splat value for the following cases: /// 1. A splat BUILD_VECTOR which uses a single scalar load, or a constant. /// 2. A splat shuffle which uses a scalar_to_vector node which comes from /// a scalar load, or a constant. /// The VBROADCAST node is returned when a pattern is found, /// or SDValue() otherwise. static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget, SelectionDAG &DAG) { // VBROADCAST requires AVX. // TODO: Splats could be generated for non-AVX CPUs using SSE // instructions, but there's less potential gain for only 128-bit vectors. if (!Subtarget->hasAVX()) return SDValue(); MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && "Unsupported vector type for broadcast."); SDValue Ld; bool ConstSplatVal; switch (Op.getOpcode()) { default: // Unknown pattern found. return SDValue(); case ISD::BUILD_VECTOR: { auto *BVOp = cast(Op.getNode()); BitVector UndefElements; SDValue Splat = BVOp->getSplatValue(&UndefElements); // We need a splat of a single value to use broadcast, and it doesn't // make any sense if the value is only in one element of the vector. if (!Splat || (VT.getVectorNumElements() - UndefElements.count()) <= 1) return SDValue(); Ld = Splat; ConstSplatVal = (Ld.getOpcode() == ISD::Constant || Ld.getOpcode() == ISD::ConstantFP); // Make sure that all of the users of a non-constant load are from the // BUILD_VECTOR node. if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode())) return SDValue(); break; } case ISD::VECTOR_SHUFFLE: { ShuffleVectorSDNode *SVOp = cast(Op); // Shuffles must have a splat mask where the first element is // broadcasted. if ((!SVOp->isSplat()) || SVOp->getMaskElt(0) != 0) return SDValue(); SDValue Sc = Op.getOperand(0); if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR && Sc.getOpcode() != ISD::BUILD_VECTOR) { if (!Subtarget->hasInt256()) return SDValue(); // Use the register form of the broadcast instruction available on AVX2. if (VT.getSizeInBits() >= 256) Sc = Extract128BitVector(Sc, 0, DAG, dl); return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Sc); } Ld = Sc.getOperand(0); ConstSplatVal = (Ld.getOpcode() == ISD::Constant || Ld.getOpcode() == ISD::ConstantFP); // The scalar_to_vector node and the suspected // load node must have exactly one user. // Constants may have multiple users. // AVX-512 has register version of the broadcast bool hasRegVer = Subtarget->hasAVX512() && VT.is512BitVector() && Ld.getValueType().getSizeInBits() >= 32; if (!ConstSplatVal && ((!Sc.hasOneUse() || !Ld.hasOneUse()) && !hasRegVer)) return SDValue(); break; } } unsigned ScalarSize = Ld.getValueType().getSizeInBits(); bool IsGE256 = (VT.getSizeInBits() >= 256); // When optimizing for size, generate up to 5 extra bytes for a broadcast // instruction to save 8 or more bytes of constant pool data. // TODO: If multiple splats are generated to load the same constant, // it may be detrimental to overall size. There needs to be a way to detect // that condition to know if this is truly a size win. 
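  // Rough arithmetic (illustrative): splatting a 4-byte f32 with VBROADCASTSS
  // instead of loading a full 32-byte v8f32 constant keeps only 4 bytes in
  // the constant pool, saving 28 bytes of data for a few extra instruction
  // bytes.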
bool OptForSize = DAG.getMachineFunction().getFunction()->optForSize(); // Handle broadcasting a single constant scalar from the constant pool // into a vector. // On Sandybridge (no AVX2), it is still better to load a constant vector // from the constant pool and not to broadcast it from a scalar. // But override that restriction when optimizing for size. // TODO: Check if splatting is recommended for other AVX-capable CPUs. if (ConstSplatVal && (Subtarget->hasAVX2() || OptForSize)) { EVT CVT = Ld.getValueType(); assert(!CVT.isVector() && "Must not broadcast a vector type"); // Splat f32, i32, v4f64, v4i64 in all cases with AVX2. // For size optimization, also splat v2f64 and v2i64, and for size opt // with AVX2, also splat i8 and i16. // With pattern matching, the VBROADCAST node may become a VMOVDDUP. if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) || (OptForSize && (ScalarSize == 64 || Subtarget->hasAVX2()))) { const Constant *C = nullptr; if (ConstantSDNode *CI = dyn_cast(Ld)) C = CI->getConstantIntValue(); else if (ConstantFPSDNode *CF = dyn_cast(Ld)) C = CF->getConstantFPValue(); assert(C && "Invalid constant type"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast(CP)->getAlignment(); Ld = DAG.getLoad( CVT, dl, DAG.getEntryNode(), CP, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, false, false, Alignment); return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); } } bool IsLoad = ISD::isNormalLoad(Ld.getNode()); // Handle AVX2 in-register broadcasts. if (!IsLoad && Subtarget->hasInt256() && (ScalarSize == 32 || (IsGE256 && ScalarSize == 64))) return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); // The scalar source must be a normal load. if (!IsLoad) return SDValue(); if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) || (Subtarget->hasVLX() && ScalarSize == 64)) return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); // The integer check is needed for the 64-bit into 128-bit so it doesn't match // double since there is no vbroadcastsd xmm if (Subtarget->hasInt256() && Ld.getValueType().isInteger()) { if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64) return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); } // Unsupported broadcast. return SDValue(); } /// \brief For an EXTRACT_VECTOR_ELT with a constant index return the real /// underlying vector and index. /// /// Modifies \p ExtractedFromVec to the real vector and returns the real /// index. static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec, SDValue ExtIdx) { int Idx = cast(ExtIdx)->getZExtValue(); if (!isa(ExtractedFromVec)) return Idx; // For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already // lowered this: // (extract_vector_elt (v8f32 %vreg1), Constant<6>) // to: // (extract_vector_elt (vector_shuffle<2,u,u,u> // (extract_subvector (v8f32 %vreg0), Constant<4>), // undef) // Constant<0>) // In this case the vector is the extract_subvector expression and the index // is 2, as specified by the shuffle. 
ShuffleVectorSDNode *SVOp = cast(ExtractedFromVec); SDValue ShuffleVec = SVOp->getOperand(0); MVT ShuffleVecVT = ShuffleVec.getSimpleValueType(); assert(ShuffleVecVT.getVectorElementType() == ExtractedFromVec.getSimpleValueType().getVectorElementType()); int ShuffleIdx = SVOp->getMaskElt(Idx); if (isUndefOrInRange(ShuffleIdx, 0, ShuffleVecVT.getVectorNumElements())) { ExtractedFromVec = ShuffleVec; return ShuffleIdx; } return Idx; } static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); // Skip if insert_vec_elt is not supported. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT)) return SDValue(); SDLoc DL(Op); unsigned NumElems = Op.getNumOperands(); SDValue VecIn1; SDValue VecIn2; SmallVector InsertIndices; SmallVector Mask(NumElems, -1); for (unsigned i = 0; i != NumElems; ++i) { unsigned Opc = Op.getOperand(i).getOpcode(); if (Opc == ISD::UNDEF) continue; if (Opc != ISD::EXTRACT_VECTOR_ELT) { // Quit if more than 1 elements need inserting. if (InsertIndices.size() > 1) return SDValue(); InsertIndices.push_back(i); continue; } SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0); SDValue ExtIdx = Op.getOperand(i).getOperand(1); // Quit if non-constant index. if (!isa(ExtIdx)) return SDValue(); int Idx = getUnderlyingExtractedFromVec(ExtractedFromVec, ExtIdx); // Quit if extracted from vector of different type. if (ExtractedFromVec.getValueType() != VT) return SDValue(); if (!VecIn1.getNode()) VecIn1 = ExtractedFromVec; else if (VecIn1 != ExtractedFromVec) { if (!VecIn2.getNode()) VecIn2 = ExtractedFromVec; else if (VecIn2 != ExtractedFromVec) // Quit if more than 2 vectors to shuffle return SDValue(); } if (ExtractedFromVec == VecIn1) Mask[i] = Idx; else if (ExtractedFromVec == VecIn2) Mask[i] = Idx + NumElems; } if (!VecIn1.getNode()) return SDValue(); VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, &Mask[0]); for (unsigned i = 0, e = InsertIndices.size(); i != e; ++i) { unsigned Idx = InsertIndices[i]; NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx), DAG.getIntPtrConstant(Idx, DL)); } return NV; } static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) { assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && Op.getScalarValueSizeInBits() == 1 && "Can not convert non-constant vector"); uint64_t Immediate = 0; for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) { SDValue In = Op.getOperand(idx); if (In.getOpcode() != ISD::UNDEF) Immediate |= cast(In)->getZExtValue() << idx; } SDLoc dl(Op); MVT VT = MVT::getIntegerVT(std::max((int)Op.getValueType().getSizeInBits(), 8)); return DAG.getConstant(Immediate, dl, VT); } // Lower BUILD_VECTOR operation for v8i1 and v16i1 types. 
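// Illustrative example (not from the original source): for an all-constant
// mask such as the v8i1 <1,0,1,1,0,0,0,0>, ConvertI1VectorToInteger above
// packs the bits into the i8 immediate 0x0D, which is then bitcast straight
// back to the v8i1 result.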
SDValue X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { MVT VT = Op.getSimpleValueType(); assert((VT.getVectorElementType() == MVT::i1) && "Unexpected type in LowerBUILD_VECTORvXi1!"); SDLoc dl(Op); if (ISD::isBuildVectorAllZeros(Op.getNode())) { SDValue Cst = DAG.getTargetConstant(0, dl, MVT::i1); SmallVector Ops(VT.getVectorNumElements(), Cst); return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } if (ISD::isBuildVectorAllOnes(Op.getNode())) { SDValue Cst = DAG.getTargetConstant(1, dl, MVT::i1); SmallVector Ops(VT.getVectorNumElements(), Cst); return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { SDValue Imm = ConvertI1VectorToInteger(Op, DAG); if (Imm.getValueSizeInBits() == VT.getSizeInBits()) return DAG.getBitcast(VT, Imm); SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec, DAG.getIntPtrConstant(0, dl)); } // Vector has one or more non-const elements uint64_t Immediate = 0; SmallVector NonConstIdx; bool IsSplat = true; bool HasConstElts = false; int SplatIdx = -1; for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) { SDValue In = Op.getOperand(idx); if (In.getOpcode() == ISD::UNDEF) continue; if (!isa(In)) NonConstIdx.push_back(idx); else { Immediate |= cast(In)->getZExtValue() << idx; HasConstElts = true; } if (SplatIdx == -1) SplatIdx = idx; else if (In != Op.getOperand(SplatIdx)) IsSplat = false; } // for splat use " (select i1 splat_elt, all-ones, all-zeroes)" if (IsSplat) return DAG.getNode(ISD::SELECT, dl, VT, Op.getOperand(SplatIdx), DAG.getConstant(1, dl, VT), DAG.getConstant(0, dl, VT)); // insert elements one by one SDValue DstVec; SDValue Imm; if (Immediate) { MVT ImmVT = MVT::getIntegerVT(std::max((int)VT.getSizeInBits(), 8)); Imm = DAG.getConstant(Immediate, dl, ImmVT); } else if (HasConstElts) Imm = DAG.getConstant(0, dl, VT); else Imm = DAG.getUNDEF(VT); if (Imm.getValueSizeInBits() == VT.getSizeInBits()) DstVec = DAG.getBitcast(VT, Imm); else { SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm); DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec, DAG.getIntPtrConstant(0, dl)); } for (unsigned i = 0; i < NonConstIdx.size(); ++i) { unsigned InsertIdx = NonConstIdx[i]; DstVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec, Op.getOperand(InsertIdx), DAG.getIntPtrConstant(InsertIdx, dl)); } return DstVec; } /// \brief Return true if \p N implements a horizontal binop and return the /// operands for the horizontal binop into V0 and V1. /// /// This is a helper function of LowerToHorizontalOp(). /// This function checks that the build_vector \p N in input implements a /// horizontal operation. Parameter \p Opcode defines the kind of horizontal /// operation to match. /// For example, if \p Opcode is equal to ISD::ADD, then this function /// checks if \p N implements a horizontal arithmetic add; if instead \p Opcode /// is equal to ISD::SUB, then this function checks if this is a horizontal /// arithmetic sub. /// /// This function only analyzes elements of \p N whose indices are /// in range [BaseIdx, LastIdx). 
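///
/// Illustrative example (not from the original source): with \p Opcode equal
/// to ISD::FADD, a v4f32 build_vector of
///   (fadd (extractelt %A, 0), (extractelt %A, 1)),
///   (fadd (extractelt %A, 2), (extractelt %A, 3)),
///   (fadd (extractelt %B, 0), (extractelt %B, 1)),
///   (fadd (extractelt %B, 2), (extractelt %B, 3))
/// is recognized as a horizontal add, returning true with V0 = %A and V1 = %B.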
static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode, SelectionDAG &DAG, unsigned BaseIdx, unsigned LastIdx, SDValue &V0, SDValue &V1) { EVT VT = N->getValueType(0); assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!"); assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx && "Invalid Vector in input!"); bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD); bool CanFold = true; unsigned ExpectedVExtractIdx = BaseIdx; unsigned NumElts = LastIdx - BaseIdx; V0 = DAG.getUNDEF(VT); V1 = DAG.getUNDEF(VT); // Check if N implements a horizontal binop. for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) { SDValue Op = N->getOperand(i + BaseIdx); // Skip UNDEFs. if (Op->getOpcode() == ISD::UNDEF) { // Update the expected vector extract index. if (i * 2 == NumElts) ExpectedVExtractIdx = BaseIdx; ExpectedVExtractIdx += 2; continue; } CanFold = Op->getOpcode() == Opcode && Op->hasOneUse(); if (!CanFold) break; SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); // Try to match the following pattern: // (BINOP (extract_vector_elt A, I), (extract_vector_elt A, I+1)) CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT && Op0.getOperand(0) == Op1.getOperand(0) && isa(Op0.getOperand(1)) && isa(Op1.getOperand(1))); if (!CanFold) break; unsigned I0 = cast(Op0.getOperand(1))->getZExtValue(); unsigned I1 = cast(Op1.getOperand(1))->getZExtValue(); if (i * 2 < NumElts) { if (V0.getOpcode() == ISD::UNDEF) { V0 = Op0.getOperand(0); if (V0.getValueType() != VT) return false; } } else { if (V1.getOpcode() == ISD::UNDEF) { V1 = Op0.getOperand(0); if (V1.getValueType() != VT) return false; } if (i * 2 == NumElts) ExpectedVExtractIdx = BaseIdx; } SDValue Expected = (i * 2 < NumElts) ? V0 : V1; if (I0 == ExpectedVExtractIdx) CanFold = I1 == I0 + 1 && Op0.getOperand(0) == Expected; else if (IsCommutable && I1 == ExpectedVExtractIdx) { // Try to match the following dag sequence: // (BINOP (extract_vector_elt A, I+1), (extract_vector_elt A, I)) CanFold = I0 == I1 + 1 && Op1.getOperand(0) == Expected; } else CanFold = false; ExpectedVExtractIdx += 2; } return CanFold; } /// \brief Emit a sequence of two 128-bit horizontal add/sub followed by /// a concat_vector. /// /// This is a helper function of LowerToHorizontalOp(). /// This function expects two 256-bit vectors called V0 and V1. /// At first, each vector is split into two separate 128-bit vectors. /// Then, the resulting 128-bit vectors are used to implement two /// horizontal binary operations. /// /// The kind of horizontal binary operation is defined by \p X86Opcode. /// /// \p Mode specifies how the 128-bit parts of V0 and V1 are passed in input to /// the two new horizontal binop. /// When Mode is set, the first horizontal binop dag node would take as input /// the lower 128-bit of V0 and the upper 128-bit of V0. The second /// horizontal binop dag node would take as input the lower 128-bit of V1 /// and the upper 128-bit of V1. /// Example: /// HADD V0_LO, V0_HI /// HADD V1_LO, V1_HI /// /// Otherwise, the first horizontal binop dag node takes as input the lower /// 128-bit of V0 and the lower 128-bit of V1, and the second horizontal binop /// dag node takes the upper 128-bit of V0 and the upper 128-bit of V1. /// Example: /// HADD V0_LO, V1_LO /// HADD V0_HI, V1_HI /// /// If \p isUndefLO is set, then the algorithm propagates UNDEF to the lower /// 128-bits of the result. 
If \p isUndefHI is set, then UNDEF is propagated to /// the upper 128-bits of the result. static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1, SDLoc DL, SelectionDAG &DAG, unsigned X86Opcode, bool Mode, bool isUndefLO, bool isUndefHI) { EVT VT = V0.getValueType(); assert(VT.is256BitVector() && VT == V1.getValueType() && "Invalid nodes in input!"); unsigned NumElts = VT.getVectorNumElements(); SDValue V0_LO = Extract128BitVector(V0, 0, DAG, DL); SDValue V0_HI = Extract128BitVector(V0, NumElts/2, DAG, DL); SDValue V1_LO = Extract128BitVector(V1, 0, DAG, DL); SDValue V1_HI = Extract128BitVector(V1, NumElts/2, DAG, DL); EVT NewVT = V0_LO.getValueType(); SDValue LO = DAG.getUNDEF(NewVT); SDValue HI = DAG.getUNDEF(NewVT); if (Mode) { // Don't emit a horizontal binop if the result is expected to be UNDEF. if (!isUndefLO && V0->getOpcode() != ISD::UNDEF) LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI); if (!isUndefHI && V1->getOpcode() != ISD::UNDEF) HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI); } else { // Don't emit a horizontal binop if the result is expected to be UNDEF. if (!isUndefLO && (V0_LO->getOpcode() != ISD::UNDEF || V1_LO->getOpcode() != ISD::UNDEF)) LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO); if (!isUndefHI && (V0_HI->getOpcode() != ISD::UNDEF || V1_HI->getOpcode() != ISD::UNDEF)) HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI); } return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI); } /// Try to fold a build_vector that performs an 'addsub' to an X86ISD::ADDSUB /// node. static SDValue LowerToAddSub(const BuildVectorSDNode *BV, const X86Subtarget *Subtarget, SelectionDAG &DAG) { MVT VT = BV->getSimpleValueType(0); if ((!Subtarget->hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) && (!Subtarget->hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64))) return SDValue(); SDLoc DL(BV); unsigned NumElts = VT.getVectorNumElements(); SDValue InVec0 = DAG.getUNDEF(VT); SDValue InVec1 = DAG.getUNDEF(VT); assert((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v4f32 || VT == MVT::v2f64) && "build_vector with an invalid type found!"); // Odd-numbered elements in the input build vector are obtained from // adding two integer/float elements. // Even-numbered elements in the input build vector are obtained from // subtracting two integer/float elements. unsigned ExpectedOpcode = ISD::FSUB; unsigned NextExpectedOpcode = ISD::FADD; bool AddFound = false; bool SubFound = false; for (unsigned i = 0, e = NumElts; i != e; ++i) { SDValue Op = BV->getOperand(i); // Skip 'undef' values. unsigned Opcode = Op.getOpcode(); if (Opcode == ISD::UNDEF) { std::swap(ExpectedOpcode, NextExpectedOpcode); continue; } // Early exit if we found an unexpected opcode. if (Opcode != ExpectedOpcode) return SDValue(); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); // Try to match the following pattern: // (BINOP (extract_vector_elt A, i), (extract_vector_elt B, i)) // Early exit if we cannot match that sequence. if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT || Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT || !isa(Op0.getOperand(1)) || !isa(Op1.getOperand(1)) || Op0.getOperand(1) != Op1.getOperand(1)) return SDValue(); unsigned I0 = cast(Op0.getOperand(1))->getZExtValue(); if (I0 != i) return SDValue(); // We found a valid add/sub node. Update the information accordingly. if (i & 1) AddFound = true; else SubFound = true; // Update InVec0 and InVec1. 
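      // (Descriptive note, added for clarity) InVec0 tracks the vector feeding
      // the left-hand extracts and InVec1 the vector feeding the right-hand
      // extracts; both start out as UNDEF and are bound on first use below.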
if (InVec0.getOpcode() == ISD::UNDEF) { InVec0 = Op0.getOperand(0); if (InVec0.getSimpleValueType() != VT) return SDValue(); } if (InVec1.getOpcode() == ISD::UNDEF) { InVec1 = Op1.getOperand(0); if (InVec1.getSimpleValueType() != VT) return SDValue(); } // Make sure that operands in input to each add/sub node always // come from a same pair of vectors. if (InVec0 != Op0.getOperand(0)) { if (ExpectedOpcode == ISD::FSUB) return SDValue(); // FADD is commutable. Try to commute the operands // and then test again. std::swap(Op0, Op1); if (InVec0 != Op0.getOperand(0)) return SDValue(); } if (InVec1 != Op1.getOperand(0)) return SDValue(); // Update the pair of expected opcodes. std::swap(ExpectedOpcode, NextExpectedOpcode); } // Don't try to fold this build_vector into an ADDSUB if the inputs are undef. if (AddFound && SubFound && InVec0.getOpcode() != ISD::UNDEF && InVec1.getOpcode() != ISD::UNDEF) return DAG.getNode(X86ISD::ADDSUB, DL, VT, InVec0, InVec1); return SDValue(); } /// Lower BUILD_VECTOR to a horizontal add/sub operation if possible. static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV, const X86Subtarget *Subtarget, SelectionDAG &DAG) { MVT VT = BV->getSimpleValueType(0); unsigned NumElts = VT.getVectorNumElements(); unsigned NumUndefsLO = 0; unsigned NumUndefsHI = 0; unsigned Half = NumElts/2; // Count the number of UNDEF operands in the build_vector in input. for (unsigned i = 0, e = Half; i != e; ++i) if (BV->getOperand(i)->getOpcode() == ISD::UNDEF) NumUndefsLO++; for (unsigned i = Half, e = NumElts; i != e; ++i) if (BV->getOperand(i)->getOpcode() == ISD::UNDEF) NumUndefsHI++; // Early exit if this is either a build_vector of all UNDEFs or all the // operands but one are UNDEF. if (NumUndefsLO + NumUndefsHI + 1 >= NumElts) return SDValue(); SDLoc DL(BV); SDValue InVec0, InVec1; if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget->hasSSE3()) { // Try to match an SSE3 float HADD/HSUB. if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1)) return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1); if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1)) return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1); } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget->hasSSSE3()) { // Try to match an SSSE3 integer HADD/HSUB. if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1)) return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1); if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1)) return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1); } if (!Subtarget->hasAVX()) return SDValue(); if ((VT == MVT::v8f32 || VT == MVT::v4f64)) { // Try to match an AVX horizontal add/sub of packed single/double // precision floating point values from 256-bit vectors. 
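    // (Descriptive note, added for clarity) The low half (elements [0, Half))
    // and the high half ([Half, NumElts)) are matched independently below;
    // the fold is only taken when both halves read from the same pair of
    // source vectors (or from UNDEF).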
SDValue InVec2, InVec3; if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, Half, InVec0, InVec1) && isHorizontalBinOp(BV, ISD::FADD, DAG, Half, NumElts, InVec2, InVec3) && ((InVec0.getOpcode() == ISD::UNDEF || InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) && ((InVec1.getOpcode() == ISD::UNDEF || InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3)) return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1); if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, Half, InVec0, InVec1) && isHorizontalBinOp(BV, ISD::FSUB, DAG, Half, NumElts, InVec2, InVec3) && ((InVec0.getOpcode() == ISD::UNDEF || InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) && ((InVec1.getOpcode() == ISD::UNDEF || InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3)) return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1); } else if (VT == MVT::v8i32 || VT == MVT::v16i16) { // Try to match an AVX2 horizontal add/sub of signed integers. SDValue InVec2, InVec3; unsigned X86Opcode; bool CanFold = true; if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) && isHorizontalBinOp(BV, ISD::ADD, DAG, Half, NumElts, InVec2, InVec3) && ((InVec0.getOpcode() == ISD::UNDEF || InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) && ((InVec1.getOpcode() == ISD::UNDEF || InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3)) X86Opcode = X86ISD::HADD; else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, Half, InVec0, InVec1) && isHorizontalBinOp(BV, ISD::SUB, DAG, Half, NumElts, InVec2, InVec3) && ((InVec0.getOpcode() == ISD::UNDEF || InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) && ((InVec1.getOpcode() == ISD::UNDEF || InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3)) X86Opcode = X86ISD::HSUB; else CanFold = false; if (CanFold) { // Fold this build_vector into a single horizontal add/sub. // Do this only if the target has AVX2. if (Subtarget->hasAVX2()) return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1); // Do not try to expand this build_vector into a pair of horizontal // add/sub if we can emit a pair of scalar add/sub. if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half) return SDValue(); // Convert this build_vector into a pair of horizontal binop followed by // a concat vector. bool isUndefLO = NumUndefsLO == Half; bool isUndefHI = NumUndefsHI == Half; return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, false, isUndefLO, isUndefHI); } } if ((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 || VT == MVT::v16i16) && Subtarget->hasAVX()) { unsigned X86Opcode; if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1)) X86Opcode = X86ISD::HADD; else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1)) X86Opcode = X86ISD::HSUB; else if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1)) X86Opcode = X86ISD::FHADD; else if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1)) X86Opcode = X86ISD::FHSUB; else return SDValue(); // Don't try to expand this build_vector into a pair of horizontal add/sub // if we can simply emit a pair of scalar add/sub. if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half) return SDValue(); // Convert this build_vector into two horizontal add/sub followed by // a concat vector. 
bool isUndefLO = NumUndefsLO == Half; bool isUndefHI = NumUndefsHI == Half; return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true, isUndefLO, isUndefHI); } return SDValue(); } SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); MVT ExtVT = VT.getVectorElementType(); unsigned NumElems = Op.getNumOperands(); // Generate vectors for predicate vectors. if (VT.getVectorElementType() == MVT::i1 && Subtarget->hasAVX512()) return LowerBUILD_VECTORvXi1(Op, DAG); // Vectors containing all zeros can be matched by pxor and xorps later if (ISD::isBuildVectorAllZeros(Op.getNode())) { // Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd // and 2) ensure that i64 scalars are eliminated on x86-32 hosts. if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) return Op; return getZeroVector(VT, Subtarget, DAG, dl); } // Vectors containing all ones can be matched by pcmpeqd on 128-bit width // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use // vpcmpeqd on 256-bit vectors. if (Subtarget->hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) { if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasInt256())) return Op; if (!VT.is512BitVector()) return getOnesVector(VT, Subtarget, DAG, dl); } BuildVectorSDNode *BV = cast(Op.getNode()); if (SDValue AddSub = LowerToAddSub(BV, Subtarget, DAG)) return AddSub; if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG)) return HorizontalOp; if (SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG)) return Broadcast; unsigned EVTBits = ExtVT.getSizeInBits(); unsigned NumZero = 0; unsigned NumNonZero = 0; uint64_t NonZeros = 0; bool IsAllConstants = true; SmallSet Values; for (unsigned i = 0; i < NumElems; ++i) { SDValue Elt = Op.getOperand(i); if (Elt.getOpcode() == ISD::UNDEF) continue; Values.insert(Elt); if (Elt.getOpcode() != ISD::Constant && Elt.getOpcode() != ISD::ConstantFP) IsAllConstants = false; if (X86::isZeroNode(Elt)) NumZero++; else { assert(i < sizeof(NonZeros) * 8); // Make sure the shift is within range. NonZeros |= ((uint64_t)1 << i); NumNonZero++; } } // All undef vector. Return an UNDEF. All zero vectors were handled above. if (NumNonZero == 0) return DAG.getUNDEF(VT); // Special case for single non-zero, non-undef, element. if (NumNonZero == 1) { unsigned Idx = countTrailingZeros(NonZeros); SDValue Item = Op.getOperand(Idx); // If this is an insertion of an i64 value on x86-32, and if the top bits of // the value are obviously zero, truncate the value to i32 and do the // insertion that way. Only do this if the value is non-constant or if the // value is a constant being inserted into element 0. It is cheaper to do // a constant pool load than it is to do a movd + shuffle. if (ExtVT == MVT::i64 && !Subtarget->is64Bit() && (!IsAllConstants || Idx == 0)) { if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) { // Handle SSE only. assert(VT == MVT::v2i64 && "Expected an SSE value type!"); MVT VecVT = MVT::v4i32; // Truncate the value (which may itself be a constant) to i32, and // convert it to a vector with movd (S2V+shuffle to zero extend). 
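        // Illustrative example (not from the original source): building the
        // v2i64 <42, 0> on a 32-bit target becomes a v4i32 <42, 0, 0, 0>
        // built via movd plus a zeroing shuffle, followed by a bitcast back
        // to v2i64.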
Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item); Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item); return DAG.getBitcast(VT, getShuffleVectorZeroOrUndef( Item, Idx * 2, true, Subtarget, DAG)); } } // If we have a constant or non-constant insertion into the low element of // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into // the rest of the elements. This will be matched as movd/movq/movss/movsd // depending on what the source datatype is. if (Idx == 0) { if (NumZero == 0) return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 || (ExtVT == MVT::i64 && Subtarget->is64Bit())) { if (VT.is512BitVector()) { SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl); return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec, Item, DAG.getIntPtrConstant(0, dl)); } assert((VT.is128BitVector() || VT.is256BitVector()) && "Expected an SSE value type!"); Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); } // We can't directly insert an i8 or i16 into a vector, so zero extend // it to i32 first. if (ExtVT == MVT::i16 || ExtVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); if (VT.is256BitVector()) { if (Subtarget->hasAVX()) { Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v8i32, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); } else { // Without AVX, we need to extend to a 128-bit vector and then // insert into the 256-bit vector. Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl); Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl); } } else { assert(VT.is128BitVector() && "Expected an SSE value type!"); Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); } return DAG.getBitcast(VT, Item); } } // Is it a vector logical left shift? if (NumElems == 2 && Idx == 1 && X86::isZeroNode(Op.getOperand(0)) && !X86::isZeroNode(Op.getOperand(1))) { unsigned NumBits = VT.getSizeInBits(); return getVShift(true, VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(1)), NumBits/2, DAG, *this, dl); } if (IsAllConstants) // Otherwise, it's better to do a constpool load. return SDValue(); // Otherwise, if this is a vector with i32 or f32 elements, and the element // is a non-constant being inserted into an element other than the low one, // we can't use a constant pool load. Instead, use SCALAR_TO_VECTOR (aka // movd/movss) to move this into the low element, then shuffle it into // place. if (EVTBits == 32) { Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); return getShuffleVectorZeroOrUndef(Item, Idx, NumZero > 0, Subtarget, DAG); } } // Splat is obviously ok. Let legalizer expand it to a shuffle. if (Values.size() == 1) { if (EVTBits == 32) { // Instead of a shuffle like this: // shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0> // Check if it's possible to issue this instead. 
// shuffle (vload ptr)), undef, <1, 1, 1, 1> unsigned Idx = countTrailingZeros(NonZeros); SDValue Item = Op.getOperand(Idx); if (Op.getNode()->isOnlyUserOf(Item.getNode())) return LowerAsSplatVectorLoad(Item, VT, dl, DAG); } return SDValue(); } // A vector full of immediates; various special cases are already // handled, so this is best done with a single constant-pool load. if (IsAllConstants) return SDValue(); // For AVX-length vectors, see if we can use a vector load to get all of the // elements, otherwise build the individual 128-bit pieces and use // shuffles to put them in place. if (VT.is256BitVector() || VT.is512BitVector()) { SmallVector V(Op->op_begin(), Op->op_begin() + NumElems); // Check for a build vector of consecutive loads. if (SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false)) return LD; EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2); // Build both the lower and upper subvector. SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, makeArrayRef(&V[0], NumElems/2)); SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, makeArrayRef(&V[NumElems / 2], NumElems/2)); // Recreate the wider vector with the lower and upper part. if (VT.is256BitVector()) return Concat128BitVectors(Lower, Upper, VT, NumElems, DAG, dl); return Concat256BitVectors(Lower, Upper, VT, NumElems, DAG, dl); } // Let legalizer expand 2-wide build_vectors. if (EVTBits == 64) { if (NumNonZero == 1) { // One half is zero or undef. unsigned Idx = countTrailingZeros(NonZeros); SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(Idx)); return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG); } return SDValue(); } // If element VT is < 32 bits, convert it to inserts into a zero vector. if (EVTBits == 8 && NumElems == 16) if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero, DAG, Subtarget, *this)) return V; if (EVTBits == 16 && NumElems == 8) if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero, DAG, Subtarget, *this)) return V; // If element VT is == 32 bits and has 4 elems, try to generate an INSERTPS if (EVTBits == 32 && NumElems == 4) if (SDValue V = LowerBuildVectorv4x32(Op, DAG, Subtarget, *this)) return V; // If element VT is == 32 bits, turn it into a number of shuffles. SmallVector V(NumElems); if (NumElems == 4 && NumZero > 0) { for (unsigned i = 0; i < 4; ++i) { bool isZero = !(NonZeros & (1ULL << i)); if (isZero) V[i] = getZeroVector(VT, Subtarget, DAG, dl); else V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i)); } for (unsigned i = 0; i < 2; ++i) { switch ((NonZeros & (0x3 << i*2)) >> (i*2)) { default: break; case 0: V[i] = V[i*2]; // Must be a zero vector. break; case 1: V[i] = getMOVL(DAG, dl, VT, V[i*2+1], V[i*2]); break; case 2: V[i] = getMOVL(DAG, dl, VT, V[i*2], V[i*2+1]); break; case 3: V[i] = getUnpackl(DAG, dl, VT, V[i*2], V[i*2+1]); break; } } bool Reverse1 = (NonZeros & 0x3) == 2; bool Reverse2 = ((NonZeros & (0x3 << 2)) >> 2) == 2; int MaskVec[] = { Reverse1 ? 1 : 0, Reverse1 ? 0 : 1, static_cast(Reverse2 ? NumElems+1 : NumElems), static_cast(Reverse2 ? NumElems : NumElems+1) }; return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]); } if (Values.size() > 1 && VT.is128BitVector()) { // Check for a build vector of consecutive loads. for (unsigned i = 0; i < NumElems; ++i) V[i] = Op.getOperand(i); // Check for elements which are consecutive loads. 
if (SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false)) return LD; // Check for a build vector from mostly shuffle plus few inserting. if (SDValue Sh = buildFromShuffleMostly(Op, DAG)) return Sh; // For SSE 4.1, use insertps to put the high elements into the low element. if (Subtarget->hasSSE41()) { SDValue Result; if (Op.getOperand(0).getOpcode() != ISD::UNDEF) Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0)); else Result = DAG.getUNDEF(VT); for (unsigned i = 1; i < NumElems; ++i) { if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue; Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result, Op.getOperand(i), DAG.getIntPtrConstant(i, dl)); } return Result; } // Otherwise, expand into a number of unpckl*, start by extending each of // our (non-undef) elements to the full vector width with the element in the // bottom slot of the vector (which generates no code for SSE). for (unsigned i = 0; i < NumElems; ++i) { if (Op.getOperand(i).getOpcode() != ISD::UNDEF) V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i)); else V[i] = DAG.getUNDEF(VT); } // Next, we iteratively mix elements, e.g. for v4f32: // Step 1: unpcklps 0, 2 ==> X: // : unpcklps 1, 3 ==> Y: // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> unsigned EltStride = NumElems >> 1; while (EltStride != 0) { for (unsigned i = 0; i < EltStride; ++i) { // If V[i+EltStride] is undef and this is the first round of mixing, // then it is safe to just drop this shuffle: V[i] is already in the // right place, the one element (since it's the first round) being // inserted as undef can be dropped. This isn't safe for successive // rounds because they will permute elements within both vectors. if (V[i+EltStride].getOpcode() == ISD::UNDEF && EltStride == NumElems/2) continue; V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]); } EltStride >>= 1; } return V[0]; } return SDValue(); } // 256-bit AVX can use the vinsertf128 instruction // to create 256-bit vectors from two other 128-bit ones. static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { SDLoc dl(Op); MVT ResVT = Op.getSimpleValueType(); assert((ResVT.is256BitVector() || ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide"); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); unsigned NumElems = ResVT.getVectorNumElements(); if (ResVT.is256BitVector()) return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl); if (Op.getNumOperands() == 4) { MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(), ResVT.getVectorNumElements()/2); SDValue V3 = Op.getOperand(2); SDValue V4 = Op.getOperand(3); return Concat256BitVectors(Concat128BitVectors(V1, V2, HalfVT, NumElems/2, DAG, dl), Concat128BitVectors(V3, V4, HalfVT, NumElems/2, DAG, dl), ResVT, NumElems, DAG, dl); } return Concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl); } static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG & DAG) { SDLoc dl(Op); MVT ResVT = Op.getSimpleValueType(); unsigned NumOfOperands = Op.getNumOperands(); assert(isPowerOf2_32(NumOfOperands) && "Unexpected number of operands in CONCAT_VECTORS"); SDValue Undef = DAG.getUNDEF(ResVT); if (NumOfOperands > 2) { // Specialize the cases when all, or all but one, of the operands are undef. 
unsigned NumOfDefinedOps = 0; unsigned OpIdx = 0; for (unsigned i = 0; i < NumOfOperands; i++) if (!Op.getOperand(i).isUndef()) { NumOfDefinedOps++; OpIdx = i; } if (NumOfDefinedOps == 0) return Undef; if (NumOfDefinedOps == 1) { unsigned SubVecNumElts = Op.getOperand(OpIdx).getValueType().getVectorNumElements(); SDValue IdxVal = DAG.getIntPtrConstant(SubVecNumElts * OpIdx, dl); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, Op.getOperand(OpIdx), IdxVal); } MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(), ResVT.getVectorNumElements()/2); SmallVector Ops; for (unsigned i = 0; i < NumOfOperands/2; i++) Ops.push_back(Op.getOperand(i)); SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops); Ops.clear(); for (unsigned i = NumOfOperands/2; i < NumOfOperands; i++) Ops.push_back(Op.getOperand(i)); SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops); return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); } // 2 operands SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); unsigned NumElems = ResVT.getVectorNumElements(); assert(V1.getValueType() == V2.getValueType() && V1.getValueType().getVectorNumElements() == NumElems/2 && "Unexpected operands in CONCAT_VECTORS"); if (ResVT.getSizeInBits() >= 16) return Op; // The operation is legal with KUNPCK bool IsZeroV1 = ISD::isBuildVectorAllZeros(V1.getNode()); bool IsZeroV2 = ISD::isBuildVectorAllZeros(V2.getNode()); SDValue ZeroVec = getZeroVector(ResVT, Subtarget, DAG, dl); if (IsZeroV1 && IsZeroV2) return ZeroVec; SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); if (V2.isUndef()) return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx); if (IsZeroV2) return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V1, ZeroIdx); SDValue IdxVal = DAG.getIntPtrConstant(NumElems/2, dl); if (V1.isUndef()) V2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, IdxVal); if (IsZeroV1) return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V2, IdxVal); V1 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, V1, V2, IdxVal); } static SDValue LowerCONCAT_VECTORS(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); if (VT.getVectorElementType() == MVT::i1) return LowerCONCAT_VECTORSvXi1(Op, Subtarget, DAG); assert((VT.is256BitVector() && Op.getNumOperands() == 2) || (VT.is512BitVector() && (Op.getNumOperands() == 2 || Op.getNumOperands() == 4))); // AVX can use the vinsertf128 instruction to create 256-bit vectors // from two other 128-bit ones. // 512-bit vector may contain 2 256-bit vectors or 4 128-bit vectors return LowerAVXCONCAT_VECTORS(Op, DAG); } //===----------------------------------------------------------------------===// // Vector shuffle lowering // // This is an experimental code path for lowering vector shuffles on x86. It is // designed to handle arbitrary vector shuffles and blends, gracefully // degrading performance as necessary. It works hard to recognize idiomatic // shuffles and lower them to optimal instruction patterns without leaving // a framework that allows reasonably efficient handling of all vector shuffle // patterns. //===----------------------------------------------------------------------===// /// \brief Tiny helper function to identify a no-op mask. /// /// This is a somewhat boring predicate function. 
It checks whether the mask /// array input, which is assumed to be a single-input shuffle mask of the kind /// used by the X86 shuffle instructions (not a fully general /// ShuffleVectorSDNode mask) requires any shuffles to occur. Both undef and an /// in-place shuffle are 'no-op's. static bool isNoopShuffleMask(ArrayRef Mask) { for (int i = 0, Size = Mask.size(); i < Size; ++i) if (Mask[i] != -1 && Mask[i] != i) return false; return true; } /// \brief Helper function to classify a mask as a single-input mask. /// /// This isn't a generic single-input test because in the vector shuffle /// lowering we canonicalize single inputs to be the first input operand. This /// means we can more quickly test for a single input by only checking whether /// an input from the second operand exists. We also assume that the size of /// mask corresponds to the size of the input vectors which isn't true in the /// fully general case. static bool isSingleInputShuffleMask(ArrayRef Mask) { for (int M : Mask) if (M >= (int)Mask.size()) return false; return true; } /// \brief Test whether there are elements crossing 128-bit lanes in this /// shuffle mask. /// /// X86 divides up its shuffles into in-lane and cross-lane shuffle operations /// and we routinely test for these. static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef Mask) { int LaneSize = 128 / VT.getScalarSizeInBits(); int Size = Mask.size(); for (int i = 0; i < Size; ++i) if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize) return true; return false; } /// \brief Test whether a shuffle mask is equivalent within each 128-bit lane. /// /// This checks a shuffle mask to see if it is performing the same /// 128-bit lane-relative shuffle in each 128-bit lane. This trivially implies /// that it is also not lane-crossing. It may however involve a blend from the /// same lane of a second vector. /// /// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is /// non-trivial to compute in the face of undef lanes. The representation is /// *not* suitable for use with existing 128-bit shuffles as it will contain /// entries from both V1 and V2 inputs to the wider mask. static bool is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef Mask, SmallVectorImpl &RepeatedMask) { int LaneSize = 128 / VT.getScalarSizeInBits(); RepeatedMask.resize(LaneSize, -1); int Size = Mask.size(); for (int i = 0; i < Size; ++i) { if (Mask[i] < 0) continue; if ((Mask[i] % Size) / LaneSize != i / LaneSize) // This entry crosses lanes, so there is no way to model this shuffle. return false; // Ok, handle the in-lane shuffles by detecting if and when they repeat. if (RepeatedMask[i % LaneSize] == -1) // This is the first non-undef entry in this slot of a 128-bit lane. RepeatedMask[i % LaneSize] = Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + Size; else if (RepeatedMask[i % LaneSize] + (i / LaneSize) * LaneSize != Mask[i]) // Found a mismatch with the repeated mask. return false; } return true; } /// \brief Checks whether a shuffle mask is equivalent to an explicit list of /// arguments. /// /// This is a fast way to test a shuffle mask against a fixed pattern: /// /// if (isShuffleEquivalent(Mask, 3, 2, {1, 0})) { ... } /// /// It returns true if the mask is exactly as wide as the argument list, and /// each element of the mask is either -1 (signifying undef) or the value given /// in the argument. 
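///
/// Illustrative usage (not from the original source): for a 4-element mask,
/// isShuffleEquivalent(V1, V2, Mask, {0, 4, 1, 5}) asks whether Mask
/// interleaves the low elements of V1 and V2, with -1 entries in Mask
/// treated as matching anything.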
static bool isShuffleEquivalent(SDValue V1, SDValue V2, ArrayRef Mask, ArrayRef ExpectedMask) { if (Mask.size() != ExpectedMask.size()) return false; int Size = Mask.size(); // If the values are build vectors, we can look through them to find // equivalent inputs that make the shuffles equivalent. auto *BV1 = dyn_cast(V1); auto *BV2 = dyn_cast(V2); for (int i = 0; i < Size; ++i) if (Mask[i] != -1 && Mask[i] != ExpectedMask[i]) { auto *MaskBV = Mask[i] < Size ? BV1 : BV2; auto *ExpectedBV = ExpectedMask[i] < Size ? BV1 : BV2; if (!MaskBV || !ExpectedBV || MaskBV->getOperand(Mask[i] % Size) != ExpectedBV->getOperand(ExpectedMask[i] % Size)) return false; } return true; } /// \brief Get a 4-lane 8-bit shuffle immediate for a mask. /// /// This helper function produces an 8-bit shuffle immediate corresponding to /// the ubiquitous shuffle encoding scheme used in x86 instructions for /// shuffling 4 lanes. It can be used with most of the PSHUF instructions for /// example. /// /// NB: We rely heavily on "undef" masks preserving the input lane. static SDValue getV4X86ShuffleImm8ForMask(ArrayRef Mask, SDLoc DL, SelectionDAG &DAG) { assert(Mask.size() == 4 && "Only 4-lane shuffle masks"); assert(Mask[0] >= -1 && Mask[0] < 4 && "Out of bound mask element!"); assert(Mask[1] >= -1 && Mask[1] < 4 && "Out of bound mask element!"); assert(Mask[2] >= -1 && Mask[2] < 4 && "Out of bound mask element!"); assert(Mask[3] >= -1 && Mask[3] < 4 && "Out of bound mask element!"); unsigned Imm = 0; Imm |= (Mask[0] == -1 ? 0 : Mask[0]) << 0; Imm |= (Mask[1] == -1 ? 1 : Mask[1]) << 2; Imm |= (Mask[2] == -1 ? 2 : Mask[2]) << 4; Imm |= (Mask[3] == -1 ? 3 : Mask[3]) << 6; return DAG.getConstant(Imm, DL, MVT::i8); } /// \brief Compute whether each element of a shuffle is zeroable. /// /// A "zeroable" vector shuffle element is one which can be lowered to zero. /// Either it is an undef element in the shuffle mask, the element of the input /// referenced is undef, or the element of the input referenced is known to be /// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle /// as many lanes with this technique as possible to simplify the remaining /// shuffle. static SmallBitVector computeZeroableShuffleElements(ArrayRef Mask, SDValue V1, SDValue V2) { SmallBitVector Zeroable(Mask.size(), false); while (V1.getOpcode() == ISD::BITCAST) V1 = V1->getOperand(0); while (V2.getOpcode() == ISD::BITCAST) V2 = V2->getOperand(0); bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode()); bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode()); for (int i = 0, Size = Mask.size(); i < Size; ++i) { int M = Mask[i]; // Handle the easy cases. if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) { Zeroable[i] = true; continue; } // If this is an index into a build_vector node (which has the same number // of elements), dig out the input value and use it. SDValue V = M < Size ? V1 : V2; if (V.getOpcode() != ISD::BUILD_VECTOR || Size != (int)V.getNumOperands()) continue; SDValue Input = V.getOperand(M % Size); // The UNDEF opcode check really should be dead code here, but not quite // worth asserting on (it isn't invalid, just unexpected). if (Input.getOpcode() == ISD::UNDEF || X86::isZeroNode(Input)) Zeroable[i] = true; } return Zeroable; } // X86 has dedicated unpack instructions that can handle specific blend // operations: UNPCKH and UNPCKL. 
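// For example (illustrative, not from the original source), for v4i32 the
// masks built below are
//   UNPCKL: <0, 4, 1, 5>   and   UNPCKH: <2, 6, 3, 7>
// i.e. the low and high halves of each 128-bit lane interleaved with the
// corresponding elements of the second input.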
static SDValue lowerVectorShuffleWithUNPCK(SDLoc DL, MVT VT, ArrayRef Mask, SDValue V1, SDValue V2, SelectionDAG &DAG) { int NumElts = VT.getVectorNumElements(); int NumEltsInLane = 128 / VT.getScalarSizeInBits(); SmallVector Unpckl; SmallVector Unpckh; for (int i = 0; i < NumElts; ++i) { unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane; int LoPos = (i % NumEltsInLane) / 2 + LaneStart + NumElts * (i % 2); int HiPos = LoPos + NumEltsInLane / 2; Unpckl.push_back(LoPos); Unpckh.push_back(HiPos); } if (isShuffleEquivalent(V1, V2, Mask, Unpckl)) return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2); if (isShuffleEquivalent(V1, V2, Mask, Unpckh)) return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2); // Commute and try again. ShuffleVectorSDNode::commuteMask(Unpckl); if (isShuffleEquivalent(V1, V2, Mask, Unpckl)) return DAG.getNode(X86ISD::UNPCKL, DL, VT, V2, V1); ShuffleVectorSDNode::commuteMask(Unpckh); if (isShuffleEquivalent(V1, V2, Mask, Unpckh)) return DAG.getNode(X86ISD::UNPCKH, DL, VT, V2, V1); return SDValue(); } /// \brief Try to emit a bitmask instruction for a shuffle. /// /// This handles cases where we can model a blend exactly as a bitmask due to /// one of the inputs being zeroable. static SDValue lowerVectorShuffleAsBitMask(SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, SelectionDAG &DAG) { MVT EltVT = VT.getVectorElementType(); int NumEltBits = EltVT.getSizeInBits(); MVT IntEltVT = MVT::getIntegerVT(NumEltBits); SDValue Zero = DAG.getConstant(0, DL, IntEltVT); SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL, IntEltVT); if (EltVT.isFloatingPoint()) { Zero = DAG.getBitcast(EltVT, Zero); AllOnes = DAG.getBitcast(EltVT, AllOnes); } SmallVector VMaskOps(Mask.size(), Zero); SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); SDValue V; for (int i = 0, Size = Mask.size(); i < Size; ++i) { if (Zeroable[i]) continue; if (Mask[i] % Size != i) return SDValue(); // Not a blend. if (!V) V = Mask[i] < Size ? V1 : V2; else if (V != (Mask[i] < Size ? V1 : V2)) return SDValue(); // Can only let one input through the mask. VMaskOps[i] = AllOnes; } if (!V) return SDValue(); // No non-zeroable elements! SDValue VMask = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, VMaskOps); V = DAG.getNode(VT.isFloatingPoint() ? (unsigned) X86ISD::FAND : (unsigned) ISD::AND, DL, VT, V, VMask); return V; } /// \brief Try to emit a blend instruction for a shuffle using bit math. /// /// This is used as a fallback approach when first class blend instructions are /// unavailable. Currently it is only suitable for integer vectors, but could /// be generalized for floating point vectors if desirable. static SDValue lowerVectorShuffleAsBitBlend(SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, SelectionDAG &DAG) { assert(VT.isInteger() && "Only supports integer vector types!"); MVT EltVT = VT.getVectorElementType(); int NumEltBits = EltVT.getSizeInBits(); SDValue Zero = DAG.getConstant(0, DL, EltVT); SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL, EltVT); SmallVector MaskOps; for (int i = 0, Size = Mask.size(); i < Size; ++i) { if (Mask[i] != -1 && Mask[i] != i && Mask[i] != i + Size) return SDValue(); // Shuffled input! MaskOps.push_back(Mask[i] < Size ? AllOnes : Zero); } SDValue V1Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, MaskOps); V1 = DAG.getNode(ISD::AND, DL, VT, V1, V1Mask); // We have to cast V2 around. 
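  // (Descriptive note, added for clarity) The blend below computes
  //   OR(AND(V1, V1Mask), ANDNP(V1Mask, V2))
  // i.e. each element is taken from V1 where V1Mask is all-ones and from V2
  // where it is all-zeros; V2 is bitcast to a vector of i64 around the ANDNP
  // node and back again.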
MVT MaskVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64); V2 = DAG.getBitcast(VT, DAG.getNode(X86ISD::ANDNP, DL, MaskVT, DAG.getBitcast(MaskVT, V1Mask), DAG.getBitcast(MaskVT, V2))); return DAG.getNode(ISD::OR, DL, VT, V1, V2); } /// \brief Try to emit a blend instruction for a shuffle. /// /// This doesn't do any checks for the availability of instructions for blending /// these values. It relies on the availability of the X86ISD::BLENDI pattern to /// be matched in the backend with the type given. What it does check for is /// that the shuffle mask is a blend, or convertible into a blend with zero. static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Original, const X86Subtarget *Subtarget, SelectionDAG &DAG) { bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode()); bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode()); SmallVector Mask(Original.begin(), Original.end()); SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); bool ForceV1Zero = false, ForceV2Zero = false; // Attempt to generate the binary blend mask. If an input is zero then // we can use any lane. // TODO: generalize the zero matching to any scalar like isShuffleEquivalent. unsigned BlendMask = 0; for (int i = 0, Size = Mask.size(); i < Size; ++i) { int M = Mask[i]; if (M < 0) continue; if (M == i) continue; if (M == i + Size) { BlendMask |= 1u << i; continue; } if (Zeroable[i]) { if (V1IsZero) { ForceV1Zero = true; Mask[i] = i; continue; } if (V2IsZero) { ForceV2Zero = true; BlendMask |= 1u << i; Mask[i] = i + Size; continue; } } return SDValue(); // Shuffled input! } // Create a REAL zero vector - ISD::isBuildVectorAllZeros allows UNDEFs. if (ForceV1Zero) V1 = getZeroVector(VT, Subtarget, DAG, DL); if (ForceV2Zero) V2 = getZeroVector(VT, Subtarget, DAG, DL); auto ScaleBlendMask = [](unsigned BlendMask, int Size, int Scale) { unsigned ScaledMask = 0; for (int i = 0; i != Size; ++i) if (BlendMask & (1u << i)) for (int j = 0; j != Scale; ++j) ScaledMask |= 1u << (i * Scale + j); return ScaledMask; }; switch (VT.SimpleTy) { case MVT::v2f64: case MVT::v4f32: case MVT::v4f64: case MVT::v8f32: return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2, DAG.getConstant(BlendMask, DL, MVT::i8)); case MVT::v4i64: case MVT::v8i32: assert(Subtarget->hasAVX2() && "256-bit integer blends require AVX2!"); // FALLTHROUGH case MVT::v2i64: case MVT::v4i32: // If we have AVX2 it is faster to use VPBLENDD when the shuffle fits into // that instruction. if (Subtarget->hasAVX2()) { // Scale the blend by the number of 32-bit dwords per element. int Scale = VT.getScalarSizeInBits() / 32; BlendMask = ScaleBlendMask(BlendMask, Mask.size(), Scale); MVT BlendVT = VT.getSizeInBits() > 128 ? MVT::v8i32 : MVT::v4i32; V1 = DAG.getBitcast(BlendVT, V1); V2 = DAG.getBitcast(BlendVT, V2); return DAG.getBitcast( VT, DAG.getNode(X86ISD::BLENDI, DL, BlendVT, V1, V2, DAG.getConstant(BlendMask, DL, MVT::i8))); } // FALLTHROUGH case MVT::v8i16: { // For integer shuffles we need to expand the mask and cast the inputs to // v8i16s prior to blending. 
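    // Illustrative example (not from the original source): a v4i32 blend
    // mask of 0b0101 that falls through to this path scales by 2 to the
    // v8i16 mask 0b00110011, since each 32-bit element covers two 16-bit
    // blend lanes.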
int Scale = 8 / VT.getVectorNumElements(); BlendMask = ScaleBlendMask(BlendMask, Mask.size(), Scale); V1 = DAG.getBitcast(MVT::v8i16, V1); V2 = DAG.getBitcast(MVT::v8i16, V2); return DAG.getBitcast(VT, DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2, DAG.getConstant(BlendMask, DL, MVT::i8))); } case MVT::v16i16: { assert(Subtarget->hasAVX2() && "256-bit integer blends require AVX2!"); SmallVector RepeatedMask; if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) { // We can lower these with PBLENDW which is mirrored across 128-bit lanes. assert(RepeatedMask.size() == 8 && "Repeated mask size doesn't match!"); BlendMask = 0; for (int i = 0; i < 8; ++i) if (RepeatedMask[i] >= 16) BlendMask |= 1u << i; return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2, DAG.getConstant(BlendMask, DL, MVT::i8)); } } // FALLTHROUGH case MVT::v16i8: case MVT::v32i8: { assert((VT.is128BitVector() || Subtarget->hasAVX2()) && "256-bit byte-blends require AVX2 support!"); // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB. if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, DAG)) return Masked; // Scale the blend by the number of bytes per element. int Scale = VT.getScalarSizeInBits() / 8; // This form of blend is always done on bytes. Compute the byte vector // type. MVT BlendVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8); // Compute the VSELECT mask. Note that VSELECT is really confusing in the // mix of LLVM's code generator and the x86 backend. We tell the code // generator that boolean values in the elements of an x86 vector register // are -1 for true and 0 for false. We then use the LLVM semantics of 'true' // mapping a select to operand #1, and 'false' mapping to operand #2. The // reality in x86 is that vector masks (pre-AVX-512) use only the high bit // of the element (the remaining are ignored) and 0 in that high bit would // mean operand #1 while 1 in the high bit would mean operand #2. So while // the LLVM model for boolean values in vector elements gets the relevant // bit set, it is set backwards and over constrained relative to x86's // actual model. SmallVector VSELECTMask; for (int i = 0, Size = Mask.size(); i < Size; ++i) for (int j = 0; j < Scale; ++j) VSELECTMask.push_back( Mask[i] < 0 ? DAG.getUNDEF(MVT::i8) : DAG.getConstant(Mask[i] < Size ? -1 : 0, DL, MVT::i8)); V1 = DAG.getBitcast(BlendVT, V1); V2 = DAG.getBitcast(BlendVT, V2); return DAG.getBitcast(VT, DAG.getNode(ISD::VSELECT, DL, BlendVT, DAG.getNode(ISD::BUILD_VECTOR, DL, BlendVT, VSELECTMask), V1, V2)); } default: llvm_unreachable("Not a supported integer vector type!"); } } /// \brief Try to lower as a blend of elements from two inputs followed by /// a single-input permutation. /// /// This matches the pattern where we can blend elements from two inputs and /// then reduce the shuffle to a single-input permutation. static SDValue lowerVectorShuffleAsBlendAndPermute(SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, SelectionDAG &DAG) { // We build up the blend mask while checking whether a blend is a viable way // to reduce the shuffle. SmallVector BlendMask(Mask.size(), -1); SmallVector PermuteMask(Mask.size(), -1); for (int i = 0, Size = Mask.size(); i < Size; ++i) { if (Mask[i] < 0) continue; assert(Mask[i] < Size * 2 && "Shuffle input is out of bounds."); if (BlendMask[Mask[i] % Size] == -1) BlendMask[Mask[i] % Size] = Mask[i]; else if (BlendMask[Mask[i] % Size] != Mask[i]) return SDValue(); // Can't blend in the needed input! 
PermuteMask[i] = Mask[i] % Size; } SDValue V = DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask); return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), PermuteMask); } /// \brief Generic routine to decompose a shuffle and blend into indepndent /// blends and permutes. /// /// This matches the extremely common pattern for handling combined /// shuffle+blend operations on newer X86 ISAs where we have very fast blend /// operations. It will try to pick the best arrangement of shuffles and /// blends. static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, SelectionDAG &DAG) { // Shuffle the input elements into the desired positions in V1 and V2 and // blend them together. SmallVector V1Mask(Mask.size(), -1); SmallVector V2Mask(Mask.size(), -1); SmallVector BlendMask(Mask.size(), -1); for (int i = 0, Size = Mask.size(); i < Size; ++i) if (Mask[i] >= 0 && Mask[i] < Size) { V1Mask[i] = Mask[i]; BlendMask[i] = i; } else if (Mask[i] >= Size) { V2Mask[i] = Mask[i] - Size; BlendMask[i] = i + Size; } // Try to lower with the simpler initial blend strategy unless one of the // input shuffles would be a no-op. We prefer to shuffle inputs as the // shuffle may be able to fold with a load or other benefit. However, when // we'll have to do 2x as many shuffles in order to achieve this, blending // first is a better strategy. if (!isNoopShuffleMask(V1Mask) && !isNoopShuffleMask(V2Mask)) if (SDValue BlendPerm = lowerVectorShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, DAG)) return BlendPerm; V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask); V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask); return DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask); } /// \brief Try to lower a vector shuffle as a byte rotation. /// /// SSSE3 has a generic PALIGNR instruction in x86 that will do an arbitrary /// byte-rotation of the concatenation of two vectors; pre-SSSE3 can use /// a PSRLDQ/PSLLDQ/POR pattern to get a similar effect. This routine will /// try to generically lower a vector shuffle through such an pattern. It /// does not check for the profitability of lowering either as PALIGNR or /// PSRLDQ/PSLLDQ/POR, only whether the mask is valid to lower in that form. /// This matches shuffle vectors that look like: /// /// v8i16 [11, 12, 13, 14, 15, 0, 1, 2] /// /// Essentially it concatenates V1 and V2, shifts right by some number of /// elements, and takes the low elements as the result. Note that while this is /// specified as a *right shift* because x86 is little-endian, it is a *left /// rotate* of the vector lanes. static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) { assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!"); int NumElts = Mask.size(); int NumLanes = VT.getSizeInBits() / 128; int NumLaneElts = NumElts / NumLanes; // We need to detect various ways of spelling a rotation: // [11, 12, 13, 14, 15, 0, 1, 2] // [-1, 12, 13, 14, -1, -1, 1, -1] // [-1, -1, -1, -1, -1, -1, 1, 2] // [ 3, 4, 5, 6, 7, 8, 9, 10] // [-1, 4, 5, 6, -1, -1, 9, -1] // [-1, 4, 5, 6, -1, -1, -1, -1] int Rotation = 0; SDValue Lo, Hi; for (int l = 0; l < NumElts; l += NumLaneElts) { for (int i = 0; i < NumLaneElts; ++i) { if (Mask[l + i] == -1) continue; assert(Mask[l + i] >= 0 && "Only -1 is a valid negative mask element!"); // Get the mod-Size index and lane correct it. 
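      // Illustrative example (not from the original source): for the v8i16
      // mask [11, 12, 13, 14, 15, 0, 1, 2], element 0 reads concatenated
      // index 11, so LaneIdx = 3 and StartIdx = -3, giving a rotation of 3
      // elements (a 6-byte PALIGNR immediate).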
int LaneIdx = (Mask[l + i] % NumElts) - l; // Make sure it was in this lane. if (LaneIdx < 0 || LaneIdx >= NumLaneElts) return SDValue(); // Determine where a rotated vector would have started. int StartIdx = i - LaneIdx; if (StartIdx == 0) // The identity rotation isn't interesting, stop. return SDValue(); // If we found the tail of a vector the rotation must be the missing // front. If we found the head of a vector, it must be how much of the // head. int CandidateRotation = StartIdx < 0 ? -StartIdx : NumLaneElts - StartIdx; if (Rotation == 0) Rotation = CandidateRotation; else if (Rotation != CandidateRotation) // The rotations don't match, so we can't match this mask. return SDValue(); // Compute which value this mask is pointing at. SDValue MaskV = Mask[l + i] < NumElts ? V1 : V2; // Compute which of the two target values this index should be assigned // to. This reflects whether the high elements are remaining or the low // elements are remaining. SDValue &TargetV = StartIdx < 0 ? Hi : Lo; // Either set up this value if we've not encountered it before, or check // that it remains consistent. if (!TargetV) TargetV = MaskV; else if (TargetV != MaskV) // This may be a rotation, but it pulls from the inputs in some // unsupported interleaving. return SDValue(); } } // Check that we successfully analyzed the mask, and normalize the results. assert(Rotation != 0 && "Failed to locate a viable rotation!"); assert((Lo || Hi) && "Failed to find a rotated input vector!"); if (!Lo) Lo = Hi; else if (!Hi) Hi = Lo; // The actual rotate instruction rotates bytes, so we need to scale the // rotation based on how many bytes are in the vector lane. int Scale = 16 / NumLaneElts; // SSSE3 targets can use the palignr instruction. if (Subtarget->hasSSSE3()) { // Cast the inputs to i8 vector of correct length to match PALIGNR. MVT AlignVT = MVT::getVectorVT(MVT::i8, 16 * NumLanes); Lo = DAG.getBitcast(AlignVT, Lo); Hi = DAG.getBitcast(AlignVT, Hi); return DAG.getBitcast( VT, DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Lo, Hi, DAG.getConstant(Rotation * Scale, DL, MVT::i8))); } assert(VT.is128BitVector() && "Rotate-based lowering only supports 128-bit lowering!"); assert(Mask.size() <= 16 && "Can shuffle at most 16 bytes in a 128-bit vector!"); // Default SSE2 implementation int LoByteShift = 16 - Rotation * Scale; int HiByteShift = Rotation * Scale; // Cast the inputs to v2i64 to match PSLLDQ/PSRLDQ. Lo = DAG.getBitcast(MVT::v2i64, Lo); Hi = DAG.getBitcast(MVT::v2i64, Hi); SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v2i64, Lo, DAG.getConstant(LoByteShift, DL, MVT::i8)); SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v2i64, Hi, DAG.getConstant(HiByteShift, DL, MVT::i8)); return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, MVT::v2i64, LoShift, HiShift)); } /// \brief Try to lower a vector shuffle as a bit shift (shifts in zeros). /// /// Attempts to match a shuffle mask against the PSLL(W/D/Q/DQ) and /// PSRL(W/D/Q/DQ) SSE2 and AVX2 logical bit-shift instructions. The function /// matches elements from one of the input vectors shuffled to the left or /// right with zeroable elements 'shifted in'. It handles both the strictly /// bit-wise element shifts and the byte shift across an entire 128-bit double /// quad word lane. /// /// PSHL : (little-endian) left bit shift. /// [ zz, 0, zz, 2 ] /// [ -1, 4, zz, -1 ] /// PSRL : (little-endian) right bit shift. 
/// [ 1, zz, 3, zz] /// [ -1, -1, 7, zz] /// PSLLDQ : (little-endian) left byte shift /// [ zz, 0, 1, 2, 3, 4, 5, 6] /// [ zz, zz, -1, -1, 2, 3, 4, -1] /// [ zz, zz, zz, zz, zz, zz, -1, 1] /// PSRLDQ : (little-endian) right byte shift /// [ 5, 6, 7, zz, zz, zz, zz, zz] /// [ -1, 5, 6, 7, zz, zz, zz, zz] /// [ 1, 2, -1, -1, -1, -1, zz, zz] static SDValue lowerVectorShuffleAsShift(SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, SelectionDAG &DAG) { SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); int Size = Mask.size(); assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); auto CheckZeros = [&](int Shift, int Scale, bool Left) { for (int i = 0; i < Size; i += Scale) for (int j = 0; j < Shift; ++j) if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))]) return false; return true; }; auto MatchShift = [&](int Shift, int Scale, bool Left, SDValue V) { for (int i = 0; i != Size; i += Scale) { unsigned Pos = Left ? i + Shift : i; unsigned Low = Left ? i : i + Shift; unsigned Len = Scale - Shift; if (!isSequentialOrUndefInRange(Mask, Pos, Len, Low + (V == V1 ? 0 : Size))) return SDValue(); } int ShiftEltBits = VT.getScalarSizeInBits() * Scale; bool ByteShift = ShiftEltBits > 64; unsigned OpCode = Left ? (ByteShift ? X86ISD::VSHLDQ : X86ISD::VSHLI) : (ByteShift ? X86ISD::VSRLDQ : X86ISD::VSRLI); int ShiftAmt = Shift * VT.getScalarSizeInBits() / (ByteShift ? 8 : 1); // Normalize the scale for byte shifts to still produce an i64 element // type. Scale = ByteShift ? Scale / 2 : Scale; // We need to round trip through the appropriate type for the shift. MVT ShiftSVT = MVT::getIntegerVT(VT.getScalarSizeInBits() * Scale); MVT ShiftVT = MVT::getVectorVT(ShiftSVT, Size / Scale); assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) && "Illegal integer vector type"); V = DAG.getBitcast(ShiftVT, V); V = DAG.getNode(OpCode, DL, ShiftVT, V, DAG.getConstant(ShiftAmt, DL, MVT::i8)); return DAG.getBitcast(VT, V); }; // SSE/AVX supports logical shifts up to 64-bit integers - so we can just // keep doubling the size of the integer elements up to that. We can // then shift the elements of the integer vector by whole multiples of // their width within the elements of the larger integer vector. Test each // multiple to see if we can find a match with the moved element indices // and that the shifted in elements are all zeroable. for (int Scale = 2; Scale * VT.getScalarSizeInBits() <= 128; Scale *= 2) for (int Shift = 1; Shift != Scale; ++Shift) for (bool Left : {true, false}) if (CheckZeros(Shift, Scale, Left)) for (SDValue V : {V1, V2}) if (SDValue Match = MatchShift(Shift, Scale, Left, V)) return Match; // no match return SDValue(); } /// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ. static SDValue lowerVectorShuffleWithSSE4A(SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, SelectionDAG &DAG) { SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); assert(!Zeroable.all() && "Fully zeroable shuffle mask"); int Size = Mask.size(); int HalfSize = Size / 2; assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); // Upper half must be undefined. if (!isUndefInRange(Mask, HalfSize, HalfSize)) return SDValue(); // EXTRQ: Extract Len elements from lower half of source, starting at Idx. // Remainder of lower half result is zero and upper half is all undef. auto LowerAsEXTRQ = [&]() { // Determine the extraction length from the part of the // lower half that isn't zeroable. 
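  // [Illustrative example added by the editor; the mask is hypothetical.] For
  // a v16i8 shuffle whose low half is [4, 5, 6, 7, zz, zz, zz, zz]
  // (zz = known zero) and whose high half is entirely undef: the trailing
  // zeroable elements shrink Len to 4, every used element comes from V1 with
  // Idx = 4, and the matching below emits EXTRQI(V1, BitLen = 32,
  // BitIdx = 32), i.e. extract 32 bits starting at bit 32 and zero the rest of
  // the low half of the result.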
int Len = HalfSize; for (; Len > 0; --Len) if (!Zeroable[Len - 1]) break; assert(Len > 0 && "Zeroable shuffle mask"); // Attempt to match first Len sequential elements from the lower half. SDValue Src; int Idx = -1; for (int i = 0; i != Len; ++i) { int M = Mask[i]; if (M < 0) continue; SDValue &V = (M < Size ? V1 : V2); M = M % Size; // The extracted elements must start at a valid index and all mask // elements must be in the lower half. if (i > M || M >= HalfSize) return SDValue(); if (Idx < 0 || (Src == V && Idx == (M - i))) { Src = V; Idx = M - i; continue; } return SDValue(); } if (Idx < 0) return SDValue(); assert((Idx + Len) <= HalfSize && "Illegal extraction mask"); int BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f; int BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f; return DAG.getNode(X86ISD::EXTRQI, DL, VT, Src, DAG.getConstant(BitLen, DL, MVT::i8), DAG.getConstant(BitIdx, DL, MVT::i8)); }; if (SDValue ExtrQ = LowerAsEXTRQ()) return ExtrQ; // INSERTQ: Extract lowest Len elements from lower half of second source and // insert over first source, starting at Idx. // { A[0], .., A[Idx-1], B[0], .., B[Len-1], A[Idx+Len], .., UNDEF, ... } auto LowerAsInsertQ = [&]() { for (int Idx = 0; Idx != HalfSize; ++Idx) { SDValue Base; // Attempt to match first source from mask before insertion point. if (isUndefInRange(Mask, 0, Idx)) { /* EMPTY */ } else if (isSequentialOrUndefInRange(Mask, 0, Idx, 0)) { Base = V1; } else if (isSequentialOrUndefInRange(Mask, 0, Idx, Size)) { Base = V2; } else { continue; } // Extend the extraction length looking to match both the insertion of // the second source and the remaining elements of the first. for (int Hi = Idx + 1; Hi <= HalfSize; ++Hi) { SDValue Insert; int Len = Hi - Idx; // Match insertion. if (isSequentialOrUndefInRange(Mask, Idx, Len, 0)) { Insert = V1; } else if (isSequentialOrUndefInRange(Mask, Idx, Len, Size)) { Insert = V2; } else { continue; } // Match the remaining elements of the lower half. if (isUndefInRange(Mask, Hi, HalfSize - Hi)) { /* EMPTY */ } else if ((!Base || (Base == V1)) && isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, Hi)) { Base = V1; } else if ((!Base || (Base == V2)) && isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, Size + Hi)) { Base = V2; } else { continue; } // We may not have a base (first source) - this can safely be undefined. if (!Base) Base = DAG.getUNDEF(VT); int BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f; int BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f; return DAG.getNode(X86ISD::INSERTQI, DL, VT, Base, Insert, DAG.getConstant(BitLen, DL, MVT::i8), DAG.getConstant(BitIdx, DL, MVT::i8)); } } return SDValue(); }; if (SDValue InsertQ = LowerAsInsertQ()) return InsertQ; return SDValue(); } /// \brief Lower a vector shuffle as a zero or any extension. /// /// Given a specific number of elements, element bit width, and extension /// stride, produce either a zero or any extension based on the available /// features of the subtarget. The extended elements are consecutive and /// begin and can start from an offseted element index in the input; to /// avoid excess shuffling the offset must either being in the bottom lane /// or at the start of a higher lane. All extended elements must be from /// the same lane. 
static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( SDLoc DL, MVT VT, int Scale, int Offset, bool AnyExt, SDValue InputV, ArrayRef Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) { assert(Scale > 1 && "Need a scale to extend."); int EltBits = VT.getScalarSizeInBits(); int NumElements = VT.getVectorNumElements(); int NumEltsPerLane = 128 / EltBits; int OffsetLane = Offset / NumEltsPerLane; assert((EltBits == 8 || EltBits == 16 || EltBits == 32) && "Only 8, 16, and 32 bit elements can be extended."); assert(Scale * EltBits <= 64 && "Cannot zero extend past 64 bits."); assert(0 <= Offset && "Extension offset must be positive."); assert((Offset < NumEltsPerLane || Offset % NumEltsPerLane == 0) && "Extension offset must be in the first lane or start an upper lane."); // Check that an index is in same lane as the base offset. auto SafeOffset = [&](int Idx) { return OffsetLane == (Idx / NumEltsPerLane); }; // Shift along an input so that the offset base moves to the first element. auto ShuffleOffset = [&](SDValue V) { if (!Offset) return V; SmallVector ShMask((unsigned)NumElements, -1); for (int i = 0; i * Scale < NumElements; ++i) { int SrcIdx = i + Offset; ShMask[i] = SafeOffset(SrcIdx) ? SrcIdx : -1; } return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), ShMask); }; // Found a valid zext mask! Try various lowering strategies based on the // input type and available ISA extensions. if (Subtarget->hasSSE41()) { // Not worth offseting 128-bit vectors if scale == 2, a pattern using // PUNPCK will catch this in a later shuffle match. if (Offset && Scale == 2 && VT.is128BitVector()) return SDValue(); MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale), NumElements / Scale); InputV = DAG.getNode(X86ISD::VZEXT, DL, ExtVT, ShuffleOffset(InputV)); return DAG.getBitcast(VT, InputV); } assert(VT.is128BitVector() && "Only 128-bit vectors can be extended."); // For any extends we can cheat for larger element sizes and use shuffle // instructions that can fold with a load and/or copy. if (AnyExt && EltBits == 32) { int PSHUFDMask[4] = {Offset, -1, SafeOffset(Offset + 1) ? Offset + 1 : -1, -1}; return DAG.getBitcast( VT, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, InputV), getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); } if (AnyExt && EltBits == 16 && Scale > 2) { int PSHUFDMask[4] = {Offset / 2, -1, SafeOffset(Offset + 1) ? (Offset + 1) / 2 : -1, -1}; InputV = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, InputV), getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)); int PSHUFWMask[4] = {1, -1, -1, -1}; unsigned OddEvenOp = (Offset & 1 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW); return DAG.getBitcast( VT, DAG.getNode(OddEvenOp, DL, MVT::v8i16, DAG.getBitcast(MVT::v8i16, InputV), getV4X86ShuffleImm8ForMask(PSHUFWMask, DL, DAG))); } // The SSE4A EXTRQ instruction can efficiently extend the first 2 lanes // to 64-bits. 
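  // [Illustrative note added by the editor; the example mask is hypothetical.]
  // For instance, zero-extending the first two i16 elements of a v8i16 input
  // to i64 (Scale = 4, Offset = 0, mask [0, zz, zz, zz, 1, zz, zz, zz]) can be
  // handled on SSE4A by the branch below as two EXTRQI extractions -- element
  // 0 at bit index 0 and element 1 at bit index 16, each zero-filled within
  // its 64-bit result -- followed by a v2i64 UNPCKL to interleave them.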
if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget->hasSSE4A()) { assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!"); assert(VT.is128BitVector() && "Unexpected vector width!"); int LoIdx = Offset * EltBits; SDValue Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV, DAG.getConstant(EltBits, DL, MVT::i8), DAG.getConstant(LoIdx, DL, MVT::i8))); if (isUndefInRange(Mask, NumElements / 2, NumElements / 2) || !SafeOffset(Offset + 1)) return DAG.getNode(ISD::BITCAST, DL, VT, Lo); int HiIdx = (Offset + 1) * EltBits; SDValue Hi = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV, DAG.getConstant(EltBits, DL, MVT::i8), DAG.getConstant(HiIdx, DL, MVT::i8))); return DAG.getNode(ISD::BITCAST, DL, VT, DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, Lo, Hi)); } // If this would require more than 2 unpack instructions to expand, use // pshufb when available. We can only use more than 2 unpack instructions // when zero extending i8 elements which also makes it easier to use pshufb. if (Scale > 4 && EltBits == 8 && Subtarget->hasSSSE3()) { assert(NumElements == 16 && "Unexpected byte vector width!"); SDValue PSHUFBMask[16]; for (int i = 0; i < 16; ++i) { int Idx = Offset + (i / Scale); PSHUFBMask[i] = DAG.getConstant( (i % Scale == 0 && SafeOffset(Idx)) ? Idx : 0x80, DL, MVT::i8); } InputV = DAG.getBitcast(MVT::v16i8, InputV); return DAG.getBitcast(VT, DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV, DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, PSHUFBMask))); } // If we are extending from an offset, ensure we start on a boundary that // we can unpack from. int AlignToUnpack = Offset % (NumElements / Scale); if (AlignToUnpack) { SmallVector ShMask((unsigned)NumElements, -1); for (int i = AlignToUnpack; i < NumElements; ++i) ShMask[i - AlignToUnpack] = i; InputV = DAG.getVectorShuffle(VT, DL, InputV, DAG.getUNDEF(VT), ShMask); Offset -= AlignToUnpack; } // Otherwise emit a sequence of unpacks. do { unsigned UnpackLoHi = X86ISD::UNPCKL; if (Offset >= (NumElements / 2)) { UnpackLoHi = X86ISD::UNPCKH; Offset -= (NumElements / 2); } MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements); SDValue Ext = AnyExt ? DAG.getUNDEF(InputVT) : getZeroVector(InputVT, Subtarget, DAG, DL); InputV = DAG.getBitcast(InputVT, InputV); InputV = DAG.getNode(UnpackLoHi, DL, InputVT, InputV, Ext); Scale /= 2; EltBits *= 2; NumElements /= 2; } while (Scale > 1); return DAG.getBitcast(VT, InputV); } /// \brief Try to lower a vector shuffle as a zero extension on any microarch. /// /// This routine will try to do everything in its power to cleverly lower /// a shuffle which happens to match the pattern of a zero extend. It doesn't /// check for the profitability of this lowering, it tries to aggressively /// match this pattern. It will use all of the micro-architectural details it /// can to emit an efficient lowering. It handles both blends with all-zero /// inputs to explicitly zero-extend and undef-lanes (sometimes undef due to /// masking out later). /// /// The reason we have dedicated lowering for zext-style shuffles is that they /// are both incredibly common and often quite performance sensitive. 
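// [Illustrative example added by the editor; the mask is hypothetical.] A
// typical zext-style mask for the routine below is the v16i8 mask
// [0, zz, zz, zz, 1, zz, zz, zz, 2, zz, zz, zz, 3, zz, zz, zz]
// (zz = known zero): every fourth element is a consecutive byte from V1 and
// the rest are zeroable, i.e. a Scale = 4 zero extension of i8 to i32. With
// SSE4.1 this maps onto a single VZEXT (PMOVZX-style) node.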
static SDValue lowerVectorShuffleAsZeroOrAnyExtend( SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); int Bits = VT.getSizeInBits(); int NumLanes = Bits / 128; int NumElements = VT.getVectorNumElements(); int NumEltsPerLane = NumElements / NumLanes; assert(VT.getScalarSizeInBits() <= 32 && "Exceeds 32-bit integer zero extension limit"); assert((int)Mask.size() == NumElements && "Unexpected shuffle mask size"); // Define a helper function to check a particular ext-scale and lower to it if // valid. auto Lower = [&](int Scale) -> SDValue { SDValue InputV; bool AnyExt = true; int Offset = 0; int Matches = 0; for (int i = 0; i < NumElements; ++i) { int M = Mask[i]; if (M == -1) continue; // Valid anywhere but doesn't tell us anything. if (i % Scale != 0) { // Each of the extended elements need to be zeroable. if (!Zeroable[i]) return SDValue(); // We no longer are in the anyext case. AnyExt = false; continue; } // Each of the base elements needs to be consecutive indices into the // same input vector. SDValue V = M < NumElements ? V1 : V2; M = M % NumElements; if (!InputV) { InputV = V; Offset = M - (i / Scale); } else if (InputV != V) return SDValue(); // Flip-flopping inputs. // Offset must start in the lowest 128-bit lane or at the start of an // upper lane. // FIXME: Is it ever worth allowing a negative base offset? if (!((0 <= Offset && Offset < NumEltsPerLane) || (Offset % NumEltsPerLane) == 0)) return SDValue(); // If we are offsetting, all referenced entries must come from the same // lane. if (Offset && (Offset / NumEltsPerLane) != (M / NumEltsPerLane)) return SDValue(); if ((M % NumElements) != (Offset + (i / Scale))) return SDValue(); // Non-consecutive strided elements. Matches++; } // If we fail to find an input, we have a zero-shuffle which should always // have already been handled. // FIXME: Maybe handle this here in case during blending we end up with one? if (!InputV) return SDValue(); // If we are offsetting, don't extend if we only match a single input, we // can always do better by using a basic PSHUF or PUNPCK. if (Offset != 0 && Matches < 2) return SDValue(); return lowerVectorShuffleAsSpecificZeroOrAnyExtend( DL, VT, Scale, Offset, AnyExt, InputV, Mask, Subtarget, DAG); }; // The widest scale possible for extending is to a 64-bit integer. assert(Bits % 64 == 0 && "The number of bits in a vector must be divisible by 64 on x86!"); int NumExtElements = Bits / 64; // Each iteration, try extending the elements half as much, but into twice as // many elements. for (; NumExtElements < NumElements; NumExtElements *= 2) { assert(NumElements % NumExtElements == 0 && "The input vector size must be divisible by the extended size."); if (SDValue V = Lower(NumElements / NumExtElements)) return V; } // General extends failed, but 128-bit vectors may be able to use MOVQ. if (Bits != 128) return SDValue(); // Returns one of the source operands if the shuffle can be reduced to a // MOVQ, copying the lower 64-bits and zero-extending to the upper 64-bits. 
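  // [Illustrative example added by the editor; the mask is hypothetical.] For
  // a v4i32 mask [0, 1, zz, zz] (zz = known zero), the lower half is a
  // sequential copy of V1's low elements and the upper half is entirely
  // zeroable, so the lambda below returns V1 and the shuffle is lowered as
  // VZEXT_MOVL on v2i64, i.e. a MOVQ that keeps the low 64 bits and zeroes the
  // high 64 bits.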
auto CanZExtLowHalf = [&]() { for (int i = NumElements / 2; i != NumElements; ++i) if (!Zeroable[i]) return SDValue(); if (isSequentialOrUndefInRange(Mask, 0, NumElements / 2, 0)) return V1; if (isSequentialOrUndefInRange(Mask, 0, NumElements / 2, NumElements)) return V2; return SDValue(); }; if (SDValue V = CanZExtLowHalf()) { V = DAG.getBitcast(MVT::v2i64, V); V = DAG.getNode(X86ISD::VZEXT_MOVL, DL, MVT::v2i64, V); return DAG.getBitcast(VT, V); } // No viable ext lowering found. return SDValue(); } /// \brief Try to get a scalar value for a specific element of a vector. /// /// Looks through BUILD_VECTOR and SCALAR_TO_VECTOR nodes to find a scalar. static SDValue getScalarValueForVectorElement(SDValue V, int Idx, SelectionDAG &DAG) { MVT VT = V.getSimpleValueType(); MVT EltVT = VT.getVectorElementType(); while (V.getOpcode() == ISD::BITCAST) V = V.getOperand(0); // If the bitcasts shift the element size, we can't extract an equivalent // element from it. MVT NewVT = V.getSimpleValueType(); if (!NewVT.isVector() || NewVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) return SDValue(); if (V.getOpcode() == ISD::BUILD_VECTOR || (Idx == 0 && V.getOpcode() == ISD::SCALAR_TO_VECTOR)) { // Ensure the scalar operand is the same size as the destination. // FIXME: Add support for scalar truncation where possible. SDValue S = V.getOperand(Idx); if (EltVT.getSizeInBits() == S.getSimpleValueType().getSizeInBits()) return DAG.getNode(ISD::BITCAST, SDLoc(V), EltVT, S); } return SDValue(); } /// \brief Helper to test for a load that can be folded with x86 shuffles. /// /// This is particularly important because the set of instructions varies /// significantly based on whether the operand is a load or not. static bool isShuffleFoldableLoad(SDValue V) { while (V.getOpcode() == ISD::BITCAST) V = V.getOperand(0); return ISD::isNON_EXTLoad(V.getNode()); } /// \brief Try to lower insertion of a single element into a zero vector. /// /// This is a common pattern that we have especially efficient patterns to lower /// across all subtarget feature sets. static SDValue lowerVectorShuffleAsElementInsertion( SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); MVT ExtVT = VT; MVT EltVT = VT.getVectorElementType(); int V2Index = std::find_if(Mask.begin(), Mask.end(), [&Mask](int M) { return M >= (int)Mask.size(); }) - Mask.begin(); bool IsV1Zeroable = true; for (int i = 0, Size = Mask.size(); i < Size; ++i) if (i != V2Index && !Zeroable[i]) { IsV1Zeroable = false; break; } // Check for a single input from a SCALAR_TO_VECTOR node. // FIXME: All of this should be canonicalized into INSERT_VECTOR_ELT and // all the smarts here sunk into that routine. However, the current // lowering of BUILD_VECTOR makes that nearly impossible until the old // vector shuffle lowering is dead. SDValue V2S = getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(), DAG); if (V2S && DAG.getTargetLoweringInfo().isTypeLegal(V2S.getValueType())) { // We need to zext the scalar if it is smaller than an i32. V2S = DAG.getBitcast(EltVT, V2S); if (EltVT == MVT::i8 || EltVT == MVT::i16) { // Using zext to expand a narrow element won't work for non-zero // insertions. if (!IsV1Zeroable) return SDValue(); // Zero-extend directly to i32. 
ExtVT = MVT::v4i32; V2S = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, V2S); } V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S); } else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 || EltVT == MVT::i16) { // Either not inserting from the low element of the input or the input // element size is too small to use VZEXT_MOVL to clear the high bits. return SDValue(); } if (!IsV1Zeroable) { // If V1 can't be treated as a zero vector we have fewer options to lower // this. We can't support integer vectors or non-zero targets cheaply, and // the V1 elements can't be permuted in any way. assert(VT == ExtVT && "Cannot change extended type when non-zeroable!"); if (!VT.isFloatingPoint() || V2Index != 0) return SDValue(); SmallVector V1Mask(Mask.begin(), Mask.end()); V1Mask[V2Index] = -1; if (!isNoopShuffleMask(V1Mask)) return SDValue(); // This is essentially a special case blend operation, but if we have // general purpose blend operations, they are always faster. Bail and let // the rest of the lowering handle these as blends. if (Subtarget->hasSSE41()) return SDValue(); // Otherwise, use MOVSD or MOVSS. assert((EltVT == MVT::f32 || EltVT == MVT::f64) && "Only two types of floating point element types to handle!"); return DAG.getNode(EltVT == MVT::f32 ? X86ISD::MOVSS : X86ISD::MOVSD, DL, ExtVT, V1, V2); } // This lowering only works for the low element with floating point vectors. if (VT.isFloatingPoint() && V2Index != 0) return SDValue(); V2 = DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT, V2); if (ExtVT != VT) V2 = DAG.getBitcast(VT, V2); if (V2Index != 0) { // If we have 4 or fewer lanes we can cheaply shuffle the element into // the desired position. Otherwise it is more efficient to do a vector // shift left. We know that we can do a vector shift left because all // the inputs are zero. if (VT.isFloatingPoint() || VT.getVectorNumElements() <= 4) { SmallVector V2Shuffle(Mask.size(), 1); V2Shuffle[V2Index] = 0; V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle); } else { V2 = DAG.getBitcast(MVT::v2i64, V2); V2 = DAG.getNode( X86ISD::VSHLDQ, DL, MVT::v2i64, V2, DAG.getConstant(V2Index * EltVT.getSizeInBits() / 8, DL, DAG.getTargetLoweringInfo().getScalarShiftAmountTy( DAG.getDataLayout(), VT))); V2 = DAG.getBitcast(VT, V2); } } return V2; } /// \brief Try to lower broadcast of a single - truncated - integer element, /// coming from a scalar_to_vector/build_vector node \p V0 with larger elements. /// /// This assumes we have AVX2. static SDValue lowerVectorShuffleAsTruncBroadcast(SDLoc DL, MVT VT, SDValue V0, int BroadcastIdx, const X86Subtarget *Subtarget, SelectionDAG &DAG) { assert(Subtarget->hasAVX2() && "We can only lower integer broadcasts with AVX2!"); EVT EltVT = VT.getVectorElementType(); EVT V0VT = V0.getValueType(); assert(VT.isInteger() && "Unexpected non-integer trunc broadcast!"); assert(V0VT.isVector() && "Unexpected non-vector vector-sized value!"); EVT V0EltVT = V0VT.getVectorElementType(); if (!V0EltVT.isInteger()) return SDValue(); const unsigned EltSize = EltVT.getSizeInBits(); const unsigned V0EltSize = V0EltVT.getSizeInBits(); // This is only a truncation if the original element type is larger. 
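  // [Illustrative example added by the editor; the operands are hypothetical.]
  // For a v8i16 broadcast of element 1 where the source V0 is a v4i32
  // BUILD_VECTOR: Scale = 32 / 16 = 2, so the scalar lives in V0 operand 0 at
  // a non-zero offset (OffsetIdx = 1). The code below shifts that i32 scalar
  // right by 16 bits, truncates it to i16, and feeds the result to VBROADCAST.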
if (V0EltSize <= EltSize) return SDValue(); assert(((V0EltSize % EltSize) == 0) && "Scalar type sizes must all be powers of 2 on x86!"); const unsigned V0Opc = V0.getOpcode(); const unsigned Scale = V0EltSize / EltSize; const unsigned V0BroadcastIdx = BroadcastIdx / Scale; if ((V0Opc != ISD::SCALAR_TO_VECTOR || V0BroadcastIdx != 0) && V0Opc != ISD::BUILD_VECTOR) return SDValue(); SDValue Scalar = V0.getOperand(V0BroadcastIdx); // If we're extracting non-least-significant bits, shift so we can truncate. // Hopefully, we can fold away the trunc/srl/load into the broadcast. // Even if we can't (and !isShuffleFoldableLoad(Scalar)), prefer // vpbroadcast+vmovd+shr to vpshufb(m)+vmovd. if (const int OffsetIdx = BroadcastIdx % Scale) Scalar = DAG.getNode(ISD::SRL, DL, Scalar.getValueType(), Scalar, DAG.getConstant(OffsetIdx * EltSize, DL, Scalar.getValueType())); return DAG.getNode(X86ISD::VBROADCAST, DL, VT, DAG.getNode(ISD::TRUNCATE, DL, EltVT, Scalar)); } /// \brief Try to lower broadcast of a single element. /// /// For convenience, this code also bundles all of the subtarget feature set /// filtering. While a little annoying to re-dispatch on type here, there isn't /// a convenient way to factor it out. /// FIXME: This is very similar to LowerVectorBroadcast - can we merge them? static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, ArrayRef Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) { if (!Subtarget->hasAVX()) return SDValue(); if (VT.isInteger() && !Subtarget->hasAVX2()) return SDValue(); // Check that the mask is a broadcast. int BroadcastIdx = -1; for (int M : Mask) if (M >= 0 && BroadcastIdx == -1) BroadcastIdx = M; else if (M >= 0 && M != BroadcastIdx) return SDValue(); assert(BroadcastIdx < (int)Mask.size() && "We only expect to be called with " "a sorted mask where the broadcast " "comes from V1."); // Go up the chain of (vector) values to find a scalar load that we can // combine with the broadcast. for (;;) { switch (V.getOpcode()) { case ISD::CONCAT_VECTORS: { int OperandSize = Mask.size() / V.getNumOperands(); V = V.getOperand(BroadcastIdx / OperandSize); BroadcastIdx %= OperandSize; continue; } case ISD::INSERT_SUBVECTOR: { SDValue VOuter = V.getOperand(0), VInner = V.getOperand(1); auto ConstantIdx = dyn_cast(V.getOperand(2)); if (!ConstantIdx) break; int BeginIdx = (int)ConstantIdx->getZExtValue(); int EndIdx = BeginIdx + (int)VInner.getSimpleValueType().getVectorNumElements(); if (BroadcastIdx >= BeginIdx && BroadcastIdx < EndIdx) { BroadcastIdx -= BeginIdx; V = VInner; } else { V = VOuter; } continue; } } break; } // Check if this is a broadcast of a scalar. We special case lowering // for scalars so that we can more effectively fold with loads. // First, look through bitcast: if the original value has a larger element // type than the shuffle, the broadcast element is in essence truncated. // Make that explicit to ease folding. if (V.getOpcode() == ISD::BITCAST && VT.isInteger()) if (SDValue TruncBroadcast = lowerVectorShuffleAsTruncBroadcast( DL, VT, V.getOperand(0), BroadcastIdx, Subtarget, DAG)) return TruncBroadcast; MVT BroadcastVT = VT; // Peek through any bitcast (only useful for loads). SDValue BC = V; while (BC.getOpcode() == ISD::BITCAST) BC = BC.getOperand(0); // Also check the simpler case, where we can directly reuse the scalar. 
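  // [Illustrative example added by the editor; the types are hypothetical.]
  // For a v8i32 shuffle mask [5, 5, 5, 5, 5, 5, 5, 5] where V is a
  // CONCAT_VECTORS of two v4i32 values, the loop above steps into operand 1
  // and rebases BroadcastIdx to 1; if that operand turns out to be a plain
  // load, the code below can narrow it to a scalar load of just the broadcast
  // element and emit a single VBROADCAST from memory.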
if (V.getOpcode() == ISD::BUILD_VECTOR || (V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) { V = V.getOperand(BroadcastIdx); // If the scalar isn't a load, we can't broadcast from it in AVX1. // Only AVX2 has register broadcasts. if (!Subtarget->hasAVX2() && !isShuffleFoldableLoad(V)) return SDValue(); } else if (MayFoldLoad(BC) && !cast(BC)->isVolatile()) { // 32-bit targets need to load i64 as a f64 and then bitcast the result. if (!Subtarget->is64Bit() && VT.getScalarType() == MVT::i64) BroadcastVT = MVT::getVectorVT(MVT::f64, VT.getVectorNumElements()); // If we are broadcasting a load that is only used by the shuffle // then we can reduce the vector load to the broadcasted scalar load. LoadSDNode *Ld = cast(BC); SDValue BaseAddr = Ld->getOperand(1); EVT AddrVT = BaseAddr.getValueType(); EVT SVT = BroadcastVT.getScalarType(); unsigned Offset = BroadcastIdx * SVT.getStoreSize(); SDValue NewAddr = DAG.getNode( ISD::ADD, DL, AddrVT, BaseAddr, DAG.getConstant(Offset, DL, AddrVT)); V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr, DAG.getMachineFunction().getMachineMemOperand( Ld->getMemOperand(), Offset, SVT.getStoreSize())); } else if (BroadcastIdx != 0 || !Subtarget->hasAVX2()) { // We can't broadcast from a vector register without AVX2, and we can only // broadcast from the zero-element of a vector register. return SDValue(); } V = DAG.getNode(X86ISD::VBROADCAST, DL, BroadcastVT, V); return DAG.getBitcast(VT, V); } // Check for whether we can use INSERTPS to perform the shuffle. We only use // INSERTPS when the V1 elements are already in the correct locations // because otherwise we can just always use two SHUFPS instructions which // are much smaller to encode than a SHUFPS and an INSERTPS. We can also // perform INSERTPS if a single V1 element is out of place and all V2 // elements are zeroable. static SDValue lowerVectorShuffleAsInsertPS(SDValue Op, SDValue V1, SDValue V2, ArrayRef Mask, SelectionDAG &DAG) { assert(Op.getSimpleValueType() == MVT::v4f32 && "Bad shuffle type!"); assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!"); assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); unsigned ZMask = 0; int V1DstIndex = -1; int V2DstIndex = -1; bool V1UsedInPlace = false; for (int i = 0; i < 4; ++i) { // Synthesize a zero mask from the zeroable elements (includes undefs). if (Zeroable[i]) { ZMask |= 1 << i; continue; } // Flag if we use any V1 inputs in place. if (i == Mask[i]) { V1UsedInPlace = true; continue; } // We can only insert a single non-zeroable element. if (V1DstIndex != -1 || V2DstIndex != -1) return SDValue(); if (Mask[i] < 4) { // V1 input out of place for insertion. V1DstIndex = i; } else { // V2 input for insertion. V2DstIndex = i; } } // Don't bother if we have no (non-zeroable) element for insertion. if (V1DstIndex == -1 && V2DstIndex == -1) return SDValue(); // Determine element insertion src/dst indices. The src index is from the // start of the inserted vector, not the start of the concatenated vector. unsigned V2SrcIndex = 0; if (V1DstIndex != -1) { // If we have a V1 input out of place, we use V1 as the V2 element insertion // and don't use the original V2 at all. 
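  // [Illustrative example added by the editor; the mask is hypothetical.] The
  // INSERTPS immediate built a few lines below packs the source element,
  // destination slot and zero mask as (V2SrcIndex << 6) | (V2DstIndex << 4) |
  // ZMask. For the v4f32 mask [0, 1, 6, 3] with nothing zeroable, V1 is used
  // in place for lanes 0, 1 and 3, element 2 of V2 is inserted into lane 2,
  // and the immediate is (2 << 6) | (2 << 4) | 0 = 0xA0.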
V2SrcIndex = Mask[V1DstIndex]; V2DstIndex = V1DstIndex; V2 = V1; } else { V2SrcIndex = Mask[V2DstIndex] - 4; } // If no V1 inputs are used in place, then the result is created only from // the zero mask and the V2 insertion - so remove V1 dependency. if (!V1UsedInPlace) V1 = DAG.getUNDEF(MVT::v4f32); unsigned InsertPSMask = V2SrcIndex << 6 | V2DstIndex << 4 | ZMask; assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!"); // Insert the V2 element into the desired position. SDLoc DL(Op); return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2, DAG.getConstant(InsertPSMask, DL, MVT::i8)); } /// \brief Try to lower a shuffle as a permute of the inputs followed by an /// UNPCK instruction. /// /// This specifically targets cases where we end up with alternating between /// the two inputs, and so can permute them into something that feeds a single /// UNPCK instruction. Note that this routine only targets integer vectors /// because for floating point vectors we have a generalized SHUFPS lowering /// strategy that handles everything that doesn't *exactly* match an unpack, /// making this clever lowering unnecessary. static SDValue lowerVectorShuffleAsPermuteAndUnpack(SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, SelectionDAG &DAG) { assert(!VT.isFloatingPoint() && "This routine only supports integer vectors."); assert(!isSingleInputShuffleMask(Mask) && "This routine should only be used when blending two inputs."); assert(Mask.size() >= 2 && "Single element masks are invalid."); int Size = Mask.size(); int NumLoInputs = std::count_if(Mask.begin(), Mask.end(), [Size](int M) { return M >= 0 && M % Size < Size / 2; }); int NumHiInputs = std::count_if( Mask.begin(), Mask.end(), [Size](int M) { return M % Size >= Size / 2; }); bool UnpackLo = NumLoInputs >= NumHiInputs; auto TryUnpack = [&](MVT UnpackVT, int Scale) { SmallVector V1Mask(Mask.size(), -1); SmallVector V2Mask(Mask.size(), -1); for (int i = 0; i < Size; ++i) { if (Mask[i] < 0) continue; // Each element of the unpack contains Scale elements from this mask. int UnpackIdx = i / Scale; // We only handle the case where V1 feeds the first slots of the unpack. // We rely on canonicalization to ensure this is the case. if ((UnpackIdx % 2 == 0) != (Mask[i] < Size)) return SDValue(); // Setup the mask for this input. The indexing is tricky as we have to // handle the unpack stride. SmallVectorImpl &VMask = (UnpackIdx % 2 == 0) ? V1Mask : V2Mask; VMask[(UnpackIdx / 2) * Scale + i % Scale + (UnpackLo ? 0 : Size / 2)] = Mask[i] % Size; } // If we will have to shuffle both inputs to use the unpack, check whether // we can just unpack first and shuffle the result. If so, skip this unpack. if ((NumLoInputs == 0 || NumHiInputs == 0) && !isNoopShuffleMask(V1Mask) && !isNoopShuffleMask(V2Mask)) return SDValue(); // Shuffle the inputs into place. V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask); V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask); // Cast the inputs to the type we will use to unpack them. V1 = DAG.getBitcast(UnpackVT, V1); V2 = DAG.getBitcast(UnpackVT, V2); // Unpack the inputs and cast the result back to the desired type. return DAG.getBitcast( VT, DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL, UnpackVT, V1, V2)); }; // We try each unpack from the largest to the smallest to try and find one // that fits this mask. 
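  // [Editorial note, added for clarity.] For a v8i16 shuffle this means
  // TryUnpack is attempted with UnpackVT = v2i64 (Scale 4), then v4i32
  // (Scale 2), and finally v8i16 (Scale 1), stopping at the first granularity
  // whose unpack pattern the mask fits.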
int OrigNumElements = VT.getVectorNumElements(); int OrigScalarSize = VT.getScalarSizeInBits(); for (int ScalarSize = 64; ScalarSize >= OrigScalarSize; ScalarSize /= 2) { int Scale = ScalarSize / OrigScalarSize; int NumElements = OrigNumElements / Scale; MVT UnpackVT = MVT::getVectorVT(MVT::getIntegerVT(ScalarSize), NumElements); if (SDValue Unpack = TryUnpack(UnpackVT, Scale)) return Unpack; } // If none of the unpack-rooted lowerings worked (or were profitable) try an // initial unpack. if (NumLoInputs == 0 || NumHiInputs == 0) { assert((NumLoInputs > 0 || NumHiInputs > 0) && "We have to have *some* inputs!"); int HalfOffset = NumLoInputs == 0 ? Size / 2 : 0; // FIXME: We could consider the total complexity of the permute of each // possible unpacking. Or at the least we should consider how many // half-crossings are created. // FIXME: We could consider commuting the unpacks. SmallVector PermMask; PermMask.assign(Size, -1); for (int i = 0; i < Size; ++i) { if (Mask[i] < 0) continue; assert(Mask[i] % Size >= HalfOffset && "Found input from wrong half!"); PermMask[i] = 2 * ((Mask[i] % Size) - HalfOffset) + (Mask[i] < Size ? 0 : 1); } return DAG.getVectorShuffle( VT, DL, DAG.getNode(NumLoInputs == 0 ? X86ISD::UNPCKH : X86ISD::UNPCKL, DL, VT, V1, V2), DAG.getUNDEF(VT), PermMask); } return SDValue(); } /// \brief Handle lowering of 2-lane 64-bit floating point shuffles. /// /// This is the basis function for the 2-lane 64-bit shuffles as we have full /// support for floating point shuffles but not integer shuffles. These /// instructions will incur a domain crossing penalty on some chips though so /// it is better to avoid lowering through this for integer vectors where /// possible. static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(Op.getSimpleValueType() == MVT::v2f64 && "Bad shuffle type!"); assert(V1.getSimpleValueType() == MVT::v2f64 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v2f64 && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!"); if (isSingleInputShuffleMask(Mask)) { // Use low duplicate instructions for masks that match their pattern. if (Subtarget->hasSSE3()) if (isShuffleEquivalent(V1, V2, Mask, {0, 0})) return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, V1); // Straight shuffle of a single input vector. Simulate this by using the // single input as both of the "inputs" to this instruction.. unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1); if (Subtarget->hasAVX()) { // If we have AVX, we can use VPERMILPS which will allow folding a load // into the shuffle. return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v2f64, V1, DAG.getConstant(SHUFPDMask, DL, MVT::i8)); } return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V1, DAG.getConstant(SHUFPDMask, DL, MVT::i8)); } assert(Mask[0] >= 0 && Mask[0] < 2 && "Non-canonicalized blend!"); assert(Mask[1] >= 2 && "Non-canonicalized blend!"); // If we have a single input, insert that into V1 if we can do so cheaply. if ((Mask[0] >= 2) + (Mask[1] >= 2) == 1) { if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( DL, MVT::v2f64, V1, V2, Mask, Subtarget, DAG)) return Insertion; // Try inverting the insertion since for v2 masks it is easy to do and we // can't reliably sort the mask one way or the other. int InverseMask[2] = {Mask[0] < 0 ? -1 : (Mask[0] ^ 2), Mask[1] < 0 ? 
-1 : (Mask[1] ^ 2)}; if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( DL, MVT::v2f64, V2, V1, InverseMask, Subtarget, DAG)) return Insertion; } // Try to use one of the special instruction patterns to handle two common // blend patterns if a zero-blend above didn't work. if (isShuffleEquivalent(V1, V2, Mask, {0, 3}) || isShuffleEquivalent(V1, V2, Mask, {1, 3})) if (SDValue V1S = getScalarValueForVectorElement(V1, Mask[0], DAG)) // We can either use a special instruction to load over the low double or // to move just the low double. return DAG.getNode( isShuffleFoldableLoad(V1S) ? X86ISD::MOVLPD : X86ISD::MOVSD, DL, MVT::v2f64, V2, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, V1S)); if (Subtarget->hasSSE41()) if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask, Subtarget, DAG)) return Blend; // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v2f64, Mask, V1, V2, DAG)) return V; unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1); return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V2, DAG.getConstant(SHUFPDMask, DL, MVT::i8)); } /// \brief Handle lowering of 2-lane 64-bit integer shuffles. /// /// Tries to lower a 2-lane 64-bit shuffle using shuffle operations provided by /// the integer unit to minimize domain crossing penalties. However, for blends /// it falls back to the floating point shuffle operation with appropriate bit /// casting. static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(Op.getSimpleValueType() == MVT::v2i64 && "Bad shuffle type!"); assert(V1.getSimpleValueType() == MVT::v2i64 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v2i64 && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!"); if (isSingleInputShuffleMask(Mask)) { // Check for being able to broadcast a single element. if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v2i64, V1, Mask, Subtarget, DAG)) return Broadcast; // Straight shuffle of a single input vector. For everything from SSE2 // onward this has a single fast instruction with no scary immediates. // We have to map the mask as it is actually a v4i32 shuffle instruction. V1 = DAG.getBitcast(MVT::v4i32, V1); int WidenedMask[4] = { std::max(Mask[0], 0) * 2, std::max(Mask[0], 0) * 2 + 1, std::max(Mask[1], 0) * 2, std::max(Mask[1], 0) * 2 + 1}; return DAG.getBitcast( MVT::v2i64, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1, getV4X86ShuffleImm8ForMask(WidenedMask, DL, DAG))); } assert(Mask[0] != -1 && "No undef lanes in multi-input v2 shuffles!"); assert(Mask[1] != -1 && "No undef lanes in multi-input v2 shuffles!"); assert(Mask[0] < 2 && "We sort V1 to be the first input."); assert(Mask[1] >= 2 && "We sort V2 to be the second input."); // If we have a blend of two PACKUS operations an the blend aligns with the // low and half halves, we can just merge the PACKUS operations. This is // particularly important as it lets us merge shuffles that this routine itself // creates. auto GetPackNode = [](SDValue V) { while (V.getOpcode() == ISD::BITCAST) V = V.getOperand(0); return V.getOpcode() == X86ISD::PACKUS ? V : SDValue(); }; if (SDValue V1Pack = GetPackNode(V1)) if (SDValue V2Pack = GetPackNode(V2)) return DAG.getBitcast(MVT::v2i64, DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, Mask[0] == 0 ? 
V1Pack.getOperand(0) : V1Pack.getOperand(1), Mask[1] == 2 ? V2Pack.getOperand(0) : V2Pack.getOperand(1))); // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v2i64, V1, V2, Mask, DAG)) return Shift; // When loading a scalar and then shuffling it into a vector we can often do // the insertion cheaply. if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG)) return Insertion; // Try inverting the insertion since for v2 masks it is easy to do and we // can't reliably sort the mask one way or the other. int InverseMask[2] = {Mask[0] ^ 2, Mask[1] ^ 2}; if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( DL, MVT::v2i64, V2, V1, InverseMask, Subtarget, DAG)) return Insertion; // We have different paths for blend lowering, but they all must use the // *exact* same predicate. bool IsBlendSupported = Subtarget->hasSSE41(); if (IsBlendSupported) if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG)) return Blend; // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v2i64, Mask, V1, V2, DAG)) return V; // Try to use byte rotation instructions. // Its more profitable for pre-SSSE3 to use shuffles/unpacks. if (Subtarget->hasSSSE3()) if (SDValue Rotate = lowerVectorShuffleAsByteRotate( DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG)) return Rotate; // If we have direct support for blends, we should lower by decomposing into // a permute. That will be faster than the domain cross. if (IsBlendSupported) return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v2i64, V1, V2, Mask, DAG); // We implement this with SHUFPD which is pretty lame because it will likely // incur 2 cycles of stall for integer vectors on Nehalem and older chips. // However, all the alternatives are still more cycles and newer chips don't // have this problem. It would be really nice if x86 had better shuffles here. V1 = DAG.getBitcast(MVT::v2f64, V1); V2 = DAG.getBitcast(MVT::v2f64, V2); return DAG.getBitcast(MVT::v2i64, DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask)); } /// \brief Test whether this can be lowered with a single SHUFPS instruction. /// /// This is used to disable more specialized lowerings when the shufps lowering /// will happen to be efficient. static bool isSingleSHUFPSMask(ArrayRef Mask) { // This routine only handles 128-bit shufps. assert(Mask.size() == 4 && "Unsupported mask size!"); // To lower with a single SHUFPS we need to have the low half and high half // each requiring a single input. if (Mask[0] != -1 && Mask[1] != -1 && (Mask[0] < 4) != (Mask[1] < 4)) return false; if (Mask[2] != -1 && Mask[3] != -1 && (Mask[2] < 4) != (Mask[3] < 4)) return false; return true; } /// \brief Lower a vector shuffle using the SHUFPS instruction. /// /// This is a helper routine dedicated to lowering vector shuffles using SHUFPS. /// It makes no assumptions about whether this is the *best* lowering, it simply /// uses it. 
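// [Illustrative worked example added by the editor; the mask is hypothetical.]
// For the v4f32 mask [0, 4, 2, 6] both halves mix V1 and V2, so the routine
// below first emits SHUFPS(V1, V2) with blend mask [0, 2, 0, 2], producing
// <v0, v2, w0, w2>, and then shuffles that single result with NewMask
// [0, 2, 1, 3] to obtain <v0, w0, v2, w2>, which is exactly elements
// [0, 4, 2, 6] of the concatenated inputs.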
static SDValue lowerVectorShuffleWithSHUFPS(SDLoc DL, MVT VT,
                                            ArrayRef<int> Mask, SDValue V1,
                                            SDValue V2, SelectionDAG &DAG) {
  SDValue LowV = V1, HighV = V2;
  int NewMask[4] = {Mask[0], Mask[1], Mask[2], Mask[3]};

  int NumV2Elements =
      std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; });

  if (NumV2Elements == 1) {
    int V2Index =
        std::find_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; }) -
        Mask.begin();

    // Compute the index adjacent to V2Index and in the same half by toggling
    // the low bit.
    int V2AdjIndex = V2Index ^ 1;

    if (Mask[V2AdjIndex] == -1) {
      // Handles all the cases where we have a single V2 element and an undef.
      // This will only ever happen in the high lanes because we commute the
      // vector otherwise.
      if (V2Index < 2)
        std::swap(LowV, HighV);
      NewMask[V2Index] -= 4;
    } else {
      // Handle the case where the V2 element ends up adjacent to a V1 element.
      // To make this work, blend them together as the first step.
      int V1Index = V2AdjIndex;
      int BlendMask[4] = {Mask[V2Index] - 4, 0, Mask[V1Index], 0};
      V2 = DAG.getNode(X86ISD::SHUFP, DL, VT, V2, V1,
                       getV4X86ShuffleImm8ForMask(BlendMask, DL, DAG));

      // Now proceed to reconstruct the final blend as we have the necessary
      // high or low half formed.
      if (V2Index < 2) {
        LowV = V2;
        HighV = V1;
      } else {
        HighV = V2;
      }
      NewMask[V1Index] = 2; // We put the V1 element in V2[2].
      NewMask[V2Index] = 0; // We shifted the V2 element into V2[0].
    }
  } else if (NumV2Elements == 2) {
    if (Mask[0] < 4 && Mask[1] < 4) {
      // Handle the easy case where we have V1 in the low lanes and V2 in the
      // high lanes.
      NewMask[2] -= 4;
      NewMask[3] -= 4;
    } else if (Mask[2] < 4 && Mask[3] < 4) {
      // We also handle the reversed case because this utility may get called
      // when we detect a SHUFPS pattern but can't easily commute the shuffle
      // to arrange things in the right direction.
      NewMask[0] -= 4;
      NewMask[1] -= 4;
      HighV = V1;
      LowV = V2;
    } else {
      // We have a mixture of V1 and V2 in both low and high lanes. Rather than
      // trying to place elements directly, just blend them and set up the
      // final shuffle to place them.
      // The first two blend mask elements are for V1, the second two are for
      // V2.
      int BlendMask[4] = {Mask[0] < 4 ? Mask[0] : Mask[1],
                          Mask[2] < 4 ? Mask[2] : Mask[3],
                          (Mask[0] >= 4 ? Mask[0] : Mask[1]) - 4,
                          (Mask[2] >= 4 ? Mask[2] : Mask[3]) - 4};

      V1 = DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
                       getV4X86ShuffleImm8ForMask(BlendMask, DL, DAG));

      // Now we do a normal shuffle of V1 by giving V1 as both operands to
      // a blend.
      LowV = HighV = V1;
      NewMask[0] = Mask[0] < 4 ? 0 : 2;
      NewMask[1] = Mask[0] < 4 ? 2 : 0;
      NewMask[2] = Mask[2] < 4 ? 1 : 3;
      NewMask[3] = Mask[2] < 4 ? 3 : 1;
    }
  }
  return DAG.getNode(X86ISD::SHUFP, DL, VT, LowV, HighV,
                     getV4X86ShuffleImm8ForMask(NewMask, DL, DAG));
}

/// \brief Lower 4-lane 32-bit floating point shuffles.
///
/// Uses instructions exclusively from the floating point unit to minimize
/// domain crossing penalties, as these are sufficient to implement all v4f32
/// shuffles.
static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(Op.getSimpleValueType() == MVT::v4f32 && "Bad shuffle type!"); assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); int NumV2Elements = std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; }); if (NumV2Elements == 0) { // Check for being able to broadcast a single element. if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v4f32, V1, Mask, Subtarget, DAG)) return Broadcast; // Use even/odd duplicate instructions for masks that match their pattern. if (Subtarget->hasSSE3()) { if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2})) return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v4f32, V1); if (isShuffleEquivalent(V1, V2, Mask, {1, 1, 3, 3})) return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v4f32, V1); } if (Subtarget->hasAVX()) { // If we have AVX, we can use VPERMILPS which will allow folding a load // into the shuffle. return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f32, V1, getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); } // Otherwise, use a straight shuffle of a single input vector. We pass the // input vector to both operands to simulate this with a SHUFPS. return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V1, getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); } // There are special ways we can lower some single-element blends. However, we // have custom ways we can lower more complex single-element blends below that // we defer to if both this and BLENDPS fail to match, so restrict this to // when the V2 input is targeting element 0 of the mask -- that is the fast // case here. if (NumV2Elements == 1 && Mask[0] >= 4) if (SDValue V = lowerVectorShuffleAsElementInsertion(DL, MVT::v4f32, V1, V2, Mask, Subtarget, DAG)) return V; if (Subtarget->hasSSE41()) { if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask, Subtarget, DAG)) return Blend; // Use INSERTPS if we can complete the shuffle efficiently. if (SDValue V = lowerVectorShuffleAsInsertPS(Op, V1, V2, Mask, DAG)) return V; if (!isSingleSHUFPSMask(Mask)) if (SDValue BlendPerm = lowerVectorShuffleAsBlendAndPermute( DL, MVT::v4f32, V1, V2, Mask, DAG)) return BlendPerm; } // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v4f32, Mask, V1, V2, DAG)) return V; // Otherwise fall back to a SHUFPS lowering strategy. return lowerVectorShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG); } /// \brief Lower 4-lane i32 vector shuffles. /// /// We try to handle these with integer-domain shuffles where we can, but for /// blends we use the floating point domain blend instructions. static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(Op.getSimpleValueType() == MVT::v4i32 && "Bad shuffle type!"); assert(V1.getSimpleValueType() == MVT::v4i32 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v4i32 && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); // Whenever we can lower this as a zext, that instruction is strictly faster // than any alternative. 
It also allows us to fold memory operands into the // shuffle in many cases. if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG)) return ZExt; int NumV2Elements = std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; }); if (NumV2Elements == 0) { // Check for being able to broadcast a single element. if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v4i32, V1, Mask, Subtarget, DAG)) return Broadcast; // Straight shuffle of a single input vector. For everything from SSE2 // onward this has a single fast instruction with no scary immediates. // We coerce the shuffle pattern to be compatible with UNPCK instructions // but we aren't actually going to use the UNPCK instruction because doing // so prevents folding a load into this instruction or making a copy. const int UnpackLoMask[] = {0, 0, 1, 1}; const int UnpackHiMask[] = {2, 2, 3, 3}; if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 1, 1})) Mask = UnpackLoMask; else if (isShuffleEquivalent(V1, V2, Mask, {2, 2, 3, 3})) Mask = UnpackHiMask; return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1, getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); } // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v4i32, V1, V2, Mask, DAG)) return Shift; // There are special ways we can lower some single-element blends. if (NumV2Elements == 1) if (SDValue V = lowerVectorShuffleAsElementInsertion(DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG)) return V; // We have different paths for blend lowering, but they all must use the // *exact* same predicate. bool IsBlendSupported = Subtarget->hasSSE41(); if (IsBlendSupported) if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG)) return Blend; if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask, DAG)) return Masked; // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v4i32, Mask, V1, V2, DAG)) return V; // Try to use byte rotation instructions. // Its more profitable for pre-SSSE3 to use shuffles/unpacks. if (Subtarget->hasSSSE3()) if (SDValue Rotate = lowerVectorShuffleAsByteRotate( DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG)) return Rotate; // If we have direct support for blends, we should lower by decomposing into // a permute. That will be faster than the domain cross. if (IsBlendSupported) return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i32, V1, V2, Mask, DAG); // Try to lower by permuting the inputs into an unpack instruction. if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(DL, MVT::v4i32, V1, V2, Mask, DAG)) return Unpack; // We implement this with SHUFPS because it can blend from two vectors. // Because we're going to eventually use SHUFPS, we use SHUFPS even to build // up the inputs, bypassing domain shift penalties that we would encur if we // directly used PSHUFD on Nehalem and older. For newer chips, this isn't // relevant. return DAG.getBitcast( MVT::v4i32, DAG.getVectorShuffle(MVT::v4f32, DL, DAG.getBitcast(MVT::v4f32, V1), DAG.getBitcast(MVT::v4f32, V2), Mask)); } /// \brief Lowering of single-input v8i16 shuffles is the cornerstone of SSE2 /// shuffle lowering, and the most complex part. 
/// /// The lowering strategy is to try to form pairs of input lanes which are /// targeted at the same half of the final vector, and then use a dword shuffle /// to place them onto the right half, and finally unpack the paired lanes into /// their final position. /// /// The exact breakdown of how to form these dword pairs and align them on the /// correct sides is really tricky. See the comments within the function for /// more of the details. /// /// This code also handles repeated 128-bit lanes of v8i16 shuffles, but each /// lane must shuffle the *exact* same way. In fact, you must pass a v8 Mask to /// this routine for it to work correctly. To shuffle a 256-bit or 512-bit i16 /// vector, form the analogous 128-bit 8-element Mask. static SDValue lowerV8I16GeneralSingleInputVectorShuffle( SDLoc DL, MVT VT, SDValue V, MutableArrayRef Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) { assert(VT.getVectorElementType() == MVT::i16 && "Bad input type!"); MVT PSHUFDVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2); assert(Mask.size() == 8 && "Shuffle mask length doen't match!"); MutableArrayRef LoMask = Mask.slice(0, 4); MutableArrayRef HiMask = Mask.slice(4, 4); SmallVector LoInputs; std::copy_if(LoMask.begin(), LoMask.end(), std::back_inserter(LoInputs), [](int M) { return M >= 0; }); std::sort(LoInputs.begin(), LoInputs.end()); LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()), LoInputs.end()); SmallVector HiInputs; std::copy_if(HiMask.begin(), HiMask.end(), std::back_inserter(HiInputs), [](int M) { return M >= 0; }); std::sort(HiInputs.begin(), HiInputs.end()); HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()), HiInputs.end()); int NumLToL = std::lower_bound(LoInputs.begin(), LoInputs.end(), 4) - LoInputs.begin(); int NumHToL = LoInputs.size() - NumLToL; int NumLToH = std::lower_bound(HiInputs.begin(), HiInputs.end(), 4) - HiInputs.begin(); int NumHToH = HiInputs.size() - NumLToH; MutableArrayRef LToLInputs(LoInputs.data(), NumLToL); MutableArrayRef LToHInputs(HiInputs.data(), NumLToH); MutableArrayRef HToLInputs(LoInputs.data() + NumLToL, NumHToL); MutableArrayRef HToHInputs(HiInputs.data() + NumLToH, NumHToH); // Simplify the 1-into-3 and 3-into-1 cases with a single pshufd. For all // such inputs we can swap two of the dwords across the half mark and end up // with <=2 inputs to each half in each half. Once there, we can fall through // to the generic code below. For example: // // Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h] // Mask: [0, 1, 2, 7, 4, 5, 6, 3] -----------------> [0, 1, 4, 7, 2, 3, 6, 5] // // However in some very rare cases we have a 1-into-3 or 3-into-1 on one half // and an existing 2-into-2 on the other half. In this case we may have to // pre-shuffle the 2-into-2 half to avoid turning it into a 3-into-1 or // 1-into-3 which could cause us to cycle endlessly fixing each side in turn. // Fortunately, we don't have to handle anything but a 2-into-2 pattern // because any other situation (including a 3-into-1 or 1-into-3 in the other // half than the one we target for fixing) will be fixed when we re-enter this // path. We will also combine away any sequence of PSHUFD instructions that // result into a single instruction. Here is an example of the tricky case: // // Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h] // Mask: [3, 7, 1, 0, 2, 7, 3, 5] -THIS-IS-BAD!!!!-> [5, 7, 1, 0, 4, 7, 5, 3] // // This now has a 1-into-3 in the high half! 
Instead, we do two shuffles: // // Input: [a, b, c, d, e, f, g, h] PSHUFHW[0,2,1,3]-> [a, b, c, d, e, g, f, h] // Mask: [3, 7, 1, 0, 2, 7, 3, 5] -----------------> [3, 7, 1, 0, 2, 7, 3, 6] // // Input: [a, b, c, d, e, g, f, h] -PSHUFD[0,2,1,3]-> [a, b, e, g, c, d, f, h] // Mask: [3, 7, 1, 0, 2, 7, 3, 6] -----------------> [5, 7, 1, 0, 4, 7, 5, 6] // // The result is fine to be handled by the generic logic. auto balanceSides = [&](ArrayRef AToAInputs, ArrayRef BToAInputs, ArrayRef BToBInputs, ArrayRef AToBInputs, int AOffset, int BOffset) { assert((AToAInputs.size() == 3 || AToAInputs.size() == 1) && "Must call this with A having 3 or 1 inputs from the A half."); assert((BToAInputs.size() == 1 || BToAInputs.size() == 3) && "Must call this with B having 1 or 3 inputs from the B half."); assert(AToAInputs.size() + BToAInputs.size() == 4 && "Must call this with either 3:1 or 1:3 inputs (summing to 4)."); bool ThreeAInputs = AToAInputs.size() == 3; // Compute the index of dword with only one word among the three inputs in // a half by taking the sum of the half with three inputs and subtracting // the sum of the actual three inputs. The difference is the remaining // slot. int ADWord, BDWord; int &TripleDWord = ThreeAInputs ? ADWord : BDWord; int &OneInputDWord = ThreeAInputs ? BDWord : ADWord; int TripleInputOffset = ThreeAInputs ? AOffset : BOffset; ArrayRef TripleInputs = ThreeAInputs ? AToAInputs : BToAInputs; int OneInput = ThreeAInputs ? BToAInputs[0] : AToAInputs[0]; int TripleInputSum = 0 + 1 + 2 + 3 + (4 * TripleInputOffset); int TripleNonInputIdx = TripleInputSum - std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0); TripleDWord = TripleNonInputIdx / 2; // We use xor with one to compute the adjacent DWord to whichever one the // OneInput is in. OneInputDWord = (OneInput / 2) ^ 1; // Check for one tricky case: We're fixing a 3<-1 or a 1<-3 shuffle for AToA // and BToA inputs. If there is also such a problem with the BToB and AToB // inputs, we don't try to fix it necessarily -- we'll recurse and see it in // the next pass. However, if we have a 2<-2 in the BToB and AToB inputs, it // is essential that we don't *create* a 3<-1 as then we might oscillate. if (BToBInputs.size() == 2 && AToBInputs.size() == 2) { // Compute how many inputs will be flipped by swapping these DWords. We // need // to balance this to ensure we don't form a 3-1 shuffle in the other // half. int NumFlippedAToBInputs = std::count(AToBInputs.begin(), AToBInputs.end(), 2 * ADWord) + std::count(AToBInputs.begin(), AToBInputs.end(), 2 * ADWord + 1); int NumFlippedBToBInputs = std::count(BToBInputs.begin(), BToBInputs.end(), 2 * BDWord) + std::count(BToBInputs.begin(), BToBInputs.end(), 2 * BDWord + 1); if ((NumFlippedAToBInputs == 1 && (NumFlippedBToBInputs == 0 || NumFlippedBToBInputs == 2)) || (NumFlippedBToBInputs == 1 && (NumFlippedAToBInputs == 0 || NumFlippedAToBInputs == 2))) { // We choose whether to fix the A half or B half based on whether that // half has zero flipped inputs. At zero, we may not be able to fix it // with that half. We also bias towards fixing the B half because that // will more commonly be the high half, and we have to bias one way. auto FixFlippedInputs = [&V, &DL, &Mask, &DAG](int PinnedIdx, int DWord, ArrayRef Inputs) { int FixIdx = PinnedIdx ^ 1; // The adjacent slot to the pinned slot. 
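          // Editorial note (illustrative, not part of the original source):
          // the two words of a dword differ only in their low index bit, so
          // XOR-ing with 1 always selects the other word of the same dword.
          // For example, a pinned index of 6 gives FixIdx == 7, and a pinned
          // index of 3 gives FixIdx == 2.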
bool IsFixIdxInput = std::find(Inputs.begin(), Inputs.end(), PinnedIdx ^ 1) != Inputs.end(); // Determine whether the free index is in the flipped dword or the // unflipped dword based on where the pinned index is. We use this bit // in an xor to conditionally select the adjacent dword. int FixFreeIdx = 2 * (DWord ^ (PinnedIdx / 2 == DWord)); bool IsFixFreeIdxInput = std::find(Inputs.begin(), Inputs.end(), FixFreeIdx) != Inputs.end(); if (IsFixIdxInput == IsFixFreeIdxInput) FixFreeIdx += 1; IsFixFreeIdxInput = std::find(Inputs.begin(), Inputs.end(), FixFreeIdx) != Inputs.end(); assert(IsFixIdxInput != IsFixFreeIdxInput && "We need to be changing the number of flipped inputs!"); int PSHUFHalfMask[] = {0, 1, 2, 3}; std::swap(PSHUFHalfMask[FixFreeIdx % 4], PSHUFHalfMask[FixIdx % 4]); V = DAG.getNode(FixIdx < 4 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW, DL, MVT::v8i16, V, getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG)); for (int &M : Mask) if (M != -1 && M == FixIdx) M = FixFreeIdx; else if (M != -1 && M == FixFreeIdx) M = FixIdx; }; if (NumFlippedBToBInputs != 0) { int BPinnedIdx = BToAInputs.size() == 3 ? TripleNonInputIdx : OneInput; FixFlippedInputs(BPinnedIdx, BDWord, BToBInputs); } else { assert(NumFlippedAToBInputs != 0 && "Impossible given predicates!"); int APinnedIdx = ThreeAInputs ? TripleNonInputIdx : OneInput; FixFlippedInputs(APinnedIdx, ADWord, AToBInputs); } } } int PSHUFDMask[] = {0, 1, 2, 3}; PSHUFDMask[ADWord] = BDWord; PSHUFDMask[BDWord] = ADWord; V = DAG.getBitcast( VT, DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, DAG.getBitcast(PSHUFDVT, V), getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); // Adjust the mask to match the new locations of A and B. for (int &M : Mask) if (M != -1 && M/2 == ADWord) M = 2 * BDWord + M % 2; else if (M != -1 && M/2 == BDWord) M = 2 * ADWord + M % 2; // Recurse back into this routine to re-compute state now that this isn't // a 3 and 1 problem. return lowerV8I16GeneralSingleInputVectorShuffle(DL, VT, V, Mask, Subtarget, DAG); }; if ((NumLToL == 3 && NumHToL == 1) || (NumLToL == 1 && NumHToL == 3)) return balanceSides(LToLInputs, HToLInputs, HToHInputs, LToHInputs, 0, 4); else if ((NumHToH == 3 && NumLToH == 1) || (NumHToH == 1 && NumLToH == 3)) return balanceSides(HToHInputs, LToHInputs, LToLInputs, HToLInputs, 4, 0); // At this point there are at most two inputs to the low and high halves from // each half. That means the inputs can always be grouped into dwords and // those dwords can then be moved to the correct half with a dword shuffle. // We use at most one low and one high word shuffle to collect these paired // inputs into dwords, and finally a dword shuffle to place them. int PSHUFLMask[4] = {-1, -1, -1, -1}; int PSHUFHMask[4] = {-1, -1, -1, -1}; int PSHUFDMask[4] = {-1, -1, -1, -1}; // First fix the masks for all the inputs that are staying in their // original halves. This will then dictate the targets of the cross-half // shuffles. auto fixInPlaceInputs = [&PSHUFDMask](ArrayRef InPlaceInputs, ArrayRef IncomingInputs, MutableArrayRef SourceHalfMask, MutableArrayRef HalfMask, int HalfOffset) { if (InPlaceInputs.empty()) return; if (InPlaceInputs.size() == 1) { SourceHalfMask[InPlaceInputs[0] - HalfOffset] = InPlaceInputs[0] - HalfOffset; PSHUFDMask[InPlaceInputs[0] / 2] = InPlaceInputs[0] / 2; return; } if (IncomingInputs.empty()) { // Just fix all of the in place inputs. 
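      // Illustrative example (editorial addition, not from the original
      // source): with HalfOffset == 4 and InPlaceInputs == {4, 7}, the loop
      // below records SourceHalfMask[0] = 0, SourceHalfMask[3] = 3 and
      // PSHUFDMask[2] = 2, PSHUFDMask[3] = 3, i.e. identity mappings for the
      // dwords that already hold their inputs.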
for (int Input : InPlaceInputs) { SourceHalfMask[Input - HalfOffset] = Input - HalfOffset; PSHUFDMask[Input / 2] = Input / 2; } return; } assert(InPlaceInputs.size() == 2 && "Cannot handle 3 or 4 inputs!"); SourceHalfMask[InPlaceInputs[0] - HalfOffset] = InPlaceInputs[0] - HalfOffset; // Put the second input next to the first so that they are packed into // a dword. We find the adjacent index by toggling the low bit. int AdjIndex = InPlaceInputs[0] ^ 1; SourceHalfMask[AdjIndex - HalfOffset] = InPlaceInputs[1] - HalfOffset; std::replace(HalfMask.begin(), HalfMask.end(), InPlaceInputs[1], AdjIndex); PSHUFDMask[AdjIndex / 2] = AdjIndex / 2; }; fixInPlaceInputs(LToLInputs, HToLInputs, PSHUFLMask, LoMask, 0); fixInPlaceInputs(HToHInputs, LToHInputs, PSHUFHMask, HiMask, 4); // Now gather the cross-half inputs and place them into a free dword of // their target half. // FIXME: This operation could almost certainly be simplified dramatically to // look more like the 3-1 fixing operation. auto moveInputsToRightHalf = [&PSHUFDMask]( MutableArrayRef IncomingInputs, ArrayRef ExistingInputs, MutableArrayRef SourceHalfMask, MutableArrayRef HalfMask, MutableArrayRef FinalSourceHalfMask, int SourceOffset, int DestOffset) { auto isWordClobbered = [](ArrayRef SourceHalfMask, int Word) { return SourceHalfMask[Word] != -1 && SourceHalfMask[Word] != Word; }; auto isDWordClobbered = [&isWordClobbered](ArrayRef SourceHalfMask, int Word) { int LowWord = Word & ~1; int HighWord = Word | 1; return isWordClobbered(SourceHalfMask, LowWord) || isWordClobbered(SourceHalfMask, HighWord); }; if (IncomingInputs.empty()) return; if (ExistingInputs.empty()) { // Map any dwords with inputs from them into the right half. for (int Input : IncomingInputs) { // If the source half mask maps over the inputs, turn those into // swaps and use the swapped lane. if (isWordClobbered(SourceHalfMask, Input - SourceOffset)) { if (SourceHalfMask[SourceHalfMask[Input - SourceOffset]] == -1) { SourceHalfMask[SourceHalfMask[Input - SourceOffset]] = Input - SourceOffset; // We have to swap the uses in our half mask in one sweep. for (int &M : HalfMask) if (M == SourceHalfMask[Input - SourceOffset] + SourceOffset) M = Input; else if (M == Input) M = SourceHalfMask[Input - SourceOffset] + SourceOffset; } else { assert(SourceHalfMask[SourceHalfMask[Input - SourceOffset]] == Input - SourceOffset && "Previous placement doesn't match!"); } // Note that this correctly re-maps both when we do a swap and when // we observe the other side of the swap above. We rely on that to // avoid swapping the members of the input list directly. Input = SourceHalfMask[Input - SourceOffset] + SourceOffset; } // Map the input's dword into the correct half. if (PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] == -1) PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] = Input / 2; else assert(PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] == Input / 2 && "Previous placement doesn't match!"); } // And just directly shift any other-half mask elements to be same-half // as we will have mirrored the dword containing the element into the // same position within that half. for (int &M : HalfMask) if (M >= SourceOffset && M < SourceOffset + 4) { M = M - SourceOffset + DestOffset; assert(M >= 0 && "This should never wrap below zero!"); } return; } // Ensure we have the input in a viable dword of its current half. This // is particularly tricky because the original position may be clobbered // by inputs being moved and *staying* in that half. 
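    // Illustrative note (editorial addition, not from the original source):
    // "clobbered" here is in the sense of isWordClobbered above, meaning a
    // slot is already scheduled to receive a *different* word. For example,
    // with SourceHalfMask == {2, -1, -1, -1}, word slot 0 is clobbered (it
    // will be filled from word 2) while slots 1 through 3 are still free.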
if (IncomingInputs.size() == 1) { if (isWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) { int InputFixed = std::find(std::begin(SourceHalfMask), std::end(SourceHalfMask), -1) - std::begin(SourceHalfMask) + SourceOffset; SourceHalfMask[InputFixed - SourceOffset] = IncomingInputs[0] - SourceOffset; std::replace(HalfMask.begin(), HalfMask.end(), IncomingInputs[0], InputFixed); IncomingInputs[0] = InputFixed; } } else if (IncomingInputs.size() == 2) { if (IncomingInputs[0] / 2 != IncomingInputs[1] / 2 || isDWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) { // We have two non-adjacent or clobbered inputs we need to extract from // the source half. To do this, we need to map them into some adjacent // dword slot in the source mask. int InputsFixed[2] = {IncomingInputs[0] - SourceOffset, IncomingInputs[1] - SourceOffset}; // If there is a free slot in the source half mask adjacent to one of // the inputs, place the other input in it. We use (Index XOR 1) to // compute an adjacent index. if (!isWordClobbered(SourceHalfMask, InputsFixed[0]) && SourceHalfMask[InputsFixed[0] ^ 1] == -1) { SourceHalfMask[InputsFixed[0]] = InputsFixed[0]; SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1]; InputsFixed[1] = InputsFixed[0] ^ 1; } else if (!isWordClobbered(SourceHalfMask, InputsFixed[1]) && SourceHalfMask[InputsFixed[1] ^ 1] == -1) { SourceHalfMask[InputsFixed[1]] = InputsFixed[1]; SourceHalfMask[InputsFixed[1] ^ 1] = InputsFixed[0]; InputsFixed[0] = InputsFixed[1] ^ 1; } else if (SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] == -1 && SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] == -1) { // The two inputs are in the same DWord but it is clobbered and the // adjacent DWord isn't used at all. Move both inputs to the free // slot. SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] = InputsFixed[0]; SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] = InputsFixed[1]; InputsFixed[0] = 2 * ((InputsFixed[0] / 2) ^ 1); InputsFixed[1] = 2 * ((InputsFixed[0] / 2) ^ 1) + 1; } else { // The only way we hit this point is if there is no clobbering // (because there are no off-half inputs to this half) and there is no // free slot adjacent to one of the inputs. In this case, we have to // swap an input with a non-input. for (int i = 0; i < 4; ++i) assert((SourceHalfMask[i] == -1 || SourceHalfMask[i] == i) && "We can't handle any clobbers here!"); assert(InputsFixed[1] != (InputsFixed[0] ^ 1) && "Cannot have adjacent inputs here!"); SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1]; SourceHalfMask[InputsFixed[1]] = InputsFixed[0] ^ 1; // We also have to update the final source mask in this case because // it may need to undo the above swap. for (int &M : FinalSourceHalfMask) if (M == (InputsFixed[0] ^ 1) + SourceOffset) M = InputsFixed[1] + SourceOffset; else if (M == InputsFixed[1] + SourceOffset) M = (InputsFixed[0] ^ 1) + SourceOffset; InputsFixed[1] = InputsFixed[0] ^ 1; } // Point everything at the fixed inputs. for (int &M : HalfMask) if (M == IncomingInputs[0]) M = InputsFixed[0] + SourceOffset; else if (M == IncomingInputs[1]) M = InputsFixed[1] + SourceOffset; IncomingInputs[0] = InputsFixed[0] + SourceOffset; IncomingInputs[1] = InputsFixed[1] + SourceOffset; } } else { llvm_unreachable("Unhandled input size!"); } // Now hoist the DWord down to the right half. int FreeDWord = (PSHUFDMask[DestOffset / 2] == -1 ? 
0 : 1) + DestOffset / 2; assert(PSHUFDMask[FreeDWord] == -1 && "DWord not free"); PSHUFDMask[FreeDWord] = IncomingInputs[0] / 2; for (int &M : HalfMask) for (int Input : IncomingInputs) if (M == Input) M = FreeDWord * 2 + Input % 2; }; moveInputsToRightHalf(HToLInputs, LToLInputs, PSHUFHMask, LoMask, HiMask, /*SourceOffset*/ 4, /*DestOffset*/ 0); moveInputsToRightHalf(LToHInputs, HToHInputs, PSHUFLMask, HiMask, LoMask, /*SourceOffset*/ 0, /*DestOffset*/ 4); // Now enact all the shuffles we've computed to move the inputs into their // target half. if (!isNoopShuffleMask(PSHUFLMask)) V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V, getV4X86ShuffleImm8ForMask(PSHUFLMask, DL, DAG)); if (!isNoopShuffleMask(PSHUFHMask)) V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V, getV4X86ShuffleImm8ForMask(PSHUFHMask, DL, DAG)); if (!isNoopShuffleMask(PSHUFDMask)) V = DAG.getBitcast( VT, DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, DAG.getBitcast(PSHUFDVT, V), getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); // At this point, each half should contain all its inputs, and we can then // just shuffle them into their final position. assert(std::count_if(LoMask.begin(), LoMask.end(), [](int M) { return M >= 4; }) == 0 && "Failed to lift all the high half inputs to the low mask!"); assert(std::count_if(HiMask.begin(), HiMask.end(), [](int M) { return M >= 0 && M < 4; }) == 0 && "Failed to lift all the low half inputs to the high mask!"); // Do a half shuffle for the low mask. if (!isNoopShuffleMask(LoMask)) V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V, getV4X86ShuffleImm8ForMask(LoMask, DL, DAG)); // Do a half shuffle with the high mask after shifting its values down. for (int &M : HiMask) if (M >= 0) M -= 4; if (!isNoopShuffleMask(HiMask)) V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V, getV4X86ShuffleImm8ForMask(HiMask, DL, DAG)); return V; } /// \brief Helper to form a PSHUFB-based shuffle+blend. static SDValue lowerVectorShuffleAsPSHUFB(SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, SelectionDAG &DAG, bool &V1InUse, bool &V2InUse) { SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); SDValue V1Mask[16]; SDValue V2Mask[16]; V1InUse = false; V2InUse = false; int Size = Mask.size(); int Scale = 16 / Size; for (int i = 0; i < 16; ++i) { if (Mask[i / Scale] == -1) { V1Mask[i] = V2Mask[i] = DAG.getUNDEF(MVT::i8); } else { const int ZeroMask = 0x80; int V1Idx = Mask[i / Scale] < Size ? Mask[i / Scale] * Scale + i % Scale : ZeroMask; int V2Idx = Mask[i / Scale] < Size ? ZeroMask : (Mask[i / Scale] - Size) * Scale + i % Scale; if (Zeroable[i / Scale]) V1Idx = V2Idx = ZeroMask; V1Mask[i] = DAG.getConstant(V1Idx, DL, MVT::i8); V2Mask[i] = DAG.getConstant(V2Idx, DL, MVT::i8); V1InUse |= (ZeroMask != V1Idx); V2InUse |= (ZeroMask != V2Idx); } } if (V1InUse) V1 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, DAG.getBitcast(MVT::v16i8, V1), DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V1Mask)); if (V2InUse) V2 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, DAG.getBitcast(MVT::v16i8, V2), DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V2Mask)); // If we need shuffled inputs from both, blend the two. SDValue V; if (V1InUse && V2InUse) V = DAG.getNode(ISD::OR, DL, MVT::v16i8, V1, V2); else V = V1InUse ? V1 : V2; // Cast the result back to the correct type. return DAG.getBitcast(VT, V); } /// \brief Generic lowering of 8-lane i16 shuffles. /// /// This handles both single-input shuffles and combined shuffle/blends with /// two inputs. 
The single input shuffles are immediately delegated to /// a dedicated lowering routine. /// /// The blends are lowered in one of three fundamental ways. If there are few /// enough inputs, it delegates to a basic UNPCK-based strategy. If the shuffle /// of the input is significantly cheaper when lowered as an interleaving of /// the two inputs, try to interleave them. Otherwise, blend the low and high /// halves of the inputs separately (making them have relatively few inputs) /// and then concatenate them. static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(Op.getSimpleValueType() == MVT::v8i16 && "Bad shuffle type!"); assert(V1.getSimpleValueType() == MVT::v8i16 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v8i16 && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef OrigMask = SVOp->getMask(); int MaskStorage[8] = {OrigMask[0], OrigMask[1], OrigMask[2], OrigMask[3], OrigMask[4], OrigMask[5], OrigMask[6], OrigMask[7]}; MutableArrayRef Mask(MaskStorage); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); // Whenever we can lower this as a zext, that instruction is strictly faster // than any alternative. if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend( DL, MVT::v8i16, V1, V2, OrigMask, Subtarget, DAG)) return ZExt; auto isV1 = [](int M) { return M >= 0 && M < 8; }; (void)isV1; auto isV2 = [](int M) { return M >= 8; }; int NumV2Inputs = std::count_if(Mask.begin(), Mask.end(), isV2); if (NumV2Inputs == 0) { // Check for being able to broadcast a single element. if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v8i16, V1, Mask, Subtarget, DAG)) return Broadcast; // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i16, V1, V1, Mask, DAG)) return Shift; // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG)) return V; // Try to use byte rotation instructions. if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i16, V1, V1, Mask, Subtarget, DAG)) return Rotate; return lowerV8I16GeneralSingleInputVectorShuffle(DL, MVT::v8i16, V1, Mask, Subtarget, DAG); } assert(std::any_of(Mask.begin(), Mask.end(), isV1) && "All single-input shuffles should be canonicalized to be V1-input " "shuffles."); // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i16, V1, V2, Mask, DAG)) return Shift; // See if we can use SSE4A Extraction / Insertion. if (Subtarget->hasSSE4A()) if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask, DAG)) return V; // There are special ways we can lower some single-element blends. if (NumV2Inputs == 1) if (SDValue V = lowerVectorShuffleAsElementInsertion(DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG)) return V; // We have different paths for blend lowering, but they all must use the // *exact* same predicate. bool IsBlendSupported = Subtarget->hasSSE41(); if (IsBlendSupported) if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG)) return Blend; if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask, DAG)) return Masked; // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG)) return V; // Try to use byte rotation instructions. 
if (SDValue Rotate = lowerVectorShuffleAsByteRotate( DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG)) return Rotate; if (SDValue BitBlend = lowerVectorShuffleAsBitBlend(DL, MVT::v8i16, V1, V2, Mask, DAG)) return BitBlend; if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(DL, MVT::v8i16, V1, V2, Mask, DAG)) return Unpack; // If we can't directly blend but can use PSHUFB, that will be better as it // can both shuffle and set up the inefficient blend. if (!IsBlendSupported && Subtarget->hasSSSE3()) { bool V1InUse, V2InUse; return lowerVectorShuffleAsPSHUFB(DL, MVT::v8i16, V1, V2, Mask, DAG, V1InUse, V2InUse); } // We can always bit-blend if we have to so the fallback strategy is to // decompose into single-input permutes and blends. return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2, Mask, DAG); } /// \brief Check whether a compaction lowering can be done by dropping even /// elements and compute how many times even elements must be dropped. /// /// This handles shuffles which take every Nth element where N is a power of /// two. Example shuffle masks: /// /// N = 1: 0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14 /// N = 1: 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 /// N = 2: 0, 4, 8, 12, 0, 4, 8, 12, 0, 4, 8, 12, 0, 4, 8, 12 /// N = 2: 0, 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28 /// N = 3: 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8 /// N = 3: 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24 /// /// Any of these lanes can of course be undef. /// /// This routine only supports N <= 3. /// FIXME: Evaluate whether either AVX or AVX-512 have any opportunities here /// for larger N. /// /// \returns N above, or the number of times even elements must be dropped if /// there is such a number. Otherwise returns zero. static int canLowerByDroppingEvenElements(ArrayRef Mask) { // Figure out whether we're looping over two inputs or just one. bool IsSingleInput = isSingleInputShuffleMask(Mask); // The modulus for the shuffle vector entries is based on whether this is // a single input or not. int ShuffleModulus = Mask.size() * (IsSingleInput ? 1 : 2); assert(isPowerOf2_32((uint32_t)ShuffleModulus) && "We should only be called with masks with a power-of-2 size!"); uint64_t ModMask = (uint64_t)ShuffleModulus - 1; // We track whether the input is viable for all power-of-2 strides 2^1, 2^2, // and 2^3 simultaneously. This is because we may have ambiguity with // partially undef inputs. bool ViableForN[3] = {true, true, true}; for (int i = 0, e = Mask.size(); i < e; ++i) { // Ignore undef lanes, we'll optimistically collapse them to the pattern we // want. if (Mask[i] == -1) continue; bool IsAnyViable = false; for (unsigned j = 0; j != array_lengthof(ViableForN); ++j) if (ViableForN[j]) { uint64_t N = j + 1; // The shuffle mask must be equal to (i * 2^N) % M. if ((uint64_t)Mask[i] == (((uint64_t)i << N) & ModMask)) IsAnyViable = true; else ViableForN[j] = false; } // Early exit if we exhaust the possible powers of two. if (!IsAnyViable) break; } for (unsigned j = 0; j != array_lengthof(ViableForN); ++j) if (ViableForN[j]) return j + 1; // Return 0 as there is no viable power of two. return 0; } /// \brief Generic lowering of v16i8 shuffles. /// /// This is a hybrid strategy to lower v16i8 vectors. It first attempts to /// detect any complexity reducing interleaving. 
If that doesn't help, it uses /// UNPCK to spread the i8 elements across two i16-element vectors, and uses /// the existing lowering for v8i16 blends on each half, finally PACK-ing them /// back together. static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(Op.getSimpleValueType() == MVT::v16i8 && "Bad shuffle type!"); assert(V1.getSimpleValueType() == MVT::v16i8 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v16i8 && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i8, V1, V2, Mask, DAG)) return Shift; // Try to use byte rotation instructions. if (SDValue Rotate = lowerVectorShuffleAsByteRotate( DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG)) return Rotate; // Try to use a zext lowering. if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend( DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG)) return ZExt; // See if we can use SSE4A Extraction / Insertion. if (Subtarget->hasSSE4A()) if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask, DAG)) return V; int NumV2Elements = std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 16; }); // For single-input shuffles, there are some nicer lowering tricks we can use. if (NumV2Elements == 0) { // Check for being able to broadcast a single element. if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v16i8, V1, Mask, Subtarget, DAG)) return Broadcast; // Check whether we can widen this to an i16 shuffle by duplicating bytes. // Notably, this handles splat and partial-splat shuffles more efficiently. // However, it only makes sense if the pre-duplication shuffle simplifies // things significantly. Currently, this means we need to be able to // express the pre-duplication shuffle as an i16 shuffle. // // FIXME: We should check for other patterns which can be widened into an // i16 shuffle as well. auto canWidenViaDuplication = [](ArrayRef Mask) { for (int i = 0; i < 16; i += 2) if (Mask[i] != -1 && Mask[i + 1] != -1 && Mask[i] != Mask[i + 1]) return false; return true; }; auto tryToWidenViaDuplication = [&]() -> SDValue { if (!canWidenViaDuplication(Mask)) return SDValue(); SmallVector LoInputs; std::copy_if(Mask.begin(), Mask.end(), std::back_inserter(LoInputs), [](int M) { return M >= 0 && M < 8; }); std::sort(LoInputs.begin(), LoInputs.end()); LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()), LoInputs.end()); SmallVector HiInputs; std::copy_if(Mask.begin(), Mask.end(), std::back_inserter(HiInputs), [](int M) { return M >= 8; }); std::sort(HiInputs.begin(), HiInputs.end()); HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()), HiInputs.end()); bool TargetLo = LoInputs.size() >= HiInputs.size(); ArrayRef InPlaceInputs = TargetLo ? LoInputs : HiInputs; ArrayRef MovingInputs = TargetLo ? HiInputs : LoInputs; int PreDupI16Shuffle[] = {-1, -1, -1, -1, -1, -1, -1, -1}; SmallDenseMap LaneMap; for (int I : InPlaceInputs) { PreDupI16Shuffle[I/2] = I/2; LaneMap[I] = I; } int j = TargetLo ? 0 : 4, je = j + 4; for (int i = 0, ie = MovingInputs.size(); i < ie; ++i) { // Check if j is already a shuffle of this input. This happens when // there are two adjacent bytes after we move the low one. 
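        // Worked example (editorial addition, not from the original source):
        // suppose TargetLo is true, the in-place low bytes {0, 1} already
        // occupy i16 slot 0, and MovingInputs == {9, 12}. The search below
        // packs byte 9 into free i16 slot 1 and byte 12 into slot 2, giving
        // PreDupI16Shuffle == {0, 4, 6, -1, -1, -1, -1, -1}, and LaneMap then
        // records the post-shuffle byte positions 9 -> 3 and 12 -> 4.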
if (PreDupI16Shuffle[j] != MovingInputs[i] / 2) { // If we haven't yet mapped the input, search for a slot into which // we can map it. while (j < je && PreDupI16Shuffle[j] != -1) ++j; if (j == je) // We can't place the inputs into a single half with a simple i16 shuffle, so bail. return SDValue(); // Map this input with the i16 shuffle. PreDupI16Shuffle[j] = MovingInputs[i] / 2; } // Update the lane map based on the mapping we ended up with. LaneMap[MovingInputs[i]] = 2 * j + MovingInputs[i] % 2; } V1 = DAG.getBitcast( MVT::v16i8, DAG.getVectorShuffle(MVT::v8i16, DL, DAG.getBitcast(MVT::v8i16, V1), DAG.getUNDEF(MVT::v8i16), PreDupI16Shuffle)); // Unpack the bytes to form the i16s that will be shuffled into place. V1 = DAG.getNode(TargetLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL, MVT::v16i8, V1, V1); int PostDupI16Shuffle[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; for (int i = 0; i < 16; ++i) if (Mask[i] != -1) { int MappedMask = LaneMap[Mask[i]] - (TargetLo ? 0 : 8); assert(MappedMask < 8 && "Invalid v8 shuffle mask!"); if (PostDupI16Shuffle[i / 2] == -1) PostDupI16Shuffle[i / 2] = MappedMask; else assert(PostDupI16Shuffle[i / 2] == MappedMask && "Conflicting entrties in the original shuffle!"); } return DAG.getBitcast( MVT::v16i8, DAG.getVectorShuffle(MVT::v8i16, DL, DAG.getBitcast(MVT::v8i16, V1), DAG.getUNDEF(MVT::v8i16), PostDupI16Shuffle)); }; if (SDValue V = tryToWidenViaDuplication()) return V; } if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask, DAG)) return Masked; // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG)) return V; // Check for SSSE3 which lets us lower all v16i8 shuffles much more directly // with PSHUFB. It is important to do this before we attempt to generate any // blends but after all of the single-input lowerings. If the single input // lowerings can find an instruction sequence that is faster than a PSHUFB, we // want to preserve that and we can DAG combine any longer sequences into // a PSHUFB in the end. But once we start blending from multiple inputs, // the complexity of DAG combining bad patterns back into PSHUFB is too high, // and there are *very* few patterns that would actually be faster than the // PSHUFB approach because of its ability to zero lanes. // // FIXME: The only exceptions to the above are blends which are exact // interleavings with direct instructions supporting them. We currently don't // handle those well here. if (Subtarget->hasSSSE3()) { bool V1InUse = false; bool V2InUse = false; SDValue PSHUFB = lowerVectorShuffleAsPSHUFB(DL, MVT::v16i8, V1, V2, Mask, DAG, V1InUse, V2InUse); // If both V1 and V2 are in use and we can use a direct blend or an unpack, // do so. This avoids using them to handle blends-with-zero which is // important as a single pshufb is significantly faster for that. if (V1InUse && V2InUse) { if (Subtarget->hasSSE41()) if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG)) return Blend; // We can use an unpack to do the blending rather than an or in some // cases. Even though the or may be (very minorly) more efficient, we // preference this lowering because there are common cases where part of // the complexity of the shuffles goes away when we do the final blend as // an unpack. // FIXME: It might be worth trying to detect if the unpack-feeding // shuffles will both be pshufb, in which case we shouldn't bother with // this. 
if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack( DL, MVT::v16i8, V1, V2, Mask, DAG)) return Unpack; } return PSHUFB; } // There are special ways we can lower some single-element blends. if (NumV2Elements == 1) if (SDValue V = lowerVectorShuffleAsElementInsertion(DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG)) return V; if (SDValue BitBlend = lowerVectorShuffleAsBitBlend(DL, MVT::v16i8, V1, V2, Mask, DAG)) return BitBlend; // Check whether a compaction lowering can be done. This handles shuffles // which take every Nth element for some even N. See the helper function for // details. // // We special case these as they can be particularly efficiently handled with // the PACKUSB instruction on x86 and they show up in common patterns of // rearranging bytes to truncate wide elements. if (int NumEvenDrops = canLowerByDroppingEvenElements(Mask)) { // NumEvenDrops is the power of two stride of the elements. Another way of // thinking about it is that we need to drop the even elements this many // times to get the original input. bool IsSingleInput = isSingleInputShuffleMask(Mask); // First we need to zero all the dropped bytes. assert(NumEvenDrops <= 3 && "No support for dropping even elements more than 3 times."); // We use the mask type to pick which bytes are preserved based on how many // elements are dropped. MVT MaskVTs[] = { MVT::v8i16, MVT::v4i32, MVT::v2i64 }; SDValue ByteClearMask = DAG.getBitcast( MVT::v16i8, DAG.getConstant(0xFF, DL, MaskVTs[NumEvenDrops - 1])); V1 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V1, ByteClearMask); if (!IsSingleInput) V2 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V2, ByteClearMask); // Now pack things back together. V1 = DAG.getBitcast(MVT::v8i16, V1); V2 = IsSingleInput ? V1 : DAG.getBitcast(MVT::v8i16, V2); SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, V1, V2); for (int i = 1; i < NumEvenDrops; ++i) { Result = DAG.getBitcast(MVT::v8i16, Result); Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, Result, Result); } return Result; } // Handle multi-input cases by blending single-input shuffles. if (NumV2Elements > 0) return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v16i8, V1, V2, Mask, DAG); // The fallback path for single-input shuffles widens this into two v8i16 // vectors with unpacks, shuffles those, and then pulls them back together // with a pack. SDValue V = V1; int LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; int HiBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; for (int i = 0; i < 16; ++i) if (Mask[i] >= 0) (i < 8 ? LoBlendMask[i] : HiBlendMask[i % 8]) = Mask[i]; SDValue Zero = getZeroVector(MVT::v8i16, Subtarget, DAG, DL); SDValue VLoHalf, VHiHalf; // Check if any of the odd lanes in the v16i8 are used. If not, we can mask // them out and avoid using UNPCK{L,H} to extract the elements of V as // i16s. if (std::none_of(std::begin(LoBlendMask), std::end(LoBlendMask), [](int M) { return M >= 0 && M % 2 == 1; }) && std::none_of(std::begin(HiBlendMask), std::end(HiBlendMask), [](int M) { return M >= 0 && M % 2 == 1; })) { // Use a mask to drop the high bytes. VLoHalf = DAG.getBitcast(MVT::v8i16, V); VLoHalf = DAG.getNode(ISD::AND, DL, MVT::v8i16, VLoHalf, DAG.getConstant(0x00FF, DL, MVT::v8i16)); // This will be a single vector shuffle instead of a blend so nuke VHiHalf. VHiHalf = DAG.getUNDEF(MVT::v8i16); // Squash the masks to point directly into VLoHalf. 
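    // Editorial example (not from the original source): because every odd
    // byte was just masked to zero, halving an even byte index yields the
    // index of the word that now holds that byte, e.g. byte 6 of V maps to
    // word 3 of VLoHalf.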
for (int &M : LoBlendMask) if (M >= 0) M /= 2; for (int &M : HiBlendMask) if (M >= 0) M /= 2; } else { // Otherwise just unpack the low half of V into VLoHalf and the high half into // VHiHalf so that we can blend them as i16s. VLoHalf = DAG.getBitcast( MVT::v8i16, DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero)); VHiHalf = DAG.getBitcast( MVT::v8i16, DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero)); } SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, LoBlendMask); SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, HiBlendMask); return DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, LoV, HiV); } /// \brief Dispatching routine to lower various 128-bit x86 vector shuffles. /// /// This routine breaks down the specific type of 128-bit shuffle and /// dispatches to the lowering routines accordingly. static SDValue lower128BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, MVT VT, const X86Subtarget *Subtarget, SelectionDAG &DAG) { switch (VT.SimpleTy) { case MVT::v2i64: return lowerV2I64VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v2f64: return lowerV2F64VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v4i32: return lowerV4I32VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v4f32: return lowerV4F32VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v8i16: return lowerV8I16VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v16i8: return lowerV16I8VectorShuffle(Op, V1, V2, Subtarget, DAG); default: llvm_unreachable("Unimplemented!"); } } /// \brief Helper function to test whether a shuffle mask could be /// simplified by widening the elements being shuffled. /// /// Appends the mask for wider elements in WidenedMask if valid. Otherwise /// leaves it in an unspecified state. /// /// NOTE: This must handle normal vector shuffle masks and *target* vector /// shuffle masks. The latter have the special property of a '-2' representing /// a zero-ed lane of a vector. static bool canWidenShuffleElements(ArrayRef Mask, SmallVectorImpl &WidenedMask) { for (int i = 0, Size = Mask.size(); i < Size; i += 2) { // If both elements are undef, its trivial. if (Mask[i] == SM_SentinelUndef && Mask[i + 1] == SM_SentinelUndef) { WidenedMask.push_back(SM_SentinelUndef); continue; } // Check for an undef mask and a mask value properly aligned to fit with // a pair of values. If we find such a case, use the non-undef mask's value. if (Mask[i] == SM_SentinelUndef && Mask[i + 1] >= 0 && Mask[i + 1] % 2 == 1) { WidenedMask.push_back(Mask[i + 1] / 2); continue; } if (Mask[i + 1] == SM_SentinelUndef && Mask[i] >= 0 && Mask[i] % 2 == 0) { WidenedMask.push_back(Mask[i] / 2); continue; } // When zeroing, we need to spread the zeroing across both lanes to widen. if (Mask[i] == SM_SentinelZero || Mask[i + 1] == SM_SentinelZero) { if ((Mask[i] == SM_SentinelZero || Mask[i] == SM_SentinelUndef) && (Mask[i + 1] == SM_SentinelZero || Mask[i + 1] == SM_SentinelUndef)) { WidenedMask.push_back(SM_SentinelZero); continue; } return false; } // Finally check if the two mask values are adjacent and aligned with // a pair. if (Mask[i] != SM_SentinelUndef && Mask[i] % 2 == 0 && Mask[i] + 1 == Mask[i + 1]) { WidenedMask.push_back(Mask[i] / 2); continue; } // Otherwise we can't safely widen the elements used in this shuffle. return false; } assert(WidenedMask.size() == Mask.size() / 2 && "Incorrect size of mask after widening the elements!"); return true; } /// \brief Generic routine to split vector shuffle into half-sized shuffles. 
/// /// This routine just extracts two subvectors, shuffles them independently, and /// then concatenates them back together. This should work effectively with all /// AVX vector shuffle types. static SDValue splitAndLowerVectorShuffle(SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, SelectionDAG &DAG) { assert(VT.getSizeInBits() >= 256 && "Only for 256-bit or wider vector shuffles!"); assert(V1.getSimpleValueType() == VT && "Bad operand type!"); assert(V2.getSimpleValueType() == VT && "Bad operand type!"); ArrayRef LoMask = Mask.slice(0, Mask.size() / 2); ArrayRef HiMask = Mask.slice(Mask.size() / 2); int NumElements = VT.getVectorNumElements(); int SplitNumElements = NumElements / 2; MVT ScalarVT = VT.getVectorElementType(); MVT SplitVT = MVT::getVectorVT(ScalarVT, NumElements / 2); // Rather than splitting build-vectors, just build two narrower build // vectors. This helps shuffling with splats and zeros. auto SplitVector = [&](SDValue V) { while (V.getOpcode() == ISD::BITCAST) V = V->getOperand(0); MVT OrigVT = V.getSimpleValueType(); int OrigNumElements = OrigVT.getVectorNumElements(); int OrigSplitNumElements = OrigNumElements / 2; MVT OrigScalarVT = OrigVT.getVectorElementType(); MVT OrigSplitVT = MVT::getVectorVT(OrigScalarVT, OrigNumElements / 2); SDValue LoV, HiV; auto *BV = dyn_cast(V); if (!BV) { LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigSplitVT, V, DAG.getIntPtrConstant(0, DL)); HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigSplitVT, V, DAG.getIntPtrConstant(OrigSplitNumElements, DL)); } else { SmallVector LoOps, HiOps; for (int i = 0; i < OrigSplitNumElements; ++i) { LoOps.push_back(BV->getOperand(i)); HiOps.push_back(BV->getOperand(i + OrigSplitNumElements)); } LoV = DAG.getNode(ISD::BUILD_VECTOR, DL, OrigSplitVT, LoOps); HiV = DAG.getNode(ISD::BUILD_VECTOR, DL, OrigSplitVT, HiOps); } return std::make_pair(DAG.getBitcast(SplitVT, LoV), DAG.getBitcast(SplitVT, HiV)); }; SDValue LoV1, HiV1, LoV2, HiV2; std::tie(LoV1, HiV1) = SplitVector(V1); std::tie(LoV2, HiV2) = SplitVector(V2); // Now create two 4-way blends of these half-width vectors. auto HalfBlend = [&](ArrayRef HalfMask) { bool UseLoV1 = false, UseHiV1 = false, UseLoV2 = false, UseHiV2 = false; SmallVector V1BlendMask, V2BlendMask, BlendMask; for (int i = 0; i < SplitNumElements; ++i) { int M = HalfMask[i]; if (M >= NumElements) { if (M >= NumElements + SplitNumElements) UseHiV2 = true; else UseLoV2 = true; V2BlendMask.push_back(M - NumElements); V1BlendMask.push_back(-1); BlendMask.push_back(SplitNumElements + i); } else if (M >= 0) { if (M >= SplitNumElements) UseHiV1 = true; else UseLoV1 = true; V2BlendMask.push_back(-1); V1BlendMask.push_back(M); BlendMask.push_back(i); } else { V2BlendMask.push_back(-1); V1BlendMask.push_back(-1); BlendMask.push_back(-1); } } // Because the lowering happens after all combining takes place, we need to // manually combine these blend masks as much as possible so that we create // a minimal number of high-level vector shuffle nodes. // First try just blending the halves of V1 or V2. if (!UseLoV1 && !UseHiV1 && !UseLoV2 && !UseHiV2) return DAG.getUNDEF(SplitVT); if (!UseLoV2 && !UseHiV2) return DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask); if (!UseLoV1 && !UseHiV1) return DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask); SDValue V1Blend, V2Blend; if (UseLoV1 && UseHiV1) { V1Blend = DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask); } else { // We only use half of V1 so map the usage down into the final blend mask. V1Blend = UseLoV1 ? 
LoV1 : HiV1; for (int i = 0; i < SplitNumElements; ++i) if (BlendMask[i] >= 0 && BlendMask[i] < SplitNumElements) BlendMask[i] = V1BlendMask[i] - (UseLoV1 ? 0 : SplitNumElements); } if (UseLoV2 && UseHiV2) { V2Blend = DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask); } else { // We only use half of V2 so map the usage down into the final blend mask. V2Blend = UseLoV2 ? LoV2 : HiV2; for (int i = 0; i < SplitNumElements; ++i) if (BlendMask[i] >= SplitNumElements) BlendMask[i] = V2BlendMask[i] + (UseLoV2 ? SplitNumElements : 0); } return DAG.getVectorShuffle(SplitVT, DL, V1Blend, V2Blend, BlendMask); }; SDValue Lo = HalfBlend(LoMask); SDValue Hi = HalfBlend(HiMask); return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); } /// \brief Either split a vector in halves or decompose the shuffles and the /// blend. /// /// This is provided as a good fallback for many lowerings of non-single-input /// shuffles with more than one 128-bit lane. In those cases, we want to select /// between splitting the shuffle into 128-bit components and stitching those /// back together vs. extracting the single-input shuffles and blending those /// results. static SDValue lowerVectorShuffleAsSplitOrBlend(SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, SelectionDAG &DAG) { assert(!isSingleInputShuffleMask(Mask) && "This routine must not be used to " "lower single-input shuffles as it " "could then recurse on itself."); int Size = Mask.size(); // If this can be modeled as a broadcast of two elements followed by a blend, // prefer that lowering. This is especially important because broadcasts can // often fold with memory operands. auto DoBothBroadcast = [&] { int V1BroadcastIdx = -1, V2BroadcastIdx = -1; for (int M : Mask) if (M >= Size) { if (V2BroadcastIdx == -1) V2BroadcastIdx = M - Size; else if (M - Size != V2BroadcastIdx) return false; } else if (M >= 0) { if (V1BroadcastIdx == -1) V1BroadcastIdx = M; else if (M != V1BroadcastIdx) return false; } return true; }; if (DoBothBroadcast()) return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, DAG); // If the inputs all stem from a single 128-bit lane of each input, then we // split them rather than blending because the split will decompose to // unusually few instructions. int LaneCount = VT.getSizeInBits() / 128; int LaneSize = Size / LaneCount; SmallBitVector LaneInputs[2]; LaneInputs[0].resize(LaneCount, false); LaneInputs[1].resize(LaneCount, false); for (int i = 0; i < Size; ++i) if (Mask[i] >= 0) LaneInputs[Mask[i] / Size][(Mask[i] % Size) / LaneSize] = true; if (LaneInputs[0].count() <= 1 && LaneInputs[1].count() <= 1) return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG); // Otherwise, just fall back to decomposed shuffles and a blend. This requires // that the decomposed single-input shuffles don't end up here. return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, DAG); } /// \brief Lower a vector shuffle crossing multiple 128-bit lanes as /// a permutation and blend of those lanes. /// /// This essentially blends the out-of-lane inputs to each lane into the lane /// from a permuted copy of the vector. This lowering strategy results in four /// instructions in the worst case for a single-input cross lane shuffle which /// is lower than any other fully general cross-lane shuffle strategy I'm aware /// of. Special cases for each particular shuffle pattern should be handled /// prior to trying this lowering. 
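///
/// Editorial illustration (not part of the original comment): for a
/// single-input v4f64 shuffle with mask <2, 1, 3, 0>, the lowering below
/// builds a flipped copy of V1 via a VPERM2X128 that swaps its two 128-bit
/// halves and then issues an in-lane blend of V1 and the flipped copy with
/// the mask <4, 1, 3, 6>, which reproduces the requested cross-lane ordering.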
static SDValue lowerVectorShuffleAsLanePermuteAndBlend(SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, SelectionDAG &DAG) { // FIXME: This should probably be generalized for 512-bit vectors as well. assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!"); int LaneSize = Mask.size() / 2; // If there are only inputs from one 128-bit lane, splitting will in fact be // less expensive. The flags track whether the given lane contains an element // that crosses to another lane. bool LaneCrossing[2] = {false, false}; for (int i = 0, Size = Mask.size(); i < Size; ++i) if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize) LaneCrossing[(Mask[i] % Size) / LaneSize] = true; if (!LaneCrossing[0] || !LaneCrossing[1]) return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG); if (isSingleInputShuffleMask(Mask)) { SmallVector FlippedBlendMask; for (int i = 0, Size = Mask.size(); i < Size; ++i) FlippedBlendMask.push_back( Mask[i] < 0 ? -1 : (((Mask[i] % Size) / LaneSize == i / LaneSize) ? Mask[i] : Mask[i] % LaneSize + (i / LaneSize) * LaneSize + Size)); // Flip the vector, and blend the results which should now be in-lane. The // VPERM2X128 mask uses the low 2 bits for the low source and bits 4 and // 5 for the high source. The value 3 selects the high half of source 2 and // the value 2 selects the low half of source 2. We only use source 2 to // allow folding it into a memory operand. unsigned PERMMask = 3 | 2 << 4; SDValue Flipped = DAG.getNode(X86ISD::VPERM2X128, DL, VT, DAG.getUNDEF(VT), V1, DAG.getConstant(PERMMask, DL, MVT::i8)); return DAG.getVectorShuffle(VT, DL, V1, Flipped, FlippedBlendMask); } // This now reduces to two single-input shuffles of V1 and V2 which at worst // will be handled by the above logic and a blend of the results, much like // other patterns in AVX. return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, DAG); } /// \brief Handle lowering 2-lane 128-bit shuffles. static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) { // TODO: If minimizing size and one of the inputs is a zero vector and the // the zero vector has only one use, we could use a VPERM2X128 to save the // instruction bytes needed to explicitly generate the zero vector. // Blends are faster and handle all the non-lane-crossing cases. if (SDValue Blend = lowerVectorShuffleAsBlend(DL, VT, V1, V2, Mask, Subtarget, DAG)) return Blend; bool IsV1Zero = ISD::isBuildVectorAllZeros(V1.getNode()); bool IsV2Zero = ISD::isBuildVectorAllZeros(V2.getNode()); // If either input operand is a zero vector, use VPERM2X128 because its mask // allows us to replace the zero input with an implicit zero. if (!IsV1Zero && !IsV2Zero) { // Check for patterns which can be matched with a single insert of a 128-bit // subvector. bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1}); if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5})) { MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), VT.getVectorNumElements() / 2); SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1, DAG.getIntPtrConstant(0, DL)); SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, OnlyUsesV1 ? V1 : V2, DAG.getIntPtrConstant(0, DL)); return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV); } } // Otherwise form a 128-bit permutation. 
After accounting for undefs, // convert the 64-bit shuffle mask selection values into 128-bit // selection bits by dividing the indexes by 2 and shifting into positions // defined by a vperm2*128 instruction's immediate control byte. // The immediate permute control byte looks like this: // [1:0] - select 128 bits from sources for low half of destination // [2] - ignore // [3] - zero low half of destination // [5:4] - select 128 bits from sources for high half of destination // [6] - ignore // [7] - zero high half of destination int MaskLO = Mask[0]; if (MaskLO == SM_SentinelUndef) MaskLO = Mask[1] == SM_SentinelUndef ? 0 : Mask[1]; int MaskHI = Mask[2]; if (MaskHI == SM_SentinelUndef) MaskHI = Mask[3] == SM_SentinelUndef ? 0 : Mask[3]; unsigned PermMask = MaskLO / 2 | (MaskHI / 2) << 4; // If either input is a zero vector, replace it with an undef input. // Shuffle mask values < 4 are selecting elements of V1. // Shuffle mask values >= 4 are selecting elements of V2. // Adjust each half of the permute mask by clearing the half that was // selecting the zero vector and setting the zero mask bit. if (IsV1Zero) { V1 = DAG.getUNDEF(VT); if (MaskLO < 4) PermMask = (PermMask & 0xf0) | 0x08; if (MaskHI < 4) PermMask = (PermMask & 0x0f) | 0x80; } if (IsV2Zero) { V2 = DAG.getUNDEF(VT); if (MaskLO >= 4) PermMask = (PermMask & 0xf0) | 0x08; if (MaskHI >= 4) PermMask = (PermMask & 0x0f) | 0x80; } return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2, DAG.getConstant(PermMask, DL, MVT::i8)); } /// \brief Lower a vector shuffle by first fixing the 128-bit lanes and then /// shuffling each lane. /// /// This will only succeed when the result of fixing the 128-bit lanes results /// in a single-input non-lane-crossing shuffle with a repeating shuffle mask in /// each 128-bit lanes. This handles many cases where we can quickly blend away /// the lane crosses early and then use simpler shuffles within each lane. /// /// FIXME: It might be worthwhile at some point to support this without /// requiring the 128-bit lane-relative shuffles to be repeating, but currently /// in x86 only floating point has interesting non-repeating shuffles, and even /// those are still *marginally* more expensive. static SDValue lowerVectorShuffleByMerging128BitLanes( SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) { assert(!isSingleInputShuffleMask(Mask) && "This is only useful with multiple inputs."); int Size = Mask.size(); int LaneSize = 128 / VT.getScalarSizeInBits(); int NumLanes = Size / LaneSize; assert(NumLanes > 1 && "Only handles 256-bit and wider shuffles."); // See if we can build a hypothetical 128-bit lane-fixing shuffle mask. Also // check whether the in-128-bit lane shuffles share a repeating pattern. SmallVector Lanes; Lanes.resize(NumLanes, -1); SmallVector InLaneMask; InLaneMask.resize(LaneSize, -1); for (int i = 0; i < Size; ++i) { if (Mask[i] < 0) continue; int j = i / LaneSize; if (Lanes[j] < 0) { // First entry we've seen for this lane. Lanes[j] = Mask[i] / LaneSize; } else if (Lanes[j] != Mask[i] / LaneSize) { // This doesn't match the lane selected previously! return SDValue(); } // Check that within each lane we have a consistent shuffle mask. int k = i % LaneSize; if (InLaneMask[k] < 0) { InLaneMask[k] = Mask[i] % LaneSize; } else if (InLaneMask[k] != Mask[i] % LaneSize) { // This doesn't fit a repeating in-lane mask. return SDValue(); } } // First shuffle the lanes into place. MVT LaneVT = MVT::getVectorVT(VT.isFloatingPoint() ? 
MVT::f64 : MVT::i64, VT.getSizeInBits() / 64); SmallVector LaneMask; LaneMask.resize(NumLanes * 2, -1); for (int i = 0; i < NumLanes; ++i) if (Lanes[i] >= 0) { LaneMask[2 * i + 0] = 2*Lanes[i] + 0; LaneMask[2 * i + 1] = 2*Lanes[i] + 1; } V1 = DAG.getBitcast(LaneVT, V1); V2 = DAG.getBitcast(LaneVT, V2); SDValue LaneShuffle = DAG.getVectorShuffle(LaneVT, DL, V1, V2, LaneMask); // Cast it back to the type we actually want. LaneShuffle = DAG.getBitcast(VT, LaneShuffle); // Now do a simple shuffle that isn't lane crossing. SmallVector NewMask; NewMask.resize(Size, -1); for (int i = 0; i < Size; ++i) if (Mask[i] >= 0) NewMask[i] = (i / LaneSize) * LaneSize + Mask[i] % LaneSize; assert(!is128BitLaneCrossingShuffleMask(VT, NewMask) && "Must not introduce lane crosses at this point!"); return DAG.getVectorShuffle(VT, DL, LaneShuffle, DAG.getUNDEF(VT), NewMask); } /// Lower shuffles where an entire half of a 256-bit vector is UNDEF. /// This allows for fast cases such as subvector extraction/insertion /// or shuffling smaller vector types which can lower more efficiently. static SDValue lowerVectorShuffleWithUndefHalf(SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) { assert(VT.getSizeInBits() == 256 && "Expected 256-bit vector"); unsigned NumElts = VT.getVectorNumElements(); unsigned HalfNumElts = NumElts / 2; MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElts); bool UndefLower = isUndefInRange(Mask, 0, HalfNumElts); bool UndefUpper = isUndefInRange(Mask, HalfNumElts, HalfNumElts); if (!UndefLower && !UndefUpper) return SDValue(); // Upper half is undef and lower half is whole upper subvector. // e.g. vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u> if (UndefUpper && isSequentialOrUndefInRange(Mask, 0, HalfNumElts, HalfNumElts)) { SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1, DAG.getIntPtrConstant(HalfNumElts, DL)); return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi, DAG.getIntPtrConstant(0, DL)); } // Lower half is undef and upper half is whole lower subvector. // e.g. vector_shuffle or if (UndefLower && isSequentialOrUndefInRange(Mask, HalfNumElts, HalfNumElts, 0)) { SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1, DAG.getIntPtrConstant(0, DL)); return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi, DAG.getIntPtrConstant(HalfNumElts, DL)); } // AVX2 supports efficient immediate 64-bit element cross-lane shuffles. if (UndefLower && Subtarget->hasAVX2() && (VT == MVT::v4f64 || VT == MVT::v4i64)) return SDValue(); // If the shuffle only uses the lower halves of the input operands, // then extract them and perform the 'half' shuffle at half width. // e.g. vector_shuffle or int HalfIdx1 = -1, HalfIdx2 = -1; SmallVector HalfMask; unsigned Offset = UndefLower ? HalfNumElts : 0; for (unsigned i = 0; i != HalfNumElts; ++i) { int M = Mask[i + Offset]; if (M < 0) { HalfMask.push_back(M); continue; } // Determine which of the 4 half vectors this element is from. // i.e. 0 = Lower V1, 1 = Upper V1, 2 = Lower V2, 3 = Upper V2. int HalfIdx = M / HalfNumElts; // Only shuffle using the lower halves of the inputs. // TODO: Investigate usefulness of shuffling with upper halves. if (HalfIdx != 0 && HalfIdx != 2) return SDValue(); // Determine the element index into its half vector source. int HalfElt = M % HalfNumElts; // We can shuffle with up to 2 half vectors, set the new 'half' // shuffle mask accordingly. 
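    // Worked example (editorial addition, not from the original source): for
    // a v8f32 shuffle whose upper half is undef and whose low four mask
    // entries are <0, 1, 8, 9>, the elements come from the low half of V1
    // (HalfIdx 0) and the low half of V2 (HalfIdx 2), and the new half mask
    // becomes <0, 1, 4, 5>.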
if (-1 == HalfIdx1 || HalfIdx1 == HalfIdx) { HalfMask.push_back(HalfElt); HalfIdx1 = HalfIdx; continue; } if (-1 == HalfIdx2 || HalfIdx2 == HalfIdx) { HalfMask.push_back(HalfElt + HalfNumElts); HalfIdx2 = HalfIdx; continue; } // Too many half vectors referenced. return SDValue(); } assert(HalfMask.size() == HalfNumElts && "Unexpected shuffle mask length"); auto GetHalfVector = [&](int HalfIdx) { if (HalfIdx < 0) return DAG.getUNDEF(HalfVT); SDValue V = (HalfIdx < 2 ? V1 : V2); HalfIdx = (HalfIdx % 2) * HalfNumElts; return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V, DAG.getIntPtrConstant(HalfIdx, DL)); }; SDValue Half1 = GetHalfVector(HalfIdx1); SDValue Half2 = GetHalfVector(HalfIdx2); SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask); return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, DAG.getIntPtrConstant(Offset, DL)); } /// \brief Test whether the specified input (0 or 1) is in-place blended by the /// given mask. /// /// This returns true if the elements from a particular input are already in the /// slot required by the given mask and require no permutation. static bool isShuffleMaskInputInPlace(int Input, ArrayRef Mask) { assert((Input == 0 || Input == 1) && "Only two inputs to shuffles."); int Size = Mask.size(); for (int i = 0; i < Size; ++i) if (Mask[i] >= 0 && Mask[i] / Size == Input && Mask[i] % Size != i) return false; return true; } static SDValue lowerVectorShuffleWithSHUFPD(SDLoc DL, MVT VT, ArrayRef Mask, SDValue V1, SDValue V2, SelectionDAG &DAG) { // Mask for V8F64: 0/1, 8/9, 2/3, 10/11, 4/5, .. // Mask for V4F64; 0/1, 4/5, 2/3, 6/7.. assert(VT.getScalarSizeInBits() == 64 && "Unexpected data type for VSHUFPD"); int NumElts = VT.getVectorNumElements(); bool ShufpdMask = true; bool CommutableMask = true; unsigned Immediate = 0; for (int i = 0; i < NumElts; ++i) { if (Mask[i] < 0) continue; int Val = (i & 6) + NumElts * (i & 1); int CommutVal = (i & 0xe) + NumElts * ((i & 1)^1); if (Mask[i] < Val || Mask[i] > Val + 1) ShufpdMask = false; if (Mask[i] < CommutVal || Mask[i] > CommutVal + 1) CommutableMask = false; Immediate |= (Mask[i] % 2) << i; } if (ShufpdMask) return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2, DAG.getConstant(Immediate, DL, MVT::i8)); if (CommutableMask) return DAG.getNode(X86ISD::SHUFP, DL, VT, V2, V1, DAG.getConstant(Immediate, DL, MVT::i8)); return SDValue(); } /// \brief Handle lowering of 4-lane 64-bit floating point shuffles. /// /// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2 /// isn't available. static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v4f64 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); SmallVector WidenedMask; if (canWidenShuffleElements(Mask, WidenedMask)) return lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG); if (isSingleInputShuffleMask(Mask)) { // Check for being able to broadcast a single element. if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v4f64, V1, Mask, Subtarget, DAG)) return Broadcast; // Use low duplicate instructions for masks that match their pattern. 
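    // Editorial note (not from the original source): on v4f64, MOVDDUP copies
    // each even-numbered element into the odd slot that follows it, which is
    // exactly the <0, 0, 2, 2> pattern tested below.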
if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2})) return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v4f64, V1); if (!is128BitLaneCrossingShuffleMask(MVT::v4f64, Mask)) { // Non-half-crossing single input shuffles can be lowerid with an // interleaved permutation. unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) | ((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3); return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f64, V1, DAG.getConstant(VPERMILPMask, DL, MVT::i8)); } // With AVX2 we have direct support for this permutation. if (Subtarget->hasAVX2()) return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4f64, V1, getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); // Otherwise, fall back. return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v4f64, V1, V2, Mask, DAG); } // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v4f64, Mask, V1, V2, DAG)) return V; if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG)) return Blend; // Check if the blend happens to exactly fit that of SHUFPD. if (SDValue Op = lowerVectorShuffleWithSHUFPD(DL, MVT::v4f64, Mask, V1, V2, DAG)) return Op; // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. However, if we have AVX2 and either inputs are already in place, // we will be able to shuffle even across lanes the other input in a single // instruction so skip this pattern. if (!(Subtarget->hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask)))) if (SDValue Result = lowerVectorShuffleByMerging128BitLanes( DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG)) return Result; // If we have AVX2 then we always want to lower with a blend because an v4 we // can fully permute the elements. if (Subtarget->hasAVX2()) return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2, Mask, DAG); // Otherwise fall back on generic lowering. return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask, DAG); } /// \brief Handle lowering of 4-lane 64-bit integer shuffles. /// /// This routine is only called when we have AVX2 and thus a reasonable /// instruction set for v4i64 shuffling.. static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v4i64 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v4i64 && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); assert(Subtarget->hasAVX2() && "We can only lower v4i64 with AVX2!"); SmallVector WidenedMask; if (canWidenShuffleElements(Mask, WidenedMask)) return lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG); if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG)) return Blend; // Check for being able to broadcast a single element. if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v4i64, V1, Mask, Subtarget, DAG)) return Broadcast; // When the shuffle is mirrored between the 128-bit lanes of the unit, we can // use lower latency instructions that will operate on both 128-bit lanes. 
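// e.g. the v4i64 mask <1, 0, 3, 2> repeats <1, 0> in each 128-bit lane and is
// rewritten below as a v8i32 VPSHUFD with the per-lane dword mask <2, 3, 0, 1>.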
SmallVector RepeatedMask; if (is128BitLaneRepeatedShuffleMask(MVT::v4i64, Mask, RepeatedMask)) { if (isSingleInputShuffleMask(Mask)) { int PSHUFDMask[] = {-1, -1, -1, -1}; for (int i = 0; i < 2; ++i) if (RepeatedMask[i] >= 0) { PSHUFDMask[2 * i] = 2 * RepeatedMask[i]; PSHUFDMask[2 * i + 1] = 2 * RepeatedMask[i] + 1; } return DAG.getBitcast( MVT::v4i64, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32, DAG.getBitcast(MVT::v8i32, V1), getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); } } // AVX2 provides a direct instruction for permuting a single input across // lanes. if (isSingleInputShuffleMask(Mask)) return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4i64, V1, getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v4i64, V1, V2, Mask, DAG)) return Shift; // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG)) return V; // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. However, if we have AVX2 and either inputs are already in place, // we will be able to shuffle even across lanes the other input in a single // instruction so skip this pattern. if (!(Subtarget->hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask)))) if (SDValue Result = lowerVectorShuffleByMerging128BitLanes( DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG)) return Result; // Otherwise fall back on generic blend lowering. return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i64, V1, V2, Mask, DAG); } /// \brief Handle lowering of 8-lane 32-bit floating point shuffles. /// /// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2 /// isn't available. static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v8f32 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v8f32 && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG)) return Blend; // Check for being able to broadcast a single element. if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v8f32, V1, Mask, Subtarget, DAG)) return Broadcast; // If the shuffle mask is repeated in each 128-bit lane, we have many more // options to efficiently lower the shuffle. SmallVector RepeatedMask; if (is128BitLaneRepeatedShuffleMask(MVT::v8f32, Mask, RepeatedMask)) { assert(RepeatedMask.size() == 4 && "Repeated masks must be half the mask width!"); // Use even/odd duplicate instructions for masks that match their pattern. if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2, 4, 4, 6, 6})) return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v8f32, V1); if (isShuffleEquivalent(V1, V2, Mask, {1, 1, 3, 3, 5, 5, 7, 7})) return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v8f32, V1); if (isSingleInputShuffleMask(Mask)) return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f32, V1, getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v8f32, Mask, V1, V2, DAG)) return V; // Otherwise, fall back to a SHUFPS sequence. Here it is important that we // have already handled any direct blends. 
We also need to squash the // repeated mask into a simulated v4f32 mask. for (int i = 0; i < 4; ++i) if (RepeatedMask[i] >= 8) RepeatedMask[i] -= 4; return lowerVectorShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask, V1, V2, DAG); } // If we have a single input shuffle with different shuffle patterns in the // two 128-bit lanes use the variable mask to VPERMILPS. if (isSingleInputShuffleMask(Mask)) { SDValue VPermMask[8]; for (int i = 0; i < 8; ++i) VPermMask[i] = Mask[i] < 0 ? DAG.getUNDEF(MVT::i32) : DAG.getConstant(Mask[i], DL, MVT::i32); if (!is128BitLaneCrossingShuffleMask(MVT::v8f32, Mask)) return DAG.getNode( X86ISD::VPERMILPV, DL, MVT::v8f32, V1, DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask)); if (Subtarget->hasAVX2()) return DAG.getNode( X86ISD::VPERMV, DL, MVT::v8f32, DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask), V1); // Otherwise, fall back. return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask, DAG); } // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. if (SDValue Result = lowerVectorShuffleByMerging128BitLanes( DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG)) return Result; // If we have AVX2 then we always want to lower with a blend because at v8 we // can fully permute the elements. if (Subtarget->hasAVX2()) return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2, Mask, DAG); // Otherwise fall back on generic lowering. return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, DAG); } /// \brief Handle lowering of 8-lane 32-bit integer shuffles. /// /// This routine is only called when we have AVX2 and thus a reasonable /// instruction set for v8i32 shuffling.. static SDValue lowerV8I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v8i32 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v8i32 && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); assert(Subtarget->hasAVX2() && "We can only lower v8i32 with AVX2!"); // Whenever we can lower this as a zext, that instruction is strictly faster // than any alternative. It also allows us to fold memory operands into the // shuffle in many cases. if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG)) return ZExt; if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG)) return Blend; // Check for being able to broadcast a single element. if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v8i32, V1, Mask, Subtarget, DAG)) return Broadcast; // If the shuffle mask is repeated in each 128-bit lane we can use more // efficient instructions that mirror the shuffles across the two 128-bit // lanes. SmallVector RepeatedMask; if (is128BitLaneRepeatedShuffleMask(MVT::v8i32, Mask, RepeatedMask)) { assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!"); if (isSingleInputShuffleMask(Mask)) return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32, V1, getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v8i32, Mask, V1, V2, DAG)) return V; } // Try to use shift instructions. 
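// e.g. a v8i32 mask <Z, 0, 1, 2, Z, 4, 5, 6>, where the Z lanes are known to be
// zero, just shifts each 128-bit lane up by one dword and can be matched to a
// single vector shift instead of a shuffle.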
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i32, V1, V2, Mask, DAG)) return Shift; if (SDValue Rotate = lowerVectorShuffleAsByteRotate( DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG)) return Rotate; // If the shuffle patterns aren't repeated but it is a single input, directly // generate a cross-lane VPERMD instruction. if (isSingleInputShuffleMask(Mask)) { SDValue VPermMask[8]; for (int i = 0; i < 8; ++i) VPermMask[i] = Mask[i] < 0 ? DAG.getUNDEF(MVT::i32) : DAG.getConstant(Mask[i], DL, MVT::i32); return DAG.getNode( X86ISD::VPERMV, DL, MVT::v8i32, DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask), V1); } // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. if (SDValue Result = lowerVectorShuffleByMerging128BitLanes( DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG)) return Result; // Otherwise fall back on generic blend lowering. return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i32, V1, V2, Mask, DAG); } /// \brief Handle lowering of 16-lane 16-bit integer shuffles. /// /// This routine is only called when we have AVX2 and thus a reasonable /// instruction set for v16i16 shuffling.. static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v16i16 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v16i16 && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); assert(Subtarget->hasAVX2() && "We can only lower v16i16 with AVX2!"); // Whenever we can lower this as a zext, that instruction is strictly faster // than any alternative. It also allows us to fold memory operands into the // shuffle in many cases. if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG)) return ZExt; // Check for being able to broadcast a single element. if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v16i16, V1, Mask, Subtarget, DAG)) return Broadcast; if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG)) return Blend; // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v16i16, Mask, V1, V2, DAG)) return V; // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i16, V1, V2, Mask, DAG)) return Shift; // Try to use byte rotation instructions. if (SDValue Rotate = lowerVectorShuffleAsByteRotate( DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG)) return Rotate; if (isSingleInputShuffleMask(Mask)) { // There are no generalized cross-lane shuffle operations available on i16 // element types. if (is128BitLaneCrossingShuffleMask(MVT::v16i16, Mask)) return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2, Mask, DAG); SmallVector RepeatedMask; if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) { // As this is a single-input shuffle, the repeated mask should be // a strictly valid v8i16 mask that we can pass through to the v8i16 // lowering to handle even the v16 case. return lowerV8I16GeneralSingleInputVectorShuffle( DL, MVT::v16i16, V1, RepeatedMask, Subtarget, DAG); } SDValue PSHUFBMask[32]; for (int i = 0; i < 16; ++i) { if (Mask[i] == -1) { PSHUFBMask[2 * i] = PSHUFBMask[2 * i + 1] = DAG.getUNDEF(MVT::i8); continue; } int M = i < 8 ? 
Mask[i] : Mask[i] - 8; assert(M >= 0 && M < 8 && "Invalid single-input mask!"); PSHUFBMask[2 * i] = DAG.getConstant(2 * M, DL, MVT::i8); PSHUFBMask[2 * i + 1] = DAG.getConstant(2 * M + 1, DL, MVT::i8); } return DAG.getBitcast(MVT::v16i16, DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, DAG.getBitcast(MVT::v32i8, V1), DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, PSHUFBMask))); } // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. if (SDValue Result = lowerVectorShuffleByMerging128BitLanes( DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG)) return Result; // Otherwise fall back on generic lowering. return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask, DAG); } /// \brief Handle lowering of 32-lane 8-bit integer shuffles. /// /// This routine is only called when we have AVX2 and thus a reasonable /// instruction set for v32i8 shuffling.. static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v32i8 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v32i8 && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!"); assert(Subtarget->hasAVX2() && "We can only lower v32i8 with AVX2!"); // Whenever we can lower this as a zext, that instruction is strictly faster // than any alternative. It also allows us to fold memory operands into the // shuffle in many cases. if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG)) return ZExt; // Check for being able to broadcast a single element. if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v32i8, V1, Mask, Subtarget, DAG)) return Broadcast; if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG)) return Blend; // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v32i8, Mask, V1, V2, DAG)) return V; // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v32i8, V1, V2, Mask, DAG)) return Shift; // Try to use byte rotation instructions. if (SDValue Rotate = lowerVectorShuffleAsByteRotate( DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG)) return Rotate; if (isSingleInputShuffleMask(Mask)) { // There are no generalized cross-lane shuffle operations available on i8 // element types. if (is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask)) return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask, DAG); SDValue PSHUFBMask[32]; for (int i = 0; i < 32; ++i) PSHUFBMask[i] = Mask[i] < 0 ? DAG.getUNDEF(MVT::i8) : DAG.getConstant(Mask[i] < 16 ? Mask[i] : Mask[i] - 16, DL, MVT::i8); return DAG.getNode( X86ISD::PSHUFB, DL, MVT::v32i8, V1, DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, PSHUFBMask)); } // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. if (SDValue Result = lowerVectorShuffleByMerging128BitLanes( DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG)) return Result; // Otherwise fall back on generic lowering. return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask, DAG); } /// \brief High-level routine to lower various 256-bit x86 vector shuffles. 
/// /// This routine either breaks down the specific type of a 256-bit x86 vector /// shuffle or splits it into two 128-bit shuffles and fuses the results back /// together based on the available instructions. static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, MVT VT, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); // If we have a single input to the zero element, insert that into V1 if we // can do so cheaply. int NumElts = VT.getVectorNumElements(); int NumV2Elements = std::count_if(Mask.begin(), Mask.end(), [NumElts](int M) { return M >= NumElts; }); if (NumV2Elements == 1 && Mask[0] >= NumElts) if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( DL, VT, V1, V2, Mask, Subtarget, DAG)) return Insertion; // Handle special cases where the lower or upper half is UNDEF. if (SDValue V = lowerVectorShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG)) return V; // There is a really nice hard cut-over between AVX1 and AVX2 that means we // can check for those subtargets here and avoid much of the subtarget // querying in the per-vector-type lowering routines. With AVX1 we have // essentially *zero* ability to manipulate a 256-bit vector with integer // types. Since we'll use floating point types there eventually, just // immediately cast everything to a float and operate entirely in that domain. if (VT.isInteger() && !Subtarget->hasAVX2()) { int ElementBits = VT.getScalarSizeInBits(); if (ElementBits < 32) // No floating point type available, decompose into 128-bit vectors. return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG); MVT FpVT = MVT::getVectorVT(MVT::getFloatingPointVT(ElementBits), VT.getVectorNumElements()); V1 = DAG.getBitcast(FpVT, V1); V2 = DAG.getBitcast(FpVT, V2); return DAG.getBitcast(VT, DAG.getVectorShuffle(FpVT, DL, V1, V2, Mask)); } switch (VT.SimpleTy) { case MVT::v4f64: return lowerV4F64VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v4i64: return lowerV4I64VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v8f32: return lowerV8F32VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v8i32: return lowerV8I32VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v16i16: return lowerV16I16VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v32i8: return lowerV32I8VectorShuffle(Op, V1, V2, Subtarget, DAG); default: llvm_unreachable("Not a valid 256-bit x86 vector type!"); } } /// \brief Try to lower a vector shuffle as a 128-bit shuffles. static SDValue lowerV4X128VectorShuffle(SDLoc DL, MVT VT, ArrayRef Mask, SDValue V1, SDValue V2, SelectionDAG &DAG) { assert(VT.getScalarSizeInBits() == 64 && "Unexpected element type size for 128bit shuffle."); // To handle 256 bit vector requires VLX and most probably // function lowerV2X128VectorShuffle() is better solution. assert(VT.is512BitVector() && "Unexpected vector size for 128bit shuffle."); SmallVector WidenedMask; if (!canWidenShuffleElements(Mask, WidenedMask)) return SDValue(); // Form a 128-bit permutation. // Convert the 64-bit shuffle mask selection values into 128-bit selection // bits defined by a vshuf64x2 instruction's immediate control byte. unsigned PermMask = 0, Imm = 0; unsigned ControlBitsNum = WidenedMask.size() / 2; for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) { if (WidenedMask[i] == SM_SentinelZero) return SDValue(); // Use first element in place of undef mask. Imm = (WidenedMask[i] == SM_SentinelUndef) ? 
0 : WidenedMask[i]; PermMask |= (Imm % WidenedMask.size()) << (i * ControlBitsNum); } return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2, DAG.getConstant(PermMask, DL, MVT::i8)); } static SDValue lowerVectorShuffleWithPERMV(SDLoc DL, MVT VT, ArrayRef Mask, SDValue V1, SDValue V2, SelectionDAG &DAG) { assert(VT.getScalarSizeInBits() >= 16 && "Unexpected data type for PERMV"); MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits()); MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements()); SDValue MaskNode = getConstVector(Mask, MaskVecVT, DAG, DL, true); if (isSingleInputShuffleMask(Mask)) return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1); return DAG.getNode(X86ISD::VPERMV3, DL, VT, V1, MaskNode, V2); } /// \brief Handle lowering of 8-lane 64-bit floating point shuffles. static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); if (SDValue Shuf128 = lowerV4X128VectorShuffle(DL, MVT::v8f64, Mask, V1, V2, DAG)) return Shuf128; if (SDValue Unpck = lowerVectorShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG)) return Unpck; return lowerVectorShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG); } /// \brief Handle lowering of 16-lane 32-bit floating point shuffles. static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); if (SDValue Unpck = lowerVectorShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG)) return Unpck; return lowerVectorShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, DAG); } /// \brief Handle lowering of 8-lane 64-bit integer shuffles. static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); if (SDValue Shuf128 = lowerV4X128VectorShuffle(DL, MVT::v8i64, Mask, V1, V2, DAG)) return Shuf128; if (SDValue Unpck = lowerVectorShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG)) return Unpck; return lowerVectorShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG); } /// \brief Handle lowering of 16-lane 32-bit integer shuffles. 
static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); if (SDValue Unpck = lowerVectorShuffleWithUNPCK(DL, MVT::v16i32, Mask, V1, V2, DAG)) return Unpck; return lowerVectorShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG); } /// \brief Handle lowering of 32-lane 16-bit integer shuffles. static SDValue lowerV32I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v32i16 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v32i16 && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!"); assert(Subtarget->hasBWI() && "We can only lower v32i16 with AVX-512-BWI!"); return lowerVectorShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG); } /// \brief Handle lowering of 64-lane 8-bit integer shuffles. static SDValue lowerV64I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v64i8 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v64i8 && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!"); assert(Subtarget->hasBWI() && "We can only lower v64i8 with AVX-512-BWI!"); // FIXME: Implement direct support for this type! return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG); } /// \brief High-level routine to lower various 512-bit x86 vector shuffles. /// /// This routine either breaks down the specific type of a 512-bit x86 vector /// shuffle or splits it into two 256-bit shuffles and fuses the results back /// together based on the available instructions. static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, MVT VT, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Subtarget->hasAVX512() && "Cannot lower 512-bit vectors w/ basic ISA!"); // Check for being able to broadcast a single element. if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, VT, V1, Mask, Subtarget, DAG)) return Broadcast; // Dispatch to each element type for lowering. If we don't have supprot for // specific element type shuffles at 512 bits, immediately split them and // lower them. Each lowering routine of a given type is allowed to assume that // the requisite ISA extensions for that element type are available. 
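// e.g. v32i16 and v64i8 need AVX-512 BWI; without it they break out of the
// switch below and are split into two 256-bit shuffles.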
switch (VT.SimpleTy) { case MVT::v8f64: return lowerV8F64VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v16f32: return lowerV16F32VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v8i64: return lowerV8I64VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v16i32: return lowerV16I32VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v32i16: if (Subtarget->hasBWI()) return lowerV32I16VectorShuffle(Op, V1, V2, Subtarget, DAG); break; case MVT::v64i8: if (Subtarget->hasBWI()) return lowerV64I8VectorShuffle(Op, V1, V2, Subtarget, DAG); break; default: llvm_unreachable("Not a valid 512-bit x86 vector type!"); } // Otherwise fall back on splitting. return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG); } // Lower vXi1 vector shuffles. // There is no a dedicated instruction on AVX-512 that shuffles the masks. // The only way to shuffle bits is to sign-extend the mask vector to SIMD // vector, shuffle and then truncate it back. static SDValue lower1BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, MVT VT, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Subtarget->hasAVX512() && "Cannot lower 512-bit vectors w/o basic ISA!"); MVT ExtVT; switch (VT.SimpleTy) { default: llvm_unreachable("Expected a vector of i1 elements"); case MVT::v2i1: ExtVT = MVT::v2i64; break; case MVT::v4i1: ExtVT = MVT::v4i32; break; case MVT::v8i1: ExtVT = MVT::v8i64; // Take 512-bit type, more shuffles on KNL break; case MVT::v16i1: ExtVT = MVT::v16i32; break; case MVT::v32i1: ExtVT = MVT::v32i16; break; case MVT::v64i1: ExtVT = MVT::v64i8; break; } if (ISD::isBuildVectorAllZeros(V1.getNode())) V1 = getZeroVector(ExtVT, Subtarget, DAG, DL); else if (ISD::isBuildVectorAllOnes(V1.getNode())) V1 = getOnesVector(ExtVT, Subtarget, DAG, DL); else V1 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V1); if (V2.isUndef()) V2 = DAG.getUNDEF(ExtVT); else if (ISD::isBuildVectorAllZeros(V2.getNode())) V2 = getZeroVector(ExtVT, Subtarget, DAG, DL); else if (ISD::isBuildVectorAllOnes(V2.getNode())) V2 = getOnesVector(ExtVT, Subtarget, DAG, DL); else V2 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V2); return DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getVectorShuffle(ExtVT, DL, V1, V2, Mask)); } /// \brief Top-level lowering for x86 vector shuffles. /// /// This handles decomposition, canonicalization, and lowering of all x86 /// vector shuffles. Most of the specific lowering strategies are encapsulated /// above in helper routines. The canonicalization attempts to widen shuffles /// to involve fewer lanes of wider elements, consolidate symmetric patterns /// s.t. only one of the two inputs needs to be tested, etc. static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); MVT VT = Op.getSimpleValueType(); int NumElements = VT.getVectorNumElements(); SDLoc dl(Op); bool Is1BitVector = (VT.getVectorElementType() == MVT::i1); assert((VT.getSizeInBits() != 64 || Is1BitVector) && "Can't lower MMX shuffles"); bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; if (V1IsUndef && V2IsUndef) return DAG.getUNDEF(VT); // When we create a shuffle node we put the UNDEF node to second operand, // but in some cases the first operand may be transformed to UNDEF. // In this case we should just commute the node. 
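// e.g. shuffle(undef, V2, <4, 5, 6, 7>) is commuted to
// shuffle(V2, undef, <0, 1, 2, 3>).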
if (V1IsUndef) return DAG.getCommutedVectorShuffle(*SVOp); // Check for non-undef masks pointing at an undef vector and make the masks // undef as well. This makes it easier to match the shuffle based solely on // the mask. if (V2IsUndef) for (int M : Mask) if (M >= NumElements) { SmallVector NewMask(Mask.begin(), Mask.end()); for (int &M : NewMask) if (M >= NumElements) M = -1; return DAG.getVectorShuffle(VT, dl, V1, V2, NewMask); } // We actually see shuffles that are entirely re-arrangements of a set of // zero inputs. This mostly happens while decomposing complex shuffles into // simple ones. Directly lower these as a buildvector of zeros. SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); if (Zeroable.all()) return getZeroVector(VT, Subtarget, DAG, dl); // Try to collapse shuffles into using a vector type with fewer elements but // wider element types. We cap this to not form integers or floating point // elements wider than 64 bits, but it might be interesting to form i128 // integers to handle flipping the low and high halves of AVX 256-bit vectors. SmallVector WidenedMask; if (VT.getScalarSizeInBits() < 64 && !Is1BitVector && canWidenShuffleElements(Mask, WidenedMask)) { MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(VT.getScalarSizeInBits() * 2) : MVT::getIntegerVT(VT.getScalarSizeInBits() * 2); MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2); // Make sure that the new vector type is legal. For example, v2f64 isn't // legal on SSE1. if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) { V1 = DAG.getBitcast(NewVT, V1); V2 = DAG.getBitcast(NewVT, V2); return DAG.getBitcast( VT, DAG.getVectorShuffle(NewVT, dl, V1, V2, WidenedMask)); } } int NumV1Elements = 0, NumUndefElements = 0, NumV2Elements = 0; for (int M : SVOp->getMask()) if (M < 0) ++NumUndefElements; else if (M < NumElements) ++NumV1Elements; else ++NumV2Elements; // Commute the shuffle as needed such that more elements come from V1 than // V2. This allows us to match the shuffle pattern strictly on how many // elements come from V1 without handling the symmetric cases. if (NumV2Elements > NumV1Elements) return DAG.getCommutedVectorShuffle(*SVOp); // When the number of V1 and V2 elements are the same, try to minimize the // number of uses of V2 in the low half of the vector. When that is tied, // ensure that the sum of indices for V1 is equal to or lower than the sum // indices for V2. When those are equal, try to ensure that the number of odd // indices for V1 is lower than the number of odd indices for V2. 
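// e.g. for a v4i32 mask <4, 1, 6, 3> the low half uses one element from each
// input, but the V2 elements occupy positions 0 and 2 (sum 2) while the V1
// elements occupy positions 1 and 3 (sum 4), so the node is commuted to
// shuffle(V2, V1, <0, 5, 2, 7>).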
if (NumV1Elements == NumV2Elements) { int LowV1Elements = 0, LowV2Elements = 0; for (int M : SVOp->getMask().slice(0, NumElements / 2)) if (M >= NumElements) ++LowV2Elements; else if (M >= 0) ++LowV1Elements; if (LowV2Elements > LowV1Elements) { return DAG.getCommutedVectorShuffle(*SVOp); } else if (LowV2Elements == LowV1Elements) { int SumV1Indices = 0, SumV2Indices = 0; for (int i = 0, Size = SVOp->getMask().size(); i < Size; ++i) if (SVOp->getMask()[i] >= NumElements) SumV2Indices += i; else if (SVOp->getMask()[i] >= 0) SumV1Indices += i; if (SumV2Indices < SumV1Indices) { return DAG.getCommutedVectorShuffle(*SVOp); } else if (SumV2Indices == SumV1Indices) { int NumV1OddIndices = 0, NumV2OddIndices = 0; for (int i = 0, Size = SVOp->getMask().size(); i < Size; ++i) if (SVOp->getMask()[i] >= NumElements) NumV2OddIndices += i % 2; else if (SVOp->getMask()[i] >= 0) NumV1OddIndices += i % 2; if (NumV2OddIndices < NumV1OddIndices) return DAG.getCommutedVectorShuffle(*SVOp); } } } // For each vector width, delegate to a specialized lowering routine. if (VT.is128BitVector()) return lower128BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG); if (VT.is256BitVector()) return lower256BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG); if (VT.is512BitVector()) return lower512BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG); if (Is1BitVector) return lower1BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG); llvm_unreachable("Unimplemented!"); } // This function assumes its argument is a BUILD_VECTOR of constants or // undef SDNodes. i.e: ISD::isBuildVectorOfConstantSDNodes(BuildVector) is // true. static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector, unsigned &MaskValue) { MaskValue = 0; unsigned NumElems = BuildVector->getNumOperands(); // There are 2 lanes if (NumElems > 8), and 1 lane otherwise. // We don't handle the >2 lanes case right now. unsigned NumLanes = (NumElems - 1) / 8 + 1; if (NumLanes > 2) return false; unsigned NumElemsInLane = NumElems / NumLanes; // Blend for v16i16 should be symmetric for the both lanes. for (unsigned i = 0; i < NumElemsInLane; ++i) { SDValue EltCond = BuildVector->getOperand(i); SDValue SndLaneEltCond = (NumLanes == 2) ? BuildVector->getOperand(i + NumElemsInLane) : EltCond; int Lane1Cond = -1, Lane2Cond = -1; if (isa(EltCond)) Lane1Cond = !isNullConstant(EltCond); if (isa(SndLaneEltCond)) Lane2Cond = !isNullConstant(SndLaneEltCond); unsigned LaneMask = 0; if (Lane1Cond == Lane2Cond || Lane2Cond < 0) // Lane1Cond != 0, means we want the first argument. // Lane1Cond == 0, means we want the second argument. // The encoding of this argument is 0 for the first argument, 1 // for the second. Therefore, invert the condition. LaneMask = !Lane1Cond << i; else if (Lane1Cond < 0) LaneMask = !Lane2Cond << i; else return false; MaskValue |= LaneMask; if (NumLanes == 2) MaskValue |= LaneMask << NumElemsInLane; } return true; } /// \brief Try to lower a VSELECT instruction to a vector shuffle. static SDValue lowerVSELECTtoVectorShuffle(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDValue Cond = Op.getOperand(0); SDValue LHS = Op.getOperand(1); SDValue RHS = Op.getOperand(2); SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) return SDValue(); auto *CondBV = cast(Cond); // Only non-legal VSELECTs reach this lowering, convert those into generic // shuffles and re-use the shuffle lowering path for blends. 
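// e.g. a v4i32 vselect whose condition is <-1, 0, -1, 0> becomes the shuffle
// mask <0, 5, 2, 7>: a true lane selects from LHS (index i), a false lane
// selects from RHS (index i + Size).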
SmallVector Mask; for (int i = 0, Size = VT.getVectorNumElements(); i < Size; ++i) { SDValue CondElt = CondBV->getOperand(i); Mask.push_back( isa(CondElt) ? i + (isNullConstant(CondElt) ? Size : 0) : -1); } return DAG.getVectorShuffle(VT, dl, LHS, RHS, Mask); } SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { // A vselect where all conditions and data are constants can be optimized into // a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR(). if (ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(0).getNode()) && ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(1).getNode()) && ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(2).getNode())) return SDValue(); // Try to lower this to a blend-style vector shuffle. This can handle all // constant condition cases. if (SDValue BlendOp = lowerVSELECTtoVectorShuffle(Op, Subtarget, DAG)) return BlendOp; // Variable blends are only legal from SSE4.1 onward. if (!Subtarget->hasSSE41()) return SDValue(); // Only some types will be legal on some subtargets. If we can emit a legal // VSELECT-matching blend, return Op, and but if we need to expand, return // a null value. switch (Op.getSimpleValueType().SimpleTy) { default: // Most of the vector types have blends past SSE4.1. return Op; case MVT::v32i8: // The byte blends for AVX vectors were introduced only in AVX2. if (Subtarget->hasAVX2()) return Op; return SDValue(); case MVT::v8i16: case MVT::v16i16: // AVX-512 BWI and VLX features support VSELECT with i16 elements. if (Subtarget->hasBWI() && Subtarget->hasVLX()) return Op; // FIXME: We should custom lower this by fixing the condition and using i8 // blends. return SDValue(); } } static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); if (!Op.getOperand(0).getSimpleValueType().is128BitVector()) return SDValue(); if (VT.getSizeInBits() == 8) { SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32, Op.getOperand(0), Op.getOperand(1)); SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract, DAG.getValueType(VT)); return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert); } if (VT.getSizeInBits() == 16) { // If Idx is 0, it's cheaper to do a move instead of a pextrw. if (isNullConstant(Op.getOperand(1))) return DAG.getNode( ISD::TRUNCATE, dl, MVT::i16, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op.getOperand(0)), Op.getOperand(1))); SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32, Op.getOperand(0), Op.getOperand(1)); SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract, DAG.getValueType(VT)); return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert); } if (VT == MVT::f32) { // EXTRACTPS outputs to a GPR32 register which will require a movd to copy // the result back to FR32 register. It's only worth matching if the // result has a single use which is a store or a bitcast to i32. And in // the case of a store, it's not worth it if the index is a constant 0, // because a MOVSSmr can be used instead, which is smaller and faster. 
if (!Op.hasOneUse()) return SDValue(); SDNode *User = *Op.getNode()->use_begin(); if ((User->getOpcode() != ISD::STORE || isNullConstant(Op.getOperand(1))) && (User->getOpcode() != ISD::BITCAST || User->getValueType(0) != MVT::i32)) return SDValue(); SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op.getOperand(0)), Op.getOperand(1)); return DAG.getBitcast(MVT::f32, Extract); } if (VT == MVT::i32 || VT == MVT::i64) { // ExtractPS/pextrq works with constant index. if (isa(Op.getOperand(1))) return Op; } return SDValue(); } /// Extract one bit from mask vector, like v16i1 or v8i1. /// AVX-512 feature. SDValue X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const { SDValue Vec = Op.getOperand(0); SDLoc dl(Vec); MVT VecVT = Vec.getSimpleValueType(); SDValue Idx = Op.getOperand(1); MVT EltVT = Op.getSimpleValueType(); assert((EltVT == MVT::i1) && "Unexpected operands in ExtractBitFromMaskVector"); assert((VecVT.getVectorNumElements() <= 16 || Subtarget->hasBWI()) && "Unexpected vector type in ExtractBitFromMaskVector"); // variable index can't be handled in mask registers, // extend vector to VR512 if (!isa(Idx)) { MVT ExtVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32); SDValue Ext = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Vec); SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ExtVT.getVectorElementType(), Ext, Idx); return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); } unsigned IdxVal = cast(Idx)->getZExtValue(); const TargetRegisterClass* rc = getRegClassFor(VecVT); if (!Subtarget->hasDQI() && (VecVT.getVectorNumElements() <= 8)) rc = getRegClassFor(MVT::v16i1); unsigned MaxSift = rc->getSize()*8 - 1; Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec, DAG.getConstant(MaxSift - IdxVal, dl, MVT::i8)); Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec, DAG.getConstant(MaxSift, dl, MVT::i8)); return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i1, Vec, DAG.getIntPtrConstant(0, dl)); } SDValue X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); SDValue Vec = Op.getOperand(0); MVT VecVT = Vec.getSimpleValueType(); SDValue Idx = Op.getOperand(1); if (Op.getSimpleValueType() == MVT::i1) return ExtractBitFromMaskVector(Op, DAG); if (!isa(Idx)) { if (VecVT.is512BitVector() || (VecVT.is256BitVector() && Subtarget->hasInt256() && VecVT.getVectorElementType().getSizeInBits() == 32)) { MVT MaskEltVT = MVT::getIntegerVT(VecVT.getVectorElementType().getSizeInBits()); MVT MaskVT = MVT::getVectorVT(MaskEltVT, VecVT.getSizeInBits() / MaskEltVT.getSizeInBits()); Idx = DAG.getZExtOrTrunc(Idx, dl, MaskEltVT); auto PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Mask = DAG.getNode(X86ISD::VINSERT, dl, MaskVT, getZeroVector(MaskVT, Subtarget, DAG, dl), Idx, DAG.getConstant(0, dl, PtrVT)); SDValue Perm = DAG.getNode(X86ISD::VPERMV, dl, VecVT, Mask, Vec); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Perm, DAG.getConstant(0, dl, PtrVT)); } return SDValue(); } // If this is a 256-bit vector result, first extract the 128-bit vector and // then extract the element from the 128-bit vector. if (VecVT.is256BitVector() || VecVT.is512BitVector()) { unsigned IdxVal = cast(Idx)->getZExtValue(); // Get the 128-bit vector. Vec = Extract128BitVector(Vec, IdxVal, DAG, dl); MVT EltVT = VecVT.getVectorElementType(); unsigned ElemsPerChunk = 128 / EltVT.getSizeInBits(); assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"); // Find IdxVal modulo ElemsPerChunk. 
Since ElemsPerChunk is a power of 2 // this can be done with a mask. IdxVal &= ElemsPerChunk - 1; return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec, DAG.getConstant(IdxVal, dl, MVT::i32)); } assert(VecVT.is128BitVector() && "Unexpected vector length"); if (Subtarget->hasSSE41()) if (SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG)) return Res; MVT VT = Op.getSimpleValueType(); // TODO: handle v16i8. if (VT.getSizeInBits() == 16) { SDValue Vec = Op.getOperand(0); if (isNullConstant(Op.getOperand(1))) return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Vec), Op.getOperand(1))); // Transform it so it match pextrw which produces a 32-bit result. MVT EltVT = MVT::i32; SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EltVT, Op.getOperand(0), Op.getOperand(1)); SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EltVT, Extract, DAG.getValueType(VT)); return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert); } if (VT.getSizeInBits() == 32) { unsigned Idx = cast(Op.getOperand(1))->getZExtValue(); if (Idx == 0) return Op; // SHUFPS the element to the lowest double word, then movss. int Mask[4] = { static_cast(Idx), -1, -1, -1 }; MVT VVT = Op.getOperand(0).getSimpleValueType(); SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), DAG.getUNDEF(VVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, DAG.getIntPtrConstant(0, dl)); } if (VT.getSizeInBits() == 64) { // FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b // FIXME: seems like this should be unnecessary if mov{h,l}pd were taught // to match extract_elt for f64. if (isNullConstant(Op.getOperand(1))) return Op; // UNPCKHPD the element to the lowest double word, then movsd. // Note if the lower 64 bits of the result of the UNPCKHPD is then stored // to a f64mem, the whole operation is folded into a single MOVHPDmr. int Mask[2] = { 1, -1 }; MVT VVT = Op.getOperand(0).getSimpleValueType(); SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), DAG.getUNDEF(VVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, DAG.getIntPtrConstant(0, dl)); } return SDValue(); } /// Insert one bit to mask vector, like v16i1 or v8i1. /// AVX-512 feature. SDValue X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); SDValue Vec = Op.getOperand(0); SDValue Elt = Op.getOperand(1); SDValue Idx = Op.getOperand(2); MVT VecVT = Vec.getSimpleValueType(); if (!isa(Idx)) { // Non constant index. Extend source and destination, // insert element and then truncate the result. MVT ExtVecVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32); MVT ExtEltVT = (VecVT == MVT::v8i1 ? 
MVT::i64 : MVT::i32); SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT, DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVecVT, Vec), DAG.getNode(ISD::ZERO_EXTEND, dl, ExtEltVT, Elt), Idx); return DAG.getNode(ISD::TRUNCATE, dl, VecVT, ExtOp); } unsigned IdxVal = cast(Idx)->getZExtValue(); SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt); if (IdxVal) EltInVec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec, DAG.getConstant(IdxVal, dl, MVT::i8)); if (Vec.getOpcode() == ISD::UNDEF) return EltInVec; return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec); } SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { MVT VT = Op.getSimpleValueType(); MVT EltVT = VT.getVectorElementType(); if (EltVT == MVT::i1) return InsertBitToMaskVector(Op, DAG); SDLoc dl(Op); SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2 = Op.getOperand(2); if (!isa(N2)) return SDValue(); auto *N2C = cast(N2); unsigned IdxVal = N2C->getZExtValue(); // If the vector is wider than 128 bits, extract the 128-bit subvector, insert // into that, and then insert the subvector back into the result. if (VT.is256BitVector() || VT.is512BitVector()) { // With a 256-bit vector, we can insert into the zero element efficiently // using a blend if we have AVX or AVX2 and the right data type. if (VT.is256BitVector() && IdxVal == 0) { // TODO: It is worthwhile to cast integer to floating point and back // and incur a domain crossing penalty if that's what we'll end up // doing anyway after extracting to a 128-bit vector. if ((Subtarget->hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) || (Subtarget->hasAVX2() && EltVT == MVT::i32)) { SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1); N2 = DAG.getIntPtrConstant(1, dl); return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec, N2); } } // Get the desired 128-bit vector chunk. SDValue V = Extract128BitVector(N0, IdxVal, DAG, dl); // Insert the element into the desired chunk. unsigned NumEltsIn128 = 128 / EltVT.getSizeInBits(); assert(isPowerOf2_32(NumEltsIn128)); // Since NumEltsIn128 is a power of 2 we can use mask instead of modulo. unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1); V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1, DAG.getConstant(IdxIn128, dl, MVT::i32)); // Insert the changed part back into the bigger vector return Insert128BitVector(N0, V, IdxVal, DAG, dl); } assert(VT.is128BitVector() && "Only 128-bit vector types should be left!"); if (Subtarget->hasSSE41()) { if (EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) { unsigned Opc; if (VT == MVT::v8i16) { Opc = X86ISD::PINSRW; } else { assert(VT == MVT::v16i8); Opc = X86ISD::PINSRB; } // Transform it so it match pinsr{b,w} which expects a GR32 as its second // argument. if (N1.getValueType() != MVT::i32) N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1); if (N2.getValueType() != MVT::i32) N2 = DAG.getIntPtrConstant(IdxVal, dl); return DAG.getNode(Opc, dl, VT, N0, N1, N2); } if (EltVT == MVT::f32) { // Bits [7:6] of the constant are the source select. This will always be // zero here. The DAG Combiner may combine an extract_elt index into // these bits. For example (insert (extract, 3), 2) could be matched by // putting the '3' into bits [7:6] of X86ISD::INSERTPS. // Bits [5:4] of the constant are the destination select. This is the // value of the incoming immediate. // Bits [3:0] of the constant are the zero mask. 
The DAG Combiner may // combine either bitwise AND or insert of float 0.0 to set these bits. bool MinSize = DAG.getMachineFunction().getFunction()->optForMinSize(); if (IdxVal == 0 && (!MinSize || !MayFoldLoad(N1))) { // If this is an insertion of 32-bits into the low 32-bits of // a vector, we prefer to generate a blend with immediate rather // than an insertps. Blends are simpler operations in hardware and so // will always have equal or better performance than insertps. // But if optimizing for size and there's a load folding opportunity, // generate insertps because blendps does not have a 32-bit memory // operand form. N2 = DAG.getIntPtrConstant(1, dl); N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1); return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1, N2); } N2 = DAG.getIntPtrConstant(IdxVal << 4, dl); // Create this as a scalar to vector.. N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1); return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2); } if (EltVT == MVT::i32 || EltVT == MVT::i64) { // PINSR* works with constant index. return Op; } } if (EltVT == MVT::i8) return SDValue(); if (EltVT.getSizeInBits() == 16) { // Transform it so it match pinsrw which expects a 16-bit value in a GR32 // as its second argument. if (N1.getValueType() != MVT::i32) N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1); if (N2.getValueType() != MVT::i32) N2 = DAG.getIntPtrConstant(IdxVal, dl); return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2); } return SDValue(); } static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { SDLoc dl(Op); MVT OpVT = Op.getSimpleValueType(); // If this is a 256-bit vector result, first insert into a 128-bit // vector and then insert into the 256-bit vector. if (!OpVT.is128BitVector()) { // Insert into a 128-bit vector. unsigned SizeFactor = OpVT.getSizeInBits()/128; MVT VT128 = MVT::getVectorVT(OpVT.getVectorElementType(), OpVT.getVectorNumElements() / SizeFactor); Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0)); // Insert the 128-bit vector. return Insert128BitVector(DAG.getUNDEF(OpVT), Op, 0, DAG, dl); } if (OpVT == MVT::v1i64 && Op.getOperand(0).getValueType() == MVT::i64) return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0)); SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0)); assert(OpVT.is128BitVector() && "Expected an SSE type!"); return DAG.getBitcast( OpVT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, AnyExt)); } // Lower a node with an EXTRACT_SUBVECTOR opcode. This may result in // a simple subregister reference or explicit instructions to grab // upper bits of a vector. static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc dl(Op); SDValue In = Op.getOperand(0); SDValue Idx = Op.getOperand(1); unsigned IdxVal = cast(Idx)->getZExtValue(); MVT ResVT = Op.getSimpleValueType(); MVT InVT = In.getSimpleValueType(); if (Subtarget->hasFp256()) { if (ResVT.is128BitVector() && (InVT.is256BitVector() || InVT.is512BitVector()) && isa(Idx)) { return Extract128BitVector(In, IdxVal, DAG, dl); } if (ResVT.is256BitVector() && InVT.is512BitVector() && isa(Idx)) { return Extract256BitVector(In, IdxVal, DAG, dl); } } return SDValue(); } // Lower a node with an INSERT_SUBVECTOR opcode. This may result in a // simple superregister reference or explicit instructions to insert // the upper bits of a vector. 
static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { if (!Subtarget->hasAVX()) return SDValue(); SDLoc dl(Op); SDValue Vec = Op.getOperand(0); SDValue SubVec = Op.getOperand(1); SDValue Idx = Op.getOperand(2); if (!isa(Idx)) return SDValue(); unsigned IdxVal = cast(Idx)->getZExtValue(); MVT OpVT = Op.getSimpleValueType(); MVT SubVecVT = SubVec.getSimpleValueType(); // Fold two 16-byte subvector loads into one 32-byte load: // (insert_subvector (insert_subvector undef, (load addr), 0), // (load addr + 16), Elts/2) // --> load32 addr if ((IdxVal == OpVT.getVectorNumElements() / 2) && Vec.getOpcode() == ISD::INSERT_SUBVECTOR && OpVT.is256BitVector() && SubVecVT.is128BitVector()) { auto *Idx2 = dyn_cast(Vec.getOperand(2)); if (Idx2 && Idx2->getZExtValue() == 0) { SDValue SubVec2 = Vec.getOperand(1); // If needed, look through a bitcast to get to the load. if (SubVec2.getNode() && SubVec2.getOpcode() == ISD::BITCAST) SubVec2 = SubVec2.getOperand(0); if (auto *FirstLd = dyn_cast(SubVec2)) { bool Fast; unsigned Alignment = FirstLd->getAlignment(); unsigned AS = FirstLd->getAddressSpace(); const X86TargetLowering *TLI = Subtarget->getTargetLowering(); if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), OpVT, AS, Alignment, &Fast) && Fast) { SDValue Ops[] = { SubVec2, SubVec }; if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false)) return Ld; } } } } if ((OpVT.is256BitVector() || OpVT.is512BitVector()) && SubVecVT.is128BitVector()) return Insert128BitVector(Vec, SubVec, IdxVal, DAG, dl); if (OpVT.is512BitVector() && SubVecVT.is256BitVector()) return Insert256BitVector(Vec, SubVec, IdxVal, DAG, dl); if (OpVT.getVectorElementType() == MVT::i1) return Insert1BitVector(Op, DAG); return SDValue(); } // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as // their target countpart wrapped in the X86ISD::Wrapper node. Suppose N is // one of the above mentioned nodes. It has to be wrapped because otherwise // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only // be used to form addressing mode. These wrapped nodes will be selected // into MOV32ri. SDValue X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { ConstantPoolSDNode *CP = cast(Op); // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the // global base reg. unsigned char OpFlag = 0; unsigned WrapperKind = X86ISD::Wrapper; CodeModel::Model M = DAG.getTarget().getCodeModel(); if (Subtarget->isPICStyleRIPRel() && (M == CodeModel::Small || M == CodeModel::Kernel)) WrapperKind = X86ISD::WrapperRIP; else if (Subtarget->isPICStyleGOT()) OpFlag = X86II::MO_GOTOFF; else if (Subtarget->isPICStyleStubPIC()) OpFlag = X86II::MO_PIC_BASE_OFFSET; auto PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result = DAG.getTargetConstantPool( CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), OpFlag); SDLoc DL(CP); Result = DAG.getNode(WrapperKind, DL, PtrVT, Result); // With PIC, the address is actually $g + Offset. if (OpFlag) { Result = DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result); } return Result; } SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { JumpTableSDNode *JT = cast(Op); // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the // global base reg. 
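// e.g. with GOT-style PIC the jump-table address is materialized as
// $global_base_reg + JTI@GOTOFF; with RIP-relative PIC the WrapperRIP node
// alone is sufficient and no extra add is emitted.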
unsigned char OpFlag = 0; unsigned WrapperKind = X86ISD::Wrapper; CodeModel::Model M = DAG.getTarget().getCodeModel(); if (Subtarget->isPICStyleRIPRel() && (M == CodeModel::Small || M == CodeModel::Kernel)) WrapperKind = X86ISD::WrapperRIP; else if (Subtarget->isPICStyleGOT()) OpFlag = X86II::MO_GOTOFF; else if (Subtarget->isPICStyleStubPIC()) OpFlag = X86II::MO_PIC_BASE_OFFSET; auto PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag); SDLoc DL(JT); Result = DAG.getNode(WrapperKind, DL, PtrVT, Result); // With PIC, the address is actually $g + Offset. if (OpFlag) Result = DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result); return Result; } SDValue X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { const char *Sym = cast(Op)->getSymbol(); // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the // global base reg. unsigned char OpFlag = 0; unsigned WrapperKind = X86ISD::Wrapper; CodeModel::Model M = DAG.getTarget().getCodeModel(); if (Subtarget->isPICStyleRIPRel() && (M == CodeModel::Small || M == CodeModel::Kernel)) { if (Subtarget->isTargetDarwin() || Subtarget->isTargetELF()) OpFlag = X86II::MO_GOTPCREL; WrapperKind = X86ISD::WrapperRIP; } else if (Subtarget->isPICStyleGOT()) { OpFlag = X86II::MO_GOT; } else if (Subtarget->isPICStyleStubPIC()) { OpFlag = X86II::MO_DARWIN_NONLAZY_PIC_BASE; } else if (Subtarget->isPICStyleStubNoDynamic()) { OpFlag = X86II::MO_DARWIN_NONLAZY; } auto PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result = DAG.getTargetExternalSymbol(Sym, PtrVT, OpFlag); SDLoc DL(Op); Result = DAG.getNode(WrapperKind, DL, PtrVT, Result); // With PIC, the address is actually $g + Offset. if (DAG.getTarget().getRelocationModel() == Reloc::PIC_ && !Subtarget->is64Bit()) { Result = DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result); } // For symbols that require a load from a stub to get the address, emit the // load. if (isGlobalStubReference(OpFlag)) Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo::getGOT(DAG.getMachineFunction()), false, false, false, 0); return Result; } SDValue X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { // Create the TargetBlockAddressAddress node. unsigned char OpFlags = Subtarget->ClassifyBlockAddressReference(); CodeModel::Model M = DAG.getTarget().getCodeModel(); const BlockAddress *BA = cast(Op)->getBlockAddress(); int64_t Offset = cast(Op)->getOffset(); SDLoc dl(Op); auto PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset, OpFlags); if (Subtarget->isPICStyleRIPRel() && (M == CodeModel::Small || M == CodeModel::Kernel)) Result = DAG.getNode(X86ISD::WrapperRIP, dl, PtrVT, Result); else Result = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, Result); // With PIC, the address is actually $g + Offset. if (isGlobalRelativeToPICBase(OpFlags)) { Result = DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result); } return Result; } SDValue X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, SDLoc dl, int64_t Offset, SelectionDAG &DAG) const { // Create the TargetGlobalAddress node, folding in the constant // offset if it is legal. 
unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, DAG.getTarget()); CodeModel::Model M = DAG.getTarget().getCodeModel(); auto PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result; if (OpFlags == X86II::MO_NO_FLAG && X86::isOffsetSuitableForCodeModel(Offset, M)) { // A direct static reference to a global. Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset); Offset = 0; } else { Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, OpFlags); } if (Subtarget->isPICStyleRIPRel() && (M == CodeModel::Small || M == CodeModel::Kernel)) Result = DAG.getNode(X86ISD::WrapperRIP, dl, PtrVT, Result); else Result = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, Result); // With PIC, the address is actually $g + Offset. if (isGlobalRelativeToPICBase(OpFlags)) { Result = DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result); } // For globals that require a load from a stub to get the address, emit the // load. if (isGlobalStubReference(OpFlags)) Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, MachinePointerInfo::getGOT(DAG.getMachineFunction()), false, false, false, 0); // If there was a non-zero offset that we didn't fold, create an explicit // addition for it. if (Offset != 0) Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, DAG.getConstant(Offset, dl, PtrVT)); return Result; } SDValue X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { const GlobalValue *GV = cast(Op)->getGlobal(); int64_t Offset = cast(Op)->getOffset(); return LowerGlobalAddress(GV, SDLoc(Op), Offset, DAG); } static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg, unsigned char OperandFlags, bool LocalDynamic = false) { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SDLoc dl(GA); SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags); X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR : X86ISD::TLSADDR; if (InFlag) { SDValue Ops[] = { Chain, TGA, *InFlag }; Chain = DAG.getNode(CallType, dl, NodeTys, Ops); } else { SDValue Ops[] = { Chain, TGA }; Chain = DAG.getNode(CallType, dl, NodeTys, Ops); } // TLSADDR will be codegen'ed as call. Inform MFI that function has calls. MFI->setAdjustsStack(true); MFI->setHasCalls(true); SDValue Flag = Chain.getValue(1); return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag); } // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit static SDValue LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT) { SDValue InFlag; SDLoc dl(GA); // ? function entry point might be better SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InFlag); InFlag = Chain.getValue(1); return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD); } // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit static SDValue LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT) { return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX, X86II::MO_TLSGD); } static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT, bool is64Bit) { SDLoc dl(GA); // Get the start address of the TLS block for this module. 
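// Illustrative sketch (not part of this file): at run time the TLSADDR /
// TLSBASEADDR calls emitted above resolve a {module, offset} descriptor to an
// address inside the current thread's TLS storage, in the spirit of the ELF
// __tls_get_addr interface. The snippet below emulates that with a
// thread_local table; TlsIndex, emulatedTlsGetAddr and the block size are
// invented for the example and are not the real runtime interface. Kept under
// '#if 0' so it is not compiled here.
#if 0
#include <array>
#include <cassert>
#include <cstddef>
#include <unordered_map>

struct TlsIndex { std::size_t Module; std::size_t Offset; };

// One 64-byte block per module, allocated lazily, separately in each thread.
static thread_local std::unordered_map<std::size_t, std::array<unsigned char, 64>>
    TlsBlocks;

static unsigned char *emulatedTlsGetAddr(const TlsIndex &TI) {
  return TlsBlocks[TI.Module].data() + TI.Offset;  // module base + x@dtpoff
}

int main() {
  // The local-dynamic model computes the module base once (offset 0) and then
  // adds each variable's @dtpoff to it, which is what the ISD::ADD below does.
  unsigned char *Base = emulatedTlsGetAddr({/*Module=*/1, /*Offset=*/0});
  assert(emulatedTlsGetAddr({1, 8}) == Base + 8);
  assert(emulatedTlsGetAddr({1, 16}) == Base + 16);
  return 0;
}
#endif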
X86MachineFunctionInfo* MFI = DAG.getMachineFunction() .getInfo(); MFI->incNumLocalDynamicTLSAccesses(); SDValue Base; if (is64Bit) { Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX, X86II::MO_TLSLD, /*LocalDynamic=*/true); } else { SDValue InFlag; SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InFlag); InFlag = Chain.getValue(1); Base = GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSLDM, /*LocalDynamic=*/true); } // Note: the CleanupLocalDynamicTLSPass will remove redundant computations // of Base. // Build x@dtpoff. unsigned char OperandFlags = X86II::MO_DTPOFF; unsigned WrapperKind = X86ISD::Wrapper; SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags); SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA); // Add x@dtpoff with the base. return DAG.getNode(ISD::ADD, dl, PtrVT, Offset, Base); } // Lower ISD::GlobalTLSAddress using the "initial exec" or "local exec" model. static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT, TLSModel::Model model, bool is64Bit, bool isPIC) { SDLoc dl(GA); // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit). Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(), is64Bit ? 257 : 256)); SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0, dl), MachinePointerInfo(Ptr), false, false, false, 0); unsigned char OperandFlags = 0; // Most TLS accesses are not RIP relative, even on x86-64. One exception is // initialexec. unsigned WrapperKind = X86ISD::Wrapper; if (model == TLSModel::LocalExec) { OperandFlags = is64Bit ? X86II::MO_TPOFF : X86II::MO_NTPOFF; } else if (model == TLSModel::InitialExec) { if (is64Bit) { OperandFlags = X86II::MO_GOTTPOFF; WrapperKind = X86ISD::WrapperRIP; } else { OperandFlags = isPIC ? X86II::MO_GOTNTPOFF : X86II::MO_INDNTPOFF; } } else { llvm_unreachable("Unexpected model"); } // emit "addl x@ntpoff,%eax" (local exec) // or "addl x@indntpoff,%eax" (initial exec) // or "addl x@gotntpoff(%ebx) ,%eax" (initial exec, 32-bit pic) SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags); SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA); if (model == TLSModel::InitialExec) { if (isPIC && !is64Bit) { Offset = DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Offset); } Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset, MachinePointerInfo::getGOT(DAG.getMachineFunction()), false, false, false, 0); } // The address of the thread local variable is the add of the thread // pointer with the offset of the variable. return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); } SDValue X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { GlobalAddressSDNode *GA = cast(Op); // Cygwin uses emutls. // FIXME: It may be EmulatedTLS-generic also for X86-Android. 
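// Illustrative sketch (not part of this file): the initial-exec and local-exec
// paths above compute "thread pointer (%gs:0 / %fs:0) + offset", where the
// offset is either a link-time constant (@tpoff/@ntpoff) or loaded from the
// GOT (@gottpoff/@indntpoff). The snippet below emulates only that final
// addition, with a thread_local array standing in for the thread's TLS block;
// TlsBlock and tlsAddress are invented names. Kept under '#if 0'.
#if 0
#include <cassert>
#include <cstddef>

static thread_local unsigned char TlsBlock[32];  // stand-in for the TLS segment

static unsigned char *tlsAddress(std::size_t TpOff) {
  unsigned char *ThreadPointer = TlsBlock;       // emulated %fs:0 / %gs:0 load
  return ThreadPointer + TpOff;                  // the final ISD::ADD above
}

int main() {
  *tlsAddress(4) = 0x7f;        // a store through the computed address ...
  assert(TlsBlock[4] == 0x7f);  // ... lands in this thread's block
  return 0;
}
#endif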
if (Subtarget->isTargetWindowsCygwin()) return LowerToTLSEmulatedModel(GA, DAG); const GlobalValue *GV = GA->getGlobal(); auto PtrVT = getPointerTy(DAG.getDataLayout()); if (Subtarget->isTargetELF()) { if (DAG.getTarget().Options.EmulatedTLS) return LowerToTLSEmulatedModel(GA, DAG); TLSModel::Model model = DAG.getTarget().getTLSModel(GV); switch (model) { case TLSModel::GeneralDynamic: if (Subtarget->is64Bit()) return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT); return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT); case TLSModel::LocalDynamic: return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget->is64Bit()); case TLSModel::InitialExec: case TLSModel::LocalExec: return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget->is64Bit(), DAG.getTarget().getRelocationModel() == Reloc::PIC_); } llvm_unreachable("Unknown TLS model."); } if (Subtarget->isTargetDarwin()) { // Darwin only has one model of TLS. Lower to that. unsigned char OpFlag = 0; unsigned WrapperKind = Subtarget->isPICStyleRIPRel() ? X86ISD::WrapperRIP : X86ISD::Wrapper; // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the // global base reg. bool PIC32 = (DAG.getTarget().getRelocationModel() == Reloc::PIC_) && !Subtarget->is64Bit(); if (PIC32) OpFlag = X86II::MO_TLVP_PIC_BASE; else OpFlag = X86II::MO_TLVP; SDLoc DL(Op); SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL, GA->getValueType(0), GA->getOffset(), OpFlag); SDValue Offset = DAG.getNode(WrapperKind, DL, PtrVT, Result); // With PIC32, the address is actually $g + Offset. if (PIC32) Offset = DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Offset); // Lowering the machine isd will make sure everything is in the right // location. SDValue Chain = DAG.getEntryNode(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, DL, true), DL); SDValue Args[] = { Chain, Offset }; Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true), DAG.getIntPtrConstant(0, DL, true), SDValue(), DL); // TLSCALL will be codegen'ed as call. Inform MFI that function has calls. MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setAdjustsStack(true); // And our return value (tls address) is in the standard call return value // location. unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; return DAG.getCopyFromReg(Chain, DL, Reg, PtrVT, Chain.getValue(1)); } if (Subtarget->isTargetKnownWindowsMSVC() || Subtarget->isTargetWindowsGNU()) { // Just use the implicit TLS architecture // Need to generate someting similar to: // mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage // ; from TEB // mov ecx, dword [rel _tls_index]: Load index (from C runtime) // mov rcx, qword [rdx+rcx*8] // mov eax, .tls$:tlsvar // [rax+rcx] contains the address // Windows 64bit: gs:0x58 // Windows 32bit: fs:__tls_array SDLoc dl(GA); SDValue Chain = DAG.getEntryNode(); // Get the Thread Pointer, which is %fs:__tls_array (32-bit) or // %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so directly // use its literal value of 0x2C. Value *Ptr = Constant::getNullValue(Subtarget->is64Bit() ? Type::getInt8PtrTy(*DAG.getContext(), 256) : Type::getInt32PtrTy(*DAG.getContext(), 257)); SDValue TlsArray = Subtarget->is64Bit() ? DAG.getIntPtrConstant(0x58, dl) : (Subtarget->isTargetWindowsGNU() ? 
DAG.getIntPtrConstant(0x2C, dl) : DAG.getExternalSymbol("_tls_array", PtrVT)); SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, Chain, TlsArray, MachinePointerInfo(Ptr), false, false, false, 0); SDValue res; if (GV->getThreadLocalMode() == GlobalVariable::LocalExecTLSModel) { res = ThreadPointer; } else { // Load the _tls_index variable SDValue IDX = DAG.getExternalSymbol("_tls_index", PtrVT); if (Subtarget->is64Bit()) IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain, IDX, MachinePointerInfo(), MVT::i32, false, false, false, 0); else IDX = DAG.getLoad(PtrVT, dl, Chain, IDX, MachinePointerInfo(), false, false, false, 0); auto &DL = DAG.getDataLayout(); SDValue Scale = DAG.getConstant(Log2_64_Ceil(DL.getPointerSize()), dl, PtrVT); IDX = DAG.getNode(ISD::SHL, dl, PtrVT, IDX, Scale); res = DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, IDX); } res = DAG.getLoad(PtrVT, dl, Chain, res, MachinePointerInfo(), false, false, false, 0); // Get the offset of start of .tls section SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), X86II::MO_SECREL); SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA); // The address of the thread local variable is the add of the thread // pointer with the offset of the variable. return DAG.getNode(ISD::ADD, dl, PtrVT, res, Offset); } llvm_unreachable("TLS not implemented for this target."); } /// LowerShiftParts - Lower SRA_PARTS and friends, which return two i32 values /// and take a 2 x i32 value to shift plus a shift amount. static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); MVT VT = Op.getSimpleValueType(); unsigned VTBits = VT.getSizeInBits(); SDLoc dl(Op); bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); // X86ISD::SHLD and X86ISD::SHRD have defined overflow behavior but the // generic ISD nodes haven't. Insert an AND to be safe, it's optimized away // during isel. SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt, DAG.getConstant(VTBits - 1, dl, MVT::i8)); SDValue Tmp1 = isSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi, DAG.getConstant(VTBits - 1, dl, MVT::i8)) : DAG.getConstant(0, dl, VT); SDValue Tmp2, Tmp3; if (Op.getOpcode() == ISD::SHL_PARTS) { Tmp2 = DAG.getNode(X86ISD::SHLD, dl, VT, ShOpHi, ShOpLo, ShAmt); Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt); } else { Tmp2 = DAG.getNode(X86ISD::SHRD, dl, VT, ShOpLo, ShOpHi, ShAmt); Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt); } // If the shift amount is larger or equal than the width of a part we can't // rely on the results of shld/shrd. Insert a test and select the appropriate // values for large shift amounts. 
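// Illustrative sketch (not part of this file): LowerShiftParts builds a wide
// shift out of two word-sized halves. SHLD/SHRD produce the combined half, a
// plain shift produces the other, and the compare + CMOV emitted just below
// select the correct values when the shift amount is >= the part width. The
// snippet mirrors that expansion for a 64-bit left shift made of 32-bit parts
// and checks it against the native shift; shldEmul and shl64Parts are invented
// helper names. Kept under '#if 0'.
#if 0
#include <cassert>
#include <cstdint>

// Emulate SHLD: shift Hi left and fill the vacated bits from Lo's top bits.
static uint32_t shldEmul(uint32_t Hi, uint32_t Lo, unsigned Amt) {
  Amt &= 31;                                   // matches the AND inserted above
  return Amt ? (Hi << Amt) | (Lo >> (32 - Amt)) : Hi;
}

static void shl64Parts(uint32_t Lo, uint32_t Hi, unsigned Amt,
                       uint32_t &OutLo, uint32_t &OutHi) {
  uint32_t Tmp2 = shldEmul(Hi, Lo, Amt);       // candidate high part
  uint32_t Tmp3 = Lo << (Amt & 31);            // candidate low part
  if (Amt & 32) {                              // amount >= 32: the CMOV case
    OutHi = Tmp3;
    OutLo = 0;
  } else {
    OutHi = Tmp2;
    OutLo = Tmp3;
  }
}

int main() {
  uint64_t X = 0x0123456789abcdefULL;
  for (unsigned Amt = 0; Amt < 64; ++Amt) {
    uint32_t Lo, Hi;
    shl64Parts(uint32_t(X), uint32_t(X >> 32), Amt, Lo, Hi);
    assert(((uint64_t(Hi) << 32) | Lo) == (X << Amt));
  }
  return 0;
}
#endif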
SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt, DAG.getConstant(VTBits, dl, MVT::i8)); SDValue Cond = DAG.getNode(X86ISD::CMP, dl, MVT::i32, AndNode, DAG.getConstant(0, dl, MVT::i8)); SDValue Hi, Lo; SDValue CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8); SDValue Ops0[4] = { Tmp2, Tmp3, CC, Cond }; SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond }; if (Op.getOpcode() == ISD::SHL_PARTS) { Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0); Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1); } else { Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0); Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1); } SDValue Ops[2] = { Lo, Hi }; return DAG.getMergeValues(Ops, dl); } SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDValue Src = Op.getOperand(0); MVT SrcVT = Src.getSimpleValueType(); MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); if (SrcVT.isVector()) { if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) { return DAG.getNode(X86ISD::CVTDQ2PD, dl, VT, DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src, DAG.getUNDEF(SrcVT))); } if (SrcVT.getVectorElementType() == MVT::i1) { MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements()); return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT, Src)); } return SDValue(); } assert(SrcVT <= MVT::i64 && SrcVT >= MVT::i16 && "Unknown SINT_TO_FP to lower!"); // These are really Legal; return the operand so the caller accepts it as // Legal. if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType())) return Op; if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) && Subtarget->is64Bit()) { return Op; } SDValue ValueToStore = Op.getOperand(0); if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget->is64Bit()) // Bitcasting to f64 here allows us to do a single 64-bit store from // an SSE register, avoiding the store forwarding penalty that would come // with two 32-bit stores. ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore); unsigned Size = SrcVT.getSizeInBits()/8; MachineFunction &MF = DAG.getMachineFunction(); auto PtrVT = getPointerTy(MF.getDataLayout()); int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); SDValue Chain = DAG.getStore( DAG.getEntryNode(), dl, ValueToStore, StackSlot, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), false, false, 0); return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG); } SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, SelectionDAG &DAG) const { // Build the FILD SDLoc DL(Op); SDVTList Tys; bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType()); if (useSSE) Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Glue); else Tys = DAG.getVTList(Op.getValueType(), MVT::Other); unsigned ByteSize = SrcVT.getSizeInBits()/8; FrameIndexSDNode *FI = dyn_cast(StackSlot); MachineMemOperand *MMO; if (FI) { int SSFI = FI->getIndex(); MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), MachineMemOperand::MOLoad, ByteSize, ByteSize); } else { MMO = cast(StackSlot)->getMemOperand(); StackSlot = StackSlot.getOperand(1); } SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) }; SDValue Result = DAG.getMemIntrinsicNode(useSSE ? 
X86ISD::FILD_FLAG : X86ISD::FILD, DL, Tys, Ops, SrcVT, MMO); if (useSSE) { Chain = Result.getValue(1); SDValue InFlag = Result.getValue(2); // FIXME: Currently the FST is flagged to the FILD_FLAG. This // shouldn't be necessary except that RFP cannot be live across // multiple blocks. When stackifier is fixed, they can be uncoupled. MachineFunction &MF = DAG.getMachineFunction(); unsigned SSFISize = Op.getValueType().getSizeInBits()/8; int SSFI = MF.getFrameInfo()->CreateStackObject(SSFISize, SSFISize, false); auto PtrVT = getPointerTy(MF.getDataLayout()); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); Tys = DAG.getVTList(MVT::Other); SDValue Ops[] = { Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag }; MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), MachineMemOperand::MOStore, SSFISize, SSFISize); Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys, Ops, Op.getValueType(), MMO); Result = DAG.getLoad( Op.getValueType(), DL, Chain, StackSlot, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), false, false, false, 0); } return Result; } // LowerUINT_TO_FP_i64 - 64-bit unsigned integer to double expansion. SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const { // This algorithm is not obvious. Here it is what we're trying to output: /* movq %rax, %xmm0 punpckldq (c0), %xmm0 // c0: (uint4){ 0x43300000U, 0x45300000U, 0U, 0U } subpd (c1), %xmm0 // c1: (double2){ 0x1.0p52, 0x1.0p52 * 0x1.0p32 } #ifdef __SSE3__ haddpd %xmm0, %xmm0 #else pshufd $0x4e, %xmm0, %xmm1 addpd %xmm1, %xmm0 #endif */ SDLoc dl(Op); LLVMContext *Context = DAG.getContext(); // Build some magic constants. static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 }; Constant *C0 = ConstantDataVector::get(*Context, CV0); auto PtrVT = getPointerTy(DAG.getDataLayout()); SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, 16); SmallVector CV1; CV1.push_back( ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble, APInt(64, 0x4330000000000000ULL)))); CV1.push_back( ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble, APInt(64, 0x4530000000000000ULL)))); Constant *C1 = ConstantVector::get(CV1); SDValue CPIdx1 = DAG.getConstantPool(C1, PtrVT, 16); // Load the 64-bit value into an XMM register. SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Op.getOperand(0)); SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, false, false, 16); SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, DAG.getBitcast(MVT::v4i32, XR1), CLod0); SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, false, false, 16); SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1); // TODO: Are there any fast-math-flags to propagate here? SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); SDValue Result; if (Subtarget->hasSSE3()) { // FIXME: The 'haddpd' instruction may be slower than 'movhlps + addsd'. 
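// Illustrative sketch (not part of this file): the expansion above splits the
// unsigned 64-bit input into two 32-bit halves, ORs each half into the
// mantissa of a power-of-two constant (2^52 for the low half, 2^84 for the
// high half), subtracts those constants back out exactly, and adds the two
// partial results; only that final add (haddpd, or pshufd + addpd below)
// rounds. The scalar snippet reproduces the arithmetic and checks it against
// the compiler's own conversion, assuming round-to-nearest; bitsToDouble and
// uint64ToDouble are invented names. Kept under '#if 0'.
#if 0
#include <cassert>
#include <cstdint>
#include <cstring>

static double bitsToDouble(uint64_t Bits) {
  double D;
  std::memcpy(&D, &Bits, sizeof(D));   // std::bit_cast in C++20
  return D;
}

static double uint64ToDouble(uint64_t X) {
  const uint64_t K52 = 0x4330000000000000ULL;  // bit pattern of 0x1.0p52
  const uint64_t K84 = 0x4530000000000000ULL;  // bit pattern of 0x1.0p84
  double LoPart = bitsToDouble(K52 | (X & 0xffffffffULL)) - bitsToDouble(K52);
  double HiPart = bitsToDouble(K84 | (X >> 32)) - bitsToDouble(K84);
  return LoPart + HiPart;                      // the single rounding step
}

int main() {
  const uint64_t Tests[] = {0ULL, 1ULL, 0xffffffffULL, 0x123456789abcdef0ULL,
                            0x8000000000000000ULL, ~0ULL};
  for (uint64_t X : Tests)
    assert(uint64ToDouble(X) == static_cast<double>(X));
  return 0;
}
#endif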
Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub); } else { SDValue S2F = DAG.getBitcast(MVT::v4i32, Sub); SDValue Shuffle = getTargetShuffleNode(X86ISD::PSHUFD, dl, MVT::v4i32, S2F, 0x4E, DAG); Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, DAG.getBitcast(MVT::v2f64, Shuffle), Sub); } return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result, DAG.getIntPtrConstant(0, dl)); } // LowerUINT_TO_FP_i32 - 32-bit unsigned integer to float expansion. SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); // FP constant to bias correct the final result. SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl, MVT::f64); // Load the 32-bit value into an XMM register. SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Op.getOperand(0)); // Zero out the upper parts of the register. Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG); Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, DAG.getBitcast(MVT::v2f64, Load), DAG.getIntPtrConstant(0, dl)); // Or the load with the bias. SDValue Or = DAG.getNode( ISD::OR, dl, MVT::v2i64, DAG.getBitcast(MVT::v2i64, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Load)), DAG.getBitcast(MVT::v2i64, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Bias))); Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, DAG.getBitcast(MVT::v2f64, Or), DAG.getIntPtrConstant(0, dl)); // Subtract the bias. // TODO: Are there any fast-math-flags to propagate here? SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias); // Handle final rounding. MVT DestVT = Op.getSimpleValueType(); if (DestVT.bitsLT(MVT::f64)) return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub, DAG.getIntPtrConstant(0, dl)); if (DestVT.bitsGT(MVT::f64)) return DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub); // Handle final rounding. return Sub; } static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { // The algorithm is the following: // #ifdef __SSE4_1__ // uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa); // uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16), // (uint4) 0x53000000, 0xaa); // #else // uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000; // uint4 hi = (v >> 16) | (uint4) 0x53000000; // #endif // float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f); // return (float4) lo + fhi; // We shouldn't use it when unsafe-fp-math is enabled though: we might later // reassociate the two FADDs, and if we do that, the algorithm fails // spectacularly (PR24512). // FIXME: If we ever have some kind of Machine FMF, this should be marked // as non-fast and always be enabled. Why isn't SDAG FMF enough? Because // there's also the MachineCombiner reassociations happening on Machine IR. if (DAG.getTarget().Options.UnsafeFPMath) return SDValue(); SDLoc DL(Op); SDValue V = Op->getOperand(0); MVT VecIntVT = V.getSimpleValueType(); bool Is128 = VecIntVT == MVT::v4i32; MVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32; // If we convert to something else than the supported type, e.g., to v4f64, // abort early. 
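// Illustrative sketch (not part of this file): LowerUINT_TO_FP_i32 above uses
// a single bias instead of two: OR the 32-bit value into the mantissa of
// 0x1.0p52 (the 0x4330000000000000 constant) and subtract the bias. Every
// uint32_t fits exactly in a double, so this step never rounds; only the
// optional FP_ROUND to f32 does. bitsToDouble and uint32ToDouble are invented
// names. Kept under '#if 0'.
#if 0
#include <cassert>
#include <cstdint>
#include <cstring>

static double bitsToDouble(uint64_t Bits) {
  double D;
  std::memcpy(&D, &Bits, sizeof(D));
  return D;
}

static double uint32ToDouble(uint32_t X) {
  const uint64_t Bias = 0x4330000000000000ULL;   // bit pattern of 0x1.0p52
  return bitsToDouble(Bias | X) - bitsToDouble(Bias);   // exact, no rounding
}

int main() {
  const uint32_t Tests[] = {0u, 1u, 0x80000000u, 0xffffffffu, 123456789u};
  for (uint32_t X : Tests)
    assert(uint32ToDouble(X) == static_cast<double>(X));
  return 0;
}
#endif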
if (VecFloatVT != Op->getSimpleValueType(0)) return SDValue(); unsigned NumElts = VecIntVT.getVectorNumElements(); assert((VecIntVT == MVT::v4i32 || VecIntVT == MVT::v8i32) && "Unsupported custom type"); assert(NumElts <= 8 && "The size of the constant array must be fixed"); // In the #idef/#else code, we have in common: // - The vector of constants: // -- 0x4b000000 // -- 0x53000000 // - A shift: // -- v >> 16 // Create the splat vector for 0x4b000000. SDValue CstLow = DAG.getConstant(0x4b000000, DL, MVT::i32); SDValue CstLowArray[] = {CstLow, CstLow, CstLow, CstLow, CstLow, CstLow, CstLow, CstLow}; SDValue VecCstLow = DAG.getNode(ISD::BUILD_VECTOR, DL, VecIntVT, makeArrayRef(&CstLowArray[0], NumElts)); // Create the splat vector for 0x53000000. SDValue CstHigh = DAG.getConstant(0x53000000, DL, MVT::i32); SDValue CstHighArray[] = {CstHigh, CstHigh, CstHigh, CstHigh, CstHigh, CstHigh, CstHigh, CstHigh}; SDValue VecCstHigh = DAG.getNode(ISD::BUILD_VECTOR, DL, VecIntVT, makeArrayRef(&CstHighArray[0], NumElts)); // Create the right shift. SDValue CstShift = DAG.getConstant(16, DL, MVT::i32); SDValue CstShiftArray[] = {CstShift, CstShift, CstShift, CstShift, CstShift, CstShift, CstShift, CstShift}; SDValue VecCstShift = DAG.getNode(ISD::BUILD_VECTOR, DL, VecIntVT, makeArrayRef(&CstShiftArray[0], NumElts)); SDValue HighShift = DAG.getNode(ISD::SRL, DL, VecIntVT, V, VecCstShift); SDValue Low, High; if (Subtarget.hasSSE41()) { MVT VecI16VT = Is128 ? MVT::v8i16 : MVT::v16i16; // uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa); SDValue VecCstLowBitcast = DAG.getBitcast(VecI16VT, VecCstLow); SDValue VecBitcast = DAG.getBitcast(VecI16VT, V); // Low will be bitcasted right away, so do not bother bitcasting back to its // original type. Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast, VecCstLowBitcast, DAG.getConstant(0xaa, DL, MVT::i32)); // uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16), // (uint4) 0x53000000, 0xaa); SDValue VecCstHighBitcast = DAG.getBitcast(VecI16VT, VecCstHigh); SDValue VecShiftBitcast = DAG.getBitcast(VecI16VT, HighShift); // High will be bitcasted right away, so do not bother bitcasting back to // its original type. High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast, VecCstHighBitcast, DAG.getConstant(0xaa, DL, MVT::i32)); } else { SDValue CstMask = DAG.getConstant(0xffff, DL, MVT::i32); SDValue VecCstMask = DAG.getNode(ISD::BUILD_VECTOR, DL, VecIntVT, CstMask, CstMask, CstMask, CstMask); // uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000; SDValue LowAnd = DAG.getNode(ISD::AND, DL, VecIntVT, V, VecCstMask); Low = DAG.getNode(ISD::OR, DL, VecIntVT, LowAnd, VecCstLow); // uint4 hi = (v >> 16) | (uint4) 0x53000000; High = DAG.getNode(ISD::OR, DL, VecIntVT, HighShift, VecCstHigh); } // Create the vector constant for -(0x1.0p39f + 0x1.0p23f). SDValue CstFAdd = DAG.getConstantFP( APFloat(APFloat::IEEEsingle, APInt(32, 0xD3000080)), DL, MVT::f32); SDValue CstFAddArray[] = {CstFAdd, CstFAdd, CstFAdd, CstFAdd, CstFAdd, CstFAdd, CstFAdd, CstFAdd}; SDValue VecCstFAdd = DAG.getNode(ISD::BUILD_VECTOR, DL, VecFloatVT, makeArrayRef(&CstFAddArray[0], NumElts)); // float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f); SDValue HighBitcast = DAG.getBitcast(VecFloatVT, High); // TODO: Are there any fast-math-flags to propagate here? 
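// Illustrative sketch (not part of this file): per lane, the algorithm quoted
// above forms lo = (v & 0xffff) | 0x4b000000 (the float 2^23 + low16) and
// hi = (v >> 16) | 0x53000000 (the float 2^39 + high16 * 2^16), subtracts the
// exact constant 2^39 + 2^23 from hi, and adds lo; only that last FADD rounds,
// which is why the two adds must not be reassociated under unsafe-fp-math.
// The scalar snippet checks one lane against a direct conversion, assuming
// round-to-nearest; bitsToFloat and uint32ToFloat are invented names. Kept
// under '#if 0'.
#if 0
#include <cassert>
#include <cstdint>
#include <cstring>

static float bitsToFloat(uint32_t Bits) {
  float F;
  std::memcpy(&F, &Bits, sizeof(F));
  return F;
}

static float uint32ToFloat(uint32_t V) {
  float Lo = bitsToFloat(0x4b000000u | (V & 0xffffu));  // 2^23 + low16, exact
  float Hi = bitsToFloat(0x53000000u | (V >> 16));      // 2^39 + high16*2^16, exact
  float BiasSum = bitsToFloat(0x53000000u) + bitsToFloat(0x4b000000u); // exact
  float FHi = Hi - BiasSum;                             // still exact
  return Lo + FHi;                                      // the one rounding step
}

int main() {
  const uint32_t Tests[] = {0u, 1u, 0xffffu, 0x10000u, 0x89abcdefu, 0xffffffffu};
  for (uint32_t V : Tests)
    assert(uint32ToFloat(V) == static_cast<float>(V));
  return 0;
}
#endif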
SDValue FHigh = DAG.getNode(ISD::FADD, DL, VecFloatVT, HighBitcast, VecCstFAdd); // return (float4) lo + fhi; SDValue LowBitcast = DAG.getBitcast(VecFloatVT, Low); return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh); } SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const { SDValue N0 = Op.getOperand(0); MVT SVT = N0.getSimpleValueType(); SDLoc dl(Op); switch (SVT.SimpleTy) { default: llvm_unreachable("Custom UINT_TO_FP is not supported!"); case MVT::v4i8: case MVT::v4i16: case MVT::v8i8: case MVT::v8i16: { MVT NVT = MVT::getVectorVT(MVT::i32, SVT.getVectorNumElements()); return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0)); } case MVT::v4i32: case MVT::v8i32: return lowerUINT_TO_FP_vXi32(Op, DAG, *Subtarget); case MVT::v16i8: case MVT::v16i16: assert(Subtarget->hasAVX512()); return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(), DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v16i32, N0)); } } SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDValue N0 = Op.getOperand(0); SDLoc dl(Op); auto PtrVT = getPointerTy(DAG.getDataLayout()); if (Op.getSimpleValueType().isVector()) return lowerUINT_TO_FP_vec(Op, DAG); // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform // the optimization here. if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0); MVT SrcVT = N0.getSimpleValueType(); MVT DstVT = Op.getSimpleValueType(); if (Subtarget->hasAVX512() && isScalarFPTypeInSSEReg(DstVT) && (SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget->is64Bit()))) { // Conversions from unsigned i32 to f32/f64 are legal, // using VCVTUSI2SS/SD. Same for i64 in 64-bit mode. return Op; } if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64) return LowerUINT_TO_FP_i64(Op, DAG); if (SrcVT == MVT::i32 && X86ScalarSSEf64) return LowerUINT_TO_FP_i32(Op, DAG); if (Subtarget->is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32) return SDValue(); // Make a 64-bit buffer, and use it to build an FILD. SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64); if (SrcVT == MVT::i32) { SDValue WordOff = DAG.getConstant(4, dl, PtrVT); SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, WordOff); SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, MachinePointerInfo(), false, false, 0); SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, dl, MVT::i32), OffsetSlot, MachinePointerInfo(), false, false, 0); SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG); return Fild; } assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP"); SDValue ValueToStore = Op.getOperand(0); if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget->is64Bit()) // Bitcasting to f64 here allows us to do a single 64-bit store from // an SSE register, avoiding the store forwarding penalty that would come // with two 32-bit stores. ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, ValueToStore, StackSlot, MachinePointerInfo(), false, false, 0); // For i64 source, we need to add the appropriate power of 2 if the input // was negative. This is the same as the optimization in // DAGTypeLegalizer::ExpandIntOp_UNIT_TO_FP, and for it to be safe here, // we must be careful to do the computation in x87 extended precision, not // in SSE. 
(The generic code can't know it's OK to do this, or how to.) int SSFI = cast(StackSlot)->getIndex(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), MachineMemOperand::MOLoad, 8, 8); SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) }; SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, MVT::i64, MMO); APInt FF(32, 0x5F800000ULL); // Check whether the sign bit is set. SDValue SignSet = DAG.getSetCC( dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64), Op.getOperand(0), DAG.getConstant(0, dl, MVT::i64), ISD::SETLT); // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits. SDValue FudgePtr = DAG.getConstantPool( ConstantInt::get(*DAG.getContext(), FF.zext(64)), PtrVT); // Get a pointer to FF if the sign bit was set, or to 0 otherwise. SDValue Zero = DAG.getIntPtrConstant(0, dl); SDValue Four = DAG.getIntPtrConstant(4, dl); SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet, Zero, Four); FudgePtr = DAG.getNode(ISD::ADD, dl, PtrVT, FudgePtr, Offset); // Load the value out, extending it from f32 to f80. // FIXME: Avoid the extend by constructing the right constant pool? SDValue Fudge = DAG.getExtLoad( ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(), FudgePtr, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, false, false, false, 4); // Extend everything to 80 bits to force it to be done on x87. // TODO: Are there any fast-math-flags to propagate here? SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge); return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0, dl)); } // If the given FP_TO_SINT (IsSigned) or FP_TO_UINT (!IsSigned) operation // is legal, or has an fp128 or f16 source (which needs to be promoted to f32), // just return an pair. // Otherwise it is assumed to be a conversion from one of f32, f64 or f80 // to i16, i32 or i64, and we lower it to a legal sequence. // If lowered to the final integer result we return a pair. // Otherwise we lower it to a sequence ending with a FIST, return a // pair, and the caller is responsible for loading // the final integer result from StackSlot. std::pair X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) const { SDLoc DL(Op); EVT DstTy = Op.getValueType(); EVT TheVT = Op.getOperand(0).getValueType(); auto PtrVT = getPointerTy(DAG.getDataLayout()); if (TheVT != MVT::f32 && TheVT != MVT::f64 && TheVT != MVT::f80) { // f16 must be promoted before using the lowering in this routine. // fp128 does not use this lowering. return std::make_pair(SDValue(), SDValue()); } // If using FIST to compute an unsigned i64, we'll need some fixup // to handle values above the maximum signed i64. A FIST is always // used for the 32-bit subtarget, but also for f80 on a 64-bit target. bool UnsignedFixup = !IsSigned && DstTy == MVT::i64 && (!Subtarget->is64Bit() || !isScalarFPTypeInSSEReg(TheVT)); if (!IsSigned && DstTy != MVT::i64 && !Subtarget->hasAVX512()) { // Replace the fp-to-uint32 operation with an fp-to-sint64 FIST. // The low 32 bits of the fist result will have the correct uint32 result. assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT"); DstTy = MVT::i64; } assert(DstTy.getSimpleVT() <= MVT::i64 && DstTy.getSimpleVT() >= MVT::i16 && "Unknown FP_TO_INT to lower!"); // These are really Legal. 
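// Illustrative sketch (not part of this file): for an i64 source the code
// above loads the value with FILD, which treats it as signed, and then adds
// the constant loaded from the pool (0x5F800000, i.e. 2^64 as a float) when
// the sign bit of the input was set, doing the arithmetic in x87 80-bit
// precision so the correction is exact. The snippet mirrors that with
// 'long double' standing in for the x87 temporary; on targets where
// long double is only 64 bits the intermediate could double-round for some
// inputs, though not for the values tested here. The helper name is invented.
// Kept under '#if 0'.
#if 0
#include <cassert>
#include <cstdint>

static double uint64ToDoubleViaSigned(uint64_t X) {
  long double AsSigned = static_cast<long double>(static_cast<int64_t>(X));
  long double Fudge =
      (static_cast<int64_t>(X) < 0) ? 18446744073709551616.0L /* 2^64 */ : 0.0L;
  return static_cast<double>(AsSigned + Fudge);  // single rounding to f64
}

int main() {
  const uint64_t Tests[] = {0ULL, 1ULL, 0x7fffffffffffffffULL,
                            0x8000000000000000ULL, 0xfedcba9876543210ULL, ~0ULL};
  for (uint64_t X : Tests)
    assert(uint64ToDoubleViaSigned(X) == static_cast<double>(X));
  return 0;
}
#endif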
if (DstTy == MVT::i32 && isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) return std::make_pair(SDValue(), SDValue()); if (Subtarget->is64Bit() && DstTy == MVT::i64 && isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) return std::make_pair(SDValue(), SDValue()); // We lower FP->int64 into FISTP64 followed by a load from a temporary // stack slot. MachineFunction &MF = DAG.getMachineFunction(); unsigned MemSize = DstTy.getSizeInBits()/8; int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); unsigned Opc; switch (DstTy.getSimpleVT().SimpleTy) { default: llvm_unreachable("Invalid FP_TO_SINT to lower!"); case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; } SDValue Chain = DAG.getEntryNode(); SDValue Value = Op.getOperand(0); SDValue Adjust; // 0x0 or 0x80000000, for result sign bit adjustment. if (UnsignedFixup) { // // Conversion to unsigned i64 is implemented with a select, // depending on whether the source value fits in the range // of a signed i64. Let Thresh be the FP equivalent of // 0x8000000000000000ULL. // // Adjust i32 = (Value < Thresh) ? 0 : 0x80000000; // FistSrc = (Value < Thresh) ? Value : (Value - Thresh); // Fist-to-mem64 FistSrc // Add 0 or 0x800...0ULL to the 64-bit result, which is equivalent // to XOR'ing the high 32 bits with Adjust. // // Being a power of 2, Thresh is exactly representable in all FP formats. // For X87 we'd like to use the smallest FP type for this constant, but // for DAG type consistency we have to match the FP operand type. APFloat Thresh(APFloat::IEEEsingle, APInt(32, 0x5f000000)); LLVM_ATTRIBUTE_UNUSED APFloat::opStatus Status = APFloat::opOK; bool LosesInfo = false; if (TheVT == MVT::f64) // The rounding mode is irrelevant as the conversion should be exact. Status = Thresh.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &LosesInfo); else if (TheVT == MVT::f80) Status = Thresh.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven, &LosesInfo); assert(Status == APFloat::opOK && !LosesInfo && "FP conversion should have been exact"); SDValue ThreshVal = DAG.getConstantFP(Thresh, DL, TheVT); SDValue Cmp = DAG.getSetCC(DL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), TheVT), Value, ThreshVal, ISD::SETLT); Adjust = DAG.getSelect(DL, MVT::i32, Cmp, DAG.getConstant(0, DL, MVT::i32), DAG.getConstant(0x80000000, DL, MVT::i32)); SDValue Sub = DAG.getNode(ISD::FSUB, DL, TheVT, Value, ThreshVal); Cmp = DAG.getSetCC(DL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), TheVT), Value, ThreshVal, ISD::SETLT); Value = DAG.getSelect(DL, TheVT, Cmp, Value, Sub); } // FIXME This causes a redundant load/store if the SSE-class value is already // in memory, such as if it is on the callstack. 
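// Illustrative sketch (not part of this file): the UnsignedFixup path above
// performs an unsigned fp-to-i64 with a signed FIST by comparing against
// Thresh = 2^63: values below the threshold are converted directly, values at
// or above it have 2^63 subtracted first (exactly, since both values are
// representable) and the sign bit XORed back into the result afterwards. The
// scalar snippet mirrors that, using a signed cast as the stand-in for FIST;
// the helper name is invented and the inputs are kept in [0, 2^64). Kept
// under '#if 0'.
#if 0
#include <cassert>
#include <cstdint>

static uint64_t doubleToUint64ViaSigned(double V) {
  const double Thresh = 9223372036854775808.0;             // 2^63, exact
  uint64_t Adjust = (V < Thresh) ? 0 : 0x8000000000000000ULL;
  double FistSrc = (V < Thresh) ? V : V - Thresh;           // exact subtraction
  return static_cast<uint64_t>(static_cast<int64_t>(FistSrc)) ^ Adjust;
}

int main() {
  const double Tests[] = {0.0, 1.5, 4294967296.0,           // 2^32
                          9223372036854775808.0,            // 2^63
                          18446744073709549568.0};          // 2^64 - 2048
  for (double V : Tests)
    assert(doubleToUint64ViaSigned(V) == static_cast<uint64_t>(V));
  return 0;
}
#endif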
if (isScalarFPTypeInSSEReg(TheVT)) { assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!"); Chain = DAG.getStore(Chain, DL, Value, StackSlot, MachinePointerInfo::getFixedStack(MF, SSFI), false, false, 0); SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(TheVT) }; MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI), MachineMemOperand::MOLoad, MemSize, MemSize); Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, DstTy, MMO); Chain = Value.getValue(1); SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false); StackSlot = DAG.getFrameIndex(SSFI, PtrVT); } MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI), MachineMemOperand::MOStore, MemSize, MemSize); if (UnsignedFixup) { // Insert the FIST, load its result as two i32's, // and XOR the high i32 with Adjust. SDValue FistOps[] = { Chain, Value, StackSlot }; SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other), FistOps, DstTy, MMO); SDValue Low32 = DAG.getLoad(MVT::i32, DL, FIST, StackSlot, MachinePointerInfo(), false, false, false, 0); SDValue HighAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackSlot, DAG.getConstant(4, DL, PtrVT)); SDValue High32 = DAG.getLoad(MVT::i32, DL, FIST, HighAddr, MachinePointerInfo(), false, false, false, 0); High32 = DAG.getNode(ISD::XOR, DL, MVT::i32, High32, Adjust); if (Subtarget->is64Bit()) { // Join High32 and Low32 into a 64-bit result. // (High32 << 32) | Low32 Low32 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Low32); High32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, High32); High32 = DAG.getNode(ISD::SHL, DL, MVT::i64, High32, DAG.getConstant(32, DL, MVT::i8)); SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i64, High32, Low32); return std::make_pair(Result, SDValue()); } SDValue ResultOps[] = { Low32, High32 }; SDValue pair = IsReplace ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, ResultOps) : DAG.getMergeValues(ResultOps, DL); return std::make_pair(pair, SDValue()); } else { // Build the FP_TO_INT*_IN_MEM SDValue Ops[] = { Chain, Value, StackSlot }; SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other), Ops, DstTy, MMO); return std::make_pair(FIST, StackSlot); } } static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, const X86Subtarget *Subtarget) { MVT VT = Op->getSimpleValueType(0); SDValue In = Op->getOperand(0); MVT InVT = In.getSimpleValueType(); SDLoc dl(Op); if (VT.is512BitVector() || InVT.getVectorElementType() == MVT::i1) return DAG.getNode(ISD::ZERO_EXTEND, dl, VT, In); // Optimize vectors in AVX mode: // // v8i16 -> v8i32 // Use vpunpcklwd for 4 lower elements v8i16 -> v4i32. // Use vpunpckhwd for 4 upper elements v8i16 -> v4i32. // Concat upper and lower parts. // // v4i32 -> v4i64 // Use vpunpckldq for 4 lower elements v4i32 -> v2i64. // Use vpunpckhdq for 4 upper elements v4i32 -> v2i64. // Concat upper and lower parts. // if (((VT != MVT::v16i16) || (InVT != MVT::v16i8)) && ((VT != MVT::v8i32) || (InVT != MVT::v8i16)) && ((VT != MVT::v4i64) || (InVT != MVT::v4i32))) return SDValue(); if (Subtarget->hasInt256()) return DAG.getNode(X86ISD::VZEXT, dl, VT, In); SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl); SDValue Undef = DAG.getUNDEF(InVT); bool NeedZero = Op.getOpcode() == ISD::ZERO_EXTEND; SDValue OpLo = getUnpackl(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef); SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? 
ZeroVec : Undef); MVT HVT = MVT::getVectorVT(VT.getVectorElementType(), VT.getVectorNumElements()/2); OpLo = DAG.getBitcast(HVT, OpLo); OpHi = DAG.getBitcast(HVT, OpHi); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); } static SDValue LowerZERO_EXTEND_AVX512(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { MVT VT = Op->getSimpleValueType(0); SDValue In = Op->getOperand(0); MVT InVT = In.getSimpleValueType(); SDLoc DL(Op); unsigned int NumElts = VT.getVectorNumElements(); if (NumElts != 8 && NumElts != 16 && !Subtarget->hasBWI()) return SDValue(); if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) return DAG.getNode(X86ISD::VZEXT, DL, VT, In); assert(InVT.getVectorElementType() == MVT::i1); MVT ExtVT = NumElts == 8 ? MVT::v8i64 : MVT::v16i32; SDValue One = DAG.getConstant(APInt(ExtVT.getScalarSizeInBits(), 1), DL, ExtVT); SDValue Zero = DAG.getConstant(APInt::getNullValue(ExtVT.getScalarSizeInBits()), DL, ExtVT); SDValue V = DAG.getNode(ISD::VSELECT, DL, ExtVT, In, One, Zero); if (VT.is512BitVector()) return V; return DAG.getNode(X86ISD::VTRUNC, DL, VT, V); } static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { if (Subtarget->hasFp256()) if (SDValue Res = LowerAVXExtend(Op, DAG, Subtarget)) return Res; return SDValue(); } static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); MVT VT = Op.getSimpleValueType(); SDValue In = Op.getOperand(0); MVT SVT = In.getSimpleValueType(); if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1) return LowerZERO_EXTEND_AVX512(Op, Subtarget, DAG); if (Subtarget->hasFp256()) if (SDValue Res = LowerAVXExtend(Op, DAG, Subtarget)) return Res; assert(!VT.is256BitVector() || !SVT.is128BitVector() || VT.getVectorNumElements() != SVT.getVectorNumElements()); return SDValue(); } static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG, const X86Subtarget *Subtarget) { SDLoc DL(Op); MVT VT = Op.getSimpleValueType(); SDValue In = Op.getOperand(0); MVT InVT = In.getSimpleValueType(); assert(VT.getVectorElementType() == MVT::i1 && "Unexected vector type."); // Shift LSB to MSB and use VPMOVB2M - SKX. unsigned ShiftInx = InVT.getScalarSizeInBits() - 1; if ((InVT.is512BitVector() && InVT.getScalarSizeInBits() <= 16 && Subtarget->hasBWI()) || // legal, will go to VPMOVB2M, VPMOVW2M ((InVT.is256BitVector() || InVT.is128BitVector()) && InVT.getScalarSizeInBits() <= 16 && Subtarget->hasBWI() && Subtarget->hasVLX())) { // legal, will go to VPMOVB2M, VPMOVW2M // Shift packed bytes not supported natively, bitcast to dword MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16); SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, ExtVT, DAG.getBitcast(ExtVT, In), DAG.getConstant(ShiftInx, DL, ExtVT)); ShiftNode = DAG.getBitcast(InVT, ShiftNode); return DAG.getNode(X86ISD::CVT2MASK, DL, VT, ShiftNode); } if ((InVT.is512BitVector() && InVT.getScalarSizeInBits() >= 32 && Subtarget->hasDQI()) || // legal, will go to VPMOVD2M, VPMOVQ2M ((InVT.is256BitVector() || InVT.is128BitVector()) && InVT.getScalarSizeInBits() >= 32 && Subtarget->hasDQI() && Subtarget->hasVLX())) { // legal, will go to VPMOVD2M, VPMOVQ2M SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, InVT, In, DAG.getConstant(ShiftInx, DL, InVT)); return DAG.getNode(X86ISD::CVT2MASK, DL, VT, ShiftNode); } // Shift LSB to MSB, extend if necessary and use TESTM. 
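// Illustrative sketch (not part of this file): the i1 truncation below keeps
// only bit 0 of each element by shifting it into the sign bit and letting a
// sign-based mask instruction (VPMOVB2M/W2M/D2M/Q2M or TESTM) collect it. The
// per-element snippet shows that "shift left by width-1, then test the sign"
// is exactly "x & 1"; truncToI1 is an invented name. Kept under '#if 0'.
#if 0
#include <cassert>
#include <cstdint>

static bool truncToI1(uint32_t X) {
  uint32_t Shifted = X << 31;                 // LSB moved into the sign bit
  return static_cast<int32_t>(Shifted) < 0;   // what the mask instruction tests
}

int main() {
  const uint32_t Tests[] = {0u, 1u, 2u, 3u, 0xfffffffeu, 0xffffffffu};
  for (uint32_t X : Tests)
    assert(truncToI1(X) == ((X & 1u) != 0));
  return 0;
}
#endif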
unsigned NumElts = InVT.getVectorNumElements(); if (InVT.getSizeInBits() < 512 && (InVT.getScalarType() == MVT::i8 || InVT.getScalarType() == MVT::i16 || !Subtarget->hasVLX())) { assert((NumElts == 8 || NumElts == 16) && "Unexected vector type."); // TESTD/Q should be used (if BW supported we use CVT2MASK above), // so vector should be extended to packed dword/qword. MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts); In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In); InVT = ExtVT; ShiftInx = InVT.getScalarSizeInBits() - 1; } SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, InVT, In, DAG.getConstant(ShiftInx, DL, InVT)); return DAG.getNode(X86ISD::TESTM, DL, VT, ShiftNode, ShiftNode); } SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); MVT VT = Op.getSimpleValueType(); SDValue In = Op.getOperand(0); MVT InVT = In.getSimpleValueType(); if (VT == MVT::i1) { assert((InVT.isInteger() && (InVT.getSizeInBits() <= 64)) && "Invalid scalar TRUNCATE operation"); if (InVT.getSizeInBits() >= 32) return SDValue(); In = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, In); return DAG.getNode(ISD::TRUNCATE, DL, VT, In); } assert(VT.getVectorNumElements() == InVT.getVectorNumElements() && "Invalid TRUNCATE operation"); if (VT.getVectorElementType() == MVT::i1) return LowerTruncateVecI1(Op, DAG, Subtarget); // vpmovqb/w/d, vpmovdb/w, vpmovwb if (Subtarget->hasAVX512()) { // word to byte only under BWI if (InVT == MVT::v16i16 && !Subtarget->hasBWI()) // v16i16 -> v16i8 return DAG.getNode(X86ISD::VTRUNC, DL, VT, DAG.getNode(X86ISD::VSEXT, DL, MVT::v16i32, In)); return DAG.getNode(X86ISD::VTRUNC, DL, VT, In); } if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) { // On AVX2, v4i64 -> v4i32 becomes VPERMD. if (Subtarget->hasInt256()) { static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1}; In = DAG.getBitcast(MVT::v8i32, In); In = DAG.getVectorShuffle(MVT::v8i32, DL, In, DAG.getUNDEF(MVT::v8i32), ShufMask); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In, DAG.getIntPtrConstant(0, DL)); } SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, DAG.getIntPtrConstant(0, DL)); SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, DAG.getIntPtrConstant(2, DL)); OpLo = DAG.getBitcast(MVT::v4i32, OpLo); OpHi = DAG.getBitcast(MVT::v4i32, OpHi); static const int ShufMask[] = {0, 2, 4, 6}; return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask); } if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) { // On AVX2, v8i32 -> v8i16 becomed PSHUFB. 
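// Illustrative sketch (not part of this file): on AVX2 the v8i32 -> v8i16
// truncation below is a byte shuffle: within each 128-bit lane, PSHUFB picks
// bytes 0 and 1 of every dword (the low half on a little-endian target, which
// x86 is), and a final shuffle packs the two lanes. The snippet emulates one
// lane's worth of that byte gather and checks it against plain integer
// truncation; truncViaByteShuffle is an invented name. Kept under '#if 0'.
#if 0
#include <array>
#include <cassert>
#include <cstdint>
#include <cstring>

static std::array<uint16_t, 4>
truncViaByteShuffle(const std::array<uint32_t, 4> &In) {
  uint8_t Bytes[16];
  std::memcpy(Bytes, In.data(), 16);
  static const int Mask[8] = {0, 1, 4, 5, 8, 9, 12, 13};  // same as ShufMask1
  uint8_t Picked[8];
  for (int i = 0; i < 8; ++i)
    Picked[i] = Bytes[Mask[i]];
  std::array<uint16_t, 4> Out;
  std::memcpy(Out.data(), Picked, 8);
  return Out;
}

int main() {
  std::array<uint32_t, 4> In = {0x11223344u, 0xdeadbeefu, 0x00010002u, 0xfffffffeu};
  std::array<uint16_t, 4> Out = truncViaByteShuffle(In);
  for (int i = 0; i < 4; ++i)
    assert(Out[i] == static_cast<uint16_t>(In[i]));       // plain truncation
  return 0;
}
#endif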
if (Subtarget->hasInt256()) { In = DAG.getBitcast(MVT::v32i8, In); SmallVector pshufbMask; for (unsigned i = 0; i < 2; ++i) { pshufbMask.push_back(DAG.getConstant(0x0, DL, MVT::i8)); pshufbMask.push_back(DAG.getConstant(0x1, DL, MVT::i8)); pshufbMask.push_back(DAG.getConstant(0x4, DL, MVT::i8)); pshufbMask.push_back(DAG.getConstant(0x5, DL, MVT::i8)); pshufbMask.push_back(DAG.getConstant(0x8, DL, MVT::i8)); pshufbMask.push_back(DAG.getConstant(0x9, DL, MVT::i8)); pshufbMask.push_back(DAG.getConstant(0xc, DL, MVT::i8)); pshufbMask.push_back(DAG.getConstant(0xd, DL, MVT::i8)); for (unsigned j = 0; j < 8; ++j) pshufbMask.push_back(DAG.getConstant(0x80, DL, MVT::i8)); } SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, pshufbMask); In = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, In, BV); In = DAG.getBitcast(MVT::v4i64, In); static const int ShufMask[] = {0, 2, -1, -1}; In = DAG.getVectorShuffle(MVT::v4i64, DL, In, DAG.getUNDEF(MVT::v4i64), &ShufMask[0]); In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, DAG.getIntPtrConstant(0, DL)); return DAG.getBitcast(VT, In); } SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In, DAG.getIntPtrConstant(0, DL)); SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In, DAG.getIntPtrConstant(4, DL)); OpLo = DAG.getBitcast(MVT::v16i8, OpLo); OpHi = DAG.getBitcast(MVT::v16i8, OpHi); // The PSHUFB mask: static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1}; SDValue Undef = DAG.getUNDEF(MVT::v16i8); OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, Undef, ShufMask1); OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, Undef, ShufMask1); OpLo = DAG.getBitcast(MVT::v4i32, OpLo); OpHi = DAG.getBitcast(MVT::v4i32, OpHi); // The MOVLHPS Mask: static const int ShufMask2[] = {0, 1, 4, 5}; SDValue res = DAG.getVectorShuffle(MVT::v4i32, DL, OpLo, OpHi, ShufMask2); return DAG.getBitcast(MVT::v8i16, res); } // Handle truncation of V256 to V128 using shuffles. if (!VT.is128BitVector() || !InVT.is256BitVector()) return SDValue(); assert(Subtarget->hasFp256() && "256-bit vector without AVX!"); unsigned NumElems = VT.getVectorNumElements(); MVT NVT = MVT::getVectorVT(VT.getVectorElementType(), NumElems * 2); SmallVector MaskVec(NumElems * 2, -1); // Prepare truncation shuffle mask for (unsigned i = 0; i != NumElems; ++i) MaskVec[i] = i * 2; SDValue V = DAG.getVectorShuffle(NVT, DL, DAG.getBitcast(NVT, In), DAG.getUNDEF(NVT), &MaskVec[0]); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, DAG.getIntPtrConstant(0, DL)); } SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const { assert(!Op.getSimpleValueType().isVector()); std::pair Vals = FP_TO_INTHelper(Op, DAG, /*IsSigned=*/ true, /*IsReplace=*/ false); SDValue FIST = Vals.first, StackSlot = Vals.second; // If FP_TO_INTHelper failed, the node is actually supposed to be Legal. if (!FIST.getNode()) return Op; if (StackSlot.getNode()) // Load the result. return DAG.getLoad(Op.getValueType(), SDLoc(Op), FIST, StackSlot, MachinePointerInfo(), false, false, false, 0); // The node is the result. return FIST; } SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const { std::pair Vals = FP_TO_INTHelper(Op, DAG, /*IsSigned=*/ false, /*IsReplace=*/ false); SDValue FIST = Vals.first, StackSlot = Vals.second; // If FP_TO_INTHelper failed, the node is actually supposed to be Legal. if (!FIST.getNode()) return Op; if (StackSlot.getNode()) // Load the result. 
return DAG.getLoad(Op.getValueType(), SDLoc(Op), FIST, StackSlot, MachinePointerInfo(), false, false, false, 0); // The node is the result. return FIST; } static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) { SDLoc DL(Op); MVT VT = Op.getSimpleValueType(); SDValue In = Op.getOperand(0); MVT SVT = In.getSimpleValueType(); assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!"); return DAG.getNode(X86ISD::VFPEXT, DL, VT, DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32, In, DAG.getUNDEF(SVT))); } /// The only differences between FABS and FNEG are the mask and the logic op. /// FNEG also has a folding opportunity for FNEG(FABS(x)). static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) { assert((Op.getOpcode() == ISD::FABS || Op.getOpcode() == ISD::FNEG) && "Wrong opcode for lowering FABS or FNEG."); bool IsFABS = (Op.getOpcode() == ISD::FABS); // If this is a FABS and it has an FNEG user, bail out to fold the combination // into an FNABS. We'll lower the FABS after that if it is still in use. if (IsFABS) for (SDNode *User : Op->uses()) if (User->getOpcode() == ISD::FNEG) return Op; SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); bool IsF128 = (VT == MVT::f128); // FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOpt::Level to // decide if we should generate a 16-byte constant mask when we only need 4 or // 8 bytes for the scalar case. MVT LogicVT; MVT EltVT; unsigned NumElts; if (VT.isVector()) { LogicVT = VT; EltVT = VT.getVectorElementType(); NumElts = VT.getVectorNumElements(); } else if (IsF128) { // SSE instructions are used for optimized f128 logical operations. LogicVT = MVT::f128; EltVT = VT; NumElts = 1; } else { // There are no scalar bitwise logical SSE/AVX instructions, so we // generate a 16-byte vector constant and logic op even for the scalar case. // Using a 16-byte mask allows folding the load of the mask with // the logic op, so it can save (~4 bytes) on code size. LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32; EltVT = VT; NumElts = (VT == MVT::f64) ? 2 : 4; } unsigned EltBits = EltVT.getSizeInBits(); LLVMContext *Context = DAG.getContext(); // For FABS, mask is 0x7f...; for FNEG, mask is 0x80... APInt MaskElt = IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignBit(EltBits); Constant *C = ConstantInt::get(*Context, MaskElt); C = ConstantVector::getSplat(NumElts, C); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast(CPIdx)->getAlignment(); SDValue Mask = DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, false, false, Alignment); SDValue Op0 = Op.getOperand(0); bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS); unsigned LogicOp = IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR; SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0; if (VT.isVector() || IsF128) return DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask); // For the scalar case extend to a 128-bit vector, perform the logic op, // and extract the scalar result back out. 
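// Illustrative sketch (not part of this file): LowerFABSorFNEG above (and
// LowerFCOPYSIGN just below) are pure bit manipulation on the value's pattern:
// FABS ANDs the sign bit away, FNEG XORs it, and copysign ORs the cleared
// magnitude with the other operand's isolated sign bit; the 16-byte
// constant-pool vectors are those masks splatted across the lanes. The scalar
// snippet shows the same masks on a double; the helper names are invented.
// Kept under '#if 0'.
#if 0
#include <cassert>
#include <cstdint>
#include <cstring>

static uint64_t toBits(double D) { uint64_t B; std::memcpy(&B, &D, 8); return B; }
static double fromBits(uint64_t B) { double D; std::memcpy(&D, &B, 8); return D; }

static const uint64_t SignBit = 0x8000000000000000ULL;   // APInt::getSignBit
static const uint64_t AbsMask = 0x7fffffffffffffffULL;   // APInt::getSignedMaxValue

static double fabsViaMask(double X)  { return fromBits(toBits(X) & AbsMask); }  // FAND
static double fnegViaMask(double X)  { return fromBits(toBits(X) ^ SignBit); }  // FXOR
static double copysignViaMask(double Mag, double Sgn) {                         // FAND + FOR
  return fromBits((toBits(Mag) & AbsMask) | (toBits(Sgn) & SignBit));
}

int main() {
  assert(fabsViaMask(-3.5) == 3.5 && fabsViaMask(2.0) == 2.0);
  assert(fnegViaMask(4.25) == -4.25);
  assert(copysignViaMask(2.5, -1.0) == -2.5);
  assert(copysignViaMask(-2.5, 1.0) == 2.5);
  return 0;
}
#endif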
Operand = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Operand); SDValue LogicNode = DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, LogicNode, DAG.getIntPtrConstant(0, dl)); } static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); LLVMContext *Context = DAG.getContext(); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); MVT SrcVT = Op1.getSimpleValueType(); bool IsF128 = (VT == MVT::f128); // If second operand is smaller, extend it first. if (SrcVT.bitsLT(VT)) { Op1 = DAG.getNode(ISD::FP_EXTEND, dl, VT, Op1); SrcVT = VT; } // And if it is bigger, shrink it first. if (SrcVT.bitsGT(VT)) { Op1 = DAG.getNode(ISD::FP_ROUND, dl, VT, Op1, DAG.getIntPtrConstant(1, dl)); SrcVT = VT; } // At this point the operands and the result should have the same // type, and that won't be f80 since that is not custom lowered. assert((VT == MVT::f64 || VT == MVT::f32 || IsF128) && "Unexpected type in LowerFCOPYSIGN"); const fltSemantics &Sem = VT == MVT::f64 ? APFloat::IEEEdouble : (IsF128 ? APFloat::IEEEquad : APFloat::IEEEsingle); const unsigned SizeInBits = VT.getSizeInBits(); SmallVector CV( VT == MVT::f64 ? 2 : (IsF128 ? 1 : 4), ConstantFP::get(*Context, APFloat(Sem, APInt(SizeInBits, 0)))); // First, clear all bits but the sign bit from the second operand (sign). CV[0] = ConstantFP::get(*Context, APFloat(Sem, APInt::getHighBitsSet(SizeInBits, 1))); Constant *C = ConstantVector::get(CV); auto PtrVT = TLI.getPointerTy(DAG.getDataLayout()); SDValue CPIdx = DAG.getConstantPool(C, PtrVT, 16); // Perform all logic operations as 16-byte vectors because there are no // scalar FP logic instructions in SSE. This allows load folding of the // constants into the logic instructions. MVT LogicVT = (VT == MVT::f64) ? MVT::v2f64 : (IsF128 ? MVT::f128 : MVT::v4f32); SDValue Mask1 = DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, false, false, 16); if (!IsF128) Op1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op1); SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op1, Mask1); // Next, clear the sign bit from the first operand (magnitude). // If it's a constant, we can clear it here. if (ConstantFPSDNode *Op0CN = dyn_cast(Op0)) { APFloat APF = Op0CN->getValueAPF(); // If the magnitude is a positive zero, the sign bit alone is enough. if (APF.isPosZero()) return IsF128 ? SignBit : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, SignBit, DAG.getIntPtrConstant(0, dl)); APF.clearSign(); CV[0] = ConstantFP::get(*Context, APF); } else { CV[0] = ConstantFP::get( *Context, APFloat(Sem, APInt::getLowBitsSet(SizeInBits, SizeInBits - 1))); } C = ConstantVector::get(CV); CPIdx = DAG.getConstantPool(C, PtrVT, 16); SDValue Val = DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, false, false, 16); // If the magnitude operand wasn't a constant, we need to AND out the sign. if (!isa(Op0)) { if (!IsF128) Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op0); Val = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op0, Val); } // OR the magnitude value with the sign bit. Val = DAG.getNode(X86ISD::FOR, dl, LogicVT, Val, SignBit); return IsF128 ? 
Val : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, Val, DAG.getIntPtrConstant(0, dl)); } static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) { SDValue N0 = Op.getOperand(0); SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); // Lower ISD::FGETSIGN to (AND (X86ISD::FGETSIGNx86 ...) 1). SDValue xFGETSIGN = DAG.getNode(X86ISD::FGETSIGNx86, dl, VT, N0, DAG.getConstant(1, dl, VT)); return DAG.getNode(ISD::AND, dl, VT, xFGETSIGN, DAG.getConstant(1, dl, VT)); } // Check whether an OR'd tree is PTEST-able. static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree."); if (!Subtarget->hasSSE41()) return SDValue(); if (!Op->hasOneUse()) return SDValue(); SDNode *N = Op.getNode(); SDLoc DL(N); SmallVector Opnds; DenseMap VecInMap; SmallVector VecIns; EVT VT = MVT::Other; // Recognize a special case where a vector is casted into wide integer to // test all 0s. Opnds.push_back(N->getOperand(0)); Opnds.push_back(N->getOperand(1)); for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) { SmallVectorImpl::const_iterator I = Opnds.begin() + Slot; // BFS traverse all OR'd operands. if (I->getOpcode() == ISD::OR) { Opnds.push_back(I->getOperand(0)); Opnds.push_back(I->getOperand(1)); // Re-evaluate the number of nodes to be traversed. e += 2; // 2 more nodes (LHS and RHS) are pushed. continue; } // Quit if a non-EXTRACT_VECTOR_ELT if (I->getOpcode() != ISD::EXTRACT_VECTOR_ELT) return SDValue(); // Quit if without a constant index. SDValue Idx = I->getOperand(1); if (!isa(Idx)) return SDValue(); SDValue ExtractedFromVec = I->getOperand(0); DenseMap::iterator M = VecInMap.find(ExtractedFromVec); if (M == VecInMap.end()) { VT = ExtractedFromVec.getValueType(); // Quit if not 128/256-bit vector. if (!VT.is128BitVector() && !VT.is256BitVector()) return SDValue(); // Quit if not the same type. if (VecInMap.begin() != VecInMap.end() && VT != VecInMap.begin()->first.getValueType()) return SDValue(); M = VecInMap.insert(std::make_pair(ExtractedFromVec, 0)).first; VecIns.push_back(ExtractedFromVec); } M->second |= 1U << cast(Idx)->getZExtValue(); } assert((VT.is128BitVector() || VT.is256BitVector()) && "Not extracted from 128-/256-bit vector."); unsigned FullMask = (1U << VT.getVectorNumElements()) - 1U; for (DenseMap::const_iterator I = VecInMap.begin(), E = VecInMap.end(); I != E; ++I) { // Quit if not all elements are used. if (I->second != FullMask) return SDValue(); } MVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64; // Cast all vectors into TestVT for PTEST. for (unsigned i = 0, e = VecIns.size(); i < e; ++i) VecIns[i] = DAG.getBitcast(TestVT, VecIns[i]); // If more than one full vectors are evaluated, OR them first before PTEST. for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1; Slot += 2, e += 1) { // Each iteration will OR 2 nodes and append the result until there is only // 1 node left, i.e. the final OR'd value of all vectors. SDValue LHS = VecIns[Slot]; SDValue RHS = VecIns[Slot + 1]; VecIns.push_back(DAG.getNode(ISD::OR, DL, TestVT, LHS, RHS)); } return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, VecIns.back(), VecIns.back()); } /// \brief return true if \c Op has a use that doesn't just read flags. static bool hasNonFlagsUse(SDValue Op) { for (SDNode::use_iterator UI = Op->use_begin(), UE = Op->use_end(); UI != UE; ++UI) { SDNode *User = *UI; unsigned UOpNo = UI.getOperandNo(); if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) { // Look pass truncate. 
UOpNo = User->use_begin().getOperandNo(); User = *User->use_begin(); } if (User->getOpcode() != ISD::BRCOND && User->getOpcode() != ISD::SETCC && !(User->getOpcode() == ISD::SELECT && UOpNo == 0)) return true; } return false; } /// Emit nodes that will be selected as "test Op0,Op0", or something /// equivalent. SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl, SelectionDAG &DAG) const { if (Op.getValueType() == MVT::i1) { SDValue ExtOp = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Op); return DAG.getNode(X86ISD::CMP, dl, MVT::i32, ExtOp, DAG.getConstant(0, dl, MVT::i8)); } // CF and OF aren't always set the way we want. Determine which // of these we need. bool NeedCF = false; bool NeedOF = false; switch (X86CC) { default: break; case X86::COND_A: case X86::COND_AE: case X86::COND_B: case X86::COND_BE: NeedCF = true; break; case X86::COND_G: case X86::COND_GE: case X86::COND_L: case X86::COND_LE: case X86::COND_O: case X86::COND_NO: { // Check if we really need to set the // Overflow flag. If NoSignedWrap is present // that is not actually needed. switch (Op->getOpcode()) { case ISD::ADD: case ISD::SUB: case ISD::MUL: case ISD::SHL: { const auto *BinNode = cast(Op.getNode()); if (BinNode->Flags.hasNoSignedWrap()) break; } default: NeedOF = true; break; } break; } } // See if we can use the EFLAGS value from the operand instead of // doing a separate TEST. TEST always sets OF and CF to 0, so unless // we prove that the arithmetic won't overflow, we can't use OF or CF. if (Op.getResNo() != 0 || NeedOF || NeedCF) { // Emit a CMP with 0, which is the TEST pattern. //if (Op.getValueType() == MVT::i1) // return DAG.getNode(X86ISD::CMP, dl, MVT::i1, Op, // DAG.getConstant(0, MVT::i1)); return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, DAG.getConstant(0, dl, Op.getValueType())); } unsigned Opcode = 0; unsigned NumOperands = 0; // Truncate operations may prevent the merge of the SETCC instruction // and the arithmetic instruction before it. Attempt to truncate the operands // of the arithmetic instruction and use a reduced bit-width instruction. bool NeedTruncation = false; SDValue ArithOp = Op; if (Op->getOpcode() == ISD::TRUNCATE && Op->hasOneUse()) { SDValue Arith = Op->getOperand(0); // Both the trunc and the arithmetic op need to have one user each. if (Arith->hasOneUse()) switch (Arith.getOpcode()) { default: break; case ISD::ADD: case ISD::SUB: case ISD::AND: case ISD::OR: case ISD::XOR: { NeedTruncation = true; ArithOp = Arith; } } } // NOTICE: In the code below we use ArithOp to hold the arithmetic operation // which may be the result of a CAST. We use the variable 'Op', which is the // non-casted variable when we check for possible users. switch (ArithOp.getOpcode()) { case ISD::ADD: // Due to an isel shortcoming, be conservative if this add is likely to be // selected as part of a load-modify-store instruction. When the root node // in a match is a store, isel doesn't know how to remap non-chain non-flag // uses of other nodes in the match, such as the ADD in this case. This // leads to the ADD being left around and reselected, with the result being // two adds in the output. Alas, even if none our users are stores, that // doesn't prove we're O.K. Ergo, if we have any parents that aren't // CopyToReg or SETCC, eschew INC/DEC. A better fix seems to require // climbing the DAG back to the root, and it doesn't seem to be worth the // effort. 
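    // Illustrative: an (add %x, 1) whose users are only SETCC / CopyToReg is
    // turned into X86ISD::INC below (unless INC/DEC are slow on the target),
    // so its EFLAGS result can be reused instead of emitting a separate TEST.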
for (SDNode::use_iterator UI = Op.getNode()->use_begin(), UE = Op.getNode()->use_end(); UI != UE; ++UI) if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC && UI->getOpcode() != ISD::STORE) goto default_case; if (ConstantSDNode *C = dyn_cast(ArithOp.getNode()->getOperand(1))) { // An add of one will be selected as an INC. if (C->isOne() && !Subtarget->slowIncDec()) { Opcode = X86ISD::INC; NumOperands = 1; break; } // An add of negative one (subtract of one) will be selected as a DEC. if (C->isAllOnesValue() && !Subtarget->slowIncDec()) { Opcode = X86ISD::DEC; NumOperands = 1; break; } } // Otherwise use a regular EFLAGS-setting add. Opcode = X86ISD::ADD; NumOperands = 2; break; case ISD::SHL: case ISD::SRL: // If we have a constant logical shift that's only used in a comparison // against zero turn it into an equivalent AND. This allows turning it into // a TEST instruction later. if ((X86CC == X86::COND_E || X86CC == X86::COND_NE) && Op->hasOneUse() && isa(Op->getOperand(1)) && !hasNonFlagsUse(Op)) { EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); unsigned ShAmt = Op->getConstantOperandVal(1); if (ShAmt >= BitWidth) // Avoid undefined shifts. break; APInt Mask = ArithOp.getOpcode() == ISD::SRL ? APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt) : APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt); if (!Mask.isSignedIntN(32)) // Avoid large immediates. break; SDValue New = DAG.getNode(ISD::AND, dl, VT, Op->getOperand(0), DAG.getConstant(Mask, dl, VT)); DAG.ReplaceAllUsesWith(Op, New); Op = New; } break; case ISD::AND: // If the primary and result isn't used, don't bother using X86ISD::AND, // because a TEST instruction will be better. if (!hasNonFlagsUse(Op)) break; // FALL THROUGH case ISD::SUB: case ISD::OR: case ISD::XOR: // Due to the ISEL shortcoming noted above, be conservative if this op is // likely to be selected as part of a load-modify-store instruction. for (SDNode::use_iterator UI = Op.getNode()->use_begin(), UE = Op.getNode()->use_end(); UI != UE; ++UI) if (UI->getOpcode() == ISD::STORE) goto default_case; // Otherwise use a regular EFLAGS-setting instruction. switch (ArithOp.getOpcode()) { default: llvm_unreachable("unexpected operator!"); case ISD::SUB: Opcode = X86ISD::SUB; break; case ISD::XOR: Opcode = X86ISD::XOR; break; case ISD::AND: Opcode = X86ISD::AND; break; case ISD::OR: { if (!NeedTruncation && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) { SDValue EFLAGS = LowerVectorAllZeroTest(Op, Subtarget, DAG); if (EFLAGS.getNode()) return EFLAGS; } Opcode = X86ISD::OR; break; } } NumOperands = 2; break; case X86ISD::ADD: case X86ISD::SUB: case X86ISD::INC: case X86ISD::DEC: case X86ISD::OR: case X86ISD::XOR: case X86ISD::AND: return SDValue(Op.getNode(), 1); default: default_case: break; } // If we found that truncation is beneficial, perform the truncation and // update 'Op'. if (NeedTruncation) { EVT VT = Op.getValueType(); SDValue WideVal = Op->getOperand(0); EVT WideVT = WideVal.getValueType(); unsigned ConvertedOp = 0; // Use a target machine opcode to prevent further DAGCombine // optimizations that may separate the arithmetic operations // from the setcc node. 
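  // Illustrative: for (setcc (trunc (add i64 %a, %b)), 0), the mapping below
  // lets us emit an i32 X86ISD::ADD of the truncated operands so the compare
  // can still be folded into the arithmetic instruction.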
switch (WideVal.getOpcode()) { default: break; case ISD::ADD: ConvertedOp = X86ISD::ADD; break; case ISD::SUB: ConvertedOp = X86ISD::SUB; break; case ISD::AND: ConvertedOp = X86ISD::AND; break; case ISD::OR: ConvertedOp = X86ISD::OR; break; case ISD::XOR: ConvertedOp = X86ISD::XOR; break; } if (ConvertedOp) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.isOperationLegal(WideVal.getOpcode(), WideVT)) { SDValue V0 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(0)); SDValue V1 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(1)); Op = DAG.getNode(ConvertedOp, dl, VT, V0, V1); } } } if (Opcode == 0) // Emit a CMP with 0, which is the TEST pattern. return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, DAG.getConstant(0, dl, Op.getValueType())); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); SmallVector Ops(Op->op_begin(), Op->op_begin() + NumOperands); SDValue New = DAG.getNode(Opcode, dl, VTs, Ops); DAG.ReplaceAllUsesWith(Op, New); return SDValue(New.getNode(), 1); } /// Emit nodes that will be selected as "cmp Op0,Op1", or something /// equivalent. SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, SDLoc dl, SelectionDAG &DAG) const { if (isNullConstant(Op1)) return EmitTest(Op0, X86CC, dl, DAG); assert(!(isa(Op1) && Op0.getValueType() == MVT::i1) && "Unexpected comparison operation for MVT::i1 operands"); if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 || Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) { // Do the comparison at i32 if it's smaller, besides the Atom case. // This avoids subregister aliasing issues. Keep the smaller reference // if we're optimizing for size, however, as that'll allow better folding // of memory operations. if (Op0.getValueType() != MVT::i32 && Op0.getValueType() != MVT::i64 && !DAG.getMachineFunction().getFunction()->optForMinSize() && !Subtarget->isAtom()) { unsigned ExtendOp = isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND; Op0 = DAG.getNode(ExtendOp, dl, MVT::i32, Op0); Op1 = DAG.getNode(ExtendOp, dl, MVT::i32, Op1); } // Use SUB instead of CMP to enable CSE between SUB and CMP. SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32); SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, Op0, Op1); return SDValue(Sub.getNode(), 1); } return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1); } /// Convert a comparison if required by the subtarget. SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const { // If the subtarget does not support the FUCOMI instruction, floating-point // comparisons have to be converted. if (Subtarget->hasCMov() || Cmp.getOpcode() != X86ISD::CMP || !Cmp.getOperand(0).getValueType().isFloatingPoint() || !Cmp.getOperand(1).getValueType().isFloatingPoint()) return Cmp; // The instruction selector will select an FUCOM instruction instead of // FUCOMI, which writes the comparison result to FPSW instead of EFLAGS. Hence // build an SDNode sequence that transfers the result from FPSW into EFLAGS: // (X86sahf (trunc (srl (X86fp_stsw (trunc (X86cmp ...)), 8)))) SDLoc dl(Cmp); SDValue TruncFPSW = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Cmp); SDValue FNStSW = DAG.getNode(X86ISD::FNSTSW16r, dl, MVT::i16, TruncFPSW); SDValue Srl = DAG.getNode(ISD::SRL, dl, MVT::i16, FNStSW, DAG.getConstant(8, dl, MVT::i8)); SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl); // Some 64-bit targets lack SAHF support, but they do support FCOMI. 
  assert(Subtarget->hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?");
  return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
}

/// The minimum architected relative accuracy is 2^-12. We need one
/// Newton-Raphson step to have a good float result (24 bits of precision).
SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op,
                                            DAGCombinerInfo &DCI,
                                            unsigned &RefinementSteps,
                                            bool &UseOneConstNR) const {
  EVT VT = Op.getValueType();
  const char *RecipOp;

  // SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps.
  // TODO: Add support for AVX512 (v16f32).
  // It is likely not profitable to do this for f64 because a double-precision
  // rsqrt estimate with refinement on x86 prior to FMA requires at least 16
  // instructions: convert to single, rsqrtss, convert back to double, refine
  // (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA
  // along with FMA, this could be a throughput win.
  if (VT == MVT::f32 && Subtarget->hasSSE1())
    RecipOp = "sqrtf";
  else if ((VT == MVT::v4f32 && Subtarget->hasSSE1()) ||
           (VT == MVT::v8f32 && Subtarget->hasAVX()))
    RecipOp = "vec-sqrtf";
  else
    return SDValue();

  TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
  if (!Recips.isEnabled(RecipOp))
    return SDValue();

  RefinementSteps = Recips.getRefinementSteps(RecipOp);
  UseOneConstNR = false;
  return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
}

/// The minimum architected relative accuracy is 2^-12. We need one
/// Newton-Raphson step to have a good float result (24 bits of precision).
SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
                                            DAGCombinerInfo &DCI,
                                            unsigned &RefinementSteps) const {
  EVT VT = Op.getValueType();
  const char *RecipOp;

  // SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps.
  // TODO: Add support for AVX512 (v16f32).
  // It is likely not profitable to do this for f64 because a double-precision
  // reciprocal estimate with refinement on x86 prior to FMA requires
  // 15 instructions: convert to single, rcpss, convert back to double, refine
  // (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA
  // along with FMA, this could be a throughput win.
  if (VT == MVT::f32 && Subtarget->hasSSE1())
    RecipOp = "divf";
  else if ((VT == MVT::v4f32 && Subtarget->hasSSE1()) ||
           (VT == MVT::v8f32 && Subtarget->hasAVX()))
    RecipOp = "vec-divf";
  else
    return SDValue();

  TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
  if (!Recips.isEnabled(RecipOp))
    return SDValue();

  RefinementSteps = Recips.getRefinementSteps(RecipOp);
  return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
}

/// If we have at least two divisions that use the same divisor, convert to
/// multiplication by a reciprocal. This may need to be adjusted for a given
/// CPU if a division's cost is not at least twice the cost of a multiplication.
/// This is because we still need one division to calculate the reciprocal and
/// then we need two multiplies by that reciprocal as replacements for the
/// original divisions.
unsigned X86TargetLowering::combineRepeatedFPDivisors() const {
  return 2;
}

/// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node
/// if it's possible.
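/// Illustrative: (and (srl %x, %n), 1) == 0 becomes (X86ISD::BT %x, %n) read
/// through the carry flag (COND_AE for SETEQ, COND_B for SETNE).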
SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, SDLoc dl, SelectionDAG &DAG) const { SDValue Op0 = And.getOperand(0); SDValue Op1 = And.getOperand(1); if (Op0.getOpcode() == ISD::TRUNCATE) Op0 = Op0.getOperand(0); if (Op1.getOpcode() == ISD::TRUNCATE) Op1 = Op1.getOperand(0); SDValue LHS, RHS; if (Op1.getOpcode() == ISD::SHL) std::swap(Op0, Op1); if (Op0.getOpcode() == ISD::SHL) { if (isOneConstant(Op0.getOperand(0))) { // If we looked past a truncate, check that it's only truncating away // known zeros. unsigned BitWidth = Op0.getValueSizeInBits(); unsigned AndBitWidth = And.getValueSizeInBits(); if (BitWidth > AndBitWidth) { APInt Zeros, Ones; DAG.computeKnownBits(Op0, Zeros, Ones); if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth) return SDValue(); } LHS = Op1; RHS = Op0.getOperand(1); } } else if (Op1.getOpcode() == ISD::Constant) { ConstantSDNode *AndRHS = cast(Op1); uint64_t AndRHSVal = AndRHS->getZExtValue(); SDValue AndLHS = Op0; if (AndRHSVal == 1 && AndLHS.getOpcode() == ISD::SRL) { LHS = AndLHS.getOperand(0); RHS = AndLHS.getOperand(1); } // Use BT if the immediate can't be encoded in a TEST instruction. if (!isUInt<32>(AndRHSVal) && isPowerOf2_64(AndRHSVal)) { LHS = AndLHS; RHS = DAG.getConstant(Log2_64_Ceil(AndRHSVal), dl, LHS.getValueType()); } } if (LHS.getNode()) { // If LHS is i8, promote it to i32 with any_extend. There is no i8 BT // instruction. Since the shift amount is in-range-or-undefined, we know // that doing a bittest on the i32 value is ok. We extend to i32 because // the encoding for the i16 version is larger than the i32 version. // Also promote i16 to i32 for performance / code size reason. if (LHS.getValueType() == MVT::i8 || LHS.getValueType() == MVT::i16) LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS); // If the operand types disagree, extend the shift amount to match. Since // BT ignores high bits (like shifts) we can use anyextend. if (LHS.getValueType() != RHS.getValueType()) RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS); SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS); X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B; return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(Cond, dl, MVT::i8), BT); } return SDValue(); } /// \brief - Turns an ISD::CondCode into a value suitable for SSE floating point /// mask CMPs. static int translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0, SDValue &Op1) { unsigned SSECC; bool Swap = false; // SSE Condition code mapping: // 0 - EQ // 1 - LT // 2 - LE // 3 - UNORD // 4 - NEQ // 5 - NLT // 6 - NLE // 7 - ORD switch (SetCCOpcode) { default: llvm_unreachable("Unexpected SETCC condition"); case ISD::SETOEQ: case ISD::SETEQ: SSECC = 0; break; case ISD::SETOGT: case ISD::SETGT: Swap = true; // Fallthrough case ISD::SETLT: case ISD::SETOLT: SSECC = 1; break; case ISD::SETOGE: case ISD::SETGE: Swap = true; // Fallthrough case ISD::SETLE: case ISD::SETOLE: SSECC = 2; break; case ISD::SETUO: SSECC = 3; break; case ISD::SETUNE: case ISD::SETNE: SSECC = 4; break; case ISD::SETULE: Swap = true; // Fallthrough case ISD::SETUGE: SSECC = 5; break; case ISD::SETULT: Swap = true; // Fallthrough case ISD::SETUGT: SSECC = 6; break; case ISD::SETO: SSECC = 7; break; case ISD::SETUEQ: case ISD::SETONE: SSECC = 8; break; } if (Swap) std::swap(Op0, Op1); return SSECC; } // Lower256IntVSETCC - Break a VSETCC 256-bit integer VSETCC into two new 128 // ones, and then concatenate the result back. 
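// For example (illustrative), a v8i32 compare without AVX2 is performed as two
// v4i32 compares on the extracted 128-bit halves, then reassembled with
// CONCAT_VECTORS.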
static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC && "Unsupported value type for operation"); unsigned NumElems = VT.getVectorNumElements(); SDLoc dl(Op); SDValue CC = Op.getOperand(2); // Extract the LHS vectors SDValue LHS = Op.getOperand(0); SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, dl); SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, dl); // Extract the RHS vectors SDValue RHS = Op.getOperand(1); SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, dl); SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, dl); // Issue the operation on the smaller types and concatenate the result back MVT EltVT = VT.getVectorElementType(); MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1, CC), DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC)); } static SDValue LowerBoolVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue CC = Op.getOperand(2); MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); assert(Op0.getSimpleValueType().getVectorElementType() == MVT::i1 && "Unexpected type for boolean compare operation"); ISD::CondCode SetCCOpcode = cast(CC)->get(); SDValue NotOp0 = DAG.getNode(ISD::XOR, dl, VT, Op0, DAG.getConstant(-1, dl, VT)); SDValue NotOp1 = DAG.getNode(ISD::XOR, dl, VT, Op1, DAG.getConstant(-1, dl, VT)); switch (SetCCOpcode) { default: llvm_unreachable("Unexpected SETCC condition"); case ISD::SETEQ: // (x == y) -> ~(x ^ y) return DAG.getNode(ISD::XOR, dl, VT, DAG.getNode(ISD::XOR, dl, VT, Op0, Op1), DAG.getConstant(-1, dl, VT)); case ISD::SETNE: // (x != y) -> (x ^ y) return DAG.getNode(ISD::XOR, dl, VT, Op0, Op1); case ISD::SETUGT: case ISD::SETGT: // (x > y) -> (x & ~y) return DAG.getNode(ISD::AND, dl, VT, Op0, NotOp1); case ISD::SETULT: case ISD::SETLT: // (x < y) -> (~x & y) return DAG.getNode(ISD::AND, dl, VT, NotOp0, Op1); case ISD::SETULE: case ISD::SETLE: // (x <= y) -> (~x | y) return DAG.getNode(ISD::OR, dl, VT, NotOp0, Op1); case ISD::SETUGE: case ISD::SETGE: // (x >=y) -> (x | ~y) return DAG.getNode(ISD::OR, dl, VT, Op0, NotOp1); } } static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG, const X86Subtarget *Subtarget) { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue CC = Op.getOperand(2); MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); assert(Op0.getSimpleValueType().getVectorElementType().getSizeInBits() >= 8 && Op.getSimpleValueType().getVectorElementType() == MVT::i1 && "Cannot set masked compare for this operation"); ISD::CondCode SetCCOpcode = cast(CC)->get(); unsigned Opc = 0; bool Unsigned = false; bool Swap = false; unsigned SSECC; switch (SetCCOpcode) { default: llvm_unreachable("Unexpected SETCC condition"); case ISD::SETNE: SSECC = 4; break; case ISD::SETEQ: Opc = X86ISD::PCMPEQM; break; case ISD::SETUGT: SSECC = 6; Unsigned = true; break; case ISD::SETLT: Swap = true; //fall-through case ISD::SETGT: Opc = X86ISD::PCMPGTM; break; case ISD::SETULT: SSECC = 1; Unsigned = true; break; case ISD::SETUGE: SSECC = 5; Unsigned = true; break; //NLT case ISD::SETGE: Swap = true; SSECC = 2; break; // LE + swap case ISD::SETULE: Unsigned = true; //fall-through case ISD::SETLE: SSECC = 2; break; } if (Swap) std::swap(Op0, Op1); if (Opc) return DAG.getNode(Opc, dl, VT, Op0, Op1); Opc = Unsigned ? 
X86ISD::CMPMU: X86ISD::CMPM; return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, dl, MVT::i8)); } /// \brief Try to turn a VSETULT into a VSETULE by modifying its second /// operand \p Op1. If non-trivial (for example because it's not constant) /// return an empty value. static SDValue ChangeVSETULTtoVSETULE(SDLoc dl, SDValue Op1, SelectionDAG &DAG) { BuildVectorSDNode *BV = dyn_cast(Op1.getNode()); if (!BV) return SDValue(); MVT VT = Op1.getSimpleValueType(); MVT EVT = VT.getVectorElementType(); unsigned n = VT.getVectorNumElements(); SmallVector ULTOp1; for (unsigned i = 0; i < n; ++i) { ConstantSDNode *Elt = dyn_cast(BV->getOperand(i)); if (!Elt || Elt->isOpaque() || Elt->getSimpleValueType(0) != EVT) return SDValue(); // Avoid underflow. APInt Val = Elt->getAPIntValue(); if (Val == 0) return SDValue(); ULTOp1.push_back(DAG.getConstant(Val - 1, dl, EVT)); } return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, ULTOp1); } static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue CC = Op.getOperand(2); MVT VT = Op.getSimpleValueType(); ISD::CondCode SetCCOpcode = cast(CC)->get(); bool isFP = Op.getOperand(1).getSimpleValueType().isFloatingPoint(); SDLoc dl(Op); if (isFP) { #ifndef NDEBUG MVT EltVT = Op0.getSimpleValueType().getVectorElementType(); assert(EltVT == MVT::f32 || EltVT == MVT::f64); #endif unsigned SSECC = translateX86FSETCC(SetCCOpcode, Op0, Op1); unsigned Opc = X86ISD::CMPP; if (Subtarget->hasAVX512() && VT.getVectorElementType() == MVT::i1) { assert(VT.getVectorNumElements() <= 16); Opc = X86ISD::CMPM; } // In the two special cases we can't handle, emit two comparisons. if (SSECC == 8) { unsigned CC0, CC1; unsigned CombineOpc; if (SetCCOpcode == ISD::SETUEQ) { CC0 = 3; CC1 = 0; CombineOpc = ISD::OR; } else { assert(SetCCOpcode == ISD::SETONE); CC0 = 7; CC1 = 4; CombineOpc = ISD::AND; } SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(CC0, dl, MVT::i8)); SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(CC1, dl, MVT::i8)); return DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1); } // Handle all other FP comparisons here. return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, dl, MVT::i8)); } MVT VTOp0 = Op0.getSimpleValueType(); assert(VTOp0 == Op1.getSimpleValueType() && "Expected operands with same type!"); assert(VT.getVectorNumElements() == VTOp0.getVectorNumElements() && "Invalid number of packed elements for source and destination!"); if (VT.is128BitVector() && VTOp0.is256BitVector()) { // On non-AVX512 targets, a vector of MVT::i1 is promoted by the type // legalizer to a wider vector type. In the case of 'vsetcc' nodes, the // legalizer firstly checks if the first operand in input to the setcc has // a legal type. If so, then it promotes the return type to that same type. // Otherwise, the return type is promoted to the 'next legal type' which, // for a vector of MVT::i1 is always a 128-bit integer vector type. // // We reach this code only if the following two conditions are met: // 1. Both return type and operand type have been promoted to wider types // by the type legalizer. // 2. The original operand type has been promoted to a 256-bit vector. // // Note that condition 2. only applies for AVX targets. SDValue NewOp = DAG.getSetCC(dl, VTOp0, Op0, Op1, SetCCOpcode); return DAG.getZExtOrTrunc(NewOp, dl, VT); } // The non-AVX512 code below works under the assumption that source and // destination types are the same. 
assert((Subtarget->hasAVX512() || (VT == VTOp0)) && "Value types for source and destination must be the same!"); // Break 256-bit integer vector compare into smaller ones. if (VT.is256BitVector() && !Subtarget->hasInt256()) return Lower256IntVSETCC(Op, DAG); MVT OpVT = Op1.getSimpleValueType(); if (OpVT.getVectorElementType() == MVT::i1) return LowerBoolVSETCC_AVX512(Op, DAG); bool MaskResult = (VT.getVectorElementType() == MVT::i1); if (Subtarget->hasAVX512()) { if (Op1.getSimpleValueType().is512BitVector() || (Subtarget->hasBWI() && Subtarget->hasVLX()) || (MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32)) return LowerIntVSETCC_AVX512(Op, DAG, Subtarget); // In AVX-512 architecture setcc returns mask with i1 elements, // But there is no compare instruction for i8 and i16 elements in KNL. // We are not talking about 512-bit operands in this case, these // types are illegal. if (MaskResult && (OpVT.getVectorElementType().getSizeInBits() < 32 && OpVT.getVectorElementType().getSizeInBits() >= 8)) return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(ISD::SETCC, dl, OpVT, Op0, Op1, CC)); } // Lower using XOP integer comparisons. if ((VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64) && Subtarget->hasXOP()) { // Translate compare code to XOP PCOM compare mode. unsigned CmpMode = 0; switch (SetCCOpcode) { default: llvm_unreachable("Unexpected SETCC condition"); case ISD::SETULT: case ISD::SETLT: CmpMode = 0x00; break; case ISD::SETULE: case ISD::SETLE: CmpMode = 0x01; break; case ISD::SETUGT: case ISD::SETGT: CmpMode = 0x02; break; case ISD::SETUGE: case ISD::SETGE: CmpMode = 0x03; break; case ISD::SETEQ: CmpMode = 0x04; break; case ISD::SETNE: CmpMode = 0x05; break; } // Are we comparing unsigned or signed integers? unsigned Opc = ISD::isUnsignedIntSetCC(SetCCOpcode) ? X86ISD::VPCOMU : X86ISD::VPCOM; return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(CmpMode, dl, MVT::i8)); } // We are handling one of the integer comparisons here. Since SSE only has // GT and EQ comparisons for integer, swapping operands and multiple // operations may be required for some comparisons. unsigned Opc; bool Swap = false, Invert = false, FlipSigns = false, MinMax = false; bool Subus = false; switch (SetCCOpcode) { default: llvm_unreachable("Unexpected SETCC condition"); case ISD::SETNE: Invert = true; case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break; case ISD::SETLT: Swap = true; case ISD::SETGT: Opc = X86ISD::PCMPGT; break; case ISD::SETGE: Swap = true; case ISD::SETLE: Opc = X86ISD::PCMPGT; Invert = true; break; case ISD::SETULT: Swap = true; case ISD::SETUGT: Opc = X86ISD::PCMPGT; FlipSigns = true; break; case ISD::SETUGE: Swap = true; case ISD::SETULE: Opc = X86ISD::PCMPGT; FlipSigns = true; Invert = true; break; } // Special case: Use min/max operations for SETULE/SETUGE MVT VET = VT.getVectorElementType(); bool hasMinMax = (Subtarget->hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32)) || (Subtarget->hasSSE2() && (VET == MVT::i8)); if (hasMinMax) { switch (SetCCOpcode) { default: break; case ISD::SETULE: Opc = ISD::UMIN; MinMax = true; break; case ISD::SETUGE: Opc = ISD::UMAX; MinMax = true; break; } if (MinMax) { Swap = false; Invert = false; FlipSigns = false; } } bool hasSubus = Subtarget->hasSSE2() && (VET == MVT::i8 || VET == MVT::i16); if (!MinMax && hasSubus) { // As another special case, use PSUBUS[BW] when it's profitable. E.g. 
for // Op0 u<= Op1: // t = psubus Op0, Op1 // pcmpeq t, <0..0> switch (SetCCOpcode) { default: break; case ISD::SETULT: { // If the comparison is against a constant we can turn this into a // setule. With psubus, setule does not require a swap. This is // beneficial because the constant in the register is no longer // destructed as the destination so it can be hoisted out of a loop. // Only do this pre-AVX since vpcmp* is no longer destructive. if (Subtarget->hasAVX()) break; SDValue ULEOp1 = ChangeVSETULTtoVSETULE(dl, Op1, DAG); if (ULEOp1.getNode()) { Op1 = ULEOp1; Subus = true; Invert = false; Swap = false; } break; } // Psubus is better than flip-sign because it requires no inversion. case ISD::SETUGE: Subus = true; Invert = false; Swap = true; break; case ISD::SETULE: Subus = true; Invert = false; Swap = false; break; } if (Subus) { Opc = X86ISD::SUBUS; FlipSigns = false; } } if (Swap) std::swap(Op0, Op1); // Check that the operation in question is available (most are plain SSE2, // but PCMPGTQ and PCMPEQQ have different requirements). if (VT == MVT::v2i64) { if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42()) { assert(Subtarget->hasSSE2() && "Don't know how to lower!"); // First cast everything to the right type. Op0 = DAG.getBitcast(MVT::v4i32, Op0); Op1 = DAG.getBitcast(MVT::v4i32, Op1); // Since SSE has no unsigned integer comparisons, we need to flip the sign // bits of the inputs before performing those operations. The lower // compare is always unsigned. SDValue SB; if (FlipSigns) { SB = DAG.getConstant(0x80000000U, dl, MVT::v4i32); } else { SDValue Sign = DAG.getConstant(0x80000000U, dl, MVT::i32); SDValue Zero = DAG.getConstant(0x00000000U, dl, MVT::i32); SB = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Sign, Zero, Sign, Zero); } Op0 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op0, SB); Op1 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op1, SB); // Emulate PCMPGTQ with (hi1 > hi2) | ((hi1 == hi2) & (lo1 > lo2)) SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1); SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1); // Create masks for only the low parts/high parts of the 64 bit integers. static const int MaskHi[] = { 1, 1, 3, 3 }; static const int MaskLo[] = { 0, 0, 2, 2 }; SDValue EQHi = DAG.getVectorShuffle(MVT::v4i32, dl, EQ, EQ, MaskHi); SDValue GTLo = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo); SDValue GTHi = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi); SDValue Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, EQHi, GTLo); Result = DAG.getNode(ISD::OR, dl, MVT::v4i32, Result, GTHi); if (Invert) Result = DAG.getNOT(dl, Result, MVT::v4i32); return DAG.getBitcast(VT, Result); } if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) { // If pcmpeqq is missing but pcmpeqd is available synthesize pcmpeqq with // pcmpeqd + pshufd + pand. assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!"); // First cast everything to the right type. Op0 = DAG.getBitcast(MVT::v4i32, Op0); Op1 = DAG.getBitcast(MVT::v4i32, Op1); // Do the compare. SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1); // Make sure the lower and upper halves are both all-ones. 
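      // Illustrative: the {1, 0, 3, 2} shuffle below swaps the 32-bit halves
      // of each 64-bit lane, so ANDing it with the original pcmpeqd result
      // leaves a lane all-ones only when both of its halves compared equal.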
static const int Mask[] = { 1, 0, 3, 2 }; SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask); Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, Result, Shuf); if (Invert) Result = DAG.getNOT(dl, Result, MVT::v4i32); return DAG.getBitcast(VT, Result); } } // Since SSE has no unsigned integer comparisons, we need to flip the sign // bits of the inputs before performing those operations. if (FlipSigns) { MVT EltVT = VT.getVectorElementType(); SDValue SB = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), dl, VT); Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SB); Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SB); } SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); // If the logical-not of the result is required, perform that now. if (Invert) Result = DAG.getNOT(dl, Result, VT); if (MinMax) Result = DAG.getNode(X86ISD::PCMPEQ, dl, VT, Op0, Result); if (Subus) Result = DAG.getNode(X86ISD::PCMPEQ, dl, VT, Result, getZeroVector(VT, Subtarget, DAG, dl)); return Result; } SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { MVT VT = Op.getSimpleValueType(); if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG); assert(((!Subtarget->hasAVX512() && VT == MVT::i8) || (VT == MVT::i1)) && "SetCC type must be 8-bit or 1-bit integer"); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDLoc dl(Op); ISD::CondCode CC = cast(Op.getOperand(2))->get(); // Optimize to BT if possible. // Lower (X & (1 << N)) == 0 to BT(X, N). // Lower ((X >>u N) & 1) != 0 to BT(X, N). // Lower ((X >>s N) & 1) != 0 to BT(X, N). if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && isNullConstant(Op1) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { if (SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG)) { if (VT == MVT::i1) return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewSetCC); return NewSetCC; } } // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of // these. if ((isOneConstant(Op1) || isNullConstant(Op1)) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { // If the input is a setcc, then reuse the input setcc or use a new one with // the inverted condition. 
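  // e.g. (setcc (x86setcc CC, flags), 0, ne) can return the inner x86setcc
  // unchanged, while the seteq form re-emits it with the opposite condition
  // (illustrative).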
if (Op0.getOpcode() == X86ISD::SETCC) { X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0); bool Invert = (CC == ISD::SETNE) ^ isNullConstant(Op1); if (!Invert) return Op0; CCode = X86::GetOppositeBranchCondition(CCode); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(CCode, dl, MVT::i8), Op0.getOperand(1)); if (VT == MVT::i1) return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, SetCC); return SetCC; } } if ((Op0.getValueType() == MVT::i1) && isOneConstant(Op1) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { ISD::CondCode NewCC = ISD::getSetCCInverse(CC, true); return DAG.getSetCC(dl, VT, Op0, DAG.getConstant(0, dl, MVT::i1), NewCC); } bool isFP = Op1.getSimpleValueType().isFloatingPoint(); unsigned X86CC = TranslateX86CC(CC, dl, isFP, Op0, Op1, DAG); if (X86CC == X86::COND_INVALID) return SDValue(); SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, dl, DAG); EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(X86CC, dl, MVT::i8), EFLAGS); if (VT == MVT::i1) return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, SetCC); return SetCC; } SDValue X86TargetLowering::LowerSETCCE(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDValue Carry = Op.getOperand(2); SDValue Cond = Op.getOperand(3); SDLoc DL(Op); assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only."); X86::CondCode CC = TranslateIntegerX86CC(cast(Cond)->get()); assert(Carry.getOpcode() != ISD::CARRY_FALSE); SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry); SDValue SetCC = DAG.getNode(X86ISD::SETCC, DL, MVT::i8, DAG.getConstant(CC, DL, MVT::i8), Cmp.getValue(1)); if (Op.getSimpleValueType() == MVT::i1) return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC); return SetCC; } // isX86LogicalCmp - Return true if opcode is a X86 logical comparison. static bool isX86LogicalCmp(SDValue Op) { unsigned Opc = Op.getNode()->getOpcode(); if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI || Opc == X86ISD::SAHF) return true; if (Op.getResNo() == 1 && (Opc == X86ISD::ADD || Opc == X86ISD::SUB || Opc == X86ISD::ADC || Opc == X86ISD::SBB || Opc == X86ISD::SMUL || Opc == X86ISD::UMUL || Opc == X86ISD::INC || Opc == X86ISD::DEC || Opc == X86ISD::OR || Opc == X86ISD::XOR || Opc == X86ISD::AND)) return true; if (Op.getResNo() == 2 && Opc == X86ISD::UMUL) return true; return false; } static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) { if (V.getOpcode() != ISD::TRUNCATE) return false; SDValue VOp0 = V.getOperand(0); unsigned InBits = VOp0.getValueSizeInBits(); unsigned Bits = V.getValueSizeInBits(); return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits)); } SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { bool addTest = true; SDValue Cond = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue Op2 = Op.getOperand(2); SDLoc DL(Op); MVT VT = Op1.getSimpleValueType(); SDValue CC; // Lower FP selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops // are available or VBLENDV if AVX is available. // Otherwise FP cmovs get lowered into a less efficient branch sequence later. 
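  // Illustrative scalar-SSE shape for 'select (setcc olt a, b), x, y' on f32:
  //   mask = cmpltss a, b;  result = (mask & x) | (~mask & y)
  // via FAND/FANDN/FOR, or a single variable blend (VBLENDV) when AVX is
  // available and neither operand is a constant.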
if (Cond.getOpcode() == ISD::SETCC && ((Subtarget->hasSSE2() && (VT == MVT::f32 || VT == MVT::f64)) || (Subtarget->hasSSE1() && VT == MVT::f32)) && VT == Cond.getOperand(0).getSimpleValueType() && Cond->hasOneUse()) { SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1); int SSECC = translateX86FSETCC( cast(Cond.getOperand(2))->get(), CondOp0, CondOp1); if (SSECC != 8) { if (Subtarget->hasAVX512()) { SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CondOp0, CondOp1, DAG.getConstant(SSECC, DL, MVT::i8)); return DAG.getNode(X86ISD::SELECT, DL, VT, Cmp, Op1, Op2); } SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1, DAG.getConstant(SSECC, DL, MVT::i8)); // If we have AVX, we can use a variable vector select (VBLENDV) instead // of 3 logic instructions for size savings and potentially speed. // Unfortunately, there is no scalar form of VBLENDV. // If either operand is a constant, don't try this. We can expect to // optimize away at least one of the logic instructions later in that // case, so that sequence would be faster than a variable blend. // BLENDV was introduced with SSE 4.1, but the 2 register form implicitly // uses XMM0 as the selection register. That may need just as many // instructions as the AND/ANDN/OR sequence due to register moves, so // don't bother. if (Subtarget->hasAVX() && !isa(Op1) && !isa(Op2)) { // Convert to vectors, do a VSELECT, and convert back to scalar. // All of the conversions should be optimized away. MVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64; SDValue VOp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op1); SDValue VOp2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op2); SDValue VCmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Cmp); MVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64; VCmp = DAG.getBitcast(VCmpVT, VCmp); SDValue VSel = DAG.getNode(ISD::VSELECT, DL, VecVT, VCmp, VOp1, VOp2); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VSel, DAG.getIntPtrConstant(0, DL)); } SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2); SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1); return DAG.getNode(X86ISD::FOR, DL, VT, AndN, And); } } if (VT.isVector() && VT.getVectorElementType() == MVT::i1) { SDValue Op1Scalar; if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode())) Op1Scalar = ConvertI1VectorToInteger(Op1, DAG); else if (Op1.getOpcode() == ISD::BITCAST && Op1.getOperand(0)) Op1Scalar = Op1.getOperand(0); SDValue Op2Scalar; if (ISD::isBuildVectorOfConstantSDNodes(Op2.getNode())) Op2Scalar = ConvertI1VectorToInteger(Op2, DAG); else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0)) Op2Scalar = Op2.getOperand(0); if (Op1Scalar.getNode() && Op2Scalar.getNode()) { SDValue newSelect = DAG.getNode(ISD::SELECT, DL, Op1Scalar.getValueType(), Cond, Op1Scalar, Op2Scalar); if (newSelect.getValueSizeInBits() == VT.getSizeInBits()) return DAG.getBitcast(VT, newSelect); SDValue ExtVec = DAG.getBitcast(MVT::v8i1, newSelect); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtVec, DAG.getIntPtrConstant(0, DL)); } } if (VT == MVT::v4i1 || VT == MVT::v2i1) { SDValue zeroConst = DAG.getIntPtrConstant(0, DL); Op1 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i1, DAG.getUNDEF(MVT::v8i1), Op1, zeroConst); Op2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i1, DAG.getUNDEF(MVT::v8i1), Op2, zeroConst); SDValue newSelect = DAG.getNode(ISD::SELECT, DL, MVT::v8i1, Cond, Op1, Op2); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, newSelect, zeroConst); } if (Cond.getOpcode() == ISD::SETCC) { 
SDValue NewCond = LowerSETCC(Cond, DAG); if (NewCond.getNode()) Cond = NewCond; } // (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y // (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y // (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y // (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y if (Cond.getOpcode() == X86ISD::SETCC && Cond.getOperand(1).getOpcode() == X86ISD::CMP && isNullConstant(Cond.getOperand(1).getOperand(1))) { SDValue Cmp = Cond.getOperand(1); unsigned CondCode =cast(Cond.getOperand(0))->getZExtValue(); if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && (CondCode == X86::COND_E || CondCode == X86::COND_NE)) { SDValue Y = isAllOnesConstant(Op2) ? Op1 : Op2; SDValue CmpOp0 = Cmp.getOperand(0); // Apply further optimizations for special cases // (select (x != 0), -1, 0) -> neg & sbb // (select (x == 0), 0, -1) -> neg & sbb if (isNullConstant(Y) && (isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) { SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32); SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, DAG.getConstant(0, DL, CmpOp0.getValueType()), CmpOp0); SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), DAG.getConstant(X86::COND_B, DL, MVT::i8), SDValue(Neg.getNode(), 1)); return Res; } Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType())); Cmp = ConvertCmpIfNecessary(Cmp, DAG); SDValue Res = // Res = 0 or -1. DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp); if (isAllOnesConstant(Op1) != (CondCode == X86::COND_E)) Res = DAG.getNOT(DL, Res, Res.getValueType()); if (!isNullConstant(Op2)) Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y); return Res; } } // Look past (and (setcc_carry (cmp ...)), 1). if (Cond.getOpcode() == ISD::AND && Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY && isOneConstant(Cond.getOperand(1))) Cond = Cond.getOperand(0); // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. unsigned CondOpcode = Cond.getOpcode(); if (CondOpcode == X86ISD::SETCC || CondOpcode == X86ISD::SETCC_CARRY) { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); unsigned Opc = Cmp.getOpcode(); MVT VT = Op.getSimpleValueType(); bool IllegalFPCMov = false; if (VT.isFloatingPoint() && !VT.isVector() && !isScalarFPTypeInSSEReg(VT)) // FPStack? 
IllegalFPCMov = !hasFPCMov(cast(CC)->getSExtValue()); if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) || Opc == X86ISD::BT) { // FIXME Cond = Cmp; addTest = false; } } else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO || CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO || ((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) && Cond.getOperand(0).getValueType() != MVT::i8)) { SDValue LHS = Cond.getOperand(0); SDValue RHS = Cond.getOperand(1); unsigned X86Opcode; unsigned X86Cond; SDVTList VTs; switch (CondOpcode) { case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break; case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break; case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break; case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break; case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break; case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break; default: llvm_unreachable("unexpected overflowing operator"); } if (CondOpcode == ISD::UMULO) VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(), MVT::i32); else VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); SDValue X86Op = DAG.getNode(X86Opcode, DL, VTs, LHS, RHS); if (CondOpcode == ISD::UMULO) Cond = X86Op.getValue(2); else Cond = X86Op.getValue(1); CC = DAG.getConstant(X86Cond, DL, MVT::i8); addTest = false; } if (addTest) { // Look past the truncate if the high bits are known zero. if (isTruncWithZeroHighBitsInput(Cond, DAG)) Cond = Cond.getOperand(0); // We know the result of AND is compared against zero. Try to match // it to BT. if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { if (SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG)) { CC = NewSetCC.getOperand(0); Cond = NewSetCC.getOperand(1); addTest = false; } } } if (addTest) { CC = DAG.getConstant(X86::COND_NE, DL, MVT::i8); Cond = EmitTest(Cond, X86::COND_NE, DL, DAG); } // a < b ? -1 : 0 -> RES = ~setcc_carry // a < b ? 0 : -1 -> RES = setcc_carry // a >= b ? -1 : 0 -> RES = setcc_carry // a >= b ? 0 : -1 -> RES = ~setcc_carry if (Cond.getOpcode() == X86ISD::SUB) { Cond = ConvertCmpIfNecessary(Cond, DAG); unsigned CondCode = cast(CC)->getZExtValue(); if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) && (isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && (isNullConstant(Op1) || isNullConstant(Op2))) { SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), DAG.getConstant(X86::COND_B, DL, MVT::i8), Cond); if (isAllOnesConstant(Op1) != (CondCode == X86::COND_B)) return DAG.getNOT(DL, Res, Res.getValueType()); return Res; } } // X86 doesn't have an i8 cmov. If both operands are the result of a truncate // widen the cmov and push the truncate through. This avoids introducing a new // branch during isel and doesn't add any extensions. if (Op.getValueType() == MVT::i8 && Op1.getOpcode() == ISD::TRUNCATE && Op2.getOpcode() == ISD::TRUNCATE) { SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0); if (T1.getValueType() == T2.getValueType() && // Blacklist CopyFromReg to avoid partial register stalls. T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode()!=ISD::CopyFromReg){ SDVTList VTs = DAG.getVTList(T1.getValueType(), MVT::Glue); SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, VTs, T2, T1, CC, Cond); return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov); } } // X86ISD::CMOV means set the result (which is operand 1) to the RHS if // condition is true. 
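  // Note the operand order below: X86ISD::CMOV takes
  // { FalseVal (Op2), TrueVal (Op1), condition code, EFLAGS }.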
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); SDValue Ops[] = { Op2, Op1, CC, Cond }; return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops); } static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { MVT VT = Op->getSimpleValueType(0); SDValue In = Op->getOperand(0); MVT InVT = In.getSimpleValueType(); MVT VTElt = VT.getVectorElementType(); MVT InVTElt = InVT.getVectorElementType(); SDLoc dl(Op); // SKX processor if ((InVTElt == MVT::i1) && (((Subtarget->hasBWI() && Subtarget->hasVLX() && VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() <= 16)) || ((Subtarget->hasBWI() && VT.is512BitVector() && VTElt.getSizeInBits() <= 16)) || ((Subtarget->hasDQI() && Subtarget->hasVLX() && VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() >= 32)) || ((Subtarget->hasDQI() && VT.is512BitVector() && VTElt.getSizeInBits() >= 32)))) return DAG.getNode(X86ISD::VSEXT, dl, VT, In); unsigned int NumElts = VT.getVectorNumElements(); if (NumElts != 8 && NumElts != 16 && !Subtarget->hasBWI()) return SDValue(); if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) { if (In.getOpcode() == X86ISD::VSEXT || In.getOpcode() == X86ISD::VZEXT) return DAG.getNode(In.getOpcode(), dl, VT, In.getOperand(0)); return DAG.getNode(X86ISD::VSEXT, dl, VT, In); } assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type"); MVT ExtVT = NumElts == 8 ? MVT::v8i64 : MVT::v16i32; SDValue NegOne = DAG.getConstant(APInt::getAllOnesValue(ExtVT.getScalarSizeInBits()), dl, ExtVT); SDValue Zero = DAG.getConstant(APInt::getNullValue(ExtVT.getScalarSizeInBits()), dl, ExtVT); SDValue V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero); if (VT.is512BitVector()) return V; return DAG.getNode(X86ISD::VTRUNC, dl, VT, V); } static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDValue In = Op->getOperand(0); MVT VT = Op->getSimpleValueType(0); MVT InVT = In.getSimpleValueType(); assert(VT.getSizeInBits() == InVT.getSizeInBits()); MVT InSVT = InVT.getVectorElementType(); assert(VT.getVectorElementType().getSizeInBits() > InSVT.getSizeInBits()); if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16) return SDValue(); if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8) return SDValue(); SDLoc dl(Op); // SSE41 targets can use the pmovsx* instructions directly. if (Subtarget->hasSSE41()) return DAG.getNode(X86ISD::VSEXT, dl, VT, In); // pre-SSE41 targets unpack lower lanes and then sign-extend using SRAI. SDValue Curr = In; MVT CurrVT = InVT; // As SRAI is only available on i16/i32 types, we expand only up to i32 // and handle i64 separately. 
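  // Illustrative for v16i8 -> v8i16: unpack the low lanes against undef to
  // double the element width, then arithmetic-shift right by the original
  // element width so the sign bit fills the new high half.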
while (CurrVT != VT && CurrVT.getVectorElementType() != MVT::i32) { Curr = DAG.getNode(X86ISD::UNPCKL, dl, CurrVT, DAG.getUNDEF(CurrVT), Curr); MVT CurrSVT = MVT::getIntegerVT(CurrVT.getScalarSizeInBits() * 2); CurrVT = MVT::getVectorVT(CurrSVT, CurrVT.getVectorNumElements() / 2); Curr = DAG.getBitcast(CurrVT, Curr); } SDValue SignExt = Curr; if (CurrVT != InVT) { unsigned SignExtShift = CurrVT.getVectorElementType().getSizeInBits() - InSVT.getSizeInBits(); SignExt = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr, DAG.getConstant(SignExtShift, dl, MVT::i8)); } if (CurrVT == VT) return SignExt; if (VT == MVT::v2i64 && CurrVT == MVT::v4i32) { SDValue Sign = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr, DAG.getConstant(31, dl, MVT::i8)); SDValue Ext = DAG.getVectorShuffle(CurrVT, dl, SignExt, Sign, {0, 4, 1, 5}); return DAG.getBitcast(VT, Ext); } return SDValue(); } static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { MVT VT = Op->getSimpleValueType(0); SDValue In = Op->getOperand(0); MVT InVT = In.getSimpleValueType(); SDLoc dl(Op); if (VT.is512BitVector() || InVT.getVectorElementType() == MVT::i1) return LowerSIGN_EXTEND_AVX512(Op, Subtarget, DAG); if ((VT != MVT::v4i64 || InVT != MVT::v4i32) && (VT != MVT::v8i32 || InVT != MVT::v8i16) && (VT != MVT::v16i16 || InVT != MVT::v16i8)) return SDValue(); if (Subtarget->hasInt256()) return DAG.getNode(X86ISD::VSEXT, dl, VT, In); // Optimize vectors in AVX mode // Sign extend v8i16 to v8i32 and // v4i32 to v4i64 // // Divide input vector into two parts // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1} // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32 // concat the vectors to original VT unsigned NumElems = InVT.getVectorNumElements(); SDValue Undef = DAG.getUNDEF(InVT); SmallVector ShufMask1(NumElems, -1); for (unsigned i = 0; i != NumElems/2; ++i) ShufMask1[i] = i; SDValue OpLo = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask1[0]); SmallVector ShufMask2(NumElems, -1); for (unsigned i = 0; i != NumElems/2; ++i) ShufMask2[i] = i + NumElems/2; SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask2[0]); MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), VT.getVectorNumElements()/2); OpLo = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpLo); OpHi = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpHi); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); } // Lower vector extended loads using a shuffle. If SSSE3 is not available we // may emit an illegal shuffle but the expansion is still better than scalar // code. We generate X86ISD::VSEXT for SEXTLOADs if it's available, otherwise // we'll emit a shuffle and a arithmetic shift. // FIXME: Is the expansion actually better than scalar code? It doesn't seem so. // TODO: It is possible to support ZExt by zeroing the undef values during // the shuffle phase or after the shuffle. static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { MVT RegVT = Op.getSimpleValueType(); assert(RegVT.isVector() && "We only custom lower vector sext loads."); assert(RegVT.isInteger() && "We only custom lower integer vector sext loads."); // Nothing useful we can do without SSE2 shuffles. 
assert(Subtarget->hasSSE2() && "We only custom lower sext loads with SSE2."); LoadSDNode *Ld = cast(Op.getNode()); SDLoc dl(Ld); EVT MemVT = Ld->getMemoryVT(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned RegSz = RegVT.getSizeInBits(); ISD::LoadExtType Ext = Ld->getExtensionType(); assert((Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD) && "Only anyext and sext are currently implemented."); assert(MemVT != RegVT && "Cannot extend to the same type"); assert(MemVT.isVector() && "Must load a vector from memory"); unsigned NumElems = RegVT.getVectorNumElements(); unsigned MemSz = MemVT.getSizeInBits(); assert(RegSz > MemSz && "Register size must be greater than the mem size"); if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget->hasInt256()) { // The only way in which we have a legal 256-bit vector result but not the // integer 256-bit operations needed to directly lower a sextload is if we // have AVX1 but not AVX2. In that case, we can always emit a sextload to // a 128-bit vector and a normal sign_extend to 256-bits that should get // correctly legalized. We do this late to allow the canonical form of // sextload to persist throughout the rest of the DAG combiner -- it wants // to fold together any extensions it can, and so will fuse a sign_extend // of an sextload into a sextload targeting a wider value. SDValue Load; if (MemSz == 128) { // Just switch this to a normal load. assert(TLI.isTypeLegal(MemVT) && "If the memory type is a 128-bit type, " "it must be a legal 128-bit vector " "type!"); Load = DAG.getLoad(MemVT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->isInvariant(), Ld->getAlignment()); } else { assert(MemSz < 128 && "Can't extend a type wider than 128 bits to a 256 bit vector!"); // Do an sext load to a 128-bit vector type. We want to use the same // number of elements, but elements half as wide. This will end up being // recursively lowered by this routine, but will succeed as we definitely // have all the necessary features if we're using AVX1. EVT HalfEltVT = EVT::getIntegerVT(*DAG.getContext(), RegVT.getScalarSizeInBits() / 2); EVT HalfVecVT = EVT::getVectorVT(*DAG.getContext(), HalfEltVT, NumElems); Load = DAG.getExtLoad(Ext, dl, HalfVecVT, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), MemVT, Ld->isVolatile(), Ld->isNonTemporal(), Ld->isInvariant(), Ld->getAlignment()); } // Replace chain users with the new chain. assert(Load->getNumValues() == 2 && "Loads must carry a chain!"); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1)); // Finally, do a normal sign-extend to the desired register. return DAG.getSExtOrTrunc(Load, dl, RegVT); } // All sizes must be a power of two. assert(isPowerOf2_32(RegSz * MemSz * NumElems) && "Non-power-of-two elements are not custom lowered!"); // Attempt to load the original value using scalar loads. // Find the largest scalar type that divides the total loaded size. MVT SclrLoadTy = MVT::i8; for (MVT Tp : MVT::integer_valuetypes()) { if (TLI.isTypeLegal(Tp) && ((MemSz % Tp.getSizeInBits()) == 0)) { SclrLoadTy = Tp; } } // On 32bit systems, we can't save 64bit integers. Try bitcasting to F64. if (TLI.isTypeLegal(MVT::f64) && SclrLoadTy.getSizeInBits() < 64 && (64 <= MemSz)) SclrLoadTy = MVT::f64; // Calculate the number of scalar loads that we need to perform // in order to load our vector from memory. 
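  // e.g. a 64-bit MemVT with a legal i32 scalar type needs 64 / 32 = 2 scalar
  // loads (illustrative).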
unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits(); assert((Ext != ISD::SEXTLOAD || NumLoads == 1) && "Can only lower sext loads with a single scalar load!"); unsigned loadRegZize = RegSz; if (Ext == ISD::SEXTLOAD && RegSz >= 256) loadRegZize = 128; // Represent our vector as a sequence of elements which are the // largest scalar that we can load. EVT LoadUnitVecVT = EVT::getVectorVT( *DAG.getContext(), SclrLoadTy, loadRegZize / SclrLoadTy.getSizeInBits()); // Represent the data using the same element type that is stored in // memory. In practice, we ''widen'' MemVT. EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), loadRegZize / MemVT.getScalarSizeInBits()); assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() && "Invalid vector type"); // We can't shuffle using an illegal type. assert(TLI.isTypeLegal(WideVecVT) && "We only lower types that form legal widened vector types"); SmallVector Chains; SDValue Ptr = Ld->getBasePtr(); SDValue Increment = DAG.getConstant(SclrLoadTy.getSizeInBits() / 8, dl, TLI.getPointerTy(DAG.getDataLayout())); SDValue Res = DAG.getUNDEF(LoadUnitVecVT); for (unsigned i = 0; i < NumLoads; ++i) { // Perform a single load. SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->isInvariant(), Ld->getAlignment()); Chains.push_back(ScalarLoad.getValue(1)); // Create the first element type using SCALAR_TO_VECTOR in order to avoid // another round of DAGCombining. if (i == 0) Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoadUnitVecVT, ScalarLoad); else Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, LoadUnitVecVT, Res, ScalarLoad, DAG.getIntPtrConstant(i, dl)); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); } SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); // Bitcast the loaded value to a vector of the original element type, in // the size of the target vector type. SDValue SlicedVec = DAG.getBitcast(WideVecVT, Res); unsigned SizeRatio = RegSz / MemSz; if (Ext == ISD::SEXTLOAD) { // If we have SSE4.1, we can directly emit a VSEXT node. if (Subtarget->hasSSE41()) { SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF); return Sext; } // Otherwise we'll use SIGN_EXTEND_VECTOR_INREG to sign extend the lowest // lanes. assert(TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND_VECTOR_INREG, RegVT) && "We can't implement a sext load without SIGN_EXTEND_VECTOR_INREG!"); SDValue Shuff = DAG.getSignExtendVectorInReg(SlicedVec, dl, RegVT); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF); return Shuff; } // Redistribute the loaded elements into the different locations. SmallVector ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i != NumElems; ++i) ShuffleVec[i * SizeRatio] = i; SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec, DAG.getUNDEF(WideVecVT), &ShuffleVec[0]); // Bitcast to the requested type. Shuff = DAG.getBitcast(RegVT, Shuff); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF); return Shuff; } // isAndOrOfSingleUseSetCCs - Return true if node is an ISD::AND or // ISD::OR of two X86ISD::SETCC nodes each of which has no other use apart // from the AND / OR. 
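// Illustrative match: (or (x86setcc CC1, f1), (x86setcc CC2, f2)) where the
// OR/AND is the sole user of both setcc nodes.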
static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) { Opc = Op.getOpcode(); if (Opc != ISD::OR && Opc != ISD::AND) return false; return (Op.getOperand(0).getOpcode() == X86ISD::SETCC && Op.getOperand(0).hasOneUse() && Op.getOperand(1).getOpcode() == X86ISD::SETCC && Op.getOperand(1).hasOneUse()); } // isXor1OfSetCC - Return true if node is an ISD::XOR of a X86ISD::SETCC and // 1 and that the SETCC node has a single use. static bool isXor1OfSetCC(SDValue Op) { if (Op.getOpcode() != ISD::XOR) return false; if (isOneConstant(Op.getOperand(1))) return Op.getOperand(0).getOpcode() == X86ISD::SETCC && Op.getOperand(0).hasOneUse(); return false; } SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { bool addTest = true; SDValue Chain = Op.getOperand(0); SDValue Cond = Op.getOperand(1); SDValue Dest = Op.getOperand(2); SDLoc dl(Op); SDValue CC; bool Inverted = false; if (Cond.getOpcode() == ISD::SETCC) { // Check for setcc([su]{add,sub,mul}o == 0). if (cast(Cond.getOperand(2))->get() == ISD::SETEQ && isNullConstant(Cond.getOperand(1)) && Cond.getOperand(0).getResNo() == 1 && (Cond.getOperand(0).getOpcode() == ISD::SADDO || Cond.getOperand(0).getOpcode() == ISD::UADDO || Cond.getOperand(0).getOpcode() == ISD::SSUBO || Cond.getOperand(0).getOpcode() == ISD::USUBO || Cond.getOperand(0).getOpcode() == ISD::SMULO || Cond.getOperand(0).getOpcode() == ISD::UMULO)) { Inverted = true; Cond = Cond.getOperand(0); } else { SDValue NewCond = LowerSETCC(Cond, DAG); if (NewCond.getNode()) Cond = NewCond; } } #if 0 // FIXME: LowerXALUO doesn't handle these!! else if (Cond.getOpcode() == X86ISD::ADD || Cond.getOpcode() == X86ISD::SUB || Cond.getOpcode() == X86ISD::SMUL || Cond.getOpcode() == X86ISD::UMUL) Cond = LowerXALUO(Cond, DAG); #endif // Look pass (and (setcc_carry (cmp ...)), 1). if (Cond.getOpcode() == ISD::AND && Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY && isOneConstant(Cond.getOperand(1))) Cond = Cond.getOperand(0); // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. unsigned CondOpcode = Cond.getOpcode(); if (CondOpcode == X86ISD::SETCC || CondOpcode == X86ISD::SETCC_CARRY) { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); unsigned Opc = Cmp.getOpcode(); // FIXME: WHY THE SPECIAL CASING OF LogicalCmp?? if (isX86LogicalCmp(Cmp) || Opc == X86ISD::BT) { Cond = Cmp; addTest = false; } else { switch (cast(CC)->getZExtValue()) { default: break; case X86::COND_O: case X86::COND_B: // These can only come from an arithmetic instruction with overflow, // e.g. SADDO, UADDO. Cond = Cond.getNode()->getOperand(1); addTest = false; break; } } } CondOpcode = Cond.getOpcode(); if (CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO || CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO || ((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) && Cond.getOperand(0).getValueType() != MVT::i8)) { SDValue LHS = Cond.getOperand(0); SDValue RHS = Cond.getOperand(1); unsigned X86Opcode; unsigned X86Cond; SDVTList VTs; // Keep this in sync with LowerXALUO, otherwise we might create redundant // instructions that can't be removed afterwards (i.e. X86ISD::ADD and // X86ISD::INC). 
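  // For example, (brcond (seteq (uaddo X, Y).overflow, 0), dest) arrives here
  // with Inverted set; the switch below maps UADDO to X86ISD::ADD with
  // X86::COND_B, and the inversion turns that into X86::COND_AE, i.e. "branch
  // to dest when the unsigned add did not carry".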
switch (CondOpcode) { case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break; case ISD::SADDO: if (isOneConstant(RHS)) { X86Opcode = X86ISD::INC; X86Cond = X86::COND_O; break; } X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break; case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break; case ISD::SSUBO: if (isOneConstant(RHS)) { X86Opcode = X86ISD::DEC; X86Cond = X86::COND_O; break; } X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break; case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break; case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break; default: llvm_unreachable("unexpected overflowing operator"); } if (Inverted) X86Cond = X86::GetOppositeBranchCondition((X86::CondCode)X86Cond); if (CondOpcode == ISD::UMULO) VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(), MVT::i32); else VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); SDValue X86Op = DAG.getNode(X86Opcode, dl, VTs, LHS, RHS); if (CondOpcode == ISD::UMULO) Cond = X86Op.getValue(2); else Cond = X86Op.getValue(1); CC = DAG.getConstant(X86Cond, dl, MVT::i8); addTest = false; } else { unsigned CondOpc; if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) { SDValue Cmp = Cond.getOperand(0).getOperand(1); if (CondOpc == ISD::OR) { // Also, recognize the pattern generated by an FCMP_UNE. We can emit // two branches instead of an explicit OR instruction with a // separate test. if (Cmp == Cond.getOperand(1).getOperand(1) && isX86LogicalCmp(Cmp)) { CC = Cond.getOperand(0).getOperand(0); Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, Cmp); CC = Cond.getOperand(1).getOperand(0); Cond = Cmp; addTest = false; } } else { // ISD::AND // Also, recognize the pattern generated by an FCMP_OEQ. We can emit // two branches instead of an explicit AND instruction with a // separate test. However, we only do this if this block doesn't // have a fall-through edge, because this requires an explicit // jmp when the condition is false. if (Cmp == Cond.getOperand(1).getOperand(1) && isX86LogicalCmp(Cmp) && Op.getNode()->hasOneUse()) { X86::CondCode CCode = (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); CCode = X86::GetOppositeBranchCondition(CCode); CC = DAG.getConstant(CCode, dl, MVT::i8); SDNode *User = *Op.getNode()->use_begin(); // Look for an unconditional branch following this conditional branch. // We need this because we need to reverse the successors in order // to implement FCMP_OEQ. if (User->getOpcode() == ISD::BR) { SDValue FalseBB = User->getOperand(1); SDNode *NewBR = DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); assert(NewBR == User); (void)NewBR; Dest = FalseBB; Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, Cmp); X86::CondCode CCode = (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0); CCode = X86::GetOppositeBranchCondition(CCode); CC = DAG.getConstant(CCode, dl, MVT::i8); Cond = Cmp; addTest = false; } } } } else if (Cond.hasOneUse() && isXor1OfSetCC(Cond)) { // Recognize for xorb (setcc), 1 patterns. The xor inverts the condition. // It should be transformed during dag combiner except when the condition // is set by a arithmetics with overflow node. 
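  // e.g. (brcond (xor (setcc), 1), dest): the xor with 1 flips the truth
  // value, so we take the opposite condition code of the inner SETCC and
  // branch on its original EFLAGS-producing operand.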
X86::CondCode CCode = (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); CCode = X86::GetOppositeBranchCondition(CCode); CC = DAG.getConstant(CCode, dl, MVT::i8); Cond = Cond.getOperand(0).getOperand(1); addTest = false; } else if (Cond.getOpcode() == ISD::SETCC && cast(Cond.getOperand(2))->get() == ISD::SETOEQ) { // For FCMP_OEQ, we can emit // two branches instead of an explicit AND instruction with a // separate test. However, we only do this if this block doesn't // have a fall-through edge, because this requires an explicit // jmp when the condition is false. if (Op.getNode()->hasOneUse()) { SDNode *User = *Op.getNode()->use_begin(); // Look for an unconditional branch following this conditional branch. // We need this because we need to reverse the successors in order // to implement FCMP_OEQ. if (User->getOpcode() == ISD::BR) { SDValue FalseBB = User->getOperand(1); SDNode *NewBR = DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); assert(NewBR == User); (void)NewBR; Dest = FalseBB; SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, Cond.getOperand(0), Cond.getOperand(1)); Cmp = ConvertCmpIfNecessary(Cmp, DAG); CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8); Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, Cmp); CC = DAG.getConstant(X86::COND_P, dl, MVT::i8); Cond = Cmp; addTest = false; } } } else if (Cond.getOpcode() == ISD::SETCC && cast(Cond.getOperand(2))->get() == ISD::SETUNE) { // For FCMP_UNE, we can emit // two branches instead of an explicit AND instruction with a // separate test. However, we only do this if this block doesn't // have a fall-through edge, because this requires an explicit // jmp when the condition is false. if (Op.getNode()->hasOneUse()) { SDNode *User = *Op.getNode()->use_begin(); // Look for an unconditional branch following this conditional branch. // We need this because we need to reverse the successors in order // to implement FCMP_UNE. if (User->getOpcode() == ISD::BR) { SDValue FalseBB = User->getOperand(1); SDNode *NewBR = DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); assert(NewBR == User); (void)NewBR; SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, Cond.getOperand(0), Cond.getOperand(1)); Cmp = ConvertCmpIfNecessary(Cmp, DAG); CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8); Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, Cmp); CC = DAG.getConstant(X86::COND_NP, dl, MVT::i8); Cond = Cmp; addTest = false; Dest = FalseBB; } } } } if (addTest) { // Look pass the truncate if the high bits are known zero. if (isTruncWithZeroHighBitsInput(Cond, DAG)) Cond = Cond.getOperand(0); // We know the result of AND is compared against zero. Try to match // it to BT. if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { if (SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG)) { CC = NewSetCC.getOperand(0); Cond = NewSetCC.getOperand(1); addTest = false; } } } if (addTest) { X86::CondCode X86Cond = Inverted ? X86::COND_E : X86::COND_NE; CC = DAG.getConstant(X86Cond, dl, MVT::i8); Cond = EmitTest(Cond, X86Cond, dl, DAG); } Cond = ConvertCmpIfNecessary(Cond, DAG); return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, Cond); } // Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets. // Calls to _alloca are needed to probe the stack when allocating more than 4k // bytes in one go. 
Touching the stack at 4K increments is necessary to ensure // that the guard pages used by the OS virtual memory manager are allocated in // correct sequence. SDValue X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool SplitStack = MF.shouldSplitStack(); bool Lower = (Subtarget->isOSWindows() && !Subtarget->isTargetMachO()) || SplitStack; SDLoc dl(Op); // Get the inputs. SDNode *Node = Op.getNode(); SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); unsigned Align = cast(Op.getOperand(2))->getZExtValue(); EVT VT = Node->getValueType(0); // Chain the dynamic stack allocation so that it doesn't modify the stack // pointer when other instructions are using the stack. Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl); bool Is64Bit = Subtarget->is64Bit(); MVT SPTy = getPointerTy(DAG.getDataLayout()); SDValue Result; if (!Lower) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and" " not tell us which reg is the stack pointer!"); EVT VT = Node->getValueType(0); SDValue Tmp3 = Node->getOperand(2); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); unsigned Align = cast(Tmp3)->getZExtValue(); const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); unsigned StackAlign = TFI.getStackAlignment(); Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value if (Align > StackAlign) Result = DAG.getNode(ISD::AND, dl, VT, Result, DAG.getConstant(-(uint64_t)Align, dl, VT)); Chain = DAG.getCopyToReg(Chain, dl, SPReg, Result); // Output chain } else if (SplitStack) { MachineRegisterInfo &MRI = MF.getRegInfo(); if (Is64Bit) { // The 64 bit implementation of segmented stacks needs to clobber both r10 // r11. This makes it impossible to use it along with nested parameters. const Function *F = MF.getFunction(); for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) if (I->hasNestAttr()) report_fatal_error("Cannot use segmented stacks with functions that " "have nested arguments."); } const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy); unsigned Vreg = MRI.createVirtualRegister(AddrRegClass); Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size); Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain, DAG.getRegister(Vreg, SPTy)); } else { SDValue Flag; const unsigned Reg = (Subtarget->isTarget64BitLP64() ? 
X86::RAX : X86::EAX); Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag); Flag = Chain.getValue(1); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag); const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); unsigned SPReg = RegInfo->getStackRegister(); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy); Chain = SP.getValue(1); if (Align) { SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0), DAG.getConstant(-(uint64_t)Align, dl, VT)); Chain = DAG.getCopyToReg(Chain, dl, SPReg, SP); } Result = SP; } Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true), DAG.getIntPtrConstant(0, dl, true), SDValue(), dl); SDValue Ops[2] = {Result, Chain}; return DAG.getMergeValues(Ops, dl); } SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); auto PtrVT = getPointerTy(MF.getDataLayout()); X86MachineFunctionInfo *FuncInfo = MF.getInfo(); const Value *SV = cast(Op.getOperand(2))->getValue(); SDLoc DL(Op); if (!Subtarget->is64Bit() || Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv())) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), MachinePointerInfo(SV), false, false, 0); } // __va_list_tag: // gp_offset (0 - 6 * 8) // fp_offset (48 - 48 + 8 * 16) // overflow_arg_area (point to parameters coming in memory). // reg_save_area SmallVector MemOps; SDValue FIN = Op.getOperand(1); // Store gp_offset SDValue Store = DAG.getStore(Op.getOperand(0), DL, DAG.getConstant(FuncInfo->getVarArgsGPOffset(), DL, MVT::i32), FIN, MachinePointerInfo(SV), false, false, 0); MemOps.push_back(Store); // Store fp_offset FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4, DL)); Store = DAG.getStore(Op.getOperand(0), DL, DAG.getConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32), FIN, MachinePointerInfo(SV, 4), false, false, 0); MemOps.push_back(Store); // Store ptr to overflow_arg_area FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4, DL)); SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); Store = DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN, MachinePointerInfo(SV, 8), false, false, 0); MemOps.push_back(Store); // Store ptr to reg_save_area. FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant( Subtarget->isTarget64BitLP64() ? 8 : 4, DL)); SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT); Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN, MachinePointerInfo( SV, Subtarget->isTarget64BitLP64() ? 16 : 12), false, false, 0); MemOps.push_back(Store); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); } SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->is64Bit() && "LowerVAARG only handles 64-bit va_arg!"); assert(Op.getNode()->getNumOperands() == 4); MachineFunction &MF = DAG.getMachineFunction(); if (Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv())) // The Win64 ABI uses char* instead of a structure. 
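  // A minimal C-level sketch of the SysV x86-64 va_list that LowerVASTART
  // above fills in (field offsets 0/4/8/16 match its stores on LP64):
  //   struct __va_list_tag {
  //     unsigned gp_offset;       // next GPR slot inside reg_save_area
  //     unsigned fp_offset;       // next XMM slot inside reg_save_area
  //     void *overflow_arg_area;  // arguments passed on the stack
  //     void *reg_save_area;      // spilled register arguments
  //   };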
return DAG.expandVAArg(Op.getNode()); SDValue Chain = Op.getOperand(0); SDValue SrcPtr = Op.getOperand(1); const Value *SV = cast(Op.getOperand(2))->getValue(); unsigned Align = Op.getConstantOperandVal(3); SDLoc dl(Op); EVT ArgVT = Op.getNode()->getValueType(0); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); uint32_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy); uint8_t ArgMode; // Decide which area this value should be read from. // TODO: Implement the AMD64 ABI in its entirety. This simple // selection mechanism works only for the basic types. if (ArgVT == MVT::f80) { llvm_unreachable("va_arg for f80 not yet implemented"); } else if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) { ArgMode = 2; // Argument passed in XMM register. Use fp_offset. } else if (ArgVT.isInteger() && ArgSize <= 32 /*bytes*/) { ArgMode = 1; // Argument passed in GPR64 register(s). Use gp_offset. } else { llvm_unreachable("Unhandled argument type in LowerVAARG"); } if (ArgMode == 2) { // Sanity Check: Make sure using fp_offset makes sense. assert(!Subtarget->useSoftFloat() && !(MF.getFunction()->hasFnAttribute(Attribute::NoImplicitFloat)) && Subtarget->hasSSE1()); } // Insert VAARG_64 node into the DAG // VAARG_64 returns two values: Variable Argument Address, Chain SDValue InstOps[] = {Chain, SrcPtr, DAG.getConstant(ArgSize, dl, MVT::i32), DAG.getConstant(ArgMode, dl, MVT::i8), DAG.getConstant(Align, dl, MVT::i32)}; SDVTList VTs = DAG.getVTList(getPointerTy(DAG.getDataLayout()), MVT::Other); SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl, VTs, InstOps, MVT::i64, MachinePointerInfo(SV), /*Align=*/0, /*Volatile=*/false, /*ReadMem=*/true, /*WriteMem=*/true); Chain = VAARG.getValue(1); // Load the next argument and return it return DAG.getLoad(ArgVT, dl, Chain, VAARG, MachinePointerInfo(), false, false, false, 0); } static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { // X86-64 va_list is a struct { i32, i32, i8*, i8* }, except on Windows, // where a va_list is still an i8*. assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!"); if (Subtarget->isCallingConvWin64( DAG.getMachineFunction().getFunction()->getCallingConv())) // Probably a Win64 va_copy. return DAG.expandVACopy(Op.getNode()); SDValue Chain = Op.getOperand(0); SDValue DstPtr = Op.getOperand(1); SDValue SrcPtr = Op.getOperand(2); const Value *DstSV = cast(Op.getOperand(3))->getValue(); const Value *SrcSV = cast(Op.getOperand(4))->getValue(); SDLoc DL(Op); return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(24, DL), 8, /*isVolatile*/false, false, false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } // getTargetVShiftByConstNode - Handle vector element shifts where the shift // amount is a constant. Takes immediate version of shift as input. static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT, SDValue SrcOp, uint64_t ShiftAmt, SelectionDAG &DAG) { MVT ElementType = VT.getVectorElementType(); // Fold this packed shift into its first operand if ShiftAmt is 0. 
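  // (A shift amount of zero is a no-op; an amount >= the element width is
  // handled just below: logical shifts fold to zero, while arithmetic shifts
  // are clamped to width-1 so each lane still broadcasts its sign bit, e.g.
  // (VSRAI v4i32 X, 40) becomes (VSRAI v4i32 X, 31).)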
if (ShiftAmt == 0) return SrcOp; // Check for ShiftAmt >= element width if (ShiftAmt >= ElementType.getSizeInBits()) { if (Opc == X86ISD::VSRAI) ShiftAmt = ElementType.getSizeInBits() - 1; else return DAG.getConstant(0, dl, VT); } assert((Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI) && "Unknown target vector shift-by-constant node"); // Fold this packed vector shift into a build vector if SrcOp is a // vector of Constants or UNDEFs, and SrcOp valuetype is the same as VT. if (VT == SrcOp.getSimpleValueType() && ISD::isBuildVectorOfConstantSDNodes(SrcOp.getNode())) { SmallVector Elts; unsigned NumElts = SrcOp->getNumOperands(); ConstantSDNode *ND; switch(Opc) { default: llvm_unreachable(nullptr); case X86ISD::VSHLI: for (unsigned i=0; i!=NumElts; ++i) { SDValue CurrentOp = SrcOp->getOperand(i); if (CurrentOp->getOpcode() == ISD::UNDEF) { Elts.push_back(CurrentOp); continue; } ND = cast(CurrentOp); const APInt &C = ND->getAPIntValue(); Elts.push_back(DAG.getConstant(C.shl(ShiftAmt), dl, ElementType)); } break; case X86ISD::VSRLI: for (unsigned i=0; i!=NumElts; ++i) { SDValue CurrentOp = SrcOp->getOperand(i); if (CurrentOp->getOpcode() == ISD::UNDEF) { Elts.push_back(CurrentOp); continue; } ND = cast(CurrentOp); const APInt &C = ND->getAPIntValue(); Elts.push_back(DAG.getConstant(C.lshr(ShiftAmt), dl, ElementType)); } break; case X86ISD::VSRAI: for (unsigned i=0; i!=NumElts; ++i) { SDValue CurrentOp = SrcOp->getOperand(i); if (CurrentOp->getOpcode() == ISD::UNDEF) { Elts.push_back(CurrentOp); continue; } ND = cast(CurrentOp); const APInt &C = ND->getAPIntValue(); Elts.push_back(DAG.getConstant(C.ashr(ShiftAmt), dl, ElementType)); } break; } return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Elts); } return DAG.getNode(Opc, dl, VT, SrcOp, DAG.getConstant(ShiftAmt, dl, MVT::i8)); } // getTargetVShiftNode - Handle vector element shifts where the shift amount // may or may not be a constant. Takes immediate version of shift as input. static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT, SDValue SrcOp, SDValue ShAmt, SelectionDAG &DAG) { MVT SVT = ShAmt.getSimpleValueType(); assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!"); // Catch shift-by-constant. if (ConstantSDNode *CShAmt = dyn_cast(ShAmt)) return getTargetVShiftByConstNode(Opc, dl, VT, SrcOp, CShAmt->getZExtValue(), DAG); // Change opcode to non-immediate version switch (Opc) { default: llvm_unreachable("Unknown target vector shift node"); case X86ISD::VSHLI: Opc = X86ISD::VSHL; break; case X86ISD::VSRLI: Opc = X86ISD::VSRL; break; case X86ISD::VSRAI: Opc = X86ISD::VSRA; break; } const X86Subtarget &Subtarget = static_cast(DAG.getSubtarget()); if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND && ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) { // Let the shuffle legalizer expand this shift amount node. SDValue Op0 = ShAmt.getOperand(0); Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(Op0), MVT::v8i16, Op0); ShAmt = getShuffleVectorZeroOrUndef(Op0, 0, true, &Subtarget, DAG); } else { // Need to build a vector containing shift amount. // SSE/AVX packed shifts only use the lower 64-bit of the shift count. SmallVector ShOps; ShOps.push_back(ShAmt); if (SVT == MVT::i32) { ShOps.push_back(DAG.getConstant(0, dl, SVT)); ShOps.push_back(DAG.getUNDEF(SVT)); } ShOps.push_back(DAG.getUNDEF(SVT)); MVT BVT = SVT == MVT::i32 ? 
MVT::v4i32 : MVT::v2i64; ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, BVT, ShOps); } // The return type has to be a 128-bit type with the same element // type as the input type. MVT EltVT = VT.getVectorElementType(); MVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits()); ShAmt = DAG.getBitcast(ShVT, ShAmt); return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); } /// \brief Return Mask with the necessary casting or extending /// for \p Mask according to \p MaskVT when lowering masking intrinsics static SDValue getMaskNode(SDValue Mask, MVT MaskVT, const X86Subtarget *Subtarget, SelectionDAG &DAG, SDLoc dl) { if (isAllOnesConstant(Mask)) return DAG.getTargetConstant(1, dl, MaskVT); if (X86::isZeroNode(Mask)) return DAG.getTargetConstant(0, dl, MaskVT); if (MaskVT.bitsGT(Mask.getSimpleValueType())) { // Mask should be extended Mask = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::getIntegerVT(MaskVT.getSizeInBits()), Mask); } if (Mask.getSimpleValueType() == MVT::i64 && Subtarget->is32Bit()) { if (MaskVT == MVT::v64i1) { assert(Subtarget->hasBWI() && "Expected AVX512BW target!"); // In case 32bit mode, bitcast i64 is illegal, extend/split it. SDValue Lo, Hi; Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask, DAG.getConstant(0, dl, MVT::i32)); Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask, DAG.getConstant(1, dl, MVT::i32)); Lo = DAG.getBitcast(MVT::v32i1, Lo); Hi = DAG.getBitcast(MVT::v32i1, Hi); return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi); } else { // MaskVT require < 64bit. Truncate mask (should succeed in any case), // and bitcast. MVT TruncVT = MVT::getIntegerVT(MaskVT.getSizeInBits()); return DAG.getBitcast(MaskVT, DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Mask)); } } else { MVT BitcastVT = MVT::getVectorVT(MVT::i1, Mask.getSimpleValueType().getSizeInBits()); // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements // are extracted by EXTRACT_SUBVECTOR. return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, DAG.getBitcast(BitcastVT, Mask), DAG.getIntPtrConstant(0, dl)); } } /// \brief Return (and \p Op, \p Mask) for compare instructions or /// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the /// necessary casting or extending for \p Mask when lowering masking intrinsics static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, SDValue PreservedSrc, const X86Subtarget *Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); unsigned OpcodeSelect = ISD::VSELECT; SDLoc dl(Op); if (isAllOnesConstant(Mask)) return Op; SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); switch (Op.getOpcode()) { default: break; case X86ISD::PCMPEQM: case X86ISD::PCMPGTM: case X86ISD::CMPM: case X86ISD::CMPMU: return DAG.getNode(ISD::AND, dl, VT, Op, VMask); case X86ISD::VFPCLASS: case X86ISD::VFPCLASSS: return DAG.getNode(ISD::OR, dl, VT, Op, VMask); case X86ISD::VTRUNC: case X86ISD::VTRUNCS: case X86ISD::VTRUNCUS: // We can't use ISD::VSELECT here because it is not always "Legal" // for the destination type. For example vpmovqb require only AVX512 // and vselect that can operate on byte element type require BWI OpcodeSelect = X86ISD::SELECT; break; } if (PreservedSrc.getOpcode() == ISD::UNDEF) PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl); return DAG.getNode(OpcodeSelect, dl, VT, VMask, Op, PreservedSrc); } /// \brief Creates an SDNode for a predicated scalar operation. /// \returns (X86vselect \p Mask, \p Op, \p PreservedSrc). 
/// The mask is coming as MVT::i8 and it should be truncated /// to MVT::i1 while lowering masking intrinsics. /// The main difference between ScalarMaskingNode and VectorMaskingNode is using /// "X86select" instead of "vselect". We just can't create the "vselect" node /// for a scalar instruction. static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask, SDValue PreservedSrc, const X86Subtarget *Subtarget, SelectionDAG &DAG) { if (isAllOnesConstant(Mask)) return Op; MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); // The mask should be of type MVT::i1 SDValue IMask = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Mask); if (Op.getOpcode() == X86ISD::FSETCC) return DAG.getNode(ISD::AND, dl, VT, Op, IMask); if (Op.getOpcode() == X86ISD::VFPCLASS || Op.getOpcode() == X86ISD::VFPCLASSS) return DAG.getNode(ISD::OR, dl, VT, Op, IMask); if (PreservedSrc.getOpcode() == ISD::UNDEF) PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl); return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc); } static int getSEHRegistrationNodeSize(const Function *Fn) { if (!Fn->hasPersonalityFn()) report_fatal_error( "querying registration node size for function without personality"); // The RegNodeSize is 6 32-bit words for SEH and 4 for C++ EH. See // WinEHStatePass for the full struct definition. switch (classifyEHPersonality(Fn->getPersonalityFn())) { case EHPersonality::MSVC_X86SEH: return 24; case EHPersonality::MSVC_CXX: return 16; default: break; } report_fatal_error( "can only recover FP for 32-bit MSVC EH personality functions"); } /// When the MSVC runtime transfers control to us, either to an outlined /// function or when returning to a parent frame after catching an exception, we /// recover the parent frame pointer by doing arithmetic on the incoming EBP. /// Here's the math: /// RegNodeBase = EntryEBP - RegNodeSize /// ParentFP = RegNodeBase - ParentFrameOffset /// Subtracting RegNodeSize takes us to the offset of the registration node, and /// subtracting the offset (negative on x86) takes us back to the parent FP. static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn, SDValue EntryEBP) { MachineFunction &MF = DAG.getMachineFunction(); SDLoc dl; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); // It's possible that the parent function no longer has a personality function // if the exceptional code was optimized away, in which case we just return // the incoming EBP. if (!Fn->hasPersonalityFn()) return EntryEBP; // Get an MCSymbol that will ultimately resolve to the frame offset of the EH // registration, or the .set_setframe offset. MCSymbol *OffsetSym = MF.getMMI().getContext().getOrCreateParentFrameOffsetSymbol( GlobalValue::getRealLinkageName(Fn->getName())); SDValue OffsetSymVal = DAG.getMCSymbol(OffsetSym, PtrVT); SDValue ParentFrameOffset = DAG.getNode(ISD::LOCAL_RECOVER, dl, PtrVT, OffsetSymVal); // Return EntryEBP + ParentFrameOffset for x64. This adjusts from RSP after // prologue to RBP in the parent function. 
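  // A small worked example with hypothetical values for the 32-bit path
  // below: an MSVC SEH personality has RegNodeSize == 24, so with
  // EntryEBP = 0x1000 and a recorded ParentFrameOffset of -64 we get
  // RegNodeBase = 0x1000 - 24 = 0xfe8 and ParentFP = 0xfe8 - (-64) = 0x1028.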
const X86Subtarget &Subtarget = static_cast(DAG.getSubtarget()); if (Subtarget.is64Bit()) return DAG.getNode(ISD::ADD, dl, PtrVT, EntryEBP, ParentFrameOffset); int RegNodeSize = getSEHRegistrationNodeSize(Fn); // RegNodeBase = EntryEBP - RegNodeSize // ParentFP = RegNodeBase - ParentFrameOffset SDValue RegNodeBase = DAG.getNode(ISD::SUB, dl, PtrVT, EntryEBP, DAG.getConstant(RegNodeSize, dl, PtrVT)); return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, ParentFrameOffset); } static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc dl(Op); unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); MVT VT = Op.getSimpleValueType(); const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo); if (IntrData) { switch(IntrData->Type) { case INTR_TYPE_1OP: return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1)); case INTR_TYPE_2OP: return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); case INTR_TYPE_2OP_IMM8: return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(2))); case INTR_TYPE_3OP: return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); case INTR_TYPE_4OP: return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3), Op.getOperand(4)); case INTR_TYPE_1OP_MASK_RM: { SDValue Src = Op.getOperand(1); SDValue PassThru = Op.getOperand(2); SDValue Mask = Op.getOperand(3); SDValue RoundingMode; // We allways add rounding mode to the Node. // If the rounding mode is not specified, we add the // "current direction" mode. if (Op.getNumOperands() == 4) RoundingMode = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32); else RoundingMode = Op.getOperand(4); unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; if (IntrWithRoundingModeOpcode != 0) if (cast(RoundingMode)->getZExtValue() != X86::STATIC_ROUNDING::CUR_DIRECTION) return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), Src, RoundingMode), Mask, PassThru, Subtarget, DAG); return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src, RoundingMode), Mask, PassThru, Subtarget, DAG); } case INTR_TYPE_1OP_MASK: { SDValue Src = Op.getOperand(1); SDValue PassThru = Op.getOperand(2); SDValue Mask = Op.getOperand(3); // We add rounding mode to the Node when // - RM Opcode is specified and // - RM is not "current direction". 
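  // The result built below has the shape
  //   (vselect Mask, (Opc0 Src) or (Opc1 Src, Rnd), PassThru)
  // i.e. lanes whose mask bit is set take the new value and the remaining
  // lanes come from PassThru (which getVectorMaskingNode replaces with zeros
  // when the intrinsic passed undef).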
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; if (IntrWithRoundingModeOpcode != 0) { SDValue Rnd = Op.getOperand(4); unsigned Round = cast(Rnd)->getZExtValue(); if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) { return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), Src, Rnd), Mask, PassThru, Subtarget, DAG); } } return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src), Mask, PassThru, Subtarget, DAG); } case INTR_TYPE_SCALAR_MASK: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue passThru = Op.getOperand(3); SDValue Mask = Op.getOperand(4); return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2), Mask, passThru, Subtarget, DAG); } case INTR_TYPE_SCALAR_MASK_RM: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue Src0 = Op.getOperand(3); SDValue Mask = Op.getOperand(4); // There are 2 kinds of intrinsics in this group: // (1) With suppress-all-exceptions (sae) or rounding mode- 6 operands // (2) With rounding mode and sae - 7 operands. if (Op.getNumOperands() == 6) { SDValue Sae = Op.getOperand(5); unsigned Opc = IntrData->Opc1 ? IntrData->Opc1 : IntrData->Opc0; return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2, Sae), Mask, Src0, Subtarget, DAG); } assert(Op.getNumOperands() == 7 && "Unexpected intrinsic form"); SDValue RoundingMode = Op.getOperand(5); SDValue Sae = Op.getOperand(6); return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, RoundingMode, Sae), Mask, Src0, Subtarget, DAG); } case INTR_TYPE_2OP_MASK: case INTR_TYPE_2OP_IMM8_MASK: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue PassThru = Op.getOperand(3); SDValue Mask = Op.getOperand(4); if (IntrData->Type == INTR_TYPE_2OP_IMM8_MASK) Src2 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src2); // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand. unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; if (IntrWithRoundingModeOpcode != 0) { SDValue Rnd = Op.getOperand(5); unsigned Round = cast(Rnd)->getZExtValue(); if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) { return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), Src1, Src2, Rnd), Mask, PassThru, Subtarget, DAG); } } // TODO: Intrinsics should have fast-math-flags to propagate. return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,Src1,Src2), Mask, PassThru, Subtarget, DAG); } case INTR_TYPE_2OP_MASK_RM: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue PassThru = Op.getOperand(3); SDValue Mask = Op.getOperand(4); // We specify 2 possible modes for intrinsics, with/without rounding // modes. // First, we check if the intrinsic have rounding mode (6 operands), // if not, we set rounding mode to "current". 
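  // (CUR_DIRECTION stands for the rounding mode currently programmed in
  // MXCSR, so leaving the operand out behaves like the ordinary,
  // non-embedded-rounding form of the instruction.)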
SDValue Rnd; if (Op.getNumOperands() == 6) Rnd = Op.getOperand(5); else Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32); return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Rnd), Mask, PassThru, Subtarget, DAG); } case INTR_TYPE_3OP_SCALAR_MASK_RM: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue Src3 = Op.getOperand(3); SDValue PassThru = Op.getOperand(4); SDValue Mask = Op.getOperand(5); SDValue Sae = Op.getOperand(6); return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Src3, Sae), Mask, PassThru, Subtarget, DAG); } case INTR_TYPE_3OP_MASK_RM: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue Imm = Op.getOperand(3); SDValue PassThru = Op.getOperand(4); SDValue Mask = Op.getOperand(5); // We specify 2 possible modes for intrinsics, with/without rounding // modes. // First, we check if the intrinsic have rounding mode (7 operands), // if not, we set rounding mode to "current". SDValue Rnd; if (Op.getNumOperands() == 7) Rnd = Op.getOperand(6); else Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32); return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Imm, Rnd), Mask, PassThru, Subtarget, DAG); } case INTR_TYPE_3OP_IMM8_MASK: case INTR_TYPE_3OP_MASK: case INSERT_SUBVEC: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue Src3 = Op.getOperand(3); SDValue PassThru = Op.getOperand(4); SDValue Mask = Op.getOperand(5); if (IntrData->Type == INTR_TYPE_3OP_IMM8_MASK) Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3); else if (IntrData->Type == INSERT_SUBVEC) { // imm should be adapted to ISD::INSERT_SUBVECTOR behavior assert(isa(Src3) && "Expected a ConstantSDNode here!"); unsigned Imm = cast(Src3)->getZExtValue(); Imm *= Src2.getSimpleValueType().getVectorNumElements(); Src3 = DAG.getTargetConstant(Imm, dl, MVT::i32); } // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand. 
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; if (IntrWithRoundingModeOpcode != 0) { SDValue Rnd = Op.getOperand(6); unsigned Round = cast(Rnd)->getZExtValue(); if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) { return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), Src1, Src2, Src3, Rnd), Mask, PassThru, Subtarget, DAG); } } return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Src3), Mask, PassThru, Subtarget, DAG); } case VPERM_3OP_MASKZ: case VPERM_3OP_MASK:{ // Src2 is the PassThru SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue Src3 = Op.getOperand(3); SDValue Mask = Op.getOperand(4); MVT VT = Op.getSimpleValueType(); SDValue PassThru = SDValue(); // set PassThru element if (IntrData->Type == VPERM_3OP_MASKZ) PassThru = getZeroVector(VT, Subtarget, DAG, dl); else PassThru = DAG.getBitcast(VT, Src2); // Swap Src1 and Src2 in the node creation return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src2, Src1, Src3), Mask, PassThru, Subtarget, DAG); } case FMA_OP_MASK3: case FMA_OP_MASKZ: case FMA_OP_MASK: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue Src3 = Op.getOperand(3); SDValue Mask = Op.getOperand(4); MVT VT = Op.getSimpleValueType(); SDValue PassThru = SDValue(); // set PassThru element if (IntrData->Type == FMA_OP_MASKZ) PassThru = getZeroVector(VT, Subtarget, DAG, dl); else if (IntrData->Type == FMA_OP_MASK3) PassThru = Src3; else PassThru = Src1; // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand. unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; if (IntrWithRoundingModeOpcode != 0) { SDValue Rnd = Op.getOperand(5); if (cast(Rnd)->getZExtValue() != X86::STATIC_ROUNDING::CUR_DIRECTION) return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), Src1, Src2, Src3, Rnd), Mask, PassThru, Subtarget, DAG); } return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src1, Src2, Src3), Mask, PassThru, Subtarget, DAG); } case TERLOG_OP_MASK: case TERLOG_OP_MASKZ: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue Src3 = Op.getOperand(3); SDValue Src4 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(4)); SDValue Mask = Op.getOperand(5); MVT VT = Op.getSimpleValueType(); SDValue PassThru = Src1; // Set PassThru element. 
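  // (With zero-masking, lanes whose mask bit is clear become zero; with
  // merge-masking they preserve the corresponding lanes of Src1.)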
if (IntrData->Type == TERLOG_OP_MASKZ) PassThru = getZeroVector(VT, Subtarget, DAG, dl); return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Src3, Src4), Mask, PassThru, Subtarget, DAG); } case FPCLASS: { // FPclass intrinsics with mask SDValue Src1 = Op.getOperand(1); MVT VT = Src1.getSimpleValueType(); MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); SDValue Imm = Op.getOperand(2); SDValue Mask = Op.getOperand(3); MVT BitcastVT = MVT::getVectorVT(MVT::i1, Mask.getSimpleValueType().getSizeInBits()); SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MaskVT, Src1, Imm); SDValue FPclassMask = getVectorMaskingNode(FPclass, Mask, DAG.getTargetConstant(0, dl, MaskVT), Subtarget, DAG); SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT, DAG.getUNDEF(BitcastVT), FPclassMask, DAG.getIntPtrConstant(0, dl)); return DAG.getBitcast(Op.getValueType(), Res); } case FPCLASSS: { SDValue Src1 = Op.getOperand(1); SDValue Imm = Op.getOperand(2); SDValue Mask = Op.getOperand(3); SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::i1, Src1, Imm); SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask, DAG.getTargetConstant(0, dl, MVT::i1), Subtarget, DAG); return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i8, FPclassMask); } case CMP_MASK: case CMP_MASK_CC: { // Comparison intrinsics with masks. // Example of transformation: // (i8 (int_x86_avx512_mask_pcmpeq_q_128 // (v2i64 %a), (v2i64 %b), (i8 %mask))) -> // (i8 (bitcast // (v8i1 (insert_subvector undef, // (v2i1 (and (PCMPEQM %a, %b), // (extract_subvector // (v8i1 (bitcast %mask)), 0))), 0)))) MVT VT = Op.getOperand(1).getSimpleValueType(); MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); SDValue Mask = Op.getOperand((IntrData->Type == CMP_MASK_CC) ? 4 : 3); MVT BitcastVT = MVT::getVectorVT(MVT::i1, Mask.getSimpleValueType().getSizeInBits()); SDValue Cmp; if (IntrData->Type == CMP_MASK_CC) { SDValue CC = Op.getOperand(3); CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, CC); // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand. 
if (IntrData->Opc1 != 0) { SDValue Rnd = Op.getOperand(5); if (cast(Rnd)->getZExtValue() != X86::STATIC_ROUNDING::CUR_DIRECTION) Cmp = DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1), Op.getOperand(2), CC, Rnd); } //default rounding mode if(!Cmp.getNode()) Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1), Op.getOperand(2), CC); } else { assert(IntrData->Type == CMP_MASK && "Unexpected intrinsic type!"); Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1), Op.getOperand(2)); } SDValue CmpMask = getVectorMaskingNode(Cmp, Mask, DAG.getTargetConstant(0, dl, MaskVT), Subtarget, DAG); SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT, DAG.getUNDEF(BitcastVT), CmpMask, DAG.getIntPtrConstant(0, dl)); return DAG.getBitcast(Op.getValueType(), Res); } case CMP_MASK_SCALAR_CC: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(3)); SDValue Mask = Op.getOperand(4); SDValue Cmp; if (IntrData->Opc1 != 0) { SDValue Rnd = Op.getOperand(5); if (cast(Rnd)->getZExtValue() != X86::STATIC_ROUNDING::CUR_DIRECTION) Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::i1, Src1, Src2, CC, Rnd); } //default rounding mode if(!Cmp.getNode()) Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::i1, Src1, Src2, CC); SDValue CmpMask = getScalarMaskingNode(Cmp, Mask, DAG.getTargetConstant(0, dl, MVT::i1), Subtarget, DAG); return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i8, DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i8, CmpMask), DAG.getValueType(MVT::i1)); } case COMI: { // Comparison intrinsics ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1; SDValue LHS = Op.getOperand(1); SDValue RHS = Op.getOperand(2); unsigned X86CC = TranslateX86CC(CC, dl, true, LHS, RHS, DAG); assert(X86CC != X86::COND_INVALID && "Unexpected illegal condition!"); SDValue Cond = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(X86CC, dl, MVT::i8), Cond); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } case COMI_RM: { // Comparison intrinsics with Sae SDValue LHS = Op.getOperand(1); SDValue RHS = Op.getOperand(2); SDValue CC = Op.getOperand(3); SDValue Sae = Op.getOperand(4); auto ComiType = TranslateX86ConstCondToX86CC(CC); // choose between ordered and unordered (comi/ucomi) unsigned comiOp = std::get<0>(ComiType) ? 
IntrData->Opc0 : IntrData->Opc1; SDValue Cond; if (cast(Sae)->getZExtValue() != X86::STATIC_ROUNDING::CUR_DIRECTION) Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS, Sae); else Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(std::get<1>(ComiType), dl, MVT::i8), Cond); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } case VSHIFT: return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(), Op.getOperand(1), Op.getOperand(2), DAG); case VSHIFT_MASK: return getVectorMaskingNode(getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(), Op.getOperand(1), Op.getOperand(2), DAG), Op.getOperand(4), Op.getOperand(3), Subtarget, DAG); case COMPRESS_EXPAND_IN_REG: { SDValue Mask = Op.getOperand(3); SDValue DataToCompress = Op.getOperand(1); SDValue PassThru = Op.getOperand(2); if (isAllOnesConstant(Mask)) // return data as is return Op.getOperand(1); return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress), Mask, PassThru, Subtarget, DAG); } case BROADCASTM: { SDValue Mask = Op.getOperand(1); MVT MaskVT = MVT::getVectorVT(MVT::i1, Mask.getSimpleValueType().getSizeInBits()); Mask = DAG.getBitcast(MaskVT, Mask); return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Mask); } case BLEND: { SDValue Mask = Op.getOperand(3); MVT VT = Op.getSimpleValueType(); MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); return DAG.getNode(IntrData->Opc0, dl, VT, VMask, Op.getOperand(1), Op.getOperand(2)); } case KUNPCK: { MVT VT = Op.getSimpleValueType(); MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits()/2); SDValue Src1 = getMaskNode(Op.getOperand(1), MaskVT, Subtarget, DAG, dl); SDValue Src2 = getMaskNode(Op.getOperand(2), MaskVT, Subtarget, DAG, dl); // Arguments should be swapped. SDValue Res = DAG.getNode(IntrData->Opc0, dl, MVT::getVectorVT(MVT::i1, VT.getSizeInBits()), Src2, Src1); return DAG.getBitcast(VT, Res); } case CONVERT_TO_MASK: { MVT SrcVT = Op.getOperand(1).getSimpleValueType(); MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements()); MVT BitcastVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits()); SDValue CvtMask = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1)); SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT, DAG.getUNDEF(BitcastVT), CvtMask, DAG.getIntPtrConstant(0, dl)); return DAG.getBitcast(Op.getValueType(), Res); } case CONVERT_MASK_TO_VEC: { SDValue Mask = Op.getOperand(1); MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); return DAG.getNode(IntrData->Opc0, dl, VT, VMask); } case BRCST_SUBVEC_TO_VEC: { SDValue Src = Op.getOperand(1); SDValue Passthru = Op.getOperand(2); SDValue Mask = Op.getOperand(3); EVT resVT = Passthru.getValueType(); SDValue subVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, resVT, DAG.getUNDEF(resVT), Src, DAG.getIntPtrConstant(0, dl)); SDValue immVal; if (Src.getSimpleValueType().is256BitVector() && resVT.is512BitVector()) immVal = DAG.getConstant(0x44, dl, MVT::i8); else immVal = DAG.getConstant(0, dl, MVT::i8); return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, subVec, subVec, immVal), Mask, Passthru, Subtarget, DAG); } default: break; } } switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. case Intrinsic::x86_avx2_permd: case Intrinsic::x86_avx2_permps: // Operands intentionally swapped. 
Mask is last operand to intrinsic, // but second operand for node/instruction. return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(1)); // ptest and testp intrinsics. The intrinsic these come from are designed to // return an integer value, not just an instruction so lower it to the ptest // or testp pattern and a setcc for the result. case Intrinsic::x86_sse41_ptestz: case Intrinsic::x86_sse41_ptestc: case Intrinsic::x86_sse41_ptestnzc: case Intrinsic::x86_avx_ptestz_256: case Intrinsic::x86_avx_ptestc_256: case Intrinsic::x86_avx_ptestnzc_256: case Intrinsic::x86_avx_vtestz_ps: case Intrinsic::x86_avx_vtestc_ps: case Intrinsic::x86_avx_vtestnzc_ps: case Intrinsic::x86_avx_vtestz_pd: case Intrinsic::x86_avx_vtestc_pd: case Intrinsic::x86_avx_vtestnzc_pd: case Intrinsic::x86_avx_vtestz_ps_256: case Intrinsic::x86_avx_vtestc_ps_256: case Intrinsic::x86_avx_vtestnzc_ps_256: case Intrinsic::x86_avx_vtestz_pd_256: case Intrinsic::x86_avx_vtestc_pd_256: case Intrinsic::x86_avx_vtestnzc_pd_256: { bool IsTestPacked = false; unsigned X86CC; switch (IntNo) { default: llvm_unreachable("Bad fallthrough in Intrinsic lowering."); case Intrinsic::x86_avx_vtestz_ps: case Intrinsic::x86_avx_vtestz_pd: case Intrinsic::x86_avx_vtestz_ps_256: case Intrinsic::x86_avx_vtestz_pd_256: IsTestPacked = true; // Fallthrough case Intrinsic::x86_sse41_ptestz: case Intrinsic::x86_avx_ptestz_256: // ZF = 1 X86CC = X86::COND_E; break; case Intrinsic::x86_avx_vtestc_ps: case Intrinsic::x86_avx_vtestc_pd: case Intrinsic::x86_avx_vtestc_ps_256: case Intrinsic::x86_avx_vtestc_pd_256: IsTestPacked = true; // Fallthrough case Intrinsic::x86_sse41_ptestc: case Intrinsic::x86_avx_ptestc_256: // CF = 1 X86CC = X86::COND_B; break; case Intrinsic::x86_avx_vtestnzc_ps: case Intrinsic::x86_avx_vtestnzc_pd: case Intrinsic::x86_avx_vtestnzc_ps_256: case Intrinsic::x86_avx_vtestnzc_pd_256: IsTestPacked = true; // Fallthrough case Intrinsic::x86_sse41_ptestnzc: case Intrinsic::x86_avx_ptestnzc_256: // ZF and CF = 0 X86CC = X86::COND_A; break; } SDValue LHS = Op.getOperand(1); SDValue RHS = Op.getOperand(2); unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST; SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS); SDValue CC = DAG.getConstant(X86CC, dl, MVT::i8); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } case Intrinsic::x86_avx512_kortestz_w: case Intrinsic::x86_avx512_kortestc_w: { unsigned X86CC = (IntNo == Intrinsic::x86_avx512_kortestz_w)? X86::COND_E: X86::COND_B; SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1)); SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2)); SDValue CC = DAG.getConstant(X86CC, dl, MVT::i8); SDValue Test = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i1, CC, Test); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } case Intrinsic::x86_sse42_pcmpistria128: case Intrinsic::x86_sse42_pcmpestria128: case Intrinsic::x86_sse42_pcmpistric128: case Intrinsic::x86_sse42_pcmpestric128: case Intrinsic::x86_sse42_pcmpistrio128: case Intrinsic::x86_sse42_pcmpestrio128: case Intrinsic::x86_sse42_pcmpistris128: case Intrinsic::x86_sse42_pcmpestris128: case Intrinsic::x86_sse42_pcmpistriz128: case Intrinsic::x86_sse42_pcmpestriz128: { unsigned Opcode; unsigned X86CC; switch (IntNo) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
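    // Each of these intrinsics returns an i32 0/1 value: the string-compare
    // node is emitted once and the requested EFLAGS condition (COND_A/B/O/S/E
    // in the cases below) is read back with a SETCC that is then
    // zero-extended to i32.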
case Intrinsic::x86_sse42_pcmpistria128: Opcode = X86ISD::PCMPISTRI; X86CC = X86::COND_A; break; case Intrinsic::x86_sse42_pcmpestria128: Opcode = X86ISD::PCMPESTRI; X86CC = X86::COND_A; break; case Intrinsic::x86_sse42_pcmpistric128: Opcode = X86ISD::PCMPISTRI; X86CC = X86::COND_B; break; case Intrinsic::x86_sse42_pcmpestric128: Opcode = X86ISD::PCMPESTRI; X86CC = X86::COND_B; break; case Intrinsic::x86_sse42_pcmpistrio128: Opcode = X86ISD::PCMPISTRI; X86CC = X86::COND_O; break; case Intrinsic::x86_sse42_pcmpestrio128: Opcode = X86ISD::PCMPESTRI; X86CC = X86::COND_O; break; case Intrinsic::x86_sse42_pcmpistris128: Opcode = X86ISD::PCMPISTRI; X86CC = X86::COND_S; break; case Intrinsic::x86_sse42_pcmpestris128: Opcode = X86ISD::PCMPESTRI; X86CC = X86::COND_S; break; case Intrinsic::x86_sse42_pcmpistriz128: Opcode = X86ISD::PCMPISTRI; X86CC = X86::COND_E; break; case Intrinsic::x86_sse42_pcmpestriz128: Opcode = X86ISD::PCMPESTRI; X86CC = X86::COND_E; break; } SmallVector NewOps(Op->op_begin()+1, Op->op_end()); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(X86CC, dl, MVT::i8), SDValue(PCMP.getNode(), 1)); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } case Intrinsic::x86_sse42_pcmpistri128: case Intrinsic::x86_sse42_pcmpestri128: { unsigned Opcode; if (IntNo == Intrinsic::x86_sse42_pcmpistri128) Opcode = X86ISD::PCMPISTRI; else Opcode = X86ISD::PCMPESTRI; SmallVector NewOps(Op->op_begin()+1, Op->op_end()); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); return DAG.getNode(Opcode, dl, VTs, NewOps); } case Intrinsic::x86_seh_lsda: { // Compute the symbol for the LSDA. We know it'll get emitted later. MachineFunction &MF = DAG.getMachineFunction(); SDValue Op1 = Op.getOperand(1); auto *Fn = cast(cast(Op1)->getGlobal()); MCSymbol *LSDASym = MF.getMMI().getContext().getOrCreateLSDASymbol( GlobalValue::getRealLinkageName(Fn->getName())); // Generate a simple absolute symbol reference. This intrinsic is only // supported on 32-bit Windows, which isn't PIC. SDValue Result = DAG.getMCSymbol(LSDASym, VT); return DAG.getNode(X86ISD::Wrapper, dl, VT, Result); } case Intrinsic::x86_seh_recoverfp: { SDValue FnOp = Op.getOperand(1); SDValue IncomingFPOp = Op.getOperand(2); GlobalAddressSDNode *GSD = dyn_cast(FnOp); auto *Fn = dyn_cast_or_null(GSD ? GSD->getGlobal() : nullptr); if (!Fn) report_fatal_error( "llvm.x86.seh.recoverfp must take a function as the first argument"); return recoverFramePointer(DAG, Fn, IncomingFPOp); } case Intrinsic::localaddress: { // Returns one of the stack, base, or frame pointer registers, depending on // which is used to reference local variables. MachineFunction &MF = DAG.getMachineFunction(); const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); unsigned Reg; if (RegInfo->hasBasePointer(MF)) Reg = RegInfo->getBaseRegister(); else // This function handles the SP or FP case. 
Reg = RegInfo->getPtrSizedFrameRegister(MF); return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); } } } static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Src, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget * Subtarget) { SDLoc dl(Op); auto *C = cast(ScaleOp); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); MVT MaskVT = MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements()); SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); if (Src.getOpcode() == ISD::UNDEF) Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl); SDValue Ops[] = {Src, VMask, Base, Scale, Index, Disp, Segment, Chain}; SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops); SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) }; return DAG.getMergeValues(RetOps, dl); } static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Src, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget &Subtarget) { SDLoc dl(Op); auto *C = cast(ScaleOp); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); MVT MaskVT = MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements()); SDValue VMask = getMaskNode(Mask, MaskVT, &Subtarget, DAG, dl); SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other); SDValue Ops[] = {Base, Scale, Index, Disp, Segment, VMask, Src, Chain}; SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops); return SDValue(Res, 1); } static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget &Subtarget) { SDLoc dl(Op); auto *C = cast(ScaleOp); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); MVT MaskVT = MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements()); SDValue VMask = getMaskNode(Mask, MaskVT, &Subtarget, DAG, dl); //SDVTList VTs = DAG.getVTList(MVT::Other); SDValue Ops[] = {VMask, Base, Scale, Index, Disp, Segment, Chain}; SDNode *Res = DAG.getMachineNode(Opc, dl, MVT::Other, Ops); return SDValue(Res, 0); } // getReadPerformanceCounter - Handles the lowering of builtin intrinsics that // read performance monitor counters (x86_rdpmc). static void getReadPerformanceCounter(SDNode *N, SDLoc DL, SelectionDAG &DAG, const X86Subtarget *Subtarget, SmallVectorImpl &Results) { assert(N->getNumOperands() == 3 && "Unexpected number of operands!"); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue LO, HI; // The ECX register is used to select the index of the performance counter // to read. SDValue Chain = DAG.getCopyToReg(N->getOperand(0), DL, X86::ECX, N->getOperand(2)); SDValue rd = DAG.getNode(X86ISD::RDPMC_DAG, DL, Tys, Chain); // Reads the content of a 64-bit performance counter and returns it in the // registers EDX:EAX. 
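  // The 64-bit counter value is reassembled below as (HI << 32) | LO on
  // 64-bit targets, mirroring the SHL/OR nodes, and as a BUILD_PAIR of the
  // two 32-bit halves otherwise.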
if (Subtarget->is64Bit()) { LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1)); HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64, LO.getValue(2)); } else { LO = DAG.getCopyFromReg(rd, DL, X86::EAX, MVT::i32, rd.getValue(1)); HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32, LO.getValue(2)); } Chain = HI.getValue(1); if (Subtarget->is64Bit()) { // The EAX register is loaded with the low-order 32 bits. The EDX register // is loaded with the supported high-order bits of the counter. SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI, DAG.getConstant(32, DL, MVT::i8)); Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp)); Results.push_back(Chain); return; } // Use a buildpair to merge the two 32-bit values into a 64-bit one. SDValue Ops[] = { LO, HI }; SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops); Results.push_back(Pair); Results.push_back(Chain); } // getReadTimeStampCounter - Handles the lowering of builtin intrinsics that // read the time stamp counter (x86_rdtsc and x86_rdtscp). This function is // also used to custom lower READCYCLECOUNTER nodes. static void getReadTimeStampCounter(SDNode *N, SDLoc DL, unsigned Opcode, SelectionDAG &DAG, const X86Subtarget *Subtarget, SmallVectorImpl &Results) { SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue rd = DAG.getNode(Opcode, DL, Tys, N->getOperand(0)); SDValue LO, HI; // The processor's time-stamp counter (a 64-bit MSR) is stored into the // EDX:EAX registers. EDX is loaded with the high-order 32 bits of the MSR // and the EAX register is loaded with the low-order 32 bits. if (Subtarget->is64Bit()) { LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1)); HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64, LO.getValue(2)); } else { LO = DAG.getCopyFromReg(rd, DL, X86::EAX, MVT::i32, rd.getValue(1)); HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32, LO.getValue(2)); } SDValue Chain = HI.getValue(1); if (Opcode == X86ISD::RDTSCP_DAG) { assert(N->getNumOperands() == 3 && "Unexpected number of operands!"); // Instruction RDTSCP loads the IA32:TSC_AUX_MSR (address C000_0103H) into // the ECX register. Add 'ecx' explicitly to the chain. SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32, HI.getValue(2)); // Explicitly store the content of ECX at the location passed in input // to the 'rdtscp' intrinsic. Chain = DAG.getStore(ecx.getValue(1), DL, ecx, N->getOperand(2), MachinePointerInfo(), false, false, 0); } if (Subtarget->is64Bit()) { // The EDX register is loaded with the high-order 32 bits of the MSR, and // the EAX register is loaded with the low-order 32 bits. SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI, DAG.getConstant(32, DL, MVT::i8)); Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp)); Results.push_back(Chain); return; } // Use a buildpair to merge the two 32-bit values into a 64-bit one. 
SDValue Ops[] = { LO, HI }; SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops); Results.push_back(Pair); Results.push_back(Chain); } static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SmallVector Results; SDLoc DL(Op); getReadTimeStampCounter(Op.getNode(), DL, X86ISD::RDTSC_DAG, DAG, Subtarget, Results); return DAG.getMergeValues(Results, DL); } static SDValue MarkEHRegistrationNode(SDValue Op, SelectionDAG &DAG) { MachineFunction &MF = DAG.getMachineFunction(); SDValue Chain = Op.getOperand(0); SDValue RegNode = Op.getOperand(2); WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo(); if (!EHInfo) report_fatal_error("EH registrations only live in functions using WinEH"); // Cast the operand to an alloca, and remember the frame index. auto *FINode = dyn_cast(RegNode); if (!FINode) report_fatal_error("llvm.x86.seh.ehregnode expects a static alloca"); EHInfo->EHRegNodeFrameIndex = FINode->getIndex(); // Return the chain operand without making any DAG nodes. return Chain; } /// \brief Lower intrinsics for TRUNCATE_TO_MEM case /// return truncate Store/MaskedStore Node static SDValue LowerINTRINSIC_TRUNCATE_TO_MEM(const SDValue & Op, SelectionDAG &DAG, MVT ElementType) { SDLoc dl(Op); SDValue Mask = Op.getOperand(4); SDValue DataToTruncate = Op.getOperand(3); SDValue Addr = Op.getOperand(2); SDValue Chain = Op.getOperand(0); MVT VT = DataToTruncate.getSimpleValueType(); MVT SVT = MVT::getVectorVT(ElementType, VT.getVectorNumElements()); if (isAllOnesConstant(Mask)) // return just a truncate store return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr, MachinePointerInfo(), SVT, false, false, SVT.getScalarSizeInBits()/8); MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); MVT BitcastVT = MVT::getVectorVT(MVT::i1, Mask.getSimpleValueType().getSizeInBits()); // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements // are extracted by EXTRACT_SUBVECTOR. SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, DAG.getBitcast(BitcastVT, Mask), DAG.getIntPtrConstant(0, dl)); MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore, SVT.getStoreSize(), SVT.getScalarSizeInBits()/8); return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, VMask, SVT, MMO, true); } static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { unsigned IntNo = cast(Op.getOperand(1))->getZExtValue(); const IntrinsicData* IntrData = getIntrinsicWithChain(IntNo); if (!IntrData) { if (IntNo == llvm::Intrinsic::x86_seh_ehregnode) return MarkEHRegistrationNode(Op, DAG); if (IntNo == llvm::Intrinsic::x86_flags_read_u32 || IntNo == llvm::Intrinsic::x86_flags_read_u64 || IntNo == llvm::Intrinsic::x86_flags_write_u32 || IntNo == llvm::Intrinsic::x86_flags_write_u64) { // We need a frame pointer because this will get lowered to a PUSH/POP // sequence. MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setHasCopyImplyingStackAdjustment(true); // Don't do anything here, we will expand these intrinsics out later // during ExpandISelPseudos in EmitInstrWithCustomInserter. return SDValue(); } return SDValue(); } SDLoc dl(Op); switch(IntrData->Type) { default: llvm_unreachable("Unknown Intrinsic Type"); case RDSEED: case RDRAND: { // Emit the node with the right value type. 
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Glue, MVT::Other); SDValue Result = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0)); // If the value returned by RDRAND/RDSEED was valid (CF=1), return 1. // Otherwise return the value from Rand, which is always 0, casted to i32. SDValue Ops[] = { DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)), DAG.getConstant(1, dl, Op->getValueType(1)), DAG.getConstant(X86::COND_B, dl, MVT::i32), SDValue(Result.getNode(), 1) }; SDValue isValid = DAG.getNode(X86ISD::CMOV, dl, DAG.getVTList(Op->getValueType(1), MVT::Glue), Ops); // Return { result, isValid, chain }. return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid, SDValue(Result.getNode(), 2)); } case GATHER: { //gather(v1, mask, index, base, scale); SDValue Chain = Op.getOperand(0); SDValue Src = Op.getOperand(2); SDValue Base = Op.getOperand(3); SDValue Index = Op.getOperand(4); SDValue Mask = Op.getOperand(5); SDValue Scale = Op.getOperand(6); return getGatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain, Subtarget); } case SCATTER: { //scatter(base, mask, index, v1, scale); SDValue Chain = Op.getOperand(0); SDValue Base = Op.getOperand(2); SDValue Mask = Op.getOperand(3); SDValue Index = Op.getOperand(4); SDValue Src = Op.getOperand(5); SDValue Scale = Op.getOperand(6); return getScatterNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain, *Subtarget); } case PREFETCH: { SDValue Hint = Op.getOperand(6); unsigned HintVal = cast(Hint)->getZExtValue(); assert(HintVal < 2 && "Wrong prefetch hint in intrinsic: should be 0 or 1"); unsigned Opcode = (HintVal ? IntrData->Opc1 : IntrData->Opc0); SDValue Chain = Op.getOperand(0); SDValue Mask = Op.getOperand(2); SDValue Index = Op.getOperand(3); SDValue Base = Op.getOperand(4); SDValue Scale = Op.getOperand(5); return getPrefetchNode(Opcode, Op, DAG, Mask, Base, Index, Scale, Chain, *Subtarget); } // Read Time Stamp Counter (RDTSC) and Processor ID (RDTSCP). case RDTSC: { SmallVector Results; getReadTimeStampCounter(Op.getNode(), dl, IntrData->Opc0, DAG, Subtarget, Results); return DAG.getMergeValues(Results, dl); } // Read Performance Monitoring Counters. case RDPMC: { SmallVector Results; getReadPerformanceCounter(Op.getNode(), dl, DAG, Subtarget, Results); return DAG.getMergeValues(Results, dl); } // XTEST intrinsics. 
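  // XTEST sets ZF to 0 when the processor is executing inside an RTM/HLE
  // transaction and to 1 otherwise, so the COND_NE setcc below produces 1
  // exactly when the code is running transactionally (the _xtest() contract).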
case XTEST: { SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other); SDValue InTrans = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0)); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(X86::COND_NE, dl, MVT::i8), InTrans); SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC); return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Ret, SDValue(InTrans.getNode(), 1)); } // ADC/ADCX/SBB case ADX: { SmallVector Results; SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::Other); SDVTList VTs = DAG.getVTList(Op.getOperand(3)->getValueType(0), MVT::Other); SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(2), DAG.getConstant(-1, dl, MVT::i8)); SDValue Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(3), Op.getOperand(4), GenCF.getValue(1)); SDValue Store = DAG.getStore(Op.getOperand(0), dl, Res.getValue(0), Op.getOperand(5), MachinePointerInfo(), false, false, 0); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(X86::COND_B, dl, MVT::i8), Res.getValue(1)); Results.push_back(SetCC); Results.push_back(Store); return DAG.getMergeValues(Results, dl); } case COMPRESS_TO_MEM: { SDValue Mask = Op.getOperand(4); SDValue DataToCompress = Op.getOperand(3); SDValue Addr = Op.getOperand(2); SDValue Chain = Op.getOperand(0); MVT VT = DataToCompress.getSimpleValueType(); if (isAllOnesConstant(Mask)) // return just a store return DAG.getStore(Chain, dl, DataToCompress, Addr, MachinePointerInfo(), false, false, VT.getScalarSizeInBits()/8); SDValue Compressed = getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress), Mask, DAG.getUNDEF(VT), Subtarget, DAG); return DAG.getStore(Chain, dl, Compressed, Addr, MachinePointerInfo(), false, false, VT.getScalarSizeInBits()/8); } case TRUNCATE_TO_MEM_VI8: return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i8); case TRUNCATE_TO_MEM_VI16: return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i16); case TRUNCATE_TO_MEM_VI32: return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i32); case EXPAND_FROM_MEM: { SDValue Mask = Op.getOperand(4); SDValue PassThru = Op.getOperand(3); SDValue Addr = Op.getOperand(2); SDValue Chain = Op.getOperand(0); MVT VT = Op.getSimpleValueType(); if (isAllOnesConstant(Mask)) // return just a load return DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), false, false, false, VT.getScalarSizeInBits()/8); SDValue DataToExpand = DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), false, false, false, VT.getScalarSizeInBits()/8); SDValue Results[] = { getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToExpand), Mask, PassThru, Subtarget, DAG), Chain}; return DAG.getMergeValues(Results, dl); } case LOADU: case LOADA: { SDValue Mask = Op.getOperand(4); SDValue PassThru = Op.getOperand(3); SDValue Addr = Op.getOperand(2); SDValue Chain = Op.getOperand(0); MVT VT = Op.getSimpleValueType(); MemIntrinsicSDNode *MemIntr = dyn_cast(Op); assert(MemIntr && "Expected MemIntrinsicSDNode!"); if (isAllOnesConstant(Mask)) // return just a load return DAG.getLoad(VT, dl, Chain, Addr, MemIntr->getMemOperand()); MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); return DAG.getMaskedLoad(VT, dl, Chain, Addr, VMask, PassThru, VT, MemIntr->getMemOperand(), ISD::NON_EXTLOAD); } } } SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 
MFI->setReturnAddressIsTaken(true); if (verifyReturnAddressArgumentIsConstant(Op, DAG)) return SDValue(); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); SDLoc dl(Op); EVT PtrVT = getPointerTy(DAG.getDataLayout()); if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); SDValue Offset = DAG.getConstant(RegInfo->getSlotSize(), dl, PtrVT); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), MachinePointerInfo(), false, false, false, 0); } // Just load the return address. SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI, MachinePointerInfo(), false, false, false, 0); } SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); X86MachineFunctionInfo *FuncInfo = MF.getInfo(); const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); EVT VT = Op.getValueType(); MFI->setFrameAddressIsTaken(true); if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) { // Depth > 0 makes no sense on targets which use Windows unwind codes. It // is not possible to crawl up the stack without looking at the unwind codes // simultaneously. int FrameAddrIndex = FuncInfo->getFAIndex(); if (!FrameAddrIndex) { // Set up a frame object for the return address. unsigned SlotSize = RegInfo->getSlotSize(); FrameAddrIndex = MF.getFrameInfo()->CreateFixedObject( SlotSize, /*Offset=*/0, /*IsImmutable=*/false); FuncInfo->setFAIndex(FrameAddrIndex); } return DAG.getFrameIndex(FrameAddrIndex, VT); } unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction()); SDLoc dl(Op); // FIXME probably not meaningful unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); assert(((FrameReg == X86::RBP && VT == MVT::i64) || (FrameReg == X86::EBP && VT == MVT::i32)) && "Invalid Frame Register!"); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, MachinePointerInfo(), false, false, false, 0); return FrameAddr; } // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. 
unsigned X86TargetLowering::getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const { const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); const MachineFunction &MF = DAG.getMachineFunction(); unsigned Reg = StringSwitch(RegName) .Case("esp", X86::ESP) .Case("rsp", X86::RSP) .Case("ebp", X86::EBP) .Case("rbp", X86::RBP) .Default(0); if (Reg == X86::EBP || Reg == X86::RBP) { if (!TFI.hasFP(MF)) report_fatal_error("register " + StringRef(RegName) + " is allocatable: function has no frame pointer"); #ifndef NDEBUG else { const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction()); assert((FrameReg == X86::EBP || FrameReg == X86::RBP) && "Invalid Frame Register!"); } #endif } if (Reg) return Reg; report_fatal_error("Invalid register name global variable"); } SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const { const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize(), SDLoc(Op)); } unsigned X86TargetLowering::getExceptionPointerRegister( const Constant *PersonalityFn) const { if (classifyEHPersonality(PersonalityFn) == EHPersonality::CoreCLR) return Subtarget->isTarget64BitLP64() ? X86::RDX : X86::EDX; return Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX; } unsigned X86TargetLowering::getExceptionSelectorRegister( const Constant *PersonalityFn) const { // Funclet personalities don't use selectors (the runtime does the selection). assert(!isFuncletEHPersonality(classifyEHPersonality(PersonalityFn))); return Subtarget->isTarget64BitLP64() ? X86::RDX : X86::EDX; } SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Offset = Op.getOperand(1); SDValue Handler = Op.getOperand(2); SDLoc dl (Op); EVT PtrVT = getPointerTy(DAG.getDataLayout()); const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction()); assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) || (FrameReg == X86::EBP && PtrVT == MVT::i32)) && "Invalid Frame Register!"); SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT); unsigned StoreAddrReg = (PtrVT == MVT::i64) ? 
X86::RCX : X86::ECX; SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Frame, DAG.getIntPtrConstant(RegInfo->getSlotSize(), dl)); StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StoreAddr, Offset); Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(), false, false, 0); Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr); return DAG.getNode(X86ISD::EH_RETURN, dl, MVT::Other, Chain, DAG.getRegister(StoreAddrReg, PtrVT)); } SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); return DAG.getNode(X86ISD::EH_SJLJ_SETJMP, DL, DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), Op.getOperand(1)); } SDValue X86TargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); return DAG.getNode(X86ISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0), Op.getOperand(1)); } static SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) { return Op.getOperand(0); } SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { SDValue Root = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function SDValue Nest = Op.getOperand(3); // 'nest' parameter value SDLoc dl (Op); const Value *TrmpAddr = cast(Op.getOperand(4))->getValue(); const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); if (Subtarget->is64Bit()) { SDValue OutChains[6]; // Large code-model. const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode. const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode. const unsigned char N86R10 = TRI->getEncodingValue(X86::R10) & 0x7; const unsigned char N86R11 = TRI->getEncodingValue(X86::R11) & 0x7; const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix // Load the pointer to the nested function into R11. unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11 SDValue Addr = Trmp; OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16), Addr, MachinePointerInfo(TrmpAddr), false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(2, dl, MVT::i64)); OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, MachinePointerInfo(TrmpAddr, 2), false, false, 2); // Load the 'nest' parameter value into R10. // R10 is specified in X86CallingConv.td OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10 Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(10, dl, MVT::i64)); OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16), Addr, MachinePointerInfo(TrmpAddr, 10), false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(12, dl, MVT::i64)); OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, MachinePointerInfo(TrmpAddr, 12), false, false, 2); // Jump to the nested function. OpCode = (JMP64r << 8) | REX_WB; // jmpq *... 
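    // Layout of the 64-bit trampoline built so far:
    //   [0..1]   REX.WB + movabsq opcode targeting %r11    [2..9]   FPtr imm64
    //   [10..11] REX.WB + movabsq opcode targeting %r10    [12..19] Nest imm64
    // The two stores below append the final three bytes, "jmpq *%r11", at
    // offsets [20..22].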
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(20, dl, MVT::i64)); OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16), Addr, MachinePointerInfo(TrmpAddr, 20), false, false, 0); unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11 Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(22, dl, MVT::i64)); OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, dl, MVT::i8), Addr, MachinePointerInfo(TrmpAddr, 22), false, false, 0); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } else { const Function *Func = cast(cast(Op.getOperand(5))->getValue()); CallingConv::ID CC = Func->getCallingConv(); unsigned NestReg; switch (CC) { default: llvm_unreachable("Unsupported calling convention"); case CallingConv::C: case CallingConv::X86_StdCall: { // Pass 'nest' parameter in ECX. // Must be kept in sync with X86CallingConv.td NestReg = X86::ECX; // Check that ECX wasn't needed by an 'inreg' parameter. FunctionType *FTy = Func->getFunctionType(); const AttributeSet &Attrs = Func->getAttributes(); if (!Attrs.isEmpty() && !Func->isVarArg()) { unsigned InRegCount = 0; unsigned Idx = 1; for (FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end(); I != E; ++I, ++Idx) if (Attrs.hasAttribute(Idx, Attribute::InReg)) { auto &DL = DAG.getDataLayout(); // FIXME: should only count parameters that are lowered to integers. InRegCount += (DL.getTypeSizeInBits(*I) + 31) / 32; } if (InRegCount > 2) { report_fatal_error("Nest register in use - reduce number of inreg" " parameters!"); } } break; } case CallingConv::X86_FastCall: case CallingConv::X86_ThisCall: case CallingConv::Fast: // Pass 'nest' parameter in EAX. // Must be kept in sync with X86CallingConv.td NestReg = X86::EAX; break; } SDValue OutChains[4]; SDValue Addr, Disp; Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(10, dl, MVT::i32)); Disp = DAG.getNode(ISD::SUB, dl, MVT::i32, FPtr, Addr); // This is storing the opcode for MOV32ri. const unsigned char MOV32ri = 0xB8; // X86::MOV32ri's opcode byte. const unsigned char N86Reg = TRI->getEncodingValue(NestReg) & 0x7; OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(MOV32ri|N86Reg, dl, MVT::i8), Trmp, MachinePointerInfo(TrmpAddr), false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(1, dl, MVT::i32)); OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, MachinePointerInfo(TrmpAddr, 1), false, false, 1); const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode. 
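    // Resulting 10-byte 32-bit trampoline:
    //   [0]    0xB8 + NestReg encoding   (movl $Nest, %NestReg)
    //   [1..4] Nest value (imm32)
    //   [5]    0xE9                      (jmp rel32)
    //   [6..9] Disp = FPtr - (Trmp + 10), the PC-relative jump displacement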
    Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
                       DAG.getConstant(5, dl, MVT::i32));
    OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, dl, MVT::i8),
                                Addr, MachinePointerInfo(TrmpAddr, 5),
                                false, false, 1);

    Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
                       DAG.getConstant(6, dl, MVT::i32));
    OutChains[3] = DAG.getStore(Root, dl, Disp, Addr,
                                MachinePointerInfo(TrmpAddr, 6),
                                false, false, 1);

    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
  }
}

SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  /*
   The rounding mode is in bits 11:10 of FPSR, and has the following
   settings:
     00 Round to nearest
     01 Round to -inf
     10 Round to +inf
     11 Round to 0

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    (((((FPSR & 0x800) >> 11) | ((FPSR & 0x400) >> 9)) + 1) & 3)
  */

  MachineFunction &MF = DAG.getMachineFunction();
  const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
  unsigned StackAlignment = TFI.getStackAlignment();
  MVT VT = Op.getSimpleValueType();
  SDLoc DL(Op);

  // Save FP Control Word to stack slot
  int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false);
  SDValue StackSlot =
      DAG.getFrameIndex(SSFI, getPointerTy(DAG.getDataLayout()));

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
                              MachineMemOperand::MOStore, 2, 2);

  SDValue Ops[] = { DAG.getEntryNode(), StackSlot };
  SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
                                          DAG.getVTList(MVT::Other), Ops,
                                          MVT::i16, MMO);

  // Load FP Control Word from stack slot
  SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot,
                            MachinePointerInfo(), false, false, false, 0);

  // Transform as necessary
  SDValue CWD1 =
    DAG.getNode(ISD::SRL, DL, MVT::i16,
                DAG.getNode(ISD::AND, DL, MVT::i16,
                            CWD, DAG.getConstant(0x800, DL, MVT::i16)),
                DAG.getConstant(11, DL, MVT::i8));
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, DL, MVT::i16,
                DAG.getNode(ISD::AND, DL, MVT::i16,
                            CWD, DAG.getConstant(0x400, DL, MVT::i16)),
                DAG.getConstant(9, DL, MVT::i8));

  SDValue RetVal =
    DAG.getNode(ISD::AND, DL, MVT::i16,
                DAG.getNode(ISD::ADD, DL, MVT::i16,
                            DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2),
                            DAG.getConstant(1, DL, MVT::i16)),
                DAG.getConstant(3, DL, MVT::i16));

  return DAG.getNode((VT.getSizeInBits() < 16 ?
                      ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal);
}

/// \brief Lower a vector CTLZ using the natively supported vector CTLZ
/// instruction.
//
// 1. i32/i64 128/256-bit vectors (native support requires VLX) are extended
//    to a 512-bit vector.
// 2. i8/i16 vectors are implemented using the dword LZCNT vector instruction
//    ( sub(trunc(lzcnt(zext32(x)))) ). In case zext32(x) is illegal,
//    split the vector, perform the operation on its Lo and Hi parts and
//    concatenate the results.
static SDValue LowerVectorCTLZ_AVX512(SDValue Op, SelectionDAG &DAG) {
  SDLoc dl(Op);
  MVT VT = Op.getSimpleValueType();
  MVT EltVT = VT.getVectorElementType();
  unsigned NumElems = VT.getVectorNumElements();

  if (EltVT == MVT::i64 || EltVT == MVT::i32) {
    // Extend to 512 bit vector.
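    // For example, a v8i32 operand is widened to v16i32 by inserting it into
    // an undef 512-bit vector, CTLZ is computed on the full register (e.g.
    // via VPLZCNTD), and the original lanes are then extracted back out.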
assert((VT.is256BitVector() || VT.is128BitVector()) && "Unsupported value type for operation"); MVT NewVT = MVT::getVectorVT(EltVT, 512 / VT.getScalarSizeInBits()); SDValue Vec512 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NewVT, DAG.getUNDEF(NewVT), Op.getOperand(0), DAG.getIntPtrConstant(0, dl)); SDValue CtlzNode = DAG.getNode(ISD::CTLZ, dl, NewVT, Vec512); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CtlzNode, DAG.getIntPtrConstant(0, dl)); } assert((EltVT == MVT::i8 || EltVT == MVT::i16) && "Unsupported element type"); if (16 < NumElems) { // Split vector, it's Lo and Hi parts will be handled in next iteration. SDValue Lo, Hi; std::tie(Lo, Hi) = DAG.SplitVector(Op.getOperand(0), dl); MVT OutVT = MVT::getVectorVT(EltVT, NumElems/2); Lo = DAG.getNode(Op.getOpcode(), dl, OutVT, Lo); Hi = DAG.getNode(Op.getOpcode(), dl, OutVT, Hi); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); } MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems); assert((NewVT.is256BitVector() || NewVT.is512BitVector()) && "Unsupported value type for operation"); // Use native supported vector instruction vplzcntd. Op = DAG.getNode(ISD::ZERO_EXTEND, dl, NewVT, Op.getOperand(0)); SDValue CtlzNode = DAG.getNode(ISD::CTLZ, dl, NewVT, Op); SDValue TruncNode = DAG.getNode(ISD::TRUNCATE, dl, VT, CtlzNode); SDValue Delta = DAG.getConstant(32 - EltVT.getSizeInBits(), dl, VT); return DAG.getNode(ISD::SUB, dl, VT, TruncNode, Delta); } static SDValue LowerCTLZ(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); MVT OpVT = VT; unsigned NumBits = VT.getSizeInBits(); SDLoc dl(Op); if (VT.isVector() && Subtarget->hasAVX512()) return LowerVectorCTLZ_AVX512(Op, DAG); Op = Op.getOperand(0); if (VT == MVT::i8) { // Zero extend to i32 since there is not an i8 bsr. OpVT = MVT::i32; Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op); } // Issue a bsr (scan bits in reverse) which also sets EFLAGS. SDVTList VTs = DAG.getVTList(OpVT, MVT::i32); Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op); // If src is zero (i.e. bsr sets ZF), returns NumBits. SDValue Ops[] = { Op, DAG.getConstant(NumBits + NumBits - 1, dl, OpVT), DAG.getConstant(X86::COND_E, dl, MVT::i8), Op.getValue(1) }; Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops); // Finally xor with NumBits-1. Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, DAG.getConstant(NumBits - 1, dl, OpVT)); if (VT == MVT::i8) Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op); return Op; } static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); EVT OpVT = VT; unsigned NumBits = VT.getSizeInBits(); SDLoc dl(Op); Op = Op.getOperand(0); if (VT == MVT::i8) { // Zero extend to i32 since there is not an i8 bsr. OpVT = MVT::i32; Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op); } // Issue a bsr (scan bits in reverse). SDVTList VTs = DAG.getVTList(OpVT, MVT::i32); Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op); // And xor with NumBits-1. 
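  // BSR returns the bit index of the highest set bit, so for a power-of-two
  // width ctlz(x) = (NumBits - 1) - bsr(x); since bsr(x) <= NumBits - 1, the
  // subtraction is equivalent to the XOR with NumBits - 1 performed below.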
Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, DAG.getConstant(NumBits - 1, dl, OpVT)); if (VT == MVT::i8) Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op); return Op; } static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); unsigned NumBits = VT.getScalarSizeInBits(); SDLoc dl(Op); if (VT.isVector()) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue N0 = Op.getOperand(0); SDValue Zero = DAG.getConstant(0, dl, VT); // lsb(x) = (x & -x) SDValue LSB = DAG.getNode(ISD::AND, dl, VT, N0, DAG.getNode(ISD::SUB, dl, VT, Zero, N0)); // cttz_undef(x) = (width - 1) - ctlz(lsb) if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF && TLI.isOperationLegal(ISD::CTLZ, VT)) { SDValue WidthMinusOne = DAG.getConstant(NumBits - 1, dl, VT); return DAG.getNode(ISD::SUB, dl, VT, WidthMinusOne, DAG.getNode(ISD::CTLZ, dl, VT, LSB)); } // cttz(x) = ctpop(lsb - 1) SDValue One = DAG.getConstant(1, dl, VT); return DAG.getNode(ISD::CTPOP, dl, VT, DAG.getNode(ISD::SUB, dl, VT, LSB, One)); } assert(Op.getOpcode() == ISD::CTTZ && "Only scalar CTTZ requires custom lowering"); // Issue a bsf (scan bits forward) which also sets EFLAGS. SDVTList VTs = DAG.getVTList(VT, MVT::i32); Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op.getOperand(0)); // If src is zero (i.e. bsf sets ZF), returns NumBits. SDValue Ops[] = { Op, DAG.getConstant(NumBits, dl, VT), DAG.getConstant(X86::COND_E, dl, MVT::i8), Op.getValue(1) }; return DAG.getNode(X86ISD::CMOV, dl, VT, Ops); } // Lower256IntArith - Break a 256-bit integer operation into two new 128-bit // ones, and then concatenate the result back. static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); assert(VT.is256BitVector() && VT.isInteger() && "Unsupported value type for operation"); unsigned NumElems = VT.getVectorNumElements(); SDLoc dl(Op); // Extract the LHS vectors SDValue LHS = Op.getOperand(0); SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, dl); SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, dl); // Extract the RHS vectors SDValue RHS = Op.getOperand(1); SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, dl); SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, dl); MVT EltVT = VT.getVectorElementType(); MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1), DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2)); } static SDValue LowerADD(SDValue Op, SelectionDAG &DAG) { if (Op.getValueType() == MVT::i1) return DAG.getNode(ISD::XOR, SDLoc(Op), Op.getValueType(), Op.getOperand(0), Op.getOperand(1)); assert(Op.getSimpleValueType().is256BitVector() && Op.getSimpleValueType().isInteger() && "Only handle AVX 256-bit vector integer operation"); return Lower256IntArith(Op, DAG); } static SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) { if (Op.getValueType() == MVT::i1) return DAG.getNode(ISD::XOR, SDLoc(Op), Op.getValueType(), Op.getOperand(0), Op.getOperand(1)); assert(Op.getSimpleValueType().is256BitVector() && Op.getSimpleValueType().isInteger() && "Only handle AVX 256-bit vector integer operation"); return Lower256IntArith(Op, DAG); } static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) { assert(Op.getSimpleValueType().is256BitVector() && Op.getSimpleValueType().isInteger() && "Only handle AVX 256-bit vector integer operation"); return Lower256IntArith(Op, DAG); } static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); if 
(VT == MVT::i1) return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), Op.getOperand(1)); // Decompose 256-bit ops into smaller 128-bit ops. if (VT.is256BitVector() && !Subtarget->hasInt256()) return Lower256IntArith(Op, DAG); SDValue A = Op.getOperand(0); SDValue B = Op.getOperand(1); // Lower v16i8/v32i8 mul as promotion to v8i16/v16i16 vector // pairs, multiply and truncate. if (VT == MVT::v16i8 || VT == MVT::v32i8) { if (Subtarget->hasInt256()) { if (VT == MVT::v32i8) { MVT SubVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() / 2); SDValue Lo = DAG.getIntPtrConstant(0, dl); SDValue Hi = DAG.getIntPtrConstant(VT.getVectorNumElements() / 2, dl); SDValue ALo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, A, Lo); SDValue BLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, B, Lo); SDValue AHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, A, Hi); SDValue BHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, B, Hi); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, DAG.getNode(ISD::MUL, dl, SubVT, ALo, BLo), DAG.getNode(ISD::MUL, dl, SubVT, AHi, BHi)); } MVT ExVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements()); return DAG.getNode( ISD::TRUNCATE, dl, VT, DAG.getNode(ISD::MUL, dl, ExVT, DAG.getNode(ISD::SIGN_EXTEND, dl, ExVT, A), DAG.getNode(ISD::SIGN_EXTEND, dl, ExVT, B))); } assert(VT == MVT::v16i8 && "Pre-AVX2 support only supports v16i8 multiplication"); MVT ExVT = MVT::v8i16; // Extract the lo parts and sign extend to i16 SDValue ALo, BLo; if (Subtarget->hasSSE41()) { ALo = DAG.getNode(X86ISD::VSEXT, dl, ExVT, A); BLo = DAG.getNode(X86ISD::VSEXT, dl, ExVT, B); } else { const int ShufMask[] = {-1, 0, -1, 1, -1, 2, -1, 3, -1, 4, -1, 5, -1, 6, -1, 7}; ALo = DAG.getVectorShuffle(VT, dl, A, A, ShufMask); BLo = DAG.getVectorShuffle(VT, dl, B, B, ShufMask); ALo = DAG.getBitcast(ExVT, ALo); BLo = DAG.getBitcast(ExVT, BLo); ALo = DAG.getNode(ISD::SRA, dl, ExVT, ALo, DAG.getConstant(8, dl, ExVT)); BLo = DAG.getNode(ISD::SRA, dl, ExVT, BLo, DAG.getConstant(8, dl, ExVT)); } // Extract the hi parts and sign extend to i16 SDValue AHi, BHi; if (Subtarget->hasSSE41()) { const int ShufMask[] = {8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}; AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask); BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask); AHi = DAG.getNode(X86ISD::VSEXT, dl, ExVT, AHi); BHi = DAG.getNode(X86ISD::VSEXT, dl, ExVT, BHi); } else { const int ShufMask[] = {-1, 8, -1, 9, -1, 10, -1, 11, -1, 12, -1, 13, -1, 14, -1, 15}; AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask); BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask); AHi = DAG.getBitcast(ExVT, AHi); BHi = DAG.getBitcast(ExVT, BHi); AHi = DAG.getNode(ISD::SRA, dl, ExVT, AHi, DAG.getConstant(8, dl, ExVT)); BHi = DAG.getNode(ISD::SRA, dl, ExVT, BHi, DAG.getConstant(8, dl, ExVT)); } // Multiply, mask the lower 8bits of the lo/hi results and pack SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo); SDValue RHi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi); RLo = DAG.getNode(ISD::AND, dl, ExVT, RLo, DAG.getConstant(255, dl, ExVT)); RHi = DAG.getNode(ISD::AND, dl, ExVT, RHi, DAG.getConstant(255, dl, ExVT)); return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi); } // Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle. if (VT == MVT::v4i32) { assert(Subtarget->hasSSE2() && !Subtarget->hasSSE41() && "Should not custom lower when pmuldq is available!"); // Extract the odd parts. 
static const int UnpackMask[] = { 1, -1, 3, -1 }; SDValue Aodds = DAG.getVectorShuffle(VT, dl, A, A, UnpackMask); SDValue Bodds = DAG.getVectorShuffle(VT, dl, B, B, UnpackMask); // Multiply the even parts. SDValue Evens = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, A, B); // Now multiply odd parts. SDValue Odds = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, Aodds, Bodds); Evens = DAG.getBitcast(VT, Evens); Odds = DAG.getBitcast(VT, Odds); // Merge the two vectors back together with a shuffle. This expands into 2 // shuffles. static const int ShufMask[] = { 0, 4, 2, 6 }; return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask); } assert((VT == MVT::v2i64 || VT == MVT::v4i64 || VT == MVT::v8i64) && "Only know how to lower V2I64/V4I64/V8I64 multiply"); // Ahi = psrlqi(a, 32); // Bhi = psrlqi(b, 32); // // AloBlo = pmuludq(a, b); // AloBhi = pmuludq(a, Bhi); // AhiBlo = pmuludq(Ahi, b); // AloBhi = psllqi(AloBhi, 32); // AhiBlo = psllqi(AhiBlo, 32); // return AloBlo + AloBhi + AhiBlo; SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG); SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG); SDValue AhiBlo = Ahi; SDValue AloBhi = Bhi; // Bit cast to 32-bit vectors for MULUDQ MVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 : (VT == MVT::v4i64) ? MVT::v8i32 : MVT::v16i32; A = DAG.getBitcast(MulVT, A); B = DAG.getBitcast(MulVT, B); Ahi = DAG.getBitcast(MulVT, Ahi); Bhi = DAG.getBitcast(MulVT, Bhi); SDValue AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, B); // After shifting right const values the result may be all-zero. if (!ISD::isBuildVectorAllZeros(Ahi.getNode())) { AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B); AhiBlo = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AhiBlo, 32, DAG); } if (!ISD::isBuildVectorAllZeros(Bhi.getNode())) { AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi); AloBhi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AloBhi, 32, DAG); } SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi); return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo); } SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetWin64() && "Unexpected target"); EVT VT = Op.getValueType(); assert(VT.isInteger() && VT.getSizeInBits() == 128 && "Unexpected return type for lowering"); RTLIB::Libcall LC; bool isSigned; switch (Op->getOpcode()) { default: llvm_unreachable("Unexpected request for libcall!"); case ISD::SDIV: isSigned = true; LC = RTLIB::SDIV_I128; break; case ISD::UDIV: isSigned = false; LC = RTLIB::UDIV_I128; break; case ISD::SREM: isSigned = true; LC = RTLIB::SREM_I128; break; case ISD::UREM: isSigned = false; LC = RTLIB::UREM_I128; break; case ISD::SDIVREM: isSigned = true; LC = RTLIB::SDIVREM_I128; break; case ISD::UDIVREM: isSigned = false; LC = RTLIB::UDIVREM_I128; break; } SDLoc dl(Op); SDValue InChain = DAG.getEntryNode(); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) { EVT ArgVT = Op->getOperand(i).getValueType(); assert(ArgVT.isInteger() && ArgVT.getSizeInBits() == 128 && "Unexpected argument type for lowering"); SDValue StackPtr = DAG.CreateStackTemporary(ArgVT, 16); Entry.Node = StackPtr; InChain = DAG.getStore(InChain, dl, Op->getOperand(i), StackPtr, MachinePointerInfo(), false, false, 16); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); Entry.Ty = PointerType::get(ArgTy,0); Entry.isSExt = false; Entry.isZExt = false; Args.push_back(Entry); } SDValue Callee = 
      DAG.getExternalSymbol(getLibcallName(LC),
                            getPointerTy(DAG.getDataLayout()));

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(InChain)
    .setCallee(getLibcallCallingConv(LC),
               static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.getContext()),
               Callee, std::move(Args), 0)
    .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);

  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
  return DAG.getBitcast(VT, CallInfo.first);
}

static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
                             SelectionDAG &DAG) {
  SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
  MVT VT = Op0.getSimpleValueType();
  SDLoc dl(Op);
  assert((VT == MVT::v4i32 && Subtarget->hasSSE2()) ||
         (VT == MVT::v8i32 && Subtarget->hasInt256()));

  // PMULxD operations multiply each even value (starting at 0) of LHS with
  // the related value of RHS and produce a widened result.
  // E.g., PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
  // => <2 x i64> <ae|cg>
  //
  // In other words, to have all the results, we need to perform two PMULxD:
  // 1. one with the even values.
  // 2. one with the odd values.
  // To achieve #2, we need to place the odd values at an even position.
  //
  // Place the odd value at an even position (basically, shift all values 1
  // step to the left):
  const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1};
  // <a|b|c|d> => <b|undef|d|undef>
  SDValue Odd0 = DAG.getVectorShuffle(VT, dl, Op0, Op0, Mask);
  // <e|f|g|h> => <f|undef|h|undef>
  SDValue Odd1 = DAG.getVectorShuffle(VT, dl, Op1, Op1, Mask);

  // Emit two multiplies, one for the lower 2 ints and one for the higher 2
  // ints.
  MVT MulVT = VT == MVT::v4i32 ? MVT::v2i64 : MVT::v4i64;
  bool IsSigned = Op->getOpcode() == ISD::SMUL_LOHI;
  unsigned Opcode =
      (!IsSigned || !Subtarget->hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ;
  // PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
  // => <2 x i64> <ae|cg>
  SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT, Op0, Op1));
  // PMULUDQ <4 x i32> <b|undef|d|undef>, <4 x i32> <f|undef|h|undef>
  // => <2 x i64> <bf|dh>
  SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT, Odd0, Odd1));

  // Shuffle it back into the right order.
  SDValue Highs, Lows;
  if (VT == MVT::v8i32) {
    const int HighMask[] = {1, 9, 3, 11, 5, 13, 7, 15};
    Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
    const int LowMask[] = {0, 8, 2, 10, 4, 12, 6, 14};
    Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
  } else {
    const int HighMask[] = {1, 5, 3, 7};
    Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
    const int LowMask[] = {0, 4, 2, 6};
    Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
  }

  // If we have a signed multiply but no PMULDQ, fix up the high parts of an
  // unsigned multiply.
  if (IsSigned && !Subtarget->hasSSE41()) {
    SDValue ShAmt = DAG.getConstant(
        31, dl,
        DAG.getTargetLoweringInfo().getShiftAmountTy(VT, DAG.getDataLayout()));
    SDValue T1 = DAG.getNode(ISD::AND, dl, VT,
                             DAG.getNode(ISD::SRA, dl, VT, Op0, ShAmt), Op1);
    SDValue T2 = DAG.getNode(ISD::AND, dl, VT,
                             DAG.getNode(ISD::SRA, dl, VT, Op1, ShAmt), Op0);
    SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2);
    Highs = DAG.getNode(ISD::SUB, dl, VT, Highs, Fixup);
  }

  // The first result of MUL_LOHI is actually the low value, followed by the
  // high value.
SDValue Ops[] = {Lows, Highs}; return DAG.getMergeValues(Ops, dl); } // Return true if the required (according to Opcode) shift-imm form is natively // supported by the Subtarget static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget, unsigned Opcode) { if (VT.getScalarSizeInBits() < 16) return false; if (VT.is512BitVector() && (VT.getScalarSizeInBits() > 16 || Subtarget->hasBWI())) return true; bool LShift = VT.is128BitVector() || (VT.is256BitVector() && Subtarget->hasInt256()); bool AShift = LShift && (Subtarget->hasVLX() || (VT != MVT::v2i64 && VT != MVT::v4i64)); return (Opcode == ISD::SRA) ? AShift : LShift; } // The shift amount is a variable, but it is the same for all vector lanes. // These instructions are defined together with shift-immediate. static bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget *Subtarget, unsigned Opcode) { return SupportedVectorShiftWithImm(VT, Subtarget, Opcode); } // Return true if the required (according to Opcode) variable-shift form is // natively supported by the Subtarget static bool SupportedVectorVarShift(MVT VT, const X86Subtarget *Subtarget, unsigned Opcode) { if (!Subtarget->hasInt256() || VT.getScalarSizeInBits() < 16) return false; // vXi16 supported only on AVX-512, BWI if (VT.getScalarSizeInBits() == 16 && !Subtarget->hasBWI()) return false; if (VT.is512BitVector() || Subtarget->hasVLX()) return true; bool LShift = VT.is128BitVector() || VT.is256BitVector(); bool AShift = LShift && VT != MVT::v2i64 && VT != MVT::v4i64; return (Opcode == ISD::SRA) ? AShift : LShift; } static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, const X86Subtarget *Subtarget) { MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); SDValue R = Op.getOperand(0); SDValue Amt = Op.getOperand(1); unsigned X86Opc = (Op.getOpcode() == ISD::SHL) ? X86ISD::VSHLI : (Op.getOpcode() == ISD::SRL) ? X86ISD::VSRLI : X86ISD::VSRAI; auto ArithmeticShiftRight64 = [&](uint64_t ShiftAmt) { assert((VT == MVT::v2i64 || VT == MVT::v4i64) && "Unexpected SRA type"); MVT ExVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2); SDValue Ex = DAG.getBitcast(ExVT, R); if (ShiftAmt >= 32) { // Splat sign to upper i32 dst, and SRA upper i32 src to lower i32. SDValue Upper = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex, 31, DAG); SDValue Lower = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex, ShiftAmt - 32, DAG); if (VT == MVT::v2i64) Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {5, 1, 7, 3}); if (VT == MVT::v4i64) Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {9, 1, 11, 3, 13, 5, 15, 7}); } else { // SRA upper i32, SHL whole i64 and select lower i32. SDValue Upper = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex, ShiftAmt, DAG); SDValue Lower = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt, DAG); Lower = DAG.getBitcast(ExVT, Lower); if (VT == MVT::v2i64) Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {4, 1, 6, 3}); if (VT == MVT::v4i64) Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {8, 1, 10, 3, 12, 5, 14, 7}); } return DAG.getBitcast(VT, Ex); }; // Optimize shl/srl/sra with constant shift amount. if (auto *BVAmt = dyn_cast(Amt)) { if (auto *ShiftConst = BVAmt->getConstantSplatNode()) { uint64_t ShiftAmt = ShiftConst->getZExtValue(); if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode())) return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG); // i64 SRA needs to be performed as partial shifts. 
if ((VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) && Op.getOpcode() == ISD::SRA && !Subtarget->hasXOP()) return ArithmeticShiftRight64(ShiftAmt); if (VT == MVT::v16i8 || (Subtarget->hasInt256() && VT == MVT::v32i8) || VT == MVT::v64i8) { unsigned NumElts = VT.getVectorNumElements(); MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2); // Simple i8 add case if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1) return DAG.getNode(ISD::ADD, dl, VT, R, R); // ashr(R, 7) === cmp_slt(R, 0) if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) { SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl); return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R); } // XOP can shift v16i8 directly instead of as shift v8i16 + mask. if (VT == MVT::v16i8 && Subtarget->hasXOP()) return SDValue(); if (Op.getOpcode() == ISD::SHL) { // Make a large shift. SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT, R, ShiftAmt, DAG); SHL = DAG.getBitcast(VT, SHL); // Zero out the rightmost bits. return DAG.getNode(ISD::AND, dl, VT, SHL, DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, VT)); } if (Op.getOpcode() == ISD::SRL) { // Make a large shift. SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ShiftVT, R, ShiftAmt, DAG); SRL = DAG.getBitcast(VT, SRL); // Zero out the leftmost bits. return DAG.getNode(ISD::AND, dl, VT, SRL, DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, VT)); } if (Op.getOpcode() == ISD::SRA) { // ashr(R, Amt) === sub(xor(lshr(R, Amt), Mask), Mask) SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt); SDValue Mask = DAG.getConstant(128 >> ShiftAmt, dl, VT); Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask); Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask); return Res; } llvm_unreachable("Unknown shift opcode."); } } } // Special case in 32-bit mode, where i64 is expanded into high and low parts. if (!Subtarget->is64Bit() && !Subtarget->hasXOP() && (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64))) { // Peek through any splat that was introduced for i64 shift vectorization. int SplatIndex = -1; if (ShuffleVectorSDNode *SVN = dyn_cast(Amt.getNode())) if (SVN->isSplat()) { SplatIndex = SVN->getSplatIndex(); Amt = Amt.getOperand(0); assert(SplatIndex < (int)VT.getVectorNumElements() && "Splat shuffle referencing second operand"); } if (Amt.getOpcode() != ISD::BITCAST || Amt.getOperand(0).getOpcode() != ISD::BUILD_VECTOR) return SDValue(); Amt = Amt.getOperand(0); unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() / VT.getVectorNumElements(); unsigned RatioInLog2 = Log2_32_Ceil(Ratio); uint64_t ShiftAmt = 0; unsigned BaseOp = (SplatIndex < 0 ? 0 : SplatIndex * Ratio); for (unsigned i = 0; i != Ratio; ++i) { ConstantSDNode *C = dyn_cast(Amt.getOperand(i + BaseOp)); if (!C) return SDValue(); // 6 == Log2(64) ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2))); } // Check remaining shift amounts (if not a splat). 
if (SplatIndex < 0) { for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) { uint64_t ShAmt = 0; for (unsigned j = 0; j != Ratio; ++j) { ConstantSDNode *C = dyn_cast(Amt.getOperand(i + j)); if (!C) return SDValue(); // 6 == Log2(64) ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2))); } if (ShAmt != ShiftAmt) return SDValue(); } } if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode())) return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG); if (Op.getOpcode() == ISD::SRA) return ArithmeticShiftRight64(ShiftAmt); } return SDValue(); } static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, const X86Subtarget* Subtarget) { MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); SDValue R = Op.getOperand(0); SDValue Amt = Op.getOperand(1); unsigned X86OpcI = (Op.getOpcode() == ISD::SHL) ? X86ISD::VSHLI : (Op.getOpcode() == ISD::SRL) ? X86ISD::VSRLI : X86ISD::VSRAI; unsigned X86OpcV = (Op.getOpcode() == ISD::SHL) ? X86ISD::VSHL : (Op.getOpcode() == ISD::SRL) ? X86ISD::VSRL : X86ISD::VSRA; if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode())) { SDValue BaseShAmt; MVT EltVT = VT.getVectorElementType(); if (BuildVectorSDNode *BV = dyn_cast(Amt)) { // Check if this build_vector node is doing a splat. // If so, then set BaseShAmt equal to the splat value. BaseShAmt = BV->getSplatValue(); if (BaseShAmt && BaseShAmt.getOpcode() == ISD::UNDEF) BaseShAmt = SDValue(); } else { if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR) Amt = Amt.getOperand(0); ShuffleVectorSDNode *SVN = dyn_cast(Amt); if (SVN && SVN->isSplat()) { unsigned SplatIdx = (unsigned)SVN->getSplatIndex(); SDValue InVec = Amt.getOperand(0); if (InVec.getOpcode() == ISD::BUILD_VECTOR) { assert((SplatIdx < InVec.getSimpleValueType().getVectorNumElements()) && "Unexpected shuffle index found!"); BaseShAmt = InVec.getOperand(SplatIdx); } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) { if (ConstantSDNode *C = dyn_cast(InVec.getOperand(2))) { if (C->getZExtValue() == SplatIdx) BaseShAmt = InVec.getOperand(1); } } if (!BaseShAmt) // Avoid introducing an extract element from a shuffle. BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InVec, DAG.getIntPtrConstant(SplatIdx, dl)); } } if (BaseShAmt.getNode()) { assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!"); if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32)) BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, BaseShAmt); else if (EltVT.bitsLT(MVT::i32)) BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt); return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, DAG); } } // Special case in 32-bit mode, where i64 is expanded into high and low parts. 
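  // On 32-bit targets a v2i64 shift amount is typically legalized into a
  // bitcast of a v4i32 build_vector. The code below checks that every 64-bit
  // lane of that amount is identical and, if so, uses the VSHL/VSRL/VSRA
  // forms that take the shift count from the low 64 bits of a vector operand.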
if (!Subtarget->is64Bit() && VT == MVT::v2i64 && Amt.getOpcode() == ISD::BITCAST && Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { Amt = Amt.getOperand(0); unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() / VT.getVectorNumElements(); std::vector Vals(Ratio); for (unsigned i = 0; i != Ratio; ++i) Vals[i] = Amt.getOperand(i); for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) { for (unsigned j = 0; j != Ratio; ++j) if (Vals[j] != Amt.getOperand(i + j)) return SDValue(); } if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode())) return DAG.getNode(X86OpcV, dl, VT, R, Op.getOperand(1)); } return SDValue(); } static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); SDValue R = Op.getOperand(0); SDValue Amt = Op.getOperand(1); assert(VT.isVector() && "Custom lowering only for vector shifts!"); assert(Subtarget->hasSSE2() && "Only custom lower when we have SSE2!"); if (SDValue V = LowerScalarImmediateShift(Op, DAG, Subtarget)) return V; if (SDValue V = LowerScalarVariableShift(Op, DAG, Subtarget)) return V; if (SupportedVectorVarShift(VT, Subtarget, Op.getOpcode())) return Op; // XOP has 128-bit variable logical/arithmetic shifts. // +ve/-ve Amt = shift left/right. if (Subtarget->hasXOP() && (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8)) { if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) { SDValue Zero = getZeroVector(VT, Subtarget, DAG, dl); Amt = DAG.getNode(ISD::SUB, dl, VT, Zero, Amt); } if (Op.getOpcode() == ISD::SHL || Op.getOpcode() == ISD::SRL) return DAG.getNode(X86ISD::VPSHL, dl, VT, R, Amt); if (Op.getOpcode() == ISD::SRA) return DAG.getNode(X86ISD::VPSHA, dl, VT, R, Amt); } // 2i64 vector logical shifts can efficiently avoid scalarization - do the // shifts per-lane and then shuffle the partial results back together. if (VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) { // Splat the shift amounts so the scalar shifts above will catch it. SDValue Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {0, 0}); SDValue Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {1, 1}); SDValue R0 = DAG.getNode(Op->getOpcode(), dl, VT, R, Amt0); SDValue R1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Amt1); return DAG.getVectorShuffle(VT, dl, R0, R1, {0, 3}); } // i64 vector arithmetic shift can be emulated with the transform: // M = lshr(SIGN_BIT, Amt) // ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M) if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget->hasInt256())) && Op.getOpcode() == ISD::SRA) { SDValue S = DAG.getConstant(APInt::getSignBit(64), dl, VT); SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt); R = DAG.getNode(ISD::SRL, dl, VT, R, Amt); R = DAG.getNode(ISD::XOR, dl, VT, R, M); R = DAG.getNode(ISD::SUB, dl, VT, R, M); return R; } // If possible, lower this packed shift into a vector multiply instead of // expanding it into a sequence of scalar shifts. // Do this only if the vector shift count is a constant build_vector. 
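  // e.g. (shl v4i32 X, <1, 2, 3, 4>) is rewritten as
  // (mul v4i32 X, <2, 4, 8, 16>), since shifting a lane left by N is the same
  // as multiplying it by 1 << N.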
if (Op.getOpcode() == ISD::SHL && (VT == MVT::v8i16 || VT == MVT::v4i32 || (Subtarget->hasInt256() && VT == MVT::v16i16)) && ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) { SmallVector Elts; MVT SVT = VT.getVectorElementType(); unsigned SVTBits = SVT.getSizeInBits(); APInt One(SVTBits, 1); unsigned NumElems = VT.getVectorNumElements(); for (unsigned i=0; i !=NumElems; ++i) { SDValue Op = Amt->getOperand(i); if (Op->getOpcode() == ISD::UNDEF) { Elts.push_back(Op); continue; } ConstantSDNode *ND = cast(Op); APInt C(SVTBits, ND->getAPIntValue().getZExtValue()); uint64_t ShAmt = C.getZExtValue(); if (ShAmt >= SVTBits) { Elts.push_back(DAG.getUNDEF(SVT)); continue; } Elts.push_back(DAG.getConstant(One.shl(ShAmt), dl, SVT)); } SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Elts); return DAG.getNode(ISD::MUL, dl, VT, R, BV); } // Lower SHL with variable shift amount. if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) { Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, dl, VT)); Op = DAG.getNode(ISD::ADD, dl, VT, Op, DAG.getConstant(0x3f800000U, dl, VT)); Op = DAG.getBitcast(MVT::v4f32, Op); Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op); return DAG.getNode(ISD::MUL, dl, VT, Op, R); } // If possible, lower this shift as a sequence of two shifts by // constant plus a MOVSS/MOVSD instead of scalarizing it. // Example: // (v4i32 (srl A, (build_vector < X, Y, Y, Y>))) // // Could be rewritten as: // (v4i32 (MOVSS (srl A, ), (srl A, ))) // // The advantage is that the two shifts from the example would be // lowered as X86ISD::VSRLI nodes. This would be cheaper than scalarizing // the vector shift into four scalar shifts plus four pairs of vector // insert/extract. if ((VT == MVT::v8i16 || VT == MVT::v4i32) && ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) { unsigned TargetOpcode = X86ISD::MOVSS; bool CanBeSimplified; // The splat value for the first packed shift (the 'X' from the example). SDValue Amt1 = Amt->getOperand(0); // The splat value for the second packed shift (the 'Y' from the example). SDValue Amt2 = (VT == MVT::v4i32) ? Amt->getOperand(1) : Amt->getOperand(2); // See if it is possible to replace this node with a sequence of // two shifts followed by a MOVSS/MOVSD if (VT == MVT::v4i32) { // Check if it is legal to use a MOVSS. CanBeSimplified = Amt2 == Amt->getOperand(2) && Amt2 == Amt->getOperand(3); if (!CanBeSimplified) { // Otherwise, check if we can still simplify this node using a MOVSD. CanBeSimplified = Amt1 == Amt->getOperand(1) && Amt->getOperand(2) == Amt->getOperand(3); TargetOpcode = X86ISD::MOVSD; Amt2 = Amt->getOperand(2); } } else { // Do similar checks for the case where the machine value type // is MVT::v8i16. CanBeSimplified = Amt1 == Amt->getOperand(1); for (unsigned i=3; i != 8 && CanBeSimplified; ++i) CanBeSimplified = Amt2 == Amt->getOperand(i); if (!CanBeSimplified) { TargetOpcode = X86ISD::MOVSD; CanBeSimplified = true; Amt2 = Amt->getOperand(4); for (unsigned i=0; i != 4 && CanBeSimplified; ++i) CanBeSimplified = Amt1 == Amt->getOperand(i); for (unsigned j=4; j != 8 && CanBeSimplified; ++j) CanBeSimplified = Amt2 == Amt->getOperand(j); } } if (CanBeSimplified && isa(Amt1) && isa(Amt2)) { // Replace this node with two shifts followed by a MOVSS/MOVSD. 
MVT CastVT = MVT::v4i32; SDValue Splat1 = DAG.getConstant(cast(Amt1)->getAPIntValue(), dl, VT); SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1); SDValue Splat2 = DAG.getConstant(cast(Amt2)->getAPIntValue(), dl, VT); SDValue Shift2 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat2); if (TargetOpcode == X86ISD::MOVSD) CastVT = MVT::v2i64; SDValue BitCast1 = DAG.getBitcast(CastVT, Shift1); SDValue BitCast2 = DAG.getBitcast(CastVT, Shift2); SDValue Result = getTargetShuffleNode(TargetOpcode, dl, CastVT, BitCast2, BitCast1, DAG); return DAG.getBitcast(VT, Result); } } // v4i32 Non Uniform Shifts. // If the shift amount is constant we can shift each lane using the SSE2 // immediate shifts, else we need to zero-extend each lane to the lower i64 // and shift using the SSE2 variable shifts. // The separate results can then be blended together. if (VT == MVT::v4i32) { unsigned Opc = Op.getOpcode(); SDValue Amt0, Amt1, Amt2, Amt3; if (ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) { Amt0 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {0, 0, 0, 0}); Amt1 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {1, 1, 1, 1}); Amt2 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {2, 2, 2, 2}); Amt3 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {3, 3, 3, 3}); } else { // ISD::SHL is handled above but we include it here for completeness. switch (Opc) { default: llvm_unreachable("Unknown target vector shift node"); case ISD::SHL: Opc = X86ISD::VSHL; break; case ISD::SRL: Opc = X86ISD::VSRL; break; case ISD::SRA: Opc = X86ISD::VSRA; break; } // The SSE2 shifts use the lower i64 as the same shift amount for // all lanes and the upper i64 is ignored. These shuffle masks // optimally zero-extend each lanes on SSE2/SSE41/AVX targets. SDValue Z = getZeroVector(VT, Subtarget, DAG, dl); Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Z, {0, 4, -1, -1}); Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Z, {1, 5, -1, -1}); Amt2 = DAG.getVectorShuffle(VT, dl, Amt, Z, {2, 6, -1, -1}); Amt3 = DAG.getVectorShuffle(VT, dl, Amt, Z, {3, 7, -1, -1}); } SDValue R0 = DAG.getNode(Opc, dl, VT, R, Amt0); SDValue R1 = DAG.getNode(Opc, dl, VT, R, Amt1); SDValue R2 = DAG.getNode(Opc, dl, VT, R, Amt2); SDValue R3 = DAG.getNode(Opc, dl, VT, R, Amt3); SDValue R02 = DAG.getVectorShuffle(VT, dl, R0, R2, {0, -1, 6, -1}); SDValue R13 = DAG.getVectorShuffle(VT, dl, R1, R3, {-1, 1, -1, 7}); return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7}); } if (VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget->hasInt256() && !Subtarget->hasXOP())) { MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2); unsigned ShiftOpcode = Op->getOpcode(); auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) { // On SSE41 targets we make use of the fact that VSELECT lowers // to PBLENDVB which selects bytes based just on the sign bit. if (Subtarget->hasSSE41()) { V0 = DAG.getBitcast(VT, V0); V1 = DAG.getBitcast(VT, V1); Sel = DAG.getBitcast(VT, Sel); return DAG.getBitcast(SelVT, DAG.getNode(ISD::VSELECT, dl, VT, Sel, V0, V1)); } // On pre-SSE41 targets we test for the sign bit by comparing to // zero - a negative value will set all bits of the lanes to true // and VSELECT uses that in its OR(AND(V0,C),AND(V1,~C)) lowering. 
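// Illustrative sketch (not part of the lowering, kept under #if 0): the
// per-byte select that the SignBitSelect helper here models. PBLENDVB keeps
// the first operand where the selector's sign bit is set; without SSE4.1 the
// same effect comes from comparing the selector against zero and the
// OR(AND(V0,C),AND(V1,~C)) form mentioned above. Names are local to the sketch.
#if 0
#include <cassert>
#include <cstdint>

uint8_t SelectBySignBit(uint8_t Sel, uint8_t V0, uint8_t V1) {
  uint8_t C = (Sel & 0x80) ? 0xFF : 0x00;   // pcmpgtb(zero, Sel): all-ones if "negative"
  return static_cast<uint8_t>((V0 & C) | (V1 & static_cast<uint8_t>(~C)));
}

int main() {
  assert(SelectBySignBit(0x80, 1, 2) == 1); // sign bit set -> first operand
  assert(SelectBySignBit(0x7f, 1, 2) == 2); // sign bit clear -> second operand
  return 0;
}
#endif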
SDValue Z = getZeroVector(SelVT, Subtarget, DAG, dl); SDValue C = DAG.getNode(X86ISD::PCMPGT, dl, SelVT, Z, Sel); return DAG.getNode(ISD::VSELECT, dl, SelVT, C, V0, V1); }; // Turn 'a' into a mask suitable for VSELECT: a = a << 5; // We can safely do this using i16 shifts as we're only interested in // the 3 lower bits of each byte. Amt = DAG.getBitcast(ExtVT, Amt); Amt = DAG.getNode(ISD::SHL, dl, ExtVT, Amt, DAG.getConstant(5, dl, ExtVT)); Amt = DAG.getBitcast(VT, Amt); if (Op->getOpcode() == ISD::SHL || Op->getOpcode() == ISD::SRL) { // r = VSELECT(r, shift(r, 4), a); SDValue M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(4, dl, VT)); R = SignBitSelect(VT, Amt, M, R); // a += a Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt); // r = VSELECT(r, shift(r, 2), a); M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(2, dl, VT)); R = SignBitSelect(VT, Amt, M, R); // a += a Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt); // return VSELECT(r, shift(r, 1), a); M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(1, dl, VT)); R = SignBitSelect(VT, Amt, M, R); return R; } if (Op->getOpcode() == ISD::SRA) { // For SRA we need to unpack each byte to the higher byte of a i16 vector // so we can correctly sign extend. We don't care what happens to the // lower byte. SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, DAG.getUNDEF(VT), Amt); SDValue AHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, DAG.getUNDEF(VT), Amt); SDValue RLo = DAG.getNode(X86ISD::UNPCKL, dl, VT, DAG.getUNDEF(VT), R); SDValue RHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, DAG.getUNDEF(VT), R); ALo = DAG.getBitcast(ExtVT, ALo); AHi = DAG.getBitcast(ExtVT, AHi); RLo = DAG.getBitcast(ExtVT, RLo); RHi = DAG.getBitcast(ExtVT, RHi); // r = VSELECT(r, shift(r, 4), a); SDValue MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo, DAG.getConstant(4, dl, ExtVT)); SDValue MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi, DAG.getConstant(4, dl, ExtVT)); RLo = SignBitSelect(ExtVT, ALo, MLo, RLo); RHi = SignBitSelect(ExtVT, AHi, MHi, RHi); // a += a ALo = DAG.getNode(ISD::ADD, dl, ExtVT, ALo, ALo); AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi); // r = VSELECT(r, shift(r, 2), a); MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo, DAG.getConstant(2, dl, ExtVT)); MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi, DAG.getConstant(2, dl, ExtVT)); RLo = SignBitSelect(ExtVT, ALo, MLo, RLo); RHi = SignBitSelect(ExtVT, AHi, MHi, RHi); // a += a ALo = DAG.getNode(ISD::ADD, dl, ExtVT, ALo, ALo); AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi); // r = VSELECT(r, shift(r, 1), a); MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo, DAG.getConstant(1, dl, ExtVT)); MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi, DAG.getConstant(1, dl, ExtVT)); RLo = SignBitSelect(ExtVT, ALo, MLo, RLo); RHi = SignBitSelect(ExtVT, AHi, MHi, RHi); // Logical shift the result back to the lower byte, leaving a zero upper // byte // meaning that we can safely pack with PACKUSWB. RLo = DAG.getNode(ISD::SRL, dl, ExtVT, RLo, DAG.getConstant(8, dl, ExtVT)); RHi = DAG.getNode(ISD::SRL, dl, ExtVT, RHi, DAG.getConstant(8, dl, ExtVT)); return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi); } } // It's worth extending once and using the v8i32 shifts for 16-bit types, but // the extra overheads to get from v16i8 to v8i32 make the existing SSE // solution better. if (Subtarget->hasInt256() && VT == MVT::v8i16) { MVT ExtVT = MVT::v8i32; unsigned ExtOpc = Op.getOpcode() == ISD::SRA ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; R = DAG.getNode(ExtOpc, dl, ExtVT, R); Amt = DAG.getNode(ISD::ANY_EXTEND, dl, ExtVT, Amt); return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt)); } if (Subtarget->hasInt256() && !Subtarget->hasXOP() && VT == MVT::v16i16) { MVT ExtVT = MVT::v8i32; SDValue Z = getZeroVector(VT, Subtarget, DAG, dl); SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, Amt, Z); SDValue AHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, Amt, Z); SDValue RLo = DAG.getNode(X86ISD::UNPCKL, dl, VT, R, R); SDValue RHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, R, R); ALo = DAG.getBitcast(ExtVT, ALo); AHi = DAG.getBitcast(ExtVT, AHi); RLo = DAG.getBitcast(ExtVT, RLo); RHi = DAG.getBitcast(ExtVT, RHi); SDValue Lo = DAG.getNode(Op.getOpcode(), dl, ExtVT, RLo, ALo); SDValue Hi = DAG.getNode(Op.getOpcode(), dl, ExtVT, RHi, AHi); Lo = DAG.getNode(ISD::SRL, dl, ExtVT, Lo, DAG.getConstant(16, dl, ExtVT)); Hi = DAG.getNode(ISD::SRL, dl, ExtVT, Hi, DAG.getConstant(16, dl, ExtVT)); return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi); } if (VT == MVT::v8i16) { unsigned ShiftOpcode = Op->getOpcode(); auto SignBitSelect = [&](SDValue Sel, SDValue V0, SDValue V1) { // On SSE41 targets we make use of the fact that VSELECT lowers // to PBLENDVB which selects bytes based just on the sign bit. if (Subtarget->hasSSE41()) { MVT ExtVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() * 2); V0 = DAG.getBitcast(ExtVT, V0); V1 = DAG.getBitcast(ExtVT, V1); Sel = DAG.getBitcast(ExtVT, Sel); return DAG.getBitcast( VT, DAG.getNode(ISD::VSELECT, dl, ExtVT, Sel, V0, V1)); } // On pre-SSE41 targets we splat the sign bit - a negative value will // set all bits of the lanes to true and VSELECT uses that in // its OR(AND(V0,C),AND(V1,~C)) lowering. SDValue C = DAG.getNode(ISD::SRA, dl, VT, Sel, DAG.getConstant(15, dl, VT)); return DAG.getNode(ISD::VSELECT, dl, VT, C, V0, V1); }; // Turn 'a' into a mask suitable for VSELECT: a = a << 12; if (Subtarget->hasSSE41()) { // On SSE41 targets we need to replicate the shift mask in both // bytes for PBLENDVB. Amt = DAG.getNode( ISD::OR, dl, VT, DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(4, dl, VT)), DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(12, dl, VT))); } else { Amt = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(12, dl, VT)); } // r = VSELECT(r, shift(r, 8), a); SDValue M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(8, dl, VT)); R = SignBitSelect(Amt, M, R); // a += a Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt); // r = VSELECT(r, shift(r, 4), a); M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(4, dl, VT)); R = SignBitSelect(Amt, M, R); // a += a Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt); // r = VSELECT(r, shift(r, 2), a); M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(2, dl, VT)); R = SignBitSelect(Amt, M, R); // a += a Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt); // return VSELECT(r, shift(r, 1), a); M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(1, dl, VT)); R = SignBitSelect(Amt, M, R); return R; } // Decompose 256-bit shifts into smaller 128-bit shifts. 
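// Illustrative sketch (not part of the lowering, kept under #if 0): the
// "shift ladder" used by the v16i8/v8i16 paths above. Each step conditionally
// applies a fixed shift (4, then 2, then 1 for bytes), selecting per element
// on the current top bit of the amount, and the amount is doubled between
// steps to expose the next bit. Scalar form, names local to the sketch:
#if 0
#include <cassert>
#include <cstdint>

uint8_t VariableShlByte(uint8_t R, uint8_t Amt) {  // Amt in [0, 7]
  uint8_t A = static_cast<uint8_t>(Amt << 5);      // bit 2 of Amt lands in the sign bit
  if (A & 0x80) R = static_cast<uint8_t>(R << 4);  // "blend on the sign bit"
  A = static_cast<uint8_t>(A + A);                 // a += a
  if (A & 0x80) R = static_cast<uint8_t>(R << 2);
  A = static_cast<uint8_t>(A + A);
  if (A & 0x80) R = static_cast<uint8_t>(R << 1);
  return R;
}

int main() {
  for (unsigned Amt = 0; Amt != 8; ++Amt)
    assert(VariableShlByte(1, static_cast<uint8_t>(Amt)) ==
           static_cast<uint8_t>(1u << Amt));
  return 0;
}
#endif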
if (VT.is256BitVector()) { unsigned NumElems = VT.getVectorNumElements(); MVT EltVT = VT.getVectorElementType(); MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); // Extract the two vectors SDValue V1 = Extract128BitVector(R, 0, DAG, dl); SDValue V2 = Extract128BitVector(R, NumElems/2, DAG, dl); // Recreate the shift amount vectors SDValue Amt1, Amt2; if (Amt.getOpcode() == ISD::BUILD_VECTOR) { // Constant shift amount SmallVector Ops(Amt->op_begin(), Amt->op_begin() + NumElems); ArrayRef Amt1Csts = makeArrayRef(Ops).slice(0, NumElems / 2); ArrayRef Amt2Csts = makeArrayRef(Ops).slice(NumElems / 2); Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Amt1Csts); Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Amt2Csts); } else { // Variable shift amount Amt1 = Extract128BitVector(Amt, 0, DAG, dl); Amt2 = Extract128BitVector(Amt, NumElems/2, DAG, dl); } // Issue new vector shifts for the smaller types V1 = DAG.getNode(Op.getOpcode(), dl, NewVT, V1, Amt1); V2 = DAG.getNode(Op.getOpcode(), dl, NewVT, V2, Amt2); // Concatenate the result back return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, V1, V2); } return SDValue(); } static SDValue LowerRotate(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); SDLoc DL(Op); SDValue R = Op.getOperand(0); SDValue Amt = Op.getOperand(1); assert(VT.isVector() && "Custom lowering only for vector rotates!"); assert(Subtarget->hasXOP() && "XOP support required for vector rotates!"); assert((Op.getOpcode() == ISD::ROTL) && "Only ROTL supported"); // XOP has 128-bit vector variable + immediate rotates. // +ve/-ve Amt = rotate left/right. // Split 256-bit integers. if (VT.is256BitVector()) return Lower256IntArith(Op, DAG); assert(VT.is128BitVector() && "Only rotate 128-bit vectors!"); // Attempt to rotate by immediate. if (auto *BVAmt = dyn_cast(Amt)) { if (auto *RotateConst = BVAmt->getConstantSplatNode()) { uint64_t RotateAmt = RotateConst->getAPIntValue().getZExtValue(); assert(RotateAmt < VT.getScalarSizeInBits() && "Rotation out of range"); return DAG.getNode(X86ISD::VPROTI, DL, VT, R, DAG.getConstant(RotateAmt, DL, MVT::i8)); } } // Use general rotate by variable (per-element). return DAG.getNode(X86ISD::VPROT, DL, VT, R, Amt); } static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { // Lower the "add/sub/mul with overflow" instruction into a regular ins plus // a "setcc" instruction that checks the overflow flag. The "brcond" lowering // looks for this combo and may remove the "setcc" instruction if the "setcc" // has only one use. SDNode *N = Op.getNode(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); unsigned BaseOp = 0; unsigned Cond = 0; SDLoc DL(Op); switch (Op.getOpcode()) { default: llvm_unreachable("Unknown ovf instruction!"); case ISD::SADDO: // A subtract of one will be selected as a INC. Note that INC doesn't // set CF, so we can't do this for UADDO. if (isOneConstant(RHS)) { BaseOp = X86ISD::INC; Cond = X86::COND_O; break; } BaseOp = X86ISD::ADD; Cond = X86::COND_O; break; case ISD::UADDO: BaseOp = X86ISD::ADD; Cond = X86::COND_B; break; case ISD::SSUBO: // A subtract of one will be selected as a DEC. Note that DEC doesn't // set CF, so we can't do this for USUBO. if (isOneConstant(RHS)) { BaseOp = X86ISD::DEC; Cond = X86::COND_O; break; } BaseOp = X86ISD::SUB; Cond = X86::COND_O; break; case ISD::USUBO: BaseOp = X86ISD::SUB; Cond = X86::COND_B; break; case ISD::SMULO: BaseOp = N->getValueType(0) == MVT::i8 ? 
X86ISD::SMUL8 : X86ISD::SMUL; Cond = X86::COND_O; break; case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs if (N->getValueType(0) == MVT::i8) { BaseOp = X86ISD::UMUL8; Cond = X86::COND_O; break; } SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0), MVT::i32); SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS); SDValue SetCC = DAG.getNode(X86ISD::SETCC, DL, MVT::i8, DAG.getConstant(X86::COND_O, DL, MVT::i32), SDValue(Sum.getNode(), 2)); return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC); } } // Also sets EFLAGS. SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32); SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS); SDValue SetCC = DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1), DAG.getConstant(Cond, DL, MVT::i32), SDValue(Sum.getNode(), 1)); return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC); } /// Returns true if the operand type is exactly twice the native width, and /// the corresponding cmpxchg8b or cmpxchg16b instruction is available. /// Used to know whether to use cmpxchg8/16b when expanding atomic operations /// (otherwise we leave them alone to become __sync_fetch_and_... calls). bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const { unsigned OpWidth = MemType->getPrimitiveSizeInBits(); if (OpWidth == 64) return !Subtarget->is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b else if (OpWidth == 128) return Subtarget->hasCmpxchg16b(); else return false; } bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { return needsCmpXchgNb(SI->getValueOperand()->getType()); } // Note: this turns large loads into lock cmpxchg8b/16b. // FIXME: On 32 bits x86, fild/movq might be faster than lock cmpxchg8b. TargetLowering::AtomicExpansionKind X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { auto PTy = cast(LI->getPointerOperand()->getType()); return needsCmpXchgNb(PTy->getElementType()) ? AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None; } TargetLowering::AtomicExpansionKind X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32; Type *MemType = AI->getType(); // If the operand is too big, we must see if cmpxchg8/16b is available // and default to library calls otherwise. if (MemType->getPrimitiveSizeInBits() > NativeWidth) { return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None; } AtomicRMWInst::BinOp Op = AI->getOperation(); switch (Op) { default: llvm_unreachable("Unknown atomic operation"); case AtomicRMWInst::Xchg: case AtomicRMWInst::Add: case AtomicRMWInst::Sub: // It's better to use xadd, xsub or xchg for these in all cases. return AtomicExpansionKind::None; case AtomicRMWInst::Or: case AtomicRMWInst::And: case AtomicRMWInst::Xor: // If the atomicrmw's result isn't actually used, we can just add a "lock" // prefix to a normal instruction for these operations. return !AI->use_empty() ? AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None; case AtomicRMWInst::Nand: case AtomicRMWInst::Max: case AtomicRMWInst::Min: case AtomicRMWInst::UMax: case AtomicRMWInst::UMin: // These always require a non-trivial set of data operations on x86. We must // use a cmpxchg loop. return AtomicExpansionKind::CmpXChg; } } static bool hasMFENCE(const X86Subtarget& Subtarget) { // Use mfence if we have SSE2 or we're on x86-64 (even if we asked for // no-sse2). There isn't any reason to disable it if the target processor // supports it. 
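// Illustrative sketch (not part of the lowering, kept under #if 0): the
// distinction the Or/And/Xor cases of shouldExpandAtomicRMWInIR draw above.
// When the old value of the atomicrmw is never read, a single LOCK-prefixed
// instruction is enough; when it is read, x86 has no fetch_or-style
// instruction, so the operation is expanded to a cmpxchg loop. The functions
// below are only an assumed source-level analogy.
#if 0
#include <atomic>
#include <cstdint>

void SetFlagNoResult(std::atomic<uint32_t> &Flags) {
  Flags.fetch_or(0x1u, std::memory_order_relaxed);        // result unused: typically "lock or"
}

uint32_t SetFlagWithResult(std::atomic<uint32_t> &Flags) {
  return Flags.fetch_or(0x1u, std::memory_order_relaxed); // result used: cmpxchg loop
}
#endif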
return Subtarget.hasSSE2() || Subtarget.is64Bit(); } LoadInst * X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32; Type *MemType = AI->getType(); // Accesses larger than the native width are turned into cmpxchg/libcalls, so // there is no benefit in turning such RMWs into loads, and it is actually // harmful as it introduces a mfence. if (MemType->getPrimitiveSizeInBits() > NativeWidth) return nullptr; auto Builder = IRBuilder<>(AI); Module *M = Builder.GetInsertBlock()->getParent()->getParent(); auto SynchScope = AI->getSynchScope(); // We must restrict the ordering to avoid generating loads with Release or // ReleaseAcquire orderings. auto Order = AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering()); auto Ptr = AI->getPointerOperand(); // Before the load we need a fence. Here is an example lifted from // http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf showing why a fence // is required: // Thread 0: // x.store(1, relaxed); // r1 = y.fetch_add(0, release); // Thread 1: // y.fetch_add(42, acquire); // r2 = x.load(relaxed); // r1 = r2 = 0 is impossible, but becomes possible if the idempotent rmw is // lowered to just a load without a fence. A mfence flushes the store buffer, // making the optimization clearly correct. // FIXME: it is required if isAtLeastRelease(Order) but it is not clear // otherwise, we might be able to be more aggressive on relaxed idempotent // rmw. In practice, they do not look useful, so we don't try to be // especially clever. if (SynchScope == SingleThread) // FIXME: we could just insert an X86ISD::MEMBARRIER here, except we are at // the IR level, so we must wrap it in an intrinsic. return nullptr; if (!hasMFENCE(*Subtarget)) // FIXME: it might make sense to use a locked operation here but on a // different cache-line to prevent cache-line bouncing. In practice it // is probably a small win, and x86 processors without mfence are rare // enough that we do not bother. return nullptr; Function *MFence = llvm::Intrinsic::getDeclaration(M, Intrinsic::x86_sse2_mfence); Builder.CreateCall(MFence, {}); // Finally we can emit the atomic load. LoadInst *Loaded = Builder.CreateAlignedLoad(Ptr, AI->getType()->getPrimitiveSizeInBits()); Loaded->setAtomic(Order, SynchScope); AI->replaceAllUsesWith(Loaded); AI->eraseFromParent(); return Loaded; } static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc dl(Op); AtomicOrdering FenceOrdering = static_cast( cast(Op.getOperand(1))->getZExtValue()); SynchronizationScope FenceScope = static_cast( cast(Op.getOperand(2))->getZExtValue()); // The only fence that needs an instruction is a sequentially-consistent // cross-thread fence. if (FenceOrdering == SequentiallyConsistent && FenceScope == CrossThread) { if (hasMFENCE(*Subtarget)) return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); SDValue Chain = Op.getOperand(0); SDValue Zero = DAG.getConstant(0, dl, MVT::i32); SDValue Ops[] = { DAG.getRegister(X86::ESP, MVT::i32), // Base DAG.getTargetConstant(1, dl, MVT::i8), // Scale DAG.getRegister(0, MVT::i32), // Index DAG.getTargetConstant(0, dl, MVT::i32), // Disp DAG.getRegister(0, MVT::i32), // Segment. Zero, Chain }; SDNode *Res = DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops); return SDValue(Res, 0); } // MEMBARRIER is a compiler barrier; it codegens to a no-op. 
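// Illustrative sketch (not part of the lowering, kept under #if 0): what the
// seq_cst fence handling above amounts to. With SSE2 the fence is a single
// MFENCE; without it, any LOCK-prefixed read-modify-write acts as a full
// barrier, which is what the X86::OR32mrLocked node built above provides by
// or-ing zero into the top of the stack.
#if 0
#include <atomic>

void FullBarrier() {
  // On x86 this typically lowers to MFENCE when available, or to a locked
  // no-op such as "lock orl $0, (%esp)" otherwise.
  std::atomic_thread_fence(std::memory_order_seq_cst);
}
#endif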
return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); } static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { MVT T = Op.getSimpleValueType(); SDLoc DL(Op); unsigned Reg = 0; unsigned size = 0; switch(T.SimpleTy) { default: llvm_unreachable("Invalid value type!"); case MVT::i8: Reg = X86::AL; size = 1; break; case MVT::i16: Reg = X86::AX; size = 2; break; case MVT::i32: Reg = X86::EAX; size = 4; break; case MVT::i64: assert(Subtarget->is64Bit() && "Node not type legal!"); Reg = X86::RAX; size = 8; break; } SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg, Op.getOperand(2), SDValue()); SDValue Ops[] = { cpIn.getValue(0), Op.getOperand(1), Op.getOperand(3), DAG.getTargetConstant(size, DL, MVT::i8), cpIn.getValue(1) }; SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); MachineMemOperand *MMO = cast(Op)->getMemOperand(); SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys, Ops, T, MMO); SDValue cpOut = DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1)); SDValue EFLAGS = DAG.getCopyFromReg(cpOut.getValue(1), DL, X86::EFLAGS, MVT::i32, cpOut.getValue(2)); SDValue Success = DAG.getNode(X86ISD::SETCC, DL, Op->getValueType(1), DAG.getConstant(X86::COND_E, DL, MVT::i8), EFLAGS); DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), cpOut); DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success); DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), EFLAGS.getValue(1)); return SDValue(); } static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { MVT SrcVT = Op.getOperand(0).getSimpleValueType(); MVT DstVT = Op.getSimpleValueType(); if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 || SrcVT == MVT::i64) { assert(Subtarget->hasSSE2() && "Requires at least SSE2!"); if (DstVT != MVT::f64) // This conversion needs to be expanded. return SDValue(); SDValue Op0 = Op->getOperand(0); SmallVector Elts; SDLoc dl(Op); unsigned NumElts; MVT SVT; if (SrcVT.isVector()) { NumElts = SrcVT.getVectorNumElements(); SVT = SrcVT.getVectorElementType(); // Widen the vector in input in the case of MVT::v2i32. // Example: from MVT::v2i32 to MVT::v4i32. for (unsigned i = 0, e = NumElts; i != e; ++i) Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, Op0, DAG.getIntPtrConstant(i, dl))); } else { assert(SrcVT == MVT::i64 && !Subtarget->is64Bit() && "Unexpected source type in LowerBITCAST"); Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0, DAG.getIntPtrConstant(0, dl))); Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0, DAG.getIntPtrConstant(1, dl))); NumElts = 2; SVT = MVT::i32; } // Explicitly mark the extra elements as Undef. Elts.append(NumElts, DAG.getUNDEF(SVT)); EVT NewVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2); SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Elts); SDValue ToV2F64 = DAG.getBitcast(MVT::v2f64, BV); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, ToV2F64, DAG.getIntPtrConstant(0, dl)); } assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() && Subtarget->hasMMX() && "Unexpected custom BITCAST"); assert((DstVT == MVT::i64 || (DstVT.isVector() && DstVT.getSizeInBits()==64)) && "Unexpected custom BITCAST"); // i64 <=> MMX conversions are Legal. if (SrcVT==MVT::i64 && DstVT.isVector()) return Op; if (DstVT==MVT::i64 && SrcVT.isVector()) return Op; // MMX <=> MMX conversions are Legal. if (SrcVT.isVector() && DstVT.isVector()) return Op; // All other conversions need to be expanded. 
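// Illustrative sketch (not part of the lowering, kept under #if 0): the
// contract LowerCMP_SWAP above implements - the expected value travels in the
// accumulator register (AL/AX/EAX/RAX), CMPXCHG writes the previous memory
// value back into it, and success is read from ZF via the SETCC on COND_E.
// At the source level this is roughly what compare_exchange exposes; the
// sketch is only an assumed analogy.
#if 0
#include <atomic>
#include <cassert>
#include <cstdint>

bool CompareAndSwap(std::atomic<uint32_t> &Mem, uint32_t &Expected,
                    uint32_t Desired) {
  // Typically compiles to LOCK CMPXCHG; on failure Expected is reloaded with
  // the value currently in memory, mirroring the EAX write-back.
  return Mem.compare_exchange_strong(Expected, Desired);
}

int main() {
  std::atomic<uint32_t> Mem(5);
  uint32_t Expected = 5;
  assert(CompareAndSwap(Mem, Expected, 7) && Mem.load() == 7);
  Expected = 5;
  assert(!CompareAndSwap(Mem, Expected, 9) && Expected == 7);
  return 0;
}
#endif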
return SDValue(); } /// Compute the horizontal sum of bytes in V for the elements of VT. /// /// Requires V to be a byte vector and VT to be an integer vector type with /// wider elements than V's type. The width of the elements of VT determines /// how many bytes of V are summed horizontally to produce each element of the /// result. static SDValue LowerHorizontalByteSum(SDValue V, MVT VT, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(V); MVT ByteVecVT = V.getSimpleValueType(); MVT EltVT = VT.getVectorElementType(); int NumElts = VT.getVectorNumElements(); assert(ByteVecVT.getVectorElementType() == MVT::i8 && "Expected value to have byte element type."); assert(EltVT != MVT::i8 && "Horizontal byte sum only makes sense for wider elements!"); unsigned VecSize = VT.getSizeInBits(); assert(ByteVecVT.getSizeInBits() == VecSize && "Cannot change vector size!"); // PSADBW instruction horizontally add all bytes and leave the result in i64 // chunks, thus directly computes the pop count for v2i64 and v4i64. if (EltVT == MVT::i64) { SDValue Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL); MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64); V = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, V, Zeros); return DAG.getBitcast(VT, V); } if (EltVT == MVT::i32) { // We unpack the low half and high half into i32s interleaved with zeros so // that we can use PSADBW to horizontally sum them. The most useful part of // this is that it lines up the results of two PSADBW instructions to be // two v2i64 vectors which concatenated are the 4 population counts. We can // then use PACKUSWB to shrink and concatenate them into a v4i32 again. SDValue Zeros = getZeroVector(VT, Subtarget, DAG, DL); SDValue Low = DAG.getNode(X86ISD::UNPCKL, DL, VT, V, Zeros); SDValue High = DAG.getNode(X86ISD::UNPCKH, DL, VT, V, Zeros); // Do the horizontal sums into two v2i64s. Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL); MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64); Low = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, DAG.getBitcast(ByteVecVT, Low), Zeros); High = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, DAG.getBitcast(ByteVecVT, High), Zeros); // Merge them together. MVT ShortVecVT = MVT::getVectorVT(MVT::i16, VecSize / 16); V = DAG.getNode(X86ISD::PACKUS, DL, ByteVecVT, DAG.getBitcast(ShortVecVT, Low), DAG.getBitcast(ShortVecVT, High)); return DAG.getBitcast(VT, V); } // The only element type left is i16. assert(EltVT == MVT::i16 && "Unknown how to handle type"); // To obtain pop count for each i16 element starting from the pop count for // i8 elements, shift the i16s left by 8, sum as i8s, and then shift as i16s // right by 8. It is important to shift as i16s as i8 vector shift isn't // directly supported. 
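// Illustrative sketch (not part of the lowering, kept under #if 0): the i16
// step described above, reduced to a single 16-bit lane whose two bytes
// already hold byte-wise population counts. Shifting left by 8, adding as
// bytes and shifting right by 8 leaves lo + hi in the lane. Names are local
// to the sketch.
#if 0
#include <cassert>
#include <cstdint>

uint16_t SumBytePair(uint16_t V) {                   // V = (hi << 8) | lo
  uint16_t Shl = static_cast<uint16_t>(V << 8);      // lo moves into the high byte
  // "Add as i8s": each byte wraps independently, like a v16i8 ISD::ADD.
  uint16_t LoByte = static_cast<uint16_t>((Shl + V) & 0xFF);
  uint16_t HiByte = static_cast<uint16_t>(((Shl >> 8) + (V >> 8)) & 0xFF);
  uint16_t Sum = static_cast<uint16_t>((HiByte << 8) | LoByte);
  return static_cast<uint16_t>(Sum >> 8);            // lo + hi, upper byte now zero
}

int main() {
  assert(SumBytePair((5u << 8) | 3u) == 8);          // byte counts 3 and 5 -> 8
  return 0;
}
#endif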
SmallVector Shifters(NumElts, DAG.getConstant(8, DL, EltVT)); SDValue Shifter = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Shifters); SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, DAG.getBitcast(VT, V), Shifter); V = DAG.getNode(ISD::ADD, DL, ByteVecVT, DAG.getBitcast(ByteVecVT, Shl), DAG.getBitcast(ByteVecVT, V)); return DAG.getNode(ISD::SRL, DL, VT, DAG.getBitcast(VT, V), Shifter); } static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, SDLoc DL, const X86Subtarget *Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); MVT EltVT = VT.getVectorElementType(); unsigned VecSize = VT.getSizeInBits(); // Implement a lookup table in register by using an algorithm based on: // http://wm.ite.pl/articles/sse-popcount.html // // The general idea is that every lower byte nibble in the input vector is an // index into a in-register pre-computed pop count table. We then split up the // input vector in two new ones: (1) a vector with only the shifted-right // higher nibbles for each byte and (2) a vector with the lower nibbles (and // masked out higher ones) for each byte. PSHUB is used separately with both // to index the in-register table. Next, both are added and the result is a // i8 vector where each element contains the pop count for input byte. // // To obtain the pop count for elements != i8, we follow up with the same // approach and use additional tricks as described below. // const int LUT[16] = {/* 0 */ 0, /* 1 */ 1, /* 2 */ 1, /* 3 */ 2, /* 4 */ 1, /* 5 */ 2, /* 6 */ 2, /* 7 */ 3, /* 8 */ 1, /* 9 */ 2, /* a */ 2, /* b */ 3, /* c */ 2, /* d */ 3, /* e */ 3, /* f */ 4}; int NumByteElts = VecSize / 8; MVT ByteVecVT = MVT::getVectorVT(MVT::i8, NumByteElts); SDValue In = DAG.getBitcast(ByteVecVT, Op); SmallVector LUTVec; for (int i = 0; i < NumByteElts; ++i) LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8)); SDValue InRegLUT = DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVecVT, LUTVec); SmallVector Mask0F(NumByteElts, DAG.getConstant(0x0F, DL, MVT::i8)); SDValue M0F = DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVecVT, Mask0F); // High nibbles SmallVector Four(NumByteElts, DAG.getConstant(4, DL, MVT::i8)); SDValue FourV = DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVecVT, Four); SDValue HighNibbles = DAG.getNode(ISD::SRL, DL, ByteVecVT, In, FourV); // Low nibbles SDValue LowNibbles = DAG.getNode(ISD::AND, DL, ByteVecVT, In, M0F); // The input vector is used as the shuffle mask that index elements into the // LUT. After counting low and high nibbles, add the vector to obtain the // final pop count per i8 element. SDValue HighPopCnt = DAG.getNode(X86ISD::PSHUFB, DL, ByteVecVT, InRegLUT, HighNibbles); SDValue LowPopCnt = DAG.getNode(X86ISD::PSHUFB, DL, ByteVecVT, InRegLUT, LowNibbles); SDValue PopCnt = DAG.getNode(ISD::ADD, DL, ByteVecVT, HighPopCnt, LowPopCnt); if (EltVT == MVT::i8) return PopCnt; return LowerHorizontalByteSum(PopCnt, VT, Subtarget, DAG); } static SDValue LowerVectorCTPOPBitmath(SDValue Op, SDLoc DL, const X86Subtarget *Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); assert(VT.is128BitVector() && "Only 128-bit vector bitmath lowering supported."); int VecSize = VT.getSizeInBits(); MVT EltVT = VT.getVectorElementType(); int Len = EltVT.getSizeInBits(); // This is the vectorized version of the "best" algorithm from // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel // with a minor tweak to use a series of adds + shifts instead of vector // multiplications. Implemented for all integer vector types. 
We only use // this when we don't have SSSE3 which allows a LUT-based lowering that is // much faster, even faster than using native popcnt instructions. auto GetShift = [&](unsigned OpCode, SDValue V, int Shifter) { MVT VT = V.getSimpleValueType(); SmallVector Shifters( VT.getVectorNumElements(), DAG.getConstant(Shifter, DL, VT.getVectorElementType())); return DAG.getNode(OpCode, DL, VT, V, DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Shifters)); }; auto GetMask = [&](SDValue V, APInt Mask) { MVT VT = V.getSimpleValueType(); SmallVector Masks( VT.getVectorNumElements(), DAG.getConstant(Mask, DL, VT.getVectorElementType())); return DAG.getNode(ISD::AND, DL, VT, V, DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Masks)); }; // We don't want to incur the implicit masks required to SRL vNi8 vectors on // x86, so set the SRL type to have elements at least i16 wide. This is // correct because all of our SRLs are followed immediately by a mask anyways // that handles any bits that sneak into the high bits of the byte elements. MVT SrlVT = Len > 8 ? VT : MVT::getVectorVT(MVT::i16, VecSize / 16); SDValue V = Op; // v = v - ((v >> 1) & 0x55555555...) SDValue Srl = DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 1)); SDValue And = GetMask(Srl, APInt::getSplat(Len, APInt(8, 0x55))); V = DAG.getNode(ISD::SUB, DL, VT, V, And); // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...) SDValue AndLHS = GetMask(V, APInt::getSplat(Len, APInt(8, 0x33))); Srl = DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 2)); SDValue AndRHS = GetMask(Srl, APInt::getSplat(Len, APInt(8, 0x33))); V = DAG.getNode(ISD::ADD, DL, VT, AndLHS, AndRHS); // v = (v + (v >> 4)) & 0x0F0F0F0F... Srl = DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 4)); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, V, Srl); V = GetMask(Add, APInt::getSplat(Len, APInt(8, 0x0F))); // At this point, V contains the byte-wise population count, and we are // merely doing a horizontal sum if necessary to get the wider element // counts. if (EltVT == MVT::i8) return V; return LowerHorizontalByteSum( DAG.getBitcast(MVT::getVectorVT(MVT::i8, VecSize / 8), V), VT, Subtarget, DAG); } static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); // FIXME: Need to add AVX-512 support here! assert((VT.is256BitVector() || VT.is128BitVector()) && "Unknown CTPOP type to handle"); SDLoc DL(Op.getNode()); SDValue Op0 = Op.getOperand(0); if (!Subtarget->hasSSSE3()) { // We can't use the fast LUT approach, so fall back on vectorized bitmath. assert(VT.is128BitVector() && "Only 128-bit vectors supported in SSE!"); return LowerVectorCTPOPBitmath(Op0, DL, Subtarget, DAG); } if (VT.is256BitVector() && !Subtarget->hasInt256()) { unsigned NumElems = VT.getVectorNumElements(); // Extract each 128-bit vector, compute pop count and concat the result. 
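// Illustrative sketch (not part of the lowering, kept under #if 0): the
// scalar form of the bit-math population count vectorized above, shown for
// one 32-bit element. After the three masked steps each byte holds its own
// count; the vector code then uses adds/shifts (or PSADBW) instead of the
// closing multiply used here.
#if 0
#include <cassert>
#include <cstdint>

uint32_t PopCountBitmath(uint32_t V) {
  V = V - ((V >> 1) & 0x55555555u);                  // 2-bit partial counts
  V = (V & 0x33333333u) + ((V >> 2) & 0x33333333u);  // 4-bit partial counts
  V = (V + (V >> 4)) & 0x0F0F0F0Fu;                  // per-byte counts
  return (V * 0x01010101u) >> 24;                    // horizontal byte sum
}

int main() {
  assert(PopCountBitmath(0xF0F0F0F0u) == 16 && PopCountBitmath(1u) == 1);
  return 0;
}
#endif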
SDValue LHS = Extract128BitVector(Op0, 0, DAG, DL); SDValue RHS = Extract128BitVector(Op0, NumElems/2, DAG, DL); return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LowerVectorCTPOPInRegLUT(LHS, DL, Subtarget, DAG), LowerVectorCTPOPInRegLUT(RHS, DL, Subtarget, DAG)); } return LowerVectorCTPOPInRegLUT(Op0, DL, Subtarget, DAG); } static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { assert(Op.getSimpleValueType().isVector() && "We only do custom lowering for vector population count."); return LowerVectorCTPOP(Op, Subtarget, DAG); } static SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) { SDNode *Node = Op.getNode(); SDLoc dl(Node); EVT T = Node->getValueType(0); SDValue negOp = DAG.getNode(ISD::SUB, dl, T, DAG.getConstant(0, dl, T), Node->getOperand(2)); return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, cast(Node)->getMemoryVT(), Node->getOperand(0), Node->getOperand(1), negOp, cast(Node)->getMemOperand(), cast(Node)->getOrdering(), cast(Node)->getSynchScope()); } static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) { SDNode *Node = Op.getNode(); SDLoc dl(Node); EVT VT = cast(Node)->getMemoryVT(); // Convert seq_cst store -> xchg // Convert wide store -> swap (-> cmpxchg8b/cmpxchg16b) // FIXME: On 32-bit, store -> fist or movq would be more efficient // (The only way to get a 16-byte store is cmpxchg16b) // FIXME: 16-byte ATOMIC_SWAP isn't actually hooked up at the moment. if (cast(Node)->getOrdering() == SequentiallyConsistent || !DAG.getTargetLoweringInfo().isTypeLegal(VT)) { SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, cast(Node)->getMemoryVT(), Node->getOperand(0), Node->getOperand(1), Node->getOperand(2), cast(Node)->getMemOperand(), cast(Node)->getOrdering(), cast(Node)->getSynchScope()); return Swap.getValue(1); } // Other atomic stores have a simple pattern. return Op; } static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getNode()->getSimpleValueType(0); // Let legalize expand this if it isn't a legal type yet. if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); SDVTList VTs = DAG.getVTList(VT, MVT::i32); unsigned Opc; bool ExtraOp = false; switch (Op.getOpcode()) { default: llvm_unreachable("Invalid code"); case ISD::ADDC: Opc = X86ISD::ADD; break; case ISD::ADDE: Opc = X86ISD::ADC; ExtraOp = true; break; case ISD::SUBC: Opc = X86ISD::SUB; break; case ISD::SUBE: Opc = X86ISD::SBB; ExtraOp = true; break; } if (!ExtraOp) return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1)); return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1), Op.getOperand(2)); } static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit()); // For MacOSX, we want to call an alternative entry point: __sincos_stret, // which returns the values as { float, float } (in XMM0) or // { double, double } (which is returned in XMM0, XMM1). SDLoc dl(Op); SDValue Arg = Op.getOperand(0); EVT ArgVT = Arg.getValueType(); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Arg; Entry.Ty = ArgTy; Entry.isSExt = false; Entry.isZExt = false; Args.push_back(Entry); bool isF64 = ArgVT == MVT::f64; // Only optimize x86_64 for now. i386 is a bit messy. For f32, // the small struct {f32, f32} is returned in (eax, edx). For f64, // the results are returned via SRet in memory. const char *LibcallName = isF64 ? 
"__sincos_stret" : "__sincosf_stret"; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Callee = DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout())); Type *RetTy = isF64 ? (Type*)StructType::get(ArgTy, ArgTy, nullptr) : (Type*)VectorType::get(ArgTy, 4); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) .setCallee(CallingConv::C, RetTy, Callee, std::move(Args), 0); std::pair CallResult = TLI.LowerCallTo(CLI); if (isF64) // Returned in xmm0 and xmm1. return CallResult.first; // Returned in bits 0:31 and 32:64 xmm0. SDValue SinVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, CallResult.first, DAG.getIntPtrConstant(0, dl)); SDValue CosVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, CallResult.first, DAG.getIntPtrConstant(1, dl)); SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal); } /// Widen a vector input to a vector of NVT. The /// input vector must have the same element type as NVT. static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG, bool FillWithZeroes = false) { // Check if InOp already has the right width. MVT InVT = InOp.getSimpleValueType(); if (InVT == NVT) return InOp; if (InOp.isUndef()) return DAG.getUNDEF(NVT); assert(InVT.getVectorElementType() == NVT.getVectorElementType() && "input and widen element type must match"); unsigned InNumElts = InVT.getVectorNumElements(); unsigned WidenNumElts = NVT.getVectorNumElements(); assert(WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0 && "Unexpected request for vector widening"); EVT EltVT = NVT.getVectorElementType(); SDLoc dl(InOp); if (InOp.getOpcode() == ISD::CONCAT_VECTORS && InOp.getNumOperands() == 2) { SDValue N1 = InOp.getOperand(1); if ((ISD::isBuildVectorAllZeros(N1.getNode()) && FillWithZeroes) || N1.isUndef()) { InOp = InOp.getOperand(0); InVT = InOp.getSimpleValueType(); InNumElts = InVT.getVectorNumElements(); } } if (ISD::isBuildVectorOfConstantSDNodes(InOp.getNode()) || ISD::isBuildVectorOfConstantFPSDNodes(InOp.getNode())) { SmallVector Ops; for (unsigned i = 0; i < InNumElts; ++i) Ops.push_back(InOp.getOperand(i)); SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) : DAG.getUNDEF(EltVT); for (unsigned i = 0; i < WidenNumElts - InNumElts; ++i) Ops.push_back(FillVal); return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops); } SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, NVT) : DAG.getUNDEF(NVT); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NVT, FillVal, InOp, DAG.getIntPtrConstant(0, dl)); } static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { assert(Subtarget->hasAVX512() && "MGATHER/MSCATTER are supported on AVX-512 arch only"); // X86 scatter kills mask register, so its type should be added to // the list of return values. // If the "scatter" has 2 return values, it is already handled. 
if (Op.getNode()->getNumValues() == 2) return Op; MaskedScatterSDNode *N = cast(Op.getNode()); SDValue Src = N->getValue(); MVT VT = Src.getSimpleValueType(); assert(VT.getScalarSizeInBits() >= 32 && "Unsupported scatter op"); SDLoc dl(Op); SDValue NewScatter; SDValue Index = N->getIndex(); SDValue Mask = N->getMask(); SDValue Chain = N->getChain(); SDValue BasePtr = N->getBasePtr(); MVT MemVT = N->getMemoryVT().getSimpleVT(); MVT IndexVT = Index.getSimpleValueType(); MVT MaskVT = Mask.getSimpleValueType(); if (MemVT.getScalarSizeInBits() < VT.getScalarSizeInBits()) { // The v2i32 value was promoted to v2i64. // Now we "redo" the type legalizer's work and widen the original // v2i32 value to v4i32. The original v2i32 is retrieved from v2i64 // with a shuffle. assert((MemVT == MVT::v2i32 && VT == MVT::v2i64) && "Unexpected memory type"); int ShuffleMask[] = {0, 2, -1, -1}; Src = DAG.getVectorShuffle(MVT::v4i32, dl, DAG.getBitcast(MVT::v4i32, Src), DAG.getUNDEF(MVT::v4i32), ShuffleMask); // Now we have 4 elements instead of 2. // Expand the index. MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), 4); Index = ExtendToType(Index, NewIndexVT, DAG); // Expand the mask with zeroes // Mask may be <2 x i64> or <2 x i1> at this moment assert((MaskVT == MVT::v2i1 || MaskVT == MVT::v2i64) && "Unexpected mask type"); MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), 4); Mask = ExtendToType(Mask, ExtMaskVT, DAG, true); VT = MVT::v4i32; } unsigned NumElts = VT.getVectorNumElements(); if (!Subtarget->hasVLX() && !VT.is512BitVector() && !Index.getSimpleValueType().is512BitVector()) { // AVX512F supports only 512-bit vectors. Or data or index should // be 512 bit wide. If now the both index and data are 256-bit, but // the vector contains 8 elements, we just sign-extend the index if (IndexVT == MVT::v8i32) // Just extend index Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index); else { // The minimal number of elts in scatter is 8 NumElts = 8; // Index MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), NumElts); // Use original index here, do not modify the index twice Index = ExtendToType(N->getIndex(), NewIndexVT, DAG); if (IndexVT.getScalarType() == MVT::i32) Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index); // Mask // At this point we have promoted mask operand assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type"); MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts); // Use the original mask here, do not modify the mask twice Mask = ExtendToType(N->getMask(), ExtMaskVT, DAG, true); // The value that should be stored MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts); Src = ExtendToType(Src, NewVT, DAG); } } // If the mask is "wide" at this point - truncate it to i1 vector MVT BitMaskVT = MVT::getVectorVT(MVT::i1, NumElts); Mask = DAG.getNode(ISD::TRUNCATE, dl, BitMaskVT, Mask); // The mask is killed by scatter, add it to the values SDVTList VTs = DAG.getVTList(BitMaskVT, MVT::Other); SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index}; NewScatter = DAG.getMaskedScatter(VTs, N->getMemoryVT(), dl, Ops, N->getMemOperand()); DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1)); return SDValue(NewScatter.getNode(), 0); } static SDValue LowerMLOAD(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { MaskedLoadSDNode *N = cast(Op.getNode()); MVT VT = Op.getSimpleValueType(); SDValue Mask = N->getMask(); SDLoc dl(Op); if (Subtarget->hasAVX512() && !Subtarget->hasVLX() && !VT.is512BitVector() && Mask.getValueType() 
== MVT::v8i1) { // This operation is legal for targets with VLX, but without // VLX the vector should be widened to 512 bit unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits(); MVT WideDataVT = MVT::getVectorVT(VT.getScalarType(), NumEltsInWideVec); MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec); SDValue Src0 = N->getSrc0(); Src0 = ExtendToType(Src0, WideDataVT, DAG); Mask = ExtendToType(Mask, WideMaskVT, DAG, true); SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(), N->getBasePtr(), Mask, Src0, N->getMemoryVT(), N->getMemOperand(), N->getExtensionType()); SDValue Exract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, NewLoad.getValue(0), DAG.getIntPtrConstant(0, dl)); SDValue RetOps[] = {Exract, NewLoad.getValue(1)}; return DAG.getMergeValues(RetOps, dl); } return Op; } static SDValue LowerMSTORE(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { MaskedStoreSDNode *N = cast(Op.getNode()); SDValue DataToStore = N->getValue(); MVT VT = DataToStore.getSimpleValueType(); SDValue Mask = N->getMask(); SDLoc dl(Op); if (Subtarget->hasAVX512() && !Subtarget->hasVLX() && !VT.is512BitVector() && Mask.getValueType() == MVT::v8i1) { // This operation is legal for targets with VLX, but without // VLX the vector should be widened to 512 bit unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits(); MVT WideDataVT = MVT::getVectorVT(VT.getScalarType(), NumEltsInWideVec); MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec); DataToStore = ExtendToType(DataToStore, WideDataVT, DAG); Mask = ExtendToType(Mask, WideMaskVT, DAG, true); return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(), Mask, N->getMemoryVT(), N->getMemOperand(), N->isTruncatingStore()); } return Op; } static SDValue LowerMGATHER(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { assert(Subtarget->hasAVX512() && "MGATHER/MSCATTER are supported on AVX-512 arch only"); MaskedGatherSDNode *N = cast(Op.getNode()); SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); SDValue Index = N->getIndex(); SDValue Mask = N->getMask(); SDValue Src0 = N->getValue(); MVT IndexVT = Index.getSimpleValueType(); MVT MaskVT = Mask.getSimpleValueType(); unsigned NumElts = VT.getVectorNumElements(); assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op"); if (!Subtarget->hasVLX() && !VT.is512BitVector() && !Index.getSimpleValueType().is512BitVector()) { // AVX512F supports only 512-bit vectors. Or data or index should // be 512 bit wide. 
If now the both index and data are 256-bit, but // the vector contains 8 elements, we just sign-extend the index if (NumElts == 8) { Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), Index }; DAG.UpdateNodeOperands(N, Ops); return Op; } // Minimal number of elements in Gather NumElts = 8; // Index MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), NumElts); Index = ExtendToType(Index, NewIndexVT, DAG); if (IndexVT.getScalarType() == MVT::i32) Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index); // Mask MVT MaskBitVT = MVT::getVectorVT(MVT::i1, NumElts); // At this point we have promoted mask operand assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type"); MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts); Mask = ExtendToType(Mask, ExtMaskVT, DAG, true); Mask = DAG.getNode(ISD::TRUNCATE, dl, MaskBitVT, Mask); // The pass-thru value MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts); Src0 = ExtendToType(Src0, NewVT, DAG); SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index }; SDValue NewGather = DAG.getMaskedGather(DAG.getVTList(NewVT, MVT::Other), N->getMemoryVT(), dl, Ops, N->getMemOperand()); SDValue Exract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, NewGather.getValue(0), DAG.getIntPtrConstant(0, dl)); SDValue RetOps[] = {Exract, NewGather.getValue(1)}; return DAG.getMergeValues(RetOps, dl); } return Op; } SDValue X86TargetLowering::LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const { // TODO: Eventually, the lowering of these nodes should be informed by or // deferred to the GC strategy for the function in which they appear. For // now, however, they must be lowered to something. Since they are logically // no-ops in the case of a null GC strategy (or a GC strategy which does not // require special handling for these nodes), lower them as literal NOOPs for // the time being. SmallVector Ops; Ops.push_back(Op.getOperand(0)); if (Op->getGluedNode()) Ops.push_back(Op->getOperand(Op->getNumOperands() - 1)); SDLoc OpDL(Op); SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue NOOP(DAG.getMachineNode(X86::NOOP, SDLoc(Op), VTs, Ops), 0); return NOOP; } SDValue X86TargetLowering::LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const { // TODO: Eventually, the lowering of these nodes should be informed by or // deferred to the GC strategy for the function in which they appear. For // now, however, they must be lowered to something. Since they are logically // no-ops in the case of a null GC strategy (or a GC strategy which does not // require special handling for these nodes), lower them as literal NOOPs for // the time being. SmallVector Ops; Ops.push_back(Op.getOperand(0)); if (Op->getGluedNode()) Ops.push_back(Op->getOperand(Op->getNumOperands() - 1)); SDLoc OpDL(Op); SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue NOOP(DAG.getMachineNode(X86::NOOP, SDLoc(Op), VTs, Ops), 0); return NOOP; } /// LowerOperation - Provide custom lowering hooks for some operations. 
/// SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG); case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: return LowerCMP_SWAP(Op, Subtarget, DAG); case ISD::CTPOP: return LowerCTPOP(Op, Subtarget, DAG); case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG); case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op,DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, Subtarget, DAG); case ISD::VECTOR_SHUFFLE: return lowerVectorShuffle(Op, Subtarget, DAG); case ISD::VSELECT: return LowerVSELECT(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG); case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, Subtarget,DAG); case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::SHL_PARTS: case ISD::SRA_PARTS: case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG); case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, Subtarget, DAG); case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, Subtarget, DAG); case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, Subtarget, DAG); case ISD::SIGN_EXTEND_VECTOR_INREG: return LowerSIGN_EXTEND_VECTOR_INREG(Op, Subtarget, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG); case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); case ISD::LOAD: return LowerExtendedLoad(Op, Subtarget, DAG); case ISD::FABS: case ISD::FNEG: return LowerFABSorFNEG(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::SETCCE: return LowerSETCCE(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::VAARG: return LowerVAARG(Op, DAG); case ISD::VACOPY: return LowerVACOPY(Op, Subtarget, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, Subtarget, DAG); case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::FRAME_TO_ARGS_OFFSET: return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); case ISD::FLT_ROUNDS_: return 
LowerFLT_ROUNDS_(Op, DAG); case ISD::CTLZ: return LowerCTLZ(Op, Subtarget, DAG); case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ_ZERO_UNDEF(Op, Subtarget, DAG); case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op, DAG); case ISD::MUL: return LowerMUL(Op, Subtarget, DAG); case ISD::UMUL_LOHI: case ISD::SMUL_LOHI: return LowerMUL_LOHI(Op, Subtarget, DAG); case ISD::ROTL: return LowerRotate(Op, Subtarget, DAG); case ISD::SRA: case ISD::SRL: case ISD::SHL: return LowerShift(Op, Subtarget, DAG); case ISD::SADDO: case ISD::UADDO: case ISD::SSUBO: case ISD::USUBO: case ISD::SMULO: case ISD::UMULO: return LowerXALUO(Op, DAG); case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, Subtarget,DAG); case ISD::BITCAST: return LowerBITCAST(Op, Subtarget, DAG); case ISD::ADDC: case ISD::ADDE: case ISD::SUBC: case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); case ISD::ADD: return LowerADD(Op, DAG); case ISD::SUB: return LowerSUB(Op, DAG); case ISD::SMAX: case ISD::SMIN: case ISD::UMAX: case ISD::UMIN: return LowerMINMAX(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG); case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG); case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG); case ISD::MGATHER: return LowerMGATHER(Op, Subtarget, DAG); case ISD::MSCATTER: return LowerMSCATTER(Op, Subtarget, DAG); case ISD::GC_TRANSITION_START: return LowerGC_TRANSITION_START(Op, DAG); case ISD::GC_TRANSITION_END: return LowerGC_TRANSITION_END(Op, DAG); } } /// ReplaceNodeResults - Replace a node with an illegal result type /// with a new node built out of custom code. void X86TargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, SelectionDAG &DAG) const { SDLoc dl(N); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); switch (N->getOpcode()) { default: llvm_unreachable("Do not know how to custom type legalize this operation!"); case X86ISD::AVG: { // Legalize types for X86ISD::AVG by expanding vectors. assert(Subtarget->hasSSE2() && "Requires at least SSE2!"); auto InVT = N->getValueType(0); auto InVTSize = InVT.getSizeInBits(); const unsigned RegSize = (InVTSize > 128) ? ((InVTSize > 256) ? 512 : 256) : 128; assert((!Subtarget->hasAVX512() || RegSize < 512) && "512-bit vector requires AVX512"); assert((!Subtarget->hasAVX2() || RegSize < 256) && "256-bit vector requires AVX2"); auto ElemVT = InVT.getVectorElementType(); auto RegVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, RegSize / ElemVT.getSizeInBits()); assert(RegSize % InVT.getSizeInBits() == 0); unsigned NumConcat = RegSize / InVT.getSizeInBits(); SmallVector Ops(NumConcat, DAG.getUNDEF(InVT)); Ops[0] = N->getOperand(0); SDValue InVec0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops); Ops[0] = N->getOperand(1); SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops); SDValue Res = DAG.getNode(X86ISD::AVG, dl, RegVT, InVec0, InVec1); Results.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InVT, Res, DAG.getIntPtrConstant(0, dl))); return; } // We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32. 
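// Illustrative sketch (not part of the lowering, kept under #if 0): the
// widening pattern used by the X86ISD::AVG legalization above - pad both
// inputs out to a legal register width, run the wide operation, then keep
// only the lanes that were originally requested. Shown with plain vectors and
// the PAVGB-style rounding average; names are local to the sketch.
#if 0
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<uint8_t> AvgByWidening(const std::vector<uint8_t> &A,
                                   const std::vector<uint8_t> &B,
                                   std::size_t RegLanes) {
  std::vector<uint8_t> WideA(A), WideB(B);
  WideA.resize(RegLanes, 0);                  // the DAG pads with UNDEF instead of zero
  WideB.resize(RegLanes, 0);
  std::vector<uint8_t> WideRes(RegLanes);
  for (std::size_t i = 0; i != RegLanes; ++i) // avg: (a + b + 1) >> 1
    WideRes[i] = static_cast<uint8_t>((WideA[i] + WideB[i] + 1) >> 1);
  WideRes.resize(A.size());                   // EXTRACT_SUBVECTOR at index 0
  return WideRes;
}

int main() {
  std::vector<uint8_t> R = AvgByWidening({1, 2}, {3, 4}, 16);
  assert(R.size() == 2 && R[0] == 2 && R[1] == 3);
  return 0;
}
#endif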
case X86ISD::FMINC: case X86ISD::FMIN: case X86ISD::FMAXC: case X86ISD::FMAX: { EVT VT = N->getValueType(0); assert(VT == MVT::v2f32 && "Unexpected type (!= v2f32) on FMIN/FMAX."); SDValue UNDEF = DAG.getUNDEF(VT); SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, N->getOperand(0), UNDEF); SDValue RHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, N->getOperand(1), UNDEF); Results.push_back(DAG.getNode(N->getOpcode(), dl, MVT::v4f32, LHS, RHS)); return; } case ISD::SIGN_EXTEND_INREG: case ISD::ADDC: case ISD::ADDE: case ISD::SUBC: case ISD::SUBE: // We don't want to expand or promote these. return; case ISD::SDIV: case ISD::UDIV: case ISD::SREM: case ISD::UREM: case ISD::SDIVREM: case ISD::UDIVREM: { SDValue V = LowerWin64_i128OP(SDValue(N,0), DAG); Results.push_back(V); return; } case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: { bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT; std::pair Vals = FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true); SDValue FIST = Vals.first, StackSlot = Vals.second; if (FIST.getNode()) { EVT VT = N->getValueType(0); // Return a load from the stack slot. if (StackSlot.getNode()) Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, MachinePointerInfo(), false, false, false, 0)); else Results.push_back(FIST); } return; } case ISD::UINT_TO_FP: { assert(Subtarget->hasSSE2() && "Requires at least SSE2!"); if (N->getOperand(0).getValueType() != MVT::v2i32 || N->getValueType(0) != MVT::v2f32) return; SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, N->getOperand(0)); SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl, MVT::f64); SDValue VBias = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2f64, Bias, Bias); SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn, DAG.getBitcast(MVT::v2i64, VBias)); Or = DAG.getBitcast(MVT::v2f64, Or); // TODO: Are there any fast-math-flags to propagate here? SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias); Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub)); return; } case ISD::FP_ROUND: { if (!TLI.isTypeLegal(N->getOperand(0).getValueType())) return; SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0)); Results.push_back(V); return; } case ISD::FP_EXTEND: { // Right now, only MVT::v2f32 has OperationAction for FP_EXTEND. // No other ValueType for FP_EXTEND should reach this point. assert(N->getValueType(0) == MVT::v2f32 && "Do not know how to legalize this Node"); return; } case ISD::INTRINSIC_W_CHAIN: { unsigned IntNo = cast(N->getOperand(1))->getZExtValue(); switch (IntNo) { default : llvm_unreachable("Do not know how to custom type " "legalize this intrinsic operation!"); case Intrinsic::x86_rdtsc: return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget, Results); case Intrinsic::x86_rdtscp: return getReadTimeStampCounter(N, dl, X86ISD::RDTSCP_DAG, DAG, Subtarget, Results); case Intrinsic::x86_rdpmc: return getReadPerformanceCounter(N, dl, DAG, Subtarget, Results); } } case ISD::INTRINSIC_WO_CHAIN: { if (SDValue V = LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), Subtarget, DAG)) Results.push_back(V); return; } case ISD::READCYCLECOUNTER: { return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget, Results); } case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { EVT T = N->getValueType(0); assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair"); bool Regs64bit = T == MVT::i128; MVT HalfT = Regs64bit ? 
MVT::i64 : MVT::i32; SDValue cpInL, cpInH; cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2), DAG.getConstant(0, dl, HalfT)); cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2), DAG.getConstant(1, dl, HalfT)); cpInL = DAG.getCopyToReg(N->getOperand(0), dl, Regs64bit ? X86::RAX : X86::EAX, cpInL, SDValue()); cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl, Regs64bit ? X86::RDX : X86::EDX, cpInH, cpInL.getValue(1)); SDValue swapInL, swapInH; swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3), DAG.getConstant(0, dl, HalfT)); swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3), DAG.getConstant(1, dl, HalfT)); swapInL = DAG.getCopyToReg(cpInH.getValue(0), dl, Regs64bit ? X86::RBX : X86::EBX, swapInL, cpInH.getValue(1)); swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl, Regs64bit ? X86::RCX : X86::ECX, swapInH, swapInL.getValue(1)); SDValue Ops[] = { swapInH.getValue(0), N->getOperand(1), swapInH.getValue(1) }; SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); MachineMemOperand *MMO = cast(N)->getMemOperand(); unsigned Opcode = Regs64bit ? X86ISD::LCMPXCHG16_DAG : X86ISD::LCMPXCHG8_DAG; SDValue Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys, Ops, T, MMO); SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, Regs64bit ? X86::RAX : X86::EAX, HalfT, Result.getValue(1)); SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, Regs64bit ? X86::RDX : X86::EDX, HalfT, cpOutL.getValue(2)); SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)}; SDValue EFLAGS = DAG.getCopyFromReg(cpOutH.getValue(1), dl, X86::EFLAGS, MVT::i32, cpOutH.getValue(2)); SDValue Success = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(X86::COND_E, dl, MVT::i8), EFLAGS); Success = DAG.getZExtOrTrunc(Success, dl, N->getValueType(1)); Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF)); Results.push_back(Success); Results.push_back(EFLAGS.getValue(1)); return; } case ISD::ATOMIC_SWAP: case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: case ISD::ATOMIC_LOAD_AND: case ISD::ATOMIC_LOAD_OR: case ISD::ATOMIC_LOAD_XOR: case ISD::ATOMIC_LOAD_NAND: case ISD::ATOMIC_LOAD_MIN: case ISD::ATOMIC_LOAD_MAX: case ISD::ATOMIC_LOAD_UMIN: case ISD::ATOMIC_LOAD_UMAX: case ISD::ATOMIC_LOAD: { // Delegate to generic TypeLegalization. Situations we can really handle // should have already been dealt with by AtomicExpandPass.cpp. break; } case ISD::BITCAST: { assert(Subtarget->hasSSE2() && "Requires at least SSE2!"); EVT DstVT = N->getValueType(0); EVT SrcVT = N->getOperand(0)->getValueType(0); if (SrcVT != MVT::f64 || (DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8)) return; unsigned NumElts = DstVT.getVectorNumElements(); EVT SVT = DstVT.getVectorElementType(); EVT WiderVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2); SDValue Expanded = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, N->getOperand(0)); SDValue ToVecInt = DAG.getBitcast(WiderVT, Expanded); if (ExperimentalVectorWideningLegalization) { // If we are legalizing vectors by widening, we already have the desired // legal vector type, just return it. 
Results.push_back(ToVecInt); return; } SmallVector Elts; for (unsigned i = 0, e = NumElts; i != e; ++i) Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, ToVecInt, DAG.getIntPtrConstant(i, dl))); Results.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, DstVT, Elts)); } } } const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((X86ISD::NodeType)Opcode) { case X86ISD::FIRST_NUMBER: break; case X86ISD::BSF: return "X86ISD::BSF"; case X86ISD::BSR: return "X86ISD::BSR"; case X86ISD::SHLD: return "X86ISD::SHLD"; case X86ISD::SHRD: return "X86ISD::SHRD"; case X86ISD::FAND: return "X86ISD::FAND"; case X86ISD::FANDN: return "X86ISD::FANDN"; case X86ISD::FOR: return "X86ISD::FOR"; case X86ISD::FXOR: return "X86ISD::FXOR"; case X86ISD::FILD: return "X86ISD::FILD"; case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; case X86ISD::FLD: return "X86ISD::FLD"; case X86ISD::FST: return "X86ISD::FST"; case X86ISD::CALL: return "X86ISD::CALL"; case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; case X86ISD::RDTSCP_DAG: return "X86ISD::RDTSCP_DAG"; case X86ISD::RDPMC_DAG: return "X86ISD::RDPMC_DAG"; case X86ISD::BT: return "X86ISD::BT"; case X86ISD::CMP: return "X86ISD::CMP"; case X86ISD::COMI: return "X86ISD::COMI"; case X86ISD::UCOMI: return "X86ISD::UCOMI"; case X86ISD::CMPM: return "X86ISD::CMPM"; case X86ISD::CMPMU: return "X86ISD::CMPMU"; case X86ISD::CMPM_RND: return "X86ISD::CMPM_RND"; case X86ISD::SETCC: return "X86ISD::SETCC"; case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY"; case X86ISD::FSETCC: return "X86ISD::FSETCC"; case X86ISD::FGETSIGNx86: return "X86ISD::FGETSIGNx86"; case X86ISD::CMOV: return "X86ISD::CMOV"; case X86ISD::BRCOND: return "X86ISD::BRCOND"; case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; case X86ISD::IRET: return "X86ISD::IRET"; case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; case X86ISD::Wrapper: return "X86ISD::Wrapper"; case X86ISD::WrapperRIP: return "X86ISD::WrapperRIP"; case X86ISD::MOVDQ2Q: return "X86ISD::MOVDQ2Q"; case X86ISD::MMX_MOVD2W: return "X86ISD::MMX_MOVD2W"; case X86ISD::MMX_MOVW2D: return "X86ISD::MMX_MOVW2D"; case X86ISD::PEXTRB: return "X86ISD::PEXTRB"; case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; case X86ISD::INSERTPS: return "X86ISD::INSERTPS"; case X86ISD::PINSRB: return "X86ISD::PINSRB"; case X86ISD::PINSRW: return "X86ISD::PINSRW"; case X86ISD::MMX_PINSRW: return "X86ISD::MMX_PINSRW"; case X86ISD::PSHUFB: return "X86ISD::PSHUFB"; case X86ISD::ANDNP: return "X86ISD::ANDNP"; case X86ISD::PSIGN: return "X86ISD::PSIGN"; case X86ISD::BLENDI: return "X86ISD::BLENDI"; case X86ISD::SHRUNKBLEND: return "X86ISD::SHRUNKBLEND"; case X86ISD::ADDUS: return "X86ISD::ADDUS"; case X86ISD::SUBUS: return "X86ISD::SUBUS"; case X86ISD::HADD: return "X86ISD::HADD"; case X86ISD::HSUB: return "X86ISD::HSUB"; case X86ISD::FHADD: return "X86ISD::FHADD"; case X86ISD::FHSUB: return "X86ISD::FHSUB"; case X86ISD::ABS: return "X86ISD::ABS"; case X86ISD::CONFLICT: return "X86ISD::CONFLICT"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMAX_RND: return "X86ISD::FMAX_RND"; case X86ISD::FMIN: return "X86ISD::FMIN"; case X86ISD::FMIN_RND: return "X86ISD::FMIN_RND"; case X86ISD::FMAXC: return "X86ISD::FMAXC"; case 
X86ISD::FMINC: return "X86ISD::FMINC"; case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; case X86ISD::FRCP: return "X86ISD::FRCP"; case X86ISD::EXTRQI: return "X86ISD::EXTRQI"; case X86ISD::INSERTQI: return "X86ISD::INSERTQI"; case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR"; case X86ISD::TLSCALL: return "X86ISD::TLSCALL"; case X86ISD::EH_SJLJ_SETJMP: return "X86ISD::EH_SJLJ_SETJMP"; case X86ISD::EH_SJLJ_LONGJMP: return "X86ISD::EH_SJLJ_LONGJMP"; case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN"; case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m"; case X86ISD::FNSTSW16r: return "X86ISD::FNSTSW16r"; case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG"; case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG"; case X86ISD::LCMPXCHG16_DAG: return "X86ISD::LCMPXCHG16_DAG"; case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL"; case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD"; case X86ISD::VZEXT: return "X86ISD::VZEXT"; case X86ISD::VSEXT: return "X86ISD::VSEXT"; case X86ISD::VTRUNC: return "X86ISD::VTRUNC"; case X86ISD::VTRUNCS: return "X86ISD::VTRUNCS"; case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS"; case X86ISD::VINSERT: return "X86ISD::VINSERT"; case X86ISD::VFPEXT: return "X86ISD::VFPEXT"; case X86ISD::VFPROUND: return "X86ISD::VFPROUND"; case X86ISD::CVTDQ2PD: return "X86ISD::CVTDQ2PD"; case X86ISD::CVTUDQ2PD: return "X86ISD::CVTUDQ2PD"; case X86ISD::CVT2MASK: return "X86ISD::CVT2MASK"; case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ"; case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ"; case X86ISD::VSHL: return "X86ISD::VSHL"; case X86ISD::VSRL: return "X86ISD::VSRL"; case X86ISD::VSRA: return "X86ISD::VSRA"; case X86ISD::VSHLI: return "X86ISD::VSHLI"; case X86ISD::VSRLI: return "X86ISD::VSRLI"; case X86ISD::VSRAI: return "X86ISD::VSRAI"; case X86ISD::VROTLI: return "X86ISD::VROTLI"; case X86ISD::VROTRI: return "X86ISD::VROTRI"; case X86ISD::CMPP: return "X86ISD::CMPP"; case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ"; case X86ISD::PCMPGT: return "X86ISD::PCMPGT"; case X86ISD::PCMPEQM: return "X86ISD::PCMPEQM"; case X86ISD::PCMPGTM: return "X86ISD::PCMPGTM"; case X86ISD::ADD: return "X86ISD::ADD"; case X86ISD::SUB: return "X86ISD::SUB"; case X86ISD::ADC: return "X86ISD::ADC"; case X86ISD::SBB: return "X86ISD::SBB"; case X86ISD::SMUL: return "X86ISD::SMUL"; case X86ISD::UMUL: return "X86ISD::UMUL"; case X86ISD::SMUL8: return "X86ISD::SMUL8"; case X86ISD::UMUL8: return "X86ISD::UMUL8"; case X86ISD::SDIVREM8_SEXT_HREG: return "X86ISD::SDIVREM8_SEXT_HREG"; case X86ISD::UDIVREM8_ZEXT_HREG: return "X86ISD::UDIVREM8_ZEXT_HREG"; case X86ISD::INC: return "X86ISD::INC"; case X86ISD::DEC: return "X86ISD::DEC"; case X86ISD::OR: return "X86ISD::OR"; case X86ISD::XOR: return "X86ISD::XOR"; case X86ISD::AND: return "X86ISD::AND"; case X86ISD::BEXTR: return "X86ISD::BEXTR"; case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; case X86ISD::PTEST: return "X86ISD::PTEST"; case X86ISD::TESTP: return "X86ISD::TESTP"; case X86ISD::TESTM: return "X86ISD::TESTM"; case X86ISD::TESTNM: return "X86ISD::TESTNM"; case X86ISD::KORTEST: return "X86ISD::KORTEST"; case X86ISD::KTEST: return "X86ISD::KTEST"; case X86ISD::PACKSS: return "X86ISD::PACKSS"; case X86ISD::PACKUS: return "X86ISD::PACKUS"; case X86ISD::PALIGNR: return "X86ISD::PALIGNR"; case X86ISD::VALIGN: return "X86ISD::VALIGN"; case X86ISD::PSHUFD: return "X86ISD::PSHUFD"; case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW"; case X86ISD::PSHUFLW: return 
"X86ISD::PSHUFLW"; case X86ISD::SHUFP: return "X86ISD::SHUFP"; case X86ISD::SHUF128: return "X86ISD::SHUF128"; case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS"; case X86ISD::MOVLHPD: return "X86ISD::MOVLHPD"; case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS"; case X86ISD::MOVLPS: return "X86ISD::MOVLPS"; case X86ISD::MOVLPD: return "X86ISD::MOVLPD"; case X86ISD::MOVDDUP: return "X86ISD::MOVDDUP"; case X86ISD::MOVSHDUP: return "X86ISD::MOVSHDUP"; case X86ISD::MOVSLDUP: return "X86ISD::MOVSLDUP"; case X86ISD::MOVSD: return "X86ISD::MOVSD"; case X86ISD::MOVSS: return "X86ISD::MOVSS"; case X86ISD::UNPCKL: return "X86ISD::UNPCKL"; case X86ISD::UNPCKH: return "X86ISD::UNPCKH"; case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST"; case X86ISD::VBROADCASTM: return "X86ISD::VBROADCASTM"; case X86ISD::SUBV_BROADCAST: return "X86ISD::SUBV_BROADCAST"; case X86ISD::VEXTRACT: return "X86ISD::VEXTRACT"; case X86ISD::VPERMILPV: return "X86ISD::VPERMILPV"; case X86ISD::VPERMILPI: return "X86ISD::VPERMILPI"; case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128"; case X86ISD::VPERMV: return "X86ISD::VPERMV"; case X86ISD::VPERMV3: return "X86ISD::VPERMV3"; case X86ISD::VPERMIV3: return "X86ISD::VPERMIV3"; case X86ISD::VPERMI: return "X86ISD::VPERMI"; case X86ISD::VPTERNLOG: return "X86ISD::VPTERNLOG"; case X86ISD::VFIXUPIMM: return "X86ISD::VFIXUPIMM"; case X86ISD::VRANGE: return "X86ISD::VRANGE"; case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ"; case X86ISD::PMULDQ: return "X86ISD::PMULDQ"; case X86ISD::PSADBW: return "X86ISD::PSADBW"; case X86ISD::DBPSADBW: return "X86ISD::DBPSADBW"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; case X86ISD::VAARG_64: return "X86ISD::VAARG_64"; case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA"; case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER"; case X86ISD::MFENCE: return "X86ISD::MFENCE"; case X86ISD::SFENCE: return "X86ISD::SFENCE"; case X86ISD::LFENCE: return "X86ISD::LFENCE"; case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA"; case X86ISD::SAHF: return "X86ISD::SAHF"; case X86ISD::RDRAND: return "X86ISD::RDRAND"; case X86ISD::RDSEED: return "X86ISD::RDSEED"; case X86ISD::VPMADDUBSW: return "X86ISD::VPMADDUBSW"; case X86ISD::VPMADDWD: return "X86ISD::VPMADDWD"; case X86ISD::VPROT: return "X86ISD::VPROT"; case X86ISD::VPROTI: return "X86ISD::VPROTI"; case X86ISD::VPSHA: return "X86ISD::VPSHA"; case X86ISD::VPSHL: return "X86ISD::VPSHL"; case X86ISD::VPCOM: return "X86ISD::VPCOM"; case X86ISD::VPCOMU: return "X86ISD::VPCOMU"; case X86ISD::FMADD: return "X86ISD::FMADD"; case X86ISD::FMSUB: return "X86ISD::FMSUB"; case X86ISD::FNMADD: return "X86ISD::FNMADD"; case X86ISD::FNMSUB: return "X86ISD::FNMSUB"; case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB"; case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD"; case X86ISD::FMADD_RND: return "X86ISD::FMADD_RND"; case X86ISD::FNMADD_RND: return "X86ISD::FNMADD_RND"; case X86ISD::FMSUB_RND: return "X86ISD::FMSUB_RND"; case X86ISD::FNMSUB_RND: return "X86ISD::FNMSUB_RND"; case X86ISD::FMADDSUB_RND: return "X86ISD::FMADDSUB_RND"; case X86ISD::FMSUBADD_RND: return "X86ISD::FMSUBADD_RND"; case X86ISD::VRNDSCALE: return "X86ISD::VRNDSCALE"; case X86ISD::VREDUCE: return "X86ISD::VREDUCE"; case X86ISD::VGETMANT: return "X86ISD::VGETMANT"; case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI"; case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI"; case X86ISD::XTEST: return "X86ISD::XTEST"; case X86ISD::COMPRESS: return "X86ISD::COMPRESS"; case X86ISD::EXPAND: return "X86ISD::EXPAND"; case X86ISD::SELECT: return 
"X86ISD::SELECT"; case X86ISD::ADDSUB: return "X86ISD::ADDSUB"; case X86ISD::RCP28: return "X86ISD::RCP28"; case X86ISD::EXP2: return "X86ISD::EXP2"; case X86ISD::RSQRT28: return "X86ISD::RSQRT28"; case X86ISD::FADD_RND: return "X86ISD::FADD_RND"; case X86ISD::FSUB_RND: return "X86ISD::FSUB_RND"; case X86ISD::FMUL_RND: return "X86ISD::FMUL_RND"; case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND"; case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND"; case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND"; case X86ISD::SCALEF: return "X86ISD::SCALEF"; case X86ISD::ADDS: return "X86ISD::ADDS"; case X86ISD::SUBS: return "X86ISD::SUBS"; case X86ISD::AVG: return "X86ISD::AVG"; case X86ISD::MULHRS: return "X86ISD::MULHRS"; case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND"; case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND"; case X86ISD::FP_TO_SINT_RND: return "X86ISD::FP_TO_SINT_RND"; case X86ISD::FP_TO_UINT_RND: return "X86ISD::FP_TO_UINT_RND"; case X86ISD::VFPCLASS: return "X86ISD::VFPCLASS"; case X86ISD::VFPCLASSS: return "X86ISD::VFPCLASSS"; } return nullptr; } // isLegalAddressingMode - Return true if the addressing mode represented // by AM is legal for this target, for a load/store of the specified type. bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const { // X86 supports extremely general addressing modes. CodeModel::Model M = getTargetMachine().getCodeModel(); Reloc::Model R = getTargetMachine().getRelocationModel(); // X86 allows a sign-extended 32-bit immediate field as a displacement. if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != nullptr)) return false; if (AM.BaseGV) { unsigned GVFlags = Subtarget->ClassifyGlobalReference(AM.BaseGV, getTargetMachine()); // If a reference to this global requires an extra load, we can't fold it. if (isGlobalStubReference(GVFlags)) return false; // If BaseGV requires a register for the PIC base, we cannot also have a // BaseReg specified. if (AM.HasBaseReg && isGlobalRelativeToPICBase(GVFlags)) return false; // If lower 4G is not available, then we must use rip-relative addressing. if ((M != CodeModel::Small || R != Reloc::Static) && Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1)) return false; } switch (AM.Scale) { case 0: case 1: case 2: case 4: case 8: // These scales always work. break; case 3: case 5: case 9: // These scales are formed with basereg+scalereg. Only accept if there is // no basereg yet. if (AM.HasBaseReg) return false; break; default: // Other stuff never works. return false; } return true; } bool X86TargetLowering::isVectorShiftByScalarCheap(Type *Ty) const { unsigned Bits = Ty->getScalarSizeInBits(); // 8-bit shifts are always expensive, but versions with a scalar amount aren't // particularly cheaper than those without. if (Bits == 8) return false; // On AVX2 there are new vpsllv[dq] instructions (and other shifts), that make // variable shifts just as cheap as scalar ones. if (Subtarget->hasInt256() && (Bits == 32 || Bits == 64)) return false; // Otherwise, it's significantly cheaper to shift by a scalar amount than by a // fully general vector. 
  return true;
}

bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 > NumBits2;
}

bool X86TargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;

  if (!isTypeLegal(EVT::getEVT(Ty1)))
    return false;

  assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");

  // Assuming the caller doesn't have a zeroext or signext return parameter,
  // truncation all the way down to i1 is valid.
  return true;
}

bool X86TargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<32>(Imm);
}

bool X86TargetLowering::isLegalAddImmediate(int64_t Imm) const {
  // Can also use sub to handle negated immediates.
  return isInt<32>(Imm);
}

bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 > NumBits2;
}

bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
  // x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
  return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget->is64Bit();
}

bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
  // x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
  return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit();
}

bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  EVT VT1 = Val.getValueType();
  if (isZExtFree(VT1, VT2))
    return true;

  if (Val.getOpcode() != ISD::LOAD)
    return false;

  if (!VT1.isSimple() || !VT1.isInteger() ||
      !VT2.isSimple() || !VT2.isInteger())
    return false;

  switch (VT1.getSimpleVT().SimpleTy) {
  default: break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    // X86 has 8, 16, and 32-bit zero-extending loads.
    return true;
  }

  return false;
}

bool X86TargetLowering::isVectorLoadExtDesirable(SDValue) const { return true; }

bool X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  if (!Subtarget->hasAnyFMA())
    return false;

  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  default:
    break;
  }

  return false;
}

bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
  // i16 instructions are longer (0x66 prefix) and potentially slower.
  return !(VT1 == MVT::i32 && VT2 == MVT::i16);
}

/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
                                      EVT VT) const {
  if (!VT.isSimple())
    return false;

  // Not for i1 vectors
  if (VT.getSimpleVT().getScalarType() == MVT::i1)
    return false;

  // Very little shuffling can be done for 64-bit vectors right now.
  if (VT.getSimpleVT().getSizeInBits() == 64)
    return false;

  // We only care that the types being shuffled are legal. The lowering can
  // handle any possible shuffle mask that results.
  return isTypeLegal(VT.getSimpleVT());
}

bool
X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
                                          EVT VT) const {
  // Just delegate to the generic legality, clear masks aren't special.
return isShuffleMaskLegal(Mask, VT); } //===----------------------------------------------------------------------===// // X86 Scheduler Hooks //===----------------------------------------------------------------------===// /// Utility function to emit xbegin specifying the start of an RTM region. static MachineBasicBlock *EmitXBegin(MachineInstr *MI, MachineBasicBlock *MBB, const TargetInstrInfo *TII) { DebugLoc DL = MI->getDebugLoc(); const BasicBlock *BB = MBB->getBasicBlock(); MachineFunction::iterator I = ++MBB->getIterator(); // For the v = xbegin(), we generate // // thisMBB: // xbegin sinkMBB // // mainMBB: // eax = -1 // // sinkMBB: // v = eax MachineBasicBlock *thisMBB = MBB; MachineFunction *MF = MBB->getParent(); MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); MF->insert(I, mainMBB); MF->insert(I, sinkMBB); // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)), MBB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); // thisMBB: // xbegin sinkMBB // # fallthrough to mainMBB // # abortion to sinkMBB BuildMI(thisMBB, DL, TII->get(X86::XBEGIN_4)).addMBB(sinkMBB); thisMBB->addSuccessor(mainMBB); thisMBB->addSuccessor(sinkMBB); // mainMBB: // EAX = -1 BuildMI(mainMBB, DL, TII->get(X86::MOV32ri), X86::EAX).addImm(-1); mainMBB->addSuccessor(sinkMBB); // sinkMBB: // EAX is live into the sinkMBB sinkMBB->addLiveIn(X86::EAX); BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg()) .addReg(X86::EAX); MI->eraseFromParent(); return sinkMBB; } // FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8 // or XMM0_V32I8 in AVX all of this code can be replaced with that // in the .td file. 
static MachineBasicBlock *EmitPCMPSTRM(MachineInstr *MI, MachineBasicBlock *BB, const TargetInstrInfo *TII) { unsigned Opc; switch (MI->getOpcode()) { default: llvm_unreachable("illegal opcode!"); case X86::PCMPISTRM128REG: Opc = X86::PCMPISTRM128rr; break; case X86::VPCMPISTRM128REG: Opc = X86::VPCMPISTRM128rr; break; case X86::PCMPISTRM128MEM: Opc = X86::PCMPISTRM128rm; break; case X86::VPCMPISTRM128MEM: Opc = X86::VPCMPISTRM128rm; break; case X86::PCMPESTRM128REG: Opc = X86::PCMPESTRM128rr; break; case X86::VPCMPESTRM128REG: Opc = X86::VPCMPESTRM128rr; break; case X86::PCMPESTRM128MEM: Opc = X86::PCMPESTRM128rm; break; case X86::VPCMPESTRM128MEM: Opc = X86::VPCMPESTRM128rm; break; } DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc)); unsigned NumArgs = MI->getNumOperands(); for (unsigned i = 1; i < NumArgs; ++i) { MachineOperand &Op = MI->getOperand(i); if (!(Op.isReg() && Op.isImplicit())) MIB.addOperand(Op); } if (MI->hasOneMemOperand()) MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg()) .addReg(X86::XMM0); MI->eraseFromParent(); return BB; } // FIXME: Custom handling because TableGen doesn't support multiple implicit // defs in an instruction pattern static MachineBasicBlock *EmitPCMPSTRI(MachineInstr *MI, MachineBasicBlock *BB, const TargetInstrInfo *TII) { unsigned Opc; switch (MI->getOpcode()) { default: llvm_unreachable("illegal opcode!"); case X86::PCMPISTRIREG: Opc = X86::PCMPISTRIrr; break; case X86::VPCMPISTRIREG: Opc = X86::VPCMPISTRIrr; break; case X86::PCMPISTRIMEM: Opc = X86::PCMPISTRIrm; break; case X86::VPCMPISTRIMEM: Opc = X86::VPCMPISTRIrm; break; case X86::PCMPESTRIREG: Opc = X86::PCMPESTRIrr; break; case X86::VPCMPESTRIREG: Opc = X86::VPCMPESTRIrr; break; case X86::PCMPESTRIMEM: Opc = X86::PCMPESTRIrm; break; case X86::VPCMPESTRIMEM: Opc = X86::VPCMPESTRIrm; break; } DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc)); unsigned NumArgs = MI->getNumOperands(); // remove the results for (unsigned i = 1; i < NumArgs; ++i) { MachineOperand &Op = MI->getOperand(i); if (!(Op.isReg() && Op.isImplicit())) MIB.addOperand(Op); } if (MI->hasOneMemOperand()) MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg()) .addReg(X86::ECX); MI->eraseFromParent(); return BB; } static MachineBasicBlock *EmitWRPKRU(MachineInstr *MI, MachineBasicBlock *BB, const X86Subtarget *Subtarget) { DebugLoc dl = MI->getDebugLoc(); const TargetInstrInfo *TII = Subtarget->getInstrInfo(); // insert input VAL into EAX BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX) .addReg(MI->getOperand(0).getReg()); // insert zero to ECX BuildMI(*BB, MI, dl, TII->get(X86::XOR32rr), X86::ECX) .addReg(X86::ECX) .addReg(X86::ECX); // insert zero to EDX BuildMI(*BB, MI, dl, TII->get(X86::XOR32rr), X86::EDX) .addReg(X86::EDX) .addReg(X86::EDX); // insert WRPKRU instruction BuildMI(*BB, MI, dl, TII->get(X86::WRPKRUr)); MI->eraseFromParent(); // The pseudo is gone now. 
return BB; } static MachineBasicBlock *EmitRDPKRU(MachineInstr *MI, MachineBasicBlock *BB, const X86Subtarget *Subtarget) { DebugLoc dl = MI->getDebugLoc(); const TargetInstrInfo *TII = Subtarget->getInstrInfo(); // insert zero to ECX BuildMI(*BB, MI, dl, TII->get(X86::XOR32rr), X86::ECX) .addReg(X86::ECX) .addReg(X86::ECX); // insert RDPKRU instruction BuildMI(*BB, MI, dl, TII->get(X86::RDPKRUr)); BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg()) .addReg(X86::EAX); MI->eraseFromParent(); // The pseudo is gone now. return BB; } static MachineBasicBlock *EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB, const X86Subtarget *Subtarget) { DebugLoc dl = MI->getDebugLoc(); const TargetInstrInfo *TII = Subtarget->getInstrInfo(); // Address into RAX/EAX, other two args into ECX, EDX. unsigned MemOpc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r; unsigned MemReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg); for (int i = 0; i < X86::AddrNumOperands; ++i) MIB.addOperand(MI->getOperand(i)); unsigned ValOps = X86::AddrNumOperands; BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX) .addReg(MI->getOperand(ValOps).getReg()); BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EDX) .addReg(MI->getOperand(ValOps+1).getReg()); // The instruction doesn't actually take any operands though. BuildMI(*BB, MI, dl, TII->get(X86::MONITORrrr)); MI->eraseFromParent(); // The pseudo is gone now. return BB; } MachineBasicBlock * X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { // Emit va_arg instruction on X86-64. // Operands to this pseudo-instruction: // 0 ) Output : destination address (reg) // 1-5) Input : va_list address (addr, i64mem) // 6 ) ArgSize : Size (in bytes) of vararg type // 7 ) ArgMode : 0=overflow only, 1=use gp_offset, 2=use fp_offset // 8 ) Align : Alignment of type // 9 ) EFLAGS (implicit-def) assert(MI->getNumOperands() == 10 && "VAARG_64 should have 10 operands!"); static_assert(X86::AddrNumOperands == 5, "VAARG_64 assumes 5 address operands"); unsigned DestReg = MI->getOperand(0).getReg(); MachineOperand &Base = MI->getOperand(1); MachineOperand &Scale = MI->getOperand(2); MachineOperand &Index = MI->getOperand(3); MachineOperand &Disp = MI->getOperand(4); MachineOperand &Segment = MI->getOperand(5); unsigned ArgSize = MI->getOperand(6).getImm(); unsigned ArgMode = MI->getOperand(7).getImm(); unsigned Align = MI->getOperand(8).getImm(); // Memory Reference assert(MI->hasOneMemOperand() && "Expected VAARG_64 to have one memoperand"); MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); // Machine Information const TargetInstrInfo *TII = Subtarget->getInstrInfo(); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64); const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32); DebugLoc DL = MI->getDebugLoc(); // struct va_list { // i32 gp_offset // i32 fp_offset // i64 overflow_area (address) // i64 reg_save_area (address) // } // sizeof(va_list) = 24 // alignment(va_list) = 8 unsigned TotalNumIntRegs = 6; unsigned TotalNumXMMRegs = 8; bool UseGPOffset = (ArgMode == 1); bool UseFPOffset = (ArgMode == 2); unsigned MaxOffset = TotalNumIntRegs * 8 + (UseFPOffset ? 
TotalNumXMMRegs * 16 : 0); /* Align ArgSize to a multiple of 8 */ unsigned ArgSizeA8 = (ArgSize + 7) & ~7; bool NeedsAlign = (Align > 8); MachineBasicBlock *thisMBB = MBB; MachineBasicBlock *overflowMBB; MachineBasicBlock *offsetMBB; MachineBasicBlock *endMBB; unsigned OffsetDestReg = 0; // Argument address computed by offsetMBB unsigned OverflowDestReg = 0; // Argument address computed by overflowMBB unsigned OffsetReg = 0; if (!UseGPOffset && !UseFPOffset) { // If we only pull from the overflow region, we don't create a branch. // We don't need to alter control flow. OffsetDestReg = 0; // unused OverflowDestReg = DestReg; offsetMBB = nullptr; overflowMBB = thisMBB; endMBB = thisMBB; } else { // First emit code to check if gp_offset (or fp_offset) is below the bound. // If so, pull the argument from reg_save_area. (branch to offsetMBB) // If not, pull from overflow_area. (branch to overflowMBB) // // thisMBB // | . // | . // offsetMBB overflowMBB // | . // | . // endMBB // Registers for the PHI in endMBB OffsetDestReg = MRI.createVirtualRegister(AddrRegClass); OverflowDestReg = MRI.createVirtualRegister(AddrRegClass); const BasicBlock *LLVM_BB = MBB->getBasicBlock(); MachineFunction *MF = MBB->getParent(); overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB); offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB); endMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator MBBIter = ++MBB->getIterator(); // Insert the new basic blocks MF->insert(MBBIter, offsetMBB); MF->insert(MBBIter, overflowMBB); MF->insert(MBBIter, endMBB); // Transfer the remainder of MBB and its successor edges to endMBB. endMBB->splice(endMBB->begin(), thisMBB, std::next(MachineBasicBlock::iterator(MI)), thisMBB->end()); endMBB->transferSuccessorsAndUpdatePHIs(thisMBB); // Make offsetMBB and overflowMBB successors of thisMBB thisMBB->addSuccessor(offsetMBB); thisMBB->addSuccessor(overflowMBB); // endMBB is a successor of both offsetMBB and overflowMBB offsetMBB->addSuccessor(endMBB); overflowMBB->addSuccessor(endMBB); // Load the offset value into a register OffsetReg = MRI.createVirtualRegister(OffsetRegClass); BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg) .addOperand(Base) .addOperand(Scale) .addOperand(Index) .addDisp(Disp, UseFPOffset ? 4 : 0) .addOperand(Segment) .setMemRefs(MMOBegin, MMOEnd); // Check if there is enough room left to pull this argument. BuildMI(thisMBB, DL, TII->get(X86::CMP32ri)) .addReg(OffsetReg) .addImm(MaxOffset + 8 - ArgSizeA8); // Branch to "overflowMBB" if offset >= max // Fall through to "offsetMBB" otherwise BuildMI(thisMBB, DL, TII->get(X86::GetCondBranchFromCond(X86::COND_AE))) .addMBB(overflowMBB); } // In offsetMBB, emit code to use the reg_save_area. if (offsetMBB) { assert(OffsetReg != 0); // Read the reg_save_area address. unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass); BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg) .addOperand(Base) .addOperand(Scale) .addOperand(Index) .addDisp(Disp, 16) .addOperand(Segment) .setMemRefs(MMOBegin, MMOEnd); // Zero-extend the offset unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass); BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64) .addImm(0) .addReg(OffsetReg) .addImm(X86::sub_32bit); // Add the offset to the reg_save_area to get the final address. 
BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg) .addReg(OffsetReg64) .addReg(RegSaveReg); // Compute the offset for the next argument unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass); BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg) .addReg(OffsetReg) .addImm(UseFPOffset ? 16 : 8); // Store it back into the va_list. BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr)) .addOperand(Base) .addOperand(Scale) .addOperand(Index) .addDisp(Disp, UseFPOffset ? 4 : 0) .addOperand(Segment) .addReg(NextOffsetReg) .setMemRefs(MMOBegin, MMOEnd); // Jump to endMBB BuildMI(offsetMBB, DL, TII->get(X86::JMP_1)) .addMBB(endMBB); } // // Emit code to use overflow area // // Load the overflow_area address into a register. unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass); BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg) .addOperand(Base) .addOperand(Scale) .addOperand(Index) .addDisp(Disp, 8) .addOperand(Segment) .setMemRefs(MMOBegin, MMOEnd); // If we need to align it, do so. Otherwise, just copy the address // to OverflowDestReg. if (NeedsAlign) { // Align the overflow address assert((Align & (Align-1)) == 0 && "Alignment must be a power of 2"); unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass); // aligned_addr = (addr + (align-1)) & ~(align-1) BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg) .addReg(OverflowAddrReg) .addImm(Align-1); BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg) .addReg(TmpReg) .addImm(~(uint64_t)(Align-1)); } else { BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg) .addReg(OverflowAddrReg); } // Compute the next overflow address after this argument. // (the overflow address should be kept 8-byte aligned) unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass); BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg) .addReg(OverflowDestReg) .addImm(ArgSizeA8); // Store the new overflow address. BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr)) .addOperand(Base) .addOperand(Scale) .addOperand(Index) .addDisp(Disp, 8) .addOperand(Segment) .addReg(NextAddrReg) .setMemRefs(MMOBegin, MMOEnd); // If we branched, emit the PHI to the front of endMBB. if (offsetMBB) { BuildMI(*endMBB, endMBB->begin(), DL, TII->get(X86::PHI), DestReg) .addReg(OffsetDestReg).addMBB(offsetMBB) .addReg(OverflowDestReg).addMBB(overflowMBB); } // Erase the pseudo instruction MI->eraseFromParent(); return endMBB; } MachineBasicBlock * X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( MachineInstr *MI, MachineBasicBlock *MBB) const { // Emit code to save XMM registers to the stack. The ABI says that the // number of registers to save is given in %al, so it's theoretically // possible to do an indirect jump trick to avoid saving all of them, // however this code takes a simpler approach and just executes all // of the stores if %al is non-zero. It's less code, and it's probably // easier on the hardware branch predictor, and stores aren't all that // expensive anyway. // Create the new basic blocks. One block contains all the XMM stores, // and one block is the final destination regardless of whether any // stores were performed. 
const BasicBlock *LLVM_BB = MBB->getBasicBlock(); MachineFunction *F = MBB->getParent(); MachineFunction::iterator MBBIter = ++MBB->getIterator(); MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(MBBIter, XMMSaveMBB); F->insert(MBBIter, EndMBB); // Transfer the remainder of MBB and its successor edges to EndMBB. EndMBB->splice(EndMBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)), MBB->end()); EndMBB->transferSuccessorsAndUpdatePHIs(MBB); // The original block will now fall through to the XMM save block. MBB->addSuccessor(XMMSaveMBB); // The XMMSaveMBB will fall through to the end block. XMMSaveMBB->addSuccessor(EndMBB); // Now add the instructions. const TargetInstrInfo *TII = Subtarget->getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); unsigned CountReg = MI->getOperand(0).getReg(); int64_t RegSaveFrameIndex = MI->getOperand(1).getImm(); int64_t VarArgsFPOffset = MI->getOperand(2).getImm(); if (!Subtarget->isCallingConvWin64(F->getFunction()->getCallingConv())) { // If %al is 0, branch around the XMM save block. BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg); BuildMI(MBB, DL, TII->get(X86::JE_1)).addMBB(EndMBB); MBB->addSuccessor(EndMBB); } // Make sure the last operand is EFLAGS, which gets clobbered by the branch // that was just emitted, but clearly shouldn't be "saved". assert((MI->getNumOperands() <= 3 || !MI->getOperand(MI->getNumOperands() - 1).isReg() || MI->getOperand(MI->getNumOperands() - 1).getReg() == X86::EFLAGS) && "Expected last argument to be EFLAGS"); unsigned MOVOpc = Subtarget->hasFp256() ? X86::VMOVAPSmr : X86::MOVAPSmr; // In the XMM save block, save all the XMM argument registers. for (int i = 3, e = MI->getNumOperands() - 1; i != e; ++i) { int64_t Offset = (i - 3) * 16 + VarArgsFPOffset; MachineMemOperand *MMO = F->getMachineMemOperand( MachinePointerInfo::getFixedStack(*F, RegSaveFrameIndex, Offset), MachineMemOperand::MOStore, /*Size=*/16, /*Align=*/16); BuildMI(XMMSaveMBB, DL, TII->get(MOVOpc)) .addFrameIndex(RegSaveFrameIndex) .addImm(/*Scale=*/1) .addReg(/*IndexReg=*/0) .addImm(/*Disp=*/Offset) .addReg(/*Segment=*/0) .addReg(MI->getOperand(i).getReg()) .addMemOperand(MMO); } MI->eraseFromParent(); // The pseudo instruction is gone now. return EndMBB; } // The EFLAGS operand of SelectItr might be missing a kill marker // because there were multiple uses of EFLAGS, and ISel didn't know // which to mark. Figure out whether SelectItr should have had a // kill marker, and set it if it should. Returns the correct kill // marker value. static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr, MachineBasicBlock* BB, const TargetRegisterInfo* TRI) { // Scan forward through BB for a use/def of EFLAGS. MachineBasicBlock::iterator miI(std::next(SelectItr)); for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) { const MachineInstr& mi = *miI; if (mi.readsRegister(X86::EFLAGS)) return false; if (mi.definesRegister(X86::EFLAGS)) break; // Should have kill-flag - update below. } // If we hit the end of the block, check whether EFLAGS is live into a // successor. if (miI == BB->end()) { for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(), sEnd = BB->succ_end(); sItr != sEnd; ++sItr) { MachineBasicBlock* succ = *sItr; if (succ->isLiveIn(X86::EFLAGS)) return false; } } // We found a def, or hit the end of the basic block and EFLAGS wasn't live // out. SelectMI should have a kill flag on EFLAGS. 
SelectItr->addRegisterKilled(X86::EFLAGS, TRI); return true; } // Return true if it is OK for this CMOV pseudo-opcode to be cascaded // together with other CMOV pseudo-opcodes into a single basic-block with // conditional jump around it. static bool isCMOVPseudo(MachineInstr *MI) { switch (MI->getOpcode()) { case X86::CMOV_FR32: case X86::CMOV_FR64: case X86::CMOV_GR8: case X86::CMOV_GR16: case X86::CMOV_GR32: case X86::CMOV_RFP32: case X86::CMOV_RFP64: case X86::CMOV_RFP80: case X86::CMOV_V2F64: case X86::CMOV_V2I64: case X86::CMOV_V4F32: case X86::CMOV_V4F64: case X86::CMOV_V4I64: case X86::CMOV_V16F32: case X86::CMOV_V8F32: case X86::CMOV_V8F64: case X86::CMOV_V8I64: case X86::CMOV_V8I1: case X86::CMOV_V16I1: case X86::CMOV_V32I1: case X86::CMOV_V64I1: return true; default: return false; } } MachineBasicBlock * X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, MachineBasicBlock *BB) const { const TargetInstrInfo *TII = Subtarget->getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); // To "insert" a SELECT_CC instruction, we actually have to insert the // diamond control-flow pattern. The incoming instruction knows the // destination vreg to set, the condition code register to branch on, the // true/false values to select between, and a branch opcode to use. const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = ++BB->getIterator(); // thisMBB: // ... // TrueVal = ... // cmpTY ccX, r1, r2 // bCC copy1MBB // fallthrough --> copy0MBB MachineBasicBlock *thisMBB = BB; MachineFunction *F = BB->getParent(); // This code lowers all pseudo-CMOV instructions. Generally it lowers these // as described above, by inserting a BB, and then making a PHI at the join // point to select the true and false operands of the CMOV in the PHI. // // The code also handles two different cases of multiple CMOV opcodes // in a row. // // Case 1: // In this case, there are multiple CMOVs in a row, all which are based on // the same condition setting (or the exact opposite condition setting). // In this case we can lower all the CMOVs using a single inserted BB, and // then make a number of PHIs at the join point to model the CMOVs. The only // trickiness here, is that in a case like: // // t2 = CMOV cond1 t1, f1 // t3 = CMOV cond1 t2, f2 // // when rewriting this into PHIs, we have to perform some renaming on the // temps since you cannot have a PHI operand refer to a PHI result earlier // in the same block. The "simple" but wrong lowering would be: // // t2 = PHI t1(BB1), f1(BB2) // t3 = PHI t2(BB1), f2(BB2) // // but clearly t2 is not defined in BB1, so that is incorrect. The proper // renaming is to note that on the path through BB1, t2 is really just a // copy of t1, and do that renaming, properly generating: // // t2 = PHI t1(BB1), f1(BB2) // t3 = PHI t1(BB1), f2(BB2) // // Case 2, we lower cascaded CMOVs such as // // (CMOV (CMOV F, T, cc1), T, cc2) // // to two successives branches. For that, we look for another CMOV as the // following instruction. // // Without this, we would add a PHI between the two jumps, which ends up // creating a few copies all around. For instance, for // // (sitofp (zext (fcmp une))) // // we would generate: // // ucomiss %xmm1, %xmm0 // movss <1.0f>, %xmm0 // movaps %xmm0, %xmm1 // jne .LBB5_2 // xorps %xmm1, %xmm1 // .LBB5_2: // jp .LBB5_4 // movaps %xmm1, %xmm0 // .LBB5_4: // retq // // because this custom-inserter would have generated: // // A // | \ // | B // | / // C // | \ // | D // | / // E // // A: X = ...; Y = ... 
// B: empty // C: Z = PHI [X, A], [Y, B] // D: empty // E: PHI [X, C], [Z, D] // // If we lower both CMOVs in a single step, we can instead generate: // // A // | \ // | C // | /| // |/ | // | | // | D // | / // E // // A: X = ...; Y = ... // D: empty // E: PHI [X, A], [X, C], [Y, D] // // Which, in our sitofp/fcmp example, gives us something like: // // ucomiss %xmm1, %xmm0 // movss <1.0f>, %xmm0 // jne .LBB5_4 // jp .LBB5_4 // xorps %xmm0, %xmm0 // .LBB5_4: // retq // MachineInstr *CascadedCMOV = nullptr; MachineInstr *LastCMOV = MI; X86::CondCode CC = X86::CondCode(MI->getOperand(3).getImm()); X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC); MachineBasicBlock::iterator NextMIIt = std::next(MachineBasicBlock::iterator(MI)); // Check for case 1, where there are multiple CMOVs with the same condition // first. Of the two cases of multiple CMOV lowerings, case 1 reduces the // number of jumps the most. if (isCMOVPseudo(MI)) { // See if we have a string of CMOVS with the same condition. while (NextMIIt != BB->end() && isCMOVPseudo(NextMIIt) && (NextMIIt->getOperand(3).getImm() == CC || NextMIIt->getOperand(3).getImm() == OppCC)) { LastCMOV = &*NextMIIt; ++NextMIIt; } } // This checks for case 2, but only do this if we didn't already find // case 1, as indicated by LastCMOV == MI. if (LastCMOV == MI && NextMIIt != BB->end() && NextMIIt->getOpcode() == MI->getOpcode() && NextMIIt->getOperand(2).getReg() == MI->getOperand(2).getReg() && NextMIIt->getOperand(1).getReg() == MI->getOperand(0).getReg() && NextMIIt->getOperand(1).isKill()) { CascadedCMOV = &*NextMIIt; } MachineBasicBlock *jcc1MBB = nullptr; // If we have a cascaded CMOV, we lower it to two successive branches to // the same block. EFLAGS is used by both, so mark it as live in the second. if (CascadedCMOV) { jcc1MBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, jcc1MBB); jcc1MBB->addLiveIn(X86::EFLAGS); } MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, copy0MBB); F->insert(It, sinkMBB); // If the EFLAGS register isn't dead in the terminator, then claim that it's // live into the sink and copy blocks. const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); MachineInstr *LastEFLAGSUser = CascadedCMOV ? CascadedCMOV : LastCMOV; if (!LastEFLAGSUser->killsRegister(X86::EFLAGS) && !checkAndUpdateEFLAGSKill(LastEFLAGSUser, BB, TRI)) { copy0MBB->addLiveIn(X86::EFLAGS); sinkMBB->addLiveIn(X86::EFLAGS); } // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), BB, std::next(MachineBasicBlock::iterator(LastCMOV)), BB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(BB); // Add the true and fallthrough blocks as its successors. if (CascadedCMOV) { // The fallthrough block may be jcc1MBB, if we have a cascaded CMOV. BB->addSuccessor(jcc1MBB); // In that case, jcc1MBB will itself fallthrough the copy0MBB, and // jump to the sinkMBB. jcc1MBB->addSuccessor(copy0MBB); jcc1MBB->addSuccessor(sinkMBB); } else { BB->addSuccessor(copy0MBB); } // The true block target of the first (or only) branch is always sinkMBB. BB->addSuccessor(sinkMBB); // Create the conditional branch instruction. unsigned Opc = X86::GetCondBranchFromCond(CC); BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB); if (CascadedCMOV) { unsigned Opc2 = X86::GetCondBranchFromCond( (X86::CondCode)CascadedCMOV->getOperand(3).getImm()); BuildMI(jcc1MBB, DL, TII->get(Opc2)).addMBB(sinkMBB); } // copy0MBB: // %FalseValue = ... 
  //  # fallthrough to sinkMBB
  copy0MBB->addSuccessor(sinkMBB);

  //  sinkMBB:
  //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
  //  ...
  MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
  MachineBasicBlock::iterator MIItEnd =
    std::next(MachineBasicBlock::iterator(LastCMOV));
  MachineBasicBlock::iterator SinkInsertionPoint = sinkMBB->begin();
  DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
  MachineInstrBuilder MIB;

  // As we are creating the PHIs, we have to be careful if there is more than
  // one.  Later CMOVs may reference the results of earlier CMOVs, but later
  // PHIs have to reference the individual true/false inputs from earlier PHIs.
  // That also means that PHI construction must work forward from earlier to
  // later, and that the code must maintain a mapping from earlier PHI's
  // destination registers to the registers that went into the PHI.

  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
    unsigned DestReg = MIIt->getOperand(0).getReg();
    unsigned Op1Reg = MIIt->getOperand(1).getReg();
    unsigned Op2Reg = MIIt->getOperand(2).getReg();

    // If this CMOV we are generating is the opposite condition from
    // the jump we generated, then we have to swap the operands for the
    // PHI that is going to be generated.
    if (MIIt->getOperand(3).getImm() == OppCC)
      std::swap(Op1Reg, Op2Reg);

    if (RegRewriteTable.find(Op1Reg) != RegRewriteTable.end())
      Op1Reg = RegRewriteTable[Op1Reg].first;

    if (RegRewriteTable.find(Op2Reg) != RegRewriteTable.end())
      Op2Reg = RegRewriteTable[Op2Reg].second;

    MIB = BuildMI(*sinkMBB, SinkInsertionPoint, DL,
                  TII->get(X86::PHI), DestReg)
          .addReg(Op1Reg).addMBB(copy0MBB)
          .addReg(Op2Reg).addMBB(thisMBB);

    // Add this PHI to the rewrite table.
    RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
  }

  // If we have a cascaded CMOV, the second Jcc provides the same incoming
  // value as the first Jcc (the True operand of the SELECT_CC/CMOV nodes).
  if (CascadedCMOV) {
    MIB.addReg(MI->getOperand(2).getReg()).addMBB(jcc1MBB);
    // Copy the PHI result to the register defined by the second CMOV.
    BuildMI(*sinkMBB, std::next(MachineBasicBlock::iterator(MIB.getInstr())),
            DL, TII->get(TargetOpcode::COPY),
            CascadedCMOV->getOperand(0).getReg())
        .addReg(MI->getOperand(0).getReg());
    CascadedCMOV->eraseFromParent();
  }

  // Now remove the CMOV(s).
  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; )
    (MIIt++)->eraseFromParent();

  return sinkMBB;
}

MachineBasicBlock *
X86TargetLowering::EmitLoweredAtomicFP(MachineInstr *MI,
                                       MachineBasicBlock *BB) const {
  // Combine the following atomic floating-point modification pattern:
  //   a.store(reg OP a.load(acquire), release)
  // Transform them into:
  //   OPss (%gpr), %xmm
  //   movss %xmm, (%gpr)
  // Or sd equivalent for 64-bit operations.
unsigned MOp, FOp; switch (MI->getOpcode()) { default: llvm_unreachable("unexpected instr type for EmitLoweredAtomicFP"); case X86::RELEASE_FADD32mr: MOp = X86::MOVSSmr; FOp = X86::ADDSSrm; break; case X86::RELEASE_FADD64mr: MOp = X86::MOVSDmr; FOp = X86::ADDSDrm; break; } const X86InstrInfo *TII = Subtarget->getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); MachineOperand MSrc = MI->getOperand(0); unsigned VSrc = MI->getOperand(5).getReg(); const MachineOperand &Disp = MI->getOperand(3); MachineOperand ZeroDisp = MachineOperand::CreateImm(0); bool hasDisp = Disp.isGlobal() || Disp.isImm(); if (hasDisp && MSrc.isReg()) MSrc.setIsKill(false); MachineInstrBuilder MIM = BuildMI(*BB, MI, DL, TII->get(MOp)) .addOperand(/*Base=*/MSrc) .addImm(/*Scale=*/1) .addReg(/*Index=*/0) .addDisp(hasDisp ? Disp : ZeroDisp, /*off=*/0) .addReg(0); MachineInstr *MIO = BuildMI(*BB, (MachineInstr *)MIM, DL, TII->get(FOp), MRI.createVirtualRegister(MRI.getRegClass(VSrc))) .addReg(VSrc) .addOperand(/*Base=*/MSrc) .addImm(/*Scale=*/1) .addReg(/*Index=*/0) .addDisp(hasDisp ? Disp : ZeroDisp, /*off=*/0) .addReg(/*Segment=*/0); MIM.addReg(MIO->getOperand(0).getReg(), RegState::Kill); MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } MachineBasicBlock * X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB) const { MachineFunction *MF = BB->getParent(); const TargetInstrInfo *TII = Subtarget->getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); assert(MF->shouldSplitStack()); const bool Is64Bit = Subtarget->is64Bit(); const bool IsLP64 = Subtarget->isTarget64BitLP64(); const unsigned TlsReg = Is64Bit ? X86::FS : X86::GS; const unsigned TlsOffset = IsLP64 ? 0x70 : Is64Bit ? 0x40 : 0x30; // BB: // ... [Till the alloca] // If stacklet is not large enough, jump to mallocMBB // // bumpMBB: // Allocate by subtracting from RSP // Jump to continueMBB // // mallocMBB: // Allocate by call to runtime // // continueMBB: // ... // [rest of original BB] // MachineBasicBlock *mallocMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *bumpMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *continueMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetRegisterClass *AddrRegClass = getRegClassFor(getPointerTy(MF->getDataLayout())); unsigned mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass), bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass), tmpSPVReg = MRI.createVirtualRegister(AddrRegClass), SPLimitVReg = MRI.createVirtualRegister(AddrRegClass), sizeVReg = MI->getOperand(1).getReg(), physSPReg = IsLP64 || Subtarget->isTargetNaCl64() ? X86::RSP : X86::ESP; MachineFunction::iterator MBBIter = ++BB->getIterator(); MF->insert(MBBIter, bumpMBB); MF->insert(MBBIter, mallocMBB); MF->insert(MBBIter, continueMBB); continueMBB->splice(continueMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); continueMBB->transferSuccessorsAndUpdatePHIs(BB); // Add code to the main basic block to check if the stack limit has been hit, // and if so, jump to mallocMBB otherwise to bumpMBB. BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg); BuildMI(BB, DL, TII->get(IsLP64 ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg) .addReg(tmpSPVReg).addReg(sizeVReg); BuildMI(BB, DL, TII->get(IsLP64 ? 
X86::CMP64mr:X86::CMP32mr)) .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg) .addReg(SPLimitVReg); BuildMI(BB, DL, TII->get(X86::JG_1)).addMBB(mallocMBB); // bumpMBB simply decreases the stack pointer, since we know the current // stacklet has enough space. BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), physSPReg) .addReg(SPLimitVReg); BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), bumpSPPtrVReg) .addReg(SPLimitVReg); BuildMI(bumpMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB); // Calls into a routine in libgcc to allocate more space from the heap. const uint32_t *RegMask = Subtarget->getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C); if (IsLP64) { BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI) .addReg(sizeVReg); BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32)) .addExternalSymbol("__morestack_allocate_stack_space") .addRegMask(RegMask) .addReg(X86::RDI, RegState::Implicit) .addReg(X86::RAX, RegState::ImplicitDefine); } else if (Is64Bit) { BuildMI(mallocMBB, DL, TII->get(X86::MOV32rr), X86::EDI) .addReg(sizeVReg); BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32)) .addExternalSymbol("__morestack_allocate_stack_space") .addRegMask(RegMask) .addReg(X86::EDI, RegState::Implicit) .addReg(X86::EAX, RegState::ImplicitDefine); } else { BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg) .addImm(12); BuildMI(mallocMBB, DL, TII->get(X86::PUSH32r)).addReg(sizeVReg); BuildMI(mallocMBB, DL, TII->get(X86::CALLpcrel32)) .addExternalSymbol("__morestack_allocate_stack_space") .addRegMask(RegMask) .addReg(X86::EAX, RegState::ImplicitDefine); } if (!Is64Bit) BuildMI(mallocMBB, DL, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg) .addImm(16); BuildMI(mallocMBB, DL, TII->get(TargetOpcode::COPY), mallocPtrVReg) .addReg(IsLP64 ? X86::RAX : X86::EAX); BuildMI(mallocMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB); // Set up the CFG correctly. BB->addSuccessor(bumpMBB); BB->addSuccessor(mallocMBB); mallocMBB->addSuccessor(continueMBB); bumpMBB->addSuccessor(continueMBB); // Take care of the PHI nodes. BuildMI(*continueMBB, continueMBB->begin(), DL, TII->get(X86::PHI), MI->getOperand(0).getReg()) .addReg(mallocPtrVReg).addMBB(mallocMBB) .addReg(bumpSPPtrVReg).addMBB(bumpMBB); // Delete the original pseudo instruction. MI->eraseFromParent(); // And we're done. return continueMBB; } MachineBasicBlock * X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI, MachineBasicBlock *BB) const { assert(!Subtarget->isTargetMachO()); DebugLoc DL = MI->getDebugLoc(); MachineInstr *ResumeMI = Subtarget->getFrameLowering()->emitStackProbe( *BB->getParent(), *BB, MI, DL, false); MachineBasicBlock *ResumeBB = ResumeMI->getParent(); MI->eraseFromParent(); // The pseudo instruction is gone now. return ResumeBB; } MachineBasicBlock * X86TargetLowering::EmitLoweredCatchRet(MachineInstr *MI, MachineBasicBlock *BB) const { MachineFunction *MF = BB->getParent(); const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); MachineBasicBlock *TargetMBB = MI->getOperand(0).getMBB(); DebugLoc DL = MI->getDebugLoc(); assert(!isAsynchronousEHPersonality( classifyEHPersonality(MF->getFunction()->getPersonalityFn())) && "SEH does not use catchret!"); // Only 32-bit EH needs to worry about manually restoring stack pointers. if (!Subtarget->is32Bit()) return BB; // C++ EH creates a new target block to hold the restore code, and wires up // the new block to the return destination with a normal JMP_4. 
  MachineBasicBlock *RestoreMBB =
      MF->CreateMachineBasicBlock(BB->getBasicBlock());
  assert(BB->succ_size() == 1);
  MF->insert(std::next(BB->getIterator()), RestoreMBB);
  RestoreMBB->transferSuccessorsAndUpdatePHIs(BB);
  BB->addSuccessor(RestoreMBB);
  MI->getOperand(0).setMBB(RestoreMBB);

  auto RestoreMBBI = RestoreMBB->begin();
  BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::EH_RESTORE));
  BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::JMP_4)).addMBB(TargetMBB);
  return BB;
}

MachineBasicBlock *
X86TargetLowering::EmitLoweredCatchPad(MachineInstr *MI,
                                       MachineBasicBlock *BB) const {
  MachineFunction *MF = BB->getParent();
  const Constant *PerFn = MF->getFunction()->getPersonalityFn();
  bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality(PerFn));
  // Only 32-bit SEH requires special handling for catchpad.
  if (IsSEH && Subtarget->is32Bit()) {
    const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
    DebugLoc DL = MI->getDebugLoc();
    BuildMI(*BB, MI, DL, TII.get(X86::EH_RESTORE));
  }
  MI->eraseFromParent();
  return BB;
}

MachineBasicBlock *
+X86TargetLowering::EmitLoweredTLSAddr(MachineInstr *MI,
+                                      MachineBasicBlock *BB) const {
+  // So, here we replace TLSADDR with the sequence:
+  // adjust_stackdown -> TLSADDR -> adjust_stackup.
+  // We need this because TLSADDR is lowered into calls
+  // inside MC, therefore without the two markers shrink-wrapping
+  // may push the prologue/epilogue past them.
+  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
+  DebugLoc DL = MI->getDebugLoc();
+  MachineFunction &MF = *BB->getParent();
+
+  // Emit CALLSEQ_START right before the instruction.
+  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
+  MachineInstrBuilder CallseqStart =
+    BuildMI(MF, DL, TII.get(AdjStackDown)).addImm(0);
+  BB->insert(MachineBasicBlock::iterator(MI), CallseqStart);
+
+  // Emit CALLSEQ_END right after the instruction.
+  // We don't call erase from parent because we want to keep the
+  // original instruction around.
+  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
+  MachineInstrBuilder CallseqEnd =
+    BuildMI(MF, DL, TII.get(AdjStackUp)).addImm(0).addImm(0);
+  BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd);
+
+  return BB;
+}
+
+MachineBasicBlock *
X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
                                      MachineBasicBlock *BB) const {
  // This is pretty easy.  We're taking the value that we received from
  // our load from the relocation, sticking it in either RDI (x86-64)
  // or EAX and doing an indirect call.  The return value will then
  // be in the normal return register.
  MachineFunction *F = BB->getParent();
  const X86InstrInfo *TII = Subtarget->getInstrInfo();
  DebugLoc DL = MI->getDebugLoc();

  assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?");
  assert(MI->getOperand(3).isGlobal() && "This should be a global");

  // Get a register mask for the lowered call.
  // FIXME: The 32-bit calls have non-standard calling conventions. Use a
  // proper register mask.
  const uint32_t *RegMask =
      Subtarget->is64Bit() ?
Subtarget->getRegisterInfo()->getDarwinTLSCallPreservedMask() : Subtarget->getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C); if (Subtarget->is64Bit()) { MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(X86::MOV64rm), X86::RDI) .addReg(X86::RIP) .addImm(0).addReg(0) .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, MI->getOperand(3).getTargetFlags()) .addReg(0); MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m)); addDirectMem(MIB, X86::RDI); MIB.addReg(X86::RAX, RegState::ImplicitDefine).addRegMask(RegMask); } else if (F->getTarget().getRelocationModel() != Reloc::PIC_) { MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), X86::EAX) .addReg(0) .addImm(0).addReg(0) .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, MI->getOperand(3).getTargetFlags()) .addReg(0); MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m)); addDirectMem(MIB, X86::EAX); MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask); } else { MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), X86::EAX) .addReg(TII->getGlobalBaseReg(F)) .addImm(0).addReg(0) .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, MI->getOperand(3).getTargetFlags()) .addReg(0); MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m)); addDirectMem(MIB, X86::EAX); MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask); } MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } MachineBasicBlock * X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI->getDebugLoc(); MachineFunction *MF = MBB->getParent(); const TargetInstrInfo *TII = Subtarget->getInstrInfo(); MachineRegisterInfo &MRI = MF->getRegInfo(); const BasicBlock *BB = MBB->getBasicBlock(); MachineFunction::iterator I = ++MBB->getIterator(); // Memory Reference MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); unsigned DstReg; unsigned MemOpndSlot = 0; unsigned CurOp = 0; DstReg = MI->getOperand(CurOp++).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); assert(RC->hasType(MVT::i32) && "Invalid destination!"); unsigned mainDstReg = MRI.createVirtualRegister(RC); unsigned restoreDstReg = MRI.createVirtualRegister(RC); MemOpndSlot = CurOp; MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); // For v = setjmp(buf), we generate // // thisMBB: // buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB // SjLjSetup restoreMBB // // mainMBB: // v_main = 0 // // sinkMBB: // v = phi(main, restore) // // restoreMBB: // if base pointer being used, load it from frame // v_restore = 1 MachineBasicBlock *thisMBB = MBB; MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB); MF->insert(I, mainMBB); MF->insert(I, sinkMBB); MF->push_back(restoreMBB); restoreMBB->setHasAddressTaken(); MachineInstrBuilder MIB; // Transfer the remainder of BB and its successor edges to sinkMBB. 
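// Everything after the setjmp pseudo is spliced into sinkMBB; the PHI built
// there later selects 0 (mainDstReg, the direct path through mainMBB) or
// 1 (restoreDstReg, the longjmp path through restoreMBB).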
sinkMBB->splice(sinkMBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)), MBB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); // thisMBB: unsigned PtrStoreOpc = 0; unsigned LabelReg = 0; const int64_t LabelOffset = 1 * PVT.getStoreSize(); Reloc::Model RM = MF->getTarget().getRelocationModel(); bool UseImmLabel = (MF->getTarget().getCodeModel() == CodeModel::Small) && (RM == Reloc::Static || RM == Reloc::DynamicNoPIC); // Prepare IP either in reg or imm. if (!UseImmLabel) { PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr; const TargetRegisterClass *PtrRC = getRegClassFor(PVT); LabelReg = MRI.createVirtualRegister(PtrRC); if (Subtarget->is64Bit()) { MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA64r), LabelReg) .addReg(X86::RIP) .addImm(0) .addReg(0) .addMBB(restoreMBB) .addReg(0); } else { const X86InstrInfo *XII = static_cast(TII); MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA32r), LabelReg) .addReg(XII->getGlobalBaseReg(MF)) .addImm(0) .addReg(0) .addMBB(restoreMBB, Subtarget->ClassifyBlockAddressReference()) .addReg(0); } } else PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi; // Store IP MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrStoreOpc)); for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { if (i == X86::AddrDisp) MIB.addDisp(MI->getOperand(MemOpndSlot + i), LabelOffset); else MIB.addOperand(MI->getOperand(MemOpndSlot + i)); } if (!UseImmLabel) MIB.addReg(LabelReg); else MIB.addMBB(restoreMBB); MIB.setMemRefs(MMOBegin, MMOEnd); // Setup MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup)) .addMBB(restoreMBB); const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); MIB.addRegMask(RegInfo->getNoPreservedMask()); thisMBB->addSuccessor(mainMBB); thisMBB->addSuccessor(restoreMBB); // mainMBB: // EAX = 0 BuildMI(mainMBB, DL, TII->get(X86::MOV32r0), mainDstReg); mainMBB->addSuccessor(sinkMBB); // sinkMBB: BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI), DstReg) .addReg(mainDstReg).addMBB(mainMBB) .addReg(restoreDstReg).addMBB(restoreMBB); // restoreMBB: if (RegInfo->hasBasePointer(*MF)) { const bool Uses64BitFramePtr = Subtarget->isTarget64BitLP64() || Subtarget->isTargetNaCl64(); X86MachineFunctionInfo *X86FI = MF->getInfo(); X86FI->setRestoreBasePointer(MF); unsigned FramePtr = RegInfo->getFrameRegister(*MF); unsigned BasePtr = RegInfo->getBaseRegister(); unsigned Opm = Uses64BitFramePtr ? X86::MOV64rm : X86::MOV32rm; addRegOffset(BuildMI(restoreMBB, DL, TII->get(Opm), BasePtr), FramePtr, true, X86FI->getRestoreBasePointerOffset()) .setMIFlag(MachineInstr::FrameSetup); } BuildMI(restoreMBB, DL, TII->get(X86::MOV32ri), restoreDstReg).addImm(1); BuildMI(restoreMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB); restoreMBB->addSuccessor(sinkMBB); MI->eraseFromParent(); return sinkMBB; } MachineBasicBlock * X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI->getDebugLoc(); MachineFunction *MF = MBB->getParent(); const TargetInstrInfo *TII = Subtarget->getInstrInfo(); MachineRegisterInfo &MRI = MF->getRegInfo(); // Memory Reference MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); const TargetRegisterClass *RC = (PVT == MVT::i64) ? 
&X86::GR64RegClass : &X86::GR32RegClass; unsigned Tmp = MRI.createVirtualRegister(RC); // Since FP is only updated here but NOT referenced, it's treated as GPR. const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); unsigned FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP; unsigned SP = RegInfo->getStackRegister(); MachineInstrBuilder MIB; const int64_t LabelOffset = 1 * PVT.getStoreSize(); const int64_t SPOffset = 2 * PVT.getStoreSize(); unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm; unsigned IJmpOpc = (PVT == MVT::i64) ? X86::JMP64r : X86::JMP32r; // Reload FP MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), FP); for (unsigned i = 0; i < X86::AddrNumOperands; ++i) MIB.addOperand(MI->getOperand(i)); MIB.setMemRefs(MMOBegin, MMOEnd); // Reload IP MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), Tmp); for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { if (i == X86::AddrDisp) MIB.addDisp(MI->getOperand(i), LabelOffset); else MIB.addOperand(MI->getOperand(i)); } MIB.setMemRefs(MMOBegin, MMOEnd); // Reload SP MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), SP); for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { if (i == X86::AddrDisp) MIB.addDisp(MI->getOperand(i), SPOffset); else MIB.addOperand(MI->getOperand(i)); } MIB.setMemRefs(MMOBegin, MMOEnd); // Jump BuildMI(*MBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp); MI->eraseFromParent(); return MBB; } // Replace 213-type (isel default) FMA3 instructions with 231-type for // accumulator loops. Writing back to the accumulator allows the coalescer // to remove extra copies in the loop. // FIXME: Do this on AVX512. We don't support 231 variants yet (PR23937). MachineBasicBlock * X86TargetLowering::emitFMA3Instr(MachineInstr *MI, MachineBasicBlock *MBB) const { MachineOperand &AddendOp = MI->getOperand(3); // Bail out early if the addend isn't a register - we can't switch these. if (!AddendOp.isReg()) return MBB; MachineFunction &MF = *MBB->getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); // Check whether the addend is defined by a PHI: assert(MRI.hasOneDef(AddendOp.getReg()) && "Multiple defs in SSA?"); MachineInstr &AddendDef = *MRI.def_instr_begin(AddendOp.getReg()); if (!AddendDef.isPHI()) return MBB; // Look for the following pattern: // loop: // %addend = phi [%entry, 0], [%loop, %result] // ... // %result = FMA213 %m2, %m1, %addend // Replace with: // loop: // %addend = phi [%entry, 0], [%loop, %result] // ... // %result = FMA231 %addend, %m1, %m2 for (unsigned i = 1, e = AddendDef.getNumOperands(); i < e; i += 2) { assert(AddendDef.getOperand(i).isReg()); MachineOperand PHISrcOp = AddendDef.getOperand(i); MachineInstr &PHISrcInst = *MRI.def_instr_begin(PHISrcOp.getReg()); if (&PHISrcInst == MI) { // Found a matching instruction. 
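// Map the 213-form opcode to its 231-form counterpart and rebuild the
// instruction with the operands reordered to (dst, addend, m1, m2), so the
// coalescer can tie the result to the accumulator as described above.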
unsigned NewFMAOpc = 0; switch (MI->getOpcode()) { case X86::VFMADDPDr213r: NewFMAOpc = X86::VFMADDPDr231r; break; case X86::VFMADDPSr213r: NewFMAOpc = X86::VFMADDPSr231r; break; case X86::VFMADDSDr213r: NewFMAOpc = X86::VFMADDSDr231r; break; case X86::VFMADDSSr213r: NewFMAOpc = X86::VFMADDSSr231r; break; case X86::VFMSUBPDr213r: NewFMAOpc = X86::VFMSUBPDr231r; break; case X86::VFMSUBPSr213r: NewFMAOpc = X86::VFMSUBPSr231r; break; case X86::VFMSUBSDr213r: NewFMAOpc = X86::VFMSUBSDr231r; break; case X86::VFMSUBSSr213r: NewFMAOpc = X86::VFMSUBSSr231r; break; case X86::VFNMADDPDr213r: NewFMAOpc = X86::VFNMADDPDr231r; break; case X86::VFNMADDPSr213r: NewFMAOpc = X86::VFNMADDPSr231r; break; case X86::VFNMADDSDr213r: NewFMAOpc = X86::VFNMADDSDr231r; break; case X86::VFNMADDSSr213r: NewFMAOpc = X86::VFNMADDSSr231r; break; case X86::VFNMSUBPDr213r: NewFMAOpc = X86::VFNMSUBPDr231r; break; case X86::VFNMSUBPSr213r: NewFMAOpc = X86::VFNMSUBPSr231r; break; case X86::VFNMSUBSDr213r: NewFMAOpc = X86::VFNMSUBSDr231r; break; case X86::VFNMSUBSSr213r: NewFMAOpc = X86::VFNMSUBSSr231r; break; case X86::VFMADDSUBPDr213r: NewFMAOpc = X86::VFMADDSUBPDr231r; break; case X86::VFMADDSUBPSr213r: NewFMAOpc = X86::VFMADDSUBPSr231r; break; case X86::VFMSUBADDPDr213r: NewFMAOpc = X86::VFMSUBADDPDr231r; break; case X86::VFMSUBADDPSr213r: NewFMAOpc = X86::VFMSUBADDPSr231r; break; case X86::VFMADDPDr213rY: NewFMAOpc = X86::VFMADDPDr231rY; break; case X86::VFMADDPSr213rY: NewFMAOpc = X86::VFMADDPSr231rY; break; case X86::VFMSUBPDr213rY: NewFMAOpc = X86::VFMSUBPDr231rY; break; case X86::VFMSUBPSr213rY: NewFMAOpc = X86::VFMSUBPSr231rY; break; case X86::VFNMADDPDr213rY: NewFMAOpc = X86::VFNMADDPDr231rY; break; case X86::VFNMADDPSr213rY: NewFMAOpc = X86::VFNMADDPSr231rY; break; case X86::VFNMSUBPDr213rY: NewFMAOpc = X86::VFNMSUBPDr231rY; break; case X86::VFNMSUBPSr213rY: NewFMAOpc = X86::VFNMSUBPSr231rY; break; case X86::VFMADDSUBPDr213rY: NewFMAOpc = X86::VFMADDSUBPDr231rY; break; case X86::VFMADDSUBPSr213rY: NewFMAOpc = X86::VFMADDSUBPSr231rY; break; case X86::VFMSUBADDPDr213rY: NewFMAOpc = X86::VFMSUBADDPDr231rY; break; case X86::VFMSUBADDPSr213rY: NewFMAOpc = X86::VFMSUBADDPSr231rY; break; default: llvm_unreachable("Unrecognized FMA variant."); } const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(NewFMAOpc)) .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(3)) .addOperand(MI->getOperand(2)) .addOperand(MI->getOperand(1)); MBB->insert(MachineBasicBlock::iterator(MI), MIB); MI->eraseFromParent(); } } return MBB; } MachineBasicBlock * X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { switch (MI->getOpcode()) { default: llvm_unreachable("Unexpected instr type to insert"); case X86::TAILJMPd64: case X86::TAILJMPr64: case X86::TAILJMPm64: case X86::TAILJMPd64_REX: case X86::TAILJMPr64_REX: case X86::TAILJMPm64_REX: llvm_unreachable("TAILJMP64 would not be touched here."); case X86::TCRETURNdi64: case X86::TCRETURNri64: case X86::TCRETURNmi64: return BB; + case X86::TLS_addr32: + case X86::TLS_addr64: + case X86::TLS_base_addr32: + case X86::TLS_base_addr64: + return EmitLoweredTLSAddr(MI, BB); case X86::WIN_ALLOCA: return EmitLoweredWinAlloca(MI, BB); case X86::CATCHRET: return EmitLoweredCatchRet(MI, BB); case X86::CATCHPAD: return EmitLoweredCatchPad(MI, BB); case X86::SEG_ALLOCA_32: case X86::SEG_ALLOCA_64: return EmitLoweredSegAlloca(MI, BB); case X86::TLSCall_32: case X86::TLSCall_64: return 
EmitLoweredTLSCall(MI, BB); case X86::CMOV_FR32: case X86::CMOV_FR64: case X86::CMOV_FR128: case X86::CMOV_GR8: case X86::CMOV_GR16: case X86::CMOV_GR32: case X86::CMOV_RFP32: case X86::CMOV_RFP64: case X86::CMOV_RFP80: case X86::CMOV_V2F64: case X86::CMOV_V2I64: case X86::CMOV_V4F32: case X86::CMOV_V4F64: case X86::CMOV_V4I64: case X86::CMOV_V16F32: case X86::CMOV_V8F32: case X86::CMOV_V8F64: case X86::CMOV_V8I64: case X86::CMOV_V8I1: case X86::CMOV_V16I1: case X86::CMOV_V32I1: case X86::CMOV_V64I1: return EmitLoweredSelect(MI, BB); case X86::RDFLAGS32: case X86::RDFLAGS64: { DebugLoc DL = MI->getDebugLoc(); const TargetInstrInfo *TII = Subtarget->getInstrInfo(); unsigned PushF = MI->getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64; unsigned Pop = MI->getOpcode() == X86::RDFLAGS32 ? X86::POP32r : X86::POP64r; BuildMI(*BB, MI, DL, TII->get(PushF)); BuildMI(*BB, MI, DL, TII->get(Pop), MI->getOperand(0).getReg()); MI->eraseFromParent(); // The pseudo is gone now. return BB; } case X86::WRFLAGS32: case X86::WRFLAGS64: { DebugLoc DL = MI->getDebugLoc(); const TargetInstrInfo *TII = Subtarget->getInstrInfo(); unsigned Push = MI->getOpcode() == X86::WRFLAGS32 ? X86::PUSH32r : X86::PUSH64r; unsigned PopF = MI->getOpcode() == X86::WRFLAGS32 ? X86::POPF32 : X86::POPF64; BuildMI(*BB, MI, DL, TII->get(Push)).addReg(MI->getOperand(0).getReg()); BuildMI(*BB, MI, DL, TII->get(PopF)); MI->eraseFromParent(); // The pseudo is gone now. return BB; } case X86::RELEASE_FADD32mr: case X86::RELEASE_FADD64mr: return EmitLoweredAtomicFP(MI, BB); case X86::FP32_TO_INT16_IN_MEM: case X86::FP32_TO_INT32_IN_MEM: case X86::FP32_TO_INT64_IN_MEM: case X86::FP64_TO_INT16_IN_MEM: case X86::FP64_TO_INT32_IN_MEM: case X86::FP64_TO_INT64_IN_MEM: case X86::FP80_TO_INT16_IN_MEM: case X86::FP80_TO_INT32_IN_MEM: case X86::FP80_TO_INT64_IN_MEM: { MachineFunction *F = BB->getParent(); const TargetInstrInfo *TII = Subtarget->getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); // Change the floating point control register to use "round towards zero" // mode when truncating to an integer value. int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false); addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FNSTCW16m)), CWFrameIdx); // Load the old value of the high byte of the control word... unsigned OldCW = F->getRegInfo().createVirtualRegister(&X86::GR16RegClass); addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16rm), OldCW), CWFrameIdx); // Set the high part to be round to zero... addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mi)), CWFrameIdx) .addImm(0xC7F); // Reload the modified control word now... addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FLDCW16m)), CWFrameIdx); // Restore the memory image of control word to original value addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)), CWFrameIdx) .addReg(OldCW); // Get the X86 opcode to use. 
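// Pick the x87 integer-store opcode matching the source FP width
// (f32/f64/f80) and the destination integer width (i16/i32/i64).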
unsigned Opc; switch (MI->getOpcode()) { default: llvm_unreachable("illegal opcode!"); case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break; case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break; case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break; case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break; case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break; case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break; case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break; case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break; case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break; } X86AddressMode AM; MachineOperand &Op = MI->getOperand(0); if (Op.isReg()) { AM.BaseType = X86AddressMode::RegBase; AM.Base.Reg = Op.getReg(); } else { AM.BaseType = X86AddressMode::FrameIndexBase; AM.Base.FrameIndex = Op.getIndex(); } Op = MI->getOperand(1); if (Op.isImm()) AM.Scale = Op.getImm(); Op = MI->getOperand(2); if (Op.isImm()) AM.IndexReg = Op.getImm(); Op = MI->getOperand(3); if (Op.isGlobal()) { AM.GV = Op.getGlobal(); } else { AM.Disp = Op.getImm(); } addFullAddress(BuildMI(*BB, MI, DL, TII->get(Opc)), AM) .addReg(MI->getOperand(X86::AddrNumOperands).getReg()); // Reload the original control word now. addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FLDCW16m)), CWFrameIdx); MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } // String/text processing lowering. case X86::PCMPISTRM128REG: case X86::VPCMPISTRM128REG: case X86::PCMPISTRM128MEM: case X86::VPCMPISTRM128MEM: case X86::PCMPESTRM128REG: case X86::VPCMPESTRM128REG: case X86::PCMPESTRM128MEM: case X86::VPCMPESTRM128MEM: assert(Subtarget->hasSSE42() && "Target must have SSE4.2 or AVX features enabled"); return EmitPCMPSTRM(MI, BB, Subtarget->getInstrInfo()); // String/text processing lowering. case X86::PCMPISTRIREG: case X86::VPCMPISTRIREG: case X86::PCMPISTRIMEM: case X86::VPCMPISTRIMEM: case X86::PCMPESTRIREG: case X86::VPCMPESTRIREG: case X86::PCMPESTRIMEM: case X86::VPCMPESTRIMEM: assert(Subtarget->hasSSE42() && "Target must have SSE4.2 or AVX features enabled"); return EmitPCMPSTRI(MI, BB, Subtarget->getInstrInfo()); // Thread synchronization. case X86::MONITOR: return EmitMonitor(MI, BB, Subtarget); // PKU feature case X86::WRPKRU: return EmitWRPKRU(MI, BB, Subtarget); case X86::RDPKRU: return EmitRDPKRU(MI, BB, Subtarget); // xbegin case X86::XBEGIN: return EmitXBegin(MI, BB, Subtarget->getInstrInfo()); case X86::VASTART_SAVE_XMM_REGS: return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB); case X86::VAARG_64: return EmitVAARG64WithCustomInserter(MI, BB); case X86::EH_SjLj_SetJmp32: case X86::EH_SjLj_SetJmp64: return emitEHSjLjSetJmp(MI, BB); case X86::EH_SjLj_LongJmp32: case X86::EH_SjLj_LongJmp64: return emitEHSjLjLongJmp(MI, BB); case TargetOpcode::STATEPOINT: // As an implementation detail, STATEPOINT shares the STACKMAP format at // this point in the process. We diverge later. 
return emitPatchPoint(MI, BB); case TargetOpcode::STACKMAP: case TargetOpcode::PATCHPOINT: return emitPatchPoint(MI, BB); case X86::VFMADDPDr213r: case X86::VFMADDPSr213r: case X86::VFMADDSDr213r: case X86::VFMADDSSr213r: case X86::VFMSUBPDr213r: case X86::VFMSUBPSr213r: case X86::VFMSUBSDr213r: case X86::VFMSUBSSr213r: case X86::VFNMADDPDr213r: case X86::VFNMADDPSr213r: case X86::VFNMADDSDr213r: case X86::VFNMADDSSr213r: case X86::VFNMSUBPDr213r: case X86::VFNMSUBPSr213r: case X86::VFNMSUBSDr213r: case X86::VFNMSUBSSr213r: case X86::VFMADDSUBPDr213r: case X86::VFMADDSUBPSr213r: case X86::VFMSUBADDPDr213r: case X86::VFMSUBADDPSr213r: case X86::VFMADDPDr213rY: case X86::VFMADDPSr213rY: case X86::VFMSUBPDr213rY: case X86::VFMSUBPSr213rY: case X86::VFNMADDPDr213rY: case X86::VFNMADDPSr213rY: case X86::VFNMSUBPDr213rY: case X86::VFNMSUBPSr213rY: case X86::VFMADDSUBPDr213rY: case X86::VFMADDSUBPSr213rY: case X86::VFMSUBADDPDr213rY: case X86::VFMSUBADDPSr213rY: return emitFMA3Instr(MI, BB); } } //===----------------------------------------------------------------------===// // X86 Optimization Hooks //===----------------------------------------------------------------------===// void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { unsigned BitWidth = KnownZero.getBitWidth(); unsigned Opc = Op.getOpcode(); assert((Opc >= ISD::BUILTIN_OP_END || Opc == ISD::INTRINSIC_WO_CHAIN || Opc == ISD::INTRINSIC_W_CHAIN || Opc == ISD::INTRINSIC_VOID) && "Should use MaskedValueIsZero if you don't know whether Op" " is a target node!"); KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. switch (Opc) { default: break; case X86ISD::ADD: case X86ISD::SUB: case X86ISD::ADC: case X86ISD::SBB: case X86ISD::SMUL: case X86ISD::UMUL: case X86ISD::INC: case X86ISD::DEC: case X86ISD::OR: case X86ISD::XOR: case X86ISD::AND: // These nodes' second result is a boolean. if (Op.getResNo() == 0) break; // Fallthrough case X86ISD::SETCC: KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); break; case ISD::INTRINSIC_WO_CHAIN: { unsigned IntId = cast(Op.getOperand(0))->getZExtValue(); unsigned NumLoBits = 0; switch (IntId) { default: break; case Intrinsic::x86_sse_movmsk_ps: case Intrinsic::x86_avx_movmsk_ps_256: case Intrinsic::x86_sse2_movmsk_pd: case Intrinsic::x86_avx_movmsk_pd_256: case Intrinsic::x86_mmx_pmovmskb: case Intrinsic::x86_sse2_pmovmskb_128: case Intrinsic::x86_avx2_pmovmskb: { // High bits of movmskp{s|d}, pmovmskb are known zero. switch (IntId) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. case Intrinsic::x86_sse_movmsk_ps: NumLoBits = 4; break; case Intrinsic::x86_avx_movmsk_ps_256: NumLoBits = 8; break; case Intrinsic::x86_sse2_movmsk_pd: NumLoBits = 2; break; case Intrinsic::x86_avx_movmsk_pd_256: NumLoBits = 4; break; case Intrinsic::x86_mmx_pmovmskb: NumLoBits = 8; break; case Intrinsic::x86_sse2_pmovmskb_128: NumLoBits = 16; break; case Intrinsic::x86_avx2_pmovmskb: NumLoBits = 32; break; } KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - NumLoBits); break; } } break; } } } unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode( SDValue Op, const SelectionDAG &, unsigned Depth) const { // SETCC_CARRY sets the dest to ~0 for true or 0 for false. if (Op.getOpcode() == X86ISD::SETCC_CARRY) return Op.getValueType().getScalarSizeInBits(); // Fallback case. 
return 1; } /// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the /// node is a GlobalAddress + offset. bool X86TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const { if (N->getOpcode() == X86ISD::Wrapper) { if (isa(N->getOperand(0))) { GA = cast(N->getOperand(0))->getGlobal(); Offset = cast(N->getOperand(0))->getOffset(); return true; } } return TargetLowering::isGAPlusOffset(N, GA, Offset); } /// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors. /// FIXME: This could be expanded to support 512 bit vectors as well. static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget* Subtarget) { SDLoc dl(N); ShuffleVectorSDNode *SVOp = cast(N); SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); MVT VT = SVOp->getSimpleValueType(0); unsigned NumElems = VT.getVectorNumElements(); if (V1.getOpcode() == ISD::CONCAT_VECTORS && V2.getOpcode() == ISD::CONCAT_VECTORS) { // // 0,0,0,... // | // V UNDEF BUILD_VECTOR UNDEF // \ / \ / // CONCAT_VECTOR CONCAT_VECTOR // \ / // \ / // RESULT: V + zero extended // if (V2.getOperand(0).getOpcode() != ISD::BUILD_VECTOR || V2.getOperand(1).getOpcode() != ISD::UNDEF || V1.getOperand(1).getOpcode() != ISD::UNDEF) return SDValue(); if (!ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode())) return SDValue(); // To match the shuffle mask, the first half of the mask should // be exactly the first vector, and all the rest a splat with the // first element of the second one. for (unsigned i = 0; i != NumElems/2; ++i) if (!isUndefOrEqual(SVOp->getMaskElt(i), i) || !isUndefOrEqual(SVOp->getMaskElt(i+NumElems/2), NumElems)) return SDValue(); // If V1 is coming from a vector load then just fold to a VZEXT_LOAD. if (LoadSDNode *Ld = dyn_cast(V1.getOperand(0))) { if (Ld->hasNUsesOfValue(1, 0)) { SDVTList Tys = DAG.getVTList(MVT::v4i64, MVT::Other); SDValue Ops[] = { Ld->getChain(), Ld->getBasePtr() }; SDValue ResNode = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, Ld->getMemoryVT(), Ld->getPointerInfo(), Ld->getAlignment(), false/*isVolatile*/, true/*ReadMem*/, false/*WriteMem*/); // Make sure the newly-created LOAD is in the same position as Ld in // terms of dependency. We create a TokenFactor for Ld and ResNode, // and update uses of Ld's output chain to use the TokenFactor. if (Ld->hasAnyUseOfValue(1)) { SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SDValue(Ld, 1), SDValue(ResNode.getNode(), 1)); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain); DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(Ld, 1), SDValue(ResNode.getNode(), 1)); } return DAG.getBitcast(VT, ResNode); } } // Emit a zeroed vector and insert the desired subvector on its // first half. SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl); SDValue InsV = Insert128BitVector(Zeros, V1.getOperand(0), 0, DAG, dl); return DCI.CombineTo(N, InsV); } return SDValue(); } /// \brief Combine an arbitrary chain of shuffles into a single instruction if /// possible. /// /// This is the leaf of the recursive combine below.
When we have found some /// chain of single-use x86 shuffle instructions and accumulated the combined /// shuffle mask represented by them, this will try to pattern match that mask /// into either a single instruction if there is a special purpose instruction /// for this operation, or into a PSHUFB instruction which is a fully general /// instruction but should only be used to replace chains over a certain depth. static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef Mask, int Depth, bool HasPSHUFB, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { assert(!Mask.empty() && "Cannot combine an empty shuffle mask!"); // Find the operand that enters the chain. Note that multiple uses are OK // here, we're not going to remove the operand we find. SDValue Input = Op.getOperand(0); while (Input.getOpcode() == ISD::BITCAST) Input = Input.getOperand(0); MVT VT = Input.getSimpleValueType(); MVT RootVT = Root.getSimpleValueType(); SDLoc DL(Root); if (Mask.size() == 1) { int Index = Mask[0]; assert((Index >= 0 || Index == SM_SentinelUndef || Index == SM_SentinelZero) && "Invalid shuffle index found!"); // We may end up with an accumulated mask of size 1 as a result of // widening of shuffle operands (see function canWidenShuffleElements). // If the only shuffle index is equal to SM_SentinelZero then propagate // a zero vector. Otherwise, the combine shuffle mask is a no-op shuffle // mask, and therefore the entire chain of shuffles can be folded away. if (Index == SM_SentinelZero) DCI.CombineTo(Root.getNode(), getZeroVector(RootVT, Subtarget, DAG, DL)); else DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Input), /*AddTo*/ true); return true; } // Use the float domain if the operand type is a floating point type. bool FloatDomain = VT.isFloatingPoint(); // For floating point shuffles, we don't have free copies in the shuffle // instructions or the ability to load as part of the instruction, so // canonicalize their shuffles to UNPCK or MOV variants. // // Note that even with AVX we prefer the PSHUFD form of shuffle for integer // vectors because it can have a load folded into it that UNPCK cannot. This // doesn't preclude something switching to the shorter encoding post-RA. // // FIXME: Should teach these routines about AVX vector widths. if (FloatDomain && VT.is128BitVector()) { if (Mask.equals({0, 0}) || Mask.equals({1, 1})) { bool Lo = Mask.equals({0, 0}); unsigned Shuffle; MVT ShuffleVT; // Check if we have SSE3 which will let us use MOVDDUP. That instruction // is no slower than UNPCKLPD but has the option to fold the input operand // into even an unaligned memory load. if (Lo && Subtarget->hasSSE3()) { Shuffle = X86ISD::MOVDDUP; ShuffleVT = MVT::v2f64; } else { // We have MOVLHPS and MOVHLPS throughout SSE and they encode smaller // than the UNPCK variants. Shuffle = Lo ? X86ISD::MOVLHPS : X86ISD::MOVHLPS; ShuffleVT = MVT::v4f32; } if (Depth == 1 && Root->getOpcode() == Shuffle) return false; // Nothing to do! Op = DAG.getBitcast(ShuffleVT, Input); DCI.AddToWorklist(Op.getNode()); if (Shuffle == X86ISD::MOVDDUP) Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op); else Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op); DCI.AddToWorklist(Op.getNode()); DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Op), /*AddTo*/ true); return true; } if (Subtarget->hasSSE3() && (Mask.equals({0, 0, 2, 2}) || Mask.equals({1, 1, 3, 3}))) { bool Lo = Mask.equals({0, 0, 2, 2}); unsigned Shuffle = Lo ? 
X86ISD::MOVSLDUP : X86ISD::MOVSHDUP; MVT ShuffleVT = MVT::v4f32; if (Depth == 1 && Root->getOpcode() == Shuffle) return false; // Nothing to do! Op = DAG.getBitcast(ShuffleVT, Input); DCI.AddToWorklist(Op.getNode()); Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op); DCI.AddToWorklist(Op.getNode()); DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Op), /*AddTo*/ true); return true; } if (Mask.equals({0, 0, 1, 1}) || Mask.equals({2, 2, 3, 3})) { bool Lo = Mask.equals({0, 0, 1, 1}); unsigned Shuffle = Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH; MVT ShuffleVT = MVT::v4f32; if (Depth == 1 && Root->getOpcode() == Shuffle) return false; // Nothing to do! Op = DAG.getBitcast(ShuffleVT, Input); DCI.AddToWorklist(Op.getNode()); Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op); DCI.AddToWorklist(Op.getNode()); DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Op), /*AddTo*/ true); return true; } } // We always canonicalize the 8 x i16 and 16 x i8 shuffles into their UNPCK // variants as none of these have single-instruction variants that are // superior to the UNPCK formulation. if (!FloatDomain && VT.is128BitVector() && (Mask.equals({0, 0, 1, 1, 2, 2, 3, 3}) || Mask.equals({4, 4, 5, 5, 6, 6, 7, 7}) || Mask.equals({0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}) || Mask.equals( {8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15}))) { bool Lo = Mask[0] == 0; unsigned Shuffle = Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH; if (Depth == 1 && Root->getOpcode() == Shuffle) return false; // Nothing to do! MVT ShuffleVT; switch (Mask.size()) { case 8: ShuffleVT = MVT::v8i16; break; case 16: ShuffleVT = MVT::v16i8; break; default: llvm_unreachable("Impossible mask size!"); }; Op = DAG.getBitcast(ShuffleVT, Input); DCI.AddToWorklist(Op.getNode()); Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op); DCI.AddToWorklist(Op.getNode()); DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Op), /*AddTo*/ true); return true; } // Don't try to re-form single instruction chains under any circumstances now // that we've done encoding canonicalization for them. if (Depth < 2) return false; // If we have 3 or more shuffle instructions or a chain involving PSHUFB, we // can replace them with a single PSHUFB instruction profitably. Intel's // manuals suggest only using PSHUFB if doing so replacing 5 instructions, but // in practice PSHUFB tends to be *very* fast so we're more aggressive. if ((Depth >= 3 || HasPSHUFB) && Subtarget->hasSSSE3()) { SmallVector PSHUFBMask; int NumBytes = VT.getSizeInBits() / 8; int Ratio = NumBytes / Mask.size(); for (int i = 0; i < NumBytes; ++i) { if (Mask[i / Ratio] == SM_SentinelUndef) { PSHUFBMask.push_back(DAG.getUNDEF(MVT::i8)); continue; } int M = Mask[i / Ratio] != SM_SentinelZero ? Ratio * Mask[i / Ratio] + i % Ratio : 255; PSHUFBMask.push_back(DAG.getConstant(M, DL, MVT::i8)); } MVT ByteVT = MVT::getVectorVT(MVT::i8, NumBytes); Op = DAG.getBitcast(ByteVT, Input); DCI.AddToWorklist(Op.getNode()); SDValue PSHUFBMaskOp = DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVT, PSHUFBMask); DCI.AddToWorklist(PSHUFBMaskOp.getNode()); Op = DAG.getNode(X86ISD::PSHUFB, DL, ByteVT, Op, PSHUFBMaskOp); DCI.AddToWorklist(Op.getNode()); DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Op), /*AddTo*/ true); return true; } // Failed to find any combines. return false; } /// \brief Fully generic combining of x86 shuffle instructions. /// /// This should be the last combine run over the x86 shuffle instructions. 
Once /// they have been fully optimized, this will recursively consider all chains /// of single-use shuffle instructions, build a generic model of the cumulative /// shuffle operation, and check for simpler instructions which implement this /// operation. We use this primarily for two purposes: /// /// 1) Collapse generic shuffles to specialized single instructions when /// equivalent. In most cases, this is just an encoding size win, but /// sometimes we will collapse multiple generic shuffles into a single /// special-purpose shuffle. /// 2) Look for sequences of shuffle instructions with 3 or more total /// instructions, and replace them with the slightly more expensive SSSE3 /// PSHUFB instruction if available. We do this as the last combining step /// to ensure we avoid using PSHUFB if we can implement the shuffle with /// a suitable short sequence of other instructions. The PSHUFB will either /// use a register or have to read from memory and so is slightly (but only /// slightly) more expensive than the other shuffle instructions. /// /// Because this is inherently a quadratic operation (for each shuffle in /// a chain, we recurse up the chain), the depth is limited to 8 instructions. /// This should never be an issue in practice as the shuffle lowering doesn't /// produce sequences of more than 8 instructions. /// /// FIXME: We will currently miss some cases where the redundant shuffling /// would simplify under the threshold for PSHUFB formation because of /// combine-ordering. To fix this, we should do the redundant instruction /// combining in this recursive walk. static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root, ArrayRef RootMask, int Depth, bool HasPSHUFB, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { // Bound the depth of our recursive combine because this is ultimately // quadratic in nature. if (Depth > 8) return false; // Directly rip through bitcasts to find the underlying operand. while (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).hasOneUse()) Op = Op.getOperand(0); MVT VT = Op.getSimpleValueType(); if (!VT.isVector()) return false; // Bail if we hit a non-vector. assert(Root.getSimpleValueType().isVector() && "Shuffles operate on vector types!"); assert(VT.getSizeInBits() == Root.getSimpleValueType().getSizeInBits() && "Can only combine shuffles of the same vector register size."); if (!isTargetShuffle(Op.getOpcode())) return false; SmallVector OpMask; bool IsUnary; bool HaveMask = getTargetShuffleMask(Op.getNode(), VT, true, OpMask, IsUnary); // We can only combine unary shuffles which we can decode the mask for. if (!HaveMask || !IsUnary) return false; assert(VT.getVectorNumElements() == OpMask.size() && "Different mask size from vector size!"); assert(((RootMask.size() > OpMask.size() && RootMask.size() % OpMask.size() == 0) || (OpMask.size() > RootMask.size() && OpMask.size() % RootMask.size() == 0) || OpMask.size() == RootMask.size()) && "The smaller number of elements must divide the larger."); int RootRatio = std::max(1, OpMask.size() / RootMask.size()); int OpRatio = std::max(1, RootMask.size() / OpMask.size()); assert(((RootRatio == 1 && OpRatio == 1) || (RootRatio == 1) != (OpRatio == 1)) && "Must not have a ratio for both incoming and op masks!"); SmallVector Mask; Mask.reserve(std::max(OpMask.size(), RootMask.size())); // Merge this shuffle operation's mask into our accumulated mask.
Note that // this shuffle's mask will be the first applied to the input, followed by the // root mask to get us all the way to the root value arrangement. The reason // for this order is that we are recursing up the operation chain. for (int i = 0, e = std::max(OpMask.size(), RootMask.size()); i < e; ++i) { int RootIdx = i / RootRatio; if (RootMask[RootIdx] < 0) { // This is a zero or undef lane, we're done. Mask.push_back(RootMask[RootIdx]); continue; } int RootMaskedIdx = RootMask[RootIdx] * RootRatio + i % RootRatio; int OpIdx = RootMaskedIdx / OpRatio; if (OpMask[OpIdx] < 0) { // The incoming lanes are zero or undef, it doesn't matter which ones we // are using. Mask.push_back(OpMask[OpIdx]); continue; } // Ok, we have non-zero lanes, map them through. Mask.push_back(OpMask[OpIdx] * OpRatio + RootMaskedIdx % OpRatio); } // See if we can recurse into the operand to combine more things. switch (Op.getOpcode()) { case X86ISD::PSHUFB: HasPSHUFB = true; case X86ISD::PSHUFD: case X86ISD::PSHUFHW: case X86ISD::PSHUFLW: if (Op.getOperand(0).hasOneUse() && combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1, HasPSHUFB, DAG, DCI, Subtarget)) return true; break; case X86ISD::UNPCKL: case X86ISD::UNPCKH: assert(Op.getOperand(0) == Op.getOperand(1) && "We only combine unary shuffles!"); // We can't check for single use, we have to check that this shuffle is the // only user. if (Op->isOnlyUserOf(Op.getOperand(0).getNode()) && combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1, HasPSHUFB, DAG, DCI, Subtarget)) return true; break; } // Minor canonicalization of the accumulated shuffle mask to make it easier // to match below. All this does is detect masks with sequential pairs of // elements, and shrink them to the half-width mask. It does this in a loop // so it will reduce the size of the mask to the minimal width mask which // performs an equivalent shuffle. SmallVector WidenedMask; while (Mask.size() > 1 && canWidenShuffleElements(Mask, WidenedMask)) { Mask = std::move(WidenedMask); WidenedMask.clear(); } return combineX86ShuffleChain(Op, Root, Mask, Depth, HasPSHUFB, DAG, DCI, Subtarget); } /// \brief Get the PSHUF-style mask from PSHUF node. /// /// This is a very minor wrapper around getTargetShuffleMask to ease forming v4 /// PSHUF-style masks that can be reused with such instructions. static SmallVector getPSHUFShuffleMask(SDValue N) { MVT VT = N.getSimpleValueType(); SmallVector Mask; bool IsUnary; bool HaveMask = getTargetShuffleMask(N.getNode(), VT, false, Mask, IsUnary); (void)HaveMask; assert(HaveMask); // If we have more than 128-bits, only the low 128-bits of shuffle mask // matter. Check that the upper masks are repeats and remove them. if (VT.getSizeInBits() > 128) { int LaneElts = 128 / VT.getScalarSizeInBits(); #ifndef NDEBUG for (int i = 1, NumLanes = VT.getSizeInBits() / 128; i < NumLanes; ++i) for (int j = 0; j < LaneElts; ++j) assert(Mask[j] == Mask[i * LaneElts + j] - (LaneElts * i) && "Mask doesn't repeat in high 128-bit lanes!"); #endif Mask.resize(LaneElts); } switch (N.getOpcode()) { case X86ISD::PSHUFD: return Mask; case X86ISD::PSHUFLW: Mask.resize(4); return Mask; case X86ISD::PSHUFHW: Mask.erase(Mask.begin(), Mask.begin() + 4); for (int &M : Mask) M -= 4; return Mask; default: llvm_unreachable("No valid shuffle instruction found!"); } } /// \brief Search for a combinable shuffle across a chain ending in pshufd.
/// /// We walk up the chain and look for a combinable shuffle, skipping over /// shuffles that we could hoist this shuffle's transformation past without /// altering anything. static SDValue combineRedundantDWordShuffle(SDValue N, MutableArrayRef Mask, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { assert(N.getOpcode() == X86ISD::PSHUFD && "Called with something other than an x86 128-bit half shuffle!"); SDLoc DL(N); // Walk up a single-use chain looking for a combinable shuffle. Keep a stack // of the shuffles in the chain so that we can form a fresh chain to replace // this one. SmallVector Chain; SDValue V = N.getOperand(0); for (; V.hasOneUse(); V = V.getOperand(0)) { switch (V.getOpcode()) { default: return SDValue(); // Nothing combined! case ISD::BITCAST: // Skip bitcasts as we always know the type for the target specific // instructions. continue; case X86ISD::PSHUFD: // Found another dword shuffle. break; case X86ISD::PSHUFLW: // Check that the low words (being shuffled) are the identity in the // dword shuffle, and the high words are self-contained. if (Mask[0] != 0 || Mask[1] != 1 || !(Mask[2] >= 2 && Mask[2] < 4 && Mask[3] >= 2 && Mask[3] < 4)) return SDValue(); Chain.push_back(V); continue; case X86ISD::PSHUFHW: // Check that the high words (being shuffled) are the identity in the // dword shuffle, and the low words are self-contained. if (Mask[2] != 2 || Mask[3] != 3 || !(Mask[0] >= 0 && Mask[0] < 2 && Mask[1] >= 0 && Mask[1] < 2)) return SDValue(); Chain.push_back(V); continue; case X86ISD::UNPCKL: case X86ISD::UNPCKH: // For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword // shuffle into a preceding word shuffle. if (V.getSimpleValueType().getVectorElementType() != MVT::i8 && V.getSimpleValueType().getVectorElementType() != MVT::i16) return SDValue(); // Search for a half-shuffle which we can combine with. unsigned CombineOp = V.getOpcode() == X86ISD::UNPCKL ? X86ISD::PSHUFLW : X86ISD::PSHUFHW; if (V.getOperand(0) != V.getOperand(1) || !V->isOnlyUserOf(V.getOperand(0).getNode())) return SDValue(); Chain.push_back(V); V = V.getOperand(0); do { switch (V.getOpcode()) { default: return SDValue(); // Nothing to combine. case X86ISD::PSHUFLW: case X86ISD::PSHUFHW: if (V.getOpcode() == CombineOp) break; Chain.push_back(V); // Fallthrough! case ISD::BITCAST: V = V.getOperand(0); continue; } break; } while (V.hasOneUse()); break; } // Break out of the loop if we break out of the switch. break; } if (!V.hasOneUse()) // We fell out of the loop without finding a viable combining instruction. return SDValue(); // Merge this node's mask and our incoming mask. SmallVector VMask = getPSHUFShuffleMask(V); for (int &M : Mask) M = VMask[M]; V = DAG.getNode(V.getOpcode(), DL, V.getValueType(), V.getOperand(0), getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); // Rebuild the chain around this new shuffle. while (!Chain.empty()) { SDValue W = Chain.pop_back_val(); if (V.getValueType() != W.getOperand(0).getValueType()) V = DAG.getBitcast(W.getOperand(0).getValueType(), V); switch (W.getOpcode()) { default: llvm_unreachable("Only PSHUF and UNPCK instructions get here!"); case X86ISD::UNPCKL: case X86ISD::UNPCKH: V = DAG.getNode(W.getOpcode(), DL, W.getValueType(), V, V); break; case X86ISD::PSHUFD: case X86ISD::PSHUFLW: case X86ISD::PSHUFHW: V = DAG.getNode(W.getOpcode(), DL, W.getValueType(), V, W.getOperand(1)); break; } } if (V.getValueType() != N.getValueType()) V = DAG.getBitcast(N.getValueType(), V); // Return the new chain to replace N. 
return V; } /// \brief Search for a combinable shuffle across a chain ending in pshuflw or /// pshufhw. /// /// We walk up the chain, skipping shuffles of the other half and looking /// through shuffles which switch halves trying to find a shuffle of the same /// pair of dwords. static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef Mask, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { assert( (N.getOpcode() == X86ISD::PSHUFLW || N.getOpcode() == X86ISD::PSHUFHW) && "Called with something other than an x86 128-bit half shuffle!"); SDLoc DL(N); unsigned CombineOpcode = N.getOpcode(); // Walk up a single-use chain looking for a combinable shuffle. SDValue V = N.getOperand(0); for (; V.hasOneUse(); V = V.getOperand(0)) { switch (V.getOpcode()) { default: return false; // Nothing combined! case ISD::BITCAST: // Skip bitcasts as we always know the type for the target specific // instructions. continue; case X86ISD::PSHUFLW: case X86ISD::PSHUFHW: if (V.getOpcode() == CombineOpcode) break; // Other-half shuffles are no-ops. continue; } // Break out of the loop if we break out of the switch. break; } if (!V.hasOneUse()) // We fell out of the loop without finding a viable combining instruction. return false; // Combine away the bottom node as its shuffle will be accumulated into // a preceding shuffle. DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo*/ true); // Record the old value. SDValue Old = V; // Merge this node's mask and our incoming mask (adjusted to account for all // the pshufd instructions encountered). SmallVector VMask = getPSHUFShuffleMask(V); for (int &M : Mask) M = VMask[M]; V = DAG.getNode(V.getOpcode(), DL, MVT::v8i16, V.getOperand(0), getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); // Check that the shuffles didn't cancel each other out. If not, we need to // combine to the new one. if (Old != V) // Replace the combinable shuffle with the combined one, updating all users // so that we re-evaluate the chain here. DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true); return true; } /// \brief Try to combine x86 target specific shuffles. static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { SDLoc DL(N); MVT VT = N.getSimpleValueType(); SmallVector Mask; switch (N.getOpcode()) { case X86ISD::PSHUFD: case X86ISD::PSHUFLW: case X86ISD::PSHUFHW: Mask = getPSHUFShuffleMask(N); assert(Mask.size() == 4); break; case X86ISD::UNPCKL: { // Combine X86ISD::UNPCKL and ISD::VECTOR_SHUFFLE into X86ISD::UNPCKH, in // which X86ISD::UNPCKL has a ISD::UNDEF operand, and ISD::VECTOR_SHUFFLE // moves upper half elements into the lower half part. For example: // // t2: v16i8 = vector_shuffle<8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u> t1, // undef:v16i8 // t3: v16i8 = X86ISD::UNPCKL undef:v16i8, t2 // // will be combined to: // // t3: v16i8 = X86ISD::UNPCKH undef:v16i8, t1 // This is only for 128-bit vectors. From SSE4.1 onward this combine may not // happen due to advanced instructions. 
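// Concretely: this fires only when operand 0 is undef and operand 1 is a
// vector_shuffle that moves the upper half of its input into the lower half;
// the UNPCKL of that pair is then rewritten as an UNPCKH of undef and the
// unshuffled input.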
if (!VT.is128BitVector()) return SDValue(); auto Op0 = N.getOperand(0); auto Op1 = N.getOperand(1); if (Op0.getOpcode() == ISD::UNDEF && Op1.getNode()->getOpcode() == ISD::VECTOR_SHUFFLE) { ArrayRef Mask = cast(Op1.getNode())->getMask(); unsigned NumElts = VT.getVectorNumElements(); SmallVector ExpectedMask(NumElts, -1); std::iota(ExpectedMask.begin(), ExpectedMask.begin() + NumElts / 2, NumElts / 2); auto ShufOp = Op1.getOperand(0); if (isShuffleEquivalent(Op1, ShufOp, Mask, ExpectedMask)) return DAG.getNode(X86ISD::UNPCKH, DL, VT, N.getOperand(0), ShufOp); } return SDValue(); } case X86ISD::BLENDI: { SDValue V0 = N->getOperand(0); SDValue V1 = N->getOperand(1); assert(VT == V0.getSimpleValueType() && VT == V1.getSimpleValueType() && "Unexpected input vector types"); // Canonicalize a v2f64 blend with a mask of 2 by swapping the vector // operands and changing the mask to 1. This saves us a bunch of // pattern-matching possibilities related to scalar math ops in SSE/AVX. // x86InstrInfo knows how to commute this back after instruction selection // if it would help register allocation. // TODO: If optimizing for size or a processor that doesn't suffer from // partial register update stalls, this should be transformed into a MOVSD // instruction because a MOVSD is 1-2 bytes smaller than a BLENDPD. if (VT == MVT::v2f64) if (auto *Mask = dyn_cast(N->getOperand(2))) if (Mask->getZExtValue() == 2 && !isShuffleFoldableLoad(V0)) { SDValue NewMask = DAG.getConstant(1, DL, MVT::i8); return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V0, NewMask); } return SDValue(); } default: return SDValue(); } // Nuke no-op shuffles that show up after combining. if (isNoopShuffleMask(Mask)) return DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo*/ true); // Look for simplifications involving one or two shuffle instructions. SDValue V = N.getOperand(0); switch (N.getOpcode()) { default: break; case X86ISD::PSHUFLW: case X86ISD::PSHUFHW: assert(VT.getVectorElementType() == MVT::i16 && "Bad word shuffle type!"); if (combineRedundantHalfShuffle(N, Mask, DAG, DCI)) return SDValue(); // We combined away this shuffle, so we're done. // See if this reduces to a PSHUFD which is no more expensive and can // combine with more operations. Note that it has to at least flip the // dwords as otherwise it would have been removed as a no-op. if (makeArrayRef(Mask).equals({2, 3, 0, 1})) { int DMask[] = {0, 1, 2, 3}; int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2; DMask[DOffset + 0] = DOffset + 1; DMask[DOffset + 1] = DOffset + 0; MVT DVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2); V = DAG.getBitcast(DVT, V); DCI.AddToWorklist(V.getNode()); V = DAG.getNode(X86ISD::PSHUFD, DL, DVT, V, getV4X86ShuffleImm8ForMask(DMask, DL, DAG)); DCI.AddToWorklist(V.getNode()); return DAG.getBitcast(VT, V); } // Look for shuffle patterns which can be implemented as a single unpack. // FIXME: This doesn't handle the location of the PSHUFD generically, and // only works when we have a PSHUFD followed by two half-shuffles. if (Mask[0] == Mask[1] && Mask[2] == Mask[3] && (V.getOpcode() == X86ISD::PSHUFLW || V.getOpcode() == X86ISD::PSHUFHW) && V.getOpcode() != N.getOpcode() && V.hasOneUse()) { SDValue D = V.getOperand(0); while (D.getOpcode() == ISD::BITCAST && D.hasOneUse()) D = D.getOperand(0); if (D.getOpcode() == X86ISD::PSHUFD && D.hasOneUse()) { SmallVector VMask = getPSHUFShuffleMask(V); SmallVector DMask = getPSHUFShuffleMask(D); int NOffset = N.getOpcode() == X86ISD::PSHUFLW ? 
0 : 4; int VOffset = V.getOpcode() == X86ISD::PSHUFLW ? 0 : 4; int WordMask[8]; for (int i = 0; i < 4; ++i) { WordMask[i + NOffset] = Mask[i] + NOffset; WordMask[i + VOffset] = VMask[i] + VOffset; } // Map the word mask through the DWord mask. int MappedMask[8]; for (int i = 0; i < 8; ++i) MappedMask[i] = 2 * DMask[WordMask[i] / 2] + WordMask[i] % 2; if (makeArrayRef(MappedMask).equals({0, 0, 1, 1, 2, 2, 3, 3}) || makeArrayRef(MappedMask).equals({4, 4, 5, 5, 6, 6, 7, 7})) { // We can replace all three shuffles with an unpack. V = DAG.getBitcast(VT, D.getOperand(0)); DCI.AddToWorklist(V.getNode()); return DAG.getNode(MappedMask[0] == 0 ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL, VT, V, V); } } } break; case X86ISD::PSHUFD: if (SDValue NewN = combineRedundantDWordShuffle(N, Mask, DAG, DCI)) return NewN; break; } return SDValue(); } /// \brief Try to combine a shuffle into a target-specific add-sub node. /// /// We combine this directly on the abstract vector shuffle nodes so it is /// easier to generically match. We also insert dummy vector shuffle nodes for /// the operands which explicitly discard the lanes which are unused by this /// operation to try to flow through the rest of the combiner the fact that /// they're unused. static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(N); EVT VT = N->getValueType(0); if ((!Subtarget->hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) && (!Subtarget->hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64))) return SDValue(); // We only handle target-independent shuffles. // FIXME: It would be easy and harmless to use the target shuffle mask // extraction tool to support more. if (N->getOpcode() != ISD::VECTOR_SHUFFLE) return SDValue(); auto *SVN = cast(N); SmallVector Mask; for (int M : SVN->getMask()) Mask.push_back(M); SDValue V1 = N->getOperand(0); SDValue V2 = N->getOperand(1); // We require the first shuffle operand to be the FSUB node, and the second to // be the FADD node. if (V1.getOpcode() == ISD::FADD && V2.getOpcode() == ISD::FSUB) { ShuffleVectorSDNode::commuteMask(Mask); std::swap(V1, V2); } else if (V1.getOpcode() != ISD::FSUB || V2.getOpcode() != ISD::FADD) return SDValue(); // If there are other uses of these operations we can't fold them. if (!V1->hasOneUse() || !V2->hasOneUse()) return SDValue(); // Ensure that both operations have the same operands. Note that we can // commute the FADD operands. SDValue LHS = V1->getOperand(0), RHS = V1->getOperand(1); if ((V2->getOperand(0) != LHS || V2->getOperand(1) != RHS) && (V2->getOperand(0) != RHS || V2->getOperand(1) != LHS)) return SDValue(); // We're looking for blends between FADD and FSUB nodes. We insist on these // nodes being lined up in a specific expected pattern. if (!(isShuffleEquivalent(V1, V2, Mask, {0, 3}) || isShuffleEquivalent(V1, V2, Mask, {0, 5, 2, 7}) || isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15}))) return SDValue(); return DAG.getNode(X86ISD::ADDSUB, DL, VT, LHS, RHS); } /// PerformShuffleCombine - Performs several different shuffle combines. static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { SDLoc dl(N); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); // Don't create instructions with illegal types after legalize types has run. 
const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType())) return SDValue(); // If we have legalized the vector types, look for blends of FADD and FSUB // nodes that we can fuse into an ADDSUB node. if (TLI.isTypeLegal(VT)) if (SDValue AddSub = combineShuffleToAddSub(N, Subtarget, DAG)) return AddSub; // Combine 256-bit vector shuffles. This is only profitable when in AVX mode if (TLI.isTypeLegal(VT) && Subtarget->hasFp256() && VT.is256BitVector() && N->getOpcode() == ISD::VECTOR_SHUFFLE) return PerformShuffleCombine256(N, DAG, DCI, Subtarget); // During Type Legalization, when promoting illegal vector types, // the backend might introduce new shuffle dag nodes and bitcasts. // // This code performs the following transformation: // fold: (shuffle (bitcast (BINOP A, B)), Undef, ) -> // (shuffle (BINOP (bitcast A), (bitcast B)), Undef, ) // // We do this only if both the bitcast and the BINOP dag nodes have // one use. Also, perform this transformation only if the new binary // operation is legal. This is to avoid introducing dag nodes that // potentially need to be further expanded (or custom lowered) into a // less optimal sequence of dag nodes. if (!DCI.isBeforeLegalize() && DCI.isBeforeLegalizeOps() && N1.getOpcode() == ISD::UNDEF && N0.hasOneUse() && N0.getOpcode() == ISD::BITCAST) { SDValue BC0 = N0.getOperand(0); EVT SVT = BC0.getValueType(); unsigned Opcode = BC0.getOpcode(); unsigned NumElts = VT.getVectorNumElements(); if (BC0.hasOneUse() && SVT.isVector() && SVT.getVectorNumElements() * 2 == NumElts && TLI.isOperationLegal(Opcode, VT)) { bool CanFold = false; switch (Opcode) { default : break; case ISD::ADD : case ISD::FADD : case ISD::SUB : case ISD::FSUB : case ISD::MUL : case ISD::FMUL : CanFold = true; } unsigned SVTNumElts = SVT.getVectorNumElements(); ShuffleVectorSDNode *SVOp = cast(N); for (unsigned i = 0, e = SVTNumElts; i != e && CanFold; ++i) CanFold = SVOp->getMaskElt(i) == (int)(i * 2); for (unsigned i = SVTNumElts, e = NumElts; i != e && CanFold; ++i) CanFold = SVOp->getMaskElt(i) < 0; if (CanFold) { SDValue BC00 = DAG.getBitcast(VT, BC0.getOperand(0)); SDValue BC01 = DAG.getBitcast(VT, BC0.getOperand(1)); SDValue NewBinOp = DAG.getNode(BC0.getOpcode(), dl, VT, BC00, BC01); return DAG.getVectorShuffle(VT, dl, NewBinOp, N1, &SVOp->getMask()[0]); } } } // Combine a vector_shuffle that is equal to build_vector load1, load2, load3, // load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are // consecutive, non-overlapping, and in the right order. SmallVector Elts; for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) Elts.push_back(getShuffleScalarElt(N, i, DAG, 0)); if (SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true)) return LD; if (isTargetShuffle(N->getOpcode())) { SDValue Shuffle = PerformTargetShuffleCombine(SDValue(N, 0), DAG, DCI, Subtarget); if (Shuffle.getNode()) return Shuffle; // Try recursively combining arbitrary sequences of x86 shuffle // instructions into higher-order shuffles. We do this after combining // specific PSHUF instruction sequences into their minimal form so that we // can evaluate how many specialized shuffle instructions are involved in // a particular chain. SmallVector NonceMask; // Just a placeholder. NonceMask.push_back(0); if (combineX86ShufflesRecursively(SDValue(N, 0), SDValue(N, 0), NonceMask, /*Depth*/ 1, /*HasPSHUFB*/ false, DAG, DCI, Subtarget)) return SDValue(); // This routine will use CombineTo to replace N. 
} return SDValue(); } /// XFormVExtractWithShuffleIntoLoad - Check if a vector extract from a target /// specific shuffle of a load can be folded into a single element load. /// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but /// shuffles have been custom lowered so we need to handle those here. static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { if (DCI.isBeforeLegalizeOps()) return SDValue(); SDValue InVec = N->getOperand(0); SDValue EltNo = N->getOperand(1); EVT EltVT = N->getValueType(0); if (!isa(EltNo)) return SDValue(); EVT OriginalVT = InVec.getValueType(); if (InVec.getOpcode() == ISD::BITCAST) { // Don't duplicate a load with other uses. if (!InVec.hasOneUse()) return SDValue(); EVT BCVT = InVec.getOperand(0).getValueType(); if (!BCVT.isVector() || BCVT.getVectorNumElements() != OriginalVT.getVectorNumElements()) return SDValue(); InVec = InVec.getOperand(0); } EVT CurrentVT = InVec.getValueType(); if (!isTargetShuffle(InVec.getOpcode())) return SDValue(); // Don't duplicate a load with other uses. if (!InVec.hasOneUse()) return SDValue(); SmallVector ShuffleMask; bool UnaryShuffle; if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(), true, ShuffleMask, UnaryShuffle)) return SDValue(); // Select the input vector, guarding against out of range extract vector. unsigned NumElems = CurrentVT.getVectorNumElements(); int Elt = cast(EltNo)->getZExtValue(); int Idx = (Elt > (int)NumElems) ? SM_SentinelUndef : ShuffleMask[Elt]; if (Idx == SM_SentinelZero) return EltVT.isInteger() ? DAG.getConstant(0, SDLoc(N), EltVT) : DAG.getConstantFP(+0.0, SDLoc(N), EltVT); if (Idx == SM_SentinelUndef) return DAG.getUNDEF(EltVT); assert(0 <= Idx && Idx < (int)(2 * NumElems) && "Shuffle index out of range"); SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); // If inputs to shuffle are the same for both ops, then allow 2 uses unsigned AllowedUses = InVec.getNumOperands() > 1 && InVec.getOperand(0) == InVec.getOperand(1) ? 2 : 1; if (LdNode.getOpcode() == ISD::BITCAST) { // Don't duplicate a load with other uses. if (!LdNode.getNode()->hasNUsesOfValue(AllowedUses, 0)) return SDValue(); AllowedUses = 1; // only allow 1 load use if we have a bitcast LdNode = LdNode.getOperand(0); } if (!ISD::isNormalLoad(LdNode.getNode())) return SDValue(); LoadSDNode *LN0 = cast(LdNode); if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile()) return SDValue(); // If there's a bitcast before the shuffle, check if the load type and // alignment is valid. unsigned Align = LN0->getAlignment(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( EltVT.getTypeForEVT(*DAG.getContext())); if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, EltVT)) return SDValue(); // All checks match so transform back to vector_shuffle so that DAG combiner // can finish the job SDLoc dl(N); // Create shuffle node taking into account the case that its a unary shuffle SDValue Shuffle = (UnaryShuffle) ? 
DAG.getUNDEF(CurrentVT) : InVec.getOperand(1); Shuffle = DAG.getVectorShuffle(CurrentVT, dl, InVec.getOperand(0), Shuffle, &ShuffleMask[0]); Shuffle = DAG.getBitcast(OriginalVT, Shuffle); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle, EltNo); } static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // Detect bitcasts between i32 to x86mmx low word. Since MMX types are // special and don't usually play with other vector types, it's better to // handle them early to be sure we emit efficient code by avoiding // store-load conversions. if (VT == MVT::x86mmx && N0.getOpcode() == ISD::BUILD_VECTOR && N0.getValueType() == MVT::v2i32 && isNullConstant(N0.getOperand(1))) { SDValue N00 = N0->getOperand(0); if (N00.getValueType() == MVT::i32) return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(N00), VT, N00); } // Convert a bitcasted integer logic operation that has one bitcasted // floating-point operand and one constant operand into a floating-point // logic operation. This may create a load of the constant, but that is // cheaper than materializing the constant in an integer register and // transferring it to an SSE register or transferring the SSE operand to // integer register and back. unsigned FPOpcode; switch (N0.getOpcode()) { case ISD::AND: FPOpcode = X86ISD::FAND; break; case ISD::OR: FPOpcode = X86ISD::FOR; break; case ISD::XOR: FPOpcode = X86ISD::FXOR; break; default: return SDValue(); } if (((Subtarget->hasSSE1() && VT == MVT::f32) || (Subtarget->hasSSE2() && VT == MVT::f64)) && isa(N0.getOperand(1)) && N0.getOperand(0).getOpcode() == ISD::BITCAST && N0.getOperand(0).getOperand(0).getValueType() == VT) { SDValue N000 = N0.getOperand(0).getOperand(0); SDValue FPConst = DAG.getBitcast(VT, N0.getOperand(1)); return DAG.getNode(FPOpcode, SDLoc(N0), VT, N000, FPConst); } return SDValue(); } /// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index /// generation and convert it from being a bunch of shuffles and extracts /// into a somewhat faster sequence. For i686, the best sequence is apparently /// storing the value and loading scalars back, while for x64 we should /// use 64-bit extracts and shifts. static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { if (SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI)) return NewOp; SDValue InputVector = N->getOperand(0); SDLoc dl(InputVector); // Detect mmx to i32 conversion through a v2i32 elt extract. if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() && N->getValueType(0) == MVT::i32 && InputVector.getValueType() == MVT::v2i32) { // The bitcast source is a direct mmx result. SDValue MMXSrc = InputVector.getNode()->getOperand(0); if (MMXSrc.getValueType() == MVT::x86mmx) return DAG.getNode(X86ISD::MMX_MOVD2W, SDLoc(InputVector), N->getValueType(0), InputVector.getNode()->getOperand(0)); // The mmx is indirect: (i64 extract_elt (v1i64 bitcast (x86mmx ...))). 
if (MMXSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT && MMXSrc.hasOneUse() && MMXSrc.getValueType() == MVT::i64) { SDValue MMXSrcOp = MMXSrc.getOperand(0); if (MMXSrcOp.hasOneUse() && MMXSrcOp.getOpcode() == ISD::BITCAST && MMXSrcOp.getValueType() == MVT::v1i64 && MMXSrcOp.getOperand(0).getValueType() == MVT::x86mmx) return DAG.getNode(X86ISD::MMX_MOVD2W, SDLoc(InputVector), N->getValueType(0), MMXSrcOp.getOperand(0)); } } EVT VT = N->getValueType(0); if (VT == MVT::i1 && isa(N->getOperand(1)) && InputVector.getOpcode() == ISD::BITCAST && isa(InputVector.getOperand(0))) { uint64_t ExtractedElt = cast(N->getOperand(1))->getZExtValue(); uint64_t InputValue = cast(InputVector.getOperand(0))->getZExtValue(); uint64_t Res = (InputValue >> ExtractedElt) & 1; return DAG.getConstant(Res, dl, MVT::i1); } // Only operate on vectors of 4 elements, where the alternative shuffling // gets to be more expensive. if (InputVector.getValueType() != MVT::v4i32) return SDValue(); // Check whether every use of InputVector is an EXTRACT_VECTOR_ELT with a // single use which is a sign-extend or zero-extend, and all elements are // used. SmallVector Uses; unsigned ExtractedElements = 0; for (SDNode::use_iterator UI = InputVector.getNode()->use_begin(), UE = InputVector.getNode()->use_end(); UI != UE; ++UI) { if (UI.getUse().getResNo() != InputVector.getResNo()) return SDValue(); SDNode *Extract = *UI; if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT) return SDValue(); if (Extract->getValueType(0) != MVT::i32) return SDValue(); if (!Extract->hasOneUse()) return SDValue(); if (Extract->use_begin()->getOpcode() != ISD::SIGN_EXTEND && Extract->use_begin()->getOpcode() != ISD::ZERO_EXTEND) return SDValue(); if (!isa(Extract->getOperand(1))) return SDValue(); // Record which element was extracted. ExtractedElements |= 1 << cast(Extract->getOperand(1))->getZExtValue(); Uses.push_back(Extract); } // If not all the elements were used, this may not be worthwhile. if (ExtractedElements != 15) return SDValue(); // Ok, we've now decided to do the transformation. // If 64-bit shifts are legal, use the extract-shift sequence, // otherwise bounce the vector off the cache. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Vals[4]; if (TLI.isOperationLegal(ISD::SRA, MVT::i64)) { SDValue Cst = DAG.getBitcast(MVT::v2i64, InputVector); auto &DL = DAG.getDataLayout(); EVT VecIdxTy = DAG.getTargetLoweringInfo().getVectorIdxTy(DL); SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst, DAG.getConstant(0, dl, VecIdxTy)); SDValue TopHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst, DAG.getConstant(1, dl, VecIdxTy)); SDValue ShAmt = DAG.getConstant( 32, dl, DAG.getTargetLoweringInfo().getShiftAmountTy(MVT::i64, DL)); Vals[0] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BottomHalf); Vals[1] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, DAG.getNode(ISD::SRA, dl, MVT::i64, BottomHalf, ShAmt)); Vals[2] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, TopHalf); Vals[3] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, DAG.getNode(ISD::SRA, dl, MVT::i64, TopHalf, ShAmt)); } else { // Store the value to a temporary stack slot. SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType()); SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, MachinePointerInfo(), false, false, 0); EVT ElementType = InputVector.getValueType().getVectorElementType(); unsigned EltSize = ElementType.getSizeInBits() / 8; // Replace each use (extract) with a load of the appropriate element. 
for (unsigned i = 0; i < 4; ++i) { uint64_t Offset = EltSize * i; auto PtrVT = TLI.getPointerTy(DAG.getDataLayout()); SDValue OffsetVal = DAG.getConstant(Offset, dl, PtrVT); SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, OffsetVal); // Load the scalar. Vals[i] = DAG.getLoad(ElementType, dl, Ch, ScalarAddr, MachinePointerInfo(), false, false, false, 0); } } // Replace the extracts for (SmallVectorImpl::iterator UI = Uses.begin(), UE = Uses.end(); UI != UE; ++UI) { SDNode *Extract = *UI; SDValue Idx = Extract->getOperand(1); uint64_t IdxVal = cast(Idx)->getZExtValue(); DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), Vals[IdxVal]); } // The replacement was made in place; don't return anything. return SDValue(); } static SDValue transformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { SDLoc dl(N); SDValue Cond = N->getOperand(0); SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); if (Cond.getOpcode() == ISD::SIGN_EXTEND) { SDValue CondSrc = Cond->getOperand(0); if (CondSrc->getOpcode() == ISD::SIGN_EXTEND_INREG) Cond = CondSrc->getOperand(0); } if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) return SDValue(); // A vselect where all conditions and data are constants can be optimized into // a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR(). if (ISD::isBuildVectorOfConstantSDNodes(LHS.getNode()) && ISD::isBuildVectorOfConstantSDNodes(RHS.getNode())) return SDValue(); unsigned MaskValue = 0; if (!BUILD_VECTORtoBlendMask(cast(Cond), MaskValue)) return SDValue(); MVT VT = N->getSimpleValueType(0); unsigned NumElems = VT.getVectorNumElements(); SmallVector ShuffleMask(NumElems, -1); for (unsigned i = 0; i < NumElems; ++i) { // Be sure we emit undef where we can. if (Cond.getOperand(i)->getOpcode() == ISD::UNDEF) ShuffleMask[i] = -1; else ShuffleMask[i] = i + NumElems * ((MaskValue >> i) & 1); } const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!TLI.isShuffleMaskLegal(ShuffleMask, VT)) return SDValue(); return DAG.getVectorShuffle(VT, dl, LHS, RHS, &ShuffleMask[0]); } /// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT /// nodes. static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { SDLoc DL(N); SDValue Cond = N->getOperand(0); // Get the LHS/RHS of the select. SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); EVT VT = LHS.getValueType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // If we have SSE[12] support, try to form min/max nodes. SSE min/max // instructions match the semantics of the common C idiom xhasSSE2() || (Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) { ISD::CondCode CC = cast(Cond.getOperand(2))->get(); unsigned Opcode = 0; // Check for x CC y ? x : y. if (DAG.isEqualTo(LHS, Cond.getOperand(0)) && DAG.isEqualTo(RHS, Cond.getOperand(1))) { switch (CC) { default: break; case ISD::SETULT: // Converting this to a min would handle NaNs incorrectly, and swapping // the operands would cause it to handle comparisons between positive // and negative zero incorrectly. if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) { if (!DAG.getTarget().Options.UnsafeFPMath && !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; std::swap(LHS, RHS); } Opcode = X86ISD::FMIN; break; case ISD::SETOLE: // Converting this to a min would handle comparisons between positive // and negative zero incorrectly. 
if (!DAG.getTarget().Options.UnsafeFPMath && !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) break; Opcode = X86ISD::FMIN; break; case ISD::SETULE: // Converting this to a min would handle both negative zeros and NaNs // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); case ISD::SETOLT: case ISD::SETLT: case ISD::SETLE: Opcode = X86ISD::FMIN; break; case ISD::SETOGE: // Converting this to a max would handle comparisons between positive // and negative zero incorrectly. if (!DAG.getTarget().Options.UnsafeFPMath && !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) break; Opcode = X86ISD::FMAX; break; case ISD::SETUGT: // Converting this to a max would handle NaNs incorrectly, and swapping // the operands would cause it to handle comparisons between positive // and negative zero incorrectly. if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) { if (!DAG.getTarget().Options.UnsafeFPMath && !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; std::swap(LHS, RHS); } Opcode = X86ISD::FMAX; break; case ISD::SETUGE: // Converting this to a max would handle both negative zeros and NaNs // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); case ISD::SETOGT: case ISD::SETGT: case ISD::SETGE: Opcode = X86ISD::FMAX; break; } // Check for x CC y ? y : x -- a min/max with reversed arms. } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) && DAG.isEqualTo(RHS, Cond.getOperand(0))) { switch (CC) { default: break; case ISD::SETOGE: // Converting this to a min would handle comparisons between positive // and negative zero incorrectly, and swapping the operands would // cause it to handle NaNs incorrectly. if (!DAG.getTarget().Options.UnsafeFPMath && !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) { if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) break; std::swap(LHS, RHS); } Opcode = X86ISD::FMIN; break; case ISD::SETUGT: // Converting this to a min would handle NaNs incorrectly. if (!DAG.getTarget().Options.UnsafeFPMath && (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) break; Opcode = X86ISD::FMIN; break; case ISD::SETUGE: // Converting this to a min would handle both negative zeros and NaNs // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); case ISD::SETOGT: case ISD::SETGT: case ISD::SETGE: Opcode = X86ISD::FMIN; break; case ISD::SETULT: // Converting this to a max would handle NaNs incorrectly. if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) break; Opcode = X86ISD::FMAX; break; case ISD::SETOLE: // Converting this to a max would handle comparisons between positive // and negative zero incorrectly, and swapping the operands would // cause it to handle NaNs incorrectly. if (!DAG.getTarget().Options.UnsafeFPMath && !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) { if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) break; std::swap(LHS, RHS); } Opcode = X86ISD::FMAX; break; case ISD::SETULE: // Converting this to a max would handle both negative zeros and NaNs // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); case ISD::SETOLT: case ISD::SETLT: case ISD::SETLE: Opcode = X86ISD::FMAX; break; } } if (Opcode) return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS); } EVT CondVT = Cond.getValueType(); if (Subtarget->hasAVX512() && VT.isVector() && CondVT.isVector() && CondVT.getVectorElementType() == MVT::i1) { // v16i8 (select v16i1, v16i8, v16i8) does not have a proper // lowering on KNL. 
In this case we convert it to // v16i8 (select v16i8, v16i8, v16i8) and use AVX instruction. // The same situation for all 128 and 256-bit vectors of i8 and i16. // Since SKX these selects have a proper lowering. EVT OpVT = LHS.getValueType(); if ((OpVT.is128BitVector() || OpVT.is256BitVector()) && (OpVT.getVectorElementType() == MVT::i8 || OpVT.getVectorElementType() == MVT::i16) && !(Subtarget->hasBWI() && Subtarget->hasVLX())) { Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, OpVT, Cond); DCI.AddToWorklist(Cond.getNode()); return DAG.getNode(N->getOpcode(), DL, OpVT, Cond, LHS, RHS); } } // If this is a select between two integer constants, try to do some // optimizations. if (ConstantSDNode *TrueC = dyn_cast(LHS)) { if (ConstantSDNode *FalseC = dyn_cast(RHS)) // Don't do this for crazy integer types. if (DAG.getTargetLoweringInfo().isTypeLegal(LHS.getValueType())) { // If this is efficiently invertible, canonicalize the LHSC/RHSC values // so that TrueC (the true value) is larger than FalseC. bool NeedsCondInvert = false; if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) && // Efficiently invertible. (Cond.getOpcode() == ISD::SETCC || // setcc -> invertible. (Cond.getOpcode() == ISD::XOR && // xor(X, C) -> invertible. isa(Cond.getOperand(1))))) { NeedsCondInvert = true; std::swap(TrueC, FalseC); } // Optimize C ? 8 : 0 -> zext(C) << 3. Likewise for any pow2/0. if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) { if (NeedsCondInvert) // Invert the condition if needed. Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, DAG.getConstant(1, DL, Cond.getValueType())); // Zero extend the condition if needed. Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond); unsigned ShAmt = TrueC->getAPIntValue().logBase2(); return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond, DAG.getConstant(ShAmt, DL, MVT::i8)); } // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst. if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) { if (NeedsCondInvert) // Invert the condition if needed. Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, DAG.getConstant(1, DL, Cond.getValueType())); // Zero extend the condition if needed. Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond); return DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, SDValue(FalseC, 0)); } // Optimize cases that will turn into an LEA instruction. This requires // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9). if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) { uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue(); if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff; bool isFastMultiplier = false; if (Diff < 10) { switch ((unsigned char)Diff) { default: break; case 1: // result = add base, cond case 2: // result = lea base( , cond*2) case 3: // result = lea base(cond, cond*2) case 4: // result = lea base( , cond*4) case 5: // result = lea base(cond, cond*4) case 8: // result = lea base( , cond*8) case 9: // result = lea base(cond, cond*8) isFastMultiplier = true; break; } } if (isFastMultiplier) { APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue(); if (NeedsCondInvert) // Invert the condition if needed. Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, DAG.getConstant(1, DL, Cond.getValueType())); // Zero extend the condition if needed. Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond); // Scale the condition by the difference. 
if (Diff != 1) Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond, DAG.getConstant(Diff, DL, Cond.getValueType())); // Add the base if non-zero. if (FalseC->getAPIntValue() != 0) Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, SDValue(FalseC, 0)); return Cond; } } } } // Canonicalize max and min: // (x > y) ? x : y -> (x >= y) ? x : y // (x < y) ? x : y -> (x <= y) ? x : y // This allows use of COND_S / COND_NS (see TranslateX86CC) which eliminates // the need for an extra compare // against zero. e.g. // (x - y) > 0 : (x - y) ? 0 -> (x - y) >= 0 : (x - y) ? 0 // subl %esi, %edi // testl %edi, %edi // movl $0, %eax // cmovgl %edi, %eax // => // xorl %eax, %eax // subl %esi, $edi // cmovsl %eax, %edi if (N->getOpcode() == ISD::SELECT && Cond.getOpcode() == ISD::SETCC && DAG.isEqualTo(LHS, Cond.getOperand(0)) && DAG.isEqualTo(RHS, Cond.getOperand(1))) { ISD::CondCode CC = cast(Cond.getOperand(2))->get(); switch (CC) { default: break; case ISD::SETLT: case ISD::SETGT: { ISD::CondCode NewCC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGE; Cond = DAG.getSetCC(SDLoc(Cond), Cond.getValueType(), Cond.getOperand(0), Cond.getOperand(1), NewCC); return DAG.getNode(ISD::SELECT, DL, VT, Cond, LHS, RHS); } } } // Early exit check if (!TLI.isTypeLegal(VT)) return SDValue(); // Match VSELECTs into subs with unsigned saturation. if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC && // psubus is available in SSE2 and AVX2 for i8 and i16 vectors. ((Subtarget->hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) || (Subtarget->hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) { ISD::CondCode CC = cast(Cond.getOperand(2))->get(); // Check if one of the arms of the VSELECT is a zero vector. If it's on the // left side invert the predicate to simplify logic below. SDValue Other; if (ISD::isBuildVectorAllZeros(LHS.getNode())) { Other = RHS; CC = ISD::getSetCCInverse(CC, true); } else if (ISD::isBuildVectorAllZeros(RHS.getNode())) { Other = LHS; } if (Other.getNode() && Other->getNumOperands() == 2 && DAG.isEqualTo(Other->getOperand(0), Cond.getOperand(0))) { SDValue OpLHS = Other->getOperand(0), OpRHS = Other->getOperand(1); SDValue CondRHS = Cond->getOperand(1); // Look for a general sub with unsigned saturation first. // x >= y ? x-y : 0 --> subus x, y // x > y ? x-y : 0 --> subus x, y if ((CC == ISD::SETUGE || CC == ISD::SETUGT) && Other->getOpcode() == ISD::SUB && DAG.isEqualTo(OpRHS, CondRHS)) return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS); if (auto *OpRHSBV = dyn_cast(OpRHS)) if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) { if (auto *CondRHSBV = dyn_cast(CondRHS)) if (auto *CondRHSConst = CondRHSBV->getConstantSplatNode()) // If the RHS is a constant we have to reverse the const // canonicalization. // x > C-1 ? x+-C : 0 --> subus x, C if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD && CondRHSConst->getAPIntValue() == (-OpRHSConst->getAPIntValue() - 1)) return DAG.getNode( X86ISD::SUBUS, DL, VT, OpLHS, DAG.getConstant(-OpRHSConst->getAPIntValue(), DL, VT)); // Another special case: If C was a sign bit, the sub has been // canonicalized into a xor. // FIXME: Would it be better to use computeKnownBits to determine // whether it's safe to decanonicalize the xor? // x s< 0 ? 
x^C : 0 --> subus x, C if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR && ISD::isBuildVectorAllZeros(CondRHS.getNode()) && OpRHSConst->getAPIntValue().isSignBit()) // Note that we have to rebuild the RHS constant here to ensure we // don't rely on particular values of undef lanes. return DAG.getNode( X86ISD::SUBUS, DL, VT, OpLHS, DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT)); } } } // Simplify vector selection if condition value type matches vselect // operand type if (N->getOpcode() == ISD::VSELECT && CondVT == VT) { assert(Cond.getValueType().isVector() && "vector select expects a vector selector!"); bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode()); bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode()); // Try invert the condition if true value is not all 1s and false value // is not all 0s. if (!TValIsAllOnes && !FValIsAllZeros && // Check if the selector will be produced by CMPP*/PCMP* Cond.getOpcode() == ISD::SETCC && // Check if SETCC has already been promoted TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) == CondVT) { bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode()); bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode()); if (TValIsAllZeros || FValIsAllOnes) { SDValue CC = Cond.getOperand(2); ISD::CondCode NewCC = ISD::getSetCCInverse(cast(CC)->get(), Cond.getOperand(0).getValueType().isInteger()); Cond = DAG.getSetCC(DL, CondVT, Cond.getOperand(0), Cond.getOperand(1), NewCC); std::swap(LHS, RHS); TValIsAllOnes = FValIsAllOnes; FValIsAllZeros = TValIsAllZeros; } } if (TValIsAllOnes || FValIsAllZeros) { SDValue Ret; if (TValIsAllOnes && FValIsAllZeros) Ret = Cond; else if (TValIsAllOnes) Ret = DAG.getNode(ISD::OR, DL, CondVT, Cond, DAG.getBitcast(CondVT, RHS)); else if (FValIsAllZeros) Ret = DAG.getNode(ISD::AND, DL, CondVT, Cond, DAG.getBitcast(CondVT, LHS)); return DAG.getBitcast(VT, Ret); } } // We should generate an X86ISD::BLENDI from a vselect if its argument // is a sign_extend_inreg of an any_extend of a BUILD_VECTOR of // constants. This specific pattern gets generated when we split a // selector for a 512 bit vector in a machine without AVX512 (but with // 256-bit vectors), during legalization: // // (vselect (sign_extend (any_extend (BUILD_VECTOR)) i1) LHS RHS) // // Iff we find this pattern and the build_vectors are built from // constants, we translate the vselect into a shuffle_vector that we // know will be matched by LowerVECTOR_SHUFFLEtoBlend. if ((N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::SHRUNKBLEND) && !DCI.isBeforeLegalize() && !VT.is512BitVector()) { SDValue Shuffle = transformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget); if (Shuffle.getNode()) return Shuffle; } // If this is a *dynamic* select (non-constant condition) and we can match // this node with one of the variable blend instructions, restructure the // condition so that the blends can use the high bit of each element and use // SimplifyDemandedBits to simplify the condition operand. if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() && !DCI.isBeforeLegalize() && !ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) { unsigned BitWidth = Cond.getValueType().getScalarSizeInBits(); // Don't optimize vector selects that map to mask-registers. if (BitWidth == 1) return SDValue(); // We can only handle the cases where VSELECT is directly legal on the // subtarget. 
We custom lower VSELECT nodes with constant conditions and // this makes it hard to see whether a dynamic VSELECT will correctly // lower, so we both check the operation's status and explicitly handle the // cases where a *dynamic* blend will fail even though a constant-condition // blend could be custom lowered. // FIXME: We should find a better way to handle this class of problems. // Potentially, we should combine constant-condition vselect nodes // pre-legalization into shuffles and not mark as many types as custom // lowered. if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT)) return SDValue(); // FIXME: We don't support i16-element blends currently. We could and // should support them by making *all* the bits in the condition be set // rather than just the high bit and using an i8-element blend. if (VT.getVectorElementType() == MVT::i16) return SDValue(); // Dynamic blending was only available from SSE4.1 onward. if (VT.is128BitVector() && !Subtarget->hasSSE41()) return SDValue(); // Byte blends are only available in AVX2 if (VT == MVT::v32i8 && !Subtarget->hasAVX2()) return SDValue(); assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1); APInt KnownZero, KnownOne; TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(), DCI.isBeforeLegalizeOps()); if (TLO.ShrinkDemandedConstant(Cond, DemandedMask) || TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne, TLO)) { // If we changed the computation somewhere in the DAG, this change // will affect all users of Cond. // Make sure it is fine and update all the nodes so that we do not // use the generic VSELECT anymore. Otherwise, we may perform // wrong optimizations as we messed up with the actual expectation // for the vector boolean values. if (Cond != TLO.Old) { // Check all uses of that condition operand to check whether it will be // consumed by non-BLEND instructions, which may depend on all bits are // set properly. for (SDNode::use_iterator I = Cond->use_begin(), E = Cond->use_end(); I != E; ++I) if (I->getOpcode() != ISD::VSELECT) // TODO: Add other opcodes eventually lowered into BLEND. return SDValue(); // Update all the users of the condition, before committing the change, // so that the VSELECT optimizations that expect the correct vector // boolean value will not be triggered. for (SDNode::use_iterator I = Cond->use_begin(), E = Cond->use_end(); I != E; ++I) DAG.ReplaceAllUsesOfValueWith( SDValue(*I, 0), DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(*I), I->getValueType(0), Cond, I->getOperand(1), I->getOperand(2))); DCI.CommitTargetLoweringOpt(TLO); return SDValue(); } // At this point, only Cond is changed. Change the condition // just for N to keep the opportunity to optimize all other // users their own way. DAG.ReplaceAllUsesOfValueWith( SDValue(N, 0), DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(N), N->getValueType(0), TLO.New, N->getOperand(1), N->getOperand(2))); return SDValue(); } } return SDValue(); } // Check whether a boolean test is testing a boolean value generated by // X86ISD::SETCC. If so, return the operand of that SETCC and proper condition // code. // // Simplify the following patterns: // (Op (CMP (SETCC Cond EFLAGS) 1) EQ) or // (Op (CMP (SETCC Cond EFLAGS) 0) NEQ) // to (Op EFLAGS Cond) // // (Op (CMP (SETCC Cond EFLAGS) 0) EQ) or // (Op (CMP (SETCC Cond EFLAGS) 1) NEQ) // to (Op EFLAGS !Cond) // // where Op could be BRCOND or CMOV. 
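// In effect, materializing the flag with SETCC and then re-testing that
// boolean through CMP is redundant: the consumer can test the original
// EFLAGS directly, inverting the condition when the test asks whether the
// boolean was false (EQ against 0, or NEQ against 1).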
// static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { // Quit if not CMP and SUB with its value result used. if (Cmp.getOpcode() != X86ISD::CMP && (Cmp.getOpcode() != X86ISD::SUB || Cmp.getNode()->hasAnyUseOfValue(0))) return SDValue(); // Quit if not used as a boolean value. if (CC != X86::COND_E && CC != X86::COND_NE) return SDValue(); // Check CMP operands. One of them should be 0 or 1 and the other should be // an SetCC or extended from it. SDValue Op1 = Cmp.getOperand(0); SDValue Op2 = Cmp.getOperand(1); SDValue SetCC; const ConstantSDNode* C = nullptr; bool needOppositeCond = (CC == X86::COND_E); bool checkAgainstTrue = false; // Is it a comparison against 1? if ((C = dyn_cast(Op1))) SetCC = Op2; else if ((C = dyn_cast(Op2))) SetCC = Op1; else // Quit if all operands are not constants. return SDValue(); if (C->getZExtValue() == 1) { needOppositeCond = !needOppositeCond; checkAgainstTrue = true; } else if (C->getZExtValue() != 0) // Quit if the constant is neither 0 or 1. return SDValue(); bool truncatedToBoolWithAnd = false; // Skip (zext $x), (trunc $x), or (and $x, 1) node. while (SetCC.getOpcode() == ISD::ZERO_EXTEND || SetCC.getOpcode() == ISD::TRUNCATE || SetCC.getOpcode() == ISD::AND) { if (SetCC.getOpcode() == ISD::AND) { int OpIdx = -1; if (isOneConstant(SetCC.getOperand(0))) OpIdx = 1; if (isOneConstant(SetCC.getOperand(1))) OpIdx = 0; if (OpIdx == -1) break; SetCC = SetCC.getOperand(OpIdx); truncatedToBoolWithAnd = true; } else SetCC = SetCC.getOperand(0); } switch (SetCC.getOpcode()) { case X86ISD::SETCC_CARRY: // Since SETCC_CARRY gives output based on R = CF ? ~0 : 0, it's unsafe to // simplify it if the result of SETCC_CARRY is not canonicalized to 0 or 1, // i.e. it's a comparison against true but the result of SETCC_CARRY is not // truncated to i1 using 'and'. if (checkAgainstTrue && !truncatedToBoolWithAnd) break; assert(X86::CondCode(SetCC.getConstantOperandVal(0)) == X86::COND_B && "Invalid use of SETCC_CARRY!"); // FALL THROUGH case X86ISD::SETCC: // Set the condition code or opposite one if necessary. CC = X86::CondCode(SetCC.getConstantOperandVal(0)); if (needOppositeCond) CC = X86::GetOppositeBranchCondition(CC); return SetCC.getOperand(1); case X86ISD::CMOV: { // Check whether false/true value has canonical one, i.e. 0 or 1. ConstantSDNode *FVal = dyn_cast(SetCC.getOperand(0)); ConstantSDNode *TVal = dyn_cast(SetCC.getOperand(1)); // Quit if true value is not a constant. if (!TVal) return SDValue(); // Quit if false value is not a constant. if (!FVal) { SDValue Op = SetCC.getOperand(0); // Skip 'zext' or 'trunc' node. if (Op.getOpcode() == ISD::ZERO_EXTEND || Op.getOpcode() == ISD::TRUNCATE) Op = Op.getOperand(0); // A special case for rdrand/rdseed, where 0 is set if false cond is // found. if ((Op.getOpcode() != X86ISD::RDRAND && Op.getOpcode() != X86ISD::RDSEED) || Op.getResNo() != 0) return SDValue(); } // Quit if false value is not the constant 0 or 1. bool FValIsFalse = true; if (FVal && FVal->getZExtValue() != 0) { if (FVal->getZExtValue() != 1) return SDValue(); // If FVal is 1, opposite cond is needed. needOppositeCond = !needOppositeCond; FValIsFalse = false; } // Quit if TVal is not the constant opposite of FVal. 
if (FValIsFalse && TVal->getZExtValue() != 1) return SDValue(); if (!FValIsFalse && TVal->getZExtValue() != 0) return SDValue(); CC = X86::CondCode(SetCC.getConstantOperandVal(2)); if (needOppositeCond) CC = X86::GetOppositeBranchCondition(CC); return SetCC.getOperand(3); } } return SDValue(); } /// Check whether Cond is an AND/OR of SETCCs off of the same EFLAGS. /// Match: /// (X86or (X86setcc) (X86setcc)) /// (X86cmp (and (X86setcc) (X86setcc)), 0) static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0, X86::CondCode &CC1, SDValue &Flags, bool &isAnd) { if (Cond->getOpcode() == X86ISD::CMP) { if (!isNullConstant(Cond->getOperand(1))) return false; Cond = Cond->getOperand(0); } isAnd = false; SDValue SetCC0, SetCC1; switch (Cond->getOpcode()) { default: return false; case ISD::AND: case X86ISD::AND: isAnd = true; // fallthru case ISD::OR: case X86ISD::OR: SetCC0 = Cond->getOperand(0); SetCC1 = Cond->getOperand(1); break; }; // Make sure we have SETCC nodes, using the same flags value. if (SetCC0.getOpcode() != X86ISD::SETCC || SetCC1.getOpcode() != X86ISD::SETCC || SetCC0->getOperand(1) != SetCC1->getOperand(1)) return false; CC0 = (X86::CondCode)SetCC0->getConstantOperandVal(0); CC1 = (X86::CondCode)SetCC1->getConstantOperandVal(0); Flags = SetCC0->getOperand(1); return true; } /// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL] static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { SDLoc DL(N); // If the flag operand isn't dead, don't touch this CMOV. if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty()) return SDValue(); SDValue FalseOp = N->getOperand(0); SDValue TrueOp = N->getOperand(1); X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2); SDValue Cond = N->getOperand(3); if (CC == X86::COND_E || CC == X86::COND_NE) { switch (Cond.getOpcode()) { default: break; case X86ISD::BSR: case X86ISD::BSF: // If operand of BSR / BSF are proven never zero, then ZF cannot be set. if (DAG.isKnownNeverZero(Cond.getOperand(0))) return (CC == X86::COND_E) ? FalseOp : TrueOp; } } SDValue Flags; Flags = checkBoolTestSetCCCombine(Cond, CC); if (Flags.getNode() && // Extra check as FCMOV only supports a subset of X86 cond. (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC))) { SDValue Ops[] = { FalseOp, TrueOp, DAG.getConstant(CC, DL, MVT::i8), Flags }; return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops); } // If this is a select between two integer constants, try to do some // optimizations. Note that the operands are ordered the opposite of SELECT // operands. if (ConstantSDNode *TrueC = dyn_cast(TrueOp)) { if (ConstantSDNode *FalseC = dyn_cast(FalseOp)) { // Canonicalize the TrueC/FalseC values so that TrueC (the true value) is // larger than FalseC (the false value). if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) { CC = X86::GetOppositeBranchCondition(CC); std::swap(TrueC, FalseC); std::swap(TrueOp, FalseOp); } // Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0. // This is efficient for any integer data type (including i8/i16) and // shift amount. if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) { Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8, DAG.getConstant(CC, DL, MVT::i8), Cond); // Zero extend the condition if needed. 
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond); unsigned ShAmt = TrueC->getAPIntValue().logBase2(); Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond, DAG.getConstant(ShAmt, DL, MVT::i8)); if (N->getNumValues() == 2) // Dead flag value? return DCI.CombineTo(N, Cond, SDValue()); return Cond; } // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst. This is efficient // for any integer data type, including i8/i16. if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) { Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8, DAG.getConstant(CC, DL, MVT::i8), Cond); // Zero extend the condition if needed. Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond); Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, SDValue(FalseC, 0)); if (N->getNumValues() == 2) // Dead flag value? return DCI.CombineTo(N, Cond, SDValue()); return Cond; } // Optimize cases that will turn into an LEA instruction. This requires // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9). if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) { uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue(); if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff; bool isFastMultiplier = false; if (Diff < 10) { switch ((unsigned char)Diff) { default: break; case 1: // result = add base, cond case 2: // result = lea base( , cond*2) case 3: // result = lea base(cond, cond*2) case 4: // result = lea base( , cond*4) case 5: // result = lea base(cond, cond*4) case 8: // result = lea base( , cond*8) case 9: // result = lea base(cond, cond*8) isFastMultiplier = true; break; } } if (isFastMultiplier) { APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue(); Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8, DAG.getConstant(CC, DL, MVT::i8), Cond); // Zero extend the condition if needed. Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond); // Scale the condition by the difference. if (Diff != 1) Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond, DAG.getConstant(Diff, DL, Cond.getValueType())); // Add the base if non-zero. if (FalseC->getAPIntValue() != 0) Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, SDValue(FalseC, 0)); if (N->getNumValues() == 2) // Dead flag value? return DCI.CombineTo(N, Cond, SDValue()); return Cond; } } } } // Handle these cases: // (select (x != c), e, c) -> select (x != c), e, x), // (select (x == c), c, e) -> select (x == c), x, e) // where the c is an integer constant, and the "select" is the combination // of CMOV and CMP. // // The rationale for this change is that the conditional-move from a constant // needs two instructions, however, conditional-move from a register needs // only one instruction. // // CAVEAT: By replacing a constant with a symbolic value, it may obscure // some instruction-combining opportunities. This opt needs to be // postponed as late as possible. // if (!DCI.isBeforeLegalize() && !DCI.isBeforeLegalizeOps()) { // the DCI.xxxx conditions are provided to postpone the optimization as // late as possible. 
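  // For example, with (cmov e, 42, eq, (cmp x, 42)) the constant 42 in the
  // true arm can be replaced by x itself, since that arm is only selected
  // when x == 42; this avoids materializing the immediate in a register.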
ConstantSDNode *CmpAgainst = nullptr; if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) && (CmpAgainst = dyn_cast(Cond.getOperand(1))) && !isa(Cond.getOperand(0))) { if (CC == X86::COND_NE && CmpAgainst == dyn_cast(FalseOp)) { CC = X86::GetOppositeBranchCondition(CC); std::swap(TrueOp, FalseOp); } if (CC == X86::COND_E && CmpAgainst == dyn_cast(TrueOp)) { SDValue Ops[] = { FalseOp, Cond.getOperand(0), DAG.getConstant(CC, DL, MVT::i8), Cond }; return DAG.getNode(X86ISD::CMOV, DL, N->getVTList (), Ops); } } } // Fold and/or of setcc's to double CMOV: // (CMOV F, T, ((cc1 | cc2) != 0)) -> (CMOV (CMOV F, T, cc1), T, cc2) // (CMOV F, T, ((cc1 & cc2) != 0)) -> (CMOV (CMOV T, F, !cc1), F, !cc2) // // This combine lets us generate: // cmovcc1 (jcc1 if we don't have CMOV) // cmovcc2 (same) // instead of: // setcc1 // setcc2 // and/or // cmovne (jne if we don't have CMOV) // When we can't use the CMOV instruction, it might increase branch // mispredicts. // When we can use CMOV, or when there is no mispredict, this improves // throughput and reduces register pressure. // if (CC == X86::COND_NE) { SDValue Flags; X86::CondCode CC0, CC1; bool isAndSetCC; if (checkBoolTestAndOrSetCCCombine(Cond, CC0, CC1, Flags, isAndSetCC)) { if (isAndSetCC) { std::swap(FalseOp, TrueOp); CC0 = X86::GetOppositeBranchCondition(CC0); CC1 = X86::GetOppositeBranchCondition(CC1); } SDValue LOps[] = {FalseOp, TrueOp, DAG.getConstant(CC0, DL, MVT::i8), Flags}; SDValue LCMOV = DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), LOps); SDValue Ops[] = {LCMOV, TrueOp, DAG.getConstant(CC1, DL, MVT::i8), Flags}; SDValue CMOV = DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(CMOV.getNode(), 1)); return CMOV; } } return SDValue(); } /// PerformMulCombine - Optimize a single multiply with constant into two /// in order to implement it with two cheaper instructions, e.g. /// LEA + SHL, LEA + LEA. static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { // An imul is usually smaller than the alternative sequence. if (DAG.getMachineFunction().getFunction()->optForMinSize()) return SDValue(); if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) return SDValue(); EVT VT = N->getValueType(0); if (VT != MVT::i64 && VT != MVT::i32) return SDValue(); ConstantSDNode *C = dyn_cast(N->getOperand(1)); if (!C) return SDValue(); uint64_t MulAmt = C->getZExtValue(); if (isPowerOf2_64(MulAmt) || MulAmt == 3 || MulAmt == 5 || MulAmt == 9) return SDValue(); uint64_t MulAmt1 = 0; uint64_t MulAmt2 = 0; if ((MulAmt % 9) == 0) { MulAmt1 = 9; MulAmt2 = MulAmt / 9; } else if ((MulAmt % 5) == 0) { MulAmt1 = 5; MulAmt2 = MulAmt / 5; } else if ((MulAmt % 3) == 0) { MulAmt1 = 3; MulAmt2 = MulAmt / 3; } SDLoc DL(N); SDValue NewMul; if (MulAmt2 && (isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){ if (isPowerOf2_64(MulAmt2) && !(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD)) // If second multiplifer is pow2, issue it first. We want the multiply by // 3, 5, or 9 to be folded into the addressing mode unless the lone use // is an add. 
std::swap(MulAmt1, MulAmt2); if (isPowerOf2_64(MulAmt1)) NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), DAG.getConstant(Log2_64(MulAmt1), DL, MVT::i8)); else NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), DAG.getConstant(MulAmt1, DL, VT)); if (isPowerOf2_64(MulAmt2)) NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul, DAG.getConstant(Log2_64(MulAmt2), DL, MVT::i8)); else NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul, DAG.getConstant(MulAmt2, DL, VT)); } if (!NewMul) { assert(MulAmt != 0 && MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) && "Both cases that could cause potential overflows should have " "already been handled."); if (isPowerOf2_64(MulAmt - 1)) // (mul x, 2^N + 1) => (add (shl x, N), x) NewMul = DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), DAG.getConstant(Log2_64(MulAmt - 1), DL, MVT::i8))); else if (isPowerOf2_64(MulAmt + 1)) // (mul x, 2^N - 1) => (sub (shl x, N), x) NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), DAG.getConstant(Log2_64(MulAmt + 1), DL, MVT::i8)), N->getOperand(0)); } if (NewMul) // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, NewMul, false); return SDValue(); } static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N0.getValueType(); // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2)) // since the result of setcc_c is all zero's or all ones. if (VT.isInteger() && !VT.isVector() && N1C && N0.getOpcode() == ISD::AND && N0.getOperand(1).getOpcode() == ISD::Constant) { SDValue N00 = N0.getOperand(0); APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); APInt ShAmt = N1C->getAPIntValue(); Mask = Mask.shl(ShAmt); bool MaskOK = false; // We can handle cases concerning bit-widening nodes containing setcc_c if // we carefully interrogate the mask to make sure we are semantics // preserving. // The transform is not safe if the result of C1 << C2 exceeds the bitwidth // of the underlying setcc_c operation if the setcc_c was zero extended. // Consider the following example: // zext(setcc_c) -> i32 0x0000FFFF // c1 -> i32 0x0000FFFF // c2 -> i32 0x00000001 // (shl (and (setcc_c), c1), c2) -> i32 0x0001FFFE // (and setcc_c, (c1 << c2)) -> i32 0x0000FFFE if (N00.getOpcode() == X86ISD::SETCC_CARRY) { MaskOK = true; } else if (N00.getOpcode() == ISD::SIGN_EXTEND && N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { MaskOK = true; } else if ((N00.getOpcode() == ISD::ZERO_EXTEND || N00.getOpcode() == ISD::ANY_EXTEND) && N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { MaskOK = Mask.isIntN(N00.getOperand(0).getValueSizeInBits()); } if (MaskOK && Mask != 0) { SDLoc DL(N); return DAG.getNode(ISD::AND, DL, VT, N00, DAG.getConstant(Mask, DL, VT)); } } // Hardware support for vector shifts is sparse which makes us scalarize the // vector operations in many cases. Also, on sandybridge ADD is faster than // shl. // (shl V, 1) -> add V,V if (auto *N1BV = dyn_cast(N1)) if (auto *N1SplatC = N1BV->getConstantSplatNode()) { assert(N0.getValueType().isVector() && "Invalid vector shift type"); // We shift all of the values by one. In many cases we do not have // hardware support for this operation. This is better expressed as an ADD // of two values. 
if (N1SplatC->getAPIntValue() == 1) return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0); } return SDValue(); } static SDValue PerformSRACombine(SDNode *N, SelectionDAG &DAG) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); unsigned Size = VT.getSizeInBits(); // fold (ashr (shl, a, [56,48,32,24,16]), SarConst) // into (shl, (sext (a), [56,48,32,24,16] - SarConst)) or // into (lshr, (sext (a), SarConst - [56,48,32,24,16])) // depending on sign of (SarConst - [56,48,32,24,16]) // sexts in X86 are MOVs. The MOVs have the same code size // as above SHIFTs (only SHIFT on 1 has lower code size). // However the MOVs have 2 advantages to a SHIFT: // 1. MOVs can write to a register that differs from source // 2. MOVs accept memory operands if (!VT.isInteger() || VT.isVector() || N1.getOpcode() != ISD::Constant || N0.getOpcode() != ISD::SHL || !N0.hasOneUse() || N0.getOperand(1).getOpcode() != ISD::Constant) return SDValue(); SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); APInt ShlConst = (cast(N01))->getAPIntValue(); APInt SarConst = (cast(N1))->getAPIntValue(); EVT CVT = N1.getValueType(); if (SarConst.isNegative()) return SDValue(); for (MVT SVT : MVT::integer_valuetypes()) { unsigned ShiftSize = SVT.getSizeInBits(); // skipping types without corresponding sext/zext and // ShlConst that is not one of [56,48,32,24,16] if (ShiftSize < 8 || ShiftSize > 64 || ShlConst != Size - ShiftSize) continue; SDLoc DL(N); SDValue NN = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N00, DAG.getValueType(SVT)); SarConst = SarConst - (Size - ShiftSize); if (SarConst == 0) return NN; else if (SarConst.isNegative()) return DAG.getNode(ISD::SHL, DL, VT, NN, DAG.getConstant(-SarConst, DL, CVT)); else return DAG.getNode(ISD::SRA, DL, VT, NN, DAG.getConstant(SarConst, DL, CVT)); } return SDValue(); } /// \brief Returns a vector of 0s if the node in input is a vector logical /// shift by a constant amount which is known to be bigger than or equal /// to the vector element size in bits. static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { EVT VT = N->getValueType(0); if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 && (!Subtarget->hasInt256() || (VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16))) return SDValue(); SDValue Amt = N->getOperand(1); SDLoc DL(N); if (auto *AmtBV = dyn_cast(Amt)) if (auto *AmtSplat = AmtBV->getConstantSplatNode()) { APInt ShiftAmt = AmtSplat->getAPIntValue(); unsigned MaxAmount = VT.getSimpleVT().getVectorElementType().getSizeInBits(); // SSE2/AVX2 logical shifts always return a vector of 0s // if the shift amount is bigger than or equal to // the element size. The constant shift amount will be // encoded as a 8-bit immediate. if (ShiftAmt.trunc(8).uge(MaxAmount)) return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, DL); } return SDValue(); } /// PerformShiftCombine - Combine shifts. static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { if (N->getOpcode() == ISD::SHL) if (SDValue V = PerformSHLCombine(N, DAG)) return V; if (N->getOpcode() == ISD::SRA) if (SDValue V = PerformSRACombine(N, DAG)) return V; // Try to fold this logical shift into a zero vector. if (N->getOpcode() != ISD::SRA) if (SDValue V = performShiftToAllZeros(N, DAG, Subtarget)) return V; return SDValue(); } // CMPEQCombine - Recognize the distinctive (AND (setcc ...) 
(setcc ..)) // where both setccs reference the same FP CMP, and rewrite for CMPEQSS // and friends. Likewise for OR -> CMPNEQSS. static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { unsigned opcode; // SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but // we're requiring SSE2 for both. if (Subtarget->hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue CMP0 = N0->getOperand(1); SDValue CMP1 = N1->getOperand(1); SDLoc DL(N); // The SETCCs should both refer to the same CMP. if (CMP0.getOpcode() != X86ISD::CMP || CMP0 != CMP1) return SDValue(); SDValue CMP00 = CMP0->getOperand(0); SDValue CMP01 = CMP0->getOperand(1); EVT VT = CMP00.getValueType(); if (VT == MVT::f32 || VT == MVT::f64) { bool ExpectingFlags = false; // Check for any users that want flags: for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); !ExpectingFlags && UI != UE; ++UI) switch (UI->getOpcode()) { default: case ISD::BR_CC: case ISD::BRCOND: case ISD::SELECT: ExpectingFlags = true; break; case ISD::CopyToReg: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: break; } if (!ExpectingFlags) { enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0); enum X86::CondCode cc1 = (enum X86::CondCode)N1.getConstantOperandVal(0); if (cc1 == X86::COND_E || cc1 == X86::COND_NE) { X86::CondCode tmp = cc0; cc0 = cc1; cc1 = tmp; } if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) || (cc0 == X86::COND_NE && cc1 == X86::COND_P)) { // FIXME: need symbolic constants for these magic numbers. // See X86ATTInstPrinter.cpp:printSSECC(). unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4; if (Subtarget->hasAVX512()) { SDValue FSetCC = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CMP00, CMP01, DAG.getConstant(x86cc, DL, MVT::i8)); if (N->getValueType(0) != MVT::i1) return DAG.getNode(ISD::ZERO_EXTEND, DL, N->getValueType(0), FSetCC); return FSetCC; } SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00, CMP01, DAG.getConstant(x86cc, DL, MVT::i8)); bool is64BitFP = (CMP00.getValueType() == MVT::f64); MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32; if (is64BitFP && !Subtarget->is64Bit()) { // On a 32-bit target, we cannot bitcast the 64-bit float to a // 64-bit integer, since that's not a legal type. Since // OnesOrZeroesF is all ones of all zeroes, we don't need all the // bits, but can do this little dance to extract the lowest 32 bits // and work with those going forward. SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, OnesOrZeroesF); SDValue Vector32 = DAG.getBitcast(MVT::v4f32, Vector64); OnesOrZeroesF = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Vector32, DAG.getIntPtrConstant(0, DL)); IntVT = MVT::i32; } SDValue OnesOrZeroesI = DAG.getBitcast(IntVT, OnesOrZeroesF); SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI, DAG.getConstant(1, DL, IntVT)); SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed); return OneBitOfTruth; } } } } return SDValue(); } /// CanFoldXORWithAllOnes - Test whether the XOR operand is a AllOnes vector /// so it can be folded inside ANDNP. static bool CanFoldXORWithAllOnes(const SDNode *N) { EVT VT = N->getValueType(0); // Match direct AllOnes for 128 and 256-bit vectors if (ISD::isBuildVectorAllOnes(N)) return true; // Look through a bit convert. 
if (N->getOpcode() == ISD::BITCAST) N = N->getOperand(0).getNode(); // Sometimes the operand may come from a insert_subvector building a 256-bit // allones vector if (VT.is256BitVector() && N->getOpcode() == ISD::INSERT_SUBVECTOR) { SDValue V1 = N->getOperand(0); SDValue V2 = N->getOperand(1); if (V1.getOpcode() == ISD::INSERT_SUBVECTOR && V1.getOperand(0).getOpcode() == ISD::UNDEF && ISD::isBuildVectorAllOnes(V1.getOperand(1).getNode()) && ISD::isBuildVectorAllOnes(V2.getNode())) return true; } return false; } // On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an XMM sized // register. In most cases we actually compare or select YMM-sized registers // and mixing the two types creates horrible code. This method optimizes // some of the transition sequences. static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { EVT VT = N->getValueType(0); if (!VT.is256BitVector()) return SDValue(); assert((N->getOpcode() == ISD::ANY_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node"); SDValue Narrow = N->getOperand(0); EVT NarrowVT = Narrow->getValueType(0); if (!NarrowVT.is128BitVector()) return SDValue(); if (Narrow->getOpcode() != ISD::XOR && Narrow->getOpcode() != ISD::AND && Narrow->getOpcode() != ISD::OR) return SDValue(); SDValue N0 = Narrow->getOperand(0); SDValue N1 = Narrow->getOperand(1); SDLoc DL(Narrow); // The Left side has to be a trunc. if (N0.getOpcode() != ISD::TRUNCATE) return SDValue(); // The type of the truncated inputs. EVT WideVT = N0->getOperand(0)->getValueType(0); if (WideVT != VT) return SDValue(); // The right side has to be a 'trunc' or a constant vector. bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE; ConstantSDNode *RHSConstSplat = nullptr; if (auto *RHSBV = dyn_cast(N1)) RHSConstSplat = RHSBV->getConstantSplatNode(); if (!RHSTrunc && !RHSConstSplat) return SDValue(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!TLI.isOperationLegalOrPromote(Narrow->getOpcode(), WideVT)) return SDValue(); // Set N0 and N1 to hold the inputs to the new wide operation. N0 = N0->getOperand(0); if (RHSConstSplat) { N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getVectorElementType(), SDValue(RHSConstSplat, 0)); SmallVector C(WideVT.getVectorNumElements(), N1); N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, C); } else if (RHSTrunc) { N1 = N1->getOperand(0); } // Generate the wide operation. SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, WideVT, N0, N1); unsigned Opcode = N->getOpcode(); switch (Opcode) { case ISD::ANY_EXTEND: return Op; case ISD::ZERO_EXTEND: { unsigned InBits = NarrowVT.getScalarSizeInBits(); APInt Mask = APInt::getAllOnesValue(InBits); Mask = Mask.zext(VT.getScalarSizeInBits()); return DAG.getNode(ISD::AND, DL, VT, Op, DAG.getConstant(Mask, DL, VT)); } case ISD::SIGN_EXTEND: return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op, DAG.getValueType(NarrowVT)); default: llvm_unreachable("Unexpected opcode"); } } static SDValue VectorZextCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDLoc DL(N); // A vector zext_in_reg may be represented as a shuffle, // feeding into a bitcast (this represents anyext) feeding into // an and with a mask. // We'd like to try to combine that into a shuffle with zero // plus a bitcast, removing the and. 
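  // For example, a v4i32 zext_in_reg of the low bytes of a v16i8 value X:
  //   (and (bitcast v4i32 (shuffle X, undef, <0,u,u,u,1,u,u,u,...>)), splat 0xFF)
  // becomes
  //   (bitcast v4i32 (shuffle X, zero, <0,16,16,16,1,16,16,16,...>))
  // and the AND is removed.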
  if (N0.getOpcode() != ISD::BITCAST ||
      N0.getOperand(0).getOpcode() != ISD::VECTOR_SHUFFLE)
    return SDValue();

  // The other side of the AND should be a splat of 2^C, where C
  // is the number of bits in the source type.
  if (N1.getOpcode() == ISD::BITCAST)
    N1 = N1.getOperand(0);
  if (N1.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  BuildVectorSDNode *Vector = cast<BuildVectorSDNode>(N1);
  ShuffleVectorSDNode *Shuffle = cast<ShuffleVectorSDNode>(N0.getOperand(0));
  EVT SrcType = Shuffle->getValueType(0);

  // We expect a single-source shuffle
  if (Shuffle->getOperand(1)->getOpcode() != ISD::UNDEF)
    return SDValue();

  unsigned SrcSize = SrcType.getScalarSizeInBits();

  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (!Vector->isConstantSplat(SplatValue, SplatUndef,
                               SplatBitSize, HasAnyUndefs))
    return SDValue();

  unsigned ResSize = N1.getValueType().getScalarSizeInBits();
  // Make sure the splat matches the mask we expect
  if (SplatBitSize > ResSize ||
      (SplatValue + 1).exactLogBase2() != (int)SrcSize)
    return SDValue();
  // Make sure the input and output sizes make sense
  if (SrcSize >= ResSize || ResSize % SrcSize)
    return SDValue();

  // We expect a shuffle of the form <0, u, u, u, 1, u, u, u...>
  // The number of u's between each two values depends on the ratio between
  // the source and dest type.
  unsigned ZextRatio = ResSize / SrcSize;
  bool IsZext = true;
  for (unsigned i = 0; i < SrcType.getVectorNumElements(); ++i) {
    if (i % ZextRatio) {
      if (Shuffle->getMaskElt(i) > 0) {
        // Expected undef
        IsZext = false;
        break;
      }
    } else {
      if (Shuffle->getMaskElt(i) != (int)(i / ZextRatio)) {
        // Expected element number
        IsZext = false;
        break;
      }
    }
  }

  if (!IsZext)
    return SDValue();

  // Ok, perform the transformation - replace the shuffle with
  // a shuffle of the form <0, k, k, k, 1, k, k, k> with zero
  // (instead of undef) where the k elements come from the zero vector.
  SmallVector<int, 8> Mask;
  unsigned NumElems = SrcType.getVectorNumElements();
  for (unsigned i = 0; i < NumElems; ++i)
    if (i % ZextRatio)
      Mask.push_back(NumElems);
    else
      Mask.push_back(i / ZextRatio);

  SDValue NewShuffle = DAG.getVectorShuffle(Shuffle->getValueType(0), DL,
    Shuffle->getOperand(0), DAG.getConstant(0, DL, SrcType), Mask);
  return DAG.getBitcast(N0.getValueType(), NewShuffle);
}

/// If both input operands of a logic op are being cast from floating point
/// types, try to convert this into a floating point logic node to avoid
/// unnecessary moves from SSE to integer registers.
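/// For example, (i32 (and (i32 (bitcast f32 X)), (i32 (bitcast f32 Y)))) can
/// be rewritten as (i32 (bitcast (f32 (X86ISD::FAND X, Y)))), keeping both
/// values in SSE registers.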
static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
                                        const X86Subtarget *Subtarget) {
  unsigned FPOpcode = ISD::DELETED_NODE;
  if (N->getOpcode() == ISD::AND)
    FPOpcode = X86ISD::FAND;
  else if (N->getOpcode() == ISD::OR)
    FPOpcode = X86ISD::FOR;
  else if (N->getOpcode() == ISD::XOR)
    FPOpcode = X86ISD::FXOR;

  assert(FPOpcode != ISD::DELETED_NODE &&
         "Unexpected input node for FP logic conversion");

  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDLoc DL(N);
  if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST &&
      ((Subtarget->hasSSE1() && VT == MVT::i32) ||
       (Subtarget->hasSSE2() && VT == MVT::i64))) {
    SDValue N00 = N0.getOperand(0);
    SDValue N10 = N1.getOperand(0);
    EVT N00Type = N00.getValueType();
    EVT N10Type = N10.getValueType();
    if (N00Type.isFloatingPoint() && N10Type.isFloatingPoint()) {
      SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10);
      return DAG.getBitcast(VT, FPLogic);
    }
  }
  return SDValue();
}

static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const X86Subtarget *Subtarget) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  if (SDValue Zext = VectorZextCombine(N, DAG, DCI, Subtarget))
    return Zext;

  if (SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget))
    return R;

  if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
    return FPLogic;

  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDLoc DL(N);

  // Create BEXTR instructions
  // BEXTR is ((X >> imm) & (2**size-1))
  if (VT == MVT::i32 || VT == MVT::i64) {
    // Check for BEXTR.
    if ((Subtarget->hasBMI() || Subtarget->hasTBM()) &&
        (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)) {
      ConstantSDNode *MaskNode = dyn_cast<ConstantSDNode>(N1);
      ConstantSDNode *ShiftNode = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (MaskNode && ShiftNode) {
        uint64_t Mask = MaskNode->getZExtValue();
        uint64_t Shift = ShiftNode->getZExtValue();
        if (isMask_64(Mask)) {
          uint64_t MaskSize = countPopulation(Mask);
          if (Shift + MaskSize <= VT.getSizeInBits())
            return DAG.getNode(X86ISD::BEXTR, DL, VT, N0.getOperand(0),
                               DAG.getConstant(Shift | (MaskSize << 8), DL,
                                               VT));
        }
      }
    } // BEXTR

    return SDValue();
  }

  // Want to form ANDNP nodes:
  // 1) In the hopes of then easily combining them with OR and AND nodes
  //    to form PBLEND/PSIGN.
// 2) To match ANDN packed intrinsics if (VT != MVT::v2i64 && VT != MVT::v4i64) return SDValue(); // Check LHS for vnot if (N0.getOpcode() == ISD::XOR && //ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode())) CanFoldXORWithAllOnes(N0.getOperand(1).getNode())) return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1); // Check RHS for vnot if (N1.getOpcode() == ISD::XOR && //ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode())) CanFoldXORWithAllOnes(N1.getOperand(1).getNode())) return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0); return SDValue(); } static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); if (SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget)) return R; if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget)) return FPLogic; SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); // look for psign/blend if (VT == MVT::v2i64 || VT == MVT::v4i64) { if (!Subtarget->hasSSSE3() || (VT == MVT::v4i64 && !Subtarget->hasInt256())) return SDValue(); // Canonicalize pandn to RHS if (N0.getOpcode() == X86ISD::ANDNP) std::swap(N0, N1); // or (and (m, y), (pandn m, x)) if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) { SDValue Mask = N1.getOperand(0); SDValue X = N1.getOperand(1); SDValue Y; if (N0.getOperand(0) == Mask) Y = N0.getOperand(1); if (N0.getOperand(1) == Mask) Y = N0.getOperand(0); // Check to see if the mask appeared in both the AND and ANDNP and if (!Y.getNode()) return SDValue(); // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them. // Look through mask bitcast. if (Mask.getOpcode() == ISD::BITCAST) Mask = Mask.getOperand(0); if (X.getOpcode() == ISD::BITCAST) X = X.getOperand(0); if (Y.getOpcode() == ISD::BITCAST) Y = Y.getOperand(0); EVT MaskVT = Mask.getValueType(); // Validate that the Mask operand is a vector sra node. // FIXME: what to do for bytes, since there is a psignb/pblendvb, but // there is no psrai.b unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits(); unsigned SraAmt = ~0; if (Mask.getOpcode() == ISD::SRA) { if (auto *AmtBV = dyn_cast(Mask.getOperand(1))) if (auto *AmtConst = AmtBV->getConstantSplatNode()) SraAmt = AmtConst->getZExtValue(); } else if (Mask.getOpcode() == X86ISD::VSRAI) { SDValue SraC = Mask.getOperand(1); SraAmt = cast(SraC)->getZExtValue(); } if ((SraAmt + 1) != EltBits) return SDValue(); SDLoc DL(N); // Now we know we at least have a plendvb with the mask val. See if // we can form a psignb/w/d. // psign = x.type == y.type == mask.type && y = sub(0, x); if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X && ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) && X.getValueType() == MaskVT && Y.getValueType() == MaskVT) { assert((EltBits == 8 || EltBits == 16 || EltBits == 32) && "Unsupported VT for PSIGN"); Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0)); return DAG.getBitcast(VT, Mask); } // PBLENDVB only available on SSE 4.1 if (!Subtarget->hasSSE41()) return SDValue(); MVT BlendVT = (VT == MVT::v4i64) ? 
MVT::v32i8 : MVT::v16i8; X = DAG.getBitcast(BlendVT, X); Y = DAG.getBitcast(BlendVT, Y); Mask = DAG.getBitcast(BlendVT, Mask); Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, Y, X); return DAG.getBitcast(VT, Mask); } } if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64) return SDValue(); // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) bool OptForSize = DAG.getMachineFunction().getFunction()->optForSize(); // SHLD/SHRD instructions have lower register pressure, but on some // platforms they have higher latency than the equivalent // series of shifts/or that would otherwise be generated. // Don't fold (or (x << c) | (y >> (64 - c))) if SHLD/SHRD instructions // have higher latencies and we are not optimizing for size. if (!OptForSize && Subtarget->isSHLDSlow()) return SDValue(); if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL) std::swap(N0, N1); if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) return SDValue(); if (!N0.hasOneUse() || !N1.hasOneUse()) return SDValue(); SDValue ShAmt0 = N0.getOperand(1); if (ShAmt0.getValueType() != MVT::i8) return SDValue(); SDValue ShAmt1 = N1.getOperand(1); if (ShAmt1.getValueType() != MVT::i8) return SDValue(); if (ShAmt0.getOpcode() == ISD::TRUNCATE) ShAmt0 = ShAmt0.getOperand(0); if (ShAmt1.getOpcode() == ISD::TRUNCATE) ShAmt1 = ShAmt1.getOperand(0); SDLoc DL(N); unsigned Opc = X86ISD::SHLD; SDValue Op0 = N0.getOperand(0); SDValue Op1 = N1.getOperand(0); if (ShAmt0.getOpcode() == ISD::SUB) { Opc = X86ISD::SHRD; std::swap(Op0, Op1); std::swap(ShAmt0, ShAmt1); } unsigned Bits = VT.getSizeInBits(); if (ShAmt1.getOpcode() == ISD::SUB) { SDValue Sum = ShAmt1.getOperand(0); if (ConstantSDNode *SumC = dyn_cast(Sum)) { SDValue ShAmt1Op1 = ShAmt1.getOperand(1); if (ShAmt1Op1.getNode()->getOpcode() == ISD::TRUNCATE) ShAmt1Op1 = ShAmt1Op1.getOperand(0); if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0) return DAG.getNode(Opc, DL, VT, Op0, Op1, DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0)); } } else if (ConstantSDNode *ShAmt1C = dyn_cast(ShAmt1)) { ConstantSDNode *ShAmt0C = dyn_cast(ShAmt0); if (ShAmt0C && ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == Bits) return DAG.getNode(Opc, DL, VT, N0.getOperand(0), N1.getOperand(0), DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0)); } return SDValue(); } // Generate NEG and CMOV for integer abs. static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); // Since X86 does not have CMOV for 8-bit integer, we don't convert // 8-bit integer abs to NEG and CMOV. if (VT.isInteger() && VT.getSizeInBits() == 8) return SDValue(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDLoc DL(N); // Check pattern of XOR(ADD(X,Y), Y) where Y is SRA(X, size(X)-1) // and change it to SUB and CMOV. if (VT.isInteger() && N->getOpcode() == ISD::XOR && N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 && N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0)) if (ConstantSDNode *Y1C = dyn_cast(N1.getOperand(1))) if (Y1C->getAPIntValue() == VT.getSizeInBits()-1) { // Generate SUB & CMOV. 
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32), DAG.getConstant(0, DL, VT), N0.getOperand(0)); SDValue Ops[] = { N0.getOperand(0), Neg, DAG.getConstant(X86::COND_GE, DL, MVT::i8), SDValue(Neg.getNode(), 1) }; return DAG.getNode(X86ISD::CMOV, DL, DAG.getVTList(VT, MVT::Glue), Ops); } return SDValue(); } // Try to turn tests against the signbit in the form of: // XOR(TRUNCATE(SRL(X, size(X)-1)), 1) // into: // SETGT(X, -1) static SDValue foldXorTruncShiftIntoCmp(SDNode *N, SelectionDAG &DAG) { // This is only worth doing if the output type is i8. if (N->getValueType(0) != MVT::i8) return SDValue(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // We should be performing an xor against a truncated shift. if (N0.getOpcode() != ISD::TRUNCATE || !N0.hasOneUse()) return SDValue(); // Make sure we are performing an xor against one. if (!isOneConstant(N1)) return SDValue(); // SetCC on x86 zero extends so only act on this if it's a logical shift. SDValue Shift = N0.getOperand(0); if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse()) return SDValue(); // Make sure we are truncating from one of i16, i32 or i64. EVT ShiftTy = Shift.getValueType(); if (ShiftTy != MVT::i16 && ShiftTy != MVT::i32 && ShiftTy != MVT::i64) return SDValue(); // Make sure the shift amount extracts the sign bit. if (!isa(Shift.getOperand(1)) || Shift.getConstantOperandVal(1) != ShiftTy.getSizeInBits() - 1) return SDValue(); // Create a greater-than comparison against -1. // N.B. Using SETGE against 0 works but we want a canonical looking // comparison, using SETGT matches up with what TranslateX86CC. SDLoc DL(N); SDValue ShiftOp = Shift.getOperand(0); EVT ShiftOpTy = ShiftOp.getValueType(); SDValue Cond = DAG.getSetCC(DL, MVT::i8, ShiftOp, DAG.getConstant(-1, DL, ShiftOpTy), ISD::SETGT); return Cond; } static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG)) return RV; if (Subtarget->hasCMov()) if (SDValue RV = performIntegerAbsCombine(N, DAG)) return RV; if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget)) return FPLogic; return SDValue(); } /// This function detects the AVG pattern between vectors of unsigned i8/i16, /// which is c = (a + b + 1) / 2, and replace this operation with the efficient /// X86ISD::AVG instruction. static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG, const X86Subtarget *Subtarget, SDLoc DL) { if (!VT.isVector() || !VT.isSimple()) return SDValue(); EVT InVT = In.getValueType(); unsigned NumElems = VT.getVectorNumElements(); EVT ScalarVT = VT.getVectorElementType(); if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) && isPowerOf2_32(NumElems))) return SDValue(); // InScalarVT is the intermediate type in AVG pattern and it should be greater // than the original input type (i8/i16). EVT InScalarVT = InVT.getVectorElementType(); if (InScalarVT.getSizeInBits() <= ScalarVT.getSizeInBits()) return SDValue(); if (Subtarget->hasAVX512()) { if (VT.getSizeInBits() > 512) return SDValue(); } else if (Subtarget->hasAVX2()) { if (VT.getSizeInBits() > 256) return SDValue(); } else { if (VT.getSizeInBits() > 128) return SDValue(); } // Detect the following pattern: // // %1 = zext %a to // %2 = zext %b to // %3 = add nuw nsw %1, // %4 = add nuw nsw %3, %2 // %5 = lshr %N, // %6 = trunc %5 to // // In AVX512, the last instruction can also be a trunc store. 
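  // Worked example for the i8 case: with a = 200 and b = 57, the widened sum
  // is 200 + 57 + 1 = 258 and 258 >> 1 = 129, which fits back into i8. The
  // same arithmetic performed directly in i8 would wrap on the add, which is
  // why the pattern is only recognized when the adds and the shift happen in
  // a strictly wider element type, and why it can be mapped to the
  // PAVGB/PAVGW style rounding average that X86ISD::AVG represents.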
if (In.getOpcode() != ISD::SRL) return SDValue(); // A lambda checking the given SDValue is a constant vector and each element // is in the range [Min, Max]. auto IsConstVectorInRange = [](SDValue V, unsigned Min, unsigned Max) { BuildVectorSDNode *BV = dyn_cast(V); if (!BV || !BV->isConstant()) return false; for (unsigned i = 0, e = V.getNumOperands(); i < e; i++) { ConstantSDNode *C = dyn_cast(V.getOperand(i)); if (!C) return false; uint64_t Val = C->getZExtValue(); if (Val < Min || Val > Max) return false; } return true; }; // Check if each element of the vector is left-shifted by one. auto LHS = In.getOperand(0); auto RHS = In.getOperand(1); if (!IsConstVectorInRange(RHS, 1, 1)) return SDValue(); if (LHS.getOpcode() != ISD::ADD) return SDValue(); // Detect a pattern of a + b + 1 where the order doesn't matter. SDValue Operands[3]; Operands[0] = LHS.getOperand(0); Operands[1] = LHS.getOperand(1); // Take care of the case when one of the operands is a constant vector whose // element is in the range [1, 256]. if (IsConstVectorInRange(Operands[1], 1, ScalarVT == MVT::i8 ? 256 : 65536) && Operands[0].getOpcode() == ISD::ZERO_EXTEND && Operands[0].getOperand(0).getValueType() == VT) { // The pattern is detected. Subtract one from the constant vector, then // demote it and emit X86ISD::AVG instruction. SDValue One = DAG.getConstant(1, DL, InScalarVT); SDValue Ones = DAG.getNode(ISD::BUILD_VECTOR, DL, InVT, SmallVector(NumElems, One)); Operands[1] = DAG.getNode(ISD::SUB, DL, InVT, Operands[1], Ones); Operands[1] = DAG.getNode(ISD::TRUNCATE, DL, VT, Operands[1]); return DAG.getNode(X86ISD::AVG, DL, VT, Operands[0].getOperand(0), Operands[1]); } if (Operands[0].getOpcode() == ISD::ADD) std::swap(Operands[0], Operands[1]); else if (Operands[1].getOpcode() != ISD::ADD) return SDValue(); Operands[2] = Operands[1].getOperand(0); Operands[1] = Operands[1].getOperand(1); // Now we have three operands of two additions. Check that one of them is a // constant vector with ones, and the other two are promoted from i8/i16. for (int i = 0; i < 3; ++i) { if (!IsConstVectorInRange(Operands[i], 1, 1)) continue; std::swap(Operands[i], Operands[2]); // Check if Operands[0] and Operands[1] are results of type promotion. for (int j = 0; j < 2; ++j) if (Operands[j].getOpcode() != ISD::ZERO_EXTEND || Operands[j].getOperand(0).getValueType() != VT) return SDValue(); // The pattern is detected, emit X86ISD::AVG instruction. return DAG.getNode(X86ISD::AVG, DL, VT, Operands[0].getOperand(0), Operands[1].getOperand(0)); } return SDValue(); } /// PerformLOADCombine - Do target-specific dag combines on LOAD nodes. static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { LoadSDNode *Ld = cast(N); EVT RegVT = Ld->getValueType(0); EVT MemVT = Ld->getMemoryVT(); SDLoc dl(Ld); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // For chips with slow 32-byte unaligned loads, break the 32-byte operation // into two 16-byte operations. 
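  // Roughly: a 32-byte access such as a v8f32 load of Ptr becomes two 16-byte
  // loads, one at Ptr and one at Ptr+16, whose chains are joined by a
  // TokenFactor and whose values are inserted back into a single 256-bit
  // vector, as the code below does for the non-extending case.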
ISD::LoadExtType Ext = Ld->getExtensionType(); bool Fast; unsigned AddressSpace = Ld->getAddressSpace(); unsigned Alignment = Ld->getAlignment(); if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() && Ext == ISD::NON_EXTLOAD && TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT, AddressSpace, Alignment, &Fast) && !Fast) { unsigned NumElems = RegVT.getVectorNumElements(); if (NumElems < 2) return SDValue(); SDValue Ptr = Ld->getBasePtr(); SDValue Increment = DAG.getConstant(16, dl, TLI.getPointerTy(DAG.getDataLayout())); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), NumElems/2); SDValue Load1 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->isInvariant(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->isInvariant(), std::min(16U, Alignment)); SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Load1.getValue(1), Load2.getValue(1)); SDValue NewVec = DAG.getUNDEF(RegVT); NewVec = Insert128BitVector(NewVec, Load1, 0, DAG, dl); NewVec = Insert128BitVector(NewVec, Load2, NumElems/2, DAG, dl); return DCI.CombineTo(N, NewVec, TF, true); } return SDValue(); } /// PerformMLOADCombine - Resolve extending loads static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { MaskedLoadSDNode *Mld = cast(N); if (Mld->getExtensionType() != ISD::SEXTLOAD) return SDValue(); EVT VT = Mld->getValueType(0); unsigned NumElems = VT.getVectorNumElements(); EVT LdVT = Mld->getMemoryVT(); SDLoc dl(Mld); assert(LdVT != VT && "Cannot extend to the same type"); unsigned ToSz = VT.getVectorElementType().getSizeInBits(); unsigned FromSz = LdVT.getVectorElementType().getSizeInBits(); // From, To sizes and ElemCount must be pow of two assert (isPowerOf2_32(NumElems * FromSz * ToSz) && "Unexpected size for extending masked load"); unsigned SizeRatio = ToSz / FromSz; assert(SizeRatio * NumElems * FromSz == VT.getSizeInBits()); // Create a type on which we perform the shuffle EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), LdVT.getScalarType(), NumElems*SizeRatio); assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); // Convert Src0 value SDValue WideSrc0 = DAG.getBitcast(WideVecVT, Mld->getSrc0()); if (Mld->getSrc0().getOpcode() != ISD::UNDEF) { SmallVector ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i != NumElems; ++i) ShuffleVec[i] = i * SizeRatio; // Can't shuffle using an illegal type. 
assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) && "WideVecVT should be legal"); WideSrc0 = DAG.getVectorShuffle(WideVecVT, dl, WideSrc0, DAG.getUNDEF(WideVecVT), &ShuffleVec[0]); } // Prepare the new mask SDValue NewMask; SDValue Mask = Mld->getMask(); if (Mask.getValueType() == VT) { // Mask and original value have the same type NewMask = DAG.getBitcast(WideVecVT, Mask); SmallVector ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i != NumElems; ++i) ShuffleVec[i] = i * SizeRatio; for (unsigned i = NumElems; i != NumElems * SizeRatio; ++i) ShuffleVec[i] = NumElems * SizeRatio; NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask, DAG.getConstant(0, dl, WideVecVT), &ShuffleVec[0]); } else { assert(Mask.getValueType().getVectorElementType() == MVT::i1); unsigned WidenNumElts = NumElems*SizeRatio; unsigned MaskNumElts = VT.getVectorNumElements(); EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, WidenNumElts); unsigned NumConcat = WidenNumElts / MaskNumElts; SmallVector Ops(NumConcat); SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType()); Ops[0] = Mask; for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = ZeroVal; NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops); } SDValue WideLd = DAG.getMaskedLoad(WideVecVT, dl, Mld->getChain(), Mld->getBasePtr(), NewMask, WideSrc0, Mld->getMemoryVT(), Mld->getMemOperand(), ISD::NON_EXTLOAD); SDValue NewVec = DAG.getNode(X86ISD::VSEXT, dl, VT, WideLd); return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true); } /// PerformMSTORECombine - Resolve truncating stores static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { MaskedStoreSDNode *Mst = cast(N); if (!Mst->isTruncatingStore()) return SDValue(); EVT VT = Mst->getValue().getValueType(); unsigned NumElems = VT.getVectorNumElements(); EVT StVT = Mst->getMemoryVT(); SDLoc dl(Mst); assert(StVT != VT && "Cannot truncate to the same type"); unsigned FromSz = VT.getVectorElementType().getSizeInBits(); unsigned ToSz = StVT.getVectorElementType().getSizeInBits(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // The truncating store is legal in some cases. For example // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw // are designated for truncate store. // In this case we don't need any further transformations. if (TLI.isTruncStoreLegal(VT, StVT)) return SDValue(); // From, To sizes and ElemCount must be pow of two assert (isPowerOf2_32(NumElems * FromSz * ToSz) && "Unexpected size for truncating masked store"); // We are going to use the original vector elt for storing. // Accumulated smaller vector elements must be a multiple of the store size. assert (((NumElems * FromSz) % ToSz) == 0 && "Unexpected ratio for truncating masked store"); unsigned SizeRatio = FromSz / ToSz; assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits()); // Create a type on which we perform the shuffle EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), NumElems*SizeRatio); assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); SDValue WideVec = DAG.getBitcast(WideVecVT, Mst->getValue()); SmallVector ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i != NumElems; ++i) ShuffleVec[i] = i * SizeRatio; // Can't shuffle using an illegal type. 
assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) && "WideVecVT should be legal"); SDValue TruncatedVal = DAG.getVectorShuffle(WideVecVT, dl, WideVec, DAG.getUNDEF(WideVecVT), &ShuffleVec[0]); SDValue NewMask; SDValue Mask = Mst->getMask(); if (Mask.getValueType() == VT) { // Mask and original value have the same type NewMask = DAG.getBitcast(WideVecVT, Mask); for (unsigned i = 0; i != NumElems; ++i) ShuffleVec[i] = i * SizeRatio; for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i) ShuffleVec[i] = NumElems*SizeRatio; NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask, DAG.getConstant(0, dl, WideVecVT), &ShuffleVec[0]); } else { assert(Mask.getValueType().getVectorElementType() == MVT::i1); unsigned WidenNumElts = NumElems*SizeRatio; unsigned MaskNumElts = VT.getVectorNumElements(); EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, WidenNumElts); unsigned NumConcat = WidenNumElts / MaskNumElts; SmallVector Ops(NumConcat); SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType()); Ops[0] = Mask; for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = ZeroVal; NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops); } return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal, Mst->getBasePtr(), NewMask, StVT, Mst->getMemOperand(), false); } /// PerformSTORECombine - Do target-specific dag combines on STORE nodes. static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { StoreSDNode *St = cast(N); EVT VT = St->getValue().getValueType(); EVT StVT = St->getMemoryVT(); SDLoc dl(St); SDValue StoredVal = St->getOperand(1); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // If we are saving a concatenation of two XMM registers and 32-byte stores // are slow, such as on Sandy Bridge, perform two 16-byte stores. bool Fast; unsigned AddressSpace = St->getAddressSpace(); unsigned Alignment = St->getAlignment(); if (VT.is256BitVector() && StVT == VT && TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, AddressSpace, Alignment, &Fast) && !Fast) { unsigned NumElems = VT.getVectorNumElements(); if (NumElems < 2) return SDValue(); SDValue Value0 = Extract128BitVector(StoredVal, 0, DAG, dl); SDValue Value1 = Extract128BitVector(StoredVal, NumElems/2, DAG, dl); SDValue Stride = DAG.getConstant(16, dl, TLI.getPointerTy(DAG.getDataLayout())); SDValue Ptr0 = St->getBasePtr(); SDValue Ptr1 = DAG.getNode(ISD::ADD, dl, Ptr0.getValueType(), Ptr0, Stride); SDValue Ch0 = DAG.getStore(St->getChain(), dl, Value0, Ptr0, St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), Alignment); SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1, St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), std::min(16U, Alignment)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1); } // Optimize trunc store (of multiple scalars) to shuffle and store. // First, pack all of the elements in one place. Next, store to memory // in fewer chunks. if (St->isTruncatingStore() && VT.isVector()) { // Check if we can detect an AVG pattern from the truncation. If yes, // replace the trunc store by a normal store with the result of X86ISD::AVG // instruction. 
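  // When the AVG pattern is found, only the stored value changes: the
  // truncating store becomes a plain store of the X86ISD::AVG result to the
  // same base pointer, chain and alignment, as the call below shows.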
SDValue Avg = detectAVGPattern(St->getValue(), St->getMemoryVT(), DAG, Subtarget, dl); if (Avg.getNode()) return DAG.getStore(St->getChain(), dl, Avg, St->getBasePtr(), St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), St->getAlignment()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned NumElems = VT.getVectorNumElements(); assert(StVT != VT && "Cannot truncate to the same type"); unsigned FromSz = VT.getVectorElementType().getSizeInBits(); unsigned ToSz = StVT.getVectorElementType().getSizeInBits(); // The truncating store is legal in some cases. For example // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw // are designated for truncate store. // In this case we don't need any further transformations. if (TLI.isTruncStoreLegal(VT, StVT)) return SDValue(); // From, To sizes and ElemCount must be pow of two if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue(); // We are going to use the original vector elt for storing. // Accumulated smaller vector elements must be a multiple of the store size. if (0 != (NumElems * FromSz) % ToSz) return SDValue(); unsigned SizeRatio = FromSz / ToSz; assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits()); // Create a type on which we perform the shuffle EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), NumElems*SizeRatio); assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); SDValue WideVec = DAG.getBitcast(WideVecVT, St->getValue()); SmallVector ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i != NumElems; ++i) ShuffleVec[i] = i * SizeRatio; // Can't shuffle using an illegal type. if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, WideVec, DAG.getUNDEF(WideVecVT), &ShuffleVec[0]); // At this point all of the data is stored at the bottom of the // register. We now need to save it to mem. // Find the largest store unit MVT StoreType = MVT::i8; for (MVT Tp : MVT::integer_valuetypes()) { if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToSz) StoreType = Tp; } // On 32bit systems, we can't save 64bit integers. Try bitcasting to F64. if (TLI.isTypeLegal(MVT::f64) && StoreType.getSizeInBits() < 64 && (64 <= NumElems * ToSz)) StoreType = MVT::f64; // Bitcast the original vector into a vector of store-size units EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), StoreType, VT.getSizeInBits()/StoreType.getSizeInBits()); assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); SDValue ShuffWide = DAG.getBitcast(StoreVecVT, Shuff); SmallVector Chains; SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, dl, TLI.getPointerTy(DAG.getDataLayout())); SDValue Ptr = St->getBasePtr(); // Perform one or more big stores into memory. for (unsigned i=0, e=(ToSz*NumElems)/StoreType.getSizeInBits(); i!=e; ++i) { SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StoreType, ShuffWide, DAG.getIntPtrConstant(i, dl)); SDValue Ch = DAG.getStore(St->getChain(), dl, SubVec, Ptr, St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), St->getAlignment()); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); Chains.push_back(Ch); } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); } // Turn load->store of MMX types into GPR load/stores. This avoids clobbering // the FP state in cases where an emms may be missing. // A preferable solution to the general problem is to figure out the right // places to insert EMMS. This qualifies as a quick hack. 
// Similarly, turn load->store of i64 into double load/stores in 32-bit mode. if (VT.getSizeInBits() != 64) return SDValue(); const Function *F = DAG.getMachineFunction().getFunction(); bool NoImplicitFloatOps = F->hasFnAttribute(Attribute::NoImplicitFloat); bool F64IsLegal = !Subtarget->useSoftFloat() && !NoImplicitFloatOps && Subtarget->hasSSE2(); if ((VT.isVector() || (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) && isa(St->getValue()) && !cast(St->getValue())->isVolatile() && St->getChain().hasOneUse() && !St->isVolatile()) { SDNode* LdVal = St->getValue().getNode(); LoadSDNode *Ld = nullptr; int TokenFactorIndex = -1; SmallVector Ops; SDNode* ChainVal = St->getChain().getNode(); // Must be a store of a load. We currently handle two cases: the load // is a direct child, and it's under an intervening TokenFactor. It is // possible to dig deeper under nested TokenFactors. if (ChainVal == LdVal) Ld = cast(St->getChain()); else if (St->getValue().hasOneUse() && ChainVal->getOpcode() == ISD::TokenFactor) { for (unsigned i = 0, e = ChainVal->getNumOperands(); i != e; ++i) { if (ChainVal->getOperand(i).getNode() == LdVal) { TokenFactorIndex = i; Ld = cast(St->getValue()); } else Ops.push_back(ChainVal->getOperand(i)); } } if (!Ld || !ISD::isNormalLoad(Ld)) return SDValue(); // If this is not the MMX case, i.e. we are just turning i64 load/store // into f64 load/store, avoid the transformation if there are multiple // uses of the loaded value. if (!VT.isVector() && !Ld->hasNUsesOfValue(1, 0)) return SDValue(); SDLoc LdDL(Ld); SDLoc StDL(N); // If we are a 64-bit capable x86, lower to a single movq load/store pair. // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store // pair instead. if (Subtarget->is64Bit() || F64IsLegal) { MVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64; SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->isInvariant(), Ld->getAlignment()); SDValue NewChain = NewLd.getValue(1); if (TokenFactorIndex != -1) { Ops.push_back(NewChain); NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops); } return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(), St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), St->getAlignment()); } // Otherwise, lower to two pairs of 32-bit loads / stores. 
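  // Concretely: the i64 load is split into a 32-bit load of the low half at
  // the base address and a 32-bit load of the high half at base+4, and the
  // store is split the same way, with TokenFactor nodes keeping the new
  // chains ordered with any other users of the original load chain.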
SDValue LoAddr = Ld->getBasePtr(); SDValue HiAddr = DAG.getNode(ISD::ADD, LdDL, MVT::i32, LoAddr, DAG.getConstant(4, LdDL, MVT::i32)); SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr, Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->isInvariant(), Ld->getAlignment()); SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr, Ld->getPointerInfo().getWithOffset(4), Ld->isVolatile(), Ld->isNonTemporal(), Ld->isInvariant(), MinAlign(Ld->getAlignment(), 4)); SDValue NewChain = LoLd.getValue(1); if (TokenFactorIndex != -1) { Ops.push_back(LoLd); Ops.push_back(HiLd); NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops); } LoAddr = St->getBasePtr(); HiAddr = DAG.getNode(ISD::ADD, StDL, MVT::i32, LoAddr, DAG.getConstant(4, StDL, MVT::i32)); SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr, St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), St->getAlignment()); SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr, St->getPointerInfo().getWithOffset(4), St->isVolatile(), St->isNonTemporal(), MinAlign(St->getAlignment(), 4)); return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt); } // This is similar to the above case, but here we handle a scalar 64-bit // integer store that is extracted from a vector on a 32-bit target. // If we have SSE2, then we can treat it like a floating-point double // to get past legalization. The execution dependencies fixup pass will // choose the optimal machine instruction for the store if this really is // an integer or v2f32 rather than an f64. if (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit() && St->getOperand(1).getOpcode() == ISD::EXTRACT_VECTOR_ELT) { SDValue OldExtract = St->getOperand(1); SDValue ExtOp0 = OldExtract.getOperand(0); unsigned VecSize = ExtOp0.getValueSizeInBits(); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, VecSize / 64); SDValue BitCast = DAG.getBitcast(VecVT, ExtOp0); SDValue NewExtract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, BitCast, OldExtract.getOperand(1)); return DAG.getStore(St->getChain(), dl, NewExtract, St->getBasePtr(), St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), St->getAlignment()); } return SDValue(); } /// Return 'true' if this vector operation is "horizontal" /// and return the operands for the horizontal operation in LHS and RHS. A /// horizontal operation performs the binary operation on successive elements /// of its first operand, then on successive elements of its second operand, /// returning the resulting values in a vector. For example, if /// A = < float a0, float a1, float a2, float a3 > /// and /// B = < float b0, float b1, float b2, float b3 > /// then the result of doing a horizontal operation on A and B is /// A horizontal-op B = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >. /// In short, LHS and RHS are inspected to see if LHS op RHS is of the form /// A horizontal-op B, for some already available A and B, and if so then LHS is /// set to A, RHS to B, and the routine returns 'true'. /// Note that the binary operation should have the property that if one of the /// operands is UNDEF then the result is UNDEF. 
static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) { // Look for the following pattern: if // A = < float a0, float a1, float a2, float a3 > // B = < float b0, float b1, float b2, float b3 > // and // LHS = VECTOR_SHUFFLE A, B, <0, 2, 4, 6> // RHS = VECTOR_SHUFFLE A, B, <1, 3, 5, 7> // then LHS op RHS = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 > // which is A horizontal-op B. // At least one of the operands should be a vector shuffle. if (LHS.getOpcode() != ISD::VECTOR_SHUFFLE && RHS.getOpcode() != ISD::VECTOR_SHUFFLE) return false; MVT VT = LHS.getSimpleValueType(); assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for horizontal add/sub"); // Handle 128 and 256-bit vector lengths. AVX defines horizontal add/sub to // operate independently on 128-bit lanes. unsigned NumElts = VT.getVectorNumElements(); unsigned NumLanes = VT.getSizeInBits()/128; unsigned NumLaneElts = NumElts / NumLanes; assert((NumLaneElts % 2 == 0) && "Vector type should have an even number of elements in each lane"); unsigned HalfLaneElts = NumLaneElts/2; // View LHS in the form // LHS = VECTOR_SHUFFLE A, B, LMask // If LHS is not a shuffle then pretend it is the shuffle // LHS = VECTOR_SHUFFLE LHS, undef, <0, 1, ..., N-1> // NOTE: in what follows a default initialized SDValue represents an UNDEF of // type VT. SDValue A, B; SmallVector LMask(NumElts); if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) { if (LHS.getOperand(0).getOpcode() != ISD::UNDEF) A = LHS.getOperand(0); if (LHS.getOperand(1).getOpcode() != ISD::UNDEF) B = LHS.getOperand(1); ArrayRef Mask = cast(LHS.getNode())->getMask(); std::copy(Mask.begin(), Mask.end(), LMask.begin()); } else { if (LHS.getOpcode() != ISD::UNDEF) A = LHS; for (unsigned i = 0; i != NumElts; ++i) LMask[i] = i; } // Likewise, view RHS in the form // RHS = VECTOR_SHUFFLE C, D, RMask SDValue C, D; SmallVector RMask(NumElts); if (RHS.getOpcode() == ISD::VECTOR_SHUFFLE) { if (RHS.getOperand(0).getOpcode() != ISD::UNDEF) C = RHS.getOperand(0); if (RHS.getOperand(1).getOpcode() != ISD::UNDEF) D = RHS.getOperand(1); ArrayRef Mask = cast(RHS.getNode())->getMask(); std::copy(Mask.begin(), Mask.end(), RMask.begin()); } else { if (RHS.getOpcode() != ISD::UNDEF) C = RHS; for (unsigned i = 0; i != NumElts; ++i) RMask[i] = i; } // Check that the shuffles are both shuffling the same vectors. if (!(A == C && B == D) && !(A == D && B == C)) return false; // If everything is UNDEF then bail out: it would be better to fold to UNDEF. if (!A.getNode() && !B.getNode()) return false; // If A and B occur in reverse order in RHS, then "swap" them (which means // rewriting the mask). if (A != C) ShuffleVectorSDNode::commuteMask(RMask); // At this point LHS and RHS are equivalent to // LHS = VECTOR_SHUFFLE A, B, LMask // RHS = VECTOR_SHUFFLE A, B, RMask // Check that the masks correspond to performing a horizontal operation. for (unsigned l = 0; l != NumElts; l += NumLaneElts) { for (unsigned i = 0; i != NumLaneElts; ++i) { int LIdx = LMask[i+l], RIdx = RMask[i+l]; // Ignore any UNDEF components. if (LIdx < 0 || RIdx < 0 || (!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) || (!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts))) continue; // Check that successive elements are being operated on. If not, this is // not a horizontal operation. 
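      // For example, with v4f32 (NumElts = 4, a single 128-bit lane,
      // HalfLaneElts = 2) the expected index pairs are:
      //   i = 0 -> Index = 0: LIdx, RIdx = 0, 1   (a0 op a1)
      //   i = 1 -> Index = 2: LIdx, RIdx = 2, 3   (a2 op a3)
      //   i = 2 -> Index = 4: LIdx, RIdx = 4, 5   (b0 op b1)
      //   i = 3 -> Index = 6: LIdx, RIdx = 6, 7   (b2 op b3)
      // with each pair allowed to appear swapped when IsCommutative is true.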
unsigned Src = (i/HalfLaneElts); // each lane is split between srcs int Index = 2*(i%HalfLaneElts) + NumElts*Src + l; if (!(LIdx == Index && RIdx == Index + 1) && !(IsCommutative && LIdx == Index + 1 && RIdx == Index)) return false; } } LHS = A.getNode() ? A : B; // If A is 'UNDEF', use B for it. RHS = B.getNode() ? B : A; // If B is 'UNDEF', use A for it. return true; } /// Do target-specific dag combines on floating point adds. static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { EVT VT = N->getValueType(0); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); // Try to synthesize horizontal adds from adds of shuffles. if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) || (Subtarget->hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) && isHorizontalBinOp(LHS, RHS, true)) return DAG.getNode(X86ISD::FHADD, SDLoc(N), VT, LHS, RHS); return SDValue(); } /// Do target-specific dag combines on floating point subs. static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { EVT VT = N->getValueType(0); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); // Try to synthesize horizontal subs from subs of shuffles. if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) || (Subtarget->hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) && isHorizontalBinOp(LHS, RHS, false)) return DAG.getNode(X86ISD::FHSUB, SDLoc(N), VT, LHS, RHS); return SDValue(); } /// Truncate a group of v4i32 into v16i8/v8i16 using X86ISD::PACKUS. static SDValue combineVectorTruncationWithPACKUS(SDNode *N, SelectionDAG &DAG, SmallVector &Regs) { assert(Regs.size() > 0 && (Regs[0].getValueType() == MVT::v4i32 || Regs[0].getValueType() == MVT::v2i64)); EVT OutVT = N->getValueType(0); EVT OutSVT = OutVT.getVectorElementType(); EVT InVT = Regs[0].getValueType(); EVT InSVT = InVT.getVectorElementType(); SDLoc DL(N); // First, use mask to unset all bits that won't appear in the result. assert((OutSVT == MVT::i8 || OutSVT == MVT::i16) && "OutSVT can only be either i8 or i16."); SDValue MaskVal = DAG.getConstant(OutSVT == MVT::i8 ? 0xFF : 0xFFFF, DL, InSVT); SDValue MaskVec = DAG.getNode( ISD::BUILD_VECTOR, DL, InVT, SmallVector(InVT.getVectorNumElements(), MaskVal)); for (auto &Reg : Regs) Reg = DAG.getNode(ISD::AND, DL, InVT, MaskVec, Reg); MVT UnpackedVT, PackedVT; if (OutSVT == MVT::i8) { UnpackedVT = MVT::v8i16; PackedVT = MVT::v16i8; } else { UnpackedVT = MVT::v4i32; PackedVT = MVT::v8i16; } // In each iteration, truncate the type by a half size. auto RegNum = Regs.size(); for (unsigned j = 1, e = InSVT.getSizeInBits() / OutSVT.getSizeInBits(); j < e; j *= 2, RegNum /= 2) { for (unsigned i = 0; i < RegNum; i++) Regs[i] = DAG.getNode(ISD::BITCAST, DL, UnpackedVT, Regs[i]); for (unsigned i = 0; i < RegNum / 2; i++) Regs[i] = DAG.getNode(X86ISD::PACKUS, DL, PackedVT, Regs[i * 2], Regs[i * 2 + 1]); } // If the type of the result is v8i8, we need do one more X86ISD::PACKUS, and // then extract a subvector as the result since v8i8 is not a legal type. if (OutVT == MVT::v8i8) { Regs[0] = DAG.getNode(X86ISD::PACKUS, DL, PackedVT, Regs[0], Regs[0]); Regs[0] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT, Regs[0], DAG.getIntPtrConstant(0, DL)); return Regs[0]; } else if (RegNum > 1) { Regs.resize(RegNum); return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Regs); } else return Regs[0]; } /// Truncate a group of v4i32 into v8i16 using X86ISD::PACKSS. 
static SDValue combineVectorTruncationWithPACKSS(SDNode *N, SelectionDAG &DAG, SmallVector &Regs) { assert(Regs.size() > 0 && Regs[0].getValueType() == MVT::v4i32); EVT OutVT = N->getValueType(0); SDLoc DL(N); // Shift left by 16 bits, then arithmetic-shift right by 16 bits. SDValue ShAmt = DAG.getConstant(16, DL, MVT::i32); for (auto &Reg : Regs) { Reg = getTargetVShiftNode(X86ISD::VSHLI, DL, MVT::v4i32, Reg, ShAmt, DAG); Reg = getTargetVShiftNode(X86ISD::VSRAI, DL, MVT::v4i32, Reg, ShAmt, DAG); } for (unsigned i = 0, e = Regs.size() / 2; i < e; i++) Regs[i] = DAG.getNode(X86ISD::PACKSS, DL, MVT::v8i16, Regs[i * 2], Regs[i * 2 + 1]); if (Regs.size() > 2) { Regs.resize(Regs.size() / 2); return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Regs); } else return Regs[0]; } /// This function transforms truncation from vXi32/vXi64 to vXi8/vXi16 into /// X86ISD::PACKUS/X86ISD::PACKSS operations. We do it here because after type /// legalization the truncation will be translated into a BUILD_VECTOR with each /// element that is extracted from a vector and then truncated, and it is /// diffcult to do this optimization based on them. static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { EVT OutVT = N->getValueType(0); if (!OutVT.isVector()) return SDValue(); SDValue In = N->getOperand(0); if (!In.getValueType().isSimple()) return SDValue(); EVT InVT = In.getValueType(); unsigned NumElems = OutVT.getVectorNumElements(); // TODO: On AVX2, the behavior of X86ISD::PACKUS is different from that on // SSE2, and we need to take care of it specially. // AVX512 provides vpmovdb. if (!Subtarget->hasSSE2() || Subtarget->hasAVX2()) return SDValue(); EVT OutSVT = OutVT.getVectorElementType(); EVT InSVT = InVT.getVectorElementType(); if (!((InSVT == MVT::i32 || InSVT == MVT::i64) && (OutSVT == MVT::i8 || OutSVT == MVT::i16) && isPowerOf2_32(NumElems) && NumElems >= 8)) return SDValue(); // SSSE3's pshufb results in less instructions in the cases below. if (Subtarget->hasSSSE3() && NumElems == 8 && ((OutSVT == MVT::i8 && InSVT != MVT::i64) || (InSVT == MVT::i32 && OutSVT == MVT::i16))) return SDValue(); SDLoc DL(N); // Split a long vector into vectors of legal type. unsigned RegNum = InVT.getSizeInBits() / 128; SmallVector SubVec(RegNum); if (InSVT == MVT::i32) { for (unsigned i = 0; i < RegNum; i++) SubVec[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In, DAG.getIntPtrConstant(i * 4, DL)); } else { for (unsigned i = 0; i < RegNum; i++) SubVec[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, DAG.getIntPtrConstant(i * 2, DL)); } // SSE2 provides PACKUS for only 2 x v8i16 -> v16i8 and SSE4.1 provides PAKCUS // for 2 x v4i32 -> v8i16. For SSSE3 and below, we need to use PACKSS to // truncate 2 x v4i32 to v8i16. if (Subtarget->hasSSE41() || OutSVT == MVT::i8) return combineVectorTruncationWithPACKUS(N, DAG, SubVec); else if (InSVT == MVT::i32) return combineVectorTruncationWithPACKSS(N, DAG, SubVec); else return SDValue(); } static SDValue PerformTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { // Try to detect AVG pattern first. SDValue Avg = detectAVGPattern(N->getOperand(0), N->getValueType(0), DAG, Subtarget, SDLoc(N)); if (Avg.getNode()) return Avg; return combineVectorTruncation(N, DAG, Subtarget); } /// Do target-specific dag combines on floating point negations. 
static SDValue PerformFNEGCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { EVT VT = N->getValueType(0); EVT SVT = VT.getScalarType(); SDValue Arg = N->getOperand(0); SDLoc DL(N); // Let legalize expand this if it isn't a legal type yet. if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); // If we're negating a FMUL node on a target with FMA, then we can avoid the // use of a constant by performing (-0 - A*B) instead. // FIXME: Check rounding control flags as well once it becomes available. if (Arg.getOpcode() == ISD::FMUL && (SVT == MVT::f32 || SVT == MVT::f64) && Arg->getFlags()->hasNoSignedZeros() && Subtarget->hasAnyFMA()) { SDValue Zero = DAG.getConstantFP(0.0, DL, VT); return DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0), Arg.getOperand(1), Zero); } // If we're negating a FMA node, then we can adjust the // instruction to include the extra negation. if (Arg.hasOneUse()) { switch (Arg.getOpcode()) { case X86ISD::FMADD: return DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0), Arg.getOperand(1), Arg.getOperand(2)); case X86ISD::FMSUB: return DAG.getNode(X86ISD::FNMADD, DL, VT, Arg.getOperand(0), Arg.getOperand(1), Arg.getOperand(2)); case X86ISD::FNMADD: return DAG.getNode(X86ISD::FMSUB, DL, VT, Arg.getOperand(0), Arg.getOperand(1), Arg.getOperand(2)); case X86ISD::FNMSUB: return DAG.getNode(X86ISD::FMADD, DL, VT, Arg.getOperand(0), Arg.getOperand(1), Arg.getOperand(2)); } } return SDValue(); } static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { EVT VT = N->getValueType(0); if (VT.is512BitVector() && !Subtarget->hasDQI()) { // VXORPS, VORPS, VANDPS, VANDNPS are supported only under DQ extention. // These logic operations may be executed in the integer domain. SDLoc dl(N); MVT IntScalar = MVT::getIntegerVT(VT.getScalarSizeInBits()); MVT IntVT = MVT::getVectorVT(IntScalar, VT.getVectorNumElements()); SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(0)); SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(1)); unsigned IntOpcode = 0; switch (N->getOpcode()) { default: llvm_unreachable("Unexpected FP logic op"); case X86ISD::FOR: IntOpcode = ISD::OR; break; case X86ISD::FXOR: IntOpcode = ISD::XOR; break; case X86ISD::FAND: IntOpcode = ISD::AND; break; case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break; } SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1); return DAG.getNode(ISD::BITCAST, dl, VT, IntOp); } return SDValue(); } /// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes. static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR); // F[X]OR(0.0, x) -> x if (ConstantFPSDNode *C = dyn_cast(N->getOperand(0))) if (C->getValueAPF().isPosZero()) return N->getOperand(1); // F[X]OR(x, 0.0) -> x if (ConstantFPSDNode *C = dyn_cast(N->getOperand(1))) if (C->getValueAPF().isPosZero()) return N->getOperand(0); return lowerX86FPLogicOp(N, DAG, Subtarget); } /// Do target-specific dag combines on X86ISD::FMIN and X86ISD::FMAX nodes. static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) { assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX); // Only perform optimizations if UnsafeMath is used. if (!DAG.getTarget().Options.UnsafeFPMath) return SDValue(); // If we run in unsafe-math mode, then convert the FMAX and FMIN nodes // into FMINC and FMAXC, which are Commutative operations. 
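  // Rationale: the SSE max/min instructions are sensitive to operand order
  // when an input is a NaN or when comparing +0.0 with -0.0, so FMAX/FMIN
  // cannot be freely commuted. Under unsafe-math those cases are assumed not
  // to matter, and the commutable FMAXC/FMINC forms give instruction
  // selection more freedom, e.g. to fold a memory operand into either side.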
unsigned NewOp = 0; switch (N->getOpcode()) { default: llvm_unreachable("unknown opcode"); case X86ISD::FMIN: NewOp = X86ISD::FMINC; break; case X86ISD::FMAX: NewOp = X86ISD::FMAXC; break; } return DAG.getNode(NewOp, SDLoc(N), N->getValueType(0), N->getOperand(0), N->getOperand(1)); } static SDValue performFMinNumFMaxNumCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { if (Subtarget->useSoftFloat()) return SDValue(); // TODO: Check for global or instruction-level "nnan". In that case, we // should be able to lower to FMAX/FMIN alone. // TODO: If an operand is already known to be a NaN or not a NaN, this // should be an optional swap and FMAX/FMIN. EVT VT = N->getValueType(0); if (!((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) || (Subtarget->hasSSE2() && (VT == MVT::f64 || VT == MVT::v2f64)) || (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64)))) return SDValue(); // This takes at least 3 instructions, so favor a library call when operating // on a scalar and minimizing code size. if (!VT.isVector() && DAG.getMachineFunction().getFunction()->optForMinSize()) return SDValue(); SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); SDLoc DL(N); EVT SetCCType = DAG.getTargetLoweringInfo().getSetCCResultType( DAG.getDataLayout(), *DAG.getContext(), VT); // There are 4 possibilities involving NaN inputs, and these are the required // outputs: // Op1 // Num NaN // ---------------- // Num | Max | Op0 | // Op0 ---------------- // NaN | Op1 | NaN | // ---------------- // // The SSE FP max/min instructions were not designed for this case, but rather // to implement: // Min = Op1 < Op0 ? Op1 : Op0 // Max = Op1 > Op0 ? Op1 : Op0 // // So they always return Op0 if either input is a NaN. However, we can still // use those instructions for fmaxnum by selecting away a NaN input. // If either operand is NaN, the 2nd source operand (Op0) is passed through. auto MinMaxOp = N->getOpcode() == ISD::FMAXNUM ? X86ISD::FMAX : X86ISD::FMIN; SDValue MinOrMax = DAG.getNode(MinMaxOp, DL, VT, Op1, Op0); SDValue IsOp0Nan = DAG.getSetCC(DL, SetCCType , Op0, Op0, ISD::SETUO); // If Op0 is a NaN, select Op1. Otherwise, select the max. If both operands // are NaN, the NaN value of Op1 is the result. auto SelectOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT; return DAG.getNode(SelectOpcode, DL, VT, IsOp0Nan, Op1, MinOrMax); } /// Do target-specific dag combines on X86ISD::FAND nodes. static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { // FAND(0.0, x) -> 0.0 if (ConstantFPSDNode *C = dyn_cast(N->getOperand(0))) if (C->getValueAPF().isPosZero()) return N->getOperand(0); // FAND(x, 0.0) -> 0.0 if (ConstantFPSDNode *C = dyn_cast(N->getOperand(1))) if (C->getValueAPF().isPosZero()) return N->getOperand(1); return lowerX86FPLogicOp(N, DAG, Subtarget); } /// Do target-specific dag combines on X86ISD::FANDN nodes static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { // FANDN(0.0, x) -> x if (ConstantFPSDNode *C = dyn_cast(N->getOperand(0))) if (C->getValueAPF().isPosZero()) return N->getOperand(1); // FANDN(x, 0.0) -> 0.0 if (ConstantFPSDNode *C = dyn_cast(N->getOperand(1))) if (C->getValueAPF().isPosZero()) return N->getOperand(1); return lowerX86FPLogicOp(N, DAG, Subtarget); } static SDValue PerformBTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { // BT ignores high bits in the bit index operand. 
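  // For a register-form BT the bit index is taken modulo the operand width,
  // so only the low Log2_32(BitWidth) bits (5 for i32, 6 for i64) are
  // demanded below; this lets SimplifyDemandedBits remove redundant masking
  // such as an (and %idx, 31) feeding the bit test.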
SDValue Op1 = N->getOperand(1); if (Op1.hasOneUse()) { unsigned BitWidth = Op1.getValueSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth)); APInt KnownZero, KnownOne; TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) || TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO)) DCI.CommitTargetLoweringOpt(TLO); } return SDValue(); } static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) { SDValue Op = N->getOperand(0); if (Op.getOpcode() == ISD::BITCAST) Op = Op.getOperand(0); EVT VT = N->getValueType(0), OpVT = Op.getValueType(); if (Op.getOpcode() == X86ISD::VZEXT_LOAD && VT.getVectorElementType().getSizeInBits() == OpVT.getVectorElementType().getSizeInBits()) { return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); } return SDValue(); } static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { EVT VT = N->getValueType(0); if (!VT.isVector()) return SDValue(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT ExtraVT = cast(N1)->getVT(); SDLoc dl(N); // The SIGN_EXTEND_INREG to v4i64 is expensive operation on the // both SSE and AVX2 since there is no sign-extended shift right // operation on a vector with 64-bit elements. //(sext_in_reg (v4i64 anyext (v4i32 x )), ExtraVT) -> // (v4i64 sext (v4i32 sext_in_reg (v4i32 x , ExtraVT))) if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND)) { SDValue N00 = N0.getOperand(0); // EXTLOAD has a better solution on AVX2, // it may be replaced with X86ISD::VSEXT node. if (N00.getOpcode() == ISD::LOAD && Subtarget->hasInt256()) if (!ISD::isNormalLoad(N00.getNode())) return SDValue(); if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) { SDValue Tmp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, N00, N1); return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp); } } return SDValue(); } /// sext(add_nsw(x, C)) --> add(sext(x), C_sext) /// Promoting a sign extension ahead of an 'add nsw' exposes opportunities /// to combine math ops, use an LEA, or use a complex addressing mode. This can /// eliminate extend, add, and shift instructions. static SDValue promoteSextBeforeAddNSW(SDNode *Sext, SelectionDAG &DAG, const X86Subtarget *Subtarget) { // TODO: This should be valid for other integer types. EVT VT = Sext->getValueType(0); if (VT != MVT::i64) return SDValue(); // We need an 'add nsw' feeding into the 'sext'. SDValue Add = Sext->getOperand(0); if (Add.getOpcode() != ISD::ADD || !Add->getFlags()->hasNoSignedWrap()) return SDValue(); // Having a constant operand to the 'add' ensures that we are not increasing // the instruction count because the constant is extended for free below. // A constant operand can also become the displacement field of an LEA. auto *AddOp1 = dyn_cast(Add.getOperand(1)); if (!AddOp1) return SDValue(); // Don't make the 'add' bigger if there's no hope of combining it with some // other 'add' or 'shl' instruction. // TODO: It may be profitable to generate simpler LEA instructions in place // of single 'add' instructions, but the cost model for selecting an LEA // currently has a high threshold. 
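  // For illustration: (i64 add (sext (i32 add nsw %x, 5)), %y) can, after the
  // promotion below, become (i64 add (add (sext %x), %y), 5), which can then
  // be matched as a single LEA with a register index and a constant
  // displacement. Without a later 'add' or 'shl' user, the wider constant
  // would only cost an extra materialization, hence this check.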
bool HasLEAPotential = false; for (auto *User : Sext->uses()) { if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SHL) { HasLEAPotential = true; break; } } if (!HasLEAPotential) return SDValue(); // Everything looks good, so pull the 'sext' ahead of the 'add'. int64_t AddConstant = AddOp1->getSExtValue(); SDValue AddOp0 = Add.getOperand(0); SDValue NewSext = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Sext), VT, AddOp0); SDValue NewConstant = DAG.getConstant(AddConstant, SDLoc(Add), VT); // The wider add is guaranteed to not wrap because both operands are // sign-extended. SDNodeFlags Flags; Flags.setNoSignedWrap(true); return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewSext, NewConstant, &Flags); } /// (i8,i32 {s/z}ext ({s/u}divrem (i8 x, i8 y)) -> /// (i8,i32 ({s/u}divrem_sext_hreg (i8 x, i8 y) /// This exposes the {s/z}ext to the sdivrem lowering, so that it directly /// extends from AH (which we otherwise need to do contortions to access). static SDValue getDivRem8(SDNode *N, SelectionDAG &DAG) { SDValue N0 = N->getOperand(0); auto OpcodeN = N->getOpcode(); auto OpcodeN0 = N0.getOpcode(); if (!((OpcodeN == ISD::SIGN_EXTEND && OpcodeN0 == ISD::SDIVREM) || (OpcodeN == ISD::ZERO_EXTEND && OpcodeN0 == ISD::UDIVREM))) return SDValue(); EVT VT = N->getValueType(0); EVT InVT = N0.getValueType(); if (N0.getResNo() != 1 || InVT != MVT::i8 || VT != MVT::i32) return SDValue(); SDVTList NodeTys = DAG.getVTList(MVT::i8, VT); auto DivRemOpcode = OpcodeN0 == ISD::SDIVREM ? X86ISD::SDIVREM8_SEXT_HREG : X86ISD::UDIVREM8_ZEXT_HREG; SDValue R = DAG.getNode(DivRemOpcode, SDLoc(N), NodeTys, N0.getOperand(0), N0.getOperand(1)); DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0)); return R.getValue(1); } static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); EVT SVT = VT.getScalarType(); EVT InVT = N0.getValueType(); EVT InSVT = InVT.getScalarType(); SDLoc DL(N); if (SDValue DivRem8 = getDivRem8(N, DAG)) return DivRem8; if (!DCI.isBeforeLegalizeOps()) { if (InVT == MVT::i1) { SDValue Zero = DAG.getConstant(0, DL, VT); SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT); return DAG.getNode(ISD::SELECT, DL, VT, N0, AllOnes, Zero); } return SDValue(); } if (VT.isVector() && Subtarget->hasSSE2()) { auto ExtendVecSize = [&DAG](SDLoc DL, SDValue N, unsigned Size) { EVT InVT = N.getValueType(); EVT OutVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(), Size / InVT.getScalarSizeInBits()); SmallVector Opnds(Size / InVT.getSizeInBits(), DAG.getUNDEF(InVT)); Opnds[0] = N; return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Opnds); }; // If target-size is less than 128-bits, extend to a type that would extend // to 128 bits, extend that and extract the original target vector. if (VT.getSizeInBits() < 128 && !(128 % VT.getSizeInBits()) && (SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) && (InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) { unsigned Scale = 128 / VT.getSizeInBits(); EVT ExVT = EVT::getVectorVT(*DAG.getContext(), SVT, 128 / SVT.getSizeInBits()); SDValue Ex = ExtendVecSize(DL, N0, Scale * InVT.getSizeInBits()); SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, ExVT, Ex); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SExt, DAG.getIntPtrConstant(0, DL)); } // If target-size is 128-bits, then convert to ISD::SIGN_EXTEND_VECTOR_INREG // which ensures lowering to X86ISD::VSEXT (pmovsx*). 
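    // E.g. (v4i32 sign_extend (v4i16 %x)): the v4i16 input is first widened
    // to v8i16 by concatenating undef elements, then rewritten as a
    // SIGN_EXTEND_VECTOR_INREG of that 128-bit value, which the lowering
    // turns into X86ISD::VSEXT (PMOVSXWD in this example) when the pmovsx
    // instructions are available.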
if (VT.getSizeInBits() == 128 && (SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) && (InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) { SDValue ExOp = ExtendVecSize(DL, N0, 128); return DAG.getSignExtendVectorInReg(ExOp, DL, VT); } // On pre-AVX2 targets, split into 128-bit nodes of // ISD::SIGN_EXTEND_VECTOR_INREG. if (!Subtarget->hasInt256() && !(VT.getSizeInBits() % 128) && (SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) && (InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) { unsigned NumVecs = VT.getSizeInBits() / 128; unsigned NumSubElts = 128 / SVT.getSizeInBits(); EVT SubVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumSubElts); EVT InSubVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubElts); SmallVector Opnds; for (unsigned i = 0, Offset = 0; i != NumVecs; ++i, Offset += NumSubElts) { SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0, DAG.getIntPtrConstant(Offset, DL)); SrcVec = ExtendVecSize(DL, SrcVec, 128); SrcVec = DAG.getSignExtendVectorInReg(SrcVec, DL, SubVT); Opnds.push_back(SrcVec); } return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds); } } if (Subtarget->hasAVX() && VT.is256BitVector()) if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget)) return R; if (SDValue NewAdd = promoteSextBeforeAddNSW(N, DAG, Subtarget)) return NewAdd; return SDValue(); } static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget* Subtarget) { SDLoc dl(N); EVT VT = N->getValueType(0); // Let legalize expand this if it isn't a legal type yet. if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); EVT ScalarVT = VT.getScalarType(); if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget->hasAnyFMA()) return SDValue(); SDValue A = N->getOperand(0); SDValue B = N->getOperand(1); SDValue C = N->getOperand(2); bool NegA = (A.getOpcode() == ISD::FNEG); bool NegB = (B.getOpcode() == ISD::FNEG); bool NegC = (C.getOpcode() == ISD::FNEG); // Negative multiplication when NegA xor NegB bool NegMul = (NegA != NegB); if (NegA) A = A.getOperand(0); if (NegB) B = B.getOperand(0); if (NegC) C = C.getOperand(0); unsigned Opcode; if (!NegMul) Opcode = (!NegC) ? X86ISD::FMADD : X86ISD::FMSUB; else Opcode = (!NegC) ? X86ISD::FNMADD : X86ISD::FNMSUB; return DAG.getNode(Opcode, dl, VT, A, B, C); } static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { // (i32 zext (and (i8 x86isd::setcc_carry), 1)) -> // (and (i32 x86isd::setcc_carry), 1) // This eliminates the zext. This transformation is necessary because // ISD::SETCC is always legalized to i8. 
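  // SETCC_CARRY is materialized with SBB, which produces 0 or -1 at whatever
  // width is requested, so the carry can simply be regenerated at the wider
  // type and masked with 1 there, making the separate zero-extension of the
  // i8 value unnecessary.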
SDLoc dl(N); SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); if (N0.getOpcode() == ISD::AND && N0.hasOneUse() && N0.getOperand(0).hasOneUse()) { SDValue N00 = N0.getOperand(0); if (N00.getOpcode() == X86ISD::SETCC_CARRY) { if (!isOneConstant(N0.getOperand(1))) return SDValue(); return DAG.getNode(ISD::AND, dl, VT, DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, N00.getOperand(0), N00.getOperand(1)), DAG.getConstant(1, dl, VT)); } } if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && N0.getOperand(0).hasOneUse()) { SDValue N00 = N0.getOperand(0); if (N00.getOpcode() == X86ISD::SETCC_CARRY) { return DAG.getNode(ISD::AND, dl, VT, DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, N00.getOperand(0), N00.getOperand(1)), DAG.getConstant(1, dl, VT)); } } if (VT.is256BitVector()) if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget)) return R; if (SDValue DivRem8 = getDivRem8(N, DAG)) return DivRem8; return SDValue(); } // Optimize x == -y --> x+y == 0 // x != -y --> x+y != 0 static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget* Subtarget) { ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); EVT VT = N->getValueType(0); SDLoc DL(N); if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB) if (isNullConstant(LHS.getOperand(0)) && LHS.hasOneUse()) { SDValue addV = DAG.getNode(ISD::ADD, DL, LHS.getValueType(), RHS, LHS.getOperand(1)); return DAG.getSetCC(DL, N->getValueType(0), addV, DAG.getConstant(0, DL, addV.getValueType()), CC); } if ((CC == ISD::SETNE || CC == ISD::SETEQ) && RHS.getOpcode() == ISD::SUB) if (isNullConstant(RHS.getOperand(0)) && RHS.hasOneUse()) { SDValue addV = DAG.getNode(ISD::ADD, DL, RHS.getValueType(), LHS, RHS.getOperand(1)); return DAG.getSetCC(DL, N->getValueType(0), addV, DAG.getConstant(0, DL, addV.getValueType()), CC); } if (VT.getScalarType() == MVT::i1 && (CC == ISD::SETNE || CC == ISD::SETEQ || ISD::isSignedIntSetCC(CC))) { bool IsSEXT0 = (LHS.getOpcode() == ISD::SIGN_EXTEND) && (LHS.getOperand(0).getValueType().getScalarType() == MVT::i1); bool IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode()); if (!IsSEXT0 || !IsVZero1) { // Swap the operands and update the condition code. std::swap(LHS, RHS); CC = ISD::getSetCCSwappedOperands(CC); IsSEXT0 = (LHS.getOpcode() == ISD::SIGN_EXTEND) && (LHS.getOperand(0).getValueType().getScalarType() == MVT::i1); IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode()); } if (IsSEXT0 && IsVZero1) { assert(VT == LHS.getOperand(0).getValueType() && "Unexpected operand type"); if (CC == ISD::SETGT) return DAG.getConstant(0, DL, VT); if (CC == ISD::SETLE) return DAG.getConstant(1, DL, VT); if (CC == ISD::SETEQ || CC == ISD::SETGE) return DAG.getNOT(DL, LHS.getOperand(0), VT); assert((CC == ISD::SETNE || CC == ISD::SETLT) && "Unexpected condition code!"); return LHS.getOperand(0); } } return SDValue(); } static SDValue PerformGatherScatterCombine(SDNode *N, SelectionDAG &DAG) { SDLoc DL(N); // Gather and Scatter instructions use k-registers for masks. The type of // the masks is v*i1. So the mask will be truncated anyway. // The SIGN_EXTEND_INREG may be dropped. SDValue Mask = N->getOperand(2); if (Mask.getOpcode() == ISD::SIGN_EXTEND_INREG) { SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end()); NewOps[2] = Mask.getOperand(0); DAG.UpdateNodeOperands(N, NewOps); } return SDValue(); } // Helper function of PerformSETCCCombine.
It materializes "setb reg" // as "sbb reg,reg", since it can be extended without zext and produces // an all-ones bit which is more useful than 0/1 in some cases. static SDValue MaterializeSETB(SDLoc DL, SDValue EFLAGS, SelectionDAG &DAG, MVT VT) { if (VT == MVT::i8) return DAG.getNode(ISD::AND, DL, VT, DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, DAG.getConstant(X86::COND_B, DL, MVT::i8), EFLAGS), DAG.getConstant(1, DL, VT)); assert (VT == MVT::i1 && "Unexpected type for SETCC node"); return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, DAG.getConstant(X86::COND_B, DL, MVT::i8), EFLAGS)); } // Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { SDLoc DL(N); X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0)); SDValue EFLAGS = N->getOperand(1); if (CC == X86::COND_A) { // Try to convert COND_A into COND_B in an attempt to facilitate // materializing "setb reg". // // Do not flip "e > c", where "c" is a constant, because Cmp instruction // cannot take an immediate as its first operand. // if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() && EFLAGS.getValueType().isInteger() && !isa<ConstantSDNode>(EFLAGS.getOperand(1))) { SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(), EFLAGS.getOperand(1), EFLAGS.getOperand(0)); SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo()); return MaterializeSETB(DL, NewEFLAGS, DAG, N->getSimpleValueType(0)); } } // Materialize "setb reg" as "sbb reg,reg", since it can be extended without // a zext and produces an all-ones bit which is more useful than 0/1 in some // cases. if (CC == X86::COND_B) return MaterializeSETB(DL, EFLAGS, DAG, N->getSimpleValueType(0)); if (SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC)) { SDValue Cond = DAG.getConstant(CC, DL, MVT::i8); return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags); } return SDValue(); } // Optimize branch condition evaluation. // static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { SDLoc DL(N); SDValue Chain = N->getOperand(0); SDValue Dest = N->getOperand(1); SDValue EFLAGS = N->getOperand(3); X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2)); if (SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC)) { SDValue Cond = DAG.getConstant(CC, DL, MVT::i8); return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond, Flags); } return SDValue(); } static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N, SelectionDAG &DAG) { // Take advantage of vector comparisons producing 0 or -1 in each lane to // optimize away operation when it's from a constant. // // The general transformation is: // UNARYOP(AND(VECTOR_CMP(x,y), constant)) --> // AND(VECTOR_CMP(x,y), constant2) // constant2 = UNARYOP(constant) // Early exit if this isn't a vector operation, the operand of the // unary operation isn't a bitwise AND, or if the sizes of the operations // aren't the same. EVT VT = N->getValueType(0); if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND || N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC || VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits()) return SDValue(); // Now check that the other operand of the AND is a constant.
We could // make the transformation for non-constant splats as well, but it's unclear // that would be a benefit as it would not eliminate any operations, just // perform one more step in scalar code before moving to the vector unit. if (BuildVectorSDNode *BV = dyn_cast(N->getOperand(0)->getOperand(1))) { // Bail out if the vector isn't a constant. if (!BV->isConstant()) return SDValue(); // Everything checks out. Build up the new and improved node. SDLoc DL(N); EVT IntVT = BV->getValueType(0); // Create a new constant of the appropriate type for the transformed // DAG. SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0)); // The AND node needs bitcasts to/from an integer vector type around it. SDValue MaskConst = DAG.getBitcast(IntVT, SourceConst); SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT, N->getOperand(0)->getOperand(0), MaskConst); SDValue Res = DAG.getBitcast(VT, NewAnd); return Res; } return SDValue(); } static SDValue PerformUINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { SDValue Op0 = N->getOperand(0); EVT VT = N->getValueType(0); EVT InVT = Op0.getValueType(); EVT InSVT = InVT.getScalarType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // UINT_TO_FP(vXi8) -> SINT_TO_FP(ZEXT(vXi8 to vXi32)) // UINT_TO_FP(vXi16) -> SINT_TO_FP(ZEXT(vXi16 to vXi32)) if (InVT.isVector() && (InSVT == MVT::i8 || InSVT == MVT::i16)) { SDLoc dl(N); EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, InVT.getVectorNumElements()); SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0); if (TLI.isOperationLegal(ISD::UINT_TO_FP, DstVT)) return DAG.getNode(ISD::UINT_TO_FP, dl, VT, P); return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P); } return SDValue(); } static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { // First try to optimize away the conversion entirely when it's // conditionally from a constant. Vectors only. if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG)) return Res; // Now move on to more general possibilities. SDValue Op0 = N->getOperand(0); EVT VT = N->getValueType(0); EVT InVT = Op0.getValueType(); EVT InSVT = InVT.getScalarType(); // SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32)) // SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32)) if (InVT.isVector() && (InSVT == MVT::i8 || InSVT == MVT::i16)) { SDLoc dl(N); EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, InVT.getVectorNumElements()); SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0); return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P); } // Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have // a 32-bit target where SSE doesn't support i64->FP operations. 
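// Roughly: when the i64 source is a plain non-volatile load with a single use,
// BuildFILD selects one x87 FILD that reads the i64 directly from memory, so
// the value never has to pass through integer registers or a stack temporary.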
if (!Subtarget->useSoftFloat() && Op0.getOpcode() == ISD::LOAD) { LoadSDNode *Ld = cast(Op0.getNode()); EVT LdVT = Ld->getValueType(0); // This transformation is not supported if the result type is f16 if (VT == MVT::f16) return SDValue(); if (!Ld->isVolatile() && !VT.isVector() && ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() && !Subtarget->is64Bit() && LdVT == MVT::i64) { SDValue FILDChain = Subtarget->getTargetLowering()->BuildFILD( SDValue(N, 0), LdVT, Ld->getChain(), Op0, DAG); DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1)); return FILDChain; } } return SDValue(); } // Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG, X86TargetLowering::DAGCombinerInfo &DCI) { // If the LHS and RHS of the ADC node are zero, then it can't overflow and // the result is either zero or one (depending on the input carry bit). // Strength reduce this down to a "set on carry" aka SETCC_CARRY&1. if (X86::isZeroNode(N->getOperand(0)) && X86::isZeroNode(N->getOperand(1)) && // We don't have a good way to replace an EFLAGS use, so only do this when // dead right now. SDValue(N, 1).use_empty()) { SDLoc DL(N); EVT VT = N->getValueType(0); SDValue CarryOut = DAG.getConstant(0, DL, N->getValueType(1)); SDValue Res1 = DAG.getNode(ISD::AND, DL, VT, DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, DAG.getConstant(X86::COND_B, DL, MVT::i8), N->getOperand(2)), DAG.getConstant(1, DL, VT)); return DCI.CombineTo(N, Res1, CarryOut); } return SDValue(); } // fold (add Y, (sete X, 0)) -> adc 0, Y // (add Y, (setne X, 0)) -> sbb -1, Y // (sub (sete X, 0), Y) -> sbb 0, Y // (sub (setne X, 0), Y) -> adc -1, Y static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) { SDLoc DL(N); // Look through ZExts. SDValue Ext = N->getOperand(N->getOpcode() == ISD::SUB ? 1 : 0); if (Ext.getOpcode() != ISD::ZERO_EXTEND || !Ext.hasOneUse()) return SDValue(); SDValue SetCC = Ext.getOperand(0); if (SetCC.getOpcode() != X86ISD::SETCC || !SetCC.hasOneUse()) return SDValue(); X86::CondCode CC = (X86::CondCode)SetCC.getConstantOperandVal(0); if (CC != X86::COND_E && CC != X86::COND_NE) return SDValue(); SDValue Cmp = SetCC.getOperand(1); if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() || !X86::isZeroNode(Cmp.getOperand(1)) || !Cmp.getOperand(0).getValueType().isInteger()) return SDValue(); SDValue CmpOp0 = Cmp.getOperand(0); SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType())); SDValue OtherVal = N->getOperand(N->getOpcode() == ISD::SUB ? 0 : 1); if (CC == X86::COND_NE) return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::ADC : X86ISD::SBB, DL, OtherVal.getValueType(), OtherVal, DAG.getConstant(-1ULL, DL, OtherVal.getValueType()), NewCmp); return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::SBB : X86ISD::ADC, DL, OtherVal.getValueType(), OtherVal, DAG.getConstant(0, DL, OtherVal.getValueType()), NewCmp); } /// PerformADDCombine - Do target-specific dag combines on integer adds. static SDValue PerformAddCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { EVT VT = N->getValueType(0); SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); // Try to synthesize horizontal adds from adds of shuffles. 
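// e.g. (illustrative) add (shuffle X, Y, <0,2,4,6>), (shuffle X, Y, <1,3,5,7>)
// sums adjacent lanes and can be emitted as a single PHADDW/PHADDD
// (X86ISD::HADD) when SSSE3 (128-bit) or AVX2 (256-bit) is available.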
if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) || (Subtarget->hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) && isHorizontalBinOp(Op0, Op1, true)) return DAG.getNode(X86ISD::HADD, SDLoc(N), VT, Op0, Op1); return OptimizeConditionalInDecrement(N, DAG); } static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); // X86 can't encode an immediate LHS of a sub. See if we can push the // negation into a preceding instruction. if (ConstantSDNode *C = dyn_cast(Op0)) { // If the RHS of the sub is a XOR with one use and a constant, invert the // immediate. Then add one to the LHS of the sub so we can turn // X-Y -> X+~Y+1, saving one register. if (Op1->hasOneUse() && Op1.getOpcode() == ISD::XOR && isa(Op1.getOperand(1))) { APInt XorC = cast(Op1.getOperand(1))->getAPIntValue(); EVT VT = Op0.getValueType(); SDValue NewXor = DAG.getNode(ISD::XOR, SDLoc(Op1), VT, Op1.getOperand(0), DAG.getConstant(~XorC, SDLoc(Op1), VT)); return DAG.getNode(ISD::ADD, SDLoc(N), VT, NewXor, DAG.getConstant(C->getAPIntValue() + 1, SDLoc(N), VT)); } } // Try to synthesize horizontal adds from adds of shuffles. EVT VT = N->getValueType(0); if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) || (Subtarget->hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) && isHorizontalBinOp(Op0, Op1, true)) return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1); return OptimizeConditionalInDecrement(N, DAG); } /// performVZEXTCombine - Performs build vector combines static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { SDLoc DL(N); MVT VT = N->getSimpleValueType(0); SDValue Op = N->getOperand(0); MVT OpVT = Op.getSimpleValueType(); MVT OpEltVT = OpVT.getVectorElementType(); unsigned InputBits = OpEltVT.getSizeInBits() * VT.getVectorNumElements(); // (vzext (bitcast (vzext (x)) -> (vzext x) SDValue V = Op; while (V.getOpcode() == ISD::BITCAST) V = V.getOperand(0); if (V != Op && V.getOpcode() == X86ISD::VZEXT) { MVT InnerVT = V.getSimpleValueType(); MVT InnerEltVT = InnerVT.getVectorElementType(); // If the element sizes match exactly, we can just do one larger vzext. This // is always an exact type match as vzext operates on integer types. if (OpEltVT == InnerEltVT) { assert(OpVT == InnerVT && "Types must match for vzext!"); return DAG.getNode(X86ISD::VZEXT, DL, VT, V.getOperand(0)); } // The only other way we can combine them is if only a single element of the // inner vzext is used in the input to the outer vzext. if (InnerEltVT.getSizeInBits() < InputBits) return SDValue(); // In this case, the inner vzext is completely dead because we're going to // only look at bits inside of the low element. Just do the outer vzext on // a bitcast of the input to the inner. return DAG.getNode(X86ISD::VZEXT, DL, VT, DAG.getBitcast(OpVT, V)); } // Check if we can bypass extracting and re-inserting an element of an input // vector. Essentially: // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x) if (V.getOpcode() == ISD::SCALAR_TO_VECTOR && V.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT && V.getOperand(0).getSimpleValueType().getSizeInBits() == InputBits) { SDValue ExtractedV = V.getOperand(0); SDValue OrigV = ExtractedV.getOperand(0); if (isNullConstant(ExtractedV.getOperand(1))) { MVT OrigVT = OrigV.getSimpleValueType(); // Extract a subvector if necessary... 
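      // Note: if the vector we extracted from is wider than the vzext's input
      // type, take its low subvector first so the bitcast below is between
      // equally sized types.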
if (OrigVT.getSizeInBits() > OpVT.getSizeInBits()) { int Ratio = OrigVT.getSizeInBits() / OpVT.getSizeInBits(); OrigVT = MVT::getVectorVT(OrigVT.getVectorElementType(), OrigVT.getVectorNumElements() / Ratio); OrigV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigVT, OrigV, DAG.getIntPtrConstant(0, DL)); } Op = DAG.getBitcast(OpVT, OrigV); return DAG.getNode(X86ISD::VZEXT, DL, VT, Op); } } return SDValue(); } SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; switch (N->getOpcode()) { default: break; case ISD::EXTRACT_VECTOR_ELT: return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, DCI); case ISD::VSELECT: case ISD::SELECT: case X86ISD::SHRUNKBLEND: return PerformSELECTCombine(N, DAG, DCI, Subtarget); case ISD::BITCAST: return PerformBITCASTCombine(N, DAG, Subtarget); case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI, Subtarget); case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget); case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget); case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI); case ISD::MUL: return PerformMulCombine(N, DAG, DCI); case ISD::SHL: case ISD::SRA: case ISD::SRL: return PerformShiftCombine(N, DAG, DCI, Subtarget); case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget); case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget); case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget); case ISD::LOAD: return PerformLOADCombine(N, DAG, DCI, Subtarget); case ISD::MLOAD: return PerformMLOADCombine(N, DAG, DCI, Subtarget); case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget); case ISD::MSTORE: return PerformMSTORECombine(N, DAG, Subtarget); case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, Subtarget); case ISD::UINT_TO_FP: return PerformUINT_TO_FPCombine(N, DAG, Subtarget); case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget); case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget); case ISD::FNEG: return PerformFNEGCombine(N, DAG, Subtarget); case ISD::TRUNCATE: return PerformTRUNCATECombine(N, DAG, Subtarget); case X86ISD::FXOR: case X86ISD::FOR: return PerformFORCombine(N, DAG, Subtarget); case X86ISD::FMIN: case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG); case ISD::FMINNUM: case ISD::FMAXNUM: return performFMinNumFMaxNumCombine(N, DAG, Subtarget); case X86ISD::FAND: return PerformFANDCombine(N, DAG, Subtarget); case X86ISD::FANDN: return PerformFANDNCombine(N, DAG, Subtarget); case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget); case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget); case ISD::SIGN_EXTEND_INREG: return PerformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget); case ISD::SETCC: return PerformISDSETCCCombine(N, DAG, Subtarget); case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget); case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget); case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget); case X86ISD::SHUFP: // Handle all target specific shuffles case X86ISD::PALIGNR: case X86ISD::BLENDI: case X86ISD::UNPCKH: case X86ISD::UNPCKL: case X86ISD::MOVHLPS: case X86ISD::MOVLHPS: case X86ISD::PSHUFB: case X86ISD::PSHUFD: case X86ISD::PSHUFHW: case X86ISD::PSHUFLW: case X86ISD::MOVSS: case X86ISD::MOVSD: case X86ISD::VPERMILPI: case X86ISD::VPERM2X128: case ISD::VECTOR_SHUFFLE: return 
PerformShuffleCombine(N, DAG, DCI,Subtarget); case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget); case ISD::MGATHER: case ISD::MSCATTER: return PerformGatherScatterCombine(N, DAG); } return SDValue(); } /// isTypeDesirableForOp - Return true if the target has native support for /// the specified value type and it is 'desirable' to use the type for the /// given node type. e.g. On x86 i16 is legal, but undesirable since i16 /// instruction encodings are longer and some i16 instructions are slow. bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const { if (!isTypeLegal(VT)) return false; if (VT != MVT::i16) return true; switch (Opc) { default: return true; case ISD::LOAD: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: case ISD::SHL: case ISD::SRL: case ISD::SUB: case ISD::ADD: case ISD::MUL: case ISD::AND: case ISD::OR: case ISD::XOR: return false; } } /// This function checks if any of the users of EFLAGS copies the EFLAGS. We /// know that the code that lowers COPY of EFLAGS has to use the stack, and if /// we don't adjust the stack we clobber the first frame index. /// See X86InstrInfo::copyPhysReg. bool X86TargetLowering::hasCopyImplyingStackAdjustment( MachineFunction *MF) const { const MachineRegisterInfo &MRI = MF->getRegInfo(); return any_of(MRI.reg_instructions(X86::EFLAGS), [](const MachineInstr &RI) { return RI.isCopy(); }); } /// IsDesirableToPromoteOp - This method query the target whether it is /// beneficial for dag combiner to promote the specified node. If true, it /// should return the desired promotion type by reference. bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const { EVT VT = Op.getValueType(); if (VT != MVT::i16) return false; bool Promote = false; bool Commute = false; switch (Op.getOpcode()) { default: break; case ISD::LOAD: { LoadSDNode *LD = cast(Op); // If the non-extending load has a single use and it's not live out, then it // might be folded. if (LD->getExtensionType() == ISD::NON_EXTLOAD /*&& Op.hasOneUse()*/) { for (SDNode::use_iterator UI = Op.getNode()->use_begin(), UE = Op.getNode()->use_end(); UI != UE; ++UI) { // The only case where we'd want to promote LOAD (rather then it being // promoted as an operand is when it's only use is liveout. if (UI->getOpcode() != ISD::CopyToReg) return false; } } Promote = true; break; } case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: Promote = true; break; case ISD::SHL: case ISD::SRL: { SDValue N0 = Op.getOperand(0); // Look out for (store (shl (load), x)). if (MayFoldLoad(N0) && MayFoldIntoStore(Op)) return false; Promote = true; break; } case ISD::ADD: case ISD::MUL: case ISD::AND: case ISD::OR: case ISD::XOR: Commute = true; // fallthrough case ISD::SUB: { SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); if (!Commute && MayFoldLoad(N1)) return false; // Avoid disabling potential load folding opportunities. if (MayFoldLoad(N0) && (!isa(N1) || MayFoldIntoStore(Op))) return false; if (MayFoldLoad(N1) && (!isa(N0) || MayFoldIntoStore(Op))) return false; Promote = true; } } PVT = MVT::i32; return Promote; } //===----------------------------------------------------------------------===// // X86 Inline Assembly Support //===----------------------------------------------------------------------===// // Helper to match a string separated by whitespace. static bool matchAsm(StringRef S, ArrayRef Pieces) { S = S.substr(S.find_first_not_of(" \t")); // Skip leading whitespace. 
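  // Consume each expected piece in turn: the piece must match at the front of
  // S and be followed by whitespace (or the end of the string); after the last
  // piece only whitespace may remain.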
for (StringRef Piece : Pieces) { if (!S.startswith(Piece)) // Check if the piece matches. return false; S = S.substr(Piece.size()); StringRef::size_type Pos = S.find_first_not_of(" \t"); if (Pos == 0) // We matched a prefix. return false; S = S.substr(Pos); } return S.empty(); } static bool clobbersFlagRegisters(const SmallVector<StringRef, 4> &AsmPieces) { if (AsmPieces.size() == 3 || AsmPieces.size() == 4) { if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{cc}") && std::count(AsmPieces.begin(), AsmPieces.end(), "~{flags}") && std::count(AsmPieces.begin(), AsmPieces.end(), "~{fpsr}")) { if (AsmPieces.size() == 3) return true; else if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{dirflag}")) return true; } } return false; } bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue()); std::string AsmStr = IA->getAsmString(); IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); if (!Ty || Ty->getBitWidth() % 16 != 0) return false; // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a" SmallVector<StringRef, 4> AsmPieces; SplitString(AsmStr, AsmPieces, ";\n"); switch (AsmPieces.size()) { default: return false; case 1: // FIXME: this should verify that we are targeting a 486 or better. If not, // we will turn this bswap into something that will be lowered to logical // ops instead of emitting the bswap asm. For now, we don't support 486 or // lower so don't worry about this. // bswap $0 if (matchAsm(AsmPieces[0], {"bswap", "$0"}) || matchAsm(AsmPieces[0], {"bswapl", "$0"}) || matchAsm(AsmPieces[0], {"bswapq", "$0"}) || matchAsm(AsmPieces[0], {"bswap", "${0:q}"}) || matchAsm(AsmPieces[0], {"bswapl", "${0:q}"}) || matchAsm(AsmPieces[0], {"bswapq", "${0:q}"})) { // No need to check constraints, nothing other than the equivalent of // "=r,0" would be valid here.
return IntrinsicLowering::LowerToByteSwap(CI); } // rorw $$8, ${0:w} --> llvm.bswap.i16 if (CI->getType()->isIntegerTy(16) && IA->getConstraintString().compare(0, 5, "=r,0,") == 0 && (matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) || matchAsm(AsmPieces[0], {"rolw", "$$8,", "${0:w}"}))) { AsmPieces.clear(); StringRef ConstraintsStr = IA->getConstraintString(); SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ","); array_pod_sort(AsmPieces.begin(), AsmPieces.end()); if (clobbersFlagRegisters(AsmPieces)) return IntrinsicLowering::LowerToByteSwap(CI); } break; case 3: if (CI->getType()->isIntegerTy(32) && IA->getConstraintString().compare(0, 5, "=r,0,") == 0 && matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) && matchAsm(AsmPieces[1], {"rorl", "$$16,", "$0"}) && matchAsm(AsmPieces[2], {"rorw", "$$8,", "${0:w}"})) { AsmPieces.clear(); StringRef ConstraintsStr = IA->getConstraintString(); SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ","); array_pod_sort(AsmPieces.begin(), AsmPieces.end()); if (clobbersFlagRegisters(AsmPieces)) return IntrinsicLowering::LowerToByteSwap(CI); } if (CI->getType()->isIntegerTy(64)) { InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints(); if (Constraints.size() >= 2 && Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" && Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") { // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64 if (matchAsm(AsmPieces[0], {"bswap", "%eax"}) && matchAsm(AsmPieces[1], {"bswap", "%edx"}) && matchAsm(AsmPieces[2], {"xchgl", "%eax,", "%edx"})) return IntrinsicLowering::LowerToByteSwap(CI); } } break; } return false; } /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. X86TargetLowering::ConstraintType X86TargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'R': case 'q': case 'Q': case 'f': case 't': case 'u': case 'y': case 'x': case 'Y': case 'l': return C_RegisterClass; case 'a': case 'b': case 'c': case 'd': case 'S': case 'D': case 'A': return C_Register; case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'G': case 'C': case 'e': case 'Z': return C_Other; default: break; } } return TargetLowering::getConstraintType(Constraint); } /// Examine constraint type and operand type and determine a weight value. /// This object must already have been set up with the operand type /// and the current alternative constraint selected. TargetLowering::ConstraintWeight X86TargetLowering::getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const { ConstraintWeight weight = CW_Invalid; Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); // Look at the constraint type. 
switch (*constraint) { default: weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); case 'R': case 'q': case 'Q': case 'a': case 'b': case 'c': case 'd': case 'S': case 'D': case 'A': if (CallOperandVal->getType()->isIntegerTy()) weight = CW_SpecificReg; break; case 'f': case 't': case 'u': if (type->isFloatingPointTy()) weight = CW_SpecificReg; break; case 'y': if (type->isX86_MMXTy() && Subtarget->hasMMX()) weight = CW_SpecificReg; break; case 'x': case 'Y': if (((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasSSE1()) || ((type->getPrimitiveSizeInBits() == 256) && Subtarget->hasFp256())) weight = CW_Register; break; case 'I': if (ConstantInt *C = dyn_cast(info.CallOperandVal)) { if (C->getZExtValue() <= 31) weight = CW_Constant; } break; case 'J': if (ConstantInt *C = dyn_cast(CallOperandVal)) { if (C->getZExtValue() <= 63) weight = CW_Constant; } break; case 'K': if (ConstantInt *C = dyn_cast(CallOperandVal)) { if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f)) weight = CW_Constant; } break; case 'L': if (ConstantInt *C = dyn_cast(CallOperandVal)) { if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff)) weight = CW_Constant; } break; case 'M': if (ConstantInt *C = dyn_cast(CallOperandVal)) { if (C->getZExtValue() <= 3) weight = CW_Constant; } break; case 'N': if (ConstantInt *C = dyn_cast(CallOperandVal)) { if (C->getZExtValue() <= 0xff) weight = CW_Constant; } break; case 'G': case 'C': if (isa(CallOperandVal)) { weight = CW_Constant; } break; case 'e': if (ConstantInt *C = dyn_cast(CallOperandVal)) { if ((C->getSExtValue() >= -0x80000000LL) && (C->getSExtValue() <= 0x7fffffffLL)) weight = CW_Constant; } break; case 'Z': if (ConstantInt *C = dyn_cast(CallOperandVal)) { if (C->getZExtValue() <= 0xffffffff) weight = CW_Constant; } break; } return weight; } /// LowerXConstraint - try to replace an X constraint, which matches anything, /// with another that has more specific requirements based on the type of the /// corresponding operand. const char *X86TargetLowering:: LowerXConstraint(EVT ConstraintVT) const { // FP X constraints get lowered to SSE1/2 registers if available, otherwise // 'f' like normal targets. if (ConstraintVT.isFloatingPoint()) { if (Subtarget->hasSSE2()) return "Y"; if (Subtarget->hasSSE1()) return "x"; } return TargetLowering::LowerXConstraint(ConstraintVT); } /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector&Ops, SelectionDAG &DAG) const { SDValue Result; // Only support length 1 constraints for now. 
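  // Note: each case below checks the numeric range implied by the constraint
  // letter (e.g. 'I' is a shift count in [0,31], 'K' a signed 8-bit value,
  // 'N' an unsigned 8-bit value, 'e'/'Z' a 32-bit signed/unsigned value) and
  // only emits a target constant when the operand fits; otherwise nothing is
  // added to Ops and the operand is rejected.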
if (Constraint.length() > 1) return; char ConstraintLetter = Constraint[0]; switch (ConstraintLetter) { default: break; case 'I': if (ConstantSDNode *C = dyn_cast(Op)) { if (C->getZExtValue() <= 31) { Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType()); break; } } return; case 'J': if (ConstantSDNode *C = dyn_cast(Op)) { if (C->getZExtValue() <= 63) { Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType()); break; } } return; case 'K': if (ConstantSDNode *C = dyn_cast(Op)) { if (isInt<8>(C->getSExtValue())) { Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType()); break; } } return; case 'L': if (ConstantSDNode *C = dyn_cast(Op)) { if (C->getZExtValue() == 0xff || C->getZExtValue() == 0xffff || (Subtarget->is64Bit() && C->getZExtValue() == 0xffffffff)) { Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), Op.getValueType()); break; } } return; case 'M': if (ConstantSDNode *C = dyn_cast(Op)) { if (C->getZExtValue() <= 3) { Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType()); break; } } return; case 'N': if (ConstantSDNode *C = dyn_cast(Op)) { if (C->getZExtValue() <= 255) { Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType()); break; } } return; case 'O': if (ConstantSDNode *C = dyn_cast(Op)) { if (C->getZExtValue() <= 127) { Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType()); break; } } return; case 'e': { // 32-bit signed value if (ConstantSDNode *C = dyn_cast(Op)) { if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()), C->getSExtValue())) { // Widen to 64 bits here to get it sign extended. Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), MVT::i64); break; } // FIXME gcc accepts some relocatable values here too, but only in certain // memory models; it's complicated. } return; } case 'Z': { // 32-bit unsigned value if (ConstantSDNode *C = dyn_cast(Op)) { if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()), C->getZExtValue())) { Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType()); break; } } // FIXME gcc accepts some relocatable values here too, but only in certain // memory models; it's complicated. return; } case 'i': { // Literal immediates are always ok. if (ConstantSDNode *CST = dyn_cast(Op)) { // Widen to 64 bits here to get it sign extended. Result = DAG.getTargetConstant(CST->getSExtValue(), SDLoc(Op), MVT::i64); break; } // In any sort of PIC mode addresses need to be computed at runtime by // adding in a register or some sort of table lookup. These can't // be used as immediates. if (Subtarget->isPICStyleGOT() || Subtarget->isPICStyleStubPIC()) return; // If we are in non-pic codegen mode, we allow the address of a global (with // an optional displacement) to be used with 'i'. GlobalAddressSDNode *GA = nullptr; int64_t Offset = 0; // Match either (GA), (GA+C), (GA+C1+C2), etc. while (1) { if ((GA = dyn_cast(Op))) { Offset += GA->getOffset(); break; } else if (Op.getOpcode() == ISD::ADD) { if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { Offset += C->getZExtValue(); Op = Op.getOperand(0); continue; } } else if (Op.getOpcode() == ISD::SUB) { if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { Offset += -C->getZExtValue(); Op = Op.getOperand(0); continue; } } // Otherwise, this isn't something we can handle, reject it. 
return; } const GlobalValue *GV = GA->getGlobal(); // If we require an extra load to get this address, as in PIC mode, we // can't accept it. if (isGlobalStubReference( Subtarget->ClassifyGlobalReference(GV, DAG.getTarget()))) return; Result = DAG.getTargetGlobalAddress(GV, SDLoc(Op), GA->getValueType(0), Offset); break; } } if (Result.getNode()) { Ops.push_back(Result); return; } return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } std::pair X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { // First, see if this is a constraint that directly corresponds to an LLVM // register class. if (Constraint.size() == 1) { // GCC Constraint Letters switch (Constraint[0]) { default: break; // TODO: Slight differences here in allocation order and leaving // RIP in the class. Do they matter any more here than they do // in the normal allocation? case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode. if (Subtarget->is64Bit()) { if (VT == MVT::i32 || VT == MVT::f32) return std::make_pair(0U, &X86::GR32RegClass); if (VT == MVT::i16) return std::make_pair(0U, &X86::GR16RegClass); if (VT == MVT::i8 || VT == MVT::i1) return std::make_pair(0U, &X86::GR8RegClass); if (VT == MVT::i64 || VT == MVT::f64) return std::make_pair(0U, &X86::GR64RegClass); break; } // 32-bit fallthrough case 'Q': // Q_REGS if (VT == MVT::i32 || VT == MVT::f32) return std::make_pair(0U, &X86::GR32_ABCDRegClass); if (VT == MVT::i16) return std::make_pair(0U, &X86::GR16_ABCDRegClass); if (VT == MVT::i8 || VT == MVT::i1) return std::make_pair(0U, &X86::GR8_ABCD_LRegClass); if (VT == MVT::i64) return std::make_pair(0U, &X86::GR64_ABCDRegClass); break; case 'r': // GENERAL_REGS case 'l': // INDEX_REGS if (VT == MVT::i8 || VT == MVT::i1) return std::make_pair(0U, &X86::GR8RegClass); if (VT == MVT::i16) return std::make_pair(0U, &X86::GR16RegClass); if (VT == MVT::i32 || VT == MVT::f32 || !Subtarget->is64Bit()) return std::make_pair(0U, &X86::GR32RegClass); return std::make_pair(0U, &X86::GR64RegClass); case 'R': // LEGACY_REGS if (VT == MVT::i8 || VT == MVT::i1) return std::make_pair(0U, &X86::GR8_NOREXRegClass); if (VT == MVT::i16) return std::make_pair(0U, &X86::GR16_NOREXRegClass); if (VT == MVT::i32 || !Subtarget->is64Bit()) return std::make_pair(0U, &X86::GR32_NOREXRegClass); return std::make_pair(0U, &X86::GR64_NOREXRegClass); case 'f': // FP Stack registers. // If SSE is enabled for this VT, use f80 to ensure the isel moves the // value to the correct fpstack register class. if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT)) return std::make_pair(0U, &X86::RFP32RegClass); if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT)) return std::make_pair(0U, &X86::RFP64RegClass); return std::make_pair(0U, &X86::RFP80RegClass); case 'y': // MMX_REGS if MMX allowed. if (!Subtarget->hasMMX()) break; return std::make_pair(0U, &X86::VR64RegClass); case 'Y': // SSE_REGS if SSE2 allowed if (!Subtarget->hasSSE2()) break; // FALL THROUGH. case 'x': // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed if (!Subtarget->hasSSE1()) break; switch (VT.SimpleTy) { default: break; // Scalar SSE types. case MVT::f32: case MVT::i32: return std::make_pair(0U, &X86::FR32RegClass); case MVT::f64: case MVT::i64: return std::make_pair(0U, &X86::FR64RegClass); // TODO: Handle f128 and i128 in FR128RegClass after it is tested well. // Vector types. 
case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: case MVT::v4f32: case MVT::v2f64: return std::make_pair(0U, &X86::VR128RegClass); // AVX types. case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32: case MVT::v4f64: return std::make_pair(0U, &X86::VR256RegClass); case MVT::v8f64: case MVT::v16f32: case MVT::v16i32: case MVT::v8i64: return std::make_pair(0U, &X86::VR512RegClass); } break; } } // Use the default implementation in TargetLowering to convert the register // constraint into a member of a register class. std::pair Res; Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); // Not found as a standard register? if (!Res.second) { // Map st(0) -> st(7) -> ST0 if (Constraint.size() == 7 && Constraint[0] == '{' && tolower(Constraint[1]) == 's' && tolower(Constraint[2]) == 't' && Constraint[3] == '(' && (Constraint[4] >= '0' && Constraint[4] <= '7') && Constraint[5] == ')' && Constraint[6] == '}') { Res.first = X86::FP0+Constraint[4]-'0'; Res.second = &X86::RFP80RegClass; return Res; } // GCC allows "st(0)" to be called just plain "st". if (StringRef("{st}").equals_lower(Constraint)) { Res.first = X86::FP0; Res.second = &X86::RFP80RegClass; return Res; } // flags -> EFLAGS if (StringRef("{flags}").equals_lower(Constraint)) { Res.first = X86::EFLAGS; Res.second = &X86::CCRRegClass; return Res; } // 'A' means EAX + EDX. if (Constraint == "A") { Res.first = X86::EAX; Res.second = &X86::GR32_ADRegClass; return Res; } return Res; } // Otherwise, check to see if this is a register class of the wrong value // type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to // turn into {ax},{dx}. // MVT::Other is used to specify clobber names. if (Res.second->hasType(VT) || VT == MVT::Other) return Res; // Correct type already, nothing to do. // Get a matching integer of the correct size. i.e. "ax" with MVT::32 should // return "eax". This should even work for things like getting 64bit integer // registers when given an f64 type. const TargetRegisterClass *Class = Res.second; if (Class == &X86::GR8RegClass || Class == &X86::GR16RegClass || Class == &X86::GR32RegClass || Class == &X86::GR64RegClass) { unsigned Size = VT.getSizeInBits(); if (Size == 1) Size = 8; unsigned DestReg = getX86SubSuperRegisterOrZero(Res.first, Size); if (DestReg > 0) { Res.first = DestReg; Res.second = Size == 8 ? &X86::GR8RegClass : Size == 16 ? &X86::GR16RegClass : Size == 32 ? &X86::GR32RegClass : &X86::GR64RegClass; assert(Res.second->contains(Res.first) && "Register in register class"); } else { // No register found/type mismatch. Res.first = 0; Res.second = nullptr; } } else if (Class == &X86::FR32RegClass || Class == &X86::FR64RegClass || Class == &X86::VR128RegClass || Class == &X86::VR256RegClass || Class == &X86::FR32XRegClass || Class == &X86::FR64XRegClass || Class == &X86::VR128XRegClass || Class == &X86::VR256XRegClass || Class == &X86::VR512RegClass) { // Handle references to XMM physical registers that got mapped into the // wrong class. This can happen with constraints like {xmm0} where the // target independent register mapper will just pick the first match it can // find, ignoring the required type. // TODO: Handle f128 and i128 in FR128RegClass after it is tested well. 
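    // Note: pick the register class whose natural width matches VT: scalar
    // f32/i32 and f64/i64 map to FR32/FR64, and wider vector types map to
    // VR128/VR256/VR512 by size.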
if (VT == MVT::f32 || VT == MVT::i32) Res.second = &X86::FR32RegClass; else if (VT == MVT::f64 || VT == MVT::i64) Res.second = &X86::FR64RegClass; else if (X86::VR128RegClass.hasType(VT)) Res.second = &X86::VR128RegClass; else if (X86::VR256RegClass.hasType(VT)) Res.second = &X86::VR256RegClass; else if (X86::VR512RegClass.hasType(VT)) Res.second = &X86::VR512RegClass; else { // Type mismatch and not a clobber: Return an error; Res.first = 0; Res.second = nullptr; } } return Res; } int X86TargetLowering::getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const { // Scaling factors are not free at all. // An indexed folded instruction, i.e., inst (reg1, reg2, scale), // will take 2 allocations in the out of order engine instead of 1 // for plain addressing mode, i.e. inst (reg1). // E.g., // vaddps (%rsi,%drx), %ymm0, %ymm1 // Requires two allocations (one for the load, one for the computation) // whereas: // vaddps (%rsi), %ymm0, %ymm1 // Requires just 1 allocation, i.e., freeing allocations for other operations // and having less micro operations to execute. // // For some X86 architectures, this is even worse because for instance for // stores, the complex addressing mode forces the instruction to use the // "load" ports instead of the dedicated "store" port. // E.g., on Haswell: // vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3. // vmovaps %ymm1, (%r8) can use port 2, 3, or 7. if (isLegalAddressingMode(DL, AM, Ty, AS)) // Scale represents reg2 * scale, thus account for 1 // as soon as we use a second register. return AM.Scale != 0; return -1; } bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const { // Integer division on x86 is expensive. However, when aggressively optimizing // for code size, we prefer to use a div instruction, as it is usually smaller // than the alternative sequence. // The exception to this is vector division. Since x86 doesn't have vector // integer division, leaving the division as-is is a loss even in terms of // size, because it will have to be scalarized, while the alternative code // sequence can be performed in vector form. bool OptSize = Attr.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); return OptSize && !VT.isVector(); } void X86TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { if (!Subtarget->is64Bit()) return; // Update IsSplitCSR in X86MachineFunctionInfo. X86MachineFunctionInfo *AFI = Entry->getParent()->getInfo(); AFI->setIsSplitCSR(true); } void X86TargetLowering::insertCopiesSplitCSR( MachineBasicBlock *Entry, const SmallVectorImpl &Exits) const { const X86RegisterInfo *TRI = Subtarget->getRegisterInfo(); const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); if (!IStart) return; const TargetInstrInfo *TII = Subtarget->getInstrInfo(); MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); MachineBasicBlock::iterator MBBI = Entry->begin(); for (const MCPhysReg *I = IStart; *I; ++I) { const TargetRegisterClass *RC = nullptr; if (X86::GR64RegClass.contains(*I)) RC = &X86::GR64RegClass; else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); unsigned NewVR = MRI->createVirtualRegister(RC); // Create copy from CSR to a virtual register. // FIXME: this currently does not emit CFI pseudo-instructions, it works // fine for CXX_FAST_TLS since the C++-style TLS access functions should be // nounwind. If we want to generalize this later, we may need to emit // CFI pseudo-instructions. 
assert(Entry->getParent()->getFunction()->hasFnAttribute( Attribute::NoUnwind) && "Function should be nounwind in insertCopiesSplitCSR!"); Entry->addLiveIn(*I); BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(*I); // Insert the copy-back instructions right before the terminator. for (auto *Exit : Exits) BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), TII->get(TargetOpcode::COPY), *I) .addReg(NewVR); } } Index: projects/clang380-import/contrib/llvm/lib/Target/X86/X86ISelLowering.h =================================================================== --- projects/clang380-import/contrib/llvm/lib/Target/X86/X86ISelLowering.h (revision 296010) +++ projects/clang380-import/contrib/llvm/lib/Target/X86/X86ISelLowering.h (revision 296011) @@ -1,1176 +1,1179 @@ //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines the interfaces that X86 uses to lower LLVM code into a // selection DAG. // //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" namespace llvm { class X86Subtarget; class X86TargetMachine; namespace X86ISD { // X86 Specific DAG Nodes enum NodeType : unsigned { // Start the numbering where the builtin ops leave off. FIRST_NUMBER = ISD::BUILTIN_OP_END, /// Bit scan forward. BSF, /// Bit scan reverse. BSR, /// Double shift instructions. These correspond to /// X86::SHLDxx and X86::SHRDxx instructions. SHLD, SHRD, /// Bitwise logical AND of floating point values. This corresponds /// to X86::ANDPS or X86::ANDPD. FAND, /// Bitwise logical OR of floating point values. This corresponds /// to X86::ORPS or X86::ORPD. FOR, /// Bitwise logical XOR of floating point values. This corresponds /// to X86::XORPS or X86::XORPD. FXOR, /// Bitwise logical ANDNOT of floating point values. This /// corresponds to X86::ANDNPS or X86::ANDNPD. FANDN, /// These operations represent an abstract X86 call /// instruction, which includes a bunch of information. In particular the /// operands of these node are: /// /// #0 - The incoming token chain /// #1 - The callee /// #2 - The number of arg bytes the caller pushes on the stack. /// #3 - The number of arg bytes the callee pops off the stack. /// #4 - The value to pass in AL/AX/EAX (optional) /// #5 - The value to pass in DL/DX/EDX (optional) /// /// The result values of these nodes are: /// /// #0 - The outgoing token chain /// #1 - The first register result value (optional) /// #2 - The second register result value (optional) /// CALL, /// This operation implements the lowering for readcyclecounter RDTSC_DAG, /// X86 Read Time-Stamp Counter and Processor ID. RDTSCP_DAG, /// X86 Read Performance Monitoring Counters. RDPMC_DAG, /// X86 compare and logical compare instructions. CMP, COMI, UCOMI, /// X86 bit-test instructions. BT, /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS /// operand, usually produced by a CMP instruction. 
SETCC, /// X86 Select SELECT, // Same as SETCC except it's materialized with a sbb and the value is all // one's or all zero's. SETCC_CARRY, // R = carry_bit ? ~0 : 0 /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD. /// Operands are two FP values to compare; result is a mask of /// 0s or 1s. Generally DTRT for C/C++ with NaNs. FSETCC, /// X86 MOVMSK{pd|ps}, extracts sign bits of two or four FP values, /// result in an integer GPR. Needs masking for scalar result. FGETSIGNx86, /// X86 conditional moves. Operand 0 and operand 1 are the two values /// to select from. Operand 2 is the condition code, and operand 3 is the /// flag operand produced by a CMP or TEST instruction. It also writes a /// flag result. CMOV, /// X86 conditional branches. Operand 0 is the chain operand, operand 1 /// is the block to branch if condition is true, operand 2 is the /// condition code, and operand 3 is the flag operand produced by a CMP /// or TEST instruction. BRCOND, /// Return with a flag operand. Operand 0 is the chain operand, operand /// 1 is the number of bytes of stack to pop. RET_FLAG, /// Return from interrupt. Operand 0 is the number of bytes to pop. IRET, /// Repeat fill, corresponds to X86::REP_STOSx. REP_STOS, /// Repeat move, corresponds to X86::REP_MOVSx. REP_MOVS, /// On Darwin, this node represents the result of the popl /// at function entry, used for PIC code. GlobalBaseReg, /// A wrapper node for TargetConstantPool, /// TargetExternalSymbol, and TargetGlobalAddress. Wrapper, /// Special wrapper used under X86-64 PIC mode for RIP /// relative displacements. WrapperRIP, /// Copies a 64-bit value from the low word of an XMM vector /// to an MMX vector. If you think this is too close to the previous /// mnemonic, so do I; blame Intel. MOVDQ2Q, /// Copies a 32-bit value from the low word of a MMX /// vector to a GPR. MMX_MOVD2W, /// Copies a GPR into the low 32-bit word of a MMX vector /// and zero out the high word. MMX_MOVW2D, /// Extract an 8-bit value from a vector and zero extend it to /// i32, corresponds to X86::PEXTRB. PEXTRB, /// Extract a 16-bit value from a vector and zero extend it to /// i32, corresponds to X86::PEXTRW. PEXTRW, /// Insert any element of a 4 x float vector into any element /// of a destination 4 x floatvector. INSERTPS, /// Insert the lower 8-bits of a 32-bit value to a vector, /// corresponds to X86::PINSRB. PINSRB, /// Insert the lower 16-bits of a 32-bit value to a vector, /// corresponds to X86::PINSRW. PINSRW, MMX_PINSRW, /// Shuffle 16 8-bit values within a vector. PSHUFB, /// Compute Sum of Absolute Differences. PSADBW, /// Compute Double Block Packed Sum-Absolute-Differences DBPSADBW, /// Bitwise Logical AND NOT of Packed FP values. ANDNP, /// Copy integer sign. PSIGN, /// Blend where the selector is an immediate. BLENDI, /// Blend where the condition has been shrunk. /// This is used to emphasize that the condition mask is /// no more valid for generic VSELECT optimizations. SHRUNKBLEND, /// Combined add and sub on an FP vector. ADDSUB, // FP vector ops with rounding mode. FADD_RND, FSUB_RND, FMUL_RND, FDIV_RND, FMAX_RND, FMIN_RND, FSQRT_RND, // FP vector get exponent FGETEXP_RND, // Extract Normalized Mantissas VGETMANT, // FP Scale SCALEF, // Integer add/sub with unsigned saturation. ADDUS, SUBUS, // Integer add/sub with signed saturation. ADDS, SUBS, // Unsigned Integer average AVG, /// Integer horizontal add. HADD, /// Integer horizontal sub. HSUB, /// Floating point horizontal add. FHADD, /// Floating point horizontal sub. 
FHSUB, // Integer absolute value ABS, // Detect Conflicts Within a Vector CONFLICT, /// Floating point max and min. FMAX, FMIN, /// Commutative FMIN and FMAX. FMAXC, FMINC, /// Floating point reciprocal-sqrt and reciprocal approximation. /// Note that these typically require refinement /// in order to obtain suitable precision. FRSQRT, FRCP, // Thread Local Storage. TLSADDR, // Thread Local Storage. A call to get the start address // of the TLS block for the current module. TLSBASEADDR, // Thread Local Storage. When calling to an OS provided // thunk at the address from an earlier relocation. TLSCALL, // Exception Handling helpers. EH_RETURN, // SjLj exception handling setjmp. EH_SJLJ_SETJMP, // SjLj exception handling longjmp. EH_SJLJ_LONGJMP, /// Tail call return. See X86TargetLowering::LowerCall for /// the list of operands. TC_RETURN, // Vector move to low scalar and zero higher vector elements. VZEXT_MOVL, // Vector integer zero-extend. VZEXT, // Vector integer signed-extend. VSEXT, // Vector integer truncate. VTRUNC, // Vector integer truncate with unsigned/signed saturation. VTRUNCUS, VTRUNCS, // Vector FP extend. VFPEXT, // Vector FP round. VFPROUND, // Vector signed/unsigned integer to double. CVTDQ2PD, CVTUDQ2PD, // Convert a vector to mask, set bits base on MSB. CVT2MASK, // 128-bit vector logical left / right shift VSHLDQ, VSRLDQ, // Vector shift elements VSHL, VSRL, VSRA, // Vector shift elements by immediate VSHLI, VSRLI, VSRAI, // Bit rotate by immediate VROTLI, VROTRI, // Vector packed double/float comparison. CMPP, // Vector integer comparisons. PCMPEQ, PCMPGT, // Vector integer comparisons, the result is in a mask vector. PCMPEQM, PCMPGTM, /// Vector comparison generating mask bits for fp and /// integer signed and unsigned data types. CMPM, CMPMU, // Vector comparison with rounding mode for FP values CMPM_RND, // Arithmetic operations with FLAGS results. ADD, SUB, ADC, SBB, SMUL, INC, DEC, OR, XOR, AND, BEXTR, // Bit field extract UMUL, // LOW, HI, FLAGS = umul LHS, RHS // 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS SMUL8, UMUL8, // 8-bit divrem that zero-extend the high result (AH). UDIVREM8_ZEXT_HREG, SDIVREM8_SEXT_HREG, // X86-specific multiply by immediate. MUL_IMM, // Vector bitwise comparisons. PTEST, // Vector packed fp sign bitwise comparisons. TESTP, // Vector "test" in AVX-512, the result is in a mask vector. TESTM, TESTNM, // OR/AND test for masks KORTEST, KTEST, // Several flavors of instructions with vector shuffle behaviors. PACKSS, PACKUS, // Intra-lane alignr PALIGNR, // AVX512 inter-lane alignr VALIGN, PSHUFD, PSHUFHW, PSHUFLW, SHUFP, //Shuffle Packed Values at 128-bit granularity SHUF128, MOVDDUP, MOVSHDUP, MOVSLDUP, MOVLHPS, MOVLHPD, MOVHLPS, MOVLPS, MOVLPD, MOVSD, MOVSS, UNPCKL, UNPCKH, VPERMILPV, VPERMILPI, VPERMV, VPERMV3, VPERMIV3, VPERMI, VPERM2X128, // Bitwise ternary logic VPTERNLOG, // Fix Up Special Packed Float32/64 values VFIXUPIMM, // Range Restriction Calculation For Packed Pairs of Float32/64 values VRANGE, // Reduce - Perform Reduction Transformation on scalar\packed FP VREDUCE, // RndScale - Round FP Values To Include A Given Number Of Fraction Bits VRNDSCALE, // VFPCLASS - Tests Types Of a FP Values for packed types. VFPCLASS, // VFPCLASSS - Tests Types Of a FP Values for scalar types. VFPCLASSS, // Broadcast scalar to vector VBROADCAST, // Broadcast mask to vector VBROADCASTM, // Broadcast subvector to vector SUBV_BROADCAST, // Insert/Extract vector element VINSERT, VEXTRACT, /// SSE4A Extraction and Insertion. 
EXTRQI, INSERTQI, // XOP variable/immediate rotations VPROT, VPROTI, // XOP arithmetic/logical shifts VPSHA, VPSHL, // XOP signed/unsigned integer comparisons VPCOM, VPCOMU, // Vector multiply packed unsigned doubleword integers PMULUDQ, // Vector multiply packed signed doubleword integers PMULDQ, // Vector Multiply Packed UnsignedIntegers with Round and Scale MULHRS, // Multiply and Add Packed Integers VPMADDUBSW, VPMADDWD, // FMA nodes FMADD, FNMADD, FMSUB, FNMSUB, FMADDSUB, FMSUBADD, // FMA with rounding mode FMADD_RND, FNMADD_RND, FMSUB_RND, FNMSUB_RND, FMADDSUB_RND, FMSUBADD_RND, // Compress and expand COMPRESS, EXPAND, //Convert Unsigned/Integer to Scalar Floating-Point Value //with rounding mode SINT_TO_FP_RND, UINT_TO_FP_RND, // Vector float/double to signed/unsigned integer. FP_TO_SINT_RND, FP_TO_UINT_RND, // Save xmm argument registers to the stack, according to %al. An operator // is needed so that this can be expanded with control flow. VASTART_SAVE_XMM_REGS, // Windows's _chkstk call to do stack probing. WIN_ALLOCA, // For allocating variable amounts of stack space when using // segmented stacks. Check if the current stacklet has enough space, and // falls back to heap allocation if not. SEG_ALLOCA, // Memory barrier MEMBARRIER, MFENCE, SFENCE, LFENCE, // Store FP status word into i16 register. FNSTSW16r, // Store contents of %ah into %eflags. SAHF, // Get a random integer and indicate whether it is valid in CF. RDRAND, // Get a NIST SP800-90B & C compliant random integer and // indicate whether it is valid in CF. RDSEED, PCMPISTRI, PCMPESTRI, // Test if in transactional execution. XTEST, // ERI instructions RSQRT28, RCP28, EXP2, // Compare and swap. LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE, LCMPXCHG8_DAG, LCMPXCHG16_DAG, // Load, scalar_to_vector, and zero extend. VZEXT_LOAD, // Store FP control world into i16 memory. FNSTCW16m, /// This instruction implements FP_TO_SINT with the /// integer destination in memory and a FP reg source. This corresponds /// to the X86::FIST*m instructions and the rounding mode change stuff. It /// has two inputs (token chain and address) and two outputs (int value /// and token chain). FP_TO_INT16_IN_MEM, FP_TO_INT32_IN_MEM, FP_TO_INT64_IN_MEM, /// This instruction implements SINT_TO_FP with the /// integer source in memory and FP reg result. This corresponds to the /// X86::FILD*m instructions. It has three inputs (token chain, address, /// and source type) and two outputs (FP value and token chain). FILD_FLAG /// also produces a flag). FILD, FILD_FLAG, /// This instruction implements an extending load to FP stack slots. /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain /// operand, ptr to load from, and a ValueType node indicating the type /// to load to. FLD, /// This instruction implements a truncating store to FP stack /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a /// chain operand, value to store, address, and a ValueType to store it /// as. FST, /// This instruction grabs the address of the next argument /// from a va_list. (reads and modifies the va_list in memory) VAARG_64 // WARNING: Do not add anything in the end unless you want the node to // have memop! In fact, starting from ATOMADD64_DAG all opcodes will be // thought as target memory ops! }; } /// Define some predicates that are used for node matching. 
namespace X86 { /// Return true if the specified /// EXTRACT_SUBVECTOR operand specifies a vector extract that is /// suitable for input to VEXTRACTF128, VEXTRACTI128 instructions. bool isVEXTRACT128Index(SDNode *N); /// Return true if the specified /// INSERT_SUBVECTOR operand specifies a subvector insert that is /// suitable for input to VINSERTF128, VINSERTI128 instructions. bool isVINSERT128Index(SDNode *N); /// Return true if the specified /// EXTRACT_SUBVECTOR operand specifies a vector extract that is /// suitable for input to VEXTRACTF64X4, VEXTRACTI64X4 instructions. bool isVEXTRACT256Index(SDNode *N); /// Return true if the specified /// INSERT_SUBVECTOR operand specifies a subvector insert that is /// suitable for input to VINSERTF64X4, VINSERTI64X4 instructions. bool isVINSERT256Index(SDNode *N); /// Return the appropriate /// immediate to extract the specified EXTRACT_SUBVECTOR index /// with VEXTRACTF128, VEXTRACTI128 instructions. unsigned getExtractVEXTRACT128Immediate(SDNode *N); /// Return the appropriate /// immediate to insert at the specified INSERT_SUBVECTOR index /// with VINSERTF128, VINSERT128 instructions. unsigned getInsertVINSERT128Immediate(SDNode *N); /// Return the appropriate /// immediate to extract the specified EXTRACT_SUBVECTOR index /// with VEXTRACTF64X4, VEXTRACTI64x4 instructions. unsigned getExtractVEXTRACT256Immediate(SDNode *N); /// Return the appropriate /// immediate to insert at the specified INSERT_SUBVECTOR index /// with VINSERTF64x4, VINSERTI64x4 instructions. unsigned getInsertVINSERT256Immediate(SDNode *N); /// Returns true if Elt is a constant zero or floating point constant +0.0. bool isZeroNode(SDValue Elt); /// Returns true of the given offset can be /// fit into displacement field of the instruction. bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, bool hasSymbolicDisplacement = true); /// Determines whether the callee is required to pop its /// own arguments. Callee pop is necessary to support tail calls. bool isCalleePop(CallingConv::ID CallingConv, bool is64Bit, bool IsVarArg, bool TailCallOpt); } //===--------------------------------------------------------------------===// // X86 Implementation of the TargetLowering interface class X86TargetLowering final : public TargetLowering { public: explicit X86TargetLowering(const X86TargetMachine &TM, const X86Subtarget &STI); unsigned getJumpTableEncoding() const override; bool useSoftFloat() const override; MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { return MVT::i8; } const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override; /// Returns relocation base for the given PIC jumptable. SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override; const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override; /// Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. For X86, aggregates /// that contains are placed at 16-byte boundaries while the rest are at /// 4-byte boundaries. unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override; /// Returns the target specific optimal type for load /// and store operations as a result of memset, memcpy, and memmove /// lowering. If DstAlign is zero that means it's safe to destination /// alignment can satisfy any constraint. 
Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, /// probably because the source does not need to be loaded. If 'IsMemset' is /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy /// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const override; /// Returns true if it's safe to use load / store of the /// specified type to expand memcpy / memset inline. This is mostly true /// for all types except for some special cases. For example, on X86 /// targets without SSE2 f64 load / store are done with fldl / fstpl which /// also does type conversion. Note the specified type doesn't have to be /// legal as the hook is used before type legalization. bool isSafeMemOpType(MVT VT) const override; /// Returns true if the target allows unaligned memory accesses of the /// specified type. Returns whether it is "fast" in the last argument. bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, bool *Fast) const override; /// Provide custom lowering hooks for some operations. /// SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; /// Replace the results of node with an illegal result /// type with new values built out of custom code. /// void ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, SelectionDAG &DAG) const override; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; /// Return true if the target has native support for /// the specified value type and it is 'desirable' to use the type for the /// given node type. e.g. On x86 i16 is legal, but undesirable since i16 /// instruction encodings are longer and some i16 instructions are slow. bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override; /// Return true if the target has native support for the /// specified value type and it is 'desirable' to use the type. e.g. On x86 /// i16 is legal, but undesirable since i16 instruction encodings are longer /// and some i16 instructions are slow. bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override; /// Return true if the MachineFunction contains a COPY which would imply /// HasOpaqueSPAdjustment. bool hasCopyImplyingStackAdjustment(MachineFunction *MF) const override; MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const override; /// This method returns the name of a target specific DAG node. const char *getTargetNodeName(unsigned Opcode) const override; bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; /// Return the value type to use for ISD::SETCC. EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; /// Determine which of the bits specified in Mask are known to be either /// zero or one and return them in the KnownZero/KnownOne bitsets. void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth = 0) const override; /// Determine the number of bits in the operation that are sign bits. 
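As a rough idea of the decision the getOptimalMemOpType hook declared above makes, here is a hedged standalone sketch; the thresholds, the MemOpTy names, and pickMemOpType itself are illustrative assumptions, not the real hook. It prefers a wide vector type for a large, suitably aligned memset/memcpy and falls back to scalar widths otherwise.

#include <cstddef>
#include <cstdio>

enum class MemOpTy { I8, I32, I64, V16I8 };

// DstAlign == 0 means "destination alignment is free to choose", matching the
// wording of the comment above.
static MemOpTy pickMemOpType(std::size_t Size, unsigned DstAlign,
                             bool HaveSSE2) {
  if (HaveSSE2 && Size >= 16 && (DstAlign == 0 || DstAlign >= 16))
    return MemOpTy::V16I8;            // wide vector stores for the bulk
  if (Size >= 8) return MemOpTy::I64;
  if (Size >= 4) return MemOpTy::I32;
  return MemOpTy::I8;
}

int main() {
  std::printf("%d %d\n", (int)pickMemOpType(64, 16, true),  // 3 (V16I8)
                         (int)pickMemOpType(6, 1, true));   // 1 (I32)
}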
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const SelectionDAG &DAG, unsigned Depth) const override; bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const override; SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; bool ExpandInlineAsm(CallInst *CI) const override; ConstraintType getConstraintType(StringRef Constraint) const override; /// Examine constraint string and operand type and determine a weight value. /// The operand object must already have been set up with the operand type. ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override; const char *LowerXConstraint(EVT ConstraintVT) const override; /// Lower the specified operand into the Ops vector. If it is invalid, don't /// add anything to Ops. If hasMemory is true it means one of the asm /// constraint of the inline asm instruction being processed is 'm'. void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector &Ops, SelectionDAG &DAG) const override; unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override { if (ConstraintCode == "i") return InlineAsm::Constraint_i; else if (ConstraintCode == "o") return InlineAsm::Constraint_o; else if (ConstraintCode == "v") return InlineAsm::Constraint_v; else if (ConstraintCode == "X") return InlineAsm::Constraint_X; return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); } /// Given a physical register constraint /// (e.g. {edx}), return the register number and the register class for the /// register. This should only be used for C_Register constraints. On /// error, this returns a register number of 0. std::pair getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override; /// Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; /// Return true if the specified immediate is legal /// icmp immediate, that is the target has icmp instructions which can /// compare a register against the immediate without having to materialize /// the immediate into a register. bool isLegalICmpImmediate(int64_t Imm) const override; /// Return true if the specified immediate is legal /// add immediate, that is the target has add instructions which can /// add a register and the immediate without having to materialize /// the immediate into a register. bool isLegalAddImmediate(int64_t Imm) const override; /// \brief Return the cost of the scaling factor used in the addressing /// mode represented by AM for this target, for a load/store /// of the specified type. /// If the AM is supported, the return value must be >= 0. /// If the AM is not supported, it returns a negative value. int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; bool isVectorShiftByScalarCheap(Type *Ty) const override; /// Return true if it's free to truncate a value of /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in /// register EAX to i16 by referencing its sub-register AX. bool isTruncateFree(Type *Ty1, Type *Ty2) const override; bool isTruncateFree(EVT VT1, EVT VT2) const override; bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; /// Return true if any actual instruction that defines a /// value of type Ty1 implicit zero-extends the value to Ty2 in the result /// register. 
This does not necessarily include registers defined in /// unknown ways, such as incoming arguments, or copies from unknown /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this /// does not necessarily apply to truncate instructions. e.g. on x86-64, /// all instructions that define 32-bit values implicit zero-extend the /// result out to 64 bits. bool isZExtFree(Type *Ty1, Type *Ty2) const override; bool isZExtFree(EVT VT1, EVT VT2) const override; bool isZExtFree(SDValue Val, EVT VT2) const override; /// Return true if folding a vector load into ExtVal (a sign, zero, or any /// extend node) is profitable. bool isVectorLoadExtDesirable(SDValue) const override; /// Return true if an FMA operation is faster than a pair of fmul and fadd /// instructions. fmuladd intrinsics will be expanded to FMAs when this /// method returns true, otherwise fmuladd is expanded to fmul + fadd. bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; /// Return true if it's profitable to narrow /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow /// from i32 to i8 but not from i32 to i16. bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; /// Given an intrinsic, checks if on the target the intrinsic will need to map /// to a MemIntrinsicNode (touches memory). If this is the case, it returns /// true and stores the intrinsic information into the IntrinsicInfo that was /// passed to the function. bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const override; /// Returns true if the target can instruction select the /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; /// Targets can use this to indicate that they only support *some* /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to /// be legal. bool isShuffleMaskLegal(const SmallVectorImpl &Mask, EVT VT) const override; /// Similar to isShuffleMaskLegal. This is used by Targets can use this to /// indicate if there is a suitable VECTOR_SHUFFLE that can be used to /// replace a VAND with a constant pool entry. bool isVectorClearMaskLegal(const SmallVectorImpl &Mask, EVT VT) const override; /// If true, then instruction selection should /// seek to shrink the FP constant of the specified type to a smaller type /// in order to save space and / or reduce runtime. bool ShouldShrinkFPConstant(EVT VT) const override { // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more // expensive than a straight movsd. On the other hand, it's important to // shrink long double fp constant since fldt is very slow. return !X86ScalarSSEf64 || VT == MVT::f80; } /// Return true if we believe it is correct and profitable to reduce the /// load node to a smaller type. bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const override; /// Return true if the specified scalar FP type is computed in an SSE /// register, not on the X87 floating point stack. bool isScalarFPTypeInSSEReg(EVT VT) const { return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2 (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 } /// \brief Returns true if it is beneficial to convert a load of a constant /// to just the constant itself. 
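A small standalone example of the isZExtFree property described above (illustrative; compilers typically emit just the 32-bit add here): on x86-64 a 32-bit operation already zero-extends its result, so widening it to i64 costs no extra instruction.

#include <cstdint>
#include <cstdio>

// The 32-bit add clears the upper half of the destination register, so the
// implicit i32 -> i64 zero-extension on return is free.
static uint64_t addThenZext(uint32_t A, uint32_t B) {
  uint32_t Sum = A + B;
  return Sum;
}

int main() { std::printf("%llu\n", (unsigned long long)addThenZext(7, 8)); }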
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override; /// Return true if EXTRACT_SUBVECTOR is cheap for this result type /// with this index. bool isExtractSubvectorCheap(EVT ResVT, unsigned Index) const override; /// Intel processors have a unified instruction and data cache const char * getClearCacheBuiltinName() const override { return nullptr; // nothing to do, move along. } unsigned getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const override; /// If a physical register, this returns the register that receives the /// exception address on entry to an EH pad. unsigned getExceptionPointerRegister(const Constant *PersonalityFn) const override; /// If a physical register, this returns the register that receives the /// exception typeid on entry to a landing pad. unsigned getExceptionSelectorRegister(const Constant *PersonalityFn) const override; /// This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. FastISel *createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override; /// Return true if the target stores stack protector cookies at a fixed /// offset in some non-standard address space, and populates the address /// space and offset as appropriate. bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const override; /// Return true if the target stores SafeStack pointer at a fixed offset in /// some non-standard address space, and populates the address space and /// offset as appropriate. Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override; SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, SelectionDAG &DAG) const; bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override; bool useLoadStackGuardNode() const override; /// \brief Customize the preferred legalization strategy for certain types. LegalizeTypeAction getPreferredVectorAction(EVT VT) const override; bool isIntDivCheap(EVT VT, AttributeSet Attr) const override; protected: std::pair findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override; private: /// Keep a pointer to the X86Subtarget around so that we can /// make the right decision when generating code for different targets. const X86Subtarget *Subtarget; /// Select between SSE or x87 floating point ops. /// When SSE is available, use it for f32 operations. /// When SSE2 is available, use it for f64 operations. bool X86ScalarSSEf32; bool X86ScalarSSEf64; /// A list of legal FP immediates. std::vector LegalFPImmediates; /// Indicate that this x86 target can instruction /// select the specified FP immediate natively. void addLegalFPImmediate(const APFloat& Imm) { LegalFPImmediates.push_back(Imm); } SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, const SmallVectorImpl &ArgInfo, SDLoc dl, SelectionDAG &DAG, const CCValAssign &VA, MachineFrameInfo *MFI, unsigned i) const; SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, SDLoc dl, SelectionDAG &DAG, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const; // Call lowering helpers. /// Check whether the call is eligible for tail call optimization. Targets /// that want to do tail call optimization should implement this function. 
bool IsEligibleForTailCallOptimization(SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, SelectionDAG& DAG) const; SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, bool Is64Bit, int FPDiff, SDLoc dl) const; unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG) const; std::pair FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned, bool isReplace) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const; SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(const GlobalValue *GV, SDLoc dl, int64_t Offset, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const; SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerToBT(SDValue And, ISD::CondCode CC, SDLoc dl, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const override; SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const override; 
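Among the helpers above, LowerToBT rewrites a shift-and-mask bit check into a bit-test. The standalone snippet below (not LLVM code) shows the value-level identity it relies on: shifting the bit down and masking is equivalent to testing the bit in place, which is what BT does.

#include <cstdint>
#include <cstdio>

static bool viaShift(uint32_t X, unsigned N)   { return ((X >> N) & 1u) != 0; }
static bool viaBitTest(uint32_t X, unsigned N) { return (X & (1u << N)) != 0; }

int main() {
  uint32_t X = 0x00000400;  // only bit 10 set
  std::printf("%d %d %d %d\n", viaShift(X, 10), viaBitTest(X, 10),
                               viaShift(X, 3),  viaBitTest(X, 3)); // 1 1 0 0
}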
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, SDLoc dl, SelectionDAG &DAG) const override; bool supportSplitCSR(MachineFunction *MF) const override { return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && MF->getFunction()->hasFnAttribute(Attribute::NoUnwind); } void initializeSplitCSR(MachineBasicBlock *Entry) const override; void insertCopiesSplitCSR( MachineBasicBlock *Entry, const SmallVectorImpl &Exits) const override; bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; bool mayBeEmittedAsTailCall(CallInst *CI) const override; EVT getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT, ISD::NodeType ExtendKind) const override; bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const override; const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *SI) const override; bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; LoadInst * lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override; bool needsCmpXchgNb(Type *MemType) const; // Utility function to emit the low-level va_arg code for X86-64. MachineBasicBlock *EmitVAARG64WithCustomInserter( MachineInstr *MI, MachineBasicBlock *MBB) const; /// Utility function to emit the xmm reg save portion of va_start. MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter( MachineInstr *BInstr, MachineBasicBlock *BB) const; MachineBasicBlock *EmitLoweredSelect(MachineInstr *I, MachineBasicBlock *BB) const; MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr *I, MachineBasicBlock *BB) const; MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI, MachineBasicBlock *BB) const; MachineBasicBlock *EmitLoweredCatchRet(MachineInstr *MI, MachineBasicBlock *BB) const; MachineBasicBlock *EmitLoweredCatchPad(MachineInstr *MI, MachineBasicBlock *BB) const; MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr *MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI, MachineBasicBlock *BB) const; MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI, MachineBasicBlock *MBB) const; MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI, MachineBasicBlock *MBB) const; MachineBasicBlock *emitFMA3Instr(MachineInstr *MI, MachineBasicBlock *MBB) const; /// Emit nodes that will be selected as "test Op0,Op0", or something /// equivalent, for use with the given x86 condition code. SDValue EmitTest(SDValue Op0, unsigned X86CC, SDLoc dl, SelectionDAG &DAG) const; /// Emit nodes that will be selected as "cmp Op0,Op1", or something /// equivalent, for use with the given x86 condition code. SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, SDLoc dl, SelectionDAG &DAG) const; /// Convert a comparison if required by the subtarget. SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const; /// Use rsqrt* to speed up sqrt calculations. SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps, bool &UseOneConstNR) const override; /// Use rcp* to speed up fdiv calculations. 
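getRsqrtEstimate above returns a hardware estimate plus a RefinementSteps count; the sketch below is illustrative (the 0.7 starting value stands in for the low-precision RSQRTSS result) and shows the Newton-Raphson iteration that those refinement steps apply to sharpen the estimate.

#include <cmath>
#include <cstdio>

// One Newton-Raphson step for 1/sqrt(X): y = y * (1.5 - 0.5 * X * y * y).
static float refineRsqrt(float X, float Estimate, unsigned Steps) {
  float Y = Estimate;
  for (unsigned I = 0; I < Steps; ++I)
    Y = Y * (1.5f - 0.5f * X * Y * Y);
  return Y;
}

int main() {
  float X = 2.0f;
  float Rough = 0.7f;  // stand-in for the hardware estimate
  std::printf("%f vs %f\n", refineRsqrt(X, Rough, 2), 1.0f / std::sqrt(X));
}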
SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps) const override; /// Reassociate floating point divisions into multiply by reciprocal. unsigned combineRepeatedFPDivisors() const override; }; namespace X86 { FastISel *createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo); } } #endif // X86ISELLOWERING_H Index: projects/clang380-import/contrib/llvm/lib/Target/X86/X86InstrCompiler.td =================================================================== --- projects/clang380-import/contrib/llvm/lib/Target/X86/X86InstrCompiler.td (revision 296010) +++ projects/clang380-import/contrib/llvm/lib/Target/X86/X86InstrCompiler.td (revision 296011) @@ -1,1864 +1,1864 @@ //===- X86InstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file describes the various pseudo instructions used by the compiler, // as well as Pat patterns used during instruction selection. // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // Pattern Matching Support def GetLo32XForm : SDNodeXFormgetZExtValue(), SDLoc(N)); }]>; def GetLo8XForm : SDNodeXFormgetZExtValue(), SDLoc(N)); }]>; //===----------------------------------------------------------------------===// // Random Pseudo Instructions. // PIC base construction. This expands to code that looks like this: // call $next_inst // popl %destreg" let hasSideEffects = 0, isNotDuplicable = 1, Uses = [ESP] in def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label), "", []>; // ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into // a stack adjustment and the codegen must know that they may modify the stack // pointer before prolog-epilog rewriting occurs. // Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become // sub / add which can clobber EFLAGS. let Defs = [ESP, EFLAGS], Uses = [ESP] in { def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), "#ADJCALLSTACKDOWN", []>, Requires<[NotLP64]>; def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), "#ADJCALLSTACKUP", [(X86callseq_end timm:$amt1, timm:$amt2)]>, Requires<[NotLP64]>; } def : Pat<(X86callseq_start timm:$amt1), (ADJCALLSTACKDOWN32 i32imm:$amt1, 0)>, Requires<[NotLP64]>; // ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into // a stack adjustment and the codegen must know that they may modify the stack // pointer before prolog-epilog rewriting occurs. // Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become // sub / add which can clobber EFLAGS. let Defs = [RSP, EFLAGS], Uses = [RSP] in { def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), "#ADJCALLSTACKDOWN", []>, Requires<[IsLP64]>; def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), "#ADJCALLSTACKUP", [(X86callseq_end timm:$amt1, timm:$amt2)]>, Requires<[IsLP64]>; } def : Pat<(X86callseq_start timm:$amt1), (ADJCALLSTACKDOWN64 i32imm:$amt1, 0)>, Requires<[IsLP64]>; // x86-64 va_start lowering magic. 
let usesCustomInserter = 1, Defs = [EFLAGS] in { def VASTART_SAVE_XMM_REGS : I<0, Pseudo, (outs), (ins GR8:$al, i64imm:$regsavefi, i64imm:$offset, variable_ops), "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset", [(X86vastart_save_xmm_regs GR8:$al, imm:$regsavefi, imm:$offset), (implicit EFLAGS)]>; // The VAARG_64 pseudo-instruction takes the address of the va_list, // and places the address of the next argument into a register. let Defs = [EFLAGS] in def VAARG_64 : I<0, Pseudo, (outs GR64:$dst), (ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align), "#VAARG_64 $dst, $ap, $size, $mode, $align", [(set GR64:$dst, (X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)), (implicit EFLAGS)]>; // Dynamic stack allocation yields a _chkstk or _alloca call for all Windows // targets. These calls are needed to probe the stack when allocating more than // 4k bytes in one go. Touching the stack at 4K increments is necessary to // ensure that the guard pages used by the OS virtual memory manager are // allocated in correct sequence. // The main point of having separate instruction are extra unmodelled effects // (compared to ordinary calls) like stack pointer change. let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in def WIN_ALLOCA : I<0, Pseudo, (outs), (ins), "# dynamic stack allocation", [(X86WinAlloca)]>; // When using segmented stacks these are lowered into instructions which first // check if the current stacklet has enough free memory. If it does, memory is // allocated by bumping the stack pointer. Otherwise memory is allocated from // the heap. let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in def SEG_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size), "# variable sized alloca for segmented stacks", [(set GR32:$dst, (X86SegAlloca GR32:$size))]>, Requires<[NotLP64]>; let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size), "# variable sized alloca for segmented stacks", [(set GR64:$dst, (X86SegAlloca GR64:$size))]>, Requires<[In64BitMode]>; } //===----------------------------------------------------------------------===// // EH Pseudo Instructions // let SchedRW = [WriteSystem] in { let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr), "ret\t#eh_return, addr: $addr", [(X86ehret GR32:$addr)], IIC_RET>, Sched<[WriteJumpLd]>; } let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr), "ret\t#eh_return, addr: $addr", [(X86ehret GR64:$addr)], IIC_RET>, Sched<[WriteJumpLd]>; } let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1, isReturn = 1 in { def CLEANUPRET : I<0, Pseudo, (outs), (ins), "# CLEANUPRET", [(cleanupret)]>; // CATCHRET needs a custom inserter for SEH. let usesCustomInserter = 1 in def CATCHRET : I<0, Pseudo, (outs), (ins brtarget32:$dst, brtarget32:$from), "# CATCHRET", [(catchret bb:$dst, bb:$from)]>; } let hasSideEffects = 1, hasCtrlDep = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in def CATCHPAD : I<0, Pseudo, (outs), (ins), "# CATCHPAD", [(catchpad)]>; // This instruction is responsible for re-establishing stack pointers after an // exception has been caught and we are rejoining normal control flow in the // parent function or funclet. It generally sets ESP and EBP, and optionally // ESI. It is only needed for 32-bit WinEH, as the runtime restores CSRs for us // elsewhere. 
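A hedged sketch of the decision the SEG_ALLOCA pseudos above model; StackletSketch and the malloc fallback are illustrative stand-ins, not what the segmented-stack runtime actually calls. The shape of the check is: bump the current stacklet if it has room, otherwise allocate out of line.

#include <cstdio>
#include <cstdlib>

struct StackletSketch {
  char *SP;     // current stack pointer within the stacklet
  char *Limit;  // lowest usable address of this stacklet
};

static void *segAlloca(StackletSketch &S, std::size_t Size) {
  if ((std::size_t)(S.SP - S.Limit) >= Size) {  // enough room: bump the pointer
    S.SP -= Size;
    return S.SP;
  }
  return std::malloc(Size);  // stand-in for the out-of-line allocation path
}

int main() {
  static char Backing[256];
  StackletSketch S{Backing + sizeof(Backing), Backing};
  std::printf("%p %p\n", segAlloca(S, 64), segAlloca(S, 4096));
}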
let hasSideEffects = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in def EH_RESTORE : I<0, Pseudo, (outs), (ins), "# EH_RESTORE", []>; let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in { def EH_SjLj_SetJmp32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf), "#EH_SJLJ_SETJMP32", [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>, Requires<[Not64BitMode]>; def EH_SjLj_SetJmp64 : I<0, Pseudo, (outs GR32:$dst), (ins i64mem:$buf), "#EH_SJLJ_SETJMP64", [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>, Requires<[In64BitMode]>; let isTerminator = 1 in { def EH_SjLj_LongJmp32 : I<0, Pseudo, (outs), (ins i32mem:$buf), "#EH_SJLJ_LONGJMP32", [(X86eh_sjlj_longjmp addr:$buf)]>, Requires<[Not64BitMode]>; def EH_SjLj_LongJmp64 : I<0, Pseudo, (outs), (ins i64mem:$buf), "#EH_SJLJ_LONGJMP64", [(X86eh_sjlj_longjmp addr:$buf)]>, Requires<[In64BitMode]>; } } } // SchedRW let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in { def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst), "#EH_SjLj_Setup\t$dst", []>; } //===----------------------------------------------------------------------===// // Pseudo instructions used by unwind info. // let isPseudo = 1 in { def SEH_PushReg : I<0, Pseudo, (outs), (ins i32imm:$reg), "#SEH_PushReg $reg", []>; def SEH_SaveReg : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst), "#SEH_SaveReg $reg, $dst", []>; def SEH_SaveXMM : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst), "#SEH_SaveXMM $reg, $dst", []>; def SEH_StackAlloc : I<0, Pseudo, (outs), (ins i32imm:$size), "#SEH_StackAlloc $size", []>; def SEH_SetFrame : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$offset), "#SEH_SetFrame $reg, $offset", []>; def SEH_PushFrame : I<0, Pseudo, (outs), (ins i1imm:$mode), "#SEH_PushFrame $mode", []>; def SEH_EndPrologue : I<0, Pseudo, (outs), (ins), "#SEH_EndPrologue", []>; def SEH_Epilogue : I<0, Pseudo, (outs), (ins), "#SEH_Epilogue", []>; } //===----------------------------------------------------------------------===// // Pseudo instructions used by segmented stacks. // // This is lowered into a RET instruction by MCInstLower. We need // this so that we don't have to have a MachineBasicBlock which ends // with a RET and also has successors. let isPseudo = 1 in { def MORESTACK_RET: I<0, Pseudo, (outs), (ins), "", []>; // This instruction is lowered to a RET followed by a MOV. The two // instructions are not generated on a higher level since then the // verifier sees a MachineBasicBlock ending with a non-terminator. def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), "", []>; } //===----------------------------------------------------------------------===// // Alias Instructions //===----------------------------------------------------------------------===// // Alias instruction mapping movr0 to xor. // FIXME: remove when we can teach regalloc that xor reg, reg is ok. let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1 in def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "", [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; // Other widths can also make use of the 32-bit xor, which may have a smaller // encoding and avoid partial register updates. 
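The EH_SjLj_SetJmp/LongJmp pseudos above implement setjmp/longjmp-style unwinding; the standard-library example below illustrates the underlying control flow (illustrative only): setjmp records a resume point and "returns twice", and longjmp transfers control back to it.

#include <csetjmp>
#include <cstdio>

static std::jmp_buf Buf;

static void thrower() { std::longjmp(Buf, 1); }  // the "throw"

int main() {
  if (setjmp(Buf) == 0) {   // first return: normal path
    thrower();
  } else {                  // second return: the landing pad
    std::printf("caught\n");
  }
}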
def : Pat<(i8 0), (EXTRACT_SUBREG (MOV32r0), sub_8bit)>; def : Pat<(i16 0), (EXTRACT_SUBREG (MOV32r0), sub_16bit)>; def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> { let AddedComplexity = 20; } let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode], AddedComplexity = 1 in { // Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC, // which only require 3 bytes compared to MOV32ri which requires 5. let Defs = [EFLAGS], isReMaterializable = 1, isPseudo = 1 in { def MOV32r1 : I<0, Pseudo, (outs GR32:$dst), (ins), "", [(set GR32:$dst, 1)]>; def MOV32r_1 : I<0, Pseudo, (outs GR32:$dst), (ins), "", [(set GR32:$dst, -1)]>; } // MOV16ri is 4 bytes, so the instructions above are smaller. def : Pat<(i16 1), (EXTRACT_SUBREG (MOV32r1), sub_16bit)>; def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>; } // Materialize i64 constant where top 32-bits are zero. This could theoretically // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however // that would make it more difficult to rematerialize. let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1, hasSideEffects = 0 in def MOV32ri64 : I<0, Pseudo, (outs GR32:$dst), (ins i64i32imm:$src), "", []>; // This 64-bit pseudo-move can be used for both a 64-bit constant that is // actually the zero-extension of a 32-bit constant and for labels in the // x86-64 small code model. def mov64imm32 : ComplexPattern; let AddedComplexity = 1 in def : Pat<(i64 mov64imm32:$src), (SUBREG_TO_REG (i64 0), (MOV32ri64 mov64imm32:$src), sub_32bit)>; // Use sbb to materialize carry bit. let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in { // FIXME: These are pseudo ops that should be replaced with Pat<> patterns. // However, Pat<> can't replicate the destination reg into the inputs of the // result. def SETB_C8r : I<0, Pseudo, (outs GR8:$dst), (ins), "", [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; def SETB_C16r : I<0, Pseudo, (outs GR16:$dst), (ins), "", [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; def SETB_C32r : I<0, Pseudo, (outs GR32:$dst), (ins), "", [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; def SETB_C64r : I<0, Pseudo, (outs GR64:$dst), (ins), "", [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; } // isCodeGenOnly def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C16r)>; def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C32r)>; def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C64r)>; def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C16r)>; def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C32r)>; def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C64r)>; // We canonicalize 'setb' to "(and (sbb reg,reg), 1)" on the hope that the and // will be eliminated and that the sbb can be extended up to a wider type. When // this happens, it is great. However, if we are left with an 8-bit sbb and an // and, we might as well just match it as a setb. 
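The MOV32ri64 / mov64imm32 machinery above keys on one property, shown in this small standalone check (fitsInMov32 is a hypothetical helper name): an i64 immediate whose top 32 bits are zero can be materialized with a plain 32-bit mov, because 32-bit register writes zero-extend on x86-64.

#include <cstdint>
#include <cstdio>

static bool fitsInMov32(uint64_t Imm) {
  return (Imm >> 32) == 0;  // the value is the zero-extension of some i32
}

int main() {
  std::printf("%d %d\n", fitsInMov32(0x00000000DEADBEEFULL),   // 1
                         fitsInMov32(0x0000000100000000ULL));  // 0
}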
def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1), (SETBr)>; // (add OP, SETB) -> (adc OP, 0) def : Pat<(add (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR8:$op), (ADC8ri GR8:$op, 0)>; def : Pat<(add (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR32:$op), (ADC32ri8 GR32:$op, 0)>; def : Pat<(add (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR64:$op), (ADC64ri8 GR64:$op, 0)>; // (sub OP, SETB) -> (sbb OP, 0) def : Pat<(sub GR8:$op, (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1)), (SBB8ri GR8:$op, 0)>; def : Pat<(sub GR32:$op, (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1)), (SBB32ri8 GR32:$op, 0)>; def : Pat<(sub GR64:$op, (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1)), (SBB64ri8 GR64:$op, 0)>; // (sub OP, SETCC_CARRY) -> (adc OP, 0) def : Pat<(sub GR8:$op, (i8 (X86setcc_c X86_COND_B, EFLAGS))), (ADC8ri GR8:$op, 0)>; def : Pat<(sub GR32:$op, (i32 (X86setcc_c X86_COND_B, EFLAGS))), (ADC32ri8 GR32:$op, 0)>; def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))), (ADC64ri8 GR64:$op, 0)>; //===----------------------------------------------------------------------===// // String Pseudo Instructions // let SchedRW = [WriteMicrocoded] in { let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in { def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}", [(X86rep_movs i8)], IIC_REP_MOVS>, REP, Requires<[Not64BitMode]>; def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}", [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize16, Requires<[Not64BitMode]>; def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}", [(X86rep_movs i32)], IIC_REP_MOVS>, REP, OpSize32, Requires<[Not64BitMode]>; } let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in { def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}", [(X86rep_movs i8)], IIC_REP_MOVS>, REP, Requires<[In64BitMode]>; def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}", [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize16, Requires<[In64BitMode]>; def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}", [(X86rep_movs i32)], IIC_REP_MOVS>, REP, OpSize32, Requires<[In64BitMode]>; def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}", [(X86rep_movs i64)], IIC_REP_MOVS>, REP, Requires<[In64BitMode]>; } // FIXME: Should use "(X86rep_stos AL)" as the pattern. 
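A standalone arithmetic check (illustrative) of the identity behind the (add X, SETB) -> (adc X, 0) patterns above: adding the materialized carry bit gives the same result as an add-with-carry of zero that consumes CF directly.

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t A = 5, B = 9, X = 100;
  uint32_t Carry = (A < B) ? 1u : 0u;       // what SETB materializes from CF
  uint32_t ViaSetb = X + Carry;             // add X, setb
  uint32_t ViaAdc  = X + 0 + Carry;         // adc X, 0 consumes CF directly
  std::printf("%u %u\n", ViaSetb, ViaAdc);  // both print 101
}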
let Defs = [ECX,EDI], isCodeGenOnly = 1 in { let Uses = [AL,ECX,EDI] in def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}", [(X86rep_stos i8)], IIC_REP_STOS>, REP, Requires<[Not64BitMode]>; let Uses = [AX,ECX,EDI] in def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}", [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize16, Requires<[Not64BitMode]>; let Uses = [EAX,ECX,EDI] in def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}", [(X86rep_stos i32)], IIC_REP_STOS>, REP, OpSize32, Requires<[Not64BitMode]>; } let Defs = [RCX,RDI], isCodeGenOnly = 1 in { let Uses = [AL,RCX,RDI] in def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}", [(X86rep_stos i8)], IIC_REP_STOS>, REP, Requires<[In64BitMode]>; let Uses = [AX,RCX,RDI] in def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}", [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize16, Requires<[In64BitMode]>; let Uses = [RAX,RCX,RDI] in def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}", [(X86rep_stos i32)], IIC_REP_STOS>, REP, OpSize32, Requires<[In64BitMode]>; let Uses = [RAX,RCX,RDI] in def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}", [(X86rep_stos i64)], IIC_REP_STOS>, REP, Requires<[In64BitMode]>; } } // SchedRW //===----------------------------------------------------------------------===// // Thread Local Storage Instructions // // ELF TLS Support // All calls clobber the non-callee saved registers. ESP is marked as // a use to prevent stack-pointer assignments that appear immediately // before calls from potentially appearing dead. let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7, ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [ESP] in { + usesCustomInserter = 1, Uses = [ESP] in { def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLS_addr32", [(X86tlsaddr tls32addr:$sym)]>, Requires<[Not64BitMode]>; def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLS_base_addr32", [(X86tlsbaseaddr tls32baseaddr:$sym)]>, Requires<[Not64BitMode]>; } // All calls clobber the non-callee saved registers. RSP is marked as // a use to prevent stack-pointer assignments that appear immediately // before calls from potentially appearing dead. let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7, ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [RSP] in { + usesCustomInserter = 1, Uses = [RSP] in { def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), "# TLS_addr64", [(X86tlsaddr tls64addr:$sym)]>, Requires<[In64BitMode]>; def TLS_base_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), "# TLS_base_addr64", [(X86tlsbaseaddr tls64baseaddr:$sym)]>, Requires<[In64BitMode]>; } // Darwin TLS Support // For i386, the address of the thunk is passed on the stack, on return the // address of the variable is in %eax. %ecx is trashed during the function // call. All other registers are preserved. 
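For context on the TLS_addr*/TLS_base_addr* pseudos above: under the ELF general-dynamic model, taking the address of a thread_local variable typically lowers to a call to __tls_get_addr, which is why those definitions clobber the full call-clobbered register set. A minimal example that exercises that path:

#include <cstdio>

thread_local int Counter = 0;

int *counterAddr() { return &Counter; }  // the address request is the TLS call

int main() {
  *counterAddr() = 42;
  std::printf("%d\n", Counter);          // prints 42
}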
let Defs = [EAX, ECX, EFLAGS], Uses = [ESP], usesCustomInserter = 1 in def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLSCall_32", [(X86TLSCall addr:$sym)]>, Requires<[Not64BitMode]>; // For x86_64, the address of the thunk is passed in %rdi, on return // the address of the variable is in %rax. All other registers are preserved. let Defs = [RAX, EFLAGS], Uses = [RSP, RDI], usesCustomInserter = 1 in def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym), "# TLSCall_64", [(X86TLSCall addr:$sym)]>, Requires<[In64BitMode]>; //===----------------------------------------------------------------------===// // Conditional Move Pseudo Instructions // CMOV* - Used to implement the SELECT DAG operation. Expanded after // instruction selection into a branch sequence. multiclass CMOVrr_PSEUDO { def CMOV#NAME : I<0, Pseudo, (outs RC:$dst), (ins RC:$t, RC:$f, i8imm:$cond), "#CMOV_"#NAME#" PSEUDO!", [(set RC:$dst, (VT (X86cmov RC:$t, RC:$f, imm:$cond, EFLAGS)))]>; } let usesCustomInserter = 1, Uses = [EFLAGS] in { // X86 doesn't have 8-bit conditional moves. Use a customInserter to // emit control flow. An alternative to this is to mark i8 SELECT as Promote, // however that requires promoting the operands, and can induce additional // i8 register pressure. defm _GR8 : CMOVrr_PSEUDO; let Predicates = [NoCMov] in { defm _GR32 : CMOVrr_PSEUDO; defm _GR16 : CMOVrr_PSEUDO; } // Predicates = [NoCMov] // fcmov doesn't handle all possible EFLAGS, provide a fallback if there is no // SSE1/SSE2. let Predicates = [FPStackf32] in defm _RFP32 : CMOVrr_PSEUDO; let Predicates = [FPStackf64] in defm _RFP64 : CMOVrr_PSEUDO; defm _RFP80 : CMOVrr_PSEUDO; defm _FR32 : CMOVrr_PSEUDO; defm _FR64 : CMOVrr_PSEUDO; defm _FR128 : CMOVrr_PSEUDO; defm _V4F32 : CMOVrr_PSEUDO; defm _V2F64 : CMOVrr_PSEUDO; defm _V2I64 : CMOVrr_PSEUDO; defm _V8F32 : CMOVrr_PSEUDO; defm _V4F64 : CMOVrr_PSEUDO; defm _V4I64 : CMOVrr_PSEUDO; defm _V8I64 : CMOVrr_PSEUDO; defm _V8F64 : CMOVrr_PSEUDO; defm _V16F32 : CMOVrr_PSEUDO; defm _V8I1 : CMOVrr_PSEUDO; defm _V16I1 : CMOVrr_PSEUDO; defm _V32I1 : CMOVrr_PSEUDO; defm _V64I1 : CMOVrr_PSEUDO; } // usesCustomInserter = 1, Uses = [EFLAGS] //===----------------------------------------------------------------------===// // Normal-Instructions-With-Lock-Prefix Pseudo Instructions //===----------------------------------------------------------------------===// // FIXME: Use normal instructions and add lock prefix dynamically. // Memory barriers // TODO: Get this to fold the constant into the instruction. 
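A hedged sketch of what the CMOV_GR8 pseudo above must expand to (select8 is a hypothetical stand-in, not the custom inserter): x86 has no 8-bit cmov, so the same select is emitted as a conditional branch over a copy.

#include <cstdio>

// Semantics of the X86cmov node: pick T when the condition holds, else F.
static unsigned char select8(bool CondFromEFLAGS, unsigned char T,
                             unsigned char F) {
  if (CondFromEFLAGS)  // expanded as a conditional branch plus a join
    return T;
  return F;
}

int main() { std::printf("%u\n", select8(true, 7, 9)); }  // prints 7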
let isCodeGenOnly = 1, Defs = [EFLAGS] in def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero), "or{l}\t{$zero, $dst|$dst, $zero}", [], IIC_ALU_MEM>, Requires<[Not64BitMode]>, OpSize32, LOCK, Sched<[WriteALULd, WriteRMW]>; let hasSideEffects = 1 in def Int_MemBarrier : I<0, Pseudo, (outs), (ins), "#MEMBARRIER", [(X86MemBarrier)]>, Sched<[WriteLoad]>; // RegOpc corresponds to the mr version of the instruction // ImmOpc corresponds to the mi version of the instruction // ImmOpc8 corresponds to the mi8 version of the instruction // ImmMod corresponds to the instruction format of the mi and mi8 versions multiclass LOCK_ArithBinOp RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8, Format ImmMod, string mnemonic> { let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in { def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 }, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), !strconcat(mnemonic, "{b}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_NONMEM>, LOCK; def NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), !strconcat(mnemonic, "{w}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_NONMEM>, OpSize16, LOCK; def NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), !strconcat(mnemonic, "{l}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_NONMEM>, OpSize32, LOCK; def NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), !strconcat(mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_NONMEM>, LOCK; def NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 }, ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2), !strconcat(mnemonic, "{b}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, LOCK; def NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2), !strconcat(mnemonic, "{w}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, OpSize16, LOCK; def NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2), !strconcat(mnemonic, "{l}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, OpSize32, LOCK; def NAME#64mi32 : RIi32S<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2), !strconcat(mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, LOCK; def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2), !strconcat(mnemonic, "{w}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, OpSize16, LOCK; def NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2), !strconcat(mnemonic, "{l}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, OpSize32, LOCK; def NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2), !strconcat(mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, 
LOCK; } } defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, "add">; defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, "sub">; defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, "or">; defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, "and">; defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">; // Optimized codegen when the non-memory output is not used. multiclass LOCK_ArithUnOp Opc8, bits<8> Opc, Format Form, string mnemonic> { let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in { def NAME#8m : I, LOCK; def NAME#16m : I, OpSize16, LOCK; def NAME#32m : I, OpSize32, LOCK; def NAME#64m : RI, LOCK; } } defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, "inc">; defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, "dec">; // Atomic compare and swap. multiclass LCMPXCHG_UnOp Opc, Format Form, string mnemonic, SDPatternOperator frag, X86MemOperand x86memop, InstrItinClass itin> { let isCodeGenOnly = 1 in { def NAME : I, TB, LOCK; } } multiclass LCMPXCHG_BinOp Opc8, bits<8> Opc, Format Form, string mnemonic, SDPatternOperator frag, InstrItinClass itin8, InstrItinClass itin> { let isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in { let Defs = [AL, EFLAGS], Uses = [AL] in def NAME#8 : I, TB, LOCK; let Defs = [AX, EFLAGS], Uses = [AX] in def NAME#16 : I, TB, OpSize16, LOCK; let Defs = [EAX, EFLAGS], Uses = [EAX] in def NAME#32 : I, TB, OpSize32, LOCK; let Defs = [RAX, EFLAGS], Uses = [RAX] in def NAME#64 : RI, TB, LOCK; } } let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX], SchedRW = [WriteALULd, WriteRMW] in { defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", X86cas8, i64mem, IIC_CMPX_LOCK_8B>; } let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX], Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW] in { defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b", X86cas16, i128mem, IIC_CMPX_LOCK_16B>, REX_W; } defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg", X86cas, IIC_CMPX_LOCK_8, IIC_CMPX_LOCK>; // Atomic exchange and add multiclass ATOMIC_LOAD_BINOP opc8, bits<8> opc, string mnemonic, string frag, InstrItinClass itin8, InstrItinClass itin> { let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in { def NAME#8 : I(frag # "_8") addr:$ptr, GR8:$val))], itin8>; def NAME#16 : I(frag # "_16") addr:$ptr, GR16:$val))], itin>, OpSize16; def NAME#32 : I(frag # "_32") addr:$ptr, GR32:$val))], itin>, OpSize32; def NAME#64 : RI(frag # "_64") addr:$ptr, GR64:$val))], itin>; } } defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add", IIC_XADD_LOCK_MEM8, IIC_XADD_LOCK_MEM>, TB, LOCK; /* The following multiclass tries to make sure that in code like * x.store (immediate op x.load(acquire), release) * and * x.store (register op x.load(acquire), release) * an operation directly on memory is generated instead of wasting a register. 
* It is not automatic as atomic_store/load are only lowered to MOV instructions * extremely late to prevent them from being accidentally reordered in the backend * (see below the RELEASE_MOV* / ACQUIRE_MOV* pseudo-instructions) */ multiclass RELEASE_BINOP_MI { def NAME#8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src), "#BINOP "#NAME#"8mi PSEUDO!", [(atomic_store_8 addr:$dst, (op (atomic_load_8 addr:$dst), (i8 imm:$src)))]>; def NAME#8mr : I<0, Pseudo, (outs), (ins i8mem:$dst, GR8:$src), "#BINOP "#NAME#"8mr PSEUDO!", [(atomic_store_8 addr:$dst, (op (atomic_load_8 addr:$dst), GR8:$src))]>; // NAME#16 is not generated as 16-bit arithmetic instructions are considered // costly and avoided as far as possible by this backend anyway def NAME#32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src), "#BINOP "#NAME#"32mi PSEUDO!", [(atomic_store_32 addr:$dst, (op (atomic_load_32 addr:$dst), (i32 imm:$src)))]>; def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src), "#BINOP "#NAME#"32mr PSEUDO!", [(atomic_store_32 addr:$dst, (op (atomic_load_32 addr:$dst), GR32:$src))]>; def NAME#64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src), "#BINOP "#NAME#"64mi32 PSEUDO!", [(atomic_store_64 addr:$dst, (op (atomic_load_64 addr:$dst), (i64immSExt32:$src)))]>; def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src), "#BINOP "#NAME#"64mr PSEUDO!", [(atomic_store_64 addr:$dst, (op (atomic_load_64 addr:$dst), GR64:$src))]>; } let Defs = [EFLAGS] in { defm RELEASE_ADD : RELEASE_BINOP_MI; defm RELEASE_AND : RELEASE_BINOP_MI; defm RELEASE_OR : RELEASE_BINOP_MI; defm RELEASE_XOR : RELEASE_BINOP_MI; // Note: we don't deal with sub, because substractions of constants are // optimized into additions before this code can run. } // Same as above, but for floating-point. // FIXME: imm version. // FIXME: Version that doesn't clobber $src, using AVX's VADDSS. // FIXME: This could also handle SIMD operations with *ps and *pd instructions. let usesCustomInserter = 1 in { multiclass RELEASE_FP_BINOP_MI { def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, FR32:$src), "#BINOP "#NAME#"32mr PSEUDO!", [(atomic_store_32 addr:$dst, (i32 (bitconvert (op (f32 (bitconvert (i32 (atomic_load_32 addr:$dst)))), FR32:$src))))]>, Requires<[HasSSE1]>; def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, FR64:$src), "#BINOP "#NAME#"64mr PSEUDO!", [(atomic_store_64 addr:$dst, (i64 (bitconvert (op (f64 (bitconvert (i64 (atomic_load_64 addr:$dst)))), FR64:$src))))]>, Requires<[HasSSE2]>; } defm RELEASE_FADD : RELEASE_FP_BINOP_MI; // FIXME: Add fsub, fmul, fdiv, ... 
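A short illustration of the atomic lowering above, under the usual x86 mapping (hedged: "typically"): fetch_add becomes LOCK XADD (the LXADD pseudos), compare_exchange becomes LOCK CMPXCHG (the LCMPXCHG pseudos), and the store(op(load(acquire)), release) shape in the last line is exactly what the RELEASE_BINOP pseudos fold into a single memory-destination instruction.

#include <atomic>
#include <cstdio>

std::atomic<int> V{10};

int main() {
  int Old = V.fetch_add(5);                           // typically LOCK XADD
  int Expected = 15;
  bool Ok = V.compare_exchange_strong(Expected, 40);  // typically LOCK CMPXCHG
  // Acquire load, plain op, release store of the same location: the shape the
  // comment above describes, foldable into one memory-destination add.
  V.store(V.load(std::memory_order_acquire) + 2, std::memory_order_release);
  std::printf("%d %d %d\n", Old, Ok, V.load());       // 10 1 42
}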
} multiclass RELEASE_UNOP { def NAME#8m : I<0, Pseudo, (outs), (ins i8mem:$dst), "#UNOP "#NAME#"8m PSEUDO!", [(atomic_store_8 addr:$dst, dag8)]>; def NAME#16m : I<0, Pseudo, (outs), (ins i16mem:$dst), "#UNOP "#NAME#"16m PSEUDO!", [(atomic_store_16 addr:$dst, dag16)]>; def NAME#32m : I<0, Pseudo, (outs), (ins i32mem:$dst), "#UNOP "#NAME#"32m PSEUDO!", [(atomic_store_32 addr:$dst, dag32)]>; def NAME#64m : I<0, Pseudo, (outs), (ins i64mem:$dst), "#UNOP "#NAME#"64m PSEUDO!", [(atomic_store_64 addr:$dst, dag64)]>; } let Defs = [EFLAGS] in { defm RELEASE_INC : RELEASE_UNOP< (add (atomic_load_8 addr:$dst), (i8 1)), (add (atomic_load_16 addr:$dst), (i16 1)), (add (atomic_load_32 addr:$dst), (i32 1)), (add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>; defm RELEASE_DEC : RELEASE_UNOP< (add (atomic_load_8 addr:$dst), (i8 -1)), (add (atomic_load_16 addr:$dst), (i16 -1)), (add (atomic_load_32 addr:$dst), (i32 -1)), (add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>; } /* TODO: These don't work because the type inference of TableGen fails. TODO: find a way to fix it. let Defs = [EFLAGS] in { defm RELEASE_NEG : RELEASE_UNOP< (ineg (atomic_load_8 addr:$dst)), (ineg (atomic_load_16 addr:$dst)), (ineg (atomic_load_32 addr:$dst)), (ineg (atomic_load_64 addr:$dst))>; } // NOT doesn't set flags. defm RELEASE_NOT : RELEASE_UNOP< (not (atomic_load_8 addr:$dst)), (not (atomic_load_16 addr:$dst)), (not (atomic_load_32 addr:$dst)), (not (atomic_load_64 addr:$dst))>; */ def RELEASE_MOV8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src), "#RELEASE_MOV8mi PSEUDO!", [(atomic_store_8 addr:$dst, (i8 imm:$src))]>; def RELEASE_MOV16mi : I<0, Pseudo, (outs), (ins i16mem:$dst, i16imm:$src), "#RELEASE_MOV16mi PSEUDO!", [(atomic_store_16 addr:$dst, (i16 imm:$src))]>; def RELEASE_MOV32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src), "#RELEASE_MOV32mi PSEUDO!", [(atomic_store_32 addr:$dst, (i32 imm:$src))]>; def RELEASE_MOV64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src), "#RELEASE_MOV64mi32 PSEUDO!", [(atomic_store_64 addr:$dst, i64immSExt32:$src)]>; def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src), "#RELEASE_MOV8mr PSEUDO!", [(atomic_store_8 addr:$dst, GR8 :$src)]>; def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src), "#RELEASE_MOV16mr PSEUDO!", [(atomic_store_16 addr:$dst, GR16:$src)]>; def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src), "#RELEASE_MOV32mr PSEUDO!", [(atomic_store_32 addr:$dst, GR32:$src)]>; def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src), "#RELEASE_MOV64mr PSEUDO!", [(atomic_store_64 addr:$dst, GR64:$src)]>; def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src), "#ACQUIRE_MOV8rm PSEUDO!", [(set GR8:$dst, (atomic_load_8 addr:$src))]>; def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src), "#ACQUIRE_MOV16rm PSEUDO!", [(set GR16:$dst, (atomic_load_16 addr:$src))]>; def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src), "#ACQUIRE_MOV32rm PSEUDO!", [(set GR32:$dst, (atomic_load_32 addr:$src))]>; def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src), "#ACQUIRE_MOV64rm PSEUDO!", [(set GR64:$dst, (atomic_load_64 addr:$src))]>; //===----------------------------------------------------------------------===// // DAG Pattern Matching Rules //===----------------------------------------------------------------------===// // ConstantPool GlobalAddress, ExternalSymbol, and JumpTable def : Pat<(i32 
(X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>; def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>; def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>; def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>; def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>; def : Pat<(i32 (X86Wrapper mcsym:$dst)), (MOV32ri mcsym:$dst)>; def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>; def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)), (ADD32ri GR32:$src1, tconstpool:$src2)>; def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)), (ADD32ri GR32:$src1, tjumptable:$src2)>; def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)), (ADD32ri GR32:$src1, tglobaladdr:$src2)>; def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)), (ADD32ri GR32:$src1, texternalsym:$src2)>; def : Pat<(add GR32:$src1, (X86Wrapper mcsym:$src2)), (ADD32ri GR32:$src1, mcsym:$src2)>; def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)), (ADD32ri GR32:$src1, tblockaddress:$src2)>; def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst), (MOV32mi addr:$dst, tglobaladdr:$src)>; def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst), (MOV32mi addr:$dst, texternalsym:$src)>; def : Pat<(store (i32 (X86Wrapper mcsym:$src)), addr:$dst), (MOV32mi addr:$dst, mcsym:$src)>; def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst), (MOV32mi addr:$dst, tblockaddress:$src)>; // ConstantPool GlobalAddress, ExternalSymbol, and JumpTable when not in small // code model mode, should use 'movabs'. FIXME: This is really a hack, the // 'movabs' predicate should handle this sort of thing. def : Pat<(i64 (X86Wrapper tconstpool :$dst)), (MOV64ri tconstpool :$dst)>, Requires<[FarData]>; def : Pat<(i64 (X86Wrapper tjumptable :$dst)), (MOV64ri tjumptable :$dst)>, Requires<[FarData]>; def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>; def : Pat<(i64 (X86Wrapper texternalsym:$dst)), (MOV64ri texternalsym:$dst)>, Requires<[FarData]>; def : Pat<(i64 (X86Wrapper mcsym:$dst)), (MOV64ri mcsym:$dst)>, Requires<[FarData]>; def : Pat<(i64 (X86Wrapper tblockaddress:$dst)), (MOV64ri tblockaddress:$dst)>, Requires<[FarData]>; // In kernel code model, we can get the address of a label // into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of // the MOV64ri32 should accept these. def : Pat<(i64 (X86Wrapper tconstpool :$dst)), (MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper tjumptable :$dst)), (MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper texternalsym:$dst)), (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper mcsym:$dst)), (MOV64ri32 mcsym:$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper tblockaddress:$dst)), (MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>; // If we have small model and -static mode, it is safe to store global addresses // directly as immediates. FIXME: This is really a hack, the 'imm' predicate // for MOV64mi32 should handle this sort of thing. 
def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst), (MOV64mi32 addr:$dst, tconstpool:$src)>, Requires<[NearData, IsStatic]>; def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst), (MOV64mi32 addr:$dst, tjumptable:$src)>, Requires<[NearData, IsStatic]>; def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst), (MOV64mi32 addr:$dst, tglobaladdr:$src)>, Requires<[NearData, IsStatic]>; def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst), (MOV64mi32 addr:$dst, texternalsym:$src)>, Requires<[NearData, IsStatic]>; def : Pat<(store (i64 (X86Wrapper mcsym:$src)), addr:$dst), (MOV64mi32 addr:$dst, mcsym:$src)>, Requires<[NearData, IsStatic]>; def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst), (MOV64mi32 addr:$dst, tblockaddress:$src)>, Requires<[NearData, IsStatic]>; def : Pat<(i32 (X86RecoverFrameAlloc mcsym:$dst)), (MOV32ri mcsym:$dst)>; def : Pat<(i64 (X86RecoverFrameAlloc mcsym:$dst)), (MOV64ri mcsym:$dst)>; // Calls // tls has some funny stuff here... // This corresponds to movabs $foo@tpoff, %rax def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)), (MOV64ri32 tglobaltlsaddr :$dst)>; // This corresponds to add $foo@tpoff, %rax def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)), (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>; // Direct PC relative function call for small code model. 32-bit displacement // sign extended to 64-bit. def : Pat<(X86call (i64 tglobaladdr:$dst)), (CALL64pcrel32 tglobaladdr:$dst)>; def : Pat<(X86call (i64 texternalsym:$dst)), (CALL64pcrel32 texternalsym:$dst)>; // Tailcall stuff. The TCRETURN instructions execute after the epilog, so they // can never use callee-saved registers. That is the purpose of the GR64_TC // register classes. // // The only volatile register that is never used by the calling convention is // %r11. This happens when calling a vararg function with 6 arguments. // // Match an X86tcret that uses less than 7 volatile registers. def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off), (X86tcret node:$ptr, node:$off), [{ // X86tcret args: (*chain, ptr, imm, regs..., glue) unsigned NumRegs = 0; for (unsigned i = 3, e = N->getNumOperands(); i != e; ++i) if (isa(N->getOperand(i)) && ++NumRegs > 6) return false; return true; }]>; def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), (TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>, Requires<[Not64BitMode]>; // FIXME: This is disabled for 32-bit PIC mode because the global base // register which is part of the address mode may be assigned a // callee-saved register. def : Pat<(X86tcret (load addr:$dst), imm:$off), (TCRETURNmi addr:$dst, imm:$off)>, Requires<[Not64BitMode, IsNotPIC]>; def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off), (TCRETURNdi tglobaladdr:$dst, imm:$off)>, Requires<[NotLP64]>; def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off), (TCRETURNdi texternalsym:$dst, imm:$off)>, Requires<[NotLP64]>; def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>, Requires<[In64BitMode]>; // Don't fold loads into X86tcret requiring more than 6 regs. // There wouldn't be enough scratch registers for base+index. 
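// Illustrative example: a 64-bit indirect tail call that already passes
// arguments in RDI, RSI, RDX, RCX, R8 and R9 leaves only R11 as a volatile
// scratch register, which is not enough to materialize a base+index address
// for a folded memory operand, so the load is kept separate in that case.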
def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off), (TCRETURNmi64 addr:$dst, imm:$off)>, Requires<[In64BitMode]>; def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off), (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>, Requires<[IsLP64]>; def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off), (TCRETURNdi64 texternalsym:$dst, imm:$off)>, Requires<[IsLP64]>; // Normal calls, with various flavors of addresses. def : Pat<(X86call (i32 tglobaladdr:$dst)), (CALLpcrel32 tglobaladdr:$dst)>; def : Pat<(X86call (i32 texternalsym:$dst)), (CALLpcrel32 texternalsym:$dst)>; def : Pat<(X86call (i32 imm:$dst)), (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>; // Comparisons. // TEST R,R is smaller than CMP R,0 def : Pat<(X86cmp GR8:$src1, 0), (TEST8rr GR8:$src1, GR8:$src1)>; def : Pat<(X86cmp GR16:$src1, 0), (TEST16rr GR16:$src1, GR16:$src1)>; def : Pat<(X86cmp GR32:$src1, 0), (TEST32rr GR32:$src1, GR32:$src1)>; def : Pat<(X86cmp GR64:$src1, 0), (TEST64rr GR64:$src1, GR64:$src1)>; // Conditional moves with folded loads with operands swapped and conditions // inverted. multiclass CMOVmr { let Predicates = [HasCMov] in { def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS), (Inst16 GR16:$src2, addr:$src1)>; def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS), (Inst32 GR32:$src2, addr:$src1)>; def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS), (Inst64 GR64:$src2, addr:$src1)>; } } defm : CMOVmr; defm : CMOVmr; defm : CMOVmr; defm : CMOVmr; defm : CMOVmr; defm : CMOVmr; defm : CMOVmr; defm : CMOVmr; defm : CMOVmr; defm : CMOVmr; defm : CMOVmr; defm : CMOVmr; defm : CMOVmr; defm : CMOVmr; defm : CMOVmr; defm : CMOVmr; // zextload bool -> zextload byte def : Pat<(zextloadi8i1 addr:$src), (AND8ri (MOV8rm addr:$src), (i8 1))>; def : Pat<(zextloadi16i1 addr:$src), (AND16ri8 (MOVZX16rm8 addr:$src), (i16 1))>; def : Pat<(zextloadi32i1 addr:$src), (AND32ri8 (MOVZX32rm8 addr:$src), (i32 1))>; def : Pat<(zextloadi64i1 addr:$src), (SUBREG_TO_REG (i64 0), (AND32ri8 (MOVZX32rm8 addr:$src), (i32 1)), sub_32bit)>; // extload bool -> extload byte // When extloading from 16-bit and smaller memory locations into 64-bit // registers, use zero-extending loads so that the entire 64-bit register is // defined, avoiding partial-register updates. def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>; def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>; def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>; def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>; def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>; def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>; // For other extloads, use subregs, since the high contents of the register are // defined after an extload. def : Pat<(extloadi64i1 addr:$src), (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>; def : Pat<(extloadi64i8 addr:$src), (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>; def : Pat<(extloadi64i16 addr:$src), (SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>; def : Pat<(extloadi64i32 addr:$src), (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>; // anyext. Define these to do an explicit zero-extend to // avoid partial-register updates. def : Pat<(i16 (anyext GR8 :$src)), (EXTRACT_SUBREG (MOVZX32rr8 GR8 :$src), sub_16bit)>; def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>; // Except for i16 -> i32 since isel expect i16 ops to be promoted to i32. 
def : Pat<(i32 (anyext GR16:$src)), (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>; def : Pat<(i64 (anyext GR8 :$src)), (SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8 :$src), sub_32bit)>; def : Pat<(i64 (anyext GR16:$src)), (SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16 :$src), sub_32bit)>; def : Pat<(i64 (anyext GR32:$src)), (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>; // Any instruction that defines a 32-bit result leaves the high half of the // register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may // be copying from a truncate. And x86's cmov doesn't do anything if the // condition is false. But any other 32-bit operation will zero-extend // up to 64 bits. def def32 : PatLeaf<(i32 GR32:$src), [{ return N->getOpcode() != ISD::TRUNCATE && N->getOpcode() != TargetOpcode::EXTRACT_SUBREG && N->getOpcode() != ISD::CopyFromReg && N->getOpcode() != ISD::AssertSext && N->getOpcode() != X86ISD::CMOV; }]>; // In the case of a 32-bit def that is known to implicitly zero-extend, // we can use a SUBREG_TO_REG. def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>; //===----------------------------------------------------------------------===// // Pattern match OR as ADD //===----------------------------------------------------------------------===// // If safe, we prefer to pattern match OR as ADD at isel time. ADD can be // 3-addressified into an LEA instruction to avoid copies. However, we also // want to finally emit these instructions as an or at the end of the code // generator to make the generated code easier to read. To do this, we select // into "disjoint bits" pseudo ops. // Treat an 'or' node is as an 'add' if the or'ed bits are known to be zero. def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ if (ConstantSDNode *CN = dyn_cast(N->getOperand(1))) return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); APInt KnownZero0, KnownOne0; CurDAG->computeKnownBits(N->getOperand(0), KnownZero0, KnownOne0, 0); APInt KnownZero1, KnownOne1; CurDAG->computeKnownBits(N->getOperand(1), KnownZero1, KnownOne1, 0); return (~KnownZero0 & ~KnownZero1) == 0; }]>; // (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. // Try this before the selecting to OR. let AddedComplexity = 5, SchedRW = [WriteALU] in { let isConvertibleToThreeAddress = 1, Constraints = "$src1 = $dst", Defs = [EFLAGS] in { let isCommutable = 1 in { def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "", // orw/addw REG, REG [(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>; def ADD32rr_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "", // orl/addl REG, REG [(set GR32:$dst, (or_is_add GR32:$src1, GR32:$src2))]>; def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "", // orq/addq REG, REG [(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>; } // isCommutable // NOTE: These are order specific, we want the ri8 forms to be listed // first so that they are slightly preferred to the ri forms. 
def ADD16ri8_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "", // orw/addw REG, imm8 [(set GR16:$dst,(or_is_add GR16:$src1,i16immSExt8:$src2))]>; def ADD16ri_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), "", // orw/addw REG, imm [(set GR16:$dst, (or_is_add GR16:$src1, imm:$src2))]>; def ADD32ri8_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), "", // orl/addl REG, imm8 [(set GR32:$dst,(or_is_add GR32:$src1,i32immSExt8:$src2))]>; def ADD32ri_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), "", // orl/addl REG, imm [(set GR32:$dst, (or_is_add GR32:$src1, imm:$src2))]>; def ADD64ri8_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), "", // orq/addq REG, imm8 [(set GR64:$dst, (or_is_add GR64:$src1, i64immSExt8:$src2))]>; def ADD64ri32_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), "", // orq/addq REG, imm [(set GR64:$dst, (or_is_add GR64:$src1, i64immSExt32:$src2))]>; } } // AddedComplexity, SchedRW //===----------------------------------------------------------------------===// // Some peepholes //===----------------------------------------------------------------------===// // Odd encoding trick: -128 fits into an 8-bit immediate field while // +128 doesn't, so in this special case use a sub instead of an add. def : Pat<(add GR16:$src1, 128), (SUB16ri8 GR16:$src1, -128)>; def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst), (SUB16mi8 addr:$dst, -128)>; def : Pat<(add GR32:$src1, 128), (SUB32ri8 GR32:$src1, -128)>; def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst), (SUB32mi8 addr:$dst, -128)>; def : Pat<(add GR64:$src1, 128), (SUB64ri8 GR64:$src1, -128)>; def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst), (SUB64mi8 addr:$dst, -128)>; // The same trick applies for 32-bit immediate fields in 64-bit // instructions. def : Pat<(add GR64:$src1, 0x0000000080000000), (SUB64ri32 GR64:$src1, 0xffffffff80000000)>; def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst), (SUB64mi32 addr:$dst, 0xffffffff80000000)>; // To avoid needing to materialize an immediate in a register, use a 32-bit and // with implicit zero-extension instead of a 64-bit and if the immediate has at // least 32 bits of leading zeros. If in addition the last 32 bits can be // represented with a sign extension of a 8 bit constant, use that. // This can also reduce instruction size by eliminating the need for the REX // prefix. // AddedComplexity is needed to give priority over i64immSExt8 and i64immSExt32. let AddedComplexity = 1 in { def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm), (SUBREG_TO_REG (i64 0), (AND32ri8 (EXTRACT_SUBREG GR64:$src, sub_32bit), (i32 (GetLo8XForm imm:$imm))), sub_32bit)>; def : Pat<(and GR64:$src, i64immZExt32:$imm), (SUBREG_TO_REG (i64 0), (AND32ri (EXTRACT_SUBREG GR64:$src, sub_32bit), (i32 (GetLo32XForm imm:$imm))), sub_32bit)>; } // AddedComplexity = 1 // AddedComplexity is needed due to the increased complexity on the // i64immZExt32SExt8 and i64immZExt32 patterns above. Applying this to all // the MOVZX patterns keeps thems together in DAGIsel tables. 
let AddedComplexity = 1 in { // r & (2^16-1) ==> movz def : Pat<(and GR32:$src1, 0xffff), (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>; // r & (2^8-1) ==> movz def : Pat<(and GR32:$src1, 0xff), (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1, GR32_ABCD)), sub_8bit))>, Requires<[Not64BitMode]>; // r & (2^8-1) ==> movz def : Pat<(and GR16:$src1, 0xff), (EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD)), sub_8bit)), sub_16bit)>, Requires<[Not64BitMode]>; // r & (2^32-1) ==> movz def : Pat<(and GR64:$src, 0x00000000FFFFFFFF), (SUBREG_TO_REG (i64 0), (MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)), sub_32bit)>; // r & (2^16-1) ==> movz def : Pat<(and GR64:$src, 0xffff), (SUBREG_TO_REG (i64 0), (MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))), sub_32bit)>; // r & (2^8-1) ==> movz def : Pat<(and GR64:$src, 0xff), (SUBREG_TO_REG (i64 0), (MOVZX32rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit))), sub_32bit)>; // r & (2^8-1) ==> movz def : Pat<(and GR32:$src1, 0xff), (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>, Requires<[In64BitMode]>; // r & (2^8-1) ==> movz def : Pat<(and GR16:$src1, 0xff), (EXTRACT_SUBREG (MOVZX32rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit))), sub_16bit)>, Requires<[In64BitMode]>; } // AddedComplexity = 1 // sext_inreg patterns def : Pat<(sext_inreg GR32:$src, i16), (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>; def : Pat<(sext_inreg GR32:$src, i8), (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), sub_8bit))>, Requires<[Not64BitMode]>; def : Pat<(sext_inreg GR16:$src, i8), (EXTRACT_SUBREG (i32 (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit))), sub_16bit)>, Requires<[Not64BitMode]>; def : Pat<(sext_inreg GR64:$src, i32), (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>; def : Pat<(sext_inreg GR64:$src, i16), (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>; def : Pat<(sext_inreg GR64:$src, i8), (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>; def : Pat<(sext_inreg GR32:$src, i8), (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>, Requires<[In64BitMode]>; def : Pat<(sext_inreg GR16:$src, i8), (EXTRACT_SUBREG (MOVSX32rr8 (EXTRACT_SUBREG GR16:$src, sub_8bit)), sub_16bit)>, Requires<[In64BitMode]>; // sext, sext_load, zext, zext_load def: Pat<(i16 (sext GR8:$src)), (EXTRACT_SUBREG (MOVSX32rr8 GR8:$src), sub_16bit)>; def: Pat<(sextloadi16i8 addr:$src), (EXTRACT_SUBREG (MOVSX32rm8 addr:$src), sub_16bit)>; def: Pat<(i16 (zext GR8:$src)), (EXTRACT_SUBREG (MOVZX32rr8 GR8:$src), sub_16bit)>; def: Pat<(zextloadi16i8 addr:$src), (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>; // trunc patterns def : Pat<(i16 (trunc GR32:$src)), (EXTRACT_SUBREG GR32:$src, sub_16bit)>; def : Pat<(i8 (trunc GR32:$src)), (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), sub_8bit)>, Requires<[Not64BitMode]>; def : Pat<(i8 (trunc GR16:$src)), (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit)>, Requires<[Not64BitMode]>; def : Pat<(i32 (trunc GR64:$src)), (EXTRACT_SUBREG GR64:$src, sub_32bit)>; def : Pat<(i16 (trunc GR64:$src)), (EXTRACT_SUBREG GR64:$src, sub_16bit)>; def : Pat<(i8 (trunc GR64:$src)), (EXTRACT_SUBREG GR64:$src, sub_8bit)>; def : Pat<(i8 (trunc GR32:$src)), (EXTRACT_SUBREG GR32:$src, sub_8bit)>, Requires<[In64BitMode]>; def : Pat<(i8 (trunc GR16:$src)), (EXTRACT_SUBREG GR16:$src, sub_8bit)>, Requires<[In64BitMode]>; // h-register tricks def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))), (EXTRACT_SUBREG 
(i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit_hi)>, Requires<[Not64BitMode]>; def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))), (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), sub_8bit_hi)>, Requires<[Not64BitMode]>; def : Pat<(srl GR16:$src, (i8 8)), (EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit_hi)), sub_16bit)>, Requires<[Not64BitMode]>; def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit_hi))>, Requires<[Not64BitMode]>; def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit_hi))>, Requires<[Not64BitMode]>; def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), sub_8bit_hi))>, Requires<[Not64BitMode]>; def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), sub_8bit_hi))>, Requires<[Not64BitMode]>; // h-register tricks. // For now, be conservative on x86-64 and use an h-register extract only if the // value is immediately zero-extended or stored, which are somewhat common // cases. This uses a bunch of code to prevent a register requiring a REX prefix // from being allocated in the same instruction as the h register, as there's // currently no way to describe this requirement to the register allocator. // h-register extract and zero-extend. def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)), (SUBREG_TO_REG (i64 0), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)), sub_8bit_hi)), sub_32bit)>; def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), sub_8bit_hi))>, Requires<[In64BitMode]>; def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), sub_8bit_hi))>, Requires<[In64BitMode]>; def : Pat<(srl GR16:$src, (i8 8)), (EXTRACT_SUBREG (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit_hi)), sub_16bit)>, Requires<[In64BitMode]>; def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit_hi))>, Requires<[In64BitMode]>; def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit_hi))>, Requires<[In64BitMode]>; def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))), (SUBREG_TO_REG (i64 0), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit_hi)), sub_32bit)>; def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))), (SUBREG_TO_REG (i64 0), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit_hi)), sub_32bit)>; // h-register extract and store. 
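// Illustrative example: storing the second byte of a 32-bit value, i.e.
// (i8 (trunc (srl GR32:$src, 8))), uses MOV8mr_NOREX with the sub_8bit_hi
// subregister (e.g. %ah) so the register allocator never pairs the h-register
// with an operand that would require a REX prefix.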
def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
          (MOV8mr_NOREX
            addr:$dst,
            (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
                            sub_8bit_hi))>;
def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
          (MOV8mr_NOREX
            addr:$dst,
            (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
                            sub_8bit_hi))>,
      Requires<[In64BitMode]>;
def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
          (MOV8mr_NOREX
            addr:$dst,
            (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
                            sub_8bit_hi))>,
      Requires<[In64BitMode]>;

// (shl x, 1) ==> (add x, x)
// Note that if x is undef (immediate or otherwise), we could theoretically
// end up with the two uses of x getting different values, producing a result
// where the least significant bit is not 0. However, the probability of this
// happening is considered low enough that this is officially not a
// "real problem".
def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;

// Helper imms that check if a mask doesn't change significant shift bits.
def immShift32 : ImmLeaf<i8, [{ return countTrailingOnes<uint64_t>(Imm) >= 5; }]>;
def immShift64 : ImmLeaf<i8, [{ return countTrailingOnes<uint64_t>(Imm) >= 6; }]>;

// Shift amount is implicitly masked.
multiclass MaskedShiftAmountPats<SDNode frag, string name> {
  // (shift x (and y, 31)) ==> (shift x, y)
  def : Pat<(frag GR8:$src1, (and CL, immShift32)),
            (!cast<Instruction>(name # "8rCL") GR8:$src1)>;
  def : Pat<(frag GR16:$src1, (and CL, immShift32)),
            (!cast<Instruction>(name # "16rCL") GR16:$src1)>;
  def : Pat<(frag GR32:$src1, (and CL, immShift32)),
            (!cast<Instruction>(name # "32rCL") GR32:$src1)>;
  def : Pat<(store (frag (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
            (!cast<Instruction>(name # "8mCL") addr:$dst)>;
  def : Pat<(store (frag (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
            (!cast<Instruction>(name # "16mCL") addr:$dst)>;
  def : Pat<(store (frag (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
            (!cast<Instruction>(name # "32mCL") addr:$dst)>;

  // (shift x (and y, 63)) ==> (shift x, y)
  def : Pat<(frag GR64:$src1, (and CL, immShift64)),
            (!cast<Instruction>(name # "64rCL") GR64:$src1)>;
  def : Pat<(store (frag (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
            (!cast<Instruction>(name # "64mCL") addr:$dst)>;
}

defm : MaskedShiftAmountPats<shl, "SHL">;
defm : MaskedShiftAmountPats<srl, "SHR">;
defm : MaskedShiftAmountPats<sra, "SAR">;
defm : MaskedShiftAmountPats<rotl, "ROL">;
defm : MaskedShiftAmountPats<rotr, "ROR">;

// (anyext (setcc_carry)) -> (setcc_carry)
def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
          (SETB_C16r)>;
def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
          (SETB_C32r)>;
def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
          (SETB_C32r)>;

//===----------------------------------------------------------------------===//
// EFLAGS-defining Patterns
//===----------------------------------------------------------------------===//

// add reg, reg
def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>;
def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;
def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;

// add reg, mem
def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
          (ADD8rm GR8:$src1, addr:$src2)>;
def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
          (ADD16rm GR16:$src1, addr:$src2)>;
def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
          (ADD32rm GR32:$src1, addr:$src2)>;

// add reg, imm
def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>;
def : Pat<(add
GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>; def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>; def : Pat<(add GR16:$src1, i16immSExt8:$src2), (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>; def : Pat<(add GR32:$src1, i32immSExt8:$src2), (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>; // sub reg, reg def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>; def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>; def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>; // sub reg, mem def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)), (SUB8rm GR8:$src1, addr:$src2)>; def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)), (SUB16rm GR16:$src1, addr:$src2)>; def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)), (SUB32rm GR32:$src1, addr:$src2)>; // sub reg, imm def : Pat<(sub GR8:$src1, imm:$src2), (SUB8ri GR8:$src1, imm:$src2)>; def : Pat<(sub GR16:$src1, imm:$src2), (SUB16ri GR16:$src1, imm:$src2)>; def : Pat<(sub GR32:$src1, imm:$src2), (SUB32ri GR32:$src1, imm:$src2)>; def : Pat<(sub GR16:$src1, i16immSExt8:$src2), (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>; def : Pat<(sub GR32:$src1, i32immSExt8:$src2), (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>; // sub 0, reg def : Pat<(X86sub_flag 0, GR8 :$src), (NEG8r GR8 :$src)>; def : Pat<(X86sub_flag 0, GR16:$src), (NEG16r GR16:$src)>; def : Pat<(X86sub_flag 0, GR32:$src), (NEG32r GR32:$src)>; def : Pat<(X86sub_flag 0, GR64:$src), (NEG64r GR64:$src)>; // mul reg, reg def : Pat<(mul GR16:$src1, GR16:$src2), (IMUL16rr GR16:$src1, GR16:$src2)>; def : Pat<(mul GR32:$src1, GR32:$src2), (IMUL32rr GR32:$src1, GR32:$src2)>; // mul reg, mem def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)), (IMUL16rm GR16:$src1, addr:$src2)>; def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)), (IMUL32rm GR32:$src1, addr:$src2)>; // mul reg, imm def : Pat<(mul GR16:$src1, imm:$src2), (IMUL16rri GR16:$src1, imm:$src2)>; def : Pat<(mul GR32:$src1, imm:$src2), (IMUL32rri GR32:$src1, imm:$src2)>; def : Pat<(mul GR16:$src1, i16immSExt8:$src2), (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>; def : Pat<(mul GR32:$src1, i32immSExt8:$src2), (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>; // reg = mul mem, imm def : Pat<(mul (loadi16 addr:$src1), imm:$src2), (IMUL16rmi addr:$src1, imm:$src2)>; def : Pat<(mul (loadi32 addr:$src1), imm:$src2), (IMUL32rmi addr:$src1, imm:$src2)>; def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2), (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>; def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2), (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>; // Patterns for nodes that do not produce flags, for instructions that do. 
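// Illustrative example: a plain (add GR64:$a, GR64:$b) whose flags are never
// read is still selected to ADD64rr below; the EFLAGS definition it carries
// is simply left dead.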
// addition def : Pat<(add GR64:$src1, GR64:$src2), (ADD64rr GR64:$src1, GR64:$src2)>; def : Pat<(add GR64:$src1, i64immSExt8:$src2), (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>; def : Pat<(add GR64:$src1, i64immSExt32:$src2), (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>; def : Pat<(add GR64:$src1, (loadi64 addr:$src2)), (ADD64rm GR64:$src1, addr:$src2)>; // subtraction def : Pat<(sub GR64:$src1, GR64:$src2), (SUB64rr GR64:$src1, GR64:$src2)>; def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)), (SUB64rm GR64:$src1, addr:$src2)>; def : Pat<(sub GR64:$src1, i64immSExt8:$src2), (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>; def : Pat<(sub GR64:$src1, i64immSExt32:$src2), (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>; // Multiply def : Pat<(mul GR64:$src1, GR64:$src2), (IMUL64rr GR64:$src1, GR64:$src2)>; def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)), (IMUL64rm GR64:$src1, addr:$src2)>; def : Pat<(mul GR64:$src1, i64immSExt8:$src2), (IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>; def : Pat<(mul GR64:$src1, i64immSExt32:$src2), (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>; def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2), (IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>; def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2), (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>; // Increment/Decrement reg. // Do not make INC/DEC if it is slow let Predicates = [NotSlowIncDec] in { def : Pat<(add GR8:$src, 1), (INC8r GR8:$src)>; def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>; def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>; def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>; def : Pat<(add GR8:$src, -1), (DEC8r GR8:$src)>; def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>; def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>; def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>; } // or reg/reg. 
def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>; def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>; def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>; def : Pat<(or GR64:$src1, GR64:$src2), (OR64rr GR64:$src1, GR64:$src2)>; // or reg/mem def : Pat<(or GR8:$src1, (loadi8 addr:$src2)), (OR8rm GR8:$src1, addr:$src2)>; def : Pat<(or GR16:$src1, (loadi16 addr:$src2)), (OR16rm GR16:$src1, addr:$src2)>; def : Pat<(or GR32:$src1, (loadi32 addr:$src2)), (OR32rm GR32:$src1, addr:$src2)>; def : Pat<(or GR64:$src1, (loadi64 addr:$src2)), (OR64rm GR64:$src1, addr:$src2)>; // or reg/imm def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>; def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>; def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>; def : Pat<(or GR16:$src1, i16immSExt8:$src2), (OR16ri8 GR16:$src1, i16immSExt8:$src2)>; def : Pat<(or GR32:$src1, i32immSExt8:$src2), (OR32ri8 GR32:$src1, i32immSExt8:$src2)>; def : Pat<(or GR64:$src1, i64immSExt8:$src2), (OR64ri8 GR64:$src1, i64immSExt8:$src2)>; def : Pat<(or GR64:$src1, i64immSExt32:$src2), (OR64ri32 GR64:$src1, i64immSExt32:$src2)>; // xor reg/reg def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>; def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>; def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>; def : Pat<(xor GR64:$src1, GR64:$src2), (XOR64rr GR64:$src1, GR64:$src2)>; // xor reg/mem def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)), (XOR8rm GR8:$src1, addr:$src2)>; def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)), (XOR16rm GR16:$src1, addr:$src2)>; def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)), (XOR32rm GR32:$src1, addr:$src2)>; def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)), (XOR64rm GR64:$src1, addr:$src2)>; // xor reg/imm def : Pat<(xor GR8:$src1, imm:$src2), (XOR8ri GR8:$src1, imm:$src2)>; def : Pat<(xor GR16:$src1, imm:$src2), (XOR16ri GR16:$src1, imm:$src2)>; def : Pat<(xor GR32:$src1, imm:$src2), (XOR32ri GR32:$src1, imm:$src2)>; def : Pat<(xor GR16:$src1, i16immSExt8:$src2), (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>; def : Pat<(xor GR32:$src1, i32immSExt8:$src2), (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>; def : Pat<(xor GR64:$src1, i64immSExt8:$src2), (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>; def : Pat<(xor GR64:$src1, i64immSExt32:$src2), (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>; // and reg/reg def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>; def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>; def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>; def : Pat<(and GR64:$src1, GR64:$src2), (AND64rr GR64:$src1, GR64:$src2)>; // and reg/mem def : Pat<(and GR8:$src1, (loadi8 addr:$src2)), (AND8rm GR8:$src1, addr:$src2)>; def : Pat<(and GR16:$src1, (loadi16 addr:$src2)), (AND16rm GR16:$src1, addr:$src2)>; def : Pat<(and GR32:$src1, (loadi32 addr:$src2)), (AND32rm GR32:$src1, addr:$src2)>; def : Pat<(and GR64:$src1, (loadi64 addr:$src2)), (AND64rm GR64:$src1, addr:$src2)>; // and reg/imm def : Pat<(and GR8:$src1, imm:$src2), (AND8ri GR8:$src1, imm:$src2)>; def : Pat<(and GR16:$src1, imm:$src2), (AND16ri GR16:$src1, imm:$src2)>; def : Pat<(and GR32:$src1, imm:$src2), (AND32ri GR32:$src1, imm:$src2)>; def : Pat<(and GR16:$src1, i16immSExt8:$src2), (AND16ri8 GR16:$src1, i16immSExt8:$src2)>; def : Pat<(and GR32:$src1, i32immSExt8:$src2), (AND32ri8 GR32:$src1, i32immSExt8:$src2)>; def : 
Pat<(and GR64:$src1, i64immSExt8:$src2), (AND64ri8 GR64:$src1, i64immSExt8:$src2)>; def : Pat<(and GR64:$src1, i64immSExt32:$src2), (AND64ri32 GR64:$src1, i64immSExt32:$src2)>; // Bit scan instruction patterns to match explicit zero-undef behavior. def : Pat<(cttz_zero_undef GR16:$src), (BSF16rr GR16:$src)>; def : Pat<(cttz_zero_undef GR32:$src), (BSF32rr GR32:$src)>; def : Pat<(cttz_zero_undef GR64:$src), (BSF64rr GR64:$src)>; def : Pat<(cttz_zero_undef (loadi16 addr:$src)), (BSF16rm addr:$src)>; def : Pat<(cttz_zero_undef (loadi32 addr:$src)), (BSF32rm addr:$src)>; def : Pat<(cttz_zero_undef (loadi64 addr:$src)), (BSF64rm addr:$src)>; // When HasMOVBE is enabled it is possible to get a non-legalized // register-register 16 bit bswap. This maps it to a ROL instruction. let Predicates = [HasMOVBE] in { def : Pat<(bswap GR16:$src), (ROL16ri GR16:$src, (i8 8))>; } Index: projects/clang380-import/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- projects/clang380-import/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp (revision 296010) +++ projects/clang380-import/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp (revision 296011) @@ -1,7586 +1,7589 @@ //===---- TargetInfo.cpp - Encapsulate target details -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // These classes wrap the information about a call or function // definition used to handle ABI compliancy. // //===----------------------------------------------------------------------===// #include "TargetInfo.h" #include "ABIInfo.h" #include "CGCXXABI.h" #include "CGValue.h" #include "CodeGenFunction.h" #include "clang/AST/RecordLayout.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Type.h" #include "llvm/Support/raw_ostream.h" #include // std::sort using namespace clang; using namespace CodeGen; static void AssignToArrayRange(CodeGen::CGBuilderTy &Builder, llvm::Value *Array, llvm::Value *Value, unsigned FirstIndex, unsigned LastIndex) { // Alternatively, we could emit this as a loop in the source. 
for (unsigned I = FirstIndex; I <= LastIndex; ++I) { llvm::Value *Cell = Builder.CreateConstInBoundsGEP1_32(Builder.getInt8Ty(), Array, I); Builder.CreateAlignedStore(Value, Cell, CharUnits::One()); } } static bool isAggregateTypeForABI(QualType T) { return !CodeGenFunction::hasScalarEvaluationKind(T) || T->isMemberFunctionPointerType(); } ABIArgInfo ABIInfo::getNaturalAlignIndirect(QualType Ty, bool ByRef, bool Realign, llvm::Type *Padding) const { return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty), ByRef, Realign, Padding); } ABIArgInfo ABIInfo::getNaturalAlignIndirectInReg(QualType Ty, bool Realign) const { return ABIArgInfo::getIndirectInReg(getContext().getTypeAlignInChars(Ty), /*ByRef*/ false, Realign); } Address ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { return Address::invalid(); } ABIInfo::~ABIInfo() {} static CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT, CGCXXABI &CXXABI) { const CXXRecordDecl *RD = dyn_cast(RT->getDecl()); if (!RD) return CGCXXABI::RAA_Default; return CXXABI.getRecordArgABI(RD); } static CGCXXABI::RecordArgABI getRecordArgABI(QualType T, CGCXXABI &CXXABI) { const RecordType *RT = T->getAs(); if (!RT) return CGCXXABI::RAA_Default; return getRecordArgABI(RT, CXXABI); } /// Pass transparent unions as if they were the type of the first element. Sema /// should ensure that all elements of the union have the same "machine type". static QualType useFirstFieldIfTransparentUnion(QualType Ty) { if (const RecordType *UT = Ty->getAsUnionType()) { const RecordDecl *UD = UT->getDecl(); if (UD->hasAttr()) { assert(!UD->field_empty() && "sema created an empty transparent union"); return UD->field_begin()->getType(); } } return Ty; } CGCXXABI &ABIInfo::getCXXABI() const { return CGT.getCXXABI(); } ASTContext &ABIInfo::getContext() const { return CGT.getContext(); } llvm::LLVMContext &ABIInfo::getVMContext() const { return CGT.getLLVMContext(); } const llvm::DataLayout &ABIInfo::getDataLayout() const { return CGT.getDataLayout(); } const TargetInfo &ABIInfo::getTarget() const { return CGT.getTarget(); } bool ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { return false; } bool ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base, uint64_t Members) const { return false; } bool ABIInfo::shouldSignExtUnsignedType(QualType Ty) const { return false; } void ABIArgInfo::dump() const { raw_ostream &OS = llvm::errs(); OS << "(ABIArgInfo Kind="; switch (TheKind) { case Direct: OS << "Direct Type="; if (llvm::Type *Ty = getCoerceToType()) Ty->print(OS); else OS << "null"; break; case Extend: OS << "Extend"; break; case Ignore: OS << "Ignore"; break; case InAlloca: OS << "InAlloca Offset=" << getInAllocaFieldIndex(); break; case Indirect: OS << "Indirect Align=" << getIndirectAlign().getQuantity() << " ByVal=" << getIndirectByVal() << " Realign=" << getIndirectRealign(); break; case Expand: OS << "Expand"; break; } OS << ")\n"; } // Dynamically round a pointer up to a multiple of the given alignment. 
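// Illustrative example: with Align = 8, a pointer value of 0x1003 becomes
// (0x1003 + 7) & -8 = 0x1008, while an already-aligned 0x1008 is unchanged.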
static llvm::Value *emitRoundPointerUpToAlignment(CodeGenFunction &CGF, llvm::Value *Ptr, CharUnits Align) { llvm::Value *PtrAsInt = Ptr; // OverflowArgArea = (OverflowArgArea + Align - 1) & -Align; PtrAsInt = CGF.Builder.CreatePtrToInt(PtrAsInt, CGF.IntPtrTy); PtrAsInt = CGF.Builder.CreateAdd(PtrAsInt, llvm::ConstantInt::get(CGF.IntPtrTy, Align.getQuantity() - 1)); PtrAsInt = CGF.Builder.CreateAnd(PtrAsInt, llvm::ConstantInt::get(CGF.IntPtrTy, -Align.getQuantity())); PtrAsInt = CGF.Builder.CreateIntToPtr(PtrAsInt, Ptr->getType(), Ptr->getName() + ".aligned"); return PtrAsInt; } /// Emit va_arg for a platform using the common void* representation, /// where arguments are simply emitted in an array of slots on the stack. /// /// This version implements the core direct-value passing rules. /// /// \param SlotSize - The size and alignment of a stack slot. /// Each argument will be allocated to a multiple of this number of /// slots, and all the slots will be aligned to this value. /// \param AllowHigherAlign - The slot alignment is not a cap; /// an argument type with an alignment greater than the slot size /// will be emitted on a higher-alignment address, potentially /// leaving one or more empty slots behind as padding. If this /// is false, the returned address might be less-aligned than /// DirectAlign. static Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF, Address VAListAddr, llvm::Type *DirectTy, CharUnits DirectSize, CharUnits DirectAlign, CharUnits SlotSize, bool AllowHigherAlign) { // Cast the element type to i8* if necessary. Some platforms define // va_list as a struct containing an i8* instead of just an i8*. if (VAListAddr.getElementType() != CGF.Int8PtrTy) VAListAddr = CGF.Builder.CreateElementBitCast(VAListAddr, CGF.Int8PtrTy); llvm::Value *Ptr = CGF.Builder.CreateLoad(VAListAddr, "argp.cur"); // If the CC aligns values higher than the slot size, do so if needed. Address Addr = Address::invalid(); if (AllowHigherAlign && DirectAlign > SlotSize) { Addr = Address(emitRoundPointerUpToAlignment(CGF, Ptr, DirectAlign), DirectAlign); } else { Addr = Address(Ptr, SlotSize); } // Advance the pointer past the argument, then store that back. CharUnits FullDirectSize = DirectSize.RoundUpToAlignment(SlotSize); llvm::Value *NextPtr = CGF.Builder.CreateConstInBoundsByteGEP(Addr.getPointer(), FullDirectSize, "argp.next"); CGF.Builder.CreateStore(NextPtr, VAListAddr); // If the argument is smaller than a slot, and this is a big-endian // target, the argument will be right-adjusted in its slot. if (DirectSize < SlotSize && CGF.CGM.getDataLayout().isBigEndian()) { Addr = CGF.Builder.CreateConstInBoundsByteGEP(Addr, SlotSize - DirectSize); } Addr = CGF.Builder.CreateElementBitCast(Addr, DirectTy); return Addr; } /// Emit va_arg for a platform using the common void* representation, /// where arguments are simply emitted in an array of slots on the stack. /// /// \param IsIndirect - Values of this type are passed indirectly. /// \param ValueInfo - The size and alignment of this type, generally /// computed with getContext().getTypeInfoInChars(ValueTy). /// \param SlotSizeAndAlign - The size and alignment of a stack slot. /// Each argument will be allocated to a multiple of this number of /// slots, and all the slots will be aligned to this value. /// \param AllowHigherAlign - The slot alignment is not a cap; /// an argument type with an alignment greater than the slot size /// will be emitted on a higher-alignment address, potentially /// leaving one or more empty slots behind as padding. 
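/// Illustrative example: a 4-byte int read from a va_list with 8-byte slots
/// advances the cursor by a full 8-byte slot; on a big-endian target the
/// returned address is additionally offset by SlotSize - DirectSize = 4 so
/// the value is read from the high-addressed half of its slot.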
static Address emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType ValueTy, bool IsIndirect, std::pair ValueInfo, CharUnits SlotSizeAndAlign, bool AllowHigherAlign) { // The size and alignment of the value that was passed directly. CharUnits DirectSize, DirectAlign; if (IsIndirect) { DirectSize = CGF.getPointerSize(); DirectAlign = CGF.getPointerAlign(); } else { DirectSize = ValueInfo.first; DirectAlign = ValueInfo.second; } // Cast the address we've calculated to the right type. llvm::Type *DirectTy = CGF.ConvertTypeForMem(ValueTy); if (IsIndirect) DirectTy = DirectTy->getPointerTo(0); Address Addr = emitVoidPtrDirectVAArg(CGF, VAListAddr, DirectTy, DirectSize, DirectAlign, SlotSizeAndAlign, AllowHigherAlign); if (IsIndirect) { Addr = Address(CGF.Builder.CreateLoad(Addr), ValueInfo.second); } return Addr; } static Address emitMergePHI(CodeGenFunction &CGF, Address Addr1, llvm::BasicBlock *Block1, Address Addr2, llvm::BasicBlock *Block2, const llvm::Twine &Name = "") { assert(Addr1.getType() == Addr2.getType()); llvm::PHINode *PHI = CGF.Builder.CreatePHI(Addr1.getType(), 2, Name); PHI->addIncoming(Addr1.getPointer(), Block1); PHI->addIncoming(Addr2.getPointer(), Block2); CharUnits Align = std::min(Addr1.getAlignment(), Addr2.getAlignment()); return Address(PHI, Align); } TargetCodeGenInfo::~TargetCodeGenInfo() { delete Info; } // If someone can figure out a general rule for this, that would be great. // It's probably just doomed to be platform-dependent, though. unsigned TargetCodeGenInfo::getSizeOfUnwindException() const { // Verified for: // x86-64 FreeBSD, Linux, Darwin // x86-32 FreeBSD, Linux, Darwin // PowerPC Linux, Darwin // ARM Darwin (*not* EABI) // AArch64 Linux return 32; } bool TargetCodeGenInfo::isNoProtoCallVariadic(const CallArgList &args, const FunctionNoProtoType *fnType) const { // The following conventions are known to require this to be false: // x86_stdcall // MIPS // For everything else, we just prefer false unless we opt out. return false; } void TargetCodeGenInfo::getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const { // This assumes the user is passing a library name like "rt" instead of a // filename like "librt.a/so", and that they don't care whether it's static or // dynamic. Opt = "-l"; Opt += Lib; } static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays); /// isEmptyField - Return true iff a the field is "empty", that is it /// is an unnamed bit-field or an (array of) empty record(s). static bool isEmptyField(ASTContext &Context, const FieldDecl *FD, bool AllowArrays) { if (FD->isUnnamedBitfield()) return true; QualType FT = FD->getType(); // Constant arrays of empty records count as empty, strip them off. // Constant arrays of zero length always count as empty. if (AllowArrays) while (const ConstantArrayType *AT = Context.getAsConstantArrayType(FT)) { if (AT->getSize() == 0) return true; FT = AT->getElementType(); } const RecordType *RT = FT->getAs(); if (!RT) return false; // C++ record fields are never empty, at least in the Itanium ABI. // // FIXME: We should use a predicate for whether this behavior is true in the // current ABI. if (isa(RT->getDecl())) return false; return isEmptyRecord(Context, FT, AllowArrays); } /// isEmptyRecord - Return true iff a structure contains only empty /// fields. Note that a structure with a flexible array member is not /// considered empty. 
static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays) {
  const RecordType *RT = T->getAs<RecordType>();
  if (!RT)
    return false;
  const RecordDecl *RD = RT->getDecl();
  if (RD->hasFlexibleArrayMember())
    return false;

  // If this is a C++ record, check the bases first.
  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
    for (const auto &I : CXXRD->bases())
      if (!isEmptyRecord(Context, I.getType(), true))
        return false;

  for (const auto *I : RD->fields())
    if (!isEmptyField(Context, I, AllowArrays))
      return false;
  return true;
}

/// isSingleElementStruct - Determine if a structure is a "single
/// element struct", i.e. it has exactly one non-empty field or
/// exactly one field which is itself a single element
/// struct. Structures with flexible array members are never
/// considered single element structs.
///
/// \return The field declaration for the single non-empty field, if
/// it exists.
static const Type *isSingleElementStruct(QualType T, ASTContext &Context) {
  const RecordType *RT = T->getAs<RecordType>();
  if (!RT)
    return nullptr;

  const RecordDecl *RD = RT->getDecl();
  if (RD->hasFlexibleArrayMember())
    return nullptr;

  const Type *Found = nullptr;

  // If this is a C++ record, check the bases first.
  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
    for (const auto &I : CXXRD->bases()) {
      // Ignore empty records.
      if (isEmptyRecord(Context, I.getType(), true))
        continue;

      // If we already found an element then this isn't a single-element struct.
      if (Found)
        return nullptr;

      // If this is non-empty and not a single element struct, the composite
      // cannot be a single element struct.
      Found = isSingleElementStruct(I.getType(), Context);
      if (!Found)
        return nullptr;
    }
  }

  // Check for single element.
  for (const auto *FD : RD->fields()) {
    QualType FT = FD->getType();

    // Ignore empty fields.
    if (isEmptyField(Context, FD, true))
      continue;

    // If we already found an element then this isn't a single-element
    // struct.
    if (Found)
      return nullptr;

    // Treat single element arrays as the element.
    while (const ConstantArrayType *AT = Context.getAsConstantArrayType(FT)) {
      if (AT->getSize().getZExtValue() != 1)
        break;
      FT = AT->getElementType();
    }

    if (!isAggregateTypeForABI(FT)) {
      Found = FT.getTypePtr();
    } else {
      Found = isSingleElementStruct(FT, Context);
      if (!Found)
        return nullptr;
    }
  }

  // We don't consider a struct a single-element struct if it has
  // padding beyond the element type.
  if (Found && Context.getTypeSize(Found) != Context.getTypeSize(T))
    return nullptr;

  return Found;
}

static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) {
  // Treat complex types as the element type.
  if (const ComplexType *CTy = Ty->getAs<ComplexType>())
    Ty = CTy->getElementType();

  // Check for a type which we know has a simple scalar argument-passing
  // convention without any padding. (We're specifically looking for 32
  // and 64-bit integer and integer-equivalents, float, and double.)
  if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() &&
      !Ty->isEnumeralType() && !Ty->isBlockPointerType())
    return false;

  uint64_t Size = Context.getTypeSize(Ty);
  return Size == 32 || Size == 64;
}

/// canExpandIndirectArgument - Test whether an argument type which is to be
/// passed indirectly (on the stack) would have the equivalent layout if it was
/// expanded into separate arguments. If so, we prefer to do the latter to avoid
/// inhibiting optimizations.
///
// FIXME: This predicate is missing many cases, currently it just follows
// llvm-gcc (checks that all fields are 32-bit or 64-bit primitive types).
We // should probably make this smarter, or better yet make the LLVM backend // capable of handling it. static bool canExpandIndirectArgument(QualType Ty, ASTContext &Context) { // We can only expand structure types. const RecordType *RT = Ty->getAs(); if (!RT) return false; // We can only expand (C) structures. // // FIXME: This needs to be generalized to handle classes as well. const RecordDecl *RD = RT->getDecl(); if (!RD->isStruct()) return false; // We try to expand CLike CXXRecordDecl. if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { if (!CXXRD->isCLike()) return false; } uint64_t Size = 0; for (const auto *FD : RD->fields()) { if (!is32Or64BitBasicType(FD->getType(), Context)) return false; // FIXME: Reject bit-fields wholesale; there are two problems, we don't know // how to expand them yet, and the predicate for telling if a bitfield still // counts as "basic" is more complicated than what we were doing previously. if (FD->isBitField()) return false; Size += Context.getTypeSize(FD->getType()); } // Make sure there are not any holes in the struct. if (Size != Context.getTypeSize(Ty)) return false; return true; } namespace { /// DefaultABIInfo - The default implementation for ABI specific /// details. This implementation provides information which results in /// self-consistent and sensible LLVM IR generation, but does not /// conform to any particular ABI. class DefaultABIInfo : public ABIInfo { public: DefaultABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT) {} ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType RetTy) const; void computeInfo(CGFunctionInfo &FI) const override { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type); } Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; }; class DefaultTargetCodeGenInfo : public TargetCodeGenInfo { public: DefaultTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} }; Address DefaultABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { return Address::invalid(); } ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const { Ty = useFirstFieldIfTransparentUnion(Ty); if (isAggregateTypeForABI(Ty)) { // Records with non-trivial destructors/copy-constructors should not be // passed by value. if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); return getNaturalAlignIndirect(Ty); } // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); if (isAggregateTypeForABI(RetTy)) return getNaturalAlignIndirect(RetTy); // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } //===----------------------------------------------------------------------===// // WebAssembly ABI Implementation // // This is a very simple ABI that relies a lot on DefaultABIInfo. 
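// Illustrative example: a struct holding a single float is classified below
// as a direct f32 argument, while a record with a non-trivial destructor or
// copy constructor is passed indirectly.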
//===----------------------------------------------------------------------===// class WebAssemblyABIInfo final : public DefaultABIInfo { public: explicit WebAssemblyABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} private: ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType Ty) const; // DefaultABIInfo's classifyReturnType and classifyArgumentType are // non-virtual, but computeInfo is virtual, so we overload that. void computeInfo(CGFunctionInfo &FI) const override { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &Arg : FI.arguments()) Arg.info = classifyArgumentType(Arg.type); } }; class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo { public: explicit WebAssemblyTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) : TargetCodeGenInfo(new WebAssemblyABIInfo(CGT)) {} }; /// \brief Classify argument of given type \p Ty. ABIArgInfo WebAssemblyABIInfo::classifyArgumentType(QualType Ty) const { Ty = useFirstFieldIfTransparentUnion(Ty); if (isAggregateTypeForABI(Ty)) { // Records with non-trivial destructors/copy-constructors should not be // passed by value. if (auto RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); // Ignore empty structs/unions. if (isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); // Lower single-element structs to just pass a regular value. TODO: We // could do reasonable-size multiple-element structs too, using getExpand(), // though watch out for things like bitfields. if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); } // Otherwise just do the default thing. return DefaultABIInfo::classifyArgumentType(Ty); } ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const { if (isAggregateTypeForABI(RetTy)) { // Records with non-trivial destructors/copy-constructors should not be // returned by value. if (!getRecordArgABI(RetTy, getCXXABI())) { // Ignore empty structs/unions. if (isEmptyRecord(getContext(), RetTy, true)) return ABIArgInfo::getIgnore(); // Lower single-element structs to just return a regular value. TODO: We // could do reasonable-size multiple-element structs too, using // ABIArgInfo::getDirect(). if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext())) return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); } } // Otherwise just do the default thing. return DefaultABIInfo::classifyReturnType(RetTy); } //===----------------------------------------------------------------------===// // le32/PNaCl bitcode ABI Implementation // // This is a simplified version of the x86_32 ABI. Arguments and return values // are always passed on the stack. 
//===----------------------------------------------------------------------===// class PNaClABIInfo : public ABIInfo { public: PNaClABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT) {} ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType RetTy) const; void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; }; class PNaClTargetCodeGenInfo : public TargetCodeGenInfo { public: PNaClTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) : TargetCodeGenInfo(new PNaClABIInfo(CGT)) {} }; void PNaClABIInfo::computeInfo(CGFunctionInfo &FI) const { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type); } Address PNaClABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { return Address::invalid(); } /// \brief Classify argument of given type \p Ty. ABIArgInfo PNaClABIInfo::classifyArgumentType(QualType Ty) const { if (isAggregateTypeForABI(Ty)) { if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); return getNaturalAlignIndirect(Ty); } else if (const EnumType *EnumTy = Ty->getAs()) { // Treat an enum type as its underlying type. Ty = EnumTy->getDecl()->getIntegerType(); } else if (Ty->isFloatingType()) { // Floating-point types don't go inreg. return ABIArgInfo::getDirect(); } return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); // In the PNaCl ABI we always return records/structures on the stack. if (isAggregateTypeForABI(RetTy)) return getNaturalAlignIndirect(RetTy); // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } /// IsX86_MMXType - Return true if this is an MMX type. bool IsX86_MMXType(llvm::Type *IRType) { // Return true if the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>. return IRType->isVectorTy() && IRType->getPrimitiveSizeInBits() == 64 && cast(IRType)->getElementType()->isIntegerTy() && IRType->getScalarSizeInBits() != 64; } static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF, StringRef Constraint, llvm::Type* Ty) { if ((Constraint == "y" || Constraint == "&y") && Ty->isVectorTy()) { if (cast(Ty)->getBitWidth() != 64) { // Invalid MMX constraint return nullptr; } return llvm::Type::getX86_MMXTy(CGF.getLLVMContext()); } // No operation needed return Ty; } /// Returns true if this type can be passed in SSE registers with the /// X86_VectorCall calling convention. Shared between x86_32 and x86_64. static bool isX86VectorTypeForVectorCall(ASTContext &Context, QualType Ty) { if (const BuiltinType *BT = Ty->getAs()) { if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half) return true; } else if (const VectorType *VT = Ty->getAs()) { // vectorcall can pass XMM, YMM, and ZMM vectors. We don't pass SSE1 MMX // registers specially. 
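// Illustrative example: __m128, __m256 and __m512 (128/256/512 bits) satisfy
// the size check below, while 64-bit MMX vectors and half-precision scalars
// do not.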
unsigned VecSize = Context.getTypeSize(VT); if (VecSize == 128 || VecSize == 256 || VecSize == 512) return true; } return false; } /// Returns true if this aggregate is small enough to be passed in SSE registers /// in the X86_VectorCall calling convention. Shared between x86_32 and x86_64. static bool isX86VectorCallAggregateSmallEnough(uint64_t NumMembers) { return NumMembers <= 4; } //===----------------------------------------------------------------------===// // X86-32 ABI Implementation //===----------------------------------------------------------------------===// /// \brief Similar to llvm::CCState, but for Clang. struct CCState { CCState(unsigned CC) : CC(CC), FreeRegs(0), FreeSSERegs(0) {} unsigned CC; unsigned FreeRegs; unsigned FreeSSERegs; }; /// X86_32ABIInfo - The X86-32 ABI information. class X86_32ABIInfo : public ABIInfo { enum Class { Integer, Float }; static const unsigned MinABIStackAlignInBytes = 4; bool IsDarwinVectorABI; bool IsRetSmallStructInRegABI; bool IsWin32StructABI; bool IsSoftFloatABI; bool IsMCUABI; unsigned DefaultNumRegisterParameters; static bool isRegisterSize(unsigned Size) { return (Size == 8 || Size == 16 || Size == 32 || Size == 64); } bool isHomogeneousAggregateBaseType(QualType Ty) const override { // FIXME: Assumes vectorcall is in use. return isX86VectorTypeForVectorCall(getContext(), Ty); } bool isHomogeneousAggregateSmallEnough(const Type *Ty, uint64_t NumMembers) const override { // FIXME: Assumes vectorcall is in use. return isX86VectorCallAggregateSmallEnough(NumMembers); } bool shouldReturnTypeInRegister(QualType Ty, ASTContext &Context) const; /// getIndirectResult - Give a source type \arg Ty, return a suitable result /// such that the argument will be passed in memory. ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const; ABIArgInfo getIndirectReturnResult(QualType Ty, CCState &State) const; /// \brief Return the alignment to use for the given type on the stack. unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const; Class classify(QualType Ty) const; ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const; ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const; /// \brief Updates the number of available free registers, returns /// true if any registers were allocated. bool updateFreeRegs(QualType Ty, CCState &State) const; bool shouldAggregateUseDirect(QualType Ty, CCState &State, bool &InReg, bool &NeedsPadding) const; bool shouldPrimitiveUseInReg(QualType Ty, CCState &State) const; /// \brief Rewrite the function info so that all memory arguments use /// inalloca. 
void rewriteWithInAlloca(CGFunctionInfo &FI) const; void addFieldToArgStruct(SmallVector &FrameFields, CharUnits &StackOffset, ABIArgInfo &Info, QualType Type) const; public: void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, unsigned NumRegisterParameters, bool SoftFloatABI) : ABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI), IsRetSmallStructInRegABI(RetSmallStructInRegABI), IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI), IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()), DefaultNumRegisterParameters(NumRegisterParameters) {} }; class X86_32TargetCodeGenInfo : public TargetCodeGenInfo { public: X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, unsigned NumRegisterParameters, bool SoftFloatABI) : TargetCodeGenInfo(new X86_32ABIInfo( CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI, NumRegisterParameters, SoftFloatABI)) {} static bool isStructReturnInRegABI( const llvm::Triple &Triple, const CodeGenOptions &Opts); void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override; int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { // Darwin uses different dwarf register numbers for EH. if (CGM.getTarget().getTriple().isOSDarwin()) return 5; return 4; } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF, StringRef Constraint, llvm::Type* Ty) const override { return X86AdjustInlineAsmType(CGF, Constraint, Ty); } void addReturnRegisterOutputs(CodeGenFunction &CGF, LValue ReturnValue, std::string &Constraints, std::vector &ResultRegTypes, std::vector &ResultTruncRegTypes, std::vector &ResultRegDests, std::string &AsmString, unsigned NumOutputs) const override; llvm::Constant * getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override { unsigned Sig = (0xeb << 0) | // jmp rel8 (0x06 << 8) | // .+0x08 ('F' << 16) | ('T' << 24); return llvm::ConstantInt::get(CGM.Int32Ty, Sig); } }; } /// Rewrite input constraint references after adding some output constraints. /// In the case where there is one output and one input and we add one output, /// we need to replace all operand references greater than or equal to 1: /// mov $0, $1 /// mov eax, $1 /// The result will be: /// mov $0, $2 /// mov eax, $2 static void rewriteInputConstraintReferences(unsigned FirstIn, unsigned NumNewOuts, std::string &AsmString) { std::string Buf; llvm::raw_string_ostream OS(Buf); size_t Pos = 0; while (Pos < AsmString.size()) { size_t DollarStart = AsmString.find('$', Pos); if (DollarStart == std::string::npos) DollarStart = AsmString.size(); size_t DollarEnd = AsmString.find_first_not_of('$', DollarStart); if (DollarEnd == std::string::npos) DollarEnd = AsmString.size(); OS << StringRef(&AsmString[Pos], DollarEnd - Pos); Pos = DollarEnd; size_t NumDollars = DollarEnd - DollarStart; if (NumDollars % 2 != 0 && Pos < AsmString.size()) { // We have an operand reference. 
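// The UBSan prologue signature built in getUBSanFunctionSignature above packs a
// two-byte "jmp rel8" plus the marker bytes 'F','T' into one little-endian i32.
// A quick standalone check of that arithmetic, with the values copied from the
// code above:
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Sig = (0xebu << 0) |   // jmp rel8 opcode
                       (0x06u << 8) |   // displacement 6: lands at .+0x08
                       ('F' << 16) | ('T' << 24);
  assert(Sig == 0x544606ebu);
  // Stored little-endian, the first two bytes decode as "eb 06", a short jump
  // over the 'F','T' marker and the four bytes that follow it.
  assert((Sig & 0xff) == 0xeb && ((Sig >> 8) & 0xff) == 0x06);
}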
size_t DigitStart = Pos; size_t DigitEnd = AsmString.find_first_not_of("0123456789", DigitStart); if (DigitEnd == std::string::npos) DigitEnd = AsmString.size(); StringRef OperandStr(&AsmString[DigitStart], DigitEnd - DigitStart); unsigned OperandIndex; if (!OperandStr.getAsInteger(10, OperandIndex)) { if (OperandIndex >= FirstIn) OperandIndex += NumNewOuts; OS << OperandIndex; } else { OS << OperandStr; } Pos = DigitEnd; } } AsmString = std::move(OS.str()); } /// Add output constraints for EAX:EDX because they are return registers. void X86_32TargetCodeGenInfo::addReturnRegisterOutputs( CodeGenFunction &CGF, LValue ReturnSlot, std::string &Constraints, std::vector &ResultRegTypes, std::vector &ResultTruncRegTypes, std::vector &ResultRegDests, std::string &AsmString, unsigned NumOutputs) const { uint64_t RetWidth = CGF.getContext().getTypeSize(ReturnSlot.getType()); // Use the EAX constraint if the width is 32 or smaller and EAX:EDX if it is // larger. if (!Constraints.empty()) Constraints += ','; if (RetWidth <= 32) { Constraints += "={eax}"; ResultRegTypes.push_back(CGF.Int32Ty); } else { // Use the 'A' constraint for EAX:EDX. Constraints += "=A"; ResultRegTypes.push_back(CGF.Int64Ty); } // Truncate EAX or EAX:EDX to an integer of the appropriate size. llvm::Type *CoerceTy = llvm::IntegerType::get(CGF.getLLVMContext(), RetWidth); ResultTruncRegTypes.push_back(CoerceTy); // Coerce the integer by bitcasting the return slot pointer. ReturnSlot.setAddress(CGF.Builder.CreateBitCast(ReturnSlot.getAddress(), CoerceTy->getPointerTo())); ResultRegDests.push_back(ReturnSlot); rewriteInputConstraintReferences(NumOutputs, 1, AsmString); } /// shouldReturnTypeInRegister - Determine if the given type should be /// returned in a register (for the Darwin and MCU ABI). bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty, ASTContext &Context) const { uint64_t Size = Context.getTypeSize(Ty); // For i386, type must be register sized. // For the MCU ABI, it only needs to be <= 8-byte if ((IsMCUABI && Size > 64) || (!IsMCUABI && !isRegisterSize(Size))) return false; if (Ty->isVectorType()) { // 64- and 128- bit vectors inside structures are not returned in // registers. if (Size == 64 || Size == 128) return false; return true; } // If this is a builtin, pointer, enum, complex type, member pointer, or // member function pointer it is ok. if (Ty->getAs() || Ty->hasPointerRepresentation() || Ty->isAnyComplexType() || Ty->isEnumeralType() || Ty->isBlockPointerType() || Ty->isMemberPointerType()) return true; // Arrays are treated like records. if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) return shouldReturnTypeInRegister(AT->getElementType(), Context); // Otherwise, it must be a record type. const RecordType *RT = Ty->getAs(); if (!RT) return false; // FIXME: Traverse bases here too. // Structure types are passed in register if all fields would be // passed in a register. for (const auto *FD : RT->getDecl()->fields()) { // Empty fields are ignored. if (isEmptyField(Context, FD, true)) continue; // Check fields recursively. if (!shouldReturnTypeInRegister(FD->getType(), Context)) return false; } return true; } ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy, CCState &State) const { // If the return value is indirect, then the hidden argument is consuming one // integer register. 
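// A condensed standalone equivalent of rewriteInputConstraintReferences above,
// assuming plain std::string in place of StringRef/raw_string_ostream;
// renumberOperands is an illustrative name. The odd/even run-of-'$' rule that
// distinguishes "$1" from the escaped "$$1" is kept as in the original.
#include <cassert>
#include <string>

std::string renumberOperands(const std::string &Asm, unsigned FirstIn,
                             unsigned NumNewOuts) {
  std::string Out;
  size_t Pos = 0;
  while (Pos < Asm.size()) {
    size_t DollarStart = Asm.find('$', Pos);
    if (DollarStart == std::string::npos) DollarStart = Asm.size();
    size_t DollarEnd = Asm.find_first_not_of('$', DollarStart);
    if (DollarEnd == std::string::npos) DollarEnd = Asm.size();
    Out.append(Asm, Pos, DollarEnd - Pos);
    Pos = DollarEnd;
    // An odd run of '$' means the digits that follow are an operand index.
    if ((DollarEnd - DollarStart) % 2 != 0 && Pos < Asm.size()) {
      size_t DigitEnd = Asm.find_first_not_of("0123456789", Pos);
      if (DigitEnd == std::string::npos) DigitEnd = Asm.size();
      if (DigitEnd > Pos) {
        unsigned Index = std::stoul(Asm.substr(Pos, DigitEnd - Pos));
        if (Index >= FirstIn) Index += NumNewOuts;   // shift past new outputs
        Out += std::to_string(Index);
        Pos = DigitEnd;
      }
    }
  }
  return Out;
}

int main() {
  // One original output, so inputs start at $1; adding one output shifts them.
  assert(renumberOperands("mov $0, $1; mov eax, $1", 1, 1) ==
         "mov $0, $2; mov eax, $2");
  assert(renumberOperands("add $$1, $0", 1, 1) == "add $$1, $0"); // $$ escaped
}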
if (State.FreeRegs) { --State.FreeRegs; if (!IsMCUABI) return getNaturalAlignIndirectInReg(RetTy); } return getNaturalAlignIndirect(RetTy, /*ByVal=*/false); } ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, CCState &State) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); const Type *Base = nullptr; uint64_t NumElts = 0; if (State.CC == llvm::CallingConv::X86_VectorCall && isHomogeneousAggregate(RetTy, Base, NumElts)) { // The LLVM struct type for such an aggregate should lower properly. return ABIArgInfo::getDirect(); } if (const VectorType *VT = RetTy->getAs()) { // On Darwin, some vectors are returned in registers. if (IsDarwinVectorABI) { uint64_t Size = getContext().getTypeSize(RetTy); // 128-bit vectors are a special case; they are returned in // registers and we need to make sure to pick a type the LLVM // backend will like. if (Size == 128) return ABIArgInfo::getDirect(llvm::VectorType::get( llvm::Type::getInt64Ty(getVMContext()), 2)); // Always return in register if it fits in a general purpose // register, or if it is 64 bits and has a single element. if ((Size == 8 || Size == 16 || Size == 32) || (Size == 64 && VT->getNumElements() == 1)) return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size)); return getIndirectReturnResult(RetTy, State); } return ABIArgInfo::getDirect(); } if (isAggregateTypeForABI(RetTy)) { if (const RecordType *RT = RetTy->getAs()) { // Structures with flexible arrays are always indirect. if (RT->getDecl()->hasFlexibleArrayMember()) return getIndirectReturnResult(RetTy, State); } // If specified, structs and unions are always indirect. if (!IsRetSmallStructInRegABI && !RetTy->isAnyComplexType()) return getIndirectReturnResult(RetTy, State); // Small structures which are register sized are generally returned // in a register. if (shouldReturnTypeInRegister(RetTy, getContext())) { uint64_t Size = getContext().getTypeSize(RetTy); // As a special-case, if the struct is a "single-element" struct, and // the field is of type "float" or "double", return it in a // floating-point register. (MSVC does not apply this special case.) // We apply a similar transformation for pointer types to improve the // quality of the generated IR. if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext())) if ((!IsWin32StructABI && SeltTy->isRealFloatingType()) || SeltTy->hasPointerRepresentation()) return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); // FIXME: We should be able to narrow this integer in cases with dead // padding. return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),Size)); } return getIndirectReturnResult(RetTy, State); } // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } static bool isSSEVectorType(ASTContext &Context, QualType Ty) { return Ty->getAs() && Context.getTypeSize(Ty) == 128; } static bool isRecordWithSSEVectorType(ASTContext &Context, QualType Ty) { const RecordType *RT = Ty->getAs(); if (!RT) return 0; const RecordDecl *RD = RT->getDecl(); // If this is a C++ record, check the bases first. 
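// A standalone sketch of the size gate in shouldReturnTypeInRegister above,
// assuming bit sizes are already known; the recursive per-field walk and the
// vector special cases are elided, and returnsInRegister is an illustrative
// name.
#include <cassert>

bool isRegisterSize(unsigned Bits) {
  return Bits == 8 || Bits == 16 || Bits == 32 || Bits == 64;
}

// i386 wants an exact register size; the IAMCU ABI only caps the size at
// 8 bytes, so e.g. a 24-bit packed struct is still eligible there.
bool returnsInRegister(unsigned Bits, bool IsMCUABI) {
  if (IsMCUABI)
    return Bits <= 64;
  return isRegisterSize(Bits);
}

int main() {
  assert(returnsInRegister(32, /*IsMCUABI=*/false));
  assert(!returnsInRegister(24, /*IsMCUABI=*/false));
  assert(returnsInRegister(24, /*IsMCUABI=*/true));
}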
if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) for (const auto &I : CXXRD->bases()) if (!isRecordWithSSEVectorType(Context, I.getType())) return false; for (const auto *i : RD->fields()) { QualType FT = i->getType(); if (isSSEVectorType(Context, FT)) return true; if (isRecordWithSSEVectorType(Context, FT)) return true; } return false; } unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty, unsigned Align) const { // Otherwise, if the alignment is less than or equal to the minimum ABI // alignment, just use the default; the backend will handle this. if (Align <= MinABIStackAlignInBytes) return 0; // Use default alignment. // On non-Darwin, the stack type alignment is always 4. if (!IsDarwinVectorABI) { // Set explicit alignment, since we may need to realign the top. return MinABIStackAlignInBytes; } // Otherwise, if the type contains an SSE vector type, the alignment is 16. if (Align >= 16 && (isSSEVectorType(getContext(), Ty) || isRecordWithSSEVectorType(getContext(), Ty))) return 16; return MinABIStackAlignInBytes; } ABIArgInfo X86_32ABIInfo::getIndirectResult(QualType Ty, bool ByVal, CCState &State) const { if (!ByVal) { if (State.FreeRegs) { --State.FreeRegs; // Non-byval indirects just use one pointer. if (!IsMCUABI) return getNaturalAlignIndirectInReg(Ty); } return getNaturalAlignIndirect(Ty, false); } // Compute the byval alignment. unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8; unsigned StackAlign = getTypeStackAlignInBytes(Ty, TypeAlign); if (StackAlign == 0) return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true); // If the stack alignment is less than the type alignment, realign the // argument. bool Realign = TypeAlign > StackAlign; return ABIArgInfo::getIndirect(CharUnits::fromQuantity(StackAlign), /*ByVal=*/true, Realign); } X86_32ABIInfo::Class X86_32ABIInfo::classify(QualType Ty) const { const Type *T = isSingleElementStruct(Ty, getContext()); if (!T) T = Ty.getTypePtr(); if (const BuiltinType *BT = T->getAs()) { BuiltinType::Kind K = BT->getKind(); if (K == BuiltinType::Float || K == BuiltinType::Double) return Float; } return Integer; } bool X86_32ABIInfo::updateFreeRegs(QualType Ty, CCState &State) const { if (!IsSoftFloatABI) { Class C = classify(Ty); if (C == Float) return false; } unsigned Size = getContext().getTypeSize(Ty); unsigned SizeInRegs = (Size + 31) / 32; if (SizeInRegs == 0) return false; if (!IsMCUABI) { if (SizeInRegs > State.FreeRegs) { State.FreeRegs = 0; return false; } } else { // The MCU psABI allows passing parameters in-reg even if there are // earlier parameters that are passed on the stack. Also, // it does not allow passing >8-byte structs in-register, // even if there are 3 free registers available. 
if (SizeInRegs > State.FreeRegs || SizeInRegs > 2) return false; } State.FreeRegs -= SizeInRegs; return true; } bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State, bool &InReg, bool &NeedsPadding) const { NeedsPadding = false; InReg = !IsMCUABI; if (!updateFreeRegs(Ty, State)) return false; if (IsMCUABI) return true; if (State.CC == llvm::CallingConv::X86_FastCall || State.CC == llvm::CallingConv::X86_VectorCall) { if (getContext().getTypeSize(Ty) <= 32 && State.FreeRegs) NeedsPadding = true; return false; } return true; } bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const { if (!updateFreeRegs(Ty, State)) return false; if (IsMCUABI) return false; if (State.CC == llvm::CallingConv::X86_FastCall || State.CC == llvm::CallingConv::X86_VectorCall) { if (getContext().getTypeSize(Ty) > 32) return false; return (Ty->isIntegralOrEnumerationType() || Ty->isPointerType() || Ty->isReferenceType()); } return true; } ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState &State) const { // FIXME: Set alignment on indirect arguments. Ty = useFirstFieldIfTransparentUnion(Ty); // Check with the C++ ABI first. const RecordType *RT = Ty->getAs(); if (RT) { CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()); if (RAA == CGCXXABI::RAA_Indirect) { return getIndirectResult(Ty, false, State); } else if (RAA == CGCXXABI::RAA_DirectInMemory) { // The field index doesn't matter, we'll fix it up later. return ABIArgInfo::getInAlloca(/*FieldIndex=*/0); } } // vectorcall adds the concept of a homogenous vector aggregate, similar // to other targets. const Type *Base = nullptr; uint64_t NumElts = 0; if (State.CC == llvm::CallingConv::X86_VectorCall && isHomogeneousAggregate(Ty, Base, NumElts)) { if (State.FreeSSERegs >= NumElts) { State.FreeSSERegs -= NumElts; if (Ty->isBuiltinType() || Ty->isVectorType()) return ABIArgInfo::getDirect(); return ABIArgInfo::getExpand(); } return getIndirectResult(Ty, /*ByVal=*/false, State); } if (isAggregateTypeForABI(Ty)) { if (RT) { // Structs are always byval on win32, regardless of what they contain. if (IsWin32StructABI) return getIndirectResult(Ty, true, State); // Structures with flexible arrays are always indirect. if (RT->getDecl()->hasFlexibleArrayMember()) return getIndirectResult(Ty, true, State); } // Ignore empty structs/unions. if (isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); llvm::LLVMContext &LLVMContext = getVMContext(); llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext); bool NeedsPadding, InReg; if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) { unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32; SmallVector Elements(SizeInRegs, Int32); llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements); if (InReg) return ABIArgInfo::getDirectInReg(Result); else return ABIArgInfo::getDirect(Result); } llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr; // Expand small (<= 128-bit) record types when we know that the stack layout // of those arguments will match the struct. This is important because the // LLVM backend isn't smart enough to remove byval, which inhibits many // optimizations. // Don't do this for the MCU if there are still free integer registers // (see X86_64 ABI for full explanation). 
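// A standalone sketch of the register accounting in updateFreeRegs above,
// assuming sizes in bits and a free-register count seeded the way computeInfo
// below seeds it (3 for IAMCU, 2 for fastcall/vectorcall, otherwise -mregparm);
// takeRegs is an illustrative name.
#include <cassert>

bool takeRegs(unsigned SizeInBits, unsigned &FreeRegs, bool IsMCUABI) {
  unsigned SizeInRegs = (SizeInBits + 31) / 32;   // 32-bit GPR units
  if (SizeInRegs == 0)
    return false;
  if (!IsMCUABI) {
    if (SizeInRegs > FreeRegs) {
      FreeRegs = 0;          // a too-big argument burns the remaining regs
      return false;
    }
  } else if (SizeInRegs > FreeRegs || SizeInRegs > 2) {
    return false;            // MCU: later arguments may still use the regs
  }
  FreeRegs -= SizeInRegs;
  return true;
}

int main() {
  unsigned Regs = 2;                               // e.g. fastcall
  assert(takeRegs(32, Regs, false) && Regs == 1);
  assert(!takeRegs(64, Regs, false) && Regs == 0); // needs 2, only 1 left
  unsigned McuRegs = 3;
  assert(!takeRegs(96, McuRegs, true) && McuRegs == 3); // >2 regs: stack
}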
if (getContext().getTypeSize(Ty) <= 4*32 && canExpandIndirectArgument(Ty, getContext()) && (!IsMCUABI || State.FreeRegs == 0)) return ABIArgInfo::getExpandWithPadding( State.CC == llvm::CallingConv::X86_FastCall || State.CC == llvm::CallingConv::X86_VectorCall, PaddingType); return getIndirectResult(Ty, true, State); } if (const VectorType *VT = Ty->getAs()) { // On Darwin, some vectors are passed in memory, we handle this by passing // it as an i8/i16/i32/i64. if (IsDarwinVectorABI) { uint64_t Size = getContext().getTypeSize(Ty); if ((Size == 8 || Size == 16 || Size == 32) || (Size == 64 && VT->getNumElements() == 1)) return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size)); } if (IsX86_MMXType(CGT.ConvertType(Ty))) return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 64)); return ABIArgInfo::getDirect(); } if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); bool InReg = shouldPrimitiveUseInReg(Ty, State); if (Ty->isPromotableIntegerType()) { if (InReg) return ABIArgInfo::getExtendInReg(); return ABIArgInfo::getExtend(); } if (InReg) return ABIArgInfo::getDirectInReg(); return ABIArgInfo::getDirect(); } void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const { CCState State(FI.getCallingConvention()); if (IsMCUABI) State.FreeRegs = 3; else if (State.CC == llvm::CallingConv::X86_FastCall) State.FreeRegs = 2; else if (State.CC == llvm::CallingConv::X86_VectorCall) { State.FreeRegs = 2; State.FreeSSERegs = 6; } else if (FI.getHasRegParm()) State.FreeRegs = FI.getRegParm(); else State.FreeRegs = DefaultNumRegisterParameters; if (!getCXXABI().classifyReturnType(FI)) { FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), State); } else if (FI.getReturnInfo().isIndirect()) { // The C++ ABI is not aware of register usage, so we have to check if the // return value was sret and put it in a register ourselves if appropriate. if (State.FreeRegs) { --State.FreeRegs; // The sret parameter consumes a register. if (!IsMCUABI) FI.getReturnInfo().setInReg(true); } } // The chain argument effectively gives us another free register. if (FI.isChainCall()) ++State.FreeRegs; bool UsedInAlloca = false; for (auto &I : FI.arguments()) { I.info = classifyArgumentType(I.type, State); UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); } // If we needed to use inalloca for any argument, do a second pass and rewrite // all the memory arguments to use inalloca. if (UsedInAlloca) rewriteWithInAlloca(FI); } void X86_32ABIInfo::addFieldToArgStruct(SmallVector &FrameFields, CharUnits &StackOffset, ABIArgInfo &Info, QualType Type) const { // Arguments are always 4-byte-aligned. CharUnits FieldAlign = CharUnits::fromQuantity(4); assert(StackOffset.isMultipleOf(FieldAlign) && "unaligned inalloca struct"); Info = ABIArgInfo::getInAlloca(FrameFields.size()); FrameFields.push_back(CGT.ConvertTypeForMem(Type)); StackOffset += getContext().getTypeSizeInChars(Type); // Insert padding bytes to respect alignment. CharUnits FieldEnd = StackOffset; StackOffset = FieldEnd.RoundUpToAlignment(FieldAlign); if (StackOffset != FieldEnd) { CharUnits NumBytes = StackOffset - FieldEnd; llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext()); Ty = llvm::ArrayType::get(Ty, NumBytes.getQuantity()); FrameFields.push_back(Ty); } } static bool isArgInAlloca(const ABIArgInfo &Info) { // Leave ignored and inreg arguments alone. 
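// A standalone sketch of the inalloca frame layout done by addFieldToArgStruct
// above, assuming sizes in bytes; it reproduces the "append the field, then pad
// the running offset back up to a 4-byte boundary" step without the LLVM type
// machinery. Slot and addField are illustrative names.
#include <cassert>
#include <vector>

struct Slot { unsigned Size; bool IsPadding; };

void addField(std::vector<Slot> &Frame, unsigned &Offset, unsigned FieldSize) {
  assert(Offset % 4 == 0 && "unaligned inalloca struct");
  Frame.push_back({FieldSize, /*IsPadding=*/false});
  Offset += FieldSize;
  unsigned Rounded = (Offset + 3) & ~3u;      // round up to 4-byte alignment
  if (Rounded != Offset) {
    Frame.push_back({Rounded - Offset, /*IsPadding=*/true});
    Offset = Rounded;
  }
}

int main() {
  std::vector<Slot> Frame;
  unsigned Offset = 0;
  addField(Frame, Offset, 4);   // e.g. an int: already aligned, no padding
  addField(Frame, Offset, 1);   // e.g. a struct holding one char: pad by 3
  assert(Offset == 8 && Frame.size() == 3 && Frame.back().Size == 3);
}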
switch (Info.getKind()) { case ABIArgInfo::InAlloca: return true; case ABIArgInfo::Indirect: assert(Info.getIndirectByVal()); return true; case ABIArgInfo::Ignore: return false; case ABIArgInfo::Direct: case ABIArgInfo::Extend: case ABIArgInfo::Expand: if (Info.getInReg()) return false; return true; } llvm_unreachable("invalid enum"); } void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const { assert(IsWin32StructABI && "inalloca only supported on win32"); // Build a packed struct type for all of the arguments in memory. SmallVector FrameFields; // The stack alignment is always 4. CharUnits StackAlign = CharUnits::fromQuantity(4); CharUnits StackOffset; CGFunctionInfo::arg_iterator I = FI.arg_begin(), E = FI.arg_end(); // Put 'this' into the struct before 'sret', if necessary. bool IsThisCall = FI.getCallingConvention() == llvm::CallingConv::X86_ThisCall; ABIArgInfo &Ret = FI.getReturnInfo(); if (Ret.isIndirect() && Ret.isSRetAfterThis() && !IsThisCall && isArgInAlloca(I->info)) { addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type); ++I; } // Put the sret parameter into the inalloca struct if it's in memory. if (Ret.isIndirect() && !Ret.getInReg()) { CanQualType PtrTy = getContext().getPointerType(FI.getReturnType()); addFieldToArgStruct(FrameFields, StackOffset, Ret, PtrTy); // On Windows, the hidden sret parameter is always returned in eax. Ret.setInAllocaSRet(IsWin32StructABI); } // Skip the 'this' parameter in ecx. if (IsThisCall) ++I; // Put arguments passed in memory into the struct. for (; I != E; ++I) { if (isArgInAlloca(I->info)) addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type); } FI.setArgStruct(llvm::StructType::get(getVMContext(), FrameFields, /*isPacked=*/true), StackAlign); } Address X86_32ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { auto TypeInfo = getContext().getTypeInfoInChars(Ty); // x86-32 changes the alignment of certain arguments on the stack. // // Just messing with TypeInfo like this works because we never pass // anything indirectly. TypeInfo.second = CharUnits::fromQuantity( getTypeStackAlignInBytes(Ty, TypeInfo.second.getQuantity())); return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, TypeInfo, CharUnits::fromQuantity(4), /*AllowHigherAlign*/ true); } bool X86_32TargetCodeGenInfo::isStructReturnInRegABI( const llvm::Triple &Triple, const CodeGenOptions &Opts) { assert(Triple.getArch() == llvm::Triple::x86); switch (Opts.getStructReturnConvention()) { case CodeGenOptions::SRCK_Default: break; case CodeGenOptions::SRCK_OnStack: // -fpcc-struct-return return false; case CodeGenOptions::SRCK_InRegs: // -freg-struct-return return true; } if (Triple.isOSDarwin() || Triple.isOSIAMCU()) return true; switch (Triple.getOS()) { case llvm::Triple::DragonFly: case llvm::Triple::FreeBSD: case llvm::Triple::OpenBSD: case llvm::Triple::Bitrig: case llvm::Triple::Win32: return true; default: return false; } } void X86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { if (const FunctionDecl *FD = dyn_cast_or_null(D)) { if (FD->hasAttr()) { // Get the LLVM function. llvm::Function *Fn = cast(GV); // Now add the 'alignstack' attribute with a value of 16. 
llvm::AttrBuilder B; B.addStackAlignmentAttr(16); Fn->addAttributes(llvm::AttributeSet::FunctionIndex, llvm::AttributeSet::get(CGM.getLLVMContext(), llvm::AttributeSet::FunctionIndex, B)); } } } bool X86_32TargetCodeGenInfo::initDwarfEHRegSizeTable( CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { CodeGen::CGBuilderTy &Builder = CGF.Builder; llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4); // 0-7 are the eight integer registers; the order is different // on Darwin (for EH), but the range is the same. // 8 is %eip. AssignToArrayRange(Builder, Address, Four8, 0, 8); if (CGF.CGM.getTarget().getTriple().isOSDarwin()) { // 12-16 are st(0..4). Not sure why we stop at 4. // These have size 16, which is sizeof(long double) on // platforms with 8-byte alignment for that type. llvm::Value *Sixteen8 = llvm::ConstantInt::get(CGF.Int8Ty, 16); AssignToArrayRange(Builder, Address, Sixteen8, 12, 16); } else { // 9 is %eflags, which doesn't get a size on Darwin for some // reason. Builder.CreateAlignedStore( Four8, Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, Address, 9), CharUnits::One()); // 11-16 are st(0..5). Not sure why we stop at 5. // These have size 12, which is sizeof(long double) on // platforms with 4-byte alignment for that type. llvm::Value *Twelve8 = llvm::ConstantInt::get(CGF.Int8Ty, 12); AssignToArrayRange(Builder, Address, Twelve8, 11, 16); } return false; } //===----------------------------------------------------------------------===// // X86-64 ABI Implementation //===----------------------------------------------------------------------===// namespace { /// The AVX ABI level for X86 targets. enum class X86AVXABILevel { None, AVX, AVX512 }; /// \p returns the size in bits of the largest (native) vector for \p AVXLevel. static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) { switch (AVXLevel) { case X86AVXABILevel::AVX512: return 512; case X86AVXABILevel::AVX: return 256; case X86AVXABILevel::None: return 128; } llvm_unreachable("Unknown AVXLevel"); } /// X86_64ABIInfo - The X86_64 ABI information. class X86_64ABIInfo : public ABIInfo { enum Class { Integer = 0, SSE, SSEUp, X87, X87Up, ComplexX87, NoClass, Memory }; /// merge - Implement the X86_64 ABI merging algorithm. /// /// Merge an accumulating classification \arg Accum with a field /// classification \arg Field. /// /// \param Accum - The accumulating classification. This should /// always be either NoClass or the result of a previous merge /// call. In addition, this should never be Memory (the caller /// should just return Memory for the aggregate). static Class merge(Class Accum, Class Field); /// postMerge - Implement the X86_64 ABI post merging algorithm. /// /// Post merger cleanup, reduces a malformed Hi and Lo pair to /// final MEMORY or SSE classes when necessary. /// /// \param AggregateSize - The size of the current aggregate in /// the classification process. /// /// \param Lo - The classification for the parts of the type /// residing in the low word of the containing object. /// /// \param Hi - The classification for the parts of the type /// residing in the higher words of the containing object. /// void postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const; /// classify - Determine the x86_64 register classes in which the /// given type T should be passed. /// /// \param Lo - The classification for the parts of the type /// residing in the low word of the containing object. 
/// /// \param Hi - The classification for the parts of the type /// residing in the high word of the containing object. /// /// \param OffsetBase - The bit offset of this type in the /// containing object. Some parameters are classified different /// depending on whether they straddle an eightbyte boundary. /// /// \param isNamedArg - Whether the argument in question is a "named" /// argument, as used in AMD64-ABI 3.5.7. /// /// If a word is unused its result will be NoClass; if a type should /// be passed in Memory then at least the classification of \arg Lo /// will be Memory. /// /// The \arg Lo class will be NoClass iff the argument is ignored. /// /// If the \arg Lo class is ComplexX87, then the \arg Hi class will /// also be ComplexX87. void classify(QualType T, uint64_t OffsetBase, Class &Lo, Class &Hi, bool isNamedArg) const; llvm::Type *GetByteVectorType(QualType Ty) const; llvm::Type *GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset, QualType SourceTy, unsigned SourceOffset) const; llvm::Type *GetINTEGERTypeAtOffset(llvm::Type *IRType, unsigned IROffset, QualType SourceTy, unsigned SourceOffset) const; /// getIndirectResult - Give a source type \arg Ty, return a suitable result /// such that the argument will be returned in memory. ABIArgInfo getIndirectReturnResult(QualType Ty) const; /// getIndirectResult - Give a source type \arg Ty, return a suitable result /// such that the argument will be passed in memory. /// /// \param freeIntRegs - The number of free integer registers remaining /// available. ABIArgInfo getIndirectResult(QualType Ty, unsigned freeIntRegs) const; ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType Ty, unsigned freeIntRegs, unsigned &neededInt, unsigned &neededSSE, bool isNamedArg) const; bool IsIllegalVectorType(QualType Ty) const; /// The 0.98 ABI revision clarified a lot of ambiguities, /// unfortunately in ways that were not always consistent with /// certain previous compilers. In particular, platforms which /// required strict binary compatibility with older versions of GCC /// may need to exempt themselves. bool honorsRevision0_98() const { return !getTarget().getTriple().isOSDarwin(); } X86AVXABILevel AVXLevel; // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on // 64-bit hardware. bool Has64BitPointers; public: X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) : ABIInfo(CGT), AVXLevel(AVXLevel), Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) { } bool isPassedUsingAVXType(QualType type) const { unsigned neededInt, neededSSE; // The freeIntRegs argument doesn't matter here. ABIArgInfo info = classifyArgumentType(type, 0, neededInt, neededSSE, /*isNamedArg*/true); if (info.isDirect()) { llvm::Type *ty = info.getCoerceToType(); if (llvm::VectorType *vectorTy = dyn_cast_or_null(ty)) return (vectorTy->getBitWidth() > 128); } return false; } void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; bool has64BitPointers() const { return Has64BitPointers; } }; /// WinX86_64ABIInfo - The Windows X86_64 ABI information. 
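// A standalone sketch of the vector-width rules above, assuming the vector's
// size in bits is already known. The AVX level caps what can legally travel in
// one register, and anything wider than 128 bits counts as "passed using an
// AVX type" for the unprototyped-call %al handling below. IsIllegalVectorType's
// size test appears later in the file; it is folded in here for illustration.
#include <cassert>

enum class AVXLevel { None, AVX, AVX512 };

unsigned nativeVectorBits(AVXLevel L) {
  switch (L) {
  case AVXLevel::AVX512: return 512;
  case AVXLevel::AVX:    return 256;
  case AVXLevel::None:   return 128;
  }
  return 128; // not reached; keeps non-exhaustive-switch warnings quiet
}

bool isIllegalVector(unsigned Bits, AVXLevel L) {
  return Bits <= 64 || Bits > nativeVectorBits(L);
}

bool passedUsingAVXType(unsigned Bits, AVXLevel L) {
  return !isIllegalVector(Bits, L) && Bits > 128;
}

int main() {
  assert(isIllegalVector(256, AVXLevel::None));   // __m256 without AVX
  assert(passedUsingAVXType(256, AVXLevel::AVX)); // __m256 with AVX
  assert(!passedUsingAVXType(128, AVXLevel::AVX512));
}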
class WinX86_64ABIInfo : public ABIInfo { public: WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT), IsMingw64(getTarget().getTriple().isWindowsGNUEnvironment()) {} void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; bool isHomogeneousAggregateBaseType(QualType Ty) const override { // FIXME: Assumes vectorcall is in use. return isX86VectorTypeForVectorCall(getContext(), Ty); } bool isHomogeneousAggregateSmallEnough(const Type *Ty, uint64_t NumMembers) const override { // FIXME: Assumes vectorcall is in use. return isX86VectorCallAggregateSmallEnough(NumMembers); } private: ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType) const; bool IsMingw64; }; class X86_64TargetCodeGenInfo : public TargetCodeGenInfo { public: X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) : TargetCodeGenInfo(new X86_64ABIInfo(CGT, AVXLevel)) {} const X86_64ABIInfo &getABIInfo() const { return static_cast(TargetCodeGenInfo::getABIInfo()); } int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { return 7; } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override { llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8); // 0-15 are the 16 integer registers. // 16 is %rip. AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16); return false; } llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF, StringRef Constraint, llvm::Type* Ty) const override { return X86AdjustInlineAsmType(CGF, Constraint, Ty); } bool isNoProtoCallVariadic(const CallArgList &args, const FunctionNoProtoType *fnType) const override { // The default CC on x86-64 sets %al to the number of SSA // registers used, and GCC sets this when calling an unprototyped // function, so we override the default behavior. However, don't do // that when AVX types are involved: the ABI explicitly states it is // undefined, and it doesn't work in practice because of how the ABI // defines varargs anyway. if (fnType->getCallConv() == CC_C) { bool HasAVXType = false; for (CallArgList::const_iterator it = args.begin(), ie = args.end(); it != ie; ++it) { if (getABIInfo().isPassedUsingAVXType(it->Ty)) { HasAVXType = true; break; } } if (!HasAVXType) return true; } return TargetCodeGenInfo::isNoProtoCallVariadic(args, fnType); } llvm::Constant * getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override { unsigned Sig; if (getABIInfo().has64BitPointers()) Sig = (0xeb << 0) | // jmp rel8 (0x0a << 8) | // .+0x0c ('F' << 16) | ('T' << 24); else Sig = (0xeb << 0) | // jmp rel8 (0x06 << 8) | // .+0x08 ('F' << 16) | ('T' << 24); return llvm::ConstantInt::get(CGM.Int32Ty, Sig); } }; class PS4TargetCodeGenInfo : public X86_64TargetCodeGenInfo { public: PS4TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) : X86_64TargetCodeGenInfo(CGT, AVXLevel) {} void getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const override { Opt = "\01"; // If the argument contains a space, enclose it in quotes. if (Lib.find(" ") != StringRef::npos) Opt += "\"" + Lib.str() + "\""; else Opt += Lib; } }; static std::string qualifyWindowsLibrary(llvm::StringRef Lib) { // If the argument does not end in .lib, automatically add the suffix. // If the argument contains a space, enclose it in quotes. // This matches the behavior of MSVC. bool Quote = (Lib.find(" ") != StringRef::npos); std::string ArgStr = Quote ? 
"\"" : ""; ArgStr += Lib; if (!Lib.endswith_lower(".lib")) ArgStr += ".lib"; ArgStr += Quote ? "\"" : ""; return ArgStr; } class WinX86_32TargetCodeGenInfo : public X86_32TargetCodeGenInfo { public: WinX86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, unsigned NumRegisterParameters) : X86_32TargetCodeGenInfo(CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI, NumRegisterParameters, false) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override; void getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const override { Opt = "/DEFAULTLIB:"; Opt += qualifyWindowsLibrary(Lib); } void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, llvm::SmallString<32> &Opt) const override { Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; } }; static void addStackProbeSizeTargetAttribute(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) { if (D && isa(D)) { if (CGM.getCodeGenOpts().StackProbeSize != 4096) { llvm::Function *Fn = cast(GV); Fn->addFnAttr("stack-probe-size", llvm::utostr(CGM.getCodeGenOpts().StackProbeSize)); } } } void WinX86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); addStackProbeSizeTargetAttribute(D, GV, CGM); } class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo { public: WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) : TargetCodeGenInfo(new WinX86_64ABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override; int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { return 7; } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override { llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8); // 0-15 are the 16 integer registers. // 16 is %rip. AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16); return false; } void getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const override { Opt = "/DEFAULTLIB:"; Opt += qualifyWindowsLibrary(Lib); } void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, llvm::SmallString<32> &Opt) const override { Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; } }; void WinX86_64TargetCodeGenInfo::setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); addStackProbeSizeTargetAttribute(D, GV, CGM); } } void X86_64ABIInfo::postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const { // AMD64-ABI 3.2.3p2: Rule 5. Then a post merger cleanup is done: // // (a) If one of the classes is Memory, the whole argument is passed in // memory. // // (b) If X87UP is not preceded by X87, the whole argument is passed in // memory. // // (c) If the size of the aggregate exceeds two eightbytes and the first // eightbyte isn't SSE or any other eightbyte isn't SSEUP, the whole // argument is passed in memory. NOTE: This is necessary to keep the // ABI working for processors that don't support the __m256 type. // // (d) If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE. // // Some of these are enforced by the merging logic. 
Others can arise // only with unions; for example: // union { _Complex double; unsigned; } // // Note that clauses (b) and (c) were added in 0.98. // if (Hi == Memory) Lo = Memory; if (Hi == X87Up && Lo != X87 && honorsRevision0_98()) Lo = Memory; if (AggregateSize > 128 && (Lo != SSE || Hi != SSEUp)) Lo = Memory; if (Hi == SSEUp && Lo != SSE) Hi = SSE; } X86_64ABIInfo::Class X86_64ABIInfo::merge(Class Accum, Class Field) { // AMD64-ABI 3.2.3p2: Rule 4. Each field of an object is // classified recursively so that always two fields are // considered. The resulting class is calculated according to // the classes of the fields in the eightbyte: // // (a) If both classes are equal, this is the resulting class. // // (b) If one of the classes is NO_CLASS, the resulting class is // the other class. // // (c) If one of the classes is MEMORY, the result is the MEMORY // class. // // (d) If one of the classes is INTEGER, the result is the // INTEGER. // // (e) If one of the classes is X87, X87UP, COMPLEX_X87 class, // MEMORY is used as class. // // (f) Otherwise class SSE is used. // Accum should never be memory (we should have returned) or // ComplexX87 (because this cannot be passed in a structure). assert((Accum != Memory && Accum != ComplexX87) && "Invalid accumulated classification during merge."); if (Accum == Field || Field == NoClass) return Accum; if (Field == Memory) return Memory; if (Accum == NoClass) return Field; if (Accum == Integer || Field == Integer) return Integer; if (Field == X87 || Field == X87Up || Field == ComplexX87 || Accum == X87 || Accum == X87Up) return Memory; return SSE; } void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo, Class &Hi, bool isNamedArg) const { // FIXME: This code can be simplified by introducing a simple value class for // Class pairs with appropriate constructor methods for the various // situations. // FIXME: Some of the split computations are wrong; unaligned vectors // shouldn't be passed in registers for example, so there is no chance they // can straddle an eightbyte. Verify & simplify. Lo = Hi = NoClass; Class &Current = OffsetBase < 64 ? Lo : Hi; Current = Memory; if (const BuiltinType *BT = Ty->getAs()) { BuiltinType::Kind k = BT->getKind(); if (k == BuiltinType::Void) { Current = NoClass; } else if (k == BuiltinType::Int128 || k == BuiltinType::UInt128) { Lo = Integer; Hi = Integer; } else if (k >= BuiltinType::Bool && k <= BuiltinType::LongLong) { Current = Integer; } else if (k == BuiltinType::Float || k == BuiltinType::Double) { Current = SSE; } else if (k == BuiltinType::LongDouble) { const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); if (LDF == &llvm::APFloat::IEEEquad) { Lo = SSE; Hi = SSEUp; } else if (LDF == &llvm::APFloat::x87DoubleExtended) { Lo = X87; Hi = X87Up; } else if (LDF == &llvm::APFloat::IEEEdouble) { Current = SSE; } else llvm_unreachable("unexpected long double representation!"); } // FIXME: _Decimal32 and _Decimal64 are SSE. // FIXME: _float128 and _Decimal128 are (SSE, SSEUp). return; } if (const EnumType *ET = Ty->getAs()) { // Classify the underlying integer type. classify(ET->getDecl()->getIntegerType(), OffsetBase, Lo, Hi, isNamedArg); return; } if (Ty->hasPointerRepresentation()) { Current = Integer; return; } if (Ty->isMemberPointerType()) { if (Ty->isMemberFunctionPointerType()) { if (Has64BitPointers) { // If Has64BitPointers, this is an {i64, i64}, so classify both // Lo and Hi now. Lo = Hi = Integer; } else { // Otherwise, with 32-bit pointers, this is an {i32, i32}. 
If that // straddles an eightbyte boundary, Hi should be classified as well. uint64_t EB_FuncPtr = (OffsetBase) / 64; uint64_t EB_ThisAdj = (OffsetBase + 64 - 1) / 64; if (EB_FuncPtr != EB_ThisAdj) { Lo = Hi = Integer; } else { Current = Integer; } } } else { Current = Integer; } return; } if (const VectorType *VT = Ty->getAs()) { uint64_t Size = getContext().getTypeSize(VT); if (Size == 1 || Size == 8 || Size == 16 || Size == 32) { // gcc passes the following as integer: // 4 bytes - <4 x char>, <2 x short>, <1 x int>, <1 x float> // 2 bytes - <2 x char>, <1 x short> // 1 byte - <1 x char> Current = Integer; // If this type crosses an eightbyte boundary, it should be // split. uint64_t EB_Lo = (OffsetBase) / 64; uint64_t EB_Hi = (OffsetBase + Size - 1) / 64; if (EB_Lo != EB_Hi) Hi = Lo; } else if (Size == 64) { // gcc passes <1 x double> in memory. :( if (VT->getElementType()->isSpecificBuiltinType(BuiltinType::Double)) return; // gcc passes <1 x long long> as INTEGER. if (VT->getElementType()->isSpecificBuiltinType(BuiltinType::LongLong) || VT->getElementType()->isSpecificBuiltinType(BuiltinType::ULongLong) || VT->getElementType()->isSpecificBuiltinType(BuiltinType::Long) || VT->getElementType()->isSpecificBuiltinType(BuiltinType::ULong)) Current = Integer; else Current = SSE; // If this type crosses an eightbyte boundary, it should be // split. if (OffsetBase && OffsetBase != 64) Hi = Lo; } else if (Size == 128 || (isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) { // Arguments of 256-bits are split into four eightbyte chunks. The // least significant one belongs to class SSE and all the others to class // SSEUP. The original Lo and Hi design considers that types can't be // greater than 128-bits, so a 64-bit split in Hi and Lo makes sense. // This design isn't correct for 256-bits, but since there're no cases // where the upper parts would need to be inspected, avoid adding // complexity and just consider Hi to match the 64-256 part. // // Note that per 3.5.7 of AMD64-ABI, 256-bit args are only passed in // registers if they are "named", i.e. not part of the "..." of a // variadic function. // // Similarly, per 3.2.3. of the AVX512 draft, 512-bits ("named") args are // split into eight eightbyte chunks, one SSE and seven SSEUP. Lo = SSE; Hi = SSEUp; } return; } if (const ComplexType *CT = Ty->getAs()) { QualType ET = getContext().getCanonicalType(CT->getElementType()); uint64_t Size = getContext().getTypeSize(Ty); if (ET->isIntegralOrEnumerationType()) { if (Size <= 64) Current = Integer; else if (Size <= 128) Lo = Hi = Integer; } else if (ET == getContext().FloatTy) { Current = SSE; } else if (ET == getContext().DoubleTy) { Lo = Hi = SSE; } else if (ET == getContext().LongDoubleTy) { const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); if (LDF == &llvm::APFloat::IEEEquad) Current = Memory; else if (LDF == &llvm::APFloat::x87DoubleExtended) Current = ComplexX87; else if (LDF == &llvm::APFloat::IEEEdouble) Lo = Hi = SSE; else llvm_unreachable("unexpected long double representation!"); } // If this complex type crosses an eightbyte boundary then it // should be split. uint64_t EB_Real = (OffsetBase) / 64; uint64_t EB_Imag = (OffsetBase + getContext().getTypeSize(ET)) / 64; if (Hi == NoClass && EB_Real != EB_Imag) Hi = Lo; return; } if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) { // Arrays are treated like structures. uint64_t Size = getContext().getTypeSize(Ty); // AMD64-ABI 3.2.3p2: Rule 1. 
If the size of an object is larger // than four eightbytes, ..., it has class MEMORY. if (Size > 256) return; // AMD64-ABI 3.2.3p2: Rule 1. If ..., or it contains unaligned // fields, it has class MEMORY. // // Only need to check alignment of array base. if (OffsetBase % getContext().getTypeAlign(AT->getElementType())) return; // Otherwise implement simplified merge. We could be smarter about // this, but it isn't worth it and would be harder to verify. Current = NoClass; uint64_t EltSize = getContext().getTypeSize(AT->getElementType()); uint64_t ArraySize = AT->getSize().getZExtValue(); // The only case a 256-bit wide vector could be used is when the array // contains a single 256-bit element. Since Lo and Hi logic isn't extended // to work for sizes wider than 128, early check and fallback to memory. if (Size > 128 && EltSize != 256) return; for (uint64_t i=0, Offset=OffsetBase; igetElementType(), Offset, FieldLo, FieldHi, isNamedArg); Lo = merge(Lo, FieldLo); Hi = merge(Hi, FieldHi); if (Lo == Memory || Hi == Memory) break; } postMerge(Size, Lo, Hi); assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp array classification."); return; } if (const RecordType *RT = Ty->getAs()) { uint64_t Size = getContext().getTypeSize(Ty); // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger // than four eightbytes, ..., it has class MEMORY. if (Size > 256) return; // AMD64-ABI 3.2.3p2: Rule 2. If a C++ object has either a non-trivial // copy constructor or a non-trivial destructor, it is passed by invisible // reference. if (getRecordArgABI(RT, getCXXABI())) return; const RecordDecl *RD = RT->getDecl(); // Assume variable sized types are passed in memory. if (RD->hasFlexibleArrayMember()) return; const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); // Reset Lo class, this will be recomputed. Current = NoClass; // If this is a C++ record, classify the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { for (const auto &I : CXXRD->bases()) { assert(!I.isVirtual() && !I.getType()->isDependentType() && "Unexpected base class!"); const CXXRecordDecl *Base = cast(I.getType()->getAs()->getDecl()); // Classify this field. // // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate exceeds a // single eightbyte, each is classified separately. Each eightbyte gets // initialized to class NO_CLASS. Class FieldLo, FieldHi; uint64_t Offset = OffsetBase + getContext().toBits(Layout.getBaseClassOffset(Base)); classify(I.getType(), Offset, FieldLo, FieldHi, isNamedArg); Lo = merge(Lo, FieldLo); Hi = merge(Hi, FieldHi); if (Lo == Memory || Hi == Memory) { postMerge(Size, Lo, Hi); return; } } } // Classify the fields one at a time, merging the results. unsigned idx = 0; for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); i != e; ++i, ++idx) { uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx); bool BitField = i->isBitField(); // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger than // four eightbytes, or it contains unaligned fields, it has class MEMORY. // // The only case a 256-bit wide vector could be used is when the struct // contains a single 256-bit element. Since Lo and Hi logic isn't extended // to work for sizes wider than 128, early check and fallback to memory. // if (Size > 128 && getContext().getTypeSize(i->getType()) != 256) { Lo = Memory; postMerge(Size, Lo, Hi); return; } // Note, skip this test for bit-fields, see below. 
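// A standalone transcription of the merge() rules quoted above (AMD64-ABI
// 3.2.3p2 Rule 4), using the same Class names in a reduced setting;
// postMerge's fix-ups are deliberately left out, and mergeClasses is an
// illustrative name.
#include <cassert>

enum Class { NoClass, Integer, SSE, SSEUp, X87, X87Up, ComplexX87, Memory };

Class mergeClasses(Class Accum, Class Field) {
  if (Accum == Field || Field == NoClass) return Accum;     // (a), (b)
  if (Field == Memory) return Memory;                       // (c)
  if (Accum == NoClass) return Field;                       // (b)
  if (Accum == Integer || Field == Integer) return Integer; // (d)
  if (Field == X87 || Field == X87Up || Field == ComplexX87 ||
      Accum == X87 || Accum == X87Up)
    return Memory;                                          // (e)
  return SSE;                                               // (f)
}

int main() {
  // struct { double d; int i; }: the first eightbyte classifies SSE, the
  // second INTEGER, so the struct travels in one XMM and one GPR.
  assert(mergeClasses(NoClass, SSE) == SSE);
  assert(mergeClasses(NoClass, Integer) == Integer);
  // Mixing int and float inside a single eightbyte degrades it to INTEGER.
  assert(mergeClasses(SSE, Integer) == Integer);
  // Anything x87-flavored inside an aggregate forces MEMORY.
  assert(mergeClasses(SSE, X87) == Memory);
}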
if (!BitField && Offset % getContext().getTypeAlign(i->getType())) { Lo = Memory; postMerge(Size, Lo, Hi); return; } // Classify this field. // // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate // exceeds a single eightbyte, each is classified // separately. Each eightbyte gets initialized to class // NO_CLASS. Class FieldLo, FieldHi; // Bit-fields require special handling, they do not force the // structure to be passed in memory even if unaligned, and // therefore they can straddle an eightbyte. if (BitField) { // Ignore padding bit-fields. if (i->isUnnamedBitfield()) continue; uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx); uint64_t Size = i->getBitWidthValue(getContext()); uint64_t EB_Lo = Offset / 64; uint64_t EB_Hi = (Offset + Size - 1) / 64; if (EB_Lo) { assert(EB_Hi == EB_Lo && "Invalid classification, type > 16 bytes."); FieldLo = NoClass; FieldHi = Integer; } else { FieldLo = Integer; FieldHi = EB_Hi ? Integer : NoClass; } } else classify(i->getType(), Offset, FieldLo, FieldHi, isNamedArg); Lo = merge(Lo, FieldLo); Hi = merge(Hi, FieldHi); if (Lo == Memory || Hi == Memory) break; } postMerge(Size, Lo, Hi); } } ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const { // If this is a scalar LLVM value then assume LLVM will pass it in the right // place naturally. if (!isAggregateTypeForABI(Ty)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } return getNaturalAlignIndirect(Ty); } bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const { if (const VectorType *VecTy = Ty->getAs()) { uint64_t Size = getContext().getTypeSize(VecTy); unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel); if (Size <= 64 || Size > LargestVector) return true; } return false; } ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty, unsigned freeIntRegs) const { // If this is a scalar LLVM value then assume LLVM will pass it in the right // place naturally. // // This assumption is optimistic, as there could be free registers available // when we need to pass this argument in memory, and LLVM could try to pass // the argument in the free register. This does not seem to happen currently, // but this code would be much safer if we could mark the argument with // 'onstack'. See PR12193. if (!isAggregateTypeForABI(Ty) && !IsIllegalVectorType(Ty)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); // Compute the byval alignment. We specify the alignment of the byval in all // cases so that the mid-level optimizer knows the alignment of the byval. unsigned Align = std::max(getContext().getTypeAlign(Ty) / 8, 8U); // Attempt to avoid passing indirect results using byval when possible. This // is important for good codegen. // // We do this by coercing the value into a scalar type which the backend can // handle naturally (i.e., without using byval). // // For simplicity, we currently only do this when we have exhausted all of the // free integer registers. 
Doing this when there are free integer registers // would require more care, as we would have to ensure that the coerced value // did not claim the unused register. That would require either reording the // arguments to the function (so that any subsequent inreg values came first), // or only doing this optimization when there were no following arguments that // might be inreg. // // We currently expect it to be rare (particularly in well written code) for // arguments to be passed on the stack when there are still free integer // registers available (this would typically imply large structs being passed // by value), so this seems like a fair tradeoff for now. // // We can revisit this if the backend grows support for 'onstack' parameter // attributes. See PR12193. if (freeIntRegs == 0) { uint64_t Size = getContext().getTypeSize(Ty); // If this type fits in an eightbyte, coerce it into the matching integral // type, which will end up on the stack (with alignment 8). if (Align == 8 && Size <= 64) return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size)); } return ABIArgInfo::getIndirect(CharUnits::fromQuantity(Align)); } /// The ABI specifies that a value should be passed in a full vector XMM/YMM /// register. Pick an LLVM IR type that will be passed as a vector register. llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const { // Wrapper structs/arrays that only contain vectors are passed just like // vectors; strip them off if present. if (const Type *InnerTy = isSingleElementStruct(Ty, getContext())) Ty = QualType(InnerTy, 0); llvm::Type *IRType = CGT.ConvertType(Ty); if (isa(IRType) || IRType->getTypeID() == llvm::Type::FP128TyID) return IRType; // We couldn't find the preferred IR vector type for 'Ty'. uint64_t Size = getContext().getTypeSize(Ty); assert((Size == 128 || Size == 256) && "Invalid type found!"); // Return a LLVM IR vector type based on the size of 'Ty'. return llvm::VectorType::get(llvm::Type::getDoubleTy(getVMContext()), Size / 64); } /// BitsContainNoUserData - Return true if the specified [start,end) bit range /// is known to either be off the end of the specified type or being in /// alignment padding. The user type specified is known to be at most 128 bits /// in size, and have passed through X86_64ABIInfo::classify with a successful /// classification that put one of the two halves in the INTEGER class. /// /// It is conservatively correct to return false. static bool BitsContainNoUserData(QualType Ty, unsigned StartBit, unsigned EndBit, ASTContext &Context) { // If the bytes being queried are off the end of the type, there is no user // data hiding here. This handles analysis of builtins, vectors and other // types that don't contain interesting padding. unsigned TySize = (unsigned)Context.getTypeSize(Ty); if (TySize <= StartBit) return true; if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) { unsigned EltSize = (unsigned)Context.getTypeSize(AT->getElementType()); unsigned NumElts = (unsigned)AT->getSize().getZExtValue(); // Check each element to see if the element overlaps with the queried range. for (unsigned i = 0; i != NumElts; ++i) { // If the element is after the span we care about, then we're done.. unsigned EltOffset = i*EltSize; if (EltOffset >= EndBit) break; unsigned EltStart = EltOffset < StartBit ? StartBit-EltOffset :0; if (!BitsContainNoUserData(AT->getElementType(), EltStart, EndBit-EltOffset, Context)) return false; } // If it overlaps no elements, then it is safe to process as padding. 
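// A standalone sketch of the memory-fallback arithmetic in getIndirectResult
// above, assuming the type's size and alignment in bits are already known; it
// only models the "coerce to a single integer instead of byval once the GPRs
// are gone" special case. PassDecision and passInMemory are illustrative names.
#include <algorithm>
#include <cassert>

struct PassDecision { bool Indirect; unsigned DirectBits; unsigned ByValAlign; };

PassDecision passInMemory(unsigned SizeBits, unsigned AlignBits,
                          unsigned FreeIntRegs) {
  unsigned Align = std::max(AlignBits / 8, 8u);   // byval alignment in bytes
  if (FreeIntRegs == 0 && Align == 8 && SizeBits <= 64)
    return {false, SizeBits, 0};   // pass as iN; it lands on the stack anyway
  return {true, 0, Align};
}

int main() {
  // A 16-byte-aligned struct always stays byval with that alignment.
  assert(passInMemory(128, 128, 0).Indirect &&
         passInMemory(128, 128, 0).ByValAlign == 16);
  // A small 8-byte-aligned struct with no free GPRs is coerced to i64.
  PassDecision D = passInMemory(64, 64, 0);
  assert(!D.Indirect && D.DirectBits == 64);
}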
return true; } if (const RecordType *RT = Ty->getAs()) { const RecordDecl *RD = RT->getDecl(); const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); // If this is a C++ record, check the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { for (const auto &I : CXXRD->bases()) { assert(!I.isVirtual() && !I.getType()->isDependentType() && "Unexpected base class!"); const CXXRecordDecl *Base = cast(I.getType()->getAs()->getDecl()); // If the base is after the span we care about, ignore it. unsigned BaseOffset = Context.toBits(Layout.getBaseClassOffset(Base)); if (BaseOffset >= EndBit) continue; unsigned BaseStart = BaseOffset < StartBit ? StartBit-BaseOffset :0; if (!BitsContainNoUserData(I.getType(), BaseStart, EndBit-BaseOffset, Context)) return false; } } // Verify that no field has data that overlaps the region of interest. Yes // this could be sped up a lot by being smarter about queried fields, // however we're only looking at structs up to 16 bytes, so we don't care // much. unsigned idx = 0; for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); i != e; ++i, ++idx) { unsigned FieldOffset = (unsigned)Layout.getFieldOffset(idx); // If we found a field after the region we care about, then we're done. if (FieldOffset >= EndBit) break; unsigned FieldStart = FieldOffset < StartBit ? StartBit-FieldOffset :0; if (!BitsContainNoUserData(i->getType(), FieldStart, EndBit-FieldOffset, Context)) return false; } // If nothing in this record overlapped the area of interest, then we're // clean. return true; } return false; } /// ContainsFloatAtOffset - Return true if the specified LLVM IR type has a /// float member at the specified offset. For example, {int,{float}} has a /// float at offset 4. It is conservatively correct for this routine to return /// false. static bool ContainsFloatAtOffset(llvm::Type *IRType, unsigned IROffset, const llvm::DataLayout &TD) { // Base case if we find a float. if (IROffset == 0 && IRType->isFloatTy()) return true; // If this is a struct, recurse into the field at the specified offset. if (llvm::StructType *STy = dyn_cast(IRType)) { const llvm::StructLayout *SL = TD.getStructLayout(STy); unsigned Elt = SL->getElementContainingOffset(IROffset); IROffset -= SL->getElementOffset(Elt); return ContainsFloatAtOffset(STy->getElementType(Elt), IROffset, TD); } // If this is an array, recurse into the field at the specified offset. if (llvm::ArrayType *ATy = dyn_cast(IRType)) { llvm::Type *EltTy = ATy->getElementType(); unsigned EltSize = TD.getTypeAllocSize(EltTy); IROffset -= IROffset/EltSize*EltSize; return ContainsFloatAtOffset(EltTy, IROffset, TD); } return false; } /// GetSSETypeAtOffset - Return a type that will be passed by the backend in the /// low 8 bytes of an XMM register, corresponding to the SSE class. llvm::Type *X86_64ABIInfo:: GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset, QualType SourceTy, unsigned SourceOffset) const { // The only three choices we have are either double, <2 x float>, or float. We // pass as float if the last 4 bytes is just padding. This happens for // structs that contain 3 floats. if (BitsContainNoUserData(SourceTy, SourceOffset*8+32, SourceOffset*8+64, getContext())) return llvm::Type::getFloatTy(getVMContext()); // We want to pass as <2 x float> if the LLVM IR type contains a float at // offset+0 and offset+4. Walk the LLVM IR type to find out if this is the // case. 
if (ContainsFloatAtOffset(IRType, IROffset, getDataLayout()) && ContainsFloatAtOffset(IRType, IROffset+4, getDataLayout())) return llvm::VectorType::get(llvm::Type::getFloatTy(getVMContext()), 2); return llvm::Type::getDoubleTy(getVMContext()); } /// GetINTEGERTypeAtOffset - The ABI specifies that a value should be passed in /// an 8-byte GPR. This means that we either have a scalar or we are talking /// about the high or low part of an up-to-16-byte struct. This routine picks /// the best LLVM IR type to represent this, which may be i64 or may be anything /// else that the backend will pass in a GPR that works better (e.g. i8, %foo*, /// etc). /// /// PrefType is an LLVM IR type that corresponds to (part of) the IR type for /// the source type. IROffset is an offset in bytes into the LLVM IR type that /// the 8-byte value references. PrefType may be null. /// /// SourceTy is the source-level type for the entire argument. SourceOffset is /// an offset into this that we're processing (which is always either 0 or 8). /// llvm::Type *X86_64ABIInfo:: GetINTEGERTypeAtOffset(llvm::Type *IRType, unsigned IROffset, QualType SourceTy, unsigned SourceOffset) const { // If we're dealing with an un-offset LLVM IR type, then it means that we're // returning an 8-byte unit starting with it. See if we can safely use it. if (IROffset == 0) { // Pointers and int64's always fill the 8-byte unit. if ((isa(IRType) && Has64BitPointers) || IRType->isIntegerTy(64)) return IRType; // If we have a 1/2/4-byte integer, we can use it only if the rest of the // goodness in the source type is just tail padding. This is allowed to // kick in for struct {double,int} on the int, but not on // struct{double,int,int} because we wouldn't return the second int. We // have to do this analysis on the source type because we can't depend on // unions being lowered a specific way etc. if (IRType->isIntegerTy(8) || IRType->isIntegerTy(16) || IRType->isIntegerTy(32) || (isa(IRType) && !Has64BitPointers)) { unsigned BitWidth = isa(IRType) ? 32 : cast(IRType)->getBitWidth(); if (BitsContainNoUserData(SourceTy, SourceOffset*8+BitWidth, SourceOffset*8+64, getContext())) return IRType; } } if (llvm::StructType *STy = dyn_cast(IRType)) { // If this is a struct, recurse into the field at the specified offset. const llvm::StructLayout *SL = getDataLayout().getStructLayout(STy); if (IROffset < SL->getSizeInBytes()) { unsigned FieldIdx = SL->getElementContainingOffset(IROffset); IROffset -= SL->getElementOffset(FieldIdx); return GetINTEGERTypeAtOffset(STy->getElementType(FieldIdx), IROffset, SourceTy, SourceOffset); } } if (llvm::ArrayType *ATy = dyn_cast(IRType)) { llvm::Type *EltTy = ATy->getElementType(); unsigned EltSize = getDataLayout().getTypeAllocSize(EltTy); unsigned EltOffset = IROffset/EltSize*EltSize; return GetINTEGERTypeAtOffset(EltTy, IROffset-EltOffset, SourceTy, SourceOffset); } // Okay, we don't have any better idea of what to pass, so we pass this in an // integer register that isn't too big to fit the rest of the struct. unsigned TySizeInBytes = (unsigned)getContext().getTypeSizeInChars(SourceTy).getQuantity(); assert(TySizeInBytes != SourceOffset && "Empty field?"); // It is always safe to classify this as an integer type up to i64 that // isn't larger than the structure. 
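  // Worked example (illustrative, not in the original source): for the high
  // eightbyte of a 10-byte source type, SourceOffset is 8 and TySizeInBytes
  // is 10, so the fallback below yields min(10 - 8, 8) * 8 = 16 bits, i.e.
  // an i16 that covers exactly the remaining user data.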
return llvm::IntegerType::get(getVMContext(), std::min(TySizeInBytes-SourceOffset, 8U)*8); } /// GetX86_64ByValArgumentPair - Given a high and low type that can ideally /// be used as elements of a two register pair to pass or return, return a /// first class aggregate to represent them. For example, if the low part of /// a by-value argument should be passed as i32* and the high part as float, /// return {i32*, float}. static llvm::Type * GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi, const llvm::DataLayout &TD) { // In order to correctly satisfy the ABI, we need to the high part to start // at offset 8. If the high and low parts we inferred are both 4-byte types // (e.g. i32 and i32) then the resultant struct type ({i32,i32}) won't have // the second element at offset 8. Check for this: unsigned LoSize = (unsigned)TD.getTypeAllocSize(Lo); unsigned HiAlign = TD.getABITypeAlignment(Hi); unsigned HiStart = llvm::RoundUpToAlignment(LoSize, HiAlign); assert(HiStart != 0 && HiStart <= 8 && "Invalid x86-64 argument pair!"); // To handle this, we have to increase the size of the low part so that the // second element will start at an 8 byte offset. We can't increase the size // of the second element because it might make us access off the end of the // struct. if (HiStart != 8) { // There are usually two sorts of types the ABI generation code can produce // for the low part of a pair that aren't 8 bytes in size: float or // i8/i16/i32. This can also include pointers when they are 32-bit (X32 and // NaCl). // Promote these to a larger type. if (Lo->isFloatTy()) Lo = llvm::Type::getDoubleTy(Lo->getContext()); else { assert((Lo->isIntegerTy() || Lo->isPointerTy()) && "Invalid/unknown lo type"); Lo = llvm::Type::getInt64Ty(Lo->getContext()); } } llvm::StructType *Result = llvm::StructType::get(Lo, Hi, nullptr); // Verify that the second element is at an 8-byte offset. assert(TD.getStructLayout(Result)->getElementOffset(1) == 8 && "Invalid x86-64 argument pair!"); return Result; } ABIArgInfo X86_64ABIInfo:: classifyReturnType(QualType RetTy) const { // AMD64-ABI 3.2.3p4: Rule 1. Classify the return type with the // classification algorithm. X86_64ABIInfo::Class Lo, Hi; classify(RetTy, 0, Lo, Hi, /*isNamedArg*/ true); // Check some invariants. assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); llvm::Type *ResType = nullptr; switch (Lo) { case NoClass: if (Hi == NoClass) return ABIArgInfo::getIgnore(); // If the low part is just padding, it takes no register, leave ResType // null. assert((Hi == SSE || Hi == Integer || Hi == X87Up) && "Unknown missing lo part"); break; case SSEUp: case X87Up: llvm_unreachable("Invalid classification for lo word."); // AMD64-ABI 3.2.3p4: Rule 2. Types of class memory are returned via // hidden argument. case Memory: return getIndirectReturnResult(RetTy); // AMD64-ABI 3.2.3p4: Rule 3. If the class is INTEGER, the next // available register of the sequence %rax, %rdx is used. case Integer: ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0); // If we have a sign or zero extended integer, make sure to return Extend // so that the parameter gets the right LLVM IR attributes. if (Hi == NoClass && isa(ResType)) { // Treat an enum type as its underlying type. 
if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); if (RetTy->isIntegralOrEnumerationType() && RetTy->isPromotableIntegerType()) return ABIArgInfo::getExtend(); } break; // AMD64-ABI 3.2.3p4: Rule 4. If the class is SSE, the next // available SSE register of the sequence %xmm0, %xmm1 is used. case SSE: ResType = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0); break; // AMD64-ABI 3.2.3p4: Rule 6. If the class is X87, the value is // returned on the X87 stack in %st0 as 80-bit x87 number. case X87: ResType = llvm::Type::getX86_FP80Ty(getVMContext()); break; // AMD64-ABI 3.2.3p4: Rule 8. If the class is COMPLEX_X87, the real // part of the value is returned in %st0 and the imaginary part in // %st1. case ComplexX87: assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification."); ResType = llvm::StructType::get(llvm::Type::getX86_FP80Ty(getVMContext()), llvm::Type::getX86_FP80Ty(getVMContext()), nullptr); break; } llvm::Type *HighPart = nullptr; switch (Hi) { // Memory was handled previously and X87 should // never occur as a hi class. case Memory: case X87: llvm_unreachable("Invalid classification for hi word."); case ComplexX87: // Previously handled. case NoClass: break; case Integer: HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8); if (Lo == NoClass) // Return HighPart at offset 8 in memory. return ABIArgInfo::getDirect(HighPart, 8); break; case SSE: HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8); if (Lo == NoClass) // Return HighPart at offset 8 in memory. return ABIArgInfo::getDirect(HighPart, 8); break; // AMD64-ABI 3.2.3p4: Rule 5. If the class is SSEUP, the eightbyte // is passed in the next available eightbyte chunk if the last used // vector register. // // SSEUP should always be preceded by SSE, just widen. case SSEUp: assert(Lo == SSE && "Unexpected SSEUp classification."); ResType = GetByteVectorType(RetTy); break; // AMD64-ABI 3.2.3p4: Rule 7. If the class is X87UP, the value is // returned together with the previous X87 value in %st0. case X87Up: // If X87Up is preceded by X87, we don't need to do // anything. However, in some cases with unions it may not be // preceded by X87. In such situations we follow gcc and pass the // extra bits in an SSE reg. if (Lo != X87) { HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8); if (Lo == NoClass) // Return HighPart at offset 8 in memory. return ABIArgInfo::getDirect(HighPart, 8); } break; } // If a high part was specified, merge it together with the low part. It is // known to pass in the high eightbyte of the result. We do this by forming a // first class struct aggregate with the high and low part: {low, high} if (HighPart) ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout()); return ABIArgInfo::getDirect(ResType); } ABIArgInfo X86_64ABIInfo::classifyArgumentType( QualType Ty, unsigned freeIntRegs, unsigned &neededInt, unsigned &neededSSE, bool isNamedArg) const { Ty = useFirstFieldIfTransparentUnion(Ty); X86_64ABIInfo::Class Lo, Hi; classify(Ty, 0, Lo, Hi, isNamedArg); // Check some invariants. // FIXME: Enforce these by construction. assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); neededInt = 0; neededSSE = 0; llvm::Type *ResType = nullptr; switch (Lo) { case NoClass: if (Hi == NoClass) return ABIArgInfo::getIgnore(); // If the low part is just padding, it takes no register, leave ResType // null. 
assert((Hi == SSE || Hi == Integer || Hi == X87Up) && "Unknown missing lo part"); break; // AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument // on the stack. case Memory: // AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or // COMPLEX_X87, it is passed in memory. case X87: case ComplexX87: if (getRecordArgABI(Ty, getCXXABI()) == CGCXXABI::RAA_Indirect) ++neededInt; return getIndirectResult(Ty, freeIntRegs); case SSEUp: case X87Up: llvm_unreachable("Invalid classification for lo word."); // AMD64-ABI 3.2.3p3: Rule 2. If the class is INTEGER, the next // available register of the sequence %rdi, %rsi, %rdx, %rcx, %r8 // and %r9 is used. case Integer: ++neededInt; // Pick an 8-byte type based on the preferred type. ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 0, Ty, 0); // If we have a sign or zero extended integer, make sure to return Extend // so that the parameter gets the right LLVM IR attributes. if (Hi == NoClass && isa(ResType)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); if (Ty->isIntegralOrEnumerationType() && Ty->isPromotableIntegerType()) return ABIArgInfo::getExtend(); } break; // AMD64-ABI 3.2.3p3: Rule 3. If the class is SSE, the next // available SSE register is used, the registers are taken in the // order from %xmm0 to %xmm7. case SSE: { llvm::Type *IRType = CGT.ConvertType(Ty); ResType = GetSSETypeAtOffset(IRType, 0, Ty, 0); ++neededSSE; break; } } llvm::Type *HighPart = nullptr; switch (Hi) { // Memory was handled previously, ComplexX87 and X87 should // never occur as hi classes, and X87Up must be preceded by X87, // which is passed in memory. case Memory: case X87: case ComplexX87: llvm_unreachable("Invalid classification for hi word."); case NoClass: break; case Integer: ++neededInt; // Pick an 8-byte type based on the preferred type. HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8); if (Lo == NoClass) // Pass HighPart at offset 8 in memory. return ABIArgInfo::getDirect(HighPart, 8); break; // X87Up generally doesn't occur here (long double is passed in // memory), except in situations involving unions. case X87Up: case SSE: HighPart = GetSSETypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8); if (Lo == NoClass) // Pass HighPart at offset 8 in memory. return ABIArgInfo::getDirect(HighPart, 8); ++neededSSE; break; // AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the // eightbyte is passed in the upper half of the last used SSE // register. This only happens when 128-bit vectors are passed. case SSEUp: assert(Lo == SSE && "Unexpected SSEUp classification"); ResType = GetByteVectorType(Ty); break; } // If a high part was specified, merge it together with the low part. It is // known to pass in the high eightbyte of the result. We do this by forming a // first class struct aggregate with the high and low part: {low, high} if (HighPart) ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout()); return ABIArgInfo::getDirect(ResType); } void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); // Keep track of the number of assigned registers. unsigned freeIntRegs = 6, freeSSERegs = 8; // If the return value is indirect, then the hidden argument is consuming one // integer register. if (FI.getReturnInfo().isIndirect()) --freeIntRegs; // The chain argument effectively gives us another free register. 
if (FI.isChainCall()) ++freeIntRegs; unsigned NumRequiredArgs = FI.getNumRequiredArgs(); // AMD64-ABI 3.2.3p3: Once arguments are classified, the registers // get assigned (in left-to-right order) for passing as follows... unsigned ArgNo = 0; for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); it != ie; ++it, ++ArgNo) { bool IsNamedArg = ArgNo < NumRequiredArgs; unsigned neededInt, neededSSE; it->info = classifyArgumentType(it->type, freeIntRegs, neededInt, neededSSE, IsNamedArg); // AMD64-ABI 3.2.3p3: If there are no registers available for any // eightbyte of an argument, the whole argument is passed on the // stack. If registers have already been assigned for some // eightbytes of such an argument, the assignments get reverted. if (freeIntRegs >= neededInt && freeSSERegs >= neededSSE) { freeIntRegs -= neededInt; freeSSERegs -= neededSSE; } else { it->info = getIndirectResult(it->type, freeIntRegs); } } } static Address EmitX86_64VAArgFromMemory(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) { Address overflow_arg_area_p = CGF.Builder.CreateStructGEP( VAListAddr, 2, CharUnits::fromQuantity(8), "overflow_arg_area_p"); llvm::Value *overflow_arg_area = CGF.Builder.CreateLoad(overflow_arg_area_p, "overflow_arg_area"); // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16 // byte boundary if alignment needed by type exceeds 8 byte boundary. // It isn't stated explicitly in the standard, but in practice we use // alignment greater than 16 where necessary. CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty); if (Align > CharUnits::fromQuantity(8)) { overflow_arg_area = emitRoundPointerUpToAlignment(CGF, overflow_arg_area, Align); } // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area. llvm::Type *LTy = CGF.ConvertTypeForMem(Ty); llvm::Value *Res = CGF.Builder.CreateBitCast(overflow_arg_area, llvm::PointerType::getUnqual(LTy)); // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to: // l->overflow_arg_area + sizeof(type). // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to // an 8 byte boundary. uint64_t SizeInBytes = (CGF.getContext().getTypeSize(Ty) + 7) / 8; llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, (SizeInBytes + 7) & ~7); overflow_arg_area = CGF.Builder.CreateGEP(overflow_arg_area, Offset, "overflow_arg_area.next"); CGF.Builder.CreateStore(overflow_arg_area, overflow_arg_area_p); // AMD64-ABI 3.5.7p5: Step 11. Return the fetched type. return Address(Res, Align); } Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { // Assume that va_list type is correct; should be pointer to LLVM type: // struct { // i32 gp_offset; // i32 fp_offset; // i8* overflow_arg_area; // i8* reg_save_area; // }; unsigned neededInt, neededSSE; Ty = getContext().getCanonicalType(Ty); ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE, /*isNamedArg*/false); // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed // in the registers. If not go to step 7. if (!neededInt && !neededSSE) return EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty); // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of // general purpose registers needed to pass type and num_fp to hold // the number of floating point registers needed. // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into // registers. In the case: l->gp_offset > 48 - num_gp * 8 or // l->fp_offset > 304 - num_fp * 16 go to step 7. 
// // NOTE: 304 is a typo, there are (6 * 8 + 8 * 16) = 176 bytes of // register save space). llvm::Value *InRegs = nullptr; Address gp_offset_p = Address::invalid(), fp_offset_p = Address::invalid(); llvm::Value *gp_offset = nullptr, *fp_offset = nullptr; if (neededInt) { gp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, CharUnits::Zero(), "gp_offset_p"); gp_offset = CGF.Builder.CreateLoad(gp_offset_p, "gp_offset"); InRegs = llvm::ConstantInt::get(CGF.Int32Ty, 48 - neededInt * 8); InRegs = CGF.Builder.CreateICmpULE(gp_offset, InRegs, "fits_in_gp"); } if (neededSSE) { fp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 1, CharUnits::fromQuantity(4), "fp_offset_p"); fp_offset = CGF.Builder.CreateLoad(fp_offset_p, "fp_offset"); llvm::Value *FitsInFP = llvm::ConstantInt::get(CGF.Int32Ty, 176 - neededSSE * 16); FitsInFP = CGF.Builder.CreateICmpULE(fp_offset, FitsInFP, "fits_in_fp"); InRegs = InRegs ? CGF.Builder.CreateAnd(InRegs, FitsInFP) : FitsInFP; } llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem"); llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); CGF.Builder.CreateCondBr(InRegs, InRegBlock, InMemBlock); // Emit code to load the value if it was passed in registers. CGF.EmitBlock(InRegBlock); // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with // an offset of l->gp_offset and/or l->fp_offset. This may require // copying to a temporary location in case the parameter is passed // in different register classes or requires an alignment greater // than 8 for general purpose registers and 16 for XMM registers. // // FIXME: This really results in shameful code when we end up needing to // collect arguments from different places; often what should result in a // simple assembling of a structure from scattered addresses has many more // loads than necessary. Can we clean this up? llvm::Type *LTy = CGF.ConvertTypeForMem(Ty); llvm::Value *RegSaveArea = CGF.Builder.CreateLoad( CGF.Builder.CreateStructGEP(VAListAddr, 3, CharUnits::fromQuantity(16)), "reg_save_area"); Address RegAddr = Address::invalid(); if (neededInt && neededSSE) { // FIXME: Cleanup. assert(AI.isDirect() && "Unexpected ABI info for mixed regs"); llvm::StructType *ST = cast(AI.getCoerceToType()); Address Tmp = CGF.CreateMemTemp(Ty); Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST); assert(ST->getNumElements() == 2 && "Unexpected ABI info for mixed regs"); llvm::Type *TyLo = ST->getElementType(0); llvm::Type *TyHi = ST->getElementType(1); assert((TyLo->isFPOrFPVectorTy() ^ TyHi->isFPOrFPVectorTy()) && "Unexpected ABI info for mixed regs"); llvm::Type *PTyLo = llvm::PointerType::getUnqual(TyLo); llvm::Type *PTyHi = llvm::PointerType::getUnqual(TyHi); llvm::Value *GPAddr = CGF.Builder.CreateGEP(RegSaveArea, gp_offset); llvm::Value *FPAddr = CGF.Builder.CreateGEP(RegSaveArea, fp_offset); llvm::Value *RegLoAddr = TyLo->isFPOrFPVectorTy() ? FPAddr : GPAddr; llvm::Value *RegHiAddr = TyLo->isFPOrFPVectorTy() ? GPAddr : FPAddr; // Copy the first element. llvm::Value *V = CGF.Builder.CreateDefaultAlignedLoad( CGF.Builder.CreateBitCast(RegLoAddr, PTyLo)); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0, CharUnits::Zero())); // Copy the second element. 
V = CGF.Builder.CreateDefaultAlignedLoad( CGF.Builder.CreateBitCast(RegHiAddr, PTyHi)); CharUnits Offset = CharUnits::fromQuantity( getDataLayout().getStructLayout(ST)->getElementOffset(1)); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1, Offset)); RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy); } else if (neededInt) { RegAddr = Address(CGF.Builder.CreateGEP(RegSaveArea, gp_offset), CharUnits::fromQuantity(8)); RegAddr = CGF.Builder.CreateElementBitCast(RegAddr, LTy); // Copy to a temporary if necessary to ensure the appropriate alignment. std::pair SizeAlign = getContext().getTypeInfoInChars(Ty); uint64_t TySize = SizeAlign.first.getQuantity(); CharUnits TyAlign = SizeAlign.second; // Copy into a temporary if the type is more aligned than the // register save area. if (TyAlign.getQuantity() > 8) { Address Tmp = CGF.CreateMemTemp(Ty); CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, false); RegAddr = Tmp; } } else if (neededSSE == 1) { RegAddr = Address(CGF.Builder.CreateGEP(RegSaveArea, fp_offset), CharUnits::fromQuantity(16)); RegAddr = CGF.Builder.CreateElementBitCast(RegAddr, LTy); } else { assert(neededSSE == 2 && "Invalid number of needed registers!"); // SSE registers are spaced 16 bytes apart in the register save // area, we need to collect the two eightbytes together. // The ABI isn't explicit about this, but it seems reasonable // to assume that the slots are 16-byte aligned, since the stack is // naturally 16-byte aligned and the prologue is expected to store // all the SSE registers to the RSA. Address RegAddrLo = Address(CGF.Builder.CreateGEP(RegSaveArea, fp_offset), CharUnits::fromQuantity(16)); Address RegAddrHi = CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo, CharUnits::fromQuantity(16)); llvm::Type *DoubleTy = CGF.DoubleTy; llvm::StructType *ST = llvm::StructType::get(DoubleTy, DoubleTy, nullptr); llvm::Value *V; Address Tmp = CGF.CreateMemTemp(Ty); Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST); V = CGF.Builder.CreateLoad( CGF.Builder.CreateElementBitCast(RegAddrLo, DoubleTy)); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0, CharUnits::Zero())); V = CGF.Builder.CreateLoad( CGF.Builder.CreateElementBitCast(RegAddrHi, DoubleTy)); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1, CharUnits::fromQuantity(8))); RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy); } // AMD64-ABI 3.5.7p5: Step 5. Set: // l->gp_offset = l->gp_offset + num_gp * 8 // l->fp_offset = l->fp_offset + num_fp * 16. if (neededInt) { llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededInt * 8); CGF.Builder.CreateStore(CGF.Builder.CreateAdd(gp_offset, Offset), gp_offset_p); } if (neededSSE) { llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededSSE * 16); CGF.Builder.CreateStore(CGF.Builder.CreateAdd(fp_offset, Offset), fp_offset_p); } CGF.EmitBranch(ContBlock); // Emit code to load the value if it was passed in memory. CGF.EmitBlock(InMemBlock); Address MemAddr = EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty); // Return the appropriate result. 
CGF.EmitBlock(ContBlock); Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr, InMemBlock, "vaarg.addr"); return ResAddr; } Address X86_64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, CGF.getContext().getTypeInfoInChars(Ty), CharUnits::fromQuantity(8), /*allowHigherAlign*/ false); } ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType) const { if (Ty->isVoidType()) return ABIArgInfo::getIgnore(); if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); TypeInfo Info = getContext().getTypeInfo(Ty); uint64_t Width = Info.Width; CharUnits Align = getContext().toCharUnitsFromBits(Info.Align); const RecordType *RT = Ty->getAs(); if (RT) { if (!IsReturnType) { if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); } if (RT->getDecl()->hasFlexibleArrayMember()) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); } // vectorcall adds the concept of a homogenous vector aggregate, similar to // other targets. const Type *Base = nullptr; uint64_t NumElts = 0; if (FreeSSERegs && isHomogeneousAggregate(Ty, Base, NumElts)) { if (FreeSSERegs >= NumElts) { FreeSSERegs -= NumElts; if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType()) return ABIArgInfo::getDirect(); return ABIArgInfo::getExpand(); } return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); } if (Ty->isMemberPointerType()) { // If the member pointer is represented by an LLVM int or ptr, pass it // directly. llvm::Type *LLTy = CGT.ConvertType(Ty); if (LLTy->isPointerTy() || LLTy->isIntegerTy()) return ABIArgInfo::getDirect(); } if (RT || Ty->isAnyComplexType() || Ty->isMemberPointerType()) { // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is // not 1, 2, 4, or 8 bytes, must be passed by reference." if (Width > 64 || !llvm::isPowerOf2_64(Width)) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); // Otherwise, coerce it to a small integer. return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Width)); } // Bool type is always extended to the ABI, other builtin types are not // extended. const BuiltinType *BT = Ty->getAs(); if (BT && BT->getKind() == BuiltinType::Bool) return ABIArgInfo::getExtend(); // Mingw64 GCC uses the old 80 bit extended precision floating point unit. It // passes them indirectly through memory. if (IsMingw64 && BT && BT->getKind() == BuiltinType::LongDouble) { const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); if (LDF == &llvm::APFloat::x87DoubleExtended) return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); } return ABIArgInfo::getDirect(); } void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { bool IsVectorCall = FI.getCallingConvention() == llvm::CallingConv::X86_VectorCall; // We can use up to 4 SSE return registers with vectorcall. unsigned FreeSSERegs = IsVectorCall ? 4 : 0; if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true); // We can use up to 6 SSE register parameters with vectorcall. FreeSSERegs = IsVectorCall ? 
                                   6 : 0;
  for (auto &I : FI.arguments())
    I.info = classify(I.type, FreeSSERegs, false);
}

Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                    QualType Ty) const {
  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false,
                          CGF.getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(8),
                          /*allowHigherAlign*/ false);
}

// PowerPC-32
namespace {
/// PPC32_SVR4_ABIInfo - The 32-bit PowerPC ELF (SVR4) ABI information.
class PPC32_SVR4_ABIInfo : public DefaultABIInfo {
  bool IsSoftFloatABI;
public:
  PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, bool SoftFloatABI)
      : DefaultABIInfo(CGT), IsSoftFloatABI(SoftFloatABI) {}

  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override;
};

class PPC32TargetCodeGenInfo : public TargetCodeGenInfo {
public:
  PPC32TargetCodeGenInfo(CodeGenTypes &CGT, bool SoftFloatABI)
      : TargetCodeGenInfo(new PPC32_SVR4_ABIInfo(CGT, SoftFloatABI)) {}

  int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
    // This is recovered from gcc output.
    return 1; // r1 is the dedicated stack pointer
  }

  bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                               llvm::Value *Address) const override;
};
}

Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList,
                                      QualType Ty) const {
+  const unsigned OverflowLimit = 8;
  if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
    // TODO: Implement this. For now ignore.
    (void)CTy;
    return Address::invalid();
  }

  // struct __va_list_tag {
  //   unsigned char gpr;
  //   unsigned char fpr;
  //   unsigned short reserved;
  //   void *overflow_arg_area;
  //   void *reg_save_area;
  // };

  bool isI64 = Ty->isIntegerType() && getContext().getTypeSize(Ty) == 64;
  bool isInt =
      Ty->isIntegerType() || Ty->isPointerType() || Ty->isAggregateType();
  bool isF64 = Ty->isFloatingType() && getContext().getTypeSize(Ty) == 64;

  // All aggregates are passed indirectly?  That doesn't seem consistent
  // with the argument-lowering code.
  bool isIndirect = Ty->isAggregateType();

  CGBuilderTy &Builder = CGF.Builder;

  // The calling convention either uses 1-2 GPRs or 1 FPR.
  Address NumRegsAddr = Address::invalid();
  if (isInt || IsSoftFloatABI) {
    NumRegsAddr = Builder.CreateStructGEP(VAList, 0, CharUnits::Zero(), "gpr");
  } else {
    NumRegsAddr = Builder.CreateStructGEP(VAList, 1, CharUnits::One(), "fpr");
  }

  llvm::Value *NumRegs = Builder.CreateLoad(NumRegsAddr, "numUsedRegs");

  // "Align" the register count when TY is i64.
  if (isI64 || (isF64 && IsSoftFloatABI)) {
    NumRegs = Builder.CreateAdd(NumRegs, Builder.getInt8(1));
    NumRegs = Builder.CreateAnd(NumRegs, Builder.getInt8((uint8_t) ~1U));
  }

  llvm::Value *CC =
-      Builder.CreateICmpULT(NumRegs, Builder.getInt8(8), "cond");
+      Builder.CreateICmpULT(NumRegs, Builder.getInt8(OverflowLimit), "cond");

  llvm::BasicBlock *UsingRegs = CGF.createBasicBlock("using_regs");
  llvm::BasicBlock *UsingOverflow = CGF.createBasicBlock("using_overflow");
  llvm::BasicBlock *Cont = CGF.createBasicBlock("cont");

  Builder.CreateCondBr(CC, UsingRegs, UsingOverflow);

  llvm::Type *DirectTy = CGF.ConvertType(Ty);
  if (isIndirect) DirectTy = DirectTy->getPointerTo(0);

  // Case 1: consume registers.
  Address RegAddr = Address::invalid();
  {
    CGF.EmitBlock(UsingRegs);

    Address RegSaveAreaPtr =
      Builder.CreateStructGEP(VAList, 4, CharUnits::fromQuantity(8));
    RegAddr = Address(Builder.CreateLoad(RegSaveAreaPtr),
                      CharUnits::fromQuantity(8));
    assert(RegAddr.getElementType() == CGF.Int8Ty);

    // Floating-point registers start after the general-purpose registers.
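    // Illustrative note (not in the original source): the SVR4 register save
    // area holds the 8 argument GPRs (4 bytes each, hence the 32-byte offset
    // below) followed by the argument FPRs (8 bytes each), and OverflowLimit
    // is 8 because at most 8 registers of either kind carry arguments.  Once
    // the overflow path runs, the store of OverflowLimit added below keeps
    // later va_arg calls out of the register area.  The conventional register
    // names (r3-r10, f1-f8) are stated here as an assumption, not taken from
    // this file.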
    if (!(isInt || IsSoftFloatABI)) {
      RegAddr = Builder.CreateConstInBoundsByteGEP(RegAddr,
                                                   CharUnits::fromQuantity(32));
    }

    // Get the address of the saved value by scaling the number of
    // registers we've used by the number of
    CharUnits RegSize = CharUnits::fromQuantity((isInt || IsSoftFloatABI) ? 4 : 8);
    llvm::Value *RegOffset =
      Builder.CreateMul(NumRegs, Builder.getInt8(RegSize.getQuantity()));
    RegAddr = Address(Builder.CreateInBoundsGEP(CGF.Int8Ty,
                                            RegAddr.getPointer(), RegOffset),
                      RegAddr.getAlignment().alignmentOfArrayElement(RegSize));
    RegAddr = Builder.CreateElementBitCast(RegAddr, DirectTy);

    // Increase the used-register count.
    NumRegs =
      Builder.CreateAdd(NumRegs,
                        Builder.getInt8((isI64 || (isF64 && IsSoftFloatABI)) ? 2 : 1));
    Builder.CreateStore(NumRegs, NumRegsAddr);

    CGF.EmitBranch(Cont);
  }

  // Case 2: consume space in the overflow area.
  Address MemAddr = Address::invalid();
  {
    CGF.EmitBlock(UsingOverflow);
+
+    Builder.CreateStore(Builder.getInt8(OverflowLimit), NumRegsAddr);

    // Everything in the overflow area is rounded up to a size of at least 4.
    CharUnits OverflowAreaAlign = CharUnits::fromQuantity(4);

    CharUnits Size;
    if (!isIndirect) {
      auto TypeInfo = CGF.getContext().getTypeInfoInChars(Ty);
      Size = TypeInfo.first.RoundUpToAlignment(OverflowAreaAlign);
    } else {
      Size = CGF.getPointerSize();
    }

    Address OverflowAreaAddr =
      Builder.CreateStructGEP(VAList, 3, CharUnits::fromQuantity(4));
    Address OverflowArea(Builder.CreateLoad(OverflowAreaAddr, "argp.cur"),
                         OverflowAreaAlign);
    // Round up address of argument to alignment
    CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty);
    if (Align > OverflowAreaAlign) {
      llvm::Value *Ptr = OverflowArea.getPointer();
      OverflowArea = Address(emitRoundPointerUpToAlignment(CGF, Ptr, Align),
                             Align);
    }

    MemAddr = Builder.CreateElementBitCast(OverflowArea, DirectTy);

    // Increase the overflow area.
    OverflowArea = Builder.CreateConstInBoundsByteGEP(OverflowArea, Size);
    Builder.CreateStore(OverflowArea.getPointer(), OverflowAreaAddr);
    CGF.EmitBranch(Cont);
  }

  CGF.EmitBlock(Cont);

  // Merge the cases with a phi.
  Address Result = emitMergePHI(CGF, RegAddr, UsingRegs, MemAddr, UsingOverflow,
                                "vaarg.addr");

  // Load the pointer if the argument was passed indirectly.
  if (isIndirect) {
    Result = Address(Builder.CreateLoad(Result, "aggr"),
                     getContext().getTypeAlignInChars(Ty));
  }

  return Result;
}

bool
PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                                                llvm::Value *Address) const {
  // This is calculated from the LLVM and GCC tables and verified
  // against gcc output.  AFAIK all ABIs use the same encoding.
CodeGen::CGBuilderTy &Builder = CGF.Builder; llvm::IntegerType *i8 = CGF.Int8Ty; llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4); llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8); llvm::Value *Sixteen8 = llvm::ConstantInt::get(i8, 16); // 0-31: r0-31, the 4-byte general-purpose registers AssignToArrayRange(Builder, Address, Four8, 0, 31); // 32-63: fp0-31, the 8-byte floating-point registers AssignToArrayRange(Builder, Address, Eight8, 32, 63); // 64-76 are various 4-byte special-purpose registers: // 64: mq // 65: lr // 66: ctr // 67: ap // 68-75 cr0-7 // 76: xer AssignToArrayRange(Builder, Address, Four8, 64, 76); // 77-108: v0-31, the 16-byte vector registers AssignToArrayRange(Builder, Address, Sixteen8, 77, 108); // 109: vrsave // 110: vscr // 111: spe_acc // 112: spefscr // 113: sfp AssignToArrayRange(Builder, Address, Four8, 109, 113); return false; } // PowerPC-64 namespace { /// PPC64_SVR4_ABIInfo - The 64-bit PowerPC ELF (SVR4) ABI information. class PPC64_SVR4_ABIInfo : public DefaultABIInfo { public: enum ABIKind { ELFv1 = 0, ELFv2 }; private: static const unsigned GPRBits = 64; ABIKind Kind; bool HasQPX; // A vector of float or double will be promoted to <4 x f32> or <4 x f64> and // will be passed in a QPX register. bool IsQPXVectorTy(const Type *Ty) const { if (!HasQPX) return false; if (const VectorType *VT = Ty->getAs()) { unsigned NumElements = VT->getNumElements(); if (NumElements == 1) return false; if (VT->getElementType()->isSpecificBuiltinType(BuiltinType::Double)) { if (getContext().getTypeSize(Ty) <= 256) return true; } else if (VT->getElementType()-> isSpecificBuiltinType(BuiltinType::Float)) { if (getContext().getTypeSize(Ty) <= 128) return true; } } return false; } bool IsQPXVectorTy(QualType Ty) const { return IsQPXVectorTy(Ty.getTypePtr()); } public: PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind, bool HasQPX) : DefaultABIInfo(CGT), Kind(Kind), HasQPX(HasQPX) {} bool isPromotableTypeForABI(QualType Ty) const; CharUnits getParamTypeAlignment(QualType Ty) const; ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType Ty) const; bool isHomogeneousAggregateBaseType(QualType Ty) const override; bool isHomogeneousAggregateSmallEnough(const Type *Ty, uint64_t Members) const override; // TODO: We can add more logic to computeInfo to improve performance. // Example: For aggregate arguments that fit in a register, we could // use getDirectInReg (as is done below for structs containing a single // floating-point value) to avoid pushing them to memory on function // entry. This would require changing the logic in PPCISelLowering // when lowering the parameters in the caller and args in the callee. void computeInfo(CGFunctionInfo &FI) const override { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) { // We rely on the default argument classification for the most part. // One exception: An aggregate containing a single floating-point // or vector item must be passed in a register if one is available. 
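      // Illustrative example (not in the original source): for
      //   struct S { double d; };
      // isSingleElementStruct() returns the double, so the argument is given
      // ABIArgInfo::getDirectInReg(double) below instead of the generic
      // aggregate handling in classifyArgumentType().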
const Type *T = isSingleElementStruct(I.type, getContext()); if (T) { const BuiltinType *BT = T->getAs(); if (IsQPXVectorTy(T) || (T->isVectorType() && getContext().getTypeSize(T) == 128) || (BT && BT->isFloatingPoint())) { QualType QT(T, 0); I.info = ABIArgInfo::getDirectInReg(CGT.ConvertType(QT)); continue; } } I.info = classifyArgumentType(I.type); } } Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; }; class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo { public: PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT, PPC64_SVR4_ABIInfo::ABIKind Kind, bool HasQPX) : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind, HasQPX)) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { // This is recovered from gcc output. return 1; // r1 is the dedicated stack pointer } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; }; class PPC64TargetCodeGenInfo : public DefaultTargetCodeGenInfo { public: PPC64TargetCodeGenInfo(CodeGenTypes &CGT) : DefaultTargetCodeGenInfo(CGT) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { // This is recovered from gcc output. return 1; // r1 is the dedicated stack pointer } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; }; } // Return true if the ABI requires Ty to be passed sign- or zero- // extended to 64 bits. bool PPC64_SVR4_ABIInfo::isPromotableTypeForABI(QualType Ty) const { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); // Promotable integer types are required to be promoted by the ABI. if (Ty->isPromotableIntegerType()) return true; // In addition to the usual promotable integer types, we also need to // extend all 32-bit types, since the ABI requires promotion to 64 bits. if (const BuiltinType *BT = Ty->getAs()) switch (BT->getKind()) { case BuiltinType::Int: case BuiltinType::UInt: return true; default: break; } return false; } /// isAlignedParamType - Determine whether a type requires 16-byte or /// higher alignment in the parameter area. Always returns at least 8. CharUnits PPC64_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const { // Complex types are passed just like their elements. if (const ComplexType *CTy = Ty->getAs()) Ty = CTy->getElementType(); // Only vector types of size 16 bytes need alignment (larger types are // passed via reference, smaller types are not aligned). if (IsQPXVectorTy(Ty)) { if (getContext().getTypeSize(Ty) > 128) return CharUnits::fromQuantity(32); return CharUnits::fromQuantity(16); } else if (Ty->isVectorType()) { return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ? 16 : 8); } // For single-element float/vector structs, we consider the whole type // to have the same alignment requirements as its single element. const Type *AlignAsType = nullptr; const Type *EltType = isSingleElementStruct(Ty, getContext()); if (EltType) { const BuiltinType *BT = EltType->getAs(); if (IsQPXVectorTy(EltType) || (EltType->isVectorType() && getContext().getTypeSize(EltType) == 128) || (BT && BT->isFloatingPoint())) AlignAsType = EltType; } // Likewise for ELFv2 homogeneous aggregates. const Type *Base = nullptr; uint64_t Members = 0; if (!AlignAsType && Kind == ELFv2 && isAggregateTypeForABI(Ty) && isHomogeneousAggregate(Ty, Base, Members)) AlignAsType = Base; // With special case aggregates, only vector base types need alignment. 
if (AlignAsType && IsQPXVectorTy(AlignAsType)) { if (getContext().getTypeSize(AlignAsType) > 128) return CharUnits::fromQuantity(32); return CharUnits::fromQuantity(16); } else if (AlignAsType) { return CharUnits::fromQuantity(AlignAsType->isVectorType() ? 16 : 8); } // Otherwise, we only need alignment for any aggregate type that // has an alignment requirement of >= 16 bytes. if (isAggregateTypeForABI(Ty) && getContext().getTypeAlign(Ty) >= 128) { if (HasQPX && getContext().getTypeAlign(Ty) >= 256) return CharUnits::fromQuantity(32); return CharUnits::fromQuantity(16); } return CharUnits::fromQuantity(8); } /// isHomogeneousAggregate - Return true if a type is an ELFv2 homogeneous /// aggregate. Base is set to the base element type, and Members is set /// to the number of base elements. bool ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base, uint64_t &Members) const { if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) { uint64_t NElements = AT->getSize().getZExtValue(); if (NElements == 0) return false; if (!isHomogeneousAggregate(AT->getElementType(), Base, Members)) return false; Members *= NElements; } else if (const RecordType *RT = Ty->getAs()) { const RecordDecl *RD = RT->getDecl(); if (RD->hasFlexibleArrayMember()) return false; Members = 0; // If this is a C++ record, check the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { for (const auto &I : CXXRD->bases()) { // Ignore empty records. if (isEmptyRecord(getContext(), I.getType(), true)) continue; uint64_t FldMembers; if (!isHomogeneousAggregate(I.getType(), Base, FldMembers)) return false; Members += FldMembers; } } for (const auto *FD : RD->fields()) { // Ignore (non-zero arrays of) empty records. QualType FT = FD->getType(); while (const ConstantArrayType *AT = getContext().getAsConstantArrayType(FT)) { if (AT->getSize().getZExtValue() == 0) return false; FT = AT->getElementType(); } if (isEmptyRecord(getContext(), FT, true)) continue; // For compatibility with GCC, ignore empty bitfields in C++ mode. if (getContext().getLangOpts().CPlusPlus && FD->isBitField() && FD->getBitWidthValue(getContext()) == 0) continue; uint64_t FldMembers; if (!isHomogeneousAggregate(FD->getType(), Base, FldMembers)) return false; Members = (RD->isUnion() ? std::max(Members, FldMembers) : Members + FldMembers); } if (!Base) return false; // Ensure there is no padding. if (getContext().getTypeSize(Base) * Members != getContext().getTypeSize(Ty)) return false; } else { Members = 1; if (const ComplexType *CT = Ty->getAs()) { Members = 2; Ty = CT->getElementType(); } // Most ABIs only support float, double, and some vector type widths. if (!isHomogeneousAggregateBaseType(Ty)) return false; // The base type must be the same for all members. Types that // agree in both total size and mode (float vs. vector) are // treated as being equivalent here. const Type *TyPtr = Ty.getTypePtr(); if (!Base) Base = TyPtr; if (Base->isVectorType() != TyPtr->isVectorType() || getContext().getTypeSize(Base) != getContext().getTypeSize(TyPtr)) return false; } return Members > 0 && isHomogeneousAggregateSmallEnough(Base, Members); } bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { // Homogeneous aggregates for ELFv2 must have base types of float, // double, long double, or 128-bit vectors. 
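  // Illustrative example (not in the original source), for the ELFv2 ABI:
  //   struct S { double a, b, c; };
  // is a homogeneous aggregate with Base = double and Members = 3, which
  // passes the <= 8 register check below, so classifyArgumentType() and
  // classifyReturnType() coerce it to [3 x double]; nine doubles would exceed
  // the limit and fall back to the ordinary aggregate handling.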
if (const BuiltinType *BT = Ty->getAs()) { if (BT->getKind() == BuiltinType::Float || BT->getKind() == BuiltinType::Double || BT->getKind() == BuiltinType::LongDouble) return true; } if (const VectorType *VT = Ty->getAs()) { if (getContext().getTypeSize(VT) == 128 || IsQPXVectorTy(Ty)) return true; } return false; } bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateSmallEnough( const Type *Base, uint64_t Members) const { // Vector types require one register, floating point types require one // or two registers depending on their size. uint32_t NumRegs = Base->isVectorType() ? 1 : (getContext().getTypeSize(Base) + 63) / 64; // Homogeneous Aggregates may occupy at most 8 registers. return Members * NumRegs <= 8; } ABIArgInfo PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const { Ty = useFirstFieldIfTransparentUnion(Ty); if (Ty->isAnyComplexType()) return ABIArgInfo::getDirect(); // Non-Altivec vector types are passed in GPRs (smaller than 16 bytes) // or via reference (larger than 16 bytes). if (Ty->isVectorType() && !IsQPXVectorTy(Ty)) { uint64_t Size = getContext().getTypeSize(Ty); if (Size > 128) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); else if (Size < 128) { llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(), Size); return ABIArgInfo::getDirect(CoerceTy); } } if (isAggregateTypeForABI(Ty)) { if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); uint64_t ABIAlign = getParamTypeAlignment(Ty).getQuantity(); uint64_t TyAlign = getContext().getTypeAlignInChars(Ty).getQuantity(); // ELFv2 homogeneous aggregates are passed as array types. const Type *Base = nullptr; uint64_t Members = 0; if (Kind == ELFv2 && isHomogeneousAggregate(Ty, Base, Members)) { llvm::Type *BaseTy = CGT.ConvertType(QualType(Base, 0)); llvm::Type *CoerceTy = llvm::ArrayType::get(BaseTy, Members); return ABIArgInfo::getDirect(CoerceTy); } // If an aggregate may end up fully in registers, we do not // use the ByVal method, but pass the aggregate as array. // This is usually beneficial since we avoid forcing the // back-end to store the argument to memory. uint64_t Bits = getContext().getTypeSize(Ty); if (Bits > 0 && Bits <= 8 * GPRBits) { llvm::Type *CoerceTy; // Types up to 8 bytes are passed as integer type (which will be // properly aligned in the argument save area doubleword). if (Bits <= GPRBits) CoerceTy = llvm::IntegerType::get(getVMContext(), llvm::RoundUpToAlignment(Bits, 8)); // Larger types are passed as arrays, with the base type selected // according to the required alignment in the save area. else { uint64_t RegBits = ABIAlign * 8; uint64_t NumRegs = llvm::RoundUpToAlignment(Bits, RegBits) / RegBits; llvm::Type *RegTy = llvm::IntegerType::get(getVMContext(), RegBits); CoerceTy = llvm::ArrayType::get(RegTy, NumRegs); } return ABIArgInfo::getDirect(CoerceTy); } // All other aggregates are passed ByVal. return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign), /*ByVal=*/true, /*Realign=*/TyAlign > ABIAlign); } return (isPromotableTypeForABI(Ty) ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } ABIArgInfo PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); if (RetTy->isAnyComplexType()) return ABIArgInfo::getDirect(); // Non-Altivec vector types are returned in GPRs (smaller than 16 bytes) // or via reference (larger than 16 bytes). 
if (RetTy->isVectorType() && !IsQPXVectorTy(RetTy)) { uint64_t Size = getContext().getTypeSize(RetTy); if (Size > 128) return getNaturalAlignIndirect(RetTy); else if (Size < 128) { llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(), Size); return ABIArgInfo::getDirect(CoerceTy); } } if (isAggregateTypeForABI(RetTy)) { // ELFv2 homogeneous aggregates are returned as array types. const Type *Base = nullptr; uint64_t Members = 0; if (Kind == ELFv2 && isHomogeneousAggregate(RetTy, Base, Members)) { llvm::Type *BaseTy = CGT.ConvertType(QualType(Base, 0)); llvm::Type *CoerceTy = llvm::ArrayType::get(BaseTy, Members); return ABIArgInfo::getDirect(CoerceTy); } // ELFv2 small aggregates are returned in up to two registers. uint64_t Bits = getContext().getTypeSize(RetTy); if (Kind == ELFv2 && Bits <= 2 * GPRBits) { if (Bits == 0) return ABIArgInfo::getIgnore(); llvm::Type *CoerceTy; if (Bits > GPRBits) { CoerceTy = llvm::IntegerType::get(getVMContext(), GPRBits); CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy, nullptr); } else CoerceTy = llvm::IntegerType::get(getVMContext(), llvm::RoundUpToAlignment(Bits, 8)); return ABIArgInfo::getDirect(CoerceTy); } // All other aggregates are returned indirectly. return getNaturalAlignIndirect(RetTy); } return (isPromotableTypeForABI(RetTy) ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } // Based on ARMABIInfo::EmitVAArg, adjusted for 64-bit machine. Address PPC64_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { auto TypeInfo = getContext().getTypeInfoInChars(Ty); TypeInfo.second = getParamTypeAlignment(Ty); CharUnits SlotSize = CharUnits::fromQuantity(8); // If we have a complex type and the base type is smaller than 8 bytes, // the ABI calls for the real and imaginary parts to be right-adjusted // in separate doublewords. However, Clang expects us to produce a // pointer to a structure with the two parts packed tightly. So generate // loads of the real and imaginary parts relative to the va_list pointer, // and store them to a temporary structure. if (const ComplexType *CTy = Ty->getAs()) { CharUnits EltSize = TypeInfo.first / 2; if (EltSize < SlotSize) { Address Addr = emitVoidPtrDirectVAArg(CGF, VAListAddr, CGF.Int8Ty, SlotSize * 2, SlotSize, SlotSize, /*AllowHigher*/ true); Address RealAddr = Addr; Address ImagAddr = RealAddr; if (CGF.CGM.getDataLayout().isBigEndian()) { RealAddr = CGF.Builder.CreateConstInBoundsByteGEP(RealAddr, SlotSize - EltSize); ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(ImagAddr, 2 * SlotSize - EltSize); } else { ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(RealAddr, SlotSize); } llvm::Type *EltTy = CGF.ConvertTypeForMem(CTy->getElementType()); RealAddr = CGF.Builder.CreateElementBitCast(RealAddr, EltTy); ImagAddr = CGF.Builder.CreateElementBitCast(ImagAddr, EltTy); llvm::Value *Real = CGF.Builder.CreateLoad(RealAddr, ".vareal"); llvm::Value *Imag = CGF.Builder.CreateLoad(ImagAddr, ".vaimag"); Address Temp = CGF.CreateMemTemp(Ty, "vacplx"); CGF.EmitStoreOfComplex({Real, Imag}, CGF.MakeAddrLValue(Temp, Ty), /*init*/ true); return Temp; } } // Otherwise, just use the general rule. return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, TypeInfo, SlotSize, /*AllowHigher*/ true); } static bool PPC64_initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) { // This is calculated from the LLVM and GCC tables and verified // against gcc output. AFAIK all ABIs use the same encoding. 
CodeGen::CGBuilderTy &Builder = CGF.Builder; llvm::IntegerType *i8 = CGF.Int8Ty; llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4); llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8); llvm::Value *Sixteen8 = llvm::ConstantInt::get(i8, 16); // 0-31: r0-31, the 8-byte general-purpose registers AssignToArrayRange(Builder, Address, Eight8, 0, 31); // 32-63: fp0-31, the 8-byte floating-point registers AssignToArrayRange(Builder, Address, Eight8, 32, 63); // 64-76 are various 4-byte special-purpose registers: // 64: mq // 65: lr // 66: ctr // 67: ap // 68-75 cr0-7 // 76: xer AssignToArrayRange(Builder, Address, Four8, 64, 76); // 77-108: v0-31, the 16-byte vector registers AssignToArrayRange(Builder, Address, Sixteen8, 77, 108); // 109: vrsave // 110: vscr // 111: spe_acc // 112: spefscr // 113: sfp AssignToArrayRange(Builder, Address, Four8, 109, 113); return false; } bool PPC64_SVR4_TargetCodeGenInfo::initDwarfEHRegSizeTable( CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { return PPC64_initDwarfEHRegSizeTable(CGF, Address); } bool PPC64TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { return PPC64_initDwarfEHRegSizeTable(CGF, Address); } //===----------------------------------------------------------------------===// // AArch64 ABI Implementation //===----------------------------------------------------------------------===// namespace { class AArch64ABIInfo : public ABIInfo { public: enum ABIKind { AAPCS = 0, DarwinPCS }; private: ABIKind Kind; public: AArch64ABIInfo(CodeGenTypes &CGT, ABIKind Kind) : ABIInfo(CGT), Kind(Kind) {} private: ABIKind getABIKind() const { return Kind; } bool isDarwinPCS() const { return Kind == DarwinPCS; } ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType RetTy) const; bool isHomogeneousAggregateBaseType(QualType Ty) const override; bool isHomogeneousAggregateSmallEnough(const Type *Ty, uint64_t Members) const override; bool isIllegalVectorType(QualType Ty) const; void computeInfo(CGFunctionInfo &FI) const override { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &it : FI.arguments()) it.info = classifyArgumentType(it.type); } Address EmitDarwinVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF) const; Address EmitAAPCSVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF) const; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override { return isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF) : EmitAAPCSVAArg(VAListAddr, Ty, CGF); } }; class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { public: AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind Kind) : TargetCodeGenInfo(new AArch64ABIInfo(CGT, Kind)) {} StringRef getARCRetainAutoreleasedReturnValueMarker() const override { return "mov\tfp, fp\t\t; marker for objc_retainAutoreleaseReturnValue"; } int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { return 31; } bool doesReturnSlotInterfereWithArgs() const override { return false; } }; } ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { Ty = useFirstFieldIfTransparentUnion(Ty); // Handle illegal vector types here. 
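  // Illustrative example (not in the original source): a <2 x i8> vector is
  // only 16 bits wide, so isIllegalVectorType() flags it and the code below
  // coerces it to a plain i32; 64- and 128-bit illegal vectors become
  // <2 x i32> and <4 x i32> respectively, and larger illegal vectors are
  // passed indirectly.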
if (isIllegalVectorType(Ty)) { uint64_t Size = getContext().getTypeSize(Ty); if (Size <= 32) { llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext()); return ABIArgInfo::getDirect(ResType); } if (Size == 64) { llvm::Type *ResType = llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2); return ABIArgInfo::getDirect(ResType); } if (Size == 128) { llvm::Type *ResType = llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4); return ABIArgInfo::getDirect(ResType); } return getNaturalAlignIndirect(Ty, /*ByVal=*/false); } if (!isAggregateTypeForABI(Ty)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); return (Ty->isPromotableIntegerType() && isDarwinPCS() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } // Structures with either a non-trivial destructor or a non-trivial // copy constructor are always indirect. if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA == CGCXXABI::RAA_DirectInMemory); } // Empty records are always ignored on Darwin, but actually passed in C++ mode // elsewhere for GNU compatibility. if (isEmptyRecord(getContext(), Ty, true)) { if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS()) return ABIArgInfo::getIgnore(); return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); } // Homogeneous Floating-point Aggregates (HFAs) need to be expanded. const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(Ty, Base, Members)) { return ABIArgInfo::getDirect( llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members)); } // Aggregates <= 16 bytes are passed directly in registers or on the stack. uint64_t Size = getContext().getTypeSize(Ty); if (Size <= 128) { unsigned Alignment = getContext().getTypeAlign(Ty); Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes // We use a pair of i64 for 16-byte aggregate with 8-byte alignment. // For aggregates with 16-byte alignment, we use i128. if (Alignment < 128 && Size == 128) { llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext()); return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64)); } return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size)); } return getNaturalAlignIndirect(Ty, /*ByVal=*/false); } ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); // Large vector types should be returned via memory. if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128) return getNaturalAlignIndirect(RetTy); if (!isAggregateTypeForABI(RetTy)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); return (RetTy->isPromotableIntegerType() && isDarwinPCS() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } if (isEmptyRecord(getContext(), RetTy, true)) return ABIArgInfo::getIgnore(); const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(RetTy, Base, Members)) // Homogeneous Floating-point Aggregates (HFAs) are returned directly. return ABIArgInfo::getDirect(); // Aggregates <= 16 bytes are returned directly in registers or on the stack. 
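  // Illustrative example (not in the original source): a 12-byte struct such
  // as struct S { int a, b, c; } has its size rounded up to 128 bits below;
  // with ordinary 4-byte alignment it is returned as [2 x i64], while a
  // 16-byte-aligned 16-byte aggregate is returned as a single i128.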
uint64_t Size = getContext().getTypeSize(RetTy); if (Size <= 128) { unsigned Alignment = getContext().getTypeAlign(RetTy); Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes // We use a pair of i64 for 16-byte aggregate with 8-byte alignment. // For aggregates with 16-byte alignment, we use i128. if (Alignment < 128 && Size == 128) { llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext()); return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64)); } return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size)); } return getNaturalAlignIndirect(RetTy); } /// isIllegalVectorType - check whether the vector type is legal for AArch64. bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const { if (const VectorType *VT = Ty->getAs()) { // Check whether VT is legal. unsigned NumElements = VT->getNumElements(); uint64_t Size = getContext().getTypeSize(VT); // NumElements should be power of 2 between 1 and 16. if ((NumElements & (NumElements - 1)) != 0 || NumElements > 16) return true; return Size != 64 && (Size != 128 || NumElements == 1); } return false; } bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { // Homogeneous aggregates for AAPCS64 must have base types of a floating // point type or a short-vector type. This is the same as the 32-bit ABI, // but with the difference that any floating-point type is allowed, // including __fp16. if (const BuiltinType *BT = Ty->getAs()) { if (BT->isFloatingPoint()) return true; } else if (const VectorType *VT = Ty->getAs()) { unsigned VecSize = getContext().getTypeSize(VT); if (VecSize == 64 || VecSize == 128) return true; } return false; } bool AArch64ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base, uint64_t Members) const { return Members <= 4; } Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF) const { ABIArgInfo AI = classifyArgumentType(Ty); bool IsIndirect = AI.isIndirect(); llvm::Type *BaseTy = CGF.ConvertType(Ty); if (IsIndirect) BaseTy = llvm::PointerType::getUnqual(BaseTy); else if (AI.getCoerceToType()) BaseTy = AI.getCoerceToType(); unsigned NumRegs = 1; if (llvm::ArrayType *ArrTy = dyn_cast(BaseTy)) { BaseTy = ArrTy->getElementType(); NumRegs = ArrTy->getNumElements(); } bool IsFPR = BaseTy->isFloatingPointTy() || BaseTy->isVectorTy(); // The AArch64 va_list type and handling is specified in the Procedure Call // Standard, section B.4: // // struct { // void *__stack; // void *__gr_top; // void *__vr_top; // int __gr_offs; // int __vr_offs; // }; llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg"); llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack"); llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); auto TyInfo = getContext().getTypeInfoInChars(Ty); CharUnits TyAlign = TyInfo.second; Address reg_offs_p = Address::invalid(); llvm::Value *reg_offs = nullptr; int reg_top_index; CharUnits reg_top_offset; int RegSize = IsIndirect ? 8 : TyInfo.first.getQuantity(); if (!IsFPR) { // 3 is the field number of __gr_offs reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, CharUnits::fromQuantity(24), "gr_offs_p"); reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs"); reg_top_index = 1; // field number for __gr_top reg_top_offset = CharUnits::fromQuantity(8); RegSize = llvm::RoundUpToAlignment(RegSize, 8); } else { // 4 is the field number of __vr_offs. 
reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, CharUnits::fromQuantity(28), "vr_offs_p"); reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs"); reg_top_index = 2; // field number for __vr_top reg_top_offset = CharUnits::fromQuantity(16); RegSize = 16 * NumRegs; } //======================================= // Find out where argument was passed //======================================= // If reg_offs >= 0 we're already using the stack for this type of // argument. We don't want to keep updating reg_offs (in case it overflows, // though anyone passing 2GB of arguments, each at most 16 bytes, deserves // whatever they get). llvm::Value *UsingStack = nullptr; UsingStack = CGF.Builder.CreateICmpSGE( reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, 0)); CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, MaybeRegBlock); // Otherwise, at least some kind of argument could go in these registers, the // question is whether this particular type is too big. CGF.EmitBlock(MaybeRegBlock); // Integer arguments may need to correct register alignment (for example a // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we // align __gr_offs to calculate the potential address. if (!IsFPR && !IsIndirect && TyAlign.getQuantity() > 8) { int Align = TyAlign.getQuantity(); reg_offs = CGF.Builder.CreateAdd( reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, Align - 1), "align_regoffs"); reg_offs = CGF.Builder.CreateAnd( reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, -Align), "aligned_regoffs"); } // Update the gr_offs/vr_offs pointer for next call to va_arg on this va_list. // The fact that this is done unconditionally reflects the fact that // allocating an argument to the stack also uses up all the remaining // registers of the appropriate kind. llvm::Value *NewOffset = nullptr; NewOffset = CGF.Builder.CreateAdd( reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, RegSize), "new_reg_offs"); CGF.Builder.CreateStore(NewOffset, reg_offs_p); // Now we're in a position to decide whether this argument really was in // registers or not. llvm::Value *InRegs = nullptr; InRegs = CGF.Builder.CreateICmpSLE( NewOffset, llvm::ConstantInt::get(CGF.Int32Ty, 0), "inreg"); CGF.Builder.CreateCondBr(InRegs, InRegBlock, OnStackBlock); //======================================= // Argument was in registers //======================================= // Now we emit the code for if the argument was originally passed in // registers. First start the appropriate block: CGF.EmitBlock(InRegBlock); llvm::Value *reg_top = nullptr; Address reg_top_p = CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, reg_top_offset, "reg_top_p"); reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top"); Address BaseAddr(CGF.Builder.CreateInBoundsGEP(reg_top, reg_offs), CharUnits::fromQuantity(IsFPR ? 16 : 8)); Address RegAddr = Address::invalid(); llvm::Type *MemTy = CGF.ConvertTypeForMem(Ty); if (IsIndirect) { // If it's been passed indirectly (actually a struct), whatever we find from // stored registers or on the stack will actually be a struct **. MemTy = llvm::PointerType::getUnqual(MemTy); } const Type *Base = nullptr; uint64_t NumMembers = 0; bool IsHFA = isHomogeneousAggregate(Ty, Base, NumMembers); if (IsHFA && NumMembers > 1) { // Homogeneous aggregates passed in registers will have their elements split // and stored 16-bytes apart regardless of size (they're notionally in qN, // qN+1, ...). We reload and store into a temporary local variable // contiguously. 
assert(!IsIndirect && "Homogeneous aggregates should be passed directly"); auto BaseTyInfo = getContext().getTypeInfoInChars(QualType(Base, 0)); llvm::Type *BaseTy = CGF.ConvertType(QualType(Base, 0)); llvm::Type *HFATy = llvm::ArrayType::get(BaseTy, NumMembers); Address Tmp = CGF.CreateTempAlloca(HFATy, std::max(TyAlign, BaseTyInfo.second)); // On big-endian platforms, the value will be right-aligned in its slot. int Offset = 0; if (CGF.CGM.getDataLayout().isBigEndian() && BaseTyInfo.first.getQuantity() < 16) Offset = 16 - BaseTyInfo.first.getQuantity(); for (unsigned i = 0; i < NumMembers; ++i) { CharUnits BaseOffset = CharUnits::fromQuantity(16 * i + Offset); Address LoadAddr = CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, BaseOffset); LoadAddr = CGF.Builder.CreateElementBitCast(LoadAddr, BaseTy); Address StoreAddr = CGF.Builder.CreateConstArrayGEP(Tmp, i, BaseTyInfo.first); llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr); CGF.Builder.CreateStore(Elem, StoreAddr); } RegAddr = CGF.Builder.CreateElementBitCast(Tmp, MemTy); } else { // Otherwise the object is contiguous in memory. // It might be right-aligned in its slot. CharUnits SlotSize = BaseAddr.getAlignment(); if (CGF.CGM.getDataLayout().isBigEndian() && !IsIndirect && (IsHFA || !isAggregateTypeForABI(Ty)) && TyInfo.first < SlotSize) { CharUnits Offset = SlotSize - TyInfo.first; BaseAddr = CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, Offset); } RegAddr = CGF.Builder.CreateElementBitCast(BaseAddr, MemTy); } CGF.EmitBranch(ContBlock); //======================================= // Argument was on the stack //======================================= CGF.EmitBlock(OnStackBlock); Address stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, CharUnits::Zero(), "stack_p"); llvm::Value *OnStackPtr = CGF.Builder.CreateLoad(stack_p, "stack"); // Again, stack arguments may need realignment. In this case both integer and // floating-point ones might be affected. if (!IsIndirect && TyAlign.getQuantity() > 8) { int Align = TyAlign.getQuantity(); OnStackPtr = CGF.Builder.CreatePtrToInt(OnStackPtr, CGF.Int64Ty); OnStackPtr = CGF.Builder.CreateAdd( OnStackPtr, llvm::ConstantInt::get(CGF.Int64Ty, Align - 1), "align_stack"); OnStackPtr = CGF.Builder.CreateAnd( OnStackPtr, llvm::ConstantInt::get(CGF.Int64Ty, -Align), "align_stack"); OnStackPtr = CGF.Builder.CreateIntToPtr(OnStackPtr, CGF.Int8PtrTy); } Address OnStackAddr(OnStackPtr, std::max(CharUnits::fromQuantity(8), TyAlign)); // All stack slots are multiples of 8 bytes. 
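// For example, with the rounding below: an int consumes one 8-byte __stack
// slot, a 12-byte struct consumes 16 bytes, and an indirectly passed
// argument always consumes a single 8-byte slot holding its pointer.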
CharUnits StackSlotSize = CharUnits::fromQuantity(8); CharUnits StackSize; if (IsIndirect) StackSize = StackSlotSize; else StackSize = TyInfo.first.RoundUpToAlignment(StackSlotSize); llvm::Value *StackSizeC = CGF.Builder.getSize(StackSize); llvm::Value *NewStack = CGF.Builder.CreateInBoundsGEP(OnStackPtr, StackSizeC, "new_stack"); // Write the new value of __stack for the next call to va_arg CGF.Builder.CreateStore(NewStack, stack_p); if (CGF.CGM.getDataLayout().isBigEndian() && !isAggregateTypeForABI(Ty) && TyInfo.first < StackSlotSize) { CharUnits Offset = StackSlotSize - TyInfo.first; OnStackAddr = CGF.Builder.CreateConstInBoundsByteGEP(OnStackAddr, Offset); } OnStackAddr = CGF.Builder.CreateElementBitCast(OnStackAddr, MemTy); CGF.EmitBranch(ContBlock); //======================================= // Tidy up //======================================= CGF.EmitBlock(ContBlock); Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, OnStackAddr, OnStackBlock, "vaargs.addr"); if (IsIndirect) return Address(CGF.Builder.CreateLoad(ResAddr, "vaarg.addr"), TyInfo.second); return ResAddr; } Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF) const { // The backend's lowering doesn't support va_arg for aggregates or // illegal vector types. Lower VAArg here for these cases and use // the LLVM va_arg instruction for everything else. if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty)) return Address::invalid(); CharUnits SlotSize = CharUnits::fromQuantity(8); // Empty records are ignored for parameter passing purposes. if (isEmptyRecord(getContext(), Ty, true)) { Address Addr(CGF.Builder.CreateLoad(VAListAddr, "ap.cur"), SlotSize); Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); return Addr; } // The size of the actual thing passed, which might end up just // being a pointer for indirect types. auto TyInfo = getContext().getTypeInfoInChars(Ty); // Arguments bigger than 16 bytes which aren't homogeneous // aggregates should be passed indirectly. 
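// For instance, a 32-byte struct of four doubles is a homogeneous aggregate
// and stays direct, while a 24-byte struct of three pointers is not and is
// therefore read through a pointer stored in the argument area.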
bool IsIndirect = false; if (TyInfo.first.getQuantity() > 16) { const Type *Base = nullptr; uint64_t Members = 0; IsIndirect = !isHomogeneousAggregate(Ty, Base, Members); } return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TyInfo, SlotSize, /*AllowHigherAlign*/ true); } //===----------------------------------------------------------------------===// // ARM ABI Implementation //===----------------------------------------------------------------------===// namespace { class ARMABIInfo : public ABIInfo { public: enum ABIKind { APCS = 0, AAPCS = 1, AAPCS_VFP = 2, AAPCS16_VFP = 3, }; private: ABIKind Kind; public: ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : ABIInfo(CGT), Kind(_Kind) { setCCs(); } bool isEABI() const { switch (getTarget().getTriple().getEnvironment()) { case llvm::Triple::Android: case llvm::Triple::EABI: case llvm::Triple::EABIHF: case llvm::Triple::GNUEABI: case llvm::Triple::GNUEABIHF: return true; default: return false; } } bool isEABIHF() const { switch (getTarget().getTriple().getEnvironment()) { case llvm::Triple::EABIHF: case llvm::Triple::GNUEABIHF: return true; default: return false; } } bool isAndroid() const { return (getTarget().getTriple().getEnvironment() == llvm::Triple::Android); } ABIKind getABIKind() const { return Kind; } private: ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic) const; ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic) const; bool isIllegalVectorType(QualType Ty) const; bool isHomogeneousAggregateBaseType(QualType Ty) const override; bool isHomogeneousAggregateSmallEnough(const Type *Ty, uint64_t Members) const override; void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; llvm::CallingConv::ID getLLVMDefaultCC() const; llvm::CallingConv::ID getABIDefaultCC() const; void setCCs(); }; class ARMTargetCodeGenInfo : public TargetCodeGenInfo { public: ARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIInfo::ABIKind K) :TargetCodeGenInfo(new ARMABIInfo(CGT, K)) {} const ARMABIInfo &getABIInfo() const { return static_cast(TargetCodeGenInfo::getABIInfo()); } int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { return 13; } StringRef getARCRetainAutoreleasedReturnValueMarker() const override { return "mov\tr7, r7\t\t@ marker for objc_retainAutoreleaseReturnValue"; } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override { llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4); // 0-15 are the 16 integer registers. 
AssignToArrayRange(CGF.Builder, Address, Four8, 0, 15); return false; } unsigned getSizeOfUnwindException() const override { if (getABIInfo().isEABI()) return 88; return TargetCodeGenInfo::getSizeOfUnwindException(); } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { const FunctionDecl *FD = dyn_cast_or_null(D); if (!FD) return; const ARMInterruptAttr *Attr = FD->getAttr(); if (!Attr) return; const char *Kind; switch (Attr->getInterrupt()) { case ARMInterruptAttr::Generic: Kind = ""; break; case ARMInterruptAttr::IRQ: Kind = "IRQ"; break; case ARMInterruptAttr::FIQ: Kind = "FIQ"; break; case ARMInterruptAttr::SWI: Kind = "SWI"; break; case ARMInterruptAttr::ABORT: Kind = "ABORT"; break; case ARMInterruptAttr::UNDEF: Kind = "UNDEF"; break; } llvm::Function *Fn = cast(GV); Fn->addFnAttr("interrupt", Kind); ARMABIInfo::ABIKind ABI = cast(getABIInfo()).getABIKind(); if (ABI == ARMABIInfo::APCS) return; // AAPCS guarantees that sp will be 8-byte aligned on any public interface, // however this is not necessarily true on taking any interrupt. Instruct // the backend to perform a realignment as part of the function prologue. llvm::AttrBuilder B; B.addStackAlignmentAttr(8); Fn->addAttributes(llvm::AttributeSet::FunctionIndex, llvm::AttributeSet::get(CGM.getLLVMContext(), llvm::AttributeSet::FunctionIndex, B)); } }; class WindowsARMTargetCodeGenInfo : public ARMTargetCodeGenInfo { public: WindowsARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIInfo::ABIKind K) : ARMTargetCodeGenInfo(CGT, K) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override; }; void WindowsARMTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM); addStackProbeSizeTargetAttribute(D, GV, CGM); } } void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type, FI.isVariadic()); // Always honor user-specified calling convention. if (FI.getCallingConvention() != llvm::CallingConv::C) return; llvm::CallingConv::ID cc = getRuntimeCC(); if (cc != llvm::CallingConv::C) FI.setEffectiveCallingConvention(cc); } /// Return the default calling convention that LLVM will use. llvm::CallingConv::ID ARMABIInfo::getLLVMDefaultCC() const { // The default calling convention that LLVM will infer. if (isEABIHF() || getTarget().getTriple().isWatchOS()) return llvm::CallingConv::ARM_AAPCS_VFP; else if (isEABI()) return llvm::CallingConv::ARM_AAPCS; else return llvm::CallingConv::ARM_APCS; } /// Return the calling convention that our ABI would like us to use /// as the C calling convention. llvm::CallingConv::ID ARMABIInfo::getABIDefaultCC() const { switch (getABIKind()) { case APCS: return llvm::CallingConv::ARM_APCS; case AAPCS: return llvm::CallingConv::ARM_AAPCS; case AAPCS_VFP: return llvm::CallingConv::ARM_AAPCS_VFP; case AAPCS16_VFP: return llvm::CallingConv::ARM_AAPCS_VFP; } llvm_unreachable("bad ABI kind"); } void ARMABIInfo::setCCs() { assert(getRuntimeCC() == llvm::CallingConv::C); // Don't muddy up the IR with a ton of explicit annotations if // they'd just match what LLVM will infer from the triple. 
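// E.g. on a *-gnueabihf triple the inferred default is already
// ARM_AAPCS_VFP, so an AAPCS_VFP ABI kind needs no explicit annotation;
// only when the chosen ABI kind disagrees with the triple's default does
// RuntimeCC get set below.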
llvm::CallingConv::ID abiCC = getABIDefaultCC(); if (abiCC != getLLVMDefaultCC()) RuntimeCC = abiCC; // AAPCS apparently requires runtime support functions to be soft-float, but // that's almost certainly for historic reasons (Thumb1 not supporting VFP // most likely). It's more convenient for AAPCS16_VFP to be hard-float. switch (getABIKind()) { case APCS: case AAPCS16_VFP: if (abiCC != getLLVMDefaultCC()) BuiltinCC = abiCC; break; case AAPCS: case AAPCS_VFP: BuiltinCC = llvm::CallingConv::ARM_AAPCS; break; } } ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic) const { // 6.1.2.1 The following argument types are VFP CPRCs: // A single-precision floating-point type (including promoted // half-precision types); A double-precision floating-point type; // A 64-bit or 128-bit containerized vector type; Homogeneous Aggregate // with a Base Type of a single- or double-precision floating-point type, // 64-bit containerized vectors or 128-bit containerized vectors with one // to four Elements. bool IsEffectivelyAAPCS_VFP = getABIKind() == AAPCS_VFP && !isVariadic; Ty = useFirstFieldIfTransparentUnion(Ty); // Handle illegal vector types here. if (isIllegalVectorType(Ty)) { uint64_t Size = getContext().getTypeSize(Ty); if (Size <= 32) { llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext()); return ABIArgInfo::getDirect(ResType); } if (Size == 64) { llvm::Type *ResType = llvm::VectorType::get( llvm::Type::getInt32Ty(getVMContext()), 2); return ABIArgInfo::getDirect(ResType); } if (Size == 128) { llvm::Type *ResType = llvm::VectorType::get( llvm::Type::getInt32Ty(getVMContext()), 4); return ABIArgInfo::getDirect(ResType); } return getNaturalAlignIndirect(Ty, /*ByVal=*/false); } // __fp16 gets passed as if it were an int or float, but with the top 16 bits // unspecified. This is not done for OpenCL as it handles the half type // natively, and does not need to interwork with AAPCS code. if (Ty->isHalfType() && !getContext().getLangOpts().OpenCL) { llvm::Type *ResType = IsEffectivelyAAPCS_VFP ? llvm::Type::getFloatTy(getVMContext()) : llvm::Type::getInt32Ty(getVMContext()); return ABIArgInfo::getDirect(ResType); } if (!isAggregateTypeForABI(Ty)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) { Ty = EnumTy->getDecl()->getIntegerType(); } return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); } // Ignore empty records. if (isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); if (IsEffectivelyAAPCS_VFP) { // Homogeneous Aggregates need to be expanded when we can fit the aggregate // into VFP registers. const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(Ty, Base, Members)) { assert(Base && "Base class should be set for homogeneous aggregate"); // Base can be a floating-point or a vector. return ABIArgInfo::getDirect(nullptr, 0, nullptr, false); } } else if (getABIKind() == ARMABIInfo::AAPCS16_VFP) { // WatchOS does have homogeneous aggregates. Note that we intentionally use // this convention even for a variadic function: the backend will use GPRs // if needed. 
const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(Ty, Base, Members)) { assert(Base && Members <= 4 && "unexpected homogeneous aggregate"); llvm::Type *Ty = llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members); return ABIArgInfo::getDirect(Ty, 0, nullptr, false); } } if (getABIKind() == ARMABIInfo::AAPCS16_VFP && getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(16)) { // WatchOS is adopting the 64-bit AAPCS rule on composite types: if they're // bigger than 128-bits, they get placed in space allocated by the caller, // and a pointer is passed. return ABIArgInfo::getIndirect( CharUnits::fromQuantity(getContext().getTypeAlign(Ty) / 8), false); } // Support byval for ARM. // The ABI alignment for APCS is 4-byte and for AAPCS at least 4-byte and at // most 8-byte. We realign the indirect argument if type alignment is bigger // than ABI alignment. uint64_t ABIAlign = 4; uint64_t TyAlign = getContext().getTypeAlign(Ty) / 8; if (getABIKind() == ARMABIInfo::AAPCS_VFP || getABIKind() == ARMABIInfo::AAPCS) ABIAlign = std::min(std::max(TyAlign, (uint64_t)4), (uint64_t)8); if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64)) { assert(getABIKind() != ARMABIInfo::AAPCS16_VFP && "unexpected byval"); return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign), /*ByVal=*/true, /*Realign=*/TyAlign > ABIAlign); } // Otherwise, pass by coercing to a structure of the appropriate size. llvm::Type* ElemTy; unsigned SizeRegs; // FIXME: Try to match the types of the arguments more accurately where // we can. if (getContext().getTypeAlign(Ty) <= 32) { ElemTy = llvm::Type::getInt32Ty(getVMContext()); SizeRegs = (getContext().getTypeSize(Ty) + 31) / 32; } else { ElemTy = llvm::Type::getInt64Ty(getVMContext()); SizeRegs = (getContext().getTypeSize(Ty) + 63) / 64; } return ABIArgInfo::getDirect(llvm::ArrayType::get(ElemTy, SizeRegs)); } static bool isIntegerLikeType(QualType Ty, ASTContext &Context, llvm::LLVMContext &VMContext) { // APCS, C Language Calling Conventions, Non-Simple Return Values: A structure // is called integer-like if its size is less than or equal to one word, and // the offset of each of its addressable sub-fields is zero. uint64_t Size = Context.getTypeSize(Ty); // Check that the type fits in a word. if (Size > 32) return false; // FIXME: Handle vector types! if (Ty->isVectorType()) return false; // Float types are never treated as "integer like". if (Ty->isRealFloatingType()) return false; // If this is a builtin or pointer type then it is ok. if (Ty->getAs() || Ty->isPointerType()) return true; // Small complex integer types are "integer like". if (const ComplexType *CT = Ty->getAs()) return isIntegerLikeType(CT->getElementType(), Context, VMContext); // Single element and zero sized arrays should be allowed, by the definition // above, but they are not. // Otherwise, it must be a record type. const RecordType *RT = Ty->getAs(); if (!RT) return false; // Ignore records with flexible arrays. const RecordDecl *RD = RT->getDecl(); if (RD->hasFlexibleArrayMember()) return false; // Check that all sub-fields are at offset 0, and are themselves "integer // like". const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); bool HadField = false; unsigned idx = 0; for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); i != e; ++i, ++idx) { const FieldDecl *FD = *i; // Bit-fields are not addressable, we only need to verify they are "integer // like". 
We still have to disallow a subsequent non-bitfield, for example: // struct { int : 0; int x } // is non-integer like according to gcc. if (FD->isBitField()) { if (!RD->isUnion()) HadField = true; if (!isIntegerLikeType(FD->getType(), Context, VMContext)) return false; continue; } // Check if this field is at offset 0. if (Layout.getFieldOffset(idx) != 0) return false; if (!isIntegerLikeType(FD->getType(), Context, VMContext)) return false; // Only allow at most one field in a structure. This doesn't match the // wording above, but follows gcc in situations with a field following an // empty structure. if (!RD->isUnion()) { if (HadField) return false; HadField = true; } } return true; } ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, bool isVariadic) const { bool IsEffectivelyAAPCS_VFP = (getABIKind() == AAPCS_VFP || getABIKind() == AAPCS16_VFP) && !isVariadic; if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); // Large vector types should be returned via memory. if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128) { return getNaturalAlignIndirect(RetTy); } // __fp16 gets returned as if it were an int or float, but with the top 16 // bits unspecified. This is not done for OpenCL as it handles the half type // natively, and does not need to interwork with AAPCS code. if (RetTy->isHalfType() && !getContext().getLangOpts().OpenCL) { llvm::Type *ResType = IsEffectivelyAAPCS_VFP ? llvm::Type::getFloatTy(getVMContext()) : llvm::Type::getInt32Ty(getVMContext()); return ABIArgInfo::getDirect(ResType); } if (!isAggregateTypeForABI(RetTy)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); return RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect(); } // Are we following APCS? if (getABIKind() == APCS) { if (isEmptyRecord(getContext(), RetTy, false)) return ABIArgInfo::getIgnore(); // Complex types are all returned as packed integers. // // FIXME: Consider using 2 x vector types if the back end handles them // correctly. if (RetTy->isAnyComplexType()) return ABIArgInfo::getDirect(llvm::IntegerType::get( getVMContext(), getContext().getTypeSize(RetTy))); // Integer like structures are returned in r0. if (isIntegerLikeType(RetTy, getContext(), getVMContext())) { // Return in the smallest viable integer type. uint64_t Size = getContext().getTypeSize(RetTy); if (Size <= 8) return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); if (Size <= 16) return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); } // Otherwise return in memory. return getNaturalAlignIndirect(RetTy); } // Otherwise this is an AAPCS variant. if (isEmptyRecord(getContext(), RetTy, true)) return ABIArgInfo::getIgnore(); // Check for homogeneous aggregates with AAPCS-VFP. if (IsEffectivelyAAPCS_VFP) { const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(RetTy, Base, Members)) { assert(Base && "Base class should be set for homogeneous aggregate"); // Homogeneous Aggregates are returned directly. return ABIArgInfo::getDirect(nullptr, 0, nullptr, false); } } // Aggregates <= 4 bytes are returned in r0; other aggregates // are returned indirectly. 
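// E.g. struct { char a; char b; } (16 bits) comes back as i16 on a
// little-endian AAPCS target but as i32 on big-endian, matching an LDR of
// the whole register.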
uint64_t Size = getContext().getTypeSize(RetTy); if (Size <= 32) { if (getDataLayout().isBigEndian()) // Return in 32 bit integer integer type (as if loaded by LDR, AAPCS 5.4) return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); // Return in the smallest viable integer type. if (Size <= 8) return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); if (Size <= 16) return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); } else if (Size <= 128 && getABIKind() == AAPCS16_VFP) { llvm::Type *Int32Ty = llvm::Type::getInt32Ty(getVMContext()); llvm::Type *CoerceTy = llvm::ArrayType::get(Int32Ty, llvm::RoundUpToAlignment(Size, 32) / 32); return ABIArgInfo::getDirect(CoerceTy); } return getNaturalAlignIndirect(RetTy); } /// isIllegalVector - check whether Ty is an illegal vector type. bool ARMABIInfo::isIllegalVectorType(QualType Ty) const { if (const VectorType *VT = Ty->getAs ()) { if (isAndroid()) { // Android shipped using Clang 3.1, which supported a slightly different // vector ABI. The primary differences were that 3-element vector types // were legal, and so were sub 32-bit vectors (i.e. <2 x i8>). This path // accepts that legacy behavior for Android only. // Check whether VT is legal. unsigned NumElements = VT->getNumElements(); // NumElements should be power of 2 or equal to 3. if (!llvm::isPowerOf2_32(NumElements) && NumElements != 3) return true; } else { // Check whether VT is legal. unsigned NumElements = VT->getNumElements(); uint64_t Size = getContext().getTypeSize(VT); // NumElements should be power of 2. if (!llvm::isPowerOf2_32(NumElements)) return true; // Size should be greater than 32 bits. return Size <= 32; } } return false; } bool ARMABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { // Homogeneous aggregates for AAPCS-VFP must have base types of float, // double, or 64-bit or 128-bit vectors. if (const BuiltinType *BT = Ty->getAs()) { if (BT->getKind() == BuiltinType::Float || BT->getKind() == BuiltinType::Double || BT->getKind() == BuiltinType::LongDouble) return true; } else if (const VectorType *VT = Ty->getAs()) { unsigned VecSize = getContext().getTypeSize(VT); if (VecSize == 64 || VecSize == 128) return true; } return false; } bool ARMABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base, uint64_t Members) const { return Members <= 4; } Address ARMABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { CharUnits SlotSize = CharUnits::fromQuantity(4); // Empty records are ignored for parameter passing purposes. if (isEmptyRecord(getContext(), Ty, true)) { Address Addr(CGF.Builder.CreateLoad(VAListAddr), SlotSize); Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); return Addr; } auto TyInfo = getContext().getTypeInfoInChars(Ty); CharUnits TyAlignForABI = TyInfo.second; // Use indirect if size of the illegal vector is bigger than 16 bytes. bool IsIndirect = false; const Type *Base = nullptr; uint64_t Members = 0; if (TyInfo.first > CharUnits::fromQuantity(16) && isIllegalVectorType(Ty)) { IsIndirect = true; // ARMv7k passes structs bigger than 16 bytes indirectly, in space // allocated by the caller. } else if (TyInfo.first > CharUnits::fromQuantity(16) && getABIKind() == ARMABIInfo::AAPCS16_VFP && !isHomogeneousAggregate(Ty, Base, Members)) { IsIndirect = true; // Otherwise, bound the type's ABI alignment. // The ABI alignment for 64-bit or 128-bit vectors is 8 for AAPCS and 4 for // APCS. 
For AAPCS, the ABI alignment is at least 4-byte and at most 8-byte. // Our callers should be prepared to handle an under-aligned address. } else if (getABIKind() == ARMABIInfo::AAPCS_VFP || getABIKind() == ARMABIInfo::AAPCS) { TyAlignForABI = std::max(TyAlignForABI, CharUnits::fromQuantity(4)); TyAlignForABI = std::min(TyAlignForABI, CharUnits::fromQuantity(8)); } else if (getABIKind() == ARMABIInfo::AAPCS16_VFP) { // ARMv7k allows type alignment up to 16 bytes. TyAlignForABI = std::max(TyAlignForABI, CharUnits::fromQuantity(4)); TyAlignForABI = std::min(TyAlignForABI, CharUnits::fromQuantity(16)); } else { TyAlignForABI = CharUnits::fromQuantity(4); } TyInfo.second = TyAlignForABI; return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TyInfo, SlotSize, /*AllowHigherAlign*/ true); } //===----------------------------------------------------------------------===// // NVPTX ABI Implementation //===----------------------------------------------------------------------===// namespace { class NVPTXABIInfo : public ABIInfo { public: NVPTXABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {} ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType Ty) const; void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; }; class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo { public: NVPTXTargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new NVPTXABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; private: // Adds a NamedMDNode with F, Name, and Operand as operands, and adds the // resulting MDNode to the nvvm.annotations MDNode. static void addNVVMMetadata(llvm::Function *F, StringRef Name, int Operand); }; ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); // note: this is different from default ABI if (!RetTy->isScalarType()) return ABIArgInfo::getDirect(); // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); // Return aggregates type as indirect by value if (isAggregateTypeForABI(Ty)) return getNaturalAlignIndirect(Ty, /* byval */ true); return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type); // Always honor user-specified calling convention. 
if (FI.getCallingConvention() != llvm::CallingConv::C) return; FI.setEffectiveCallingConvention(getRuntimeCC()); } Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { llvm_unreachable("NVPTX does not support varargs"); } void NVPTXTargetCodeGenInfo:: setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const{ const FunctionDecl *FD = dyn_cast_or_null(D); if (!FD) return; llvm::Function *F = cast(GV); // Perform special handling in OpenCL mode if (M.getLangOpts().OpenCL) { // Use OpenCL function attributes to check for kernel functions // By default, all functions are device functions if (FD->hasAttr()) { // OpenCL __kernel functions get kernel metadata // Create !{, metadata !"kernel", i32 1} node addNVVMMetadata(F, "kernel", 1); // And kernel functions are not subject to inlining F->addFnAttr(llvm::Attribute::NoInline); } } // Perform special handling in CUDA mode. if (M.getLangOpts().CUDA) { // CUDA __global__ functions get a kernel metadata entry. Since // __global__ functions cannot be called from the device, we do not // need to set the noinline attribute. if (FD->hasAttr()) { // Create !{, metadata !"kernel", i32 1} node addNVVMMetadata(F, "kernel", 1); } if (CUDALaunchBoundsAttr *Attr = FD->getAttr()) { // Create !{, metadata !"maxntidx", i32 } node llvm::APSInt MaxThreads(32); MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(M.getContext()); if (MaxThreads > 0) addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue()); // min blocks is an optional argument for CUDALaunchBoundsAttr. If it was // not specified in __launch_bounds__ or if the user specified a 0 value, // we don't have to add a PTX directive. if (Attr->getMinBlocks()) { llvm::APSInt MinBlocks(32); MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(M.getContext()); if (MinBlocks > 0) // Create !{, metadata !"minctasm", i32 } node addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue()); } } } } void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::Function *F, StringRef Name, int Operand) { llvm::Module *M = F->getParent(); llvm::LLVMContext &Ctx = M->getContext(); // Get "nvvm.annotations" metadata node llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations"); llvm::Metadata *MDVals[] = { llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, Name), llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))}; // Append metadata to nvvm.annotations MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); } } //===----------------------------------------------------------------------===// // SystemZ ABI Implementation //===----------------------------------------------------------------------===// namespace { class SystemZABIInfo : public ABIInfo { bool HasVector; public: SystemZABIInfo(CodeGenTypes &CGT, bool HV) : ABIInfo(CGT), HasVector(HV) {} bool isPromotableIntegerType(QualType Ty) const; bool isCompoundType(QualType Ty) const; bool isVectorArgumentType(QualType Ty) const; bool isFPArgumentType(QualType Ty) const; QualType GetSingleElementType(QualType Ty) const; ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType ArgTy) const; void computeInfo(CGFunctionInfo &FI) const override { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type); } Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; }; class 
SystemZTargetCodeGenInfo : public TargetCodeGenInfo { public: SystemZTargetCodeGenInfo(CodeGenTypes &CGT, bool HasVector) : TargetCodeGenInfo(new SystemZABIInfo(CGT, HasVector)) {} }; } bool SystemZABIInfo::isPromotableIntegerType(QualType Ty) const { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); // Promotable integer types are required to be promoted by the ABI. if (Ty->isPromotableIntegerType()) return true; // 32-bit values must also be promoted. if (const BuiltinType *BT = Ty->getAs()) switch (BT->getKind()) { case BuiltinType::Int: case BuiltinType::UInt: return true; default: return false; } return false; } bool SystemZABIInfo::isCompoundType(QualType Ty) const { return (Ty->isAnyComplexType() || Ty->isVectorType() || isAggregateTypeForABI(Ty)); } bool SystemZABIInfo::isVectorArgumentType(QualType Ty) const { return (HasVector && Ty->isVectorType() && getContext().getTypeSize(Ty) <= 128); } bool SystemZABIInfo::isFPArgumentType(QualType Ty) const { if (const BuiltinType *BT = Ty->getAs()) switch (BT->getKind()) { case BuiltinType::Float: case BuiltinType::Double: return true; default: return false; } return false; } QualType SystemZABIInfo::GetSingleElementType(QualType Ty) const { if (const RecordType *RT = Ty->getAsStructureType()) { const RecordDecl *RD = RT->getDecl(); QualType Found; // If this is a C++ record, check the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) for (const auto &I : CXXRD->bases()) { QualType Base = I.getType(); // Empty bases don't affect things either way. if (isEmptyRecord(getContext(), Base, true)) continue; if (!Found.isNull()) return Ty; Found = GetSingleElementType(Base); } // Check the fields. for (const auto *FD : RD->fields()) { // For compatibility with GCC, ignore empty bitfields in C++ mode. // Unlike isSingleElementStruct(), empty structure and array fields // do count. So do anonymous bitfields that aren't zero-sized. if (getContext().getLangOpts().CPlusPlus && FD->isBitField() && FD->getBitWidthValue(getContext()) == 0) continue; // Unlike isSingleElementStruct(), arrays do not count. // Nested structures still do though. if (!Found.isNull()) return Ty; Found = GetSingleElementType(FD->getType()); } // Unlike isSingleElementStruct(), trailing padding is allowed. // An 8-byte aligned struct s { float f; } is passed as a double. if (!Found.isNull()) return Found; } return Ty; } Address SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { // Assume that va_list type is correct; should be pointer to LLVM type: // struct { // i64 __gpr; // i64 __fpr; // i8 *__overflow_arg_area; // i8 *__reg_save_area; // }; // Every non-vector argument occupies 8 bytes and is passed by preference // in either GPRs or FPRs. Vector arguments occupy 8 or 16 bytes and are // always passed on the stack. 
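// As a rough picture of the lowering below: __gpr/__fpr count how many of
// the 5 GPR (r2-r6) and 4 FPR (f0, f2, f4, f6) argument slots have been
// consumed, and a register argument is reloaded from __reg_save_area at
// (save-index + count) * 8, with small integers padded into the low bits of
// their 8-byte slot.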
Ty = getContext().getCanonicalType(Ty); auto TyInfo = getContext().getTypeInfoInChars(Ty); llvm::Type *ArgTy = CGF.ConvertTypeForMem(Ty); llvm::Type *DirectTy = ArgTy; ABIArgInfo AI = classifyArgumentType(Ty); bool IsIndirect = AI.isIndirect(); bool InFPRs = false; bool IsVector = false; CharUnits UnpaddedSize; CharUnits DirectAlign; if (IsIndirect) { DirectTy = llvm::PointerType::getUnqual(DirectTy); UnpaddedSize = DirectAlign = CharUnits::fromQuantity(8); } else { if (AI.getCoerceToType()) ArgTy = AI.getCoerceToType(); InFPRs = ArgTy->isFloatTy() || ArgTy->isDoubleTy(); IsVector = ArgTy->isVectorTy(); UnpaddedSize = TyInfo.first; DirectAlign = TyInfo.second; } CharUnits PaddedSize = CharUnits::fromQuantity(8); if (IsVector && UnpaddedSize > PaddedSize) PaddedSize = CharUnits::fromQuantity(16); assert((UnpaddedSize <= PaddedSize) && "Invalid argument size."); CharUnits Padding = (PaddedSize - UnpaddedSize); llvm::Type *IndexTy = CGF.Int64Ty; llvm::Value *PaddedSizeV = llvm::ConstantInt::get(IndexTy, PaddedSize.getQuantity()); if (IsVector) { // Work out the address of a vector argument on the stack. // Vector arguments are always passed in the high bits of a // single (8 byte) or double (16 byte) stack slot. Address OverflowArgAreaPtr = CGF.Builder.CreateStructGEP(VAListAddr, 2, CharUnits::fromQuantity(16), "overflow_arg_area_ptr"); Address OverflowArgArea = Address(CGF.Builder.CreateLoad(OverflowArgAreaPtr, "overflow_arg_area"), TyInfo.second); Address MemAddr = CGF.Builder.CreateElementBitCast(OverflowArgArea, DirectTy, "mem_addr"); // Update overflow_arg_area_ptr pointer llvm::Value *NewOverflowArgArea = CGF.Builder.CreateGEP(OverflowArgArea.getPointer(), PaddedSizeV, "overflow_arg_area"); CGF.Builder.CreateStore(NewOverflowArgArea, OverflowArgAreaPtr); return MemAddr; } assert(PaddedSize.getQuantity() == 8); unsigned MaxRegs, RegCountField, RegSaveIndex; CharUnits RegPadding; if (InFPRs) { MaxRegs = 4; // Maximum of 4 FPR arguments RegCountField = 1; // __fpr RegSaveIndex = 16; // save offset for f0 RegPadding = CharUnits(); // floats are passed in the high bits of an FPR } else { MaxRegs = 5; // Maximum of 5 GPR arguments RegCountField = 0; // __gpr RegSaveIndex = 2; // save offset for r2 RegPadding = Padding; // values are passed in the low bits of a GPR } Address RegCountPtr = CGF.Builder.CreateStructGEP( VAListAddr, RegCountField, RegCountField * CharUnits::fromQuantity(8), "reg_count_ptr"); llvm::Value *RegCount = CGF.Builder.CreateLoad(RegCountPtr, "reg_count"); llvm::Value *MaxRegsV = llvm::ConstantInt::get(IndexTy, MaxRegs); llvm::Value *InRegs = CGF.Builder.CreateICmpULT(RegCount, MaxRegsV, "fits_in_regs"); llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem"); llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); CGF.Builder.CreateCondBr(InRegs, InRegBlock, InMemBlock); // Emit code to load the value if it was passed in registers. CGF.EmitBlock(InRegBlock); // Work out the address of an argument register. 
llvm::Value *ScaledRegCount = CGF.Builder.CreateMul(RegCount, PaddedSizeV, "scaled_reg_count"); llvm::Value *RegBase = llvm::ConstantInt::get(IndexTy, RegSaveIndex * PaddedSize.getQuantity() + RegPadding.getQuantity()); llvm::Value *RegOffset = CGF.Builder.CreateAdd(ScaledRegCount, RegBase, "reg_offset"); Address RegSaveAreaPtr = CGF.Builder.CreateStructGEP(VAListAddr, 3, CharUnits::fromQuantity(24), "reg_save_area_ptr"); llvm::Value *RegSaveArea = CGF.Builder.CreateLoad(RegSaveAreaPtr, "reg_save_area"); Address RawRegAddr(CGF.Builder.CreateGEP(RegSaveArea, RegOffset, "raw_reg_addr"), PaddedSize); Address RegAddr = CGF.Builder.CreateElementBitCast(RawRegAddr, DirectTy, "reg_addr"); // Update the register count llvm::Value *One = llvm::ConstantInt::get(IndexTy, 1); llvm::Value *NewRegCount = CGF.Builder.CreateAdd(RegCount, One, "reg_count"); CGF.Builder.CreateStore(NewRegCount, RegCountPtr); CGF.EmitBranch(ContBlock); // Emit code to load the value if it was passed in memory. CGF.EmitBlock(InMemBlock); // Work out the address of a stack argument. Address OverflowArgAreaPtr = CGF.Builder.CreateStructGEP( VAListAddr, 2, CharUnits::fromQuantity(16), "overflow_arg_area_ptr"); Address OverflowArgArea = Address(CGF.Builder.CreateLoad(OverflowArgAreaPtr, "overflow_arg_area"), PaddedSize); Address RawMemAddr = CGF.Builder.CreateConstByteGEP(OverflowArgArea, Padding, "raw_mem_addr"); Address MemAddr = CGF.Builder.CreateElementBitCast(RawMemAddr, DirectTy, "mem_addr"); // Update overflow_arg_area_ptr pointer llvm::Value *NewOverflowArgArea = CGF.Builder.CreateGEP(OverflowArgArea.getPointer(), PaddedSizeV, "overflow_arg_area"); CGF.Builder.CreateStore(NewOverflowArgArea, OverflowArgAreaPtr); CGF.EmitBranch(ContBlock); // Return the appropriate result. CGF.EmitBlock(ContBlock); Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr, InMemBlock, "va_arg.addr"); if (IsIndirect) ResAddr = Address(CGF.Builder.CreateLoad(ResAddr, "indirect_arg"), TyInfo.second); return ResAddr; } ABIArgInfo SystemZABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); if (isVectorArgumentType(RetTy)) return ABIArgInfo::getDirect(); if (isCompoundType(RetTy) || getContext().getTypeSize(RetTy) > 64) return getNaturalAlignIndirect(RetTy); return (isPromotableIntegerType(RetTy) ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const { // Handle the generic C++ ABI. if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); // Integers and enums are extended to full register width. if (isPromotableIntegerType(Ty)) return ABIArgInfo::getExtend(); // Handle vector types and vector-like structure types. Note that // as opposed to float-like structure types, we do not allow any // padding for vector-like structures, so verify the sizes match. uint64_t Size = getContext().getTypeSize(Ty); QualType SingleElementTy = GetSingleElementType(Ty); if (isVectorArgumentType(SingleElementTy) && getContext().getTypeSize(SingleElementTy) == Size) return ABIArgInfo::getDirect(CGT.ConvertType(SingleElementTy)); // Values that are not 1, 2, 4 or 8 bytes in size are passed indirectly. if (Size != 8 && Size != 16 && Size != 32 && Size != 64) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); // Handle small structures. 
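// Concretely, under the rules below: struct { float f; } is passed as a
// float, struct { double d; } as a double, struct { short a, b; } as i32,
// and a struct with a flexible array member goes indirect.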
if (const RecordType *RT = Ty->getAs()) { // Structures with flexible arrays have variable length, so really // fail the size test above. const RecordDecl *RD = RT->getDecl(); if (RD->hasFlexibleArrayMember()) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); // The structure is passed as an unextended integer, a float, or a double. llvm::Type *PassTy; if (isFPArgumentType(SingleElementTy)) { assert(Size == 32 || Size == 64); if (Size == 32) PassTy = llvm::Type::getFloatTy(getVMContext()); else PassTy = llvm::Type::getDoubleTy(getVMContext()); } else PassTy = llvm::IntegerType::get(getVMContext(), Size); return ABIArgInfo::getDirect(PassTy); } // Non-structure compounds are passed indirectly. if (isCompoundType(Ty)) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); return ABIArgInfo::getDirect(nullptr); } //===----------------------------------------------------------------------===// // MSP430 ABI Implementation //===----------------------------------------------------------------------===// namespace { class MSP430TargetCodeGenInfo : public TargetCodeGenInfo { public: MSP430TargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; }; } void MSP430TargetCodeGenInfo::setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { if (const FunctionDecl *FD = dyn_cast_or_null(D)) { if (const MSP430InterruptAttr *attr = FD->getAttr()) { // Handle 'interrupt' attribute: llvm::Function *F = cast(GV); // Step 1: Set ISR calling convention. F->setCallingConv(llvm::CallingConv::MSP430_INTR); // Step 2: Add attributes goodness. F->addFnAttr(llvm::Attribute::NoInline); // Step 3: Emit ISR vector alias. unsigned Num = attr->getNumber() / 2; llvm::GlobalAlias::create(llvm::Function::ExternalLinkage, "__isr_" + Twine(Num), F); } } } //===----------------------------------------------------------------------===// // MIPS ABI Implementation. This works for both little-endian and // big-endian variants. //===----------------------------------------------------------------------===// namespace { class MipsABIInfo : public ABIInfo { bool IsO32; unsigned MinABIStackAlignInBytes, StackAlignInBytes; void CoerceToIntArgs(uint64_t TySize, SmallVectorImpl &ArgList) const; llvm::Type* HandleAggregates(QualType Ty, uint64_t TySize) const; llvm::Type* returnAggregateInRegs(QualType RetTy, uint64_t Size) const; llvm::Type* getPaddingType(uint64_t Align, uint64_t Offset) const; public: MipsABIInfo(CodeGenTypes &CGT, bool _IsO32) : ABIInfo(CGT), IsO32(_IsO32), MinABIStackAlignInBytes(IsO32 ? 4 : 8), StackAlignInBytes(IsO32 ? 8 : 16) {} ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType RetTy, uint64_t &Offset) const; void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; bool shouldSignExtUnsignedType(QualType Ty) const override; }; class MIPSTargetCodeGenInfo : public TargetCodeGenInfo { unsigned SizeOfUnwindException; public: MIPSTargetCodeGenInfo(CodeGenTypes &CGT, bool IsO32) : TargetCodeGenInfo(new MipsABIInfo(CGT, IsO32)), SizeOfUnwindException(IsO32 ? 
24 : 32) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { return 29; } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { const FunctionDecl *FD = dyn_cast_or_null(D); if (!FD) return; llvm::Function *Fn = cast(GV); if (FD->hasAttr()) { Fn->addFnAttr("mips16"); } else if (FD->hasAttr()) { Fn->addFnAttr("nomips16"); } const MipsInterruptAttr *Attr = FD->getAttr(); if (!Attr) return; const char *Kind; switch (Attr->getInterrupt()) { case MipsInterruptAttr::eic: Kind = "eic"; break; case MipsInterruptAttr::sw0: Kind = "sw0"; break; case MipsInterruptAttr::sw1: Kind = "sw1"; break; case MipsInterruptAttr::hw0: Kind = "hw0"; break; case MipsInterruptAttr::hw1: Kind = "hw1"; break; case MipsInterruptAttr::hw2: Kind = "hw2"; break; case MipsInterruptAttr::hw3: Kind = "hw3"; break; case MipsInterruptAttr::hw4: Kind = "hw4"; break; case MipsInterruptAttr::hw5: Kind = "hw5"; break; } Fn->addFnAttr("interrupt", Kind); } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; unsigned getSizeOfUnwindException() const override { return SizeOfUnwindException; } }; } void MipsABIInfo::CoerceToIntArgs( uint64_t TySize, SmallVectorImpl &ArgList) const { llvm::IntegerType *IntTy = llvm::IntegerType::get(getVMContext(), MinABIStackAlignInBytes * 8); // Add (TySize / MinABIStackAlignInBytes) args of IntTy. for (unsigned N = TySize / (MinABIStackAlignInBytes * 8); N; --N) ArgList.push_back(IntTy); // If necessary, add one more integer type to ArgList. unsigned R = TySize % (MinABIStackAlignInBytes * 8); if (R) ArgList.push_back(llvm::IntegerType::get(getVMContext(), R)); } // In N32/64, an aligned double precision floating point field is passed in // a register. llvm::Type* MipsABIInfo::HandleAggregates(QualType Ty, uint64_t TySize) const { SmallVector ArgList, IntArgList; if (IsO32) { CoerceToIntArgs(TySize, ArgList); return llvm::StructType::get(getVMContext(), ArgList); } if (Ty->isComplexType()) return CGT.ConvertType(Ty); const RecordType *RT = Ty->getAs(); // Unions/vectors are passed in integer registers. if (!RT || !RT->isStructureOrClassType()) { CoerceToIntArgs(TySize, ArgList); return llvm::StructType::get(getVMContext(), ArgList); } const RecordDecl *RD = RT->getDecl(); const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); assert(!(TySize % 8) && "Size of structure must be multiple of 8."); uint64_t LastOffset = 0; unsigned idx = 0; llvm::IntegerType *I64 = llvm::IntegerType::get(getVMContext(), 64); // Iterate over fields in the struct/class and check if there are any aligned // double fields. for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); i != e; ++i, ++idx) { const QualType Ty = i->getType(); const BuiltinType *BT = Ty->getAs(); if (!BT || BT->getKind() != BuiltinType::Double) continue; uint64_t Offset = Layout.getFieldOffset(idx); if (Offset % 64) // Ignore doubles that are not aligned. continue; // Add ((Offset - LastOffset) / 64) args of type i64. for (unsigned j = (Offset - LastOffset) / 64; j > 0; --j) ArgList.push_back(I64); // Add double type. 
ArgList.push_back(llvm::Type::getDoubleTy(getVMContext())); LastOffset = Offset + 64; } CoerceToIntArgs(TySize - LastOffset, IntArgList); ArgList.append(IntArgList.begin(), IntArgList.end()); return llvm::StructType::get(getVMContext(), ArgList); } llvm::Type *MipsABIInfo::getPaddingType(uint64_t OrigOffset, uint64_t Offset) const { if (OrigOffset + MinABIStackAlignInBytes > Offset) return nullptr; return llvm::IntegerType::get(getVMContext(), (Offset - OrigOffset) * 8); } ABIArgInfo MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const { Ty = useFirstFieldIfTransparentUnion(Ty); uint64_t OrigOffset = Offset; uint64_t TySize = getContext().getTypeSize(Ty); uint64_t Align = getContext().getTypeAlign(Ty) / 8; Align = std::min(std::max(Align, (uint64_t)MinABIStackAlignInBytes), (uint64_t)StackAlignInBytes); unsigned CurrOffset = llvm::RoundUpToAlignment(Offset, Align); Offset = CurrOffset + llvm::RoundUpToAlignment(TySize, Align * 8) / 8; if (isAggregateTypeForABI(Ty) || Ty->isVectorType()) { // Ignore empty aggregates. if (TySize == 0) return ABIArgInfo::getIgnore(); if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { Offset = OrigOffset + MinABIStackAlignInBytes; return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); } // If we have reached here, aggregates are passed directly by coercing to // another structure type. Padding is inserted if the offset of the // aggregate is unaligned. ABIArgInfo ArgInfo = ABIArgInfo::getDirect(HandleAggregates(Ty, TySize), 0, getPaddingType(OrigOffset, CurrOffset)); ArgInfo.setInReg(true); return ArgInfo; } // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); // All integral types are promoted to the GPR width. if (Ty->isIntegralOrEnumerationType()) return ABIArgInfo::getExtend(); return ABIArgInfo::getDirect( nullptr, 0, IsO32 ? nullptr : getPaddingType(OrigOffset, CurrOffset)); } llvm::Type* MipsABIInfo::returnAggregateInRegs(QualType RetTy, uint64_t Size) const { const RecordType *RT = RetTy->getAs(); SmallVector RTList; if (RT && RT->isStructureOrClassType()) { const RecordDecl *RD = RT->getDecl(); const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); unsigned FieldCnt = Layout.getFieldCount(); // N32/64 returns struct/classes in floating point registers if the // following conditions are met: // 1. The size of the struct/class is no larger than 128-bit. // 2. The struct/class has one or two fields all of which are floating // point types. // 3. The offset of the first field is zero (this follows what gcc does). // // Any other composite results are returned in integer registers. // if (FieldCnt && (FieldCnt <= 2) && !Layout.getFieldOffset(0)) { RecordDecl::field_iterator b = RD->field_begin(), e = RD->field_end(); for (; b != e; ++b) { const BuiltinType *BT = b->getType()->getAs(); if (!BT || !BT->isFloatingPoint()) break; RTList.push_back(CGT.ConvertType(b->getType())); } if (b == e) return llvm::StructType::get(getVMContext(), RTList, RD->hasAttr()); RTList.clear(); } } CoerceToIntArgs(Size, RTList); return llvm::StructType::get(getVMContext(), RTList); } ABIArgInfo MipsABIInfo::classifyReturnType(QualType RetTy) const { uint64_t Size = getContext().getTypeSize(RetTy); if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); // O32 doesn't treat zero-sized structs differently from other structs. // However, N32/N64 ignores zero sized return values. 
if (!IsO32 && Size == 0) return ABIArgInfo::getIgnore(); if (isAggregateTypeForABI(RetTy) || RetTy->isVectorType()) { if (Size <= 128) { if (RetTy->isAnyComplexType()) return ABIArgInfo::getDirect(); // O32 returns integer vectors in registers and N32/N64 returns all small // aggregates in registers. if (!IsO32 || (RetTy->isVectorType() && !RetTy->hasFloatingRepresentation())) { ABIArgInfo ArgInfo = ABIArgInfo::getDirect(returnAggregateInRegs(RetTy, Size)); ArgInfo.setInReg(true); return ArgInfo; } } return getNaturalAlignIndirect(RetTy); } // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } void MipsABIInfo::computeInfo(CGFunctionInfo &FI) const { ABIArgInfo &RetInfo = FI.getReturnInfo(); if (!getCXXABI().classifyReturnType(FI)) RetInfo = classifyReturnType(FI.getReturnType()); // Check if a pointer to an aggregate is passed as a hidden argument. uint64_t Offset = RetInfo.isIndirect() ? MinABIStackAlignInBytes : 0; for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type, Offset); } Address MipsABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType OrigTy) const { QualType Ty = OrigTy; // Integer arguments are promoted to 32-bit on O32 and 64-bit on N32/N64. // Pointers are also promoted in the same way but this only matters for N32. unsigned SlotSizeInBits = IsO32 ? 32 : 64; unsigned PtrWidth = getTarget().getPointerWidth(0); bool DidPromote = false; if ((Ty->isIntegerType() && getContext().getIntWidth(Ty) < SlotSizeInBits) || (Ty->isPointerType() && PtrWidth < SlotSizeInBits)) { DidPromote = true; Ty = getContext().getIntTypeForBitwidth(SlotSizeInBits, Ty->isSignedIntegerType()); } auto TyInfo = getContext().getTypeInfoInChars(Ty); // The alignment of things in the argument area is never larger than // StackAlignInBytes. TyInfo.second = std::min(TyInfo.second, CharUnits::fromQuantity(StackAlignInBytes)); // MinABIStackAlignInBytes is the size of argument slots on the stack. CharUnits ArgSlotSize = CharUnits::fromQuantity(MinABIStackAlignInBytes); Address Addr = emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, TyInfo, ArgSlotSize, /*AllowHigherAlign*/ true); // If there was a promotion, "unpromote" into a temporary. // TODO: can we just use a pointer into a subset of the original slot? if (DidPromote) { Address Temp = CGF.CreateMemTemp(OrigTy, "vaarg.promotion-temp"); llvm::Value *Promoted = CGF.Builder.CreateLoad(Addr); // Truncate down to the right width. llvm::Type *IntTy = (OrigTy->isIntegerType() ? Temp.getElementType() : CGF.IntPtrTy); llvm::Value *V = CGF.Builder.CreateTrunc(Promoted, IntTy); if (OrigTy->isPointerType()) V = CGF.Builder.CreateIntToPtr(V, Temp.getElementType()); CGF.Builder.CreateStore(V, Temp); Addr = Temp; } return Addr; } bool MipsABIInfo::shouldSignExtUnsignedType(QualType Ty) const { int TySize = getContext().getTypeSize(Ty); // MIPS64 ABI requires unsigned 32 bit integers to be sign extended. if (Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32) return true; return false; } bool MIPSTargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { // This information comes from gcc's implementation, which seems to // as canonical as it gets. // Everything on MIPS is 4 bytes. Double-precision FP registers // are aliased to pairs of single-precision FP registers. 
llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4); // 0-31 are the general purpose registers, $0 - $31. // 32-63 are the floating-point registers, $f0 - $f31. // 64 and 65 are the multiply/divide registers, $hi and $lo. // 66 is the (notional, I think) register for signal-handler return. AssignToArrayRange(CGF.Builder, Address, Four8, 0, 65); // 67-74 are the floating-point status registers, $fcc0 - $fcc7. // They are one bit wide and ignored here. // 80-111 are the coprocessor 0 registers, $c0r0 - $c0r31. // (coprocessor 1 is the FP unit) // 112-143 are the coprocessor 2 registers, $c2r0 - $c2r31. // 144-175 are the coprocessor 3 registers, $c3r0 - $c3r31. // 176-181 are the DSP accumulator registers. AssignToArrayRange(CGF.Builder, Address, Four8, 80, 181); return false; } //===----------------------------------------------------------------------===// // TCE ABI Implementation (see http://tce.cs.tut.fi). Uses mostly the defaults. // Currently subclassed only to implement custom OpenCL C function attribute // handling. //===----------------------------------------------------------------------===// namespace { class TCETargetCodeGenInfo : public DefaultTargetCodeGenInfo { public: TCETargetCodeGenInfo(CodeGenTypes &CGT) : DefaultTargetCodeGenInfo(CGT) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; }; void TCETargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { const FunctionDecl *FD = dyn_cast_or_null(D); if (!FD) return; llvm::Function *F = cast(GV); if (M.getLangOpts().OpenCL) { if (FD->hasAttr()) { // OpenCL C Kernel functions are not subject to inlining F->addFnAttr(llvm::Attribute::NoInline); const ReqdWorkGroupSizeAttr *Attr = FD->getAttr(); if (Attr) { // Convert the reqd_work_group_size() attributes to metadata. llvm::LLVMContext &Context = F->getContext(); llvm::NamedMDNode *OpenCLMetadata = M.getModule().getOrInsertNamedMetadata( "opencl.kernel_wg_size_info"); SmallVector Operands; Operands.push_back(llvm::ConstantAsMetadata::get(F)); Operands.push_back( llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue( M.Int32Ty, llvm::APInt(32, Attr->getXDim())))); Operands.push_back( llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue( M.Int32Ty, llvm::APInt(32, Attr->getYDim())))); Operands.push_back( llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue( M.Int32Ty, llvm::APInt(32, Attr->getZDim())))); // Add a boolean constant operand for "required" (true) or "hint" // (false) for implementing the work_group_size_hint attr later. // Currently always true as the hint is not yet implemented. 
Operands.push_back( llvm::ConstantAsMetadata::get(llvm::ConstantInt::getTrue(Context))); OpenCLMetadata->addOperand(llvm::MDNode::get(Context, Operands)); } } } } } //===----------------------------------------------------------------------===// // Hexagon ABI Implementation //===----------------------------------------------------------------------===// namespace { class HexagonABIInfo : public ABIInfo { public: HexagonABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {} private: ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType RetTy) const; void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; }; class HexagonTargetCodeGenInfo : public TargetCodeGenInfo { public: HexagonTargetCodeGenInfo(CodeGenTypes &CGT) :TargetCodeGenInfo(new HexagonABIInfo(CGT)) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { return 29; } }; } void HexagonABIInfo::computeInfo(CGFunctionInfo &FI) const { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type); } ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty) const { if (!isAggregateTypeForABI(Ty)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } // Ignore empty records. if (isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); uint64_t Size = getContext().getTypeSize(Ty); if (Size > 64) return getNaturalAlignIndirect(Ty, /*ByVal=*/true); // Pass in the smallest viable integer type. else if (Size > 32) return ABIArgInfo::getDirect(llvm::Type::getInt64Ty(getVMContext())); else if (Size > 16) return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); else if (Size > 8) return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); else return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); } ABIArgInfo HexagonABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); // Large vector types should be returned via memory. if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 64) return getNaturalAlignIndirect(RetTy); if (!isAggregateTypeForABI(RetTy)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } if (isEmptyRecord(getContext(), RetTy, true)) return ABIArgInfo::getIgnore(); // Aggregates <= 8 bytes are returned in r0; other aggregates // are returned indirectly. uint64_t Size = getContext().getTypeSize(RetTy); if (Size <= 64) { // Return in the smallest viable integer type. 
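Stepping outside the patch for a moment, the argument-passing ladder in HexagonABIInfo::classifyArgumentType above maps small aggregates onto the smallest integer type that holds them. The sizes below assume a typical 8-bit char / 16-bit short / 32-bit int layout and are illustrative only.

struct OneByte  { char c; };           //  8 bits              -> passed directly as i8
struct TwoShort { short a, b; };       // 32 bits              -> passed directly as i32
struct IntShort { int a; short b; };   // 64 bits with padding -> passed directly as i64
struct TooBig   { int a[3]; };         // 96 bits > 64         -> passed (and returned) indirectly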
if (Size <= 8) return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); if (Size <= 16) return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); if (Size <= 32) return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); return ABIArgInfo::getDirect(llvm::Type::getInt64Ty(getVMContext())); } return getNaturalAlignIndirect(RetTy, /*ByVal=*/true); } Address HexagonABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { // FIXME: Someone needs to audit that this handle alignment correctly. return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, getContext().getTypeInfoInChars(Ty), CharUnits::fromQuantity(4), /*AllowHigherAlign*/ true); } //===----------------------------------------------------------------------===// // AMDGPU ABI Implementation //===----------------------------------------------------------------------===// namespace { class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo { public: AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; }; } void AMDGPUTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { const FunctionDecl *FD = dyn_cast_or_null(D); if (!FD) return; if (const auto Attr = FD->getAttr()) { llvm::Function *F = cast(GV); uint32_t NumVGPR = Attr->getNumVGPR(); if (NumVGPR != 0) F->addFnAttr("amdgpu_num_vgpr", llvm::utostr(NumVGPR)); } if (const auto Attr = FD->getAttr()) { llvm::Function *F = cast(GV); unsigned NumSGPR = Attr->getNumSGPR(); if (NumSGPR != 0) F->addFnAttr("amdgpu_num_sgpr", llvm::utostr(NumSGPR)); } } //===----------------------------------------------------------------------===// // SPARC v9 ABI Implementation. // Based on the SPARC Compliance Definition version 2.4.1. // // Function arguments a mapped to a nominal "parameter array" and promoted to // registers depending on their type. Each argument occupies 8 or 16 bytes in // the array, structs larger than 16 bytes are passed indirectly. // // One case requires special care: // // struct mixed { // int i; // float f; // }; // // When a struct mixed is passed by value, it only occupies 8 bytes in the // parameter array, but the int is passed in an integer register, and the float // is passed in a floating point register. This is represented as two arguments // with the LLVM IR inreg attribute: // // declare void f(i32 inreg %i, float inreg %f) // // The code generator will only allocate 4 bytes from the parameter array for // the inreg arguments. All other arguments are allocated a multiple of 8 // bytes. // namespace { class SparcV9ABIInfo : public ABIInfo { public: SparcV9ABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {} private: ABIArgInfo classifyType(QualType RetTy, unsigned SizeLimit) const; void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; // Coercion type builder for structs passed in registers. The coercion type // serves two purposes: // // 1. Pad structs to a multiple of 64 bits, so they are passed 'left-aligned' // in registers. // 2. Expose aligned floating point elements as first-level elements, so the // code generator knows to pass them in floating point registers. // // We also compute the InReg flag which indicates that the struct contains // aligned 32-bit floats. 
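As a worked aside (derived from the rules just described, not captured from the compiler), here is the coercion type two hypothetical structs would end up with once CoerceBuilder has run: aligned floating-point members become first-class elements, and everything else is covered by integer padding.

struct Mixed  { int i; float f; };   // coerces to { i32, float }, flagged inreg
                                     //   (the float is 32 bits wide and 32-bit aligned)
struct DblInt { double d; int i; };  // coerces to { double, i64 }; no inreg flag,
                                     //   since its only FP element is 64 bits wide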
// struct CoerceBuilder { llvm::LLVMContext &Context; const llvm::DataLayout &DL; SmallVector Elems; uint64_t Size; bool InReg; CoerceBuilder(llvm::LLVMContext &c, const llvm::DataLayout &dl) : Context(c), DL(dl), Size(0), InReg(false) {} // Pad Elems with integers until Size is ToSize. void pad(uint64_t ToSize) { assert(ToSize >= Size && "Cannot remove elements"); if (ToSize == Size) return; // Finish the current 64-bit word. uint64_t Aligned = llvm::RoundUpToAlignment(Size, 64); if (Aligned > Size && Aligned <= ToSize) { Elems.push_back(llvm::IntegerType::get(Context, Aligned - Size)); Size = Aligned; } // Add whole 64-bit words. while (Size + 64 <= ToSize) { Elems.push_back(llvm::Type::getInt64Ty(Context)); Size += 64; } // Final in-word padding. if (Size < ToSize) { Elems.push_back(llvm::IntegerType::get(Context, ToSize - Size)); Size = ToSize; } } // Add a floating point element at Offset. void addFloat(uint64_t Offset, llvm::Type *Ty, unsigned Bits) { // Unaligned floats are treated as integers. if (Offset % Bits) return; // The InReg flag is only required if there are any floats < 64 bits. if (Bits < 64) InReg = true; pad(Offset); Elems.push_back(Ty); Size = Offset + Bits; } // Add a struct type to the coercion type, starting at Offset (in bits). void addStruct(uint64_t Offset, llvm::StructType *StrTy) { const llvm::StructLayout *Layout = DL.getStructLayout(StrTy); for (unsigned i = 0, e = StrTy->getNumElements(); i != e; ++i) { llvm::Type *ElemTy = StrTy->getElementType(i); uint64_t ElemOffset = Offset + Layout->getElementOffsetInBits(i); switch (ElemTy->getTypeID()) { case llvm::Type::StructTyID: addStruct(ElemOffset, cast(ElemTy)); break; case llvm::Type::FloatTyID: addFloat(ElemOffset, ElemTy, 32); break; case llvm::Type::DoubleTyID: addFloat(ElemOffset, ElemTy, 64); break; case llvm::Type::FP128TyID: addFloat(ElemOffset, ElemTy, 128); break; case llvm::Type::PointerTyID: if (ElemOffset % 64 == 0) { pad(ElemOffset); Elems.push_back(ElemTy); Size += 64; } break; default: break; } } } // Check if Ty is a usable substitute for the coercion type. bool isUsableType(llvm::StructType *Ty) const { return llvm::makeArrayRef(Elems) == Ty->elements(); } // Get the coercion type as a literal struct type. llvm::Type *getType() const { if (Elems.size() == 1) return Elems.front(); else return llvm::StructType::get(Context, Elems); } }; }; } // end anonymous namespace ABIArgInfo SparcV9ABIInfo::classifyType(QualType Ty, unsigned SizeLimit) const { if (Ty->isVoidType()) return ABIArgInfo::getIgnore(); uint64_t Size = getContext().getTypeSize(Ty); // Anything too big to fit in registers is passed with an explicit indirect // pointer / sret pointer. if (Size > SizeLimit) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); // Integer types smaller than a register are extended. if (Size < 64 && Ty->isIntegerType()) return ABIArgInfo::getExtend(); // Other non-aggregates go in registers. if (!isAggregateTypeForABI(Ty)) return ABIArgInfo::getDirect(); // If a C++ object has either a non-trivial copy constructor or a non-trivial // destructor, it is passed with an explicit indirect pointer / sret pointer. if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); // This is a small aggregate type that should be passed in registers. // Build a coercion type from the LLVM struct type. 
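A minimal standalone sketch, assuming nothing beyond the algorithm in CoerceBuilder::pad above, showing how the padding integer widths are chosen; for Size = 32 and ToSize = 200 it yields 32, 64, 64 and 8.

#include <cstdint>
#include <vector>

// Returns the bit widths of the padding integers pad() would append.
static std::vector<unsigned> padWidths(uint64_t Size, uint64_t ToSize) {
  std::vector<unsigned> Widths;
  uint64_t Aligned = (Size + 63) / 64 * 64;     // finish the current 64-bit word
  if (Aligned > Size && Aligned <= ToSize) {
    Widths.push_back(unsigned(Aligned - Size));
    Size = Aligned;
  }
  while (Size + 64 <= ToSize) {                 // whole 64-bit words
    Widths.push_back(64);
    Size += 64;
  }
  if (Size < ToSize)                            // final in-word padding
    Widths.push_back(unsigned(ToSize - Size));
  return Widths;
}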
llvm::StructType *StrTy = dyn_cast(CGT.ConvertType(Ty)); if (!StrTy) return ABIArgInfo::getDirect(); CoerceBuilder CB(getVMContext(), getDataLayout()); CB.addStruct(0, StrTy); CB.pad(llvm::RoundUpToAlignment(CB.DL.getTypeSizeInBits(StrTy), 64)); // Try to use the original type for coercion. llvm::Type *CoerceTy = CB.isUsableType(StrTy) ? StrTy : CB.getType(); if (CB.InReg) return ABIArgInfo::getDirectInReg(CoerceTy); else return ABIArgInfo::getDirect(CoerceTy); } Address SparcV9ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { ABIArgInfo AI = classifyType(Ty, 16 * 8); llvm::Type *ArgTy = CGT.ConvertType(Ty); if (AI.canHaveCoerceToType() && !AI.getCoerceToType()) AI.setCoerceToType(ArgTy); CharUnits SlotSize = CharUnits::fromQuantity(8); CGBuilderTy &Builder = CGF.Builder; Address Addr(Builder.CreateLoad(VAListAddr, "ap.cur"), SlotSize); llvm::Type *ArgPtrTy = llvm::PointerType::getUnqual(ArgTy); auto TypeInfo = getContext().getTypeInfoInChars(Ty); Address ArgAddr = Address::invalid(); CharUnits Stride; switch (AI.getKind()) { case ABIArgInfo::Expand: case ABIArgInfo::InAlloca: llvm_unreachable("Unsupported ABI kind for va_arg"); case ABIArgInfo::Extend: { Stride = SlotSize; CharUnits Offset = SlotSize - TypeInfo.first; ArgAddr = Builder.CreateConstInBoundsByteGEP(Addr, Offset, "extend"); break; } case ABIArgInfo::Direct: { auto AllocSize = getDataLayout().getTypeAllocSize(AI.getCoerceToType()); Stride = CharUnits::fromQuantity(AllocSize).RoundUpToAlignment(SlotSize); ArgAddr = Addr; break; } case ABIArgInfo::Indirect: Stride = SlotSize; ArgAddr = Builder.CreateElementBitCast(Addr, ArgPtrTy, "indirect"); ArgAddr = Address(Builder.CreateLoad(ArgAddr, "indirect.arg"), TypeInfo.second); break; case ABIArgInfo::Ignore: return Address(llvm::UndefValue::get(ArgPtrTy), TypeInfo.second); } // Update VAList. llvm::Value *NextPtr = Builder.CreateConstInBoundsByteGEP(Addr.getPointer(), Stride, "ap.next"); Builder.CreateStore(NextPtr, VAListAddr); return Builder.CreateBitCast(ArgAddr, ArgPtrTy, "arg.addr"); } void SparcV9ABIInfo::computeInfo(CGFunctionInfo &FI) const { FI.getReturnInfo() = classifyType(FI.getReturnType(), 32 * 8); for (auto &I : FI.arguments()) I.info = classifyType(I.type, 16 * 8); } namespace { class SparcV9TargetCodeGenInfo : public TargetCodeGenInfo { public: SparcV9TargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new SparcV9ABIInfo(CGT)) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { return 14; } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; }; } // end anonymous namespace bool SparcV9TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { // This is calculated from the LLVM and GCC tables and verified // against gcc output. AFAIK all ABIs use the same encoding. 
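One more aside before the register-size table below: the Extend case of SparcV9ABIInfo::EmitVAArg above biases the load address because SPARC v9 is big-endian, so a value narrower than the 8-byte parameter slot sits in the slot's high-addressed bytes. A hypothetical helper with the same arithmetic:

// Byte offset of a narrow value inside its 8-byte parameter slot
// (e.g. 4 for a 32-bit int, 6 for a 16-bit short).
static unsigned extendOffsetInSlot(unsigned TypeSizeInBytes) {
  const unsigned SlotSize = 8;
  return SlotSize - TypeSizeInBytes;
}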
CodeGen::CGBuilderTy &Builder = CGF.Builder; llvm::IntegerType *i8 = CGF.Int8Ty; llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4); llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8); // 0-31: the 8-byte general-purpose registers AssignToArrayRange(Builder, Address, Eight8, 0, 31); // 32-63: f0-31, the 4-byte floating-point registers AssignToArrayRange(Builder, Address, Four8, 32, 63); // Y = 64 // PSR = 65 // WIM = 66 // TBR = 67 // PC = 68 // NPC = 69 // FSR = 70 // CSR = 71 AssignToArrayRange(Builder, Address, Eight8, 64, 71); // 72-87: d0-15, the 8-byte floating-point registers AssignToArrayRange(Builder, Address, Eight8, 72, 87); return false; } //===----------------------------------------------------------------------===// // XCore ABI Implementation //===----------------------------------------------------------------------===// namespace { /// A SmallStringEnc instance is used to build up the TypeString by passing /// it by reference between functions that append to it. typedef llvm::SmallString<128> SmallStringEnc; /// TypeStringCache caches the meta encodings of Types. /// /// The reason for caching TypeStrings is two fold: /// 1. To cache a type's encoding for later uses; /// 2. As a means to break recursive member type inclusion. /// /// A cache Entry can have a Status of: /// NonRecursive: The type encoding is not recursive; /// Recursive: The type encoding is recursive; /// Incomplete: An incomplete TypeString; /// IncompleteUsed: An incomplete TypeString that has been used in a /// Recursive type encoding. /// /// A NonRecursive entry will have all of its sub-members expanded as fully /// as possible. Whilst it may contain types which are recursive, the type /// itself is not recursive and thus its encoding may be safely used whenever /// the type is encountered. /// /// A Recursive entry will have all of its sub-members expanded as fully as /// possible. The type itself is recursive and it may contain other types which /// are recursive. The Recursive encoding must not be used during the expansion /// of a recursive type's recursive branch. For simplicity the code uses /// IncompleteCount to reject all usage of Recursive encodings for member types. /// /// An Incomplete entry is always a RecordType and only encodes its /// identifier e.g. "s(S){}". Incomplete 'StubEnc' entries are ephemeral and /// are placed into the cache during type expansion as a means to identify and /// handle recursive inclusion of types as sub-members. If there is recursion /// the entry becomes IncompleteUsed. /// /// During the expansion of a RecordType's members: /// /// If the cache contains a NonRecursive encoding for the member type, the /// cached encoding is used; /// /// If the cache contains a Recursive encoding for the member type, the /// cached encoding is 'Swapped' out, as it may be incorrect, and... /// /// If the member is a RecordType, an Incomplete encoding is placed into the /// cache to break potential recursive inclusion of itself as a sub-member; /// /// Once a member RecordType has been expanded, its temporary incomplete /// entry is removed from the cache. If a Recursive encoding was swapped out /// it is swapped back in; /// /// If an incomplete entry is used to expand a sub-member, the incomplete /// entry is marked as IncompleteUsed. 
The cache keeps count of how many /// IncompleteUsed entries it currently contains in IncompleteUsedCount; /// /// If a member's encoding is found to be a NonRecursive or Recursive viz: /// IncompleteUsedCount==0, the member's encoding is added to the cache. /// Else the member is part of a recursive type and thus the recursion has /// been exited too soon for the encoding to be correct for the member. /// class TypeStringCache { enum Status {NonRecursive, Recursive, Incomplete, IncompleteUsed}; struct Entry { std::string Str; // The encoded TypeString for the type. enum Status State; // Information about the encoding in 'Str'. std::string Swapped; // A temporary place holder for a Recursive encoding // during the expansion of RecordType's members. }; std::map Map; unsigned IncompleteCount; // Number of Incomplete entries in the Map. unsigned IncompleteUsedCount; // Number of IncompleteUsed entries in the Map. public: TypeStringCache() : IncompleteCount(0), IncompleteUsedCount(0) {} void addIncomplete(const IdentifierInfo *ID, std::string StubEnc); bool removeIncomplete(const IdentifierInfo *ID); void addIfComplete(const IdentifierInfo *ID, StringRef Str, bool IsRecursive); StringRef lookupStr(const IdentifierInfo *ID); }; /// TypeString encodings for enum & union fields must be order. /// FieldEncoding is a helper for this ordering process. class FieldEncoding { bool HasName; std::string Enc; public: FieldEncoding(bool b, SmallStringEnc &e) : HasName(b), Enc(e.c_str()) {} StringRef str() {return Enc.c_str();} bool operator<(const FieldEncoding &rhs) const { if (HasName != rhs.HasName) return HasName; return Enc < rhs.Enc; } }; class XCoreABIInfo : public DefaultABIInfo { public: XCoreABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; }; class XCoreTargetCodeGenInfo : public TargetCodeGenInfo { mutable TypeStringCache TSC; public: XCoreTargetCodeGenInfo(CodeGenTypes &CGT) :TargetCodeGenInfo(new XCoreABIInfo(CGT)) {} void emitTargetMD(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; }; } // End anonymous namespace. Address XCoreABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { CGBuilderTy &Builder = CGF.Builder; // Get the VAList. CharUnits SlotSize = CharUnits::fromQuantity(4); Address AP(Builder.CreateLoad(VAListAddr), SlotSize); // Handle the argument. ABIArgInfo AI = classifyArgumentType(Ty); CharUnits TypeAlign = getContext().getTypeAlignInChars(Ty); llvm::Type *ArgTy = CGT.ConvertType(Ty); if (AI.canHaveCoerceToType() && !AI.getCoerceToType()) AI.setCoerceToType(ArgTy); llvm::Type *ArgPtrTy = llvm::PointerType::getUnqual(ArgTy); Address Val = Address::invalid(); CharUnits ArgSize = CharUnits::Zero(); switch (AI.getKind()) { case ABIArgInfo::Expand: case ABIArgInfo::InAlloca: llvm_unreachable("Unsupported ABI kind for va_arg"); case ABIArgInfo::Ignore: Val = Address(llvm::UndefValue::get(ArgPtrTy), TypeAlign); ArgSize = CharUnits::Zero(); break; case ABIArgInfo::Extend: case ABIArgInfo::Direct: Val = Builder.CreateBitCast(AP, ArgPtrTy); ArgSize = CharUnits::fromQuantity( getDataLayout().getTypeAllocSize(AI.getCoerceToType())); ArgSize = ArgSize.RoundUpToAlignment(SlotSize); break; case ABIArgInfo::Indirect: Val = Builder.CreateElementBitCast(AP, ArgPtrTy); Val = Address(Builder.CreateLoad(Val), TypeAlign); ArgSize = SlotSize; break; } // Increment the VAList. 
if (!ArgSize.isZero()) { llvm::Value *APN = Builder.CreateConstInBoundsByteGEP(AP.getPointer(), ArgSize); Builder.CreateStore(APN, VAListAddr); } return Val; } /// During the expansion of a RecordType, an incomplete TypeString is placed /// into the cache as a means to identify and break recursion. /// If there is a Recursive encoding in the cache, it is swapped out and will /// be reinserted by removeIncomplete(). /// All other types of encoding should have been used rather than arriving here. void TypeStringCache::addIncomplete(const IdentifierInfo *ID, std::string StubEnc) { if (!ID) return; Entry &E = Map[ID]; assert( (E.Str.empty() || E.State == Recursive) && "Incorrectly use of addIncomplete"); assert(!StubEnc.empty() && "Passing an empty string to addIncomplete()"); E.Swapped.swap(E.Str); // swap out the Recursive E.Str.swap(StubEnc); E.State = Incomplete; ++IncompleteCount; } /// Once the RecordType has been expanded, the temporary incomplete TypeString /// must be removed from the cache. /// If a Recursive was swapped out by addIncomplete(), it will be replaced. /// Returns true if the RecordType was defined recursively. bool TypeStringCache::removeIncomplete(const IdentifierInfo *ID) { if (!ID) return false; auto I = Map.find(ID); assert(I != Map.end() && "Entry not present"); Entry &E = I->second; assert( (E.State == Incomplete || E.State == IncompleteUsed) && "Entry must be an incomplete type"); bool IsRecursive = false; if (E.State == IncompleteUsed) { // We made use of our Incomplete encoding, thus we are recursive. IsRecursive = true; --IncompleteUsedCount; } if (E.Swapped.empty()) Map.erase(I); else { // Swap the Recursive back. E.Swapped.swap(E.Str); E.Swapped.clear(); E.State = Recursive; } --IncompleteCount; return IsRecursive; } /// Add the encoded TypeString to the cache only if it is NonRecursive or /// Recursive (viz: all sub-members were expanded as fully as possible). void TypeStringCache::addIfComplete(const IdentifierInfo *ID, StringRef Str, bool IsRecursive) { if (!ID || IncompleteUsedCount) return; // No key or it is is an incomplete sub-type so don't add. Entry &E = Map[ID]; if (IsRecursive && !E.Str.empty()) { assert(E.State==Recursive && E.Str.size() == Str.size() && "This is not the same Recursive entry"); // The parent container was not recursive after all, so we could have used // this Recursive sub-member entry after all, but we assumed the worse when // we started viz: IncompleteCount!=0. return; } assert(E.Str.empty() && "Entry already present"); E.Str = Str.str(); E.State = IsRecursive? Recursive : NonRecursive; } /// Return a cached TypeString encoding for the ID. If there isn't one, or we /// are recursively expanding a type (IncompleteCount != 0) and the cached /// encoding is Recursive, return an empty StringRef. StringRef TypeStringCache::lookupStr(const IdentifierInfo *ID) { if (!ID) return StringRef(); // We have no key. auto I = Map.find(ID); if (I == Map.end()) return StringRef(); // We have no encoding. Entry &E = I->second; if (E.State == Recursive && IncompleteCount) return StringRef(); // We don't use Recursive encodings for member types. if (E.State == Incomplete) { // The incomplete type is being used to break out of recursion. E.State = IncompleteUsed; ++IncompleteUsedCount; } return E.Str.c_str(); } /// The XCore ABI includes a type information section that communicates symbol /// type information to the linker. The linker uses this information to verify /// safety/correctness of things such as array bound and pointers et al. 
/// The ABI only requires C (and XC) language modules to emit TypeStrings. /// This type information (TypeString) is emitted into meta data for all global /// symbols: definitions, declarations, functions & variables. /// /// The TypeString carries type, qualifier, name, size & value details. /// Please see 'Tools Development Guide' section 2.16.2 for format details: /// https://www.xmos.com/download/public/Tools-Development-Guide%28X9114A%29.pdf /// The output is tested by test/CodeGen/xcore-stringtype.c. /// static bool getTypeString(SmallStringEnc &Enc, const Decl *D, CodeGen::CodeGenModule &CGM, TypeStringCache &TSC); /// XCore uses emitTargetMD to emit TypeString metadata for global symbols. void XCoreTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { SmallStringEnc Enc; if (getTypeString(Enc, D, CGM, TSC)) { llvm::LLVMContext &Ctx = CGM.getModule().getContext(); llvm::SmallVector MDVals; MDVals.push_back(llvm::ConstantAsMetadata::get(GV)); MDVals.push_back(llvm::MDString::get(Ctx, Enc.str())); llvm::NamedMDNode *MD = CGM.getModule().getOrInsertNamedMetadata("xcore.typestrings"); MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); } } static bool appendType(SmallStringEnc &Enc, QualType QType, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC); /// Helper function for appendRecordType(). /// Builds a SmallVector containing the encoded field types in declaration /// order. static bool extractFieldType(SmallVectorImpl &FE, const RecordDecl *RD, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC) { for (const auto *Field : RD->fields()) { SmallStringEnc Enc; Enc += "m("; Enc += Field->getName(); Enc += "){"; if (Field->isBitField()) { Enc += "b("; llvm::raw_svector_ostream OS(Enc); OS << Field->getBitWidthValue(CGM.getContext()); Enc += ':'; } if (!appendType(Enc, Field->getType(), CGM, TSC)) return false; if (Field->isBitField()) Enc += ')'; Enc += '}'; FE.emplace_back(!Field->getName().empty(), Enc); } return true; } /// Appends structure and union types to Enc and adds encoding to cache. /// Recursively calls appendType (via extractFieldType) for each field. /// Union types have their fields ordered according to the ABI. static bool appendRecordType(SmallStringEnc &Enc, const RecordType *RT, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC, const IdentifierInfo *ID) { // Append the cached TypeString if we have one. StringRef TypeString = TSC.lookupStr(ID); if (!TypeString.empty()) { Enc += TypeString; return true; } // Start to emit an incomplete TypeString. size_t Start = Enc.size(); Enc += (RT->isUnionType()? 'u' : 's'); Enc += '('; if (ID) Enc += ID->getName(); Enc += "){"; // We collect all encoded fields and order as necessary. bool IsRecursive = false; const RecordDecl *RD = RT->getDecl()->getDefinition(); if (RD && !RD->field_empty()) { // An incomplete TypeString stub is placed in the cache for this RecordType // so that recursive calls to this RecordType will use it whilst building a // complete TypeString for this RecordType. SmallVector FE; std::string StubEnc(Enc.substr(Start).str()); StubEnc += '}'; // StubEnc now holds a valid incomplete TypeString. TSC.addIncomplete(ID, std::move(StubEnc)); if (!extractFieldType(FE, RD, CGM, TSC)) { (void) TSC.removeIncomplete(ID); return false; } IsRecursive = TSC.removeIncomplete(ID); // The ABI requires unions to be sorted but not structures. // See FieldEncoding::operator< for sort algorithm. 
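An illustrative aside on the encoding being assembled here: following the grammar implemented by appendRecordType above, together with the builtin encodings appended further below, a hypothetical struct would be cached roughly as shown in the comments, and while a self-referential struct is being expanded the incomplete stub (e.g. "s(N){}") sits in the cache to break the recursion, exactly as the TypeStringCache comments describe. The concrete strings are derived from the code, not captured output.

struct S { int x; unsigned y; };   // -> "s(S){m(x){si},m(y){ui}}"
struct N { struct N *next; };      // incomplete stub "s(N){}" breaks the recursion,
                                   //   and the finished encoding is marked Recursive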
if (RT->isUnionType()) std::sort(FE.begin(), FE.end()); // We can now complete the TypeString. unsigned E = FE.size(); for (unsigned I = 0; I != E; ++I) { if (I) Enc += ','; Enc += FE[I].str(); } } Enc += '}'; TSC.addIfComplete(ID, Enc.substr(Start), IsRecursive); return true; } /// Appends enum types to Enc and adds the encoding to the cache. static bool appendEnumType(SmallStringEnc &Enc, const EnumType *ET, TypeStringCache &TSC, const IdentifierInfo *ID) { // Append the cached TypeString if we have one. StringRef TypeString = TSC.lookupStr(ID); if (!TypeString.empty()) { Enc += TypeString; return true; } size_t Start = Enc.size(); Enc += "e("; if (ID) Enc += ID->getName(); Enc += "){"; // We collect all encoded enumerations and order them alphanumerically. if (const EnumDecl *ED = ET->getDecl()->getDefinition()) { SmallVector FE; for (auto I = ED->enumerator_begin(), E = ED->enumerator_end(); I != E; ++I) { SmallStringEnc EnumEnc; EnumEnc += "m("; EnumEnc += I->getName(); EnumEnc += "){"; I->getInitVal().toString(EnumEnc); EnumEnc += '}'; FE.push_back(FieldEncoding(!I->getName().empty(), EnumEnc)); } std::sort(FE.begin(), FE.end()); unsigned E = FE.size(); for (unsigned I = 0; I != E; ++I) { if (I) Enc += ','; Enc += FE[I].str(); } } Enc += '}'; TSC.addIfComplete(ID, Enc.substr(Start), false); return true; } /// Appends type's qualifier to Enc. /// This is done prior to appending the type's encoding. static void appendQualifier(SmallStringEnc &Enc, QualType QT) { // Qualifiers are emitted in alphabetical order. static const char *const Table[]={"","c:","r:","cr:","v:","cv:","rv:","crv:"}; int Lookup = 0; if (QT.isConstQualified()) Lookup += 1<<0; if (QT.isRestrictQualified()) Lookup += 1<<1; if (QT.isVolatileQualified()) Lookup += 1<<2; Enc += Table[Lookup]; } /// Appends built-in types to Enc. static bool appendBuiltinType(SmallStringEnc &Enc, const BuiltinType *BT) { const char *EncType; switch (BT->getKind()) { case BuiltinType::Void: EncType = "0"; break; case BuiltinType::Bool: EncType = "b"; break; case BuiltinType::Char_U: EncType = "uc"; break; case BuiltinType::UChar: EncType = "uc"; break; case BuiltinType::SChar: EncType = "sc"; break; case BuiltinType::UShort: EncType = "us"; break; case BuiltinType::Short: EncType = "ss"; break; case BuiltinType::UInt: EncType = "ui"; break; case BuiltinType::Int: EncType = "si"; break; case BuiltinType::ULong: EncType = "ul"; break; case BuiltinType::Long: EncType = "sl"; break; case BuiltinType::ULongLong: EncType = "ull"; break; case BuiltinType::LongLong: EncType = "sll"; break; case BuiltinType::Float: EncType = "ft"; break; case BuiltinType::Double: EncType = "d"; break; case BuiltinType::LongDouble: EncType = "ld"; break; default: return false; } Enc += EncType; return true; } /// Appends a pointer encoding to Enc before calling appendType for the pointee. static bool appendPointerType(SmallStringEnc &Enc, const PointerType *PT, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC) { Enc += "p("; if (!appendType(Enc, PT->getPointeeType(), CGM, TSC)) return false; Enc += ')'; return true; } /// Appends array encoding to Enc before calling appendType for the element. 
static bool appendArrayType(SmallStringEnc &Enc, QualType QT, const ArrayType *AT, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC, StringRef NoSizeEnc) { if (AT->getSizeModifier() != ArrayType::Normal) return false; Enc += "a("; if (const ConstantArrayType *CAT = dyn_cast(AT)) CAT->getSize().toStringUnsigned(Enc); else Enc += NoSizeEnc; // Global arrays use "*", otherwise it is "". Enc += ':'; // The Qualifiers should be attached to the type rather than the array. appendQualifier(Enc, QT); if (!appendType(Enc, AT->getElementType(), CGM, TSC)) return false; Enc += ')'; return true; } /// Appends a function encoding to Enc, calling appendType for the return type /// and the arguments. static bool appendFunctionType(SmallStringEnc &Enc, const FunctionType *FT, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC) { Enc += "f{"; if (!appendType(Enc, FT->getReturnType(), CGM, TSC)) return false; Enc += "}("; if (const FunctionProtoType *FPT = FT->getAs()) { // N.B. we are only interested in the adjusted param types. auto I = FPT->param_type_begin(); auto E = FPT->param_type_end(); if (I != E) { do { if (!appendType(Enc, *I, CGM, TSC)) return false; ++I; if (I != E) Enc += ','; } while (I != E); if (FPT->isVariadic()) Enc += ",va"; } else { if (FPT->isVariadic()) Enc += "va"; else Enc += '0'; } } Enc += ')'; return true; } /// Handles the type's qualifier before dispatching a call to handle specific /// type encodings. static bool appendType(SmallStringEnc &Enc, QualType QType, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC) { QualType QT = QType.getCanonicalType(); if (const ArrayType *AT = QT->getAsArrayTypeUnsafe()) // The Qualifiers should be attached to the type rather than the array. // Thus we don't call appendQualifier() here. return appendArrayType(Enc, QT, AT, CGM, TSC, ""); appendQualifier(Enc, QT); if (const BuiltinType *BT = QT->getAs()) return appendBuiltinType(Enc, BT); if (const PointerType *PT = QT->getAs()) return appendPointerType(Enc, PT, CGM, TSC); if (const EnumType *ET = QT->getAs()) return appendEnumType(Enc, ET, TSC, QT.getBaseTypeIdentifier()); if (const RecordType *RT = QT->getAsStructureType()) return appendRecordType(Enc, RT, CGM, TSC, QT.getBaseTypeIdentifier()); if (const RecordType *RT = QT->getAsUnionType()) return appendRecordType(Enc, RT, CGM, TSC, QT.getBaseTypeIdentifier()); if (const FunctionType *FT = QT->getAs()) return appendFunctionType(Enc, FT, CGM, TSC); return false; } static bool getTypeString(SmallStringEnc &Enc, const Decl *D, CodeGen::CodeGenModule &CGM, TypeStringCache &TSC) { if (!D) return false; if (const FunctionDecl *FD = dyn_cast(D)) { if (FD->getLanguageLinkage() != CLanguageLinkage) return false; return appendType(Enc, FD->getType(), CGM, TSC); } if (const VarDecl *VD = dyn_cast(D)) { if (VD->getLanguageLinkage() != CLanguageLinkage) return false; QualType QT = VD->getType().getCanonicalType(); if (const ArrayType *AT = QT->getAsArrayTypeUnsafe()) { // Global ArrayTypes are given a size of '*' if the size is unknown. // The Qualifiers should be attached to the type rather than the array. // Thus we don't call appendQualifier() here. 
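Continuing the aside, and again deriving the strings from the append* routines rather than from compiler output, the array and function encodings built here would look like this for a few hypothetical C declarations:

int  a[4];           // -> "a(4:si)"       size, ':', qualifiers, element type
extern int b[];      // -> "a(*:si)"       unknown global size encodes as '*'
int  f(int, ...);    // -> "f{si}(si,va)"  return type in braces, then parameters
void g(void);        // -> "f{0}(0)"       void encodes as '0'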
return appendArrayType(Enc, QT, AT, CGM, TSC, "*"); } return appendType(Enc, QT, CGM, TSC); } return false; } //===----------------------------------------------------------------------===// // Driver code //===----------------------------------------------------------------------===// const llvm::Triple &CodeGenModule::getTriple() const { return getTarget().getTriple(); } bool CodeGenModule::supportsCOMDAT() const { return !getTriple().isOSBinFormatMachO(); } const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { if (TheTargetCodeGenInfo) return *TheTargetCodeGenInfo; const llvm::Triple &Triple = getTarget().getTriple(); switch (Triple.getArch()) { default: return *(TheTargetCodeGenInfo = new DefaultTargetCodeGenInfo(Types)); case llvm::Triple::le32: return *(TheTargetCodeGenInfo = new PNaClTargetCodeGenInfo(Types)); case llvm::Triple::mips: case llvm::Triple::mipsel: if (Triple.getOS() == llvm::Triple::NaCl) return *(TheTargetCodeGenInfo = new PNaClTargetCodeGenInfo(Types)); return *(TheTargetCodeGenInfo = new MIPSTargetCodeGenInfo(Types, true)); case llvm::Triple::mips64: case llvm::Triple::mips64el: return *(TheTargetCodeGenInfo = new MIPSTargetCodeGenInfo(Types, false)); case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: { AArch64ABIInfo::ABIKind Kind = AArch64ABIInfo::AAPCS; if (getTarget().getABI() == "darwinpcs") Kind = AArch64ABIInfo::DarwinPCS; return *(TheTargetCodeGenInfo = new AArch64TargetCodeGenInfo(Types, Kind)); } case llvm::Triple::wasm32: case llvm::Triple::wasm64: return *(TheTargetCodeGenInfo = new WebAssemblyTargetCodeGenInfo(Types)); case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: { if (Triple.getOS() == llvm::Triple::Win32) { TheTargetCodeGenInfo = new WindowsARMTargetCodeGenInfo(Types, ARMABIInfo::AAPCS_VFP); return *TheTargetCodeGenInfo; } ARMABIInfo::ABIKind Kind = ARMABIInfo::AAPCS; StringRef ABIStr = getTarget().getABI(); if (ABIStr == "apcs-gnu") Kind = ARMABIInfo::APCS; else if (ABIStr == "aapcs16") Kind = ARMABIInfo::AAPCS16_VFP; else if (CodeGenOpts.FloatABI == "hard" || (CodeGenOpts.FloatABI != "soft" && Triple.getEnvironment() == llvm::Triple::GNUEABIHF)) Kind = ARMABIInfo::AAPCS_VFP; return *(TheTargetCodeGenInfo = new ARMTargetCodeGenInfo(Types, Kind)); } case llvm::Triple::ppc: return *(TheTargetCodeGenInfo = new PPC32TargetCodeGenInfo(Types, CodeGenOpts.FloatABI == "soft")); case llvm::Triple::ppc64: if (Triple.isOSBinFormatELF()) { PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv1; if (getTarget().getABI() == "elfv2") Kind = PPC64_SVR4_ABIInfo::ELFv2; bool HasQPX = getTarget().getABI() == "elfv1-qpx"; return *(TheTargetCodeGenInfo = new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX)); } else return *(TheTargetCodeGenInfo = new PPC64TargetCodeGenInfo(Types)); case llvm::Triple::ppc64le: { assert(Triple.isOSBinFormatELF() && "PPC64 LE non-ELF not supported!"); PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv2; if (getTarget().getABI() == "elfv1" || getTarget().getABI() == "elfv1-qpx") Kind = PPC64_SVR4_ABIInfo::ELFv1; bool HasQPX = getTarget().getABI() == "elfv1-qpx"; return *(TheTargetCodeGenInfo = new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX)); } case llvm::Triple::nvptx: case llvm::Triple::nvptx64: return *(TheTargetCodeGenInfo = new NVPTXTargetCodeGenInfo(Types)); case llvm::Triple::msp430: return *(TheTargetCodeGenInfo = new MSP430TargetCodeGenInfo(Types)); case llvm::Triple::systemz: { bool HasVector = getTarget().getABI() == "vector"; 
return *(TheTargetCodeGenInfo = new SystemZTargetCodeGenInfo(Types, HasVector)); } case llvm::Triple::tce: return *(TheTargetCodeGenInfo = new TCETargetCodeGenInfo(Types)); case llvm::Triple::x86: { bool IsDarwinVectorABI = Triple.isOSDarwin(); bool RetSmallStructInRegABI = X86_32TargetCodeGenInfo::isStructReturnInRegABI(Triple, CodeGenOpts); bool IsWin32FloatStructABI = Triple.isOSWindows() && !Triple.isOSCygMing(); if (Triple.getOS() == llvm::Triple::Win32) { return *(TheTargetCodeGenInfo = new WinX86_32TargetCodeGenInfo( Types, IsDarwinVectorABI, RetSmallStructInRegABI, IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters)); } else { return *(TheTargetCodeGenInfo = new X86_32TargetCodeGenInfo( Types, IsDarwinVectorABI, RetSmallStructInRegABI, IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters, CodeGenOpts.FloatABI == "soft")); } } case llvm::Triple::x86_64: { StringRef ABI = getTarget().getABI(); X86AVXABILevel AVXLevel = (ABI == "avx512" ? X86AVXABILevel::AVX512 : ABI == "avx" ? X86AVXABILevel::AVX : X86AVXABILevel::None); switch (Triple.getOS()) { case llvm::Triple::Win32: return *(TheTargetCodeGenInfo = new WinX86_64TargetCodeGenInfo(Types, AVXLevel)); case llvm::Triple::PS4: return *(TheTargetCodeGenInfo = new PS4TargetCodeGenInfo(Types, AVXLevel)); default: return *(TheTargetCodeGenInfo = new X86_64TargetCodeGenInfo(Types, AVXLevel)); } } case llvm::Triple::hexagon: return *(TheTargetCodeGenInfo = new HexagonTargetCodeGenInfo(Types)); case llvm::Triple::r600: return *(TheTargetCodeGenInfo = new AMDGPUTargetCodeGenInfo(Types)); case llvm::Triple::amdgcn: return *(TheTargetCodeGenInfo = new AMDGPUTargetCodeGenInfo(Types)); case llvm::Triple::sparcv9: return *(TheTargetCodeGenInfo = new SparcV9TargetCodeGenInfo(Types)); case llvm::Triple::xcore: return *(TheTargetCodeGenInfo = new XCoreTargetCodeGenInfo(Types)); } } Index: projects/clang380-import/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp =================================================================== --- projects/clang380-import/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp (revision 296010) +++ projects/clang380-import/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp (revision 296011) @@ -1,14664 +1,14667 @@ //===--- SemaExpr.cpp - Semantic Analysis for Expressions -----------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements semantic analysis for expressions. 
// //===----------------------------------------------------------------------===// #include "clang/Sema/SemaInternal.h" #include "TreeTransform.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTLambda.h" #include "clang/AST/ASTMutationListener.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/EvaluatedExprVisitor.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/ExprOpenMP.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/TypeLoc.h" #include "clang/Basic/PartialDiagnostic.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/Lex/LiteralSupport.h" #include "clang/Lex/Preprocessor.h" #include "clang/Sema/AnalysisBasedWarnings.h" #include "clang/Sema/DeclSpec.h" #include "clang/Sema/DelayedDiagnostic.h" #include "clang/Sema/Designator.h" #include "clang/Sema/Initialization.h" #include "clang/Sema/Lookup.h" #include "clang/Sema/ParsedTemplate.h" #include "clang/Sema/Scope.h" #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/SemaFixItUtils.h" #include "clang/Sema/Template.h" #include "llvm/Support/ConvertUTF.h" using namespace clang; using namespace sema; /// \brief Determine whether the use of this declaration is valid, without /// emitting diagnostics. bool Sema::CanUseDecl(NamedDecl *D) { // See if this is an auto-typed variable whose initializer we are parsing. if (ParsingInitForAutoVars.count(D)) return false; // See if this is a deleted function. if (FunctionDecl *FD = dyn_cast(D)) { if (FD->isDeleted()) return false; // If the function has a deduced return type, and we can't deduce it, // then we can't use it either. if (getLangOpts().CPlusPlus14 && FD->getReturnType()->isUndeducedType() && DeduceReturnType(FD, SourceLocation(), /*Diagnose*/ false)) return false; } // See if this function is unavailable. if (D->getAvailability() == AR_Unavailable && cast(CurContext)->getAvailability() != AR_Unavailable) return false; return true; } static void DiagnoseUnusedOfDecl(Sema &S, NamedDecl *D, SourceLocation Loc) { // Warn if this is used but marked unused. if (D->hasAttr()) { const Decl *DC = cast_or_null(S.getCurObjCLexicalContext()); if (DC && !DC->hasAttr()) S.Diag(Loc, diag::warn_used_but_marked_unused) << D->getDeclName(); } } static bool HasRedeclarationWithoutAvailabilityInCategory(const Decl *D) { const auto *OMD = dyn_cast(D); if (!OMD) return false; const ObjCInterfaceDecl *OID = OMD->getClassInterface(); if (!OID) return false; for (const ObjCCategoryDecl *Cat : OID->visible_categories()) if (ObjCMethodDecl *CatMeth = Cat->getMethod(OMD->getSelector(), OMD->isInstanceMethod())) if (!CatMeth->hasAttr()) return true; return false; } static AvailabilityResult DiagnoseAvailabilityOfDecl(Sema &S, NamedDecl *D, SourceLocation Loc, const ObjCInterfaceDecl *UnknownObjCClass, bool ObjCPropertyAccess) { // See if this declaration is unavailable or deprecated. std::string Message; AvailabilityResult Result = D->getAvailability(&Message); // For typedefs, if the typedef declaration appears available look // to the underlying type to see if it is more restrictive. 
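Leaving the patch for a moment, the comment above about looking through typedefs corresponds to source like the following, where the typedef itself looks available but its underlying type is not. The attribute spelling is just one way to make a declaration deprecated and is used here purely for illustration.

struct [[deprecated("use NewThing instead")]] OldThing { int v; };
typedef OldThing OldAlias;   // the typedef carries no availability information itself...
OldAlias o;                  // ...so the underlying OldThing's deprecation is what gets diagnosed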
while (const TypedefNameDecl *TD = dyn_cast(D)) { if (Result == AR_Available) { if (const TagType *TT = TD->getUnderlyingType()->getAs()) { D = TT->getDecl(); Result = D->getAvailability(&Message); continue; } } break; } // Forward class declarations get their attributes from their definition. if (ObjCInterfaceDecl *IDecl = dyn_cast(D)) { if (IDecl->getDefinition()) { D = IDecl->getDefinition(); Result = D->getAvailability(&Message); } } if (const EnumConstantDecl *ECD = dyn_cast(D)) if (Result == AR_Available) { const DeclContext *DC = ECD->getDeclContext(); if (const EnumDecl *TheEnumDecl = dyn_cast(DC)) Result = TheEnumDecl->getAvailability(&Message); } const ObjCPropertyDecl *ObjCPDecl = nullptr; if (Result == AR_Deprecated || Result == AR_Unavailable || AR_NotYetIntroduced) { if (const ObjCMethodDecl *MD = dyn_cast(D)) { if (const ObjCPropertyDecl *PD = MD->findPropertyDecl()) { AvailabilityResult PDeclResult = PD->getAvailability(nullptr); if (PDeclResult == Result) ObjCPDecl = PD; } } } switch (Result) { case AR_Available: break; case AR_Deprecated: if (S.getCurContextAvailability() != AR_Deprecated) S.EmitAvailabilityWarning(Sema::AD_Deprecation, D, Message, Loc, UnknownObjCClass, ObjCPDecl, ObjCPropertyAccess); break; case AR_NotYetIntroduced: { // Don't do this for enums, they can't be redeclared. if (isa(D) || isa(D)) break; bool Warn = !D->getAttr()->isInherited(); // Objective-C method declarations in categories are not modelled as // redeclarations, so manually look for a redeclaration in a category // if necessary. if (Warn && HasRedeclarationWithoutAvailabilityInCategory(D)) Warn = false; // In general, D will point to the most recent redeclaration. However, // for `@class A;` decls, this isn't true -- manually go through the // redecl chain in that case. if (Warn && isa(D)) for (Decl *Redecl = D->getMostRecentDecl(); Redecl && Warn; Redecl = Redecl->getPreviousDecl()) if (!Redecl->hasAttr() || Redecl->getAttr()->isInherited()) Warn = false; if (Warn) S.EmitAvailabilityWarning(Sema::AD_Partial, D, Message, Loc, UnknownObjCClass, ObjCPDecl, ObjCPropertyAccess); break; } case AR_Unavailable: if (S.getCurContextAvailability() != AR_Unavailable) S.EmitAvailabilityWarning(Sema::AD_Unavailable, D, Message, Loc, UnknownObjCClass, ObjCPDecl, ObjCPropertyAccess); break; } return Result; } /// \brief Emit a note explaining that this function is deleted. void Sema::NoteDeletedFunction(FunctionDecl *Decl) { assert(Decl->isDeleted()); CXXMethodDecl *Method = dyn_cast(Decl); if (Method && Method->isDeleted() && Method->isDefaulted()) { // If the method was explicitly defaulted, point at that declaration. if (!Method->isImplicit()) Diag(Decl->getLocation(), diag::note_implicitly_deleted); // Try to diagnose why this special member function was implicitly // deleted. This might fail, if that reason no longer applies. CXXSpecialMember CSM = getSpecialMember(Method); if (CSM != CXXInvalid) ShouldDeleteSpecialMember(Method, CSM, /*Diagnose=*/true); return; } if (CXXConstructorDecl *CD = dyn_cast(Decl)) { if (CXXConstructorDecl *BaseCD = const_cast(CD->getInheritedConstructor())) { Diag(Decl->getLocation(), diag::note_inherited_deleted_here); if (BaseCD->isDeleted()) { NoteDeletedFunction(BaseCD); } else { // FIXME: An explanation of why exactly it can't be inherited // would be nice. 
Diag(BaseCD->getLocation(), diag::note_cannot_inherit); } return; } } Diag(Decl->getLocation(), diag::note_availability_specified_here) << Decl << true; } /// \brief Determine whether a FunctionDecl was ever declared with an /// explicit storage class. static bool hasAnyExplicitStorageClass(const FunctionDecl *D) { for (auto I : D->redecls()) { if (I->getStorageClass() != SC_None) return true; } return false; } /// \brief Check whether we're in an extern inline function and referring to a /// variable or function with internal linkage (C11 6.7.4p3). /// /// This is only a warning because we used to silently accept this code, but /// in many cases it will not behave correctly. This is not enabled in C++ mode /// because the restriction language is a bit weaker (C++11 [basic.def.odr]p6) /// and so while there may still be user mistakes, most of the time we can't /// prove that there are errors. static void diagnoseUseOfInternalDeclInInlineFunction(Sema &S, const NamedDecl *D, SourceLocation Loc) { // This is disabled under C++; there are too many ways for this to fire in // contexts where the warning is a false positive, or where it is technically // correct but benign. if (S.getLangOpts().CPlusPlus) return; // Check if this is an inlined function or method. FunctionDecl *Current = S.getCurFunctionDecl(); if (!Current) return; if (!Current->isInlined()) return; if (!Current->isExternallyVisible()) return; // Check if the decl has internal linkage. if (D->getFormalLinkage() != InternalLinkage) return; // Downgrade from ExtWarn to Extension if // (1) the supposedly external inline function is in the main file, // and probably won't be included anywhere else. // (2) the thing we're referencing is a pure function. // (3) the thing we're referencing is another inline function. // This last can give us false negatives, but it's better than warning on // wrappers for simple C library functions. const FunctionDecl *UsedFn = dyn_cast(D); bool DowngradeWarning = S.getSourceManager().isInMainFile(Loc); if (!DowngradeWarning && UsedFn) DowngradeWarning = UsedFn->isInlined() || UsedFn->hasAttr(); S.Diag(Loc, DowngradeWarning ? diag::ext_internal_in_extern_inline_quiet : diag::ext_internal_in_extern_inline) << /*IsVar=*/!UsedFn << D; S.MaybeSuggestAddingStaticToDecl(Current); S.Diag(D->getCanonicalDecl()->getLocation(), diag::note_entity_declared_at) << D; } void Sema::MaybeSuggestAddingStaticToDecl(const FunctionDecl *Cur) { const FunctionDecl *First = Cur->getFirstDecl(); // Suggest "static" on the function, if possible. if (!hasAnyExplicitStorageClass(First)) { SourceLocation DeclBegin = First->getSourceRange().getBegin(); Diag(DeclBegin, diag::note_convert_inline_to_static) << Cur << FixItHint::CreateInsertion(DeclBegin, "static "); } } /// \brief Determine whether the use of this declaration is valid, and /// emit any corresponding diagnostics. /// /// This routine diagnoses various problems with referencing /// declarations that can occur when using a declaration. For example, /// it might warn if a deprecated or unavailable declaration is being /// used, or produce an error (and return true) if a C++0x deleted /// function is being used. /// /// \returns true if there was an error (this declaration cannot be /// referenced), false otherwise. 
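As a hypothetical illustration of the cases Sema::DiagnoseUseOfDecl (documented above, defined just below) handles: using a deleted function is a hard error, with NoteDeletedFunction pointing at the '= delete', while using a deprecated one only warns.

void gone() = delete;
[[deprecated]] void old_api();

void caller() {
  // gone();   // uncommenting this is a hard error: use of a deleted function,
               // and NoteDeletedFunction adds a note at the '= delete'
  old_api();   // warning: 'old_api' is deprecated
}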
/// bool Sema::DiagnoseUseOfDecl(NamedDecl *D, SourceLocation Loc, const ObjCInterfaceDecl *UnknownObjCClass, bool ObjCPropertyAccess) { if (getLangOpts().CPlusPlus && isa(D)) { // If there were any diagnostics suppressed by template argument deduction, // emit them now. auto Pos = SuppressedDiagnostics.find(D->getCanonicalDecl()); if (Pos != SuppressedDiagnostics.end()) { for (const PartialDiagnosticAt &Suppressed : Pos->second) Diag(Suppressed.first, Suppressed.second); // Clear out the list of suppressed diagnostics, so that we don't emit // them again for this specialization. However, we don't obsolete this // entry from the table, because we want to avoid ever emitting these // diagnostics again. Pos->second.clear(); } // C++ [basic.start.main]p3: // The function 'main' shall not be used within a program. if (cast(D)->isMain()) Diag(Loc, diag::ext_main_used); } // See if this is an auto-typed variable whose initializer we are parsing. if (ParsingInitForAutoVars.count(D)) { const AutoType *AT = cast(D)->getType()->getContainedAutoType(); Diag(Loc, diag::err_auto_variable_cannot_appear_in_own_initializer) << D->getDeclName() << (unsigned)AT->getKeyword(); return true; } // See if this is a deleted function. if (FunctionDecl *FD = dyn_cast(D)) { if (FD->isDeleted()) { Diag(Loc, diag::err_deleted_function_use); NoteDeletedFunction(FD); return true; } // If the function has a deduced return type, and we can't deduce it, // then we can't use it either. if (getLangOpts().CPlusPlus14 && FD->getReturnType()->isUndeducedType() && DeduceReturnType(FD, Loc)) return true; } DiagnoseAvailabilityOfDecl(*this, D, Loc, UnknownObjCClass, ObjCPropertyAccess); DiagnoseUnusedOfDecl(*this, D, Loc); diagnoseUseOfInternalDeclInInlineFunction(*this, D, Loc); return false; } /// \brief Retrieve the message suffix that should be added to a /// diagnostic complaining about the given function being deleted or /// unavailable. std::string Sema::getDeletedOrUnavailableSuffix(const FunctionDecl *FD) { std::string Message; if (FD->getAvailability(&Message)) return ": " + Message; return std::string(); } /// DiagnoseSentinelCalls - This routine checks whether a call or /// message-send is to a declaration with the sentinel attribute, and /// if so, it checks that the requirements of the sentinel are /// satisfied. void Sema::DiagnoseSentinelCalls(NamedDecl *D, SourceLocation Loc, ArrayRef Args) { const SentinelAttr *attr = D->getAttr(); if (!attr) return; // The number of formal parameters of the declaration. unsigned numFormalParams; // The kind of declaration. This is also an index into a %select in // the diagnostic. enum CalleeType { CT_Function, CT_Method, CT_Block } calleeType; if (ObjCMethodDecl *MD = dyn_cast(D)) { numFormalParams = MD->param_size(); calleeType = CT_Method; } else if (FunctionDecl *FD = dyn_cast(D)) { numFormalParams = FD->param_size(); calleeType = CT_Function; } else if (isa(D)) { QualType type = cast(D)->getType(); const FunctionType *fn = nullptr; if (const PointerType *ptr = type->getAs()) { fn = ptr->getPointeeType()->getAs(); if (!fn) return; calleeType = CT_Function; } else if (const BlockPointerType *ptr = type->getAs()) { fn = ptr->getPointeeType()->castAs(); calleeType = CT_Block; } else { return; } if (const FunctionProtoType *proto = dyn_cast(fn)) { numFormalParams = proto->getNumParams(); } else { numFormalParams = 0; } } else { return; } // "nullPos" is the number of formal parameters at the end which // effectively count as part of the variadic arguments. 
This is // useful if you would prefer to not have *any* formal parameters, // but the language forces you to have at least one. unsigned nullPos = attr->getNullPos(); assert((nullPos == 0 || nullPos == 1) && "invalid null position on sentinel"); numFormalParams = (nullPos > numFormalParams ? 0 : numFormalParams - nullPos); // The number of arguments which should follow the sentinel. unsigned numArgsAfterSentinel = attr->getSentinel(); // If there aren't enough arguments for all the formal parameters, // the sentinel, and the args after the sentinel, complain. if (Args.size() < numFormalParams + numArgsAfterSentinel + 1) { Diag(Loc, diag::warn_not_enough_argument) << D->getDeclName(); Diag(D->getLocation(), diag::note_sentinel_here) << int(calleeType); return; } // Otherwise, find the sentinel expression. Expr *sentinelExpr = Args[Args.size() - numArgsAfterSentinel - 1]; if (!sentinelExpr) return; if (sentinelExpr->isValueDependent()) return; if (Context.isSentinelNullExpr(sentinelExpr)) return; // Pick a reasonable string to insert. Optimistically use 'nil', 'nullptr', // or 'NULL' if those are actually defined in the context. Only use // 'nil' for ObjC methods, where it's much more likely that the // variadic arguments form a list of object pointers. SourceLocation MissingNilLoc = getLocForEndOfToken(sentinelExpr->getLocEnd()); std::string NullValue; if (calleeType == CT_Method && PP.isMacroDefined("nil")) NullValue = "nil"; else if (getLangOpts().CPlusPlus11) NullValue = "nullptr"; else if (PP.isMacroDefined("NULL")) NullValue = "NULL"; else NullValue = "(void*) 0"; if (MissingNilLoc.isInvalid()) Diag(Loc, diag::warn_missing_sentinel) << int(calleeType); else Diag(MissingNilLoc, diag::warn_missing_sentinel) << int(calleeType) << FixItHint::CreateInsertion(MissingNilLoc, ", " + NullValue); Diag(D->getLocation(), diag::note_sentinel_here) << int(calleeType); } SourceRange Sema::getExprRange(Expr *E) const { return E ? E->getSourceRange() : SourceRange(); } //===----------------------------------------------------------------------===// // Standard Promotions and Conversions //===----------------------------------------------------------------------===// /// DefaultFunctionArrayConversion (C99 6.3.2.1p3, C99 6.3.2.1p4). ExprResult Sema::DefaultFunctionArrayConversion(Expr *E, bool Diagnose) { // Handle any placeholder expressions which made it here. if (E->getType()->isPlaceholderType()) { ExprResult result = CheckPlaceholderExpr(E); if (result.isInvalid()) return ExprError(); E = result.get(); } QualType Ty = E->getType(); assert(!Ty.isNull() && "DefaultFunctionArrayConversion - missing type"); if (Ty->isFunctionType()) { // If we are here, we are not calling a function but taking // its address (which is not allowed in OpenCL v1.0 s6.8.a.3). if (getLangOpts().OpenCL) { if (Diagnose) Diag(E->getExprLoc(), diag::err_opencl_taking_function_address); return ExprError(); } if (auto *DRE = dyn_cast(E->IgnoreParenCasts())) if (auto *FD = dyn_cast(DRE->getDecl())) if (!checkAddressOfFunctionIsAvailable(FD, Diagnose, E->getExprLoc())) return ExprError(); E = ImpCastExprToType(E, Context.getPointerType(Ty), CK_FunctionToPointerDecay).get(); } else if (Ty->isArrayType()) { // In C90 mode, arrays only promote to pointers if the array expression is // an lvalue. The relevant legalese is C90 6.2.2.1p3: "an lvalue that has // type 'array of type' is converted to an expression that has type 'pointer // to type'...". 
In C99 this was changed to: C99 6.3.2.1p3: "an expression // that has type 'array of type' ...". The relevant change is "an lvalue" // (C90) to "an expression" (C99). // // C++ 4.2p1: // An lvalue or rvalue of type "array of N T" or "array of unknown bound of // T" can be converted to an rvalue of type "pointer to T". // if (getLangOpts().C99 || getLangOpts().CPlusPlus || E->isLValue()) E = ImpCastExprToType(E, Context.getArrayDecayedType(Ty), CK_ArrayToPointerDecay).get(); } return E; } static void CheckForNullPointerDereference(Sema &S, Expr *E) { // Check to see if we are dereferencing a null pointer. If so, // and if not volatile-qualified, this is undefined behavior that the // optimizer will delete, so warn about it. People sometimes try to use this // to get a deterministic trap and are surprised by clang's behavior. This // only handles the pattern "*null", which is a very syntactic check. if (UnaryOperator *UO = dyn_cast(E->IgnoreParenCasts())) if (UO->getOpcode() == UO_Deref && UO->getSubExpr()->IgnoreParenCasts()-> isNullPointerConstant(S.Context, Expr::NPC_ValueDependentIsNotNull) && !UO->getType().isVolatileQualified()) { S.DiagRuntimeBehavior(UO->getOperatorLoc(), UO, S.PDiag(diag::warn_indirection_through_null) << UO->getSubExpr()->getSourceRange()); S.DiagRuntimeBehavior(UO->getOperatorLoc(), UO, S.PDiag(diag::note_indirection_through_null)); } } static void DiagnoseDirectIsaAccess(Sema &S, const ObjCIvarRefExpr *OIRE, SourceLocation AssignLoc, const Expr* RHS) { const ObjCIvarDecl *IV = OIRE->getDecl(); if (!IV) return; DeclarationName MemberName = IV->getDeclName(); IdentifierInfo *Member = MemberName.getAsIdentifierInfo(); if (!Member || !Member->isStr("isa")) return; const Expr *Base = OIRE->getBase(); QualType BaseType = Base->getType(); if (OIRE->isArrow()) BaseType = BaseType->getPointeeType(); if (const ObjCObjectType *OTy = BaseType->getAs()) if (ObjCInterfaceDecl *IDecl = OTy->getInterface()) { ObjCInterfaceDecl *ClassDeclared = nullptr; ObjCIvarDecl *IV = IDecl->lookupInstanceVariable(Member, ClassDeclared); if (!ClassDeclared->getSuperClass() && (*ClassDeclared->ivar_begin()) == IV) { if (RHS) { NamedDecl *ObjectSetClass = S.LookupSingleName(S.TUScope, &S.Context.Idents.get("object_setClass"), SourceLocation(), S.LookupOrdinaryName); if (ObjectSetClass) { SourceLocation RHSLocEnd = S.getLocForEndOfToken(RHS->getLocEnd()); S.Diag(OIRE->getExprLoc(), diag::warn_objc_isa_assign) << FixItHint::CreateInsertion(OIRE->getLocStart(), "object_setClass(") << FixItHint::CreateReplacement(SourceRange(OIRE->getOpLoc(), AssignLoc), ",") << FixItHint::CreateInsertion(RHSLocEnd, ")"); } else S.Diag(OIRE->getLocation(), diag::warn_objc_isa_assign); } else { NamedDecl *ObjectGetClass = S.LookupSingleName(S.TUScope, &S.Context.Idents.get("object_getClass"), SourceLocation(), S.LookupOrdinaryName); if (ObjectGetClass) S.Diag(OIRE->getExprLoc(), diag::warn_objc_isa_use) << FixItHint::CreateInsertion(OIRE->getLocStart(), "object_getClass(") << FixItHint::CreateReplacement( SourceRange(OIRE->getOpLoc(), OIRE->getLocEnd()), ")"); else S.Diag(OIRE->getLocation(), diag::warn_objc_isa_use); } S.Diag(IV->getLocation(), diag::note_ivar_decl); } } } ExprResult Sema::DefaultLvalueConversion(Expr *E) { // Handle any placeholder expressions which made it here. 
if (E->getType()->isPlaceholderType()) { ExprResult result = CheckPlaceholderExpr(E); if (result.isInvalid()) return ExprError(); E = result.get(); } // C++ [conv.lval]p1: // A glvalue of a non-function, non-array type T can be // converted to a prvalue. if (!E->isGLValue()) return E; QualType T = E->getType(); assert(!T.isNull() && "r-value conversion on typeless expression?"); // We don't want to throw lvalue-to-rvalue casts on top of // expressions of certain types in C++. if (getLangOpts().CPlusPlus && (E->getType() == Context.OverloadTy || T->isDependentType() || T->isRecordType())) return E; // The C standard is actually really unclear on this point, and // DR106 tells us what the result should be but not why. It's // generally best to say that void types just doesn't undergo // lvalue-to-rvalue at all. Note that expressions of unqualified // 'void' type are never l-values, but qualified void can be. if (T->isVoidType()) return E; // OpenCL usually rejects direct accesses to values of 'half' type. if (getLangOpts().OpenCL && !getOpenCLOptions().cl_khr_fp16 && T->isHalfType()) { Diag(E->getExprLoc(), diag::err_opencl_half_load_store) << 0 << T; return ExprError(); } CheckForNullPointerDereference(*this, E); if (const ObjCIsaExpr *OISA = dyn_cast(E->IgnoreParenCasts())) { NamedDecl *ObjectGetClass = LookupSingleName(TUScope, &Context.Idents.get("object_getClass"), SourceLocation(), LookupOrdinaryName); if (ObjectGetClass) Diag(E->getExprLoc(), diag::warn_objc_isa_use) << FixItHint::CreateInsertion(OISA->getLocStart(), "object_getClass(") << FixItHint::CreateReplacement( SourceRange(OISA->getOpLoc(), OISA->getIsaMemberLoc()), ")"); else Diag(E->getExprLoc(), diag::warn_objc_isa_use); } else if (const ObjCIvarRefExpr *OIRE = dyn_cast(E->IgnoreParenCasts())) DiagnoseDirectIsaAccess(*this, OIRE, SourceLocation(), /* Expr*/nullptr); // C++ [conv.lval]p1: // [...] If T is a non-class type, the type of the prvalue is the // cv-unqualified version of T. Otherwise, the type of the // rvalue is T. // // C99 6.3.2.1p2: // If the lvalue has qualified type, the value has the unqualified // version of the type of the lvalue; otherwise, the value has the // type of the lvalue. if (T.hasQualifiers()) T = T.getUnqualifiedType(); // Under the MS ABI, lock down the inheritance model now. if (T->isMemberPointerType() && Context.getTargetInfo().getCXXABI().isMicrosoft()) (void)isCompleteType(E->getExprLoc(), T); UpdateMarkingForLValueToRValue(E); // Loading a __weak object implicitly retains the value, so we need a cleanup to // balance that. if (getLangOpts().ObjCAutoRefCount && E->getType().getObjCLifetime() == Qualifiers::OCL_Weak) ExprNeedsCleanups = true; ExprResult Res = ImplicitCastExpr::Create(Context, T, CK_LValueToRValue, E, nullptr, VK_RValue); // C11 6.3.2.1p2: // ... if the lvalue has atomic type, the value has the non-atomic version // of the type of the lvalue ... if (const AtomicType *Atomic = T->getAs()) { T = Atomic->getValueType().getUnqualifiedType(); Res = ImplicitCastExpr::Create(Context, T, CK_AtomicToNonAtomic, Res.get(), nullptr, VK_RValue); } return Res; } ExprResult Sema::DefaultFunctionArrayLvalueConversion(Expr *E, bool Diagnose) { ExprResult Res = DefaultFunctionArrayConversion(E, Diagnose); if (Res.isInvalid()) return ExprError(); Res = DefaultLvalueConversion(Res.get()); if (Res.isInvalid()) return ExprError(); return Res; } /// CallExprUnaryConversions - a special case of an unary conversion /// performed on a function designator of a call expression. 
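/// Illustrative sketch (an assumption drawn from the comments in the body
/// below, not a quote from this patch):
///   void f(int);
///   void (*fp)(int) = f;
///   f(1);   // callee has function type: FunctionToPointerDecay is inserted
///   fp(1);  // callee is already a pointer to function: only the default
///           // lvalue conversion is applied to 'fp'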
ExprResult Sema::CallExprUnaryConversions(Expr *E) { QualType Ty = E->getType(); ExprResult Res = E; // Only do implicit cast for a function type, but not for a pointer // to function type. if (Ty->isFunctionType()) { Res = ImpCastExprToType(E, Context.getPointerType(Ty), CK_FunctionToPointerDecay).get(); if (Res.isInvalid()) return ExprError(); } Res = DefaultLvalueConversion(Res.get()); if (Res.isInvalid()) return ExprError(); return Res.get(); } /// UsualUnaryConversions - Performs various conversions that are common to most /// operators (C99 6.3). The conversions of array and function types are /// sometimes suppressed. For example, the array->pointer conversion doesn't /// apply if the array is an argument to the sizeof or address (&) operators. /// In these instances, this routine should *not* be called. ExprResult Sema::UsualUnaryConversions(Expr *E) { // First, convert to an r-value. ExprResult Res = DefaultFunctionArrayLvalueConversion(E); if (Res.isInvalid()) return ExprError(); E = Res.get(); QualType Ty = E->getType(); assert(!Ty.isNull() && "UsualUnaryConversions - missing type"); // Half FP have to be promoted to float unless it is natively supported if (Ty->isHalfType() && !getLangOpts().NativeHalfType) return ImpCastExprToType(Res.get(), Context.FloatTy, CK_FloatingCast); // Try to perform integral promotions if the object has a theoretically // promotable type. if (Ty->isIntegralOrUnscopedEnumerationType()) { // C99 6.3.1.1p2: // // The following may be used in an expression wherever an int or // unsigned int may be used: // - an object or expression with an integer type whose integer // conversion rank is less than or equal to the rank of int // and unsigned int. // - A bit-field of type _Bool, int, signed int, or unsigned int. // // If an int can represent all values of the original type, the // value is converted to an int; otherwise, it is converted to an // unsigned int. These are called the integer promotions. All // other types are unchanged by the integer promotions. QualType PTy = Context.isPromotableBitField(E); if (!PTy.isNull()) { E = ImpCastExprToType(E, PTy, CK_IntegralCast).get(); return E; } if (Ty->isPromotableIntegerType()) { QualType PT = Context.getPromotedIntegerType(Ty); E = ImpCastExprToType(E, PT, CK_IntegralCast).get(); return E; } } return E; } /// DefaultArgumentPromotion (C99 6.5.2.2p6). Used for function calls that /// do not have a prototype. Arguments that have type float or __fp16 /// are promoted to double. All other argument types are converted by /// UsualUnaryConversions(). ExprResult Sema::DefaultArgumentPromotion(Expr *E) { QualType Ty = E->getType(); assert(!Ty.isNull() && "DefaultArgumentPromotion - missing type"); ExprResult Res = UsualUnaryConversions(E); if (Res.isInvalid()) return ExprError(); E = Res.get(); // If this is a 'float' or '__fp16' (CVR qualified or typedef) promote to // double. const BuiltinType *BTy = Ty->getAs(); if (BTy && (BTy->getKind() == BuiltinType::Half || BTy->getKind() == BuiltinType::Float)) E = ImpCastExprToType(E, Context.DoubleTy, CK_FloatingCast).get(); // C++ performs lvalue-to-rvalue conversion as a default argument // promotion, even on class types, but note: // C++11 [conv.lval]p2: // When an lvalue-to-rvalue conversion occurs in an unevaluated // operand or a subexpression thereof the value contained in the // referenced object is not accessed. 
Otherwise, if the glvalue // has a class type, the conversion copy-initializes a temporary // of type T from the glvalue and the result of the conversion // is a prvalue for the temporary. // FIXME: add some way to gate this entire thing for correctness in // potentially potentially evaluated contexts. if (getLangOpts().CPlusPlus && E->isGLValue() && !isUnevaluatedContext()) { ExprResult Temp = PerformCopyInitialization( InitializedEntity::InitializeTemporary(E->getType()), E->getExprLoc(), E); if (Temp.isInvalid()) return ExprError(); E = Temp.get(); } return E; } /// Determine the degree of POD-ness for an expression. /// Incomplete types are considered POD, since this check can be performed /// when we're in an unevaluated context. Sema::VarArgKind Sema::isValidVarArgType(const QualType &Ty) { if (Ty->isIncompleteType()) { // C++11 [expr.call]p7: // After these conversions, if the argument does not have arithmetic, // enumeration, pointer, pointer to member, or class type, the program // is ill-formed. // // Since we've already performed array-to-pointer and function-to-pointer // decay, the only such type in C++ is cv void. This also handles // initializer lists as variadic arguments. if (Ty->isVoidType()) return VAK_Invalid; if (Ty->isObjCObjectType()) return VAK_Invalid; return VAK_Valid; } if (Ty.isCXX98PODType(Context)) return VAK_Valid; // C++11 [expr.call]p7: // Passing a potentially-evaluated argument of class type (Clause 9) // having a non-trivial copy constructor, a non-trivial move constructor, // or a non-trivial destructor, with no corresponding parameter, // is conditionally-supported with implementation-defined semantics. if (getLangOpts().CPlusPlus11 && !Ty->isDependentType()) if (CXXRecordDecl *Record = Ty->getAsCXXRecordDecl()) if (!Record->hasNonTrivialCopyConstructor() && !Record->hasNonTrivialMoveConstructor() && !Record->hasNonTrivialDestructor()) return VAK_ValidInCXX11; if (getLangOpts().ObjCAutoRefCount && Ty->isObjCLifetimeType()) return VAK_Valid; if (Ty->isObjCObjectType()) return VAK_Invalid; if (getLangOpts().MSVCCompat) return VAK_MSVCUndefined; // FIXME: In C++11, these cases are conditionally-supported, meaning we're // permitted to reject them. We should consider doing so. return VAK_Undefined; } void Sema::checkVariadicArgument(const Expr *E, VariadicCallType CT) { // Don't allow one to pass an Objective-C interface to a vararg. const QualType &Ty = E->getType(); VarArgKind VAK = isValidVarArgType(Ty); // Complain about passing non-POD types through varargs. switch (VAK) { case VAK_ValidInCXX11: DiagRuntimeBehavior( E->getLocStart(), nullptr, PDiag(diag::warn_cxx98_compat_pass_non_pod_arg_to_vararg) << Ty << CT); // Fall through. case VAK_Valid: if (Ty->isRecordType()) { // This is unlikely to be what the user intended. If the class has a // 'c_str' member function, the user probably meant to call that. 
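// Illustrative sketch (hypothetical user code, based on the diagnostic and
// fix-it emitted just below):
//   std::string Name = "world";
//   printf("hello %s\n", Name);   // warns about passing an object of class
//                                 // type through '...'; the note suggests
//                                 // Name.c_str()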
DiagRuntimeBehavior(E->getLocStart(), nullptr, PDiag(diag::warn_pass_class_arg_to_vararg) << Ty << CT << hasCStrMethod(E) << ".c_str()"); } break; case VAK_Undefined: case VAK_MSVCUndefined: DiagRuntimeBehavior( E->getLocStart(), nullptr, PDiag(diag::warn_cannot_pass_non_pod_arg_to_vararg) << getLangOpts().CPlusPlus11 << Ty << CT); break; case VAK_Invalid: if (Ty->isObjCObjectType()) DiagRuntimeBehavior( E->getLocStart(), nullptr, PDiag(diag::err_cannot_pass_objc_interface_to_vararg) << Ty << CT); else Diag(E->getLocStart(), diag::err_cannot_pass_to_vararg) << isa(E) << Ty << CT; break; } } /// DefaultVariadicArgumentPromotion - Like DefaultArgumentPromotion, but /// will create a trap if the resulting type is not a POD type. ExprResult Sema::DefaultVariadicArgumentPromotion(Expr *E, VariadicCallType CT, FunctionDecl *FDecl) { if (const BuiltinType *PlaceholderTy = E->getType()->getAsPlaceholderType()) { // Strip the unbridged-cast placeholder expression off, if applicable. if (PlaceholderTy->getKind() == BuiltinType::ARCUnbridgedCast && (CT == VariadicMethod || (FDecl && FDecl->hasAttr()))) { E = stripARCUnbridgedCast(E); // Otherwise, do normal placeholder checking. } else { ExprResult ExprRes = CheckPlaceholderExpr(E); if (ExprRes.isInvalid()) return ExprError(); E = ExprRes.get(); } } ExprResult ExprRes = DefaultArgumentPromotion(E); if (ExprRes.isInvalid()) return ExprError(); E = ExprRes.get(); // Diagnostics regarding non-POD argument types are // emitted along with format string checking in Sema::CheckFunctionCall(). if (isValidVarArgType(E->getType()) == VAK_Undefined) { // Turn this into a trap. CXXScopeSpec SS; SourceLocation TemplateKWLoc; UnqualifiedId Name; Name.setIdentifier(PP.getIdentifierInfo("__builtin_trap"), E->getLocStart()); ExprResult TrapFn = ActOnIdExpression(TUScope, SS, TemplateKWLoc, Name, true, false); if (TrapFn.isInvalid()) return ExprError(); ExprResult Call = ActOnCallExpr(TUScope, TrapFn.get(), E->getLocStart(), None, E->getLocEnd()); if (Call.isInvalid()) return ExprError(); ExprResult Comma = ActOnBinOp(TUScope, E->getLocStart(), tok::comma, Call.get(), E); if (Comma.isInvalid()) return ExprError(); return Comma.get(); } if (!getLangOpts().CPlusPlus && RequireCompleteType(E->getExprLoc(), E->getType(), diag::err_call_incomplete_argument)) return ExprError(); return E; } /// \brief Converts an integer to complex float type. Helper function of /// UsualArithmeticConversions() /// /// \return false if the integer expression is an integer type and is /// successfully converted to the complex type. static bool handleIntegerToComplexFloatConversion(Sema &S, ExprResult &IntExpr, ExprResult &ComplexExpr, QualType IntTy, QualType ComplexTy, bool SkipCast) { if (IntTy->isComplexType() || IntTy->isRealFloatingType()) return true; if (SkipCast) return false; if (IntTy->isIntegerType()) { QualType fpTy = cast(ComplexTy)->getElementType(); IntExpr = S.ImpCastExprToType(IntExpr.get(), fpTy, CK_IntegralToFloating); IntExpr = S.ImpCastExprToType(IntExpr.get(), ComplexTy, CK_FloatingRealToComplex); } else { assert(IntTy->isComplexIntegerType()); IntExpr = S.ImpCastExprToType(IntExpr.get(), ComplexTy, CK_IntegralComplexToFloatingComplex); } return false; } /// \brief Handle arithmetic conversion with complex types. 
Helper function of /// UsualArithmeticConversions() static QualType handleComplexFloatConversion(Sema &S, ExprResult &LHS, ExprResult &RHS, QualType LHSType, QualType RHSType, bool IsCompAssign) { // if we have an integer operand, the result is the complex type. if (!handleIntegerToComplexFloatConversion(S, RHS, LHS, RHSType, LHSType, /*skipCast*/false)) return LHSType; if (!handleIntegerToComplexFloatConversion(S, LHS, RHS, LHSType, RHSType, /*skipCast*/IsCompAssign)) return RHSType; // This handles complex/complex, complex/float, or float/complex. // When both operands are complex, the shorter operand is converted to the // type of the longer, and that is the type of the result. This corresponds // to what is done when combining two real floating-point operands. // The fun begins when size promotion occur across type domains. // From H&S 6.3.4: When one operand is complex and the other is a real // floating-point type, the less precise type is converted, within it's // real or complex domain, to the precision of the other type. For example, // when combining a "long double" with a "double _Complex", the // "double _Complex" is promoted to "long double _Complex". // Compute the rank of the two types, regardless of whether they are complex. int Order = S.Context.getFloatingTypeOrder(LHSType, RHSType); auto *LHSComplexType = dyn_cast(LHSType); auto *RHSComplexType = dyn_cast(RHSType); QualType LHSElementType = LHSComplexType ? LHSComplexType->getElementType() : LHSType; QualType RHSElementType = RHSComplexType ? RHSComplexType->getElementType() : RHSType; QualType ResultType = S.Context.getComplexType(LHSElementType); if (Order < 0) { // Promote the precision of the LHS if not an assignment. ResultType = S.Context.getComplexType(RHSElementType); if (!IsCompAssign) { if (LHSComplexType) LHS = S.ImpCastExprToType(LHS.get(), ResultType, CK_FloatingComplexCast); else LHS = S.ImpCastExprToType(LHS.get(), RHSElementType, CK_FloatingCast); } } else if (Order > 0) { // Promote the precision of the RHS. if (RHSComplexType) RHS = S.ImpCastExprToType(RHS.get(), ResultType, CK_FloatingComplexCast); else RHS = S.ImpCastExprToType(RHS.get(), LHSElementType, CK_FloatingCast); } return ResultType; } /// \brief Hande arithmetic conversion from integer to float. Helper function /// of UsualArithmeticConversions() static QualType handleIntToFloatConversion(Sema &S, ExprResult &FloatExpr, ExprResult &IntExpr, QualType FloatTy, QualType IntTy, bool ConvertFloat, bool ConvertInt) { if (IntTy->isIntegerType()) { if (ConvertInt) // Convert intExpr to the lhs floating point type. IntExpr = S.ImpCastExprToType(IntExpr.get(), FloatTy, CK_IntegralToFloating); return FloatTy; } // Convert both sides to the appropriate complex float. assert(IntTy->isComplexIntegerType()); QualType result = S.Context.getComplexType(FloatTy); // _Complex int -> _Complex float if (ConvertInt) IntExpr = S.ImpCastExprToType(IntExpr.get(), result, CK_IntegralComplexToFloatingComplex); // float -> _Complex float if (ConvertFloat) FloatExpr = S.ImpCastExprToType(FloatExpr.get(), result, CK_FloatingRealToComplex); return result; } /// \brief Handle arithmethic conversion with floating point types. 
Helper /// function of UsualArithmeticConversions() static QualType handleFloatConversion(Sema &S, ExprResult &LHS, ExprResult &RHS, QualType LHSType, QualType RHSType, bool IsCompAssign) { bool LHSFloat = LHSType->isRealFloatingType(); bool RHSFloat = RHSType->isRealFloatingType(); // If we have two real floating types, convert the smaller operand // to the bigger result. if (LHSFloat && RHSFloat) { int order = S.Context.getFloatingTypeOrder(LHSType, RHSType); if (order > 0) { RHS = S.ImpCastExprToType(RHS.get(), LHSType, CK_FloatingCast); return LHSType; } assert(order < 0 && "illegal float comparison"); if (!IsCompAssign) LHS = S.ImpCastExprToType(LHS.get(), RHSType, CK_FloatingCast); return RHSType; } if (LHSFloat) { // Half FP has to be promoted to float unless it is natively supported if (LHSType->isHalfType() && !S.getLangOpts().NativeHalfType) LHSType = S.Context.FloatTy; return handleIntToFloatConversion(S, LHS, RHS, LHSType, RHSType, /*convertFloat=*/!IsCompAssign, /*convertInt=*/ true); } assert(RHSFloat); return handleIntToFloatConversion(S, RHS, LHS, RHSType, LHSType, /*convertInt=*/ true, /*convertFloat=*/!IsCompAssign); } typedef ExprResult PerformCastFn(Sema &S, Expr *operand, QualType toType); namespace { /// These helper callbacks are placed in an anonymous namespace to /// permit their use as function template parameters. ExprResult doIntegralCast(Sema &S, Expr *op, QualType toType) { return S.ImpCastExprToType(op, toType, CK_IntegralCast); } ExprResult doComplexIntegralCast(Sema &S, Expr *op, QualType toType) { return S.ImpCastExprToType(op, S.Context.getComplexType(toType), CK_IntegralComplexCast); } } /// \brief Handle integer arithmetic conversions. Helper function of /// UsualArithmeticConversions() template static QualType handleIntegerConversion(Sema &S, ExprResult &LHS, ExprResult &RHS, QualType LHSType, QualType RHSType, bool IsCompAssign) { // The rules for this case are in C99 6.3.1.8 int order = S.Context.getIntegerTypeOrder(LHSType, RHSType); bool LHSSigned = LHSType->hasSignedIntegerRepresentation(); bool RHSSigned = RHSType->hasSignedIntegerRepresentation(); if (LHSSigned == RHSSigned) { // Same signedness; use the higher-ranked type if (order >= 0) { RHS = (*doRHSCast)(S, RHS.get(), LHSType); return LHSType; } else if (!IsCompAssign) LHS = (*doLHSCast)(S, LHS.get(), RHSType); return RHSType; } else if (order != (LHSSigned ? 1 : -1)) { // The unsigned type has greater than or equal rank to the // signed type, so use the unsigned type if (RHSSigned) { RHS = (*doRHSCast)(S, RHS.get(), LHSType); return LHSType; } else if (!IsCompAssign) LHS = (*doLHSCast)(S, LHS.get(), RHSType); return RHSType; } else if (S.Context.getIntWidth(LHSType) != S.Context.getIntWidth(RHSType)) { // The two types are different widths; if we are here, that // means the signed type is larger than the unsigned type, so // use the signed type. if (LHSSigned) { RHS = (*doRHSCast)(S, RHS.get(), LHSType); return LHSType; } else if (!IsCompAssign) LHS = (*doLHSCast)(S, LHS.get(), RHSType); return RHSType; } else { // The signed type is higher-ranked than the unsigned type, // but isn't actually any bigger (like unsigned int and long // on most 32-bit systems). Use the unsigned type corresponding // to the signed type. QualType result = S.Context.getCorrespondingUnsignedType(LHSSigned ? LHSType : RHSType); RHS = (*doRHSCast)(S, RHS.get(), result); if (!IsCompAssign) LHS = (*doLHSCast)(S, LHS.get(), result); return result; } } /// \brief Handle conversions with GCC complex int extension. 
Helper function /// of UsualArithmeticConversions() static QualType handleComplexIntConversion(Sema &S, ExprResult &LHS, ExprResult &RHS, QualType LHSType, QualType RHSType, bool IsCompAssign) { const ComplexType *LHSComplexInt = LHSType->getAsComplexIntegerType(); const ComplexType *RHSComplexInt = RHSType->getAsComplexIntegerType(); if (LHSComplexInt && RHSComplexInt) { QualType LHSEltType = LHSComplexInt->getElementType(); QualType RHSEltType = RHSComplexInt->getElementType(); QualType ScalarType = handleIntegerConversion (S, LHS, RHS, LHSEltType, RHSEltType, IsCompAssign); return S.Context.getComplexType(ScalarType); } if (LHSComplexInt) { QualType LHSEltType = LHSComplexInt->getElementType(); QualType ScalarType = handleIntegerConversion (S, LHS, RHS, LHSEltType, RHSType, IsCompAssign); QualType ComplexType = S.Context.getComplexType(ScalarType); RHS = S.ImpCastExprToType(RHS.get(), ComplexType, CK_IntegralRealToComplex); return ComplexType; } assert(RHSComplexInt); QualType RHSEltType = RHSComplexInt->getElementType(); QualType ScalarType = handleIntegerConversion (S, LHS, RHS, LHSType, RHSEltType, IsCompAssign); QualType ComplexType = S.Context.getComplexType(ScalarType); if (!IsCompAssign) LHS = S.ImpCastExprToType(LHS.get(), ComplexType, CK_IntegralRealToComplex); return ComplexType; } /// UsualArithmeticConversions - Performs various conversions that are common to /// binary operators (C99 6.3.1.8). If both operands aren't arithmetic, this /// routine returns the first non-arithmetic type found. The client is /// responsible for emitting appropriate error diagnostics. QualType Sema::UsualArithmeticConversions(ExprResult &LHS, ExprResult &RHS, bool IsCompAssign) { if (!IsCompAssign) { LHS = UsualUnaryConversions(LHS.get()); if (LHS.isInvalid()) return QualType(); } RHS = UsualUnaryConversions(RHS.get()); if (RHS.isInvalid()) return QualType(); // For conversion purposes, we ignore any qualifiers. // For example, "const float" and "float" are equivalent. QualType LHSType = Context.getCanonicalType(LHS.get()->getType()).getUnqualifiedType(); QualType RHSType = Context.getCanonicalType(RHS.get()->getType()).getUnqualifiedType(); // For conversion purposes, we ignore any atomic qualifier on the LHS. if (const AtomicType *AtomicLHS = LHSType->getAs()) LHSType = AtomicLHS->getValueType(); // If both types are identical, no conversion is needed. if (LHSType == RHSType) return LHSType; // If either side is a non-arithmetic type (e.g. a pointer), we are done. // The caller can deal with this (e.g. pointer + int). if (!LHSType->isArithmeticType() || !RHSType->isArithmeticType()) return QualType(); // Apply unary and bitfield promotions to the LHS's type. QualType LHSUnpromotedType = LHSType; if (LHSType->isPromotableIntegerType()) LHSType = Context.getPromotedIntegerType(LHSType); QualType LHSBitfieldPromoteTy = Context.isPromotableBitField(LHS.get()); if (!LHSBitfieldPromoteTy.isNull()) LHSType = LHSBitfieldPromoteTy; if (LHSType != LHSUnpromotedType && !IsCompAssign) LHS = ImpCastExprToType(LHS.get(), LHSType, CK_IntegralCast); // If both types are identical, no conversion is needed. if (LHSType == RHSType) return LHSType; // At this point, we have two different arithmetic types. // Handle complex types first (C99 6.3.1.8p1). if (LHSType->isComplexType() || RHSType->isComplexType()) return handleComplexFloatConversion(*this, LHS, RHS, LHSType, RHSType, IsCompAssign); // Now handle "real" floating types (i.e. float, double, long double). 
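// Illustrative sketch (hypothetical operands, following the C99 6.3.1.8
// rules referenced above): in '1 + 2.0' the int operand is converted to
// double, and in 'f + 1.0L' (with 'float f') the float operand is converted
// to long double, so the lower-ranked real floating type is always widened.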
if (LHSType->isRealFloatingType() || RHSType->isRealFloatingType()) return handleFloatConversion(*this, LHS, RHS, LHSType, RHSType, IsCompAssign); // Handle GCC complex int extension. if (LHSType->isComplexIntegerType() || RHSType->isComplexIntegerType()) return handleComplexIntConversion(*this, LHS, RHS, LHSType, RHSType, IsCompAssign); // Finally, we have two differing integer types. return handleIntegerConversion (*this, LHS, RHS, LHSType, RHSType, IsCompAssign); } //===----------------------------------------------------------------------===// // Semantic Analysis for various Expression Types //===----------------------------------------------------------------------===// ExprResult Sema::ActOnGenericSelectionExpr(SourceLocation KeyLoc, SourceLocation DefaultLoc, SourceLocation RParenLoc, Expr *ControllingExpr, ArrayRef ArgTypes, ArrayRef ArgExprs) { unsigned NumAssocs = ArgTypes.size(); assert(NumAssocs == ArgExprs.size()); TypeSourceInfo **Types = new TypeSourceInfo*[NumAssocs]; for (unsigned i = 0; i < NumAssocs; ++i) { if (ArgTypes[i]) (void) GetTypeFromParser(ArgTypes[i], &Types[i]); else Types[i] = nullptr; } ExprResult ER = CreateGenericSelectionExpr(KeyLoc, DefaultLoc, RParenLoc, ControllingExpr, llvm::makeArrayRef(Types, NumAssocs), ArgExprs); delete [] Types; return ER; } ExprResult Sema::CreateGenericSelectionExpr(SourceLocation KeyLoc, SourceLocation DefaultLoc, SourceLocation RParenLoc, Expr *ControllingExpr, ArrayRef Types, ArrayRef Exprs) { unsigned NumAssocs = Types.size(); assert(NumAssocs == Exprs.size()); // Decay and strip qualifiers for the controlling expression type, and handle // placeholder type replacement. See committee discussion from WG14 DR423. - ExprResult R = DefaultFunctionArrayLvalueConversion(ControllingExpr); - if (R.isInvalid()) - return ExprError(); - ControllingExpr = R.get(); + { + EnterExpressionEvaluationContext Unevaluated(*this, Sema::Unevaluated); + ExprResult R = DefaultFunctionArrayLvalueConversion(ControllingExpr); + if (R.isInvalid()) + return ExprError(); + ControllingExpr = R.get(); + } // The controlling expression is an unevaluated operand, so side effects are // likely unintended. if (ActiveTemplateInstantiations.empty() && ControllingExpr->HasSideEffects(Context, false)) Diag(ControllingExpr->getExprLoc(), diag::warn_side_effects_unevaluated_context); bool TypeErrorFound = false, IsResultDependent = ControllingExpr->isTypeDependent(), ContainsUnexpandedParameterPack = ControllingExpr->containsUnexpandedParameterPack(); for (unsigned i = 0; i < NumAssocs; ++i) { if (Exprs[i]->containsUnexpandedParameterPack()) ContainsUnexpandedParameterPack = true; if (Types[i]) { if (Types[i]->getType()->containsUnexpandedParameterPack()) ContainsUnexpandedParameterPack = true; if (Types[i]->getType()->isDependentType()) { IsResultDependent = true; } else { // C11 6.5.1.1p2 "The type name in a generic association shall specify a // complete object type other than a variably modified type." unsigned D = 0; if (Types[i]->getType()->isIncompleteType()) D = diag::err_assoc_type_incomplete; else if (!Types[i]->getType()->isObjectType()) D = diag::err_assoc_type_nonobject; else if (Types[i]->getType()->isVariablyModifiedType()) D = diag::err_assoc_type_variably_modified; if (D != 0) { Diag(Types[i]->getTypeLoc().getBeginLoc(), D) << Types[i]->getTypeLoc().getSourceRange() << Types[i]->getType(); TypeErrorFound = true; } // C11 6.5.1.1p2 "No two generic associations in the same generic // selection shall specify compatible types." 
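// Illustrative sketch (hypothetical user code for the C11 rule quoted
// above):
//   _Generic(x, int: 1, signed int: 2)
// is rejected because 'int' and 'signed int' name compatible types, so two
// generic associations could match the same controlling expression.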
for (unsigned j = i+1; j < NumAssocs; ++j) if (Types[j] && !Types[j]->getType()->isDependentType() && Context.typesAreCompatible(Types[i]->getType(), Types[j]->getType())) { Diag(Types[j]->getTypeLoc().getBeginLoc(), diag::err_assoc_compatible_types) << Types[j]->getTypeLoc().getSourceRange() << Types[j]->getType() << Types[i]->getType(); Diag(Types[i]->getTypeLoc().getBeginLoc(), diag::note_compat_assoc) << Types[i]->getTypeLoc().getSourceRange() << Types[i]->getType(); TypeErrorFound = true; } } } } if (TypeErrorFound) return ExprError(); // If we determined that the generic selection is result-dependent, don't // try to compute the result expression. if (IsResultDependent) return new (Context) GenericSelectionExpr( Context, KeyLoc, ControllingExpr, Types, Exprs, DefaultLoc, RParenLoc, ContainsUnexpandedParameterPack); SmallVector CompatIndices; unsigned DefaultIndex = -1U; for (unsigned i = 0; i < NumAssocs; ++i) { if (!Types[i]) DefaultIndex = i; else if (Context.typesAreCompatible(ControllingExpr->getType(), Types[i]->getType())) CompatIndices.push_back(i); } // C11 6.5.1.1p2 "The controlling expression of a generic selection shall have // type compatible with at most one of the types named in its generic // association list." if (CompatIndices.size() > 1) { // We strip parens here because the controlling expression is typically // parenthesized in macro definitions. ControllingExpr = ControllingExpr->IgnoreParens(); Diag(ControllingExpr->getLocStart(), diag::err_generic_sel_multi_match) << ControllingExpr->getSourceRange() << ControllingExpr->getType() << (unsigned) CompatIndices.size(); for (unsigned I : CompatIndices) { Diag(Types[I]->getTypeLoc().getBeginLoc(), diag::note_compat_assoc) << Types[I]->getTypeLoc().getSourceRange() << Types[I]->getType(); } return ExprError(); } // C11 6.5.1.1p2 "If a generic selection has no default generic association, // its controlling expression shall have type compatible with exactly one of // the types named in its generic association list." if (DefaultIndex == -1U && CompatIndices.size() == 0) { // We strip parens here because the controlling expression is typically // parenthesized in macro definitions. ControllingExpr = ControllingExpr->IgnoreParens(); Diag(ControllingExpr->getLocStart(), diag::err_generic_sel_no_match) << ControllingExpr->getSourceRange() << ControllingExpr->getType(); return ExprError(); } // C11 6.5.1.1p3 "If a generic selection has a generic association with a // type name that is compatible with the type of the controlling expression, // then the result expression of the generic selection is the expression // in that generic association. Otherwise, the result expression of the // generic selection is the expression in the default generic association." unsigned ResultIndex = CompatIndices.size() ? CompatIndices[0] : DefaultIndex; return new (Context) GenericSelectionExpr( Context, KeyLoc, ControllingExpr, Types, Exprs, DefaultLoc, RParenLoc, ContainsUnexpandedParameterPack, ResultIndex); } /// getUDSuffixLoc - Create a SourceLocation for a ud-suffix, given the /// location of the token and the offset of the ud-suffix within it. static SourceLocation getUDSuffixLoc(Sema &S, SourceLocation TokLoc, unsigned Offset) { return Lexer::AdvanceToTokenCharacter(TokLoc, Offset, S.getSourceManager(), S.getLangOpts()); } /// BuildCookedLiteralOperatorCall - A user-defined literal was found. Look up /// the corresponding cooked (non-raw) literal operator, and build a call to it. 
static ExprResult BuildCookedLiteralOperatorCall(Sema &S, Scope *Scope, IdentifierInfo *UDSuffix, SourceLocation UDSuffixLoc, ArrayRef Args, SourceLocation LitEndLoc) { assert(Args.size() <= 2 && "too many arguments for literal operator"); QualType ArgTy[2]; for (unsigned ArgIdx = 0; ArgIdx != Args.size(); ++ArgIdx) { ArgTy[ArgIdx] = Args[ArgIdx]->getType(); if (ArgTy[ArgIdx]->isArrayType()) ArgTy[ArgIdx] = S.Context.getArrayDecayedType(ArgTy[ArgIdx]); } DeclarationName OpName = S.Context.DeclarationNames.getCXXLiteralOperatorName(UDSuffix); DeclarationNameInfo OpNameInfo(OpName, UDSuffixLoc); OpNameInfo.setCXXLiteralOperatorNameLoc(UDSuffixLoc); LookupResult R(S, OpName, UDSuffixLoc, Sema::LookupOrdinaryName); if (S.LookupLiteralOperator(Scope, R, llvm::makeArrayRef(ArgTy, Args.size()), /*AllowRaw*/false, /*AllowTemplate*/false, /*AllowStringTemplate*/false) == Sema::LOLR_Error) return ExprError(); return S.BuildLiteralOperatorCall(R, OpNameInfo, Args, LitEndLoc); } /// ActOnStringLiteral - The specified tokens were lexed as pasted string /// fragments (e.g. "foo" "bar" L"baz"). The result string has to handle string /// concatenation ([C99 5.1.1.2, translation phase #6]), so it may come from /// multiple tokens. However, the common case is that StringToks points to one /// string. /// ExprResult Sema::ActOnStringLiteral(ArrayRef StringToks, Scope *UDLScope) { assert(!StringToks.empty() && "Must have at least one string!"); StringLiteralParser Literal(StringToks, PP); if (Literal.hadError) return ExprError(); SmallVector StringTokLocs; for (const Token &Tok : StringToks) StringTokLocs.push_back(Tok.getLocation()); QualType CharTy = Context.CharTy; StringLiteral::StringKind Kind = StringLiteral::Ascii; if (Literal.isWide()) { CharTy = Context.getWideCharType(); Kind = StringLiteral::Wide; } else if (Literal.isUTF8()) { Kind = StringLiteral::UTF8; } else if (Literal.isUTF16()) { CharTy = Context.Char16Ty; Kind = StringLiteral::UTF16; } else if (Literal.isUTF32()) { CharTy = Context.Char32Ty; Kind = StringLiteral::UTF32; } else if (Literal.isPascal()) { CharTy = Context.UnsignedCharTy; } QualType CharTyConst = CharTy; // A C++ string literal has a const-qualified element type (C++ 2.13.4p1). if (getLangOpts().CPlusPlus || getLangOpts().ConstStrings) CharTyConst.addConst(); // Get an array type for the string, according to C99 6.4.5. This includes // the nul terminator character as well as the string length for pascal // strings. QualType StrTy = Context.getConstantArrayType(CharTyConst, llvm::APInt(32, Literal.GetNumStringChars()+1), ArrayType::Normal, 0); // OpenCL v1.1 s6.5.3: a string literal is in the constant address space. if (getLangOpts().OpenCL) { StrTy = Context.getAddrSpaceQualType(StrTy, LangAS::opencl_constant); } // Pass &StringTokLocs[0], StringTokLocs.size() to factory! StringLiteral *Lit = StringLiteral::Create(Context, Literal.GetString(), Kind, Literal.Pascal, StrTy, &StringTokLocs[0], StringTokLocs.size()); if (Literal.getUDSuffix().empty()) return Lit; // We're building a user-defined literal. IdentifierInfo *UDSuffix = &Context.Idents.get(Literal.getUDSuffix()); SourceLocation UDSuffixLoc = getUDSuffixLoc(*this, StringTokLocs[Literal.getUDSuffixToken()], Literal.getUDSuffixOffset()); // Make sure we're allowed user-defined literals here. 
if (!UDLScope) return ExprError(Diag(UDSuffixLoc, diag::err_invalid_string_udl)); // C++11 [lex.ext]p5: The literal L is treated as a call of the form // operator "" X (str, len) QualType SizeType = Context.getSizeType(); DeclarationName OpName = Context.DeclarationNames.getCXXLiteralOperatorName(UDSuffix); DeclarationNameInfo OpNameInfo(OpName, UDSuffixLoc); OpNameInfo.setCXXLiteralOperatorNameLoc(UDSuffixLoc); QualType ArgTy[] = { Context.getArrayDecayedType(StrTy), SizeType }; LookupResult R(*this, OpName, UDSuffixLoc, LookupOrdinaryName); switch (LookupLiteralOperator(UDLScope, R, ArgTy, /*AllowRaw*/false, /*AllowTemplate*/false, /*AllowStringTemplate*/true)) { case LOLR_Cooked: { llvm::APInt Len(Context.getIntWidth(SizeType), Literal.GetNumStringChars()); IntegerLiteral *LenArg = IntegerLiteral::Create(Context, Len, SizeType, StringTokLocs[0]); Expr *Args[] = { Lit, LenArg }; return BuildLiteralOperatorCall(R, OpNameInfo, Args, StringTokLocs.back()); } case LOLR_StringTemplate: { TemplateArgumentListInfo ExplicitArgs; unsigned CharBits = Context.getIntWidth(CharTy); bool CharIsUnsigned = CharTy->isUnsignedIntegerType(); llvm::APSInt Value(CharBits, CharIsUnsigned); TemplateArgument TypeArg(CharTy); TemplateArgumentLocInfo TypeArgInfo(Context.getTrivialTypeSourceInfo(CharTy)); ExplicitArgs.addArgument(TemplateArgumentLoc(TypeArg, TypeArgInfo)); for (unsigned I = 0, N = Lit->getLength(); I != N; ++I) { Value = Lit->getCodeUnit(I); TemplateArgument Arg(Context, Value, CharTy); TemplateArgumentLocInfo ArgInfo; ExplicitArgs.addArgument(TemplateArgumentLoc(Arg, ArgInfo)); } return BuildLiteralOperatorCall(R, OpNameInfo, None, StringTokLocs.back(), &ExplicitArgs); } case LOLR_Raw: case LOLR_Template: llvm_unreachable("unexpected literal operator lookup result"); case LOLR_Error: return ExprError(); } llvm_unreachable("unexpected literal operator lookup result"); } ExprResult Sema::BuildDeclRefExpr(ValueDecl *D, QualType Ty, ExprValueKind VK, SourceLocation Loc, const CXXScopeSpec *SS) { DeclarationNameInfo NameInfo(D->getDeclName(), Loc); return BuildDeclRefExpr(D, Ty, VK, NameInfo, SS); } /// BuildDeclRefExpr - Build an expression that references a /// declaration that does not require a closure capture. ExprResult Sema::BuildDeclRefExpr(ValueDecl *D, QualType Ty, ExprValueKind VK, const DeclarationNameInfo &NameInfo, const CXXScopeSpec *SS, NamedDecl *FoundD, const TemplateArgumentListInfo *TemplateArgs) { if (getLangOpts().CUDA) if (const FunctionDecl *Caller = dyn_cast(CurContext)) if (const FunctionDecl *Callee = dyn_cast(D)) { if (CheckCUDATarget(Caller, Callee)) { Diag(NameInfo.getLoc(), diag::err_ref_bad_target) << IdentifyCUDATarget(Callee) << D->getIdentifier() << IdentifyCUDATarget(Caller); Diag(D->getLocation(), diag::note_previous_decl) << D->getIdentifier(); return ExprError(); } } bool RefersToCapturedVariable = isa(D) && NeedToCaptureVariable(cast(D), NameInfo.getLoc()); DeclRefExpr *E; if (isa(D)) { VarTemplateSpecializationDecl *VarSpec = cast(D); E = DeclRefExpr::Create(Context, SS ? SS->getWithLocInContext(Context) : NestedNameSpecifierLoc(), VarSpec->getTemplateKeywordLoc(), D, RefersToCapturedVariable, NameInfo.getLoc(), Ty, VK, FoundD, TemplateArgs); } else { assert(!TemplateArgs && "No template arguments for non-variable" " template specialization references"); E = DeclRefExpr::Create(Context, SS ? 
SS->getWithLocInContext(Context) : NestedNameSpecifierLoc(), SourceLocation(), D, RefersToCapturedVariable, NameInfo, Ty, VK, FoundD); } MarkDeclRefReferenced(E); if (getLangOpts().ObjCWeak && isa(D) && Ty.getObjCLifetime() == Qualifiers::OCL_Weak && !Diags.isIgnored(diag::warn_arc_repeated_use_of_weak, E->getLocStart())) recordUseOfEvaluatedWeak(E); // Just in case we're building an illegal pointer-to-member. FieldDecl *FD = dyn_cast(D); if (FD && FD->isBitField()) E->setObjectKind(OK_BitField); return E; } /// Decomposes the given name into a DeclarationNameInfo, its location, and /// possibly a list of template arguments. /// /// If this produces template arguments, it is permitted to call /// DecomposeTemplateName. /// /// This actually loses a lot of source location information for /// non-standard name kinds; we should consider preserving that in /// some way. void Sema::DecomposeUnqualifiedId(const UnqualifiedId &Id, TemplateArgumentListInfo &Buffer, DeclarationNameInfo &NameInfo, const TemplateArgumentListInfo *&TemplateArgs) { if (Id.getKind() == UnqualifiedId::IK_TemplateId) { Buffer.setLAngleLoc(Id.TemplateId->LAngleLoc); Buffer.setRAngleLoc(Id.TemplateId->RAngleLoc); ASTTemplateArgsPtr TemplateArgsPtr(Id.TemplateId->getTemplateArgs(), Id.TemplateId->NumArgs); translateTemplateArguments(TemplateArgsPtr, Buffer); TemplateName TName = Id.TemplateId->Template.get(); SourceLocation TNameLoc = Id.TemplateId->TemplateNameLoc; NameInfo = Context.getNameForTemplate(TName, TNameLoc); TemplateArgs = &Buffer; } else { NameInfo = GetNameFromUnqualifiedId(Id); TemplateArgs = nullptr; } } static void emitEmptyLookupTypoDiagnostic( const TypoCorrection &TC, Sema &SemaRef, const CXXScopeSpec &SS, DeclarationName Typo, SourceLocation TypoLoc, ArrayRef Args, unsigned DiagnosticID, unsigned DiagnosticSuggestID) { DeclContext *Ctx = SS.isEmpty() ? nullptr : SemaRef.computeDeclContext(SS, false); if (!TC) { // Emit a special diagnostic for failed member lookups. // FIXME: computing the declaration context might fail here (?) if (Ctx) SemaRef.Diag(TypoLoc, diag::err_no_member) << Typo << Ctx << SS.getRange(); else SemaRef.Diag(TypoLoc, DiagnosticID) << Typo; return; } std::string CorrectedStr = TC.getAsString(SemaRef.getLangOpts()); bool DroppedSpecifier = TC.WillReplaceSpecifier() && Typo.getAsString() == CorrectedStr; unsigned NoteID = TC.getCorrectionDeclAs() ? diag::note_implicit_param_decl : diag::note_previous_decl; if (!Ctx) SemaRef.diagnoseTypo(TC, SemaRef.PDiag(DiagnosticSuggestID) << Typo, SemaRef.PDiag(NoteID)); else SemaRef.diagnoseTypo(TC, SemaRef.PDiag(diag::err_no_member_suggest) << Typo << Ctx << DroppedSpecifier << SS.getRange(), SemaRef.PDiag(NoteID)); } /// Diagnose an empty lookup. /// /// \return false if new lookup candidates were found bool Sema::DiagnoseEmptyLookup(Scope *S, CXXScopeSpec &SS, LookupResult &R, std::unique_ptr CCC, TemplateArgumentListInfo *ExplicitTemplateArgs, ArrayRef Args, TypoExpr **Out) { DeclarationName Name = R.getLookupName(); unsigned diagnostic = diag::err_undeclared_var_use; unsigned diagnostic_suggest = diag::err_undeclared_var_use_suggest; if (Name.getNameKind() == DeclarationName::CXXOperatorName || Name.getNameKind() == DeclarationName::CXXLiteralOperatorName || Name.getNameKind() == DeclarationName::CXXConversionFunctionName) { diagnostic = diag::err_undeclared_use; diagnostic_suggest = diag::err_undeclared_use_suggest; } // If the original lookup was an unqualified lookup, fake an // unqualified lookup. 
This is useful when (for example) the // original lookup would not have found something because it was a // dependent name. DeclContext *DC = SS.isEmpty() ? CurContext : nullptr; while (DC) { if (isa(DC)) { LookupQualifiedName(R, DC); if (!R.empty()) { // Don't give errors about ambiguities in this lookup. R.suppressDiagnostics(); // During a default argument instantiation the CurContext points // to a CXXMethodDecl; but we can't apply a this-> fixit inside a // function parameter list, hence add an explicit check. bool isDefaultArgument = !ActiveTemplateInstantiations.empty() && ActiveTemplateInstantiations.back().Kind == ActiveTemplateInstantiation::DefaultFunctionArgumentInstantiation; CXXMethodDecl *CurMethod = dyn_cast(CurContext); bool isInstance = CurMethod && CurMethod->isInstance() && DC == CurMethod->getParent() && !isDefaultArgument; // Give a code modification hint to insert 'this->'. // TODO: fixit for inserting 'Base::' in the other cases. // Actually quite difficult! if (getLangOpts().MSVCCompat) diagnostic = diag::ext_found_via_dependent_bases_lookup; if (isInstance) { Diag(R.getNameLoc(), diagnostic) << Name << FixItHint::CreateInsertion(R.getNameLoc(), "this->"); CheckCXXThisCapture(R.getNameLoc()); } else { Diag(R.getNameLoc(), diagnostic) << Name; } // Do we really want to note all of these? for (NamedDecl *D : R) Diag(D->getLocation(), diag::note_dependent_var_use); // Return true if we are inside a default argument instantiation // and the found name refers to an instance member function, otherwise // the function calling DiagnoseEmptyLookup will try to create an // implicit member call and this is wrong for default argument. if (isDefaultArgument && ((*R.begin())->isCXXInstanceMember())) { Diag(R.getNameLoc(), diag::err_member_call_without_object); return true; } // Tell the callee to try to recover. return false; } R.clear(); } // In Microsoft mode, if we are performing lookup from within a friend // function definition declared at class scope then we must set // DC to the lexical parent to be able to search into the parent // class. if (getLangOpts().MSVCCompat && isa(DC) && cast(DC)->getFriendObjectKind() && DC->getLexicalParent()->isRecord()) DC = DC->getLexicalParent(); else DC = DC->getParent(); } // We didn't find anything, so try to correct for a typo. 
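// Illustrative sketch (hypothetical code; the suggested name depends on
// what is visible at the point of use):
//   int count = 0;
//   ...
//   cuont++;   // lookup of 'cuont' is empty; typo correction can offer
//              // 'count' and recover as if that name had been written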
TypoCorrection Corrected; if (S && Out) { SourceLocation TypoLoc = R.getNameLoc(); assert(!ExplicitTemplateArgs && "Diagnosing an empty lookup with explicit template args!"); *Out = CorrectTypoDelayed( R.getLookupNameInfo(), R.getLookupKind(), S, &SS, std::move(CCC), [=](const TypoCorrection &TC) { emitEmptyLookupTypoDiagnostic(TC, *this, SS, Name, TypoLoc, Args, diagnostic, diagnostic_suggest); }, nullptr, CTK_ErrorRecovery); if (*Out) return true; } else if (S && (Corrected = CorrectTypo(R.getLookupNameInfo(), R.getLookupKind(), S, &SS, std::move(CCC), CTK_ErrorRecovery))) { std::string CorrectedStr(Corrected.getAsString(getLangOpts())); bool DroppedSpecifier = Corrected.WillReplaceSpecifier() && Name.getAsString() == CorrectedStr; R.setLookupName(Corrected.getCorrection()); bool AcceptableWithRecovery = false; bool AcceptableWithoutRecovery = false; NamedDecl *ND = Corrected.getFoundDecl(); if (ND) { if (Corrected.isOverloaded()) { OverloadCandidateSet OCS(R.getNameLoc(), OverloadCandidateSet::CSK_Normal); OverloadCandidateSet::iterator Best; for (NamedDecl *CD : Corrected) { if (FunctionTemplateDecl *FTD = dyn_cast(CD)) AddTemplateOverloadCandidate( FTD, DeclAccessPair::make(FTD, AS_none), ExplicitTemplateArgs, Args, OCS); else if (FunctionDecl *FD = dyn_cast(CD)) if (!ExplicitTemplateArgs || ExplicitTemplateArgs->size() == 0) AddOverloadCandidate(FD, DeclAccessPair::make(FD, AS_none), Args, OCS); } switch (OCS.BestViableFunction(*this, R.getNameLoc(), Best)) { case OR_Success: ND = Best->FoundDecl; Corrected.setCorrectionDecl(ND); break; default: // FIXME: Arbitrarily pick the first declaration for the note. Corrected.setCorrectionDecl(ND); break; } } R.addDecl(ND); if (getLangOpts().CPlusPlus && ND->isCXXClassMember()) { CXXRecordDecl *Record = nullptr; if (Corrected.getCorrectionSpecifier()) { const Type *Ty = Corrected.getCorrectionSpecifier()->getAsType(); Record = Ty->getAsCXXRecordDecl(); } if (!Record) Record = cast( ND->getDeclContext()->getRedeclContext()); R.setNamingClass(Record); } auto *UnderlyingND = ND->getUnderlyingDecl(); AcceptableWithRecovery = isa(UnderlyingND) || isa(UnderlyingND); // FIXME: If we ended up with a typo for a type name or // Objective-C class name, we're in trouble because the parser // is in the wrong place to recover. Suggest the typo // correction, but don't make it a fix-it since we're not going // to recover well anyway. AcceptableWithoutRecovery = isa(UnderlyingND) || isa(UnderlyingND); } else { // FIXME: We found a keyword. Suggest it, but don't provide a fix-it // because we aren't able to recover. AcceptableWithoutRecovery = true; } if (AcceptableWithRecovery || AcceptableWithoutRecovery) { unsigned NoteID = Corrected.getCorrectionDeclAs() ? diag::note_implicit_param_decl : diag::note_previous_decl; if (SS.isEmpty()) diagnoseTypo(Corrected, PDiag(diagnostic_suggest) << Name, PDiag(NoteID), AcceptableWithRecovery); else diagnoseTypo(Corrected, PDiag(diag::err_no_member_suggest) << Name << computeDeclContext(SS, false) << DroppedSpecifier << SS.getRange(), PDiag(NoteID), AcceptableWithRecovery); // Tell the callee whether to try to recover. return !AcceptableWithRecovery; } } R.clear(); // Emit a special diagnostic for failed member lookups. // FIXME: computing the declaration context might fail here (?) if (!SS.isEmpty()) { Diag(R.getNameLoc(), diag::err_no_member) << Name << computeDeclContext(SS, false) << SS.getRange(); return true; } // Give up, we can't recover. 
Diag(R.getNameLoc(), diagnostic) << Name; return true; } /// In Microsoft mode, if we are inside a template class whose parent class has /// dependent base classes, and we can't resolve an unqualified identifier, then /// assume the identifier is a member of a dependent base class. We can only /// recover successfully in static methods, instance methods, and other contexts /// where 'this' is available. This doesn't precisely match MSVC's /// instantiation model, but it's close enough. static Expr * recoverFromMSUnqualifiedLookup(Sema &S, ASTContext &Context, DeclarationNameInfo &NameInfo, SourceLocation TemplateKWLoc, const TemplateArgumentListInfo *TemplateArgs) { // Only try to recover from lookup into dependent bases in static methods or // contexts where 'this' is available. QualType ThisType = S.getCurrentThisType(); const CXXRecordDecl *RD = nullptr; if (!ThisType.isNull()) RD = ThisType->getPointeeType()->getAsCXXRecordDecl(); else if (auto *MD = dyn_cast(S.CurContext)) RD = MD->getParent(); if (!RD || !RD->hasAnyDependentBases()) return nullptr; // Diagnose this as unqualified lookup into a dependent base class. If 'this' // is available, suggest inserting 'this->' as a fixit. SourceLocation Loc = NameInfo.getLoc(); auto DB = S.Diag(Loc, diag::ext_undeclared_unqual_id_with_dependent_base); DB << NameInfo.getName() << RD; if (!ThisType.isNull()) { DB << FixItHint::CreateInsertion(Loc, "this->"); return CXXDependentScopeMemberExpr::Create( Context, /*This=*/nullptr, ThisType, /*IsArrow=*/true, /*Op=*/SourceLocation(), NestedNameSpecifierLoc(), TemplateKWLoc, /*FirstQualifierInScope=*/nullptr, NameInfo, TemplateArgs); } // Synthesize a fake NNS that points to the derived class. This will // perform name lookup during template instantiation. CXXScopeSpec SS; auto *NNS = NestedNameSpecifier::Create(Context, nullptr, true, RD->getTypeForDecl()); SS.MakeTrivial(Context, NNS, SourceRange(Loc, Loc)); return DependentScopeDeclRefExpr::Create( Context, SS.getWithLocInContext(Context), TemplateKWLoc, NameInfo, TemplateArgs); } ExprResult Sema::ActOnIdExpression(Scope *S, CXXScopeSpec &SS, SourceLocation TemplateKWLoc, UnqualifiedId &Id, bool HasTrailingLParen, bool IsAddressOfOperand, std::unique_ptr CCC, bool IsInlineAsmIdentifier, Token *KeywordReplacement) { assert(!(IsAddressOfOperand && HasTrailingLParen) && "cannot be direct & operand and have a trailing lparen"); if (SS.isInvalid()) return ExprError(); TemplateArgumentListInfo TemplateArgsBuffer; // Decompose the UnqualifiedId into the following data. DeclarationNameInfo NameInfo; const TemplateArgumentListInfo *TemplateArgs; DecomposeUnqualifiedId(Id, TemplateArgsBuffer, NameInfo, TemplateArgs); DeclarationName Name = NameInfo.getName(); IdentifierInfo *II = Name.getAsIdentifierInfo(); SourceLocation NameLoc = NameInfo.getLoc(); // C++ [temp.dep.expr]p3: // An id-expression is type-dependent if it contains: // -- an identifier that was declared with a dependent type, // (note: handled after lookup) // -- a template-id that is dependent, // (note: handled in BuildTemplateIdExpr) // -- a conversion-function-id that specifies a dependent type, // -- a nested-name-specifier that contains a class-name that // names a dependent type. // Determine whether this is a member of an unknown specialization; // we need to handle these differently. 
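// Illustrative sketch (hypothetical template, matching the cases listed
// above):
//   template <typename T> int f() { return T::value; }
// 'T::value' has a nested-name-specifier naming a dependent type, so the
// reference is built as a dependent id-expression and is only resolved when
// f<T> is instantiated.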
bool DependentID = false; if (Name.getNameKind() == DeclarationName::CXXConversionFunctionName && Name.getCXXNameType()->isDependentType()) { DependentID = true; } else if (SS.isSet()) { if (DeclContext *DC = computeDeclContext(SS, false)) { if (RequireCompleteDeclContext(SS, DC)) return ExprError(); } else { DependentID = true; } } if (DependentID) return ActOnDependentIdExpression(SS, TemplateKWLoc, NameInfo, IsAddressOfOperand, TemplateArgs); // Perform the required lookup. LookupResult R(*this, NameInfo, (Id.getKind() == UnqualifiedId::IK_ImplicitSelfParam) ? LookupObjCImplicitSelfParam : LookupOrdinaryName); if (TemplateArgs) { // Lookup the template name again to correctly establish the context in // which it was found. This is really unfortunate as we already did the // lookup to determine that it was a template name in the first place. If // this becomes a performance hit, we can work harder to preserve those // results until we get here but it's likely not worth it. bool MemberOfUnknownSpecialization; LookupTemplateName(R, S, SS, QualType(), /*EnteringContext=*/false, MemberOfUnknownSpecialization); if (MemberOfUnknownSpecialization || (R.getResultKind() == LookupResult::NotFoundInCurrentInstantiation)) return ActOnDependentIdExpression(SS, TemplateKWLoc, NameInfo, IsAddressOfOperand, TemplateArgs); } else { bool IvarLookupFollowUp = II && !SS.isSet() && getCurMethodDecl(); LookupParsedName(R, S, &SS, !IvarLookupFollowUp); // If the result might be in a dependent base class, this is a dependent // id-expression. if (R.getResultKind() == LookupResult::NotFoundInCurrentInstantiation) return ActOnDependentIdExpression(SS, TemplateKWLoc, NameInfo, IsAddressOfOperand, TemplateArgs); // If this reference is in an Objective-C method, then we need to do // some special Objective-C lookup, too. if (IvarLookupFollowUp) { ExprResult E(LookupInObjCMethod(R, S, II, true)); if (E.isInvalid()) return ExprError(); if (Expr *Ex = E.getAs()) return Ex; } } if (R.isAmbiguous()) return ExprError(); // This could be an implicitly declared function reference (legal in C90, // extension in C99, forbidden in C++). if (R.empty() && HasTrailingLParen && II && !getLangOpts().CPlusPlus) { NamedDecl *D = ImplicitlyDefineFunction(NameLoc, *II, S); if (D) R.addDecl(D); } // Determine whether this name might be a candidate for // argument-dependent lookup. bool ADL = UseArgumentDependentLookup(SS, R, HasTrailingLParen); if (R.empty() && !ADL) { if (SS.isEmpty() && getLangOpts().MSVCCompat) { if (Expr *E = recoverFromMSUnqualifiedLookup(*this, Context, NameInfo, TemplateKWLoc, TemplateArgs)) return E; } // Don't diagnose an empty lookup for inline assembly. if (IsInlineAsmIdentifier) return ExprError(); // If this name wasn't predeclared and if this is not a function // call, diagnose the problem. TypoExpr *TE = nullptr; auto DefaultValidator = llvm::make_unique( II, SS.isValid() ? SS.getScopeRep() : nullptr); DefaultValidator->IsAddressOfOperand = IsAddressOfOperand; assert((!CCC || CCC->IsAddressOfOperand == IsAddressOfOperand) && "Typo correction callback misconfigured"); if (CCC) { // Make sure the callback knows what the typo being diagnosed is. CCC->setTypoName(II); if (SS.isValid()) CCC->setTypoNNS(SS.getScopeRep()); } if (DiagnoseEmptyLookup(S, SS, R, CCC ? 
std::move(CCC) : std::move(DefaultValidator), nullptr, None, &TE)) { if (TE && KeywordReplacement) { auto &State = getTypoExprState(TE); auto BestTC = State.Consumer->getNextCorrection(); if (BestTC.isKeyword()) { auto *II = BestTC.getCorrectionAsIdentifierInfo(); if (State.DiagHandler) State.DiagHandler(BestTC); KeywordReplacement->startToken(); KeywordReplacement->setKind(II->getTokenID()); KeywordReplacement->setIdentifierInfo(II); KeywordReplacement->setLocation(BestTC.getCorrectionRange().getBegin()); // Clean up the state associated with the TypoExpr, since it has // now been diagnosed (without a call to CorrectDelayedTyposInExpr). clearDelayedTypo(TE); // Signal that a correction to a keyword was performed by returning a // valid-but-null ExprResult. return (Expr*)nullptr; } State.Consumer->resetCorrectionStream(); } return TE ? TE : ExprError(); } assert(!R.empty() && "DiagnoseEmptyLookup returned false but added no results"); // If we found an Objective-C instance variable, let // LookupInObjCMethod build the appropriate expression to // reference the ivar. if (ObjCIvarDecl *Ivar = R.getAsSingle()) { R.clear(); ExprResult E(LookupInObjCMethod(R, S, Ivar->getIdentifier())); // In a hopelessly buggy code, Objective-C instance variable // lookup fails and no expression will be built to reference it. if (!E.isInvalid() && !E.get()) return ExprError(); return E; } } // This is guaranteed from this point on. assert(!R.empty() || ADL); // Check whether this might be a C++ implicit instance member access. // C++ [class.mfct.non-static]p3: // When an id-expression that is not part of a class member access // syntax and not used to form a pointer to member is used in the // body of a non-static member function of class X, if name lookup // resolves the name in the id-expression to a non-static non-type // member of some class C, the id-expression is transformed into a // class member access expression using (*this) as the // postfix-expression to the left of the . operator. // // But we don't actually need to do this for '&' operands if R // resolved to a function or overloaded function set, because the // expression is ill-formed if it actually works out to be a // non-static member function: // // C++ [expr.ref]p4: // Otherwise, if E1.E2 refers to a non-static member function. . . // [t]he expression can be used only as the left-hand operand of a // member function call. // // There are other safeguards against such uses, but it's important // to get this right here so that we don't end up making a // spuriously dependent expression if we're inside a dependent // instance method. if (!R.empty() && (*R.begin())->isCXXClassMember()) { bool MightBeImplicitMember; if (!IsAddressOfOperand) MightBeImplicitMember = true; else if (!SS.isEmpty()) MightBeImplicitMember = false; else if (R.isOverloadedResult()) MightBeImplicitMember = false; else if (R.isUnresolvableResult()) MightBeImplicitMember = true; else MightBeImplicitMember = isa(R.getFoundDecl()) || isa(R.getFoundDecl()) || isa(R.getFoundDecl()); if (MightBeImplicitMember) return BuildPossibleImplicitMemberExpr(SS, TemplateKWLoc, R, TemplateArgs, S); } if (TemplateArgs || TemplateKWLoc.isValid()) { // In C++1y, if this is a variable template id, then check it // in BuildTemplateIdExpr(). // The single lookup result must be a variable template declaration. 
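// Illustrative sketch (hypothetical C++1y code for the case checked just
// below):
//   template <typename T> constexpr T pi = T(3.1415926535897932385L);
//   double d = pi<double>;
// 'pi<double>' is a variable template-id, so it is handled by
// BuildTemplateIdExpr() rather than by BuildDeclarationNameExpr().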
if (Id.getKind() == UnqualifiedId::IK_TemplateId && Id.TemplateId && Id.TemplateId->Kind == TNK_Var_template) { assert(R.getAsSingle() && "There should only be one declaration found."); } return BuildTemplateIdExpr(SS, TemplateKWLoc, R, ADL, TemplateArgs); } return BuildDeclarationNameExpr(SS, R, ADL); } /// BuildQualifiedDeclarationNameExpr - Build a C++ qualified /// declaration name, generally during template instantiation. /// There's a large number of things which don't need to be done along /// this path. ExprResult Sema::BuildQualifiedDeclarationNameExpr( CXXScopeSpec &SS, const DeclarationNameInfo &NameInfo, bool IsAddressOfOperand, const Scope *S, TypeSourceInfo **RecoveryTSI) { DeclContext *DC = computeDeclContext(SS, false); if (!DC) return BuildDependentDeclRefExpr(SS, /*TemplateKWLoc=*/SourceLocation(), NameInfo, /*TemplateArgs=*/nullptr); if (RequireCompleteDeclContext(SS, DC)) return ExprError(); LookupResult R(*this, NameInfo, LookupOrdinaryName); LookupQualifiedName(R, DC); if (R.isAmbiguous()) return ExprError(); if (R.getResultKind() == LookupResult::NotFoundInCurrentInstantiation) return BuildDependentDeclRefExpr(SS, /*TemplateKWLoc=*/SourceLocation(), NameInfo, /*TemplateArgs=*/nullptr); if (R.empty()) { Diag(NameInfo.getLoc(), diag::err_no_member) << NameInfo.getName() << DC << SS.getRange(); return ExprError(); } if (const TypeDecl *TD = R.getAsSingle()) { // Diagnose a missing typename if this resolved unambiguously to a type in // a dependent context. If we can recover with a type, downgrade this to // a warning in Microsoft compatibility mode. unsigned DiagID = diag::err_typename_missing; if (RecoveryTSI && getLangOpts().MSVCCompat) DiagID = diag::ext_typename_missing; SourceLocation Loc = SS.getBeginLoc(); auto D = Diag(Loc, DiagID); D << SS.getScopeRep() << NameInfo.getName().getAsString() << SourceRange(Loc, NameInfo.getEndLoc()); // Don't recover if the caller isn't expecting us to or if we're in a SFINAE // context. if (!RecoveryTSI) return ExprError(); // Only issue the fixit if we're prepared to recover. D << FixItHint::CreateInsertion(Loc, "typename "); // Recover by pretending this was an elaborated type. QualType Ty = Context.getTypeDeclType(TD); TypeLocBuilder TLB; TLB.pushTypeSpec(Ty).setNameLoc(NameInfo.getLoc()); QualType ET = getElaboratedType(ETK_None, SS, Ty); ElaboratedTypeLoc QTL = TLB.push(ET); QTL.setElaboratedKeywordLoc(SourceLocation()); QTL.setQualifierLoc(SS.getWithLocInContext(Context)); *RecoveryTSI = TLB.getTypeSourceInfo(Context, ET); return ExprEmpty(); } // Defend against this resolving to an implicit member access. We usually // won't get here if this might be a legitimate a class member (we end up in // BuildMemberReferenceExpr instead), but this can be valid if we're forming // a pointer-to-member or in an unevaluated context in C++11. if (!R.empty() && (*R.begin())->isCXXClassMember() && !IsAddressOfOperand) return BuildPossibleImplicitMemberExpr(SS, /*TemplateKWLoc=*/SourceLocation(), R, /*TemplateArgs=*/nullptr, S); return BuildDeclarationNameExpr(SS, R, /* ADL */ false); } /// LookupInObjCMethod - The parser has read a name in, and Sema has /// detected that we're currently inside an ObjC method. Perform some /// additional lookup. /// /// Ideally, most of this would be done by lookup, but there's /// actually quite a lot of extra work involved. /// /// Returns a null sentinel to indicate trivial success. 
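/// Illustrative sketch (hypothetical Objective-C code, not from this source):
/// in an instance method of a class that declares an ivar '_count',
///
///   - (void)bump { _count += 1; }
///
/// ordinary lookup of '_count' finds nothing, and this routine rebuilds the
/// reference as an access through the implicit 'self' parameter.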
ExprResult Sema::LookupInObjCMethod(LookupResult &Lookup, Scope *S, IdentifierInfo *II, bool AllowBuiltinCreation) { SourceLocation Loc = Lookup.getNameLoc(); ObjCMethodDecl *CurMethod = getCurMethodDecl(); // Check for error condition which is already reported. if (!CurMethod) return ExprError(); // There are two cases to handle here. 1) scoped lookup could have failed, // in which case we should look for an ivar. 2) scoped lookup could have // found a decl, but that decl is outside the current instance method (i.e. // a global variable). In these two cases, we do a lookup for an ivar with // this name, if the lookup sucedes, we replace it our current decl. // If we're in a class method, we don't normally want to look for // ivars. But if we don't find anything else, and there's an // ivar, that's an error. bool IsClassMethod = CurMethod->isClassMethod(); bool LookForIvars; if (Lookup.empty()) LookForIvars = true; else if (IsClassMethod) LookForIvars = false; else LookForIvars = (Lookup.isSingleResult() && Lookup.getFoundDecl()->isDefinedOutsideFunctionOrMethod()); ObjCInterfaceDecl *IFace = nullptr; if (LookForIvars) { IFace = CurMethod->getClassInterface(); ObjCInterfaceDecl *ClassDeclared; ObjCIvarDecl *IV = nullptr; if (IFace && (IV = IFace->lookupInstanceVariable(II, ClassDeclared))) { // Diagnose using an ivar in a class method. if (IsClassMethod) return ExprError(Diag(Loc, diag::error_ivar_use_in_class_method) << IV->getDeclName()); // If we're referencing an invalid decl, just return this as a silent // error node. The error diagnostic was already emitted on the decl. if (IV->isInvalidDecl()) return ExprError(); // Check if referencing a field with __attribute__((deprecated)). if (DiagnoseUseOfDecl(IV, Loc)) return ExprError(); // Diagnose the use of an ivar outside of the declaring class. if (IV->getAccessControl() == ObjCIvarDecl::Private && !declaresSameEntity(ClassDeclared, IFace) && !getLangOpts().DebuggerSupport) Diag(Loc, diag::error_private_ivar_access) << IV->getDeclName(); // FIXME: This should use a new expr for a direct reference, don't // turn this into Self->ivar, just return a BareIVarExpr or something. IdentifierInfo &II = Context.Idents.get("self"); UnqualifiedId SelfName; SelfName.setIdentifier(&II, SourceLocation()); SelfName.setKind(UnqualifiedId::IK_ImplicitSelfParam); CXXScopeSpec SelfScopeSpec; SourceLocation TemplateKWLoc; ExprResult SelfExpr = ActOnIdExpression(S, SelfScopeSpec, TemplateKWLoc, SelfName, false, false); if (SelfExpr.isInvalid()) return ExprError(); SelfExpr = DefaultLvalueConversion(SelfExpr.get()); if (SelfExpr.isInvalid()) return ExprError(); MarkAnyDeclReferenced(Loc, IV, true); ObjCMethodFamily MF = CurMethod->getMethodFamily(); if (MF != OMF_init && MF != OMF_dealloc && MF != OMF_finalize && !IvarBacksCurrentMethodAccessor(IFace, CurMethod, IV)) Diag(Loc, diag::warn_direct_ivar_access) << IV->getDeclName(); ObjCIvarRefExpr *Result = new (Context) ObjCIvarRefExpr(IV, IV->getUsageType(SelfExpr.get()->getType()), Loc, IV->getLocation(), SelfExpr.get(), true, true); if (getLangOpts().ObjCAutoRefCount) { if (IV->getType().getObjCLifetime() == Qualifiers::OCL_Weak) { if (!Diags.isIgnored(diag::warn_arc_repeated_use_of_weak, Loc)) recordUseOfEvaluatedWeak(Result); } if (CurContext->isClosure()) Diag(Loc, diag::warn_implicitly_retains_self) << FixItHint::CreateInsertion(Loc, "self->"); } return Result; } } else if (CurMethod->isInstanceMethod()) { // We should warn if a local variable hides an ivar. 
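    // Illustrative sketch (hypothetical code): given an interface declaring
    // an ivar 'count',
    //
    //   - (void)reset {
    //     int count = 0;
    //     count += 1;      // refers to the local, which hides the ivar
    //   }
    //
    // the use resolves to the local variable and the hidden-ivar warning
    // below is emitted.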
if (ObjCInterfaceDecl *IFace = CurMethod->getClassInterface()) { ObjCInterfaceDecl *ClassDeclared; if (ObjCIvarDecl *IV = IFace->lookupInstanceVariable(II, ClassDeclared)) { if (IV->getAccessControl() != ObjCIvarDecl::Private || declaresSameEntity(IFace, ClassDeclared)) Diag(Loc, diag::warn_ivar_use_hidden) << IV->getDeclName(); } } } else if (Lookup.isSingleResult() && Lookup.getFoundDecl()->isDefinedOutsideFunctionOrMethod()) { // If accessing a stand-alone ivar in a class method, this is an error. if (const ObjCIvarDecl *IV = dyn_cast(Lookup.getFoundDecl())) return ExprError(Diag(Loc, diag::error_ivar_use_in_class_method) << IV->getDeclName()); } if (Lookup.empty() && II && AllowBuiltinCreation) { // FIXME. Consolidate this with similar code in LookupName. if (unsigned BuiltinID = II->getBuiltinID()) { if (!(getLangOpts().CPlusPlus && Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID))) { NamedDecl *D = LazilyCreateBuiltin((IdentifierInfo *)II, BuiltinID, S, Lookup.isForRedeclaration(), Lookup.getNameLoc()); if (D) Lookup.addDecl(D); } } } // Sentinel value saying that we didn't do anything special. return ExprResult((Expr *)nullptr); } /// \brief Cast a base object to a member's actual type. /// /// Logically this happens in three phases: /// /// * First we cast from the base type to the naming class. /// The naming class is the class into which we were looking /// when we found the member; it's the qualifier type if a /// qualifier was provided, and otherwise it's the base type. /// /// * Next we cast from the naming class to the declaring class. /// If the member we found was brought into a class's scope by /// a using declaration, this is that class; otherwise it's /// the class declaring the member. /// /// * Finally we cast from the declaring class to the "true" /// declaring class of the member. This conversion does not /// obey access control. ExprResult Sema::PerformObjectMemberConversion(Expr *From, NestedNameSpecifier *Qualifier, NamedDecl *FoundDecl, NamedDecl *Member) { CXXRecordDecl *RD = dyn_cast(Member->getDeclContext()); if (!RD) return From; QualType DestRecordType; QualType DestType; QualType FromRecordType; QualType FromType = From->getType(); bool PointerConversions = false; if (isa(Member)) { DestRecordType = Context.getCanonicalType(Context.getTypeDeclType(RD)); if (FromType->getAs()) { DestType = Context.getPointerType(DestRecordType); FromRecordType = FromType->getPointeeType(); PointerConversions = true; } else { DestType = DestRecordType; FromRecordType = FromType; } } else if (CXXMethodDecl *Method = dyn_cast(Member)) { if (Method->isStatic()) return From; DestType = Method->getThisType(Context); DestRecordType = DestType->getPointeeType(); if (FromType->getAs()) { FromRecordType = FromType->getPointeeType(); PointerConversions = true; } else { FromRecordType = FromType; DestType = DestRecordType; } } else { // No conversion necessary. return From; } if (DestType->isDependentType() || FromType->isDependentType()) return From; // If the unqualified types are the same, no conversion is necessary. if (Context.hasSameUnqualifiedType(FromRecordType, DestRecordType)) return From; SourceRange FromRange = From->getSourceRange(); SourceLocation FromLoc = FromRange.getBegin(); ExprValueKind VK = From->getValueKind(); // C++ [class.member.lookup]p8: // [...] Ambiguities can often be resolved by qualifying a name with its // class name. 
// // If the member was a qualified name and the qualified referred to a // specific base subobject type, we'll cast to that intermediate type // first and then to the object in which the member is declared. That allows // one to resolve ambiguities in, e.g., a diamond-shaped hierarchy such as: // // class Base { public: int x; }; // class Derived1 : public Base { }; // class Derived2 : public Base { }; // class VeryDerived : public Derived1, public Derived2 { void f(); }; // // void VeryDerived::f() { // x = 17; // error: ambiguous base subobjects // Derived1::x = 17; // okay, pick the Base subobject of Derived1 // } if (Qualifier && Qualifier->getAsType()) { QualType QType = QualType(Qualifier->getAsType(), 0); assert(QType->isRecordType() && "lookup done with non-record type"); QualType QRecordType = QualType(QType->getAs(), 0); // In C++98, the qualifier type doesn't actually have to be a base // type of the object type, in which case we just ignore it. // Otherwise build the appropriate casts. if (IsDerivedFrom(FromLoc, FromRecordType, QRecordType)) { CXXCastPath BasePath; if (CheckDerivedToBaseConversion(FromRecordType, QRecordType, FromLoc, FromRange, &BasePath)) return ExprError(); if (PointerConversions) QType = Context.getPointerType(QType); From = ImpCastExprToType(From, QType, CK_UncheckedDerivedToBase, VK, &BasePath).get(); FromType = QType; FromRecordType = QRecordType; // If the qualifier type was the same as the destination type, // we're done. if (Context.hasSameUnqualifiedType(FromRecordType, DestRecordType)) return From; } } bool IgnoreAccess = false; // If we actually found the member through a using declaration, cast // down to the using declaration's type. // // Pointer equality is fine here because only one declaration of a // class ever has member declarations. if (FoundDecl->getDeclContext() != Member->getDeclContext()) { assert(isa(FoundDecl)); QualType URecordType = Context.getTypeDeclType( cast(FoundDecl->getDeclContext())); // We only need to do this if the naming-class to declaring-class // conversion is non-trivial. if (!Context.hasSameUnqualifiedType(FromRecordType, URecordType)) { assert(IsDerivedFrom(FromLoc, FromRecordType, URecordType)); CXXCastPath BasePath; if (CheckDerivedToBaseConversion(FromRecordType, URecordType, FromLoc, FromRange, &BasePath)) return ExprError(); QualType UType = URecordType; if (PointerConversions) UType = Context.getPointerType(UType); From = ImpCastExprToType(From, UType, CK_UncheckedDerivedToBase, VK, &BasePath).get(); FromType = UType; FromRecordType = URecordType; } // We don't do access control for the conversion from the // declaring class to the true declaring class. IgnoreAccess = true; } CXXCastPath BasePath; if (CheckDerivedToBaseConversion(FromRecordType, DestRecordType, FromLoc, FromRange, &BasePath, IgnoreAccess)) return ExprError(); return ImpCastExprToType(From, DestType, CK_UncheckedDerivedToBase, VK, &BasePath); } bool Sema::UseArgumentDependentLookup(const CXXScopeSpec &SS, const LookupResult &R, bool HasTrailingLParen) { // Only when used directly as the postfix-expression of a call. if (!HasTrailingLParen) return false; // Never if a scope specifier was provided. if (SS.isSet()) return false; // Only in C++ or ObjC++. 
if (!getLangOpts().CPlusPlus) return false; // Turn off ADL when we find certain kinds of declarations during // normal lookup: for (NamedDecl *D : R) { // C++0x [basic.lookup.argdep]p3: // -- a declaration of a class member // Since using decls preserve this property, we check this on the // original decl. if (D->isCXXClassMember()) return false; // C++0x [basic.lookup.argdep]p3: // -- a block-scope function declaration that is not a // using-declaration // NOTE: we also trigger this for function templates (in fact, we // don't check the decl type at all, since all other decl types // turn off ADL anyway). if (isa(D)) D = cast(D)->getTargetDecl(); else if (D->getLexicalDeclContext()->isFunctionOrMethod()) return false; // C++0x [basic.lookup.argdep]p3: // -- a declaration that is neither a function or a function // template // And also for builtin functions. if (isa(D)) { FunctionDecl *FDecl = cast(D); // But also builtin functions. if (FDecl->getBuiltinID() && FDecl->isImplicit()) return false; } else if (!isa(D)) return false; } return true; } /// Diagnoses obvious problems with the use of the given declaration /// as an expression. This is only actually called for lookups that /// were not overloaded, and it doesn't promise that the declaration /// will in fact be used. static bool CheckDeclInExpr(Sema &S, SourceLocation Loc, NamedDecl *D) { if (isa(D)) { S.Diag(Loc, diag::err_unexpected_typedef) << D->getDeclName(); return true; } if (isa(D)) { S.Diag(Loc, diag::err_unexpected_interface) << D->getDeclName(); return true; } if (isa(D)) { S.Diag(Loc, diag::err_unexpected_namespace) << D->getDeclName(); return true; } return false; } ExprResult Sema::BuildDeclarationNameExpr(const CXXScopeSpec &SS, LookupResult &R, bool NeedsADL, bool AcceptInvalidDecl) { // If this is a single, fully-resolved result and we don't need ADL, // just build an ordinary singleton decl ref. if (!NeedsADL && R.isSingleResult() && !R.getAsSingle()) return BuildDeclarationNameExpr(SS, R.getLookupNameInfo(), R.getFoundDecl(), R.getRepresentativeDecl(), nullptr, AcceptInvalidDecl); // We only need to check the declaration if there's exactly one // result, because in the overloaded case the results can only be // functions and function templates. if (R.isSingleResult() && CheckDeclInExpr(*this, R.getNameLoc(), R.getFoundDecl())) return ExprError(); // Otherwise, just build an unresolved lookup expression. Suppress // any lookup-related diagnostics; we'll hash these out later, when // we've picked a target. R.suppressDiagnostics(); UnresolvedLookupExpr *ULE = UnresolvedLookupExpr::Create(Context, R.getNamingClass(), SS.getWithLocInContext(Context), R.getLookupNameInfo(), NeedsADL, R.isOverloadedResult(), R.begin(), R.end()); return ULE; } /// \brief Complete semantic analysis for a reference to the given declaration. ExprResult Sema::BuildDeclarationNameExpr( const CXXScopeSpec &SS, const DeclarationNameInfo &NameInfo, NamedDecl *D, NamedDecl *FoundD, const TemplateArgumentListInfo *TemplateArgs, bool AcceptInvalidDecl) { assert(D && "Cannot refer to a NULL declaration"); assert(!isa(D) && "Cannot refer unambiguously to a function template"); SourceLocation Loc = NameInfo.getLoc(); if (CheckDeclInExpr(*this, Loc, D)) return ExprError(); if (TemplateDecl *Template = dyn_cast(D)) { // Specifically diagnose references to class templates that are missing // a template argument list. Diag(Loc, diag::err_template_decl_ref) << (isa(D) ? 
1 : 0) << Template << SS.getRange(); Diag(Template->getLocation(), diag::note_template_decl_here); return ExprError(); } // Make sure that we're referring to a value. ValueDecl *VD = dyn_cast(D); if (!VD) { Diag(Loc, diag::err_ref_non_value) << D << SS.getRange(); Diag(D->getLocation(), diag::note_declared_at); return ExprError(); } // Check whether this declaration can be used. Note that we suppress // this check when we're going to perform argument-dependent lookup // on this function name, because this might not be the function // that overload resolution actually selects. if (DiagnoseUseOfDecl(VD, Loc)) return ExprError(); // Only create DeclRefExpr's for valid Decl's. if (VD->isInvalidDecl() && !AcceptInvalidDecl) return ExprError(); // Handle members of anonymous structs and unions. If we got here, // and the reference is to a class member indirect field, then this // must be the subject of a pointer-to-member expression. if (IndirectFieldDecl *indirectField = dyn_cast(VD)) if (!indirectField->isCXXClassMember()) return BuildAnonymousStructUnionMemberReference(SS, NameInfo.getLoc(), indirectField); { QualType type = VD->getType(); ExprValueKind valueKind = VK_RValue; switch (D->getKind()) { // Ignore all the non-ValueDecl kinds. #define ABSTRACT_DECL(kind) #define VALUE(type, base) #define DECL(type, base) \ case Decl::type: #include "clang/AST/DeclNodes.inc" llvm_unreachable("invalid value decl kind"); // These shouldn't make it here. case Decl::ObjCAtDefsField: case Decl::ObjCIvar: llvm_unreachable("forming non-member reference to ivar?"); // Enum constants are always r-values and never references. // Unresolved using declarations are dependent. case Decl::EnumConstant: case Decl::UnresolvedUsingValue: valueKind = VK_RValue; break; // Fields and indirect fields that got here must be for // pointer-to-member expressions; we just call them l-values for // internal consistency, because this subexpression doesn't really // exist in the high-level semantics. case Decl::Field: case Decl::IndirectField: assert(getLangOpts().CPlusPlus && "building reference to field in C?"); // These can't have reference type in well-formed programs, but // for internal consistency we do this anyway. type = type.getNonReferenceType(); valueKind = VK_LValue; break; // Non-type template parameters are either l-values or r-values // depending on the type. case Decl::NonTypeTemplateParm: { if (const ReferenceType *reftype = type->getAs()) { type = reftype->getPointeeType(); valueKind = VK_LValue; // even if the parameter is an r-value reference break; } // For non-references, we need to strip qualifiers just in case // the template parameter was declared as 'const int' or whatever. valueKind = VK_RValue; type = type.getUnqualifiedType(); break; } case Decl::Var: case Decl::VarTemplateSpecialization: case Decl::VarTemplatePartialSpecialization: // In C, "extern void blah;" is valid and is an r-value. if (!getLangOpts().CPlusPlus && !type.hasQualifiers() && type->isVoidType()) { valueKind = VK_RValue; break; } // fallthrough case Decl::ImplicitParam: case Decl::ParmVar: { // These are always l-values. valueKind = VK_LValue; type = type.getNonReferenceType(); // FIXME: Does the addition of const really only apply in // potentially-evaluated contexts? Since the variable isn't actually // captured in an unevaluated context, it seems that the answer is no. 
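      // Illustrative sketch (hypothetical code): inside a non-mutable lambda,
      //
      //   int n = 0;
      //   auto f = [n] { return n + 1; };  // 'n' is referenced as 'const int'
      //
      // the by-copy capture is given a const-qualified type, which is what
      // the computation below reflects.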
if (!isUnevaluatedContext()) { QualType CapturedType = getCapturedDeclRefType(cast(VD), Loc); if (!CapturedType.isNull()) type = CapturedType; } break; } case Decl::Function: { if (unsigned BID = cast(VD)->getBuiltinID()) { if (!Context.BuiltinInfo.isPredefinedLibFunction(BID)) { type = Context.BuiltinFnTy; valueKind = VK_RValue; break; } } const FunctionType *fty = type->castAs(); // If we're referring to a function with an __unknown_anytype // result type, make the entire expression __unknown_anytype. if (fty->getReturnType() == Context.UnknownAnyTy) { type = Context.UnknownAnyTy; valueKind = VK_RValue; break; } // Functions are l-values in C++. if (getLangOpts().CPlusPlus) { valueKind = VK_LValue; break; } // C99 DR 316 says that, if a function type comes from a // function definition (without a prototype), that type is only // used for checking compatibility. Therefore, when referencing // the function, we pretend that we don't have the full function // type. if (!cast(VD)->hasPrototype() && isa(fty)) type = Context.getFunctionNoProtoType(fty->getReturnType(), fty->getExtInfo()); // Functions are r-values in C. valueKind = VK_RValue; break; } case Decl::MSProperty: valueKind = VK_LValue; break; case Decl::CXXMethod: // If we're referring to a method with an __unknown_anytype // result type, make the entire expression __unknown_anytype. // This should only be possible with a type written directly. if (const FunctionProtoType *proto = dyn_cast(VD->getType())) if (proto->getReturnType() == Context.UnknownAnyTy) { type = Context.UnknownAnyTy; valueKind = VK_RValue; break; } // C++ methods are l-values if static, r-values if non-static. if (cast(VD)->isStatic()) { valueKind = VK_LValue; break; } // fallthrough case Decl::CXXConversion: case Decl::CXXDestructor: case Decl::CXXConstructor: valueKind = VK_RValue; break; } return BuildDeclRefExpr(VD, type, valueKind, NameInfo, &SS, FoundD, TemplateArgs); } } static void ConvertUTF8ToWideString(unsigned CharByteWidth, StringRef Source, SmallString<32> &Target) { Target.resize(CharByteWidth * (Source.size() + 1)); char *ResultPtr = &Target[0]; const UTF8 *ErrorPtr; bool success = ConvertUTF8toWide(CharByteWidth, Source, ResultPtr, ErrorPtr); (void)success; assert(success); Target.resize(ResultPtr - &Target[0]); } ExprResult Sema::BuildPredefinedExpr(SourceLocation Loc, PredefinedExpr::IdentType IT) { // Pick the current block, lambda, captured statement or function. Decl *currentDecl = nullptr; if (const BlockScopeInfo *BSI = getCurBlock()) currentDecl = BSI->TheDecl; else if (const LambdaScopeInfo *LSI = getCurLambda()) currentDecl = LSI->CallOperator; else if (const CapturedRegionScopeInfo *CSI = getCurCapturedRegion()) currentDecl = CSI->TheCapturedDecl; else currentDecl = getCurFunctionOrMethodDecl(); if (!currentDecl) { Diag(Loc, diag::ext_predef_outside_function); currentDecl = Context.getTranslationUnitDecl(); } QualType ResTy; StringLiteral *SL = nullptr; if (cast(currentDecl)->isDependentContext()) ResTy = Context.DependentTy; else { // Pre-defined identifiers are of type char[x], where x is the length of // the string. 
auto Str = PredefinedExpr::ComputeName(IT, currentDecl); unsigned Length = Str.length(); llvm::APInt LengthI(32, Length + 1); if (IT == PredefinedExpr::LFunction) { ResTy = Context.WideCharTy.withConst(); SmallString<32> RawChars; ConvertUTF8ToWideString(Context.getTypeSizeInChars(ResTy).getQuantity(), Str, RawChars); ResTy = Context.getConstantArrayType(ResTy, LengthI, ArrayType::Normal, /*IndexTypeQuals*/ 0); SL = StringLiteral::Create(Context, RawChars, StringLiteral::Wide, /*Pascal*/ false, ResTy, Loc); } else { ResTy = Context.CharTy.withConst(); ResTy = Context.getConstantArrayType(ResTy, LengthI, ArrayType::Normal, /*IndexTypeQuals*/ 0); SL = StringLiteral::Create(Context, Str, StringLiteral::Ascii, /*Pascal*/ false, ResTy, Loc); } } return new (Context) PredefinedExpr(Loc, ResTy, IT, SL); } ExprResult Sema::ActOnPredefinedExpr(SourceLocation Loc, tok::TokenKind Kind) { PredefinedExpr::IdentType IT; switch (Kind) { default: llvm_unreachable("Unknown simple primary expr!"); case tok::kw___func__: IT = PredefinedExpr::Func; break; // [C99 6.4.2.2] case tok::kw___FUNCTION__: IT = PredefinedExpr::Function; break; case tok::kw___FUNCDNAME__: IT = PredefinedExpr::FuncDName; break; // [MS] case tok::kw___FUNCSIG__: IT = PredefinedExpr::FuncSig; break; // [MS] case tok::kw_L__FUNCTION__: IT = PredefinedExpr::LFunction; break; case tok::kw___PRETTY_FUNCTION__: IT = PredefinedExpr::PrettyFunction; break; } return BuildPredefinedExpr(Loc, IT); } ExprResult Sema::ActOnCharacterConstant(const Token &Tok, Scope *UDLScope) { SmallString<16> CharBuffer; bool Invalid = false; StringRef ThisTok = PP.getSpelling(Tok, CharBuffer, &Invalid); if (Invalid) return ExprError(); CharLiteralParser Literal(ThisTok.begin(), ThisTok.end(), Tok.getLocation(), PP, Tok.getKind()); if (Literal.hadError()) return ExprError(); QualType Ty; if (Literal.isWide()) Ty = Context.WideCharTy; // L'x' -> wchar_t in C and C++. else if (Literal.isUTF16()) Ty = Context.Char16Ty; // u'x' -> char16_t in C11 and C++11. else if (Literal.isUTF32()) Ty = Context.Char32Ty; // U'x' -> char32_t in C11 and C++11. else if (!getLangOpts().CPlusPlus || Literal.isMultiChar()) Ty = Context.IntTy; // 'x' -> int in C, 'wxyz' -> int in C++. else Ty = Context.CharTy; // 'x' -> char in C++ CharacterLiteral::CharacterKind Kind = CharacterLiteral::Ascii; if (Literal.isWide()) Kind = CharacterLiteral::Wide; else if (Literal.isUTF16()) Kind = CharacterLiteral::UTF16; else if (Literal.isUTF32()) Kind = CharacterLiteral::UTF32; else if (Literal.isUTF8()) Kind = CharacterLiteral::UTF8; Expr *Lit = new (Context) CharacterLiteral(Literal.getValue(), Kind, Ty, Tok.getLocation()); if (Literal.getUDSuffix().empty()) return Lit; // We're building a user-defined literal. IdentifierInfo *UDSuffix = &Context.Idents.get(Literal.getUDSuffix()); SourceLocation UDSuffixLoc = getUDSuffixLoc(*this, Tok.getLocation(), Literal.getUDSuffixOffset()); // Make sure we're allowed user-defined literals here. 
if (!UDLScope) return ExprError(Diag(UDSuffixLoc, diag::err_invalid_character_udl)); // C++11 [lex.ext]p6: The literal L is treated as a call of the form // operator "" X (ch) return BuildCookedLiteralOperatorCall(*this, UDLScope, UDSuffix, UDSuffixLoc, Lit, Tok.getLocation()); } ExprResult Sema::ActOnIntegerConstant(SourceLocation Loc, uint64_t Val) { unsigned IntSize = Context.getTargetInfo().getIntWidth(); return IntegerLiteral::Create(Context, llvm::APInt(IntSize, Val), Context.IntTy, Loc); } static Expr *BuildFloatingLiteral(Sema &S, NumericLiteralParser &Literal, QualType Ty, SourceLocation Loc) { const llvm::fltSemantics &Format = S.Context.getFloatTypeSemantics(Ty); using llvm::APFloat; APFloat Val(Format); APFloat::opStatus result = Literal.GetFloatValue(Val); // Overflow is always an error, but underflow is only an error if // we underflowed to zero (APFloat reports denormals as underflow). if ((result & APFloat::opOverflow) || ((result & APFloat::opUnderflow) && Val.isZero())) { unsigned diagnostic; SmallString<20> buffer; if (result & APFloat::opOverflow) { diagnostic = diag::warn_float_overflow; APFloat::getLargest(Format).toString(buffer); } else { diagnostic = diag::warn_float_underflow; APFloat::getSmallest(Format).toString(buffer); } S.Diag(Loc, diagnostic) << Ty << StringRef(buffer.data(), buffer.size()); } bool isExact = (result == APFloat::opOK); return FloatingLiteral::Create(S.Context, Val, isExact, Ty, Loc); } bool Sema::CheckLoopHintExpr(Expr *E, SourceLocation Loc) { assert(E && "Invalid expression"); if (E->isValueDependent()) return false; QualType QT = E->getType(); if (!QT->isIntegerType() || QT->isBooleanType() || QT->isCharType()) { Diag(E->getExprLoc(), diag::err_pragma_loop_invalid_argument_type) << QT; return true; } llvm::APSInt ValueAPS; ExprResult R = VerifyIntegerConstantExpression(E, &ValueAPS); if (R.isInvalid()) return true; bool ValueIsPositive = ValueAPS.isStrictlyPositive(); if (!ValueIsPositive || ValueAPS.getActiveBits() > 31) { Diag(E->getExprLoc(), diag::err_pragma_loop_invalid_argument_value) << ValueAPS.toString(10) << ValueIsPositive; return true; } return false; } ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) { // Fast path for a single digit (which is quite common). A single digit // cannot have a trigraph, escaped newline, radix prefix, or suffix. if (Tok.getLength() == 1) { const char Val = PP.getSpellingOfSingleCharacterNumericConstant(Tok); return ActOnIntegerConstant(Tok.getLocation(), Val-'0'); } SmallString<128> SpellingBuffer; // NumericLiteralParser wants to overread by one character. Add padding to // the buffer in case the token is copied to the buffer. If getSpelling() // returns a StringRef to the memory buffer, it should have a null char at // the EOF, so it is also safe. SpellingBuffer.resize(Tok.getLength() + 1); // Get the spelling of the token, which eliminates trigraphs, etc. bool Invalid = false; StringRef TokSpelling = PP.getSpelling(Tok, SpellingBuffer, &Invalid); if (Invalid) return ExprError(); NumericLiteralParser Literal(TokSpelling, Tok.getLocation(), PP); if (Literal.hadError) return ExprError(); if (Literal.hasUDSuffix()) { // We're building a user-defined literal. IdentifierInfo *UDSuffix = &Context.Idents.get(Literal.getUDSuffix()); SourceLocation UDSuffixLoc = getUDSuffixLoc(*this, Tok.getLocation(), Literal.getUDSuffixOffset()); // Make sure we're allowed user-defined literals here. 
if (!UDLScope) return ExprError(Diag(UDSuffixLoc, diag::err_invalid_numeric_udl)); QualType CookedTy; if (Literal.isFloatingLiteral()) { // C++11 [lex.ext]p4: If S contains a literal operator with parameter type // long double, the literal is treated as a call of the form // operator "" X (f L) CookedTy = Context.LongDoubleTy; } else { // C++11 [lex.ext]p3: If S contains a literal operator with parameter type // unsigned long long, the literal is treated as a call of the form // operator "" X (n ULL) CookedTy = Context.UnsignedLongLongTy; } DeclarationName OpName = Context.DeclarationNames.getCXXLiteralOperatorName(UDSuffix); DeclarationNameInfo OpNameInfo(OpName, UDSuffixLoc); OpNameInfo.setCXXLiteralOperatorNameLoc(UDSuffixLoc); SourceLocation TokLoc = Tok.getLocation(); // Perform literal operator lookup to determine if we're building a raw // literal or a cooked one. LookupResult R(*this, OpName, UDSuffixLoc, LookupOrdinaryName); switch (LookupLiteralOperator(UDLScope, R, CookedTy, /*AllowRaw*/true, /*AllowTemplate*/true, /*AllowStringTemplate*/false)) { case LOLR_Error: return ExprError(); case LOLR_Cooked: { Expr *Lit; if (Literal.isFloatingLiteral()) { Lit = BuildFloatingLiteral(*this, Literal, CookedTy, Tok.getLocation()); } else { llvm::APInt ResultVal(Context.getTargetInfo().getLongLongWidth(), 0); if (Literal.GetIntegerValue(ResultVal)) Diag(Tok.getLocation(), diag::err_integer_literal_too_large) << /* Unsigned */ 1; Lit = IntegerLiteral::Create(Context, ResultVal, CookedTy, Tok.getLocation()); } return BuildLiteralOperatorCall(R, OpNameInfo, Lit, TokLoc); } case LOLR_Raw: { // C++11 [lit.ext]p3, p4: If S contains a raw literal operator, the // literal is treated as a call of the form // operator "" X ("n") unsigned Length = Literal.getUDSuffixOffset(); QualType StrTy = Context.getConstantArrayType( Context.CharTy.withConst(), llvm::APInt(32, Length + 1), ArrayType::Normal, 0); Expr *Lit = StringLiteral::Create( Context, StringRef(TokSpelling.data(), Length), StringLiteral::Ascii, /*Pascal*/false, StrTy, &TokLoc, 1); return BuildLiteralOperatorCall(R, OpNameInfo, Lit, TokLoc); } case LOLR_Template: { // C++11 [lit.ext]p3, p4: Otherwise (S contains a literal operator // template), L is treated as a call fo the form // operator "" X <'c1', 'c2', ... 'ck'>() // where n is the source character sequence c1 c2 ... ck. 
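    // Illustrative sketch (hypothetical names): for a literal operator
    // template such as
    //
    //   template <char... Cs>
    //   constexpr int operator"" _len() { return sizeof...(Cs); }
    //   static_assert(123_len == 3, "three source characters");
    //
    // the call built below passes each source character as a template
    // argument: operator"" _len<'1', '2', '3'>().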
TemplateArgumentListInfo ExplicitArgs; unsigned CharBits = Context.getIntWidth(Context.CharTy); bool CharIsUnsigned = Context.CharTy->isUnsignedIntegerType(); llvm::APSInt Value(CharBits, CharIsUnsigned); for (unsigned I = 0, N = Literal.getUDSuffixOffset(); I != N; ++I) { Value = TokSpelling[I]; TemplateArgument Arg(Context, Value, Context.CharTy); TemplateArgumentLocInfo ArgInfo; ExplicitArgs.addArgument(TemplateArgumentLoc(Arg, ArgInfo)); } return BuildLiteralOperatorCall(R, OpNameInfo, None, TokLoc, &ExplicitArgs); } case LOLR_StringTemplate: llvm_unreachable("unexpected literal operator lookup result"); } } Expr *Res; if (Literal.isFloatingLiteral()) { QualType Ty; if (Literal.isFloat) Ty = Context.FloatTy; else if (!Literal.isLong) Ty = Context.DoubleTy; else Ty = Context.LongDoubleTy; Res = BuildFloatingLiteral(*this, Literal, Ty, Tok.getLocation()); if (Ty == Context.DoubleTy) { if (getLangOpts().SinglePrecisionConstants) { Res = ImpCastExprToType(Res, Context.FloatTy, CK_FloatingCast).get(); } else if (getLangOpts().OpenCL && !((getLangOpts().OpenCLVersion >= 120) || getOpenCLOptions().cl_khr_fp64)) { Diag(Tok.getLocation(), diag::warn_double_const_requires_fp64); Res = ImpCastExprToType(Res, Context.FloatTy, CK_FloatingCast).get(); } } } else if (!Literal.isIntegerLiteral()) { return ExprError(); } else { QualType Ty; // 'long long' is a C99 or C++11 feature. if (!getLangOpts().C99 && Literal.isLongLong) { if (getLangOpts().CPlusPlus) Diag(Tok.getLocation(), getLangOpts().CPlusPlus11 ? diag::warn_cxx98_compat_longlong : diag::ext_cxx11_longlong); else Diag(Tok.getLocation(), diag::ext_c99_longlong); } // Get the value in the widest-possible width. unsigned MaxWidth = Context.getTargetInfo().getIntMaxTWidth(); llvm::APInt ResultVal(MaxWidth, 0); if (Literal.GetIntegerValue(ResultVal)) { // If this value didn't fit into uintmax_t, error and force to ull. Diag(Tok.getLocation(), diag::err_integer_literal_too_large) << /* Unsigned */ 1; Ty = Context.UnsignedLongLongTy; assert(Context.getTypeSize(Ty) == ResultVal.getBitWidth() && "long long is not intmax_t?"); } else { // If this value fits into a ULL, try to figure out what else it fits into // according to the rules of C99 6.4.4.1p5. // Octal, Hexadecimal, and integers with a U suffix are allowed to // be an unsigned int. bool AllowUnsigned = Literal.isUnsigned || Literal.getRadix() != 10; // Check from smallest to largest, picking the smallest type we can. unsigned Width = 0; // Microsoft specific integer suffixes are explicitly sized. if (Literal.MicrosoftInteger) { if (Literal.MicrosoftInteger == 8 && !Literal.isUnsigned) { Width = 8; Ty = Context.CharTy; } else { Width = Literal.MicrosoftInteger; Ty = Context.getIntTypeForBitwidth(Width, /*Signed=*/!Literal.isUnsigned); } } if (Ty.isNull() && !Literal.isLong && !Literal.isLongLong) { // Are int/unsigned possibilities? unsigned IntSize = Context.getTargetInfo().getIntWidth(); // Does it fit in a unsigned int? if (ResultVal.isIntN(IntSize)) { // Does it fit in a signed int? if (!Literal.isUnsigned && ResultVal[IntSize-1] == 0) Ty = Context.IntTy; else if (AllowUnsigned) Ty = Context.UnsignedIntTy; Width = IntSize; } } // Are long/unsigned long possibilities? if (Ty.isNull() && !Literal.isLongLong) { unsigned LongSize = Context.getTargetInfo().getLongWidth(); // Does it fit in a unsigned long? if (ResultVal.isIntN(LongSize)) { // Does it fit in a signed long? 
if (!Literal.isUnsigned && ResultVal[LongSize-1] == 0) Ty = Context.LongTy; else if (AllowUnsigned) Ty = Context.UnsignedLongTy; // Check according to the rules of C90 6.1.3.2p5. C++03 [lex.icon]p2 // is compatible. else if (!getLangOpts().C99 && !getLangOpts().CPlusPlus11) { const unsigned LongLongSize = Context.getTargetInfo().getLongLongWidth(); Diag(Tok.getLocation(), getLangOpts().CPlusPlus ? Literal.isLong ? diag::warn_old_implicitly_unsigned_long_cxx : /*C++98 UB*/ diag:: ext_old_implicitly_unsigned_long_cxx : diag::warn_old_implicitly_unsigned_long) << (LongLongSize > LongSize ? /*will have type 'long long'*/ 0 : /*will be ill-formed*/ 1); Ty = Context.UnsignedLongTy; } Width = LongSize; } } // Check long long if needed. if (Ty.isNull()) { unsigned LongLongSize = Context.getTargetInfo().getLongLongWidth(); // Does it fit in a unsigned long long? if (ResultVal.isIntN(LongLongSize)) { // Does it fit in a signed long long? // To be compatible with MSVC, hex integer literals ending with the // LL or i64 suffix are always signed in Microsoft mode. if (!Literal.isUnsigned && (ResultVal[LongLongSize-1] == 0 || (getLangOpts().MicrosoftExt && Literal.isLongLong))) Ty = Context.LongLongTy; else if (AllowUnsigned) Ty = Context.UnsignedLongLongTy; Width = LongLongSize; } } // If we still couldn't decide a type, we probably have something that // does not fit in a signed long long, but has no U suffix. if (Ty.isNull()) { Diag(Tok.getLocation(), diag::ext_integer_literal_too_large_for_signed); Ty = Context.UnsignedLongLongTy; Width = Context.getTargetInfo().getLongLongWidth(); } if (ResultVal.getBitWidth() != Width) ResultVal = ResultVal.trunc(Width); } Res = IntegerLiteral::Create(Context, ResultVal, Ty, Tok.getLocation()); } // If this is an imaginary literal, create the ImaginaryLiteral wrapper. if (Literal.isImaginary) Res = new (Context) ImaginaryLiteral(Res, Context.getComplexType(Res->getType())); return Res; } ExprResult Sema::ActOnParenExpr(SourceLocation L, SourceLocation R, Expr *E) { assert(E && "ActOnParenExpr() missing expr"); return new (Context) ParenExpr(L, R, E); } static bool CheckVecStepTraitOperandType(Sema &S, QualType T, SourceLocation Loc, SourceRange ArgRange) { // [OpenCL 1.1 6.11.12] "The vec_step built-in function takes a built-in // scalar or vector data type argument..." // Every built-in scalar type (OpenCL 1.1 6.1.1) is either an arithmetic // type (C99 6.2.5p18) or void. if (!(T->isArithmeticType() || T->isVoidType() || T->isVectorType())) { S.Diag(Loc, diag::err_vecstep_non_scalar_vector_type) << T << ArgRange; return true; } assert((T->isVoidType() || !T->isIncompleteType()) && "Scalar types should always be complete"); return false; } static bool CheckExtensionTraitOperandType(Sema &S, QualType T, SourceLocation Loc, SourceRange ArgRange, UnaryExprOrTypeTrait TraitKind) { // Invalid types must be hard errors for SFINAE in C++. if (S.LangOpts.CPlusPlus) return true; // C99 6.5.3.4p1: if (T->isFunctionType() && (TraitKind == UETT_SizeOf || TraitKind == UETT_AlignOf)) { // sizeof(function)/alignof(function) is allowed as an extension. S.Diag(Loc, diag::ext_sizeof_alignof_function_type) << TraitKind << ArgRange; return false; } // Allow sizeof(void)/alignof(void) as an extension, unless in OpenCL where // this is an error (OpenCL v1.1 s6.3.k) if (T->isVoidType()) { unsigned DiagID = S.LangOpts.OpenCL ? 
diag::err_opencl_sizeof_alignof_type : diag::ext_sizeof_alignof_void_type; S.Diag(Loc, DiagID) << TraitKind << ArgRange; return false; } return true; } static bool CheckObjCTraitOperandConstraints(Sema &S, QualType T, SourceLocation Loc, SourceRange ArgRange, UnaryExprOrTypeTrait TraitKind) { // Reject sizeof(interface) and sizeof(interface) if the // runtime doesn't allow it. if (!S.LangOpts.ObjCRuntime.allowsSizeofAlignof() && T->isObjCObjectType()) { S.Diag(Loc, diag::err_sizeof_nonfragile_interface) << T << (TraitKind == UETT_SizeOf) << ArgRange; return true; } return false; } /// \brief Check whether E is a pointer from a decayed array type (the decayed /// pointer type is equal to T) and emit a warning if it is. static void warnOnSizeofOnArrayDecay(Sema &S, SourceLocation Loc, QualType T, Expr *E) { // Don't warn if the operation changed the type. if (T != E->getType()) return; // Now look for array decays. ImplicitCastExpr *ICE = dyn_cast(E); if (!ICE || ICE->getCastKind() != CK_ArrayToPointerDecay) return; S.Diag(Loc, diag::warn_sizeof_array_decay) << ICE->getSourceRange() << ICE->getType() << ICE->getSubExpr()->getType(); } /// \brief Check the constraints on expression operands to unary type expression /// and type traits. /// /// Completes any types necessary and validates the constraints on the operand /// expression. The logic mostly mirrors the type-based overload, but may modify /// the expression as it completes the type for that expression through template /// instantiation, etc. bool Sema::CheckUnaryExprOrTypeTraitOperand(Expr *E, UnaryExprOrTypeTrait ExprKind) { QualType ExprTy = E->getType(); assert(!ExprTy->isReferenceType()); if (ExprKind == UETT_VecStep) return CheckVecStepTraitOperandType(*this, ExprTy, E->getExprLoc(), E->getSourceRange()); // Whitelist some types as extensions if (!CheckExtensionTraitOperandType(*this, ExprTy, E->getExprLoc(), E->getSourceRange(), ExprKind)) return false; // 'alignof' applied to an expression only requires the base element type of // the expression to be complete. 'sizeof' requires the expression's type to // be complete (and will attempt to complete it if it's an array of unknown // bound). if (ExprKind == UETT_AlignOf) { if (RequireCompleteType(E->getExprLoc(), Context.getBaseElementType(E->getType()), diag::err_sizeof_alignof_incomplete_type, ExprKind, E->getSourceRange())) return true; } else { if (RequireCompleteExprType(E, diag::err_sizeof_alignof_incomplete_type, ExprKind, E->getSourceRange())) return true; } // Completing the expression's type may have changed it. ExprTy = E->getType(); assert(!ExprTy->isReferenceType()); if (ExprTy->isFunctionType()) { Diag(E->getExprLoc(), diag::err_sizeof_alignof_function_type) << ExprKind << E->getSourceRange(); return true; } // The operand for sizeof and alignof is in an unevaluated expression context, // so side effects could result in unintended consequences. 
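  // Illustrative sketch (hypothetical code):
  //
  //   int i = 0;
  //   (void)sizeof(i++);   // 'i' is not incremented
  //
  // the operand is unevaluated, so the side effect is silently dropped,
  // which is what the warning below points out.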
if ((ExprKind == UETT_SizeOf || ExprKind == UETT_AlignOf) && ActiveTemplateInstantiations.empty() && E->HasSideEffects(Context, false)) Diag(E->getExprLoc(), diag::warn_side_effects_unevaluated_context); if (CheckObjCTraitOperandConstraints(*this, ExprTy, E->getExprLoc(), E->getSourceRange(), ExprKind)) return true; if (ExprKind == UETT_SizeOf) { if (DeclRefExpr *DeclRef = dyn_cast(E->IgnoreParens())) { if (ParmVarDecl *PVD = dyn_cast(DeclRef->getFoundDecl())) { QualType OType = PVD->getOriginalType(); QualType Type = PVD->getType(); if (Type->isPointerType() && OType->isArrayType()) { Diag(E->getExprLoc(), diag::warn_sizeof_array_param) << Type << OType; Diag(PVD->getLocation(), diag::note_declared_at); } } } // Warn on "sizeof(array op x)" and "sizeof(x op array)", where the array // decays into a pointer and returns an unintended result. This is most // likely a typo for "sizeof(array) op x". if (BinaryOperator *BO = dyn_cast(E->IgnoreParens())) { warnOnSizeofOnArrayDecay(*this, BO->getOperatorLoc(), BO->getType(), BO->getLHS()); warnOnSizeofOnArrayDecay(*this, BO->getOperatorLoc(), BO->getType(), BO->getRHS()); } } return false; } /// \brief Check the constraints on operands to unary expression and type /// traits. /// /// This will complete any types necessary, and validate the various constraints /// on those operands. /// /// The UsualUnaryConversions() function is *not* called by this routine. /// C99 6.3.2.1p[2-4] all state: /// Except when it is the operand of the sizeof operator ... /// /// C++ [expr.sizeof]p4 /// The lvalue-to-rvalue, array-to-pointer, and function-to-pointer /// standard conversions are not applied to the operand of sizeof. /// /// This policy is followed for all of the unary trait expressions. bool Sema::CheckUnaryExprOrTypeTraitOperand(QualType ExprType, SourceLocation OpLoc, SourceRange ExprRange, UnaryExprOrTypeTrait ExprKind) { if (ExprType->isDependentType()) return false; // C++ [expr.sizeof]p2: // When applied to a reference or a reference type, the result // is the size of the referenced type. // C++11 [expr.alignof]p3: // When alignof is applied to a reference type, the result // shall be the alignment of the referenced type. if (const ReferenceType *Ref = ExprType->getAs()) ExprType = Ref->getPointeeType(); // C11 6.5.3.4/3, C++11 [expr.alignof]p3: // When alignof or _Alignof is applied to an array type, the result // is the alignment of the element type. if (ExprKind == UETT_AlignOf || ExprKind == UETT_OpenMPRequiredSimdAlign) ExprType = Context.getBaseElementType(ExprType); if (ExprKind == UETT_VecStep) return CheckVecStepTraitOperandType(*this, ExprType, OpLoc, ExprRange); // Whitelist some types as extensions if (!CheckExtensionTraitOperandType(*this, ExprType, OpLoc, ExprRange, ExprKind)) return false; if (RequireCompleteType(OpLoc, ExprType, diag::err_sizeof_alignof_incomplete_type, ExprKind, ExprRange)) return true; if (ExprType->isFunctionType()) { Diag(OpLoc, diag::err_sizeof_alignof_function_type) << ExprKind << ExprRange; return true; } if (CheckObjCTraitOperandConstraints(*this, ExprType, OpLoc, ExprRange, ExprKind)) return true; return false; } static bool CheckAlignOfExpr(Sema &S, Expr *E) { E = E->IgnoreParens(); // Cannot know anything else if the expression is dependent. 
if (E->isTypeDependent()) return false; if (E->getObjectKind() == OK_BitField) { S.Diag(E->getExprLoc(), diag::err_sizeof_alignof_typeof_bitfield) << 1 << E->getSourceRange(); return true; } ValueDecl *D = nullptr; if (DeclRefExpr *DRE = dyn_cast(E)) { D = DRE->getDecl(); } else if (MemberExpr *ME = dyn_cast(E)) { D = ME->getMemberDecl(); } // If it's a field, require the containing struct to have a // complete definition so that we can compute the layout. // // This can happen in C++11 onwards, either by naming the member // in a way that is not transformed into a member access expression // (in an unevaluated operand, for instance), or by naming the member // in a trailing-return-type. // // For the record, since __alignof__ on expressions is a GCC // extension, GCC seems to permit this but always gives the // nonsensical answer 0. // // We don't really need the layout here --- we could instead just // directly check for all the appropriate alignment-lowing // attributes --- but that would require duplicating a lot of // logic that just isn't worth duplicating for such a marginal // use-case. if (FieldDecl *FD = dyn_cast_or_null(D)) { // Fast path this check, since we at least know the record has a // definition if we can find a member of it. if (!FD->getParent()->isCompleteDefinition()) { S.Diag(E->getExprLoc(), diag::err_alignof_member_of_incomplete_type) << E->getSourceRange(); return true; } // Otherwise, if it's a field, and the field doesn't have // reference type, then it must have a complete type (or be a // flexible array member, which we explicitly want to // white-list anyway), which makes the following checks trivial. if (!FD->getType()->isReferenceType()) return false; } return S.CheckUnaryExprOrTypeTraitOperand(E, UETT_AlignOf); } bool Sema::CheckVecStepExpr(Expr *E) { E = E->IgnoreParens(); // Cannot know anything else if the expression is dependent. if (E->isTypeDependent()) return false; return CheckUnaryExprOrTypeTraitOperand(E, UETT_VecStep); } static void captureVariablyModifiedType(ASTContext &Context, QualType T, CapturingScopeInfo *CSI) { assert(T->isVariablyModifiedType()); assert(CSI != nullptr); // We're going to walk down into the type and look for VLA expressions. do { const Type *Ty = T.getTypePtr(); switch (Ty->getTypeClass()) { #define TYPE(Class, Base) #define ABSTRACT_TYPE(Class, Base) #define NON_CANONICAL_TYPE(Class, Base) #define DEPENDENT_TYPE(Class, Base) case Type::Class: #define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) #include "clang/AST/TypeNodes.def" T = QualType(); break; // These types are never variably-modified. case Type::Builtin: case Type::Complex: case Type::Vector: case Type::ExtVector: case Type::Record: case Type::Enum: case Type::Elaborated: case Type::TemplateSpecialization: case Type::ObjCObject: case Type::ObjCInterface: case Type::ObjCObjectPointer: case Type::Pipe: llvm_unreachable("type class is never variably-modified!"); case Type::Adjusted: T = cast(Ty)->getOriginalType(); break; case Type::Decayed: T = cast(Ty)->getPointeeType(); break; case Type::Pointer: T = cast(Ty)->getPointeeType(); break; case Type::BlockPointer: T = cast(Ty)->getPointeeType(); break; case Type::LValueReference: case Type::RValueReference: T = cast(Ty)->getPointeeType(); break; case Type::MemberPointer: T = cast(Ty)->getPointeeType(); break; case Type::ConstantArray: case Type::IncompleteArray: // Losing element qualification here is fine. 
T = cast(Ty)->getElementType(); break; case Type::VariableArray: { // Losing element qualification here is fine. const VariableArrayType *VAT = cast(Ty); // Unknown size indication requires no size computation. // Otherwise, evaluate and record it. if (auto Size = VAT->getSizeExpr()) { if (!CSI->isVLATypeCaptured(VAT)) { RecordDecl *CapRecord = nullptr; if (auto LSI = dyn_cast(CSI)) { CapRecord = LSI->Lambda; } else if (auto CRSI = dyn_cast(CSI)) { CapRecord = CRSI->TheRecordDecl; } if (CapRecord) { auto ExprLoc = Size->getExprLoc(); auto SizeType = Context.getSizeType(); // Build the non-static data member. auto Field = FieldDecl::Create(Context, CapRecord, ExprLoc, ExprLoc, /*Id*/ nullptr, SizeType, /*TInfo*/ nullptr, /*BW*/ nullptr, /*Mutable*/ false, /*InitStyle*/ ICIS_NoInit); Field->setImplicit(true); Field->setAccess(AS_private); Field->setCapturedVLAType(VAT); CapRecord->addDecl(Field); CSI->addVLATypeCapture(ExprLoc, SizeType); } } } T = VAT->getElementType(); break; } case Type::FunctionProto: case Type::FunctionNoProto: T = cast(Ty)->getReturnType(); break; case Type::Paren: case Type::TypeOf: case Type::UnaryTransform: case Type::Attributed: case Type::SubstTemplateTypeParm: case Type::PackExpansion: // Keep walking after single level desugaring. T = T.getSingleStepDesugaredType(Context); break; case Type::Typedef: T = cast(Ty)->desugar(); break; case Type::Decltype: T = cast(Ty)->desugar(); break; case Type::Auto: T = cast(Ty)->getDeducedType(); break; case Type::TypeOfExpr: T = cast(Ty)->getUnderlyingExpr()->getType(); break; case Type::Atomic: T = cast(Ty)->getValueType(); break; } } while (!T.isNull() && T->isVariablyModifiedType()); } /// \brief Build a sizeof or alignof expression given a type operand. ExprResult Sema::CreateUnaryExprOrTypeTraitExpr(TypeSourceInfo *TInfo, SourceLocation OpLoc, UnaryExprOrTypeTrait ExprKind, SourceRange R) { if (!TInfo) return ExprError(); QualType T = TInfo->getType(); if (!T->isDependentType() && CheckUnaryExprOrTypeTraitOperand(T, OpLoc, R, ExprKind)) return ExprError(); if (T->isVariablyModifiedType() && FunctionScopes.size() > 1) { if (auto *TT = T->getAs()) { if (auto *CSI = dyn_cast(FunctionScopes.back())) { DeclContext *DC = nullptr; if (auto LSI = dyn_cast(CSI)) DC = LSI->CallOperator; else if (auto CRSI = dyn_cast(CSI)) DC = CRSI->TheCapturedDecl; if (DC && TT->getDecl()->getDeclContext() != DC) captureVariablyModifiedType(Context, T, CSI); } } } // C99 6.5.3.4p4: the type (an unsigned integer type) is size_t. return new (Context) UnaryExprOrTypeTraitExpr( ExprKind, TInfo, Context.getSizeType(), OpLoc, R.getEnd()); } /// \brief Build a sizeof or alignof expression given an expression /// operand. ExprResult Sema::CreateUnaryExprOrTypeTraitExpr(Expr *E, SourceLocation OpLoc, UnaryExprOrTypeTrait ExprKind) { ExprResult PE = CheckPlaceholderExpr(E); if (PE.isInvalid()) return ExprError(); E = PE.get(); // Verify that the operand is valid. bool isInvalid = false; if (E->isTypeDependent()) { // Delay type-checking for type-dependent expressions. } else if (ExprKind == UETT_AlignOf) { isInvalid = CheckAlignOfExpr(*this, E); } else if (ExprKind == UETT_VecStep) { isInvalid = CheckVecStepExpr(E); } else if (ExprKind == UETT_OpenMPRequiredSimdAlign) { Diag(E->getExprLoc(), diag::err_openmp_default_simd_align_expr); isInvalid = true; } else if (E->refersToBitField()) { // C99 6.5.3.4p1. 
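    // Illustrative sketch (hypothetical code): C99 6.5.3.4p1 forbids applying
    // sizeof to a bit-field, e.g.
    //
    //   struct S { unsigned b : 3; } s;
    //   (void)sizeof(s.b);   // ill-formed; diagnosed below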
    Diag(E->getExprLoc(), diag::err_sizeof_alignof_typeof_bitfield) << 0;
    isInvalid = true;
  } else {
    isInvalid = CheckUnaryExprOrTypeTraitOperand(E, UETT_SizeOf);
  }

  if (isInvalid)
    return ExprError();

  if (ExprKind == UETT_SizeOf && E->getType()->isVariableArrayType()) {
    PE = TransformToPotentiallyEvaluated(E);
    if (PE.isInvalid()) return ExprError();
    E = PE.get();
  }

  // C99 6.5.3.4p4: the type (an unsigned integer type) is size_t.
  return new (Context) UnaryExprOrTypeTraitExpr(
      ExprKind, E, Context.getSizeType(), OpLoc,
      E->getSourceRange().getEnd());
}

/// ActOnUnaryExprOrTypeTraitExpr - Handle @c sizeof(type) and @c sizeof @c
/// expr and the same for @c alignof and @c __alignof
/// Note that the ArgRange is invalid if isType is false.
ExprResult
Sema::ActOnUnaryExprOrTypeTraitExpr(SourceLocation OpLoc,
                                    UnaryExprOrTypeTrait ExprKind, bool IsType,
                                    void *TyOrEx, SourceRange ArgRange) {
  // If error parsing type, ignore.
  if (!TyOrEx) return ExprError();

  if (IsType) {
    TypeSourceInfo *TInfo;
    (void) GetTypeFromParser(ParsedType::getFromOpaquePtr(TyOrEx), &TInfo);
    return CreateUnaryExprOrTypeTraitExpr(TInfo, OpLoc, ExprKind, ArgRange);
  }

  Expr *ArgEx = (Expr *)TyOrEx;
  ExprResult Result = CreateUnaryExprOrTypeTraitExpr(ArgEx, OpLoc, ExprKind);
  return Result;
}

static QualType CheckRealImagOperand(Sema &S, ExprResult &V, SourceLocation Loc,
                                     bool IsReal) {
  if (V.get()->isTypeDependent())
    return S.Context.DependentTy;

  // _Real and _Imag are only l-values for normal l-values.
  if (V.get()->getObjectKind() != OK_Ordinary) {
    V = S.DefaultLvalueConversion(V.get());
    if (V.isInvalid())
      return QualType();
  }

  // These operators return the element type of a complex type.
  if (const ComplexType *CT = V.get()->getType()->getAs<ComplexType>())
    return CT->getElementType();

  // Otherwise they pass through real integer and floating point types here.
  if (V.get()->getType()->isArithmeticType())
    return V.get()->getType();

  // Test for placeholders.
  ExprResult PR = S.CheckPlaceholderExpr(V.get());
  if (PR.isInvalid()) return QualType();
  if (PR.get() != V.get()) {
    V = PR;
    return CheckRealImagOperand(S, V, Loc, IsReal);
  }

  // Reject anything else.
  S.Diag(Loc, diag::err_realimag_invalid_type) << V.get()->getType()
    << (IsReal ? "__real" : "__imag");
  return QualType();
}

ExprResult
Sema::ActOnPostfixUnaryOp(Scope *S, SourceLocation OpLoc,
                          tok::TokenKind Kind, Expr *Input) {
  UnaryOperatorKind Opc;
  switch (Kind) {
  default: llvm_unreachable("Unknown unary op!");
  case tok::plusplus:   Opc = UO_PostInc; break;
  case tok::minusminus: Opc = UO_PostDec; break;
  }

  // Since this might be a postfix expression, get rid of ParenListExprs.
  ExprResult Result = MaybeConvertParenListExprToParenExpr(S, Input);
  if (Result.isInvalid()) return ExprError();
  Input = Result.get();

  return BuildUnaryOp(S, OpLoc, Opc, Input);
}

/// \brief Diagnose if arithmetic on the given ObjC pointer is illegal.
///
/// \return true on error
static bool checkArithmeticOnObjCPointer(Sema &S,
                                         SourceLocation opLoc,
                                         Expr *op) {
  assert(op->getType()->isObjCObjectPointerType());
  if (S.LangOpts.ObjCRuntime.allowsPointerArithmetic() &&
      !S.LangOpts.ObjCSubscriptingLegacyRuntime)
    return false;

  S.Diag(opLoc, diag::err_arithmetic_nonfragile_interface)
    << op->getType()->castAs<ObjCObjectPointerType>()->getPointeeType()
    << op->getSourceRange();

  return true;
}

static bool isMSPropertySubscriptExpr(Sema &S, Expr *Base) {
  auto *BaseNoParens = Base->IgnoreParens();
  if (auto *MSProp = dyn_cast<MSPropertyRefExpr>(BaseNoParens))
    return MSProp->getPropertyDecl()->getType()->isArrayType();
  return isa<MSPropertySubscriptExpr>(BaseNoParens);
}

ExprResult Sema::ActOnArraySubscriptExpr(Scope *S, Expr *base,
                                         SourceLocation lbLoc,
                                         Expr *idx, SourceLocation rbLoc) {
  if (base && !base->getType().isNull() &&
      base->getType()->isSpecificPlaceholderType(BuiltinType::OMPArraySection))
    return ActOnOMPArraySectionExpr(base, lbLoc, idx, SourceLocation(),
                                    /*Length=*/nullptr, rbLoc);

  // Since this might be a postfix expression, get rid of ParenListExprs.
  if (isa<ParenListExpr>(base)) {
    ExprResult result = MaybeConvertParenListExprToParenExpr(S, base);
    if (result.isInvalid()) return ExprError();
    base = result.get();
  }

  // Handle any non-overload placeholder types in the base and index
  // expressions. We can't handle overloads here because the other
  // operand might be an overloadable type, in which case the overload
  // resolution for the operator overload should get the first crack
  // at the overload.
  bool IsMSPropertySubscript = false;
  if (base->getType()->isNonOverloadPlaceholderType()) {
    IsMSPropertySubscript = isMSPropertySubscriptExpr(*this, base);
    if (!IsMSPropertySubscript) {
      ExprResult result = CheckPlaceholderExpr(base);
      if (result.isInvalid()) return ExprError();
      base = result.get();
    }
  }
  if (idx->getType()->isNonOverloadPlaceholderType()) {
    ExprResult result = CheckPlaceholderExpr(idx);
    if (result.isInvalid()) return ExprError();
    idx = result.get();
  }

  // Build an unanalyzed expression if either operand is type-dependent.
  if (getLangOpts().CPlusPlus &&
      (base->isTypeDependent() || idx->isTypeDependent())) {
    return new (Context) ArraySubscriptExpr(base, idx, Context.DependentTy,
                                            VK_LValue, OK_Ordinary, rbLoc);
  }

  // MSDN, property (C++)
  // https://msdn.microsoft.com/en-us/library/yhfk0thd(v=vs.120).aspx
  // This attribute can also be used in the declaration of an empty array in a
  // class or structure definition. For example:
  // __declspec(property(get=GetX, put=PutX)) int x[];
  // The above statement indicates that x[] can be used with one or more array
  // indices. In this case, i=p->x[a][b] will be turned into i=p->GetX(a, b),
  // and p->x[a][b] = i will be turned into p->PutX(a, b, i);
  if (IsMSPropertySubscript) {
    // Build MS property subscript expression if base is MS property reference
    // or MS property subscript.
    return new (Context) MSPropertySubscriptExpr(
        base, idx, Context.PseudoObjectTy, VK_LValue, OK_Ordinary, rbLoc);
  }

  // Use C++ overloaded-operator rules if either operand has record
  // type. The spec says to do this if either type is *overloadable*,
  // but enum types can't declare subscript operators or conversion
  // operators, so there's nothing interesting for overload resolution
  // to do if there aren't any record types involved.
  //
  // ObjC pointers have their own subscripting logic that is not tied
  // to overload resolution and so should not take this path.
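  // Illustrative sketch (hypothetical names): a record operand such as
  //
  //   struct Vec { int &operator[](unsigned i); };
  //   Vec v; v[3] = 1;        // dispatched through overload resolution
  //
  // takes the overloaded-operator path below, whereas a plain
  // 'int a[4]; a[3] = 1;' uses the builtin subscript.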
if (getLangOpts().CPlusPlus && (base->getType()->isRecordType() || (!base->getType()->isObjCObjectPointerType() && idx->getType()->isRecordType()))) { return CreateOverloadedArraySubscriptExpr(lbLoc, rbLoc, base, idx); } return CreateBuiltinArraySubscriptExpr(base, lbLoc, idx, rbLoc); } ExprResult Sema::ActOnOMPArraySectionExpr(Expr *Base, SourceLocation LBLoc, Expr *LowerBound, SourceLocation ColonLoc, Expr *Length, SourceLocation RBLoc) { if (Base->getType()->isPlaceholderType() && !Base->getType()->isSpecificPlaceholderType( BuiltinType::OMPArraySection)) { ExprResult Result = CheckPlaceholderExpr(Base); if (Result.isInvalid()) return ExprError(); Base = Result.get(); } if (LowerBound && LowerBound->getType()->isNonOverloadPlaceholderType()) { ExprResult Result = CheckPlaceholderExpr(LowerBound); if (Result.isInvalid()) return ExprError(); LowerBound = Result.get(); } if (Length && Length->getType()->isNonOverloadPlaceholderType()) { ExprResult Result = CheckPlaceholderExpr(Length); if (Result.isInvalid()) return ExprError(); Length = Result.get(); } // Build an unanalyzed expression if either operand is type-dependent. if (Base->isTypeDependent() || (LowerBound && (LowerBound->isTypeDependent() || LowerBound->isValueDependent())) || (Length && (Length->isTypeDependent() || Length->isValueDependent()))) { return new (Context) OMPArraySectionExpr(Base, LowerBound, Length, Context.DependentTy, VK_LValue, OK_Ordinary, ColonLoc, RBLoc); } // Perform default conversions. QualType OriginalTy = OMPArraySectionExpr::getBaseOriginalType(Base); QualType ResultTy; if (OriginalTy->isAnyPointerType()) { ResultTy = OriginalTy->getPointeeType(); } else if (OriginalTy->isArrayType()) { ResultTy = OriginalTy->getAsArrayTypeUnsafe()->getElementType(); } else { return ExprError( Diag(Base->getExprLoc(), diag::err_omp_typecheck_section_value) << Base->getSourceRange()); } // C99 6.5.2.1p1 if (LowerBound) { auto Res = PerformOpenMPImplicitIntegerConversion(LowerBound->getExprLoc(), LowerBound); if (Res.isInvalid()) return ExprError(Diag(LowerBound->getExprLoc(), diag::err_omp_typecheck_section_not_integer) << 0 << LowerBound->getSourceRange()); LowerBound = Res.get(); if (LowerBound->getType()->isSpecificBuiltinType(BuiltinType::Char_S) || LowerBound->getType()->isSpecificBuiltinType(BuiltinType::Char_U)) Diag(LowerBound->getExprLoc(), diag::warn_omp_section_is_char) << 0 << LowerBound->getSourceRange(); } if (Length) { auto Res = PerformOpenMPImplicitIntegerConversion(Length->getExprLoc(), Length); if (Res.isInvalid()) return ExprError(Diag(Length->getExprLoc(), diag::err_omp_typecheck_section_not_integer) << 1 << Length->getSourceRange()); Length = Res.get(); if (Length->getType()->isSpecificBuiltinType(BuiltinType::Char_S) || Length->getType()->isSpecificBuiltinType(BuiltinType::Char_U)) Diag(Length->getExprLoc(), diag::warn_omp_section_is_char) << 1 << Length->getSourceRange(); } // C99 6.5.2.1p1: "shall have type "pointer to *object* type". Similarly, // C++ [expr.sub]p1: The type "T" shall be a completely-defined object // type. Note that functions are not objects, and that (in C99 parlance) // incomplete types are not object types. 
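  //
  // Illustrative example (not from the original source, assuming OpenMP mode):
  //   int a[10];          // a[2:4] in a map clause is a valid section
  //   void (*pf)(void);   // pf[0:1] is rejected: the section's element type
  //                       // is a function type, checked immediately below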
if (ResultTy->isFunctionType()) { Diag(Base->getExprLoc(), diag::err_omp_section_function_type) << ResultTy << Base->getSourceRange(); return ExprError(); } if (RequireCompleteType(Base->getExprLoc(), ResultTy, diag::err_omp_section_incomplete_type, Base)) return ExprError(); if (LowerBound) { llvm::APSInt LowerBoundValue; if (LowerBound->EvaluateAsInt(LowerBoundValue, Context)) { // OpenMP 4.0, [2.4 Array Sections] // The lower-bound and length must evaluate to non-negative integers. if (LowerBoundValue.isNegative()) { Diag(LowerBound->getExprLoc(), diag::err_omp_section_negative) << 0 << LowerBoundValue.toString(/*Radix=*/10, /*Signed=*/true) << LowerBound->getSourceRange(); return ExprError(); } } } if (Length) { llvm::APSInt LengthValue; if (Length->EvaluateAsInt(LengthValue, Context)) { // OpenMP 4.0, [2.4 Array Sections] // The lower-bound and length must evaluate to non-negative integers. if (LengthValue.isNegative()) { Diag(Length->getExprLoc(), diag::err_omp_section_negative) << 1 << LengthValue.toString(/*Radix=*/10, /*Signed=*/true) << Length->getSourceRange(); return ExprError(); } } } else if (ColonLoc.isValid() && (OriginalTy.isNull() || (!OriginalTy->isConstantArrayType() && !OriginalTy->isVariableArrayType()))) { // OpenMP 4.0, [2.4 Array Sections] // When the size of the array dimension is not known, the length must be // specified explicitly. Diag(ColonLoc, diag::err_omp_section_length_undefined) << (!OriginalTy.isNull() && OriginalTy->isArrayType()); return ExprError(); } return new (Context) OMPArraySectionExpr(Base, LowerBound, Length, Context.OMPArraySectionTy, VK_LValue, OK_Ordinary, ColonLoc, RBLoc); } ExprResult Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, Expr *Idx, SourceLocation RLoc) { Expr *LHSExp = Base; Expr *RHSExp = Idx; // Perform default conversions. if (!LHSExp->getType()->getAs()) { ExprResult Result = DefaultFunctionArrayLvalueConversion(LHSExp); if (Result.isInvalid()) return ExprError(); LHSExp = Result.get(); } ExprResult Result = DefaultFunctionArrayLvalueConversion(RHSExp); if (Result.isInvalid()) return ExprError(); RHSExp = Result.get(); QualType LHSTy = LHSExp->getType(), RHSTy = RHSExp->getType(); ExprValueKind VK = VK_LValue; ExprObjectKind OK = OK_Ordinary; // C99 6.5.2.1p2: the expression e1[e2] is by definition precisely equivalent // to the expression *((e1)+(e2)). This means the array "Base" may actually be // in the subscript position. As a result, we need to derive the array base // and index from the expression types. Expr *BaseExpr, *IndexExpr; QualType ResultType; if (LHSTy->isDependentType() || RHSTy->isDependentType()) { BaseExpr = LHSExp; IndexExpr = RHSExp; ResultType = Context.DependentTy; } else if (const PointerType *PTy = LHSTy->getAs()) { BaseExpr = LHSExp; IndexExpr = RHSExp; ResultType = PTy->getPointeeType(); } else if (const ObjCObjectPointerType *PTy = LHSTy->getAs()) { BaseExpr = LHSExp; IndexExpr = RHSExp; // Use custom logic if this should be the pseudo-object subscript // expression. if (!LangOpts.isSubscriptPointerArithmetic()) return BuildObjCSubscriptExpression(RLoc, BaseExpr, IndexExpr, nullptr, nullptr); ResultType = PTy->getPointeeType(); } else if (const PointerType *PTy = RHSTy->getAs()) { // Handle the uncommon case of "123[Ptr]". BaseExpr = RHSExp; IndexExpr = LHSExp; ResultType = PTy->getPointeeType(); } else if (const ObjCObjectPointerType *PTy = RHSTy->getAs()) { // Handle the uncommon case of "123[Ptr]". 
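  // Illustrative example (not from the original source): because E1[E2] is
  // *((E1)+(E2)), the index-first spelling is also accepted, e.g.
  //   int *Ptr;  123[Ptr];   // designates the same element as Ptr[123]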
BaseExpr = RHSExp; IndexExpr = LHSExp; ResultType = PTy->getPointeeType(); if (!LangOpts.isSubscriptPointerArithmetic()) { Diag(LLoc, diag::err_subscript_nonfragile_interface) << ResultType << BaseExpr->getSourceRange(); return ExprError(); } } else if (const VectorType *VTy = LHSTy->getAs()) { BaseExpr = LHSExp; // vectors: V[123] IndexExpr = RHSExp; VK = LHSExp->getValueKind(); if (VK != VK_RValue) OK = OK_VectorComponent; // FIXME: need to deal with const... ResultType = VTy->getElementType(); } else if (LHSTy->isArrayType()) { // If we see an array that wasn't promoted by // DefaultFunctionArrayLvalueConversion, it must be an array that // wasn't promoted because of the C90 rule that doesn't // allow promoting non-lvalue arrays. Warn, then // force the promotion here. Diag(LHSExp->getLocStart(), diag::ext_subscript_non_lvalue) << LHSExp->getSourceRange(); LHSExp = ImpCastExprToType(LHSExp, Context.getArrayDecayedType(LHSTy), CK_ArrayToPointerDecay).get(); LHSTy = LHSExp->getType(); BaseExpr = LHSExp; IndexExpr = RHSExp; ResultType = LHSTy->getAs()->getPointeeType(); } else if (RHSTy->isArrayType()) { // Same as previous, except for 123[f().a] case Diag(RHSExp->getLocStart(), diag::ext_subscript_non_lvalue) << RHSExp->getSourceRange(); RHSExp = ImpCastExprToType(RHSExp, Context.getArrayDecayedType(RHSTy), CK_ArrayToPointerDecay).get(); RHSTy = RHSExp->getType(); BaseExpr = RHSExp; IndexExpr = LHSExp; ResultType = RHSTy->getAs()->getPointeeType(); } else { return ExprError(Diag(LLoc, diag::err_typecheck_subscript_value) << LHSExp->getSourceRange() << RHSExp->getSourceRange()); } // C99 6.5.2.1p1 if (!IndexExpr->getType()->isIntegerType() && !IndexExpr->isTypeDependent()) return ExprError(Diag(LLoc, diag::err_typecheck_subscript_not_integer) << IndexExpr->getSourceRange()); if ((IndexExpr->getType()->isSpecificBuiltinType(BuiltinType::Char_S) || IndexExpr->getType()->isSpecificBuiltinType(BuiltinType::Char_U)) && !IndexExpr->isTypeDependent()) Diag(LLoc, diag::warn_subscript_is_char) << IndexExpr->getSourceRange(); // C99 6.5.2.1p1: "shall have type "pointer to *object* type". Similarly, // C++ [expr.sub]p1: The type "T" shall be a completely-defined object // type. Note that Functions are not objects, and that (in C99 parlance) // incomplete types are not object types. if (ResultType->isFunctionType()) { Diag(BaseExpr->getLocStart(), diag::err_subscript_function_type) << ResultType << BaseExpr->getSourceRange(); return ExprError(); } if (ResultType->isVoidType() && !getLangOpts().CPlusPlus) { // GNU extension: subscripting on pointer to void Diag(LLoc, diag::ext_gnu_subscript_void_type) << BaseExpr->getSourceRange(); // C forbids expressions of unqualified void type from being l-values. // See IsCForbiddenLValueType. 
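  //
  // Illustrative example (not from the original source), C only:
  //   void *p;  p[1];   // accepted with a GNU-extension warning; the result
  //                     // is an unqualified-void r-value, so not assignable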
if (!ResultType.hasQualifiers()) VK = VK_RValue; } else if (!ResultType->isDependentType() && RequireCompleteType(LLoc, ResultType, diag::err_subscript_incomplete_type, BaseExpr)) return ExprError(); assert(VK == VK_RValue || LangOpts.CPlusPlus || !ResultType.isCForbiddenLValueType()); return new (Context) ArraySubscriptExpr(LHSExp, RHSExp, ResultType, VK, OK, RLoc); } ExprResult Sema::BuildCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, ParmVarDecl *Param) { if (Param->hasUnparsedDefaultArg()) { Diag(CallLoc, diag::err_use_of_default_argument_to_function_declared_later) << FD << cast(FD->getDeclContext())->getDeclName(); Diag(UnparsedDefaultArgLocs[Param], diag::note_default_argument_declared_here); return ExprError(); } if (Param->hasUninstantiatedDefaultArg()) { Expr *UninstExpr = Param->getUninstantiatedDefaultArg(); EnterExpressionEvaluationContext EvalContext(*this, PotentiallyEvaluated, Param); // Instantiate the expression. MultiLevelTemplateArgumentList MutiLevelArgList = getTemplateInstantiationArgs(FD, nullptr, /*RelativeToPrimary=*/true); InstantiatingTemplate Inst(*this, CallLoc, Param, MutiLevelArgList.getInnermost()); if (Inst.isInvalid()) return ExprError(); ExprResult Result; { // C++ [dcl.fct.default]p5: // The names in the [default argument] expression are bound, and // the semantic constraints are checked, at the point where the // default argument expression appears. ContextRAII SavedContext(*this, FD); LocalInstantiationScope Local(*this); Result = SubstExpr(UninstExpr, MutiLevelArgList); } if (Result.isInvalid()) return ExprError(); // Check the expression as an initializer for the parameter. InitializedEntity Entity = InitializedEntity::InitializeParameter(Context, Param); InitializationKind Kind = InitializationKind::CreateCopy(Param->getLocation(), /*FIXME:EqualLoc*/UninstExpr->getLocStart()); Expr *ResultE = Result.getAs(); InitializationSequence InitSeq(*this, Entity, Kind, ResultE); Result = InitSeq.Perform(*this, Entity, Kind, ResultE); if (Result.isInvalid()) return ExprError(); Result = ActOnFinishFullExpr(Result.getAs(), Param->getOuterLocStart()); if (Result.isInvalid()) return ExprError(); // Remember the instantiated default argument. Param->setDefaultArg(Result.getAs()); if (ASTMutationListener *L = getASTMutationListener()) { L->DefaultArgumentInstantiated(Param); } } // If the default expression creates temporaries, we need to // push them to the current stack of expression temporaries so they'll // be properly destroyed. // FIXME: We should really be rebuilding the default argument with new // bound temporaries; see the comment in PR5810. // We don't need to do that with block decls, though, because // blocks in default argument expression can never capture anything. if (isa(Param->getInit())) { // Set the "needs cleanups" bit regardless of whether there are // any explicit objects. ExprNeedsCleanups = true; // Append all the objects to the cleanup list. Right now, this // should always be a no-op, because blocks in default argument // expressions should never be able to capture anything. assert(!cast(Param->getInit())->getNumObjects() && "default argument expression has capturing blocks?"); } // We already type-checked the argument, so we know it works. // Just mark all of the declarations in this potentially-evaluated expression // as being "referenced". 
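  //
  // Illustrative example (not from the original source):
  //   int g(void);  void f(int x = g());
  // a later call f() reaches this point and marks g as referenced, even though
  // the default argument expression was checked when f was declared.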
MarkDeclarationsReferencedInExpr(Param->getDefaultArg(), /*SkipLocalVariables=*/true); return CXXDefaultArgExpr::Create(Context, CallLoc, Param); } Sema::VariadicCallType Sema::getVariadicCallType(FunctionDecl *FDecl, const FunctionProtoType *Proto, Expr *Fn) { if (Proto && Proto->isVariadic()) { if (dyn_cast_or_null(FDecl)) return VariadicConstructor; else if (Fn && Fn->getType()->isBlockPointerType()) return VariadicBlock; else if (FDecl) { if (CXXMethodDecl *Method = dyn_cast_or_null(FDecl)) if (Method->isInstance()) return VariadicMethod; } else if (Fn && Fn->getType() == Context.BoundMemberTy) return VariadicMethod; return VariadicFunction; } return VariadicDoesNotApply; } namespace { class FunctionCallCCC : public FunctionCallFilterCCC { public: FunctionCallCCC(Sema &SemaRef, const IdentifierInfo *FuncName, unsigned NumArgs, MemberExpr *ME) : FunctionCallFilterCCC(SemaRef, NumArgs, false, ME), FunctionName(FuncName) {} bool ValidateCandidate(const TypoCorrection &candidate) override { if (!candidate.getCorrectionSpecifier() || candidate.getCorrectionAsIdentifierInfo() != FunctionName) { return false; } return FunctionCallFilterCCC::ValidateCandidate(candidate); } private: const IdentifierInfo *const FunctionName; }; } static TypoCorrection TryTypoCorrectionForCall(Sema &S, Expr *Fn, FunctionDecl *FDecl, ArrayRef Args) { MemberExpr *ME = dyn_cast(Fn); DeclarationName FuncName = FDecl->getDeclName(); SourceLocation NameLoc = ME ? ME->getMemberLoc() : Fn->getLocStart(); if (TypoCorrection Corrected = S.CorrectTypo( DeclarationNameInfo(FuncName, NameLoc), Sema::LookupOrdinaryName, S.getScopeForContext(S.CurContext), nullptr, llvm::make_unique(S, FuncName.getAsIdentifierInfo(), Args.size(), ME), Sema::CTK_ErrorRecovery)) { if (NamedDecl *ND = Corrected.getFoundDecl()) { if (Corrected.isOverloaded()) { OverloadCandidateSet OCS(NameLoc, OverloadCandidateSet::CSK_Normal); OverloadCandidateSet::iterator Best; for (NamedDecl *CD : Corrected) { if (FunctionDecl *FD = dyn_cast(CD)) S.AddOverloadCandidate(FD, DeclAccessPair::make(FD, AS_none), Args, OCS); } switch (OCS.BestViableFunction(S, NameLoc, Best)) { case OR_Success: ND = Best->FoundDecl; Corrected.setCorrectionDecl(ND); break; default: break; } } ND = ND->getUnderlyingDecl(); if (isa(ND) || isa(ND)) return Corrected; } } return TypoCorrection(); } /// ConvertArgumentsForCall - Converts the arguments specified in /// Args/NumArgs to the parameter types of the function FDecl with /// function prototype Proto. Call is the call expression itself, and /// Fn is the function expression. For a C++ member function, this /// routine does not attempt to convert the object argument. Returns /// true if the call is ill-formed. bool Sema::ConvertArgumentsForCall(CallExpr *Call, Expr *Fn, FunctionDecl *FDecl, const FunctionProtoType *Proto, ArrayRef Args, SourceLocation RParenLoc, bool IsExecConfig) { // Bail out early if calling a builtin with custom typechecking. if (FDecl) if (unsigned ID = FDecl->getBuiltinID()) if (Context.BuiltinInfo.hasCustomTypechecking(ID)) return false; // C99 6.5.2.2p7 - the arguments are implicitly converted, as if by // assignment, to the types of the corresponding parameter, ... unsigned NumParams = Proto->getNumParams(); bool Invalid = false; unsigned MinArgs = FDecl ? FDecl->getMinRequiredArguments() : NumParams; unsigned FnKind = Fn->getType()->isBlockPointerType() ? 1 /* block */ : (IsExecConfig ? 
3 /* kernel function (exec config) */ : 0 /* function */); // If too few arguments are available (and we don't have default // arguments for the remaining parameters), don't make the call. if (Args.size() < NumParams) { if (Args.size() < MinArgs) { TypoCorrection TC; if (FDecl && (TC = TryTypoCorrectionForCall(*this, Fn, FDecl, Args))) { unsigned diag_id = MinArgs == NumParams && !Proto->isVariadic() ? diag::err_typecheck_call_too_few_args_suggest : diag::err_typecheck_call_too_few_args_at_least_suggest; diagnoseTypo(TC, PDiag(diag_id) << FnKind << MinArgs << static_cast(Args.size()) << TC.getCorrectionRange()); } else if (MinArgs == 1 && FDecl && FDecl->getParamDecl(0)->getDeclName()) Diag(RParenLoc, MinArgs == NumParams && !Proto->isVariadic() ? diag::err_typecheck_call_too_few_args_one : diag::err_typecheck_call_too_few_args_at_least_one) << FnKind << FDecl->getParamDecl(0) << Fn->getSourceRange(); else Diag(RParenLoc, MinArgs == NumParams && !Proto->isVariadic() ? diag::err_typecheck_call_too_few_args : diag::err_typecheck_call_too_few_args_at_least) << FnKind << MinArgs << static_cast(Args.size()) << Fn->getSourceRange(); // Emit the location of the prototype. if (!TC && FDecl && !FDecl->getBuiltinID() && !IsExecConfig) Diag(FDecl->getLocStart(), diag::note_callee_decl) << FDecl; return true; } Call->setNumArgs(Context, NumParams); } // If too many are passed and not variadic, error on the extras and drop // them. if (Args.size() > NumParams) { if (!Proto->isVariadic()) { TypoCorrection TC; if (FDecl && (TC = TryTypoCorrectionForCall(*this, Fn, FDecl, Args))) { unsigned diag_id = MinArgs == NumParams && !Proto->isVariadic() ? diag::err_typecheck_call_too_many_args_suggest : diag::err_typecheck_call_too_many_args_at_most_suggest; diagnoseTypo(TC, PDiag(diag_id) << FnKind << NumParams << static_cast(Args.size()) << TC.getCorrectionRange()); } else if (NumParams == 1 && FDecl && FDecl->getParamDecl(0)->getDeclName()) Diag(Args[NumParams]->getLocStart(), MinArgs == NumParams ? diag::err_typecheck_call_too_many_args_one : diag::err_typecheck_call_too_many_args_at_most_one) << FnKind << FDecl->getParamDecl(0) << static_cast(Args.size()) << Fn->getSourceRange() << SourceRange(Args[NumParams]->getLocStart(), Args.back()->getLocEnd()); else Diag(Args[NumParams]->getLocStart(), MinArgs == NumParams ? diag::err_typecheck_call_too_many_args : diag::err_typecheck_call_too_many_args_at_most) << FnKind << NumParams << static_cast(Args.size()) << Fn->getSourceRange() << SourceRange(Args[NumParams]->getLocStart(), Args.back()->getLocEnd()); // Emit the location of the prototype. if (!TC && FDecl && !FDecl->getBuiltinID() && !IsExecConfig) Diag(FDecl->getLocStart(), diag::note_callee_decl) << FDecl; // This deletes the extra arguments. 
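      //
      // Illustrative example (not from the original source):
      //   void f(int);  f(1, 2);   // the second argument is diagnosed above
      //                            // and dropped by setNumArgs below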
Call->setNumArgs(Context, NumParams); return true; } } SmallVector AllArgs; VariadicCallType CallType = getVariadicCallType(FDecl, Proto, Fn); Invalid = GatherArgumentsForCall(Call->getLocStart(), FDecl, Proto, 0, Args, AllArgs, CallType); if (Invalid) return true; unsigned TotalNumArgs = AllArgs.size(); for (unsigned i = 0; i < TotalNumArgs; ++i) Call->setArg(i, AllArgs[i]); return false; } bool Sema::GatherArgumentsForCall(SourceLocation CallLoc, FunctionDecl *FDecl, const FunctionProtoType *Proto, unsigned FirstParam, ArrayRef Args, SmallVectorImpl &AllArgs, VariadicCallType CallType, bool AllowExplicit, bool IsListInitialization) { unsigned NumParams = Proto->getNumParams(); bool Invalid = false; size_t ArgIx = 0; // Continue to check argument types (even if we have too few/many args). for (unsigned i = FirstParam; i < NumParams; i++) { QualType ProtoArgType = Proto->getParamType(i); Expr *Arg; ParmVarDecl *Param = FDecl ? FDecl->getParamDecl(i) : nullptr; if (ArgIx < Args.size()) { Arg = Args[ArgIx++]; if (RequireCompleteType(Arg->getLocStart(), ProtoArgType, diag::err_call_incomplete_argument, Arg)) return true; // Strip the unbridged-cast placeholder expression off, if applicable. bool CFAudited = false; if (Arg->getType() == Context.ARCUnbridgedCastTy && FDecl && FDecl->hasAttr() && (!Param || !Param->hasAttr())) Arg = stripARCUnbridgedCast(Arg); else if (getLangOpts().ObjCAutoRefCount && FDecl && FDecl->hasAttr() && (!Param || !Param->hasAttr())) CFAudited = true; InitializedEntity Entity = Param ? InitializedEntity::InitializeParameter(Context, Param, ProtoArgType) : InitializedEntity::InitializeParameter( Context, ProtoArgType, Proto->isParamConsumed(i)); // Remember that parameter belongs to a CF audited API. if (CFAudited) Entity.setParameterCFAudited(); ExprResult ArgE = PerformCopyInitialization( Entity, SourceLocation(), Arg, IsListInitialization, AllowExplicit); if (ArgE.isInvalid()) return true; Arg = ArgE.getAs(); } else { assert(Param && "can't use default arguments without a known callee"); ExprResult ArgExpr = BuildCXXDefaultArgExpr(CallLoc, FDecl, Param); if (ArgExpr.isInvalid()) return true; Arg = ArgExpr.getAs(); } // Check for array bounds violations for each argument to the call. This // check only triggers warnings when the argument isn't a more complex Expr // with its own checking, such as a BinaryOperator. CheckArrayAccess(Arg); // Check for violations of C99 static array rules (C99 6.7.5.3p7). CheckStaticArrayArgument(CallLoc, Param, Arg); AllArgs.push_back(Arg); } // If this is a variadic call, handle args passed through "...". if (CallType != VariadicDoesNotApply) { // Assume that extern "C" functions with variadic arguments that // return __unknown_anytype aren't *really* variadic. if (Proto->getReturnType() == Context.UnknownAnyTy && FDecl && FDecl->isExternC()) { for (Expr *A : Args.slice(ArgIx)) { QualType paramType; // ignored ExprResult arg = checkUnknownAnyArg(CallLoc, A, paramType); Invalid |= arg.isInvalid(); AllArgs.push_back(arg.get()); } // Otherwise do argument promotion, (C99 6.5.2.2p7). } else { for (Expr *A : Args.slice(ArgIx)) { ExprResult Arg = DefaultVariadicArgumentPromotion(A, CallType, FDecl); Invalid |= Arg.isInvalid(); AllArgs.push_back(Arg.get()); } } // Check for array bounds violations. 
for (Expr *A : Args.slice(ArgIx)) CheckArrayAccess(A); } return Invalid; } static void DiagnoseCalleeStaticArrayParam(Sema &S, ParmVarDecl *PVD) { TypeLoc TL = PVD->getTypeSourceInfo()->getTypeLoc(); if (DecayedTypeLoc DTL = TL.getAs()) TL = DTL.getOriginalLoc(); if (ArrayTypeLoc ATL = TL.getAs()) S.Diag(PVD->getLocation(), diag::note_callee_static_array) << ATL.getLocalSourceRange(); } /// CheckStaticArrayArgument - If the given argument corresponds to a static /// array parameter, check that it is non-null, and that if it is formed by /// array-to-pointer decay, the underlying array is sufficiently large. /// /// C99 6.7.5.3p7: If the keyword static also appears within the [ and ] of the /// array type derivation, then for each call to the function, the value of the /// corresponding actual argument shall provide access to the first element of /// an array with at least as many elements as specified by the size expression. void Sema::CheckStaticArrayArgument(SourceLocation CallLoc, ParmVarDecl *Param, const Expr *ArgExpr) { // Static array parameters are not supported in C++. if (!Param || getLangOpts().CPlusPlus) return; QualType OrigTy = Param->getOriginalType(); const ArrayType *AT = Context.getAsArrayType(OrigTy); if (!AT || AT->getSizeModifier() != ArrayType::Static) return; if (ArgExpr->isNullPointerConstant(Context, Expr::NPC_NeverValueDependent)) { Diag(CallLoc, diag::warn_null_arg) << ArgExpr->getSourceRange(); DiagnoseCalleeStaticArrayParam(*this, Param); return; } const ConstantArrayType *CAT = dyn_cast(AT); if (!CAT) return; const ConstantArrayType *ArgCAT = Context.getAsConstantArrayType(ArgExpr->IgnoreParenImpCasts()->getType()); if (!ArgCAT) return; if (ArgCAT->getSize().ult(CAT->getSize())) { Diag(CallLoc, diag::warn_static_array_too_small) << ArgExpr->getSourceRange() << (unsigned) ArgCAT->getSize().getZExtValue() << (unsigned) CAT->getSize().getZExtValue(); DiagnoseCalleeStaticArrayParam(*this, Param); } } /// Given a function expression of unknown-any type, try to rebuild it /// to have a function type. static ExprResult rebuildUnknownAnyFunction(Sema &S, Expr *fn); /// Is the given type a placeholder that we need to lower out /// immediately during argument processing? static bool isPlaceholderToRemoveAsArg(QualType type) { // Placeholders are never sugared. const BuiltinType *placeholder = dyn_cast(type); if (!placeholder) return false; switch (placeholder->getKind()) { // Ignore all the non-placeholder types. #define PLACEHOLDER_TYPE(ID, SINGLETON_ID) #define BUILTIN_TYPE(ID, SINGLETON_ID) case BuiltinType::ID: #include "clang/AST/BuiltinTypes.def" return false; // We cannot lower out overload sets; they might validly be resolved // by the call machinery. case BuiltinType::Overload: return false; // Unbridged casts in ARC can be handled in some call positions and // should be left in place. case BuiltinType::ARCUnbridgedCast: return false; // Pseudo-objects should be converted as soon as possible. case BuiltinType::PseudoObject: return true; // The debugger mode could theoretically but currently does not try // to resolve unknown-typed arguments based on known parameter types. case BuiltinType::UnknownAny: return true; // These are always invalid as call arguments and should be reported. case BuiltinType::BoundMember: case BuiltinType::BuiltinFn: case BuiltinType::OMPArraySection: return true; } llvm_unreachable("bad builtin type kind"); } /// Check an argument list for placeholders that we won't try to /// handle later. 
static bool checkArgsForPlaceholders(Sema &S, MultiExprArg args) { // Apply this processing to all the arguments at once instead of // dying at the first failure. bool hasInvalid = false; for (size_t i = 0, e = args.size(); i != e; i++) { if (isPlaceholderToRemoveAsArg(args[i]->getType())) { ExprResult result = S.CheckPlaceholderExpr(args[i]); if (result.isInvalid()) hasInvalid = true; else args[i] = result.get(); } else if (hasInvalid) { (void)S.CorrectDelayedTyposInExpr(args[i]); } } return hasInvalid; } /// If a builtin function has a pointer argument with no explicit address /// space, then it should be able to accept a pointer to any address /// space as input. In order to do this, we need to replace the /// standard builtin declaration with one that uses the same address space /// as the call. /// /// \returns nullptr If this builtin is not a candidate for a rewrite i.e. /// it does not contain any pointer arguments without /// an address space qualifer. Otherwise the rewritten /// FunctionDecl is returned. /// TODO: Handle pointer return types. static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context, const FunctionDecl *FDecl, MultiExprArg ArgExprs) { QualType DeclType = FDecl->getType(); const FunctionProtoType *FT = dyn_cast(DeclType); if (!Context.BuiltinInfo.hasPtrArgsOrResult(FDecl->getBuiltinID()) || !FT || FT->isVariadic() || ArgExprs.size() != FT->getNumParams()) return nullptr; bool NeedsNewDecl = false; unsigned i = 0; SmallVector OverloadParams; for (QualType ParamType : FT->param_types()) { // Convert array arguments to pointer to simplify type lookup. Expr *Arg = Sema->DefaultFunctionArrayLvalueConversion(ArgExprs[i++]).get(); QualType ArgType = Arg->getType(); if (!ParamType->isPointerType() || ParamType.getQualifiers().hasAddressSpace() || !ArgType->isPointerType() || !ArgType->getPointeeType().getQualifiers().hasAddressSpace()) { OverloadParams.push_back(ParamType); continue; } NeedsNewDecl = true; unsigned AS = ArgType->getPointeeType().getQualifiers().getAddressSpace(); QualType PointeeType = ParamType->getPointeeType(); PointeeType = Context.getAddrSpaceQualType(PointeeType, AS); OverloadParams.push_back(Context.getPointerType(PointeeType)); } if (!NeedsNewDecl) return nullptr; FunctionProtoType::ExtProtoInfo EPI; QualType OverloadTy = Context.getFunctionType(FT->getReturnType(), OverloadParams, EPI); DeclContext *Parent = Context.getTranslationUnitDecl(); FunctionDecl *OverloadDecl = FunctionDecl::Create(Context, Parent, FDecl->getLocation(), FDecl->getLocation(), FDecl->getIdentifier(), OverloadTy, /*TInfo=*/nullptr, SC_Extern, false, /*hasPrototype=*/true); SmallVector Params; FT = cast(OverloadTy); for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) { QualType ParamType = FT->getParamType(i); ParmVarDecl *Parm = ParmVarDecl::Create(Context, OverloadDecl, SourceLocation(), SourceLocation(), nullptr, ParamType, /*TInfo=*/nullptr, SC_None, nullptr); Parm->setScopeInfo(0, i); Params.push_back(Parm); } OverloadDecl->setParams(Params); return OverloadDecl; } /// ActOnCallExpr - Handle a call to Fn with the specified array of arguments. /// This provides the location of the left/right parens and a list of comma /// locations. ExprResult Sema::ActOnCallExpr(Scope *S, Expr *Fn, SourceLocation LParenLoc, MultiExprArg ArgExprs, SourceLocation RParenLoc, Expr *ExecConfig, bool IsExecConfig) { // Since this might be a postfix expression, get rid of ParenListExprs. 
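  //
  // Illustrative example (not from the original source): if Fn is a
  // ParenListExpr such as (a, b, c), it is folded below into nested comma
  // operators, ((a , b) , c), so the call is ultimately built against c.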
ExprResult Result = MaybeConvertParenListExprToParenExpr(S, Fn); if (Result.isInvalid()) return ExprError(); Fn = Result.get(); if (checkArgsForPlaceholders(*this, ArgExprs)) return ExprError(); if (getLangOpts().CPlusPlus) { // If this is a pseudo-destructor expression, build the call immediately. if (isa(Fn)) { if (!ArgExprs.empty()) { // Pseudo-destructor calls should not have any arguments. Diag(Fn->getLocStart(), diag::err_pseudo_dtor_call_with_args) << FixItHint::CreateRemoval( SourceRange(ArgExprs.front()->getLocStart(), ArgExprs.back()->getLocEnd())); } return new (Context) CallExpr(Context, Fn, None, Context.VoidTy, VK_RValue, RParenLoc); } if (Fn->getType() == Context.PseudoObjectTy) { ExprResult result = CheckPlaceholderExpr(Fn); if (result.isInvalid()) return ExprError(); Fn = result.get(); } // Determine whether this is a dependent call inside a C++ template, // in which case we won't do any semantic analysis now. // FIXME: Will need to cache the results of name lookup (including ADL) in // Fn. bool Dependent = false; if (Fn->isTypeDependent()) Dependent = true; else if (Expr::hasAnyTypeDependentArguments(ArgExprs)) Dependent = true; if (Dependent) { if (ExecConfig) { return new (Context) CUDAKernelCallExpr( Context, Fn, cast(ExecConfig), ArgExprs, Context.DependentTy, VK_RValue, RParenLoc); } else { return new (Context) CallExpr( Context, Fn, ArgExprs, Context.DependentTy, VK_RValue, RParenLoc); } } // Determine whether this is a call to an object (C++ [over.call.object]). if (Fn->getType()->isRecordType()) return BuildCallToObjectOfClassType(S, Fn, LParenLoc, ArgExprs, RParenLoc); if (Fn->getType() == Context.UnknownAnyTy) { ExprResult result = rebuildUnknownAnyFunction(*this, Fn); if (result.isInvalid()) return ExprError(); Fn = result.get(); } if (Fn->getType() == Context.BoundMemberTy) { return BuildCallToMemberFunction(S, Fn, LParenLoc, ArgExprs, RParenLoc); } } // Check for overloaded calls. This can happen even in C due to extensions. if (Fn->getType() == Context.OverloadTy) { OverloadExpr::FindResult find = OverloadExpr::find(Fn); // We aren't supposed to apply this logic for if there's an '&' involved. if (!find.HasFormOfMemberPointer) { OverloadExpr *ovl = find.Expression; if (UnresolvedLookupExpr *ULE = dyn_cast(ovl)) return BuildOverloadedCallExpr(S, Fn, ULE, LParenLoc, ArgExprs, RParenLoc, ExecConfig, /*AllowTypoCorrection=*/true, find.IsAddressOfOperand); return BuildCallToMemberFunction(S, Fn, LParenLoc, ArgExprs, RParenLoc); } } // If we're directly calling a function, get the appropriate declaration. if (Fn->getType() == Context.UnknownAnyTy) { ExprResult result = rebuildUnknownAnyFunction(*this, Fn); if (result.isInvalid()) return ExprError(); Fn = result.get(); } Expr *NakedFn = Fn->IgnoreParens(); bool CallingNDeclIndirectly = false; NamedDecl *NDecl = nullptr; if (UnaryOperator *UnOp = dyn_cast(NakedFn)) { if (UnOp->getOpcode() == UO_AddrOf) { CallingNDeclIndirectly = true; NakedFn = UnOp->getSubExpr()->IgnoreParens(); } } if (isa(NakedFn)) { NDecl = cast(NakedFn)->getDecl(); FunctionDecl *FDecl = dyn_cast(NDecl); if (FDecl && FDecl->getBuiltinID()) { // Rewrite the function decl for this builtin by replacing parameters // with no explicit address space with the address space of the arguments // in ArgExprs. 
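      //
      // Illustrative example (not from the original source): in OpenCL,
      // passing a '__global int *' argument to a builtin declared with a
      // plain 'int *' parameter rewrites the declaration so that parameter
      // carries the __global address space as well.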
if ((FDecl = rewriteBuiltinFunctionDecl(this, Context, FDecl, ArgExprs))) { NDecl = FDecl; Fn = DeclRefExpr::Create(Context, FDecl->getQualifierLoc(), SourceLocation(), FDecl, false, SourceLocation(), FDecl->getType(), Fn->getValueKind(), FDecl); } } } else if (isa(NakedFn)) NDecl = cast(NakedFn)->getMemberDecl(); if (FunctionDecl *FD = dyn_cast_or_null(NDecl)) { if (CallingNDeclIndirectly && !checkAddressOfFunctionIsAvailable(FD, /*Complain=*/true, Fn->getLocStart())) return ExprError(); if (FD->hasAttr()) { if (const EnableIfAttr *Attr = CheckEnableIf(FD, ArgExprs, true)) { Diag(Fn->getLocStart(), isa(FD) ? diag::err_ovl_no_viable_member_function_in_call : diag::err_ovl_no_viable_function_in_call) << FD << FD->getSourceRange(); Diag(FD->getLocation(), diag::note_ovl_candidate_disabled_by_enable_if_attr) << Attr->getCond()->getSourceRange() << Attr->getMessage(); } } } return BuildResolvedCallExpr(Fn, NDecl, LParenLoc, ArgExprs, RParenLoc, ExecConfig, IsExecConfig); } /// ActOnAsTypeExpr - create a new asType (bitcast) from the arguments. /// /// __builtin_astype( value, dst type ) /// ExprResult Sema::ActOnAsTypeExpr(Expr *E, ParsedType ParsedDestTy, SourceLocation BuiltinLoc, SourceLocation RParenLoc) { ExprValueKind VK = VK_RValue; ExprObjectKind OK = OK_Ordinary; QualType DstTy = GetTypeFromParser(ParsedDestTy); QualType SrcTy = E->getType(); if (Context.getTypeSize(DstTy) != Context.getTypeSize(SrcTy)) return ExprError(Diag(BuiltinLoc, diag::err_invalid_astype_of_different_size) << DstTy << SrcTy << E->getSourceRange()); return new (Context) AsTypeExpr(E, DstTy, VK, OK, BuiltinLoc, RParenLoc); } /// ActOnConvertVectorExpr - create a new convert-vector expression from the /// provided arguments. /// /// __builtin_convertvector( value, dst type ) /// ExprResult Sema::ActOnConvertVectorExpr(Expr *E, ParsedType ParsedDestTy, SourceLocation BuiltinLoc, SourceLocation RParenLoc) { TypeSourceInfo *TInfo; GetTypeFromParser(ParsedDestTy, &TInfo); return SemaConvertVectorExpr(E, TInfo, BuiltinLoc, RParenLoc); } /// BuildResolvedCallExpr - Build a call to a resolved expression, /// i.e. an expression not of \p OverloadTy. The expression should /// unary-convert to an expression of function-pointer or /// block-pointer type. /// /// \param NDecl the declaration being called, if available ExprResult Sema::BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl, SourceLocation LParenLoc, ArrayRef Args, SourceLocation RParenLoc, Expr *Config, bool IsExecConfig) { FunctionDecl *FDecl = dyn_cast_or_null(NDecl); unsigned BuiltinID = (FDecl ? FDecl->getBuiltinID() : 0); // Promote the function operand. // We special-case function promotion here because we only allow promoting // builtin functions to function pointers in the callee of a call. ExprResult Result; if (BuiltinID && Fn->getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)) { Result = ImpCastExprToType(Fn, Context.getPointerType(FDecl->getType()), CK_BuiltinFnToFnPtr).get(); } else { Result = CallExprUnaryConversions(Fn); } if (Result.isInvalid()) return ExprError(); Fn = Result.get(); // Make the call expr early, before semantic checks. This guarantees cleanup // of arguments and function on error. 
CallExpr *TheCall; if (Config) TheCall = new (Context) CUDAKernelCallExpr(Context, Fn, cast(Config), Args, Context.BoolTy, VK_RValue, RParenLoc); else TheCall = new (Context) CallExpr(Context, Fn, Args, Context.BoolTy, VK_RValue, RParenLoc); if (!getLangOpts().CPlusPlus) { // C cannot always handle TypoExpr nodes in builtin calls and direct // function calls as their argument checking don't necessarily handle // dependent types properly, so make sure any TypoExprs have been // dealt with. ExprResult Result = CorrectDelayedTyposInExpr(TheCall); if (!Result.isUsable()) return ExprError(); TheCall = dyn_cast(Result.get()); if (!TheCall) return Result; Args = llvm::makeArrayRef(TheCall->getArgs(), TheCall->getNumArgs()); } // Bail out early if calling a builtin with custom typechecking. if (BuiltinID && Context.BuiltinInfo.hasCustomTypechecking(BuiltinID)) return CheckBuiltinFunctionCall(FDecl, BuiltinID, TheCall); retry: const FunctionType *FuncT; if (const PointerType *PT = Fn->getType()->getAs()) { // C99 6.5.2.2p1 - "The expression that denotes the called function shall // have type pointer to function". FuncT = PT->getPointeeType()->getAs(); if (!FuncT) return ExprError(Diag(LParenLoc, diag::err_typecheck_call_not_function) << Fn->getType() << Fn->getSourceRange()); } else if (const BlockPointerType *BPT = Fn->getType()->getAs()) { FuncT = BPT->getPointeeType()->castAs(); } else { // Handle calls to expressions of unknown-any type. if (Fn->getType() == Context.UnknownAnyTy) { ExprResult rewrite = rebuildUnknownAnyFunction(*this, Fn); if (rewrite.isInvalid()) return ExprError(); Fn = rewrite.get(); TheCall->setCallee(Fn); goto retry; } return ExprError(Diag(LParenLoc, diag::err_typecheck_call_not_function) << Fn->getType() << Fn->getSourceRange()); } if (getLangOpts().CUDA) { if (Config) { // CUDA: Kernel calls must be to global functions if (FDecl && !FDecl->hasAttr()) return ExprError(Diag(LParenLoc,diag::err_kern_call_not_global_function) << FDecl->getName() << Fn->getSourceRange()); // CUDA: Kernel function must have 'void' return type if (!FuncT->getReturnType()->isVoidType()) return ExprError(Diag(LParenLoc, diag::err_kern_type_not_void_return) << Fn->getType() << Fn->getSourceRange()); } else { // CUDA: Calls to global functions must be configured if (FDecl && FDecl->hasAttr()) return ExprError(Diag(LParenLoc, diag::err_global_call_not_config) << FDecl->getName() << Fn->getSourceRange()); } } // Check for a valid return type if (CheckCallReturnType(FuncT->getReturnType(), Fn->getLocStart(), TheCall, FDecl)) return ExprError(); // We know the result type of the call, set it. TheCall->setType(FuncT->getCallResultType(Context)); TheCall->setValueKind(Expr::getValueKindForType(FuncT->getReturnType())); const FunctionProtoType *Proto = dyn_cast(FuncT); if (Proto) { if (ConvertArgumentsForCall(TheCall, Fn, FDecl, Proto, Args, RParenLoc, IsExecConfig)) return ExprError(); } else { assert(isa(FuncT) && "Unknown FunctionType!"); if (FDecl) { // Check if we have too few/too many template arguments, based // on our knowledge of the function definition. 
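      //
      // Illustrative example (not from the original source): for a K&R-style
      // definition
      //   void f(a) int a; { }
      // a call f(1, 2) through an unprototyped declaration is only warned
      // about (warn_call_wrong_number_of_arguments), not rejected.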
const FunctionDecl *Def = nullptr; if (FDecl->hasBody(Def) && Args.size() != Def->param_size()) { Proto = Def->getType()->getAs(); if (!Proto || !(Proto->isVariadic() && Args.size() >= Def->param_size())) Diag(RParenLoc, diag::warn_call_wrong_number_of_arguments) << (Args.size() > Def->param_size()) << FDecl << Fn->getSourceRange(); } // If the function we're calling isn't a function prototype, but we have // a function prototype from a prior declaratiom, use that prototype. if (!FDecl->hasPrototype()) Proto = FDecl->getType()->getAs(); } // Promote the arguments (C99 6.5.2.2p6). for (unsigned i = 0, e = Args.size(); i != e; i++) { Expr *Arg = Args[i]; if (Proto && i < Proto->getNumParams()) { InitializedEntity Entity = InitializedEntity::InitializeParameter( Context, Proto->getParamType(i), Proto->isParamConsumed(i)); ExprResult ArgE = PerformCopyInitialization(Entity, SourceLocation(), Arg); if (ArgE.isInvalid()) return true; Arg = ArgE.getAs(); } else { ExprResult ArgE = DefaultArgumentPromotion(Arg); if (ArgE.isInvalid()) return true; Arg = ArgE.getAs(); } if (RequireCompleteType(Arg->getLocStart(), Arg->getType(), diag::err_call_incomplete_argument, Arg)) return ExprError(); TheCall->setArg(i, Arg); } } if (CXXMethodDecl *Method = dyn_cast_or_null(FDecl)) if (!Method->isStatic()) return ExprError(Diag(LParenLoc, diag::err_member_call_without_object) << Fn->getSourceRange()); // Check for sentinels if (NDecl) DiagnoseSentinelCalls(NDecl, LParenLoc, Args); // Do special checking on direct calls to functions. if (FDecl) { if (CheckFunctionCall(FDecl, TheCall, Proto)) return ExprError(); if (BuiltinID) return CheckBuiltinFunctionCall(FDecl, BuiltinID, TheCall); } else if (NDecl) { if (CheckPointerCall(NDecl, TheCall, Proto)) return ExprError(); } else { if (CheckOtherCall(TheCall, Proto)) return ExprError(); } return MaybeBindToTemporary(TheCall); } ExprResult Sema::ActOnCompoundLiteral(SourceLocation LParenLoc, ParsedType Ty, SourceLocation RParenLoc, Expr *InitExpr) { assert(Ty && "ActOnCompoundLiteral(): missing type"); assert(InitExpr && "ActOnCompoundLiteral(): missing expression"); TypeSourceInfo *TInfo; QualType literalType = GetTypeFromParser(Ty, &TInfo); if (!TInfo) TInfo = Context.getTrivialTypeSourceInfo(literalType); return BuildCompoundLiteralExpr(LParenLoc, TInfo, RParenLoc, InitExpr); } ExprResult Sema::BuildCompoundLiteralExpr(SourceLocation LParenLoc, TypeSourceInfo *TInfo, SourceLocation RParenLoc, Expr *LiteralExpr) { QualType literalType = TInfo->getType(); if (literalType->isArrayType()) { if (RequireCompleteType(LParenLoc, Context.getBaseElementType(literalType), diag::err_illegal_decl_array_incomplete_type, SourceRange(LParenLoc, LiteralExpr->getSourceRange().getEnd()))) return ExprError(); if (literalType->isVariableArrayType()) return ExprError(Diag(LParenLoc, diag::err_variable_object_no_init) << SourceRange(LParenLoc, LiteralExpr->getSourceRange().getEnd())); } else if (!literalType->isDependentType() && RequireCompleteType(LParenLoc, literalType, diag::err_typecheck_decl_incomplete_type, SourceRange(LParenLoc, LiteralExpr->getSourceRange().getEnd()))) return ExprError(); InitializedEntity Entity = InitializedEntity::InitializeCompoundLiteralInit(TInfo); InitializationKind Kind = InitializationKind::CreateCStyleCast(LParenLoc, SourceRange(LParenLoc, RParenLoc), /*InitList=*/true); InitializationSequence InitSeq(*this, Entity, Kind, LiteralExpr); ExprResult Result = InitSeq.Perform(*this, Entity, Kind, LiteralExpr, &literalType); if (Result.isInvalid()) return 
ExprError(); LiteralExpr = Result.get(); bool isFileScope = getCurFunctionOrMethodDecl() == nullptr; if (isFileScope && !LiteralExpr->isTypeDependent() && !LiteralExpr->isValueDependent() && !literalType->isDependentType()) { // 6.5.2.5p3 if (CheckForConstantInitializer(LiteralExpr, literalType)) return ExprError(); } // In C, compound literals are l-values for some reason. ExprValueKind VK = getLangOpts().CPlusPlus ? VK_RValue : VK_LValue; return MaybeBindToTemporary( new (Context) CompoundLiteralExpr(LParenLoc, TInfo, literalType, VK, LiteralExpr, isFileScope)); } ExprResult Sema::ActOnInitList(SourceLocation LBraceLoc, MultiExprArg InitArgList, SourceLocation RBraceLoc) { // Immediately handle non-overload placeholders. Overloads can be // resolved contextually, but everything else here can't. for (unsigned I = 0, E = InitArgList.size(); I != E; ++I) { if (InitArgList[I]->getType()->isNonOverloadPlaceholderType()) { ExprResult result = CheckPlaceholderExpr(InitArgList[I]); // Ignore failures; dropping the entire initializer list because // of one failure would be terrible for indexing/etc. if (result.isInvalid()) continue; InitArgList[I] = result.get(); } } // Semantic analysis for initializers is done by ActOnDeclarator() and // CheckInitializer() - it requires knowledge of the object being intialized. InitListExpr *E = new (Context) InitListExpr(Context, LBraceLoc, InitArgList, RBraceLoc); E->setType(Context.VoidTy); // FIXME: just a place holder for now. return E; } /// Do an explicit extend of the given block pointer if we're in ARC. void Sema::maybeExtendBlockObject(ExprResult &E) { assert(E.get()->getType()->isBlockPointerType()); assert(E.get()->isRValue()); // Only do this in an r-value context. if (!getLangOpts().ObjCAutoRefCount) return; E = ImplicitCastExpr::Create(Context, E.get()->getType(), CK_ARCExtendBlockObject, E.get(), /*base path*/ nullptr, VK_RValue); ExprNeedsCleanups = true; } /// Prepare a conversion of the given expression to an ObjC object /// pointer type. CastKind Sema::PrepareCastToObjCObjectPointer(ExprResult &E) { QualType type = E.get()->getType(); if (type->isObjCObjectPointerType()) { return CK_BitCast; } else if (type->isBlockPointerType()) { maybeExtendBlockObject(E); return CK_BlockPointerToObjCPointerCast; } else { assert(type->isPointerType()); return CK_CPointerToObjCPointerCast; } } /// Prepares for a scalar cast, performing all the necessary stages /// except the final cast and returning the kind required. CastKind Sema::PrepareScalarCast(ExprResult &Src, QualType DestTy) { // Both Src and Dest are scalar types, i.e. arithmetic or pointer. // Also, callers should have filtered out the invalid cases with // pointers. Everything else should be possible. QualType SrcTy = Src.get()->getType(); if (Context.hasSameUnqualifiedType(SrcTy, DestTy)) return CK_NoOp; switch (Type::ScalarTypeKind SrcKind = SrcTy->getScalarTypeKind()) { case Type::STK_MemberPointer: llvm_unreachable("member pointer type in C"); case Type::STK_CPointer: case Type::STK_BlockPointer: case Type::STK_ObjCObjectPointer: switch (DestTy->getScalarTypeKind()) { case Type::STK_CPointer: { unsigned SrcAS = SrcTy->getPointeeType().getAddressSpace(); unsigned DestAS = DestTy->getPointeeType().getAddressSpace(); if (SrcAS != DestAS) return CK_AddressSpaceConversion; return CK_BitCast; } case Type::STK_BlockPointer: return (SrcKind == Type::STK_BlockPointer ? 
CK_BitCast : CK_AnyPointerToBlockPointerCast); case Type::STK_ObjCObjectPointer: if (SrcKind == Type::STK_ObjCObjectPointer) return CK_BitCast; if (SrcKind == Type::STK_CPointer) return CK_CPointerToObjCPointerCast; maybeExtendBlockObject(Src); return CK_BlockPointerToObjCPointerCast; case Type::STK_Bool: return CK_PointerToBoolean; case Type::STK_Integral: return CK_PointerToIntegral; case Type::STK_Floating: case Type::STK_FloatingComplex: case Type::STK_IntegralComplex: case Type::STK_MemberPointer: llvm_unreachable("illegal cast from pointer"); } llvm_unreachable("Should have returned before this"); case Type::STK_Bool: // casting from bool is like casting from an integer case Type::STK_Integral: switch (DestTy->getScalarTypeKind()) { case Type::STK_CPointer: case Type::STK_ObjCObjectPointer: case Type::STK_BlockPointer: if (Src.get()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) return CK_NullToPointer; return CK_IntegralToPointer; case Type::STK_Bool: return CK_IntegralToBoolean; case Type::STK_Integral: return CK_IntegralCast; case Type::STK_Floating: return CK_IntegralToFloating; case Type::STK_IntegralComplex: Src = ImpCastExprToType(Src.get(), DestTy->castAs()->getElementType(), CK_IntegralCast); return CK_IntegralRealToComplex; case Type::STK_FloatingComplex: Src = ImpCastExprToType(Src.get(), DestTy->castAs()->getElementType(), CK_IntegralToFloating); return CK_FloatingRealToComplex; case Type::STK_MemberPointer: llvm_unreachable("member pointer type in C"); } llvm_unreachable("Should have returned before this"); case Type::STK_Floating: switch (DestTy->getScalarTypeKind()) { case Type::STK_Floating: return CK_FloatingCast; case Type::STK_Bool: return CK_FloatingToBoolean; case Type::STK_Integral: return CK_FloatingToIntegral; case Type::STK_FloatingComplex: Src = ImpCastExprToType(Src.get(), DestTy->castAs()->getElementType(), CK_FloatingCast); return CK_FloatingRealToComplex; case Type::STK_IntegralComplex: Src = ImpCastExprToType(Src.get(), DestTy->castAs()->getElementType(), CK_FloatingToIntegral); return CK_IntegralRealToComplex; case Type::STK_CPointer: case Type::STK_ObjCObjectPointer: case Type::STK_BlockPointer: llvm_unreachable("valid float->pointer cast?"); case Type::STK_MemberPointer: llvm_unreachable("member pointer type in C"); } llvm_unreachable("Should have returned before this"); case Type::STK_FloatingComplex: switch (DestTy->getScalarTypeKind()) { case Type::STK_FloatingComplex: return CK_FloatingComplexCast; case Type::STK_IntegralComplex: return CK_FloatingComplexToIntegralComplex; case Type::STK_Floating: { QualType ET = SrcTy->castAs()->getElementType(); if (Context.hasSameType(ET, DestTy)) return CK_FloatingComplexToReal; Src = ImpCastExprToType(Src.get(), ET, CK_FloatingComplexToReal); return CK_FloatingCast; } case Type::STK_Bool: return CK_FloatingComplexToBoolean; case Type::STK_Integral: Src = ImpCastExprToType(Src.get(), SrcTy->castAs()->getElementType(), CK_FloatingComplexToReal); return CK_FloatingToIntegral; case Type::STK_CPointer: case Type::STK_ObjCObjectPointer: case Type::STK_BlockPointer: llvm_unreachable("valid complex float->pointer cast?"); case Type::STK_MemberPointer: llvm_unreachable("member pointer type in C"); } llvm_unreachable("Should have returned before this"); case Type::STK_IntegralComplex: switch (DestTy->getScalarTypeKind()) { case Type::STK_FloatingComplex: return CK_IntegralComplexToFloatingComplex; case Type::STK_IntegralComplex: return CK_IntegralComplexCast; case Type::STK_Integral: { QualType ET = 
SrcTy->castAs()->getElementType(); if (Context.hasSameType(ET, DestTy)) return CK_IntegralComplexToReal; Src = ImpCastExprToType(Src.get(), ET, CK_IntegralComplexToReal); return CK_IntegralCast; } case Type::STK_Bool: return CK_IntegralComplexToBoolean; case Type::STK_Floating: Src = ImpCastExprToType(Src.get(), SrcTy->castAs()->getElementType(), CK_IntegralComplexToReal); return CK_IntegralToFloating; case Type::STK_CPointer: case Type::STK_ObjCObjectPointer: case Type::STK_BlockPointer: llvm_unreachable("valid complex int->pointer cast?"); case Type::STK_MemberPointer: llvm_unreachable("member pointer type in C"); } llvm_unreachable("Should have returned before this"); } llvm_unreachable("Unhandled scalar cast"); } static bool breakDownVectorType(QualType type, uint64_t &len, QualType &eltType) { // Vectors are simple. if (const VectorType *vecType = type->getAs()) { len = vecType->getNumElements(); eltType = vecType->getElementType(); assert(eltType->isScalarType()); return true; } // We allow lax conversion to and from non-vector types, but only if // they're real types (i.e. non-complex, non-pointer scalar types). if (!type->isRealType()) return false; len = 1; eltType = type; return true; } /// Are the two types lax-compatible vector types? That is, given /// that one of them is a vector, do they have equal storage sizes, /// where the storage size is the number of elements times the element /// size? /// /// This will also return false if either of the types is neither a /// vector nor a real type. bool Sema::areLaxCompatibleVectorTypes(QualType srcTy, QualType destTy) { assert(destTy->isVectorType() || srcTy->isVectorType()); // Disallow lax conversions between scalars and ExtVectors (these // conversions are allowed for other vector types because common headers // depend on them). Most scalar OP ExtVector cases are handled by the // splat path anyway, which does what we want (convert, not bitcast). // What this rules out for ExtVectors is crazy things like char4*float. if (srcTy->isScalarType() && destTy->isExtVectorType()) return false; if (destTy->isScalarType() && srcTy->isExtVectorType()) return false; uint64_t srcLen, destLen; QualType srcEltTy, destEltTy; if (!breakDownVectorType(srcTy, srcLen, srcEltTy)) return false; if (!breakDownVectorType(destTy, destLen, destEltTy)) return false; // ASTContext::getTypeSize will return the size rounded up to a // power of 2, so instead of using that, we need to use the raw // element size multiplied by the element count. uint64_t srcEltSize = Context.getTypeSize(srcEltTy); uint64_t destEltSize = Context.getTypeSize(destEltTy); return (srcLen * srcEltSize == destLen * destEltSize); } /// Is this a legal conversion between two types, one of which is /// known to be a vector type? bool Sema::isLaxVectorConversion(QualType srcTy, QualType destTy) { assert(destTy->isVectorType() || srcTy->isVectorType()); if (!Context.getLangOpts().LaxVectorConversions) return false; return areLaxCompatibleVectorTypes(srcTy, destTy); } bool Sema::CheckVectorCast(SourceRange R, QualType VectorTy, QualType Ty, CastKind &Kind) { assert(VectorTy->isVectorType() && "Not a vector type!"); if (Ty->isVectorType() || Ty->isIntegralType(Context)) { if (!areLaxCompatibleVectorTypes(Ty, VectorTy)) return Diag(R.getBegin(), Ty->isVectorType() ? 
diag::err_invalid_conversion_between_vectors : diag::err_invalid_conversion_between_vector_and_integer) << VectorTy << Ty << R; } else return Diag(R.getBegin(), diag::err_invalid_conversion_between_vector_and_scalar) << VectorTy << Ty << R; Kind = CK_BitCast; return false; } ExprResult Sema::prepareVectorSplat(QualType VectorTy, Expr *SplattedExpr) { QualType DestElemTy = VectorTy->castAs()->getElementType(); if (DestElemTy == SplattedExpr->getType()) return SplattedExpr; assert(DestElemTy->isFloatingType() || DestElemTy->isIntegralOrEnumerationType()); CastKind CK; if (VectorTy->isExtVectorType() && SplattedExpr->getType()->isBooleanType()) { // OpenCL requires that we convert `true` boolean expressions to -1, but // only when splatting vectors. if (DestElemTy->isFloatingType()) { // To avoid having to have a CK_BooleanToSignedFloating cast kind, we cast // in two steps: boolean to signed integral, then to floating. ExprResult CastExprRes = ImpCastExprToType(SplattedExpr, Context.IntTy, CK_BooleanToSignedIntegral); SplattedExpr = CastExprRes.get(); CK = CK_IntegralToFloating; } else { CK = CK_BooleanToSignedIntegral; } } else { ExprResult CastExprRes = SplattedExpr; CK = PrepareScalarCast(CastExprRes, DestElemTy); if (CastExprRes.isInvalid()) return ExprError(); SplattedExpr = CastExprRes.get(); } return ImpCastExprToType(SplattedExpr, DestElemTy, CK); } ExprResult Sema::CheckExtVectorCast(SourceRange R, QualType DestTy, Expr *CastExpr, CastKind &Kind) { assert(DestTy->isExtVectorType() && "Not an extended vector type!"); QualType SrcTy = CastExpr->getType(); // If SrcTy is a VectorType, the total size must match to explicitly cast to // an ExtVectorType. // In OpenCL, casts between vectors of different types are not allowed. // (See OpenCL 6.2). if (SrcTy->isVectorType()) { if (!areLaxCompatibleVectorTypes(SrcTy, DestTy) || (getLangOpts().OpenCL && (DestTy.getCanonicalType() != SrcTy.getCanonicalType()))) { Diag(R.getBegin(),diag::err_invalid_conversion_between_ext_vectors) << DestTy << SrcTy << R; return ExprError(); } Kind = CK_BitCast; return CastExpr; } // All non-pointer scalars can be cast to ExtVector type. The appropriate // conversion will take place first from scalar to elt type, and then // splat from elt type to vector. if (SrcTy->isPointerType()) return Diag(R.getBegin(), diag::err_invalid_conversion_between_vector_and_scalar) << DestTy << SrcTy << R; Kind = CK_VectorSplat; return prepareVectorSplat(DestTy, CastExpr); } ExprResult Sema::ActOnCastExpr(Scope *S, SourceLocation LParenLoc, Declarator &D, ParsedType &Ty, SourceLocation RParenLoc, Expr *CastExpr) { assert(!D.isInvalidType() && (CastExpr != nullptr) && "ActOnCastExpr(): missing type or expr"); TypeSourceInfo *castTInfo = GetTypeForDeclaratorCast(D, CastExpr->getType()); if (D.isInvalidType()) return ExprError(); if (getLangOpts().CPlusPlus) { // Check that there are no default arguments (C++ only). CheckExtraCXXDefaultArguments(D); } else { // Make sure any TypoExprs have been dealt with. ExprResult Res = CorrectDelayedTyposInExpr(CastExpr); if (!Res.isUsable()) return ExprError(); CastExpr = Res.get(); } checkUnusedDeclAttributes(D); QualType castType = castTInfo->getType(); Ty = CreateParsedType(castType, castTInfo); bool isVectorLiteral = false; // Check for an altivec or OpenCL literal, // i.e. all the elements are integer constants. 
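  //
  // Illustrative example (not from the original source), with AltiVec enabled:
  //   (vector int)(1, 2, 3, 4)   // element-wise vector literal
  //   (vector int)(5)            // single value, splatted to every element
  // Both are treated as vector literals below rather than ordinary casts.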
ParenExpr *PE = dyn_cast(CastExpr); ParenListExpr *PLE = dyn_cast(CastExpr); if ((getLangOpts().AltiVec || getLangOpts().ZVector || getLangOpts().OpenCL) && castType->isVectorType() && (PE || PLE)) { if (PLE && PLE->getNumExprs() == 0) { Diag(PLE->getExprLoc(), diag::err_altivec_empty_initializer); return ExprError(); } if (PE || PLE->getNumExprs() == 1) { Expr *E = (PE ? PE->getSubExpr() : PLE->getExpr(0)); if (!E->getType()->isVectorType()) isVectorLiteral = true; } else isVectorLiteral = true; } // If this is a vector initializer, '(' type ')' '(' init, ..., init ')' // then handle it as such. if (isVectorLiteral) return BuildVectorLiteral(LParenLoc, RParenLoc, CastExpr, castTInfo); // If the Expr being casted is a ParenListExpr, handle it specially. // This is not an AltiVec-style cast, so turn the ParenListExpr into a // sequence of BinOp comma operators. if (isa(CastExpr)) { ExprResult Result = MaybeConvertParenListExprToParenExpr(S, CastExpr); if (Result.isInvalid()) return ExprError(); CastExpr = Result.get(); } if (getLangOpts().CPlusPlus && !castType->isVoidType() && !getSourceManager().isInSystemMacro(LParenLoc)) Diag(LParenLoc, diag::warn_old_style_cast) << CastExpr->getSourceRange(); CheckTollFreeBridgeCast(castType, CastExpr); CheckObjCBridgeRelatedCast(castType, CastExpr); return BuildCStyleCastExpr(LParenLoc, castTInfo, RParenLoc, CastExpr); } ExprResult Sema::BuildVectorLiteral(SourceLocation LParenLoc, SourceLocation RParenLoc, Expr *E, TypeSourceInfo *TInfo) { assert((isa(E) || isa(E)) && "Expected paren or paren list expression"); Expr **exprs; unsigned numExprs; Expr *subExpr; SourceLocation LiteralLParenLoc, LiteralRParenLoc; if (ParenListExpr *PE = dyn_cast(E)) { LiteralLParenLoc = PE->getLParenLoc(); LiteralRParenLoc = PE->getRParenLoc(); exprs = PE->getExprs(); numExprs = PE->getNumExprs(); } else { // isa by assertion at function entrance LiteralLParenLoc = cast(E)->getLParen(); LiteralRParenLoc = cast(E)->getRParen(); subExpr = cast(E)->getSubExpr(); exprs = &subExpr; numExprs = 1; } QualType Ty = TInfo->getType(); assert(Ty->isVectorType() && "Expected vector type"); SmallVector initExprs; const VectorType *VTy = Ty->getAs(); unsigned numElems = Ty->getAs()->getNumElements(); // '(...)' form of vector initialization in AltiVec: the number of // initializers must be one or must match the size of the vector. // If a single value is specified in the initializer then it will be // replicated to all the components of the vector if (VTy->getVectorKind() == VectorType::AltiVecVector) { // The number of initializers must be one or must match the size of the // vector. If a single value is specified in the initializer then it will // be replicated to all the components of the vector if (numExprs == 1) { QualType ElemTy = Ty->getAs()->getElementType(); ExprResult Literal = DefaultLvalueConversion(exprs[0]); if (Literal.isInvalid()) return ExprError(); Literal = ImpCastExprToType(Literal.get(), ElemTy, PrepareScalarCast(Literal, ElemTy)); return BuildCStyleCastExpr(LParenLoc, TInfo, RParenLoc, Literal.get()); } else if (numExprs < numElems) { Diag(E->getExprLoc(), diag::err_incorrect_number_of_vector_initializers); return ExprError(); } else initExprs.append(exprs, exprs + numExprs); } else { // For OpenCL, when the number of initializers is a single value, // it will be replicated to all components of the vector. 
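    //
    // Illustrative example (not from the original source), OpenCL:
    //   (int4)(7)          // becomes { 7, 7, 7, 7 }
    //   (int4)(1, 2, 3, 4) // initializes the four components individually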
if (getLangOpts().OpenCL && VTy->getVectorKind() == VectorType::GenericVector && numExprs == 1) { QualType ElemTy = Ty->getAs()->getElementType(); ExprResult Literal = DefaultLvalueConversion(exprs[0]); if (Literal.isInvalid()) return ExprError(); Literal = ImpCastExprToType(Literal.get(), ElemTy, PrepareScalarCast(Literal, ElemTy)); return BuildCStyleCastExpr(LParenLoc, TInfo, RParenLoc, Literal.get()); } initExprs.append(exprs, exprs + numExprs); } // FIXME: This means that pretty-printing the final AST will produce curly // braces instead of the original commas. InitListExpr *initE = new (Context) InitListExpr(Context, LiteralLParenLoc, initExprs, LiteralRParenLoc); initE->setType(Ty); return BuildCompoundLiteralExpr(LParenLoc, TInfo, RParenLoc, initE); } /// This is not an AltiVec-style cast or or C++ direct-initialization, so turn /// the ParenListExpr into a sequence of comma binary operators. ExprResult Sema::MaybeConvertParenListExprToParenExpr(Scope *S, Expr *OrigExpr) { ParenListExpr *E = dyn_cast(OrigExpr); if (!E) return OrigExpr; ExprResult Result(E->getExpr(0)); for (unsigned i = 1, e = E->getNumExprs(); i != e && !Result.isInvalid(); ++i) Result = ActOnBinOp(S, E->getExprLoc(), tok::comma, Result.get(), E->getExpr(i)); if (Result.isInvalid()) return ExprError(); return ActOnParenExpr(E->getLParenLoc(), E->getRParenLoc(), Result.get()); } ExprResult Sema::ActOnParenListExpr(SourceLocation L, SourceLocation R, MultiExprArg Val) { Expr *expr = new (Context) ParenListExpr(Context, L, Val, R); return expr; } /// \brief Emit a specialized diagnostic when one expression is a null pointer /// constant and the other is not a pointer. Returns true if a diagnostic is /// emitted. bool Sema::DiagnoseConditionalForNull(Expr *LHSExpr, Expr *RHSExpr, SourceLocation QuestionLoc) { Expr *NullExpr = LHSExpr; Expr *NonPointerExpr = RHSExpr; Expr::NullPointerConstantKind NullKind = NullExpr->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNotNull); if (NullKind == Expr::NPCK_NotNull) { NullExpr = RHSExpr; NonPointerExpr = LHSExpr; NullKind = NullExpr->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNotNull); } if (NullKind == Expr::NPCK_NotNull) return false; if (NullKind == Expr::NPCK_ZeroExpression) return false; if (NullKind == Expr::NPCK_ZeroLiteral) { // In this case, check to make sure that we got here from a "NULL" // string in the source code. NullExpr = NullExpr->IgnoreParenImpCasts(); SourceLocation loc = NullExpr->getExprLoc(); if (!findMacroSpelling(loc, "NULL")) return false; } int DiagType = (NullKind == Expr::NPCK_CXX11_nullptr); Diag(QuestionLoc, diag::err_typecheck_cond_incompatible_operands_null) << NonPointerExpr->getType() << DiagType << NonPointerExpr->getSourceRange(); return true; } /// \brief Return false if the condition expression is valid, true otherwise. static bool checkCondition(Sema &S, Expr *Cond, SourceLocation QuestionLoc) { QualType CondTy = Cond->getType(); // OpenCL v1.1 s6.3.i says the condition cannot be a floating point type. if (S.getLangOpts().OpenCL && CondTy->isFloatingType()) { S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_nonfloat) << CondTy << Cond->getSourceRange(); return true; } // C99 6.5.15p2 if (CondTy->isScalarType()) return false; S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_scalar) << CondTy << Cond->getSourceRange(); return true; } /// \brief Handle when one or both operands are void type. 
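/// For example, 'cond ? (void)0 : f()' where 'f' returns 'int' is accepted as
/// an extension: an ext_typecheck_cond_one_void diagnostic is emitted, both
/// operands are converted to 'void', and the conditional itself has type
/// 'void'. ('cond' and 'f' are illustrative names.)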
static QualType checkConditionalVoidType(Sema &S, ExprResult &LHS, ExprResult &RHS) { Expr *LHSExpr = LHS.get(); Expr *RHSExpr = RHS.get(); if (!LHSExpr->getType()->isVoidType()) S.Diag(RHSExpr->getLocStart(), diag::ext_typecheck_cond_one_void) << RHSExpr->getSourceRange(); if (!RHSExpr->getType()->isVoidType()) S.Diag(LHSExpr->getLocStart(), diag::ext_typecheck_cond_one_void) << LHSExpr->getSourceRange(); LHS = S.ImpCastExprToType(LHS.get(), S.Context.VoidTy, CK_ToVoid); RHS = S.ImpCastExprToType(RHS.get(), S.Context.VoidTy, CK_ToVoid); return S.Context.VoidTy; } /// \brief Return false if the NullExpr can be promoted to PointerTy, /// true otherwise. static bool checkConditionalNullPointer(Sema &S, ExprResult &NullExpr, QualType PointerTy) { if ((!PointerTy->isAnyPointerType() && !PointerTy->isBlockPointerType()) || !NullExpr.get()->isNullPointerConstant(S.Context, Expr::NPC_ValueDependentIsNull)) return true; NullExpr = S.ImpCastExprToType(NullExpr.get(), PointerTy, CK_NullToPointer); return false; } /// \brief Checks compatibility between two pointers and return the resulting /// type. static QualType checkConditionalPointerCompatibility(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc) { QualType LHSTy = LHS.get()->getType(); QualType RHSTy = RHS.get()->getType(); if (S.Context.hasSameType(LHSTy, RHSTy)) { // Two identical pointers types are always compatible. return LHSTy; } QualType lhptee, rhptee; // Get the pointee types. bool IsBlockPointer = false; if (const BlockPointerType *LHSBTy = LHSTy->getAs()) { lhptee = LHSBTy->getPointeeType(); rhptee = RHSTy->castAs()->getPointeeType(); IsBlockPointer = true; } else { lhptee = LHSTy->castAs()->getPointeeType(); rhptee = RHSTy->castAs()->getPointeeType(); } // C99 6.5.15p6: If both operands are pointers to compatible types or to // differently qualified versions of compatible types, the result type is // a pointer to an appropriately qualified version of the composite // type. // Only CVR-qualifiers exist in the standard, and the differently-qualified // clause doesn't make sense for our extensions. E.g. address space 2 should // be incompatible with address space 3: they may live on different devices or // anything. Qualifiers lhQual = lhptee.getQualifiers(); Qualifiers rhQual = rhptee.getQualifiers(); unsigned MergedCVRQual = lhQual.getCVRQualifiers() | rhQual.getCVRQualifiers(); lhQual.removeCVRQualifiers(); rhQual.removeCVRQualifiers(); lhptee = S.Context.getQualifiedType(lhptee.getUnqualifiedType(), lhQual); rhptee = S.Context.getQualifiedType(rhptee.getUnqualifiedType(), rhQual); QualType CompositeTy = S.Context.mergeTypes(lhptee, rhptee); if (CompositeTy.isNull()) { S.Diag(Loc, diag::ext_typecheck_cond_incompatible_pointers) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); // In this situation, we assume void* type. No especially good // reason, but this is what gcc does, and we do have to pick // to get a consistent AST. QualType incompatTy = S.Context.getPointerType(S.Context.VoidTy); LHS = S.ImpCastExprToType(LHS.get(), incompatTy, CK_BitCast); RHS = S.ImpCastExprToType(RHS.get(), incompatTy, CK_BitCast); return incompatTy; } // The pointer types are compatible. 
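// For example (illustrative): given 'const int *p' and 'volatile int *q',
// 'cond ? p : q' merges the pointee qualifiers, so the result built below has
// type 'const volatile int *'.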
QualType ResultTy = CompositeTy.withCVRQualifiers(MergedCVRQual); if (IsBlockPointer) ResultTy = S.Context.getBlockPointerType(ResultTy); else ResultTy = S.Context.getPointerType(ResultTy); LHS = S.ImpCastExprToType(LHS.get(), ResultTy, CK_BitCast); RHS = S.ImpCastExprToType(RHS.get(), ResultTy, CK_BitCast); return ResultTy; } /// \brief Return the resulting type when the operands are both block pointers. static QualType checkConditionalBlockPointerCompatibility(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc) { QualType LHSTy = LHS.get()->getType(); QualType RHSTy = RHS.get()->getType(); if (!LHSTy->isBlockPointerType() || !RHSTy->isBlockPointerType()) { if (LHSTy->isVoidPointerType() || RHSTy->isVoidPointerType()) { QualType destType = S.Context.getPointerType(S.Context.VoidTy); LHS = S.ImpCastExprToType(LHS.get(), destType, CK_BitCast); RHS = S.ImpCastExprToType(RHS.get(), destType, CK_BitCast); return destType; } S.Diag(Loc, diag::err_typecheck_cond_incompatible_operands) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } // We have 2 block pointer types. return checkConditionalPointerCompatibility(S, LHS, RHS, Loc); } /// \brief Return the resulting type when the operands are both pointers. static QualType checkConditionalObjectPointersCompatibility(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc) { // get the pointer types QualType LHSTy = LHS.get()->getType(); QualType RHSTy = RHS.get()->getType(); // get the "pointed to" types QualType lhptee = LHSTy->getAs()->getPointeeType(); QualType rhptee = RHSTy->getAs()->getPointeeType(); // ignore qualifiers on void (C99 6.5.15p3, clause 6) if (lhptee->isVoidType() && rhptee->isIncompleteOrObjectType()) { // Figure out necessary qualifiers (C99 6.5.15p6) QualType destPointee = S.Context.getQualifiedType(lhptee, rhptee.getQualifiers()); QualType destType = S.Context.getPointerType(destPointee); // Add qualifiers if necessary. LHS = S.ImpCastExprToType(LHS.get(), destType, CK_NoOp); // Promote to void*. RHS = S.ImpCastExprToType(RHS.get(), destType, CK_BitCast); return destType; } if (rhptee->isVoidType() && lhptee->isIncompleteOrObjectType()) { QualType destPointee = S.Context.getQualifiedType(rhptee, lhptee.getQualifiers()); QualType destType = S.Context.getPointerType(destPointee); // Add qualifiers if necessary. RHS = S.ImpCastExprToType(RHS.get(), destType, CK_NoOp); // Promote to void*. LHS = S.ImpCastExprToType(LHS.get(), destType, CK_BitCast); return destType; } return checkConditionalPointerCompatibility(S, LHS, RHS, Loc); } /// \brief Return false if the first expression is not an integer and the second /// expression is not a pointer, true otherwise. static bool checkPointerIntegerMismatch(Sema &S, ExprResult &Int, Expr* PointerExpr, SourceLocation Loc, bool IsIntFirstExpr) { if (!PointerExpr->getType()->isPointerType() || !Int.get()->getType()->isIntegerType()) return false; Expr *Expr1 = IsIntFirstExpr ? Int.get() : PointerExpr; Expr *Expr2 = IsIntFirstExpr ? PointerExpr : Int.get(); S.Diag(Loc, diag::ext_typecheck_cond_pointer_integer_mismatch) << Expr1->getType() << Expr2->getType() << Expr1->getSourceRange() << Expr2->getSourceRange(); Int = S.ImpCastExprToType(Int.get(), PointerExpr->getType(), CK_IntegralToPointer); return true; } /// \brief Simple conversion between integer and floating point types. /// /// Used when handling the OpenCL conditional operator where the /// condition is a vector while the other operands are scalar. 
/// /// OpenCL v1.1 s6.3.i and s6.11.6 together require that the scalar /// types are either integer or floating type. Between the two /// operands, the type with the higher rank is defined as the "result /// type". The other operand needs to be promoted to the same type. No /// other type promotion is allowed. We cannot use /// UsualArithmeticConversions() for this purpose, since it always /// promotes promotable types. static QualType OpenCLArithmeticConversions(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation QuestionLoc) { LHS = S.DefaultFunctionArrayLvalueConversion(LHS.get()); if (LHS.isInvalid()) return QualType(); RHS = S.DefaultFunctionArrayLvalueConversion(RHS.get()); if (RHS.isInvalid()) return QualType(); // For conversion purposes, we ignore any qualifiers. // For example, "const float" and "float" are equivalent. QualType LHSType = S.Context.getCanonicalType(LHS.get()->getType()).getUnqualifiedType(); QualType RHSType = S.Context.getCanonicalType(RHS.get()->getType()).getUnqualifiedType(); if (!LHSType->isIntegerType() && !LHSType->isRealFloatingType()) { S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_int_float) << LHSType << LHS.get()->getSourceRange(); return QualType(); } if (!RHSType->isIntegerType() && !RHSType->isRealFloatingType()) { S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_int_float) << RHSType << RHS.get()->getSourceRange(); return QualType(); } // If both types are identical, no conversion is needed. if (LHSType == RHSType) return LHSType; // Now handle "real" floating types (i.e. float, double, long double). if (LHSType->isRealFloatingType() || RHSType->isRealFloatingType()) return handleFloatConversion(S, LHS, RHS, LHSType, RHSType, /*IsCompAssign = */ false); // Finally, we have two differing integer types. return handleIntegerConversion (S, LHS, RHS, LHSType, RHSType, /*IsCompAssign = */ false); } /// \brief Convert scalar operands to a vector that matches the /// condition in length. /// /// Used when handling the OpenCL conditional operator where the /// condition is a vector while the other operands are scalar. /// /// We first compute the "result type" for the scalar operands /// according to OpenCL v1.1 s6.3.i. Both operands are then converted /// into a vector of that type where the length matches the condition /// vector type. s6.11.6 requires that the element types of the result /// and the condition must have the same number of bits. static QualType OpenCLConvertScalarsToVectors(Sema &S, ExprResult &LHS, ExprResult &RHS, QualType CondTy, SourceLocation QuestionLoc) { QualType ResTy = OpenCLArithmeticConversions(S, LHS, RHS, QuestionLoc); if (ResTy.isNull()) return QualType(); const VectorType *CV = CondTy->getAs(); assert(CV); // Determine the vector result type unsigned NumElements = CV->getNumElements(); QualType VectorTy = S.Context.getExtVectorType(ResTy, NumElements); // Ensure that all types have the same number of bits if (S.Context.getTypeSize(CV->getElementType()) != S.Context.getTypeSize(ResTy)) { // Since VectorTy is created internally, it does not pretty print // with an OpenCL name. Instead, we just print a description. 
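// For example (illustrative): with a 'long4' condition and two 'float'
// scalars, the 64-bit condition elements do not match the 32-bit result
// elements, and the diagnostic describes the would-be result as
// "(vector of 4 'float' values)" instead of an OpenCL type name.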
std::string EleTyName = ResTy.getUnqualifiedType().getAsString(); SmallString<64> Str; llvm::raw_svector_ostream OS(Str); OS << "(vector of " << NumElements << " '" << EleTyName << "' values)"; S.Diag(QuestionLoc, diag::err_conditional_vector_element_size) << CondTy << OS.str(); return QualType(); } // Convert operands to the vector result type LHS = S.ImpCastExprToType(LHS.get(), VectorTy, CK_VectorSplat); RHS = S.ImpCastExprToType(RHS.get(), VectorTy, CK_VectorSplat); return VectorTy; } /// \brief Return false if this is a valid OpenCL condition vector static bool checkOpenCLConditionVector(Sema &S, Expr *Cond, SourceLocation QuestionLoc) { // OpenCL v1.1 s6.11.6 says the elements of the vector must be of // integral type. const VectorType *CondTy = Cond->getType()->getAs(); assert(CondTy); QualType EleTy = CondTy->getElementType(); if (EleTy->isIntegerType()) return false; S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_nonfloat) << Cond->getType() << Cond->getSourceRange(); return true; } /// \brief Return false if the vector condition type and the vector /// result type are compatible. /// /// OpenCL v1.1 s6.11.6 requires that both vector types have the same /// number of elements, and their element types have the same number /// of bits. static bool checkVectorResult(Sema &S, QualType CondTy, QualType VecResTy, SourceLocation QuestionLoc) { const VectorType *CV = CondTy->getAs(); const VectorType *RV = VecResTy->getAs(); assert(CV && RV); if (CV->getNumElements() != RV->getNumElements()) { S.Diag(QuestionLoc, diag::err_conditional_vector_size) << CondTy << VecResTy; return true; } QualType CVE = CV->getElementType(); QualType RVE = RV->getElementType(); if (S.Context.getTypeSize(CVE) != S.Context.getTypeSize(RVE)) { S.Diag(QuestionLoc, diag::err_conditional_vector_element_size) << CondTy << VecResTy; return true; } return false; } /// \brief Return the resulting type for the conditional operator in /// OpenCL (aka "ternary selection operator", OpenCL v1.1 /// s6.3.i) when the condition is a vector type. static QualType OpenCLCheckVectorConditional(Sema &S, ExprResult &Cond, ExprResult &LHS, ExprResult &RHS, SourceLocation QuestionLoc) { Cond = S.DefaultFunctionArrayLvalueConversion(Cond.get()); if (Cond.isInvalid()) return QualType(); QualType CondTy = Cond.get()->getType(); if (checkOpenCLConditionVector(S, Cond.get(), QuestionLoc)) return QualType(); // If either operand is a vector then find the vector type of the // result as specified in OpenCL v1.1 s6.3.i. if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) { QualType VecResTy = S.CheckVectorOperands(LHS, RHS, QuestionLoc, /*isCompAssign*/false, /*AllowBothBool*/true, /*AllowBoolConversions*/false); if (VecResTy.isNull()) return QualType(); // The result type must match the condition type as specified in // OpenCL v1.1 s6.11.6. if (checkVectorResult(S, CondTy, VecResTy, QuestionLoc)) return QualType(); return VecResTy; } // Both operands are scalar. return OpenCLConvertScalarsToVectors(S, LHS, RHS, CondTy, QuestionLoc); } /// Note that LHS is not null here, even if this is the gnu "x ?: y" extension. /// In that case, LHS = cond. 
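/// For example, 'p ?: q' is analyzed as if it were 'p ? p : q', except that
/// ActOnConditionalOp wraps 'p' in an OpaqueValueExpr so it is evaluated only
/// once; by the time this function runs, LHS and Cond refer to that same
/// opaque value.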
/// C99 6.5.15 QualType Sema::CheckConditionalOperands(ExprResult &Cond, ExprResult &LHS, ExprResult &RHS, ExprValueKind &VK, ExprObjectKind &OK, SourceLocation QuestionLoc) { ExprResult LHSResult = CheckPlaceholderExpr(LHS.get()); if (!LHSResult.isUsable()) return QualType(); LHS = LHSResult; ExprResult RHSResult = CheckPlaceholderExpr(RHS.get()); if (!RHSResult.isUsable()) return QualType(); RHS = RHSResult; // C++ is sufficiently different to merit its own checker. if (getLangOpts().CPlusPlus) return CXXCheckConditionalOperands(Cond, LHS, RHS, VK, OK, QuestionLoc); VK = VK_RValue; OK = OK_Ordinary; // The OpenCL operator with a vector condition is sufficiently // different to merit its own checker. if (getLangOpts().OpenCL && Cond.get()->getType()->isVectorType()) return OpenCLCheckVectorConditional(*this, Cond, LHS, RHS, QuestionLoc); // First, check the condition. Cond = UsualUnaryConversions(Cond.get()); if (Cond.isInvalid()) return QualType(); if (checkCondition(*this, Cond.get(), QuestionLoc)) return QualType(); // Now check the two expressions. if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) return CheckVectorOperands(LHS, RHS, QuestionLoc, /*isCompAssign*/false, /*AllowBothBool*/true, /*AllowBoolConversions*/false); QualType ResTy = UsualArithmeticConversions(LHS, RHS); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); QualType LHSTy = LHS.get()->getType(); QualType RHSTy = RHS.get()->getType(); // If both operands have arithmetic type, do the usual arithmetic conversions // to find a common type: C99 6.5.15p3,5. if (LHSTy->isArithmeticType() && RHSTy->isArithmeticType()) { LHS = ImpCastExprToType(LHS.get(), ResTy, PrepareScalarCast(LHS, ResTy)); RHS = ImpCastExprToType(RHS.get(), ResTy, PrepareScalarCast(RHS, ResTy)); return ResTy; } // If both operands are the same structure or union type, the result is that // type. if (const RecordType *LHSRT = LHSTy->getAs()) { // C99 6.5.15p3 if (const RecordType *RHSRT = RHSTy->getAs()) if (LHSRT->getDecl() == RHSRT->getDecl()) // "If both the operands have structure or union type, the result has // that type." This implies that CV qualifiers are dropped. return LHSTy.getUnqualifiedType(); // FIXME: Type of conditional expression must be complete in C mode. } // C99 6.5.15p5: "If both operands have void type, the result has void type." // The following || allows only one side to be void (a GCC-ism). if (LHSTy->isVoidType() || RHSTy->isVoidType()) { return checkConditionalVoidType(*this, LHS, RHS); } // C99 6.5.15p6 - "if one operand is a null pointer constant, the result has // the type of the other operand." if (!checkConditionalNullPointer(*this, RHS, LHSTy)) return LHSTy; if (!checkConditionalNullPointer(*this, LHS, RHSTy)) return RHSTy; // All objective-c pointer type analysis is done here. QualType compositeType = FindCompositeObjCPointerType(LHS, RHS, QuestionLoc); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); if (!compositeType.isNull()) return compositeType; // Handle block pointer types. if (LHSTy->isBlockPointerType() || RHSTy->isBlockPointerType()) return checkConditionalBlockPointerCompatibility(*this, LHS, RHS, QuestionLoc); // Check constraints for C object pointers types (C99 6.5.15p3,6). if (LHSTy->isPointerType() && RHSTy->isPointerType()) return checkConditionalObjectPointersCompatibility(*this, LHS, RHS, QuestionLoc); // GCC compatibility: soften pointer/integer mismatch. Note that // null pointers have been filtered out by this point. 
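// For example (illustrative): with 'int *p', the expression 'cond ? p : 1'
// only warns (ext_typecheck_cond_pointer_integer_mismatch); the integer
// operand is implicitly converted to 'int *' and the result type is 'int *'.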
if (checkPointerIntegerMismatch(*this, LHS, RHS.get(), QuestionLoc, /*isIntFirstExpr=*/true)) return RHSTy; if (checkPointerIntegerMismatch(*this, RHS, LHS.get(), QuestionLoc, /*isIntFirstExpr=*/false)) return LHSTy; // Emit a better diagnostic if one of the expressions is a null pointer // constant and the other is not a pointer type. In this case, the user most // likely forgot to take the address of the other expression. if (DiagnoseConditionalForNull(LHS.get(), RHS.get(), QuestionLoc)) return QualType(); // Otherwise, the operands are not compatible. Diag(QuestionLoc, diag::err_typecheck_cond_incompatible_operands) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } /// FindCompositeObjCPointerType - Helper method to find composite type of /// two objective-c pointer types of the two input expressions. QualType Sema::FindCompositeObjCPointerType(ExprResult &LHS, ExprResult &RHS, SourceLocation QuestionLoc) { QualType LHSTy = LHS.get()->getType(); QualType RHSTy = RHS.get()->getType(); // Handle things like Class and struct objc_class*. Here we case the result // to the pseudo-builtin, because that will be implicitly cast back to the // redefinition type if an attempt is made to access its fields. if (LHSTy->isObjCClassType() && (Context.hasSameType(RHSTy, Context.getObjCClassRedefinitionType()))) { RHS = ImpCastExprToType(RHS.get(), LHSTy, CK_CPointerToObjCPointerCast); return LHSTy; } if (RHSTy->isObjCClassType() && (Context.hasSameType(LHSTy, Context.getObjCClassRedefinitionType()))) { LHS = ImpCastExprToType(LHS.get(), RHSTy, CK_CPointerToObjCPointerCast); return RHSTy; } // And the same for struct objc_object* / id if (LHSTy->isObjCIdType() && (Context.hasSameType(RHSTy, Context.getObjCIdRedefinitionType()))) { RHS = ImpCastExprToType(RHS.get(), LHSTy, CK_CPointerToObjCPointerCast); return LHSTy; } if (RHSTy->isObjCIdType() && (Context.hasSameType(LHSTy, Context.getObjCIdRedefinitionType()))) { LHS = ImpCastExprToType(LHS.get(), RHSTy, CK_CPointerToObjCPointerCast); return RHSTy; } // And the same for struct objc_selector* / SEL if (Context.isObjCSelType(LHSTy) && (Context.hasSameType(RHSTy, Context.getObjCSelRedefinitionType()))) { RHS = ImpCastExprToType(RHS.get(), LHSTy, CK_BitCast); return LHSTy; } if (Context.isObjCSelType(RHSTy) && (Context.hasSameType(LHSTy, Context.getObjCSelRedefinitionType()))) { LHS = ImpCastExprToType(LHS.get(), RHSTy, CK_BitCast); return RHSTy; } // Check constraints for Objective-C object pointers types. if (LHSTy->isObjCObjectPointerType() && RHSTy->isObjCObjectPointerType()) { if (Context.getCanonicalType(LHSTy) == Context.getCanonicalType(RHSTy)) { // Two identical object pointer types are always compatible. return LHSTy; } const ObjCObjectPointerType *LHSOPT = LHSTy->castAs(); const ObjCObjectPointerType *RHSOPT = RHSTy->castAs(); QualType compositeType = LHSTy; // If both operands are interfaces and either operand can be // assigned to the other, use that type as the composite // type. This allows // xxx ? (A*) a : (B*) b // where B is a subclass of A. // // Additionally, as for assignment, if either type is 'id' // allow silent coercion. Finally, if the types are // incompatible then make sure to use 'id' as the composite // type so the result is acceptable for sending messages to. // FIXME: Consider unifying with 'areComparableObjCPointerTypes'. // It could return the composite type. if (!(compositeType = Context.areCommonBaseCompatible(LHSOPT, RHSOPT)).isNull()) { // Nothing more to do. 
} else if (Context.canAssignObjCInterfaces(LHSOPT, RHSOPT)) { compositeType = RHSOPT->isObjCBuiltinType() ? RHSTy : LHSTy; } else if (Context.canAssignObjCInterfaces(RHSOPT, LHSOPT)) { compositeType = LHSOPT->isObjCBuiltinType() ? LHSTy : RHSTy; } else if ((LHSTy->isObjCQualifiedIdType() || RHSTy->isObjCQualifiedIdType()) && Context.ObjCQualifiedIdTypesAreCompatible(LHSTy, RHSTy, true)) { // Need to handle "id" explicitly. // GCC allows qualified id and any Objective-C type to devolve to // id. Currently localizing to here until clear this should be // part of ObjCQualifiedIdTypesAreCompatible. compositeType = Context.getObjCIdType(); } else if (LHSTy->isObjCIdType() || RHSTy->isObjCIdType()) { compositeType = Context.getObjCIdType(); } else { Diag(QuestionLoc, diag::ext_typecheck_cond_incompatible_operands) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); QualType incompatTy = Context.getObjCIdType(); LHS = ImpCastExprToType(LHS.get(), incompatTy, CK_BitCast); RHS = ImpCastExprToType(RHS.get(), incompatTy, CK_BitCast); return incompatTy; } // The object pointer types are compatible. LHS = ImpCastExprToType(LHS.get(), compositeType, CK_BitCast); RHS = ImpCastExprToType(RHS.get(), compositeType, CK_BitCast); return compositeType; } // Check Objective-C object pointer types and 'void *' if (LHSTy->isVoidPointerType() && RHSTy->isObjCObjectPointerType()) { if (getLangOpts().ObjCAutoRefCount) { // ARC forbids the implicit conversion of object pointers to 'void *', // so these types are not compatible. Diag(QuestionLoc, diag::err_cond_voidptr_arc) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); LHS = RHS = true; return QualType(); } QualType lhptee = LHSTy->getAs()->getPointeeType(); QualType rhptee = RHSTy->getAs()->getPointeeType(); QualType destPointee = Context.getQualifiedType(lhptee, rhptee.getQualifiers()); QualType destType = Context.getPointerType(destPointee); // Add qualifiers if necessary. LHS = ImpCastExprToType(LHS.get(), destType, CK_NoOp); // Promote to void*. RHS = ImpCastExprToType(RHS.get(), destType, CK_BitCast); return destType; } if (LHSTy->isObjCObjectPointerType() && RHSTy->isVoidPointerType()) { if (getLangOpts().ObjCAutoRefCount) { // ARC forbids the implicit conversion of object pointers to 'void *', // so these types are not compatible. Diag(QuestionLoc, diag::err_cond_voidptr_arc) << LHSTy << RHSTy << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); LHS = RHS = true; return QualType(); } QualType lhptee = LHSTy->getAs()->getPointeeType(); QualType rhptee = RHSTy->getAs()->getPointeeType(); QualType destPointee = Context.getQualifiedType(rhptee, lhptee.getQualifiers()); QualType destType = Context.getPointerType(destPointee); // Add qualifiers if necessary. RHS = ImpCastExprToType(RHS.get(), destType, CK_NoOp); // Promote to void*. LHS = ImpCastExprToType(LHS.get(), destType, CK_BitCast); return destType; } return QualType(); } /// SuggestParentheses - Emit a note with a fixit hint that wraps /// ParenRange in parentheses. 
static void SuggestParentheses(Sema &Self, SourceLocation Loc, const PartialDiagnostic &Note, SourceRange ParenRange) { SourceLocation EndLoc = Self.getLocForEndOfToken(ParenRange.getEnd()); if (ParenRange.getBegin().isFileID() && ParenRange.getEnd().isFileID() && EndLoc.isValid()) { Self.Diag(Loc, Note) << FixItHint::CreateInsertion(ParenRange.getBegin(), "(") << FixItHint::CreateInsertion(EndLoc, ")"); } else { // We can't display the parentheses, so just show the bare note. Self.Diag(Loc, Note) << ParenRange; } } static bool IsArithmeticOp(BinaryOperatorKind Opc) { return BinaryOperator::isAdditiveOp(Opc) || BinaryOperator::isMultiplicativeOp(Opc) || BinaryOperator::isShiftOp(Opc); } /// IsArithmeticBinaryExpr - Returns true if E is an arithmetic binary /// expression, either using a built-in or overloaded operator, /// and sets *OpCode to the opcode and *RHSExprs to the right-hand side /// expression. static bool IsArithmeticBinaryExpr(Expr *E, BinaryOperatorKind *Opcode, Expr **RHSExprs) { // Don't strip parenthesis: we should not warn if E is in parenthesis. E = E->IgnoreImpCasts(); E = E->IgnoreConversionOperator(); E = E->IgnoreImpCasts(); // Built-in binary operator. if (BinaryOperator *OP = dyn_cast(E)) { if (IsArithmeticOp(OP->getOpcode())) { *Opcode = OP->getOpcode(); *RHSExprs = OP->getRHS(); return true; } } // Overloaded operator. if (CXXOperatorCallExpr *Call = dyn_cast(E)) { if (Call->getNumArgs() != 2) return false; // Make sure this is really a binary operator that is safe to pass into // BinaryOperator::getOverloadedOpcode(), e.g. it's not a subscript op. OverloadedOperatorKind OO = Call->getOperator(); if (OO < OO_Plus || OO > OO_Arrow || OO == OO_PlusPlus || OO == OO_MinusMinus) return false; BinaryOperatorKind OpKind = BinaryOperator::getOverloadedOpcode(OO); if (IsArithmeticOp(OpKind)) { *Opcode = OpKind; *RHSExprs = Call->getArg(1); return true; } } return false; } /// ExprLooksBoolean - Returns true if E looks boolean, i.e. it has boolean type /// or is a logical expression such as (x==y) which has int type, but is /// commonly interpreted as boolean. static bool ExprLooksBoolean(Expr *E) { E = E->IgnoreParenImpCasts(); if (E->getType()->isBooleanType()) return true; if (BinaryOperator *OP = dyn_cast(E)) return OP->isComparisonOp() || OP->isLogicalOp(); if (UnaryOperator *OP = dyn_cast(E)) return OP->getOpcode() == UO_LNot; if (E->getType()->isPointerType()) return true; return false; } /// DiagnoseConditionalPrecedence - Emit a warning when a conditional operator /// and binary operator are mixed in a way that suggests the programmer assumed /// the conditional operator has higher precedence, for example: /// "int x = a + someBinaryCondition ? 1 : 2". static void DiagnoseConditionalPrecedence(Sema &Self, SourceLocation OpLoc, Expr *Condition, Expr *LHSExpr, Expr *RHSExpr) { BinaryOperatorKind CondOpcode; Expr *CondRHS; if (!IsArithmeticBinaryExpr(Condition, &CondOpcode, &CondRHS)) return; if (!ExprLooksBoolean(CondRHS)) return; // The condition is an arithmetic binary expression, with a right- // hand side that looks boolean, so warn. 
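// For example, for 'a + someBinaryCondition ? 1 : 2' the fix-it notes below
// suggest '(a + someBinaryCondition) ? 1 : 2' to silence the warning, or
// 'a + (someBinaryCondition ? 1 : 2)' to evaluate the conditional first.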
Self.Diag(OpLoc, diag::warn_precedence_conditional) << Condition->getSourceRange() << BinaryOperator::getOpcodeStr(CondOpcode); SuggestParentheses(Self, OpLoc, Self.PDiag(diag::note_precedence_silence) << BinaryOperator::getOpcodeStr(CondOpcode), SourceRange(Condition->getLocStart(), Condition->getLocEnd())); SuggestParentheses(Self, OpLoc, Self.PDiag(diag::note_precedence_conditional_first), SourceRange(CondRHS->getLocStart(), RHSExpr->getLocEnd())); } /// ActOnConditionalOp - Parse a ?: operation. Note that 'LHS' may be null /// in the case of a the GNU conditional expr extension. ExprResult Sema::ActOnConditionalOp(SourceLocation QuestionLoc, SourceLocation ColonLoc, Expr *CondExpr, Expr *LHSExpr, Expr *RHSExpr) { if (!getLangOpts().CPlusPlus) { // C cannot handle TypoExpr nodes in the condition because it // doesn't handle dependent types properly, so make sure any TypoExprs have // been dealt with before checking the operands. ExprResult CondResult = CorrectDelayedTyposInExpr(CondExpr); if (!CondResult.isUsable()) return ExprError(); CondExpr = CondResult.get(); } // If this is the gnu "x ?: y" extension, analyze the types as though the LHS // was the condition. OpaqueValueExpr *opaqueValue = nullptr; Expr *commonExpr = nullptr; if (!LHSExpr) { commonExpr = CondExpr; // Lower out placeholder types first. This is important so that we don't // try to capture a placeholder. This happens in few cases in C++; such // as Objective-C++'s dictionary subscripting syntax. if (commonExpr->hasPlaceholderType()) { ExprResult result = CheckPlaceholderExpr(commonExpr); if (!result.isUsable()) return ExprError(); commonExpr = result.get(); } // We usually want to apply unary conversions *before* saving, except // in the special case of a C++ l-value conditional. if (!(getLangOpts().CPlusPlus && !commonExpr->isTypeDependent() && commonExpr->getValueKind() == RHSExpr->getValueKind() && commonExpr->isGLValue() && commonExpr->isOrdinaryOrBitFieldObject() && RHSExpr->isOrdinaryOrBitFieldObject() && Context.hasSameType(commonExpr->getType(), RHSExpr->getType()))) { ExprResult commonRes = UsualUnaryConversions(commonExpr); if (commonRes.isInvalid()) return ExprError(); commonExpr = commonRes.get(); } opaqueValue = new (Context) OpaqueValueExpr(commonExpr->getExprLoc(), commonExpr->getType(), commonExpr->getValueKind(), commonExpr->getObjectKind(), commonExpr); LHSExpr = CondExpr = opaqueValue; } ExprValueKind VK = VK_RValue; ExprObjectKind OK = OK_Ordinary; ExprResult Cond = CondExpr, LHS = LHSExpr, RHS = RHSExpr; QualType result = CheckConditionalOperands(Cond, LHS, RHS, VK, OK, QuestionLoc); if (result.isNull() || Cond.isInvalid() || LHS.isInvalid() || RHS.isInvalid()) return ExprError(); DiagnoseConditionalPrecedence(*this, QuestionLoc, Cond.get(), LHS.get(), RHS.get()); CheckBoolLikeConversion(Cond.get(), QuestionLoc); if (!commonExpr) return new (Context) ConditionalOperator(Cond.get(), QuestionLoc, LHS.get(), ColonLoc, RHS.get(), result, VK, OK); return new (Context) BinaryConditionalOperator( commonExpr, opaqueValue, Cond.get(), LHS.get(), RHS.get(), QuestionLoc, ColonLoc, result, VK, OK); } // checkPointerTypesForAssignment - This is a very tricky routine (despite // being closely modeled after the C99 spec:-). The odd characteristic of this // routine is it effectively iqnores the qualifiers on the top level pointee. // This circumvents the usual type rules specified in 6.2.7p1 & 6.7.5.[1-3]. // FIXME: add a couple examples in this comment. 
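// A few illustrative cases (variable names are hypothetical):
//
//   char *cp;  const char *ccp;  unsigned char *ucp;
//   const char **ccpp;  char **cpp;
//
//   ccp = cp;    // OK: the left pointee may add qualifiers (Compatible)
//   cp = ccp;    // discards 'const' (CompatiblePointerDiscardsQualifiers)
//   ucp = cp;    // pointees differ only in sign (IncompatiblePointerSign)
//   ccpp = cpp;  // nested qualifier mismatch
//                // (IncompatibleNestedPointerQualifiers)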
static Sema::AssignConvertType checkPointerTypesForAssignment(Sema &S, QualType LHSType, QualType RHSType) { assert(LHSType.isCanonical() && "LHS not canonicalized!"); assert(RHSType.isCanonical() && "RHS not canonicalized!"); // get the "pointed to" type (ignoring qualifiers at the top level) const Type *lhptee, *rhptee; Qualifiers lhq, rhq; std::tie(lhptee, lhq) = cast(LHSType)->getPointeeType().split().asPair(); std::tie(rhptee, rhq) = cast(RHSType)->getPointeeType().split().asPair(); Sema::AssignConvertType ConvTy = Sema::Compatible; // C99 6.5.16.1p1: This following citation is common to constraints // 3 & 4 (below). ...and the type *pointed to* by the left has all the // qualifiers of the type *pointed to* by the right; // As a special case, 'non-__weak A *' -> 'non-__weak const *' is okay. if (lhq.getObjCLifetime() != rhq.getObjCLifetime() && lhq.compatiblyIncludesObjCLifetime(rhq)) { // Ignore lifetime for further calculation. lhq.removeObjCLifetime(); rhq.removeObjCLifetime(); } if (!lhq.compatiblyIncludes(rhq)) { // Treat address-space mismatches as fatal. TODO: address subspaces if (!lhq.isAddressSpaceSupersetOf(rhq)) ConvTy = Sema::IncompatiblePointerDiscardsQualifiers; // It's okay to add or remove GC or lifetime qualifiers when converting to // and from void*. else if (lhq.withoutObjCGCAttr().withoutObjCLifetime() .compatiblyIncludes( rhq.withoutObjCGCAttr().withoutObjCLifetime()) && (lhptee->isVoidType() || rhptee->isVoidType())) ; // keep old // Treat lifetime mismatches as fatal. else if (lhq.getObjCLifetime() != rhq.getObjCLifetime()) ConvTy = Sema::IncompatiblePointerDiscardsQualifiers; // For GCC compatibility, other qualifier mismatches are treated // as still compatible in C. else ConvTy = Sema::CompatiblePointerDiscardsQualifiers; } // C99 6.5.16.1p1 (constraint 4): If one operand is a pointer to an object or // incomplete type and the other is a pointer to a qualified or unqualified // version of void... if (lhptee->isVoidType()) { if (rhptee->isIncompleteOrObjectType()) return ConvTy; // As an extension, we allow cast to/from void* to function pointer. assert(rhptee->isFunctionType()); return Sema::FunctionVoidPointer; } if (rhptee->isVoidType()) { if (lhptee->isIncompleteOrObjectType()) return ConvTy; // As an extension, we allow cast to/from void* to function pointer. assert(lhptee->isFunctionType()); return Sema::FunctionVoidPointer; } // C99 6.5.16.1p1 (constraint 3): both operands are pointers to qualified or // unqualified versions of compatible types, ... QualType ltrans = QualType(lhptee, 0), rtrans = QualType(rhptee, 0); if (!S.Context.typesAreCompatible(ltrans, rtrans)) { // Check if the pointee types are compatible ignoring the sign. // We explicitly check for char so that we catch "char" vs // "unsigned char" on systems where "char" is unsigned. if (lhptee->isCharType()) ltrans = S.Context.UnsignedCharTy; else if (lhptee->hasSignedIntegerRepresentation()) ltrans = S.Context.getCorrespondingUnsignedType(ltrans); if (rhptee->isCharType()) rtrans = S.Context.UnsignedCharTy; else if (rhptee->hasSignedIntegerRepresentation()) rtrans = S.Context.getCorrespondingUnsignedType(rtrans); if (ltrans == rtrans) { // Types are compatible ignoring the sign. Qualifier incompatibility // takes priority over sign incompatibility because the sign // warning can be disabled. if (ConvTy != Sema::Compatible) return ConvTy; return Sema::IncompatiblePointerSign; } // If we are a multi-level pointer, it's possible that our issue is simply // one of qualification - e.g. 
char ** -> const char ** is not allowed. If // the eventual target type is the same and the pointers have the same // level of indirection, this must be the issue. if (isa(lhptee) && isa(rhptee)) { do { lhptee = cast(lhptee)->getPointeeType().getTypePtr(); rhptee = cast(rhptee)->getPointeeType().getTypePtr(); } while (isa(lhptee) && isa(rhptee)); if (lhptee == rhptee) return Sema::IncompatibleNestedPointerQualifiers; } // General pointer incompatibility takes priority over qualifiers. return Sema::IncompatiblePointer; } if (!S.getLangOpts().CPlusPlus && S.IsNoReturnConversion(ltrans, rtrans, ltrans)) return Sema::IncompatiblePointer; return ConvTy; } /// checkBlockPointerTypesForAssignment - This routine determines whether two /// block pointer types are compatible or whether a block and normal pointer /// are compatible. It is more restrict than comparing two function pointer // types. static Sema::AssignConvertType checkBlockPointerTypesForAssignment(Sema &S, QualType LHSType, QualType RHSType) { assert(LHSType.isCanonical() && "LHS not canonicalized!"); assert(RHSType.isCanonical() && "RHS not canonicalized!"); QualType lhptee, rhptee; // get the "pointed to" type (ignoring qualifiers at the top level) lhptee = cast(LHSType)->getPointeeType(); rhptee = cast(RHSType)->getPointeeType(); // In C++, the types have to match exactly. if (S.getLangOpts().CPlusPlus) return Sema::IncompatibleBlockPointer; Sema::AssignConvertType ConvTy = Sema::Compatible; // For blocks we enforce that qualifiers are identical. if (lhptee.getLocalQualifiers() != rhptee.getLocalQualifiers()) ConvTy = Sema::CompatiblePointerDiscardsQualifiers; if (!S.Context.typesAreBlockPointerCompatible(LHSType, RHSType)) return Sema::IncompatibleBlockPointer; return ConvTy; } /// checkObjCPointerTypesForAssignment - Compares two objective-c pointer types /// for assignment compatibility. static Sema::AssignConvertType checkObjCPointerTypesForAssignment(Sema &S, QualType LHSType, QualType RHSType) { assert(LHSType.isCanonical() && "LHS was not canonicalized!"); assert(RHSType.isCanonical() && "RHS was not canonicalized!"); if (LHSType->isObjCBuiltinType()) { // Class is not compatible with ObjC object pointers. if (LHSType->isObjCClassType() && !RHSType->isObjCBuiltinType() && !RHSType->isObjCQualifiedClassType()) return Sema::IncompatiblePointer; return Sema::Compatible; } if (RHSType->isObjCBuiltinType()) { if (RHSType->isObjCClassType() && !LHSType->isObjCBuiltinType() && !LHSType->isObjCQualifiedClassType()) return Sema::IncompatiblePointer; return Sema::Compatible; } QualType lhptee = LHSType->getAs()->getPointeeType(); QualType rhptee = RHSType->getAs()->getPointeeType(); if (!lhptee.isAtLeastAsQualifiedAs(rhptee) && // make an exception for id
!LHSType->isObjCQualifiedIdType()) return Sema::CompatiblePointerDiscardsQualifiers; if (S.Context.typesAreCompatible(LHSType, RHSType)) return Sema::Compatible; if (LHSType->isObjCQualifiedIdType() || RHSType->isObjCQualifiedIdType()) return Sema::IncompatibleObjCQualifiedId; return Sema::IncompatiblePointer; } Sema::AssignConvertType Sema::CheckAssignmentConstraints(SourceLocation Loc, QualType LHSType, QualType RHSType) { // Fake up an opaque expression. We don't actually care about what // cast operations are required, so if CheckAssignmentConstraints // adds casts to this they'll be wasted, but fortunately that doesn't // usually happen on valid code. OpaqueValueExpr RHSExpr(Loc, RHSType, VK_RValue); ExprResult RHSPtr = &RHSExpr; CastKind K = CK_Invalid; return CheckAssignmentConstraints(LHSType, RHSPtr, K, /*ConvertRHS=*/false); } /// CheckAssignmentConstraints (C99 6.5.16) - This routine currently /// has code to accommodate several GCC extensions when type checking /// pointers. Here are some objectionable examples that GCC considers warnings: /// /// int a, *pint; /// short *pshort; /// struct foo *pfoo; /// /// pint = pshort; // warning: assignment from incompatible pointer type /// a = pint; // warning: assignment makes integer from pointer without a cast /// pint = a; // warning: assignment makes pointer from integer without a cast /// pint = pfoo; // warning: assignment from incompatible pointer type /// /// As a result, the code for dealing with pointers is more complex than the /// C99 spec dictates. /// /// Sets 'Kind' for any result kind except Incompatible. Sema::AssignConvertType Sema::CheckAssignmentConstraints(QualType LHSType, ExprResult &RHS, CastKind &Kind, bool ConvertRHS) { QualType RHSType = RHS.get()->getType(); QualType OrigLHSType = LHSType; // Get canonical types. We're not formatting these types, just comparing // them. LHSType = Context.getCanonicalType(LHSType).getUnqualifiedType(); RHSType = Context.getCanonicalType(RHSType).getUnqualifiedType(); // Common case: no conversion required. if (LHSType == RHSType) { Kind = CK_NoOp; return Compatible; } // If we have an atomic type, try a non-atomic assignment, then just add an // atomic qualification step. if (const AtomicType *AtomicTy = dyn_cast(LHSType)) { Sema::AssignConvertType result = CheckAssignmentConstraints(AtomicTy->getValueType(), RHS, Kind); if (result != Compatible) return result; if (Kind != CK_NoOp && ConvertRHS) RHS = ImpCastExprToType(RHS.get(), AtomicTy->getValueType(), Kind); Kind = CK_NonAtomicToAtomic; return Compatible; } // If the left-hand side is a reference type, then we are in a // (rare!) case where we've allowed the use of references in C, // e.g., as a parameter type in a built-in function. In this case, // just make sure that the type referenced is compatible with the // right-hand side type. The caller is responsible for adjusting // LHSType so that the resulting expression does not have reference // type. if (const ReferenceType *LHSTypeRef = LHSType->getAs()) { if (Context.typesAreCompatible(LHSTypeRef->getPointeeType(), RHSType)) { Kind = CK_LValueBitCast; return Compatible; } return Incompatible; } // Allow scalar to ExtVector assignments, and assignments of an ExtVector type // to the same ExtVector type. if (LHSType->isExtVectorType()) { if (RHSType->isExtVectorType()) return Incompatible; if (RHSType->isArithmeticType()) { // CK_VectorSplat does T -> vector T, so first cast to the element type. 
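// For example (illustrative; 'float4' assumed to be an ext_vector_type(4)
// typedef): in 'float4 v; v = 2;' the integer 2 is first converted to 'float'
// and then splatted into all four lanes.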
if (ConvertRHS) RHS = prepareVectorSplat(LHSType, RHS.get()); Kind = CK_VectorSplat; return Compatible; } } // Conversions to or from vector type. if (LHSType->isVectorType() || RHSType->isVectorType()) { if (LHSType->isVectorType() && RHSType->isVectorType()) { // Allow assignments of an AltiVec vector type to an equivalent GCC // vector type and vice versa if (Context.areCompatibleVectorTypes(LHSType, RHSType)) { Kind = CK_BitCast; return Compatible; } // If we are allowing lax vector conversions, and LHS and RHS are both // vectors, the total size only needs to be the same. This is a bitcast; // no bits are changed but the result type is different. if (isLaxVectorConversion(RHSType, LHSType)) { Kind = CK_BitCast; return IncompatibleVectors; } } return Incompatible; } // Arithmetic conversions. if (LHSType->isArithmeticType() && RHSType->isArithmeticType() && !(getLangOpts().CPlusPlus && LHSType->isEnumeralType())) { if (ConvertRHS) Kind = PrepareScalarCast(RHS, LHSType); return Compatible; } // Conversions to normal pointers. if (const PointerType *LHSPointer = dyn_cast(LHSType)) { // U* -> T* if (isa(RHSType)) { unsigned AddrSpaceL = LHSPointer->getPointeeType().getAddressSpace(); unsigned AddrSpaceR = RHSType->getPointeeType().getAddressSpace(); Kind = AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast; return checkPointerTypesForAssignment(*this, LHSType, RHSType); } // int -> T* if (RHSType->isIntegerType()) { Kind = CK_IntegralToPointer; // FIXME: null? return IntToPointer; } // C pointers are not compatible with ObjC object pointers, // with two exceptions: if (isa(RHSType)) { // - conversions to void* if (LHSPointer->getPointeeType()->isVoidType()) { Kind = CK_BitCast; return Compatible; } // - conversions from 'Class' to the redefinition type if (RHSType->isObjCClassType() && Context.hasSameType(LHSType, Context.getObjCClassRedefinitionType())) { Kind = CK_BitCast; return Compatible; } Kind = CK_BitCast; return IncompatiblePointer; } // U^ -> void* if (RHSType->getAs()) { if (LHSPointer->getPointeeType()->isVoidType()) { Kind = CK_BitCast; return Compatible; } } return Incompatible; } // Conversions to block pointers. if (isa(LHSType)) { // U^ -> T^ if (RHSType->isBlockPointerType()) { Kind = CK_BitCast; return checkBlockPointerTypesForAssignment(*this, LHSType, RHSType); } // int or null -> T^ if (RHSType->isIntegerType()) { Kind = CK_IntegralToPointer; // FIXME: null return IntToBlockPointer; } // id -> T^ if (getLangOpts().ObjC1 && RHSType->isObjCIdType()) { Kind = CK_AnyPointerToBlockPointerCast; return Compatible; } // void* -> T^ if (const PointerType *RHSPT = RHSType->getAs()) if (RHSPT->getPointeeType()->isVoidType()) { Kind = CK_AnyPointerToBlockPointerCast; return Compatible; } return Incompatible; } // Conversions to Objective-C pointers. 
if (isa(LHSType)) { // A* -> B* if (RHSType->isObjCObjectPointerType()) { Kind = CK_BitCast; Sema::AssignConvertType result = checkObjCPointerTypesForAssignment(*this, LHSType, RHSType); if (getLangOpts().ObjCAutoRefCount && result == Compatible && !CheckObjCARCUnavailableWeakConversion(OrigLHSType, RHSType)) result = IncompatibleObjCWeakRef; return result; } // int or null -> A* if (RHSType->isIntegerType()) { Kind = CK_IntegralToPointer; // FIXME: null return IntToPointer; } // In general, C pointers are not compatible with ObjC object pointers, // with two exceptions: if (isa(RHSType)) { Kind = CK_CPointerToObjCPointerCast; // - conversions from 'void*' if (RHSType->isVoidPointerType()) { return Compatible; } // - conversions to 'Class' from its redefinition type if (LHSType->isObjCClassType() && Context.hasSameType(RHSType, Context.getObjCClassRedefinitionType())) { return Compatible; } return IncompatiblePointer; } // Only under strict condition T^ is compatible with an Objective-C pointer. if (RHSType->isBlockPointerType() && LHSType->isBlockCompatibleObjCPointerType(Context)) { if (ConvertRHS) maybeExtendBlockObject(RHS); Kind = CK_BlockPointerToObjCPointerCast; return Compatible; } return Incompatible; } // Conversions from pointers that are not covered by the above. if (isa(RHSType)) { // T* -> _Bool if (LHSType == Context.BoolTy) { Kind = CK_PointerToBoolean; return Compatible; } // T* -> int if (LHSType->isIntegerType()) { Kind = CK_PointerToIntegral; return PointerToInt; } return Incompatible; } // Conversions from Objective-C pointers that are not covered by the above. if (isa(RHSType)) { // T* -> _Bool if (LHSType == Context.BoolTy) { Kind = CK_PointerToBoolean; return Compatible; } // T* -> int if (LHSType->isIntegerType()) { Kind = CK_PointerToIntegral; return PointerToInt; } return Incompatible; } // struct A -> struct B if (isa(LHSType) && isa(RHSType)) { if (Context.typesAreCompatible(LHSType, RHSType)) { Kind = CK_NoOp; return Compatible; } } return Incompatible; } /// \brief Constructs a transparent union from an expression that is /// used to initialize the transparent union. static void ConstructTransparentUnion(Sema &S, ASTContext &C, ExprResult &EResult, QualType UnionType, FieldDecl *Field) { // Build an initializer list that designates the appropriate member // of the transparent union. Expr *E = EResult.get(); InitListExpr *Initializer = new (C) InitListExpr(C, SourceLocation(), E, SourceLocation()); Initializer->setType(UnionType); Initializer->setInitializedFieldInUnion(Field); // Build a compound literal constructing a value of the transparent // union type from this initializer list. TypeSourceInfo *unionTInfo = C.getTrivialTypeSourceInfo(UnionType); EResult = new (C) CompoundLiteralExpr(SourceLocation(), unionTInfo, UnionType, VK_RValue, Initializer, false); } Sema::AssignConvertType Sema::CheckTransparentUnionArgumentConstraints(QualType ArgType, ExprResult &RHS) { QualType RHSType = RHS.get()->getType(); // If the ArgType is a Union type, we want to handle a potential // transparent_union GCC extension. const RecordType *UT = ArgType->getAsUnionType(); if (!UT || !UT->getDecl()->hasAttr()) return Incompatible; // The field to initialize within the transparent union. RecordDecl *UD = UT->getDecl(); FieldDecl *InitField = nullptr; // It's compatible if the expression matches any of the fields. 
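// For example (an illustrative use of the GCC extension; the names are
// hypothetical):
//
//   typedef union {
//     int *ip;
//     long *lp;
//   } int_ptr_arg __attribute__((transparent_union));
//   void wait_for(int_ptr_arg);
//
// Both 'wait_for(&someInt)' and 'wait_for(&someLong)' are accepted, and the
// argument initializes whichever field it is compatible with.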
for (auto *it : UD->fields()) { if (it->getType()->isPointerType()) { // If the transparent union contains a pointer type, we allow: // 1) void pointer // 2) null pointer constant if (RHSType->isPointerType()) if (RHSType->castAs()->getPointeeType()->isVoidType()) { RHS = ImpCastExprToType(RHS.get(), it->getType(), CK_BitCast); InitField = it; break; } if (RHS.get()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) { RHS = ImpCastExprToType(RHS.get(), it->getType(), CK_NullToPointer); InitField = it; break; } } CastKind Kind = CK_Invalid; if (CheckAssignmentConstraints(it->getType(), RHS, Kind) == Compatible) { RHS = ImpCastExprToType(RHS.get(), it->getType(), Kind); InitField = it; break; } } if (!InitField) return Incompatible; ConstructTransparentUnion(*this, Context, RHS, ArgType, InitField); return Compatible; } Sema::AssignConvertType Sema::CheckSingleAssignmentConstraints(QualType LHSType, ExprResult &CallerRHS, bool Diagnose, bool DiagnoseCFAudited, bool ConvertRHS) { // If ConvertRHS is false, we want to leave the caller's RHS untouched. Sadly, // we can't avoid *all* modifications at the moment, so we need some somewhere // to put the updated value. ExprResult LocalRHS = CallerRHS; ExprResult &RHS = ConvertRHS ? CallerRHS : LocalRHS; if (getLangOpts().CPlusPlus) { if (!LHSType->isRecordType() && !LHSType->isAtomicType()) { // C++ 5.17p3: If the left operand is not of class type, the // expression is implicitly converted (C++ 4) to the // cv-unqualified type of the left operand. ExprResult Res; if (Diagnose) { Res = PerformImplicitConversion(RHS.get(), LHSType.getUnqualifiedType(), AA_Assigning); } else { ImplicitConversionSequence ICS = TryImplicitConversion(RHS.get(), LHSType.getUnqualifiedType(), /*SuppressUserConversions=*/false, /*AllowExplicit=*/false, /*InOverloadResolution=*/false, /*CStyle=*/false, /*AllowObjCWritebackConversion=*/false); if (ICS.isFailure()) return Incompatible; Res = PerformImplicitConversion(RHS.get(), LHSType.getUnqualifiedType(), ICS, AA_Assigning); } if (Res.isInvalid()) return Incompatible; Sema::AssignConvertType result = Compatible; if (getLangOpts().ObjCAutoRefCount && !CheckObjCARCUnavailableWeakConversion(LHSType, RHS.get()->getType())) result = IncompatibleObjCWeakRef; RHS = Res; return result; } // FIXME: Currently, we fall through and treat C++ classes like C // structures. // FIXME: We also fall through for atomics; not sure what should // happen there, though. } else if (RHS.get()->getType() == Context.OverloadTy) { // As a set of extensions to C, we support overloading on functions. These // functions need to be resolved here. DeclAccessPair DAP; if (FunctionDecl *FD = ResolveAddressOfOverloadedFunction( RHS.get(), LHSType, /*Complain=*/false, DAP)) RHS = FixOverloadedFunctionReference(RHS.get(), DAP, FD); else return Incompatible; } // C99 6.5.16.1p1: the left operand is a pointer and the right is // a null pointer constant. if ((LHSType->isPointerType() || LHSType->isObjCObjectPointerType() || LHSType->isBlockPointerType()) && RHS.get()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) { if (Diagnose || ConvertRHS) { CastKind Kind; CXXCastPath Path; CheckPointerConversion(RHS.get(), LHSType, Kind, Path, /*IgnoreBaseAccess=*/false, Diagnose); if (ConvertRHS) RHS = ImpCastExprToType(RHS.get(), LHSType, Kind, VK_RValue, &Path); } return Compatible; } // This check seems unnatural, however it is necessary to ensure the proper // conversion of functions/arrays. 
If the conversion were done for all // DeclExpr's (created by ActOnIdExpression), it would mess up the unary // expressions that suppress this implicit conversion (&, sizeof). // // Suppress this for references: C++ 8.5.3p5. if (!LHSType->isReferenceType()) { // FIXME: We potentially allocate here even if ConvertRHS is false. RHS = DefaultFunctionArrayLvalueConversion(RHS.get(), Diagnose); if (RHS.isInvalid()) return Incompatible; } Expr *PRE = RHS.get()->IgnoreParenCasts(); if (Diagnose && isa(PRE)) { ObjCProtocolDecl *PDecl = cast(PRE)->getProtocol(); if (PDecl && !PDecl->hasDefinition()) { Diag(PRE->getExprLoc(), diag::warn_atprotocol_protocol) << PDecl->getName(); Diag(PDecl->getLocation(), diag::note_entity_declared_at) << PDecl; } } CastKind Kind = CK_Invalid; Sema::AssignConvertType result = CheckAssignmentConstraints(LHSType, RHS, Kind, ConvertRHS); // C99 6.5.16.1p2: The value of the right operand is converted to the // type of the assignment expression. // CheckAssignmentConstraints allows the left-hand side to be a reference, // so that we can use references in built-in functions even in C. // The getNonReferenceType() call makes sure that the resulting expression // does not have reference type. if (result != Incompatible && RHS.get()->getType() != LHSType) { QualType Ty = LHSType.getNonLValueExprType(Context); Expr *E = RHS.get(); if (getLangOpts().ObjCAutoRefCount) CheckObjCARCConversion(SourceRange(), Ty, E, CCK_ImplicitConversion, Diagnose, DiagnoseCFAudited); if (getLangOpts().ObjC1 && (CheckObjCBridgeRelatedConversions(E->getLocStart(), LHSType, E->getType(), E, Diagnose) || ConversionToObjCStringLiteralCheck(LHSType, E, Diagnose))) { RHS = E; return Compatible; } if (ConvertRHS) RHS = ImpCastExprToType(E, Ty, Kind); } return result; } QualType Sema::InvalidOperands(SourceLocation Loc, ExprResult &LHS, ExprResult &RHS) { Diag(Loc, diag::err_typecheck_invalid_operands) << LHS.get()->getType() << RHS.get()->getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } /// Try to convert a value of non-vector type to a vector type by converting /// the type to the element type of the vector and then performing a splat. /// If the language is OpenCL, we only use conversions that promote scalar /// rank; for C, Obj-C, and C++ we allow any real scalar conversion except /// for float->int. /// /// \param scalar - if non-null, actually perform the conversions /// \return true if the operation fails (but without diagnosing the failure) static bool tryVectorConvertAndSplat(Sema &S, ExprResult *scalar, QualType scalarTy, QualType vectorEltTy, QualType vectorTy) { // The conversion to apply to the scalar before splatting it, // if necessary. CastKind scalarCast = CK_Invalid; if (vectorEltTy->isIntegralType(S.Context)) { if (!scalarTy->isIntegralType(S.Context)) return true; if (S.getLangOpts().OpenCL && S.Context.getIntegerTypeOrder(vectorEltTy, scalarTy) < 0) return true; scalarCast = CK_IntegralCast; } else if (vectorEltTy->isRealFloatingType()) { if (scalarTy->isRealFloatingType()) { if (S.getLangOpts().OpenCL && S.Context.getFloatingTypeOrder(vectorEltTy, scalarTy) < 0) return true; scalarCast = CK_FloatingCast; } else if (scalarTy->isIntegralType(S.Context)) scalarCast = CK_IntegralToFloating; else return true; } else { return true; } // Adjust scalar if desired. 
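// For example (illustrative): with 'int4 v', the expression 'v + 5L' is
// accepted in C/C++ (the 'long' scalar is converted down to 'int' and
// splatted), but rejected under OpenCL because the scalar has a higher
// integer rank than the element type.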
if (scalar) { if (scalarCast != CK_Invalid) *scalar = S.ImpCastExprToType(scalar->get(), vectorEltTy, scalarCast); *scalar = S.ImpCastExprToType(scalar->get(), vectorTy, CK_VectorSplat); } return false; } QualType Sema::CheckVectorOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompAssign, bool AllowBothBool, bool AllowBoolConversions) { if (!IsCompAssign) { LHS = DefaultFunctionArrayLvalueConversion(LHS.get()); if (LHS.isInvalid()) return QualType(); } RHS = DefaultFunctionArrayLvalueConversion(RHS.get()); if (RHS.isInvalid()) return QualType(); // For conversion purposes, we ignore any qualifiers. // For example, "const float" and "float" are equivalent. QualType LHSType = LHS.get()->getType().getUnqualifiedType(); QualType RHSType = RHS.get()->getType().getUnqualifiedType(); const VectorType *LHSVecType = LHSType->getAs(); const VectorType *RHSVecType = RHSType->getAs(); assert(LHSVecType || RHSVecType); // AltiVec-style "vector bool op vector bool" combinations are allowed // for some operators but not others. if (!AllowBothBool && LHSVecType && LHSVecType->getVectorKind() == VectorType::AltiVecBool && RHSVecType && RHSVecType->getVectorKind() == VectorType::AltiVecBool) return InvalidOperands(Loc, LHS, RHS); // If the vector types are identical, return. if (Context.hasSameType(LHSType, RHSType)) return LHSType; // If we have compatible AltiVec and GCC vector types, use the AltiVec type. if (LHSVecType && RHSVecType && Context.areCompatibleVectorTypes(LHSType, RHSType)) { if (isa(LHSVecType)) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast); return LHSType; } if (!IsCompAssign) LHS = ImpCastExprToType(LHS.get(), RHSType, CK_BitCast); return RHSType; } // AllowBoolConversions says that bool and non-bool AltiVec vectors // can be mixed, with the result being the non-bool type. The non-bool // operand must have integer element type. if (AllowBoolConversions && LHSVecType && RHSVecType && LHSVecType->getNumElements() == RHSVecType->getNumElements() && (Context.getTypeSize(LHSVecType->getElementType()) == Context.getTypeSize(RHSVecType->getElementType()))) { if (LHSVecType->getVectorKind() == VectorType::AltiVecVector && LHSVecType->getElementType()->isIntegerType() && RHSVecType->getVectorKind() == VectorType::AltiVecBool) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast); return LHSType; } if (!IsCompAssign && LHSVecType->getVectorKind() == VectorType::AltiVecBool && RHSVecType->getVectorKind() == VectorType::AltiVecVector && RHSVecType->getElementType()->isIntegerType()) { LHS = ImpCastExprToType(LHS.get(), RHSType, CK_BitCast); return RHSType; } } // If there's an ext-vector type and a scalar, try to convert the scalar to // the vector element type and splat. if (!RHSVecType && isa(LHSVecType)) { if (!tryVectorConvertAndSplat(*this, &RHS, RHSType, LHSVecType->getElementType(), LHSType)) return LHSType; } if (!LHSVecType && isa(RHSVecType)) { if (!tryVectorConvertAndSplat(*this, (IsCompAssign ? nullptr : &LHS), LHSType, RHSVecType->getElementType(), RHSType)) return RHSType; } // If we're allowing lax vector conversions, only the total (data) size // needs to be the same. // FIXME: Should we really be allowing this? // FIXME: We really just pick the LHS type arbitrarily? if (isLaxVectorConversion(RHSType, LHSType)) { QualType resultType = LHSType; RHS = ImpCastExprToType(RHS.get(), resultType, CK_BitCast); return resultType; } // Okay, the expression is invalid. // If there's a non-vector, non-real operand, diagnose that. 
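// For example (illustrative): 'float4 v; int *p; v + p' ends up here, since a
// pointer operand is neither a vector nor a real type.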
  if ((!RHSVecType && !RHSType->isRealType()) ||
      (!LHSVecType && !LHSType->isRealType())) {
    Diag(Loc, diag::err_typecheck_vector_not_convertable_non_scalar)
      << LHSType << RHSType
      << LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
    return QualType();
  }

  // OpenCL V1.1 6.2.6.p1:
  // If the operands are of more than one vector type, then an error shall
  // occur. Implicit conversions between vector types are not permitted, per
  // section 6.2.1.
  if (getLangOpts().OpenCL &&
      RHSVecType && isa<ExtVectorType>(RHSVecType) &&
      LHSVecType && isa<ExtVectorType>(LHSVecType)) {
    Diag(Loc, diag::err_opencl_implicit_vector_conversion) << LHSType
                                                           << RHSType;
    return QualType();
  }

  // Otherwise, use the generic diagnostic.
  Diag(Loc, diag::err_typecheck_vector_not_convertable)
    << LHSType << RHSType
    << LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
  return QualType();
}

// checkArithmeticNull - Detect when a NULL constant is used improperly in an
// expression.  These are mainly cases where the null pointer is used as an
// integer instead of a pointer.
static void checkArithmeticNull(Sema &S, ExprResult &LHS, ExprResult &RHS,
                                SourceLocation Loc, bool IsCompare) {
  // The canonical way to check for a GNU null is with isNullPointerConstant,
  // but we use a bit of a hack here for speed; this is a relatively
  // hot path, and isNullPointerConstant is slow.
  bool LHSNull = isa<GNUNullExpr>(LHS.get()->IgnoreParenImpCasts());
  bool RHSNull = isa<GNUNullExpr>(RHS.get()->IgnoreParenImpCasts());

  QualType NonNullType = LHSNull ? RHS.get()->getType() : LHS.get()->getType();

  // Avoid analyzing cases where the result will either be invalid (and
  // diagnosed as such) or entirely valid and not something to warn about.
  if ((!LHSNull && !RHSNull) || NonNullType->isBlockPointerType() ||
      NonNullType->isMemberPointerType() || NonNullType->isFunctionType())
    return;

  // Comparison operations would not make sense with a null pointer no matter
  // what the other expression is.
  if (!IsCompare) {
    S.Diag(Loc, diag::warn_null_in_arithmetic_operation)
        << (LHSNull ? LHS.get()->getSourceRange() : SourceRange())
        << (RHSNull ? RHS.get()->getSourceRange() : SourceRange());
    return;
  }

  // The rest of the operations only make sense with a null pointer
  // if the other expression is a pointer.
  if (LHSNull == RHSNull || NonNullType->isAnyPointerType() ||
      NonNullType->canDecayToPointerType())
    return;

  S.Diag(Loc, diag::warn_null_in_comparison_operation)
      << LHSNull /* LHS is NULL */ << NonNullType
      << LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
}

static void DiagnoseBadDivideOrRemainderValues(Sema& S, ExprResult &LHS,
                                               ExprResult &RHS,
                                               SourceLocation Loc, bool IsDiv) {
  // Check for division/remainder by zero.
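  // Illustrative example (not from the original source): 'x / 0' or 'x % 0'
  // with a constant-foldable zero RHS triggers
  // warn_remainder_division_by_zero; IsDiv selects the division vs. remainder
  // wording.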
llvm::APSInt RHSValue; if (!RHS.get()->isValueDependent() && RHS.get()->EvaluateAsInt(RHSValue, S.Context) && RHSValue == 0) S.DiagRuntimeBehavior(Loc, RHS.get(), S.PDiag(diag::warn_remainder_division_by_zero) << IsDiv << RHS.get()->getSourceRange()); } QualType Sema::CheckMultiplyDivideOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompAssign, bool IsDiv) { checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false); if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) return CheckVectorOperands(LHS, RHS, Loc, IsCompAssign, /*AllowBothBool*/getLangOpts().AltiVec, /*AllowBoolConversions*/false); QualType compType = UsualArithmeticConversions(LHS, RHS, IsCompAssign); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); if (compType.isNull() || !compType->isArithmeticType()) return InvalidOperands(Loc, LHS, RHS); if (IsDiv) DiagnoseBadDivideOrRemainderValues(*this, LHS, RHS, Loc, IsDiv); return compType; } QualType Sema::CheckRemainderOperands( ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompAssign) { checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false); if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) { if (LHS.get()->getType()->hasIntegerRepresentation() && RHS.get()->getType()->hasIntegerRepresentation()) return CheckVectorOperands(LHS, RHS, Loc, IsCompAssign, /*AllowBothBool*/getLangOpts().AltiVec, /*AllowBoolConversions*/false); return InvalidOperands(Loc, LHS, RHS); } QualType compType = UsualArithmeticConversions(LHS, RHS, IsCompAssign); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); if (compType.isNull() || !compType->isIntegerType()) return InvalidOperands(Loc, LHS, RHS); DiagnoseBadDivideOrRemainderValues(*this, LHS, RHS, Loc, false /* IsDiv */); return compType; } /// \brief Diagnose invalid arithmetic on two void pointers. static void diagnoseArithmeticOnTwoVoidPointers(Sema &S, SourceLocation Loc, Expr *LHSExpr, Expr *RHSExpr) { S.Diag(Loc, S.getLangOpts().CPlusPlus ? diag::err_typecheck_pointer_arith_void_type : diag::ext_gnu_void_ptr) << 1 /* two pointers */ << LHSExpr->getSourceRange() << RHSExpr->getSourceRange(); } /// \brief Diagnose invalid arithmetic on a void pointer. static void diagnoseArithmeticOnVoidPointer(Sema &S, SourceLocation Loc, Expr *Pointer) { S.Diag(Loc, S.getLangOpts().CPlusPlus ? diag::err_typecheck_pointer_arith_void_type : diag::ext_gnu_void_ptr) << 0 /* one pointer */ << Pointer->getSourceRange(); } /// \brief Diagnose invalid arithmetic on two function pointers. static void diagnoseArithmeticOnTwoFunctionPointers(Sema &S, SourceLocation Loc, Expr *LHS, Expr *RHS) { assert(LHS->getType()->isAnyPointerType()); assert(RHS->getType()->isAnyPointerType()); S.Diag(Loc, S.getLangOpts().CPlusPlus ? diag::err_typecheck_pointer_arith_function_type : diag::ext_gnu_ptr_func_arith) << 1 /* two pointers */ << LHS->getType()->getPointeeType() // We only show the second type if it differs from the first. << (unsigned)!S.Context.hasSameUnqualifiedType(LHS->getType(), RHS->getType()) << RHS->getType()->getPointeeType() << LHS->getSourceRange() << RHS->getSourceRange(); } /// \brief Diagnose invalid arithmetic on a function pointer. static void diagnoseArithmeticOnFunctionPointer(Sema &S, SourceLocation Loc, Expr *Pointer) { assert(Pointer->getType()->isAnyPointerType()); S.Diag(Loc, S.getLangOpts().CPlusPlus ? 
diag::err_typecheck_pointer_arith_function_type : diag::ext_gnu_ptr_func_arith) << 0 /* one pointer */ << Pointer->getType()->getPointeeType() << 0 /* one pointer, so only one type */ << Pointer->getSourceRange(); } /// \brief Emit error if Operand is incomplete pointer type /// /// \returns True if pointer has incomplete type static bool checkArithmeticIncompletePointerType(Sema &S, SourceLocation Loc, Expr *Operand) { QualType ResType = Operand->getType(); if (const AtomicType *ResAtomicType = ResType->getAs()) ResType = ResAtomicType->getValueType(); assert(ResType->isAnyPointerType() && !ResType->isDependentType()); QualType PointeeTy = ResType->getPointeeType(); return S.RequireCompleteType(Loc, PointeeTy, diag::err_typecheck_arithmetic_incomplete_type, PointeeTy, Operand->getSourceRange()); } /// \brief Check the validity of an arithmetic pointer operand. /// /// If the operand has pointer type, this code will check for pointer types /// which are invalid in arithmetic operations. These will be diagnosed /// appropriately, including whether or not the use is supported as an /// extension. /// /// \returns True when the operand is valid to use (even if as an extension). static bool checkArithmeticOpPointerOperand(Sema &S, SourceLocation Loc, Expr *Operand) { QualType ResType = Operand->getType(); if (const AtomicType *ResAtomicType = ResType->getAs()) ResType = ResAtomicType->getValueType(); if (!ResType->isAnyPointerType()) return true; QualType PointeeTy = ResType->getPointeeType(); if (PointeeTy->isVoidType()) { diagnoseArithmeticOnVoidPointer(S, Loc, Operand); return !S.getLangOpts().CPlusPlus; } if (PointeeTy->isFunctionType()) { diagnoseArithmeticOnFunctionPointer(S, Loc, Operand); return !S.getLangOpts().CPlusPlus; } if (checkArithmeticIncompletePointerType(S, Loc, Operand)) return false; return true; } /// \brief Check the validity of a binary arithmetic operation w.r.t. pointer /// operands. /// /// This routine will diagnose any invalid arithmetic on pointer operands much /// like \see checkArithmeticOpPointerOperand. However, it has special logic /// for emitting a single diagnostic even for operations where both LHS and RHS /// are (potentially problematic) pointers. /// /// \returns True when the operand is valid to use (even if as an extension). static bool checkArithmeticBinOpPointerOperands(Sema &S, SourceLocation Loc, Expr *LHSExpr, Expr *RHSExpr) { bool isLHSPointer = LHSExpr->getType()->isAnyPointerType(); bool isRHSPointer = RHSExpr->getType()->isAnyPointerType(); if (!isLHSPointer && !isRHSPointer) return true; QualType LHSPointeeTy, RHSPointeeTy; if (isLHSPointer) LHSPointeeTy = LHSExpr->getType()->getPointeeType(); if (isRHSPointer) RHSPointeeTy = RHSExpr->getType()->getPointeeType(); // if both are pointers check if operation is valid wrt address spaces if (S.getLangOpts().OpenCL && isLHSPointer && isRHSPointer) { const PointerType *lhsPtr = LHSExpr->getType()->getAs(); const PointerType *rhsPtr = RHSExpr->getType()->getAs(); if (!lhsPtr->isAddressSpaceOverlapping(*rhsPtr)) { S.Diag(Loc, diag::err_typecheck_op_on_nonoverlapping_address_space_pointers) << LHSExpr->getType() << RHSExpr->getType() << 1 /*arithmetic op*/ << LHSExpr->getSourceRange() << RHSExpr->getSourceRange(); return false; } } // Check for arithmetic on pointers to incomplete types. 
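  // Illustrative examples (not from the original source):
  //   void *p, *q;       p - q;   // GNU extension in C, error in C++
  //   struct S *a, *b;   a - b;   // diagnosed if 'struct S' is incomplete
  // The checks below emit a single diagnostic even when both operands are
  // problematic pointers.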
bool isLHSVoidPtr = isLHSPointer && LHSPointeeTy->isVoidType(); bool isRHSVoidPtr = isRHSPointer && RHSPointeeTy->isVoidType(); if (isLHSVoidPtr || isRHSVoidPtr) { if (!isRHSVoidPtr) diagnoseArithmeticOnVoidPointer(S, Loc, LHSExpr); else if (!isLHSVoidPtr) diagnoseArithmeticOnVoidPointer(S, Loc, RHSExpr); else diagnoseArithmeticOnTwoVoidPointers(S, Loc, LHSExpr, RHSExpr); return !S.getLangOpts().CPlusPlus; } bool isLHSFuncPtr = isLHSPointer && LHSPointeeTy->isFunctionType(); bool isRHSFuncPtr = isRHSPointer && RHSPointeeTy->isFunctionType(); if (isLHSFuncPtr || isRHSFuncPtr) { if (!isRHSFuncPtr) diagnoseArithmeticOnFunctionPointer(S, Loc, LHSExpr); else if (!isLHSFuncPtr) diagnoseArithmeticOnFunctionPointer(S, Loc, RHSExpr); else diagnoseArithmeticOnTwoFunctionPointers(S, Loc, LHSExpr, RHSExpr); return !S.getLangOpts().CPlusPlus; } if (isLHSPointer && checkArithmeticIncompletePointerType(S, Loc, LHSExpr)) return false; if (isRHSPointer && checkArithmeticIncompletePointerType(S, Loc, RHSExpr)) return false; return true; } /// diagnoseStringPlusInt - Emit a warning when adding an integer to a string /// literal. static void diagnoseStringPlusInt(Sema &Self, SourceLocation OpLoc, Expr *LHSExpr, Expr *RHSExpr) { StringLiteral* StrExpr = dyn_cast(LHSExpr->IgnoreImpCasts()); Expr* IndexExpr = RHSExpr; if (!StrExpr) { StrExpr = dyn_cast(RHSExpr->IgnoreImpCasts()); IndexExpr = LHSExpr; } bool IsStringPlusInt = StrExpr && IndexExpr->getType()->isIntegralOrUnscopedEnumerationType(); if (!IsStringPlusInt || IndexExpr->isValueDependent()) return; llvm::APSInt index; if (IndexExpr->EvaluateAsInt(index, Self.getASTContext())) { unsigned StrLenWithNull = StrExpr->getLength() + 1; if (index.isNonNegative() && index <= llvm::APSInt(llvm::APInt(index.getBitWidth(), StrLenWithNull), index.isUnsigned())) return; } SourceRange DiagRange(LHSExpr->getLocStart(), RHSExpr->getLocEnd()); Self.Diag(OpLoc, diag::warn_string_plus_int) << DiagRange << IndexExpr->IgnoreImpCasts()->getType(); // Only print a fixit for "str" + int, not for int + "str". if (IndexExpr == RHSExpr) { SourceLocation EndLoc = Self.getLocForEndOfToken(RHSExpr->getLocEnd()); Self.Diag(OpLoc, diag::note_string_plus_scalar_silence) << FixItHint::CreateInsertion(LHSExpr->getLocStart(), "&") << FixItHint::CreateReplacement(SourceRange(OpLoc), "[") << FixItHint::CreateInsertion(EndLoc, "]"); } else Self.Diag(OpLoc, diag::note_string_plus_scalar_silence); } /// \brief Emit a warning when adding a char literal to a string. static void diagnoseStringPlusChar(Sema &Self, SourceLocation OpLoc, Expr *LHSExpr, Expr *RHSExpr) { const Expr *StringRefExpr = LHSExpr; const CharacterLiteral *CharExpr = dyn_cast(RHSExpr->IgnoreImpCasts()); if (!CharExpr) { CharExpr = dyn_cast(LHSExpr->IgnoreImpCasts()); StringRefExpr = RHSExpr; } if (!CharExpr || !StringRefExpr) return; const QualType StringType = StringRefExpr->getType(); // Return if not a PointerType. if (!StringType->isAnyPointerType()) return; // Return if not a CharacterType. 
if (!StringType->getPointeeType()->isAnyCharacterType()) return; ASTContext &Ctx = Self.getASTContext(); SourceRange DiagRange(LHSExpr->getLocStart(), RHSExpr->getLocEnd()); const QualType CharType = CharExpr->getType(); if (!CharType->isAnyCharacterType() && CharType->isIntegerType() && llvm::isUIntN(Ctx.getCharWidth(), CharExpr->getValue())) { Self.Diag(OpLoc, diag::warn_string_plus_char) << DiagRange << Ctx.CharTy; } else { Self.Diag(OpLoc, diag::warn_string_plus_char) << DiagRange << CharExpr->getType(); } // Only print a fixit for str + char, not for char + str. if (isa(RHSExpr->IgnoreImpCasts())) { SourceLocation EndLoc = Self.getLocForEndOfToken(RHSExpr->getLocEnd()); Self.Diag(OpLoc, diag::note_string_plus_scalar_silence) << FixItHint::CreateInsertion(LHSExpr->getLocStart(), "&") << FixItHint::CreateReplacement(SourceRange(OpLoc), "[") << FixItHint::CreateInsertion(EndLoc, "]"); } else { Self.Diag(OpLoc, diag::note_string_plus_scalar_silence); } } /// \brief Emit error when two pointers are incompatible. static void diagnosePointerIncompatibility(Sema &S, SourceLocation Loc, Expr *LHSExpr, Expr *RHSExpr) { assert(LHSExpr->getType()->isAnyPointerType()); assert(RHSExpr->getType()->isAnyPointerType()); S.Diag(Loc, diag::err_typecheck_sub_ptr_compatible) << LHSExpr->getType() << RHSExpr->getType() << LHSExpr->getSourceRange() << RHSExpr->getSourceRange(); } // C99 6.5.6 QualType Sema::CheckAdditionOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc, QualType* CompLHSTy) { checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false); if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) { QualType compType = CheckVectorOperands( LHS, RHS, Loc, CompLHSTy, /*AllowBothBool*/getLangOpts().AltiVec, /*AllowBoolConversions*/getLangOpts().ZVector); if (CompLHSTy) *CompLHSTy = compType; return compType; } QualType compType = UsualArithmeticConversions(LHS, RHS, CompLHSTy); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); // Diagnose "string literal" '+' int and string '+' "char literal". if (Opc == BO_Add) { diagnoseStringPlusInt(*this, Loc, LHS.get(), RHS.get()); diagnoseStringPlusChar(*this, Loc, LHS.get(), RHS.get()); } // handle the common case first (both operands are arithmetic). if (!compType.isNull() && compType->isArithmeticType()) { if (CompLHSTy) *CompLHSTy = compType; return compType; } // Type-checking. Ultimately the pointer's going to be in PExp; // note that we bias towards the LHS being the pointer. 
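  // Illustrative example (not from the original source): for both 'p + 1' and
  // '1 + p' the code below ends up with PExp == p and IExp == 1; the integer
  // operand is then range-checked against the pointed-to array.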
  Expr *PExp = LHS.get(), *IExp = RHS.get();

  bool isObjCPointer;
  if (PExp->getType()->isPointerType()) {
    isObjCPointer = false;
  } else if (PExp->getType()->isObjCObjectPointerType()) {
    isObjCPointer = true;
  } else {
    std::swap(PExp, IExp);
    if (PExp->getType()->isPointerType()) {
      isObjCPointer = false;
    } else if (PExp->getType()->isObjCObjectPointerType()) {
      isObjCPointer = true;
    } else {
      return InvalidOperands(Loc, LHS, RHS);
    }
  }
  assert(PExp->getType()->isAnyPointerType());

  if (!IExp->getType()->isIntegerType())
    return InvalidOperands(Loc, LHS, RHS);

  if (!checkArithmeticOpPointerOperand(*this, Loc, PExp))
    return QualType();

  if (isObjCPointer && checkArithmeticOnObjCPointer(*this, Loc, PExp))
    return QualType();

  // Check array bounds for pointer arithmetic
  CheckArrayAccess(PExp, IExp);

  if (CompLHSTy) {
    QualType LHSTy = Context.isPromotableBitField(LHS.get());
    if (LHSTy.isNull()) {
      LHSTy = LHS.get()->getType();
      if (LHSTy->isPromotableIntegerType())
        LHSTy = Context.getPromotedIntegerType(LHSTy);
    }
    *CompLHSTy = LHSTy;
  }

  return PExp->getType();
}

// C99 6.5.6
QualType Sema::CheckSubtractionOperands(ExprResult &LHS, ExprResult &RHS,
                                        SourceLocation Loc,
                                        QualType* CompLHSTy) {
  checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false);

  if (LHS.get()->getType()->isVectorType() ||
      RHS.get()->getType()->isVectorType()) {
    QualType compType = CheckVectorOperands(
        LHS, RHS, Loc, CompLHSTy,
        /*AllowBothBool*/getLangOpts().AltiVec,
        /*AllowBoolConversions*/getLangOpts().ZVector);
    if (CompLHSTy) *CompLHSTy = compType;
    return compType;
  }

  QualType compType = UsualArithmeticConversions(LHS, RHS, CompLHSTy);
  if (LHS.isInvalid() || RHS.isInvalid())
    return QualType();

  // Enforce type constraints: C99 6.5.6p3.

  // Handle the common case first (both operands are arithmetic).
  if (!compType.isNull() && compType->isArithmeticType()) {
    if (CompLHSTy) *CompLHSTy = compType;
    return compType;
  }

  // Either ptr - int or ptr - ptr.
  if (LHS.get()->getType()->isAnyPointerType()) {
    QualType lpointee = LHS.get()->getType()->getPointeeType();

    // Diagnose bad cases where we step over interface counts.
    if (LHS.get()->getType()->isObjCObjectPointerType() &&
        checkArithmeticOnObjCPointer(*this, Loc, LHS.get()))
      return QualType();

    // The result type of a pointer-int computation is the pointer type.
    if (RHS.get()->getType()->isIntegerType()) {
      if (!checkArithmeticOpPointerOperand(*this, Loc, LHS.get()))
        return QualType();

      // Check array bounds for pointer arithmetic
      CheckArrayAccess(LHS.get(), RHS.get(), /*ArraySubscriptExpr*/nullptr,
                       /*AllowOnePastEnd*/true, /*IndexNegated*/true);

      if (CompLHSTy) *CompLHSTy = LHS.get()->getType();
      return LHS.get()->getType();
    }

    // Handle pointer-pointer subtractions.
    if (const PointerType *RHSPTy
          = RHS.get()->getType()->getAs<PointerType>()) {
      QualType rpointee = RHSPTy->getPointeeType();

      if (getLangOpts().CPlusPlus) {
        // Pointee types must be the same: C++ [expr.add]
        if (!Context.hasSameUnqualifiedType(lpointee, rpointee)) {
          diagnosePointerIncompatibility(*this, Loc, LHS.get(), RHS.get());
        }
      } else {
        // Pointee types must be compatible C99 6.5.6p3
        if (!Context.typesAreCompatible(
                Context.getCanonicalType(lpointee).getUnqualifiedType(),
                Context.getCanonicalType(rpointee).getUnqualifiedType())) {
          diagnosePointerIncompatibility(*this, Loc, LHS.get(), RHS.get());
          return QualType();
        }
      }

      if (!checkArithmeticBinOpPointerOperands(*this, Loc,
                                               LHS.get(), RHS.get()))
        return QualType();

      // The pointee type may have zero size.  As an extension, a structure or
      // union may have zero size or an array may have zero length.
In this // case subtraction does not make sense. if (!rpointee->isVoidType() && !rpointee->isFunctionType()) { CharUnits ElementSize = Context.getTypeSizeInChars(rpointee); if (ElementSize.isZero()) { Diag(Loc,diag::warn_sub_ptr_zero_size_types) << rpointee.getUnqualifiedType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } } if (CompLHSTy) *CompLHSTy = LHS.get()->getType(); return Context.getPointerDiffType(); } } return InvalidOperands(Loc, LHS, RHS); } static bool isScopedEnumerationType(QualType T) { if (const EnumType *ET = T->getAs()) return ET->getDecl()->isScoped(); return false; } static void DiagnoseBadShiftValues(Sema& S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc, QualType LHSType) { // OpenCL 6.3j: shift values are effectively % word size of LHS (more defined), // so skip remaining warnings as we don't want to modify values within Sema. if (S.getLangOpts().OpenCL) return; llvm::APSInt Right; // Check right/shifter operand if (RHS.get()->isValueDependent() || !RHS.get()->EvaluateAsInt(Right, S.Context)) return; if (Right.isNegative()) { S.DiagRuntimeBehavior(Loc, RHS.get(), S.PDiag(diag::warn_shift_negative) << RHS.get()->getSourceRange()); return; } llvm::APInt LeftBits(Right.getBitWidth(), S.Context.getTypeSize(LHS.get()->getType())); if (Right.uge(LeftBits)) { S.DiagRuntimeBehavior(Loc, RHS.get(), S.PDiag(diag::warn_shift_gt_typewidth) << RHS.get()->getSourceRange()); return; } if (Opc != BO_Shl) return; // When left shifting an ICE which is signed, we can check for overflow which // according to C++ has undefined behavior ([expr.shift] 5.8/2). Unsigned // integers have defined behavior modulo one more than the maximum value // representable in the result type, so never warn for those. llvm::APSInt Left; if (LHS.get()->isValueDependent() || LHSType->hasUnsignedIntegerRepresentation() || !LHS.get()->EvaluateAsInt(Left, S.Context)) return; // If LHS does not have a signed type and non-negative value // then, the behavior is undefined. Warn about it. if (Left.isNegative()) { S.DiagRuntimeBehavior(Loc, LHS.get(), S.PDiag(diag::warn_shift_lhs_negative) << LHS.get()->getSourceRange()); return; } llvm::APInt ResultBits = static_cast(Right) + Left.getMinSignedBits(); if (LeftBits.uge(ResultBits)) return; llvm::APSInt Result = Left.extend(ResultBits.getLimitedValue()); Result = Result.shl(Right); // Print the bit representation of the signed integer as an unsigned // hexadecimal number. SmallString<40> HexResult; Result.toString(HexResult, 16, /*Signed =*/false, /*Literal =*/true); // If we are only missing a sign bit, this is less likely to result in actual // bugs -- if the result is cast back to an unsigned type, it will have the // expected value. Thus we place this behind a different warning that can be // turned off separately if needed. if (LeftBits == ResultBits - 1) { S.Diag(Loc, diag::warn_shift_result_sets_sign_bit) << HexResult << LHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return; } S.Diag(Loc, diag::warn_shift_result_gt_typewidth) << HexResult.str() << Result.getMinSignedBits() << LHSType << Left.getBitWidth() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } /// \brief Return the resulting type when an OpenCL vector is shifted /// by a scalar or vector shift amount. static QualType checkOpenCLVectorShift(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompAssign) { // OpenCL v1.1 s6.3.j says RHS can be a vector only if LHS is a vector. 
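  // Illustrative example (not from the original source, OpenCL):
  //   int4 v; int s;
  //   s << v;   // rejected below with err_shift_rhs_only_vector
  //   v << s;   // accepted; 's' is splatted to int4 further down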
if (!LHS.get()->getType()->isVectorType()) { S.Diag(Loc, diag::err_shift_rhs_only_vector) << RHS.get()->getType() << LHS.get()->getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } if (!IsCompAssign) { LHS = S.UsualUnaryConversions(LHS.get()); if (LHS.isInvalid()) return QualType(); } RHS = S.UsualUnaryConversions(RHS.get()); if (RHS.isInvalid()) return QualType(); QualType LHSType = LHS.get()->getType(); const VectorType *LHSVecTy = LHSType->castAs(); QualType LHSEleType = LHSVecTy->getElementType(); // Note that RHS might not be a vector. QualType RHSType = RHS.get()->getType(); const VectorType *RHSVecTy = RHSType->getAs(); QualType RHSEleType = RHSVecTy ? RHSVecTy->getElementType() : RHSType; // OpenCL v1.1 s6.3.j says that the operands need to be integers. if (!LHSEleType->isIntegerType()) { S.Diag(Loc, diag::err_typecheck_expect_int) << LHS.get()->getType() << LHS.get()->getSourceRange(); return QualType(); } if (!RHSEleType->isIntegerType()) { S.Diag(Loc, diag::err_typecheck_expect_int) << RHS.get()->getType() << RHS.get()->getSourceRange(); return QualType(); } if (RHSVecTy) { // OpenCL v1.1 s6.3.j says that for vector types, the operators // are applied component-wise. So if RHS is a vector, then ensure // that the number of elements is the same as LHS... if (RHSVecTy->getNumElements() != LHSVecTy->getNumElements()) { S.Diag(Loc, diag::err_typecheck_vector_lengths_not_equal) << LHS.get()->getType() << RHS.get()->getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); return QualType(); } } else { // ...else expand RHS to match the number of elements in LHS. QualType VecTy = S.Context.getExtVectorType(RHSEleType, LHSVecTy->getNumElements()); RHS = S.ImpCastExprToType(RHS.get(), VecTy, CK_VectorSplat); } return LHSType; } // C99 6.5.7 QualType Sema::CheckShiftOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc, bool IsCompAssign) { checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false); // Vector shifts promote their scalar inputs to vector type. if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) { if (LangOpts.OpenCL) return checkOpenCLVectorShift(*this, LHS, RHS, Loc, IsCompAssign); if (LangOpts.ZVector) { // The shift operators for the z vector extensions work basically // like OpenCL shifts, except that neither the LHS nor the RHS is // allowed to be a "vector bool". if (auto LHSVecType = LHS.get()->getType()->getAs()) if (LHSVecType->getVectorKind() == VectorType::AltiVecBool) return InvalidOperands(Loc, LHS, RHS); if (auto RHSVecType = RHS.get()->getType()->getAs()) if (RHSVecType->getVectorKind() == VectorType::AltiVecBool) return InvalidOperands(Loc, LHS, RHS); return checkOpenCLVectorShift(*this, LHS, RHS, Loc, IsCompAssign); } return CheckVectorOperands(LHS, RHS, Loc, IsCompAssign, /*AllowBothBool*/true, /*AllowBoolConversions*/false); } // Shifts don't perform usual arithmetic conversions, they just do integer // promotions on each operand. C99 6.5.7p3 // For the LHS, do usual unary conversions, but then reset them away // if this is a compound assignment. ExprResult OldLHS = LHS; LHS = UsualUnaryConversions(LHS.get()); if (LHS.isInvalid()) return QualType(); QualType LHSType = LHS.get()->getType(); if (IsCompAssign) LHS = OldLHS; // The RHS is simpler. RHS = UsualUnaryConversions(RHS.get()); if (RHS.isInvalid()) return QualType(); QualType RHSType = RHS.get()->getType(); // C99 6.5.7p2: Each of the operands shall have integer type. 
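  // Illustrative example (not from the original source): '1.5 << 2' and
  // shifts involving C++11 scoped enumerations are rejected as invalid
  // operands below.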
if (!LHSType->hasIntegerRepresentation() || !RHSType->hasIntegerRepresentation()) return InvalidOperands(Loc, LHS, RHS); // C++0x: Don't allow scoped enums. FIXME: Use something better than // hasIntegerRepresentation() above instead of this. if (isScopedEnumerationType(LHSType) || isScopedEnumerationType(RHSType)) { return InvalidOperands(Loc, LHS, RHS); } // Sanity-check shift operands DiagnoseBadShiftValues(*this, LHS, RHS, Loc, Opc, LHSType); // "The type of the result is that of the promoted left operand." return LHSType; } static bool IsWithinTemplateSpecialization(Decl *D) { if (DeclContext *DC = D->getDeclContext()) { if (isa(DC)) return true; if (FunctionDecl *FD = dyn_cast(DC)) return FD->isFunctionTemplateSpecialization(); } return false; } /// If two different enums are compared, raise a warning. static void checkEnumComparison(Sema &S, SourceLocation Loc, Expr *LHS, Expr *RHS) { QualType LHSStrippedType = LHS->IgnoreParenImpCasts()->getType(); QualType RHSStrippedType = RHS->IgnoreParenImpCasts()->getType(); const EnumType *LHSEnumType = LHSStrippedType->getAs(); if (!LHSEnumType) return; const EnumType *RHSEnumType = RHSStrippedType->getAs(); if (!RHSEnumType) return; // Ignore anonymous enums. if (!LHSEnumType->getDecl()->getIdentifier()) return; if (!RHSEnumType->getDecl()->getIdentifier()) return; if (S.Context.hasSameUnqualifiedType(LHSStrippedType, RHSStrippedType)) return; S.Diag(Loc, diag::warn_comparison_of_mixed_enum_types) << LHSStrippedType << RHSStrippedType << LHS->getSourceRange() << RHS->getSourceRange(); } /// \brief Diagnose bad pointer comparisons. static void diagnoseDistinctPointerComparison(Sema &S, SourceLocation Loc, ExprResult &LHS, ExprResult &RHS, bool IsError) { S.Diag(Loc, IsError ? diag::err_typecheck_comparison_of_distinct_pointers : diag::ext_typecheck_comparison_of_distinct_pointers) << LHS.get()->getType() << RHS.get()->getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } /// \brief Returns false if the pointers are converted to a composite type, /// true otherwise. static bool convertPointersToCompositeType(Sema &S, SourceLocation Loc, ExprResult &LHS, ExprResult &RHS) { // C++ [expr.rel]p2: // [...] Pointer conversions (4.10) and qualification // conversions (4.4) are performed on pointer operands (or on // a pointer operand and a null pointer constant) to bring // them to their composite pointer type. [...] // // C++ [expr.eq]p1 uses the same notion for (in)equality // comparisons of pointers. // C++ [expr.eq]p2: // In addition, pointers to members can be compared, or a pointer to // member and a null pointer constant. Pointer to member conversions // (4.11) and qualification conversions (4.4) are performed to bring // them to a common type. If one operand is a null pointer constant, // the common type is the type of the other operand. Otherwise, the // common type is a pointer to member type similar (4.4) to the type // of one of the operands, with a cv-qualification signature (4.4) // that is the union of the cv-qualification signatures of the operand // types. QualType LHSType = LHS.get()->getType(); QualType RHSType = RHS.get()->getType(); assert((LHSType->isPointerType() && RHSType->isPointerType()) || (LHSType->isMemberPointerType() && RHSType->isMemberPointerType())); bool NonStandardCompositeType = false; bool *BoolPtr = S.isSFINAEContext() ? 
nullptr : &NonStandardCompositeType; QualType T = S.FindCompositePointerType(Loc, LHS, RHS, BoolPtr); if (T.isNull()) { diagnoseDistinctPointerComparison(S, Loc, LHS, RHS, /*isError*/true); return true; } if (NonStandardCompositeType) S.Diag(Loc, diag::ext_typecheck_comparison_of_distinct_pointers_nonstandard) << LHSType << RHSType << T << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); LHS = S.ImpCastExprToType(LHS.get(), T, CK_BitCast); RHS = S.ImpCastExprToType(RHS.get(), T, CK_BitCast); return false; } static void diagnoseFunctionPointerToVoidComparison(Sema &S, SourceLocation Loc, ExprResult &LHS, ExprResult &RHS, bool IsError) { S.Diag(Loc, IsError ? diag::err_typecheck_comparison_of_fptr_to_void : diag::ext_typecheck_comparison_of_fptr_to_void) << LHS.get()->getType() << RHS.get()->getType() << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } static bool isObjCObjectLiteral(ExprResult &E) { switch (E.get()->IgnoreParenImpCasts()->getStmtClass()) { case Stmt::ObjCArrayLiteralClass: case Stmt::ObjCDictionaryLiteralClass: case Stmt::ObjCStringLiteralClass: case Stmt::ObjCBoxedExprClass: return true; default: // Note that ObjCBoolLiteral is NOT an object literal! return false; } } static bool hasIsEqualMethod(Sema &S, const Expr *LHS, const Expr *RHS) { const ObjCObjectPointerType *Type = LHS->getType()->getAs(); // If this is not actually an Objective-C object, bail out. if (!Type) return false; // Get the LHS object's interface type. QualType InterfaceType = Type->getPointeeType(); // If the RHS isn't an Objective-C object, bail out. if (!RHS->getType()->isObjCObjectPointerType()) return false; // Try to find the -isEqual: method. Selector IsEqualSel = S.NSAPIObj->getIsEqualSelector(); ObjCMethodDecl *Method = S.LookupMethodInObjectType(IsEqualSel, InterfaceType, /*instance=*/true); if (!Method) { if (Type->isObjCIdType()) { // For 'id', just check the global pool. Method = S.LookupInstanceMethodInGlobalPool(IsEqualSel, SourceRange(), /*receiverId=*/true); } else { // Check protocols. Method = S.LookupMethodInQualifiedType(IsEqualSel, Type, /*instance=*/true); } } if (!Method) return false; QualType T = Method->parameters()[0]->getType(); if (!T->isObjCObjectPointerType()) return false; QualType R = Method->getReturnType(); if (!R->isScalarType()) return false; return true; } Sema::ObjCLiteralKind Sema::CheckLiteralKind(Expr *FromE) { FromE = FromE->IgnoreParenImpCasts(); switch (FromE->getStmtClass()) { default: break; case Stmt::ObjCStringLiteralClass: // "string literal" return LK_String; case Stmt::ObjCArrayLiteralClass: // "array literal" return LK_Array; case Stmt::ObjCDictionaryLiteralClass: // "dictionary literal" return LK_Dictionary; case Stmt::BlockExprClass: return LK_Block; case Stmt::ObjCBoxedExprClass: { Expr *Inner = cast(FromE)->getSubExpr()->IgnoreParens(); switch (Inner->getStmtClass()) { case Stmt::IntegerLiteralClass: case Stmt::FloatingLiteralClass: case Stmt::CharacterLiteralClass: case Stmt::ObjCBoolLiteralExprClass: case Stmt::CXXBoolLiteralExprClass: // "numeric literal" return LK_Numeric; case Stmt::ImplicitCastExprClass: { CastKind CK = cast(Inner)->getCastKind(); // Boolean literals can be represented by implicit casts. 
if (CK == CK_IntegralToBoolean || CK == CK_IntegralCast) return LK_Numeric; break; } default: break; } return LK_Boxed; } } return LK_None; } static void diagnoseObjCLiteralComparison(Sema &S, SourceLocation Loc, ExprResult &LHS, ExprResult &RHS, BinaryOperator::Opcode Opc){ Expr *Literal; Expr *Other; if (isObjCObjectLiteral(LHS)) { Literal = LHS.get(); Other = RHS.get(); } else { Literal = RHS.get(); Other = LHS.get(); } // Don't warn on comparisons against nil. Other = Other->IgnoreParenCasts(); if (Other->isNullPointerConstant(S.getASTContext(), Expr::NPC_ValueDependentIsNotNull)) return; // This should be kept in sync with warn_objc_literal_comparison. // LK_String should always be after the other literals, since it has its own // warning flag. Sema::ObjCLiteralKind LiteralKind = S.CheckLiteralKind(Literal); assert(LiteralKind != Sema::LK_Block); if (LiteralKind == Sema::LK_None) { llvm_unreachable("Unknown Objective-C object literal kind"); } if (LiteralKind == Sema::LK_String) S.Diag(Loc, diag::warn_objc_string_literal_comparison) << Literal->getSourceRange(); else S.Diag(Loc, diag::warn_objc_literal_comparison) << LiteralKind << Literal->getSourceRange(); if (BinaryOperator::isEqualityOp(Opc) && hasIsEqualMethod(S, LHS.get(), RHS.get())) { SourceLocation Start = LHS.get()->getLocStart(); SourceLocation End = S.getLocForEndOfToken(RHS.get()->getLocEnd()); CharSourceRange OpRange = CharSourceRange::getCharRange(Loc, S.getLocForEndOfToken(Loc)); S.Diag(Loc, diag::note_objc_literal_comparison_isequal) << FixItHint::CreateInsertion(Start, Opc == BO_EQ ? "[" : "![") << FixItHint::CreateReplacement(OpRange, " isEqual:") << FixItHint::CreateInsertion(End, "]"); } } static void diagnoseLogicalNotOnLHSofComparison(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc) { // Check that left hand side is !something. UnaryOperator *UO = dyn_cast(LHS.get()->IgnoreImpCasts()); if (!UO || UO->getOpcode() != UO_LNot) return; // Only check if the right hand side is non-bool arithmetic type. if (RHS.get()->isKnownToHaveBooleanValue()) return; // Make sure that the something in !something is not bool. Expr *SubExpr = UO->getSubExpr()->IgnoreImpCasts(); if (SubExpr->isKnownToHaveBooleanValue()) return; // Emit warning. S.Diag(UO->getOperatorLoc(), diag::warn_logical_not_on_lhs_of_comparison) << Loc; // First note suggest !(x < y) SourceLocation FirstOpen = SubExpr->getLocStart(); SourceLocation FirstClose = RHS.get()->getLocEnd(); FirstClose = S.getLocForEndOfToken(FirstClose); if (FirstClose.isInvalid()) FirstOpen = SourceLocation(); S.Diag(UO->getOperatorLoc(), diag::note_logical_not_fix) << FixItHint::CreateInsertion(FirstOpen, "(") << FixItHint::CreateInsertion(FirstClose, ")"); // Second note suggests (!x) < y SourceLocation SecondOpen = LHS.get()->getLocStart(); SourceLocation SecondClose = LHS.get()->getLocEnd(); SecondClose = S.getLocForEndOfToken(SecondClose); if (SecondClose.isInvalid()) SecondOpen = SourceLocation(); S.Diag(UO->getOperatorLoc(), diag::note_logical_not_silence_with_parens) << FixItHint::CreateInsertion(SecondOpen, "(") << FixItHint::CreateInsertion(SecondClose, ")"); } // Get the decl for a simple expression: a reference to a variable, // an implicit C++ field reference, or an implicit ObjC ivar reference. 
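// Illustrative example (not from the original source): for 'x == x' this
// returns the same VarDecl for both operands, which drives the
// warn_comparison_always self-comparison warning in CheckCompareOperands.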
static ValueDecl *getCompareDecl(Expr *E) { if (DeclRefExpr* DR = dyn_cast(E)) return DR->getDecl(); if (ObjCIvarRefExpr* Ivar = dyn_cast(E)) { if (Ivar->isFreeIvar()) return Ivar->getDecl(); } if (MemberExpr* Mem = dyn_cast(E)) { if (Mem->isImplicitAccess()) return Mem->getMemberDecl(); } return nullptr; } // C99 6.5.8, C++ [expr.rel] QualType Sema::CheckCompareOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc, bool IsRelational) { checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/true); // Handle vector comparisons separately. if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) return CheckVectorCompareOperands(LHS, RHS, Loc, IsRelational); QualType LHSType = LHS.get()->getType(); QualType RHSType = RHS.get()->getType(); Expr *LHSStripped = LHS.get()->IgnoreParenImpCasts(); Expr *RHSStripped = RHS.get()->IgnoreParenImpCasts(); checkEnumComparison(*this, Loc, LHS.get(), RHS.get()); diagnoseLogicalNotOnLHSofComparison(*this, LHS, RHS, Loc, Opc); if (!LHSType->hasFloatingRepresentation() && !(LHSType->isBlockPointerType() && IsRelational) && !LHS.get()->getLocStart().isMacroID() && !RHS.get()->getLocStart().isMacroID() && ActiveTemplateInstantiations.empty()) { // For non-floating point types, check for self-comparisons of the form // x == x, x != x, x < x, etc. These always evaluate to a constant, and // often indicate logic errors in the program. // // NOTE: Don't warn about comparison expressions resulting from macro // expansion. Also don't warn about comparisons which are only self // comparisons within a template specialization. The warnings should catch // obvious cases in the definition of the template anyways. The idea is to // warn when the typed comparison operator will always evaluate to the same // result. ValueDecl *DL = getCompareDecl(LHSStripped); ValueDecl *DR = getCompareDecl(RHSStripped); if (DL && DR && DL == DR && !IsWithinTemplateSpecialization(DL)) { DiagRuntimeBehavior(Loc, nullptr, PDiag(diag::warn_comparison_always) << 0 // self- << (Opc == BO_EQ || Opc == BO_LE || Opc == BO_GE)); } else if (DL && DR && LHSType->isArrayType() && RHSType->isArrayType() && !DL->getType()->isReferenceType() && !DR->getType()->isReferenceType()) { // what is it always going to eval to? char always_evals_to; switch(Opc) { case BO_EQ: // e.g. array1 == array2 always_evals_to = 0; // false break; case BO_NE: // e.g. array1 != array2 always_evals_to = 1; // true break; default: // best we can say is 'a constant' always_evals_to = 2; // e.g. array1 <= array2 break; } DiagRuntimeBehavior(Loc, nullptr, PDiag(diag::warn_comparison_always) << 1 // array << always_evals_to); } if (isa(LHSStripped)) LHSStripped = LHSStripped->IgnoreParenCasts(); if (isa(RHSStripped)) RHSStripped = RHSStripped->IgnoreParenCasts(); // Warn about comparisons against a string constant (unless the other // operand is null), the user probably wants strcmp. 
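    // Illustrative example (not from the original source): 'if (s == "foo")'
    // is reported with warn_stringcompare; comparing against a null pointer
    // constant is deliberately left alone.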
Expr *literalString = nullptr; Expr *literalStringStripped = nullptr; if ((isa(LHSStripped) || isa(LHSStripped)) && !RHSStripped->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) { literalString = LHS.get(); literalStringStripped = LHSStripped; } else if ((isa(RHSStripped) || isa(RHSStripped)) && !LHSStripped->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) { literalString = RHS.get(); literalStringStripped = RHSStripped; } if (literalString) { DiagRuntimeBehavior(Loc, nullptr, PDiag(diag::warn_stringcompare) << isa(literalStringStripped) << literalString->getSourceRange()); } } // C99 6.5.8p3 / C99 6.5.9p4 UsualArithmeticConversions(LHS, RHS); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); LHSType = LHS.get()->getType(); RHSType = RHS.get()->getType(); // The result of comparisons is 'bool' in C++, 'int' in C. QualType ResultTy = Context.getLogicalOperationType(); if (IsRelational) { if (LHSType->isRealType() && RHSType->isRealType()) return ResultTy; } else { // Check for comparisons of floating point operands using != and ==. if (LHSType->hasFloatingRepresentation()) CheckFloatComparison(Loc, LHS.get(), RHS.get()); if (LHSType->isArithmeticType() && RHSType->isArithmeticType()) return ResultTy; } const Expr::NullPointerConstantKind LHSNullKind = LHS.get()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull); const Expr::NullPointerConstantKind RHSNullKind = RHS.get()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull); bool LHSIsNull = LHSNullKind != Expr::NPCK_NotNull; bool RHSIsNull = RHSNullKind != Expr::NPCK_NotNull; if (!IsRelational && LHSIsNull != RHSIsNull) { bool IsEquality = Opc == BO_EQ; if (RHSIsNull) DiagnoseAlwaysNonNullPointer(LHS.get(), RHSNullKind, IsEquality, RHS.get()->getSourceRange()); else DiagnoseAlwaysNonNullPointer(RHS.get(), LHSNullKind, IsEquality, LHS.get()->getSourceRange()); } // All of the following pointer-related warnings are GCC extensions, except // when handling null pointer constants. if (LHSType->isPointerType() && RHSType->isPointerType()) { // C99 6.5.8p2 QualType LCanPointeeTy = LHSType->castAs()->getPointeeType().getCanonicalType(); QualType RCanPointeeTy = RHSType->castAs()->getPointeeType().getCanonicalType(); if (getLangOpts().CPlusPlus) { if (LCanPointeeTy == RCanPointeeTy) return ResultTy; if (!IsRelational && (LCanPointeeTy->isVoidType() || RCanPointeeTy->isVoidType())) { // Valid unless comparison between non-null pointer and function pointer // This is a gcc extension compatibility comparison. // In a SFINAE context, we treat this as a hard error to maintain // conformance with the C++ standard. 
if ((LCanPointeeTy->isFunctionType() || RCanPointeeTy->isFunctionType()) && !LHSIsNull && !RHSIsNull) { diagnoseFunctionPointerToVoidComparison( *this, Loc, LHS, RHS, /*isError*/ (bool)isSFINAEContext()); if (isSFINAEContext()) return QualType(); RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast); return ResultTy; } } if (convertPointersToCompositeType(*this, Loc, LHS, RHS)) return QualType(); else return ResultTy; } // C99 6.5.9p2 and C99 6.5.8p2 if (Context.typesAreCompatible(LCanPointeeTy.getUnqualifiedType(), RCanPointeeTy.getUnqualifiedType())) { // Valid unless a relational comparison of function pointers if (IsRelational && LCanPointeeTy->isFunctionType()) { Diag(Loc, diag::ext_typecheck_ordered_comparison_of_function_pointers) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } } else if (!IsRelational && (LCanPointeeTy->isVoidType() || RCanPointeeTy->isVoidType())) { // Valid unless comparison between non-null pointer and function pointer if ((LCanPointeeTy->isFunctionType() || RCanPointeeTy->isFunctionType()) && !LHSIsNull && !RHSIsNull) diagnoseFunctionPointerToVoidComparison(*this, Loc, LHS, RHS, /*isError*/false); } else { // Invalid diagnoseDistinctPointerComparison(*this, Loc, LHS, RHS, /*isError*/false); } if (LCanPointeeTy != RCanPointeeTy) { // Treat NULL constant as a special case in OpenCL. if (getLangOpts().OpenCL && !LHSIsNull && !RHSIsNull) { const PointerType *LHSPtr = LHSType->getAs(); if (!LHSPtr->isAddressSpaceOverlapping(*RHSType->getAs())) { Diag(Loc, diag::err_typecheck_op_on_nonoverlapping_address_space_pointers) << LHSType << RHSType << 0 /* comparison */ << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } } unsigned AddrSpaceL = LCanPointeeTy.getAddressSpace(); unsigned AddrSpaceR = RCanPointeeTy.getAddressSpace(); CastKind Kind = AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast; if (LHSIsNull && !RHSIsNull) LHS = ImpCastExprToType(LHS.get(), RHSType, Kind); else RHS = ImpCastExprToType(RHS.get(), LHSType, Kind); } return ResultTy; } if (getLangOpts().CPlusPlus) { // Comparison of nullptr_t with itself. if (LHSType->isNullPtrType() && RHSType->isNullPtrType()) return ResultTy; // Comparison of pointers with null pointer constants and equality // comparisons of member pointers to null pointer constants. if (RHSIsNull && ((LHSType->isAnyPointerType() || LHSType->isNullPtrType()) || (!IsRelational && (LHSType->isMemberPointerType() || LHSType->isBlockPointerType())))) { RHS = ImpCastExprToType(RHS.get(), LHSType, LHSType->isMemberPointerType() ? CK_NullToMemberPointer : CK_NullToPointer); return ResultTy; } if (LHSIsNull && ((RHSType->isAnyPointerType() || RHSType->isNullPtrType()) || (!IsRelational && (RHSType->isMemberPointerType() || RHSType->isBlockPointerType())))) { LHS = ImpCastExprToType(LHS.get(), RHSType, RHSType->isMemberPointerType() ? CK_NullToMemberPointer : CK_NullToPointer); return ResultTy; } // Comparison of member pointers. if (!IsRelational && LHSType->isMemberPointerType() && RHSType->isMemberPointerType()) { if (convertPointersToCompositeType(*this, Loc, LHS, RHS)) return QualType(); else return ResultTy; } // Handle scoped enumeration types specifically, since they don't promote // to integers. if (LHS.get()->getType()->isEnumeralType() && Context.hasSameUnqualifiedType(LHS.get()->getType(), RHS.get()->getType())) return ResultTy; } // Handle block pointer types. 
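  // Illustrative example (not from the original source): comparing
  // 'void (^)(int)' against 'void (^)(float)' with '==' is reported with
  // err_typecheck_comparison_of_distinct_blocks unless one side is a null
  // pointer constant.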
if (!IsRelational && LHSType->isBlockPointerType() && RHSType->isBlockPointerType()) { QualType lpointee = LHSType->castAs()->getPointeeType(); QualType rpointee = RHSType->castAs()->getPointeeType(); if (!LHSIsNull && !RHSIsNull && !Context.typesAreCompatible(lpointee, rpointee)) { Diag(Loc, diag::err_typecheck_comparison_of_distinct_blocks) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast); return ResultTy; } // Allow block pointers to be compared with null pointer constants. if (!IsRelational && ((LHSType->isBlockPointerType() && RHSType->isPointerType()) || (LHSType->isPointerType() && RHSType->isBlockPointerType()))) { if (!LHSIsNull && !RHSIsNull) { if (!((RHSType->isPointerType() && RHSType->castAs() ->getPointeeType()->isVoidType()) || (LHSType->isPointerType() && LHSType->castAs() ->getPointeeType()->isVoidType()))) Diag(Loc, diag::err_typecheck_comparison_of_distinct_blocks) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } if (LHSIsNull && !RHSIsNull) LHS = ImpCastExprToType(LHS.get(), RHSType, RHSType->isPointerType() ? CK_BitCast : CK_AnyPointerToBlockPointerCast); else RHS = ImpCastExprToType(RHS.get(), LHSType, LHSType->isPointerType() ? CK_BitCast : CK_AnyPointerToBlockPointerCast); return ResultTy; } if (LHSType->isObjCObjectPointerType() || RHSType->isObjCObjectPointerType()) { const PointerType *LPT = LHSType->getAs(); const PointerType *RPT = RHSType->getAs(); if (LPT || RPT) { bool LPtrToVoid = LPT ? LPT->getPointeeType()->isVoidType() : false; bool RPtrToVoid = RPT ? RPT->getPointeeType()->isVoidType() : false; if (!LPtrToVoid && !RPtrToVoid && !Context.typesAreCompatible(LHSType, RHSType)) { diagnoseDistinctPointerComparison(*this, Loc, LHS, RHS, /*isError*/false); } if (LHSIsNull && !RHSIsNull) { Expr *E = LHS.get(); if (getLangOpts().ObjCAutoRefCount) CheckObjCARCConversion(SourceRange(), RHSType, E, CCK_ImplicitConversion); LHS = ImpCastExprToType(E, RHSType, RPT ? CK_BitCast :CK_CPointerToObjCPointerCast); } else { Expr *E = RHS.get(); if (getLangOpts().ObjCAutoRefCount) CheckObjCARCConversion(SourceRange(), LHSType, E, CCK_ImplicitConversion, /*Diagnose=*/true, /*DiagnoseCFAudited=*/false, Opc); RHS = ImpCastExprToType(E, LHSType, LPT ? CK_BitCast :CK_CPointerToObjCPointerCast); } return ResultTy; } if (LHSType->isObjCObjectPointerType() && RHSType->isObjCObjectPointerType()) { if (!Context.areComparableObjCPointerTypes(LHSType, RHSType)) diagnoseDistinctPointerComparison(*this, Loc, LHS, RHS, /*isError*/false); if (isObjCObjectLiteral(LHS) || isObjCObjectLiteral(RHS)) diagnoseObjCLiteralComparison(*this, Loc, LHS, RHS, Opc); if (LHSIsNull && !RHSIsNull) LHS = ImpCastExprToType(LHS.get(), RHSType, CK_BitCast); else RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast); return ResultTy; } } if ((LHSType->isAnyPointerType() && RHSType->isIntegerType()) || (LHSType->isIntegerType() && RHSType->isAnyPointerType())) { unsigned DiagID = 0; bool isError = false; if (LangOpts.DebuggerSupport) { // Under a debugger, allow the comparison of pointers to integers, // since users tend to want to compare addresses. 
} else if ((LHSIsNull && LHSType->isIntegerType()) || (RHSIsNull && RHSType->isIntegerType())) { if (IsRelational && !getLangOpts().CPlusPlus) DiagID = diag::ext_typecheck_ordered_comparison_of_pointer_and_zero; } else if (IsRelational && !getLangOpts().CPlusPlus) DiagID = diag::ext_typecheck_ordered_comparison_of_pointer_integer; else if (getLangOpts().CPlusPlus) { DiagID = diag::err_typecheck_comparison_of_pointer_integer; isError = true; } else DiagID = diag::ext_typecheck_comparison_of_pointer_integer; if (DiagID) { Diag(Loc, DiagID) << LHSType << RHSType << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); if (isError) return QualType(); } if (LHSType->isIntegerType()) LHS = ImpCastExprToType(LHS.get(), RHSType, LHSIsNull ? CK_NullToPointer : CK_IntegralToPointer); else RHS = ImpCastExprToType(RHS.get(), LHSType, RHSIsNull ? CK_NullToPointer : CK_IntegralToPointer); return ResultTy; } // Handle block pointers. if (!IsRelational && RHSIsNull && LHSType->isBlockPointerType() && RHSType->isIntegerType()) { RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer); return ResultTy; } if (!IsRelational && LHSIsNull && LHSType->isIntegerType() && RHSType->isBlockPointerType()) { LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer); return ResultTy; } return InvalidOperands(Loc, LHS, RHS); } // Return a signed type that is of identical size and number of elements. // For floating point vectors, return an integer type of identical size // and number of elements. QualType Sema::GetSignedVectorType(QualType V) { const VectorType *VTy = V->getAs(); unsigned TypeSize = Context.getTypeSize(VTy->getElementType()); if (TypeSize == Context.getTypeSize(Context.CharTy)) return Context.getExtVectorType(Context.CharTy, VTy->getNumElements()); else if (TypeSize == Context.getTypeSize(Context.ShortTy)) return Context.getExtVectorType(Context.ShortTy, VTy->getNumElements()); else if (TypeSize == Context.getTypeSize(Context.IntTy)) return Context.getExtVectorType(Context.IntTy, VTy->getNumElements()); else if (TypeSize == Context.getTypeSize(Context.LongTy)) return Context.getExtVectorType(Context.LongTy, VTy->getNumElements()); assert(TypeSize == Context.getTypeSize(Context.LongLongTy) && "Unhandled vector element size in vector compare"); return Context.getExtVectorType(Context.LongLongTy, VTy->getNumElements()); } /// CheckVectorCompareOperands - vector comparisons are a clang extension that /// operates on extended vector types. Instead of producing an IntTy result, /// like a scalar comparison, a vector comparison produces a vector of integer /// types. QualType Sema::CheckVectorCompareOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsRelational) { // Check to make sure we're operating on vectors of the same type and width, // Allowing one side to be a scalar of element type. QualType vType = CheckVectorOperands(LHS, RHS, Loc, /*isCompAssign*/false, /*AllowBothBool*/true, /*AllowBoolConversions*/getLangOpts().ZVector); if (vType.isNull()) return vType; QualType LHSType = LHS.get()->getType(); // If AltiVec, the comparison results in a numeric type, i.e. // bool for C++, int for C if (getLangOpts().AltiVec && vType->getAs()->getVectorKind() == VectorType::AltiVecVector) return Context.getLogicalOperationType(); // For non-floating point types, check for self-comparisons of the form // x == x, x != x, x < x, etc. These always evaluate to a constant, and // often indicate logic errors in the program. 
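  // Illustrative example (not from the original source): with an integer
  // ext_vector 'int4 v', the expression 'v == v' is reported with
  // warn_comparison_always outside of template instantiations.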
if (!LHSType->hasFloatingRepresentation() && ActiveTemplateInstantiations.empty()) { if (DeclRefExpr* DRL = dyn_cast(LHS.get()->IgnoreParenImpCasts())) if (DeclRefExpr* DRR = dyn_cast(RHS.get()->IgnoreParenImpCasts())) if (DRL->getDecl() == DRR->getDecl()) DiagRuntimeBehavior(Loc, nullptr, PDiag(diag::warn_comparison_always) << 0 // self- << 2 // "a constant" ); } // Check for comparisons of floating point operands using != and ==. if (!IsRelational && LHSType->hasFloatingRepresentation()) { assert (RHS.get()->getType()->hasFloatingRepresentation()); CheckFloatComparison(Loc, LHS.get(), RHS.get()); } // Return a signed type for the vector. return GetSignedVectorType(LHSType); } QualType Sema::CheckVectorLogicalOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc) { // Ensure that either both operands are of the same vector type, or // one operand is of a vector type and the other is of its element type. QualType vType = CheckVectorOperands(LHS, RHS, Loc, false, /*AllowBothBool*/true, /*AllowBoolConversions*/false); if (vType.isNull()) return InvalidOperands(Loc, LHS, RHS); if (getLangOpts().OpenCL && getLangOpts().OpenCLVersion < 120 && vType->hasFloatingRepresentation()) return InvalidOperands(Loc, LHS, RHS); return GetSignedVectorType(LHS.get()->getType()); } inline QualType Sema::CheckBitwiseOperands( ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompAssign) { checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false); if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) { if (LHS.get()->getType()->hasIntegerRepresentation() && RHS.get()->getType()->hasIntegerRepresentation()) return CheckVectorOperands(LHS, RHS, Loc, IsCompAssign, /*AllowBothBool*/true, /*AllowBoolConversions*/getLangOpts().ZVector); return InvalidOperands(Loc, LHS, RHS); } ExprResult LHSResult = LHS, RHSResult = RHS; QualType compType = UsualArithmeticConversions(LHSResult, RHSResult, IsCompAssign); if (LHSResult.isInvalid() || RHSResult.isInvalid()) return QualType(); LHS = LHSResult.get(); RHS = RHSResult.get(); if (!compType.isNull() && compType->isIntegralOrUnscopedEnumerationType()) return compType; return InvalidOperands(Loc, LHS, RHS); } // C99 6.5.[13,14] inline QualType Sema::CheckLogicalOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc) { // Check vector operands differently. if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) return CheckVectorLogicalOperands(LHS, RHS, Loc); // Diagnose cases where the user write a logical and/or but probably meant a // bitwise one. We do this when the LHS is a non-bool integer and the RHS // is a constant. if (LHS.get()->getType()->isIntegerType() && !LHS.get()->getType()->isBooleanType() && RHS.get()->getType()->isIntegerType() && !RHS.get()->isValueDependent() && // Don't warn in macros or template instantiations. !Loc.isMacroID() && ActiveTemplateInstantiations.empty()) { // If the RHS can be constant folded, and if it constant folds to something // that isn't 0 or 1 (which indicate a potential logical operation that // happened to fold to true/false) then warn. // Parens on the RHS are ignored. llvm::APSInt Result; if (RHS.get()->EvaluateAsInt(Result, Context)) if ((getLangOpts().Bool && !RHS.get()->getType()->isBooleanType() && !RHS.get()->getExprLoc().isMacroID()) || (Result != 0 && Result != 1)) { Diag(Loc, diag::warn_logical_instead_of_bitwise) << RHS.get()->getSourceRange() << (Opc == BO_LAnd ? 
"&&" : "||"); // Suggest replacing the logical operator with the bitwise version Diag(Loc, diag::note_logical_instead_of_bitwise_change_operator) << (Opc == BO_LAnd ? "&" : "|") << FixItHint::CreateReplacement(SourceRange( Loc, getLocForEndOfToken(Loc)), Opc == BO_LAnd ? "&" : "|"); if (Opc == BO_LAnd) // Suggest replacing "Foo() && kNonZero" with "Foo()" Diag(Loc, diag::note_logical_instead_of_bitwise_remove_constant) << FixItHint::CreateRemoval( SourceRange(getLocForEndOfToken(LHS.get()->getLocEnd()), RHS.get()->getLocEnd())); } } if (!Context.getLangOpts().CPlusPlus) { // OpenCL v1.1 s6.3.g: The logical operators and (&&), or (||) do // not operate on the built-in scalar and vector float types. if (Context.getLangOpts().OpenCL && Context.getLangOpts().OpenCLVersion < 120) { if (LHS.get()->getType()->isFloatingType() || RHS.get()->getType()->isFloatingType()) return InvalidOperands(Loc, LHS, RHS); } LHS = UsualUnaryConversions(LHS.get()); if (LHS.isInvalid()) return QualType(); RHS = UsualUnaryConversions(RHS.get()); if (RHS.isInvalid()) return QualType(); if (!LHS.get()->getType()->isScalarType() || !RHS.get()->getType()->isScalarType()) return InvalidOperands(Loc, LHS, RHS); return Context.IntTy; } // The following is safe because we only use this method for // non-overloadable operands. // C++ [expr.log.and]p1 // C++ [expr.log.or]p1 // The operands are both contextually converted to type bool. ExprResult LHSRes = PerformContextuallyConvertToBool(LHS.get()); if (LHSRes.isInvalid()) return InvalidOperands(Loc, LHS, RHS); LHS = LHSRes; ExprResult RHSRes = PerformContextuallyConvertToBool(RHS.get()); if (RHSRes.isInvalid()) return InvalidOperands(Loc, LHS, RHS); RHS = RHSRes; // C++ [expr.log.and]p2 // C++ [expr.log.or]p2 // The result is a bool. return Context.BoolTy; } static bool IsReadonlyMessage(Expr *E, Sema &S) { const MemberExpr *ME = dyn_cast(E); if (!ME) return false; if (!isa(ME->getMemberDecl())) return false; ObjCMessageExpr *Base = dyn_cast(ME->getBase()->IgnoreParenImpCasts()); if (!Base) return false; return Base->getMethodDecl() != nullptr; } /// Is the given expression (which must be 'const') a reference to a /// variable which was originally non-const, but which has become /// 'const' due to being captured within a block? enum NonConstCaptureKind { NCCK_None, NCCK_Block, NCCK_Lambda }; static NonConstCaptureKind isReferenceToNonConstCapture(Sema &S, Expr *E) { assert(E->isLValue() && E->getType().isConstQualified()); E = E->IgnoreParens(); // Must be a reference to a declaration from an enclosing scope. DeclRefExpr *DRE = dyn_cast(E); if (!DRE) return NCCK_None; if (!DRE->refersToEnclosingVariableOrCapture()) return NCCK_None; // The declaration must be a variable which is not declared 'const'. VarDecl *var = dyn_cast(DRE->getDecl()); if (!var) return NCCK_None; if (var->getType().isConstQualified()) return NCCK_None; assert(var->hasLocalStorage() && "capture added 'const' to non-local?"); // Decide whether the first capture was for a block or a lambda. DeclContext *DC = S.CurContext, *Prev = nullptr; while (DC != var->getDeclContext()) { Prev = DC; DC = DC->getParent(); } // Unless we have an init-capture, we've gone one step too far. if (!var->isInitCapture()) DC = Prev; return (isa(DC) ? 
NCCK_Block : NCCK_Lambda); } static bool IsTypeModifiable(QualType Ty, bool IsDereference) { Ty = Ty.getNonReferenceType(); if (IsDereference && Ty->isPointerType()) Ty = Ty->getPointeeType(); return !Ty.isConstQualified(); } /// Emit the "read-only variable not assignable" error and print notes to give /// more information about why the variable is not assignable, such as pointing /// to the declaration of a const variable, showing that a method is const, or /// that the function is returning a const reference. static void DiagnoseConstAssignment(Sema &S, const Expr *E, SourceLocation Loc) { // Update err_typecheck_assign_const and note_typecheck_assign_const // when this enum is changed. enum { ConstFunction, ConstVariable, ConstMember, ConstMethod, ConstUnknown, // Keep as last element }; SourceRange ExprRange = E->getSourceRange(); // Only emit one error on the first const found. All other consts will emit // a note to the error. bool DiagnosticEmitted = false; // Track if the current expression is the result of a derefence, and if the // next checked expression is the result of a derefence. bool IsDereference = false; bool NextIsDereference = false; // Loop to process MemberExpr chains. while (true) { IsDereference = NextIsDereference; NextIsDereference = false; E = E->IgnoreParenImpCasts(); if (const MemberExpr *ME = dyn_cast(E)) { NextIsDereference = ME->isArrow(); const ValueDecl *VD = ME->getMemberDecl(); if (const FieldDecl *Field = dyn_cast(VD)) { // Mutable fields can be modified even if the class is const. if (Field->isMutable()) { assert(DiagnosticEmitted && "Expected diagnostic not emitted."); break; } if (!IsTypeModifiable(Field->getType(), IsDereference)) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstMember << false /*static*/ << Field << Field->getType(); DiagnosticEmitted = true; } S.Diag(VD->getLocation(), diag::note_typecheck_assign_const) << ConstMember << false /*static*/ << Field << Field->getType() << Field->getSourceRange(); } E = ME->getBase(); continue; } else if (const VarDecl *VDecl = dyn_cast(VD)) { if (VDecl->getType().isConstQualified()) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstMember << true /*static*/ << VDecl << VDecl->getType(); DiagnosticEmitted = true; } S.Diag(VD->getLocation(), diag::note_typecheck_assign_const) << ConstMember << true /*static*/ << VDecl << VDecl->getType() << VDecl->getSourceRange(); } // Static fields do not inherit constness from parents. break; } break; } // End MemberExpr break; } if (const CallExpr *CE = dyn_cast(E)) { // Function calls const FunctionDecl *FD = CE->getDirectCallee(); if (FD && !IsTypeModifiable(FD->getReturnType(), IsDereference)) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstFunction << FD; DiagnosticEmitted = true; } S.Diag(FD->getReturnTypeSourceRange().getBegin(), diag::note_typecheck_assign_const) << ConstFunction << FD << FD->getReturnType() << FD->getReturnTypeSourceRange(); } } else if (const DeclRefExpr *DRE = dyn_cast(E)) { // Point to variable declaration. 
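      // For illustration only (hypothetical user code), this branch covers a
      // plain const variable:
      //   const int x = 0;
      //   x = 1;   // err_typecheck_assign_const, with a note pointing at the
      //            // declaration of 'x'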
if (const ValueDecl *VD = DRE->getDecl()) { if (!IsTypeModifiable(VD->getType(), IsDereference)) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstVariable << VD << VD->getType(); DiagnosticEmitted = true; } S.Diag(VD->getLocation(), diag::note_typecheck_assign_const) << ConstVariable << VD << VD->getType() << VD->getSourceRange(); } } } else if (isa(E)) { if (const DeclContext *DC = S.getFunctionLevelDeclContext()) { if (const CXXMethodDecl *MD = dyn_cast(DC)) { if (MD->isConst()) { if (!DiagnosticEmitted) { S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstMethod << MD; DiagnosticEmitted = true; } S.Diag(MD->getLocation(), diag::note_typecheck_assign_const) << ConstMethod << MD << MD->getSourceRange(); } } } } if (DiagnosticEmitted) return; // Can't determine a more specific message, so display the generic error. S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstUnknown; } /// CheckForModifiableLvalue - Verify that E is a modifiable lvalue. If not, /// emit an error and return true. If so, return false. static bool CheckForModifiableLvalue(Expr *E, SourceLocation Loc, Sema &S) { assert(!E->hasPlaceholderType(BuiltinType::PseudoObject)); SourceLocation OrigLoc = Loc; Expr::isModifiableLvalueResult IsLV = E->isModifiableLvalue(S.Context, &Loc); if (IsLV == Expr::MLV_ClassTemporary && IsReadonlyMessage(E, S)) IsLV = Expr::MLV_InvalidMessageExpression; if (IsLV == Expr::MLV_Valid) return false; unsigned DiagID = 0; bool NeedType = false; switch (IsLV) { // C99 6.5.16p2 case Expr::MLV_ConstQualified: // Use a specialized diagnostic when we're assigning to an object // from an enclosing function or block. if (NonConstCaptureKind NCCK = isReferenceToNonConstCapture(S, E)) { if (NCCK == NCCK_Block) DiagID = diag::err_block_decl_ref_not_modifiable_lvalue; else DiagID = diag::err_lambda_decl_ref_not_modifiable_lvalue; break; } // In ARC, use some specialized diagnostics for occasions where we // infer 'const'. These are always pseudo-strong variables. if (S.getLangOpts().ObjCAutoRefCount) { DeclRefExpr *declRef = dyn_cast(E->IgnoreParenCasts()); if (declRef && isa(declRef->getDecl())) { VarDecl *var = cast(declRef->getDecl()); // Use the normal diagnostic if it's pseudo-__strong but the // user actually wrote 'const'. if (var->isARCPseudoStrong() && (!var->getTypeSourceInfo() || !var->getTypeSourceInfo()->getType().isConstQualified())) { // There are two pseudo-strong cases: // - self ObjCMethodDecl *method = S.getCurMethodDecl(); if (method && var == method->getSelfDecl()) DiagID = method->isClassMethod() ? diag::err_typecheck_arc_assign_self_class_method : diag::err_typecheck_arc_assign_self; // - fast enumeration variables else DiagID = diag::err_typecheck_arr_assign_enumeration; SourceRange Assign; if (Loc != OrigLoc) Assign = SourceRange(OrigLoc, OrigLoc); S.Diag(Loc, DiagID) << E->getSourceRange() << Assign; // We need to preserve the AST regardless, so migration tool // can do its job. return false; } } } // If none of the special cases above are triggered, then this is a // simple const assignment. 
if (DiagID == 0) { DiagnoseConstAssignment(S, E, Loc); return true; } break; case Expr::MLV_ConstAddrSpace: DiagnoseConstAssignment(S, E, Loc); return true; case Expr::MLV_ArrayType: case Expr::MLV_ArrayTemporary: DiagID = diag::err_typecheck_array_not_modifiable_lvalue; NeedType = true; break; case Expr::MLV_NotObjectType: DiagID = diag::err_typecheck_non_object_not_modifiable_lvalue; NeedType = true; break; case Expr::MLV_LValueCast: DiagID = diag::err_typecheck_lvalue_casts_not_supported; break; case Expr::MLV_Valid: llvm_unreachable("did not take early return for MLV_Valid"); case Expr::MLV_InvalidExpression: case Expr::MLV_MemberFunction: case Expr::MLV_ClassTemporary: DiagID = diag::err_typecheck_expression_not_modifiable_lvalue; break; case Expr::MLV_IncompleteType: case Expr::MLV_IncompleteVoidType: return S.RequireCompleteType(Loc, E->getType(), diag::err_typecheck_incomplete_type_not_modifiable_lvalue, E); case Expr::MLV_DuplicateVectorComponents: DiagID = diag::err_typecheck_duplicate_vector_components_not_mlvalue; break; case Expr::MLV_NoSetterProperty: llvm_unreachable("readonly properties should be processed differently"); case Expr::MLV_InvalidMessageExpression: DiagID = diag::error_readonly_message_assignment; break; case Expr::MLV_SubObjCPropertySetting: DiagID = diag::error_no_subobject_property_setting; break; } SourceRange Assign; if (Loc != OrigLoc) Assign = SourceRange(OrigLoc, OrigLoc); if (NeedType) S.Diag(Loc, DiagID) << E->getType() << E->getSourceRange() << Assign; else S.Diag(Loc, DiagID) << E->getSourceRange() << Assign; return true; } static void CheckIdentityFieldAssignment(Expr *LHSExpr, Expr *RHSExpr, SourceLocation Loc, Sema &Sema) { // C / C++ fields MemberExpr *ML = dyn_cast(LHSExpr); MemberExpr *MR = dyn_cast(RHSExpr); if (ML && MR && ML->getMemberDecl() == MR->getMemberDecl()) { if (isa(ML->getBase()) && isa(MR->getBase())) Sema.Diag(Loc, diag::warn_identity_field_assign) << 0; } // Objective-C instance variables ObjCIvarRefExpr *OL = dyn_cast(LHSExpr); ObjCIvarRefExpr *OR = dyn_cast(RHSExpr); if (OL && OR && OL->getDecl() == OR->getDecl()) { DeclRefExpr *RL = dyn_cast(OL->getBase()->IgnoreImpCasts()); DeclRefExpr *RR = dyn_cast(OR->getBase()->IgnoreImpCasts()); if (RL && RR && RL->getDecl() == RR->getDecl()) Sema.Diag(Loc, diag::warn_identity_field_assign) << 1; } } // C99 6.5.16.1 QualType Sema::CheckAssignmentOperands(Expr *LHSExpr, ExprResult &RHS, SourceLocation Loc, QualType CompoundType) { assert(!LHSExpr->hasPlaceholderType(BuiltinType::PseudoObject)); // Verify that LHS is a modifiable lvalue, and emit error if not. if (CheckForModifiableLvalue(LHSExpr, Loc, *this)) return QualType(); QualType LHSType = LHSExpr->getType(); QualType RHSType = CompoundType.isNull() ? RHS.get()->getType() : CompoundType; AssignConvertType ConvTy; if (CompoundType.isNull()) { Expr *RHSCheck = RHS.get(); CheckIdentityFieldAssignment(LHSExpr, RHSCheck, Loc, *this); QualType LHSTy(LHSType); ConvTy = CheckSingleAssignmentConstraints(LHSTy, RHS); if (RHS.isInvalid()) return QualType(); // Special case of NSObject attributes on c-style pointer types. 
if (ConvTy == IncompatiblePointer && ((Context.isObjCNSObjectType(LHSType) && RHSType->isObjCObjectPointerType()) || (Context.isObjCNSObjectType(RHSType) && LHSType->isObjCObjectPointerType()))) ConvTy = Compatible; if (ConvTy == Compatible && LHSType->isObjCObjectType()) Diag(Loc, diag::err_objc_object_assignment) << LHSType; // If the RHS is a unary plus or minus, check to see if they = and + are // right next to each other. If so, the user may have typo'd "x =+ 4" // instead of "x += 4". if (ImplicitCastExpr *ICE = dyn_cast(RHSCheck)) RHSCheck = ICE->getSubExpr(); if (UnaryOperator *UO = dyn_cast(RHSCheck)) { if ((UO->getOpcode() == UO_Plus || UO->getOpcode() == UO_Minus) && Loc.isFileID() && UO->getOperatorLoc().isFileID() && // Only if the two operators are exactly adjacent. Loc.getLocWithOffset(1) == UO->getOperatorLoc() && // And there is a space or other character before the subexpr of the // unary +/-. We don't want to warn on "x=-1". Loc.getLocWithOffset(2) != UO->getSubExpr()->getLocStart() && UO->getSubExpr()->getLocStart().isFileID()) { Diag(Loc, diag::warn_not_compound_assign) << (UO->getOpcode() == UO_Plus ? "+" : "-") << SourceRange(UO->getOperatorLoc(), UO->getOperatorLoc()); } } if (ConvTy == Compatible) { if (LHSType.getObjCLifetime() == Qualifiers::OCL_Strong) { // Warn about retain cycles where a block captures the LHS, but // not if the LHS is a simple variable into which the block is // being stored...unless that variable can be captured by reference! const Expr *InnerLHS = LHSExpr->IgnoreParenCasts(); const DeclRefExpr *DRE = dyn_cast(InnerLHS); if (!DRE || DRE->getDecl()->hasAttr()) checkRetainCycles(LHSExpr, RHS.get()); // It is safe to assign a weak reference into a strong variable. // Although this code can still have problems: // id x = self.weakProp; // id y = self.weakProp; // we do not warn to warn spuriously when 'x' and 'y' are on separate // paths through the function. This should be revisited if // -Wrepeated-use-of-weak is made flow-sensitive. if (!Diags.isIgnored(diag::warn_arc_repeated_use_of_weak, RHS.get()->getLocStart())) getCurFunction()->markSafeWeakUse(RHS.get()); } else if (getLangOpts().ObjCAutoRefCount) { checkUnsafeExprAssigns(Loc, LHSExpr, RHS.get()); } } } else { // Compound assignment "x += y" ConvTy = CheckAssignmentConstraints(Loc, LHSType, RHSType); } if (DiagnoseAssignmentResult(ConvTy, Loc, LHSType, RHSType, RHS.get(), AA_Assigning)) return QualType(); CheckForNullPointerDereference(*this, LHSExpr); // C99 6.5.16p3: The type of an assignment expression is the type of the // left operand unless the left operand has qualified type, in which case // it is the unqualified version of the type of the left operand. // C99 6.5.16.1p2: In simple assignment, the value of the right operand // is converted to the type of the assignment expression (above). // C++ 5.17p1: the type of the assignment expression is that of its left // operand. return (getLangOpts().CPlusPlus ? LHSType : LHSType.getUnqualifiedType()); } // C99 6.5.17 static QualType CheckCommaOperands(Sema &S, ExprResult &LHS, ExprResult &RHS, SourceLocation Loc) { LHS = S.CheckPlaceholderExpr(LHS.get()); RHS = S.CheckPlaceholderExpr(RHS.get()); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); // C's comma performs lvalue conversion (C99 6.3.2.1) on both its // operands, but not unary promotions. // C++'s comma does not do any conversions at all (C++ [expr.comma]p1). 
  // So we treat the LHS as an ignored value, and in C++ we allow the
  // containing site to determine what should be done with the RHS.
  LHS = S.IgnoredValueConversions(LHS.get());
  if (LHS.isInvalid())
    return QualType();

  S.DiagnoseUnusedExprResult(LHS.get());

  if (!S.getLangOpts().CPlusPlus) {
    RHS = S.DefaultFunctionArrayLvalueConversion(RHS.get());
    if (RHS.isInvalid())
      return QualType();
    if (!RHS.get()->getType()->isVoidType())
      S.RequireCompleteType(Loc, RHS.get()->getType(),
                            diag::err_incomplete_type);
  }

  return RHS.get()->getType();
}

/// CheckIncrementDecrementOperand - unlike most "Check" methods, this routine
/// doesn't need to call UsualUnaryConversions or UsualArithmeticConversions.
static QualType CheckIncrementDecrementOperand(Sema &S, Expr *Op,
                                               ExprValueKind &VK,
                                               ExprObjectKind &OK,
                                               SourceLocation OpLoc,
                                               bool IsInc, bool IsPrefix) {
  if (Op->isTypeDependent())
    return S.Context.DependentTy;

  QualType ResType = Op->getType();
  // Atomic types can be used for increment / decrement where the non-atomic
  // versions can, so ignore the _Atomic() specifier for the purpose of
  // checking.
  if (const AtomicType *ResAtomicType = ResType->getAs<AtomicType>())
    ResType = ResAtomicType->getValueType();

  assert(!ResType.isNull() && "no type for increment/decrement expression");

  if (S.getLangOpts().CPlusPlus && ResType->isBooleanType()) {
    // Decrement of bool is not allowed.
    if (!IsInc) {
      S.Diag(OpLoc, diag::err_decrement_bool) << Op->getSourceRange();
      return QualType();
    }
    // Increment of bool sets it to true, but is deprecated.
    S.Diag(OpLoc, S.getLangOpts().CPlusPlus1z ? diag::ext_increment_bool
                                              : diag::warn_increment_bool)
      << Op->getSourceRange();
  } else if (S.getLangOpts().CPlusPlus && ResType->isEnumeralType()) {
    // Error on enum increments and decrements in C++ mode
    S.Diag(OpLoc, diag::err_increment_decrement_enum) << IsInc << ResType;
    return QualType();
  } else if (ResType->isRealType()) {
    // OK!
  } else if (ResType->isPointerType()) {
    // C99 6.5.2.4p2, 6.5.6p2
    if (!checkArithmeticOpPointerOperand(S, OpLoc, Op))
      return QualType();
  } else if (ResType->isObjCObjectPointerType()) {
    // On modern runtimes, ObjC pointer arithmetic is forbidden.
    // Otherwise, we just need a complete type.
    if (checkArithmeticIncompletePointerType(S, OpLoc, Op) ||
        checkArithmeticOnObjCPointer(S, OpLoc, Op))
      return QualType();
  } else if (ResType->isAnyComplexType()) {
    // C99 does not support ++/-- on complex types, we allow as an extension.
    S.Diag(OpLoc, diag::ext_integer_increment_complex)
      << ResType << Op->getSourceRange();
  } else if (ResType->isPlaceholderType()) {
    ExprResult PR = S.CheckPlaceholderExpr(Op);
    if (PR.isInvalid()) return QualType();
    return CheckIncrementDecrementOperand(S, PR.get(), VK, OK, OpLoc,
                                          IsInc, IsPrefix);
  } else if (S.getLangOpts().AltiVec && ResType->isVectorType()) {
    // OK! ( C/C++ Language Extensions for CBEA(Version 2.6) 10.3 )
  } else if (S.getLangOpts().ZVector && ResType->isVectorType() &&
             (ResType->getAs<VectorType>()->getVectorKind() !=
              VectorType::AltiVecBool)) {
    // The z vector extensions allow ++ and -- for non-bool vectors.
  } else if (S.getLangOpts().OpenCL && ResType->isVectorType() &&
             ResType->getAs<VectorType>()->getElementType()->isIntegerType()) {
    // OpenCL V1.2 6.3 says dec/inc ops operate on integer vector types.
  } else {
    S.Diag(OpLoc, diag::err_typecheck_illegal_increment_decrement)
      << ResType << int(IsInc) << Op->getSourceRange();
    return QualType();
  }
  // At this point, we know we have a real, complex or pointer type.
  // Now make sure the operand is a modifiable lvalue.
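  // For illustration only (hypothetical user code):
  //   const int n = 0;
  //   ++n;          // rejected below: 'n' is not a modifiable lvalue
  //   bool b = true;
  //   --b;          // rejected above in C++ with err_decrement_bool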
if (CheckForModifiableLvalue(Op, OpLoc, S)) return QualType(); // In C++, a prefix increment is the same type as the operand. Otherwise // (in C or with postfix), the increment is the unqualified type of the // operand. if (IsPrefix && S.getLangOpts().CPlusPlus) { VK = VK_LValue; OK = Op->getObjectKind(); return ResType; } else { VK = VK_RValue; return ResType.getUnqualifiedType(); } } /// getPrimaryDecl - Helper function for CheckAddressOfOperand(). /// This routine allows us to typecheck complex/recursive expressions /// where the declaration is needed for type checking. We only need to /// handle cases when the expression references a function designator /// or is an lvalue. Here are some examples: /// - &(x) => x /// - &*****f => f for f a function designator. /// - &s.xx => s /// - &s.zz[1].yy -> s, if zz is an array /// - *(x + 1) -> x, if x is an array /// - &"123"[2] -> 0 /// - & __real__ x -> x static ValueDecl *getPrimaryDecl(Expr *E) { switch (E->getStmtClass()) { case Stmt::DeclRefExprClass: return cast(E)->getDecl(); case Stmt::MemberExprClass: // If this is an arrow operator, the address is an offset from // the base's value, so the object the base refers to is // irrelevant. if (cast(E)->isArrow()) return nullptr; // Otherwise, the expression refers to a part of the base return getPrimaryDecl(cast(E)->getBase()); case Stmt::ArraySubscriptExprClass: { // FIXME: This code shouldn't be necessary! We should catch the implicit // promotion of register arrays earlier. Expr* Base = cast(E)->getBase(); if (ImplicitCastExpr* ICE = dyn_cast(Base)) { if (ICE->getSubExpr()->getType()->isArrayType()) return getPrimaryDecl(ICE->getSubExpr()); } return nullptr; } case Stmt::UnaryOperatorClass: { UnaryOperator *UO = cast(E); switch(UO->getOpcode()) { case UO_Real: case UO_Imag: case UO_Extension: return getPrimaryDecl(UO->getSubExpr()); default: return nullptr; } } case Stmt::ParenExprClass: return getPrimaryDecl(cast(E)->getSubExpr()); case Stmt::ImplicitCastExprClass: // If the result of an implicit cast is an l-value, we care about // the sub-expression; otherwise, the result here doesn't matter. return getPrimaryDecl(cast(E)->getSubExpr()); default: return nullptr; } } namespace { enum { AO_Bit_Field = 0, AO_Vector_Element = 1, AO_Property_Expansion = 2, AO_Register_Variable = 3, AO_No_Error = 4 }; } /// \brief Diagnose invalid operand for address of operations. /// /// \param Type The type of operand which cannot have its address taken. static void diagnoseAddressOfInvalidType(Sema &S, SourceLocation Loc, Expr *E, unsigned Type) { S.Diag(Loc, diag::err_typecheck_address_of) << Type << E->getSourceRange(); } /// CheckAddressOfOperand - The operand of & must be either a function /// designator or an lvalue designating an object. If it is an lvalue, the /// object cannot be declared with storage class register or be a bit field. /// Note: The usual conversions are *not* applied to the operand of the & /// operator (C99 6.3.2.1p[2-4]), and its result is never an lvalue. /// In C++, the operand might be an overloaded function name, in which case /// we allow the '&' but retain the overloaded-function type. 
QualType Sema::CheckAddressOfOperand(ExprResult &OrigOp, SourceLocation OpLoc) { if (const BuiltinType *PTy = OrigOp.get()->getType()->getAsPlaceholderType()){ if (PTy->getKind() == BuiltinType::Overload) { Expr *E = OrigOp.get()->IgnoreParens(); if (!isa(E)) { assert(cast(E)->getOpcode() == UO_AddrOf); Diag(OpLoc, diag::err_typecheck_invalid_lvalue_addrof_addrof_function) << OrigOp.get()->getSourceRange(); return QualType(); } OverloadExpr *Ovl = cast(E); if (isa(Ovl)) if (!ResolveSingleFunctionTemplateSpecialization(Ovl)) { Diag(OpLoc, diag::err_invalid_form_pointer_member_function) << OrigOp.get()->getSourceRange(); return QualType(); } return Context.OverloadTy; } if (PTy->getKind() == BuiltinType::UnknownAny) return Context.UnknownAnyTy; if (PTy->getKind() == BuiltinType::BoundMember) { Diag(OpLoc, diag::err_invalid_form_pointer_member_function) << OrigOp.get()->getSourceRange(); return QualType(); } OrigOp = CheckPlaceholderExpr(OrigOp.get()); if (OrigOp.isInvalid()) return QualType(); } if (OrigOp.get()->isTypeDependent()) return Context.DependentTy; assert(!OrigOp.get()->getType()->isPlaceholderType()); // Make sure to ignore parentheses in subsequent checks Expr *op = OrigOp.get()->IgnoreParens(); // OpenCL v1.0 s6.8.a.3: Pointers to functions are not allowed. if (LangOpts.OpenCL && op->getType()->isFunctionType()) { Diag(op->getExprLoc(), diag::err_opencl_taking_function_address); return QualType(); } if (getLangOpts().C99) { // Implement C99-only parts of addressof rules. if (UnaryOperator* uOp = dyn_cast(op)) { if (uOp->getOpcode() == UO_Deref) // Per C99 6.5.3.2, the address of a deref always returns a valid result // (assuming the deref expression is valid). return uOp->getSubExpr()->getType(); } // Technically, there should be a check for array subscript // expressions here, but the result of one is always an lvalue anyway. } ValueDecl *dcl = getPrimaryDecl(op); if (auto *FD = dyn_cast_or_null(dcl)) if (!checkAddressOfFunctionIsAvailable(FD, /*Complain=*/true, op->getLocStart())) return QualType(); Expr::LValueClassification lval = op->ClassifyLValue(Context); unsigned AddressOfError = AO_No_Error; if (lval == Expr::LV_ClassTemporary || lval == Expr::LV_ArrayTemporary) { bool sfinae = (bool)isSFINAEContext(); Diag(OpLoc, isSFINAEContext() ? diag::err_typecheck_addrof_temporary : diag::ext_typecheck_addrof_temporary) << op->getType() << op->getSourceRange(); if (sfinae) return QualType(); // Materialize the temporary as an lvalue so that we can take its address. OrigOp = op = new (Context) MaterializeTemporaryExpr(op->getType(), OrigOp.get(), true); } else if (isa(op)) { return Context.getPointerType(op->getType()); } else if (lval == Expr::LV_MemberFunction) { // If it's an instance method, make a member pointer. // The expression must have exactly the form &A::foo. // If the underlying expression isn't a decl ref, give up. if (!isa(op)) { Diag(OpLoc, diag::err_invalid_form_pointer_member_function) << OrigOp.get()->getSourceRange(); return QualType(); } DeclRefExpr *DRE = cast(op); CXXMethodDecl *MD = cast(DRE->getDecl()); // The id-expression was parenthesized. if (OrigOp.get() != DRE) { Diag(OpLoc, diag::err_parens_pointer_member_function) << OrigOp.get()->getSourceRange(); // The method was named without a qualifier. 
} else if (!DRE->getQualifier()) { if (MD->getParent()->getName().empty()) Diag(OpLoc, diag::err_unqualified_pointer_member_function) << op->getSourceRange(); else { SmallString<32> Str; StringRef Qual = (MD->getParent()->getName() + "::").toStringRef(Str); Diag(OpLoc, diag::err_unqualified_pointer_member_function) << op->getSourceRange() << FixItHint::CreateInsertion(op->getSourceRange().getBegin(), Qual); } } // Taking the address of a dtor is illegal per C++ [class.dtor]p2. if (isa(MD)) Diag(OpLoc, diag::err_typecheck_addrof_dtor) << op->getSourceRange(); QualType MPTy = Context.getMemberPointerType( op->getType(), Context.getTypeDeclType(MD->getParent()).getTypePtr()); // Under the MS ABI, lock down the inheritance model now. if (Context.getTargetInfo().getCXXABI().isMicrosoft()) (void)isCompleteType(OpLoc, MPTy); return MPTy; } else if (lval != Expr::LV_Valid && lval != Expr::LV_IncompleteVoidType) { // C99 6.5.3.2p1 // The operand must be either an l-value or a function designator if (!op->getType()->isFunctionType()) { // Use a special diagnostic for loads from property references. if (isa(op)) { AddressOfError = AO_Property_Expansion; } else { Diag(OpLoc, diag::err_typecheck_invalid_lvalue_addrof) << op->getType() << op->getSourceRange(); return QualType(); } } } else if (op->getObjectKind() == OK_BitField) { // C99 6.5.3.2p1 // The operand cannot be a bit-field AddressOfError = AO_Bit_Field; } else if (op->getObjectKind() == OK_VectorComponent) { // The operand cannot be an element of a vector AddressOfError = AO_Vector_Element; } else if (dcl) { // C99 6.5.3.2p1 // We have an lvalue with a decl. Make sure the decl is not declared // with the register storage-class specifier. if (const VarDecl *vd = dyn_cast(dcl)) { // in C++ it is not error to take address of a register // variable (c++03 7.1.1P3) if (vd->getStorageClass() == SC_Register && !getLangOpts().CPlusPlus) { AddressOfError = AO_Register_Variable; } } else if (isa(dcl)) { AddressOfError = AO_Property_Expansion; } else if (isa(dcl)) { return Context.OverloadTy; } else if (isa(dcl) || isa(dcl)) { // Okay: we can take the address of a field. // Could be a pointer to member, though, if there is an explicit // scope qualifier for the class. if (isa(op) && cast(op)->getQualifier()) { DeclContext *Ctx = dcl->getDeclContext(); if (Ctx && Ctx->isRecord()) { if (dcl->getType()->isReferenceType()) { Diag(OpLoc, diag::err_cannot_form_pointer_to_member_of_reference_type) << dcl->getDeclName() << dcl->getType(); return QualType(); } while (cast(Ctx)->isAnonymousStructOrUnion()) Ctx = Ctx->getParent(); QualType MPTy = Context.getMemberPointerType( op->getType(), Context.getTypeDeclType(cast(Ctx)).getTypePtr()); // Under the MS ABI, lock down the inheritance model now. if (Context.getTargetInfo().getCXXABI().isMicrosoft()) (void)isCompleteType(OpLoc, MPTy); return MPTy; } } } else if (!isa(dcl) && !isa(dcl)) llvm_unreachable("Unknown/unexpected decl type"); } if (AddressOfError != AO_No_Error) { diagnoseAddressOfInvalidType(*this, OpLoc, op, AddressOfError); return QualType(); } if (lval == Expr::LV_IncompleteVoidType) { // Taking the address of a void variable is technically illegal, but we // allow it in cases which are otherwise valid. // Example: "extern void x; void* y = &x;". Diag(OpLoc, diag::ext_typecheck_addrof_void) << op->getSourceRange(); } // If the operand has type "type", the result has type "pointer to type". 
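  // For illustration only (hypothetical user code) of the checks above and
  // the result type below:
  //   struct S { int i; int bf : 3; } s;
  //   int *p = &s.i;    // OK, 'p' has type 'int *'
  //   int *q = &s.bf;   // err_typecheck_address_of (address of bit-field)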
if (op->getType()->isObjCObjectType()) return Context.getObjCObjectPointerType(op->getType()); return Context.getPointerType(op->getType()); } static void RecordModifiableNonNullParam(Sema &S, const Expr *Exp) { const DeclRefExpr *DRE = dyn_cast(Exp); if (!DRE) return; const Decl *D = DRE->getDecl(); if (!D) return; const ParmVarDecl *Param = dyn_cast(D); if (!Param) return; if (const FunctionDecl* FD = dyn_cast(Param->getDeclContext())) if (!FD->hasAttr() && !Param->hasAttr()) return; if (FunctionScopeInfo *FD = S.getCurFunction()) if (!FD->ModifiedNonNullParams.count(Param)) FD->ModifiedNonNullParams.insert(Param); } /// CheckIndirectionOperand - Type check unary indirection (prefix '*'). static QualType CheckIndirectionOperand(Sema &S, Expr *Op, ExprValueKind &VK, SourceLocation OpLoc) { if (Op->isTypeDependent()) return S.Context.DependentTy; ExprResult ConvResult = S.UsualUnaryConversions(Op); if (ConvResult.isInvalid()) return QualType(); Op = ConvResult.get(); QualType OpTy = Op->getType(); QualType Result; if (isa(Op)) { QualType OpOrigType = Op->IgnoreParenCasts()->getType(); S.CheckCompatibleReinterpretCast(OpOrigType, OpTy, /*IsDereference*/true, Op->getSourceRange()); } if (const PointerType *PT = OpTy->getAs()) Result = PT->getPointeeType(); else if (const ObjCObjectPointerType *OPT = OpTy->getAs()) Result = OPT->getPointeeType(); else { ExprResult PR = S.CheckPlaceholderExpr(Op); if (PR.isInvalid()) return QualType(); if (PR.get() != Op) return CheckIndirectionOperand(S, PR.get(), VK, OpLoc); } if (Result.isNull()) { S.Diag(OpLoc, diag::err_typecheck_indirection_requires_pointer) << OpTy << Op->getSourceRange(); return QualType(); } // Note that per both C89 and C99, indirection is always legal, even if Result // is an incomplete type or void. It would be possible to warn about // dereferencing a void pointer, but it's completely well-defined, and such a // warning is unlikely to catch any mistakes. In C++, indirection is not valid // for pointers to 'void' but is fine for any other pointer type: // // C++ [expr.unary.op]p1: // [...] the expression to which [the unary * operator] is applied shall // be a pointer to an object type, or a pointer to a function type if (S.getLangOpts().CPlusPlus && Result->isVoidType()) S.Diag(OpLoc, diag::ext_typecheck_indirection_through_void_pointer) << OpTy << Op->getSourceRange(); // Dereferences are usually l-values... VK = VK_LValue; // ...except that certain expressions are never l-values in C. 
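  // For illustration only (hypothetical user code):
  //   void *vp = nullptr;
  //   *vp;   // C++: ext_typecheck_indirection_through_void_pointer (see above)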
  if (!S.getLangOpts().CPlusPlus && Result.isCForbiddenLValueType())
    VK = VK_RValue;
  return Result;
}

BinaryOperatorKind Sema::ConvertTokenKindToBinaryOpcode(tok::TokenKind Kind) {
  BinaryOperatorKind Opc;
  switch (Kind) {
  default: llvm_unreachable("Unknown binop!");
  case tok::periodstar:           Opc = BO_PtrMemD; break;
  case tok::arrowstar:            Opc = BO_PtrMemI; break;
  case tok::star:                 Opc = BO_Mul; break;
  case tok::slash:                Opc = BO_Div; break;
  case tok::percent:              Opc = BO_Rem; break;
  case tok::plus:                 Opc = BO_Add; break;
  case tok::minus:                Opc = BO_Sub; break;
  case tok::lessless:             Opc = BO_Shl; break;
  case tok::greatergreater:       Opc = BO_Shr; break;
  case tok::lessequal:            Opc = BO_LE; break;
  case tok::less:                 Opc = BO_LT; break;
  case tok::greaterequal:         Opc = BO_GE; break;
  case tok::greater:              Opc = BO_GT; break;
  case tok::exclaimequal:         Opc = BO_NE; break;
  case tok::equalequal:           Opc = BO_EQ; break;
  case tok::amp:                  Opc = BO_And; break;
  case tok::caret:                Opc = BO_Xor; break;
  case tok::pipe:                 Opc = BO_Or; break;
  case tok::ampamp:               Opc = BO_LAnd; break;
  case tok::pipepipe:             Opc = BO_LOr; break;
  case tok::equal:                Opc = BO_Assign; break;
  case tok::starequal:            Opc = BO_MulAssign; break;
  case tok::slashequal:           Opc = BO_DivAssign; break;
  case tok::percentequal:         Opc = BO_RemAssign; break;
  case tok::plusequal:            Opc = BO_AddAssign; break;
  case tok::minusequal:           Opc = BO_SubAssign; break;
  case tok::lesslessequal:        Opc = BO_ShlAssign; break;
  case tok::greatergreaterequal:  Opc = BO_ShrAssign; break;
  case tok::ampequal:             Opc = BO_AndAssign; break;
  case tok::caretequal:           Opc = BO_XorAssign; break;
  case tok::pipeequal:            Opc = BO_OrAssign; break;
  case tok::comma:                Opc = BO_Comma; break;
  }
  return Opc;
}

static inline UnaryOperatorKind ConvertTokenKindToUnaryOpcode(
  tok::TokenKind Kind) {
  UnaryOperatorKind Opc;
  switch (Kind) {
  default: llvm_unreachable("Unknown unary op!");
  case tok::plusplus:         Opc = UO_PreInc; break;
  case tok::minusminus:       Opc = UO_PreDec; break;
  case tok::amp:              Opc = UO_AddrOf; break;
  case tok::star:             Opc = UO_Deref; break;
  case tok::plus:             Opc = UO_Plus; break;
  case tok::minus:            Opc = UO_Minus; break;
  case tok::tilde:            Opc = UO_Not; break;
  case tok::exclaim:          Opc = UO_LNot; break;
  case tok::kw___real:        Opc = UO_Real; break;
  case tok::kw___imag:        Opc = UO_Imag; break;
  case tok::kw___extension__: Opc = UO_Extension; break;
  }
  return Opc;
}

/// DiagnoseSelfAssignment - Emits a warning if a value is assigned to itself.
/// This warning is only emitted for builtin assignment operations. It is also
/// suppressed in the event of macro expansions.
static void DiagnoseSelfAssignment(Sema &S, Expr *LHSExpr, Expr *RHSExpr, SourceLocation OpLoc) { if (!S.ActiveTemplateInstantiations.empty()) return; if (OpLoc.isInvalid() || OpLoc.isMacroID()) return; LHSExpr = LHSExpr->IgnoreParenImpCasts(); RHSExpr = RHSExpr->IgnoreParenImpCasts(); const DeclRefExpr *LHSDeclRef = dyn_cast(LHSExpr); const DeclRefExpr *RHSDeclRef = dyn_cast(RHSExpr); if (!LHSDeclRef || !RHSDeclRef || LHSDeclRef->getLocation().isMacroID() || RHSDeclRef->getLocation().isMacroID()) return; const ValueDecl *LHSDecl = cast(LHSDeclRef->getDecl()->getCanonicalDecl()); const ValueDecl *RHSDecl = cast(RHSDeclRef->getDecl()->getCanonicalDecl()); if (LHSDecl != RHSDecl) return; if (LHSDecl->getType().isVolatileQualified()) return; if (const ReferenceType *RefTy = LHSDecl->getType()->getAs()) if (RefTy->getPointeeType().isVolatileQualified()) return; S.Diag(OpLoc, diag::warn_self_assignment) << LHSDeclRef->getType() << LHSExpr->getSourceRange() << RHSExpr->getSourceRange(); } /// Check if a bitwise-& is performed on an Objective-C pointer. This /// is usually indicative of introspection within the Objective-C pointer. static void checkObjCPointerIntrospection(Sema &S, ExprResult &L, ExprResult &R, SourceLocation OpLoc) { if (!S.getLangOpts().ObjC1) return; const Expr *ObjCPointerExpr = nullptr, *OtherExpr = nullptr; const Expr *LHS = L.get(); const Expr *RHS = R.get(); if (LHS->IgnoreParenCasts()->getType()->isObjCObjectPointerType()) { ObjCPointerExpr = LHS; OtherExpr = RHS; } else if (RHS->IgnoreParenCasts()->getType()->isObjCObjectPointerType()) { ObjCPointerExpr = RHS; OtherExpr = LHS; } // This warning is deliberately made very specific to reduce false // positives with logic that uses '&' for hashing. This logic mainly // looks for code trying to introspect into tagged pointers, which // code should generally never do. if (ObjCPointerExpr && isa(OtherExpr->IgnoreParenCasts())) { unsigned Diag = diag::warn_objc_pointer_masking; // Determine if we are introspecting the result of performSelectorXXX. const Expr *Ex = ObjCPointerExpr->IgnoreParenCasts(); // Special case messages to -performSelector and friends, which // can return non-pointer values boxed in a pointer value. // Some clients may wish to silence warnings in this subcase. if (const ObjCMessageExpr *ME = dyn_cast(Ex)) { Selector S = ME->getSelector(); StringRef SelArg0 = S.getNameForSlot(0); if (SelArg0.startswith("performSelector")) Diag = diag::warn_objc_pointer_masking_performSelector; } S.Diag(OpLoc, Diag) << ObjCPointerExpr->getSourceRange(); } } static NamedDecl *getDeclFromExpr(Expr *E) { if (!E) return nullptr; if (auto *DRE = dyn_cast(E)) return DRE->getDecl(); if (auto *ME = dyn_cast(E)) return ME->getMemberDecl(); if (auto *IRE = dyn_cast(E)) return IRE->getDecl(); return nullptr; } /// CreateBuiltinBinOp - Creates a new built-in binary operation with /// operator @p Opc at location @c TokLoc. This routine only supports /// built-in operations; ActOnBinOp handles overloaded operators. ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc, BinaryOperatorKind Opc, Expr *LHSExpr, Expr *RHSExpr) { if (getLangOpts().CPlusPlus11 && isa(RHSExpr)) { // The syntax only allows initializer lists on the RHS of assignment, // so we don't need to worry about accepting invalid code for // non-assignment operators. // C++11 5.17p9: // The meaning of x = {v} [...] is that of x = T(v) [...]. The meaning // of x = {} is x = T(). 
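    // For illustration only (hypothetical user code):
    //   int x;
    //   x = {1};   // treated like x = int(1)
    //   x = {};    // treated like x = int()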
InitializationKind Kind = InitializationKind::CreateDirectList(RHSExpr->getLocStart()); InitializedEntity Entity = InitializedEntity::InitializeTemporary(LHSExpr->getType()); InitializationSequence InitSeq(*this, Entity, Kind, RHSExpr); ExprResult Init = InitSeq.Perform(*this, Entity, Kind, RHSExpr); if (Init.isInvalid()) return Init; RHSExpr = Init.get(); } ExprResult LHS = LHSExpr, RHS = RHSExpr; QualType ResultTy; // Result type of the binary operator. // The following two variables are used for compound assignment operators QualType CompLHSTy; // Type of LHS after promotions for computation QualType CompResultTy; // Type of computation result ExprValueKind VK = VK_RValue; ExprObjectKind OK = OK_Ordinary; if (!getLangOpts().CPlusPlus) { // C cannot handle TypoExpr nodes on either side of a binop because it // doesn't handle dependent types properly, so make sure any TypoExprs have // been dealt with before checking the operands. LHS = CorrectDelayedTyposInExpr(LHSExpr); RHS = CorrectDelayedTyposInExpr(RHSExpr, [Opc, LHS](Expr *E) { if (Opc != BO_Assign) return ExprResult(E); // Avoid correcting the RHS to the same Expr as the LHS. Decl *D = getDeclFromExpr(E); return (D && D == getDeclFromExpr(LHS.get())) ? ExprError() : E; }); if (!LHS.isUsable() || !RHS.isUsable()) return ExprError(); } if (getLangOpts().OpenCL) { // OpenCLC v2.0 s6.13.11.1 allows atomic variables to be initialized by // the ATOMIC_VAR_INIT macro. if (LHSExpr->getType()->isAtomicType() || RHSExpr->getType()->isAtomicType()) { SourceRange SR(LHSExpr->getLocStart(), RHSExpr->getLocEnd()); if (BO_Assign == Opc) Diag(OpLoc, diag::err_atomic_init_constant) << SR; else ResultTy = InvalidOperands(OpLoc, LHS, RHS); return ExprError(); } } switch (Opc) { case BO_Assign: ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, QualType()); if (getLangOpts().CPlusPlus && LHS.get()->getObjectKind() != OK_ObjCProperty) { VK = LHS.get()->getValueKind(); OK = LHS.get()->getObjectKind(); } if (!ResultTy.isNull()) { DiagnoseSelfAssignment(*this, LHS.get(), RHS.get(), OpLoc); DiagnoseSelfMove(LHS.get(), RHS.get(), OpLoc); } RecordModifiableNonNullParam(*this, LHS.get()); break; case BO_PtrMemD: case BO_PtrMemI: ResultTy = CheckPointerToMemberOperands(LHS, RHS, VK, OpLoc, Opc == BO_PtrMemI); break; case BO_Mul: case BO_Div: ResultTy = CheckMultiplyDivideOperands(LHS, RHS, OpLoc, false, Opc == BO_Div); break; case BO_Rem: ResultTy = CheckRemainderOperands(LHS, RHS, OpLoc); break; case BO_Add: ResultTy = CheckAdditionOperands(LHS, RHS, OpLoc, Opc); break; case BO_Sub: ResultTy = CheckSubtractionOperands(LHS, RHS, OpLoc); break; case BO_Shl: case BO_Shr: ResultTy = CheckShiftOperands(LHS, RHS, OpLoc, Opc); break; case BO_LE: case BO_LT: case BO_GE: case BO_GT: ResultTy = CheckCompareOperands(LHS, RHS, OpLoc, Opc, true); break; case BO_EQ: case BO_NE: ResultTy = CheckCompareOperands(LHS, RHS, OpLoc, Opc, false); break; case BO_And: checkObjCPointerIntrospection(*this, LHS, RHS, OpLoc); case BO_Xor: case BO_Or: ResultTy = CheckBitwiseOperands(LHS, RHS, OpLoc); break; case BO_LAnd: case BO_LOr: ResultTy = CheckLogicalOperands(LHS, RHS, OpLoc, Opc); break; case BO_MulAssign: case BO_DivAssign: CompResultTy = CheckMultiplyDivideOperands(LHS, RHS, OpLoc, true, Opc == BO_DivAssign); CompLHSTy = CompResultTy; if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid()) ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy); break; case BO_RemAssign: CompResultTy = CheckRemainderOperands(LHS, RHS, OpLoc, true); CompLHSTy = 
CompResultTy; if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid()) ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy); break; case BO_AddAssign: CompResultTy = CheckAdditionOperands(LHS, RHS, OpLoc, Opc, &CompLHSTy); if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid()) ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy); break; case BO_SubAssign: CompResultTy = CheckSubtractionOperands(LHS, RHS, OpLoc, &CompLHSTy); if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid()) ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy); break; case BO_ShlAssign: case BO_ShrAssign: CompResultTy = CheckShiftOperands(LHS, RHS, OpLoc, Opc, true); CompLHSTy = CompResultTy; if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid()) ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy); break; case BO_AndAssign: case BO_OrAssign: // fallthrough DiagnoseSelfAssignment(*this, LHS.get(), RHS.get(), OpLoc); case BO_XorAssign: CompResultTy = CheckBitwiseOperands(LHS, RHS, OpLoc, true); CompLHSTy = CompResultTy; if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid()) ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy); break; case BO_Comma: ResultTy = CheckCommaOperands(*this, LHS, RHS, OpLoc); if (getLangOpts().CPlusPlus && !RHS.isInvalid()) { VK = RHS.get()->getValueKind(); OK = RHS.get()->getObjectKind(); } break; } if (ResultTy.isNull() || LHS.isInvalid() || RHS.isInvalid()) return ExprError(); // Check for array bounds violations for both sides of the BinaryOperator CheckArrayAccess(LHS.get()); CheckArrayAccess(RHS.get()); if (const ObjCIsaExpr *OISA = dyn_cast(LHS.get()->IgnoreParenCasts())) { NamedDecl *ObjectSetClass = LookupSingleName(TUScope, &Context.Idents.get("object_setClass"), SourceLocation(), LookupOrdinaryName); if (ObjectSetClass && isa(LHS.get())) { SourceLocation RHSLocEnd = getLocForEndOfToken(RHS.get()->getLocEnd()); Diag(LHS.get()->getExprLoc(), diag::warn_objc_isa_assign) << FixItHint::CreateInsertion(LHS.get()->getLocStart(), "object_setClass(") << FixItHint::CreateReplacement(SourceRange(OISA->getOpLoc(), OpLoc), ",") << FixItHint::CreateInsertion(RHSLocEnd, ")"); } else Diag(LHS.get()->getExprLoc(), diag::warn_objc_isa_assign); } else if (const ObjCIvarRefExpr *OIRE = dyn_cast(LHS.get()->IgnoreParenCasts())) DiagnoseDirectIsaAccess(*this, OIRE, OpLoc, RHS.get()); if (CompResultTy.isNull()) return new (Context) BinaryOperator(LHS.get(), RHS.get(), Opc, ResultTy, VK, OK, OpLoc, FPFeatures.fp_contract); if (getLangOpts().CPlusPlus && LHS.get()->getObjectKind() != OK_ObjCProperty) { VK = VK_LValue; OK = LHS.get()->getObjectKind(); } return new (Context) CompoundAssignOperator( LHS.get(), RHS.get(), Opc, ResultTy, VK, OK, CompLHSTy, CompResultTy, OpLoc, FPFeatures.fp_contract); } /// DiagnoseBitwisePrecedence - Emit a warning when bitwise and comparison /// operators are mixed in a way that suggests that the programmer forgot that /// comparison operators have higher precedence. The most typical example of /// such code is "flags & 0x0020 != 0", which is equivalent to "flags & 1". static void DiagnoseBitwisePrecedence(Sema &Self, BinaryOperatorKind Opc, SourceLocation OpLoc, Expr *LHSExpr, Expr *RHSExpr) { BinaryOperator *LHSBO = dyn_cast(LHSExpr); BinaryOperator *RHSBO = dyn_cast(RHSExpr); // Check that one of the sides is a comparison operator and the other isn't. 
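  // For illustration only (hypothetical user code):
  //   if (flags & 0x0020 != 0)     // warn_precedence_bitwise_rel
  //   if (flags & (0x0020 != 0))   // fix-it: silence by spelling out the precedence
  //   if ((flags & 0x0020) != 0)   // fix-it: evaluate the bitwise op first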
bool isLeftComp = LHSBO && LHSBO->isComparisonOp(); bool isRightComp = RHSBO && RHSBO->isComparisonOp(); if (isLeftComp == isRightComp) return; // Bitwise operations are sometimes used as eager logical ops. // Don't diagnose this. bool isLeftBitwise = LHSBO && LHSBO->isBitwiseOp(); bool isRightBitwise = RHSBO && RHSBO->isBitwiseOp(); if (isLeftBitwise || isRightBitwise) return; SourceRange DiagRange = isLeftComp ? SourceRange(LHSExpr->getLocStart(), OpLoc) : SourceRange(OpLoc, RHSExpr->getLocEnd()); StringRef OpStr = isLeftComp ? LHSBO->getOpcodeStr() : RHSBO->getOpcodeStr(); SourceRange ParensRange = isLeftComp ? SourceRange(LHSBO->getRHS()->getLocStart(), RHSExpr->getLocEnd()) : SourceRange(LHSExpr->getLocStart(), RHSBO->getLHS()->getLocEnd()); Self.Diag(OpLoc, diag::warn_precedence_bitwise_rel) << DiagRange << BinaryOperator::getOpcodeStr(Opc) << OpStr; SuggestParentheses(Self, OpLoc, Self.PDiag(diag::note_precedence_silence) << OpStr, (isLeftComp ? LHSExpr : RHSExpr)->getSourceRange()); SuggestParentheses(Self, OpLoc, Self.PDiag(diag::note_precedence_bitwise_first) << BinaryOperator::getOpcodeStr(Opc), ParensRange); } /// \brief It accepts a '&&' expr that is inside a '||' one. /// Emit a diagnostic together with a fixit hint that wraps the '&&' expression /// in parentheses. static void EmitDiagnosticForLogicalAndInLogicalOr(Sema &Self, SourceLocation OpLoc, BinaryOperator *Bop) { assert(Bop->getOpcode() == BO_LAnd); Self.Diag(Bop->getOperatorLoc(), diag::warn_logical_and_in_logical_or) << Bop->getSourceRange() << OpLoc; SuggestParentheses(Self, Bop->getOperatorLoc(), Self.PDiag(diag::note_precedence_silence) << Bop->getOpcodeStr(), Bop->getSourceRange()); } /// \brief Returns true if the given expression can be evaluated as a constant /// 'true'. static bool EvaluatesAsTrue(Sema &S, Expr *E) { bool Res; return !E->isValueDependent() && E->EvaluateAsBooleanCondition(Res, S.getASTContext()) && Res; } /// \brief Returns true if the given expression can be evaluated as a constant /// 'false'. static bool EvaluatesAsFalse(Sema &S, Expr *E) { bool Res; return !E->isValueDependent() && E->EvaluateAsBooleanCondition(Res, S.getASTContext()) && !Res; } /// \brief Look for '&&' in the left hand of a '||' expr. static void DiagnoseLogicalAndInLogicalOrLHS(Sema &S, SourceLocation OpLoc, Expr *LHSExpr, Expr *RHSExpr) { if (BinaryOperator *Bop = dyn_cast(LHSExpr)) { if (Bop->getOpcode() == BO_LAnd) { // If it's "a && b || 0" don't warn since the precedence doesn't matter. if (EvaluatesAsFalse(S, RHSExpr)) return; // If it's "1 && a || b" don't warn since the precedence doesn't matter. if (!EvaluatesAsTrue(S, Bop->getLHS())) return EmitDiagnosticForLogicalAndInLogicalOr(S, OpLoc, Bop); } else if (Bop->getOpcode() == BO_LOr) { if (BinaryOperator *RBop = dyn_cast(Bop->getRHS())) { // If it's "a || b && 1 || c" we didn't warn earlier for // "a || b && 1", but warn now. if (RBop->getOpcode() == BO_LAnd && EvaluatesAsTrue(S, RBop->getRHS())) return EmitDiagnosticForLogicalAndInLogicalOr(S, OpLoc, RBop); } } } } /// \brief Look for '&&' in the right hand of a '||' expr. static void DiagnoseLogicalAndInLogicalOrRHS(Sema &S, SourceLocation OpLoc, Expr *LHSExpr, Expr *RHSExpr) { if (BinaryOperator *Bop = dyn_cast(RHSExpr)) { if (Bop->getOpcode() == BO_LAnd) { // If it's "0 || a && b" don't warn since the precedence doesn't matter. if (EvaluatesAsFalse(S, LHSExpr)) return; // If it's "a || b && 1" don't warn since the precedence doesn't matter. 
if (!EvaluatesAsTrue(S, Bop->getRHS())) return EmitDiagnosticForLogicalAndInLogicalOr(S, OpLoc, Bop); } } } /// \brief Look for bitwise op in the left or right hand of a bitwise op with /// lower precedence and emit a diagnostic together with a fixit hint that wraps /// the '&' expression in parentheses. static void DiagnoseBitwiseOpInBitwiseOp(Sema &S, BinaryOperatorKind Opc, SourceLocation OpLoc, Expr *SubExpr) { if (BinaryOperator *Bop = dyn_cast(SubExpr)) { if (Bop->isBitwiseOp() && Bop->getOpcode() < Opc) { S.Diag(Bop->getOperatorLoc(), diag::warn_bitwise_op_in_bitwise_op) << Bop->getOpcodeStr() << BinaryOperator::getOpcodeStr(Opc) << Bop->getSourceRange() << OpLoc; SuggestParentheses(S, Bop->getOperatorLoc(), S.PDiag(diag::note_precedence_silence) << Bop->getOpcodeStr(), Bop->getSourceRange()); } } } static void DiagnoseAdditionInShift(Sema &S, SourceLocation OpLoc, Expr *SubExpr, StringRef Shift) { if (BinaryOperator *Bop = dyn_cast(SubExpr)) { if (Bop->getOpcode() == BO_Add || Bop->getOpcode() == BO_Sub) { StringRef Op = Bop->getOpcodeStr(); S.Diag(Bop->getOperatorLoc(), diag::warn_addition_in_bitshift) << Bop->getSourceRange() << OpLoc << Shift << Op; SuggestParentheses(S, Bop->getOperatorLoc(), S.PDiag(diag::note_precedence_silence) << Op, Bop->getSourceRange()); } } } static void DiagnoseShiftCompare(Sema &S, SourceLocation OpLoc, Expr *LHSExpr, Expr *RHSExpr) { CXXOperatorCallExpr *OCE = dyn_cast(LHSExpr); if (!OCE) return; FunctionDecl *FD = OCE->getDirectCallee(); if (!FD || !FD->isOverloadedOperator()) return; OverloadedOperatorKind Kind = FD->getOverloadedOperator(); if (Kind != OO_LessLess && Kind != OO_GreaterGreater) return; S.Diag(OpLoc, diag::warn_overloaded_shift_in_comparison) << LHSExpr->getSourceRange() << RHSExpr->getSourceRange() << (Kind == OO_LessLess); SuggestParentheses(S, OCE->getOperatorLoc(), S.PDiag(diag::note_precedence_silence) << (Kind == OO_LessLess ? "<<" : ">>"), OCE->getSourceRange()); SuggestParentheses(S, OpLoc, S.PDiag(diag::note_evaluate_comparison_first), SourceRange(OCE->getArg(1)->getLocStart(), RHSExpr->getLocEnd())); } /// DiagnoseBinOpPrecedence - Emit warnings for expressions with tricky /// precedence. static void DiagnoseBinOpPrecedence(Sema &Self, BinaryOperatorKind Opc, SourceLocation OpLoc, Expr *LHSExpr, Expr *RHSExpr){ // Diagnose "arg1 'bitwise' arg2 'eq' arg3". if (BinaryOperator::isBitwiseOp(Opc)) DiagnoseBitwisePrecedence(Self, Opc, OpLoc, LHSExpr, RHSExpr); // Diagnose "arg1 & arg2 | arg3" if ((Opc == BO_Or || Opc == BO_Xor) && !OpLoc.isMacroID()/* Don't warn in macros. */) { DiagnoseBitwiseOpInBitwiseOp(Self, Opc, OpLoc, LHSExpr); DiagnoseBitwiseOpInBitwiseOp(Self, Opc, OpLoc, RHSExpr); } // Warn about arg1 || arg2 && arg3, as GCC 4.3+ does. // We don't warn for 'assert(a || b && "bad")' since this is safe. if (Opc == BO_LOr && !OpLoc.isMacroID()/* Don't warn in macros. */) { DiagnoseLogicalAndInLogicalOrLHS(Self, OpLoc, LHSExpr, RHSExpr); DiagnoseLogicalAndInLogicalOrRHS(Self, OpLoc, LHSExpr, RHSExpr); } if ((Opc == BO_Shl && LHSExpr->getType()->isIntegralType(Self.getASTContext())) || Opc == BO_Shr) { StringRef Shift = BinaryOperator::getOpcodeStr(Opc); DiagnoseAdditionInShift(Self, OpLoc, LHSExpr, Shift); DiagnoseAdditionInShift(Self, OpLoc, RHSExpr, Shift); } // Warn on overloaded shift operators and comparisons, such as: // cout << 5 == 4; if (BinaryOperator::isComparisonOp(Opc)) DiagnoseShiftCompare(Self, OpLoc, LHSExpr, RHSExpr); } // Binary Operators. 'Tok' is the token for the operator. 
ExprResult Sema::ActOnBinOp(Scope *S, SourceLocation TokLoc, tok::TokenKind Kind, Expr *LHSExpr, Expr *RHSExpr) { BinaryOperatorKind Opc = ConvertTokenKindToBinaryOpcode(Kind); assert(LHSExpr && "ActOnBinOp(): missing left expression"); assert(RHSExpr && "ActOnBinOp(): missing right expression"); // Emit warnings for tricky precedence issues, e.g. "bitfield & 0x4 == 0" DiagnoseBinOpPrecedence(*this, Opc, TokLoc, LHSExpr, RHSExpr); return BuildBinOp(S, TokLoc, Opc, LHSExpr, RHSExpr); } /// Build an overloaded binary operator expression in the given scope. static ExprResult BuildOverloadedBinOp(Sema &S, Scope *Sc, SourceLocation OpLoc, BinaryOperatorKind Opc, Expr *LHS, Expr *RHS) { // Find all of the overloaded operators visible from this // point. We perform both an operator-name lookup from the local // scope and an argument-dependent lookup based on the types of // the arguments. UnresolvedSet<16> Functions; OverloadedOperatorKind OverOp = BinaryOperator::getOverloadedOperator(Opc); if (Sc && OverOp != OO_None && OverOp != OO_Equal) S.LookupOverloadedOperatorName(OverOp, Sc, LHS->getType(), RHS->getType(), Functions); // Build the (potentially-overloaded, potentially-dependent) // binary operation. return S.CreateOverloadedBinOp(OpLoc, Opc, Functions, LHS, RHS); } ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc, BinaryOperatorKind Opc, Expr *LHSExpr, Expr *RHSExpr) { // We want to end up calling one of checkPseudoObjectAssignment // (if the LHS is a pseudo-object), BuildOverloadedBinOp (if // both expressions are overloadable or either is type-dependent), // or CreateBuiltinBinOp (in any other case). We also want to get // any placeholder types out of the way. // Handle pseudo-objects in the LHS. if (const BuiltinType *pty = LHSExpr->getType()->getAsPlaceholderType()) { // Assignments with a pseudo-object l-value need special analysis. if (pty->getKind() == BuiltinType::PseudoObject && BinaryOperator::isAssignmentOp(Opc)) return checkPseudoObjectAssignment(S, OpLoc, Opc, LHSExpr, RHSExpr); // Don't resolve overloads if the other type is overloadable. if (pty->getKind() == BuiltinType::Overload) { // We can't actually test that if we still have a placeholder, // though. Fortunately, none of the exceptions we see in that // code below are valid when the LHS is an overload set. Note // that an overload set can be dependently-typed, but it never // instantiates to having an overloadable type. ExprResult resolvedRHS = CheckPlaceholderExpr(RHSExpr); if (resolvedRHS.isInvalid()) return ExprError(); RHSExpr = resolvedRHS.get(); if (RHSExpr->isTypeDependent() || RHSExpr->getType()->isOverloadableType()) return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr); } ExprResult LHS = CheckPlaceholderExpr(LHSExpr); if (LHS.isInvalid()) return ExprError(); LHSExpr = LHS.get(); } // Handle pseudo-objects in the RHS. if (const BuiltinType *pty = RHSExpr->getType()->getAsPlaceholderType()) { // An overload in the RHS can potentially be resolved by the type // being assigned to. if (Opc == BO_Assign && pty->getKind() == BuiltinType::Overload) { if (LHSExpr->isTypeDependent() || RHSExpr->isTypeDependent()) return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr); if (LHSExpr->getType()->isOverloadableType()) return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr); return CreateBuiltinBinOp(OpLoc, Opc, LHSExpr, RHSExpr); } // Don't resolve overloads if the other type is overloadable. 
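    // For illustration only (hypothetical user code), the branch below covers
    // a non-assignment operator whose RHS is still an unresolved overload set:
    //   struct S { S operator+(void (*)(int)); } s;
    //   void f(int); void f(double);
    //   s + f;   // defer to overload resolution rather than resolving 'f' here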
if (pty->getKind() == BuiltinType::Overload && LHSExpr->getType()->isOverloadableType()) return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr); ExprResult resolvedRHS = CheckPlaceholderExpr(RHSExpr); if (!resolvedRHS.isUsable()) return ExprError(); RHSExpr = resolvedRHS.get(); } if (getLangOpts().CPlusPlus) { // If either expression is type-dependent, always build an // overloaded op. if (LHSExpr->isTypeDependent() || RHSExpr->isTypeDependent()) return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr); // Otherwise, build an overloaded op if either expression has an // overloadable type. if (LHSExpr->getType()->isOverloadableType() || RHSExpr->getType()->isOverloadableType()) return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr); } // Build a built-in binary operation. return CreateBuiltinBinOp(OpLoc, Opc, LHSExpr, RHSExpr); } ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc, UnaryOperatorKind Opc, Expr *InputExpr) { ExprResult Input = InputExpr; ExprValueKind VK = VK_RValue; ExprObjectKind OK = OK_Ordinary; QualType resultType; if (getLangOpts().OpenCL) { // The only legal unary operation for atomics is '&'. if (Opc != UO_AddrOf && InputExpr->getType()->isAtomicType()) { return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << InputExpr->getType() << Input.get()->getSourceRange()); } } switch (Opc) { case UO_PreInc: case UO_PreDec: case UO_PostInc: case UO_PostDec: resultType = CheckIncrementDecrementOperand(*this, Input.get(), VK, OK, OpLoc, Opc == UO_PreInc || Opc == UO_PostInc, Opc == UO_PreInc || Opc == UO_PreDec); break; case UO_AddrOf: resultType = CheckAddressOfOperand(Input, OpLoc); RecordModifiableNonNullParam(*this, InputExpr); break; case UO_Deref: { Input = DefaultFunctionArrayLvalueConversion(Input.get()); if (Input.isInvalid()) return ExprError(); resultType = CheckIndirectionOperand(*this, Input.get(), VK, OpLoc); break; } case UO_Plus: case UO_Minus: Input = UsualUnaryConversions(Input.get()); if (Input.isInvalid()) return ExprError(); resultType = Input.get()->getType(); if (resultType->isDependentType()) break; if (resultType->isArithmeticType()) // C99 6.5.3.3p1 break; else if (resultType->isVectorType() && // The z vector extensions don't allow + or - with bool vectors. (!Context.getLangOpts().ZVector || resultType->getAs()->getVectorKind() != VectorType::AltiVecBool)) break; else if (getLangOpts().CPlusPlus && // C++ [expr.unary.op]p6 Opc == UO_Plus && resultType->isPointerType()) break; return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << resultType << Input.get()->getSourceRange()); case UO_Not: // bitwise complement Input = UsualUnaryConversions(Input.get()); if (Input.isInvalid()) return ExprError(); resultType = Input.get()->getType(); if (resultType->isDependentType()) break; // C99 6.5.3.3p1. We allow complex int and float as a GCC extension. if (resultType->isComplexType() || resultType->isComplexIntegerType()) // C99 does not support '~' for complex conjugation. Diag(OpLoc, diag::ext_integer_complement_complex) << resultType << Input.get()->getSourceRange(); else if (resultType->hasIntegerRepresentation()) break; else if (resultType->isExtVectorType()) { if (Context.getLangOpts().OpenCL) { // OpenCL v1.1 s6.3.f: The bitwise operator not (~) does not operate // on vector float types. 
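        // For illustration only (hypothetical OpenCL user code):
        //   float4 v;
        //   ~v;   // rejected by the element-type check below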
QualType T = resultType->getAs()->getElementType(); if (!T->isIntegerType()) return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << resultType << Input.get()->getSourceRange()); } break; } else { return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << resultType << Input.get()->getSourceRange()); } break; case UO_LNot: // logical negation // Unlike +/-/~, integer promotions aren't done here (C99 6.5.3.3p5). Input = DefaultFunctionArrayLvalueConversion(Input.get()); if (Input.isInvalid()) return ExprError(); resultType = Input.get()->getType(); // Though we still have to promote half FP to float... if (resultType->isHalfType() && !Context.getLangOpts().NativeHalfType) { Input = ImpCastExprToType(Input.get(), Context.FloatTy, CK_FloatingCast).get(); resultType = Context.FloatTy; } if (resultType->isDependentType()) break; if (resultType->isScalarType() && !isScopedEnumerationType(resultType)) { // C99 6.5.3.3p1: ok, fallthrough; if (Context.getLangOpts().CPlusPlus) { // C++03 [expr.unary.op]p8, C++0x [expr.unary.op]p9: // operand contextually converted to bool. Input = ImpCastExprToType(Input.get(), Context.BoolTy, ScalarTypeToBooleanCastKind(resultType)); } else if (Context.getLangOpts().OpenCL && Context.getLangOpts().OpenCLVersion < 120) { // OpenCL v1.1 6.3.h: The logical operator not (!) does not // operate on scalar float types. if (!resultType->isIntegerType()) return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << resultType << Input.get()->getSourceRange()); } } else if (resultType->isExtVectorType()) { if (Context.getLangOpts().OpenCL && Context.getLangOpts().OpenCLVersion < 120) { // OpenCL v1.1 6.3.h: The logical operator not (!) does not // operate on vector float types. QualType T = resultType->getAs()->getElementType(); if (!T->isIntegerType()) return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << resultType << Input.get()->getSourceRange()); } // Vector logical not returns the signed variant of the operand type. resultType = GetSignedVectorType(resultType); break; } else { return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr) << resultType << Input.get()->getSourceRange()); } // LNot always has type int. C99 6.5.3.3p5. // In C++, it's bool. C++ 5.3.1p8 resultType = Context.getLogicalOperationType(); break; case UO_Real: case UO_Imag: resultType = CheckRealImagOperand(*this, Input, OpLoc, Opc == UO_Real); // _Real maps ordinary l-values into ordinary l-values. _Imag maps ordinary // complex l-values to ordinary l-values and all other values to r-values. if (Input.isInvalid()) return ExprError(); if (Opc == UO_Real || Input.get()->getType()->isAnyComplexType()) { if (Input.get()->getValueKind() != VK_RValue && Input.get()->getObjectKind() == OK_Ordinary) VK = Input.get()->getValueKind(); } else if (!getLangOpts().CPlusPlus) { // In C, a volatile scalar is read by __imag. In C++, it is not. Input = DefaultLvalueConversion(Input.get()); } break; case UO_Extension: case UO_Coawait: resultType = Input.get()->getType(); VK = Input.get()->getValueKind(); OK = Input.get()->getObjectKind(); break; } if (resultType.isNull() || Input.isInvalid()) return ExprError(); // Check for array bounds violations in the operand of the UnaryOperator, // except for the '*' and '&' operators that have to be handled specially // by CheckArrayAccess (as there are special cases like &array[arraysize] // that are explicitly defined as valid by the standard). 
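  // For illustration only (hypothetical user code):
  //   int a[4];
  //   int *end = &a[4];   // not checked here: one-past-the-end is valid
  //   int x = -a[4];      // the operand of '-' is checked below for an
  //                       // out-of-bounds index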
  if (Opc != UO_AddrOf && Opc != UO_Deref)
    CheckArrayAccess(Input.get());

  return new (Context)
      UnaryOperator(Input.get(), Opc, resultType, VK, OK, OpLoc);
}

/// \brief Determine whether the given expression is a qualified member
/// access expression, of a form that could be turned into a pointer to member
/// with the address-of operator.
static bool isQualifiedMemberAccess(Expr *E) {
  if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) {
    if (!DRE->getQualifier())
      return false;

    ValueDecl *VD = DRE->getDecl();
    if (!VD->isCXXClassMember())
      return false;

    if (isa<FieldDecl>(VD) || isa<IndirectFieldDecl>(VD))
      return true;
    if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(VD))
      return Method->isInstance();

    return false;
  }

  if (UnresolvedLookupExpr *ULE = dyn_cast<UnresolvedLookupExpr>(E)) {
    if (!ULE->getQualifier())
      return false;

    for (NamedDecl *D : ULE->decls()) {
      if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D)) {
        if (Method->isInstance())
          return true;
      } else {
        // Overload set does not contain methods.
        break;
      }
    }

    return false;
  }

  return false;
}

ExprResult Sema::BuildUnaryOp(Scope *S, SourceLocation OpLoc,
                              UnaryOperatorKind Opc, Expr *Input) {
  // First things first: handle placeholders so that the
  // overloaded-operator check considers the right type.
  if (const BuiltinType *pty = Input->getType()->getAsPlaceholderType()) {
    // Increment and decrement of pseudo-object references.
    if (pty->getKind() == BuiltinType::PseudoObject &&
        UnaryOperator::isIncrementDecrementOp(Opc))
      return checkPseudoObjectIncDec(S, OpLoc, Opc, Input);

    // extension is always a builtin operator.
    if (Opc == UO_Extension)
      return CreateBuiltinUnaryOp(OpLoc, Opc, Input);

    // & gets special logic for several kinds of placeholder.
    // The builtin code knows what to do.
    if (Opc == UO_AddrOf &&
        (pty->getKind() == BuiltinType::Overload ||
         pty->getKind() == BuiltinType::UnknownAny ||
         pty->getKind() == BuiltinType::BoundMember))
      return CreateBuiltinUnaryOp(OpLoc, Opc, Input);

    // Anything else needs to be handled now.
    ExprResult Result = CheckPlaceholderExpr(Input);
    if (Result.isInvalid()) return ExprError();
    Input = Result.get();
  }

  if (getLangOpts().CPlusPlus && Input->getType()->isOverloadableType() &&
      UnaryOperator::getOverloadedOperator(Opc) != OO_None &&
      !(Opc == UO_AddrOf && isQualifiedMemberAccess(Input))) {
    // Find all of the overloaded operators visible from this
    // point. We perform both an operator-name lookup from the local
    // scope and an argument-dependent lookup based on the types of
    // the arguments.
    UnresolvedSet<16> Functions;
    OverloadedOperatorKind OverOp = UnaryOperator::getOverloadedOperator(Opc);
    if (S && OverOp != OO_None)
      LookupOverloadedOperatorName(OverOp, S, Input->getType(), QualType(),
                                   Functions);

    return CreateOverloadedUnaryOp(OpLoc, Opc, Functions, Input);
  }

  return CreateBuiltinUnaryOp(OpLoc, Opc, Input);
}

// Unary Operators.  'Tok' is the token for the operator.
ExprResult Sema::ActOnUnaryOp(Scope *S, SourceLocation OpLoc,
                              tok::TokenKind Op, Expr *Input) {
  return BuildUnaryOp(S, OpLoc, ConvertTokenKindToUnaryOpcode(Op), Input);
}

/// ActOnAddrLabel - Parse the GNU address of label extension: "&&foo".
ExprResult Sema::ActOnAddrLabel(SourceLocation OpLoc, SourceLocation LabLoc,
                                LabelDecl *TheDecl) {
  TheDecl->markUsed(Context);
  // Create the AST node.  The address of a label always has type 'void*'.
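  // For example (illustrative GNU C, not text from this file):
  //
  //   void *target = &&done;   // '&&done' is an AddrLabelExpr of type 'void *'
  //   goto *target;
  // done: ;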
  return new (Context) AddrLabelExpr(OpLoc, LabLoc, TheDecl,
                                     Context.getPointerType(Context.VoidTy));
}

/// Given the last statement in a statement-expression, check whether
/// the result is a producing expression (like a call to an
/// ns_returns_retained function) and, if so, rebuild it to hoist the
/// release out of the full-expression.  Otherwise, return null.
/// Cannot fail.
static Expr *maybeRebuildARCConsumingStmt(Stmt *Statement) {
  // Should always be wrapped with one of these.
  ExprWithCleanups *cleanups = dyn_cast<ExprWithCleanups>(Statement);
  if (!cleanups) return nullptr;

  ImplicitCastExpr *cast = dyn_cast<ImplicitCastExpr>(cleanups->getSubExpr());
  if (!cast || cast->getCastKind() != CK_ARCConsumeObject)
    return nullptr;

  // Splice out the cast.  This shouldn't modify any interesting
  // features of the statement.
  Expr *producer = cast->getSubExpr();
  assert(producer->getType() == cast->getType());
  assert(producer->getValueKind() == cast->getValueKind());
  cleanups->setSubExpr(producer);
  return cleanups;
}

void Sema::ActOnStartStmtExpr() {
  PushExpressionEvaluationContext(ExprEvalContexts.back().Context);
}

void Sema::ActOnStmtExprError() {
  // Note that function is also called by TreeTransform when leaving a
  // StmtExpr scope without rebuilding anything.

  DiscardCleanupsInEvaluationContext();
  PopExpressionEvaluationContext();
}

ExprResult
Sema::ActOnStmtExpr(SourceLocation LPLoc, Stmt *SubStmt,
                    SourceLocation RPLoc) { // "({..})"
  assert(SubStmt && isa<CompoundStmt>(SubStmt) && "Invalid action invocation!");
  CompoundStmt *Compound = cast<CompoundStmt>(SubStmt);

  if (hasAnyUnrecoverableErrorsInThisFunction())
    DiscardCleanupsInEvaluationContext();
  assert(!ExprNeedsCleanups && "cleanups within StmtExpr not correctly bound!");
  PopExpressionEvaluationContext();

  // FIXME: there are a variety of strange constraints to enforce here, for
  // example, it is not possible to goto into a stmt expression apparently.
  // More semantic analysis is needed.

  // If there are sub-stmts in the compound stmt, take the type of the last one
  // as the type of the stmtexpr.
  QualType Ty = Context.VoidTy;
  bool StmtExprMayBindToTemp = false;
  if (!Compound->body_empty()) {
    Stmt *LastStmt = Compound->body_back();
    LabelStmt *LastLabelStmt = nullptr;
    // If LastStmt is a label, skip down through into the body.
    while (LabelStmt *Label = dyn_cast<LabelStmt>(LastStmt)) {
      LastLabelStmt = Label;
      LastStmt = Label->getSubStmt();
    }

    if (Expr *LastE = dyn_cast<Expr>(LastStmt)) {
      // Do function/array conversion on the last expression, but not
      // lvalue-to-rvalue.  However, initialize an unqualified type.
      ExprResult LastExpr = DefaultFunctionArrayConversion(LastE);
      if (LastExpr.isInvalid())
        return ExprError();
      Ty = LastExpr.get()->getType().getUnqualifiedType();

      if (!Ty->isDependentType() && !LastExpr.get()->isTypeDependent()) {
        // In ARC, if the final expression ends in a consume, splice
        // the consume out and bind it later.  In the alternate case
        // (when dealing with a retainable type), the result
        // initialization will create a produce.  In both cases the
        // result will be +1, and we'll need to balance that out with
        // a bind.
if (Expr *rebuiltLastStmt = maybeRebuildARCConsumingStmt(LastExpr.get())) { LastExpr = rebuiltLastStmt; } else { LastExpr = PerformCopyInitialization( InitializedEntity::InitializeResult(LPLoc, Ty, false), SourceLocation(), LastExpr); } if (LastExpr.isInvalid()) return ExprError(); if (LastExpr.get() != nullptr) { if (!LastLabelStmt) Compound->setLastStmt(LastExpr.get()); else LastLabelStmt->setSubStmt(LastExpr.get()); StmtExprMayBindToTemp = true; } } } } // FIXME: Check that expression type is complete/non-abstract; statement // expressions are not lvalues. Expr *ResStmtExpr = new (Context) StmtExpr(Compound, Ty, LPLoc, RPLoc); if (StmtExprMayBindToTemp) return MaybeBindToTemporary(ResStmtExpr); return ResStmtExpr; } ExprResult Sema::BuildBuiltinOffsetOf(SourceLocation BuiltinLoc, TypeSourceInfo *TInfo, ArrayRef Components, SourceLocation RParenLoc) { QualType ArgTy = TInfo->getType(); bool Dependent = ArgTy->isDependentType(); SourceRange TypeRange = TInfo->getTypeLoc().getLocalSourceRange(); // We must have at least one component that refers to the type, and the first // one is known to be a field designator. Verify that the ArgTy represents // a struct/union/class. if (!Dependent && !ArgTy->isRecordType()) return ExprError(Diag(BuiltinLoc, diag::err_offsetof_record_type) << ArgTy << TypeRange); // Type must be complete per C99 7.17p3 because a declaring a variable // with an incomplete type would be ill-formed. if (!Dependent && RequireCompleteType(BuiltinLoc, ArgTy, diag::err_offsetof_incomplete_type, TypeRange)) return ExprError(); // offsetof with non-identifier designators (e.g. "offsetof(x, a.b[c])") are a // GCC extension, diagnose them. // FIXME: This diagnostic isn't actually visible because the location is in // a system header! if (Components.size() != 1) Diag(BuiltinLoc, diag::ext_offsetof_extended_field_designator) << SourceRange(Components[1].LocStart, Components.back().LocEnd); bool DidWarnAboutNonPOD = false; QualType CurrentType = ArgTy; SmallVector Comps; SmallVector Exprs; for (const OffsetOfComponent &OC : Components) { if (OC.isBrackets) { // Offset of an array sub-field. TODO: Should we allow vector elements? if (!CurrentType->isDependentType()) { const ArrayType *AT = Context.getAsArrayType(CurrentType); if(!AT) return ExprError(Diag(OC.LocEnd, diag::err_offsetof_array_type) << CurrentType); CurrentType = AT->getElementType(); } else CurrentType = Context.DependentTy; ExprResult IdxRval = DefaultLvalueConversion(static_cast(OC.U.E)); if (IdxRval.isInvalid()) return ExprError(); Expr *Idx = IdxRval.get(); // The expression must be an integral expression. // FIXME: An integral constant expression? if (!Idx->isTypeDependent() && !Idx->isValueDependent() && !Idx->getType()->isIntegerType()) return ExprError(Diag(Idx->getLocStart(), diag::err_typecheck_subscript_not_integer) << Idx->getSourceRange()); // Record this array index. Comps.push_back(OffsetOfNode(OC.LocStart, Exprs.size(), OC.LocEnd)); Exprs.push_back(Idx); continue; } // Offset of a field. if (CurrentType->isDependentType()) { // We have the offset of a field, but we can't look into the dependent // type. Just record the identifier of the field. Comps.push_back(OffsetOfNode(OC.LocStart, OC.U.IdentInfo, OC.LocEnd)); CurrentType = Context.DependentTy; continue; } // We need to have a complete type to look into. if (RequireCompleteType(OC.LocStart, CurrentType, diag::err_offsetof_incomplete_type)) return ExprError(); // Look for the designated field. 
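    // For example (illustrative usage, not text from this file), a call such
    // as __builtin_offsetof(struct S, inner.buf[2]) reaches this point once
    // per component: 'inner' and 'buf' are field designators resolved below,
    // while '[2]' was already recorded above as an array-index node.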
    const RecordType *RC = CurrentType->getAs<RecordType>();
    if (!RC)
      return ExprError(Diag(OC.LocEnd, diag::err_offsetof_record_type)
                       << CurrentType);
    RecordDecl *RD = RC->getDecl();

    // C++ [lib.support.types]p5:
    //   The macro offsetof accepts a restricted set of type arguments in this
    //   International Standard. type shall be a POD structure or a POD union
    //   (clause 9).
    // C++11 [support.types]p4:
    //   If type is not a standard-layout class (Clause 9), the results are
    //   undefined.
    if (CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD)) {
      bool IsSafe = LangOpts.CPlusPlus11? CRD->isStandardLayout() : CRD->isPOD();
      unsigned DiagID =
        LangOpts.CPlusPlus11? diag::ext_offsetof_non_standardlayout_type
                            : diag::ext_offsetof_non_pod_type;

      if (!IsSafe && !DidWarnAboutNonPOD &&
          DiagRuntimeBehavior(BuiltinLoc, nullptr,
                              PDiag(DiagID)
                              << SourceRange(Components[0].LocStart, OC.LocEnd)
                              << CurrentType))
        DidWarnAboutNonPOD = true;
    }

    // Look for the field.
    LookupResult R(*this, OC.U.IdentInfo, OC.LocStart, LookupMemberName);
    LookupQualifiedName(R, RD);
    FieldDecl *MemberDecl = R.getAsSingle<FieldDecl>();
    IndirectFieldDecl *IndirectMemberDecl = nullptr;
    if (!MemberDecl) {
      if ((IndirectMemberDecl = R.getAsSingle<IndirectFieldDecl>()))
        MemberDecl = IndirectMemberDecl->getAnonField();
    }

    if (!MemberDecl)
      return ExprError(Diag(BuiltinLoc, diag::err_no_member)
                       << OC.U.IdentInfo << RD
                       << SourceRange(OC.LocStart, OC.LocEnd));

    // C99 7.17p3:
    //   (If the specified member is a bit-field, the behavior is undefined.)
    //
    // We diagnose this as an error.
    if (MemberDecl->isBitField()) {
      Diag(OC.LocEnd, diag::err_offsetof_bitfield)
        << MemberDecl->getDeclName()
        << SourceRange(BuiltinLoc, RParenLoc);
      Diag(MemberDecl->getLocation(), diag::note_bitfield_decl);
      return ExprError();
    }

    RecordDecl *Parent = MemberDecl->getParent();
    if (IndirectMemberDecl)
      Parent = cast<RecordDecl>(IndirectMemberDecl->getDeclContext());

    // If the member was found in a base class, introduce OffsetOfNodes for
    // the base class indirections.
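    // For example (illustrative C++, not text from this file):
    //
    //   struct Base { int x; };
    //   struct Derived : Base { };
    //
    // __builtin_offsetof(Derived, x) records the Derived -> Base step as
    // base-class OffsetOfNodes before the node for the field 'x' itself.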
    CXXBasePaths Paths;
    if (IsDerivedFrom(OC.LocStart, CurrentType, Context.getTypeDeclType(Parent),
                      Paths)) {
      if (Paths.getDetectedVirtual()) {
        Diag(OC.LocEnd, diag::err_offsetof_field_of_virtual_base)
          << MemberDecl->getDeclName()
          << SourceRange(BuiltinLoc, RParenLoc);
        return ExprError();
      }

      CXXBasePath &Path = Paths.front();
      for (const CXXBasePathElement &B : Path)
        Comps.push_back(OffsetOfNode(B.Base));
    }

    if (IndirectMemberDecl) {
      for (auto *FI : IndirectMemberDecl->chain()) {
        assert(isa<FieldDecl>(FI));
        Comps.push_back(OffsetOfNode(OC.LocStart,
                                     cast<FieldDecl>(FI), OC.LocEnd));
      }
    } else
      Comps.push_back(OffsetOfNode(OC.LocStart, MemberDecl, OC.LocEnd));

    CurrentType = MemberDecl->getType().getNonReferenceType();
  }

  return OffsetOfExpr::Create(Context, Context.getSizeType(), BuiltinLoc, TInfo,
                              Comps, Exprs, RParenLoc);
}

ExprResult Sema::ActOnBuiltinOffsetOf(Scope *S,
                                      SourceLocation BuiltinLoc,
                                      SourceLocation TypeLoc,
                                      ParsedType ParsedArgTy,
                                      ArrayRef<OffsetOfComponent> Components,
                                      SourceLocation RParenLoc) {

  TypeSourceInfo *ArgTInfo;
  QualType ArgTy = GetTypeFromParser(ParsedArgTy, &ArgTInfo);
  if (ArgTy.isNull())
    return ExprError();

  if (!ArgTInfo)
    ArgTInfo = Context.getTrivialTypeSourceInfo(ArgTy, TypeLoc);

  return BuildBuiltinOffsetOf(BuiltinLoc, ArgTInfo, Components, RParenLoc);
}


ExprResult Sema::ActOnChooseExpr(SourceLocation BuiltinLoc,
                                 Expr *CondExpr,
                                 Expr *LHSExpr, Expr *RHSExpr,
                                 SourceLocation RPLoc) {
  assert((CondExpr && LHSExpr && RHSExpr) && "Missing type argument(s)");

  ExprValueKind VK = VK_RValue;
  ExprObjectKind OK = OK_Ordinary;
  QualType resType;
  bool ValueDependent = false;
  bool CondIsTrue = false;
  if (CondExpr->isTypeDependent() || CondExpr->isValueDependent()) {
    resType = Context.DependentTy;
    ValueDependent = true;
  } else {
    // The conditional expression is required to be a constant expression.
    llvm::APSInt condEval(32);
    ExprResult CondICE
      = VerifyIntegerConstantExpression(CondExpr, &condEval,
          diag::err_typecheck_choose_expr_requires_constant, false);
    if (CondICE.isInvalid())
      return ExprError();
    CondExpr = CondICE.get();
    CondIsTrue = condEval.getZExtValue();

    // If the condition is > zero, then the AST type is the same as the LSHExpr.
    Expr *ActiveExpr = CondIsTrue ? LHSExpr : RHSExpr;

    resType = ActiveExpr->getType();
    ValueDependent = ActiveExpr->isValueDependent();
    VK = ActiveExpr->getValueKind();
    OK = ActiveExpr->getObjectKind();
  }

  return new (Context)
      ChooseExpr(BuiltinLoc, CondExpr, LHSExpr, RHSExpr, resType, VK, OK, RPLoc,
                 CondIsTrue, resType->isDependentType(), ValueDependent);
}

//===----------------------------------------------------------------------===//
// Clang Extensions.
//===----------------------------------------------------------------------===//

/// ActOnBlockStart - This callback is invoked when a block literal is started.
void Sema::ActOnBlockStart(SourceLocation CaretLoc, Scope *CurScope) {
  BlockDecl *Block = BlockDecl::Create(Context, CurContext, CaretLoc);

  if (LangOpts.CPlusPlus) {
    Decl *ManglingContextDecl;
    if (MangleNumberingContext *MCtx =
            getCurrentMangleNumberContext(Block->getDeclContext(),
                                          ManglingContextDecl)) {
      unsigned ManglingNumber = MCtx->getManglingNumber(Block);
      Block->setBlockMangling(ManglingNumber, ManglingContextDecl);
    }
  }

  PushBlockScope(CurScope, Block);
  CurContext->addDecl(Block);
  if (CurScope)
    PushDeclContext(CurScope, Block);
  else
    CurContext = Block;

  getCurBlock()->HasImplicitReturnType = true;

  // Enter a new evaluation context to insulate the block from any
  // cleanups from the enclosing full-expression.
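  // For example (illustrative block literal, assuming the Blocks extension is
  // enabled; not text from this file):
  //
  //   int (^square)(int) = ^(int x) { return x * x; };
  //
  // ActOnBlockStart has already run once the '^' has been seen; the parameter
  // list and body are attached by the callbacks that follow.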
PushExpressionEvaluationContext(PotentiallyEvaluated); } void Sema::ActOnBlockArguments(SourceLocation CaretLoc, Declarator &ParamInfo, Scope *CurScope) { assert(ParamInfo.getIdentifier() == nullptr && "block-id should have no identifier!"); assert(ParamInfo.getContext() == Declarator::BlockLiteralContext); BlockScopeInfo *CurBlock = getCurBlock(); TypeSourceInfo *Sig = GetTypeForDeclarator(ParamInfo, CurScope); QualType T = Sig->getType(); // FIXME: We should allow unexpanded parameter packs here, but that would, // in turn, make the block expression contain unexpanded parameter packs. if (DiagnoseUnexpandedParameterPack(CaretLoc, Sig, UPPC_Block)) { // Drop the parameters. FunctionProtoType::ExtProtoInfo EPI; EPI.HasTrailingReturn = false; EPI.TypeQuals |= DeclSpec::TQ_const; T = Context.getFunctionType(Context.DependentTy, None, EPI); Sig = Context.getTrivialTypeSourceInfo(T); } // GetTypeForDeclarator always produces a function type for a block // literal signature. Furthermore, it is always a FunctionProtoType // unless the function was written with a typedef. assert(T->isFunctionType() && "GetTypeForDeclarator made a non-function block signature"); // Look for an explicit signature in that function type. FunctionProtoTypeLoc ExplicitSignature; TypeLoc tmp = Sig->getTypeLoc().IgnoreParens(); if ((ExplicitSignature = tmp.getAs())) { // Check whether that explicit signature was synthesized by // GetTypeForDeclarator. If so, don't save that as part of the // written signature. if (ExplicitSignature.getLocalRangeBegin() == ExplicitSignature.getLocalRangeEnd()) { // This would be much cheaper if we stored TypeLocs instead of // TypeSourceInfos. TypeLoc Result = ExplicitSignature.getReturnLoc(); unsigned Size = Result.getFullDataSize(); Sig = Context.CreateTypeSourceInfo(Result.getType(), Size); Sig->getTypeLoc().initializeFullCopy(Result, Size); ExplicitSignature = FunctionProtoTypeLoc(); } } CurBlock->TheDecl->setSignatureAsWritten(Sig); CurBlock->FunctionType = T; const FunctionType *Fn = T->getAs(); QualType RetTy = Fn->getReturnType(); bool isVariadic = (isa(Fn) && cast(Fn)->isVariadic()); CurBlock->TheDecl->setIsVariadic(isVariadic); // Context.DependentTy is used as a placeholder for a missing block // return type. TODO: what should we do with declarators like: // ^ * { ... } // If the answer is "apply template argument deduction".... if (RetTy != Context.DependentTy) { CurBlock->ReturnType = RetTy; CurBlock->TheDecl->setBlockMissingReturnType(false); CurBlock->HasImplicitReturnType = false; } // Push block parameters from the declarator if we had them. SmallVector Params; if (ExplicitSignature) { for (unsigned I = 0, E = ExplicitSignature.getNumParams(); I != E; ++I) { ParmVarDecl *Param = ExplicitSignature.getParam(I); if (Param->getIdentifier() == nullptr && !Param->isImplicit() && !Param->isInvalidDecl() && !getLangOpts().CPlusPlus) Diag(Param->getLocation(), diag::err_parameter_name_omitted); Params.push_back(Param); } // Fake up parameter variables if we have a typedef, like // ^ fntype { ... } } else if (const FunctionProtoType *Fn = T->getAs()) { for (const auto &I : Fn->param_types()) { ParmVarDecl *Param = BuildParmVarDeclForTypedef( CurBlock->TheDecl, ParamInfo.getLocStart(), I); Params.push_back(Param); } } // Set the parameters on the block decl. if (!Params.empty()) { CurBlock->TheDecl->setParams(Params); CheckParmsForFunctionDef(CurBlock->TheDecl->param_begin(), CurBlock->TheDecl->param_end(), /*CheckParameterNames=*/false); } // Finally we can process decl attributes. 
ProcessDeclAttributes(CurScope, CurBlock->TheDecl, ParamInfo); // Put the parameter variables in scope. for (auto AI : CurBlock->TheDecl->params()) { AI->setOwningFunction(CurBlock->TheDecl); // If this has an identifier, add it to the scope stack. if (AI->getIdentifier()) { CheckShadow(CurBlock->TheScope, AI); PushOnScopeChains(AI, CurBlock->TheScope); } } } /// ActOnBlockError - If there is an error parsing a block, this callback /// is invoked to pop the information about the block from the action impl. void Sema::ActOnBlockError(SourceLocation CaretLoc, Scope *CurScope) { // Leave the expression-evaluation context. DiscardCleanupsInEvaluationContext(); PopExpressionEvaluationContext(); // Pop off CurBlock, handle nested blocks. PopDeclContext(); PopFunctionScopeInfo(); } /// ActOnBlockStmtExpr - This is called when the body of a block statement /// literal was successfully completed. ^(int x){...} ExprResult Sema::ActOnBlockStmtExpr(SourceLocation CaretLoc, Stmt *Body, Scope *CurScope) { // If blocks are disabled, emit an error. if (!LangOpts.Blocks) Diag(CaretLoc, diag::err_blocks_disable); // Leave the expression-evaluation context. if (hasAnyUnrecoverableErrorsInThisFunction()) DiscardCleanupsInEvaluationContext(); assert(!ExprNeedsCleanups && "cleanups within block not correctly bound!"); PopExpressionEvaluationContext(); BlockScopeInfo *BSI = cast(FunctionScopes.back()); if (BSI->HasImplicitReturnType) deduceClosureReturnType(*BSI); PopDeclContext(); QualType RetTy = Context.VoidTy; if (!BSI->ReturnType.isNull()) RetTy = BSI->ReturnType; bool NoReturn = BSI->TheDecl->hasAttr(); QualType BlockTy; // Set the captured variables on the block. // FIXME: Share capture structure between BlockDecl and CapturingScopeInfo! SmallVector Captures; for (CapturingScopeInfo::Capture &Cap : BSI->Captures) { if (Cap.isThisCapture()) continue; BlockDecl::Capture NewCap(Cap.getVariable(), Cap.isBlockCapture(), Cap.isNested(), Cap.getInitExpr()); Captures.push_back(NewCap); } BSI->TheDecl->setCaptures(Context, Captures, BSI->CXXThisCaptureIndex != 0); // If the user wrote a function type in some form, try to use that. if (!BSI->FunctionType.isNull()) { const FunctionType *FTy = BSI->FunctionType->getAs(); FunctionType::ExtInfo Ext = FTy->getExtInfo(); if (NoReturn && !Ext.getNoReturn()) Ext = Ext.withNoReturn(true); // Turn protoless block types into nullary block types. if (isa(FTy)) { FunctionProtoType::ExtProtoInfo EPI; EPI.ExtInfo = Ext; BlockTy = Context.getFunctionType(RetTy, None, EPI); // Otherwise, if we don't need to change anything about the function type, // preserve its sugar structure. } else if (FTy->getReturnType() == RetTy && (!NoReturn || FTy->getNoReturnAttr())) { BlockTy = BSI->FunctionType; // Otherwise, make the minimal modifications to the function type. } else { const FunctionProtoType *FPT = cast(FTy); FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo(); EPI.TypeQuals = 0; // FIXME: silently? EPI.ExtInfo = Ext; BlockTy = Context.getFunctionType(RetTy, FPT->getParamTypes(), EPI); } // If we don't have a function type, just build one from nothing. } else { FunctionProtoType::ExtProtoInfo EPI; EPI.ExtInfo = FunctionType::ExtInfo().withNoReturn(NoReturn); BlockTy = Context.getFunctionType(RetTy, None, EPI); } DiagnoseUnusedParameters(BSI->TheDecl->param_begin(), BSI->TheDecl->param_end()); BlockTy = Context.getBlockPointerType(BlockTy); // If needed, diagnose invalid gotos and switches in the block. 
if (getCurFunction()->NeedsScopeChecking() && !PP.isCodeCompletionEnabled()) DiagnoseInvalidJumps(cast(Body)); BSI->TheDecl->setBody(cast(Body)); // Try to apply the named return value optimization. We have to check again // if we can do this, though, because blocks keep return statements around // to deduce an implicit return type. if (getLangOpts().CPlusPlus && RetTy->isRecordType() && !BSI->TheDecl->isDependentContext()) computeNRVO(Body, BSI); BlockExpr *Result = new (Context) BlockExpr(BSI->TheDecl, BlockTy); AnalysisBasedWarnings::Policy WP = AnalysisWarnings.getDefaultPolicy(); PopFunctionScopeInfo(&WP, Result->getBlockDecl(), Result); // If the block isn't obviously global, i.e. it captures anything at // all, then we need to do a few things in the surrounding context: if (Result->getBlockDecl()->hasCaptures()) { // First, this expression has a new cleanup object. ExprCleanupObjects.push_back(Result->getBlockDecl()); ExprNeedsCleanups = true; // It also gets a branch-protected scope if any of the captured // variables needs destruction. for (const auto &CI : Result->getBlockDecl()->captures()) { const VarDecl *var = CI.getVariable(); if (var->getType().isDestructedType() != QualType::DK_none) { getCurFunction()->setHasBranchProtectedScope(); break; } } } return Result; } ExprResult Sema::ActOnVAArg(SourceLocation BuiltinLoc, Expr *E, ParsedType Ty, SourceLocation RPLoc) { TypeSourceInfo *TInfo; GetTypeFromParser(Ty, &TInfo); return BuildVAArgExpr(BuiltinLoc, E, TInfo, RPLoc); } ExprResult Sema::BuildVAArgExpr(SourceLocation BuiltinLoc, Expr *E, TypeSourceInfo *TInfo, SourceLocation RPLoc) { Expr *OrigExpr = E; bool IsMS = false; // It might be a __builtin_ms_va_list. (But don't ever mark a va_arg() // as Microsoft ABI on an actual Microsoft platform, where // __builtin_ms_va_list and __builtin_va_list are the same.) if (!E->isTypeDependent() && Context.getTargetInfo().hasBuiltinMSVaList() && Context.getTargetInfo().getBuiltinVaListKind() != TargetInfo::CharPtrBuiltinVaList) { QualType MSVaListType = Context.getBuiltinMSVaListType(); if (Context.hasSameType(MSVaListType, E->getType())) { if (CheckForModifiableLvalue(E, BuiltinLoc, *this)) return ExprError(); IsMS = true; } } // Get the va_list type QualType VaListType = Context.getBuiltinVaListType(); if (!IsMS) { if (VaListType->isArrayType()) { // Deal with implicit array decay; for example, on x86-64, // va_list is an array, but it's supposed to decay to // a pointer for va_arg. VaListType = Context.getArrayDecayedType(VaListType); // Make sure the input expression also decays appropriately. ExprResult Result = UsualUnaryConversions(E); if (Result.isInvalid()) return ExprError(); E = Result.get(); } else if (VaListType->isRecordType() && getLangOpts().CPlusPlus) { // If va_list is a record type and we are compiling in C++ mode, // check the argument using reference binding. InitializedEntity Entity = InitializedEntity::InitializeParameter( Context, Context.getLValueReferenceType(VaListType), false); ExprResult Init = PerformCopyInitialization(Entity, SourceLocation(), E); if (Init.isInvalid()) return ExprError(); E = Init.getAs(); } else { // Otherwise, the va_list argument must be an l-value because // it is modified by va_arg. 
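      // For example (illustrative C with a pointer-style va_list; 'get_ap' is
      // a hypothetical helper returning a va_list by value):
      //
      //   va_arg(ap, int);        // OK: 'ap' is a modifiable lvalue
      //   va_arg(get_ap(), int);  // rejected below: not a modifiable lvalue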
      if (!E->isTypeDependent() &&
          CheckForModifiableLvalue(E, BuiltinLoc, *this))
        return ExprError();
    }
  }

  if (!IsMS && !E->isTypeDependent() &&
      !Context.hasSameType(VaListType, E->getType()))
    return ExprError(Diag(E->getLocStart(),
                         diag::err_first_argument_to_va_arg_not_of_type_va_list)
      << OrigExpr->getType()
      << E->getSourceRange());

  if (!TInfo->getType()->isDependentType()) {
    if (RequireCompleteType(TInfo->getTypeLoc().getBeginLoc(), TInfo->getType(),
                            diag::err_second_parameter_to_va_arg_incomplete,
                            TInfo->getTypeLoc()))
      return ExprError();

    if (RequireNonAbstractType(TInfo->getTypeLoc().getBeginLoc(),
                               TInfo->getType(),
                               diag::err_second_parameter_to_va_arg_abstract,
                               TInfo->getTypeLoc()))
      return ExprError();

    if (!TInfo->getType().isPODType(Context)) {
      Diag(TInfo->getTypeLoc().getBeginLoc(),
           TInfo->getType()->isObjCLifetimeType()
             ? diag::warn_second_parameter_to_va_arg_ownership_qualified
             : diag::warn_second_parameter_to_va_arg_not_pod)
        << TInfo->getType()
        << TInfo->getTypeLoc().getSourceRange();
    }

    // Check for va_arg where arguments of the given type will be promoted
    // (i.e. this va_arg is guaranteed to have undefined behavior).
    QualType PromoteType;
    if (TInfo->getType()->isPromotableIntegerType()) {
      PromoteType = Context.getPromotedIntegerType(TInfo->getType());
      if (Context.typesAreCompatible(PromoteType, TInfo->getType()))
        PromoteType = QualType();
    }
    if (TInfo->getType()->isSpecificBuiltinType(BuiltinType::Float))
      PromoteType = Context.DoubleTy;
    if (!PromoteType.isNull())
      DiagRuntimeBehavior(TInfo->getTypeLoc().getBeginLoc(), E,
                  PDiag(diag::warn_second_parameter_to_va_arg_never_compatible)
                          << TInfo->getType()
                          << PromoteType
                          << TInfo->getTypeLoc().getSourceRange());
  }

  QualType T = TInfo->getType().getNonLValueExprType(Context);
  return new (Context) VAArgExpr(BuiltinLoc, E, TInfo, RPLoc, T, IsMS);
}

ExprResult Sema::ActOnGNUNullExpr(SourceLocation TokenLoc) {
  // The type of __null will be int or long, depending on the size of
  // pointers on the target.
  QualType Ty;
  unsigned pw = Context.getTargetInfo().getPointerWidth(0);
  if (pw == Context.getTargetInfo().getIntWidth())
    Ty = Context.IntTy;
  else if (pw == Context.getTargetInfo().getLongWidth())
    Ty = Context.LongTy;
  else if (pw == Context.getTargetInfo().getLongLongWidth())
    Ty = Context.LongLongTy;
  else {
    llvm_unreachable("I don't know size of pointer!");
  }

  return new (Context) GNUNullExpr(Ty, TokenLoc);
}

bool Sema::ConversionToObjCStringLiteralCheck(QualType DstType, Expr *&Exp,
                                              bool Diagnose) {
  if (!getLangOpts().ObjC1)
    return false;

  const ObjCObjectPointerType *PT = DstType->getAs<ObjCObjectPointerType>();
  if (!PT)
    return false;

  if (!PT->isObjCIdType()) {
    // Check if the destination is the 'NSString' interface.
    const ObjCInterfaceDecl *ID = PT->getInterfaceDecl();
    if (!ID || !ID->getIdentifier()->isStr("NSString"))
      return false;
  }

  // Ignore any parens, implicit casts (should only be
  // array-to-pointer decays), and not-so-opaque values.  The last is
  // important for making this trigger for property assignments.
Expr *SrcExpr = Exp->IgnoreParenImpCasts(); if (OpaqueValueExpr *OV = dyn_cast(SrcExpr)) if (OV->getSourceExpr()) SrcExpr = OV->getSourceExpr()->IgnoreParenImpCasts(); StringLiteral *SL = dyn_cast(SrcExpr); if (!SL || !SL->isAscii()) return false; if (Diagnose) Diag(SL->getLocStart(), diag::err_missing_atsign_prefix) << FixItHint::CreateInsertion(SL->getLocStart(), "@"); Exp = BuildObjCStringLiteral(SL->getLocStart(), SL).get(); return true; } static bool maybeDiagnoseAssignmentToFunction(Sema &S, QualType DstType, const Expr *SrcExpr) { if (!DstType->isFunctionPointerType() || !SrcExpr->getType()->isFunctionType()) return false; auto *DRE = dyn_cast(SrcExpr->IgnoreParenImpCasts()); if (!DRE) return false; auto *FD = dyn_cast(DRE->getDecl()); if (!FD) return false; return !S.checkAddressOfFunctionIsAvailable(FD, /*Complain=*/true, SrcExpr->getLocStart()); } bool Sema::DiagnoseAssignmentResult(AssignConvertType ConvTy, SourceLocation Loc, QualType DstType, QualType SrcType, Expr *SrcExpr, AssignmentAction Action, bool *Complained) { if (Complained) *Complained = false; // Decode the result (notice that AST's are still created for extensions). bool CheckInferredResultType = false; bool isInvalid = false; unsigned DiagKind = 0; FixItHint Hint; ConversionFixItGenerator ConvHints; bool MayHaveConvFixit = false; bool MayHaveFunctionDiff = false; const ObjCInterfaceDecl *IFace = nullptr; const ObjCProtocolDecl *PDecl = nullptr; switch (ConvTy) { case Compatible: DiagnoseAssignmentEnum(DstType, SrcType, SrcExpr); return false; case PointerToInt: DiagKind = diag::ext_typecheck_convert_pointer_int; ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this); MayHaveConvFixit = true; break; case IntToPointer: DiagKind = diag::ext_typecheck_convert_int_pointer; ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this); MayHaveConvFixit = true; break; case IncompatiblePointer: DiagKind = (Action == AA_Passing_CFAudited ? diag::err_arc_typecheck_convert_incompatible_pointer : diag::ext_typecheck_convert_incompatible_pointer); CheckInferredResultType = DstType->isObjCObjectPointerType() && SrcType->isObjCObjectPointerType(); if (Hint.isNull() && !CheckInferredResultType) { ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this); } else if (CheckInferredResultType) { SrcType = SrcType.getUnqualifiedType(); DstType = DstType.getUnqualifiedType(); } MayHaveConvFixit = true; break; case IncompatiblePointerSign: DiagKind = diag::ext_typecheck_convert_incompatible_pointer_sign; break; case FunctionVoidPointer: DiagKind = diag::ext_typecheck_convert_pointer_void_func; break; case IncompatiblePointerDiscardsQualifiers: { // Perform array-to-pointer decay if necessary. if (SrcType->isArrayType()) SrcType = Context.getArrayDecayedType(SrcType); Qualifiers lhq = SrcType->getPointeeType().getQualifiers(); Qualifiers rhq = DstType->getPointeeType().getQualifiers(); if (lhq.getAddressSpace() != rhq.getAddressSpace()) { DiagKind = diag::err_typecheck_incompatible_address_space; break; } else if (lhq.getObjCLifetime() != rhq.getObjCLifetime()) { DiagKind = diag::err_typecheck_incompatible_ownership; break; } llvm_unreachable("unknown error case for discarding qualifiers!"); // fallthrough } case CompatiblePointerDiscardsQualifiers: // If the qualifiers lost were because we were applying the // (deprecated) C++ conversion from a string literal to a char* // (or wchar_t*), then there was no error (C++ 4.2p2). FIXME: // Ideally, this check would be performed in // checkPointerTypesForAssignment. 
However, that would require a // bit of refactoring (so that the second argument is an // expression, rather than a type), which should be done as part // of a larger effort to fix checkPointerTypesForAssignment for // C++ semantics. if (getLangOpts().CPlusPlus && IsStringLiteralToNonConstPointerConversion(SrcExpr, DstType)) return false; DiagKind = diag::ext_typecheck_convert_discards_qualifiers; break; case IncompatibleNestedPointerQualifiers: DiagKind = diag::ext_nested_pointer_qualifier_mismatch; break; case IntToBlockPointer: DiagKind = diag::err_int_to_block_pointer; break; case IncompatibleBlockPointer: DiagKind = diag::err_typecheck_convert_incompatible_block_pointer; break; case IncompatibleObjCQualifiedId: { if (SrcType->isObjCQualifiedIdType()) { const ObjCObjectPointerType *srcOPT = SrcType->getAs(); for (auto *srcProto : srcOPT->quals()) { PDecl = srcProto; break; } if (const ObjCInterfaceType *IFaceT = DstType->getAs()->getInterfaceType()) IFace = IFaceT->getDecl(); } else if (DstType->isObjCQualifiedIdType()) { const ObjCObjectPointerType *dstOPT = DstType->getAs(); for (auto *dstProto : dstOPT->quals()) { PDecl = dstProto; break; } if (const ObjCInterfaceType *IFaceT = SrcType->getAs()->getInterfaceType()) IFace = IFaceT->getDecl(); } DiagKind = diag::warn_incompatible_qualified_id; break; } case IncompatibleVectors: DiagKind = diag::warn_incompatible_vectors; break; case IncompatibleObjCWeakRef: DiagKind = diag::err_arc_weak_unavailable_assign; break; case Incompatible: if (maybeDiagnoseAssignmentToFunction(*this, DstType, SrcExpr)) { if (Complained) *Complained = true; return true; } DiagKind = diag::err_typecheck_convert_incompatible; ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this); MayHaveConvFixit = true; isInvalid = true; MayHaveFunctionDiff = true; break; } QualType FirstType, SecondType; switch (Action) { case AA_Assigning: case AA_Initializing: // The destination type comes first. FirstType = DstType; SecondType = SrcType; break; case AA_Returning: case AA_Passing: case AA_Passing_CFAudited: case AA_Converting: case AA_Sending: case AA_Casting: // The source type comes first. FirstType = SrcType; SecondType = DstType; break; } PartialDiagnostic FDiag = PDiag(DiagKind); if (Action == AA_Passing_CFAudited) FDiag << FirstType << SecondType << AA_Passing << SrcExpr->getSourceRange(); else FDiag << FirstType << SecondType << Action << SrcExpr->getSourceRange(); // If we can fix the conversion, suggest the FixIts. 
assert(ConvHints.isNull() || Hint.isNull()); if (!ConvHints.isNull()) { for (FixItHint &H : ConvHints.Hints) FDiag << H; } else { FDiag << Hint; } if (MayHaveConvFixit) { FDiag << (unsigned) (ConvHints.Kind); } if (MayHaveFunctionDiff) HandleFunctionTypeMismatch(FDiag, SecondType, FirstType); Diag(Loc, FDiag); if (DiagKind == diag::warn_incompatible_qualified_id && PDecl && IFace && !IFace->hasDefinition()) Diag(IFace->getLocation(), diag::not_incomplete_class_and_qualified_id) << IFace->getName() << PDecl->getName(); if (SecondType == Context.OverloadTy) NoteAllOverloadCandidates(OverloadExpr::find(SrcExpr).Expression, FirstType, /*TakingAddress=*/true); if (CheckInferredResultType) EmitRelatedResultTypeNote(SrcExpr); if (Action == AA_Returning && ConvTy == IncompatiblePointer) EmitRelatedResultTypeNoteForReturn(DstType); if (Complained) *Complained = true; return isInvalid; } ExprResult Sema::VerifyIntegerConstantExpression(Expr *E, llvm::APSInt *Result) { class SimpleICEDiagnoser : public VerifyICEDiagnoser { public: void diagnoseNotICE(Sema &S, SourceLocation Loc, SourceRange SR) override { S.Diag(Loc, diag::err_expr_not_ice) << S.LangOpts.CPlusPlus << SR; } } Diagnoser; return VerifyIntegerConstantExpression(E, Result, Diagnoser); } ExprResult Sema::VerifyIntegerConstantExpression(Expr *E, llvm::APSInt *Result, unsigned DiagID, bool AllowFold) { class IDDiagnoser : public VerifyICEDiagnoser { unsigned DiagID; public: IDDiagnoser(unsigned DiagID) : VerifyICEDiagnoser(DiagID == 0), DiagID(DiagID) { } void diagnoseNotICE(Sema &S, SourceLocation Loc, SourceRange SR) override { S.Diag(Loc, DiagID) << SR; } } Diagnoser(DiagID); return VerifyIntegerConstantExpression(E, Result, Diagnoser, AllowFold); } void Sema::VerifyICEDiagnoser::diagnoseFold(Sema &S, SourceLocation Loc, SourceRange SR) { S.Diag(Loc, diag::ext_expr_not_ice) << SR << S.LangOpts.CPlusPlus; } ExprResult Sema::VerifyIntegerConstantExpression(Expr *E, llvm::APSInt *Result, VerifyICEDiagnoser &Diagnoser, bool AllowFold) { SourceLocation DiagLoc = E->getLocStart(); if (getLangOpts().CPlusPlus11) { // C++11 [expr.const]p5: // If an expression of literal class type is used in a context where an // integral constant expression is required, then that class type shall // have a single non-explicit conversion function to an integral or // unscoped enumeration type ExprResult Converted; class CXX11ConvertDiagnoser : public ICEConvertDiagnoser { public: CXX11ConvertDiagnoser(bool Silent) : ICEConvertDiagnoser(/*AllowScopedEnumerations*/false, Silent, true) {} SemaDiagnosticBuilder diagnoseNotInt(Sema &S, SourceLocation Loc, QualType T) override { return S.Diag(Loc, diag::err_ice_not_integral) << T; } SemaDiagnosticBuilder diagnoseIncomplete( Sema &S, SourceLocation Loc, QualType T) override { return S.Diag(Loc, diag::err_ice_incomplete_type) << T; } SemaDiagnosticBuilder diagnoseExplicitConv( Sema &S, SourceLocation Loc, QualType T, QualType ConvTy) override { return S.Diag(Loc, diag::err_ice_explicit_conversion) << T << ConvTy; } SemaDiagnosticBuilder noteExplicitConv( Sema &S, CXXConversionDecl *Conv, QualType ConvTy) override { return S.Diag(Conv->getLocation(), diag::note_ice_conversion_here) << ConvTy->isEnumeralType() << ConvTy; } SemaDiagnosticBuilder diagnoseAmbiguous( Sema &S, SourceLocation Loc, QualType T) override { return S.Diag(Loc, diag::err_ice_ambiguous_conversion) << T; } SemaDiagnosticBuilder noteAmbiguous( Sema &S, CXXConversionDecl *Conv, QualType ConvTy) override { return S.Diag(Conv->getLocation(), 
diag::note_ice_conversion_here) << ConvTy->isEnumeralType() << ConvTy; } SemaDiagnosticBuilder diagnoseConversion( Sema &S, SourceLocation Loc, QualType T, QualType ConvTy) override { llvm_unreachable("conversion functions are permitted"); } } ConvertDiagnoser(Diagnoser.Suppress); Converted = PerformContextualImplicitConversion(DiagLoc, E, ConvertDiagnoser); if (Converted.isInvalid()) return Converted; E = Converted.get(); if (!E->getType()->isIntegralOrUnscopedEnumerationType()) return ExprError(); } else if (!E->getType()->isIntegralOrUnscopedEnumerationType()) { // An ICE must be of integral or unscoped enumeration type. if (!Diagnoser.Suppress) Diagnoser.diagnoseNotICE(*this, DiagLoc, E->getSourceRange()); return ExprError(); } // Circumvent ICE checking in C++11 to avoid evaluating the expression twice // in the non-ICE case. if (!getLangOpts().CPlusPlus11 && E->isIntegerConstantExpr(Context)) { if (Result) *Result = E->EvaluateKnownConstInt(Context); return E; } Expr::EvalResult EvalResult; SmallVector Notes; EvalResult.Diag = &Notes; // Try to evaluate the expression, and produce diagnostics explaining why it's // not a constant expression as a side-effect. bool Folded = E->EvaluateAsRValue(EvalResult, Context) && EvalResult.Val.isInt() && !EvalResult.HasSideEffects; // In C++11, we can rely on diagnostics being produced for any expression // which is not a constant expression. If no diagnostics were produced, then // this is a constant expression. if (Folded && getLangOpts().CPlusPlus11 && Notes.empty()) { if (Result) *Result = EvalResult.Val.getInt(); return E; } // If our only note is the usual "invalid subexpression" note, just point // the caret at its location rather than producing an essentially // redundant note. if (Notes.size() == 1 && Notes[0].second.getDiagID() == diag::note_invalid_subexpr_in_const_expr) { DiagLoc = Notes[0].first; Notes.clear(); } if (!Folded || !AllowFold) { if (!Diagnoser.Suppress) { Diagnoser.diagnoseNotICE(*this, DiagLoc, E->getSourceRange()); for (const PartialDiagnosticAt &Note : Notes) Diag(Note.first, Note.second); } return ExprError(); } Diagnoser.diagnoseFold(*this, DiagLoc, E->getSourceRange()); for (const PartialDiagnosticAt &Note : Notes) Diag(Note.first, Note.second); if (Result) *Result = EvalResult.Val.getInt(); return E; } namespace { // Handle the case where we conclude a expression which we speculatively // considered to be unevaluated is actually evaluated. class TransformToPE : public TreeTransform { typedef TreeTransform BaseTransform; public: TransformToPE(Sema &SemaRef) : BaseTransform(SemaRef) { } // Make sure we redo semantic analysis bool AlwaysRebuild() { return true; } // Make sure we handle LabelStmts correctly. // FIXME: This does the right thing, but maybe we need a more general // fix to TreeTransform? StmtResult TransformLabelStmt(LabelStmt *S) { S->getDecl()->setStmt(nullptr); return BaseTransform::TransformLabelStmt(S); } // We need to special-case DeclRefExprs referring to FieldDecls which // are not part of a member pointer formation; normal TreeTransforming // doesn't catch this case because of the way we represent them in the AST. // FIXME: This is a bit ugly; is it really the best way to handle this // case? // // Error on DeclRefExprs referring to FieldDecls. 
ExprResult TransformDeclRefExpr(DeclRefExpr *E) { if (isa(E->getDecl()) && !SemaRef.isUnevaluatedContext()) return SemaRef.Diag(E->getLocation(), diag::err_invalid_non_static_member_use) << E->getDecl() << E->getSourceRange(); return BaseTransform::TransformDeclRefExpr(E); } // Exception: filter out member pointer formation ExprResult TransformUnaryOperator(UnaryOperator *E) { if (E->getOpcode() == UO_AddrOf && E->getType()->isMemberPointerType()) return E; return BaseTransform::TransformUnaryOperator(E); } ExprResult TransformLambdaExpr(LambdaExpr *E) { // Lambdas never need to be transformed. return E; } }; } ExprResult Sema::TransformToPotentiallyEvaluated(Expr *E) { assert(isUnevaluatedContext() && "Should only transform unevaluated expressions"); ExprEvalContexts.back().Context = ExprEvalContexts[ExprEvalContexts.size()-2].Context; if (isUnevaluatedContext()) return E; return TransformToPE(*this).TransformExpr(E); } void Sema::PushExpressionEvaluationContext(ExpressionEvaluationContext NewContext, Decl *LambdaContextDecl, bool IsDecltype) { ExprEvalContexts.emplace_back(NewContext, ExprCleanupObjects.size(), ExprNeedsCleanups, LambdaContextDecl, IsDecltype); ExprNeedsCleanups = false; if (!MaybeODRUseExprs.empty()) std::swap(MaybeODRUseExprs, ExprEvalContexts.back().SavedMaybeODRUseExprs); } void Sema::PushExpressionEvaluationContext(ExpressionEvaluationContext NewContext, ReuseLambdaContextDecl_t, bool IsDecltype) { Decl *ClosureContextDecl = ExprEvalContexts.back().ManglingContextDecl; PushExpressionEvaluationContext(NewContext, ClosureContextDecl, IsDecltype); } void Sema::PopExpressionEvaluationContext() { ExpressionEvaluationContextRecord& Rec = ExprEvalContexts.back(); unsigned NumTypos = Rec.NumTypos; if (!Rec.Lambdas.empty()) { if (Rec.isUnevaluated() || Rec.Context == ConstantEvaluated) { unsigned D; if (Rec.isUnevaluated()) { // C++11 [expr.prim.lambda]p2: // A lambda-expression shall not appear in an unevaluated operand // (Clause 5). D = diag::err_lambda_unevaluated_operand; } else { // C++1y [expr.const]p2: // A conditional-expression e is a core constant expression unless the // evaluation of e, following the rules of the abstract machine, would // evaluate [...] a lambda-expression. D = diag::err_lambda_in_constant_expression; } for (const auto *L : Rec.Lambdas) Diag(L->getLocStart(), D); } else { // Mark the capture expressions odr-used. This was deferred // during lambda expression creation. for (auto *Lambda : Rec.Lambdas) { for (auto *C : Lambda->capture_inits()) MarkDeclarationsReferencedInExpr(C); } } } // When are coming out of an unevaluated context, clear out any // temporaries that we may have created as part of the evaluation of // the expression in that context: they aren't relevant because they // will never be constructed. if (Rec.isUnevaluated() || Rec.Context == ConstantEvaluated) { ExprCleanupObjects.erase(ExprCleanupObjects.begin() + Rec.NumCleanupObjects, ExprCleanupObjects.end()); ExprNeedsCleanups = Rec.ParentNeedsCleanups; CleanupVarDeclMarking(); std::swap(MaybeODRUseExprs, Rec.SavedMaybeODRUseExprs); // Otherwise, merge the contexts together. } else { ExprNeedsCleanups |= Rec.ParentNeedsCleanups; MaybeODRUseExprs.insert(Rec.SavedMaybeODRUseExprs.begin(), Rec.SavedMaybeODRUseExprs.end()); } // Pop the current expression evaluation context off the stack. 
ExprEvalContexts.pop_back(); if (!ExprEvalContexts.empty()) ExprEvalContexts.back().NumTypos += NumTypos; else assert(NumTypos == 0 && "There are outstanding typos after popping the " "last ExpressionEvaluationContextRecord"); } void Sema::DiscardCleanupsInEvaluationContext() { ExprCleanupObjects.erase( ExprCleanupObjects.begin() + ExprEvalContexts.back().NumCleanupObjects, ExprCleanupObjects.end()); ExprNeedsCleanups = false; MaybeODRUseExprs.clear(); } ExprResult Sema::HandleExprEvaluationContextForTypeof(Expr *E) { if (!E->getType()->isVariablyModifiedType()) return E; return TransformToPotentiallyEvaluated(E); } static bool IsPotentiallyEvaluatedContext(Sema &SemaRef) { // Do not mark anything as "used" within a dependent context; wait for // an instantiation. if (SemaRef.CurContext->isDependentContext()) return false; switch (SemaRef.ExprEvalContexts.back().Context) { case Sema::Unevaluated: case Sema::UnevaluatedAbstract: // We are in an expression that is not potentially evaluated; do nothing. // (Depending on how you read the standard, we actually do need to do // something here for null pointer constants, but the standard's // definition of a null pointer constant is completely crazy.) return false; case Sema::ConstantEvaluated: case Sema::PotentiallyEvaluated: // We are in a potentially evaluated expression (or a constant-expression // in C++03); we need to do implicit template instantiation, implicitly // define class members, and mark most declarations as used. return true; case Sema::PotentiallyEvaluatedIfUsed: // Referenced declarations will only be used if the construct in the // containing expression is used. return false; } llvm_unreachable("Invalid context"); } /// \brief Mark a function referenced, and check whether it is odr-used /// (C++ [basic.def.odr]p2, C99 6.9p3) void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func, bool OdrUse) { assert(Func && "No function?"); Func->setReferenced(); // C++11 [basic.def.odr]p3: // A function whose name appears as a potentially-evaluated expression is // odr-used if it is the unique lookup result or the selected member of a // set of overloaded functions [...]. // // We (incorrectly) mark overload resolution as an unevaluated context, so we // can just check that here. Skip the rest of this function if we've already // marked the function as used. if (Func->isUsed(/*CheckUsedAttr=*/false) || !IsPotentiallyEvaluatedContext(*this)) { // C++11 [temp.inst]p3: // Unless a function template specialization has been explicitly // instantiated or explicitly specialized, the function template // specialization is implicitly instantiated when the specialization is // referenced in a context that requires a function definition to exist. // // We consider constexpr function templates to be referenced in a context // that requires a definition to exist whenever they are referenced. // // FIXME: This instantiates constexpr functions too frequently. If this is // really an unevaluated context (and we're not just in the definition of a // function template or overload resolution or other cases which we // incorrectly consider to be unevaluated contexts), and we're not in a // subexpression which we actually need to evaluate (for instance, a // template argument, array bound or an expression in a braced-init-list), // we are not permitted to instantiate this constexpr function definition. // // FIXME: This also implicitly defines special members too frequently. 
They // are only supposed to be implicitly defined if they are odr-used, but they // are not odr-used from constant expressions in unevaluated contexts. // However, they cannot be referenced if they are deleted, and they are // deleted whenever the implicit definition of the special member would // fail. if (!Func->isConstexpr() || Func->getBody()) return; CXXMethodDecl *MD = dyn_cast(Func); if (!Func->isImplicitlyInstantiable() && (!MD || MD->isUserProvided())) return; } // Note that this declaration has been used. if (CXXConstructorDecl *Constructor = dyn_cast(Func)) { Constructor = cast(Constructor->getFirstDecl()); if (Constructor->isDefaulted() && !Constructor->isDeleted()) { if (Constructor->isDefaultConstructor()) { if (Constructor->isTrivial() && !Constructor->hasAttr()) return; DefineImplicitDefaultConstructor(Loc, Constructor); } else if (Constructor->isCopyConstructor()) { DefineImplicitCopyConstructor(Loc, Constructor); } else if (Constructor->isMoveConstructor()) { DefineImplicitMoveConstructor(Loc, Constructor); } } else if (Constructor->getInheritedConstructor()) { DefineInheritingConstructor(Loc, Constructor); } } else if (CXXDestructorDecl *Destructor = dyn_cast(Func)) { Destructor = cast(Destructor->getFirstDecl()); if (Destructor->isDefaulted() && !Destructor->isDeleted()) { if (Destructor->isTrivial() && !Destructor->hasAttr()) return; DefineImplicitDestructor(Loc, Destructor); } if (Destructor->isVirtual() && getLangOpts().AppleKext) MarkVTableUsed(Loc, Destructor->getParent()); } else if (CXXMethodDecl *MethodDecl = dyn_cast(Func)) { if (MethodDecl->isOverloadedOperator() && MethodDecl->getOverloadedOperator() == OO_Equal) { MethodDecl = cast(MethodDecl->getFirstDecl()); if (MethodDecl->isDefaulted() && !MethodDecl->isDeleted()) { if (MethodDecl->isCopyAssignmentOperator()) DefineImplicitCopyAssignment(Loc, MethodDecl); else DefineImplicitMoveAssignment(Loc, MethodDecl); } } else if (isa(MethodDecl) && MethodDecl->getParent()->isLambda()) { CXXConversionDecl *Conversion = cast(MethodDecl->getFirstDecl()); if (Conversion->isLambdaToBlockPointerConversion()) DefineImplicitLambdaToBlockPointerConversion(Loc, Conversion); else DefineImplicitLambdaToFunctionPointerConversion(Loc, Conversion); } else if (MethodDecl->isVirtual() && getLangOpts().AppleKext) MarkVTableUsed(Loc, MethodDecl->getParent()); } // Recursive functions should be marked when used from another function. // FIXME: Is this really right? if (CurContext == Func) return; // Resolve the exception specification for any function which is // used: CodeGen will need it. const FunctionProtoType *FPT = Func->getType()->getAs(); if (FPT && isUnresolvedExceptionSpec(FPT->getExceptionSpecType())) ResolveExceptionSpec(Loc, FPT); if (!OdrUse) return; // Implicit instantiation of function templates and member functions of // class templates. 
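  // For example (illustrative C++, not text from this file):
  //
  //   template <typename T> T twice(T v) { return v + v; }
  //   int n = twice(21);   // marks twice<int> referenced; its definition is
  //                        // instantiated below, either immediately (for
  //                        // constexpr functions) or later from the
  //                        // pending-instantiation queue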
if (Func->isImplicitlyInstantiable()) { bool AlreadyInstantiated = false; SourceLocation PointOfInstantiation = Loc; if (FunctionTemplateSpecializationInfo *SpecInfo = Func->getTemplateSpecializationInfo()) { if (SpecInfo->getPointOfInstantiation().isInvalid()) SpecInfo->setPointOfInstantiation(Loc); else if (SpecInfo->getTemplateSpecializationKind() == TSK_ImplicitInstantiation) { AlreadyInstantiated = true; PointOfInstantiation = SpecInfo->getPointOfInstantiation(); } } else if (MemberSpecializationInfo *MSInfo = Func->getMemberSpecializationInfo()) { if (MSInfo->getPointOfInstantiation().isInvalid()) MSInfo->setPointOfInstantiation(Loc); else if (MSInfo->getTemplateSpecializationKind() == TSK_ImplicitInstantiation) { AlreadyInstantiated = true; PointOfInstantiation = MSInfo->getPointOfInstantiation(); } } if (!AlreadyInstantiated || Func->isConstexpr()) { if (isa(Func->getDeclContext()) && cast(Func->getDeclContext())->isLocalClass() && ActiveTemplateInstantiations.size()) PendingLocalImplicitInstantiations.push_back( std::make_pair(Func, PointOfInstantiation)); else if (Func->isConstexpr()) // Do not defer instantiations of constexpr functions, to avoid the // expression evaluator needing to call back into Sema if it sees a // call to such a function. InstantiateFunctionDefinition(PointOfInstantiation, Func); else { PendingInstantiations.push_back(std::make_pair(Func, PointOfInstantiation)); // Notify the consumer that a function was implicitly instantiated. Consumer.HandleCXXImplicitFunctionInstantiation(Func); } } } else { // Walk redefinitions, as some of them may be instantiable. for (auto i : Func->redecls()) { if (!i->isUsed(false) && i->isImplicitlyInstantiable()) MarkFunctionReferenced(Loc, i); } } // Keep track of used but undefined functions. if (!Func->isDefined()) { if (mightHaveNonExternalLinkage(Func)) UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc)); else if (Func->getMostRecentDecl()->isInlined() && !LangOpts.GNUInline && !Func->getMostRecentDecl()->hasAttr()) UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc)); } // Normally the most current decl is marked used while processing the use and // any subsequent decls are marked used by decl merging. This fails with // template instantiation since marking can happen at the end of the file // and, because of the two phase lookup, this function is called with at // decl in the middle of a decl chain. We loop to maintain the invariant // that once a decl is used, all decls after it are also used. for (FunctionDecl *F = Func->getMostRecentDecl();; F = F->getPreviousDecl()) { F->markUsed(Context); if (F == Func) break; } } static void diagnoseUncapturableValueReference(Sema &S, SourceLocation loc, VarDecl *var, DeclContext *DC) { DeclContext *VarDC = var->getDeclContext(); // If the parameter still belongs to the translation unit, then // we're actually just using one parameter in the declaration of // the next. if (isa(var) && isa(VarDC)) return; // For C code, don't diagnose about capture if we're not actually in code // right now; it's impossible to write a non-constant expression outside of // function context, so we'll get other (more useful) diagnostics later. // // For C++, things get a bit more nasty... it would be nice to suppress this // diagnostic for certain cases like using a local variable in an array bound // for a member of a local class, but the correct predicate is not obvious. 
if (!S.getLangOpts().CPlusPlus && !S.CurContext->isFunctionOrMethod()) return; if (isa(VarDC) && cast(VarDC->getParent())->isLambda()) { S.Diag(loc, diag::err_reference_to_local_var_in_enclosing_lambda) << var->getIdentifier(); } else if (FunctionDecl *fn = dyn_cast(VarDC)) { S.Diag(loc, diag::err_reference_to_local_var_in_enclosing_function) << var->getIdentifier() << fn->getDeclName(); } else if (isa(VarDC)) { S.Diag(loc, diag::err_reference_to_local_var_in_enclosing_block) << var->getIdentifier(); } else { // FIXME: Is there any other context where a local variable can be // declared? S.Diag(loc, diag::err_reference_to_local_var_in_enclosing_context) << var->getIdentifier(); } S.Diag(var->getLocation(), diag::note_entity_declared_at) << var->getIdentifier(); // FIXME: Add additional diagnostic info about class etc. which prevents // capture. } static bool isVariableAlreadyCapturedInScopeInfo(CapturingScopeInfo *CSI, VarDecl *Var, bool &SubCapturesAreNested, QualType &CaptureType, QualType &DeclRefType) { // Check whether we've already captured it. if (CSI->CaptureMap.count(Var)) { // If we found a capture, any subcaptures are nested. SubCapturesAreNested = true; // Retrieve the capture type for this variable. CaptureType = CSI->getCapture(Var).getCaptureType(); // Compute the type of an expression that refers to this variable. DeclRefType = CaptureType.getNonReferenceType(); // Similarly to mutable captures in lambda, all the OpenMP captures by copy // are mutable in the sense that user can change their value - they are // private instances of the captured declarations. const CapturingScopeInfo::Capture &Cap = CSI->getCapture(Var); if (Cap.isCopyCapture() && !(isa(CSI) && cast(CSI)->Mutable) && !(isa(CSI) && cast(CSI)->CapRegionKind == CR_OpenMP)) DeclRefType.addConst(); return true; } return false; } // Only block literals, captured statements, and lambda expressions can // capture; other scopes don't work. static DeclContext *getParentOfCapturingContextOrNull(DeclContext *DC, VarDecl *Var, SourceLocation Loc, const bool Diagnose, Sema &S) { if (isa(DC) || isa(DC) || isLambdaCallOperator(DC)) return getLambdaAwareParentOfDeclContext(DC); else if (Var->hasLocalStorage()) { if (Diagnose) diagnoseUncapturableValueReference(S, Loc, Var, DC); } return nullptr; } // Certain capturing entities (lambdas, blocks etc.) are not allowed to capture // certain types of variables (unnamed, variably modified types etc.) // so check for eligibility. static bool isVariableCapturable(CapturingScopeInfo *CSI, VarDecl *Var, SourceLocation Loc, const bool Diagnose, Sema &S) { bool IsBlock = isa(CSI); bool IsLambda = isa(CSI); // Lambdas are not allowed to capture unnamed variables // (e.g. anonymous unions). // FIXME: The C++11 rule don't actually state this explicitly, but I'm // assuming that's the intent. if (IsLambda && !Var->getDeclName()) { if (Diagnose) { S.Diag(Loc, diag::err_lambda_capture_anonymous_var); S.Diag(Var->getLocation(), diag::note_declared_at); } return false; } // Prohibit variably-modified types in blocks; they're difficult to deal with. if (Var->getType()->isVariablyModifiedType() && IsBlock) { if (Diagnose) { S.Diag(Loc, diag::err_ref_vm_type); S.Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); } return false; } // Prohibit structs with flexible array members too. // We cannot capture what is in the tail end of the struct. 
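  // For example (illustrative C, not text from this file):
  //
  //   struct Packet { unsigned len; char data[]; };
  //
  // A block or lambda cannot capture a 'struct Packet' by copy: the flexible
  // tail is not part of sizeof(struct Packet), so a copy would silently slice
  // it off. That is what the check below rejects.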
if (const RecordType *VTTy = Var->getType()->getAs()) { if (VTTy->getDecl()->hasFlexibleArrayMember()) { if (Diagnose) { if (IsBlock) S.Diag(Loc, diag::err_ref_flexarray_type); else S.Diag(Loc, diag::err_lambda_capture_flexarray_type) << Var->getDeclName(); S.Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); } return false; } } const bool HasBlocksAttr = Var->hasAttr(); // Lambdas and captured statements are not allowed to capture __block // variables; they don't support the expected semantics. if (HasBlocksAttr && (IsLambda || isa(CSI))) { if (Diagnose) { S.Diag(Loc, diag::err_capture_block_variable) << Var->getDeclName() << !IsLambda; S.Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); } return false; } return true; } // Returns true if the capture by block was successful. static bool captureInBlock(BlockScopeInfo *BSI, VarDecl *Var, SourceLocation Loc, const bool BuildAndDiagnose, QualType &CaptureType, QualType &DeclRefType, const bool Nested, Sema &S) { Expr *CopyExpr = nullptr; bool ByRef = false; // Blocks are not allowed to capture arrays. if (CaptureType->isArrayType()) { if (BuildAndDiagnose) { S.Diag(Loc, diag::err_ref_array_type); S.Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); } return false; } // Forbid the block-capture of autoreleasing variables. if (CaptureType.getObjCLifetime() == Qualifiers::OCL_Autoreleasing) { if (BuildAndDiagnose) { S.Diag(Loc, diag::err_arc_autoreleasing_capture) << /*block*/ 0; S.Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); } return false; } const bool HasBlocksAttr = Var->hasAttr(); if (HasBlocksAttr || CaptureType->isReferenceType()) { // Block capture by reference does not change the capture or // declaration reference types. ByRef = true; } else { // Block capture by copy introduces 'const'. CaptureType = CaptureType.getNonReferenceType().withConst(); DeclRefType = CaptureType; if (S.getLangOpts().CPlusPlus && BuildAndDiagnose) { if (const RecordType *Record = DeclRefType->getAs()) { // The capture logic needs the destructor, so make sure we mark it. // Usually this is unnecessary because most local variables have // their destructors marked at declaration time, but parameters are // an exception because it's technically only the call site that // actually requires the destructor. if (isa(Var)) S.FinalizeVarWithDestructor(Var, Record); // Enter a new evaluation context to insulate the copy // full-expression. EnterExpressionEvaluationContext scope(S, S.PotentiallyEvaluated); // According to the blocks spec, the capture of a variable from // the stack requires a const copy constructor. This is not true // of the copy/move done to move a __block variable to the heap. Expr *DeclRef = new (S.Context) DeclRefExpr(Var, Nested, DeclRefType.withConst(), VK_LValue, Loc); ExprResult Result = S.PerformCopyInitialization( InitializedEntity::InitializeBlock(Var->getLocation(), CaptureType, false), Loc, DeclRef); // Build a full-expression copy expression if initialization // succeeded and used a non-trivial constructor. Recover from // errors by pretending that the copy isn't necessary. if (!Result.isInvalid() && !cast(Result.get())->getConstructor() ->isTrivial()) { Result = S.MaybeCreateExprWithCleanups(Result); CopyExpr = Result.get(); } } } } // Actually capture the variable. 
if (BuildAndDiagnose) BSI->addCapture(Var, HasBlocksAttr, ByRef, Nested, Loc, SourceLocation(), CaptureType, CopyExpr); return true; } /// \brief Capture the given variable in the captured region. static bool captureInCapturedRegion(CapturedRegionScopeInfo *RSI, VarDecl *Var, SourceLocation Loc, const bool BuildAndDiagnose, QualType &CaptureType, QualType &DeclRefType, const bool RefersToCapturedVariable, Sema &S) { // By default, capture variables by reference. bool ByRef = true; // Using an LValue reference type is consistent with Lambdas (see below). if (S.getLangOpts().OpenMP) { ByRef = S.IsOpenMPCapturedByRef(Var, RSI); if (S.IsOpenMPCapturedVar(Var)) DeclRefType = DeclRefType.getUnqualifiedType(); } if (ByRef) CaptureType = S.Context.getLValueReferenceType(DeclRefType); else CaptureType = DeclRefType; Expr *CopyExpr = nullptr; if (BuildAndDiagnose) { // The current implementation assumes that all variables are captured // by references. Since there is no capture by copy, no expression // evaluation will be needed. RecordDecl *RD = RSI->TheRecordDecl; FieldDecl *Field = FieldDecl::Create(S.Context, RD, Loc, Loc, nullptr, CaptureType, S.Context.getTrivialTypeSourceInfo(CaptureType, Loc), nullptr, false, ICIS_NoInit); Field->setImplicit(true); Field->setAccess(AS_private); RD->addDecl(Field); CopyExpr = new (S.Context) DeclRefExpr(Var, RefersToCapturedVariable, DeclRefType, VK_LValue, Loc); Var->setReferenced(true); Var->markUsed(S.Context); } // Actually capture the variable. if (BuildAndDiagnose) RSI->addCapture(Var, /*isBlock*/false, ByRef, RefersToCapturedVariable, Loc, SourceLocation(), CaptureType, CopyExpr); return true; } /// \brief Create a field within the lambda class for the variable /// being captured. static void addAsFieldToClosureType(Sema &S, LambdaScopeInfo *LSI, VarDecl *Var, QualType FieldType, QualType DeclRefType, SourceLocation Loc, bool RefersToCapturedVariable) { CXXRecordDecl *Lambda = LSI->Lambda; // Build the non-static data member. FieldDecl *Field = FieldDecl::Create(S.Context, Lambda, Loc, Loc, nullptr, FieldType, S.Context.getTrivialTypeSourceInfo(FieldType, Loc), nullptr, false, ICIS_NoInit); Field->setImplicit(true); Field->setAccess(AS_private); Lambda->addDecl(Field); } /// \brief Capture the given variable in the lambda. static bool captureInLambda(LambdaScopeInfo *LSI, VarDecl *Var, SourceLocation Loc, const bool BuildAndDiagnose, QualType &CaptureType, QualType &DeclRefType, const bool RefersToCapturedVariable, const Sema::TryCaptureKind Kind, SourceLocation EllipsisLoc, const bool IsTopScope, Sema &S) { // Determine whether we are capturing by reference or by value. bool ByRef = false; if (IsTopScope && Kind != Sema::TryCapture_Implicit) { ByRef = (Kind == Sema::TryCapture_ExplicitByRef); } else { ByRef = (LSI->ImpCaptureStyle == LambdaScopeInfo::ImpCap_LambdaByref); } // Compute the type of the field that will capture this variable. if (ByRef) { // C++11 [expr.prim.lambda]p15: // An entity is captured by reference if it is implicitly or // explicitly captured but not captured by copy. It is // unspecified whether additional unnamed non-static data // members are declared in the closure type for entities // captured by reference. // // FIXME: It is not clear whether we want to build an lvalue reference // to the DeclRefType or to CaptureType.getNonReferenceType(). GCC appears // to do the former, while EDG does the latter. Core issue 1249 will // clarify, but for now we follow GCC because it's a more permissive and // easily defensible position. 
CaptureType = S.Context.getLValueReferenceType(DeclRefType); } else { // C++11 [expr.prim.lambda]p14: // For each entity captured by copy, an unnamed non-static // data member is declared in the closure type. The // declaration order of these members is unspecified. The type // of such a data member is the type of the corresponding // captured entity if the entity is not a reference to an // object, or the referenced type otherwise. [Note: If the // captured entity is a reference to a function, the // corresponding data member is also a reference to a // function. - end note ] if (const ReferenceType *RefType = CaptureType->getAs()){ if (!RefType->getPointeeType()->isFunctionType()) CaptureType = RefType->getPointeeType(); } // Forbid the lambda copy-capture of autoreleasing variables. if (CaptureType.getObjCLifetime() == Qualifiers::OCL_Autoreleasing) { if (BuildAndDiagnose) { S.Diag(Loc, diag::err_arc_autoreleasing_capture) << /*lambda*/ 1; S.Diag(Var->getLocation(), diag::note_previous_decl) << Var->getDeclName(); } return false; } // Make sure that by-copy captures are of a complete and non-abstract type. if (BuildAndDiagnose) { if (!CaptureType->isDependentType() && S.RequireCompleteType(Loc, CaptureType, diag::err_capture_of_incomplete_type, Var->getDeclName())) return false; if (S.RequireNonAbstractType(Loc, CaptureType, diag::err_capture_of_abstract_type)) return false; } } // Capture this variable in the lambda. if (BuildAndDiagnose) addAsFieldToClosureType(S, LSI, Var, CaptureType, DeclRefType, Loc, RefersToCapturedVariable); // Compute the type of a reference to this captured variable. if (ByRef) DeclRefType = CaptureType.getNonReferenceType(); else { // C++ [expr.prim.lambda]p5: // The closure type for a lambda-expression has a public inline // function call operator [...]. This function call operator is // declared const (9.3.1) if and only if the lambda-expression’s // parameter-declaration-clause is not followed by mutable. DeclRefType = CaptureType.getNonReferenceType(); if (!LSI->Mutable && !CaptureType->isReferenceType()) DeclRefType.addConst(); } // Add the capture. if (BuildAndDiagnose) LSI->addCapture(Var, /*IsBlock=*/false, ByRef, RefersToCapturedVariable, Loc, EllipsisLoc, CaptureType, /*CopyExpr=*/nullptr); return true; } bool Sema::tryCaptureVariable( VarDecl *Var, SourceLocation ExprLoc, TryCaptureKind Kind, SourceLocation EllipsisLoc, bool BuildAndDiagnose, QualType &CaptureType, QualType &DeclRefType, const unsigned *const FunctionScopeIndexToStopAt) { // An init-capture is notionally from the context surrounding its // declaration, but its parent DC is the lambda class. DeclContext *VarDC = Var->getDeclContext(); if (Var->isInitCapture()) VarDC = VarDC->getParent(); DeclContext *DC = CurContext; const unsigned MaxFunctionScopesIndex = FunctionScopeIndexToStopAt ? *FunctionScopeIndexToStopAt : FunctionScopes.size() - 1; // We need to sync up the Declaration Context with the // FunctionScopeIndexToStopAt if (FunctionScopeIndexToStopAt) { unsigned FSIndex = FunctionScopes.size() - 1; while (FSIndex != MaxFunctionScopesIndex) { DC = getLambdaAwareParentOfDeclContext(DC); --FSIndex; } } // If the variable is declared in the current context, there is no need to // capture it. if (VarDC == DC) return true; // Capture global variables if it is required to use private copy of this // variable. 
  bool IsGlobal = !Var->hasLocalStorage();
  if (IsGlobal && !(LangOpts.OpenMP && IsOpenMPCapturedVar(Var)))
    return true;

  // Walk up the stack to determine whether we can capture the variable,
  // performing the "simple" checks that don't depend on type. We stop when
  // we've either hit the declared scope of the variable or find an existing
  // capture of that variable. We start from the innermost capturing-entity
  // (the DC) and ensure that all intervening capturing-entities
  // (blocks/lambdas etc.) between the innermost capturer and the variable's
  // DeclContext can either capture the variable or have already captured
  // the variable.
  CaptureType = Var->getType();
  DeclRefType = CaptureType.getNonReferenceType();
  bool Nested = false;
  bool Explicit = (Kind != TryCapture_Implicit);
  unsigned FunctionScopesIndex = MaxFunctionScopesIndex;
  unsigned OpenMPLevel = 0;
  do {
    // Only block literals, captured statements, and lambda expressions can
    // capture; other scopes don't work.
    DeclContext *ParentDC = getParentOfCapturingContextOrNull(DC, Var,
                                                              ExprLoc,
                                                              BuildAndDiagnose,
                                                              *this);
    // We need to check for the parent *first* because, if we *have*
    // private-captured a global variable, we need to recursively capture it in
    // intermediate blocks, lambdas, etc.
    if (!ParentDC) {
      if (IsGlobal) {
        FunctionScopesIndex = MaxFunctionScopesIndex - 1;
        break;
      }
      return true;
    }

    FunctionScopeInfo  *FSI = FunctionScopes[FunctionScopesIndex];
    CapturingScopeInfo *CSI = cast<CapturingScopeInfo>(FSI);

    // Check whether we've already captured it.
    if (isVariableAlreadyCapturedInScopeInfo(CSI, Var, Nested, CaptureType,
                                             DeclRefType))
      break;
    // If we are instantiating a generic lambda call operator body,
    // we do not want to capture new variables. What was captured
    // during either a lambda's transformation or initial parsing
    // should be used.
    if (isGenericLambdaCallOperatorSpecialization(DC)) {
      if (BuildAndDiagnose) {
        LambdaScopeInfo *LSI = cast<LambdaScopeInfo>(CSI);
        if (LSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_None) {
          Diag(ExprLoc, diag::err_lambda_impcap) << Var->getDeclName();
          Diag(Var->getLocation(), diag::note_previous_decl)
            << Var->getDeclName();
          Diag(LSI->Lambda->getLocStart(), diag::note_lambda_decl);
        } else
          diagnoseUncapturableValueReference(*this, ExprLoc, Var, DC);
      }
      return true;
    }
    // Certain capturing entities (lambdas, blocks etc.) are not allowed to
    // capture certain types of variables (unnamed, variably modified types
    // etc.), so check for eligibility.
    if (!isVariableCapturable(CSI, Var, ExprLoc, BuildAndDiagnose, *this))
      return true;

    // Try to capture variable-length array types.
    if (Var->getType()->isVariablyModifiedType()) {
      // We're going to walk down into the type and look for VLA
      // expressions.
      QualType QTy = Var->getType();
      if (ParmVarDecl *PVD = dyn_cast_or_null<ParmVarDecl>(Var))
        QTy = PVD->getOriginalType();
      captureVariablyModifiedType(Context, QTy, CSI);
    }

    if (getLangOpts().OpenMP) {
      if (auto *RSI = dyn_cast<CapturedRegionScopeInfo>(CSI)) {
        // OpenMP private variables should not be captured in outer scope, so
        // just break here. Similarly, global variables that are captured in a
        // target region should not be captured outside the scope of the
        // region.
        if (RSI->CapRegionKind == CR_OpenMP) {
          auto isTargetCap = isOpenMPTargetCapturedVar(Var, OpenMPLevel);
          // When we detect target captures we are looking from inside the
          // target region, therefore we need to propagate the capture from the
          // enclosing region. Therefore, the capture is not initially nested.
          if (isTargetCap)
            FunctionScopesIndex--;

          if (isTargetCap || isOpenMPPrivateVar(Var, OpenMPLevel)) {
            Nested = !isTargetCap;
            DeclRefType = DeclRefType.getUnqualifiedType();
            CaptureType = Context.getLValueReferenceType(DeclRefType);
            break;
          }
          ++OpenMPLevel;
        }
      }
    }
    if (CSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_None && !Explicit) {
      // No capture-default, and this is not an explicit capture
      // so cannot capture this variable.
      if (BuildAndDiagnose) {
        Diag(ExprLoc, diag::err_lambda_impcap) << Var->getDeclName();
        Diag(Var->getLocation(), diag::note_previous_decl)
          << Var->getDeclName();
        Diag(cast<LambdaScopeInfo>(CSI)->Lambda->getLocStart(),
             diag::note_lambda_decl);
        // FIXME: If we error out because an outer lambda cannot implicitly
        // capture a variable that an inner lambda explicitly captures, we
        // should have the inner lambda do the explicit capture - because
        // it makes for cleaner diagnostics later. This would purely be done
        // so that the diagnostic does not misleadingly claim that a variable
        // cannot be captured by a lambda implicitly even though it is captured
        // explicitly. Suggestion:
        //  - create const bool VariableCaptureWasInitiallyExplicit = Explicit
        //    at the function head
        //  - cache the StartingDeclContext - this must be a lambda
        //  - captureInLambda in the innermost lambda the variable.
      }
      return true;
    }

    FunctionScopesIndex--;
    DC = ParentDC;
    Explicit = false;
  } while (!VarDC->Equals(DC));

  // Walk back down the scope stack (e.g. from outer lambda to inner lambda),
  // computing the type of the capture at each step, checking type-specific
  // requirements, and adding captures if requested.
  // If the variable had already been captured previously, we start capturing
  // at the lambda nested within that one.
  for (unsigned I = ++FunctionScopesIndex, N = MaxFunctionScopesIndex + 1;
       I != N; ++I) {
    CapturingScopeInfo *CSI = cast<CapturingScopeInfo>(FunctionScopes[I]);

    if (BlockScopeInfo *BSI = dyn_cast<BlockScopeInfo>(CSI)) {
      if (!captureInBlock(BSI, Var, ExprLoc,
                          BuildAndDiagnose, CaptureType,
                          DeclRefType, Nested, *this))
        return true;
      Nested = true;
    } else if (CapturedRegionScopeInfo *RSI
                   = dyn_cast<CapturedRegionScopeInfo>(CSI)) {
      if (!captureInCapturedRegion(RSI, Var, ExprLoc,
                                   BuildAndDiagnose, CaptureType,
                                   DeclRefType, Nested, *this))
        return true;
      Nested = true;
    } else {
      LambdaScopeInfo *LSI = cast<LambdaScopeInfo>(CSI);
      if (!captureInLambda(LSI, Var, ExprLoc,
                           BuildAndDiagnose, CaptureType,
                           DeclRefType, Nested, Kind, EllipsisLoc,
                           /*IsTopScope*/ I == N - 1, *this))
        return true;
      Nested = true;
    }
  }
  return false;
}

bool Sema::tryCaptureVariable(VarDecl *Var, SourceLocation Loc,
                              TryCaptureKind Kind, SourceLocation EllipsisLoc) {
  QualType CaptureType;
  QualType DeclRefType;
  return tryCaptureVariable(Var, Loc, Kind, EllipsisLoc,
                            /*BuildAndDiagnose=*/true, CaptureType,
                            DeclRefType, nullptr);
}

bool Sema::NeedToCaptureVariable(VarDecl *Var, SourceLocation Loc) {
  QualType CaptureType;
  QualType DeclRefType;
  return !tryCaptureVariable(Var, Loc, TryCapture_Implicit, SourceLocation(),
                             /*BuildAndDiagnose=*/false, CaptureType,
                             DeclRefType, nullptr);
}

QualType Sema::getCapturedDeclRefType(VarDecl *Var, SourceLocation Loc) {
  QualType CaptureType;
  QualType DeclRefType;

  // Determine whether we can capture this variable.
  if (tryCaptureVariable(Var, Loc, TryCapture_Implicit, SourceLocation(),
                         /*BuildAndDiagnose=*/false, CaptureType,
                         DeclRefType, nullptr))
    return QualType();

  return DeclRefType;
}

// If either the type of the variable or the initializer is dependent,
// return false. Otherwise, determine whether the variable is a constant
// expression.
Use this if you need to know if a variable that might or // might not be dependent is truly a constant expression. static inline bool IsVariableNonDependentAndAConstantExpression(VarDecl *Var, ASTContext &Context) { if (Var->getType()->isDependentType()) return false; const VarDecl *DefVD = nullptr; Var->getAnyInitializer(DefVD); if (!DefVD) return false; EvaluatedStmt *Eval = DefVD->ensureEvaluatedStmt(); Expr *Init = cast(Eval->Value); if (Init->isValueDependent()) return false; return IsVariableAConstantExpression(Var, Context); } void Sema::UpdateMarkingForLValueToRValue(Expr *E) { // Per C++11 [basic.def.odr], a variable is odr-used "unless it is // an object that satisfies the requirements for appearing in a // constant expression (5.19) and the lvalue-to-rvalue conversion (4.1) // is immediately applied." This function handles the lvalue-to-rvalue // conversion part. MaybeODRUseExprs.erase(E->IgnoreParens()); // If we are in a lambda, check if this DeclRefExpr or MemberExpr refers // to a variable that is a constant expression, and if so, identify it as // a reference to a variable that does not involve an odr-use of that // variable. if (LambdaScopeInfo *LSI = getCurLambda()) { Expr *SansParensExpr = E->IgnoreParens(); VarDecl *Var = nullptr; if (DeclRefExpr *DRE = dyn_cast(SansParensExpr)) Var = dyn_cast(DRE->getFoundDecl()); else if (MemberExpr *ME = dyn_cast(SansParensExpr)) Var = dyn_cast(ME->getMemberDecl()); if (Var && IsVariableNonDependentAndAConstantExpression(Var, Context)) LSI->markVariableExprAsNonODRUsed(SansParensExpr); } } ExprResult Sema::ActOnConstantExpression(ExprResult Res) { Res = CorrectDelayedTyposInExpr(Res); if (!Res.isUsable()) return Res; // If a constant-expression is a reference to a variable where we delay // deciding whether it is an odr-use, just assume we will apply the // lvalue-to-rvalue conversion. In the one case where this doesn't happen // (a non-type template argument), we have special handling anyway. UpdateMarkingForLValueToRValue(Res.get()); return Res; } void Sema::CleanupVarDeclMarking() { for (Expr *E : MaybeODRUseExprs) { VarDecl *Var; SourceLocation Loc; if (DeclRefExpr *DRE = dyn_cast(E)) { Var = cast(DRE->getDecl()); Loc = DRE->getLocation(); } else if (MemberExpr *ME = dyn_cast(E)) { Var = cast(ME->getMemberDecl()); Loc = ME->getMemberLoc(); } else { llvm_unreachable("Unexpected expression"); } MarkVarDeclODRUsed(Var, Loc, *this, /*MaxFunctionScopeIndex Pointer*/ nullptr); } MaybeODRUseExprs.clear(); } static void DoMarkVarDeclReferenced(Sema &SemaRef, SourceLocation Loc, VarDecl *Var, Expr *E) { assert((!E || isa(E) || isa(E)) && "Invalid Expr argument to DoMarkVarDeclReferenced"); Var->setReferenced(); TemplateSpecializationKind TSK = Var->getTemplateSpecializationKind(); bool MarkODRUsed = true; // If the context is not potentially evaluated, this is not an odr-use and // does not trigger instantiation. if (!IsPotentiallyEvaluatedContext(SemaRef)) { if (SemaRef.isUnevaluatedContext()) return; // If we don't yet know whether this context is going to end up being an // evaluated context, and we're referencing a variable from an enclosing // scope, add a potential capture. // // FIXME: Is this necessary? These contexts are only used for default // arguments, where local variables can't be used. 
const bool RefersToEnclosingScope = (SemaRef.CurContext != Var->getDeclContext() && Var->getDeclContext()->isFunctionOrMethod() && Var->hasLocalStorage()); if (RefersToEnclosingScope) { if (LambdaScopeInfo *const LSI = SemaRef.getCurLambda()) { // If a variable could potentially be odr-used, defer marking it so // until we finish analyzing the full expression for any // lvalue-to-rvalue // or discarded value conversions that would obviate odr-use. // Add it to the list of potential captures that will be analyzed // later (ActOnFinishFullExpr) for eventual capture and odr-use marking // unless the variable is a reference that was initialized by a constant // expression (this will never need to be captured or odr-used). assert(E && "Capture variable should be used in an expression."); if (!Var->getType()->isReferenceType() || !IsVariableNonDependentAndAConstantExpression(Var, SemaRef.Context)) LSI->addPotentialCapture(E->IgnoreParens()); } } if (!isTemplateInstantiation(TSK)) return; // Instantiate, but do not mark as odr-used, variable templates. MarkODRUsed = false; } VarTemplateSpecializationDecl *VarSpec = dyn_cast(Var); assert(!isa(Var) && "Can't instantiate a partial template specialization."); // Perform implicit instantiation of static data members, static data member // templates of class templates, and variable template specializations. Delay // instantiations of variable templates, except for those that could be used // in a constant expression. if (isTemplateInstantiation(TSK)) { bool TryInstantiating = TSK == TSK_ImplicitInstantiation; if (TryInstantiating && !isa(Var)) { if (Var->getPointOfInstantiation().isInvalid()) { // This is a modification of an existing AST node. Notify listeners. if (ASTMutationListener *L = SemaRef.getASTMutationListener()) L->StaticDataMemberInstantiated(Var); } else if (!Var->isUsableInConstantExpressions(SemaRef.Context)) // Don't bother trying to instantiate it again, unless we might need // its initializer before we get to the end of the TU. TryInstantiating = false; } if (Var->getPointOfInstantiation().isInvalid()) Var->setTemplateSpecializationKind(TSK, Loc); if (TryInstantiating) { SourceLocation PointOfInstantiation = Var->getPointOfInstantiation(); bool InstantiationDependent = false; bool IsNonDependent = VarSpec ? !TemplateSpecializationType::anyDependentTemplateArguments( VarSpec->getTemplateArgsInfo(), InstantiationDependent) : true; // Do not instantiate specializations that are still type-dependent. if (IsNonDependent) { if (Var->isUsableInConstantExpressions(SemaRef.Context)) { // Do not defer instantiations of variables which could be used in a // constant expression. SemaRef.InstantiateVariableDefinition(PointOfInstantiation, Var); } else { SemaRef.PendingInstantiations .push_back(std::make_pair(Var, PointOfInstantiation)); } } } } if(!MarkODRUsed) return; // Per C++11 [basic.def.odr], a variable is odr-used "unless it satisfies // the requirements for appearing in a constant expression (5.19) and, if // it is an object, the lvalue-to-rvalue conversion (4.1) // is immediately applied." We check the first part here, and // Sema::UpdateMarkingForLValueToRValue deals with the second part. // Note that we use the C++11 definition everywhere because nothing in // C++03 depends on whether we get the C++03 version correct. The second // part does not apply to references, since they are not objects. 
if (E && IsVariableAConstantExpression(Var, SemaRef.Context)) { // A reference initialized by a constant expression can never be // odr-used, so simply ignore it. if (!Var->getType()->isReferenceType()) SemaRef.MaybeODRUseExprs.insert(E); } else MarkVarDeclODRUsed(Var, Loc, SemaRef, /*MaxFunctionScopeIndex ptr*/ nullptr); } /// \brief Mark a variable referenced, and check whether it is odr-used /// (C++ [basic.def.odr]p2, C99 6.9p3). Note that this should not be /// used directly for normal expressions referring to VarDecl. void Sema::MarkVariableReferenced(SourceLocation Loc, VarDecl *Var) { DoMarkVarDeclReferenced(*this, Loc, Var, nullptr); } static void MarkExprReferenced(Sema &SemaRef, SourceLocation Loc, Decl *D, Expr *E, bool OdrUse) { if (VarDecl *Var = dyn_cast(D)) { DoMarkVarDeclReferenced(SemaRef, Loc, Var, E); return; } SemaRef.MarkAnyDeclReferenced(Loc, D, OdrUse); // If this is a call to a method via a cast, also mark the method in the // derived class used in case codegen can devirtualize the call. const MemberExpr *ME = dyn_cast(E); if (!ME) return; CXXMethodDecl *MD = dyn_cast(ME->getMemberDecl()); if (!MD) return; // Only attempt to devirtualize if this is truly a virtual call. bool IsVirtualCall = MD->isVirtual() && ME->performsVirtualDispatch(SemaRef.getLangOpts()); if (!IsVirtualCall) return; const Expr *Base = ME->getBase(); const CXXRecordDecl *MostDerivedClassDecl = Base->getBestDynamicClassType(); if (!MostDerivedClassDecl) return; CXXMethodDecl *DM = MD->getCorrespondingMethodInClass(MostDerivedClassDecl); if (!DM || DM->isPure()) return; SemaRef.MarkAnyDeclReferenced(Loc, DM, OdrUse); } /// \brief Perform reference-marking and odr-use handling for a DeclRefExpr. void Sema::MarkDeclRefReferenced(DeclRefExpr *E) { // TODO: update this with DR# once a defect report is filed. // C++11 defect. The address of a pure member should not be an ODR use, even // if it's a qualified reference. bool OdrUse = true; if (CXXMethodDecl *Method = dyn_cast(E->getDecl())) if (Method->isVirtual()) OdrUse = false; MarkExprReferenced(*this, E->getLocation(), E->getDecl(), E, OdrUse); } /// \brief Perform reference-marking and odr-use handling for a MemberExpr. void Sema::MarkMemberReferenced(MemberExpr *E) { // C++11 [basic.def.odr]p2: // A non-overloaded function whose name appears as a potentially-evaluated // expression or a member of a set of candidate functions, if selected by // overload resolution when referred to from a potentially-evaluated // expression, is odr-used, unless it is a pure virtual function and its // name is not explicitly qualified. bool OdrUse = true; if (E->performsVirtualDispatch(getLangOpts())) { if (CXXMethodDecl *Method = dyn_cast(E->getMemberDecl())) if (Method->isPure()) OdrUse = false; } SourceLocation Loc = E->getMemberLoc().isValid() ? E->getMemberLoc() : E->getLocStart(); MarkExprReferenced(*this, Loc, E->getMemberDecl(), E, OdrUse); } /// \brief Perform marking for a reference to an arbitrary declaration. It /// marks the declaration referenced, and performs odr-use checking for /// functions and variables. This method should not be used when building a /// normal expression which refers to a variable. 
void Sema::MarkAnyDeclReferenced(SourceLocation Loc, Decl *D, bool OdrUse) { if (OdrUse) { if (auto *VD = dyn_cast(D)) { MarkVariableReferenced(Loc, VD); return; } } if (auto *FD = dyn_cast(D)) { MarkFunctionReferenced(Loc, FD, OdrUse); return; } D->setReferenced(); } namespace { // Mark all of the declarations referenced // FIXME: Not fully implemented yet! We need to have a better understanding // of when we're entering class MarkReferencedDecls : public RecursiveASTVisitor { Sema &S; SourceLocation Loc; public: typedef RecursiveASTVisitor Inherited; MarkReferencedDecls(Sema &S, SourceLocation Loc) : S(S), Loc(Loc) { } bool TraverseTemplateArgument(const TemplateArgument &Arg); bool TraverseRecordType(RecordType *T); }; } bool MarkReferencedDecls::TraverseTemplateArgument( const TemplateArgument &Arg) { if (Arg.getKind() == TemplateArgument::Declaration) { if (Decl *D = Arg.getAsDecl()) S.MarkAnyDeclReferenced(Loc, D, true); } return Inherited::TraverseTemplateArgument(Arg); } bool MarkReferencedDecls::TraverseRecordType(RecordType *T) { if (ClassTemplateSpecializationDecl *Spec = dyn_cast(T->getDecl())) { const TemplateArgumentList &Args = Spec->getTemplateArgs(); return TraverseTemplateArguments(Args.data(), Args.size()); } return true; } void Sema::MarkDeclarationsReferencedInType(SourceLocation Loc, QualType T) { MarkReferencedDecls Marker(*this, Loc); Marker.TraverseType(Context.getCanonicalType(T)); } namespace { /// \brief Helper class that marks all of the declarations referenced by /// potentially-evaluated subexpressions as "referenced". class EvaluatedExprMarker : public EvaluatedExprVisitor { Sema &S; bool SkipLocalVariables; public: typedef EvaluatedExprVisitor Inherited; EvaluatedExprMarker(Sema &S, bool SkipLocalVariables) : Inherited(S.Context), S(S), SkipLocalVariables(SkipLocalVariables) { } void VisitDeclRefExpr(DeclRefExpr *E) { // If we were asked not to visit local variables, don't. 
if (SkipLocalVariables) { if (VarDecl *VD = dyn_cast(E->getDecl())) if (VD->hasLocalStorage()) return; } S.MarkDeclRefReferenced(E); } void VisitMemberExpr(MemberExpr *E) { S.MarkMemberReferenced(E); Inherited::VisitMemberExpr(E); } void VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) { S.MarkFunctionReferenced(E->getLocStart(), const_cast(E->getTemporary()->getDestructor())); Visit(E->getSubExpr()); } void VisitCXXNewExpr(CXXNewExpr *E) { if (E->getOperatorNew()) S.MarkFunctionReferenced(E->getLocStart(), E->getOperatorNew()); if (E->getOperatorDelete()) S.MarkFunctionReferenced(E->getLocStart(), E->getOperatorDelete()); Inherited::VisitCXXNewExpr(E); } void VisitCXXDeleteExpr(CXXDeleteExpr *E) { if (E->getOperatorDelete()) S.MarkFunctionReferenced(E->getLocStart(), E->getOperatorDelete()); QualType Destroyed = S.Context.getBaseElementType(E->getDestroyedType()); if (const RecordType *DestroyedRec = Destroyed->getAs()) { CXXRecordDecl *Record = cast(DestroyedRec->getDecl()); S.MarkFunctionReferenced(E->getLocStart(), S.LookupDestructor(Record)); } Inherited::VisitCXXDeleteExpr(E); } void VisitCXXConstructExpr(CXXConstructExpr *E) { S.MarkFunctionReferenced(E->getLocStart(), E->getConstructor()); Inherited::VisitCXXConstructExpr(E); } void VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { Visit(E->getExpr()); } void VisitImplicitCastExpr(ImplicitCastExpr *E) { Inherited::VisitImplicitCastExpr(E); if (E->getCastKind() == CK_LValueToRValue) S.UpdateMarkingForLValueToRValue(E->getSubExpr()); } }; } /// \brief Mark any declarations that appear within this expression or any /// potentially-evaluated subexpressions as "referenced". /// /// \param SkipLocalVariables If true, don't mark local variables as /// 'referenced'. void Sema::MarkDeclarationsReferencedInExpr(Expr *E, bool SkipLocalVariables) { EvaluatedExprMarker(*this, SkipLocalVariables).Visit(E); } /// \brief Emit a diagnostic that describes an effect on the run-time behavior /// of the program being compiled. /// /// This routine emits the given diagnostic when the code currently being /// type-checked is "potentially evaluated", meaning that there is a /// possibility that the code will actually be executable. Code in sizeof() /// expressions, code used only during overload resolution, etc., are not /// potentially evaluated. This routine will suppress such diagnostics or, /// in the absolutely nutty case of potentially potentially evaluated /// expressions (C++ typeid), queue the diagnostic to potentially emit it /// later. /// /// This routine should be used for all diagnostics that describe the run-time /// behavior of a program, such as passing a non-POD value through an ellipsis. /// Failure to do so will likely result in spurious diagnostics or failures /// during overload resolution or within sizeof/alignof/typeof/typeid. bool Sema::DiagRuntimeBehavior(SourceLocation Loc, const Stmt *Statement, const PartialDiagnostic &PD) { switch (ExprEvalContexts.back().Context) { case Unevaluated: case UnevaluatedAbstract: // The argument will never be evaluated, so don't complain. break; case ConstantEvaluated: // Relevant diagnostics should be produced by constant evaluation. break; case PotentiallyEvaluated: case PotentiallyEvaluatedIfUsed: if (Statement && getCurFunctionOrMethodDecl()) { FunctionScopes.back()->PossiblyUnreachableDiags. 
push_back(sema::PossiblyUnreachableDiag(PD, Loc, Statement)); } else Diag(Loc, PD); return true; } return false; } bool Sema::CheckCallReturnType(QualType ReturnType, SourceLocation Loc, CallExpr *CE, FunctionDecl *FD) { if (ReturnType->isVoidType() || !ReturnType->isIncompleteType()) return false; // If we're inside a decltype's expression, don't check for a valid return // type or construct temporaries until we know whether this is the last call. if (ExprEvalContexts.back().IsDecltype) { ExprEvalContexts.back().DelayedDecltypeCalls.push_back(CE); return false; } class CallReturnIncompleteDiagnoser : public TypeDiagnoser { FunctionDecl *FD; CallExpr *CE; public: CallReturnIncompleteDiagnoser(FunctionDecl *FD, CallExpr *CE) : FD(FD), CE(CE) { } void diagnose(Sema &S, SourceLocation Loc, QualType T) override { if (!FD) { S.Diag(Loc, diag::err_call_incomplete_return) << T << CE->getSourceRange(); return; } S.Diag(Loc, diag::err_call_function_incomplete_return) << CE->getSourceRange() << FD->getDeclName() << T; S.Diag(FD->getLocation(), diag::note_entity_declared_at) << FD->getDeclName(); } } Diagnoser(FD, CE); if (RequireCompleteType(Loc, ReturnType, Diagnoser)) return true; return false; } // Diagnose the s/=/==/ and s/\|=/!=/ typos. Note that adding parentheses // will prevent this condition from triggering, which is what we want. void Sema::DiagnoseAssignmentAsCondition(Expr *E) { SourceLocation Loc; unsigned diagnostic = diag::warn_condition_is_assignment; bool IsOrAssign = false; if (BinaryOperator *Op = dyn_cast(E)) { if (Op->getOpcode() != BO_Assign && Op->getOpcode() != BO_OrAssign) return; IsOrAssign = Op->getOpcode() == BO_OrAssign; // Greylist some idioms by putting them into a warning subcategory. if (ObjCMessageExpr *ME = dyn_cast(Op->getRHS()->IgnoreParenCasts())) { Selector Sel = ME->getSelector(); // self = [ init...] if (isSelfExpr(Op->getLHS()) && ME->getMethodFamily() == OMF_init) diagnostic = diag::warn_condition_is_idiomatic_assignment; // = [ nextObject] else if (Sel.isUnarySelector() && Sel.getNameForSlot(0) == "nextObject") diagnostic = diag::warn_condition_is_idiomatic_assignment; } Loc = Op->getOperatorLoc(); } else if (CXXOperatorCallExpr *Op = dyn_cast(E)) { if (Op->getOperator() != OO_Equal && Op->getOperator() != OO_PipeEqual) return; IsOrAssign = Op->getOperator() == OO_PipeEqual; Loc = Op->getOperatorLoc(); } else if (PseudoObjectExpr *POE = dyn_cast(E)) return DiagnoseAssignmentAsCondition(POE->getSyntacticForm()); else { // Not an assignment. return; } Diag(Loc, diagnostic) << E->getSourceRange(); SourceLocation Open = E->getLocStart(); SourceLocation Close = getLocForEndOfToken(E->getSourceRange().getEnd()); Diag(Loc, diag::note_condition_assign_silence) << FixItHint::CreateInsertion(Open, "(") << FixItHint::CreateInsertion(Close, ")"); if (IsOrAssign) Diag(Loc, diag::note_condition_or_assign_to_comparison) << FixItHint::CreateReplacement(Loc, "!="); else Diag(Loc, diag::note_condition_assign_to_comparison) << FixItHint::CreateReplacement(Loc, "=="); } /// \brief Redundant parentheses over an equality comparison can indicate /// that the user intended an assignment used as condition. void Sema::DiagnoseEqualityWithExtraParens(ParenExpr *ParenE) { // Don't warn if the parens came from a macro. SourceLocation parenLoc = ParenE->getLocStart(); if (parenLoc.isInvalid() || parenLoc.isMacroID()) return; // Don't warn for dependent expressions. 
if (ParenE->isTypeDependent()) return; Expr *E = ParenE->IgnoreParens(); if (BinaryOperator *opE = dyn_cast(E)) if (opE->getOpcode() == BO_EQ && opE->getLHS()->IgnoreParenImpCasts()->isModifiableLvalue(Context) == Expr::MLV_Valid) { SourceLocation Loc = opE->getOperatorLoc(); Diag(Loc, diag::warn_equality_with_extra_parens) << E->getSourceRange(); SourceRange ParenERange = ParenE->getSourceRange(); Diag(Loc, diag::note_equality_comparison_silence) << FixItHint::CreateRemoval(ParenERange.getBegin()) << FixItHint::CreateRemoval(ParenERange.getEnd()); Diag(Loc, diag::note_equality_comparison_to_assign) << FixItHint::CreateReplacement(Loc, "="); } } ExprResult Sema::CheckBooleanCondition(Expr *E, SourceLocation Loc) { DiagnoseAssignmentAsCondition(E); if (ParenExpr *parenE = dyn_cast(E)) DiagnoseEqualityWithExtraParens(parenE); ExprResult result = CheckPlaceholderExpr(E); if (result.isInvalid()) return ExprError(); E = result.get(); if (!E->isTypeDependent()) { if (getLangOpts().CPlusPlus) return CheckCXXBooleanCondition(E); // C++ 6.4p4 ExprResult ERes = DefaultFunctionArrayLvalueConversion(E); if (ERes.isInvalid()) return ExprError(); E = ERes.get(); QualType T = E->getType(); if (!T->isScalarType()) { // C99 6.8.4.1p1 Diag(Loc, diag::err_typecheck_statement_requires_scalar) << T << E->getSourceRange(); return ExprError(); } CheckBoolLikeConversion(E, Loc); } return E; } ExprResult Sema::ActOnBooleanCondition(Scope *S, SourceLocation Loc, Expr *SubExpr) { if (!SubExpr) return ExprError(); return CheckBooleanCondition(SubExpr, Loc); } namespace { /// A visitor for rebuilding a call to an __unknown_any expression /// to have an appropriate type. struct RebuildUnknownAnyFunction : StmtVisitor { Sema &S; RebuildUnknownAnyFunction(Sema &S) : S(S) {} ExprResult VisitStmt(Stmt *S) { llvm_unreachable("unexpected statement!"); } ExprResult VisitExpr(Expr *E) { S.Diag(E->getExprLoc(), diag::err_unsupported_unknown_any_call) << E->getSourceRange(); return ExprError(); } /// Rebuild an expression which simply semantically wraps another /// expression which it shares the type and value kind of. template ExprResult rebuildSugarExpr(T *E) { ExprResult SubResult = Visit(E->getSubExpr()); if (SubResult.isInvalid()) return ExprError(); Expr *SubExpr = SubResult.get(); E->setSubExpr(SubExpr); E->setType(SubExpr->getType()); E->setValueKind(SubExpr->getValueKind()); assert(E->getObjectKind() == OK_Ordinary); return E; } ExprResult VisitParenExpr(ParenExpr *E) { return rebuildSugarExpr(E); } ExprResult VisitUnaryExtension(UnaryOperator *E) { return rebuildSugarExpr(E); } ExprResult VisitUnaryAddrOf(UnaryOperator *E) { ExprResult SubResult = Visit(E->getSubExpr()); if (SubResult.isInvalid()) return ExprError(); Expr *SubExpr = SubResult.get(); E->setSubExpr(SubExpr); E->setType(S.Context.getPointerType(SubExpr->getType())); assert(E->getValueKind() == VK_RValue); assert(E->getObjectKind() == OK_Ordinary); return E; } ExprResult resolveDecl(Expr *E, ValueDecl *VD) { if (!isa(VD)) return VisitExpr(E); E->setType(VD->getType()); assert(E->getValueKind() == VK_RValue); if (S.getLangOpts().CPlusPlus && !(isa(VD) && cast(VD)->isInstance())) E->setValueKind(VK_LValue); return E; } ExprResult VisitMemberExpr(MemberExpr *E) { return resolveDecl(E, E->getMemberDecl()); } ExprResult VisitDeclRefExpr(DeclRefExpr *E) { return resolveDecl(E, E->getDecl()); } }; } /// Given a function expression of unknown-any type, try to rebuild it /// to have a function type. 
static ExprResult rebuildUnknownAnyFunction(Sema &S, Expr *FunctionExpr) { ExprResult Result = RebuildUnknownAnyFunction(S).Visit(FunctionExpr); if (Result.isInvalid()) return ExprError(); return S.DefaultFunctionArrayConversion(Result.get()); } namespace { /// A visitor for rebuilding an expression of type __unknown_anytype /// into one which resolves the type directly on the referring /// expression. Strict preservation of the original source /// structure is not a goal. struct RebuildUnknownAnyExpr : StmtVisitor { Sema &S; /// The current destination type. QualType DestType; RebuildUnknownAnyExpr(Sema &S, QualType CastType) : S(S), DestType(CastType) {} ExprResult VisitStmt(Stmt *S) { llvm_unreachable("unexpected statement!"); } ExprResult VisitExpr(Expr *E) { S.Diag(E->getExprLoc(), diag::err_unsupported_unknown_any_expr) << E->getSourceRange(); return ExprError(); } ExprResult VisitCallExpr(CallExpr *E); ExprResult VisitObjCMessageExpr(ObjCMessageExpr *E); /// Rebuild an expression which simply semantically wraps another /// expression which it shares the type and value kind of. template ExprResult rebuildSugarExpr(T *E) { ExprResult SubResult = Visit(E->getSubExpr()); if (SubResult.isInvalid()) return ExprError(); Expr *SubExpr = SubResult.get(); E->setSubExpr(SubExpr); E->setType(SubExpr->getType()); E->setValueKind(SubExpr->getValueKind()); assert(E->getObjectKind() == OK_Ordinary); return E; } ExprResult VisitParenExpr(ParenExpr *E) { return rebuildSugarExpr(E); } ExprResult VisitUnaryExtension(UnaryOperator *E) { return rebuildSugarExpr(E); } ExprResult VisitUnaryAddrOf(UnaryOperator *E) { const PointerType *Ptr = DestType->getAs(); if (!Ptr) { S.Diag(E->getOperatorLoc(), diag::err_unknown_any_addrof) << E->getSourceRange(); return ExprError(); } assert(E->getValueKind() == VK_RValue); assert(E->getObjectKind() == OK_Ordinary); E->setType(DestType); // Build the sub-expression as if it were an object of the pointee type. DestType = Ptr->getPointeeType(); ExprResult SubResult = Visit(E->getSubExpr()); if (SubResult.isInvalid()) return ExprError(); E->setSubExpr(SubResult.get()); return E; } ExprResult VisitImplicitCastExpr(ImplicitCastExpr *E); ExprResult resolveDecl(Expr *E, ValueDecl *VD); ExprResult VisitMemberExpr(MemberExpr *E) { return resolveDecl(E, E->getMemberDecl()); } ExprResult VisitDeclRefExpr(DeclRefExpr *E) { return resolveDecl(E, E->getDecl()); } }; } /// Rebuilds a call expression which yielded __unknown_anytype. ExprResult RebuildUnknownAnyExpr::VisitCallExpr(CallExpr *E) { Expr *CalleeExpr = E->getCallee(); enum FnKind { FK_MemberFunction, FK_FunctionPointer, FK_BlockPointer }; FnKind Kind; QualType CalleeType = CalleeExpr->getType(); if (CalleeType == S.Context.BoundMemberTy) { assert(isa(E) || isa(E)); Kind = FK_MemberFunction; CalleeType = Expr::findBoundMemberType(CalleeExpr); } else if (const PointerType *Ptr = CalleeType->getAs()) { CalleeType = Ptr->getPointeeType(); Kind = FK_FunctionPointer; } else { CalleeType = CalleeType->castAs()->getPointeeType(); Kind = FK_BlockPointer; } const FunctionType *FnType = CalleeType->castAs(); // Verify that this is a legal result type of a function. if (DestType->isArrayType() || DestType->isFunctionType()) { unsigned diagID = diag::err_func_returning_array_function; if (Kind == FK_BlockPointer) diagID = diag::err_block_returning_array_function; S.Diag(E->getExprLoc(), diagID) << DestType->isFunctionType() << DestType; return ExprError(); } // Otherwise, go ahead and set DestType as the call's result. 
E->setType(DestType.getNonLValueExprType(S.Context)); E->setValueKind(Expr::getValueKindForType(DestType)); assert(E->getObjectKind() == OK_Ordinary); // Rebuild the function type, replacing the result type with DestType. const FunctionProtoType *Proto = dyn_cast(FnType); if (Proto) { // __unknown_anytype(...) is a special case used by the debugger when // it has no idea what a function's signature is. // // We want to build this call essentially under the K&R // unprototyped rules, but making a FunctionNoProtoType in C++ // would foul up all sorts of assumptions. However, we cannot // simply pass all arguments as variadic arguments, nor can we // portably just call the function under a non-variadic type; see // the comment on IR-gen's TargetInfo::isNoProtoCallVariadic. // However, it turns out that in practice it is generally safe to // call a function declared as "A foo(B,C,D);" under the prototype // "A foo(B,C,D,...);". The only known exception is with the // Windows ABI, where any variadic function is implicitly cdecl // regardless of its normal CC. Therefore we change the parameter // types to match the types of the arguments. // // This is a hack, but it is far superior to moving the // corresponding target-specific code from IR-gen to Sema/AST. ArrayRef ParamTypes = Proto->getParamTypes(); SmallVector ArgTypes; if (ParamTypes.empty() && Proto->isVariadic()) { // the special case ArgTypes.reserve(E->getNumArgs()); for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { Expr *Arg = E->getArg(i); QualType ArgType = Arg->getType(); if (E->isLValue()) { ArgType = S.Context.getLValueReferenceType(ArgType); } else if (E->isXValue()) { ArgType = S.Context.getRValueReferenceType(ArgType); } ArgTypes.push_back(ArgType); } ParamTypes = ArgTypes; } DestType = S.Context.getFunctionType(DestType, ParamTypes, Proto->getExtProtoInfo()); } else { DestType = S.Context.getFunctionNoProtoType(DestType, FnType->getExtInfo()); } // Rebuild the appropriate pointer-to-function type. switch (Kind) { case FK_MemberFunction: // Nothing to do. break; case FK_FunctionPointer: DestType = S.Context.getPointerType(DestType); break; case FK_BlockPointer: DestType = S.Context.getBlockPointerType(DestType); break; } // Finally, we can recurse. ExprResult CalleeResult = Visit(CalleeExpr); if (!CalleeResult.isUsable()) return ExprError(); E->setCallee(CalleeResult.get()); // Bind a temporary if necessary. return S.MaybeBindToTemporary(E); } ExprResult RebuildUnknownAnyExpr::VisitObjCMessageExpr(ObjCMessageExpr *E) { // Verify that this is a legal result type of a call. if (DestType->isArrayType() || DestType->isFunctionType()) { S.Diag(E->getExprLoc(), diag::err_func_returning_array_function) << DestType->isFunctionType() << DestType; return ExprError(); } // Rewrite the method result type if available. if (ObjCMethodDecl *Method = E->getMethodDecl()) { assert(Method->getReturnType() == S.Context.UnknownAnyTy); Method->setReturnType(DestType); } // Change the type of the message. E->setType(DestType.getNonReferenceType()); E->setValueKind(Expr::getValueKindForType(DestType)); return S.MaybeBindToTemporary(E); } ExprResult RebuildUnknownAnyExpr::VisitImplicitCastExpr(ImplicitCastExpr *E) { // The only case we should ever see here is a function-to-pointer decay. if (E->getCastKind() == CK_FunctionToPointerDecay) { assert(E->getValueKind() == VK_RValue); assert(E->getObjectKind() == OK_Ordinary); E->setType(DestType); // Rebuild the sub-expression as the pointee (function) type. 
DestType = DestType->castAs()->getPointeeType(); ExprResult Result = Visit(E->getSubExpr()); if (!Result.isUsable()) return ExprError(); E->setSubExpr(Result.get()); return E; } else if (E->getCastKind() == CK_LValueToRValue) { assert(E->getValueKind() == VK_RValue); assert(E->getObjectKind() == OK_Ordinary); assert(isa(E->getType())); E->setType(DestType); // The sub-expression has to be a lvalue reference, so rebuild it as such. DestType = S.Context.getLValueReferenceType(DestType); ExprResult Result = Visit(E->getSubExpr()); if (!Result.isUsable()) return ExprError(); E->setSubExpr(Result.get()); return E; } else { llvm_unreachable("Unhandled cast type!"); } } ExprResult RebuildUnknownAnyExpr::resolveDecl(Expr *E, ValueDecl *VD) { ExprValueKind ValueKind = VK_LValue; QualType Type = DestType; // We know how to make this work for certain kinds of decls: // - functions if (FunctionDecl *FD = dyn_cast(VD)) { if (const PointerType *Ptr = Type->getAs()) { DestType = Ptr->getPointeeType(); ExprResult Result = resolveDecl(E, VD); if (Result.isInvalid()) return ExprError(); return S.ImpCastExprToType(Result.get(), Type, CK_FunctionToPointerDecay, VK_RValue); } if (!Type->isFunctionType()) { S.Diag(E->getExprLoc(), diag::err_unknown_any_function) << VD << E->getSourceRange(); return ExprError(); } if (const FunctionProtoType *FT = Type->getAs()) { // We must match the FunctionDecl's type to the hack introduced in // RebuildUnknownAnyExpr::VisitCallExpr to vararg functions of unknown // type. See the lengthy commentary in that routine. QualType FDT = FD->getType(); const FunctionType *FnType = FDT->castAs(); const FunctionProtoType *Proto = dyn_cast_or_null(FnType); DeclRefExpr *DRE = dyn_cast(E); if (DRE && Proto && Proto->getParamTypes().empty() && Proto->isVariadic()) { SourceLocation Loc = FD->getLocation(); FunctionDecl *NewFD = FunctionDecl::Create(FD->getASTContext(), FD->getDeclContext(), Loc, Loc, FD->getNameInfo().getName(), DestType, FD->getTypeSourceInfo(), SC_None, false/*isInlineSpecified*/, FD->hasPrototype(), false/*isConstexprSpecified*/); if (FD->getQualifier()) NewFD->setQualifierInfo(FD->getQualifierLoc()); SmallVector Params; for (const auto &AI : FT->param_types()) { ParmVarDecl *Param = S.BuildParmVarDeclForTypedef(FD, Loc, AI); Param->setScopeInfo(0, Params.size()); Params.push_back(Param); } NewFD->setParams(Params); DRE->setDecl(NewFD); VD = DRE->getDecl(); } } if (CXXMethodDecl *MD = dyn_cast(FD)) if (MD->isInstance()) { ValueKind = VK_RValue; Type = S.Context.BoundMemberTy; } // Function references aren't l-values in C. if (!S.getLangOpts().CPlusPlus) ValueKind = VK_RValue; // - variables } else if (isa(VD)) { if (const ReferenceType *RefTy = Type->getAs()) { Type = RefTy->getPointeeType(); } else if (Type->isFunctionType()) { S.Diag(E->getExprLoc(), diag::err_unknown_any_var_function_type) << VD << E->getSourceRange(); return ExprError(); } // - nothing else } else { S.Diag(E->getExprLoc(), diag::err_unsupported_unknown_any_decl) << VD << E->getSourceRange(); return ExprError(); } // Modifying the declaration like this is friendly to IR-gen but // also really dangerous. VD->setType(DestType); E->setType(Type); E->setValueKind(ValueKind); return E; } /// Check a cast of an unknown-any type. We intentionally only /// trigger this for C-style casts. ExprResult Sema::checkUnknownAnyCast(SourceRange TypeRange, QualType CastType, Expr *CastExpr, CastKind &CastKind, ExprValueKind &VK, CXXCastPath &Path) { // Rewrite the casted expression from scratch. 
ExprResult result = RebuildUnknownAnyExpr(*this, CastType).Visit(CastExpr); if (!result.isUsable()) return ExprError(); CastExpr = result.get(); VK = CastExpr->getValueKind(); CastKind = CK_NoOp; return CastExpr; } ExprResult Sema::forceUnknownAnyToType(Expr *E, QualType ToType) { return RebuildUnknownAnyExpr(*this, ToType).Visit(E); } ExprResult Sema::checkUnknownAnyArg(SourceLocation callLoc, Expr *arg, QualType ¶mType) { // If the syntactic form of the argument is not an explicit cast of // any sort, just do default argument promotion. ExplicitCastExpr *castArg = dyn_cast(arg->IgnoreParens()); if (!castArg) { ExprResult result = DefaultArgumentPromotion(arg); if (result.isInvalid()) return ExprError(); paramType = result.get()->getType(); return result; } // Otherwise, use the type that was written in the explicit cast. assert(!arg->hasPlaceholderType()); paramType = castArg->getTypeAsWritten(); // Copy-initialize a parameter of that type. InitializedEntity entity = InitializedEntity::InitializeParameter(Context, paramType, /*consumed*/ false); return PerformCopyInitialization(entity, callLoc, arg); } static ExprResult diagnoseUnknownAnyExpr(Sema &S, Expr *E) { Expr *orig = E; unsigned diagID = diag::err_uncasted_use_of_unknown_any; while (true) { E = E->IgnoreParenImpCasts(); if (CallExpr *call = dyn_cast(E)) { E = call->getCallee(); diagID = diag::err_uncasted_call_of_unknown_any; } else { break; } } SourceLocation loc; NamedDecl *d; if (DeclRefExpr *ref = dyn_cast(E)) { loc = ref->getLocation(); d = ref->getDecl(); } else if (MemberExpr *mem = dyn_cast(E)) { loc = mem->getMemberLoc(); d = mem->getMemberDecl(); } else if (ObjCMessageExpr *msg = dyn_cast(E)) { diagID = diag::err_uncasted_call_of_unknown_any; loc = msg->getSelectorStartLoc(); d = msg->getMethodDecl(); if (!d) { S.Diag(loc, diag::err_uncasted_send_to_unknown_any_method) << static_cast(msg->isClassMessage()) << msg->getSelector() << orig->getSourceRange(); return ExprError(); } } else { S.Diag(E->getExprLoc(), diag::err_unsupported_unknown_any_expr) << E->getSourceRange(); return ExprError(); } S.Diag(loc, diagID) << d << orig->getSourceRange(); // Never recoverable. return ExprError(); } /// Check for operands with placeholder types and complain if found. /// Returns true if there was an error and no recovery was possible. ExprResult Sema::CheckPlaceholderExpr(Expr *E) { if (!getLangOpts().CPlusPlus) { // C cannot handle TypoExpr nodes on either side of a binop because it // doesn't handle dependent types properly, so make sure any TypoExprs have // been dealt with before checking the operands. ExprResult Result = CorrectDelayedTyposInExpr(E); if (!Result.isUsable()) return ExprError(); E = Result.get(); } const BuiltinType *placeholderType = E->getType()->getAsPlaceholderType(); if (!placeholderType) return E; switch (placeholderType->getKind()) { // Overloaded expressions. case BuiltinType::Overload: { // Try to resolve a single function template specialization. // This is obligatory. ExprResult result = E; if (ResolveAndFixSingleFunctionTemplateSpecialization(result, false)) { return result; // If that failed, try to recover with a call. } else { tryToRecoverWithCall(result, PDiag(diag::err_ovl_unresolvable), /*complain*/ true); return result; } } // Bound member functions. case BuiltinType::BoundMember: { ExprResult result = E; const Expr *BME = E->IgnoreParens(); PartialDiagnostic PD = PDiag(diag::err_bound_member_function); // Try to give a nicer diagnostic if it is a bound member that we recognize. 
if (isa(BME)) { PD = PDiag(diag::err_dtor_expr_without_call) << /*pseudo-destructor*/ 1; } else if (const auto *ME = dyn_cast(BME)) { if (ME->getMemberNameInfo().getName().getNameKind() == DeclarationName::CXXDestructorName) PD = PDiag(diag::err_dtor_expr_without_call) << /*destructor*/ 0; } tryToRecoverWithCall(result, PD, /*complain*/ true); return result; } // ARC unbridged casts. case BuiltinType::ARCUnbridgedCast: { Expr *realCast = stripARCUnbridgedCast(E); diagnoseARCUnbridgedCast(realCast); return realCast; } // Expressions of unknown type. case BuiltinType::UnknownAny: return diagnoseUnknownAnyExpr(*this, E); // Pseudo-objects. case BuiltinType::PseudoObject: return checkPseudoObjectRValue(E); case BuiltinType::BuiltinFn: { // Accept __noop without parens by implicitly converting it to a call expr. auto *DRE = dyn_cast(E->IgnoreParenImpCasts()); if (DRE) { auto *FD = cast(DRE->getDecl()); if (FD->getBuiltinID() == Builtin::BI__noop) { E = ImpCastExprToType(E, Context.getPointerType(FD->getType()), CK_BuiltinFnToFnPtr).get(); return new (Context) CallExpr(Context, E, None, Context.IntTy, VK_RValue, SourceLocation()); } } Diag(E->getLocStart(), diag::err_builtin_fn_use); return ExprError(); } // Expressions of unknown type. case BuiltinType::OMPArraySection: Diag(E->getLocStart(), diag::err_omp_array_section_use); return ExprError(); // Everything else should be impossible. #define BUILTIN_TYPE(Id, SingletonId) \ case BuiltinType::Id: #define PLACEHOLDER_TYPE(Id, SingletonId) #include "clang/AST/BuiltinTypes.def" break; } llvm_unreachable("invalid placeholder type!"); } bool Sema::CheckCaseExpression(Expr *E) { if (E->isTypeDependent()) return true; if (E->isValueDependent() || E->isIntegerConstantExpr(Context)) return E->getType()->isIntegralOrEnumerationType(); return false; } /// ActOnObjCBoolLiteral - Parse {__objc_yes,__objc_no} literals. 
ExprResult Sema::ActOnObjCBoolLiteral(SourceLocation OpLoc,
                                      tok::TokenKind Kind) {
  assert((Kind == tok::kw___objc_yes || Kind == tok::kw___objc_no) &&
         "Unknown Objective-C Boolean value!");
  QualType BoolT = Context.ObjCBuiltinBoolTy;
  if (!Context.getBOOLDecl()) {
    LookupResult Result(*this, &Context.Idents.get("BOOL"), OpLoc,
                        Sema::LookupOrdinaryName);
    if (LookupName(Result, getCurScope()) && Result.isSingleResult()) {
      NamedDecl *ND = Result.getFoundDecl();
      if (TypedefDecl *TD = dyn_cast<TypedefDecl>(ND))
        Context.setBOOLDecl(TD);
    }
  }
  if (Context.getBOOLDecl())
    BoolT = Context.getBOOLType();
  return new (Context) ObjCBoolLiteralExpr(Kind == tok::kw___objc_yes, BoolT,
                                           OpLoc);
}
Index: projects/clang380-import/contrib/llvm/tools/clang
===================================================================
--- projects/clang380-import/contrib/llvm/tools/clang (revision 296010)
+++ projects/clang380-import/contrib/llvm/tools/clang (revision 296011)

Property changes on: projects/clang380-import/contrib/llvm/tools/clang
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /vendor/clang/dist:r295854-296008
Index: projects/clang380-import/contrib/llvm
===================================================================
--- projects/clang380-import/contrib/llvm (revision 296010)
+++ projects/clang380-import/contrib/llvm (revision 296011)

Property changes on: projects/clang380-import/contrib/llvm
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /vendor/llvm/dist:r295854-296008
Index: projects/clang380-import/lib/clang/include/clang/Basic/Version.inc
===================================================================
--- projects/clang380-import/lib/clang/include/clang/Basic/Version.inc (revision 296010)
+++ projects/clang380-import/lib/clang/include/clang/Basic/Version.inc (revision 296011)
@@ -1,10 +1,10 @@
 /* $FreeBSD$ */
 
 #define CLANG_VERSION 3.8.0
 #define CLANG_VERSION_MAJOR 3
 #define CLANG_VERSION_MINOR 8
 #define CLANG_VERSION_PATCHLEVEL 0
 
 #define CLANG_VENDOR "FreeBSD "
 
-#define SVN_REVISION "261369"
+#define SVN_REVISION "261684"