Index: projects/clang400-import/contrib/compiler-rt
===================================================================
--- projects/clang400-import/contrib/compiler-rt	(revision 314176)
+++ projects/clang400-import/contrib/compiler-rt	(revision 314177)

Property changes on: projects/clang400-import/contrib/compiler-rt
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /vendor/compiler-rt/dist:r313893-314176
Index: projects/clang400-import/contrib/libc++
===================================================================
--- projects/clang400-import/contrib/libc++	(revision 314176)
+++ projects/clang400-import/contrib/libc++	(revision 314177)

Property changes on: projects/clang400-import/contrib/libc++
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /vendor/libc++/dist:r313894-314176
Index: projects/clang400-import/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
===================================================================
--- projects/clang400-import/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp	(revision 314176)
+++ projects/clang400-import/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp	(revision 314177)
@@ -1,826 +1,823 @@
#include "DwarfCompileUnit.h"
#include "DwarfExpression.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"

namespace llvm {

DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node,
                                   AsmPrinter *A, DwarfDebug *DW,
                                   DwarfFile *DWU)
    : DwarfUnit(dwarf::DW_TAG_compile_unit, Node, A, DW, DWU), UniqueID(UID),
      Skeleton(nullptr), BaseAddress(nullptr) {
  insertDIE(Node, &getUnitDie());
  MacroLabelBegin = Asm->createTempSymbol("cu_macro_begin");
}

/// addLabelAddress - Add a dwarf label attribute data and value using
/// DW_FORM_addr or DW_FORM_GNU_addr_index.
///
void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
                                       const MCSymbol *Label) {
  // Don't use the address pool in non-fission or in the skeleton unit itself.
  // FIXME: Once GDB supports this, it's probably worthwhile using the address
  // pool from the skeleton - maybe even in non-fission (possibly fewer
  // relocations by sharing them in the pool, but we have other ideas about how
  // to reduce the number of relocations as well/instead).
  if (!DD->useSplitDwarf() || !Skeleton)
    return addLocalLabelAddress(Die, Attribute, Label);

  if (Label)
    DD->addArangeLabel(SymbolCU(this, Label));

  unsigned idx = DD->getAddressPool().getIndex(Label);
  Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_GNU_addr_index,
               DIEInteger(idx));
}

void DwarfCompileUnit::addLocalLabelAddress(DIE &Die,
                                            dwarf::Attribute Attribute,
                                            const MCSymbol *Label) {
  if (Label)
    DD->addArangeLabel(SymbolCU(this, Label));

  if (Label)
    Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_addr,
                 DIELabel(Label));
  else
    Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_addr,
                 DIEInteger(0));
}

unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName,
                                               StringRef DirName) {
  // If we print assembly, we can't separate .file entries according to
  // compile units. Thus all files will belong to the default compile unit.

  // FIXME: add a better feature test than hasRawTextSupport. Even better,
  // extend .file to support this.
  return Asm->OutStreamer->EmitDwarfFileDirective(
      0, DirName, FileName,
      Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID());
}

DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
    const DIGlobalVariable *GV, ArrayRef<GlobalExpr> GlobalExprs) {
  // Check for pre-existence.
  if (DIE *Die = getDIE(GV))
    return Die;

  assert(GV);

  auto *GVContext = GV->getScope();
  auto *GTy = DD->resolve(GV->getType());

  // Construct the context before querying for the existence of the DIE in
  // case such construction creates the DIE.
  DIE *ContextDIE = getOrCreateContextDIE(GVContext);

  // Add to map.
  DIE *VariableDIE = &createAndAddDIE(GV->getTag(), *ContextDIE, GV);
  DIScope *DeclContext;
  if (auto *SDMDecl = GV->getStaticDataMemberDeclaration()) {
    DeclContext = resolve(SDMDecl->getScope());
    assert(SDMDecl->isStaticMember() && "Expected static member decl");
    assert(GV->isDefinition());
    // We need the declaration DIE that is in the static member's class.
    DIE *VariableSpecDIE = getOrCreateStaticMemberDIE(SDMDecl);
    addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE);
    // If the global variable's type is different from the one in the class
    // member type, assume that it's more specific and also emit it.
    if (GTy != DD->resolve(SDMDecl->getBaseType()))
      addType(*VariableDIE, GTy);
  } else {
    DeclContext = GV->getScope();
    // Add name and type.
    addString(*VariableDIE, dwarf::DW_AT_name, GV->getDisplayName());
    addType(*VariableDIE, GTy);

    // Add scoping info.
    if (!GV->isLocalToUnit())
      addFlag(*VariableDIE, dwarf::DW_AT_external);

    // Add line number info.
    addSourceLine(*VariableDIE, GV);
  }

  if (!GV->isDefinition())
    addFlag(*VariableDIE, dwarf::DW_AT_declaration);
  else
    addGlobalName(GV->getName(), *VariableDIE, DeclContext);

  if (uint32_t AlignInBytes = GV->getAlignInBytes())
    addUInt(*VariableDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
            AlignInBytes);

  // Add location.
  bool addToAccelTable = false;
  DIELoc *Loc = nullptr;
  std::unique_ptr<DIEDwarfExpression> DwarfExpr;
  bool AllConstant = std::all_of(
      GlobalExprs.begin(), GlobalExprs.end(),
      [&](const GlobalExpr GE) { return GE.Expr && GE.Expr->isConstant(); });

  for (const auto &GE : GlobalExprs) {
    const GlobalVariable *Global = GE.Var;
    const DIExpression *Expr = GE.Expr;
    // For compatibility with DWARF 3 and earlier,
    // DW_AT_location(DW_OP_constu, X, DW_OP_stack_value) becomes
    // DW_AT_const_value(X).
    if (GlobalExprs.size() == 1 && Expr && Expr->isConstant()) {
      addConstantValue(*VariableDIE, /*Unsigned=*/true, Expr->getElement(1));
    // We cannot describe the location of dllimport'd variables: the
    // computation of their address requires loads from the IAT.
    } else if ((Global && !Global->hasDLLImportStorageClass()) || AllConstant) {
      if (!Loc) {
        Loc = new (DIEValueAllocator) DIELoc;
        DwarfExpr = llvm::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc);
      }
      addToAccelTable = true;
      if (Global) {
        const MCSymbol *Sym = Asm->getSymbol(Global);
        if (Global->isThreadLocal()) {
          if (Asm->TM.Options.EmulatedTLS) {
            // TODO: add debug info for emulated thread local mode.
          } else {
            // FIXME: Make this work with -gsplit-dwarf.
            unsigned PointerSize = Asm->getDataLayout().getPointerSize();
            assert((PointerSize == 4 || PointerSize == 8) &&
                   "Add support for other sizes if necessary");
            // Based on GCC's support for TLS:
            if (!DD->useSplitDwarf()) {
              // 1) Start with a constNu of the appropriate pointer size
              addUInt(*Loc, dwarf::DW_FORM_data1,
                      PointerSize == 4 ? dwarf::DW_OP_const4u
                                       : dwarf::DW_OP_const8u);
              // 2) containing the (relocated) offset of the TLS variable
              //    within the module's TLS block.
              addExpr(*Loc, dwarf::DW_FORM_udata,
                      Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
            } else {
              addUInt(*Loc, dwarf::DW_FORM_data1,
                      dwarf::DW_OP_GNU_const_index);
              addUInt(*Loc, dwarf::DW_FORM_udata,
                      DD->getAddressPool().getIndex(Sym, /* TLS */ true));
            }
            // 3) followed by an OP to make the debugger do a TLS lookup.
            addUInt(*Loc, dwarf::DW_FORM_data1,
                    DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
                                          : dwarf::DW_OP_form_tls_address);
          }
        } else {
          DD->addArangeLabel(SymbolCU(this, Sym));
          addOpAddress(*Loc, Sym);
        }
      }
      if (Expr) {
        DwarfExpr->addFragmentOffset(Expr);
        DwarfExpr->AddExpression(Expr);
      }
    }
  }
  if (Loc)
    addBlock(*VariableDIE, dwarf::DW_AT_location, DwarfExpr->finalize());

  if (DD->useAllLinkageNames())
    addLinkageName(*VariableDIE, GV->getLinkageName());

  if (addToAccelTable) {
    DD->addAccelName(GV->getName(), *VariableDIE);

    // If the linkage name is different than the name, go ahead and output
    // that as well into the name table.
    if (GV->getLinkageName() != "" && GV->getName() != GV->getLinkageName())
      DD->addAccelName(GV->getLinkageName(), *VariableDIE);
  }

  return VariableDIE;
}

void DwarfCompileUnit::addRange(RangeSpan Range) {
  bool SameAsPrevCU = this == DD->getPrevCU();
  DD->setPrevCU(this);
  // If we have no current ranges just add the range and return, otherwise,
  // check the current section and CU against the previous section and CU we
  // emitted into and the subprogram was contained within. If these are the
  // same then extend our current range, otherwise add this as a new range.
  if (CURanges.empty() || !SameAsPrevCU ||
      (&CURanges.back().getEnd()->getSection() !=
       &Range.getEnd()->getSection())) {
    CURanges.push_back(Range);
    return;
  }

  CURanges.back().setEnd(Range.getEnd());
}

DIE::value_iterator
DwarfCompileUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
                                  const MCSymbol *Label, const MCSymbol *Sec) {
  if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
    return addLabel(Die, Attribute,
                    DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
                                               : dwarf::DW_FORM_data4,
                    Label);
  return addSectionDelta(Die, Attribute, Label, Sec);
}
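To make the TLS lowering above concrete: on a 64-bit target without split
DWARF, the location block built there is DW_OP_const8u <offset>,
DW_OP_form_tls_address (or the GNU opcode, depending on useGNUTLSOpcode()).
The stand-alone C++ sketch below is illustrative only and not part of the
diff; the opcode values come from the DWARF specification, and the 0x1000
offset is a placeholder for the relocated offset of the variable within its
module's TLS block.

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  const uint8_t DW_OP_const8u = 0x0e;          // 1) pointer-sized constant...
  const uint8_t DW_OP_form_tls_address = 0x9b; // 3) ...then the TLS lookup op
  uint64_t TLSOffset = 0x1000;                 // 2) placeholder TLS offset

  std::vector<uint8_t> Expr;
  Expr.push_back(DW_OP_const8u);
  for (int i = 0; i < 8; ++i) // const8u takes a little-endian 8-byte operand
    Expr.push_back((TLSOffset >> (8 * i)) & 0xff);
  Expr.push_back(DW_OP_form_tls_address);

  for (uint8_t B : Expr)
    printf("%02x ", B); // 0e 00 10 00 00 00 00 00 00 9b
  printf("\n");
}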
void DwarfCompileUnit::initStmtList() {
  // Define start line table label for each Compile Unit.
  MCSymbol *LineTableStartSym =
      Asm->OutStreamer->getDwarfLineTableSymbol(getUniqueID());

  // DW_AT_stmt_list is an offset of line number information for this
  // compile unit in debug_line section. For split dwarf this is
  // left in the skeleton CU and so not included.
  // The line table entries are not always emitted in assembly, so it
  // is not okay to use line_table_start here.
  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
  StmtListValue =
      addSectionLabel(getUnitDie(), dwarf::DW_AT_stmt_list, LineTableStartSym,
                      TLOF.getDwarfLineSection()->getBeginSymbol());
}

void DwarfCompileUnit::applyStmtList(DIE &D) {
  D.addValue(DIEValueAllocator, *StmtListValue);
}

void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin,
                                       const MCSymbol *End) {
  assert(Begin && "Begin label should not be null!");
  assert(End && "End label should not be null!");
  assert(Begin->isDefined() && "Invalid starting label");
  assert(End->isDefined() && "Invalid end label");

  addLabelAddress(D, dwarf::DW_AT_low_pc, Begin);
  if (DD->getDwarfVersion() < 4)
    addLabelAddress(D, dwarf::DW_AT_high_pc, End);
  else
    addLabelDelta(D, dwarf::DW_AT_high_pc, End, Begin);
}

// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc
// and DW_AT_high_pc attributes. If there are global variables in this
// scope then create and insert DIEs for these variables.
DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
  DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes());

  attachLowHighPC(*SPDie, Asm->getFunctionBegin(), Asm->getFunctionEnd());
  if (DD->useAppleExtensionAttributes() &&
      !DD->getCurrentFunction()->getTarget().Options.DisableFramePointerElim(
          *DD->getCurrentFunction()))
    addFlag(*SPDie, dwarf::DW_AT_APPLE_omit_frame_ptr);

  // Only include DW_AT_frame_base in full debug info
  if (!includeMinimalInlineScopes()) {
    const TargetRegisterInfo *RI = Asm->MF->getSubtarget().getRegisterInfo();
    MachineLocation Location(RI->getFrameRegister(*Asm->MF));
    if (RI->isPhysicalRegister(Location.getReg()))
      addAddress(*SPDie, dwarf::DW_AT_frame_base, Location);
  }

  // Add name to the name table, we do this here because we're guaranteed
  // to have concrete versions of our DW_TAG_subprogram nodes.
  DD->addSubprogramNames(SP, *SPDie);

  return *SPDie;
}

// Construct a DIE for this scope.
void DwarfCompileUnit::constructScopeDIE(
    LexicalScope *Scope, SmallVectorImpl<DIE *> &FinalChildren) {
  if (!Scope || !Scope->getScopeNode())
    return;

  auto *DS = Scope->getScopeNode();

  assert((Scope->getInlinedAt() || !isa<DISubprogram>(DS)) &&
         "Only handle inlined subprograms here, use "
         "constructSubprogramScopeDIE for non-inlined "
         "subprograms");

  SmallVector<DIE *, 8> Children;

  // We try to create the scope DIE first, then the children DIEs. This will
  // avoid creating un-used children then removing them later when we find out
  // the scope DIE is null.
  DIE *ScopeDIE;
  if (Scope->getParent() && isa<DISubprogram>(DS)) {
    ScopeDIE = constructInlinedScopeDIE(Scope);
    if (!ScopeDIE)
      return;
    // We create children when the scope DIE is not null.
    createScopeChildrenDIE(Scope, Children);
  } else {
    // Early exit when we know the scope DIE is going to be null.
    if (DD->isLexicalScopeDIENull(Scope))
      return;

    unsigned ChildScopeCount;

    // We create children here when we know the scope DIE is not going to be
    // null and the children will be added to the scope DIE.
    createScopeChildrenDIE(Scope, Children, &ChildScopeCount);

    // Skip imported directives in gmlt-like data.
    if (!includeMinimalInlineScopes()) {
      // There is no need to emit empty lexical block DIE.
      for (const auto *IE : ImportedEntities[DS])
        Children.push_back(
            constructImportedEntityDIE(cast<DIImportedEntity>(IE)));
    }

    // If there are only other scopes as children, put them directly in the
    // parent instead, as this scope would serve no purpose.
    if (Children.size() == ChildScopeCount) {
      FinalChildren.insert(FinalChildren.end(),
                           std::make_move_iterator(Children.begin()),
                           std::make_move_iterator(Children.end()));
      return;
    }
    ScopeDIE = constructLexicalScopeDIE(Scope);
    assert(ScopeDIE && "Scope DIE should not be null.");
  }

  // Add children
  for (auto &I : Children)
    ScopeDIE->addChild(std::move(I));

  FinalChildren.push_back(std::move(ScopeDIE));
}

DIE::value_iterator
DwarfCompileUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
                                  const MCSymbol *Hi, const MCSymbol *Lo) {
  return Die.addValue(DIEValueAllocator, Attribute,
                      DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
                                                 : dwarf::DW_FORM_data4,
                      new (DIEValueAllocator) DIEDelta(Hi, Lo));
}

void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
                                         SmallVector<RangeSpan, 2> Range) {
  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();

  // Emit offset in .debug_range as a relocatable label. emitDIE will handle
  // emitting it appropriately.
  const MCSymbol *RangeSectionSym =
      TLOF.getDwarfRangesSection()->getBeginSymbol();

  RangeSpanList List(Asm->createTempSymbol("debug_ranges"), std::move(Range));

  // Under fission, ranges are specified by constant offsets relative to the
  // CU's DW_AT_GNU_ranges_base.
  if (isDwoUnit())
    addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
                    RangeSectionSym);
  else
    addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
                    RangeSectionSym);

  // Add the range list to the set of ranges to be emitted.
  (Skeleton ? Skeleton : this)->CURangeLists.push_back(std::move(List));
}

void DwarfCompileUnit::attachRangesOrLowHighPC(
    DIE &Die, SmallVector<RangeSpan, 2> Ranges) {
  if (Ranges.size() == 1) {
    const auto &single = Ranges.front();
    attachLowHighPC(Die, single.getStart(), single.getEnd());
  } else
    addScopeRangeList(Die, std::move(Ranges));
}

void DwarfCompileUnit::attachRangesOrLowHighPC(
    DIE &Die, const SmallVectorImpl<InsnRange> &Ranges) {
  SmallVector<RangeSpan, 2> List;
  List.reserve(Ranges.size());
  for (const InsnRange &R : Ranges)
    List.push_back(RangeSpan(DD->getLabelBeforeInsn(R.first),
                             DD->getLabelAfterInsn(R.second)));
  attachRangesOrLowHighPC(Die, std::move(List));
}
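The two overloads above implement a simple policy that the sketch below
restates in isolation (Range here is a simplified stand-in, not the LLVM
RangeSpan type): a single contiguous range can be attached directly as
DW_AT_low_pc/DW_AT_high_pc, while anything non-contiguous is routed to a
.debug_ranges list.

#include <cstdio>
#include <vector>

struct Range { unsigned Start, End; };

// Mirrors only the decision made in attachRangesOrLowHighPC.
void attachRangesOrLowHighPC(const std::vector<Range> &Ranges) {
  if (Ranges.size() == 1) {
    printf("DW_AT_low_pc  = %#x\n", Ranges.front().Start);
    printf("DW_AT_high_pc = %#x\n", Ranges.front().End);
  } else {
    printf("DW_AT_ranges  -> .debug_ranges list with %zu spans\n",
           Ranges.size());
  }
}

int main() {
  attachRangesOrLowHighPC({{0x400000, 0x400040}}); // contiguous
  attachRangesOrLowHighPC({{0x400000, 0x400010}, {0x400020, 0x400040}});
}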
// This scope represents inlined body of a function. Construct DIE to
// represent this concrete inlined copy of the function.
DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
  assert(Scope->getScopeNode());
  auto *DS = Scope->getScopeNode();
  auto *InlinedSP = getDISubprogram(DS);
  // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
  // was inlined from another compile unit.
  DIE *OriginDIE = DU->getAbstractSPDies()[InlinedSP];
  assert(OriginDIE && "Unable to find original DIE for an inlined subprogram.");

  auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_inlined_subroutine);
  addDIEEntry(*ScopeDIE, dwarf::DW_AT_abstract_origin, *OriginDIE);

  attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges());

  // Add the call site information to the DIE.
  const DILocation *IA = Scope->getInlinedAt();
  addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None,
          getOrCreateSourceID(IA->getFilename(), IA->getDirectory()));
  addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine());
  if (IA->getDiscriminator() && DD->getDwarfVersion() >= 4)
    addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None,
            IA->getDiscriminator());

  // Add name to the name table, we do this here because we're guaranteed
  // to have concrete versions of our DW_TAG_inlined_subprogram nodes.
  DD->addSubprogramNames(InlinedSP, *ScopeDIE);

  return ScopeDIE;
}

// Construct new DW_TAG_lexical_block for this scope and attach
// DW_AT_low_pc/DW_AT_high_pc labels.
DIE *DwarfCompileUnit::constructLexicalScopeDIE(LexicalScope *Scope) {
  if (DD->isLexicalScopeDIENull(Scope))
    return nullptr;

  auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_lexical_block);
  if (Scope->isAbstractScope())
    return ScopeDIE;

  attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges());

  return ScopeDIE;
}

/// constructVariableDIE - Construct a DIE for the given DbgVariable.
DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV, bool Abstract) {
  auto D = constructVariableDIEImpl(DV, Abstract);
  DV.setDIE(*D);
  return D;
}

DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
                                                bool Abstract) {
  // Define variable debug information entry.
  auto VariableDie = DIE::get(DIEValueAllocator, DV.getTag());

  if (Abstract) {
    applyVariableAttributes(DV, *VariableDie);
    return VariableDie;
  }

  // Add variable address.

  unsigned Offset = DV.getDebugLocListIndex();
  if (Offset != ~0U) {
    addLocationList(*VariableDie, dwarf::DW_AT_location, Offset);
    return VariableDie;
  }

  // Check if variable is described by a DBG_VALUE instruction.
  if (const MachineInstr *DVInsn = DV.getMInsn()) {
    assert(DVInsn->getNumOperands() == 4);
    if (DVInsn->getOperand(0).isReg()) {
      const MachineOperand RegOp = DVInsn->getOperand(0);
      // If the second operand is an immediate, this is an indirect value.
      if (DVInsn->getOperand(1).isImm()) {
        MachineLocation Location(RegOp.getReg(),
                                 DVInsn->getOperand(1).getImm());
        addVariableAddress(DV, *VariableDie, Location);
      } else if (RegOp.getReg())
        addVariableAddress(DV, *VariableDie, MachineLocation(RegOp.getReg()));
    } else if (DVInsn->getOperand(0).isImm()) {
      // This variable is described by a single constant.
      // Check whether it has a DIExpression.
      auto *Expr = DV.getSingleExpression();
      if (Expr && Expr->getNumElements()) {
        DIELoc *Loc = new (DIEValueAllocator) DIELoc;
        DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
        // If there is an expression, emit raw unsigned bytes.
        DwarfExpr.addFragmentOffset(Expr);
        DwarfExpr.AddUnsignedConstant(DVInsn->getOperand(0).getImm());
        DwarfExpr.AddExpression(Expr);
        addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
      } else
        addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType());
    } else if (DVInsn->getOperand(0).isFPImm())
      addConstantFPValue(*VariableDie, DVInsn->getOperand(0));
    else if (DVInsn->getOperand(0).isCImm())
      addConstantValue(*VariableDie, DVInsn->getOperand(0).getCImm(),
                       DV.getType());

    return VariableDie;
  }

  // .. else use frame index.
-  if (DV.getFrameIndex().empty())
+  if (!DV.hasFrameIndexExprs())
     return VariableDie;

-  auto Expr = DV.getExpression().begin();
   DIELoc *Loc = new (DIEValueAllocator) DIELoc;
   DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
-  for (auto FI : DV.getFrameIndex()) {
+  for (auto &Fragment : DV.getFrameIndexExprs()) {
     unsigned FrameReg = 0;
     const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
-    int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
-    assert(Expr != DV.getExpression().end() && "Wrong number of expressions");
-    DwarfExpr.addFragmentOffset(*Expr);
+    int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg);
+    DwarfExpr.addFragmentOffset(Fragment.Expr);
     DwarfExpr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(),
                                     FrameReg, Offset);
-    DwarfExpr.AddExpression(*Expr);
-    ++Expr;
+    DwarfExpr.AddExpression(Fragment.Expr);
   }
   addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());

   return VariableDie;
 }

DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV,
                                            const LexicalScope &Scope,
                                            DIE *&ObjectPointer) {
  auto Var = constructVariableDIE(DV, Scope.isAbstractScope());
  if (DV.isObjectPointer())
    ObjectPointer = Var;
  return Var;
}

DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope,
                                              SmallVectorImpl<DIE *> &Children,
                                              unsigned *ChildScopeCount) {
  DIE *ObjectPointer = nullptr;

  for (DbgVariable *DV : DU->getScopeVariables().lookup(Scope))
    Children.push_back(constructVariableDIE(*DV, *Scope, ObjectPointer));

  unsigned ChildCountWithoutScopes = Children.size();

  for (LexicalScope *LS : Scope->getChildren())
    constructScopeDIE(LS, Children);

  if (ChildScopeCount)
    *ChildScopeCount = Children.size() - ChildCountWithoutScopes;

  return ObjectPointer;
}

void DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub,
                                                   LexicalScope *Scope) {
  DIE &ScopeDIE = updateSubprogramScopeDIE(Sub);

  if (Scope) {
    assert(!Scope->getInlinedAt());
    assert(!Scope->isAbstractScope());
    // Collect lexical scope children first.
    // ObjectPointer might be a local (non-argument) local variable if it's a
    // block's synthetic this pointer.
    if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, ScopeDIE))
      addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer);
  }

  // If this is a variadic function, add an unspecified parameter.
  DITypeRefArray FnArgs = Sub->getType()->getTypeArray();

  // If we have a single element of null, it is a function that returns void.
  // If we have more than one elements and the last one is null, it is a
  // variadic function.
  if (FnArgs.size() > 1 && !FnArgs[FnArgs.size() - 1] &&
      !includeMinimalInlineScopes())
    ScopeDIE.addChild(
        DIE::get(DIEValueAllocator, dwarf::DW_TAG_unspecified_parameters));
}
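The header side of this refactoring is outside the hunk. Reconstructed from
the calls above, DbgVariable now stores the frame index and the fragment
expression as one record instead of two parallel lists, which is what makes
the removed "Wrong number of expressions" assertion unnecessary: the pairing
is correct by construction. A hypothetical sketch of the shape involved
(names inferred from this hunk, not the verbatim header):

struct FrameIndexExpr {
  int FI;                   // frame index holding this fragment
  const DIExpression *Expr; // fragment expression (offset/size in the variable)
};
// In DbgVariable, presumably along these lines:
//   mutable SmallVector<FrameIndexExpr, 1> FrameIndexExprs;
//   bool hasFrameIndexExprs() const { return !FrameIndexExprs.empty(); }
//   ArrayRef<FrameIndexExpr> getFrameIndexExprs() const; // sorted; see
//                                                        // DwarfDebug.cpp below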
DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope,
                                                 DIE &ScopeDIE) {
  // We create children when the scope DIE is not null.
  SmallVector<DIE *, 8> Children;
  DIE *ObjectPointer = createScopeChildrenDIE(Scope, Children);

  // Add children
  for (auto &I : Children)
    ScopeDIE.addChild(std::move(I));

  return ObjectPointer;
}

void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
    LexicalScope *Scope) {
  DIE *&AbsDef = DU->getAbstractSPDies()[Scope->getScopeNode()];
  if (AbsDef)
    return;

  auto *SP = cast<DISubprogram>(Scope->getScopeNode());

  DIE *ContextDIE;

  if (includeMinimalInlineScopes())
    ContextDIE = &getUnitDie();
  // Some of this is duplicated from DwarfUnit::getOrCreateSubprogramDIE, with
  // the important distinction that the debug node is not associated with the
  // DIE (since the debug node will be associated with the concrete DIE, if
  // any). It could be refactored to some common utility function.
  else if (auto *SPDecl = SP->getDeclaration()) {
    ContextDIE = &getUnitDie();
    getOrCreateSubprogramDIE(SPDecl);
  } else
    ContextDIE = getOrCreateContextDIE(resolve(SP->getScope()));

  // Passing null as the associated node because the abstract definition
  // shouldn't be found by lookup.
  AbsDef = &createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, nullptr);
  applySubprogramAttributesToDefinition(SP, *AbsDef);

  if (!includeMinimalInlineScopes())
    addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined);
  if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, *AbsDef))
    addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer);
}

DIE *DwarfCompileUnit::constructImportedEntityDIE(
    const DIImportedEntity *Module) {
  DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)Module->getTag());
  insertDIE(Module, IMDie);
  DIE *EntityDie;
  auto *Entity = resolve(Module->getEntity());
  if (auto *NS = dyn_cast<DINamespace>(Entity))
    EntityDie = getOrCreateNameSpace(NS);
  else if (auto *M = dyn_cast<DIModule>(Entity))
    EntityDie = getOrCreateModule(M);
  else if (auto *SP = dyn_cast<DISubprogram>(Entity))
    EntityDie = getOrCreateSubprogramDIE(SP);
  else if (auto *T = dyn_cast<DIType>(Entity))
    EntityDie = getOrCreateTypeDIE(T);
  else if (auto *GV = dyn_cast<DIGlobalVariable>(Entity))
    EntityDie = getOrCreateGlobalVariableDIE(GV, {});
  else
    EntityDie = getDIE(Entity);
  assert(EntityDie);
  addSourceLine(*IMDie, Module->getLine(), Module->getScope()->getFilename(),
                Module->getScope()->getDirectory());
  addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie);
  StringRef Name = Module->getName();
  if (!Name.empty())
    addString(*IMDie, dwarf::DW_AT_name, Name);

  return IMDie;
}

void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) {
  DIE *D = getDIE(SP);
  if (DIE *AbsSPDIE = DU->getAbstractSPDies().lookup(SP)) {
    if (D)
      // If this subprogram has an abstract definition, reference that
      addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE);
  } else {
    assert(D || includeMinimalInlineScopes());
    if (D)
      // And attach the attributes
      applySubprogramAttributesToDefinition(SP, *D);
  }
}

void DwarfCompileUnit::emitHeader(bool UseOffsets) {
  // Don't bother labeling the .dwo unit, as its offset isn't used.
  if (!Skeleton) {
    LabelBegin = Asm->createTempSymbol("cu_begin");
    Asm->OutStreamer->EmitLabel(LabelBegin);
  }

  DwarfUnit::emitHeader(UseOffsets);
}

/// addGlobalName - Add a new global name to the compile unit.
void DwarfCompileUnit::addGlobalName(StringRef Name, DIE &Die,
                                     const DIScope *Context) {
  if (includeMinimalInlineScopes())
    return;
  std::string FullName = getParentContextString(Context) + Name.str();
  GlobalNames[FullName] = &Die;
}

/// Add a new global type to the unit.
void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die,
                                     const DIScope *Context) {
  if (includeMinimalInlineScopes())
    return;
  std::string FullName = getParentContextString(Context) + Ty->getName().str();
  GlobalTypes[FullName] = &Die;
}

/// addVariableAddress - Add DW_AT_location attribute for a
/// DbgVariable based on provided MachineLocation.
void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
                                          MachineLocation Location) {
  if (DV.hasComplexAddress())
    addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
  else if (DV.isBlockByrefVariable())
    addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location);
  else
    addAddress(Die, dwarf::DW_AT_location, Location);
}

/// Add an address attribute to a die based on the location provided.
void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
                                  const MachineLocation &Location) {
  DIELoc *Loc = new (DIEValueAllocator) DIELoc;
  DIEDwarfExpression Expr(*Asm, *this, *Loc);

  bool validReg;
  if (Location.isReg())
    validReg = Expr.AddMachineReg(*Asm->MF->getSubtarget().getRegisterInfo(),
                                  Location.getReg());
  else
    validReg =
        Expr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(),
                                   Location.getReg(), Location.getOffset());

  if (!validReg)
    return;

  // Now attach the location information to the DIE.
  addBlock(Die, Attribute, Expr.finalize());
}

/// Start with the address based on the location provided, and generate the
/// DWARF information necessary to find the actual variable given the extra
/// address information encoded in the DbgVariable, starting from the starting
/// location. Add the DWARF information to the die.
void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
                                         dwarf::Attribute Attribute,
                                         const MachineLocation &Location) {
  DIELoc *Loc = new (DIEValueAllocator) DIELoc;
  DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
  const DIExpression *Expr = DV.getSingleExpression();
  DIExpressionCursor ExprCursor(Expr);
  const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
  auto Reg = Location.getReg();
  DwarfExpr.addFragmentOffset(Expr);
  bool ValidReg =
      Location.getOffset()
          ? DwarfExpr.AddMachineRegIndirect(TRI, Reg, Location.getOffset())
          : DwarfExpr.AddMachineRegExpression(TRI, ExprCursor, Reg);

  if (!ValidReg)
    return;

  DwarfExpr.AddExpression(std::move(ExprCursor));

  // Now attach the location information to the DIE.
  addBlock(Die, Attribute, Loc);
}

/// Add a Dwarf loclistptr attribute data and value.
void DwarfCompileUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute,
                                       unsigned Index) {
  dwarf::Form Form = DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
                                                : dwarf::DW_FORM_data4;
  Die.addValue(DIEValueAllocator, Attribute, Form, DIELocList(Index));
}

void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var,
                                               DIE &VariableDie) {
  StringRef Name = Var.getName();
  if (!Name.empty())
    addString(VariableDie, dwarf::DW_AT_name, Name);
  const auto *DIVar = Var.getVariable();
  if (DIVar)
    if (uint32_t AlignInBytes = DIVar->getAlignInBytes())
      addUInt(VariableDie, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
              AlignInBytes);

  addSourceLine(VariableDie, DIVar);
  addType(VariableDie, Var.getType());
  if (Var.isArtificial())
    addFlag(VariableDie, dwarf::DW_AT_artificial);
}
/// Add a Dwarf expression attribute data and value.
void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form,
                               const MCExpr *Expr) {
  Die.addValue(DIEValueAllocator, (dwarf::Attribute)0, Form, DIEExpr(Expr));
}

void DwarfCompileUnit::applySubprogramAttributesToDefinition(
    const DISubprogram *SP, DIE &SPDie) {
  auto *SPDecl = SP->getDeclaration();
  auto *Context = resolve(SPDecl ? SPDecl->getScope() : SP->getScope());
  applySubprogramAttributes(SP, SPDie, includeMinimalInlineScopes());
  addGlobalName(SP->getName(), SPDie, Context);
}

bool DwarfCompileUnit::isDwoUnit() const {
  return DD->useSplitDwarf() && Skeleton;
}

bool DwarfCompileUnit::includeMinimalInlineScopes() const {
  return getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly ||
         (DD->useSplitDwarf() && !Skeleton);
}
} // end llvm namespace
Index: projects/clang400-import/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
===================================================================
--- projects/clang400-import/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp	(revision 314176)
+++ projects/clang400-import/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp	(revision 314177)
@@ -1,2043 +1,2052 @@
//===-- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains support for writing dwarf debug info into asm files.
//
//===----------------------------------------------------------------------===//

#include "DwarfDebug.h"
#include "ByteStreamer.h"
#include "DIEHash.h"
#include "DebugLocEntry.h"
#include "DwarfCompileUnit.h"
#include "DwarfExpression.h"
#include "DwarfUnit.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"

using namespace llvm;

#define DEBUG_TYPE "dwarfdebug"

static cl::opt<bool>
DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden,
                         cl::desc("Disable debug info printing"));

static cl::opt<bool> GenerateGnuPubSections(
    "generate-gnu-dwarf-pub-sections", cl::Hidden,
    cl::desc("Generate GNU-style pubnames and pubtypes"), cl::init(false));

static cl::opt<bool> GenerateARangeSection("generate-arange-section",
                                           cl::Hidden,
                                           cl::desc("Generate dwarf aranges"),
                                           cl::init(false));

namespace {
enum DefaultOnOff { Default, Enable, Disable };
}
static cl::opt<DefaultOnOff> UnknownLocations(
    "use-unknown-locations", cl::Hidden,
    cl::desc("Make an absence of debug location information explicit."),
    cl::values(clEnumVal(Default, "At top of block or after label"),
               clEnumVal(Enable, "In all cases"),
               clEnumVal(Disable, "Never")),
    cl::init(Default));

static cl::opt<DefaultOnOff>
DwarfAccelTables("dwarf-accel-tables", cl::Hidden,
                 cl::desc("Output prototype dwarf accelerator tables."),
                 cl::values(clEnumVal(Default, "Default for platform"),
                            clEnumVal(Enable, "Enabled"),
                            clEnumVal(Disable, "Disabled")),
                 cl::init(Default));

static cl::opt<DefaultOnOff>
SplitDwarf("split-dwarf", cl::Hidden,
           cl::desc("Output DWARF5 split debug info."),
           cl::values(clEnumVal(Default, "Default for platform"),
                      clEnumVal(Enable, "Enabled"),
                      clEnumVal(Disable, "Disabled")),
           cl::init(Default));

static cl::opt<DefaultOnOff>
DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden,
                 cl::desc("Generate DWARF pubnames and pubtypes sections"),
                 cl::values(clEnumVal(Default, "Default for platform"),
                            clEnumVal(Enable, "Enabled"),
                            clEnumVal(Disable, "Disabled")),
                 cl::init(Default));

enum LinkageNameOption {
  DefaultLinkageNames,
  AllLinkageNames,
  AbstractLinkageNames
};
static cl::opt<LinkageNameOption>
    DwarfLinkageNames("dwarf-linkage-names", cl::Hidden,
                      cl::desc("Which DWARF linkage-name attributes to emit."),
                      cl::values(clEnumValN(DefaultLinkageNames, "Default",
                                            "Default for platform"),
                                 clEnumValN(AllLinkageNames, "All", "All"),
                                 clEnumValN(AbstractLinkageNames, "Abstract",
                                            "Abstract subprograms")),
                      cl::init(DefaultLinkageNames));

static const char *const DWARFGroupName = "dwarf";
static const char *const DWARFGroupDescription = "DWARF Emission";
static const char *const DbgTimerName = "writer";
static const char *const DbgTimerDescription = "DWARF Debug Writer";

void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) {
  BS.EmitInt8(
      Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op)
                  : dwarf::OperationEncodingString(Op));
}

void DebugLocDwarfExpression::EmitSigned(int64_t Value) {
  BS.EmitSLEB128(Value, Twine(Value));
}

void DebugLocDwarfExpression::EmitUnsigned(uint64_t Value) {
  BS.EmitULEB128(Value, Twine(Value));
}

bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
                                              unsigned MachineReg) {
  // This information is not available while emitting .debug_loc entries.
  return false;
}

//===----------------------------------------------------------------------===//

bool DbgVariable::isBlockByrefVariable() const {
  assert(Var && "Invalid complex DbgVariable!");
  return Var->getType().resolve()->isBlockByrefStruct();
}

const DIType *DbgVariable::getType() const {
  DIType *Ty = Var->getType().resolve();
  // FIXME: isBlockByrefVariable should be reformulated in terms of complex
  // addresses instead.
  if (Ty->isBlockByrefStruct()) {
    /* Byref variables, in Blocks, are declared by the programmer as
       "SomeType VarName;", but the compiler creates a
       __Block_byref_x_VarName struct, and gives the variable VarName
       either the struct, or a pointer to the struct, as its type.  This
       is necessary for various behind-the-scenes things the compiler
       needs to do with by-reference variables in blocks.

       However, as far as the original *programmer* is concerned, the
       variable should still have type 'SomeType', as originally declared.

       The following function dives into the __Block_byref_x_VarName
       struct to find the original type of the variable.  This will be
       passed back to the code generating the type for the Debug
       Information Entry for the variable 'VarName'.
       'VarName' will then have the original type 'SomeType' in its debug
       information.

       The original type 'SomeType' will be the type of the field named
       'VarName' inside the __Block_byref_x_VarName struct.

       NOTE: In order for this to not completely fail on the debugger
       side, the Debug Information Entry for the variable VarName needs to
       have a DW_AT_location that tells the debugger how to unwind through
       the pointers and __Block_byref_x_VarName struct to find the actual
       value of the variable.  The function addBlockByrefType does this.  */
    DIType *subType = Ty;
    uint16_t tag = Ty->getTag();

    if (tag == dwarf::DW_TAG_pointer_type)
      subType = resolve(cast<DIDerivedType>(Ty)->getBaseType());

    auto Elements = cast<DICompositeType>(subType)->getElements();
    for (unsigned i = 0, N = Elements.size(); i < N; ++i) {
      auto *DT = cast<DIDerivedType>(Elements[i]);
      if (getName() == DT->getName())
        return resolve(DT->getBaseType());
    }
  }
  return Ty;
}

+ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
+  std::sort(FrameIndexExprs.begin(), FrameIndexExprs.end(),
+            [](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool {
+              return A.Expr->getFragmentInfo()->OffsetInBits <
+                     B.Expr->getFragmentInfo()->OffsetInBits;
+            });
+  return FrameIndexExprs;
+}
+
static const DwarfAccelTable::Atom TypeAtoms[] = {
    DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
    DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
    DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};

DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
    : DebugHandlerBase(A), DebugLocs(A->OutStreamer->isVerboseAsm()),
      InfoHolder(A, "info_string", DIEValueAllocator),
      SkeletonHolder(A, "skel_string", DIEValueAllocator),
      IsDarwin(A->TM.getTargetTriple().isOSDarwin()),
      AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
                                       dwarf::DW_FORM_data4)),
      AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
                                      dwarf::DW_FORM_data4)),
      AccelNamespace(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
                                           dwarf::DW_FORM_data4)),
      AccelTypes(TypeAtoms), DebuggerTuning(DebuggerKind::Default) {

  CurFn = nullptr;
  const Triple &TT = Asm->TM.getTargetTriple();

  // Make sure we know our "debugger tuning."  The target option takes
  // precedence; fall back to triple-based defaults.
  if (Asm->TM.Options.DebuggerTuning != DebuggerKind::Default)
    DebuggerTuning = Asm->TM.Options.DebuggerTuning;
  else if (IsDarwin)
    DebuggerTuning = DebuggerKind::LLDB;
  else if (TT.isPS4CPU())
    DebuggerTuning = DebuggerKind::SCE;
  else
    DebuggerTuning = DebuggerKind::GDB;

  // Turn on accelerator tables for LLDB by default.
  if (DwarfAccelTables == Default)
    HasDwarfAccelTables = tuneForLLDB();
  else
    HasDwarfAccelTables = DwarfAccelTables == Enable;

  HasAppleExtensionAttributes = tuneForLLDB();

  // Handle split DWARF. Off by default for now.
  if (SplitDwarf == Default)
    HasSplitDwarf = false;
  else
    HasSplitDwarf = SplitDwarf == Enable;

  // Pubnames/pubtypes on by default for GDB.
  if (DwarfPubSections == Default)
    HasDwarfPubSections = tuneForGDB();
  else
    HasDwarfPubSections = DwarfPubSections == Enable;

  // SCE defaults to linkage names only for abstract subprograms.
  if (DwarfLinkageNames == DefaultLinkageNames)
    UseAllLinkageNames = !tuneForSCE();
  else
    UseAllLinkageNames = DwarfLinkageNames == AllLinkageNames;

  unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion;
  unsigned DwarfVersion = DwarfVersionNumber
                              ? DwarfVersionNumber
                              : MMI->getModule()->getDwarfVersion();
  // Use dwarf 4 by default if nothing is requested.
  DwarfVersion = DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION;

  // Work around a GDB bug.
  // GDB doesn't support the standard opcode;
  // SCE doesn't support GNU's; LLDB prefers the standard opcode, which
  // is defined as of DWARF 3.
  // See GDB bug 11616 - DW_OP_form_tls_address is unimplemented
  // https://sourceware.org/bugzilla/show_bug.cgi?id=11616
  UseGNUTLSOpcode = tuneForGDB() || DwarfVersion < 3;

  // GDB does not fully support the DWARF 4 representation for bitfields.
  UseDWARF2Bitfields = (DwarfVersion < 4) || tuneForGDB();

  Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion);
}

// Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h.
DwarfDebug::~DwarfDebug() { }

static bool isObjCClass(StringRef Name) {
  return Name.startswith("+") || Name.startswith("-");
}

static bool hasObjCCategory(StringRef Name) {
  if (!isObjCClass(Name))
    return false;

  return Name.find(") ") != StringRef::npos;
}

static void getObjCClassCategory(StringRef In, StringRef &Class,
                                 StringRef &Category) {
  if (!hasObjCCategory(In)) {
    Class = In.slice(In.find('[') + 1, In.find(' '));
    Category = "";
    return;
  }

  Class = In.slice(In.find('[') + 1, In.find('('));
  Category = In.slice(In.find('[') + 1, In.find(' '));
}

static StringRef getObjCMethodName(StringRef In) {
  return In.slice(In.find(' ') + 1, In.find(']'));
}

// Add the various names to the Dwarf accelerator table names.
// TODO: Determine whether or not we should add names for programs
// that do not have a DW_AT_name or DW_AT_linkage_name field - this
// is only slightly different than the lookup of non-standard ObjC names.
void DwarfDebug::addSubprogramNames(const DISubprogram *SP, DIE &Die) {
  if (!SP->isDefinition())
    return;
  addAccelName(SP->getName(), Die);

  // If the linkage name is different than the name, go ahead and output
  // that as well into the name table.
  if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName())
    addAccelName(SP->getLinkageName(), Die);

  // If this is an Objective-C selector name add it to the ObjC accelerator
  // too.
  if (isObjCClass(SP->getName())) {
    StringRef Class, Category;
    getObjCClassCategory(SP->getName(), Class, Category);
    addAccelObjC(Class, Die);
    if (Category != "")
      addAccelObjC(Category, Die);
    // Also add the base method name to the name table.
    addAccelName(getObjCMethodName(SP->getName()), Die);
  }
}

/// Check whether we should create a DIE for the given Scope, return true
/// if we don't create a DIE (the corresponding DIE is null).
bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) {
  if (Scope->isAbstractScope())
    return false;

  // We don't create a DIE if there is no Range.
  const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges();
  if (Ranges.empty())
    return true;

  if (Ranges.size() > 1)
    return false;

  // We don't create a DIE if we have a single Range and the end label
  // is null.
  return !getLabelAfterInsn(Ranges.front().second);
}

template <typename Func> static void forBothCUs(DwarfCompileUnit &CU, Func F) {
  F(CU);
  if (auto *SkelCU = CU.getSkeleton())
    if (CU.getCUNode()->getSplitDebugInlining())
      F(*SkelCU);
}

void DwarfDebug::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) {
  assert(Scope && Scope->getScopeNode());
  assert(Scope->isAbstractScope());
  assert(!Scope->getInlinedAt());

  auto *SP = cast<DISubprogram>(Scope->getScopeNode());

  ProcessedSPNodes.insert(SP);

  // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
  // was inlined from another compile unit.
  auto &CU = *CUMap.lookup(SP->getUnit());
  forBothCUs(CU, [&](DwarfCompileUnit &CU) {
    CU.constructAbstractSubprogramScopeDIE(Scope);
  });
}

void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const {
  if (!GenerateGnuPubSections)
    return;

  U.addFlag(D, dwarf::DW_AT_GNU_pubnames);
}

// Create new DwarfCompileUnit for the given metadata node with tag
// DW_TAG_compile_unit.
DwarfCompileUnit &
DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) {
  StringRef FN = DIUnit->getFilename();
  CompilationDir = DIUnit->getDirectory();

  auto OwnedUnit = make_unique<DwarfCompileUnit>(
      InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder);
  DwarfCompileUnit &NewCU = *OwnedUnit;
  DIE &Die = NewCU.getUnitDie();
  InfoHolder.addUnit(std::move(OwnedUnit));
  if (useSplitDwarf()) {
    NewCU.setSkeleton(constructSkeletonCU(NewCU));
    NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name,
                    DIUnit->getSplitDebugFilename());
  }

  // LTO with assembly output shares a single line table amongst multiple CUs.
  // To avoid the compilation directory being ambiguous, let the line table
  // explicitly describe the directory of all files, never relying on the
  // compilation directory.
  if (!Asm->OutStreamer->hasRawTextSupport() || SingleCU)
    Asm->OutStreamer->getContext().setMCLineTableCompilationDir(
        NewCU.getUniqueID(), CompilationDir);

  NewCU.addString(Die, dwarf::DW_AT_producer, DIUnit->getProducer());
  NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
                DIUnit->getSourceLanguage());
  NewCU.addString(Die, dwarf::DW_AT_name, FN);

  if (!useSplitDwarf()) {
    NewCU.initStmtList();

    // If we're using split dwarf the compilation dir is going to be in the
    // skeleton CU and so we don't need to duplicate it here.
    if (!CompilationDir.empty())
      NewCU.addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);

    addGnuPubAttributes(NewCU, Die);
  }

  if (useAppleExtensionAttributes()) {
    if (DIUnit->isOptimized())
      NewCU.addFlag(Die, dwarf::DW_AT_APPLE_optimized);

    StringRef Flags = DIUnit->getFlags();
    if (!Flags.empty())
      NewCU.addString(Die, dwarf::DW_AT_APPLE_flags, Flags);

    if (unsigned RVer = DIUnit->getRuntimeVersion())
      NewCU.addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
                    dwarf::DW_FORM_data1, RVer);
  }

  if (useSplitDwarf())
    NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoDWOSection());
  else
    NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());

  if (DIUnit->getDWOId()) {
    // This CU is either a clang module DWO or a skeleton CU.
    NewCU.addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8,
                  DIUnit->getDWOId());
    if (!DIUnit->getSplitDebugFilename().empty())
      // This is a prefabricated skeleton CU.
      NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name,
                      DIUnit->getSplitDebugFilename());
  }

  CUMap.insert({DIUnit, &NewCU});
  CUDieMap.insert({&Die, &NewCU});
  return NewCU;
}

void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
                                                  const DIImportedEntity *N) {
  if (DIE *D = TheCU.getOrCreateContextDIE(N->getScope()))
    D->addChild(TheCU.constructImportedEntityDIE(N));
}

/// Sort and unique GVEs by comparing their fragment offset.
static SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &
sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) {
  std::sort(GVEs.begin(), GVEs.end(),
            [](DwarfCompileUnit::GlobalExpr A,
               DwarfCompileUnit::GlobalExpr B) {
              if (A.Expr != B.Expr && A.Expr && B.Expr) {
                auto FragmentA = A.Expr->getFragmentInfo();
                auto FragmentB = B.Expr->getFragmentInfo();
                if (FragmentA && FragmentB)
                  return FragmentA->OffsetInBits < FragmentB->OffsetInBits;
              }
              return false;
            });
  GVEs.erase(std::unique(GVEs.begin(), GVEs.end(),
                         [](DwarfCompileUnit::GlobalExpr A,
                            DwarfCompileUnit::GlobalExpr B) {
                           return A.Expr == B.Expr;
                         }),
             GVEs.end());
  return GVEs;
}

// Emit all Dwarf sections that should come prior to the content. Create
// global DIEs and emit initial debug info sections. This is invoked by
// the target AsmPrinter.
void DwarfDebug::beginModule() {
  NamedRegionTimer T(DbgTimerName, DbgTimerDescription, DWARFGroupName,
                     DWARFGroupDescription, TimePassesIsEnabled);
  if (DisableDebugInfoPrinting)
    return;

  const Module *M = MMI->getModule();

  unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(),
                                       M->debug_compile_units_end());
  // Tell MMI whether we have debug info.
  MMI->setDebugInfoAvailability(NumDebugCUs > 0);
  SingleCU = NumDebugCUs == 1;
  DenseMap<DIGlobalVariable *, SmallVector<DwarfCompileUnit::GlobalExpr, 1>>
      GVMap;
  for (const GlobalVariable &Global : M->globals()) {
    SmallVector<DIGlobalVariableExpression *, 1> GVs;
    Global.getDebugInfo(GVs);
    for (auto *GVE : GVs)
      GVMap[GVE->getVariable()].push_back({&Global, GVE->getExpression()});
  }

  for (DICompileUnit *CUNode : M->debug_compile_units()) {
    DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode);
    for (auto *IE : CUNode->getImportedEntities())
      CU.addImportedEntity(IE);

    // Global Variables.
    for (auto *GVE : CUNode->getGlobalVariables())
      GVMap[GVE->getVariable()].push_back({nullptr, GVE->getExpression()});
    DenseSet<DIGlobalVariable *> Processed;
    for (auto *GVE : CUNode->getGlobalVariables()) {
      DIGlobalVariable *GV = GVE->getVariable();
      if (Processed.insert(GV).second)
        CU.getOrCreateGlobalVariableDIE(GV, sortGlobalExprs(GVMap[GV]));
    }

    for (auto *Ty : CUNode->getEnumTypes()) {
      // The enum types array by design contains pointers to
      // MDNodes rather than DIRefs. Unique them here.
      CU.getOrCreateTypeDIE(cast<DIType>(Ty));
    }
    for (auto *Ty : CUNode->getRetainedTypes()) {
      // The retained types array by design contains pointers to
      // MDNodes rather than DIRefs. Unique them here.
      if (DIType *RT = dyn_cast<DIType>(Ty))
        if (!RT->isExternalTypeRef())
          // There is no point in force-emitting a forward declaration.
          CU.getOrCreateTypeDIE(RT);
    }
    // Emit imported_modules last so that the relevant context is already
    // available.
    for (auto *IE : CUNode->getImportedEntities())
      constructAndAddImportedEntityDIE(CU, IE);
  }
}
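For illustration, the sketch below reproduces sortGlobalExprs' observable
behavior on stand-in types (the real comparator additionally guards against
null and non-fragment expressions, which this toy omits): entries are ordered
by fragment offset, and exact duplicate expressions are dropped.

#include <algorithm>
#include <cstdio>
#include <vector>

struct Expr { unsigned FragmentOffsetInBits; }; // stand-in for DIExpression
struct GlobalExpr { const void *Var; const Expr *E; };

int main() {
  Expr Hi{32}, Lo{0};
  std::vector<GlobalExpr> GVEs = {
      {nullptr, &Hi}, {nullptr, &Lo}, {nullptr, &Lo}};

  std::sort(GVEs.begin(), GVEs.end(), [](GlobalExpr A, GlobalExpr B) {
    return A.E->FragmentOffsetInBits < B.E->FragmentOffsetInBits;
  });
  GVEs.erase(std::unique(GVEs.begin(), GVEs.end(),
                         [](GlobalExpr A, GlobalExpr B) { return A.E == B.E; }),
             GVEs.end());

  for (auto &GE : GVEs)
    printf("fragment at bit %u\n", GE.E->FragmentOffsetInBits); // 0, then 32
}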
void DwarfDebug::finishVariableDefinitions() {
  for (const auto &Var : ConcreteVariables) {
    DIE *VariableDie = Var->getDIE();
    assert(VariableDie);
    // FIXME: Consider the time-space tradeoff of just storing the unit pointer
    // in the ConcreteVariables list, rather than looking it up again here.
    // DIE::getUnit isn't simple - it walks parent pointers, etc.
    DwarfCompileUnit *Unit = CUDieMap.lookup(VariableDie->getUnitDie());
    assert(Unit);
    DbgVariable *AbsVar = getExistingAbstractVariable(
        InlinedVariable(Var->getVariable(), Var->getInlinedAt()));
    if (AbsVar && AbsVar->getDIE()) {
      Unit->addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin,
                        *AbsVar->getDIE());
    } else
      Unit->applyVariableAttributes(*Var, *VariableDie);
  }
}

void DwarfDebug::finishSubprogramDefinitions() {
  for (const DISubprogram *SP : ProcessedSPNodes)
    if (SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug)
      forBothCUs(*CUMap.lookup(SP->getUnit()), [&](DwarfCompileUnit &CU) {
        CU.finishSubprogramDefinition(SP);
      });
}

void DwarfDebug::finalizeModuleInfo() {
  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();

  finishSubprogramDefinitions();

  finishVariableDefinitions();

  // Handle anything that needs to be done on a per-unit basis after
  // all other generation.
  for (const auto &P : CUMap) {
    auto &TheCU = *P.second;
    // Emit DW_AT_containing_type attribute to connect types with their
    // vtable holding type.
    TheCU.constructContainingTypeDIEs();

    // Add CU specific attributes if we need to add any.
    // If we're splitting the dwarf out now that we've got the entire
    // CU then add the dwo id to it.
    auto *SkCU = TheCU.getSkeleton();
    if (useSplitDwarf()) {
      // Emit a unique identifier for this CU.
      uint64_t ID = DIEHash(Asm).computeCUSignature(TheCU.getUnitDie());
      TheCU.addUInt(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
                    dwarf::DW_FORM_data8, ID);
      SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
                    dwarf::DW_FORM_data8, ID);

      // We don't keep track of which addresses are used in which CU so this
      // is a bit pessimistic under LTO.
      if (!AddrPool.isEmpty()) {
        const MCSymbol *Sym = TLOF.getDwarfAddrSection()->getBeginSymbol();
        SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_addr_base,
                              Sym, Sym);
      }
      if (!SkCU->getRangeLists().empty()) {
        const MCSymbol *Sym = TLOF.getDwarfRangesSection()->getBeginSymbol();
        SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base,
                              Sym, Sym);
      }
    }

    // If we have code split among multiple sections or non-contiguous
    // ranges of code then emit a DW_AT_ranges attribute on the unit that will
    // remain in the .o file, otherwise add a DW_AT_low_pc.
    // FIXME: We should use ranges allow reordering of code ala
    // .subsections_via_symbols in mach-o. This would mean turning on
    // ranges for all subprogram DIEs for mach-o.
    DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
    if (unsigned NumRanges = TheCU.getRanges().size()) {
      if (NumRanges > 1)
        // A DW_AT_low_pc attribute may also be specified in combination with
        // DW_AT_ranges to specify the default base address for use in
        // location lists (see Section 2.6.2) and range lists (see Section
        // 2.17.3).
        U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
      else
        U.setBaseAddress(TheCU.getRanges().front().getStart());
      U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges());
    }

    auto *CUNode = cast<DICompileUnit>(P.first);
    // If compile Unit has macros, emit "DW_AT_macro_info" attribute.
    if (CUNode->getMacros())
      U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macro_info,
                        U.getMacroLabelBegin(),
                        TLOF.getDwarfMacinfoSection()->getBeginSymbol());
  }

  // Compute DIE offsets and sizes.
  InfoHolder.computeSizeAndOffsets();
  if (useSplitDwarf())
    SkeletonHolder.computeSizeAndOffsets();
}
// Emit all Dwarf sections that should come after the content.
void DwarfDebug::endModule() {
  assert(CurFn == nullptr);
  assert(CurMI == nullptr);

  // If we aren't actually generating debug info (check beginModule -
  // conditionalized on !DisableDebugInfoPrinting and the presence of the
  // llvm.dbg.cu metadata node)
  if (!MMI->hasDebugInfo())
    return;

  // Finalize the debug info for the module.
  finalizeModuleInfo();

  emitDebugStr();

  if (useSplitDwarf())
    emitDebugLocDWO();
  else
    // Emit info into a debug loc section.
    emitDebugLoc();

  // Corresponding abbreviations into an abbrev section.
  emitAbbreviations();

  // Emit all the DIEs into a debug info section.
  emitDebugInfo();

  // Emit info into a debug aranges section.
  if (GenerateARangeSection)
    emitDebugARanges();

  // Emit info into a debug ranges section.
  emitDebugRanges();

  // Emit info into a debug macinfo section.
  emitDebugMacinfo();

  if (useSplitDwarf()) {
    emitDebugStrDWO();
    emitDebugInfoDWO();
    emitDebugAbbrevDWO();
    emitDebugLineDWO();
    // Emit DWO addresses.
    AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection());
  }

  // Emit info into the dwarf accelerator table sections.
  if (useDwarfAccelTables()) {
    emitAccelNames();
    emitAccelObjC();
    emitAccelNamespaces();
    emitAccelTypes();
  }

  // Emit the pubnames and pubtypes sections if requested.
  if (HasDwarfPubSections) {
    emitDebugPubNames(GenerateGnuPubSections);
    emitDebugPubTypes(GenerateGnuPubSections);
  }

  // clean up.
  AbstractVariables.clear();
}

// Find abstract variable, if any, associated with Var.
DbgVariable *
DwarfDebug::getExistingAbstractVariable(InlinedVariable IV,
                                        const DILocalVariable *&Cleansed) {
  // More than one inlined variable corresponds to one abstract variable.
  Cleansed = IV.first;
  auto I = AbstractVariables.find(Cleansed);
  if (I != AbstractVariables.end())
    return I->second.get();
  return nullptr;
}

DbgVariable *DwarfDebug::getExistingAbstractVariable(InlinedVariable IV) {
  const DILocalVariable *Cleansed;
  return getExistingAbstractVariable(IV, Cleansed);
}

void DwarfDebug::createAbstractVariable(const DILocalVariable *Var,
                                        LexicalScope *Scope) {
  auto AbsDbgVariable = make_unique<DbgVariable>(Var, /* IA */ nullptr);
  InfoHolder.addScopeVariable(Scope, AbsDbgVariable.get());
  AbstractVariables[Var] = std::move(AbsDbgVariable);
}

void DwarfDebug::ensureAbstractVariableIsCreated(InlinedVariable IV,
                                                 const MDNode *ScopeNode) {
  const DILocalVariable *Cleansed = nullptr;
  if (getExistingAbstractVariable(IV, Cleansed))
    return;

  createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope(
                                       cast<DILocalScope>(ScopeNode)));
}

void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(
    InlinedVariable IV, const MDNode *ScopeNode) {
  const DILocalVariable *Cleansed = nullptr;
  if (getExistingAbstractVariable(IV, Cleansed))
    return;

  if (LexicalScope *Scope =
          LScopes.findAbstractScope(cast_or_null<DILocalScope>(ScopeNode)))
    createAbstractVariable(Cleansed, Scope);
}

// Collect variable information from side table maintained by MF.
void DwarfDebug::collectVariableInfoFromMFTable(
    DenseSet<InlinedVariable> &Processed) {
  for (const auto &VI : Asm->MF->getVariableDbgInfo()) {
    if (!VI.Var)
      continue;
    assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) &&
           "Expected inlined-at fields to agree");

    InlinedVariable Var(VI.Var, VI.Loc->getInlinedAt());
    Processed.insert(Var);
    LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc);

    // If variable scope is not found then skip this variable.
    if (!Scope)
      continue;

    ensureAbstractVariableIsCreatedIfScoped(Var, Scope->getScopeNode());
    auto RegVar = make_unique<DbgVariable>(Var.first, Var.second);
    RegVar->initializeMMI(VI.Expr, VI.Slot);
    if (InfoHolder.addScopeVariable(Scope, RegVar.get()))
      ConcreteVariables.push_back(std::move(RegVar));
  }
}

// Get .debug_loc entry for the instruction range starting at MI.
static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
  const DIExpression *Expr = MI->getDebugExpression();

  assert(MI->getNumOperands() == 4);
  if (MI->getOperand(0).isReg()) {
    MachineLocation MLoc;
    // If the second operand is an immediate, this is a
    // register-indirect address.
    if (!MI->getOperand(1).isImm())
      MLoc.set(MI->getOperand(0).getReg());
    else
      MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
    return DebugLocEntry::Value(Expr, MLoc);
  }
  if (MI->getOperand(0).isImm())
    return DebugLocEntry::Value(Expr, MI->getOperand(0).getImm());
  if (MI->getOperand(0).isFPImm())
    return DebugLocEntry::Value(Expr, MI->getOperand(0).getFPImm());
  if (MI->getOperand(0).isCImm())
    return DebugLocEntry::Value(Expr, MI->getOperand(0).getCImm());

  llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!");
}

/// \brief If this and Next are describing different fragments of the same
/// variable, merge them by appending Next's values to the current
/// list of values.
/// Return true if the merge was successful.
bool DebugLocEntry::MergeValues(const DebugLocEntry &Next) {
  if (Begin == Next.Begin) {
    auto *FirstExpr = cast<DIExpression>(Values[0].Expression);
    auto *FirstNextExpr = cast<DIExpression>(Next.Values[0].Expression);
    if (!FirstExpr->isFragment() || !FirstNextExpr->isFragment())
      return false;

    // We can only merge entries if none of the fragments overlap any others.
    // In doing so, we can take advantage of the fact that both lists are
    // sorted.
    for (unsigned i = 0, j = 0; i < Values.size(); ++i) {
      for (; j < Next.Values.size(); ++j) {
        int res = DebugHandlerBase::fragmentCmp(
            cast<DIExpression>(Values[i].Expression),
            cast<DIExpression>(Next.Values[j].Expression));
        if (res == 0) // The two expressions overlap, we can't merge.
          return false;
        // Values[i] is entirely before Next.Values[j],
        // so go back to the next entry of Values.
        else if (res == -1)
          break;
        // Next.Values[j] is entirely before Values[i], so go on to the
        // next entry of Next.Values.
      }
    }

    addValues(Next.Values);
    End = Next.End;
    return true;
  }
  return false;
}
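The merge gate above delegates the overlap test to
DebugHandlerBase::fragmentCmp. A toy model of that three-way comparison,
with the same return convention the loop relies on (-1: first fragment
entirely below the second, 1: the reverse, 0: overlap), assuming simple
offset/size pairs:

#include <cstdio>

struct Fragment { unsigned OffsetInBits, SizeInBits; };

int fragmentCmp(Fragment A, Fragment B) {
  if (A.OffsetInBits + A.SizeInBits <= B.OffsetInBits)
    return -1; // A entirely before B: keep scanning Values
  if (B.OffsetInBits + B.SizeInBits <= A.OffsetInBits)
    return 1;  // B entirely before A: advance to the next Next.Values entry
  return 0;    // overlap: the DebugLocEntries cannot be merged
}

int main() {
  Fragment X0{0, 32}, X32{32, 32}, Whole{0, 64};
  printf("%d\n", fragmentCmp(X0, X32));   // -1: disjoint fragments may merge
  printf("%d\n", fragmentCmp(X0, Whole)); //  0: overlapping fragments refuse
}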
// // Output: // // [0-1] [x, (reg0, fragment 0, 32)] // [1-3] [x, (reg0, fragment 0, 32), (reg1, fragment 32, 32)] // [3-4] [x, (reg1, fragment 32, 32)] // [4- ] [x, (mem, fragment 0, 64)] void DwarfDebug::buildLocationList(SmallVectorImpl &DebugLoc, const DbgValueHistoryMap::InstrRanges &Ranges) { SmallVector OpenRanges; for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) { const MachineInstr *Begin = I->first; const MachineInstr *End = I->second; assert(Begin->isDebugValue() && "Invalid History entry"); // Check if a variable is inaccessible in this range. if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg() && !Begin->getOperand(0).getReg()) { OpenRanges.clear(); continue; } // If this fragment overlaps with any open ranges, truncate them. const DIExpression *DIExpr = Begin->getDebugExpression(); auto Last = remove_if(OpenRanges, [&](DebugLocEntry::Value R) { return fragmentsOverlap(DIExpr, R.getExpression()); }); OpenRanges.erase(Last, OpenRanges.end()); const MCSymbol *StartLabel = getLabelBeforeInsn(Begin); assert(StartLabel && "Forgot label before DBG_VALUE starting a range!"); const MCSymbol *EndLabel; if (End != nullptr) EndLabel = getLabelAfterInsn(End); else if (std::next(I) == Ranges.end()) EndLabel = Asm->getFunctionEnd(); else EndLabel = getLabelBeforeInsn(std::next(I)->first); assert(EndLabel && "Forgot label after instruction ending a range!"); DEBUG(dbgs() << "DotDebugLoc: " << *Begin << "\n"); auto Value = getDebugLocValue(Begin); DebugLocEntry Loc(StartLabel, EndLabel, Value); bool couldMerge = false; // If this is a fragment, it may belong to the current DebugLocEntry. if (DIExpr->isFragment()) { // Add this value to the list of open ranges. OpenRanges.push_back(Value); // Attempt to add the fragment to the last entry. if (!DebugLoc.empty()) if (DebugLoc.back().MergeValues(Loc)) couldMerge = true; } if (!couldMerge) { // Need to add a new DebugLocEntry. Add all values from still // valid non-overlapping fragments. if (OpenRanges.size()) Loc.addValues(OpenRanges); DebugLoc.push_back(std::move(Loc)); } // Attempt to coalesce the ranges of two otherwise identical // DebugLocEntries. auto CurEntry = DebugLoc.rbegin(); DEBUG({ dbgs() << CurEntry->getValues().size() << " Values:\n"; for (auto &Value : CurEntry->getValues()) Value.dump(); dbgs() << "-----\n"; }); auto PrevEntry = std::next(CurEntry); if (PrevEntry != DebugLoc.rend() && PrevEntry->MergeRanges(*CurEntry)) DebugLoc.pop_back(); } } DbgVariable *DwarfDebug::createConcreteVariable(LexicalScope &Scope, InlinedVariable IV) { ensureAbstractVariableIsCreatedIfScoped(IV, Scope.getScopeNode()); ConcreteVariables.push_back(make_unique(IV.first, IV.second)); InfoHolder.addScopeVariable(&Scope, ConcreteVariables.back().get()); return ConcreteVariables.back().get(); } // Determine whether this DBG_VALUE is valid at the beginning of the function. static bool validAtEntry(const MachineInstr *MInsn) { auto MBB = MInsn->getParent(); // Is it in the entry basic block? if (!MBB->pred_empty()) return false; for (MachineBasicBlock::const_reverse_iterator I(MInsn); I != MBB->rend(); ++I) if (!(I->isDebugValue() || I->getFlag(MachineInstr::FrameSetup))) return false; return true; } // Find variables for each lexical scope. void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP, DenseSet &Processed) { // Grab the variable info that was squirreled away in the MMI side-table. 
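  // For example, a side-table entry for a variable "x" spilled to a frame
  // slot looks roughly like this (illustrative values only, not taken from
  // the source):
  //   VI.Var  = !DILocalVariable(name: "x", ...)
  //   VI.Expr = !DIExpression()
  //   VI.Slot = 2                  // frame index
  //   VI.Loc  = !DILocation(line: 7, scope: !...)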
collectVariableInfoFromMFTable(Processed); for (const auto &I : DbgValues) { InlinedVariable IV = I.first; if (Processed.count(IV)) continue; // Instruction ranges, specifying where IV is accessible. const auto &Ranges = I.second; if (Ranges.empty()) continue; LexicalScope *Scope = nullptr; if (const DILocation *IA = IV.second) Scope = LScopes.findInlinedScope(IV.first->getScope(), IA); else Scope = LScopes.findLexicalScope(IV.first->getScope()); // If variable scope is not found then skip this variable. if (!Scope) continue; Processed.insert(IV); DbgVariable *RegVar = createConcreteVariable(*Scope, IV); const MachineInstr *MInsn = Ranges.front().first; assert(MInsn->isDebugValue() && "History must begin with debug value"); // Check if there is a single DBG_VALUE, valid throughout the function. // A single constant is also considered valid for the entire function. if (Ranges.size() == 1 && (MInsn->getOperand(0).isImm() || (validAtEntry(MInsn) && Ranges.front().second == nullptr))) { RegVar->initializeDbgValue(MInsn); continue; } // Handle multiple DBG_VALUE instructions describing one variable. DebugLocStream::ListBuilder List(DebugLocs, TheCU, *Asm, *RegVar, *MInsn); // Build the location list for this variable. SmallVector Entries; buildLocationList(Entries, Ranges); // If the variable has a DIBasicType, extract it. Basic types cannot have // unique identifiers, so don't bother resolving the type with the // identifier map. const DIBasicType *BT = dyn_cast( static_cast(IV.first->getType())); // Finalize the entry by lowering it into a DWARF bytestream. for (auto &Entry : Entries) Entry.finalize(*Asm, List, BT); } // Collect info for variables that were optimized out. for (const DILocalVariable *DV : SP->getVariables()) { if (Processed.insert(InlinedVariable(DV, nullptr)).second) if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope())) createConcreteVariable(*Scope, InlinedVariable(DV, nullptr)); } } // Process beginning of an instruction. void DwarfDebug::beginInstruction(const MachineInstr *MI) { DebugHandlerBase::beginInstruction(MI); assert(CurMI); // Check if source location changes, but ignore DBG_VALUE and CFI locations. if (MI->isDebugValue() || MI->isCFIInstruction()) return; const DebugLoc &DL = MI->getDebugLoc(); // When we emit a line-0 record, we don't update PrevInstLoc; so look at // the last line number actually emitted, to see if it was line 0. unsigned LastAsmLine = Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine(); if (DL == PrevInstLoc) { // If we have an ongoing unspecified location, nothing to do here. if (!DL) return; // We have an explicit location, same as the previous location. // But we might be coming back to it after a line 0 record. if (LastAsmLine == 0 && DL.getLine() != 0) { // Reinstate the source location but not marked as a statement. const MDNode *Scope = DL.getScope(); recordSourceLine(DL.getLine(), DL.getCol(), Scope, /*Flags=*/0); } return; } if (!DL) { // We have an unspecified location, which might want to be line 0. // If we have already emitted a line-0 record, don't repeat it. if (LastAsmLine == 0) return; // If user said Don't Do That, don't do that. if (UnknownLocations == Disable) return; // See if we have a reason to emit a line-0 record now. // Reasons to emit a line-0 record include: // - User asked for it (UnknownLocations). // - Instruction has a label, so it's referenced from somewhere else, // possibly debug information; we want it to have a source location. 
// - Instruction is at the top of a block; we don't want to inherit the // location from the physically previous (maybe unrelated) block. if (UnknownLocations == Enable || PrevLabel || (PrevInstBB && PrevInstBB != MI->getParent())) { // Preserve the file and column numbers, if we can, to save space in // the encoded line table. // Do not update PrevInstLoc, it remembers the last non-0 line. const MDNode *Scope = nullptr; unsigned Column = 0; if (PrevInstLoc) { Scope = PrevInstLoc.getScope(); Column = PrevInstLoc.getCol(); } recordSourceLine(/*Line=*/0, Column, Scope, /*Flags=*/0); } return; } // We have an explicit location, different from the previous location. // Don't repeat a line-0 record, but otherwise emit the new location. // (The new location might be an explicit line 0, which we do emit.) if (PrevInstLoc && DL.getLine() == 0 && LastAsmLine == 0) return; unsigned Flags = 0; if (DL == PrologEndLoc) { Flags |= DWARF2_FLAG_PROLOGUE_END | DWARF2_FLAG_IS_STMT; PrologEndLoc = DebugLoc(); } // If the line changed, we call that a new statement; unless we went to // line 0 and came back, in which case it is not a new statement. unsigned OldLine = PrevInstLoc ? PrevInstLoc.getLine() : LastAsmLine; if (DL.getLine() && DL.getLine() != OldLine) Flags |= DWARF2_FLAG_IS_STMT; const MDNode *Scope = DL.getScope(); recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags); // If we're not at line 0, remember this location. if (DL.getLine()) PrevInstLoc = DL; } static DebugLoc findPrologueEndLoc(const MachineFunction *MF) { // First known non-DBG_VALUE and non-frame setup location marks // the beginning of the function body. for (const auto &MBB : *MF) for (const auto &MI : MBB) if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) return MI.getDebugLoc(); return DebugLoc(); } // Gather pre-function debug information. Assumes being called immediately // after the function entry point has been emitted. void DwarfDebug::beginFunction(const MachineFunction *MF) { CurFn = MF; // If there's no debug info for the function we're not going to do anything. if (!MMI->hasDebugInfo()) return; auto DI = MF->getFunction()->getSubprogram(); if (!DI) return; // Grab the lexical scopes for the function, if we don't have any of those // then we're not going to be able to do anything. DebugHandlerBase::beginFunction(MF); if (LScopes.empty()) return; // Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function // belongs to so that we add to the correct per-cu line table in the // non-asm case. LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); // FnScope->getScopeNode() and DI->second should represent the same function, // though they may not be the same MDNode due to inline functions merged in // LTO where the debug info metadata still differs (either due to distinct // written differences - two versions of a linkonce_odr function // written/copied into two separate files, or some sub-optimal metadata that // isn't structurally identical (see: file path/name info from clang, which // includes the directory of the cpp file being built, even when the file name // is absolute (such as an <> lookup header))) auto *SP = cast(FnScope->getScopeNode()); DwarfCompileUnit *TheCU = CUMap.lookup(SP->getUnit()); if (!TheCU) { assert(SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug && "DICompileUnit missing from llvm.dbg.cu?"); return; } if (Asm->OutStreamer->hasRawTextSupport()) // Use a single line table if we are generating assembly. 
Asm->OutStreamer->getContext().setDwarfCompileUnitID(0); else Asm->OutStreamer->getContext().setDwarfCompileUnitID(TheCU->getUniqueID()); // Record beginning of function. PrologEndLoc = findPrologueEndLoc(MF); if (DILocation *L = PrologEndLoc) { // We'd like to list the prologue as "not statements" but GDB behaves // poorly if we do that. Revisit this with caution/GDB (7.5+) testing. auto *SP = L->getInlinedAtScope()->getSubprogram(); recordSourceLine(SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT); } } // Gather and emit post-function debug information. void DwarfDebug::endFunction(const MachineFunction *MF) { assert(CurFn == MF && "endFunction should be called with the same function as beginFunction"); const DISubprogram *SP = MF->getFunction()->getSubprogram(); if (!MMI->hasDebugInfo() || !SP || SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) { // If we don't have a subprogram for this function then there will be a hole // in the range information. Keep note of this by setting the previously // used section to nullptr. PrevCU = nullptr; CurFn = nullptr; DebugHandlerBase::endFunction(MF); return; } // Set DwarfDwarfCompileUnitID in MCContext to default value. Asm->OutStreamer->getContext().setDwarfCompileUnitID(0); LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); assert(!FnScope || SP == FnScope->getScopeNode()); DwarfCompileUnit &TheCU = *CUMap.lookup(SP->getUnit()); DenseSet ProcessedVars; collectVariableInfo(TheCU, SP, ProcessedVars); // Add the range of this function to the list of ranges for the CU. TheCU.addRange(RangeSpan(Asm->getFunctionBegin(), Asm->getFunctionEnd())); // Under -gmlt, skip building the subprogram if there are no inlined // subroutines inside it. if (TheCU.getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly && LScopes.getAbstractScopesList().empty() && !IsDarwin) { assert(InfoHolder.getScopeVariables().empty()); assert(DbgValues.empty()); // FIXME: This wouldn't be true in LTO with a -g (with inlining) CU followed // by a -gmlt CU. Add a test and remove this assertion. assert(AbstractVariables.empty()); PrevLabel = nullptr; CurFn = nullptr; DebugHandlerBase::endFunction(MF); return; } #ifndef NDEBUG size_t NumAbstractScopes = LScopes.getAbstractScopesList().size(); #endif // Construct abstract scopes. for (LexicalScope *AScope : LScopes.getAbstractScopesList()) { auto *SP = cast(AScope->getScopeNode()); // Collect info for variables that were optimized out. for (const DILocalVariable *DV : SP->getVariables()) { if (!ProcessedVars.insert(InlinedVariable(DV, nullptr)).second) continue; ensureAbstractVariableIsCreated(InlinedVariable(DV, nullptr), DV->getScope()); assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes && "ensureAbstractVariableIsCreated inserted abstract scopes"); } constructAbstractSubprogramScopeDIE(AScope); } ProcessedSPNodes.insert(SP); TheCU.constructSubprogramScopeDIE(SP, FnScope); if (auto *SkelCU = TheCU.getSkeleton()) if (!LScopes.getAbstractScopesList().empty() && TheCU.getCUNode()->getSplitDebugInlining()) SkelCU->constructSubprogramScopeDIE(SP, FnScope); // Clear debug info // Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the // DbgVariables except those that are also in AbstractVariables (since they // can be used cross-function) InfoHolder.getScopeVariables().clear(); PrevLabel = nullptr; CurFn = nullptr; DebugHandlerBase::endFunction(MF); } // Register a source line with debug info. 
// Returns the unique label that was emitted and which provides
// correspondence to the source line list.
void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
                                  unsigned Flags) {
  StringRef Fn;
  StringRef Dir;
  unsigned Src = 1;
  unsigned Discriminator = 0;
  if (auto *Scope = cast_or_null<DIScope>(S)) {
    Fn = Scope->getFilename();
    Dir = Scope->getDirectory();
    if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope))
      if (getDwarfVersion() >= 4)
        Discriminator = LBF->getDiscriminator();

    unsigned CUID = Asm->OutStreamer->getContext().getDwarfCompileUnitID();
    Src = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID])
              .getOrCreateSourceID(Fn, Dir);
  }
  Asm->OutStreamer->EmitDwarfLocDirective(Src, Line, Col, Flags, 0,
                                          Discriminator, Fn);
}

//===----------------------------------------------------------------------===//
// Emit Methods
//===----------------------------------------------------------------------===//

// Emit the debug info section.
void DwarfDebug::emitDebugInfo() {
  DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
  Holder.emitUnits(/* UseOffsets */ false);
}

// Emit the abbreviation section.
void DwarfDebug::emitAbbreviations() {
  DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
  Holder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection());
}

void DwarfDebug::emitAccel(DwarfAccelTable &Accel, MCSection *Section,
                           StringRef TableName) {
  Accel.FinalizeTable(Asm, TableName);
  Asm->OutStreamer->SwitchSection(Section);

  // Emit the full data.
  Accel.emit(Asm, Section->getBeginSymbol(), this);
}

// Emit visible names into a hashed accelerator table section.
void DwarfDebug::emitAccelNames() {
  emitAccel(AccelNames, Asm->getObjFileLowering().getDwarfAccelNamesSection(),
            "Names");
}

// Emit objective C classes and categories into a hashed accelerator table
// section.
void DwarfDebug::emitAccelObjC() {
  emitAccel(AccelObjC, Asm->getObjFileLowering().getDwarfAccelObjCSection(),
            "ObjC");
}

// Emit namespace dies into a hashed accelerator table.
void DwarfDebug::emitAccelNamespaces() {
  emitAccel(AccelNamespace,
            Asm->getObjFileLowering().getDwarfAccelNamespaceSection(),
            "namespac");
}

// Emit type dies into a hashed accelerator table.
void DwarfDebug::emitAccelTypes() {
  emitAccel(AccelTypes, Asm->getObjFileLowering().getDwarfAccelTypesSection(),
            "types");
}

// Public name handling.
// The format for the various pubnames:
//
// dwarf pubnames - offset/name pairs where the offset is the offset into the
// CU for the DIE that is named.
//
// gnu pubnames - offset/index value/name tuples where the offset is the offset
// into the CU and the index value is computed according to the type of value
// for the DIE that is named.
//
// For type units the offset is the offset of the skeleton DIE. For split dwarf
// it's the offset within the debug_info/debug_types dwo section, however, the
// reference in the pubname header doesn't change.

/// computeIndexValue - Compute the gdb index value for the DIE and CU.
static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU,
                                                        const DIE *Die) {
  dwarf::GDBIndexEntryLinkage Linkage = dwarf::GIEL_STATIC;

  // We could have a specification DIE that has most of our knowledge;
  // look for that now.
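  // For example (derived from the switch below): a DW_TAG_subprogram whose
  // specification DIE carries DW_AT_external yields
  // (GIEK_FUNCTION, GIEL_EXTERNAL), while a DW_TAG_variable with no
  // DW_AT_external anywhere yields (GIEK_VARIABLE, GIEL_STATIC).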
if (DIEValue SpecVal = Die->findAttribute(dwarf::DW_AT_specification)) { DIE &SpecDIE = SpecVal.getDIEEntry().getEntry(); if (SpecDIE.findAttribute(dwarf::DW_AT_external)) Linkage = dwarf::GIEL_EXTERNAL; } else if (Die->findAttribute(dwarf::DW_AT_external)) Linkage = dwarf::GIEL_EXTERNAL; switch (Die->getTag()) { case dwarf::DW_TAG_class_type: case dwarf::DW_TAG_structure_type: case dwarf::DW_TAG_union_type: case dwarf::DW_TAG_enumeration_type: return dwarf::PubIndexEntryDescriptor( dwarf::GIEK_TYPE, CU->getLanguage() != dwarf::DW_LANG_C_plus_plus ? dwarf::GIEL_STATIC : dwarf::GIEL_EXTERNAL); case dwarf::DW_TAG_typedef: case dwarf::DW_TAG_base_type: case dwarf::DW_TAG_subrange_type: return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_TYPE, dwarf::GIEL_STATIC); case dwarf::DW_TAG_namespace: return dwarf::GIEK_TYPE; case dwarf::DW_TAG_subprogram: return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_FUNCTION, Linkage); case dwarf::DW_TAG_variable: return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE, Linkage); case dwarf::DW_TAG_enumerator: return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE, dwarf::GIEL_STATIC); default: return dwarf::GIEK_NONE; } } /// emitDebugPubNames - Emit visible names into a debug pubnames section. /// void DwarfDebug::emitDebugPubNames(bool GnuStyle) { MCSection *PSec = GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection() : Asm->getObjFileLowering().getDwarfPubNamesSection(); emitDebugPubSection(GnuStyle, PSec, "Names", &DwarfCompileUnit::getGlobalNames); } void DwarfDebug::emitDebugPubSection( bool GnuStyle, MCSection *PSec, StringRef Name, const StringMap &(DwarfCompileUnit::*Accessor)() const) { for (const auto &NU : CUMap) { DwarfCompileUnit *TheU = NU.second; const auto &Globals = (TheU->*Accessor)(); if (Globals.empty()) continue; if (auto *Skeleton = TheU->getSkeleton()) TheU = Skeleton; // Start the dwarf pubnames section. Asm->OutStreamer->SwitchSection(PSec); // Emit the header. Asm->OutStreamer->AddComment("Length of Public " + Name + " Info"); MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin"); MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end"); Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); Asm->OutStreamer->EmitLabel(BeginLabel); Asm->OutStreamer->AddComment("DWARF Version"); Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION); Asm->OutStreamer->AddComment("Offset of Compilation Unit Info"); Asm->emitDwarfSymbolReference(TheU->getLabelBegin()); Asm->OutStreamer->AddComment("Compilation Unit Length"); Asm->EmitInt32(TheU->getLength()); // Emit the pubnames for this compilation unit. for (const auto &GI : Globals) { const char *Name = GI.getKeyData(); const DIE *Entity = GI.second; Asm->OutStreamer->AddComment("DIE offset"); Asm->EmitInt32(Entity->getOffset()); if (GnuStyle) { dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity); Asm->OutStreamer->AddComment( Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " + dwarf::GDBIndexEntryLinkageString(Desc.Linkage)); Asm->EmitInt8(Desc.toBits()); } Asm->OutStreamer->AddComment("External Name"); Asm->OutStreamer->EmitBytes(StringRef(Name, GI.getKeyLength() + 1)); } Asm->OutStreamer->AddComment("End Mark"); Asm->EmitInt32(0); Asm->OutStreamer->EmitLabel(EndLabel); } } void DwarfDebug::emitDebugPubTypes(bool GnuStyle) { MCSection *PSec = GnuStyle ? 
Asm->getObjFileLowering().getDwarfGnuPubTypesSection() : Asm->getObjFileLowering().getDwarfPubTypesSection(); emitDebugPubSection(GnuStyle, PSec, "Types", &DwarfCompileUnit::getGlobalTypes); } /// Emit null-terminated strings into a debug str section. void DwarfDebug::emitDebugStr() { DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection()); } void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocStream::Entry &Entry) { auto &&Comments = DebugLocs.getComments(Entry); auto Comment = Comments.begin(); auto End = Comments.end(); for (uint8_t Byte : DebugLocs.getBytes(Entry)) Streamer.EmitInt8(Byte, Comment != End ? *(Comment++) : ""); } static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, ByteStreamer &Streamer, const DebugLocEntry::Value &Value, DwarfExpression &DwarfExpr) { DIExpressionCursor ExprCursor(Value.getExpression()); DwarfExpr.addFragmentOffset(Value.getExpression()); // Regular entry. if (Value.isInt()) { if (BT && (BT->getEncoding() == dwarf::DW_ATE_signed || BT->getEncoding() == dwarf::DW_ATE_signed_char)) DwarfExpr.AddSignedConstant(Value.getInt()); else DwarfExpr.AddUnsignedConstant(Value.getInt()); } else if (Value.isLocation()) { MachineLocation Loc = Value.getLoc(); const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo(); if (Loc.getOffset()) DwarfExpr.AddMachineRegIndirect(TRI, Loc.getReg(), Loc.getOffset()); else DwarfExpr.AddMachineRegExpression(TRI, ExprCursor, Loc.getReg()); } else if (Value.isConstantFP()) { APInt RawBytes = Value.getConstantFP()->getValueAPF().bitcastToAPInt(); DwarfExpr.AddUnsignedConstant(RawBytes); } DwarfExpr.AddExpression(std::move(ExprCursor)); } void DebugLocEntry::finalize(const AsmPrinter &AP, DebugLocStream::ListBuilder &List, const DIBasicType *BT) { DebugLocStream::EntryBuilder Entry(List, Begin, End); BufferByteStreamer Streamer = Entry.getStreamer(); DebugLocDwarfExpression DwarfExpr(AP.getDwarfVersion(), Streamer); const DebugLocEntry::Value &Value = Values[0]; if (Value.isFragment()) { // Emit all fragments that belong to the same variable and range. assert(all_of(Values, [](DebugLocEntry::Value P) { return P.isFragment(); }) && "all values are expected to be fragments"); assert(std::is_sorted(Values.begin(), Values.end()) && "fragments are expected to be sorted"); for (auto Fragment : Values) emitDebugLocValue(AP, BT, Streamer, Fragment, DwarfExpr); } else { assert(Values.size() == 1 && "only fragments may have >1 value"); emitDebugLocValue(AP, BT, Streamer, Value, DwarfExpr); } DwarfExpr.finalize(); } void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) { // Emit the size. Asm->OutStreamer->AddComment("Loc expr size"); Asm->EmitInt16(DebugLocs.getBytes(Entry).size()); // Emit the entry. APByteStreamer Streamer(*Asm); emitDebugLocEntry(Streamer, Entry); } // Emit locations into the debug loc section. void DwarfDebug::emitDebugLoc() { // Start the dwarf loc section. Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfLocSection()); unsigned char Size = Asm->getDataLayout().getPointerSize(); for (const auto &List : DebugLocs.getLists()) { Asm->OutStreamer->EmitLabel(List.Label); const DwarfCompileUnit *CU = List.CU; for (const auto &Entry : DebugLocs.getEntries(List)) { // Set up the range. This range is relative to the entry point of the // compile unit. 
This is a hard coded 0 for low_pc when we're emitting // ranges, or the DW_AT_low_pc on the compile unit otherwise. if (auto *Base = CU->getBaseAddress()) { Asm->EmitLabelDifference(Entry.BeginSym, Base, Size); Asm->EmitLabelDifference(Entry.EndSym, Base, Size); } else { Asm->OutStreamer->EmitSymbolValue(Entry.BeginSym, Size); Asm->OutStreamer->EmitSymbolValue(Entry.EndSym, Size); } emitDebugLocEntryLocation(Entry); } Asm->OutStreamer->EmitIntValue(0, Size); Asm->OutStreamer->EmitIntValue(0, Size); } } void DwarfDebug::emitDebugLocDWO() { Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfLocDWOSection()); for (const auto &List : DebugLocs.getLists()) { Asm->OutStreamer->EmitLabel(List.Label); for (const auto &Entry : DebugLocs.getEntries(List)) { // Just always use start_length for now - at least that's one address // rather than two. We could get fancier and try to, say, reuse an // address we know we've emitted elsewhere (the start of the function? // The start of the CU or CU subrange that encloses this range?) Asm->EmitInt8(dwarf::DW_LLE_startx_length); unsigned idx = AddrPool.getIndex(Entry.BeginSym); Asm->EmitULEB128(idx); Asm->EmitLabelDifference(Entry.EndSym, Entry.BeginSym, 4); emitDebugLocEntryLocation(Entry); } Asm->EmitInt8(dwarf::DW_LLE_end_of_list); } } struct ArangeSpan { const MCSymbol *Start, *End; }; // Emit a debug aranges section, containing a CU lookup for any // address we can tie back to a CU. void DwarfDebug::emitDebugARanges() { // Provides a unique id per text section. MapVector> SectionMap; // Filter labels by section. for (const SymbolCU &SCU : ArangeLabels) { if (SCU.Sym->isInSection()) { // Make a note of this symbol and it's section. MCSection *Section = &SCU.Sym->getSection(); if (!Section->getKind().isMetadata()) SectionMap[Section].push_back(SCU); } else { // Some symbols (e.g. common/bss on mach-o) can have no section but still // appear in the output. This sucks as we rely on sections to build // arange spans. We can do it without, but it's icky. SectionMap[nullptr].push_back(SCU); } } DenseMap> Spans; for (auto &I : SectionMap) { MCSection *Section = I.first; SmallVector &List = I.second; if (List.size() < 1) continue; // If we have no section (e.g. common), just write out // individual spans for each symbol. if (!Section) { for (const SymbolCU &Cur : List) { ArangeSpan Span; Span.Start = Cur.Sym; Span.End = nullptr; assert(Cur.CU); Spans[Cur.CU].push_back(Span); } continue; } // Sort the symbols by offset within the section. std::sort( List.begin(), List.end(), [&](const SymbolCU &A, const SymbolCU &B) { unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0; unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0; // Symbols with no order assigned should be placed at the end. // (e.g. section end labels) if (IA == 0) return false; if (IB == 0) return true; return IA < IB; }); // Insert a final terminator. List.push_back(SymbolCU(nullptr, Asm->OutStreamer->endSection(Section))); // Build spans between each label. const MCSymbol *StartSym = List[0].Sym; for (size_t n = 1, e = List.size(); n < e; n++) { const SymbolCU &Prev = List[n - 1]; const SymbolCU &Cur = List[n]; // Try and build the longest span we can within the same CU. if (Cur.CU != Prev.CU) { ArangeSpan Span; Span.Start = StartSym; Span.End = Cur.Sym; assert(Prev.CU); Spans[Prev.CU].push_back(Span); StartSym = Cur.Sym; } } } // Start the dwarf aranges section. 
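  // Each arange "set" below covers one CU and is laid out roughly as follows
  // (a sketch assuming DWARF32 4-byte offsets and 8-byte pointers; the field
  // sizes match the emission code that follows):
  //   length(4) version(2) cu_offset(4) addr_size(1) seg_size(1) padding
  //   [tuple: start(8) length(8)]... terminator(0, 0)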
Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfARangesSection()); unsigned PtrSize = Asm->getDataLayout().getPointerSize(); // Build a list of CUs used. std::vector CUs; for (const auto &it : Spans) { DwarfCompileUnit *CU = it.first; CUs.push_back(CU); } // Sort the CU list (again, to ensure consistent output order). std::sort(CUs.begin(), CUs.end(), [](const DwarfCompileUnit *A, const DwarfCompileUnit *B) { return A->getUniqueID() < B->getUniqueID(); }); // Emit an arange table for each CU we used. for (DwarfCompileUnit *CU : CUs) { std::vector &List = Spans[CU]; // Describe the skeleton CU's offset and length, not the dwo file's. if (auto *Skel = CU->getSkeleton()) CU = Skel; // Emit size of content not including length itself. unsigned ContentSize = sizeof(int16_t) + // DWARF ARange version number sizeof(int32_t) + // Offset of CU in the .debug_info section sizeof(int8_t) + // Pointer Size (in bytes) sizeof(int8_t); // Segment Size (in bytes) unsigned TupleSize = PtrSize * 2; // 7.20 in the Dwarf specs requires the table to be aligned to a tuple. unsigned Padding = OffsetToAlignment(sizeof(int32_t) + ContentSize, TupleSize); ContentSize += Padding; ContentSize += (List.size() + 1) * TupleSize; // For each compile unit, write the list of spans it covers. Asm->OutStreamer->AddComment("Length of ARange Set"); Asm->EmitInt32(ContentSize); Asm->OutStreamer->AddComment("DWARF Arange version number"); Asm->EmitInt16(dwarf::DW_ARANGES_VERSION); Asm->OutStreamer->AddComment("Offset Into Debug Info Section"); Asm->emitDwarfSymbolReference(CU->getLabelBegin()); Asm->OutStreamer->AddComment("Address Size (in bytes)"); Asm->EmitInt8(PtrSize); Asm->OutStreamer->AddComment("Segment Size (in bytes)"); Asm->EmitInt8(0); Asm->OutStreamer->emitFill(Padding, 0xff); for (const ArangeSpan &Span : List) { Asm->EmitLabelReference(Span.Start, PtrSize); // Calculate the size as being from the span start to it's end. if (Span.End) { Asm->EmitLabelDifference(Span.End, Span.Start, PtrSize); } else { // For symbols without an end marker (e.g. common), we // write a single arange entry containing just that one symbol. uint64_t Size = SymSize[Span.Start]; if (Size == 0) Size = 1; Asm->OutStreamer->EmitIntValue(Size, PtrSize); } } Asm->OutStreamer->AddComment("ARange terminator"); Asm->OutStreamer->EmitIntValue(0, PtrSize); Asm->OutStreamer->EmitIntValue(0, PtrSize); } } /// Emit address ranges into a debug ranges section. void DwarfDebug::emitDebugRanges() { // Start the dwarf ranges section. Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfRangesSection()); // Size for our labels. unsigned char Size = Asm->getDataLayout().getPointerSize(); // Grab the specific ranges for the compile units in the module. for (const auto &I : CUMap) { DwarfCompileUnit *TheCU = I.second; if (auto *Skel = TheCU->getSkeleton()) TheCU = Skel; // Iterate over the misc ranges for the compile units in the module. for (const RangeSpanList &List : TheCU->getRangeLists()) { // Emit our symbol so we can find the beginning of the range. 
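      // When the CU has a base address, each range bound is emitted as a
      // label difference (resolvable at assembly time, so no relocations are
      // needed); otherwise absolute symbol values are emitted. E.g., with
      // base .Lfunc_begin0 the range [.Ltmp0, .Ltmp1) becomes the pair
      // .Ltmp0-.Lfunc_begin0, .Ltmp1-.Lfunc_begin0 (label names
      // illustrative).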
Asm->OutStreamer->EmitLabel(List.getSym()); for (const RangeSpan &Range : List.getRanges()) { const MCSymbol *Begin = Range.getStart(); const MCSymbol *End = Range.getEnd(); assert(Begin && "Range without a begin symbol?"); assert(End && "Range without an end symbol?"); if (auto *Base = TheCU->getBaseAddress()) { Asm->EmitLabelDifference(Begin, Base, Size); Asm->EmitLabelDifference(End, Base, Size); } else { Asm->OutStreamer->EmitSymbolValue(Begin, Size); Asm->OutStreamer->EmitSymbolValue(End, Size); } } // And terminate the list with two 0 values. Asm->OutStreamer->EmitIntValue(0, Size); Asm->OutStreamer->EmitIntValue(0, Size); } } } void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) { for (auto *MN : Nodes) { if (auto *M = dyn_cast(MN)) emitMacro(*M); else if (auto *F = dyn_cast(MN)) emitMacroFile(*F, U); else llvm_unreachable("Unexpected DI type!"); } } void DwarfDebug::emitMacro(DIMacro &M) { Asm->EmitULEB128(M.getMacinfoType()); Asm->EmitULEB128(M.getLine()); StringRef Name = M.getName(); StringRef Value = M.getValue(); Asm->OutStreamer->EmitBytes(Name); if (!Value.empty()) { // There should be one space between macro name and macro value. Asm->EmitInt8(' '); Asm->OutStreamer->EmitBytes(Value); } Asm->EmitInt8('\0'); } void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) { assert(F.getMacinfoType() == dwarf::DW_MACINFO_start_file); Asm->EmitULEB128(dwarf::DW_MACINFO_start_file); Asm->EmitULEB128(F.getLine()); DIFile *File = F.getFile(); unsigned FID = U.getOrCreateSourceID(File->getFilename(), File->getDirectory()); Asm->EmitULEB128(FID); handleMacroNodes(F.getElements(), U); Asm->EmitULEB128(dwarf::DW_MACINFO_end_file); } /// Emit macros into a debug macinfo section. void DwarfDebug::emitDebugMacinfo() { // Start the dwarf macinfo section. Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfMacinfoSection()); for (const auto &P : CUMap) { auto &TheCU = *P.second; auto *SkCU = TheCU.getSkeleton(); DwarfCompileUnit &U = SkCU ? *SkCU : TheCU; auto *CUNode = cast(P.first); Asm->OutStreamer->EmitLabel(U.getMacroLabelBegin()); handleMacroNodes(CUNode->getMacros(), U); } Asm->OutStreamer->AddComment("End Of Macro List Mark"); Asm->EmitInt8(0); } // DWARF5 Experimental Separate Dwarf emitters. void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, std::unique_ptr NewU) { NewU->addString(Die, dwarf::DW_AT_GNU_dwo_name, U.getCUNode()->getSplitDebugFilename()); if (!CompilationDir.empty()) NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); addGnuPubAttributes(*NewU, Die); SkeletonHolder.addUnit(std::move(NewU)); } // This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list, // DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id, // DW_AT_addr_base, DW_AT_ranges_base. DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { auto OwnedUnit = make_unique( CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder); DwarfCompileUnit &NewCU = *OwnedUnit; NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection()); NewCU.initStmtList(); initSkeletonUnit(CU, NewCU.getUnitDie(), std::move(OwnedUnit)); return NewCU; } // Emit the .debug_info.dwo section for separated dwarf. This contains the // compile units that would normally be in debug_info. void DwarfDebug::emitDebugInfoDWO() { assert(useSplitDwarf() && "No split dwarf debug info?"); // Don't emit relocations into the dwo file. 
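  // (The linker never applies relocations inside a .dwo file, so plain
  // section offsets keep the split unit self-contained.)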
  InfoHolder.emitUnits(/* UseOffsets */ true);
}

// Emit the .debug_abbrev.dwo section for separated dwarf. This contains the
// abbreviations for the .debug_info.dwo section.
void DwarfDebug::emitDebugAbbrevDWO() {
  assert(useSplitDwarf() && "No split dwarf?");
  InfoHolder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevDWOSection());
}

void DwarfDebug::emitDebugLineDWO() {
  assert(useSplitDwarf() && "No split dwarf?");
  Asm->OutStreamer->SwitchSection(
      Asm->getObjFileLowering().getDwarfLineDWOSection());
  SplitTypeUnitFileTable.Emit(*Asm->OutStreamer, MCDwarfLineTableParams());
}

// Emit the .debug_str.dwo section for separated dwarf. This contains the
// string section and is identical in format to traditional .debug_str
// sections.
void DwarfDebug::emitDebugStrDWO() {
  assert(useSplitDwarf() && "No split dwarf?");
  MCSection *OffSec = Asm->getObjFileLowering().getDwarfStrOffDWOSection();
  InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(),
                         OffSec);
}

MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {
  if (!useSplitDwarf())
    return nullptr;
  if (SingleCU)
    SplitTypeUnitFileTable.setCompilationDir(CU.getCUNode()->getDirectory());
  return &SplitTypeUnitFileTable;
}

uint64_t DwarfDebug::makeTypeSignature(StringRef Identifier) {
  MD5 Hash;
  Hash.update(Identifier);
  // ... take the least significant 8 bytes and return those. Our MD5
  // implementation always returns its results in little endian, so swap bytes
  // appropriately.
  MD5::MD5Result Result;
  Hash.final(Result);
  return support::endian::read64le(Result + 8);
}

void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
                                      StringRef Identifier, DIE &RefDie,
                                      const DICompositeType *CTy) {
  // Fast path: if we're already building some type units and one of them has
  // used the address pool, we know we're going to throw away all this work
  // anyway, so don't bother building dependent types.
  if (!TypeUnitsUnderConstruction.empty() && AddrPool.hasBeenUsed())
    return;

  auto Ins = TypeSignatures.insert(std::make_pair(CTy, 0));
  if (!Ins.second) {
    CU.addDIETypeSignature(RefDie, Ins.first->second);
    return;
  }

  bool TopLevelType = TypeUnitsUnderConstruction.empty();
  AddrPool.resetUsedFlag();

  auto OwnedUnit = make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder,
                                              getDwoLineTable(CU));
  DwarfTypeUnit &NewTU = *OwnedUnit;
  DIE &UnitDie = NewTU.getUnitDie();
  TypeUnitsUnderConstruction.emplace_back(std::move(OwnedUnit), CTy);

  NewTU.addUInt(UnitDie, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
                CU.getLanguage());

  uint64_t Signature = makeTypeSignature(Identifier);
  NewTU.setTypeSignature(Signature);
  Ins.first->second = Signature;

  if (useSplitDwarf())
    NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesDWOSection());
  else {
    CU.applyStmtList(UnitDie);
    NewTU.setSection(
        Asm->getObjFileLowering().getDwarfTypesSection(Signature));
  }

  NewTU.setType(NewTU.createTypeDIE(CTy));

  if (TopLevelType) {
    auto TypeUnitsToAdd = std::move(TypeUnitsUnderConstruction);
    TypeUnitsUnderConstruction.clear();

    // Types referencing entries in the address table cannot be placed in type
    // units.
    if (AddrPool.hasBeenUsed()) {
      // Remove all the types built while building this type.
      // This is pessimistic as some of these types might not be dependent on
      // the type that used an address.
      for (const auto &TU : TypeUnitsToAdd)
        TypeSignatures.erase(TU.second);

      // Construct this type in the CU directly.
      // This is inefficient because all the dependent types will be rebuilt
      // from scratch, including building them in type units, discovering that
      // they depend on addresses, throwing them out and rebuilding them.
      CU.constructTypeDIE(RefDie, cast<DICompositeType>(CTy));
      return;
    }

    // If the type wasn't dependent on fission addresses, finish adding the
    // type and all its dependent types.
    for (auto &TU : TypeUnitsToAdd) {
      InfoHolder.computeSizeAndOffsetsForUnit(TU.first.get());
      InfoHolder.emitUnit(TU.first.get(), useSplitDwarf());
    }
  }
  CU.addDIETypeSignature(RefDie, Signature);
}

// Accelerator table mutators - add each name along with its companion
// DIE to the proper table while ensuring that the name that we're going
// to reference is in the string table. We do this since the names we
// add are not always identical to the names in the DIE.
void DwarfDebug::addAccelName(StringRef Name, const DIE &Die) {
  if (!useDwarfAccelTables())
    return;
  AccelNames.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die);
}

void DwarfDebug::addAccelObjC(StringRef Name, const DIE &Die) {
  if (!useDwarfAccelTables())
    return;
  AccelObjC.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die);
}

void DwarfDebug::addAccelNamespace(StringRef Name, const DIE &Die) {
  if (!useDwarfAccelTables())
    return;
  AccelNamespace.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name),
                         &Die);
}

void DwarfDebug::addAccelType(StringRef Name, const DIE &Die, char Flags) {
  if (!useDwarfAccelTables())
    return;
  AccelTypes.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die);
}

uint16_t DwarfDebug::getDwarfVersion() const {
  return Asm->OutStreamer->getContext().getDwarfVersion();
}
Index: projects/clang400-import/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
===================================================================
--- projects/clang400-import/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h	(revision 314176)
+++ projects/clang400-import/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h	(revision 314177)
@@ -1,559 +1,561 @@
//===-- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework ------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains support for writing dwarf debug info into asm files.
// //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H #include "DbgValueHistoryCalculator.h" #include "DebugHandlerBase.h" #include "DebugLocStream.h" #include "DwarfAccelTable.h" #include "DwarfFile.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/CodeGen/DIE.h" #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MachineLocation.h" #include "llvm/Support/Allocator.h" #include "llvm/Target/TargetOptions.h" #include namespace llvm { class AsmPrinter; class ByteStreamer; class ConstantInt; class ConstantFP; class DebugLocEntry; class DwarfCompileUnit; class DwarfDebug; class DwarfTypeUnit; class DwarfUnit; class MachineModuleInfo; //===----------------------------------------------------------------------===// /// This class is used to track local variable information. /// /// Variables can be created from allocas, in which case they're generated from /// the MMI table. Such variables can have multiple expressions and frame -/// indices. The \a Expr and \a FrameIndices array must match. +/// indices. /// /// Variables can be created from \c DBG_VALUE instructions. Those whose /// location changes over time use \a DebugLocListIndex, while those with a /// single instruction use \a MInsn and (optionally) a single entry of \a Expr. /// /// Variables that have been optimized out use none of these fields. class DbgVariable { const DILocalVariable *Var; /// Variable Descriptor. const DILocation *IA; /// Inlined at location. - SmallVector Expr; /// Complex address. DIE *TheDIE = nullptr; /// Variable DIE. unsigned DebugLocListIndex = ~0u; /// Offset in DebugLocs. const MachineInstr *MInsn = nullptr; /// DBG_VALUE instruction. - SmallVector FrameIndex; /// Frame index. + struct FrameIndexExpr { + int FI; + const DIExpression *Expr; + }; + mutable SmallVector + FrameIndexExprs; /// Frame index + expression. + public: /// Construct a DbgVariable. /// /// Creates a variable without any DW_AT_location. Call \a initializeMMI() /// for MMI entries, or \a initializeDbgValue() for DBG_VALUE instructions. DbgVariable(const DILocalVariable *V, const DILocation *IA) : Var(V), IA(IA) {} /// Initialize from the MMI table. void initializeMMI(const DIExpression *E, int FI) { - assert(Expr.empty() && "Already initialized?"); - assert(FrameIndex.empty() && "Already initialized?"); + assert(FrameIndexExprs.empty() && "Already initialized?"); assert(!MInsn && "Already initialized?"); assert((!E || E->isValid()) && "Expected valid expression"); assert(~FI && "Expected valid index"); - Expr.push_back(E); - FrameIndex.push_back(FI); + FrameIndexExprs.push_back({FI, E}); } /// Initialize from a DBG_VALUE instruction. 
void initializeDbgValue(const MachineInstr *DbgValue) { - assert(Expr.empty() && "Already initialized?"); - assert(FrameIndex.empty() && "Already initialized?"); + assert(FrameIndexExprs.empty() && "Already initialized?"); assert(!MInsn && "Already initialized?"); assert(Var == DbgValue->getDebugVariable() && "Wrong variable"); assert(IA == DbgValue->getDebugLoc()->getInlinedAt() && "Wrong inlined-at"); MInsn = DbgValue; if (auto *E = DbgValue->getDebugExpression()) if (E->getNumElements()) - Expr.push_back(E); + FrameIndexExprs.push_back({0, E}); } // Accessors. const DILocalVariable *getVariable() const { return Var; } const DILocation *getInlinedAt() const { return IA; } - ArrayRef getExpression() const { return Expr; } const DIExpression *getSingleExpression() const { - assert(MInsn && Expr.size() <= 1); - return Expr.size() ? Expr[0] : nullptr; + assert(MInsn && FrameIndexExprs.size() <= 1); + return FrameIndexExprs.size() ? FrameIndexExprs[0].Expr : nullptr; } void setDIE(DIE &D) { TheDIE = &D; } DIE *getDIE() const { return TheDIE; } void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; } unsigned getDebugLocListIndex() const { return DebugLocListIndex; } StringRef getName() const { return Var->getName(); } const MachineInstr *getMInsn() const { return MInsn; } - ArrayRef getFrameIndex() const { return FrameIndex; } + /// Get the FI entries, sorted by fragment offset. + ArrayRef getFrameIndexExprs() const; + bool hasFrameIndexExprs() const { return !FrameIndexExprs.empty(); } void addMMIEntry(const DbgVariable &V) { assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry"); assert(V.DebugLocListIndex == ~0U && !V.MInsn && "not an MMI entry"); assert(V.Var == Var && "conflicting variable"); assert(V.IA == IA && "conflicting inlined-at location"); - assert(!FrameIndex.empty() && "Expected an MMI entry"); - assert(!V.FrameIndex.empty() && "Expected an MMI entry"); - assert(Expr.size() == FrameIndex.size() && "Mismatched expressions"); - assert(V.Expr.size() == V.FrameIndex.size() && "Mismatched expressions"); + assert(!FrameIndexExprs.empty() && "Expected an MMI entry"); + assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry"); - Expr.append(V.Expr.begin(), V.Expr.end()); - FrameIndex.append(V.FrameIndex.begin(), V.FrameIndex.end()); - assert(all_of(Expr, [](const DIExpression *E) { - return E && E->isFragment(); - }) && "conflicting locations for variable"); + FrameIndexExprs.append(V.FrameIndexExprs.begin(), V.FrameIndexExprs.end()); + assert(all_of(FrameIndexExprs, + [](FrameIndexExpr &FIE) { + return FIE.Expr && FIE.Expr->isFragment(); + }) && + "conflicting locations for variable"); } // Translate tag to proper Dwarf tag. dwarf::Tag getTag() const { // FIXME: Why don't we just infer this tag and store it all along? if (Var->isParameter()) return dwarf::DW_TAG_formal_parameter; return dwarf::DW_TAG_variable; } /// Return true if DbgVariable is artificial. 
bool isArtificial() const { if (Var->isArtificial()) return true; if (getType()->isArtificial()) return true; return false; } bool isObjectPointer() const { if (Var->isObjectPointer()) return true; if (getType()->isObjectPointer()) return true; return false; } bool hasComplexAddress() const { assert(MInsn && "Expected DBG_VALUE, not MMI variable"); - assert(FrameIndex.empty() && "Expected DBG_VALUE, not MMI variable"); - assert( - (Expr.empty() || (Expr.size() == 1 && Expr.back()->getNumElements())) && - "Invalid Expr for DBG_VALUE"); - return !Expr.empty(); + assert((FrameIndexExprs.empty() || + (FrameIndexExprs.size() == 1 && + FrameIndexExprs[0].Expr->getNumElements())) && + "Invalid Expr for DBG_VALUE"); + return !FrameIndexExprs.empty(); } bool isBlockByrefVariable() const; const DIType *getType() const; private: template T *resolve(TypedDINodeRef Ref) const { return Ref.resolve(); } }; /// Helper used to pair up a symbol and its DWARF compile unit. struct SymbolCU { SymbolCU(DwarfCompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {} const MCSymbol *Sym; DwarfCompileUnit *CU; }; /// Collects and handles dwarf debug information. class DwarfDebug : public DebugHandlerBase { /// All DIEValues are allocated through this allocator. BumpPtrAllocator DIEValueAllocator; /// Maps MDNode with its corresponding DwarfCompileUnit. MapVector CUMap; /// Maps a CU DIE with its corresponding DwarfCompileUnit. DenseMap CUDieMap; /// List of all labels used in aranges generation. std::vector ArangeLabels; /// Size of each symbol emitted (for those symbols that have a specific size). DenseMap SymSize; /// Collection of abstract variables. DenseMap> AbstractVariables; SmallVector, 64> ConcreteVariables; /// Collection of DebugLocEntry. Stored in a linked list so that DIELocLists /// can refer to them in spite of insertions into this list. DebugLocStream DebugLocs; /// This is a collection of subprogram MDNodes that are processed to /// create DIEs. SetVector, SmallPtrSet> ProcessedSPNodes; /// If nonnull, stores the current machine function we're processing. const MachineFunction *CurFn; /// If nonnull, stores the CU in which the previous subprogram was contained. const DwarfCompileUnit *PrevCU; /// As an optimization, there is no need to emit an entry in the directory /// table for the same directory as DW_AT_comp_dir. StringRef CompilationDir; /// Holder for the file specific debug information. DwarfFile InfoHolder; /// Holders for the various debug information flags that we might need to /// have exposed. See accessor functions below for description. /// Map from MDNodes for user-defined types to their type signatures. Also /// used to keep track of which types we have emitted type units for. DenseMap TypeSignatures; SmallVector< std::pair, const DICompositeType *>, 1> TypeUnitsUnderConstruction; /// Whether to emit the pubnames/pubtypes sections. bool HasDwarfPubSections; /// Whether to use the GNU TLS opcode (instead of the standard opcode). bool UseGNUTLSOpcode; /// Whether to use DWARF 2 bitfields (instead of the DWARF 4 format). bool UseDWARF2Bitfields; /// Whether to emit all linkage names, or just abstract subprograms. bool UseAllLinkageNames; /// DWARF5 Experimental Options /// @{ bool HasDwarfAccelTables; bool HasAppleExtensionAttributes; bool HasSplitDwarf; /// Separated Dwarf Variables /// In general these will all be for bits that are left in the /// original object file, rather than things that are meant /// to be in the .dwo sections. /// Holder for the skeleton information. 
DwarfFile SkeletonHolder; /// Store file names for type units under fission in a line table /// header that will be emitted into debug_line.dwo. // FIXME: replace this with a map from comp_dir to table so that we // can emit multiple tables during LTO each of which uses directory // 0, referencing the comp_dir of all the type units that use it. MCDwarfDwoLineTable SplitTypeUnitFileTable; /// @} /// True iff there are multiple CUs in this module. bool SingleCU; bool IsDarwin; AddressPool AddrPool; DwarfAccelTable AccelNames; DwarfAccelTable AccelObjC; DwarfAccelTable AccelNamespace; DwarfAccelTable AccelTypes; // Identify a debugger for "tuning" the debug info. DebuggerKind DebuggerTuning; /// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger. /// /// Returns whether we are "tuning" for a given debugger. /// Should be used only within the constructor, to set feature flags. /// @{ bool tuneForGDB() const { return DebuggerTuning == DebuggerKind::GDB; } bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; } bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; } /// @} MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &); const SmallVectorImpl> &getUnits() { return InfoHolder.getUnits(); } typedef DbgValueHistoryMap::InlinedVariable InlinedVariable; /// Find abstract variable associated with Var. DbgVariable *getExistingAbstractVariable(InlinedVariable IV, const DILocalVariable *&Cleansed); DbgVariable *getExistingAbstractVariable(InlinedVariable IV); void createAbstractVariable(const DILocalVariable *DV, LexicalScope *Scope); void ensureAbstractVariableIsCreated(InlinedVariable Var, const MDNode *Scope); void ensureAbstractVariableIsCreatedIfScoped(InlinedVariable Var, const MDNode *Scope); DbgVariable *createConcreteVariable(LexicalScope &Scope, InlinedVariable IV); /// Construct a DIE for this abstract scope. void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); void finishVariableDefinitions(); void finishSubprogramDefinitions(); /// Finish off debug information after all functions have been /// processed. void finalizeModuleInfo(); /// Emit the debug info section. void emitDebugInfo(); /// Emit the abbreviation section. void emitAbbreviations(); /// Emit a specified accelerator table. void emitAccel(DwarfAccelTable &Accel, MCSection *Section, StringRef TableName); /// Emit visible names into a hashed accelerator table section. void emitAccelNames(); /// Emit objective C classes and categories into a hashed /// accelerator table section. void emitAccelObjC(); /// Emit namespace dies into a hashed accelerator table. void emitAccelNamespaces(); /// Emit type dies into a hashed accelerator table. void emitAccelTypes(); /// Emit visible names into a debug pubnames section. /// \param GnuStyle determines whether or not we want to emit /// additional information into the table ala newer gcc for gdb /// index. void emitDebugPubNames(bool GnuStyle = false); /// Emit visible types into a debug pubtypes section. /// \param GnuStyle determines whether or not we want to emit /// additional information into the table ala newer gcc for gdb /// index. void emitDebugPubTypes(bool GnuStyle = false); void emitDebugPubSection( bool GnuStyle, MCSection *PSec, StringRef Name, const StringMap &(DwarfCompileUnit::*Accessor)() const); /// Emit null-terminated strings into a debug str section. void emitDebugStr(); /// Emit variable locations into a debug loc section. 
void emitDebugLoc(); /// Emit variable locations into a debug loc dwo section. void emitDebugLocDWO(); /// Emit address ranges into a debug aranges section. void emitDebugARanges(); /// Emit address ranges into a debug ranges section. void emitDebugRanges(); /// Emit macros into a debug macinfo section. void emitDebugMacinfo(); void emitMacro(DIMacro &M); void emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U); void handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U); /// DWARF 5 Experimental Split Dwarf Emitters /// Initialize common features of skeleton units. void initSkeletonUnit(const DwarfUnit &U, DIE &Die, std::unique_ptr NewU); /// Construct the split debug info compile unit for the debug info /// section. DwarfCompileUnit &constructSkeletonCU(const DwarfCompileUnit &CU); /// Emit the debug info dwo section. void emitDebugInfoDWO(); /// Emit the debug abbrev dwo section. void emitDebugAbbrevDWO(); /// Emit the debug line dwo section. void emitDebugLineDWO(); /// Emit the debug str dwo section. void emitDebugStrDWO(); /// Flags to let the linker know we have emitted new style pubnames. Only /// emit it here if we don't have a skeleton CU for split dwarf. void addGnuPubAttributes(DwarfUnit &U, DIE &D) const; /// Create new DwarfCompileUnit for the given metadata node with tag /// DW_TAG_compile_unit. DwarfCompileUnit &constructDwarfCompileUnit(const DICompileUnit *DIUnit); /// Construct imported_module or imported_declaration DIE. void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, const DIImportedEntity *N); /// Register a source line with debug info. Returns the unique /// label that was emitted and which provides correspondence to the /// source line list. void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope, unsigned Flags); /// Populate LexicalScope entries with variables' info. void collectVariableInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP, DenseSet &ProcessedVars); /// Build the location list for all DBG_VALUEs in the /// function that describe the same variable. void buildLocationList(SmallVectorImpl &DebugLoc, const DbgValueHistoryMap::InstrRanges &Ranges); /// Collect variable information from the side table maintained by MF. void collectVariableInfoFromMFTable(DenseSet &P); public: //===--------------------------------------------------------------------===// // Main entry points. // DwarfDebug(AsmPrinter *A, Module *M); ~DwarfDebug() override; /// Emit all Dwarf sections that should come prior to the /// content. void beginModule(); /// Emit all Dwarf sections that should come after the content. void endModule() override; /// Gather pre-function debug information. void beginFunction(const MachineFunction *MF) override; /// Gather and emit post-function debug information. void endFunction(const MachineFunction *MF) override; /// Process beginning of an instruction. void beginInstruction(const MachineInstr *MI) override; /// Perform an MD5 checksum of \p Identifier and return the lower 64 bits. static uint64_t makeTypeSignature(StringRef Identifier); /// Add a DIE to the set of types that we're going to pull into /// type units. void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier, DIE &Die, const DICompositeType *CTy); /// Add a label so that arange data can be generated for it. void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); } /// For symbols that have a size designated (e.g. common symbols), /// this tracks that size. 
void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override { SymSize[Sym] = Size; } /// Returns whether we should emit all DW_AT_[MIPS_]linkage_name. /// If not, we still might emit certain cases. bool useAllLinkageNames() const { return UseAllLinkageNames; } /// Returns whether to use DW_OP_GNU_push_tls_address, instead of the /// standard DW_OP_form_tls_address opcode bool useGNUTLSOpcode() const { return UseGNUTLSOpcode; } /// Returns whether to use the DWARF2 format for bitfields instyead of the /// DWARF4 format. bool useDWARF2Bitfields() const { return UseDWARF2Bitfields; } // Experimental DWARF5 features. /// Returns whether or not to emit tables that dwarf consumers can /// use to accelerate lookup. bool useDwarfAccelTables() const { return HasDwarfAccelTables; } bool useAppleExtensionAttributes() const { return HasAppleExtensionAttributes; } /// Returns whether or not to change the current debug info for the /// split dwarf proposal support. bool useSplitDwarf() const { return HasSplitDwarf; } /// Returns the Dwarf Version. uint16_t getDwarfVersion() const; /// Returns the previous CU that was being updated const DwarfCompileUnit *getPrevCU() const { return PrevCU; } void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; } /// Returns the entries for the .debug_loc section. const DebugLocStream &getDebugLocs() const { return DebugLocs; } /// Emit an entry for the debug loc section. This can be used to /// handle an entry that's going to be emitted into the debug loc section. void emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocStream::Entry &Entry); /// Emit the location for a debug loc entry, including the size header. void emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry); /// Find the MDNode for the given reference. template T *resolve(TypedDINodeRef Ref) const { return Ref.resolve(); } void addSubprogramNames(const DISubprogram *SP, DIE &Die); AddressPool &getAddressPool() { return AddrPool; } void addAccelName(StringRef Name, const DIE &Die); void addAccelObjC(StringRef Name, const DIE &Die); void addAccelNamespace(StringRef Name, const DIE &Die); void addAccelType(StringRef Name, const DIE &Die, char Flags); const MachineFunction *getCurrentFunction() const { return CurFn; } /// A helper function to check whether the DIE for a given Scope is /// going to be null. bool isLexicalScopeDIENull(LexicalScope *Scope); }; } // End of namespace llvm #endif Index: projects/clang400-import/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp =================================================================== --- projects/clang400-import/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp (revision 314176) +++ projects/clang400-import/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp (revision 314177) @@ -1,1752 +1,1754 @@ //=- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -*- C++ -*-=// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file contains a pass that performs load / store related peephole // optimizations. This pass should be run after register allocation. 
// //===----------------------------------------------------------------------===// #include "AArch64InstrInfo.h" #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; #define DEBUG_TYPE "aarch64-ldst-opt" STATISTIC(NumPairCreated, "Number of load/store pair instructions generated"); STATISTIC(NumPostFolded, "Number of post-index updates folded"); STATISTIC(NumPreFolded, "Number of pre-index updates folded"); STATISTIC(NumUnscaledPairCreated, "Number of load/store from unscaled generated"); STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted"); STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted"); // The LdStLimit limits how far we search for load/store pairs. static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit", cl::init(20), cl::Hidden); // The UpdateLimit limits how far we search for update instructions when we form // pre-/post-index instructions. static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden); #define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass" namespace { typedef struct LdStPairFlags { // If a matching instruction is found, MergeForward is set to true if the // merge is to remove the first instruction and replace the second with // a pair-wise insn, and false if the reverse is true. bool MergeForward; // SExtIdx gives the index of the result of the load pair that must be // extended. The value of SExtIdx assumes that the paired load produces the // value in this order: (I, returned iterator), i.e., -1 means no value has // to be extended, 0 means I, and 1 means the returned iterator. int SExtIdx; LdStPairFlags() : MergeForward(false), SExtIdx(-1) {} void setMergeForward(bool V = true) { MergeForward = V; } bool getMergeForward() const { return MergeForward; } void setSExtIdx(int V) { SExtIdx = V; } int getSExtIdx() const { return SExtIdx; } } LdStPairFlags; struct AArch64LoadStoreOpt : public MachineFunctionPass { static char ID; AArch64LoadStoreOpt() : MachineFunctionPass(ID) { initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry()); } const AArch64InstrInfo *TII; const TargetRegisterInfo *TRI; const AArch64Subtarget *Subtarget; // Track which registers have been modified and used. BitVector ModifiedRegs, UsedRegs; // Scan the instructions looking for a load/store that can be combined // with the current instruction into a load/store pair. // Return the matching instruction if one is found, else MBB->end(). MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, LdStPairFlags &Flags, unsigned Limit, bool FindNarrowMerge); // Scan the instructions looking for a store that writes to the address from // which the current load instruction reads. Return true if one is found. bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit, MachineBasicBlock::iterator &StoreI); // Merge the two narrow zero stores indicated into a single wider store instruction.
MachineBasicBlock::iterator mergeNarrowZeroStores(MachineBasicBlock::iterator I, MachineBasicBlock::iterator MergeMI, const LdStPairFlags &Flags); // Merge the two instructions indicated into a single pair-wise instruction. MachineBasicBlock::iterator mergePairedInsns(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired, const LdStPairFlags &Flags); // Promote the load that reads directly from the address stored to. MachineBasicBlock::iterator promoteLoadFromStore(MachineBasicBlock::iterator LoadI, MachineBasicBlock::iterator StoreI); // Scan the instruction list to find a base register update that can // be combined with the current instruction (a load or store) using // pre or post indexed addressing with writeback. Scan forwards. MachineBasicBlock::iterator findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit); // Scan the instruction list to find a base register update that can // be combined with the current instruction (a load or store) using // pre or post indexed addressing with writeback. Scan backwards. MachineBasicBlock::iterator findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit); // Find an instruction that updates the base register of the ld/st // instruction. bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI, unsigned BaseReg, int Offset); // Merge a pre- or post-index base register update into a ld/st instruction. MachineBasicBlock::iterator mergeUpdateInsn(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update, bool IsPreIdx); // Find and merge zero store instructions. bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI); // Find and pair ldr/str instructions. bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI); // Find and promote load instructions which read directly from store. bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI); bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt); bool runOnMachineFunction(MachineFunction &Fn) override; MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( MachineFunctionProperties::Property::NoVRegs); } StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; } }; char AArch64LoadStoreOpt::ID = 0; } // namespace INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt", AARCH64_LOAD_STORE_OPT_NAME, false, false) static bool isNarrowStore(unsigned Opc) { switch (Opc) { default: return false; case AArch64::STRBBui: case AArch64::STURBBi: case AArch64::STRHHui: case AArch64::STURHHi: return true; } } // Scaling factor for unscaled load or store. 
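// For example, per the switch below the scale is 1 for byte ops (LDRBBui), 2 for halfword ops (STRHHui), 4 for word ops (LDRWui, STPWi), 8 for doubleword ops (LDRXui, STPXi), and 16 for quadword ops (LDRQui, STPQi).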
static int getMemScale(MachineInstr &MI) { switch (MI.getOpcode()) { default: llvm_unreachable("Opcode has unknown scale!"); case AArch64::LDRBBui: case AArch64::LDURBBi: case AArch64::LDRSBWui: case AArch64::LDURSBWi: case AArch64::STRBBui: case AArch64::STURBBi: return 1; case AArch64::LDRHHui: case AArch64::LDURHHi: case AArch64::LDRSHWui: case AArch64::LDURSHWi: case AArch64::STRHHui: case AArch64::STURHHi: return 2; case AArch64::LDRSui: case AArch64::LDURSi: case AArch64::LDRSWui: case AArch64::LDURSWi: case AArch64::LDRWui: case AArch64::LDURWi: case AArch64::STRSui: case AArch64::STURSi: case AArch64::STRWui: case AArch64::STURWi: case AArch64::LDPSi: case AArch64::LDPSWi: case AArch64::LDPWi: case AArch64::STPSi: case AArch64::STPWi: return 4; case AArch64::LDRDui: case AArch64::LDURDi: case AArch64::LDRXui: case AArch64::LDURXi: case AArch64::STRDui: case AArch64::STURDi: case AArch64::STRXui: case AArch64::STURXi: case AArch64::LDPDi: case AArch64::LDPXi: case AArch64::STPDi: case AArch64::STPXi: return 8; case AArch64::LDRQui: case AArch64::LDURQi: case AArch64::STRQui: case AArch64::STURQi: case AArch64::LDPQi: case AArch64::STPQi: return 16; } } static unsigned getMatchingNonSExtOpcode(unsigned Opc, bool *IsValidLdStrOpc = nullptr) { if (IsValidLdStrOpc) *IsValidLdStrOpc = true; switch (Opc) { default: if (IsValidLdStrOpc) *IsValidLdStrOpc = false; return UINT_MAX; case AArch64::STRDui: case AArch64::STURDi: case AArch64::STRQui: case AArch64::STURQi: case AArch64::STRBBui: case AArch64::STURBBi: case AArch64::STRHHui: case AArch64::STURHHi: case AArch64::STRWui: case AArch64::STURWi: case AArch64::STRXui: case AArch64::STURXi: case AArch64::LDRDui: case AArch64::LDURDi: case AArch64::LDRQui: case AArch64::LDURQi: case AArch64::LDRWui: case AArch64::LDURWi: case AArch64::LDRXui: case AArch64::LDURXi: case AArch64::STRSui: case AArch64::STURSi: case AArch64::LDRSui: case AArch64::LDURSi: return Opc; case AArch64::LDRSWui: return AArch64::LDRWui; case AArch64::LDURSWi: return AArch64::LDURWi; } } static unsigned getMatchingWideOpcode(unsigned Opc) { switch (Opc) { default: llvm_unreachable("Opcode has no wide equivalent!"); case AArch64::STRBBui: return AArch64::STRHHui; case AArch64::STRHHui: return AArch64::STRWui; case AArch64::STURBBi: return AArch64::STURHHi; case AArch64::STURHHi: return AArch64::STURWi; case AArch64::STURWi: return AArch64::STURXi; case AArch64::STRWui: return AArch64::STRXui; } } static unsigned getMatchingPairOpcode(unsigned Opc) { switch (Opc) { default: llvm_unreachable("Opcode has no pairwise equivalent!"); case AArch64::STRSui: case AArch64::STURSi: return AArch64::STPSi; case AArch64::STRDui: case AArch64::STURDi: return AArch64::STPDi; case AArch64::STRQui: case AArch64::STURQi: return AArch64::STPQi; case AArch64::STRWui: case AArch64::STURWi: return AArch64::STPWi; case AArch64::STRXui: case AArch64::STURXi: return AArch64::STPXi; case AArch64::LDRSui: case AArch64::LDURSi: return AArch64::LDPSi; case AArch64::LDRDui: case AArch64::LDURDi: return AArch64::LDPDi; case AArch64::LDRQui: case AArch64::LDURQi: return AArch64::LDPQi; case AArch64::LDRWui: case AArch64::LDURWi: return AArch64::LDPWi; case AArch64::LDRXui: case AArch64::LDURXi: return AArch64::LDPXi; case AArch64::LDRSWui: case AArch64::LDURSWi: return AArch64::LDPSWi; } } static unsigned isMatchingStore(MachineInstr &LoadInst, MachineInstr &StoreInst) { unsigned LdOpc = LoadInst.getOpcode(); unsigned StOpc = StoreInst.getOpcode(); switch (LdOpc) { default: 
llvm_unreachable("Unsupported load instruction!"); case AArch64::LDRBBui: return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui || StOpc == AArch64::STRXui; case AArch64::LDURBBi: return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi || StOpc == AArch64::STURXi; case AArch64::LDRHHui: return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui || StOpc == AArch64::STRXui; case AArch64::LDURHHi: return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi || StOpc == AArch64::STURXi; case AArch64::LDRWui: return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui; case AArch64::LDURWi: return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi; case AArch64::LDRXui: return StOpc == AArch64::STRXui; case AArch64::LDURXi: return StOpc == AArch64::STURXi; } } static unsigned getPreIndexedOpcode(unsigned Opc) { switch (Opc) { default: llvm_unreachable("Opcode has no pre-indexed equivalent!"); case AArch64::STRSui: return AArch64::STRSpre; case AArch64::STRDui: return AArch64::STRDpre; case AArch64::STRQui: return AArch64::STRQpre; case AArch64::STRBBui: return AArch64::STRBBpre; case AArch64::STRHHui: return AArch64::STRHHpre; case AArch64::STRWui: return AArch64::STRWpre; case AArch64::STRXui: return AArch64::STRXpre; case AArch64::LDRSui: return AArch64::LDRSpre; case AArch64::LDRDui: return AArch64::LDRDpre; case AArch64::LDRQui: return AArch64::LDRQpre; case AArch64::LDRBBui: return AArch64::LDRBBpre; case AArch64::LDRHHui: return AArch64::LDRHHpre; case AArch64::LDRWui: return AArch64::LDRWpre; case AArch64::LDRXui: return AArch64::LDRXpre; case AArch64::LDRSWui: return AArch64::LDRSWpre; case AArch64::LDPSi: return AArch64::LDPSpre; case AArch64::LDPSWi: return AArch64::LDPSWpre; case AArch64::LDPDi: return AArch64::LDPDpre; case AArch64::LDPQi: return AArch64::LDPQpre; case AArch64::LDPWi: return AArch64::LDPWpre; case AArch64::LDPXi: return AArch64::LDPXpre; case AArch64::STPSi: return AArch64::STPSpre; case AArch64::STPDi: return AArch64::STPDpre; case AArch64::STPQi: return AArch64::STPQpre; case AArch64::STPWi: return AArch64::STPWpre; case AArch64::STPXi: return AArch64::STPXpre; } } static unsigned getPostIndexedOpcode(unsigned Opc) { switch (Opc) { default: llvm_unreachable("Opcode has no post-indexed wise equivalent!"); case AArch64::STRSui: return AArch64::STRSpost; case AArch64::STRDui: return AArch64::STRDpost; case AArch64::STRQui: return AArch64::STRQpost; case AArch64::STRBBui: return AArch64::STRBBpost; case AArch64::STRHHui: return AArch64::STRHHpost; case AArch64::STRWui: return AArch64::STRWpost; case AArch64::STRXui: return AArch64::STRXpost; case AArch64::LDRSui: return AArch64::LDRSpost; case AArch64::LDRDui: return AArch64::LDRDpost; case AArch64::LDRQui: return AArch64::LDRQpost; case AArch64::LDRBBui: return AArch64::LDRBBpost; case AArch64::LDRHHui: return AArch64::LDRHHpost; case AArch64::LDRWui: return AArch64::LDRWpost; case AArch64::LDRXui: return AArch64::LDRXpost; case AArch64::LDRSWui: return AArch64::LDRSWpost; case AArch64::LDPSi: return AArch64::LDPSpost; case AArch64::LDPSWi: return AArch64::LDPSWpost; case AArch64::LDPDi: return AArch64::LDPDpost; case AArch64::LDPQi: return AArch64::LDPQpost; case AArch64::LDPWi: return AArch64::LDPWpost; case AArch64::LDPXi: return AArch64::LDPXpost; case AArch64::STPSi: return AArch64::STPSpost; case AArch64::STPDi: return AArch64::STPDpost; case AArch64::STPQi: return AArch64::STPQpost; case AArch64::STPWi: return AArch64::STPWpost; case 
AArch64::STPXi: return AArch64::STPXpost; } } static bool isPairedLdSt(const MachineInstr &MI) { switch (MI.getOpcode()) { default: return false; case AArch64::LDPSi: case AArch64::LDPSWi: case AArch64::LDPDi: case AArch64::LDPQi: case AArch64::LDPWi: case AArch64::LDPXi: case AArch64::STPSi: case AArch64::STPDi: case AArch64::STPQi: case AArch64::STPWi: case AArch64::STPXi: return true; } } static const MachineOperand &getLdStRegOp(const MachineInstr &MI, unsigned PairedRegOp = 0) { assert(PairedRegOp < 2 && "Unexpected register operand idx."); unsigned Idx = isPairedLdSt(MI) ? PairedRegOp : 0; return MI.getOperand(Idx); } static const MachineOperand &getLdStBaseOp(const MachineInstr &MI) { unsigned Idx = isPairedLdSt(MI) ? 2 : 1; return MI.getOperand(Idx); } static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI) { unsigned Idx = isPairedLdSt(MI) ? 3 : 2; return MI.getOperand(Idx); } static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst, MachineInstr &StoreInst, const AArch64InstrInfo *TII) { assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st."); int LoadSize = getMemScale(LoadInst); int StoreSize = getMemScale(StoreInst); int UnscaledStOffset = TII->isUnscaledLdSt(StoreInst) ? getLdStOffsetOp(StoreInst).getImm() : getLdStOffsetOp(StoreInst).getImm() * StoreSize; int UnscaledLdOffset = TII->isUnscaledLdSt(LoadInst) ? getLdStOffsetOp(LoadInst).getImm() : getLdStOffsetOp(LoadInst).getImm() * LoadSize; return (UnscaledStOffset <= UnscaledLdOffset) && (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize)); } static bool isPromotableZeroStoreInst(MachineInstr &MI) { unsigned Opc = MI.getOpcode(); return (Opc == AArch64::STRWui || Opc == AArch64::STURWi || isNarrowStore(Opc)) && getLdStRegOp(MI).getReg() == AArch64::WZR; } MachineBasicBlock::iterator AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I, MachineBasicBlock::iterator MergeMI, const LdStPairFlags &Flags) { assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) && "Expected promotable zero stores."); MachineBasicBlock::iterator NextI = I; ++NextI; // If NextI is the second of the two instructions to be merged, we need // to skip one further. Either way we merge will invalidate the iterator, // and we don't need to scan the new instruction, as it's a pairwise // instruction, which we're not considering for further action anyway. if (NextI == MergeMI) ++NextI; unsigned Opc = I->getOpcode(); bool IsScaled = !TII->isUnscaledLdSt(Opc); int OffsetStride = IsScaled ? 1 : getMemScale(*I); bool MergeForward = Flags.getMergeForward(); // Insert our new paired instruction after whichever of the paired // instructions MergeForward indicates. MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I; // Also based on MergeForward is from where we copy the base register operand // so we get the flags compatible with the input code. const MachineOperand &BaseRegOp = MergeForward ? getLdStBaseOp(*MergeMI) : getLdStBaseOp(*I); // Which register is Rt and which is Rt2 depends on the offset order. MachineInstr *RtMI; if (getLdStOffsetOp(*I).getImm() == getLdStOffsetOp(*MergeMI).getImm() + OffsetStride) RtMI = &*MergeMI; else RtMI = &*I; int OffsetImm = getLdStOffsetOp(*RtMI).getImm(); // Change the scaled offset from small to large type. if (IsScaled) { assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge"); OffsetImm /= 2; } // Construct the new instruction. 
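// For example, merging "strh wzr, [x0]" (STRHHui, scaled offset 0) with "strh wzr, [x0, #2]" (scaled offset 1) picks STRWui from getMatchingWideOpcode and halves the smaller offset to 0, yielding "str wzr, [x0]".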
DebugLoc DL = I->getDebugLoc(); MachineBasicBlock *MBB = I->getParent(); MachineInstrBuilder MIB; MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc))) .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR) .addOperand(BaseRegOp) .addImm(OffsetImm) .setMemRefs(I->mergeMemRefsWith(*MergeMI)); (void)MIB; DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n "); DEBUG(I->print(dbgs())); DEBUG(dbgs() << " "); DEBUG(MergeMI->print(dbgs())); DEBUG(dbgs() << " with instruction:\n "); DEBUG(((MachineInstr *)MIB)->print(dbgs())); DEBUG(dbgs() << "\n"); // Erase the old instructions. I->eraseFromParent(); MergeMI->eraseFromParent(); return NextI; } MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired, const LdStPairFlags &Flags) { MachineBasicBlock::iterator NextI = I; ++NextI; // If NextI is the second of the two instructions to be merged, we need // to skip one further. Either way we merge will invalidate the iterator, // and we don't need to scan the new instruction, as it's a pairwise // instruction, which we're not considering for further action anyway. if (NextI == Paired) ++NextI; int SExtIdx = Flags.getSExtIdx(); unsigned Opc = SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode()); bool IsUnscaled = TII->isUnscaledLdSt(Opc); int OffsetStride = IsUnscaled ? getMemScale(*I) : 1; bool MergeForward = Flags.getMergeForward(); // Insert our new paired instruction after whichever of the paired // instructions MergeForward indicates. MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; // Also based on MergeForward is from where we copy the base register operand // so we get the flags compatible with the input code. const MachineOperand &BaseRegOp = MergeForward ? getLdStBaseOp(*Paired) : getLdStBaseOp(*I); int Offset = getLdStOffsetOp(*I).getImm(); int PairedOffset = getLdStOffsetOp(*Paired).getImm(); bool PairedIsUnscaled = TII->isUnscaledLdSt(Paired->getOpcode()); if (IsUnscaled != PairedIsUnscaled) { // We're trying to pair instructions that differ in how they are scaled. If // I is scaled then scale the offset of Paired accordingly. Otherwise, do // the opposite (i.e., make Paired's offset unscaled). int MemSize = getMemScale(*Paired); if (PairedIsUnscaled) { // If the unscaled offset isn't a multiple of the MemSize, we can't // pair the operations together. assert(!(PairedOffset % getMemScale(*Paired)) && "Offset should be a multiple of the stride!"); PairedOffset /= MemSize; } else { PairedOffset *= MemSize; } } // Which register is Rt and which is Rt2 depends on the offset order. MachineInstr *RtMI, *Rt2MI; if (Offset == PairedOffset + OffsetStride) { RtMI = &*Paired; Rt2MI = &*I; // Here we swapped the assumption made for SExtIdx. // I.e., we turn ldp I, Paired into ldp Paired, I. // Update the index accordingly. if (SExtIdx != -1) SExtIdx = (SExtIdx + 1) % 2; } else { RtMI = &*I; Rt2MI = &*Paired; } int OffsetImm = getLdStOffsetOp(*RtMI).getImm(); // Scale the immediate offset, if necessary. if (TII->isUnscaledLdSt(RtMI->getOpcode())) { assert(!(OffsetImm % getMemScale(*RtMI)) && "Unscaled offset cannot be scaled."); OffsetImm /= getMemScale(*RtMI); } // Construct the new instruction. MachineInstrBuilder MIB; DebugLoc DL = I->getDebugLoc(); MachineBasicBlock *MBB = I->getParent(); MachineOperand RegOp0 = getLdStRegOp(*RtMI); MachineOperand RegOp1 = getLdStRegOp(*Rt2MI); // Kill flags may become invalid when moving stores for pairing. 
if (RegOp0.isUse()) { if (!MergeForward) { // Clear kill flags on store if moving upwards. Example: // STRWui %w0, ... // USE %w1 // STRWui kill %w1 ; need to clear kill flag when moving STRWui upwards RegOp0.setIsKill(false); RegOp1.setIsKill(false); } else { // Clear kill flags of the first store's register. Example: // STRWui %w1, ... // USE kill %w1 ; need to clear kill flag when moving STRWui downwards // STRW %w0 unsigned Reg = getLdStRegOp(*I).getReg(); for (MachineInstr &MI : make_range(std::next(I), Paired)) MI.clearRegisterKills(Reg, TRI); } } MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingPairOpcode(Opc))) .addOperand(RegOp0) .addOperand(RegOp1) .addOperand(BaseRegOp) .addImm(OffsetImm) .setMemRefs(I->mergeMemRefsWith(*Paired)); (void)MIB; DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n "); DEBUG(I->print(dbgs())); DEBUG(dbgs() << " "); DEBUG(Paired->print(dbgs())); DEBUG(dbgs() << " with instruction:\n "); if (SExtIdx != -1) { // Generate the sign extension for the proper result of the ldp. // I.e., with X1, that would be: // %W1 = KILL %W1, %X1 // %X1 = SBFMXri %X1, 0, 31 MachineOperand &DstMO = MIB->getOperand(SExtIdx); // Right now, DstMO has the extended register, since it comes from an // extended opcode. unsigned DstRegX = DstMO.getReg(); // Get the W variant of that register. unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32); // Update the result of LDP to use the W instead of the X variant. DstMO.setReg(DstRegW); DEBUG(((MachineInstr *)MIB)->print(dbgs())); DEBUG(dbgs() << "\n"); // Make the machine verifier happy by providing a definition for // the X register. // Insert this definition right after the generated LDP, i.e., before // InsertionPoint. MachineInstrBuilder MIBKill = BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW) .addReg(DstRegW) .addReg(DstRegX, RegState::Define); MIBKill->getOperand(2).setImplicit(); // Create the sign extension. MachineInstrBuilder MIBSXTW = BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX) .addReg(DstRegX) .addImm(0) .addImm(31); (void)MIBSXTW; DEBUG(dbgs() << " Extend operand:\n "); DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs())); } else { DEBUG(((MachineInstr *)MIB)->print(dbgs())); } DEBUG(dbgs() << "\n"); // Erase the old instructions. I->eraseFromParent(); Paired->eraseFromParent(); return NextI; } MachineBasicBlock::iterator AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, MachineBasicBlock::iterator StoreI) { MachineBasicBlock::iterator NextI = LoadI; ++NextI; int LoadSize = getMemScale(*LoadI); int StoreSize = getMemScale(*StoreI); unsigned LdRt = getLdStRegOp(*LoadI).getReg(); unsigned StRt = getLdStRegOp(*StoreI).getReg(); bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt); assert((IsStoreXReg || TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) && "Unexpected RegClass"); MachineInstr *BitExtMI; if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) { // Remove the load if its destination register is the same as the register // holding the stored value. if (StRt == LdRt && LoadSize == 8) { StoreI->clearRegisterKills(StRt, TRI); DEBUG(dbgs() << "Remove load instruction:\n "); DEBUG(LoadI->print(dbgs())); DEBUG(dbgs() << "\n"); LoadI->eraseFromParent(); return NextI; } // Replace the load with a mov if the load and store are the same size. BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), TII->get(IsStoreXReg ?
AArch64::ORRXrs : AArch64::ORRWrs), LdRt) .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR) .addReg(StRt) .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); } else { // FIXME: Currently we disable this transformation in big-endian targets as // performance and correctness are verified only in little-endian. if (!Subtarget->isLittleEndian()) return NextI; bool IsUnscaled = TII->isUnscaledLdSt(*LoadI); assert(IsUnscaled == TII->isUnscaledLdSt(*StoreI) && "Unsupported ld/st match"); assert(LoadSize <= StoreSize && "Invalid load size"); int UnscaledLdOffset = IsUnscaled ? getLdStOffsetOp(*LoadI).getImm() : getLdStOffsetOp(*LoadI).getImm() * LoadSize; int UnscaledStOffset = IsUnscaled ? getLdStOffsetOp(*StoreI).getImm() : getLdStOffsetOp(*StoreI).getImm() * StoreSize; int Width = LoadSize * 8; int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset); int Imms = Immr + Width - 1; unsigned DestReg = IsStoreXReg ? TRI->getMatchingSuperReg(LdRt, AArch64::sub_32, &AArch64::GPR64RegClass) : LdRt; assert((UnscaledLdOffset >= UnscaledStOffset && (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) && "Invalid offset"); Immr = 8 * (UnscaledLdOffset - UnscaledStOffset); Imms = Immr + Width - 1; if (UnscaledLdOffset == UnscaledStOffset) { uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N | ((Immr) << 6) // immr | ((Imms) << 0) // imms ; BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri), DestReg) .addReg(StRt) .addImm(AndMaskEncoded); } else { BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri), DestReg) .addReg(StRt) .addImm(Immr) .addImm(Imms); } } - StoreI->clearRegisterKills(StRt, TRI); - (void)BitExtMI; + // Clear kill flags between store and load. + for (MachineInstr &MI : make_range(StoreI->getIterator(), + BitExtMI->getIterator())) + MI.clearRegisterKills(StRt, TRI); DEBUG(dbgs() << "Promoting load by replacing :\n "); DEBUG(StoreI->print(dbgs())); DEBUG(dbgs() << " "); DEBUG(LoadI->print(dbgs())); DEBUG(dbgs() << " with instructions:\n "); DEBUG(StoreI->print(dbgs())); DEBUG(dbgs() << " "); DEBUG((BitExtMI)->print(dbgs())); DEBUG(dbgs() << "\n"); // Erase the old instructions. LoadI->eraseFromParent(); return NextI; } /// trackRegDefsUses - Remember what registers the specified instruction uses /// and modifies. static void trackRegDefsUses(const MachineInstr &MI, BitVector &ModifiedRegs, BitVector &UsedRegs, const TargetRegisterInfo *TRI) { for (const MachineOperand &MO : MI.operands()) { if (MO.isRegMask()) ModifiedRegs.setBitsNotInMask(MO.getRegMask()); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; if (MO.isDef()) { // WZR/XZR are not modified even when used as a destination register. if (Reg != AArch64::WZR && Reg != AArch64::XZR) for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) ModifiedRegs.set(*AI); } else { assert(MO.isUse() && "Reg operand not a def and not a use?!?"); for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) UsedRegs.set(*AI); } } } static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) { // Convert the byte-offset used by unscaled into an "element" offset used // by the scaled pair load/store instructions. if (IsUnscaled) { // If the byte-offset isn't a multiple of the stride, there's no point // trying to match it. 
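// For example, with an unscaled doubleword access (OffsetStride == 8), a byte offset of 24 becomes element offset 3 and fits the signed 7-bit range checked below, while a byte offset of 20 fails the modulo test.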
if (Offset % OffsetStride) return false; Offset /= OffsetStride; } return Offset <= 63 && Offset >= -64; } // Do alignment, specialized to power of 2 and for signed ints, // avoiding having to do a C-style cast from uint64_t to int when // using alignTo from include/llvm/Support/MathExtras.h. // FIXME: Move this function to include/MathExtras.h? static int alignTo(int Num, int PowOf2) { return (Num + PowOf2 - 1) & ~(PowOf2 - 1); } static bool mayAlias(MachineInstr &MIa, MachineInstr &MIb, const AArch64InstrInfo *TII) { // One of the instructions must modify memory. if (!MIa.mayStore() && !MIb.mayStore()) return false; // Both instructions must be memory operations. if (!MIa.mayLoadOrStore() && !MIb.mayLoadOrStore()) return false; return !TII->areMemAccessesTriviallyDisjoint(MIa, MIb); } static bool mayAlias(MachineInstr &MIa, SmallVectorImpl<MachineInstr *> &MemInsns, const AArch64InstrInfo *TII) { for (MachineInstr *MIb : MemInsns) if (mayAlias(MIa, *MIb, TII)) return true; return false; } bool AArch64LoadStoreOpt::findMatchingStore( MachineBasicBlock::iterator I, unsigned Limit, MachineBasicBlock::iterator &StoreI) { MachineBasicBlock::iterator B = I->getParent()->begin(); MachineBasicBlock::iterator MBBI = I; MachineInstr &LoadMI = *I; unsigned BaseReg = getLdStBaseOp(LoadMI).getReg(); // If the load is the first instruction in the block, there's obviously // not any matching store. if (MBBI == B) return false; // Track which registers have been modified and used between the first insn // and the second insn. ModifiedRegs.reset(); UsedRegs.reset(); unsigned Count = 0; do { --MBBI; MachineInstr &MI = *MBBI; // Don't count transient instructions towards the search limit since there // may be different numbers of them if e.g. debug information is present. if (!MI.isTransient()) ++Count; // If the load instruction reads directly from the address to which the // store instruction writes and the stored value is not modified, we can // promote the load. Since we do not handle stores with pre-/post-index, // it's unnecessary to check if BaseReg is modified by the store itself. if (MI.mayStore() && isMatchingStore(LoadMI, MI) && BaseReg == getLdStBaseOp(MI).getReg() && isLdOffsetInRangeOfSt(LoadMI, MI, TII) && !ModifiedRegs[getLdStRegOp(MI).getReg()]) { StoreI = MBBI; return true; } if (MI.isCall()) return false; // Update modified / uses register lists. trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); // Otherwise, if the base register is modified, we have no match, so // return early. if (ModifiedRegs[BaseReg]) return false; // If we encounter a store aliased with the load, return early. if (MI.mayStore() && mayAlias(LoadMI, MI, TII)) return false; } while (MBBI != B && Count < Limit); return false; } // Returns true if FirstMI and MI are candidates for merging or pairing. // Otherwise, returns false. static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, LdStPairFlags &Flags, const AArch64InstrInfo *TII) { // If this is volatile or if pairing is suppressed, not a candidate. if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI)) return false; // We should have already checked FirstMI for pair suppression and volatility. assert(!FirstMI.hasOrderedMemoryRef() && !TII->isLdStPairSuppressed(FirstMI) && "FirstMI shouldn't get here if either of these checks are true."); unsigned OpcA = FirstMI.getOpcode(); unsigned OpcB = MI.getOpcode(); // Opcodes match: nothing more to check.
if (OpcA == OpcB) return true; // Try to match a sign-extended load/store with a zero-extended load/store. bool IsValidLdStrOpc, PairIsValidLdStrOpc; unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc); assert(IsValidLdStrOpc && "Given Opc should be a Load or Store with an immediate"); // OpcA will be the first instruction in the pair. if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) { Flags.setSExtIdx(NonSExtOpc == (unsigned)OpcA ? 1 : 0); return true; } // If the second instruction isn't even a mergeable/pairable load/store, bail // out. if (!PairIsValidLdStrOpc) return false; // FIXME: We don't support merging narrow stores with mixed scaled/unscaled // offsets. if (isNarrowStore(OpcA) || isNarrowStore(OpcB)) return false; // Try to match an unscaled load/store with a scaled load/store. return TII->isUnscaledLdSt(OpcA) != TII->isUnscaledLdSt(OpcB) && getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB); // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair? } /// Scan the instructions looking for a load/store that can be combined with the /// current instruction into a wider equivalent or a load/store pair. MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, LdStPairFlags &Flags, unsigned Limit, bool FindNarrowMerge) { MachineBasicBlock::iterator E = I->getParent()->end(); MachineBasicBlock::iterator MBBI = I; MachineInstr &FirstMI = *I; ++MBBI; bool MayLoad = FirstMI.mayLoad(); bool IsUnscaled = TII->isUnscaledLdSt(FirstMI); unsigned Reg = getLdStRegOp(FirstMI).getReg(); unsigned BaseReg = getLdStBaseOp(FirstMI).getReg(); int Offset = getLdStOffsetOp(FirstMI).getImm(); int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1; bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI); // Track which registers have been modified and used between the first insn // (inclusive) and the second insn. ModifiedRegs.reset(); UsedRegs.reset(); // Remember any instructions that read/write memory between FirstMI and MI. SmallVector<MachineInstr *, 4> MemInsns; for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { MachineInstr &MI = *MBBI; // Don't count transient instructions towards the search limit since there // may be different numbers of them if e.g. debug information is present. if (!MI.isTransient()) ++Count; Flags.setSExtIdx(-1); if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) && getLdStOffsetOp(MI).isImm()) { assert(MI.mayLoadOrStore() && "Expected memory operation."); // If we've found another instruction with the same opcode, check to see // if the base and offset are compatible with our starting instruction. // These instructions all have scaled immediate operands, so we just // check for +1/-1. Make sure to check the new instruction offset is // actually an immediate and not a symbolic reference destined for // a relocation. unsigned MIBaseReg = getLdStBaseOp(MI).getReg(); int MIOffset = getLdStOffsetOp(MI).getImm(); bool MIIsUnscaled = TII->isUnscaledLdSt(MI); if (IsUnscaled != MIIsUnscaled) { // We're trying to pair instructions that differ in how they are scaled. // If FirstMI is scaled then scale the offset of MI accordingly. // Otherwise, do the opposite (i.e., make MI's offset unscaled). int MemSize = getMemScale(MI); if (MIIsUnscaled) { // If the unscaled offset isn't a multiple of the MemSize, we can't // pair the operations together: bail and keep looking.
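// For example, an LDURXi at byte offset 12 cannot pair with a scaled LDRXui, since 12 is not a multiple of the 8-byte access size.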
if (MIOffset % MemSize) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); MemInsns.push_back(&MI); continue; } MIOffset /= MemSize; } else { MIOffset *= MemSize; } } if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) || (Offset + OffsetStride == MIOffset))) { int MinOffset = Offset < MIOffset ? Offset : MIOffset; if (FindNarrowMerge) { // If the alignment requirements of the scaled wide load/store // instruction can't express the offset of the scaled narrow input, // bail and keep looking. For promotable zero stores, allow only when // the stored value is the same (i.e., WZR). if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) || (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); MemInsns.push_back(&MI); continue; } } else { // Pairwise instructions have a 7-bit signed offset field. Single // insns have a 12-bit unsigned offset field. If the resultant // immediate offset of merging these instructions is out of range for // a pairwise instruction, bail and keep looking. if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); MemInsns.push_back(&MI); continue; } // If the alignment requirements of the paired (scaled) instruction // can't express the offset of the unscaled input, bail and keep // looking. if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); MemInsns.push_back(&MI); continue; } } // If the destination register of the loads is the same register, bail // and keep looking. A load-pair instruction with both destination // registers the same is UNPREDICTABLE and will result in an exception. if (MayLoad && Reg == getLdStRegOp(MI).getReg()) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); MemInsns.push_back(&MI); continue; } // If the Rt of the second instruction was not modified or used between // the two instructions and none of the instructions between the second // and first alias with the second, we can combine the second into the // first. if (!ModifiedRegs[getLdStRegOp(MI).getReg()] && !(MI.mayLoad() && UsedRegs[getLdStRegOp(MI).getReg()]) && !mayAlias(MI, MemInsns, TII)) { Flags.setMergeForward(false); return MBBI; } // Likewise, if the Rt of the first instruction is not modified or used // between the two instructions and none of the instructions between the // first and the second alias with the first, we can combine the first // into the second. if (!ModifiedRegs[getLdStRegOp(FirstMI).getReg()] && !(MayLoad && UsedRegs[getLdStRegOp(FirstMI).getReg()]) && !mayAlias(FirstMI, MemInsns, TII)) { Flags.setMergeForward(true); return MBBI; } // Unable to combine these instructions due to interference in between. // Keep looking. } } // If the instruction wasn't a matching load or store. Stop searching if we // encounter a call instruction that might modify memory. if (MI.isCall()) return E; // Update modified / uses register lists. trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); // Otherwise, if the base register is modified, we have no match, so // return early. if (ModifiedRegs[BaseReg]) return E; // Update list of instructions that read/write memory. 
if (MI.mayLoadOrStore()) MemInsns.push_back(&MI); } return E; } MachineBasicBlock::iterator AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update, bool IsPreIdx) { assert((Update->getOpcode() == AArch64::ADDXri || Update->getOpcode() == AArch64::SUBXri) && "Unexpected base register update instruction to merge!"); MachineBasicBlock::iterator NextI = I; // Return the instruction following the merged instruction, which is // the instruction following our unmerged load. Unless that's the add/sub // instruction we're merging, in which case it's the one after that. if (++NextI == Update) ++NextI; int Value = Update->getOperand(2).getImm(); assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && "Can't merge 1 << 12 offset into pre-/post-indexed load / store"); if (Update->getOpcode() == AArch64::SUBXri) Value = -Value; unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode()) : getPostIndexedOpcode(I->getOpcode()); MachineInstrBuilder MIB; if (!isPairedLdSt(*I)) { // Non-paired instruction. MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) .addOperand(getLdStRegOp(*Update)) .addOperand(getLdStRegOp(*I)) .addOperand(getLdStBaseOp(*I)) .addImm(Value) .setMemRefs(I->memoperands_begin(), I->memoperands_end()); } else { // Paired instruction. int Scale = getMemScale(*I); MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) .addOperand(getLdStRegOp(*Update)) .addOperand(getLdStRegOp(*I, 0)) .addOperand(getLdStRegOp(*I, 1)) .addOperand(getLdStBaseOp(*I)) .addImm(Value / Scale) .setMemRefs(I->memoperands_begin(), I->memoperands_end()); } (void)MIB; if (IsPreIdx) DEBUG(dbgs() << "Creating pre-indexed load/store."); else DEBUG(dbgs() << "Creating post-indexed load/store."); DEBUG(dbgs() << " Replacing instructions:\n "); DEBUG(I->print(dbgs())); DEBUG(dbgs() << " "); DEBUG(Update->print(dbgs())); DEBUG(dbgs() << " with instruction:\n "); DEBUG(((MachineInstr *)MIB)->print(dbgs())); DEBUG(dbgs() << "\n"); // Erase the old instructions for the block. I->eraseFromParent(); Update->eraseFromParent(); return NextI; } bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI, unsigned BaseReg, int Offset) { switch (MI.getOpcode()) { default: break; case AArch64::SUBXri: case AArch64::ADDXri: // Make sure it's a vanilla immediate operand, not a relocation or // anything else we can't handle. if (!MI.getOperand(2).isImm()) break; // Watch out for 1 << 12 shifted value. if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm())) break; // The update instruction source and destination register must be the // same as the load/store base register. if (MI.getOperand(0).getReg() != BaseReg || MI.getOperand(1).getReg() != BaseReg) break; bool IsPairedInsn = isPairedLdSt(MemMI); int UpdateOffset = MI.getOperand(2).getImm(); if (MI.getOpcode() == AArch64::SUBXri) UpdateOffset = -UpdateOffset; // For non-paired load/store instructions, the immediate must fit in a // signed 9-bit integer. if (!IsPairedInsn && (UpdateOffset > 255 || UpdateOffset < -256)) break; // For paired load/store instructions, the immediate must be a multiple of // the scaling factor. The scaled offset must also fit into a signed 7-bit // integer. 
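// For example, with LDPXi (scale 8) an update of #32 scales to 4 and can be merged, #12 fails the multiple-of-scale check, and #520 scales to 65, which is outside the [-64, 63] range.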
if (IsPairedInsn) { int Scale = getMemScale(MemMI); if (UpdateOffset % Scale != 0) break; int ScaledOffset = UpdateOffset / Scale; if (ScaledOffset > 63 || ScaledOffset < -64) break; } // If we have a non-zero Offset, we check that it matches the amount // we're adding to the register. if (!Offset || Offset == UpdateOffset) return true; break; } return false; } MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) { MachineBasicBlock::iterator E = I->getParent()->end(); MachineInstr &MemMI = *I; MachineBasicBlock::iterator MBBI = I; unsigned BaseReg = getLdStBaseOp(MemMI).getReg(); int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI); // Scan forward looking for post-index opportunities. Updating instructions // can't be formed if the memory instruction doesn't have the offset we're // looking for. if (MIUnscaledOffset != UnscaledOffset) return E; // If the base register overlaps a destination register, we can't // merge the update. bool IsPairedInsn = isPairedLdSt(MemMI); for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) { unsigned DestReg = getLdStRegOp(MemMI, i).getReg(); if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) return E; } // Track which registers have been modified and used between the first insn // (inclusive) and the second insn. ModifiedRegs.reset(); UsedRegs.reset(); ++MBBI; for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { MachineInstr &MI = *MBBI; // Don't count transient instructions towards the search limit since there // may be different numbers of them if e.g. debug information is present. if (!MI.isTransient()) ++Count; // If we found a match, return it. if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset)) return MBBI; // Update the status of what the instruction clobbered and used. trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); // Otherwise, if the base register is used or modified, we have no match, so // return early. if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg]) return E; } return E; } MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( MachineBasicBlock::iterator I, unsigned Limit) { MachineBasicBlock::iterator B = I->getParent()->begin(); MachineBasicBlock::iterator E = I->getParent()->end(); MachineInstr &MemMI = *I; MachineBasicBlock::iterator MBBI = I; unsigned BaseReg = getLdStBaseOp(MemMI).getReg(); int Offset = getLdStOffsetOp(MemMI).getImm(); // If the load/store is the first instruction in the block, there's obviously // not any matching update. Ditto if the memory offset isn't zero. if (MBBI == B || Offset != 0) return E; // If the base register overlaps a destination register, we can't // merge the update. bool IsPairedInsn = isPairedLdSt(MemMI); for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) { unsigned DestReg = getLdStRegOp(MemMI, i).getReg(); if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) return E; } // Track which registers have been modified and used between the first insn // (inclusive) and the second insn. ModifiedRegs.reset(); UsedRegs.reset(); unsigned Count = 0; do { --MBBI; MachineInstr &MI = *MBBI; // Don't count transient instructions towards the search limit since there // may be different numbers of them if e.g. debug information is present. if (!MI.isTransient()) ++Count; // If we found a match, return it. 
if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) return MBBI; // Update the status of what the instruction clobbered and used. trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); // Otherwise, if the base register is used or modified, we have no match, so // return early. if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg]) return E; } while (MBBI != B && Count < Limit); return E; } bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore( MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; // If this is a volatile load, don't mess with it. if (MI.hasOrderedMemoryRef()) return false; // Make sure this is a reg+imm. // FIXME: It is possible to extend it to handle reg+reg cases. if (!getLdStOffsetOp(MI).isImm()) return false; // Look backward up to LdStLimit instructions. MachineBasicBlock::iterator StoreI; if (findMatchingStore(MBBI, LdStLimit, StoreI)) { ++NumLoadsFromStoresPromoted; // Promote the load. Keeping the iterator straight is a // pain, so we let the merge routine tell us what the next instruction // is after it's done mucking about. MBBI = promoteLoadFromStore(MBBI, StoreI); return true; } return false; } // Merge adjacent zero stores into a wider store. bool AArch64LoadStoreOpt::tryToMergeZeroStInst( MachineBasicBlock::iterator &MBBI) { assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store."); MachineInstr &MI = *MBBI; MachineBasicBlock::iterator E = MI.getParent()->end(); if (!TII->isCandidateToMergeOrPair(MI)) return false; // Look ahead up to LdStLimit instructions for a mergeable instruction. LdStPairFlags Flags; MachineBasicBlock::iterator MergeMI = findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true); if (MergeMI != E) { ++NumZeroStoresPromoted; // Keeping the iterator straight is a pain, so we let the merge routine tell // us what the next instruction is after it's done mucking about. MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags); return true; } return false; } // Find loads and stores that can be merged into a single load or store pair // instruction. bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; MachineBasicBlock::iterator E = MI.getParent()->end(); if (!TII->isCandidateToMergeOrPair(MI)) return false; // Early exit if the offset is not possible to match. (6 bits of positive // range, plus allow an extra one in case we find a later insn that matches // with Offset-1) bool IsUnscaled = TII->isUnscaledLdSt(MI); int Offset = getLdStOffsetOp(MI).getImm(); int OffsetStride = IsUnscaled ? getMemScale(MI) : 1; // Allow one more for offset. if (Offset > 0) Offset -= OffsetStride; if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride)) return false; // Look ahead up to LdStLimit instructions for a pairable instruction. LdStPairFlags Flags; MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false); if (Paired != E) { ++NumPairCreated; if (TII->isUnscaledLdSt(MI)) ++NumUnscaledPairCreated; // Keeping the iterator straight is a pain, so we let the merge routine tell // us what the next instruction is after it's done mucking about. MBBI = mergePairedInsns(MBBI, Paired, Flags); return true; } return false; } bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt) { bool Modified = false; // Four transformations to do here: // 1) Find loads that directly read from stores and promote them by // replacing with mov instructions.
If the store is wider than the load, // the load will be replaced with a bitfield extract. // e.g., // str w1, [x0, #4] // ldrh w2, [x0, #6] // ; becomes // str w1, [x0, #4] // lsr w2, w1, #16 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) { MachineInstr &MI = *MBBI; switch (MI.getOpcode()) { default: // Just move on to the next instruction. ++MBBI; break; // Scaled instructions. case AArch64::LDRBBui: case AArch64::LDRHHui: case AArch64::LDRWui: case AArch64::LDRXui: // Unscaled instructions. case AArch64::LDURBBi: case AArch64::LDURHHi: case AArch64::LDURWi: case AArch64::LDURXi: { if (tryToPromoteLoadFromStore(MBBI)) { Modified = true; break; } ++MBBI; break; } } } // 2) Merge adjacent zero stores into a wider store. // e.g., // strh wzr, [x0] // strh wzr, [x0, #2] // ; becomes // str wzr, [x0] // e.g., // str wzr, [x0] // str wzr, [x0, #4] // ; becomes // str xzr, [x0] for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); EnableNarrowZeroStOpt && MBBI != E;) { if (isPromotableZeroStoreInst(*MBBI)) { if (tryToMergeZeroStInst(MBBI)) { Modified = true; } else ++MBBI; } else ++MBBI; } // 3) Find loads and stores that can be merged into a single load or store // pair instruction. // e.g., // ldr x0, [x2] // ldr x1, [x2, #8] // ; becomes // ldp x0, x1, [x2] for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) { if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI)) Modified = true; else ++MBBI; } // 4) Find base register updates that can be merged into the load or store // as a base-reg writeback. // e.g., // ldr x0, [x2] // add x2, x2, #4 // ; becomes // ldr x0, [x2], #4 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) { MachineInstr &MI = *MBBI; // Do update merging. It's simpler to keep this separate from the above // switches, though not strictly necessary. unsigned Opc = MI.getOpcode(); switch (Opc) { default: // Just move on to the next instruction. ++MBBI; break; // Scaled instructions. case AArch64::STRSui: case AArch64::STRDui: case AArch64::STRQui: case AArch64::STRXui: case AArch64::STRWui: case AArch64::STRHHui: case AArch64::STRBBui: case AArch64::LDRSui: case AArch64::LDRDui: case AArch64::LDRQui: case AArch64::LDRXui: case AArch64::LDRWui: case AArch64::LDRHHui: case AArch64::LDRBBui: // Unscaled instructions. case AArch64::STURSi: case AArch64::STURDi: case AArch64::STURQi: case AArch64::STURWi: case AArch64::STURXi: case AArch64::LDURSi: case AArch64::LDURDi: case AArch64::LDURQi: case AArch64::LDURWi: case AArch64::LDURXi: // Paired instructions. case AArch64::LDPSi: case AArch64::LDPSWi: case AArch64::LDPDi: case AArch64::LDPQi: case AArch64::LDPWi: case AArch64::LDPXi: case AArch64::STPSi: case AArch64::STPDi: case AArch64::STPQi: case AArch64::STPWi: case AArch64::STPXi: { // Make sure this is a reg+imm (as opposed to an address reloc). if (!getLdStOffsetOp(MI).isImm()) { ++MBBI; break; } // Look forward to try to form a post-index instruction. For example, // ldr x0, [x20] // add x20, x20, #32 // merged into: // ldr x0, [x20], #32 MachineBasicBlock::iterator Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit); if (Update != E) { // Merge the update into the ld/st. MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false); Modified = true; ++NumPostFolded; break; } // Don't know how to handle pre/post-index versions, so move to the next // instruction. if (TII->isUnscaledLdSt(Opc)) { ++MBBI; break; } // Look back to try to find a pre-index instruction.
For example, // add x0, x0, #8 // ldr x1, [x0] // merged into: // ldr x1, [x0, #8]! Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit); if (Update != E) { // Merge the update into the ld/st. MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true); Modified = true; ++NumPreFolded; break; } // The immediate in the load/store is scaled by the size of the memory // operation. The immediate in the add we're looking for, // however, is not, so adjust here. int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI); // Look forward to try to form a pre-index instruction. For example, // ldr x1, [x0, #64] // add x0, x0, #64 // merged into: // ldr x1, [x0, #64]! Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit); if (Update != E) { // Merge the update into the ld/st. MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true); Modified = true; ++NumPreFolded; break; } // Nothing found. Just move to the next instruction. ++MBBI; break; } } } return Modified; } bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { if (skipFunction(*Fn.getFunction())) return false; Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget()); TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo()); TRI = Subtarget->getRegisterInfo(); // Resize the modified and used register bitfield trackers. We do this once // per function and then clear the bitfield each time we optimize a load or // store. ModifiedRegs.resize(TRI->getNumRegs()); UsedRegs.resize(TRI->getNumRegs()); bool Modified = false; bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign(); for (auto &MBB : Fn) Modified |= optimizeBlock(MBB, enableNarrowZeroStOpt); return Modified; } // FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads and // stores near one another? Note: The pre-RA instruction scheduler already has // hooks to try and schedule pairable loads/stores together to improve pairing // opportunities. Thus, a pre-RA pairing pass may not be worth the effort. // FIXME: When pairing store instructions it's very possible for this pass to // hoist a store with a KILL marker above another use (without a KILL marker). // The resulting IR is invalid, but nothing uses the KILL markers after this // pass, so it's never caused a problem in practice. /// createAArch64LoadStoreOptimizationPass - returns an instance of the /// load / store optimization pass. FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() { return new AArch64LoadStoreOpt(); } Index: projects/clang400-import/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp =================================================================== --- projects/clang400-import/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp (revision 314176) +++ projects/clang400-import/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp (revision 314177) @@ -1,1686 +1,1706 @@ //===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file contains a pass that expands pseudo instructions into target // instructions to allow proper scheduling, if-conversion, and other late // optimizations. This pass should be run after register allocation but before // the post-regalloc scheduling pass.
// //===----------------------------------------------------------------------===// #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove! #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; #define DEBUG_TYPE "arm-pseudo" static cl::opt<bool> VerifyARMPseudo("verify-arm-pseudo-expand", cl::Hidden, cl::desc("Verify machine code after expanding ARM pseudos")); namespace { class ARMExpandPseudo : public MachineFunctionPass { public: static char ID; ARMExpandPseudo() : MachineFunctionPass(ID) {} const ARMBaseInstrInfo *TII; const TargetRegisterInfo *TRI; const ARMSubtarget *STI; ARMFunctionInfo *AFI; bool runOnMachineFunction(MachineFunction &Fn) override; MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( MachineFunctionProperties::Property::NoVRegs); } StringRef getPassName() const override { return "ARM pseudo instruction expansion pass"; } private: void TransferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI); bool ExpandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI); bool ExpandMBB(MachineBasicBlock &MBB); void ExpandVLD(MachineBasicBlock::iterator &MBBI); void ExpandVST(MachineBasicBlock::iterator &MBBI); void ExpandLaneOp(MachineBasicBlock::iterator &MBBI); void ExpandVTBL(MachineBasicBlock::iterator &MBBI, unsigned Opc, bool IsExt); void ExpandMOV32BitImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI); bool ExpandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdrexOp, unsigned StrexOp, unsigned UxtOp, MachineBasicBlock::iterator &NextMBBI); bool ExpandCMP_SWAP_64(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI); }; char ARMExpandPseudo::ID = 0; } /// TransferImpOps - Transfer implicit operands on the pseudo instruction to /// the instructions created from the expansion. void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI) { const MCInstrDesc &Desc = OldMI.getDesc(); for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = OldMI.getOperand(i); assert(MO.isReg() && MO.getReg()); if (MO.isUse()) UseMI.addOperand(MO); else DefMI.addOperand(MO); } } namespace { // Constants for register spacing in NEON load/store instructions. // For quad-register load-lane and store-lane pseudo instructions, the // spacing is initially assumed to be EvenDblSpc, and that is changed to // OddDblSpc depending on the lane number operand. enum NEONRegSpacing { SingleSpc, EvenDblSpc, OddDblSpc }; // Entries for NEON load/store information table. The table is sorted by // PseudoOpc for fast binary-search lookups.
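// (Lookups are presumably done with std::lower_bound over this sorted table; the operator< overloads below support comparing entries against a raw PseudoOpc key.)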
struct NEONLdStTableEntry { uint16_t PseudoOpc; uint16_t RealOpc; bool IsLoad; bool isUpdating; bool hasWritebackOperand; uint8_t RegSpacing; // One of type NEONRegSpacing uint8_t NumRegs; // D registers loaded or stored uint8_t RegElts; // elements per D register; used for lane ops // FIXME: Temporary flag to denote whether the real instruction takes // a single register (like the encoding) or all of the registers in // the list (like the asm syntax and the isel DAG). When all definitions // are converted to take only the single encoded register, this will // go away. bool copyAllListRegs; // Comparison methods for binary search of the table. bool operator<(const NEONLdStTableEntry &TE) const { return PseudoOpc < TE.PseudoOpc; } friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) { return TE.PseudoOpc < PseudoOpc; } friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc, const NEONLdStTableEntry &TE) { return PseudoOpc < TE.PseudoOpc; } }; } static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true}, { ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true}, { ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, false, EvenDblSpc, 1, 2 ,true}, { ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, true, EvenDblSpc, 1, 2 ,true}, { ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, false, EvenDblSpc, 1, 8 ,true}, { ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8 ,true}, { ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false}, { ARM::VLD1d64QPseudoWB_fixed, ARM::VLD1d64Qwb_fixed, true, true, false, SingleSpc, 4, 1 ,false}, { ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false}, { ARM::VLD1d64TPseudoWB_fixed, ARM::VLD1d64Twb_fixed, true, true, false, SingleSpc, 3, 1 ,false}, { ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true}, { ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true}, { ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, false, SingleSpc, 2, 2 ,true}, { ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, true, SingleSpc, 2, 2 ,true}, { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, false, SingleSpc, 2, 8 ,true}, { ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, true, SingleSpc, 2, 8 ,true}, { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, false, EvenDblSpc, 2, 4 ,true}, { ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, true, EvenDblSpc, 2, 4 ,true}, { ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, false, EvenDblSpc, 2, 2 ,true}, { ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true}, { ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false}, { ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false}, { ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false}, { ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, false, SingleSpc, 4, 2 ,false}, { ARM::VLD2q32PseudoWB_fixed, ARM::VLD2q32wb_fixed, true, true, false, SingleSpc, 4, 2 ,false}, { ARM::VLD2q32PseudoWB_register, ARM::VLD2q32wb_register, true, true, true, SingleSpc, 4, 2 ,false}, { ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, false, SingleSpc, 4, 8 ,false}, { ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q8wb_fixed, true, true, false, SingleSpc, 4, 8 ,false}, { ARM::VLD2q8PseudoWB_register, 
ARM::VLD2q8wb_register, true, true, true, SingleSpc, 4, 8 ,false}, { ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, false, SingleSpc, 3, 4,true}, { ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true, SingleSpc, 3, 4,true}, { ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, false, SingleSpc, 3, 2,true}, { ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, true, SingleSpc, 3, 2,true}, { ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, false, SingleSpc, 3, 8,true}, { ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, true, SingleSpc, 3, 8,true}, { ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, false, SingleSpc, 3, 4 ,true}, { ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, true, SingleSpc, 3, 4 ,true}, { ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, false, SingleSpc, 3, 2 ,true}, { ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, true, SingleSpc, 3, 2 ,true}, { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, false, SingleSpc, 3, 8 ,true}, { ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, true, SingleSpc, 3, 8 ,true}, { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, false, EvenDblSpc, 3, 4 ,true}, { ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true}, { ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, false, EvenDblSpc, 3, 2 ,true}, { ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true}, { ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, false, SingleSpc, 3, 4 ,true}, { ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, true, SingleSpc, 3, 4 ,true}, { ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, false, SingleSpc, 3, 2 ,true}, { ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, true, SingleSpc, 3, 2 ,true}, { ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, false, SingleSpc, 3, 8 ,true}, { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, true, SingleSpc, 3, 8 ,true}, { ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true}, { ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, false, OddDblSpc, 3, 4 ,true}, { ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, true, OddDblSpc, 3, 4 ,true}, { ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true}, { ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, false, OddDblSpc, 3, 2 ,true}, { ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, true, OddDblSpc, 3, 2 ,true}, { ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, true, EvenDblSpc, 3, 8 ,true}, { ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, false, OddDblSpc, 3, 8 ,true}, { ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, true, OddDblSpc, 3, 8 ,true}, { ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, false, SingleSpc, 4, 4,true}, { ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, true, SingleSpc, 4, 4,true}, { ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, false, SingleSpc, 4, 2,true}, { ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, true, SingleSpc, 4, 2,true}, { ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, false, SingleSpc, 4, 8,true}, { ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, true, SingleSpc, 4, 8,true}, { ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, false, SingleSpc, 4, 4 ,true}, { ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, true, SingleSpc, 4, 4 ,true}, { ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, false, SingleSpc, 4, 2 ,true}, { ARM::VLD4LNd32Pseudo_UPD, 
ARM::VLD4LNd32_UPD, true, true, true, SingleSpc, 4, 2 ,true}, { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, false, SingleSpc, 4, 8 ,true}, { ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, true, SingleSpc, 4, 8 ,true}, { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, false, EvenDblSpc, 4, 4 ,true}, { ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true}, { ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, false, EvenDblSpc, 4, 2 ,true}, { ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true}, { ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, false, SingleSpc, 4, 4 ,true}, { ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, true, SingleSpc, 4, 4 ,true}, { ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, false, SingleSpc, 4, 2 ,true}, { ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, true, SingleSpc, 4, 2 ,true}, { ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, false, SingleSpc, 4, 8 ,true}, { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, true, SingleSpc, 4, 8 ,true}, { ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true}, { ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, false, OddDblSpc, 4, 4 ,true}, { ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, true, OddDblSpc, 4, 4 ,true}, { ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true}, { ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, false, OddDblSpc, 4, 2 ,true}, { ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, true, OddDblSpc, 4, 2 ,true}, { ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, true, EvenDblSpc, 4, 8 ,true}, { ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, false, OddDblSpc, 4, 8 ,true}, { ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, true, OddDblSpc, 4, 8 ,true}, { ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, false, EvenDblSpc, 1, 4 ,true}, { ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD, false, true, true, EvenDblSpc, 1, 4 ,true}, { ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, false, EvenDblSpc, 1, 2 ,true}, { ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD, false, true, true, EvenDblSpc, 1, 2 ,true}, { ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, false, EvenDblSpc, 1, 8 ,true}, { ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, true, EvenDblSpc, 1, 8 ,true}, { ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,false}, { ARM::VST1d64QPseudoWB_fixed, ARM::VST1d64Qwb_fixed, false, true, false, SingleSpc, 4, 1 ,false}, { ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true, SingleSpc, 4, 1 ,false}, { ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,false}, { ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false}, { ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false}, { ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, false, SingleSpc, 2, 4 ,true}, { ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, true, SingleSpc, 2, 4 ,true}, { ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, false, SingleSpc, 2, 2 ,true}, { ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, true, SingleSpc, 2, 2 ,true}, { ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, false, SingleSpc, 2, 8 ,true}, { ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, true, SingleSpc, 2, 8 ,true}, { ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, false, EvenDblSpc, 2, 
4,true}, { ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, true, EvenDblSpc, 2, 4,true}, { ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true}, { ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true}, { ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false}, { ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false}, { ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false}, { ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false}, { ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false}, { ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false}, { ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false}, { ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false}, { ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false}, { ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true}, { ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true}, { ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, false, SingleSpc, 3, 2 ,true}, { ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, true, SingleSpc, 3, 2 ,true}, { ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, false, SingleSpc, 3, 8 ,true}, { ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, true, SingleSpc, 3, 8 ,true}, { ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, false, EvenDblSpc, 3, 4,true}, { ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, true, EvenDblSpc, 3, 4,true}, { ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, false, EvenDblSpc, 3, 2,true}, { ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, true, EvenDblSpc, 3, 2,true}, { ARM::VST3d16Pseudo, ARM::VST3d16, false, false, false, SingleSpc, 3, 4 ,true}, { ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, true, SingleSpc, 3, 4 ,true}, { ARM::VST3d32Pseudo, ARM::VST3d32, false, false, false, SingleSpc, 3, 2 ,true}, { ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, true, SingleSpc, 3, 2 ,true}, { ARM::VST3d8Pseudo, ARM::VST3d8, false, false, false, SingleSpc, 3, 8 ,true}, { ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, true, SingleSpc, 3, 8 ,true}, { ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, true, EvenDblSpc, 3, 4 ,true}, { ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, false, OddDblSpc, 3, 4 ,true}, { ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, true, OddDblSpc, 3, 4 ,true}, { ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, true, EvenDblSpc, 3, 2 ,true}, { ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, false, OddDblSpc, 3, 2 ,true}, { ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, true, OddDblSpc, 3, 2 ,true}, { ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, true, EvenDblSpc, 3, 8 ,true}, { ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, false, OddDblSpc, 3, 8 ,true}, { ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, true, OddDblSpc, 3, 8 ,true}, { ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, false, SingleSpc, 4, 4 ,true}, { ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, true, SingleSpc, 4, 4 ,true}, { ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, false, SingleSpc, 4, 2 ,true}, { 
ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, true, SingleSpc, 4, 2 ,true}, { ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, false, SingleSpc, 4, 8 ,true}, { ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, true, SingleSpc, 4, 8 ,true}, { ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, false, EvenDblSpc, 4, 4,true}, { ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, true, EvenDblSpc, 4, 4,true}, { ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, false, EvenDblSpc, 4, 2,true}, { ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, true, EvenDblSpc, 4, 2,true}, { ARM::VST4d16Pseudo, ARM::VST4d16, false, false, false, SingleSpc, 4, 4 ,true}, { ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, true, SingleSpc, 4, 4 ,true}, { ARM::VST4d32Pseudo, ARM::VST4d32, false, false, false, SingleSpc, 4, 2 ,true}, { ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, true, SingleSpc, 4, 2 ,true}, { ARM::VST4d8Pseudo, ARM::VST4d8, false, false, false, SingleSpc, 4, 8 ,true}, { ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, true, SingleSpc, 4, 8 ,true}, { ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, true, EvenDblSpc, 4, 4 ,true}, { ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, false, OddDblSpc, 4, 4 ,true}, { ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, true, OddDblSpc, 4, 4 ,true}, { ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, true, EvenDblSpc, 4, 2 ,true}, { ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, false, OddDblSpc, 4, 2 ,true}, { ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, true, OddDblSpc, 4, 2 ,true}, { ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, true, EvenDblSpc, 4, 8 ,true}, { ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, false, OddDblSpc, 4, 8 ,true}, { ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, true, OddDblSpc, 4, 8 ,true} }; /// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON /// load or store pseudo instruction. static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) { #ifndef NDEBUG // Make sure the table is sorted. static bool TableChecked = false; if (!TableChecked) { assert(std::is_sorted(std::begin(NEONLdStTable), std::end(NEONLdStTable)) && "NEONLdStTable is not sorted!"); TableChecked = true; } #endif auto I = std::lower_bound(std::begin(NEONLdStTable), std::end(NEONLdStTable), Opcode); if (I != std::end(NEONLdStTable) && I->PseudoOpc == Opcode) return I; return nullptr; } /// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register, /// corresponding to the specified register spacing. Not all of the results /// are necessarily valid, e.g., a Q register only has 2 D subregisters. 
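/// For example (illustrative): with SingleSpc, a QQ register covering D0-D3 yields {D0, D1, D2, D3}; for a QQQQ register covering D0-D7, EvenDblSpc yields {D0, D2, D4, D6} and OddDblSpc yields {D1, D3, D5, D7}.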
static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc, const TargetRegisterInfo *TRI, unsigned &D0, unsigned &D1, unsigned &D2, unsigned &D3) { if (RegSpc == SingleSpc) { D0 = TRI->getSubReg(Reg, ARM::dsub_0); D1 = TRI->getSubReg(Reg, ARM::dsub_1); D2 = TRI->getSubReg(Reg, ARM::dsub_2); D3 = TRI->getSubReg(Reg, ARM::dsub_3); } else if (RegSpc == EvenDblSpc) { D0 = TRI->getSubReg(Reg, ARM::dsub_0); D1 = TRI->getSubReg(Reg, ARM::dsub_2); D2 = TRI->getSubReg(Reg, ARM::dsub_4); D3 = TRI->getSubReg(Reg, ARM::dsub_6); } else { assert(RegSpc == OddDblSpc && "unknown register spacing"); D0 = TRI->getSubReg(Reg, ARM::dsub_1); D1 = TRI->getSubReg(Reg, ARM::dsub_3); D2 = TRI->getSubReg(Reg, ARM::dsub_5); D3 = TRI->getSubReg(Reg, ARM::dsub_7); } } /// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register /// operands to real VLD instructions with D register operands. void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; MachineBasicBlock &MBB = *MI.getParent(); const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed"); NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing; unsigned NumRegs = TableEntry->NumRegs; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(TableEntry->RealOpc)); unsigned OpIdx = 0; bool DstIsDead = MI.getOperand(OpIdx).isDead(); unsigned DstReg = MI.getOperand(OpIdx++).getReg(); unsigned D0, D1, D2, D3; GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3); MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)); if (NumRegs > 1 && TableEntry->copyAllListRegs) MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); if (NumRegs > 2 && TableEntry->copyAllListRegs) MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead)); if (NumRegs > 3 && TableEntry->copyAllListRegs) MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead)); if (TableEntry->isUpdating) MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the addrmode6 operands. MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the am6offset operand. if (TableEntry->hasWritebackOperand) MIB.addOperand(MI.getOperand(OpIdx++)); // For an instruction writing double-spaced subregs, the pseudo instruction // has an extra operand that is a use of the super-register. Record the // operand index and skip over it. unsigned SrcOpIdx = 0; if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc) SrcOpIdx = OpIdx++; // Copy the predicate operands. MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the super-register source operand used for double-spaced subregs over // to the new instruction as an implicit operand. if (SrcOpIdx != 0) { MachineOperand MO = MI.getOperand(SrcOpIdx); MO.setImplicit(true); MIB.addOperand(MO); } // Add an implicit def for the super-register. MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); TransferImpOps(MI, MIB, MIB); // Transfer memoperands. MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MI.eraseFromParent(); } /// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register /// operands to real VST instructions with D register operands. 
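/// For instance (a sketch, not an exact operand listing): a VST2q8Pseudo store of a QQ register becomes the real VST2q8 with D-register operands drawn from the QQ source, and the QQ super-register itself is re-added as an implicit use (or an implicit kill when the source was killed) so liveness stays correct.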
void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; MachineBasicBlock &MBB = *MI.getParent(); const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed"); NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing; unsigned NumRegs = TableEntry->NumRegs; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(TableEntry->RealOpc)); unsigned OpIdx = 0; if (TableEntry->isUpdating) MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the addrmode6 operands. MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the am6offset operand. if (TableEntry->hasWritebackOperand) MIB.addOperand(MI.getOperand(OpIdx++)); bool SrcIsKill = MI.getOperand(OpIdx).isKill(); bool SrcIsUndef = MI.getOperand(OpIdx).isUndef(); unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); unsigned D0, D1, D2, D3; GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3); MIB.addReg(D0, getUndefRegState(SrcIsUndef)); if (NumRegs > 1 && TableEntry->copyAllListRegs) MIB.addReg(D1, getUndefRegState(SrcIsUndef)); if (NumRegs > 2 && TableEntry->copyAllListRegs) MIB.addReg(D2, getUndefRegState(SrcIsUndef)); if (NumRegs > 3 && TableEntry->copyAllListRegs) MIB.addReg(D3, getUndefRegState(SrcIsUndef)); // Copy the predicate operands. MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg. MIB->addRegisterKilled(SrcReg, TRI, true); else if (!SrcIsUndef) MIB.addReg(SrcReg, RegState::Implicit); // Add implicit uses for src reg. TransferImpOps(MI, MIB, MIB); // Transfer memoperands. MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MI.eraseFromParent(); } /// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ /// register operands to real instructions with D register operands. void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; MachineBasicBlock &MBB = *MI.getParent(); const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); assert(TableEntry && "NEONLdStTable lookup failed"); NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing; unsigned NumRegs = TableEntry->NumRegs; unsigned RegElts = TableEntry->RegElts; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(TableEntry->RealOpc)); unsigned OpIdx = 0; // The lane operand is always the 3rd from last operand, before the 2 // predicate operands. unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm(); // Adjust the lane and spacing as needed for Q registers. 
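// E.g. (illustrative): with 16-bit elements there are 4 elements per D register, so lane 5 of a Q-register operand becomes lane 1 with odd double-spaced D subregisters.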
assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane"); if (RegSpc == EvenDblSpc && Lane >= RegElts) { RegSpc = OddDblSpc; Lane -= RegElts; } assert(Lane < RegElts && "out of range lane for VLD/VST-lane"); unsigned D0 = 0, D1 = 0, D2 = 0, D3 = 0; unsigned DstReg = 0; bool DstIsDead = false; if (TableEntry->IsLoad) { DstIsDead = MI.getOperand(OpIdx).isDead(); DstReg = MI.getOperand(OpIdx++).getReg(); GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3); MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)); if (NumRegs > 1) MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); if (NumRegs > 2) MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead)); if (NumRegs > 3) MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead)); } if (TableEntry->isUpdating) MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the addrmode6 operands. MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the am6offset operand. if (TableEntry->hasWritebackOperand) MIB.addOperand(MI.getOperand(OpIdx++)); // Grab the super-register source. MachineOperand MO = MI.getOperand(OpIdx++); if (!TableEntry->IsLoad) GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3); // Add the subregs as sources of the new instruction. unsigned SrcFlags = (getUndefRegState(MO.isUndef()) | getKillRegState(MO.isKill())); MIB.addReg(D0, SrcFlags); if (NumRegs > 1) MIB.addReg(D1, SrcFlags); if (NumRegs > 2) MIB.addReg(D2, SrcFlags); if (NumRegs > 3) MIB.addReg(D3, SrcFlags); // Add the lane number operand. MIB.addImm(Lane); OpIdx += 1; // Copy the predicate operands. MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the super-register source to be an implicit source. MO.setImplicit(true); MIB.addOperand(MO); if (TableEntry->IsLoad) // Add an implicit def for the super-register. MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); TransferImpOps(MI, MIB, MIB); // Transfer memoperands. MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MI.eraseFromParent(); } /// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ /// register operands to real instructions with D register operands. void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, unsigned Opc, bool IsExt) { MachineInstr &MI = *MBBI; MachineBasicBlock &MBB = *MI.getParent(); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)); unsigned OpIdx = 0; // Transfer the destination register operand. MIB.addOperand(MI.getOperand(OpIdx++)); if (IsExt) MIB.addOperand(MI.getOperand(OpIdx++)); bool SrcIsKill = MI.getOperand(OpIdx).isKill(); unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); unsigned D0, D1, D2, D3; GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3); MIB.addReg(D0); // Copy the other source register operand. MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the predicate operands. MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); // Add an implicit kill and use for the super-reg. MIB.addReg(SrcReg, RegState::Implicit | getKillRegState(SrcIsKill)); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); } static bool IsAnAddressOperand(const MachineOperand &MO) { // This check is overly conservative. Unless we are certain that the machine // operand is not a symbol reference, we return that it is a symbol reference. // This is important as the load pair may not be split up on Windows.
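// (Rationale, roughly: on Windows on ARM a movw/movt pair that materializes a symbolic address is kept bundled, via RequiresBundling in ExpandMOV32BitImm below, so the paired relocation can still be applied.)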
switch (MO.getType()) { case MachineOperand::MO_Register: case MachineOperand::MO_Immediate: case MachineOperand::MO_CImmediate: case MachineOperand::MO_FPImmediate: return false; case MachineOperand::MO_MachineBasicBlock: return true; case MachineOperand::MO_FrameIndex: return false; case MachineOperand::MO_ConstantPoolIndex: case MachineOperand::MO_TargetIndex: case MachineOperand::MO_JumpTableIndex: case MachineOperand::MO_ExternalSymbol: case MachineOperand::MO_GlobalAddress: case MachineOperand::MO_BlockAddress: return true; case MachineOperand::MO_RegisterMask: case MachineOperand::MO_RegisterLiveOut: return false; case MachineOperand::MO_Metadata: case MachineOperand::MO_MCSymbol: return true; case MachineOperand::MO_CFIIndex: return false; case MachineOperand::MO_IntrinsicID: case MachineOperand::MO_Predicate: llvm_unreachable("should not exist post-isel"); } llvm_unreachable("unhandled machine operand type"); } void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; unsigned Opcode = MI.getOpcode(); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); unsigned DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm; const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1); bool RequiresBundling = STI->isTargetWindows() && IsAnAddressOperand(MO); MachineInstrBuilder LO16, HI16; if (!STI->hasV6T2Ops() && (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) { // FIXME Windows CE supports older ARM CPUs assert(!STI->isTargetWindows() && "Windows on ARM requires ARMv7+"); // Expand into a movi + orr. LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg); HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstReg); assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!"); unsigned ImmVal = (unsigned)MO.getImm(); unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal); unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal); LO16 = LO16.addImm(SOImmValV1); HI16 = HI16.addImm(SOImmValV2); LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); LO16.addImm(Pred).addReg(PredReg).addReg(0); HI16.addImm(Pred).addReg(PredReg).addReg(0); TransferImpOps(MI, LO16, HI16); MI.eraseFromParent(); return; } unsigned LO16Opc = 0; unsigned HI16Opc = 0; if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) { LO16Opc = ARM::t2MOVi16; HI16Opc = ARM::t2MOVTi16; } else { LO16Opc = ARM::MOVi16; HI16Opc = ARM::MOVTi16; } LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg); HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstReg); switch (MO.getType()) { case MachineOperand::MO_Immediate: { unsigned Imm = MO.getImm(); unsigned Lo16 = Imm & 0xffff; unsigned Hi16 = (Imm >> 16) & 0xffff; LO16 = LO16.addImm(Lo16); HI16 = HI16.addImm(Hi16); break; } case MachineOperand::MO_ExternalSymbol: { const char *ES = MO.getSymbolName(); unsigned TF = MO.getTargetFlags(); LO16 = LO16.addExternalSymbol(ES, TF | ARMII::MO_LO16); HI16 = HI16.addExternalSymbol(ES, TF | ARMII::MO_HI16); break; } default: { const GlobalValue *GV = MO.getGlobal(); unsigned TF = MO.getTargetFlags(); LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | 
ARMII::MO_LO16); HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); break; } } LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); LO16.addImm(Pred).addReg(PredReg); HI16.addImm(Pred).addReg(PredReg); if (RequiresBundling) finalizeBundle(MBB, LO16->getIterator(), MBBI->getIterator()); TransferImpOps(MI, LO16, HI16); MI.eraseFromParent(); } static void addPostLoopLiveIns(MachineBasicBlock *MBB, LivePhysRegs &LiveRegs) { for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) MBB->addLiveIn(*I); } /// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as /// possible. This only gets used at -O0 so we don't care about efficiency of the /// generated code. bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdrexOp, unsigned StrexOp, unsigned UxtOp, MachineBasicBlock::iterator &NextMBBI) { bool IsThumb = STI->isThumb(); MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); MachineOperand &Dest = MI.getOperand(0); unsigned StatusReg = MI.getOperand(1).getReg(); MachineOperand &Addr = MI.getOperand(2); MachineOperand &Desired = MI.getOperand(3); MachineOperand &New = MI.getOperand(4); LivePhysRegs LiveRegs(&TII->getRegisterInfo()); LiveRegs.addLiveOuts(MBB); for (auto I = std::prev(MBB.end()); I != MBBI; --I) LiveRegs.stepBackward(*I); MachineFunction *MF = MBB.getParent(); auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); MF->insert(++MBB.getIterator(), LoadCmpBB); MF->insert(++LoadCmpBB->getIterator(), StoreBB); MF->insert(++StoreBB->getIterator(), DoneBB); if (UxtOp) { MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(UxtOp), Desired.getReg()) .addReg(Desired.getReg(), RegState::Kill); if (!IsThumb) MIB.addImm(0); AddDefaultPred(MIB); } // .Lloadcmp: // ldrex rDest, [rAddr] // cmp rDest, rDesired // bne .Ldone LoadCmpBB->addLiveIn(Addr.getReg()); LoadCmpBB->addLiveIn(Dest.getReg()); LoadCmpBB->addLiveIn(Desired.getReg()); addPostLoopLiveIns(LoadCmpBB, LiveRegs); MachineInstrBuilder MIB; MIB = BuildMI(LoadCmpBB, DL, TII->get(LdrexOp), Dest.getReg()); MIB.addReg(Addr.getReg()); if (LdrexOp == ARM::t2LDREX) MIB.addImm(0); // a 32-bit Thumb ldrex (only) allows an offset. AddDefaultPred(MIB); unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr; AddDefaultPred(BuildMI(LoadCmpBB, DL, TII->get(CMPrr)) .addReg(Dest.getReg(), getKillRegState(Dest.isDead())) .addOperand(Desired)); unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc; BuildMI(LoadCmpBB, DL, TII->get(Bcc)) .addMBB(DoneBB) .addImm(ARMCC::NE) .addReg(ARM::CPSR, RegState::Kill); LoadCmpBB->addSuccessor(DoneBB); LoadCmpBB->addSuccessor(StoreBB); // .Lstore: // strex rStatus, rNew, [rAddr] // cmp rStatus, #0 // bne .Lloadcmp StoreBB->addLiveIn(Addr.getReg()); StoreBB->addLiveIn(New.getReg()); addPostLoopLiveIns(StoreBB, LiveRegs); MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), StatusReg); MIB.addOperand(New); MIB.addOperand(Addr); if (StrexOp == ARM::t2STREX) MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset. AddDefaultPred(MIB); unsigned CMPri = IsThumb ? 
ARM::t2CMPri : ARM::CMPri; AddDefaultPred(BuildMI(StoreBB, DL, TII->get(CMPri)) .addReg(StatusReg, RegState::Kill) .addImm(0)); BuildMI(StoreBB, DL, TII->get(Bcc)) .addMBB(LoadCmpBB) .addImm(ARMCC::NE) .addReg(ARM::CPSR, RegState::Kill); StoreBB->addSuccessor(LoadCmpBB); StoreBB->addSuccessor(DoneBB); DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); DoneBB->transferSuccessors(&MBB); addPostLoopLiveIns(DoneBB, LiveRegs); MBB.addSuccessor(LoadCmpBB); NextMBBI = MBB.end(); MI.eraseFromParent(); return true; } /// ARM's ldrexd/strexd take a consecutive register pair (represented as a /// single GPRPair register), Thumb's take two separate registers so we need to /// extract the subregs from the pair. static void addExclusiveRegPair(MachineInstrBuilder &MIB, MachineOperand &Reg, unsigned Flags, bool IsThumb, const TargetRegisterInfo *TRI) { if (IsThumb) { unsigned RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0); unsigned RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1); MIB.addReg(RegLo, Flags | getKillRegState(Reg.isDead())); MIB.addReg(RegHi, Flags | getKillRegState(Reg.isDead())); } else MIB.addReg(Reg.getReg(), Flags | getKillRegState(Reg.isDead())); } /// Expand a 64-bit CMP_SWAP to an ldrexd/strexd loop. bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI) { bool IsThumb = STI->isThumb(); MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); MachineOperand &Dest = MI.getOperand(0); unsigned StatusReg = MI.getOperand(1).getReg(); MachineOperand &Addr = MI.getOperand(2); MachineOperand &Desired = MI.getOperand(3); MachineOperand &New = MI.getOperand(4); unsigned DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0); unsigned DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1); unsigned DesiredLo = TRI->getSubReg(Desired.getReg(), ARM::gsub_0); unsigned DesiredHi = TRI->getSubReg(Desired.getReg(), ARM::gsub_1); LivePhysRegs LiveRegs(&TII->getRegisterInfo()); LiveRegs.addLiveOuts(MBB); for (auto I = std::prev(MBB.end()); I != MBBI; --I) LiveRegs.stepBackward(*I); MachineFunction *MF = MBB.getParent(); auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); MF->insert(++MBB.getIterator(), LoadCmpBB); MF->insert(++LoadCmpBB->getIterator(), StoreBB); MF->insert(++StoreBB->getIterator(), DoneBB); // .Lloadcmp: // ldrexd rDestLo, rDestHi, [rAddr] // cmp rDestLo, rDesiredLo // sbcs rStatus, rDestHi, rDesiredHi // bne .Ldone LoadCmpBB->addLiveIn(Addr.getReg()); LoadCmpBB->addLiveIn(Dest.getReg()); LoadCmpBB->addLiveIn(Desired.getReg()); addPostLoopLiveIns(LoadCmpBB, LiveRegs); unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD; MachineInstrBuilder MIB; MIB = BuildMI(LoadCmpBB, DL, TII->get(LDREXD)); addExclusiveRegPair(MIB, Dest, RegState::Define, IsThumb, TRI); MIB.addReg(Addr.getReg()); AddDefaultPred(MIB); unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr; AddDefaultPred(BuildMI(LoadCmpBB, DL, TII->get(CMPrr)) .addReg(DestLo, getKillRegState(Dest.isDead())) .addReg(DesiredLo, getKillRegState(Desired.isDead()))); BuildMI(LoadCmpBB, DL, TII->get(CMPrr)) .addReg(DestHi, getKillRegState(Dest.isDead())) .addReg(DesiredHi, getKillRegState(Desired.isDead())) .addImm(ARMCC::EQ).addReg(ARM::CPSR, RegState::Kill); unsigned Bcc = IsThumb ? 
ARM::tBcc : ARM::Bcc; BuildMI(LoadCmpBB, DL, TII->get(Bcc)) .addMBB(DoneBB) .addImm(ARMCC::NE) .addReg(ARM::CPSR, RegState::Kill); LoadCmpBB->addSuccessor(DoneBB); LoadCmpBB->addSuccessor(StoreBB); // .Lstore: // strexd rStatus, rNewLo, rNewHi, [rAddr] // cmp rStatus, #0 // bne .Lloadcmp StoreBB->addLiveIn(Addr.getReg()); StoreBB->addLiveIn(New.getReg()); addPostLoopLiveIns(StoreBB, LiveRegs); unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD; MIB = BuildMI(StoreBB, DL, TII->get(STREXD), StatusReg); addExclusiveRegPair(MIB, New, 0, IsThumb, TRI); MIB.addOperand(Addr); AddDefaultPred(MIB); unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri; AddDefaultPred(BuildMI(StoreBB, DL, TII->get(CMPri)) .addReg(StatusReg, RegState::Kill) .addImm(0)); BuildMI(StoreBB, DL, TII->get(Bcc)) .addMBB(LoadCmpBB) .addImm(ARMCC::NE) .addReg(ARM::CPSR, RegState::Kill); StoreBB->addSuccessor(LoadCmpBB); StoreBB->addSuccessor(DoneBB); DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); DoneBB->transferSuccessors(&MBB); addPostLoopLiveIns(DoneBB, LiveRegs); MBB.addSuccessor(LoadCmpBB); NextMBBI = MBB.end(); MI.eraseFromParent(); return true; } bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI) { MachineInstr &MI = *MBBI; unsigned Opcode = MI.getOpcode(); switch (Opcode) { default: return false; case ARM::TCRETURNdi: case ARM::TCRETURNri: { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); assert(MBBI->isReturn() && "Can only insert epilog into returning blocks"); unsigned RetOpcode = MBBI->getOpcode(); DebugLoc dl = MBBI->getDebugLoc(); const ARMBaseInstrInfo &TII = *static_cast( MBB.getParent()->getSubtarget().getInstrInfo()); // Tail call return: adjust the stack pointer and jump to callee. MBBI = MBB.getLastNonDebugInstr(); MachineOperand &JumpTarget = MBBI->getOperand(0); // Jump to label or value in register. if (RetOpcode == ARM::TCRETURNdi) { unsigned TCOpcode = STI->isThumb() ? (STI->isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) : ARM::TAILJMPd; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); if (JumpTarget.isGlobal()) MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), JumpTarget.getTargetFlags()); else { assert(JumpTarget.isSymbol()); MIB.addExternalSymbol(JumpTarget.getSymbolName(), JumpTarget.getTargetFlags()); } // Add the default predicate in Thumb mode. if (STI->isThumb()) MIB.addImm(ARMCC::AL).addReg(0); } else if (RetOpcode == ARM::TCRETURNri) { BuildMI(MBB, MBBI, dl, TII.get(STI->isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)) .addReg(JumpTarget.getReg(), RegState::Kill); } auto NewMI = std::prev(MBBI); for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) NewMI->addOperand(MBBI->getOperand(i)); // Delete the pseudo instruction TCRETURN. MBB.erase(MBBI); MBBI = NewMI; return true; } case ARM::VMOVScc: case ARM::VMOVDcc: { unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD; BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc), MI.getOperand(1).getReg()) .addOperand(MI.getOperand(2)) .addImm(MI.getOperand(3).getImm()) // 'pred' .addOperand(MI.getOperand(4)); MI.eraseFromParent(); return true; } case ARM::t2MOVCCr: case ARM::MOVCCr: { unsigned Opc = AFI->isThumbFunction() ? 
ARM::t2MOVr : ARM::MOVr; BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), MI.getOperand(1).getReg()) .addOperand(MI.getOperand(2)) .addImm(MI.getOperand(3).getImm()) // 'pred' .addOperand(MI.getOperand(4)) .addReg(0); // 's' bit MI.eraseFromParent(); return true; } case ARM::MOVCCsi: { BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), (MI.getOperand(1).getReg())) .addOperand(MI.getOperand(2)) .addImm(MI.getOperand(3).getImm()) .addImm(MI.getOperand(4).getImm()) // 'pred' .addOperand(MI.getOperand(5)) .addReg(0); // 's' bit MI.eraseFromParent(); return true; } case ARM::MOVCCsr: { BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr), (MI.getOperand(1).getReg())) .addOperand(MI.getOperand(2)) .addOperand(MI.getOperand(3)) .addImm(MI.getOperand(4).getImm()) .addImm(MI.getOperand(5).getImm()) // 'pred' .addOperand(MI.getOperand(6)) .addReg(0); // 's' bit MI.eraseFromParent(); return true; } case ARM::t2MOVCCi16: case ARM::MOVCCi16: { unsigned NewOpc = AFI->isThumbFunction() ? ARM::t2MOVi16 : ARM::MOVi16; BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc), MI.getOperand(1).getReg()) .addImm(MI.getOperand(2).getImm()) .addImm(MI.getOperand(3).getImm()) // 'pred' .addOperand(MI.getOperand(4)); MI.eraseFromParent(); return true; } case ARM::t2MOVCCi: case ARM::MOVCCi: { unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVi : ARM::MOVi; BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), MI.getOperand(1).getReg()) .addImm(MI.getOperand(2).getImm()) .addImm(MI.getOperand(3).getImm()) // 'pred' .addOperand(MI.getOperand(4)) .addReg(0); // 's' bit MI.eraseFromParent(); return true; } case ARM::t2MVNCCi: case ARM::MVNCCi: { unsigned Opc = AFI->isThumbFunction() ? ARM::t2MVNi : ARM::MVNi; BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), MI.getOperand(1).getReg()) .addImm(MI.getOperand(2).getImm()) .addImm(MI.getOperand(3).getImm()) // 'pred' .addOperand(MI.getOperand(4)) .addReg(0); // 's' bit MI.eraseFromParent(); return true; } case ARM::t2MOVCClsl: case ARM::t2MOVCClsr: case ARM::t2MOVCCasr: case ARM::t2MOVCCror: { unsigned NewOpc; switch (Opcode) { case ARM::t2MOVCClsl: NewOpc = ARM::t2LSLri; break; case ARM::t2MOVCClsr: NewOpc = ARM::t2LSRri; break; case ARM::t2MOVCCasr: NewOpc = ARM::t2ASRri; break; case ARM::t2MOVCCror: NewOpc = ARM::t2RORri; break; default: llvm_unreachable("unexpected conditional move"); } BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc), MI.getOperand(1).getReg()) .addOperand(MI.getOperand(2)) .addImm(MI.getOperand(3).getImm()) .addImm(MI.getOperand(4).getImm()) // 'pred' .addOperand(MI.getOperand(5)) .addReg(0); // 's' bit MI.eraseFromParent(); return true; } case ARM::Int_eh_sjlj_dispatchsetup: { MachineFunction &MF = *MI.getParent()->getParent(); const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo *>(TII); const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); // For functions using a base pointer, we rematerialize it (via the frame // pointer) here since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it // for us. Otherwise, expand to nothing.
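// Illustratively (assuming ARM mode with r11 as the frame pointer and r6 as
// the base pointer, matching the code below):
//   sub r6, r11, #FramePtrSpillOffset  ; rematerialize the base pointer
//   bic r6, r6, #(MaxAlign - 1)        ; only if dynamic stack realignment is needed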
if (RI.hasBasePointer(MF)) { int32_t NumBytes = AFI->getFramePtrSpillOffset(); unsigned FramePtr = RI.getFrameRegister(MF); assert(MF.getSubtarget().getFrameLowering()->hasFP(MF) && "base pointer without frame pointer?"); if (AFI->isThumb2Function()) { emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, FramePtr, -NumBytes, ARMCC::AL, 0, *TII); } else if (AFI->isThumbFunction()) { emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, FramePtr, -NumBytes, *TII, RI); } else { emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, FramePtr, -NumBytes, ARMCC::AL, 0, *TII); } // If there's dynamic realignment, adjust for it. if (RI.needsStackRealignment(MF)) { MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned MaxAlign = MFI.getMaxAlignment(); assert (!AFI->isThumb1OnlyFunction()); // Emit bic r6, r6, MaxAlign assert(MaxAlign <= 256 && "The BIC instruction cannot encode " "immediates larger than 256 with all lower " "bits set."); unsigned bicOpc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri; AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(bicOpc), ARM::R6) .addReg(ARM::R6, RegState::Kill) .addImm(MaxAlign-1))); } } MI.eraseFromParent(); return true; } case ARM::MOVsrl_flag: case ARM::MOVsra_flag: { // These are just fancy MOVs instructions. AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), MI.getOperand(0).getReg()) .addOperand(MI.getOperand(1)) .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr : ARM_AM::asr), 1))) .addReg(ARM::CPSR, RegState::Define); MI.eraseFromParent(); return true; } case ARM::RRX: { // This encodes as "MOVs Rd, Rm, rrx MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),TII->get(ARM::MOVsi), MI.getOperand(0).getReg()) .addOperand(MI.getOperand(1)) .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0))) .addReg(0); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); return true; } case ARM::tTPsoft: case ARM::TPsoft: { + const bool Thumb = Opcode == ARM::tTPsoft; + MachineInstrBuilder MIB; - if (Opcode == ARM::tTPsoft) + if (STI->genLongCalls()) { + MachineFunction *MF = MBB.getParent(); + MachineConstantPool *MCP = MF->getConstantPool(); + unsigned PCLabelID = AFI->createPICLabelUId(); + MachineConstantPoolValue *CPV = + ARMConstantPoolSymbol::Create(MF->getFunction()->getContext(), + "__aeabi_read_tp", PCLabelID, 0); + unsigned Reg = MI.getOperand(0).getReg(); MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get( ARM::tBL)) - .addImm((unsigned)ARMCC::AL).addReg(0) - .addExternalSymbol("__aeabi_read_tp", 0); - else + TII->get(Thumb ? ARM::tLDRpci : ARM::LDRi12), Reg) + .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4)); + if (!Thumb) + MIB.addImm(0); + MIB.addImm(static_cast(ARMCC::AL)).addReg(0); + MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get( ARM::BL)) - .addExternalSymbol("__aeabi_read_tp", 0); + TII->get(Thumb ? ARM::tBLXr : ARM::BLX)); + if (Thumb) + MIB.addImm(static_cast(ARMCC::AL)).addReg(0); + MIB.addReg(Reg, RegState::Kill); + } else { + MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(Thumb ? ARM::tBL : ARM::BL)); + if (Thumb) + MIB.addImm(static_cast(ARMCC::AL)).addReg(0); + MIB.addExternalSymbol("__aeabi_read_tp", 0); + } MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); return true; } case ARM::tLDRpci_pic: case ARM::t2LDRpci_pic: { unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic) ? 
ARM::tLDRpci : ARM::t2LDRpci; unsigned DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); MachineInstrBuilder MIB1 = AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg) .addOperand(MI.getOperand(1))); MIB1->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstReg) .addOperand(MI.getOperand(2)); TransferImpOps(MI, MIB1, MIB2); MI.eraseFromParent(); return true; } case ARM::LDRLIT_ga_abs: case ARM::LDRLIT_ga_pcrel: case ARM::LDRLIT_ga_pcrel_ldr: case ARM::tLDRLIT_ga_abs: case ARM::tLDRLIT_ga_pcrel: { unsigned DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); const MachineOperand &MO1 = MI.getOperand(1); const GlobalValue *GV = MO1.getGlobal(); bool IsARM = Opcode != ARM::tLDRLIT_ga_pcrel && Opcode != ARM::tLDRLIT_ga_abs; bool IsPIC = Opcode != ARM::LDRLIT_ga_abs && Opcode != ARM::tLDRLIT_ga_abs; unsigned LDRLITOpc = IsARM ? ARM::LDRi12 : ARM::tLDRpci; unsigned PICAddOpc = IsARM ? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD) : ARM::tPICADD; // We need a new const-pool entry to load from. MachineConstantPool *MCP = MBB.getParent()->getConstantPool(); unsigned ARMPCLabelIndex = 0; MachineConstantPoolValue *CPV; if (IsPIC) { unsigned PCAdj = IsARM ? 8 : 4; ARMPCLabelIndex = AFI->createPICLabelUId(); CPV = ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj); } else CPV = ARMConstantPoolConstant::Create(GV, ARMCP::no_modifier); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LDRLITOpc), DstReg) .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4)); if (IsARM) MIB.addImm(0); AddDefaultPred(MIB); if (IsPIC) { MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(PICAddOpc)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstReg) .addImm(ARMPCLabelIndex); if (IsARM) AddDefaultPred(MIB); } MI.eraseFromParent(); return true; } case ARM::MOV_ga_pcrel: case ARM::MOV_ga_pcrel_ldr: case ARM::t2MOV_ga_pcrel: { // Expand into movw + movw. Also "add pc" / ldr [pc] in PIC mode. unsigned LabelId = AFI->createPICLabelUId(); unsigned DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); const MachineOperand &MO1 = MI.getOperand(1); const GlobalValue *GV = MO1.getGlobal(); unsigned TF = MO1.getTargetFlags(); bool isARM = Opcode != ARM::t2MOV_ga_pcrel; unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel; unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel :ARM::t2MOVTi16_ga_pcrel; unsigned LO16TF = TF | ARMII::MO_LO16; unsigned HI16TF = TF | ARMII::MO_HI16; unsigned PICAddOpc = isARM ? (Opcode == ARM::MOV_ga_pcrel_ldr ? 
ARM::PICLDR : ARM::PICADD) : ARM::tPICADD; MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg) .addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF) .addImm(LabelId); BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc), DstReg) .addReg(DstReg) .addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF) .addImm(LabelId); MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(PICAddOpc)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstReg).addImm(LabelId); if (isARM) { AddDefaultPred(MIB3); if (Opcode == ARM::MOV_ga_pcrel_ldr) MIB3->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); } TransferImpOps(MI, MIB1, MIB3); MI.eraseFromParent(); return true; } case ARM::MOVi32imm: case ARM::MOVCCi32imm: case ARM::t2MOVi32imm: case ARM::t2MOVCCi32imm: ExpandMOV32BitImm(MBB, MBBI); return true; case ARM::SUBS_PC_LR: { MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC) .addReg(ARM::LR) .addOperand(MI.getOperand(0)) .addOperand(MI.getOperand(1)) .addOperand(MI.getOperand(2)) .addReg(ARM::CPSR, RegState::Undef); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); return true; } case ARM::VLDMQIA: { unsigned NewOpc = ARM::VLDMDIA; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); unsigned OpIdx = 0; // Grab the Q register destination. bool DstIsDead = MI.getOperand(OpIdx).isDead(); unsigned DstReg = MI.getOperand(OpIdx++).getReg(); // Copy the source register. MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the predicate operands. MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); // Add the destination operands (D subregs). unsigned D0 = TRI->getSubReg(DstReg, ARM::dsub_0); unsigned D1 = TRI->getSubReg(DstReg, ARM::dsub_1); MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)) .addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); // Add an implicit def for the super-register. MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); TransferImpOps(MI, MIB, MIB); MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MI.eraseFromParent(); return true; } case ARM::VSTMQIA: { unsigned NewOpc = ARM::VSTMDIA; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); unsigned OpIdx = 0; // Grab the Q register source. bool SrcIsKill = MI.getOperand(OpIdx).isKill(); unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); // Copy the destination register. MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the predicate operands. MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); // Add the source operands (D subregs). unsigned D0 = TRI->getSubReg(SrcReg, ARM::dsub_0); unsigned D1 = TRI->getSubReg(SrcReg, ARM::dsub_1); MIB.addReg(D0, SrcIsKill ? RegState::Kill : 0) .addReg(D1, SrcIsKill ? RegState::Kill : 0); if (SrcIsKill) // Add an implicit kill for the Q register. 
MIB->addRegisterKilled(SrcReg, TRI, true); TransferImpOps(MI, MIB, MIB); MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MI.eraseFromParent(); return true; } case ARM::VLD2q8Pseudo: case ARM::VLD2q16Pseudo: case ARM::VLD2q32Pseudo: case ARM::VLD2q8PseudoWB_fixed: case ARM::VLD2q16PseudoWB_fixed: case ARM::VLD2q32PseudoWB_fixed: case ARM::VLD2q8PseudoWB_register: case ARM::VLD2q16PseudoWB_register: case ARM::VLD2q32PseudoWB_register: case ARM::VLD3d8Pseudo: case ARM::VLD3d16Pseudo: case ARM::VLD3d32Pseudo: case ARM::VLD1d64TPseudo: case ARM::VLD1d64TPseudoWB_fixed: case ARM::VLD3d8Pseudo_UPD: case ARM::VLD3d16Pseudo_UPD: case ARM::VLD3d32Pseudo_UPD: case ARM::VLD3q8Pseudo_UPD: case ARM::VLD3q16Pseudo_UPD: case ARM::VLD3q32Pseudo_UPD: case ARM::VLD3q8oddPseudo: case ARM::VLD3q16oddPseudo: case ARM::VLD3q32oddPseudo: case ARM::VLD3q8oddPseudo_UPD: case ARM::VLD3q16oddPseudo_UPD: case ARM::VLD3q32oddPseudo_UPD: case ARM::VLD4d8Pseudo: case ARM::VLD4d16Pseudo: case ARM::VLD4d32Pseudo: case ARM::VLD1d64QPseudo: case ARM::VLD1d64QPseudoWB_fixed: case ARM::VLD4d8Pseudo_UPD: case ARM::VLD4d16Pseudo_UPD: case ARM::VLD4d32Pseudo_UPD: case ARM::VLD4q8Pseudo_UPD: case ARM::VLD4q16Pseudo_UPD: case ARM::VLD4q32Pseudo_UPD: case ARM::VLD4q8oddPseudo: case ARM::VLD4q16oddPseudo: case ARM::VLD4q32oddPseudo: case ARM::VLD4q8oddPseudo_UPD: case ARM::VLD4q16oddPseudo_UPD: case ARM::VLD4q32oddPseudo_UPD: case ARM::VLD3DUPd8Pseudo: case ARM::VLD3DUPd16Pseudo: case ARM::VLD3DUPd32Pseudo: case ARM::VLD3DUPd8Pseudo_UPD: case ARM::VLD3DUPd16Pseudo_UPD: case ARM::VLD3DUPd32Pseudo_UPD: case ARM::VLD4DUPd8Pseudo: case ARM::VLD4DUPd16Pseudo: case ARM::VLD4DUPd32Pseudo: case ARM::VLD4DUPd8Pseudo_UPD: case ARM::VLD4DUPd16Pseudo_UPD: case ARM::VLD4DUPd32Pseudo_UPD: ExpandVLD(MBBI); return true; case ARM::VST2q8Pseudo: case ARM::VST2q16Pseudo: case ARM::VST2q32Pseudo: case ARM::VST2q8PseudoWB_fixed: case ARM::VST2q16PseudoWB_fixed: case ARM::VST2q32PseudoWB_fixed: case ARM::VST2q8PseudoWB_register: case ARM::VST2q16PseudoWB_register: case ARM::VST2q32PseudoWB_register: case ARM::VST3d8Pseudo: case ARM::VST3d16Pseudo: case ARM::VST3d32Pseudo: case ARM::VST1d64TPseudo: case ARM::VST3d8Pseudo_UPD: case ARM::VST3d16Pseudo_UPD: case ARM::VST3d32Pseudo_UPD: case ARM::VST1d64TPseudoWB_fixed: case ARM::VST1d64TPseudoWB_register: case ARM::VST3q8Pseudo_UPD: case ARM::VST3q16Pseudo_UPD: case ARM::VST3q32Pseudo_UPD: case ARM::VST3q8oddPseudo: case ARM::VST3q16oddPseudo: case ARM::VST3q32oddPseudo: case ARM::VST3q8oddPseudo_UPD: case ARM::VST3q16oddPseudo_UPD: case ARM::VST3q32oddPseudo_UPD: case ARM::VST4d8Pseudo: case ARM::VST4d16Pseudo: case ARM::VST4d32Pseudo: case ARM::VST1d64QPseudo: case ARM::VST4d8Pseudo_UPD: case ARM::VST4d16Pseudo_UPD: case ARM::VST4d32Pseudo_UPD: case ARM::VST1d64QPseudoWB_fixed: case ARM::VST1d64QPseudoWB_register: case ARM::VST4q8Pseudo_UPD: case ARM::VST4q16Pseudo_UPD: case ARM::VST4q32Pseudo_UPD: case ARM::VST4q8oddPseudo: case ARM::VST4q16oddPseudo: case ARM::VST4q32oddPseudo: case ARM::VST4q8oddPseudo_UPD: case ARM::VST4q16oddPseudo_UPD: case ARM::VST4q32oddPseudo_UPD: ExpandVST(MBBI); return true; case ARM::VLD1LNq8Pseudo: case ARM::VLD1LNq16Pseudo: case ARM::VLD1LNq32Pseudo: case ARM::VLD1LNq8Pseudo_UPD: case ARM::VLD1LNq16Pseudo_UPD: case ARM::VLD1LNq32Pseudo_UPD: case ARM::VLD2LNd8Pseudo: case ARM::VLD2LNd16Pseudo: case ARM::VLD2LNd32Pseudo: case ARM::VLD2LNq16Pseudo: case ARM::VLD2LNq32Pseudo: case ARM::VLD2LNd8Pseudo_UPD: case ARM::VLD2LNd16Pseudo_UPD: case ARM::VLD2LNd32Pseudo_UPD: 
case ARM::VLD2LNq16Pseudo_UPD: case ARM::VLD2LNq32Pseudo_UPD: case ARM::VLD3LNd8Pseudo: case ARM::VLD3LNd16Pseudo: case ARM::VLD3LNd32Pseudo: case ARM::VLD3LNq16Pseudo: case ARM::VLD3LNq32Pseudo: case ARM::VLD3LNd8Pseudo_UPD: case ARM::VLD3LNd16Pseudo_UPD: case ARM::VLD3LNd32Pseudo_UPD: case ARM::VLD3LNq16Pseudo_UPD: case ARM::VLD3LNq32Pseudo_UPD: case ARM::VLD4LNd8Pseudo: case ARM::VLD4LNd16Pseudo: case ARM::VLD4LNd32Pseudo: case ARM::VLD4LNq16Pseudo: case ARM::VLD4LNq32Pseudo: case ARM::VLD4LNd8Pseudo_UPD: case ARM::VLD4LNd16Pseudo_UPD: case ARM::VLD4LNd32Pseudo_UPD: case ARM::VLD4LNq16Pseudo_UPD: case ARM::VLD4LNq32Pseudo_UPD: case ARM::VST1LNq8Pseudo: case ARM::VST1LNq16Pseudo: case ARM::VST1LNq32Pseudo: case ARM::VST1LNq8Pseudo_UPD: case ARM::VST1LNq16Pseudo_UPD: case ARM::VST1LNq32Pseudo_UPD: case ARM::VST2LNd8Pseudo: case ARM::VST2LNd16Pseudo: case ARM::VST2LNd32Pseudo: case ARM::VST2LNq16Pseudo: case ARM::VST2LNq32Pseudo: case ARM::VST2LNd8Pseudo_UPD: case ARM::VST2LNd16Pseudo_UPD: case ARM::VST2LNd32Pseudo_UPD: case ARM::VST2LNq16Pseudo_UPD: case ARM::VST2LNq32Pseudo_UPD: case ARM::VST3LNd8Pseudo: case ARM::VST3LNd16Pseudo: case ARM::VST3LNd32Pseudo: case ARM::VST3LNq16Pseudo: case ARM::VST3LNq32Pseudo: case ARM::VST3LNd8Pseudo_UPD: case ARM::VST3LNd16Pseudo_UPD: case ARM::VST3LNd32Pseudo_UPD: case ARM::VST3LNq16Pseudo_UPD: case ARM::VST3LNq32Pseudo_UPD: case ARM::VST4LNd8Pseudo: case ARM::VST4LNd16Pseudo: case ARM::VST4LNd32Pseudo: case ARM::VST4LNq16Pseudo: case ARM::VST4LNq32Pseudo: case ARM::VST4LNd8Pseudo_UPD: case ARM::VST4LNd16Pseudo_UPD: case ARM::VST4LNd32Pseudo_UPD: case ARM::VST4LNq16Pseudo_UPD: case ARM::VST4LNq32Pseudo_UPD: ExpandLaneOp(MBBI); return true; case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true; case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true; case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true; case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true; case ARM::CMP_SWAP_8: if (STI->isThumb()) return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXB, ARM::t2STREXB, ARM::tUXTB, NextMBBI); else return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXB, ARM::STREXB, ARM::UXTB, NextMBBI); case ARM::CMP_SWAP_16: if (STI->isThumb()) return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXH, ARM::t2STREXH, ARM::tUXTH, NextMBBI); else return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXH, ARM::STREXH, ARM::UXTH, NextMBBI); case ARM::CMP_SWAP_32: if (STI->isThumb()) return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREX, ARM::t2STREX, 0, NextMBBI); else return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREX, ARM::STREX, 0, NextMBBI); case ARM::CMP_SWAP_64: return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI); } } bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { bool Modified = false; MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); while (MBBI != E) { MachineBasicBlock::iterator NMBBI = std::next(MBBI); Modified |= ExpandMI(MBB, MBBI, NMBBI); MBBI = NMBBI; } return Modified; } bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) { STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget()); TII = STI->getInstrInfo(); TRI = STI->getRegisterInfo(); AFI = MF.getInfo<ARMFunctionInfo>(); bool Modified = false; for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) Modified |= ExpandMBB(*MFI); if (VerifyARMPseudo) MF.verify(this, "After expanding ARM pseudo instructions."); return Modified; } /// createARMExpandPseudoPass - returns an instance of the pseudo instruction /// expansion pass.
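/// (The pass is typically added by the ARM pass config in ARMTargetMachine, e.g. via addPass(createARMExpandPseudoPass()) before post-RA scheduling; the exact pipeline hook is stated here as an assumption, not taken from this diff.)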
FunctionPass *llvm::createARMExpandPseudoPass() { return new ARMExpandPseudo(); } Index: projects/clang400-import/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- projects/clang400-import/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp (revision 314176) +++ projects/clang400-import/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp (revision 314177) @@ -1,13483 +1,13486 @@ //===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines the interfaces that ARM uses to lower LLVM code into a // selection DAG. // //===----------------------------------------------------------------------===// #include "ARMISelLowering.h" #include "ARMCallingConv.h" #include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" #include "ARMPerfectShuffle.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" #include "ARMTargetObjectFile.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" #include <utility> using namespace llvm; #define DEBUG_TYPE "arm-isel" STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt"); STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments"); STATISTIC(NumConstpoolPromoted, "Number of constants with their storage promoted into constant pools"); static cl::opt<bool> ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true)); static cl::opt<bool> EnableConstpoolPromotion( "arm-promote-constant", cl::Hidden, cl::desc("Enable / disable promotion of unnamed_addr constants into " "constant pools"), cl::init(true)); static cl::opt<unsigned> ConstpoolPromotionMaxSize( "arm-promote-constant-max-size", cl::Hidden, cl::desc("Maximum size of constant to promote into a constant pool"), cl::init(64)); static cl::opt<unsigned> ConstpoolPromotionMaxTotal( "arm-promote-constant-max-total", cl::Hidden, cl::desc("Maximum size of ALL constants to promote into a constant pool"), cl::init(128)); namespace { class ARMCCState : public CCState { public: ARMCCState(CallingConv::ID CC, bool isVarArg,
MachineFunction &MF, SmallVectorImpl<CCValAssign> &locs, LLVMContext &C, ParmContext PC) : CCState(CC, isVarArg, MF, locs, C) { assert(((PC == Call) || (PC == Prologue)) && "ARMCCState users must specify whether their context is call " "or prologue generation."); CallOrPrologue = PC; } }; } // The APCS parameter registers. static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT) { if (VT != PromotedLdStVT) { setOperationAction(ISD::LOAD, VT, Promote); AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT); setOperationAction(ISD::STORE, VT, Promote); AddPromotedToType (ISD::STORE, VT, PromotedLdStVT); } MVT ElemTy = VT.getVectorElementType(); if (ElemTy != MVT::f64) setOperationAction(ISD::SETCC, VT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); if (ElemTy == MVT::i32) { setOperationAction(ISD::SINT_TO_FP, VT, Custom); setOperationAction(ISD::UINT_TO_FP, VT, Custom); setOperationAction(ISD::FP_TO_SINT, VT, Custom); setOperationAction(ISD::FP_TO_UINT, VT, Custom); } else { setOperationAction(ISD::SINT_TO_FP, VT, Expand); setOperationAction(ISD::UINT_TO_FP, VT, Expand); setOperationAction(ISD::FP_TO_SINT, VT, Expand); setOperationAction(ISD::FP_TO_UINT, VT, Expand); } setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); setOperationAction(ISD::SELECT, VT, Expand); setOperationAction(ISD::SELECT_CC, VT, Expand); setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); if (VT.isInteger()) { setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); setOperationAction(ISD::SRL, VT, Custom); } // Promote all bit-wise operations. if (VT.isInteger() && VT != PromotedBitwiseVT) { setOperationAction(ISD::AND, VT, Promote); AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT); setOperationAction(ISD::OR, VT, Promote); AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT); setOperationAction(ISD::XOR, VT, Promote); AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT); } // Neon does not support vector divide/remainder operations.
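// Marking them Expand below means the legalizer scalarizes each lane, so a // v4i16 SDIV, for example, becomes four scalar divisions (which may themselves // become libcalls on cores without a hardware divider).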
setOperationAction(ISD::SDIV, VT, Expand); setOperationAction(ISD::UDIV, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); setOperationAction(ISD::SREM, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64) for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) setOperationAction(Opcode, VT, Legal); } void ARMTargetLowering::addDRTypeForNEON(MVT VT) { addRegisterClass(VT, &ARM::DPRRegClass); addTypeForNEON(VT, MVT::f64, MVT::v2i32); } void ARMTargetLowering::addQRTypeForNEON(MVT VT) { addRegisterClass(VT, &ARM::DPairRegClass); addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); } ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, const ARMSubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { RegInfo = Subtarget->getRegisterInfo(); Itins = Subtarget->getInstrItineraryData(); setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() && !Subtarget->isTargetWatchOS()) { const auto &E = Subtarget->getTargetTriple().getEnvironment(); bool IsHFTarget = E == Triple::EABIHF || E == Triple::GNUEABIHF || E == Triple::MuslEABIHF; // Windows is a special case. Technically, we will replace all of the "GNU" // calls with calls to MSVCRT if appropriate and adjust the calling // convention then. IsHFTarget = IsHFTarget || Subtarget->isTargetWindows(); for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID) setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID), IsHFTarget ? CallingConv::ARM_AAPCS_VFP : CallingConv::ARM_AAPCS); } if (Subtarget->isTargetMachO()) { // Uses VFP for Thumb libfuncs if available. if (Subtarget->isThumb() && Subtarget->hasVFP2() && Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) { static const struct { const RTLIB::Libcall Op; const char * const Name; const ISD::CondCode Cond; } LibraryCalls[] = { // Single-precision floating-point arithmetic. { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID }, { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID }, { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID }, { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID }, // Double-precision floating-point arithmetic. { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID }, { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID }, { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID }, { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID }, // Single-precision comparisons. { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE }, { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE }, { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE }, { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE }, { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE }, { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE }, { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE }, { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ }, // Double-precision comparisons. { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE }, { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE }, { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE }, { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE }, { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE }, { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE }, { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE }, { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ }, // Floating-point to integer conversions. // i64 conversions are done via library routines even when generating VFP // instructions, so use the same ones.
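// Only the i32 conversions get VFP-specific entry points in this table; the // i64 variants keep the default libgcc names (__fixdfdi and friends).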
{ RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID }, { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID }, { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID }, { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID }, // Conversions between floating types. { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID }, { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID }, // Integer to floating-point conversions. // i64 conversions are done via library routines even when generating VFP // instructions, so use the same ones. // FIXME: There appears to be some naming inconsistency in ARM libgcc: // e.g., __floatunsidf vs. __floatunssidfvfp. { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID }, { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID }, { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID }, { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID }, }; for (const auto &LC : LibraryCalls) { setLibcallName(LC.Op, LC.Name); if (LC.Cond != ISD::SETCC_INVALID) setCmpLibcallCC(LC.Op, LC.Cond); } } // Set the correct calling convention for ARMv7k WatchOS. It's just // AAPCS_VFP for functions as simple as libcalls. if (Subtarget->isTargetWatchABI()) { for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP); } } // These libcalls are not available in 32-bit. setLibcallName(RTLIB::SHL_I128, nullptr); setLibcallName(RTLIB::SRL_I128, nullptr); setLibcallName(RTLIB::SRA_I128, nullptr); // RTLIB if (Subtarget->isAAPCS_ABI() && (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) { static const struct { const RTLIB::Libcall Op; const char * const Name; const CallingConv::ID CC; const ISD::CondCode Cond; } LibraryCalls[] = { // Double-precision floating-point arithmetic helper functions // RTABI chapter 4.1.2, Table 2 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, // Double-precision floating-point comparison helper functions // RTABI chapter 4.1.2, Table 3 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ }, // Single-precision floating-point arithmetic helper functions // RTABI chapter 4.1.2, Table 4 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, // Single-precision floating-point comparison helper functions // RTABI chapter 4.1.2, Table 5 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", 
CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ }, // Floating-point to integer conversions. // RTABI chapter 4.1.2, Table 6 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, // Conversions between floating types. // RTABI chapter 4.1.2, Table 7 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, // Integer to floating-point conversions. // RTABI chapter 4.1.2, Table 8 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, // Long long helper functions // RTABI chapter 4.2, Table 9 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, // Integer division functions // RTABI chapter 4.3.1 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, }; for 
(const auto &LC : LibraryCalls) { setLibcallName(LC.Op, LC.Name); setLibcallCallingConv(LC.Op, LC.CC); if (LC.Cond != ISD::SETCC_INVALID) setCmpLibcallCC(LC.Op, LC.Cond); } // EABI dependent RTLIB if (TM.Options.EABIVersion == EABI::EABI4 || TM.Options.EABIVersion == EABI::EABI5) { static const struct { const RTLIB::Libcall Op; const char *const Name; const CallingConv::ID CC; const ISD::CondCode Cond; } MemOpsLibraryCalls[] = { // Memory operations // RTABI chapter 4.3.4 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, }; for (const auto &LC : MemOpsLibraryCalls) { setLibcallName(LC.Op, LC.Name); setLibcallCallingConv(LC.Op, LC.CC); if (LC.Cond != ISD::SETCC_INVALID) setCmpLibcallCC(LC.Op, LC.Cond); } } } if (Subtarget->isTargetWindows()) { static const struct { const RTLIB::Libcall Op; const char * const Name; const CallingConv::ID CC; } LibraryCalls[] = { { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP }, { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP }, { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP }, { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP }, { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP }, { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP }, { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP }, { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP }, }; for (const auto &LC : LibraryCalls) { setLibcallName(LC.Op, LC.Name); setLibcallCallingConv(LC.Op, LC.CC); } } // Use divmod compiler-rt calls for iOS 5.0 and later. if (Subtarget->isTargetWatchOS() || (Subtarget->isTargetIOS() && !Subtarget->getTargetTriple().isOSVersionLT(5, 0))) { setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); } // The half <-> float conversion functions are always soft-float on // non-watchos platforms, but are needed for some targets which use a // hard-float calling convention by default. if (!Subtarget->isTargetWatchABI()) { if (Subtarget->isAAPCS_ABI()) { setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS); } else { setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS); setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS); setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS); } } // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have // a __gnu_ prefix (which is the default). 
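// For example, the f32 -> f16 truncation libcall defaults to __gnu_f2h_ieee // and is renamed to __aeabi_f2h only for AEABI targets in the table below.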
if (Subtarget->isTargetAEABI()) { static const struct { const RTLIB::Libcall Op; const char * const Name; const CallingConv::ID CC; } LibraryCalls[] = { { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS }, { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS }, { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS }, }; for (const auto &LC : LibraryCalls) { setLibcallName(LC.Op, LC.Name); setLibcallCallingConv(LC.Op, LC.CC); } } if (Subtarget->isThumb1Only()) addRegisterClass(MVT::i32, &ARM::tGPRRegClass); else addRegisterClass(MVT::i32, &ARM::GPRRegClass); if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { addRegisterClass(MVT::f32, &ARM::SPRRegClass); addRegisterClass(MVT::f64, &ARM::DPRRegClass); } for (MVT VT : MVT::vector_valuetypes()) { for (MVT InnerVT : MVT::vector_valuetypes()) { setTruncStoreAction(VT, InnerVT, Expand); setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); } setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::SMUL_LOHI, VT, Expand); setOperationAction(ISD::MULHU, VT, Expand); setOperationAction(ISD::UMUL_LOHI, VT, Expand); setOperationAction(ISD::BSWAP, VT, Expand); } setOperationAction(ISD::ConstantFP, MVT::f32, Custom); setOperationAction(ISD::ConstantFP, MVT::f64, Custom); setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom); setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom); if (Subtarget->hasNEON()) { addDRTypeForNEON(MVT::v2f32); addDRTypeForNEON(MVT::v8i8); addDRTypeForNEON(MVT::v4i16); addDRTypeForNEON(MVT::v2i32); addDRTypeForNEON(MVT::v1i64); addQRTypeForNEON(MVT::v4f32); addQRTypeForNEON(MVT::v2f64); addQRTypeForNEON(MVT::v16i8); addQRTypeForNEON(MVT::v8i16); addQRTypeForNEON(MVT::v4i32); addQRTypeForNEON(MVT::v2i64); // v2f64 is legal so that QR subregs can be extracted as f64 elements, but // neither Neon nor VFP support any arithmetic operations on it. // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively // supported for v4f32. setOperationAction(ISD::FADD, MVT::v2f64, Expand); setOperationAction(ISD::FSUB, MVT::v2f64, Expand); setOperationAction(ISD::FMUL, MVT::v2f64, Expand); // FIXME: Code duplication: FDIV and FREM are expanded always, see // ARMTargetLowering::addTypeForNEON method for details. setOperationAction(ISD::FDIV, MVT::v2f64, Expand); setOperationAction(ISD::FREM, MVT::v2f64, Expand); // FIXME: Create unittest. // In other words, find a way to handle "copysign" when it appears in a DAG // with vector operands. setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand); // FIXME: Code duplication: SETCC has custom operation action, see // ARMTargetLowering::addTypeForNEON method for details. setOperationAction(ISD::SETCC, MVT::v2f64, Expand); // FIXME: Create unittest for FNEG and for FABS.
setOperationAction(ISD::FNEG, MVT::v2f64, Expand); setOperationAction(ISD::FABS, MVT::v2f64, Expand); setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); setOperationAction(ISD::FSIN, MVT::v2f64, Expand); setOperationAction(ISD::FCOS, MVT::v2f64, Expand); setOperationAction(ISD::FPOWI, MVT::v2f64, Expand); setOperationAction(ISD::FPOW, MVT::v2f64, Expand); setOperationAction(ISD::FLOG, MVT::v2f64, Expand); setOperationAction(ISD::FLOG2, MVT::v2f64, Expand); setOperationAction(ISD::FLOG10, MVT::v2f64, Expand); setOperationAction(ISD::FEXP, MVT::v2f64, Expand); setOperationAction(ISD::FEXP2, MVT::v2f64, Expand); // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR. setOperationAction(ISD::FCEIL, MVT::v2f64, Expand); setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand); setOperationAction(ISD::FRINT, MVT::v2f64, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); setOperationAction(ISD::FMA, MVT::v2f64, Expand); setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); setOperationAction(ISD::FSIN, MVT::v4f32, Expand); setOperationAction(ISD::FCOS, MVT::v4f32, Expand); setOperationAction(ISD::FPOWI, MVT::v4f32, Expand); setOperationAction(ISD::FPOW, MVT::v4f32, Expand); setOperationAction(ISD::FLOG, MVT::v4f32, Expand); setOperationAction(ISD::FLOG2, MVT::v4f32, Expand); setOperationAction(ISD::FLOG10, MVT::v4f32, Expand); setOperationAction(ISD::FEXP, MVT::v4f32, Expand); setOperationAction(ISD::FEXP2, MVT::v4f32, Expand); setOperationAction(ISD::FCEIL, MVT::v4f32, Expand); setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand); setOperationAction(ISD::FRINT, MVT::v4f32, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand); setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand); // Mark v2f32 intrinsics. setOperationAction(ISD::FSQRT, MVT::v2f32, Expand); setOperationAction(ISD::FSIN, MVT::v2f32, Expand); setOperationAction(ISD::FCOS, MVT::v2f32, Expand); setOperationAction(ISD::FPOWI, MVT::v2f32, Expand); setOperationAction(ISD::FPOW, MVT::v2f32, Expand); setOperationAction(ISD::FLOG, MVT::v2f32, Expand); setOperationAction(ISD::FLOG2, MVT::v2f32, Expand); setOperationAction(ISD::FLOG10, MVT::v2f32, Expand); setOperationAction(ISD::FEXP, MVT::v2f32, Expand); setOperationAction(ISD::FEXP2, MVT::v2f32, Expand); setOperationAction(ISD::FCEIL, MVT::v2f32, Expand); setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand); setOperationAction(ISD::FRINT, MVT::v2f32, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand); setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand); // Neon does not support some operations on v1i64 and v2i64 types. setOperationAction(ISD::MUL, MVT::v1i64, Expand); // Custom handling for some quad-vector types to detect VMULL. setOperationAction(ISD::MUL, MVT::v8i16, Custom); setOperationAction(ISD::MUL, MVT::v4i32, Custom); setOperationAction(ISD::MUL, MVT::v2i64, Custom); // Custom handling for some vector types to avoid expensive expansions. setOperationAction(ISD::SDIV, MVT::v4i16, Custom); setOperationAction(ISD::SDIV, MVT::v8i8, Custom); setOperationAction(ISD::UDIV, MVT::v4i16, Custom); setOperationAction(ISD::UDIV, MVT::v8i8, Custom); // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with // a destination type that is wider than the source, nor does it have a // FP_TO_[SU]INT instruction with a narrower destination than source.
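// The custom lowering consequently goes through the matching 32-bit vector // type: a v4i16 <-> v4f32 conversion widens to v4i32 first (vmovl) or // converts to v4i32 and then narrows (vmovn).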
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand); setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); // NEON does not have single instruction CTPOP for vectors with element // types wider than 8 bits. However, custom lowering can leverage the // v8i8/v16i8 vcnt instruction. setOperationAction(ISD::CTPOP, MVT::v2i32, Custom); setOperationAction(ISD::CTPOP, MVT::v4i32, Custom); setOperationAction(ISD::CTPOP, MVT::v4i16, Custom); setOperationAction(ISD::CTPOP, MVT::v8i16, Custom); setOperationAction(ISD::CTPOP, MVT::v1i64, Expand); setOperationAction(ISD::CTPOP, MVT::v2i64, Expand); setOperationAction(ISD::CTLZ, MVT::v1i64, Expand); setOperationAction(ISD::CTLZ, MVT::v2i64, Expand); // NEON does not have single instruction CTTZ for vectors. setOperationAction(ISD::CTTZ, MVT::v8i8, Custom); setOperationAction(ISD::CTTZ, MVT::v4i16, Custom); setOperationAction(ISD::CTTZ, MVT::v2i32, Custom); setOperationAction(ISD::CTTZ, MVT::v1i64, Custom); setOperationAction(ISD::CTTZ, MVT::v16i8, Custom); setOperationAction(ISD::CTTZ, MVT::v8i16, Custom); setOperationAction(ISD::CTTZ, MVT::v4i32, Custom); setOperationAction(ISD::CTTZ, MVT::v2i64, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom); // NEON only has FMA instructions as of VFP4. if (!Subtarget->hasVFP4()) { setOperationAction(ISD::FMA, MVT::v2f32, Expand); setOperationAction(ISD::FMA, MVT::v4f32, Expand); } setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::ANY_EXTEND); setTargetDAGCombine(ISD::BUILD_VECTOR); setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::FP_TO_SINT); setTargetDAGCombine(ISD::FP_TO_UINT); setTargetDAGCombine(ISD::FDIV); setTargetDAGCombine(ISD::LOAD); // It is legal to extload from v4i8 to v4i16 or v4i32. for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16, MVT::v2i32}) { for (MVT VT : MVT::integer_vector_valuetypes()) { setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal); setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal); setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal); } } } // ARM and Thumb2 support UMLAL/SMLAL. if (!Subtarget->isThumb1Only()) setTargetDAGCombine(ISD::ADDC); if (Subtarget->isFPOnlySP()) { // When targeting a floating-point unit with only single-precision // operations, f64 is legal for the few double-precision instructions which // are present. However, no double-precision operations other than moves, // loads and stores are provided by the hardware.
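// Expanding all the f64 arithmetic below therefore turns it into soft-float // libcalls (an f64 FADD becomes a call to __aeabi_dadd or __adddf3, depending // on the ABI), while f64 moves, loads and stores remain legal.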
setOperationAction(ISD::FADD, MVT::f64, Expand); setOperationAction(ISD::FSUB, MVT::f64, Expand); setOperationAction(ISD::FMUL, MVT::f64, Expand); setOperationAction(ISD::FMA, MVT::f64, Expand); setOperationAction(ISD::FDIV, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FGETSIGN, MVT::f64, Expand); setOperationAction(ISD::FNEG, MVT::f64, Expand); setOperationAction(ISD::FABS, MVT::f64, Expand); setOperationAction(ISD::FSQRT, MVT::f64, Expand); setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FPOWI, MVT::f64, Expand); setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FLOG, MVT::f64, Expand); setOperationAction(ISD::FLOG2, MVT::f64, Expand); setOperationAction(ISD::FLOG10, MVT::f64, Expand); setOperationAction(ISD::FEXP, MVT::f64, Expand); setOperationAction(ISD::FEXP2, MVT::f64, Expand); setOperationAction(ISD::FCEIL, MVT::f64, Expand); setOperationAction(ISD::FTRUNC, MVT::f64, Expand); setOperationAction(ISD::FRINT, MVT::f64, Expand); setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); setOperationAction(ISD::FFLOOR, MVT::f64, Expand); setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom); setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); } computeRegisterProperties(Subtarget->getRegisterInfo()); // ARM does not have floating-point extending loads. for (MVT VT : MVT::fp_valuetypes()) { setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); } // ... or truncating stores setTruncStoreAction(MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::f32, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f16, Expand); // ARM does not have i1 sign extending load. for (MVT VT : MVT::integer_valuetypes()) setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); // ARM supports all 4 flavors of integer indexed load / store. if (!Subtarget->isThumb1Only()) { for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { setIndexedLoadAction(im, MVT::i1, Legal); setIndexedLoadAction(im, MVT::i8, Legal); setIndexedLoadAction(im, MVT::i16, Legal); setIndexedLoadAction(im, MVT::i32, Legal); setIndexedStoreAction(im, MVT::i1, Legal); setIndexedStoreAction(im, MVT::i8, Legal); setIndexedStoreAction(im, MVT::i16, Legal); setIndexedStoreAction(im, MVT::i32, Legal); } } else { // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}. setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal); setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal); } setOperationAction(ISD::SADDO, MVT::i32, Custom); setOperationAction(ISD::UADDO, MVT::i32, Custom); setOperationAction(ISD::SSUBO, MVT::i32, Custom); setOperationAction(ISD::USUBO, MVT::i32, Custom); // i64 operation support. 
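// There is no 64-bit multiply instruction, so an i64 MUL is expanded in // terms of its 32-bit halves, typically a umull plus two mla instructions.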
setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::MULHU, MVT::i32, Expand); if (Subtarget->isThumb1Only()) { setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); } if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops() || (Subtarget->isThumb2() && !Subtarget->hasDSP())) setOperationAction(ISD::MULHS, MVT::i32, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRL, MVT::i64, Custom); setOperationAction(ISD::SRA, MVT::i64, Custom); if (!Subtarget->isThumb1Only()) { // FIXME: We should do this for Thumb1 as well. setOperationAction(ISD::ADDC, MVT::i32, Custom); setOperationAction(ISD::ADDE, MVT::i32, Custom); setOperationAction(ISD::SUBC, MVT::i32, Custom); setOperationAction(ISD::SUBE, MVT::i32, Custom); } if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); // ARM does not have ROTL. setOperationAction(ISD::ROTL, MVT::i32, Expand); for (MVT VT : MVT::vector_valuetypes()) { setOperationAction(ISD::ROTL, VT, Expand); setOperationAction(ISD::ROTR, VT, Expand); } setOperationAction(ISD::CTTZ, MVT::i32, Custom); setOperationAction(ISD::CTPOP, MVT::i32, Expand); if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) setOperationAction(ISD::CTLZ, MVT::i32, Expand); // @llvm.readcyclecounter requires the Performance Monitors extension. // Default to the 0 expansion on unsupported platforms. // FIXME: Technically there are older ARM CPUs that have // implementation-specific ways of obtaining this information. if (Subtarget->hasPerfMon()) setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); // Only ARMv6 has BSWAP. if (!Subtarget->hasV6Ops()) setOperationAction(ISD::BSWAP, MVT::i32, Expand); bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivide() : Subtarget->hasDivideInARMMode(); if (!hasDivide) { // These are expanded into libcalls if the cpu doesn't have HW divider. 
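// For instance, an i32 sdiv then becomes a call to __aeabi_idiv on AEABI // targets, or __divsi3 with the default libcall names.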
setOperationAction(ISD::SDIV, MVT::i32, LibCall); setOperationAction(ISD::UDIV, MVT::i32, LibCall); } if (Subtarget->isTargetWindows() && !Subtarget->hasDivide()) { setOperationAction(ISD::SDIV, MVT::i32, Custom); setOperationAction(ISD::UDIV, MVT::i32, Custom); setOperationAction(ISD::SDIV, MVT::i64, Custom); setOperationAction(ISD::UDIV, MVT::i64, Custom); } setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); // Register based DivRem for AEABI (RTABI 4.2) if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() || Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() || Subtarget->isTargetWindows()) { setOperationAction(ISD::SREM, MVT::i64, Custom); setOperationAction(ISD::UREM, MVT::i64, Custom); HasStandaloneRem = false; if (Subtarget->isTargetWindows()) { const struct { const RTLIB::Libcall Op; const char * const Name; const CallingConv::ID CC; } LibraryCalls[] = { { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS }, { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS }, { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS }, { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS }, { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS }, { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS }, { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS }, { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS }, }; for (const auto &LC : LibraryCalls) { setLibcallName(LC.Op, LC.Name); setLibcallCallingConv(LC.Op, LC.CC); } } else { const struct { const RTLIB::Libcall Op; const char * const Name; const CallingConv::ID CC; } LibraryCalls[] = { { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS }, { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS }, }; for (const auto &LC : LibraryCalls) { setLibcallName(LC.Op, LC.Name); setLibcallCallingConv(LC.Op, LC.CC); } } setOperationAction(ISD::SDIVREM, MVT::i32, Custom); setOperationAction(ISD::UDIVREM, MVT::i32, Custom); setOperationAction(ISD::SDIVREM, MVT::i64, Custom); setOperationAction(ISD::UDIVREM, MVT::i64, Custom); } else { setOperationAction(ISD::SDIVREM, MVT::i32, Expand); setOperationAction(ISD::UDIVREM, MVT::i32, Expand); } if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT()) for (auto &VT : {MVT::f32, MVT::f64}) setOperationAction(ISD::FPOWI, VT, Custom); setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); setOperationAction(ISD::TRAP, MVT::Other, Legal); // Use the default implementation. 
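// Only VASTART needs custom code, to store the address of the first variadic // argument slot; VAARG, VACOPY and VAEND use the generic expansions.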
setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::VAARG, MVT::Other, Expand); setOperationAction(ISD::VACOPY, MVT::Other, Expand); setOperationAction(ISD::VAEND, MVT::Other, Expand); setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); else setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use // the default expansion. InsertFencesForAtomic = false; if (Subtarget->hasAnyDataBarrier() && (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) { // ATOMIC_FENCE needs custom lowering; the others should have been expanded // to ldrex/strex loops already. setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); if (!Subtarget->isThumb() || !Subtarget->isMClass()) setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); // On v8, we have particularly efficient implementations of atomic fences // if they can be combined with nearby atomic loads and stores. if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) { // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc. InsertFencesForAtomic = true; } } else { // If there's anything we can use as a barrier, go through custom lowering // for ATOMIC_FENCE. // If the target has DMB in Thumb mode, fences can be inserted. if (Subtarget->hasDataBarrier()) InsertFencesForAtomic = true; setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Subtarget->hasAnyDataBarrier() ? Custom : Expand); // Set them all for expansion, which will force libcalls. setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the // Unordered/Monotonic case. if (!InsertFencesForAtomic) { setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); } } setOperationAction(ISD::PREFETCH, MVT::Other, Custom); // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. if (!Subtarget->hasV6Ops()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); } setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR // iff target supports vfp2. setOperationAction(ISD::BITCAST, MVT::i64, Custom); setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); } // We want to custom lower some of our intrinsics.
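// In particular the SjLj exception-handling intrinsics below, which must be // lowered to carefully crafted register save and restore sequences.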
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom); if (Subtarget->useSjLjEH()) setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); setOperationAction(ISD::SETCC, MVT::i32, Expand); setOperationAction(ISD::SETCC, MVT::f32, Expand); setOperationAction(ISD::SETCC, MVT::f64, Expand); setOperationAction(ISD::SELECT, MVT::i32, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); setOperationAction(ISD::SELECT, MVT::f64, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); // Thumb-1 cannot currently select ARMISD::SUBE. if (!Subtarget->isThumb1Only()) setOperationAction(ISD::SETCCE, MVT::i32, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Expand); setOperationAction(ISD::BR_CC, MVT::i32, Custom); setOperationAction(ISD::BR_CC, MVT::f32, Custom); setOperationAction(ISD::BR_CC, MVT::f64, Custom); setOperationAction(ISD::BR_JT, MVT::Other, Custom); // We don't support sin/cos/fmod/copysign/pow setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FSIN, MVT::f32, Expand); setOperationAction(ISD::FCOS, MVT::f32, Expand); setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f32, Expand); if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); } setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); if (!Subtarget->hasVFP4()) { setOperationAction(ISD::FMA, MVT::f64, Expand); setOperationAction(ISD::FMA, MVT::f32, Expand); } // Various VFP goodness if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) { // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded. if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) { setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); } // fp16 is a special v7 extension that adds f16 <-> f32 conversions. if (!Subtarget->hasFP16()) { setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); } } // Combine sin / cos into one node or libcall if possible. if (Subtarget->hasSinCos()) { setLibcallName(RTLIB::SINCOS_F32, "sincosf"); setLibcallName(RTLIB::SINCOS_F64, "sincos"); if (Subtarget->isTargetWatchABI()) { setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP); setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP); } if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) { // For iOS, we don't want the normal expansion of a libcall to // sincos. We want to issue a libcall to __sincos_stret. setOperationAction(ISD::FSINCOS, MVT::f64, Custom); setOperationAction(ISD::FSINCOS, MVT::f32, Custom); } } // FP-ARMv8 implements a lot of rounding-like FP operations.
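// The Legal entries below map roughly onto the VRINT family: FFLOOR to // vrintm, FCEIL to vrintp, FTRUNC to vrintz, FROUND to vrinta, FRINT to // vrintx and FNEARBYINT to vrintr.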
if (Subtarget->hasFPARMv8()) { setOperationAction(ISD::FFLOOR, MVT::f32, Legal); setOperationAction(ISD::FCEIL, MVT::f32, Legal); setOperationAction(ISD::FROUND, MVT::f32, Legal); setOperationAction(ISD::FTRUNC, MVT::f32, Legal); setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); setOperationAction(ISD::FRINT, MVT::f32, Legal); setOperationAction(ISD::FMINNUM, MVT::f32, Legal); setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal); setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal); setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); if (!Subtarget->isFPOnlySP()) { setOperationAction(ISD::FFLOOR, MVT::f64, Legal); setOperationAction(ISD::FCEIL, MVT::f64, Legal); setOperationAction(ISD::FROUND, MVT::f64, Legal); setOperationAction(ISD::FTRUNC, MVT::f64, Legal); setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); setOperationAction(ISD::FRINT, MVT::f64, Legal); setOperationAction(ISD::FMINNUM, MVT::f64, Legal); setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); } } if (Subtarget->hasNEON()) { // vmin and vmax aren't available in a scalar form, so we use // a NEON instruction with an undef lane instead. setOperationAction(ISD::FMINNAN, MVT::f32, Legal); setOperationAction(ISD::FMAXNAN, MVT::f32, Legal); setOperationAction(ISD::FMINNAN, MVT::v2f32, Legal); setOperationAction(ISD::FMAXNAN, MVT::v2f32, Legal); setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal); setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal); } // We have target-specific dag combine patterns for the following nodes: // ARMISD::VMOVRRD - No need to call setTargetDAGCombine setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::XOR); if (Subtarget->hasV6Ops()) setTargetDAGCombine(ISD::SRL); setStackPointerRegisterToSaveRestore(ARM::SP); if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() || !Subtarget->hasVFP2()) setSchedulingPreference(Sched::RegPressure); else setSchedulingPreference(Sched::Hybrid); //// temporary - rewrite interface to use type MaxStoresPerMemset = 8; MaxStoresPerMemsetOptSize = 4; MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores MaxStoresPerMemcpyOptSize = 2; MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores MaxStoresPerMemmoveOptSize = 2; // On ARM arguments smaller than 4 bytes are extended, so all arguments // are at least 4 bytes aligned. setMinStackArgumentAlignment(4); // Prefer likely predicted branches to selects on out-of-order cores. PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder(); setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2); } bool ARMTargetLowering::useSoftFloat() const { return Subtarget->useSoftFloat(); } // FIXME: It might make sense to define the representative register class as the // nearest super-register that has a non-null superset. For example, DPR_VFP2 is // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently, // SPR's representative would be DPR_VFP2. This should work well if register // pressure tracking were modified such that a register use would increment the // pressure of the register class's representative and all of its super // classes' representatives transitively.
We have not implemented this because // of the difficulty prior to coalescing of modeling operand register classes // due to the common occurrence of cross class copies and subregister insertions // and extractions. std::pair<const TargetRegisterClass *, uint8_t> ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const { const TargetRegisterClass *RRC = nullptr; uint8_t Cost = 1; switch (VT.SimpleTy) { default: return TargetLowering::findRepresentativeClass(TRI, VT); // Use DPR as representative register class for all floating point // and vector types. Since there are 32 SPR registers and 32 DPR registers, // the cost is 1 for both f32 and f64. case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16: case MVT::v2i32: case MVT::v1i64: case MVT::v2f32: RRC = &ARM::DPRRegClass; // When NEON is used for SP, only half of the register file is available // because operations that define both SP and DP results will be constrained // to the VFP2 class (D0-D15). We currently model this constraint prior to // coalescing by double-counting the SP regs. See the FIXME above. if (Subtarget->useNEONForSinglePrecisionFP()) Cost = 2; break; case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: case MVT::v4f32: case MVT::v2f64: RRC = &ARM::DPRRegClass; Cost = 2; break; case MVT::v4i64: RRC = &ARM::DPRRegClass; Cost = 4; break; case MVT::v8i64: RRC = &ARM::DPRRegClass; Cost = 8; break; } return std::make_pair(RRC, Cost); } const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((ARMISD::NodeType)Opcode) { case ARMISD::FIRST_NUMBER: break; case ARMISD::Wrapper: return "ARMISD::Wrapper"; case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC"; case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL"; case ARMISD::CALL: return "ARMISD::CALL"; case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED"; case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK"; case ARMISD::BRCOND: return "ARMISD::BRCOND"; case ARMISD::BR_JT: return "ARMISD::BR_JT"; case ARMISD::BR2_JT: return "ARMISD::BR2_JT"; case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG"; case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; case ARMISD::CMP: return "ARMISD::CMP"; case ARMISD::CMN: return "ARMISD::CMN"; case ARMISD::CMPZ: return "ARMISD::CMPZ"; case ARMISD::CMPFP: return "ARMISD::CMPFP"; case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; case ARMISD::BCC_i64: return "ARMISD::BCC_i64"; case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; case ARMISD::CMOV: return "ARMISD::CMOV"; case ARMISD::SSAT: return "ARMISD::SSAT"; case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; case ARMISD::RRX: return "ARMISD::RRX"; case ARMISD::ADDC: return "ARMISD::ADDC"; case ARMISD::ADDE: return "ARMISD::ADDE"; case ARMISD::SUBC: return "ARMISD::SUBC"; case ARMISD::SUBE: return "ARMISD::SUBE"; case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD"; case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP"; case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP"; case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH"; case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN"; case ARMISD::THREAD_POINTER: return "ARMISD::THREAD_POINTER"; case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC"; case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR"; case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; case ARMISD::WIN__CHKSTK:
return "ARMISD::WIN__CHKSTK"; case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK"; case ARMISD::VCEQ: return "ARMISD::VCEQ"; case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; case ARMISD::VCGE: return "ARMISD::VCGE"; case ARMISD::VCGEZ: return "ARMISD::VCGEZ"; case ARMISD::VCLEZ: return "ARMISD::VCLEZ"; case ARMISD::VCGEU: return "ARMISD::VCGEU"; case ARMISD::VCGT: return "ARMISD::VCGT"; case ARMISD::VCGTZ: return "ARMISD::VCGTZ"; case ARMISD::VCLTZ: return "ARMISD::VCLTZ"; case ARMISD::VCGTU: return "ARMISD::VCGTU"; case ARMISD::VTST: return "ARMISD::VTST"; case ARMISD::VSHL: return "ARMISD::VSHL"; case ARMISD::VSHRs: return "ARMISD::VSHRs"; case ARMISD::VSHRu: return "ARMISD::VSHRu"; case ARMISD::VRSHRs: return "ARMISD::VRSHRs"; case ARMISD::VRSHRu: return "ARMISD::VRSHRu"; case ARMISD::VRSHRN: return "ARMISD::VRSHRN"; case ARMISD::VQSHLs: return "ARMISD::VQSHLs"; case ARMISD::VQSHLu: return "ARMISD::VQSHLu"; case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu"; case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs"; case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu"; case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu"; case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs"; case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu"; case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; case ARMISD::VSLI: return "ARMISD::VSLI"; case ARMISD::VSRI: return "ARMISD::VSRI"; case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM"; case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM"; case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM"; case ARMISD::VDUP: return "ARMISD::VDUP"; case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; case ARMISD::VEXT: return "ARMISD::VEXT"; case ARMISD::VREV64: return "ARMISD::VREV64"; case ARMISD::VREV32: return "ARMISD::VREV32"; case ARMISD::VREV16: return "ARMISD::VREV16"; case ARMISD::VZIP: return "ARMISD::VZIP"; case ARMISD::VUZP: return "ARMISD::VUZP"; case ARMISD::VTRN: return "ARMISD::VTRN"; case ARMISD::VTBL1: return "ARMISD::VTBL1"; case ARMISD::VTBL2: return "ARMISD::VTBL2"; case ARMISD::VMULLs: return "ARMISD::VMULLs"; case ARMISD::VMULLu: return "ARMISD::VMULLu"; case ARMISD::UMAAL: return "ARMISD::UMAAL"; case ARMISD::UMLAL: return "ARMISD::UMLAL"; case ARMISD::SMLAL: return "ARMISD::SMLAL"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; case ARMISD::BFI: return "ARMISD::BFI"; case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; case ARMISD::VBSL: return "ARMISD::VBSL"; case ARMISD::MEMCPY: return "ARMISD::MEMCPY"; case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP"; case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP"; case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP"; case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD"; case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD"; case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD"; case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD"; case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD"; case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD"; case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD"; case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD"; case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD"; case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD"; case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD"; case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD"; case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD"; case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD"; case ARMISD::VST4_UPD: return 
"ARMISD::VST4_UPD"; case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD"; case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD"; case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD"; } return nullptr; } EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const { if (!VT.isVector()) return getPointerTy(DL); return VT.changeVectorElementTypeToInteger(); } /// getRegClassFor - Return the register class that should be used for the /// specified value type. const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const { // Map v4i64 to QQ registers but do not make the type legal. Similarly map // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to // load / store 4 to 8 consecutive D registers. if (Subtarget->hasNEON()) { if (VT == MVT::v4i64) return &ARM::QQPRRegClass; if (VT == MVT::v8i64) return &ARM::QQQQPRRegClass; } return TargetLowering::getRegClassFor(VT); } // memcpy, and other memory intrinsics, typically tries to use LDM/STM if the // source/dest is aligned and the copy size is large enough. We therefore want // to align such objects passed to memory intrinsics. bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, unsigned &PrefAlign) const { if (!isa(CI)) return false; MinSize = 8; // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1 // cycle faster than 4-byte aligned LDM. PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4); return true; } // Create a fast isel object. FastISel * ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const { return ARM::createFastISel(funcInfo, libInfo); } Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { unsigned NumVals = N->getNumValues(); if (!NumVals) return Sched::RegPressure; for (unsigned i = 0; i != NumVals; ++i) { EVT VT = N->getValueType(i); if (VT == MVT::Glue || VT == MVT::Other) continue; if (VT.isFloatingPoint() || VT.isVector()) return Sched::ILP; } if (!N->isMachineOpcode()) return Sched::RegPressure; // Load are scheduled for latency even if there instruction itinerary // is not available. const TargetInstrInfo *TII = Subtarget->getInstrInfo(); const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); if (MCID.getNumDefs() == 0) return Sched::RegPressure; if (!Itins->isEmpty() && Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2) return Sched::ILP; return Sched::RegPressure; } //===----------------------------------------------------------------------===// // Lowering Code //===----------------------------------------------------------------------===// /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { switch (CC) { default: llvm_unreachable("Unknown condition code!"); case ISD::SETNE: return ARMCC::NE; case ISD::SETEQ: return ARMCC::EQ; case ISD::SETGT: return ARMCC::GT; case ISD::SETGE: return ARMCC::GE; case ISD::SETLT: return ARMCC::LT; case ISD::SETLE: return ARMCC::LE; case ISD::SETUGT: return ARMCC::HI; case ISD::SETUGE: return ARMCC::HS; case ISD::SETULT: return ARMCC::LO; case ISD::SETULE: return ARMCC::LS; } } /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. 
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, ARMCC::CondCodes &CondCode2) { CondCode2 = ARMCC::AL; switch (CC) { default: llvm_unreachable("Unknown FP condition!"); case ISD::SETEQ: case ISD::SETOEQ: CondCode = ARMCC::EQ; break; case ISD::SETGT: case ISD::SETOGT: CondCode = ARMCC::GT; break; case ISD::SETGE: case ISD::SETOGE: CondCode = ARMCC::GE; break; case ISD::SETOLT: CondCode = ARMCC::MI; break; case ISD::SETOLE: CondCode = ARMCC::LS; break; case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; case ISD::SETO: CondCode = ARMCC::VC; break; case ISD::SETUO: CondCode = ARMCC::VS; break; case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break; case ISD::SETUGT: CondCode = ARMCC::HI; break; case ISD::SETUGE: CondCode = ARMCC::PL; break; case ISD::SETLT: case ISD::SETULT: CondCode = ARMCC::LT; break; case ISD::SETLE: case ISD::SETULE: CondCode = ARMCC::LE; break; case ISD::SETNE: case ISD::SETUNE: CondCode = ARMCC::NE; break; } } //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// #include "ARMGenCallingConv.inc" /// getEffectiveCallingConv - Get the effective calling convention, taking into /// account presence of floating point hardware and calling convention /// limitations, such as support for variadic functions. CallingConv::ID ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, bool isVarArg) const { switch (CC) { default: llvm_unreachable("Unsupported calling convention"); case CallingConv::ARM_AAPCS: case CallingConv::ARM_APCS: case CallingConv::GHC: return CC; case CallingConv::PreserveMost: return CallingConv::PreserveMost; case CallingConv::ARM_AAPCS_VFP: case CallingConv::Swift: return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP; case CallingConv::C: if (!Subtarget->isAAPCS_ABI()) return CallingConv::ARM_APCS; else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && getTargetMachine().Options.FloatABIType == FloatABI::Hard && !isVarArg) return CallingConv::ARM_AAPCS_VFP; else return CallingConv::ARM_AAPCS; case CallingConv::Fast: case CallingConv::CXX_FAST_TLS: if (!Subtarget->isAAPCS_ABI()) { if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) return CallingConv::Fast; return CallingConv::ARM_APCS; } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) return CallingConv::ARM_AAPCS_VFP; else return CallingConv::ARM_AAPCS; } } CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const { return CCAssignFnForNode(CC, false, isVarArg); } CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const { return CCAssignFnForNode(CC, true, isVarArg); } /// CCAssignFnForNode - Selects the correct CCAssignFn for the given /// CallingConvention. CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const { switch (getEffectiveCallingConv(CC, isVarArg)) { default: llvm_unreachable("Unsupported calling convention"); case CallingConv::ARM_APCS: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); case CallingConv::ARM_AAPCS: return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); case CallingConv::ARM_AAPCS_VFP: return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); case CallingConv::Fast: return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); case CallingConv::GHC: return (Return ? 
RetCC_ARM_APCS : CC_ARM_APCS_GHC); case CallingConv::PreserveMost: return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); } } /// LowerCallResult - Lower the result values of a call into the /// appropriate copies out of appropriate physical registers. SDValue ARMTargetLowering::LowerCallResult( SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals, bool isThisReturn, SDValue ThisVal) const { // Assign locations to each value returned by this call. SmallVector RVLocs; ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext(), Call); CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg)); // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign VA = RVLocs[i]; // Pass 'this' value directly from the argument to return value, to avoid // reg unit interference if (i == 0 && isThisReturn) { assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 && "unexpected return calling convention register assignment"); InVals.push_back(ThisVal); continue; } SDValue Val; if (VA.needsCustom()) { // Handle f64 or half of a v2f64. SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); Chain = Lo.getValue(1); InFlag = Lo.getValue(2); VA = RVLocs[++i]; // skip ahead to next loc SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); if (!Subtarget->isLittle()) std::swap (Lo, Hi); Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); if (VA.getLocVT() == MVT::v2f64) { SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, DAG.getConstant(0, dl, MVT::i32)); VA = RVLocs[++i]; // skip ahead to next loc Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); Chain = Lo.getValue(1); InFlag = Lo.getValue(2); VA = RVLocs[++i]; // skip ahead to next loc Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); if (!Subtarget->isLittle()) std::swap (Lo, Hi); Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, DAG.getConstant(1, dl, MVT::i32)); } } else { Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), InFlag); Chain = Val.getValue(1); InFlag = Val.getValue(2); } switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::BCvt: Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); break; } InVals.push_back(Val); } return Chain; } /// LowerMemOpCallTo - Store the argument to the stack. 
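// (Editorial sketch, inferred from the body below.) The store lands in the
// outgoing-argument area at SP + LocMemOffset, i.e. roughly:
//   add rT, sp, #LocMemOffset
//   str rArg, [rT]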
SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const { unsigned LocMemOffset = VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), StackPtr, PtrOff); return DAG.getStore( Chain, dl, Arg, PtrOff, MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset)); } void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg, RegsToPassVector &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, SDValue &StackPtr, SmallVectorImpl<SDValue> &MemOpChains, ISD::ArgFlagsTy Flags) const { SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Arg); unsigned id = Subtarget->isLittle() ? 0 : 1; RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id))); if (NextVA.isRegLoc()) RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id))); else { assert(NextVA.isMemLoc()); if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout())); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id), dl, DAG, NextVA, Flags)); } } /// LowerCall - Lowering a call into a callseq_start <- /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter /// nodes. SDValue ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc &dl = CLI.DL; SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool doesNotRet = CLI.DoesNotReturn; bool isVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); bool isThisReturn = false; bool isSibCall = false; auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls"); // Disable tail calls if they're not supported. if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true") isTailCall = false; if (isTailCall) { // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(), Outs, OutVals, Ins, DAG); if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall()) report_fatal_error("failed to perform tail call elimination on a call " "site marked musttail"); // We don't support GuaranteedTailCallOpt for ARM, only automatically // detected sibcalls. if (isTailCall) { ++NumTailCalls; isSibCall = true; } } // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext(), Call); CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg)); // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); // For tail calls, memory operands are available in our caller's stack. if (isSibCall) NumBytes = 0; // Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass if (!isSibCall) Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), dl); SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout())); RegsToPassVector RegsToPass; SmallVector<SDValue, 8> MemOpChains; // Walk the register/memloc assignments, inserting copies/loads. In the case // of tail call optimization, arguments are handled later. for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e; ++i, ++realArgIdx) { CCValAssign &VA = ArgLocs[i]; SDValue Arg = OutVals[realArgIdx]; ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; bool isByVal = Flags.isByVal(); // Promote the value if needed. switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::SExt: Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::ZExt: Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::AExt: Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::BCvt: Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); break; } // f64 and v2f64 might be passed in i32 pairs and must be split into pieces if (VA.needsCustom()) { if (VA.getLocVT() == MVT::v2f64) { SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, DAG.getConstant(0, dl, MVT::i32)); SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, DAG.getConstant(1, dl, MVT::i32)); PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); VA = ArgLocs[++i]; // skip ahead to next loc if (VA.isRegLoc()) { PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); } else { assert(VA.isMemLoc()); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1, dl, DAG, VA, Flags)); } } else { PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); } } else if (VA.isRegLoc()) { if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() && Outs[0].VT == MVT::i32) { assert(VA.getLocVT() == MVT::i32 && "unexpected calling convention register assignment"); assert(!Ins.empty() && Ins[0].VT == MVT::i32 && "unexpected use of 'returned'"); isThisReturn = true; } RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else if (isByVal) { assert(VA.isMemLoc()); unsigned offset = 0; // True if this byval aggregate will be split between registers // and memory. unsigned ByValArgsCount = CCInfo.getInRegsParamsCount(); unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed(); if (CurByValIdx < ByValArgsCount) { unsigned RegBegin, RegEnd; CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); unsigned int i, j; for (i = 0, j = RegBegin; j < RegEnd; i++, j++) { SDValue Const = DAG.getConstant(4*i, dl, MVT::i32); SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(), DAG.InferPtrAlignment(AddArg)); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(j, Load)); } // If the parameter size exceeds the register area, the "offset" value // helps us calculate the stack slot for the remaining part properly.
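// (Editorial illustration.) For a byval argument split across registers and
// stack, the loop above emits one 4-byte load per register in
// [RegBegin, RegEnd); the code below then copies the remaining
// getByValSize() - 4*offset bytes with an ARMISD::COPY_STRUCT_BYVAL pseudo,
// which is later expanded into a copy loop.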
offset = RegEnd - RegBegin; CCInfo.nextInRegsParam(); } if (Flags.getByValSize() > 4*offset) { auto PtrVT = getPointerTy(DAG.getDataLayout()); unsigned LocMemOffset = VA.getLocMemOffset(); SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff); SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl); SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset); SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl, MVT::i32); SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl, MVT::i32); SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode}; MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, Ops)); } } else if (!isSibCall) { assert(VA.isMemLoc()); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, dl, DAG, VA, Flags)); } } if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; // Tail call byval lowering might overwrite argument registers so in case of // tail call optimization the copies to registers are lowered later. if (!isTailCall) for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } // For tail calls lower the arguments to the 'real' stack slot. if (isTailCall) { // Force all the incoming stack arguments to be loaded from the stack // before any new outgoing arguments are stored to the stack, because the // outgoing stack slots may alias the incoming argument stack slots, and // the alias isn't otherwise explicit. This is slightly more conservative // than necessary, because it means that each store effectively depends // on every argument instead of just those arguments it would clobber. // Do not flag preceding copytoreg stuff together with the following stuff. InFlag = SDValue(); for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } InFlag = SDValue(); } // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. bool isDirect = false; const TargetMachine &TM = getTargetMachine(); const Module *Mod = MF.getFunction()->getParent(); const GlobalValue *GV = nullptr; if (GlobalAddressSDNode *G = dyn_cast(Callee)) GV = G->getGlobal(); bool isStub = !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO(); bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass()); bool isLocalARMFunc = false; ARMFunctionInfo *AFI = MF.getInfo(); auto PtrVt = getPointerTy(DAG.getDataLayout()); if (Subtarget->genLongCalls()) { assert((!isPositionIndependent() || Subtarget->isTargetWindows()) && "long-calls codegen is not position independent!"); // Handle a global address or an external symbol. If it's not one of // those, the target's already in a register, so we don't need to do // anything extra. 
if (isa(Callee)) { // Create a constant pool entry for the callee address unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0); // Get the address of the callee into a register SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad( PtrVt, dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); } else if (ExternalSymbolSDNode *S=dyn_cast(Callee)) { const char *Sym = S->getSymbol(); // Create a constant pool entry for the callee address unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, ARMPCLabelIndex, 0); // Get the address of the callee into a register SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad( PtrVt, dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); } } else if (isa(Callee)) { // If we're optimizing for minimum size and the function is called three or // more times in this block, we can improve codesize by calling indirectly // as BLXr has a 16-bit encoding. auto *GV = cast(Callee)->getGlobal(); auto *BB = CLI.CS->getParent(); bool PreferIndirect = Subtarget->isThumb() && MF.getFunction()->optForMinSize() && count_if(GV->users(), [&BB](const User *U) { return isa(U) && cast(U)->getParent() == BB; }) > 2; if (!PreferIndirect) { isDirect = true; bool isDef = GV->isStrongDefinitionForLinker(); // ARM call to a local ARM function is predicable. isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking); // tBX takes a register source operand. if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?"); Callee = DAG.getNode( ARMISD::WrapperPIC, dl, PtrVt, DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY)); Callee = DAG.getLoad( PtrVt, dl, DAG.getEntryNode(), Callee, MachinePointerInfo::getGOT(DAG.getMachineFunction()), /* Alignment = */ 0, MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant); } else if (Subtarget->isTargetCOFF()) { assert(Subtarget->isTargetWindows() && "Windows is the only supported COFF target"); unsigned TargetFlags = GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG; Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0, TargetFlags); if (GV->hasDLLImportStorageClass()) Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee), MachinePointerInfo::getGOT(DAG.getMachineFunction())); } else { Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0); } } } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { isDirect = true; // tBX takes a register source operand. 
const char *Sym = S->getSymbol(); if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, ARMPCLabelIndex, 4); SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad( PtrVt, dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel); } else { Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0); } } // FIXME: handle tail calls differently. unsigned CallOpc; if (Subtarget->isThumb()) { if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; else CallOpc = ARMISD::CALL; } else { if (!isDirect && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() && // Emit regular call when code size is the priority !MF.getFunction()->optForMinSize()) // "mov lr, pc; b _foo" to avoid confusing the RSP CallOpc = ARMISD::CALL_NOLINK; else CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL; } std::vector Ops; Ops.push_back(Chain); Ops.push_back(Callee); // Add argument registers to the end of the list so that they are known live // into the call. for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. if (!isTailCall) { const uint32_t *Mask; const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo(); if (isThisReturn) { // For 'this' returns, use the R0-preserving mask if applicable Mask = ARI->getThisReturnPreservedMask(MF, CallConv); if (!Mask) { // Set isThisReturn to false if the calling convention is not one that // allows 'returned' to be modeled in this way, so LowerCallResult does // not try to pass 'this' straight through isThisReturn = false; Mask = ARI->getCallPreservedMask(MF, CallConv); } } else Mask = ARI->getCallPreservedMask(MF, CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); } if (InFlag.getNode()) Ops.push_back(InFlag); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); if (isTailCall) { MF.getFrameInfo().setHasTailCall(); return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops); } // Returns a chain and a flag for retval copy to use. Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), DAG.getIntPtrConstant(0, dl, true), InFlag, dl); if (!Ins.empty()) InFlag = Chain.getValue(1); // Handle result values, copying them out of physregs into vregs that we // return. return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, InVals, isThisReturn, isThisReturn ? OutVals[0] : SDValue()); } /// HandleByVal - Every parameter *after* a byval parameter is passed /// on the stack. Remember the next parameter register to allocate, /// and then confiscate the rest of the parameter registers to insure /// this. 
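// (Editorial example, not part of the original source.) With the AAPCS GPR
// argument registers r0-r3: a 12-byte byval argument that arrives when only
// r2 and r3 are still free (and nothing has yet been assigned to the stack)
// is split, so r2/r3 carry the first 8 bytes, the last 4 bytes stay in
// memory, and Size is truncated below to that in-memory remainder (or to 0
// when the whole aggregate fits in registers).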
void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size, unsigned Align) const { assert((State->getCallOrPrologue() == Prologue || State->getCallOrPrologue() == Call) && "unhandled ParmContext"); // Byval (as with any stack) slots are always at least 4 byte aligned. Align = std::max(Align, 4U); unsigned Reg = State->AllocateReg(GPRArgRegs); if (!Reg) return; unsigned AlignInRegs = Align / 4; unsigned Waste = (ARM::R4 - Reg) % AlignInRegs; for (unsigned i = 0; i < Waste; ++i) Reg = State->AllocateReg(GPRArgRegs); if (!Reg) return; unsigned Excess = 4 * (ARM::R4 - Reg); // Special case when NSAA != SP and the parameter size is greater than the // size of all remaining GPR regs. In that case we can't split the parameter; // we must send it to the stack. We also must set NCRN to R4, so waste all // remaining registers. const unsigned NSAAOffset = State->getNextStackOffset(); if (NSAAOffset != 0 && Size > Excess) { while (State->AllocateReg(GPRArgRegs)) ; return; } // The first register for the byval parameter is the first register that // wasn't allocated before this method call, so it would be "reg". // If the parameter is small enough to be saved in the range [reg, r4), then // the end (one past the last) register would be reg + param-size-in-regs; // otherwise the parameter is split between registers and stack, and the end // register would be r4 in that case. unsigned ByValRegBegin = Reg; unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4); State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd); // Note: the first register was already allocated at the beginning of the // function, so allocate only the remaining registers we need. for (unsigned i = Reg + 1; i != ByValRegEnd; ++i) State->AllocateReg(GPRArgRegs); // A byval parameter that is split between registers and memory needs its // size truncated here. // In the case where the entire structure fits in registers, we set the // size in memory to zero. Size = std::max<int>(Size - Excess, 0); } /// MatchingStackOffset - Return true if the given stack call argument is /// already available in the same position (relatively) of the caller's /// incoming argument stack. static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, const TargetInstrInfo *TII) { unsigned Bytes = Arg.getValueSizeInBits() / 8; int FI = INT_MAX; if (Arg.getOpcode() == ISD::CopyFromReg) { unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); if (!TargetRegisterInfo::isVirtualRegister(VR)) return false; MachineInstr *Def = MRI->getVRegDef(VR); if (!Def) return false; if (!Flags.isByVal()) { if (!TII->isLoadFromStackSlot(*Def, FI)) return false; } else { return false; } } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { if (Flags.isByVal()) // ByVal argument is passed in as a pointer but it's now being // dereferenced. e.g. // define @foo(%struct.X* %A) { // tail call @bar(%struct.X* byval %A) // } return false; SDValue Ptr = Ld->getBasePtr(); FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); if (!FINode) return false; FI = FINode->getIndex(); } else return false; assert(FI != INT_MAX); if (!MFI.isFixedObjectIndex(FI)) return false; return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI); } /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call /// optimization should implement this function.
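// (Editorial summary of the checks that follow, not from the original
// source.) A sibcall is rejected if: the caller has the "interrupt"
// attribute; either side uses struct-return semantics; the callee is an
// externally-visible weak symbol whose branch behaviour is
// implementation-defined; result-passing or callee-saved-register
// conventions differ between caller and callee; a vararg or byval argument
// was split between registers and the caller's frame; or outgoing stack
// arguments are not already laid out where the caller's incoming fixed
// arguments live.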
bool ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, bool isCalleeStructRet, bool isCallerStructRet, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, SelectionDAG& DAG) const { MachineFunction &MF = DAG.getMachineFunction(); const Function *CallerF = MF.getFunction(); CallingConv::ID CallerCC = CallerF->getCallingConv(); assert(Subtarget->supportsTailCall()); // Look for obvious safe cases to perform tail call optimization that do not // require ABI changes. This is what gcc calls sibcall. // Exception-handling functions need a special set of instructions to indicate // a return to the hardware. Tail-calling another function would probably // break this. if (CallerF->hasFnAttribute("interrupt")) return false; // Also avoid sibcall optimization if either caller or callee uses struct // return semantics. if (isCalleeStructRet || isCallerStructRet) return false; // Externally-defined functions with weak linkage should not be // tail-called on ARM when the OS does not support dynamic // pre-emption of symbols, as the AAELF spec requires normal calls // to undefined weak functions to be replaced with a NOP or jump to the // next instruction. The behaviour of branch instructions in this // situation (as used for tail calls) is implementation-defined, so we // cannot rely on the linker replacing the tail call with a return. if (GlobalAddressSDNode *G = dyn_cast(Callee)) { const GlobalValue *GV = G->getGlobal(); const Triple &TT = getTargetMachine().getTargetTriple(); if (GV->hasExternalWeakLinkage() && (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO())) return false; } // Check that the call results are passed in the same way. LLVMContext &C = *DAG.getContext(); if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, CCAssignFnForReturn(CalleeCC, isVarArg), CCAssignFnForReturn(CallerCC, isVarArg))) return false; // The callee has to preserve all registers the caller needs to preserve. const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); if (CalleeCC != CallerCC) { const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) return false; } // If Caller's vararg or byval argument has been split between registers and // stack, do not perform tail call, since part of the argument is in caller's // local frame. const ARMFunctionInfo *AFI_Caller = MF.getInfo(); if (AFI_Caller->getArgRegsSaveSize()) return false; // If the callee takes no arguments then go on to check the results of the // call. if (!Outs.empty()) { // Check if stack adjustment is needed. For now, do not do this if any // argument is passed on the stack. SmallVector ArgLocs; ARMCCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C, Call); CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg)); if (CCInfo.getNextStackOffset()) { // Check if the arguments are already laid out in the right way as // the caller's fixed stack objects. 
MachineFrameInfo &MFI = MF.getFrameInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo(); const TargetInstrInfo *TII = Subtarget->getInstrInfo(); for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e; ++i, ++realArgIdx) { CCValAssign &VA = ArgLocs[i]; EVT RegVT = VA.getLocVT(); SDValue Arg = OutVals[realArgIdx]; ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; if (VA.getLocInfo() == CCValAssign::Indirect) return false; if (VA.needsCustom()) { // f64 and vector types are split into multiple registers or // register/stack-slot combinations. The types will not match // the registers; give up on memory f64 refs until we figure // out what to do about this. if (!VA.isRegLoc()) return false; if (!ArgLocs[++i].isRegLoc()) return false; if (RegVT == MVT::v2f64) { if (!ArgLocs[++i].isRegLoc()) return false; if (!ArgLocs[++i].isRegLoc()) return false; } } else if (!VA.isRegLoc()) { if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI, TII)) return false; } } } const MachineRegisterInfo &MRI = MF.getRegInfo(); if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) return false; } return true; } bool ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const { SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg)); } static SDValue LowerInterruptReturn(SmallVectorImpl &RetOps, const SDLoc &DL, SelectionDAG &DAG) { const MachineFunction &MF = DAG.getMachineFunction(); const Function *F = MF.getFunction(); StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString(); // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset // version of the "preferred return address". These offsets affect the return // instruction if this is a return from PL1 without hypervisor extensions. // IRQ/FIQ: +4 "subs pc, lr, #4" // SWI: 0 "subs pc, lr, #0" // ABORT: +4 "subs pc, lr, #4" // UNDEF: +4/+2 "subs pc, lr, #0" // UNDEF varies depending on where the exception came from ARM or Thumb // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0. int64_t LROffset; if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" || IntKind == "ABORT") LROffset = 4; else if (IntKind == "SWI" || IntKind == "UNDEF") LROffset = 0; else report_fatal_error("Unsupported interrupt attribute. If present, value " "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF"); RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, DL, MVT::i32, false)); return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps); } SDValue ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to a location. SmallVector RVLocs; // CCState - Info about the registers and stack slots. ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext(), Call); // Analyze outgoing return values. CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg)); SDValue Flag; SmallVector RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) bool isLittleEndian = Subtarget->isLittle(); MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo(); AFI->setReturnRegsCount(RVLocs.size()); // Copy the result values into the output registers. 
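// (Editorial illustration.) An f64 return value assigned to AAPCS GPRs goes
// through ARMISD::VMOVRRD, e.g. "vmov r0, r1, d0" on little-endian targets,
// with the two GPR halves swapped on big-endian; a v2f64 return repeats this
// for each of its two f64 halves, as the loop below shows.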
for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size(); ++i, ++realRVLocIdx) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); SDValue Arg = OutVals[realRVLocIdx]; switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::BCvt: Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); break; } if (VA.needsCustom()) { if (VA.getLocVT() == MVT::v2f64) { // Extract the first half and return it in two registers. SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, DAG.getConstant(0, dl, MVT::i32)); SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Half); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs.getValue(isLittleEndian ? 0 : 1), Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs.getValue(isLittleEndian ? 1 : 0), Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc // Extract the 2nd half and fall through to handle it as an f64 value. Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, DAG.getConstant(1, dl, MVT::i32)); } // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is // available. SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Arg); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(isLittleEndian ? 0 : 1), Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(isLittleEndian ? 1 : 0), Flag); } else Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); // Guarantee that all emitted copies are // stuck together, avoiding something bad. Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); if (I) { for (; *I; ++I) { if (ARM::GPRRegClass.contains(*I)) RetOps.push_back(DAG.getRegister(*I, MVT::i32)); else if (ARM::DPRRegClass.contains(*I)) RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64))); else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); } } // Update chain and glue. RetOps[0] = Chain; if (Flag.getNode()) RetOps.push_back(Flag); // CPUs which aren't M-class use a special sequence to return from // exceptions (roughly, any instruction setting pc and cpsr simultaneously, // though we use "subs pc, lr, #N"). // // M-class CPUs actually use a normal return sequence with a special // (hardware-provided) value in LR, so the normal code path works. 
if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") && !Subtarget->isMClass()) { if (Subtarget->isThumb1Only()) report_fatal_error("interrupt attribute is not supported in Thumb1"); return LowerInterruptReturn(RetOps, dl, DAG); } return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps); } bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { if (N->getNumValues() != 1) return false; if (!N->hasNUsesOfValue(1, 0)) return false; SDValue TCChain = Chain; SDNode *Copy = *N->use_begin(); if (Copy->getOpcode() == ISD::CopyToReg) { // If the copy has a glue operand, we conservatively assume it isn't safe to // perform a tail call. if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) return false; TCChain = Copy->getOperand(0); } else if (Copy->getOpcode() == ARMISD::VMOVRRD) { SDNode *VMov = Copy; // f64 returned in a pair of GPRs. SmallPtrSet Copies; for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); UI != UE; ++UI) { if (UI->getOpcode() != ISD::CopyToReg) return false; Copies.insert(*UI); } if (Copies.size() > 2) return false; for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); UI != UE; ++UI) { SDValue UseChain = UI->getOperand(0); if (Copies.count(UseChain.getNode())) // Second CopyToReg Copy = *UI; else { // We are at the top of this chain. // If the copy has a glue operand, we conservatively assume it // isn't safe to perform a tail call. if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue) return false; // First CopyToReg TCChain = UseChain; } } } else if (Copy->getOpcode() == ISD::BITCAST) { // f32 returned in a single GPR. if (!Copy->hasOneUse()) return false; Copy = *Copy->use_begin(); if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) return false; // If the copy has a glue operand, we conservatively assume it isn't safe to // perform a tail call. if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) return false; TCChain = Copy->getOperand(0); } else { return false; } bool HasRet = false; for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); UI != UE; ++UI) { if (UI->getOpcode() != ARMISD::RET_FLAG && UI->getOpcode() != ARMISD::INTRET_FLAG) return false; HasRet = true; } if (!HasRet) return false; Chain = TCChain; return true; } bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { if (!Subtarget->supportsTailCall()) return false; auto Attr = CI->getParent()->getParent()->getFnAttribute("disable-tail-calls"); if (!CI->isTailCall() || Attr.getValueAsString() == "true") return false; return true; } // Trying to write a 64 bit value so need to split into two 32 bit values first, // and pass the lower and high parts through. static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) { SDLoc DL(Op); SDValue WriteValue = Op->getOperand(2); // This function is only supposed to be called for i64 type argument. 
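// (Editorial sketch.) The i64 value is rebuilt below as
//   WRITE_REGISTER(chain, regname, lo, hi)
// where lo = EXTRACT_ELEMENT(value, 0) and hi = EXTRACT_ELEMENT(value, 1),
// so that each half fits in a 32-bit GPR.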
assert(WriteValue.getValueType() == MVT::i64 && "LowerWRITE_REGISTER called for non-i64 type argument."); SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue, DAG.getConstant(0, DL, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue, DAG.getConstant(1, DL, MVT::i32)); SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi }; return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops); } // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is // one of the above mentioned nodes. It has to be wrapped because otherwise // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only // be used to form addressing mode. These wrapped nodes will be selected // into MOVi. static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { EVT PtrVT = Op.getValueType(); // FIXME there is no actual debug info here SDLoc dl(Op); ConstantPoolSDNode *CP = cast(Op); SDValue Res; if (CP->isMachineConstantPoolEntry()) Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlignment()); else Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment()); return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); } unsigned ARMTargetLowering::getJumpTableEncoding() const { return MachineJumpTableInfo::EK_Inline; } SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo(); unsigned ARMPCLabelIndex = 0; SDLoc DL(Op); EVT PtrVT = getPointerTy(DAG.getDataLayout()); const BlockAddress *BA = cast(Op)->getBlockAddress(); SDValue CPAddr; bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI(); if (!IsPositionIndependent) { CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); } else { unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; ARMPCLabelIndex = AFI->createPICLabelUId(); ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex, ARMCP::CPBlockAddress, PCAdj); CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); } CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); SDValue Result = DAG.getLoad( PtrVT, DL, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); if (!IsPositionIndependent) return Result; SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32); return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); } /// \brief Convert a TLS address reference into the correct sequence of loads /// and calls to compute the variable's address for Darwin, and return an /// SDValue containing the final node. /// Darwin only has one TLS scheme which must be capable of dealing with the /// fully general situation, in the worst case. This means: /// + "extern __thread" declaration. /// + Defined in a possibly unknown dynamic library. /// /// The general system is that each __thread variable has a [3 x i32] descriptor /// which contains information used by the runtime to calculate the address. The /// only part of this the compiler needs to know about is the first word, which /// contains a function pointer that must be called with the address of the /// entire descriptor in "r0". /// /// Since this descriptor may be in a different unit, in general access must /// proceed along the usual ARM rules. 
/// A common sequence to produce is: /// /// movw rT1, :lower16:_var$non_lazy_ptr /// movt rT1, :upper16:_var$non_lazy_ptr /// ldr r0, [rT1] /// ldr rT2, [r0] /// blx rT2 /// [...address now in r0...] SDValue ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin"); SDLoc DL(Op); // First step is to get the address of the actual global symbol. This is where // the TLS descriptor lives. SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG); // The first entry in the descriptor is a function pointer that we must call // to obtain the address of the variable. SDValue Chain = DAG.getEntryNode(); SDValue FuncTLVGet = DAG.getLoad( MVT::i32, DL, Chain, DescAddr, MachinePointerInfo::getGOT(DAG.getMachineFunction()), /* Alignment = */ 4, MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant); Chain = FuncTLVGet.getValue(1); MachineFunction &F = DAG.getMachineFunction(); MachineFrameInfo &MFI = F.getFrameInfo(); MFI.setAdjustsStack(true); // TLS calls preserve all registers except those that absolutely must be // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be // silly). auto TRI = getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo(); auto ARI = static_cast<const ARMBaseRegisterInfo *>(TRI); const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction()); // Finally, we can make the call. This is just a degenerate version of a // normal ARM call node: r0 takes the address of the descriptor, and // returns the address of the variable in this thread. Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue()); Chain = DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue), Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32), DAG.getRegisterMask(Mask), Chain.getValue(1)); return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1)); } SDValue ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering"); SDValue Chain = DAG.getEntryNode(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc DL(Op); // Load the current TEB (thread environment block) SDValue Ops[] = {Chain, DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32), DAG.getConstant(15, DL, MVT::i32), DAG.getConstant(0, DL, MVT::i32), DAG.getConstant(13, DL, MVT::i32), DAG.getConstant(0, DL, MVT::i32), DAG.getConstant(2, DL, MVT::i32)}; SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, DAG.getVTList(MVT::i32, MVT::Other), Ops); SDValue TEB = CurrentTEB.getValue(0); Chain = CurrentTEB.getValue(1); // Load the ThreadLocalStoragePointer from the TEB // A pointer to the TLS array is located at offset 0x2c from the TEB. SDValue TLSArray = DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL)); TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo()); // The pointer to the thread's TLS data area is at the TLS Index scaled by 4 // offset into the TLSArray.
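// (Editorial sketch of the full address computation, assuming the standard
// MSVC TLS layout that the surrounding code relies on.)
//   TEB      = mrc p15, 0, rT, c13, c0, 2   ; thread environment block
//   TLSArray = *(TEB + 0x2c)
//   TLS      = *(TLSArray + 4 * _tls_index)
//   &var     = TLS + SECREL(var)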
// Load the TLS index from the C runtime SDValue TLSIndex = DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG); TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex); TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo()); SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex, DAG.getConstant(2, DL, MVT::i32)); SDValue TLS = DAG.getLoad(PtrVT, DL, Chain, DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot), MachinePointerInfo()); // Get the offset of the start of the .tls section (section base) const auto *GA = cast(Op); auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL); SDValue Offset = DAG.getLoad( PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32, DAG.getTargetConstantPool(CPV, PtrVT, 4)), MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset); } // Lower ISD::GlobalTLSAddress using the "general dynamic" model SDValue ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG) const { SDLoc dl(GA); EVT PtrVT = getPointerTy(DAG.getDataLayout()); unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); Argument = DAG.getLoad( PtrVT, dl, DAG.getEntryNode(), Argument, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); SDValue Chain = Argument.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); // call __tls_get_addr. ArgListTy Args; ArgListEntry Entry; Entry.Node = Argument; Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); Args.push_back(Entry); // FIXME: is there useful debug info available here? TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(Chain) .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()), DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args)); std::pair CallResult = LowerCallTo(CLI); return CallResult.first; } // Lower ISD::GlobalTLSAddress using the "initial exec" or // "local exec" model. SDValue ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, SelectionDAG &DAG, TLSModel::Model model) const { const GlobalValue *GV = GA->getGlobal(); SDLoc dl(GA); SDValue Offset; SDValue Chain = DAG.getEntryNode(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); // Get the Thread Pointer SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); if (model == TLSModel::InitialExec) { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); // Initial exec model. unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, true); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad( PtrVT, dl, Chain, Offset, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); Chain = Offset.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); Offset = DAG.getLoad( PtrVT, dl, Chain, Offset, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); } else { // local exec model assert(model == TLSModel::LocalExec); ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad( PtrVT, dl, Chain, Offset, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); } // The address of the thread local variable is the add of the thread // pointer with the offset of the variable. return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); } SDValue ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { if (Subtarget->isTargetDarwin()) return LowerGlobalTLSAddressDarwin(Op, DAG); if (Subtarget->isTargetWindows()) return LowerGlobalTLSAddressWindows(Op, DAG); // TODO: implement the "local dynamic" model assert(Subtarget->isTargetELF() && "Only ELF implemented here"); GlobalAddressSDNode *GA = cast(Op); if (DAG.getTarget().Options.EmulatedTLS) return LowerToTLSEmulatedModel(GA, DAG); TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal()); switch (model) { case TLSModel::GeneralDynamic: case TLSModel::LocalDynamic: return LowerToTLSGeneralDynamicModel(GA, DAG); case TLSModel::InitialExec: case TLSModel::LocalExec: return LowerToTLSExecModels(GA, DAG, model); } llvm_unreachable("bogus TLS model"); } /// Return true if all users of V are within function F, looking through /// ConstantExprs. static bool allUsersAreInFunction(const Value *V, const Function *F) { SmallVector Worklist; for (auto *U : V->users()) Worklist.push_back(U); while (!Worklist.empty()) { auto *U = Worklist.pop_back_val(); if (isa(U)) { for (auto *UU : U->users()) Worklist.push_back(UU); continue; } auto *I = dyn_cast(U); if (!I || I->getParent()->getParent() != F) return false; } return true; } /// Return true if all users of V are within some (any) function, looking through /// ConstantExprs. In other words, are there any global constant users? static bool allUsersAreInFunctions(const Value *V) { SmallVector Worklist; for (auto *U : V->users()) Worklist.push_back(U); while (!Worklist.empty()) { auto *U = Worklist.pop_back_val(); if (isa(U)) { for (auto *UU : U->users()) Worklist.push_back(UU); continue; } if (!isa(U)) return false; } return true; } // Return true if T is an integer, float or an array/vector of either. 
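// (Editorial note.) "Simple" deliberately excludes pointer-typed
// initializers: as the comments in promoteToConstantPool below explain,
// inlining a value that carries relocations would move those relocations
// from .data into .text.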
static bool isSimpleType(Type *T) { if (T->isIntegerTy() || T->isFloatingPointTy()) return true; Type *SubT = nullptr; if (T->isArrayTy()) SubT = T->getArrayElementType(); else if (T->isVectorTy()) SubT = T->getVectorElementType(); else return false; return SubT->isIntegerTy() || SubT->isFloatingPointTy(); } static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG, EVT PtrVT, SDLoc dl) { // If we're creating a pool entry for a constant global with unnamed address, // and the global is small enough, we can emit it inline into the constant pool // to save ourselves an indirection. // // This is a win if the constant is only used in one function (so it doesn't // need to be duplicated) or duplicating the constant wouldn't increase code // size (implying the constant is no larger than 4 bytes). const Function *F = DAG.getMachineFunction().getFunction(); // We rely on this decision to inline being idempotent and unrelated to the // use-site. We know that if we inline a variable at one use site, we'll // inline it elsewhere too (and reuse the constant pool entry). Fast-isel // doesn't know about this optimization, so bail out if it's enabled, else // we could decide to inline here (and thus never emit the GV) but require // the GV from fast-isel generated code. if (!EnableConstpoolPromotion || DAG.getMachineFunction().getTarget().Options.EnableFastISel) return SDValue(); auto *GVar = dyn_cast<GlobalVariable>(GV); if (!GVar || !GVar->hasInitializer() || !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() || !GVar->hasLocalLinkage()) return SDValue(); // Ensure that we don't try to inline any type that contains pointers. If // we inline a value that contains relocations, we move the relocations from // .data to .text, which is not ideal. auto *Init = GVar->getInitializer(); if (!isSimpleType(Init->getType())) return SDValue(); // The constant islands pass can only really deal with alignment requests // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote // any type wanting greater alignment requirements than 4 bytes. We also // can only promote constants that are multiples of 4 bytes in size or // are paddable to a multiple of 4. Currently we only try to pad constants // that are strings for simplicity. auto *CDAInit = dyn_cast<ConstantDataArray>(Init); unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType()); unsigned Align = GVar->getAlignment(); unsigned RequiredPadding = 4 - (Size % 4); bool PaddingPossible = RequiredPadding == 4 || (CDAInit && CDAInit->isString()); if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize) return SDValue(); unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding); MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); // We can't bloat the constant pool too much, else the ConstantIslands pass // may fail to converge. If we haven't promoted this global yet (it may have // multiple uses), and promoting it would increase the constant pool size (Sz // > 4), ensure we have space to do so up to MaxTotal. if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4) if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >= ConstpoolPromotionMaxTotal) return SDValue(); // This is only valid if all users are in a single function OR it has users // in multiple functions but it is no larger than a pointer. We also check if // GVar has constant (non-ConstantExpr) users. If so, it essentially has its // address taken.
if (!allUsersAreInFunction(GVar, F) && !(Size <= 4 && allUsersAreInFunctions(GVar))) return SDValue(); // We're going to inline this global. Pad it out if needed. if (RequiredPadding != 4) { StringRef S = CDAInit->getAsString(); SmallVector V(S.size()); std::copy(S.bytes_begin(), S.bytes_end(), V.begin()); while (RequiredPadding--) V.push_back(0); Init = ConstantDataArray::get(*DAG.getContext(), V); } auto CPVal = ARMConstantPoolConstant::Create(GVar, Init); SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4); if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) { AFI->markGlobalAsPromotedToConstantPool(GVar); AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() + PaddedSize - 4); } ++NumConstpoolPromoted; return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); } +static bool isReadOnly(const GlobalValue *GV) { + if (const GlobalAlias *GA = dyn_cast(GV)) + GV = GA->getBaseObject(); + return (isa(GV) && cast(GV)->isConstant()) || + isa(GV); +} + SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc dl(Op); const GlobalValue *GV = cast(Op)->getGlobal(); const TargetMachine &TM = getTargetMachine(); - if (const GlobalAlias *GA = dyn_cast(GV)) - GV = GA->getBaseObject(); - bool IsRO = - (isa(GV) && cast(GV)->isConstant()) || - isa(GV); + bool IsRO = isReadOnly(GV); // promoteToConstantPool only if not generating XO text section if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly()) if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl)) return V; if (isPositionIndependent()) { bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV); MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc dl(Op); unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create( GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj, UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier, /*AddCurrentAddress=*/UseGOT_PREL); SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad( PtrVT, dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); SDValue Chain = Result.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); if (UseGOT_PREL) Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(DAG.getMachineFunction())); return Result; } else if (Subtarget->isROPI() && IsRO) { // PC-relative. SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT); SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G); return Result; } else if (Subtarget->isRWPI() && !IsRO) { // SB-relative. ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, ARMCP::SBREL); SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue G = DAG.getLoad( PtrVT, dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT); SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, G); return Result; } // If we have T2 ops, we can materialize the address directly via movt/movw // pair. 
  // If we have T2 ops, we can materialize the address directly via a
  // movt/movw pair. This is always cheaper.
  if (Subtarget->useMovt(DAG.getMachineFunction())) {
    ++NumMovwMovt;
    // FIXME: Once remat is capable of dealing with instructions with register
    // operands, expand this into two nodes.
    return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
                       DAG.getTargetGlobalAddress(GV, dl, PtrVT));
  } else {
    SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    return DAG.getLoad(
        PtrVT, dl, DAG.getEntryNode(), CPAddr,
        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
  }
}

SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
                                                    SelectionDAG &DAG) const {
  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
         "ROPI/RWPI not currently supported for Darwin");
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDLoc dl(Op);
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();

  if (Subtarget->useMovt(DAG.getMachineFunction()))
    ++NumMovwMovt;

  // FIXME: Once remat is capable of dealing with instructions with register
  // operands, expand this into multiple nodes
  unsigned Wrapper =
      isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;

  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
  SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);

  if (Subtarget->isGVIndirectSymbol(GV))
    Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
                         MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  return Result;
}

SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
                                                     SelectionDAG &DAG) const {
  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
  assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
         "Windows on ARM expects to use movw/movt");
  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
         "ROPI/RWPI not currently supported for Windows");

  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  const ARMII::TOF TargetFlags =
      (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT
                                      : ARMII::MO_NO_FLAG);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue Result;
  SDLoc DL(Op);

  ++NumMovwMovt;

  // FIXME: Once remat is capable of dealing with instructions with register
  // operands, expand this into two nodes.
  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
                       DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
                                                  TargetFlags));
  if (GV->hasDLLImportStorageClass())
    Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
                         MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  return Result;
}

SDValue ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue Val = DAG.getConstant(0, dl, MVT::i32);
  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
                     DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
                     Op.getOperand(1), Val);
}

SDValue ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc dl(Op);
  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
                     Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
}

SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
                                                       SelectionDAG &DAG) const {
  SDLoc dl(Op);
  return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
                     Op.getOperand(0));
}

SDValue ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(
    SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDLoc dl(Op);
  switch (IntNo) {
  default: return SDValue();    // Don't custom lower most intrinsics.
case Intrinsic::thread_pointer: { EVT PtrVT = getPointerTy(DAG.getDataLayout()); return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); } case Intrinsic::eh_sjlj_lsda: { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue CPAddr; bool IsPositionIndependent = isPositionIndependent(); unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0; ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex, ARMCP::CPLSDA, PCAdj); CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad( PtrVT, dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); if (IsPositionIndependent) { SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } return Result; } case Intrinsic::arm_neon_vmulls: case Intrinsic::arm_neon_vmullu: { unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) ? ARMISD::VMULLs : ARMISD::VMULLu; return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } case Intrinsic::arm_neon_vminnm: case Intrinsic::arm_neon_vmaxnm: { unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm) ? ISD::FMINNUM : ISD::FMAXNUM; return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } case Intrinsic::arm_neon_vminu: case Intrinsic::arm_neon_vmaxu: { if (Op.getValueType().isFloatingPoint()) return SDValue(); unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu) ? ISD::UMIN : ISD::UMAX; return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } case Intrinsic::arm_neon_vmins: case Intrinsic::arm_neon_vmaxs: { // v{min,max}s is overloaded between signed integers and floats. if (!Op.getValueType().isFloatingPoint()) { unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins) ? ISD::SMIN : ISD::SMAX; return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins) ? ISD::FMINNAN : ISD::FMAXNAN; return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } } } static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { // FIXME: handle "fence singlethread" more efficiently. SDLoc dl(Op); if (!Subtarget->hasDataBarrier()) { // Some ARMv6 cpus can support data barriers with an mcr instruction. // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get // here. assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!"); return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), DAG.getConstant(0, dl, MVT::i32)); } ConstantSDNode *OrdN = cast(Op.getOperand(1)); AtomicOrdering Ord = static_cast(OrdN->getZExtValue()); ARM_MB::MemBOpt Domain = ARM_MB::ISH; if (Subtarget->isMClass()) { // Only a full system barrier exists in the M-class architectures. Domain = ARM_MB::SY; } else if (Subtarget->preferISHSTBarriers() && Ord == AtomicOrdering::Release) { // Swift happens to implement ISHST barriers in a way that's compatible with // Release semantics but weaker than ISH so we'd be fools not to use // it. Beware: other processors probably don't! 
Domain = ARM_MB::ISHST; } return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0), DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32), DAG.getConstant(Domain, dl, MVT::i32)); } static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { // ARM pre v5TE and Thumb1 does not have preload instructions. if (!(Subtarget->isThumb2() || (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))) // Just preserve the chain. return Op.getOperand(0); SDLoc dl(Op); unsigned isRead = ~cast(Op.getOperand(2))->getZExtValue() & 1; if (!isRead && (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension())) // ARMv7 with MP extension has PLDW. return Op.getOperand(0); unsigned isData = cast(Op.getOperand(4))->getZExtValue(); if (Subtarget->isThumb()) { // Invert the bits. isRead = ~isRead & 1; isData = ~isData & 1; } return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0), Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32), DAG.getConstant(isData, dl, MVT::i32)); } static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *FuncInfo = MF.getInfo(); // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. SDLoc dl(Op); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), MachinePointerInfo(SV)); } SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, const SDLoc &dl) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo(); const TargetRegisterClass *RC; if (AFI->isThumb1OnlyFunction()) RC = &ARM::tGPRRegClass; else RC = &ARM::GPRRegClass; // Transform the arguments stored in physical registers into virtual ones. unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); SDValue ArgValue2; if (NextVA.isMemLoc()) { MachineFrameInfo &MFI = MF.getFrameInfo(); int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true); // Create load node to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); ArgValue2 = DAG.getLoad( MVT::i32, dl, Root, FIN, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); } else { Reg = MF.addLiveIn(NextVA.getLocReg(), RC); ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); } if (!Subtarget->isLittle()) std::swap (ArgValue, ArgValue2); return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); } // The remaining GPRs hold either the beginning of variable-argument // data, or the beginning of an aggregate passed by value (usually // byval). Either way, we allocate stack slots adjacent to the data // provided by our caller, and store the unallocated registers there. // If this is a variadic function, the va_list pointer will begin with // these values; otherwise, this reassembles a (byval) structure that // was split between registers and memory. // Return: The frame index registers were stored into. int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &dl, SDValue &Chain, const Value *OrigArg, unsigned InRegsParamRecordIdx, int ArgOffset, unsigned ArgSize) const { // Currently, two use-cases possible: // Case #1. 
Non-var-args function, and we meet first byval parameter. // Setup first unallocated register as first byval register; // eat all remained registers // (these two actions are performed by HandleByVal method). // Then, here, we initialize stack frame with // "store-reg" instructions. // Case #2. Var-args function, that doesn't contain byval parameters. // The same: eat all remained unallocated registers, // initialize stack frame. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo(); unsigned RBegin, REnd; if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); } else { unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs); RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx]; REnd = ARM::R4; } if (REnd != RBegin) ArgOffset = -4 * (ARM::R4 - RBegin); auto PtrVT = getPointerTy(DAG.getDataLayout()); int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false); SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT); SmallVector MemOps; const TargetRegisterClass *RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass; for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) { unsigned VReg = MF.addLiveIn(Reg, RC); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(OrigArg, 4 * i)); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT)); } if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return FrameIndex; } // Setup stack frame, the va_list pointer will start from. void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &dl, SDValue &Chain, unsigned ArgOffset, unsigned TotalArgRegsSaveSize, bool ForceMutable) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo(); // Try to store any remaining integer argument regs // to their spots on the stack so that they may be loaded by dereferencing // the result of va_next. // If there is no regs to be stored, just point address after last // argument passed via stack. int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr, CCInfo.getInRegsParamsCount(), CCInfo.getNextStackOffset(), 4); AFI->setVarArgsFrameIndex(FrameIndex); } SDValue ARMTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo(); // Assign locations to all of the incoming arguments. SmallVector ArgLocs; ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext(), Prologue); CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg)); SmallVector ArgValues; SDValue ArgValue; Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin(); unsigned CurArgIdx = 0; // Initially ArgRegsSaveSize is zero. // Then we increase this value each time we meet byval parameter. // We also increase this value in case of varargs function. AFI->setArgRegsSaveSize(0); // Calculate the amount of stack space that we need to allocate to store // byval and variadic arguments that are passed in registers. 
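// A minimal sketch (not from the original source) of the save-area
// arithmetic StoreByValRegs uses above: with r0-r3 as the GPR argument
// registers, spilling the block [rN, r4) places it at -4 * (4 - N) bytes
// relative to the CFA. byvalSaveAreaOffset is a hypothetical name; plain
// indices 0..4 stand in for the ARM::R* enum values.
static int byvalSaveAreaOffset(unsigned FirstRegToSave) { // N in 0..4
  unsigned NumRegsSaved = 4 - FirstRegToSave; // registers rN..r3
  return -4 * (int)NumRegsSaved;              // bytes below the CFA
}
// e.g. a byval argument split starting at r2 stores r2 at -8 and r3 at -4.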
// We need to know this before we allocate the first byval or variadic // argument, as they will be allocated a stack slot below the CFA (Canonical // Frame Address, the stack pointer at entry to the function). unsigned ArgRegBegin = ARM::R4; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount()) break; CCValAssign &VA = ArgLocs[i]; unsigned Index = VA.getValNo(); ISD::ArgFlagsTy Flags = Ins[Index].Flags; if (!Flags.isByVal()) continue; assert(VA.isMemLoc() && "unexpected byval pointer in reg"); unsigned RBegin, REnd; CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd); ArgRegBegin = std::min(ArgRegBegin, RBegin); CCInfo.nextInRegsParam(); } CCInfo.rewindByValRegsInfo(); int lastInsIndex = -1; if (isVarArg && MFI.hasVAStart()) { unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs); if (RegIdx != array_lengthof(GPRArgRegs)) ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]); } unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin); AFI->setArgRegsSaveSize(TotalArgRegsSaveSize); auto PtrVT = getPointerTy(DAG.getDataLayout()); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; if (Ins[VA.getValNo()].isOrigArg()) { std::advance(CurOrigArg, Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx); CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex(); } // Arguments stored in registers. if (VA.isRegLoc()) { EVT RegVT = VA.getLocVT(); if (VA.needsCustom()) { // f64 and vector types are split up into multiple registers or // combinations of registers and stack slots. if (VA.getLocVT() == MVT::v2f64) { SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); VA = ArgLocs[++i]; // skip ahead to next loc SDValue ArgValue2; if (VA.isMemLoc()) { int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), FI)); } else { ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); } ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue, ArgValue1, DAG.getIntPtrConstant(0, dl)); ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue, ArgValue2, DAG.getIntPtrConstant(1, dl)); } else ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); } else { const TargetRegisterClass *RC; if (RegVT == MVT::f32) RC = &ARM::SPRRegClass; else if (RegVT == MVT::f64) RC = &ARM::DPRRegClass; else if (RegVT == MVT::v2f64) RC = &ARM::QPRRegClass; else if (RegVT == MVT::i32) RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass; else llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); // Transform the arguments in physical registers into virtual ones. unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); } // If this is an 8 or 16-bit value, it is really passed promoted // to 32 bits. Insert an assert[sz]ext to capture this, then // truncate to the right size. 
switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::BCvt: ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); break; case CCValAssign::SExt: ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); break; case CCValAssign::ZExt: ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); break; } InVals.push_back(ArgValue); } else { // VA.isRegLoc() // sanity check assert(VA.isMemLoc()); assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); int index = VA.getValNo(); // Some Ins[] entries become multiple ArgLoc[] entries. // Process them only once. if (index != lastInsIndex) { ISD::ArgFlagsTy Flags = Ins[index].Flags; // FIXME: For now, all byval parameter objects are marked mutable. // This can be changed with more analysis. // In case of tail call optimization mark all arguments mutable. // Since they could be overwritten by lowering of arguments in case of // a tail call. if (Flags.isByVal()) { assert(Ins[index].isOrigArg() && "Byval arguments cannot be implicit"); unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed(); int FrameIndex = StoreByValRegs( CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex, VA.getLocMemOffset(), Flags.getByValSize()); InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT)); CCInfo.nextInRegsParam(); } else { unsigned FIOffset = VA.getLocMemOffset(); int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8, FIOffset, true); // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), FI))); } lastInsIndex = index; } } } // varargs if (isVarArg && MFI.hasVAStart()) VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset(), TotalArgRegsSaveSize); AFI->setArgumentStackSize(CCInfo.getNextStackOffset()); return Chain; } /// isFloatingPointZero - Return true if this is +0.0. static bool isFloatingPointZero(SDValue Op) { if (ConstantFPSDNode *CFP = dyn_cast(Op)) return CFP->getValueAPF().isPosZero(); else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { // Maybe this has already been legalized into the constant pool? if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { SDValue WrapperOp = Op.getOperand(1).getOperand(0); if (ConstantPoolSDNode *CP = dyn_cast(WrapperOp)) if (const ConstantFP *CFP = dyn_cast(CP->getConstVal())) return CFP->getValueAPF().isPosZero(); } } else if (Op->getOpcode() == ISD::BITCAST && Op->getValueType(0) == MVT::f64) { // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64) // created by LowerConstantFP(). SDValue BitcastOp = Op->getOperand(0); if (BitcastOp->getOpcode() == ARMISD::VMOVIMM && isNullConstant(BitcastOp->getOperand(0))) return true; } return false; } /// Returns appropriate ARM CMP (cmp) and corresponding condition code for /// the given operands. SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMcc, SelectionDAG &DAG, const SDLoc &dl) const { if (ConstantSDNode *RHSC = dyn_cast(RHS.getNode())) { unsigned C = RHSC->getZExtValue(); if (!isLegalICmpImmediate(C)) { // Constant does not fit, try adjusting it by one? 
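// A minimal sketch (not from the original source) of the rewrite the switch
// below performs when the RHS constant C is not encodable as an ARM
// immediate but a neighbouring constant is. For signed operands:
//   x <  C  is equivalent to  x <= C - 1   (invalid only when C == INT_MIN)
//   x <= C  is equivalent to  x <  C + 1   (invalid only when C == INT_MAX)
// lessThanViaLessEqual is a hypothetical name checking the first identity.
static bool lessThanViaLessEqual(int X, int C) {
  // Caller guarantees C != INT_MIN, so C - 1 cannot wrap around.
  return X <= C - 1; // same truth value as X < C
}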
switch (CC) { default: break; case ISD::SETLT: case ISD::SETGE: if (C != 0x80000000 && isLegalICmpImmediate(C-1)) { CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; RHS = DAG.getConstant(C - 1, dl, MVT::i32); } break; case ISD::SETULT: case ISD::SETUGE: if (C != 0 && isLegalICmpImmediate(C-1)) { CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; RHS = DAG.getConstant(C - 1, dl, MVT::i32); } break; case ISD::SETLE: case ISD::SETGT: if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) { CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; RHS = DAG.getConstant(C + 1, dl, MVT::i32); } break; case ISD::SETULE: case ISD::SETUGT: if (C != 0xffffffff && isLegalICmpImmediate(C+1)) { CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; RHS = DAG.getConstant(C + 1, dl, MVT::i32); } break; } } } ARMCC::CondCodes CondCode = IntCCToARMCC(CC); ARMISD::NodeType CompareType; switch (CondCode) { default: CompareType = ARMISD::CMP; break; case ARMCC::EQ: case ARMCC::NE: // Uses only Z Flag CompareType = ARMISD::CMPZ; break; } ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS); } /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl) const { assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64); SDValue Cmp; if (!isFloatingPointZero(RHS)) Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); else Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS); return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); } /// duplicateCmp - Glue values can have only one use, so this function /// duplicates a comparison node. SDValue ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { unsigned Opc = Cmp.getOpcode(); SDLoc DL(Cmp); if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation"); Cmp = Cmp.getOperand(0); Opc = Cmp.getOpcode(); if (Opc == ARMISD::CMPFP) Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); else { assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"); Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); } return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); } std::pair ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, SDValue &ARMcc) const { assert(Op.getValueType() == MVT::i32 && "Unsupported value type"); SDValue Value, OverflowCmp; SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDLoc dl(Op); // FIXME: We are currently always generating CMPs because we don't support // generating CMN through the backend. This is not as good as the natural // CMP case because it causes a register dependency and cannot be folded // later. 
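// A minimal sketch (not from the original source) of the unsigned-overflow
// test the UADDO case below relies on: a sum that wrapped is strictly
// smaller than either operand, which is exactly what comparing the result
// against the LHS detects. uaddOverflows is a hypothetical name.
static bool uaddOverflows(unsigned A, unsigned B) {
  unsigned Sum = A + B; // wraps modulo 2^32
  return Sum < A;       // a carry out occurred iff the sum fell below A
}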
switch (Op.getOpcode()) { default: llvm_unreachable("Unknown overflow instruction!"); case ISD::SADDO: ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32); Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS); OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS); break; case ISD::UADDO: ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32); Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS); OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS); break; case ISD::SSUBO: ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32); Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS); OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS); break; case ISD::USUBO: ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32); Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS); OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS); break; } // switch (...) return std::make_pair(Value, OverflowCmp); } SDValue ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { // Let legalize expand this if it isn't a legal type yet. if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) return SDValue(); SDValue Value, OverflowCmp; SDValue ARMcc; std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDLoc dl(Op); // We use 0 and 1 as false and true values. SDValue TVal = DAG.getConstant(1, dl, MVT::i32); SDValue FVal = DAG.getConstant(0, dl, MVT::i32); EVT VT = Op.getValueType(); SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, ARMcc, CCR, OverflowCmp); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); } SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); SDValue SelectFalse = Op.getOperand(2); SDLoc dl(Op); unsigned Opc = Cond.getOpcode(); if (Cond.getResNo() == 1 && (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || Opc == ISD::USUBO)) { if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0))) return SDValue(); SDValue Value, OverflowCmp; SDValue ARMcc; std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); EVT VT = Op.getValueType(); return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR, OverflowCmp, DAG); } // Convert: // // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond) // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond) // if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) { const ConstantSDNode *CMOVTrue = dyn_cast(Cond.getOperand(0)); const ConstantSDNode *CMOVFalse = dyn_cast(Cond.getOperand(1)); if (CMOVTrue && CMOVFalse) { unsigned CMOVTrueVal = CMOVTrue->getZExtValue(); unsigned CMOVFalseVal = CMOVFalse->getZExtValue(); SDValue True; SDValue False; if (CMOVTrueVal == 1 && CMOVFalseVal == 0) { True = SelectTrue; False = SelectFalse; } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) { True = SelectFalse; False = SelectTrue; } if (True.getNode() && False.getNode()) { EVT VT = Op.getValueType(); SDValue ARMcc = Cond.getOperand(2); SDValue CCR = Cond.getOperand(3); SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); assert(True.getValueType() == VT); return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG); } } } // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the // undefined bits before doing a full-word comparison with zero. 
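// A minimal sketch (not from the original source) of the masking done just
// below: only bit 0 of an ARM boolean is defined, so the select clears the
// undefined bits first and then tests the whole word against zero.
// selectOnLsb is a hypothetical name.
static int selectOnLsb(int Cond, int T, int F) {
  return ((Cond & 1) != 0) ? T : F; // only bit 0 of Cond is meaningful
}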
Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond, DAG.getConstant(1, dl, Cond.getValueType())); return DAG.getSelectCC(dl, Cond, DAG.getConstant(0, dl, Cond.getValueType()), SelectTrue, SelectFalse, ISD::SETNE); } static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, bool &swpCmpOps, bool &swpVselOps) { // Start by selecting the GE condition code for opcodes that return true for // 'equality' if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE || CC == ISD::SETULE) CondCode = ARMCC::GE; // and GT for opcodes that return false for 'equality'. else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT || CC == ISD::SETULT) CondCode = ARMCC::GT; // Since we are constrained to GE/GT, if the opcode contains 'less', we need // to swap the compare operands. if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT || CC == ISD::SETULT) swpCmpOps = true; // Both GT and GE are ordered comparisons, and return false for 'unordered'. // If we have an unordered opcode, we need to swap the operands to the VSEL // instruction (effectively negating the condition). // // This also has the effect of swapping which one of 'less' or 'greater' // returns true, so we also swap the compare operands. It also switches // whether we return true for 'equality', so we compensate by picking the // opposite condition code to our original choice. if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE || CC == ISD::SETUGT) { swpCmpOps = !swpCmpOps; swpVselOps = !swpVselOps; CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT; } // 'ordered' is 'anything but unordered', so use the VS condition code and // swap the VSEL operands. if (CC == ISD::SETO) { CondCode = ARMCC::VS; swpVselOps = true; } // 'unordered or not equal' is 'anything but equal', so use the EQ condition // code and swap the VSEL operands. if (CC == ISD::SETUNE) { CondCode = ARMCC::EQ; swpVselOps = true; } } SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal, SDValue ARMcc, SDValue CCR, SDValue Cmp, SelectionDAG &DAG) const { if (Subtarget->isFPOnlySP() && VT == MVT::f64) { FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), FalseVal); TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), TrueVal); SDValue TrueLow = TrueVal.getValue(0); SDValue TrueHigh = TrueVal.getValue(1); SDValue FalseLow = FalseVal.getValue(0); SDValue FalseHigh = FalseVal.getValue(1); SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow, ARMcc, CCR, Cmp); SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh, ARMcc, CCR, duplicateCmp(Cmp, DAG)); return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High); } else { return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp); } } static bool isGTorGE(ISD::CondCode CC) { return CC == ISD::SETGT || CC == ISD::SETGE; } static bool isLTorLE(ISD::CondCode CC) { return CC == ISD::SETLT || CC == ISD::SETLE; } // See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating. // All of these conditions (and their <= and >= counterparts) will do: // x < k ? k : x // x > k ? x : k // k < x ? x : k // k > x ? 
k : x static bool isLowerSaturate(const SDValue LHS, const SDValue RHS, const SDValue TrueVal, const SDValue FalseVal, const ISD::CondCode CC, const SDValue K) { return (isGTorGE(CC) && ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) || (isLTorLE(CC) && ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))); } // Similar to isLowerSaturate(), but checks for upper-saturating conditions. static bool isUpperSaturate(const SDValue LHS, const SDValue RHS, const SDValue TrueVal, const SDValue FalseVal, const ISD::CondCode CC, const SDValue K) { return (isGTorGE(CC) && ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))) || (isLTorLE(CC) && ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))); } // Check if two chained conditionals could be converted into SSAT. // // SSAT can replace a set of two conditional selectors that bound a number to an // interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples: // // x < -k ? -k : (x > k ? k : x) // x < -k ? -k : (x < k ? x : k) // x > -k ? (x > k ? k : x) : -k // x < k ? (x < -k ? -k : x) : k // etc. // // It returns true if the conversion can be done, false otherwise. // Additionally, the variable is returned in parameter V and the constant in K. static bool isSaturatingConditional(const SDValue &Op, SDValue &V, uint64_t &K) { SDValue LHS1 = Op.getOperand(0); SDValue RHS1 = Op.getOperand(1); SDValue TrueVal1 = Op.getOperand(2); SDValue FalseVal1 = Op.getOperand(3); ISD::CondCode CC1 = cast(Op.getOperand(4))->get(); const SDValue Op2 = isa(TrueVal1) ? FalseVal1 : TrueVal1; if (Op2.getOpcode() != ISD::SELECT_CC) return false; SDValue LHS2 = Op2.getOperand(0); SDValue RHS2 = Op2.getOperand(1); SDValue TrueVal2 = Op2.getOperand(2); SDValue FalseVal2 = Op2.getOperand(3); ISD::CondCode CC2 = cast(Op2.getOperand(4))->get(); // Find out which are the constants and which are the variables // in each conditional SDValue *K1 = isa(LHS1) ? &LHS1 : isa(RHS1) ? &RHS1 : NULL; SDValue *K2 = isa(LHS2) ? &LHS2 : isa(RHS2) ? &RHS2 : NULL; SDValue K2Tmp = isa(TrueVal2) ? TrueVal2 : FalseVal2; SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1; SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2; SDValue V2 = (K2Tmp == TrueVal2) ? FalseVal2 : TrueVal2; // We must detect cases where the original operations worked with 16- or // 8-bit values. In such case, V2Tmp != V2 because the comparison operations // must work with sign-extended values but the select operations return // the original non-extended value. SDValue V2TmpReg = V2Tmp; if (V2Tmp->getOpcode() == ISD::SIGN_EXTEND_INREG) V2TmpReg = V2Tmp->getOperand(0); // Check that the registers and the constants have the correct values // in both conditionals if (!K1 || !K2 || *K1 == Op2 || *K2 != K2Tmp || V1Tmp != V2Tmp || V2TmpReg != V2) return false; // Figure out which conditional is saturating the lower/upper bound. const SDValue *LowerCheckOp = isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1) ? &Op : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) ? &Op2 : NULL; const SDValue *UpperCheckOp = isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1) ? &Op : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) ? &Op2 : NULL; if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp) return false; // Check that the constant in the lower-bound check is // the opposite of the constant in the upper-bound check // in 1's complement. 
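// A minimal sketch (not from the original source) of the constant check
// performed below: the two bounds must be bitwise complements, with the
// positive one plus 1 a power of two. For example, k = 127 and ~k = -128
// bound the signed 8-bit range, and 127 + 1 = 128 = 2^7.
// boundsFormSignedRange is a hypothetical name.
static bool boundsFormSignedRange(long long Lo, long long Hi) {
  return Lo == ~Hi                 // bounds are 1's-complement opposites
      && Hi + 1 > 0                // positive upper bound, Hi + 1 is valid
      && ((Hi + 1) & Hi) == 0;     // Hi + 1 is a power of two
}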
int64_t Val1 = cast(*K1)->getSExtValue(); int64_t Val2 = cast(*K2)->getSExtValue(); int64_t PosVal = std::max(Val1, Val2); if (((Val1 > Val2 && UpperCheckOp == &Op) || (Val1 < Val2 && UpperCheckOp == &Op2)) && Val1 == ~Val2 && isPowerOf2_64(PosVal + 1)) { V = V2; K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive return true; } return false; } SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc dl(Op); // Try to convert two saturating conditional selects into a single SSAT SDValue SatValue; uint64_t SatConstant; if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) && isSaturatingConditional(Op, SatValue, SatConstant)) return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue, DAG.getConstant(countTrailingOnes(SatConstant), dl, VT)); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); ISD::CondCode CC = cast(Op.getOperand(4))->get(); SDValue TrueVal = Op.getOperand(2); SDValue FalseVal = Op.getOperand(3); if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) { DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC, dl); // If softenSetCCOperands only returned one value, we should compare it to // zero. if (!RHS.getNode()) { RHS = DAG.getConstant(0, dl, LHS.getValueType()); CC = ISD::SETNE; } } if (LHS.getValueType() == MVT::i32) { // Try to generate VSEL on ARMv8. // The VSEL instruction can't use all the usual ARM condition // codes: it only has two bits to select the condition code, so it's // constrained to use only GE, GT, VS and EQ. // // To implement all the various ISD::SETXXX opcodes, we sometimes need to // swap the operands of the previous compare instruction (effectively // inverting the compare condition, swapping 'less' and 'greater') and // sometimes need to swap the operands to the VSEL (which inverts the // condition in the sense of firing whenever the previous condition didn't) if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || TrueVal.getValueType() == MVT::f64)) { ARMCC::CondCodes CondCode = IntCCToARMCC(CC); if (CondCode == ARMCC::LT || CondCode == ARMCC::LE || CondCode == ARMCC::VC || CondCode == ARMCC::NE) { CC = ISD::getSetCCInverse(CC, true); std::swap(TrueVal, FalseVal); } } SDValue ARMcc; SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); } ARMCC::CondCodes CondCode, CondCode2; FPCCToARMCC(CC, CondCode, CondCode2); // Try to generate VMAXNM/VMINNM on ARMv8. if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || TrueVal.getValueType() == MVT::f64)) { bool swpCmpOps = false; bool swpVselOps = false; checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps); if (CondCode == ARMCC::GT || CondCode == ARMCC::GE || CondCode == ARMCC::VS || CondCode == ARMCC::EQ) { if (swpCmpOps) std::swap(LHS, RHS); if (swpVselOps) std::swap(TrueVal, FalseVal); } } SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); if (CondCode2 != ARMCC::AL) { SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32); // FIXME: Needs another CMP because flag can have but one use. 
SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG); } return Result; } /// canChangeToInt - Given the fp compare operand, return true if it is suitable /// to morph to an integer compare sequence. static bool canChangeToInt(SDValue Op, bool &SeenZero, const ARMSubtarget *Subtarget) { SDNode *N = Op.getNode(); if (!N->hasOneUse()) // Otherwise it requires moving the value from fp to integer registers. return false; if (!N->getNumValues()) return false; EVT VT = Op.getValueType(); if (VT != MVT::f32 && !Subtarget->isFPBrccSlow()) // f32 case is generally profitable. f64 case only makes sense when vcmpe + // vmrs are very slow, e.g. cortex-a8. return false; if (isFloatingPointZero(Op)) { SeenZero = true; return true; } return ISD::isNormalLoad(N); } static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { if (isFloatingPointZero(Op)) return DAG.getConstant(0, SDLoc(Op), MVT::i32); if (LoadSDNode *Ld = dyn_cast(Op)) return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), Ld->getAlignment(), Ld->getMemOperand()->getFlags()); llvm_unreachable("Unknown VFP cmp argument!"); } static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, SDValue &RetVal1, SDValue &RetVal2) { SDLoc dl(Op); if (isFloatingPointZero(Op)) { RetVal1 = DAG.getConstant(0, dl, MVT::i32); RetVal2 = DAG.getConstant(0, dl, MVT::i32); return; } if (LoadSDNode *Ld = dyn_cast(Op)) { SDValue Ptr = Ld->getBasePtr(); RetVal1 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), Ld->getAlignment(), Ld->getMemOperand()->getFlags()); EVT PtrType = Ptr.getValueType(); unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); SDValue NewPtr = DAG.getNode(ISD::ADD, dl, PtrType, Ptr, DAG.getConstant(4, dl, PtrType)); RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr, Ld->getPointerInfo().getWithOffset(4), NewAlign, Ld->getMemOperand()->getFlags()); return; } llvm_unreachable("Unknown VFP cmp argument!"); } /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some /// f32 and even f64 comparisons to integer ones. SDValue ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); SDValue RHS = Op.getOperand(3); SDValue Dest = Op.getOperand(4); SDLoc dl(Op); bool LHSSeenZero = false; bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget); bool RHSSeenZero = false; bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget); if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) { // If unsafe fp math optimization is enabled and there are no other uses of // the CMP operands, and the condition code is EQ or NE, we can optimize it // to an integer comparison. 
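// A minimal sketch (not from the original source) of the bit trick the
// rewrite below exploits for f32: +0.0 and -0.0 differ only in the sign
// bit, so an (un)ordered equality against zero can be answered on the raw
// bits with bit 31 masked off. f32EqualsZeroViaInt is a hypothetical name
// and assumes <cstring> for std::memcpy (a pre-C++20 bit_cast).
static bool f32EqualsZeroViaInt(float X) {
  unsigned Bits;
  std::memcpy(&Bits, &X, sizeof Bits); // reinterpret the float's bits
  return (Bits & 0x7fffffffu) == 0;    // true for +0.0f and -0.0f only
}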
if (CC == ISD::SETOEQ) CC = ISD::SETEQ; else if (CC == ISD::SETUNE) CC = ISD::SETNE; SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32); SDValue ARMcc; if (LHS.getValueType() == MVT::f32) { LHS = DAG.getNode(ISD::AND, dl, MVT::i32, bitcastf32Toi32(LHS, DAG), Mask); RHS = DAG.getNode(ISD::AND, dl, MVT::i32, bitcastf32Toi32(RHS, DAG), Mask); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR, Cmp); } SDValue LHS1, LHS2; SDValue RHS1, RHS2; expandf64Toi32(LHS, DAG, LHS1, LHS2); expandf64Toi32(RHS, DAG, RHS1, RHS2); LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask); RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask); ARMCC::CondCodes CondCode = IntCCToARMCC(CC); ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops); } return SDValue(); } SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); SDValue RHS = Op.getOperand(3); SDValue Dest = Op.getOperand(4); SDLoc dl(Op); if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) { DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC, dl); // If softenSetCCOperands only returned one value, we should compare it to // zero. if (!RHS.getNode()) { RHS = DAG.getConstant(0, dl, LHS.getValueType()); CC = ISD::SETNE; } } if (LHS.getValueType() == MVT::i32) { SDValue ARMcc; SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR, Cmp); } assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); if (getTargetMachine().Options.UnsafeFPMath && (CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETNE || CC == ISD::SETUNE)) { if (SDValue Result = OptimizeVFPBrcond(Op, DAG)) return Result; } ARMCC::CondCodes CondCode, CondCode2; FPCCToARMCC(CC, CondCode, CondCode2); SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); if (CondCode2 != ARMCC::AL) { ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32); SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); } return Res; } SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Table = Op.getOperand(1); SDValue Index = Op.getOperand(2); SDLoc dl(Op); EVT PTy = getPointerTy(DAG.getDataLayout()); JumpTableSDNode *JT = cast(Table); SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI); Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy)); SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) { // Thumb2 and ARMv8-M use a two-level jump. That is, it jumps into the jump table // which does another jump to the destination. 
This also makes it easier // to translate it to TBB / TBH later (Thumb2 only). // FIXME: This might not work if the function is extremely large. return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, Addr, Op.getOperand(2), JTI); } if (isPositionIndependent() || Subtarget->isROPI()) { Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, MachinePointerInfo::getJumpTable(DAG.getMachineFunction())); Chain = Addr.getValue(1); Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI); } else { Addr = DAG.getLoad(PTy, dl, Chain, Addr, MachinePointerInfo::getJumpTable(DAG.getMachineFunction())); Chain = Addr.getValue(1); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI); } } static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); SDLoc dl(Op); if (Op.getValueType().getVectorElementType() == MVT::i32) { if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) return Op; return DAG.UnrollVectorOp(Op.getNode()); } assert(Op.getOperand(0).getValueType() == MVT::v4f32 && "Invalid type for custom lowering!"); if (VT != MVT::v4i16) return DAG.UnrollVectorOp(Op.getNode()); Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0)); return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); } SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); if (VT.isVector()) return LowerVectorFP_TO_INT(Op, DAG); if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) { RTLIB::Libcall LC; if (Op.getOpcode() == ISD::FP_TO_SINT) LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType()); else LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType()); return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0), /*isSigned*/ false, SDLoc(Op)).first; } return Op; } static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); SDLoc dl(Op); if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) { if (VT.getVectorElementType() == MVT::f32) return Op; return DAG.UnrollVectorOp(Op.getNode()); } assert(Op.getOperand(0).getValueType() == MVT::v4i16 && "Invalid type for custom lowering!"); if (VT != MVT::v4f32) return DAG.UnrollVectorOp(Op.getNode()); unsigned CastOpc; unsigned Opc; switch (Op.getOpcode()) { default: llvm_unreachable("Invalid opcode!"); case ISD::SINT_TO_FP: CastOpc = ISD::SIGN_EXTEND; Opc = ISD::SINT_TO_FP; break; case ISD::UINT_TO_FP: CastOpc = ISD::ZERO_EXTEND; Opc = ISD::UINT_TO_FP; break; } Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0)); return DAG.getNode(Opc, dl, VT, Op); } SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); if (VT.isVector()) return LowerVectorINT_TO_FP(Op, DAG); if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) { RTLIB::Libcall LC; if (Op.getOpcode() == ISD::SINT_TO_FP) LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); else LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0), /*isSigned*/ false, SDLoc(Op)).first; } return Op; } SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // Implement fcopysign with a fabs and a conditional fneg. 
SDValue Tmp0 = Op.getOperand(0); SDValue Tmp1 = Op.getOperand(1); SDLoc dl(Op); EVT VT = Op.getValueType(); EVT SrcVT = Tmp1.getValueType(); bool InGPR = Tmp0.getOpcode() == ISD::BITCAST || Tmp0.getOpcode() == ARMISD::VMOVDRR; bool UseNEON = !InGPR && Subtarget->hasNEON(); if (UseNEON) { // Use VBSL to copy the sign bit. unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80); SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32, DAG.getTargetConstant(EncodedVal, dl, MVT::i32)); EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64; if (VT == MVT::f64) Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT, DAG.getNode(ISD::BITCAST, dl, OpVT, Mask), DAG.getConstant(32, dl, MVT::i32)); else /*if (VT == MVT::f32)*/ Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0); if (SrcVT == MVT::f32) { Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1); if (VT == MVT::f64) Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT, DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1), DAG.getConstant(32, dl, MVT::i32)); } else if (VT == MVT::f32) Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64, DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1), DAG.getConstant(32, dl, MVT::i32)); Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff), dl, MVT::i32); AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes)); SDValue Res = DAG.getNode(ISD::OR, dl, OpVT, DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask), DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot)); if (VT == MVT::f32) { Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res); Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, DAG.getConstant(0, dl, MVT::i32)); } else { Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res); } return Res; } // Bitcast operand 1 to i32. if (SrcVT == MVT::f64) Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Tmp1).getValue(1); Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); // Or in the signbit with integer operations. SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32); SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32); Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1); if (VT == MVT::f32) { Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32, DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2); return DAG.getNode(ISD::BITCAST, dl, MVT::f32, DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1)); } // f64: Or the high part with signbit and then combine two parts. 
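// A minimal sketch (not from the original source) of the integer form of
// fcopysign used above for f32: keep the magnitude bits of the first
// operand and take only the sign bit from the second. copySignBits is a
// hypothetical name; for f64 the same mask is applied to the high word
// only, before the two halves are recombined below.
static unsigned copySignBits(unsigned Mag, unsigned Sgn) {
  return (Mag & 0x7fffffffu) | (Sgn & 0x80000000u);
}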
Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Tmp0); SDValue Lo = Tmp0.getValue(0); SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); } SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MFI.setReturnAddressIsTaken(true); if (verifyReturnAddressArgumentIsConstant(Op, DAG)) return SDValue(); EVT VT = Op.getValueType(); SDLoc dl(Op); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); if (Depth) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = DAG.getConstant(4, dl, MVT::i32); return DAG.getLoad(VT, dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), MachinePointerInfo()); } // Return LR, which contains the return address. Mark it an implicit live-in. unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); } SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { const ARMBaseRegisterInfo &ARI = *static_cast(RegInfo); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MFI.setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); SDLoc dl(Op); // FIXME probably not meaningful unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); unsigned FrameReg = ARI.getFrameRegister(MF); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, MachinePointerInfo()); return FrameAddr; } // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const { unsigned Reg = StringSwitch(RegName) .Case("sp", ARM::SP) .Default(0); if (Reg) return Reg; report_fatal_error(Twine("Invalid register name \"" + StringRef(RegName) + "\".")); } // Result is 64 bit value so split into two 32 bit values and return as a // pair of values. static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) { SDLoc DL(N); // This function is only supposed to be called for i64 type destination. assert(N->getValueType(0) == MVT::i64 && "ExpandREAD_REGISTER called for non-i64 type result."); SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL, DAG.getVTList(MVT::i32, MVT::i32, MVT::Other), N->getOperand(0), N->getOperand(1)); Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0), Read.getValue(1))); Results.push_back(Read.getOperand(0)); } /// \p BC is a bitcast that is about to be turned into a VMOVDRR. /// When \p DstVT, the destination type of \p BC, is on the vector /// register bank and the source of bitcast, \p Op, operates on the same bank, /// it might be possible to combine them, such that everything stays on the /// vector register bank. /// \p return The node that would replace \p BT, if the combine /// is possible. static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC, SelectionDAG &DAG) { SDValue Op = BC->getOperand(0); EVT DstVT = BC->getValueType(0); // The only vector instruction that can produce a scalar (remember, // since the bitcast was about to be turned into VMOVDRR, the source // type is i64) from a vector is EXTRACT_VECTOR_ELT. 
// Moreover, we can do this combine only if there is one use. // Finally, if the destination type is not a vector, there is not // much point on forcing everything on the vector bank. if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || !Op.hasOneUse()) return SDValue(); // If the index is not constant, we will introduce an additional // multiply that will stick. // Give up in that case. ConstantSDNode *Index = dyn_cast(Op.getOperand(1)); if (!Index) return SDValue(); unsigned DstNumElt = DstVT.getVectorNumElements(); // Compute the new index. const APInt &APIntIndex = Index->getAPIntValue(); APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt); NewIndex *= APIntIndex; // Check if the new constant index fits into i32. if (NewIndex.getBitWidth() > 32) return SDValue(); // vMTy bitcast(i64 extractelt vNi64 src, i32 index) -> // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M) SDLoc dl(Op); SDValue ExtractSrc = Op.getOperand(0); EVT VecVT = EVT::getVectorVT( *DAG.getContext(), DstVT.getScalarType(), ExtractSrc.getValueType().getVectorNumElements() * DstNumElt); SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast, DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32)); } /// ExpandBITCAST - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 /// operand type is illegal (e.g., v2f32 for a target that doesn't support /// vectors), since the legalizer won't know what to do with that. static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc dl(N); SDValue Op = N->getOperand(0); // This function is only supposed to be called for i64 types, either as the // source or destination of the bit convert. EVT SrcVT = Op.getValueType(); EVT DstVT = N->getValueType(0); assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && "ExpandBITCAST called for non-i64 type"); // Turn i64->f64 into VMOVDRR. if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { // Do not force values to GPRs (this is what VMOVDRR does for the inputs) // if we can combine the bitcast with its source. if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG)) return Val; SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, DAG.getConstant(0, dl, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, DAG.getConstant(1, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, DstVT, DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi)); } // Turn f64->i64 into VMOVRRD. if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { SDValue Cvt; if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() && SrcVT.getVectorNumElements() > 1) Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op)); else Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Op); // Merge the pieces into a single i64 value. return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); } return SDValue(); } /// getZeroVector - Returns a vector of specified type with all zero elements. /// Zero vectors are used to represent vector negation and in those cases /// will be implemented with the NEON VNEG instruction. However, VNEG does /// not support i64 elements, so sometimes the zero vectors will need to be /// explicitly constructed. 
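// A minimal sketch (not from the original source) of the i64 split that
// ExpandBITCAST above feeds into VMOVDRR: the value is cut into two i32
// halves (EXTRACT_ELEMENT 0 and 1) that travel in a GPR pair. splitI64 is
// a hypothetical name.
static void splitI64(unsigned long long V, unsigned &Lo, unsigned &Hi) {
  Lo = (unsigned)(V & 0xffffffffu); // EXTRACT_ELEMENT index 0
  Hi = (unsigned)(V >> 32);         // EXTRACT_ELEMENT index 1
}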
Regardless, use a canonical VMOV to create the /// zero vector. static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) { assert(VT.isVector() && "Expected a vector type"); // The canonical modified immediate encoding of a zero vector is....0! SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32); EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal); return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); } /// LowerShiftRightParts - Lower SRA_PARTS, which returns two /// i32 values and take a 2 x i32 value to shift plus a shift amount. SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); SDLoc dl(Op); SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); SDValue ARMcc; SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, DAG.getConstant(VTBits, dl, MVT::i32), ShAmt); SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, DAG.getConstant(VTBits, dl, MVT::i32)); SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), ISD::SETGE, ARMcc, DAG, dl); SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift, ARMcc, CCR, CmpLo); SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); SDValue HiBigShift = Opc == ISD::SRA ? DAG.getNode(Opc, dl, VT, ShOpHi, DAG.getConstant(VTBits - 1, dl, VT)) : DAG.getConstant(0, dl, VT); SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), ISD::SETGE, ARMcc, DAG, dl); SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CCR, CmpHi); SDValue Ops[2] = { Lo, Hi }; return DAG.getMergeValues(Ops, dl); } /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two /// i32 values and take a 2 x i32 value to shift plus a shift amount. 
SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); SDLoc dl(Op); SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); SDValue ARMcc; SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); assert(Op.getOpcode() == ISD::SHL_PARTS); SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, DAG.getConstant(VTBits, dl, MVT::i32), ShAmt); SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, DAG.getConstant(VTBits, dl, MVT::i32)); SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), ISD::SETGE, ARMcc, DAG, dl); SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CCR, CmpHi); SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), ISD::SETGE, ARMcc, DAG, dl); SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo); SDValue Ops[2] = { Lo, Hi }; return DAG.getMergeValues(Ops, dl); } SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const { // The rounding mode is in bits 23:22 of the FPSCR. // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0 // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3) // so that the shift + and get folded into a bitfield extract. SDLoc dl(Op); SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)); SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, DAG.getConstant(1U << 22, dl, MVT::i32)); SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, DAG.getConstant(22, dl, MVT::i32)); return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, DAG.getConstant(3, dl, MVT::i32)); } static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { SDLoc dl(N); EVT VT = N->getValueType(0); if (VT.isVector()) { assert(ST->hasNEON()); // Compute the least significant set bit: LSB = X & -X SDValue X = N->getOperand(0); SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X); SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX); EVT ElemTy = VT.getVectorElementType(); if (ElemTy == MVT::i8) { // Compute with: cttz(x) = ctpop(lsb - 1) SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT, DAG.getTargetConstant(1, dl, ElemTy)); SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One); return DAG.getNode(ISD::CTPOP, dl, VT, Bits); } if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) && (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) { // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0 unsigned NumBits = ElemTy.getSizeInBits(); SDValue WidthMinus1 = DAG.getNode(ARMISD::VMOVIMM, dl, VT, DAG.getTargetConstant(NumBits - 1, dl, ElemTy)); SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB); return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ); } // Compute with: cttz(x) = ctpop(lsb - 1) // Since we can only compute the number of bits in a byte with vcnt.8, we // have to gather the result with pairwise addition (vpaddl) for i16, i32, // and i64. // Compute LSB - 1. 
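// Illustrative scalar analogue of this identity, assuming a nonzero x
// (a sketch, not NEON code):
//   unsigned cttz8(uint8_t x) {
//     uint8_t lsb = x & (uint8_t)-x;                 // isolate lowest set bit
//     return __builtin_popcount((uint8_t)(lsb - 1)); // count trailing zeros
//   }
// e.g. x = 0b00101000: lsb = 0b00001000, lsb - 1 = 0b00000111, so
// cttz(x) == popcount(lsb - 1) == 3.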
SDValue Bits; if (ElemTy == MVT::i64) { // Load constant 0xffff'ffff'ffff'ffff to register. SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT, DAG.getTargetConstant(0x1eff, dl, MVT::i32)); Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF); } else { SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT, DAG.getTargetConstant(1, dl, ElemTy)); Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One); } // Count #bits with vcnt.8. EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8; SDValue BitsVT8 = DAG.getNode(ISD::BITCAST, dl, VT8Bit, Bits); SDValue Cnt8 = DAG.getNode(ISD::CTPOP, dl, VT8Bit, BitsVT8); // Gather the #bits with vpaddl (pairwise add.) EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16; SDValue Cnt16 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT16Bit, DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32), Cnt8); if (ElemTy == MVT::i16) return Cnt16; EVT VT32Bit = VT.is64BitVector() ? MVT::v2i32 : MVT::v4i32; SDValue Cnt32 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT32Bit, DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32), Cnt16); if (ElemTy == MVT::i32) return Cnt32; assert(ElemTy == MVT::i64); SDValue Cnt64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32), Cnt32); return Cnt64; } if (!ST->hasV6T2Ops()) return SDValue(); SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0)); return DAG.getNode(ISD::CTLZ, dl, VT, rbit); } /// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count /// for each 16-bit element from operand, repeated. The basic idea is to /// leverage vcnt to get the 8-bit counts, gather and add the results. /// /// Trace for v4i16: /// input = [v0 v1 v2 v3 ] (vi 16-bit element) /// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element) /// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi) /// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6] /// [b0 b1 b2 b3 b4 b5 b6 b7] /// +[b1 b0 b3 b2 b5 b4 b7 b6] /// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0, /// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits) static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); SDLoc DL(N); EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8; SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0)); SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0); SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1); SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2); return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3); } /// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the /// bit-count for each 16-bit element from the operand. We need slightly /// different sequencing for v4i16 and v8i16 to stay within NEON's available /// 64/128-bit registers. 
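/// (Illustrative scalar analogue of what is computed per 16-bit lane:
///   unsigned popcnt16(uint16_t v) {
///     return __builtin_popcount(v & 0xff) + __builtin_popcount(v >> 8);
///   }
/// vcnt.8 produces the per-byte counts and VREV16 + vadd performs that
/// byte-pair sum in parallel; the sequencing below then widens the
/// replicated 8-bit counts into proper 16-bit lanes.)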
/// /// Trace for v4i16: /// input = [v0 v1 v2 v3 ] (vi 16-bit element) /// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi) /// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ] /// v4i16:Extracted = [k0 k1 k2 k3 ] static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); SDLoc DL(N); SDValue BitCounts = getCTPOP16BitCounts(N, DAG); if (VT.is64BitVector()) { SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended, DAG.getIntPtrConstant(0, DL)); } else { SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, BitCounts, DAG.getIntPtrConstant(0, DL)); return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted); } } /// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the /// bit-count for each 32-bit element from the operand. The idea here is /// to split the vector into 16-bit elements, leverage the 16-bit count /// routine, and then combine the results. /// /// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged): /// input = [v0 v1 ] (vi: 32-bit elements) /// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1]) /// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi) /// vrev: N0 = [k1 k0 k3 k2 ] /// [k0 k1 k2 k3 ] /// N1 =+[k1 k0 k3 k2 ] /// [k0 k2 k1 k3 ] /// N2 =+[k1 k3 k0 k2 ] /// [k0 k2 k1 k3 ] /// Extended =+[k1 k3 k0 k2 ] /// [k0 k2 ] /// Extracted=+[k1 k3 ] /// static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); SDLoc DL(N); EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16; SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0)); SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG); SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16); SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0); SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1); if (VT.is64BitVector()) { SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended, DAG.getIntPtrConstant(0, DL)); } else { SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2, DAG.getIntPtrConstant(0, DL)); return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted); } } static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); assert(ST->hasNEON() && "Custom ctpop lowering requires NEON."); assert((VT == MVT::v2i32 || VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) && "Unexpected type for custom ctpop lowering"); if (VT.getVectorElementType() == MVT::i32) return lowerCTPOP32BitElements(N, DAG); else return lowerCTPOP16BitElements(N, DAG); } static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); SDLoc dl(N); if (!VT.isVector()) return SDValue(); // Lower vector shifts on NEON to use VSHL. assert(ST->hasNEON() && "unexpected vector shift"); // Left shifts translate directly to the vshiftu intrinsic. if (N->getOpcode() == ISD::SHL) return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::arm_neon_vshiftu, dl, MVT::i32), N->getOperand(0), N->getOperand(1)); assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode"); // NEON uses the same intrinsics for both left and right shifts. 
For // right shifts, the shift amounts are negative, so negate the vector of // shift amounts. EVT ShiftVT = N->getOperand(1).getValueType(); SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1)); Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? Intrinsic::arm_neon_vshifts : Intrinsic::arm_neon_vshiftu); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(vshiftInt, dl, MVT::i32), N->getOperand(0), NegatedCount); } static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); SDLoc dl(N); // We can get here for a node like i32 = ISD::SHL i32, i64 if (VT != MVT::i64) return SDValue(); assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && "Unknown shift to lower!"); // We only lower SRA, SRL of 1 here, all others use generic lowering. if (!isOneConstant(N->getOperand(1))) return SDValue(); // If we are in thumb mode, we don't have RRX. if (ST->isThumb1Only()) return SDValue(); // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), DAG.getConstant(0, dl, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), DAG.getConstant(1, dl, MVT::i32)); // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and // captures the result into a carry flag. unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi); // The low part is an ARMISD::RRX operand, which shifts the carry in. Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); // Merge the pieces into a single i64 value. return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); } static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { SDValue TmpOp0, TmpOp1; bool Invert = false; bool Swap = false; unsigned Opc = 0; SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue CC = Op.getOperand(2); EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger(); EVT VT = Op.getValueType(); ISD::CondCode SetCCOpcode = cast(CC)->get(); SDLoc dl(Op); if (Op0.getValueType().getVectorElementType() == MVT::i64 && (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) { // Special-case integer 64-bit equality comparisons. They aren't legal, // but they can be lowered with a few vector instructions. unsigned CmpElements = CmpVT.getVectorNumElements() * 2; EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements); SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0); SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1); SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1, DAG.getCondCode(ISD::SETEQ)); SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp); SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed); Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged); if (SetCCOpcode == ISD::SETNE) Merged = DAG.getNOT(dl, Merged, CmpVT); Merged = DAG.getSExtOrTrunc(Merged, dl, VT); return Merged; } if (CmpVT.getVectorElementType() == MVT::i64) // 64-bit comparisons are not legal in general. 
return SDValue(); if (Op1.getValueType().isFloatingPoint()) { switch (SetCCOpcode) { default: llvm_unreachable("Illegal FP comparison"); case ISD::SETUNE: case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH; case ISD::SETOEQ: case ISD::SETEQ: Opc = ARMISD::VCEQ; break; case ISD::SETOLT: case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETOGT: case ISD::SETGT: Opc = ARMISD::VCGT; break; case ISD::SETOLE: case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETOGE: case ISD::SETGE: Opc = ARMISD::VCGE; break; case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; case ISD::SETUGT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH; case ISD::SETONE: // Expand this to (OLT | OGT). TmpOp0 = Op0; TmpOp1 = Op1; Opc = ISD::OR; Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0); Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1); break; case ISD::SETUO: Invert = true; LLVM_FALLTHROUGH; case ISD::SETO: // Expand this to (OLT | OGE). TmpOp0 = Op0; TmpOp1 = Op1; Opc = ISD::OR; Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0); Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1); break; } } else { // Integer comparisons. switch (SetCCOpcode) { default: llvm_unreachable("Illegal integer comparison"); case ISD::SETNE: Invert = true; case ISD::SETEQ: Opc = ARMISD::VCEQ; break; case ISD::SETLT: Swap = true; case ISD::SETGT: Opc = ARMISD::VCGT; break; case ISD::SETLE: Swap = true; case ISD::SETGE: Opc = ARMISD::VCGE; break; case ISD::SETULT: Swap = true; case ISD::SETUGT: Opc = ARMISD::VCGTU; break; case ISD::SETULE: Swap = true; case ISD::SETUGE: Opc = ARMISD::VCGEU; break; } // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). if (Opc == ARMISD::VCEQ) { SDValue AndOp; if (ISD::isBuildVectorAllZeros(Op1.getNode())) AndOp = Op0; else if (ISD::isBuildVectorAllZeros(Op0.getNode())) AndOp = Op1; // Ignore bitconvert. if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST) AndOp = AndOp.getOperand(0); if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { Opc = ARMISD::VTST; Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0)); Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1)); Invert = !Invert; } } } if (Swap) std::swap(Op0, Op1); // If one of the operands is a constant vector zero, attempt to fold the // comparison to a specialized compare-against-zero form. 
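// For example (illustrative): "x >= 0" arrives here as VCGE(x, zero) and folds to VCGEZ x, while "x < 0" arrives (after the earlier swap) as VCGT(zero, x) and folds to VCLTZ x, so no zero vector needs to be materialized.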
SDValue SingleOp; if (ISD::isBuildVectorAllZeros(Op1.getNode())) SingleOp = Op0; else if (ISD::isBuildVectorAllZeros(Op0.getNode())) { if (Opc == ARMISD::VCGE) Opc = ARMISD::VCLEZ; else if (Opc == ARMISD::VCGT) Opc = ARMISD::VCLTZ; SingleOp = Op1; } SDValue Result; if (SingleOp.getNode()) { switch (Opc) { case ARMISD::VCEQ: Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break; case ARMISD::VCGE: Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break; case ARMISD::VCLEZ: Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break; case ARMISD::VCGT: Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break; case ARMISD::VCLTZ: Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break; default: Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1); } } else { Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1); } Result = DAG.getSExtOrTrunc(Result, dl, VT); if (Invert) Result = DAG.getNOT(dl, Result, VT); return Result; } static SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDValue Carry = Op.getOperand(2); SDValue Cond = Op.getOperand(3); SDLoc DL(Op); assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only."); assert(Carry.getOpcode() != ISD::CARRY_FALSE); SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry); SDValue FVal = DAG.getConstant(0, DL, MVT::i32); SDValue TVal = DAG.getConstant(1, DL, MVT::i32); SDValue ARMcc = DAG.getConstant( IntCCToARMCC(cast(Cond)->get()), DL, MVT::i32); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR, Cmp.getValue(1), SDValue()); return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc, CCR, Chain.getValue(1)); } /// isNEONModifiedImm - Check if the specified splat value corresponds to a /// valid vector constant for a NEON instruction with a "modified immediate" /// operand (e.g., VMOV). If so, return the encoded value. static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, const SDLoc &dl, EVT &VT, bool is128Bits, NEONModImmType type) { unsigned OpCmode, Imm; // SplatBitSize is set to the smallest size that splats the vector, so a // zero vector will always have SplatBitSize == 8. However, NEON modified // immediate instructions others than VMOV do not support the 8-bit encoding // of a zero vector, and the default encoding of zero is supposed to be the // 32-bit version. if (SplatBits == 0) SplatBitSize = 32; switch (SplatBitSize) { case 8: if (type != VMOVModImm) return SDValue(); // Any 1-byte value is OK. Op=0, Cmode=1110. assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); OpCmode = 0xe; Imm = SplatBits; VT = is128Bits ? MVT::v16i8 : MVT::v8i8; break; case 16: // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. VT = is128Bits ? MVT::v8i16 : MVT::v4i16; if ((SplatBits & ~0xff) == 0) { // Value = 0x00nn: Op=x, Cmode=100x. OpCmode = 0x8; Imm = SplatBits; break; } if ((SplatBits & ~0xff00) == 0) { // Value = 0xnn00: Op=x, Cmode=101x. OpCmode = 0xa; Imm = SplatBits >> 8; break; } return SDValue(); case 32: // NEON's 32-bit VMOV supports splat values where: // * only one byte is nonzero, or // * the least significant byte is 0xff and the second byte is nonzero, or // * the least significant 2 bytes are 0xff and the third is nonzero. VT = is128Bits ? 
MVT::v4i32 : MVT::v2i32; if ((SplatBits & ~0xff) == 0) { // Value = 0x000000nn: Op=x, Cmode=000x. OpCmode = 0; Imm = SplatBits; break; } if ((SplatBits & ~0xff00) == 0) { // Value = 0x0000nn00: Op=x, Cmode=001x. OpCmode = 0x2; Imm = SplatBits >> 8; break; } if ((SplatBits & ~0xff0000) == 0) { // Value = 0x00nn0000: Op=x, Cmode=010x. OpCmode = 0x4; Imm = SplatBits >> 16; break; } if ((SplatBits & ~0xff000000) == 0) { // Value = 0xnn000000: Op=x, Cmode=011x. OpCmode = 0x6; Imm = SplatBits >> 24; break; } // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC if (type == OtherModImm) return SDValue(); if ((SplatBits & ~0xffff) == 0 && ((SplatBits | SplatUndef) & 0xff) == 0xff) { // Value = 0x0000nnff: Op=x, Cmode=1100. OpCmode = 0xc; Imm = SplatBits >> 8; break; } if ((SplatBits & ~0xffffff) == 0 && ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { // Value = 0x00nnffff: Op=x, Cmode=1101. OpCmode = 0xd; Imm = SplatBits >> 16; break; } // Note: there are a few 32-bit splat values (specifically: 00ffff00, // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not // VMOV.I32. A (very) minor optimization would be to replicate the value // and fall through here to test for a valid 64-bit splat. But, then the // caller would also need to check and handle the change in size. return SDValue(); case 64: { if (type != VMOVModImm) return SDValue(); // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. uint64_t BitMask = 0xff; uint64_t Val = 0; unsigned ImmMask = 1; Imm = 0; for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { if (((SplatBits | SplatUndef) & BitMask) == BitMask) { Val |= BitMask; Imm |= ImmMask; } else if ((SplatBits & BitMask) != 0) { return SDValue(); } BitMask <<= 8; ImmMask <<= 1; } if (DAG.getDataLayout().isBigEndian()) // swap higher and lower 32 bit word Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4); // Op=1, Cmode=1110. OpCmode = 0x1e; VT = is128Bits ? MVT::v2i64 : MVT::v1i64; break; } default: llvm_unreachable("unexpected size for isNEONModifiedImm"); } unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); return DAG.getTargetConstant(EncodedVal, dl, MVT::i32); } SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const { bool IsDouble = Op.getValueType() == MVT::f64; ConstantFPSDNode *CFP = cast(Op); const APFloat &FPVal = CFP->getValueAPF(); // Prevent floating-point constants from using literal loads // when execute-only is enabled. if (ST->genExecuteOnly()) { APInt INTVal = FPVal.bitcastToAPInt(); SDLoc DL(CFP); if (IsDouble) { SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32); SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32); if (!ST->isLittle()) std::swap(Lo, Hi); return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi); } else { return DAG.getConstant(INTVal, DL, MVT::i32); } } if (!ST->hasVFP3()) return SDValue(); // Use the default (constant pool) lowering for double constants when we have // an SP-only FPU if (IsDouble && Subtarget->isFPOnlySP()) return SDValue(); // Try splatting with a VMOV.f32... int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal); if (ImmVal != -1) { if (IsDouble || !ST->useNEONForSinglePrecisionFP()) { // We have code in place to select a valid ConstantFP already, no need to // do any mangling. return Op; } // It's a float and we are trying to use NEON operations where // possible. Lower it to a splat followed by an extract. 
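// Schematically (illustrative), the nodes built below are:
//   %splat = ARMISD::VMOVFPIMM v2f32, <imm>   ; NEON "vmov.f32 dN, #imm"
//   %res   = extract_vector_elt %splat, 0     ; reuse lane 0 as the scalar
// keeping the constant in the NEON register file instead of loading a
// literal through the VFP path.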
SDLoc DL(Op); SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32); SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, NewVal); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant, DAG.getConstant(0, DL, MVT::i32)); } // The rest of our options are NEON only, make sure that's allowed before // proceeding.. if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP())) return SDValue(); EVT VMovVT; uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue(); // It wouldn't really be worth bothering for doubles except for one very // important value, which does happen to match: 0.0. So make sure we don't do // anything stupid. if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32)) return SDValue(); // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too). SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT, false, VMOVModImm); if (NewVal != SDValue()) { SDLoc DL(Op); SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT, NewVal); if (IsDouble) return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant); // It's a float: cast and extract a vector element. SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, VecConstant); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, DAG.getConstant(0, DL, MVT::i32)); } // Finally, try a VMVN.i32 NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT, false, VMVNModImm); if (NewVal != SDValue()) { SDLoc DL(Op); SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal); if (IsDouble) return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant); // It's a float: cast and extract a vector element. SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, VecConstant); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, DAG.getConstant(0, DL, MVT::i32)); } return SDValue(); } // check if an VEXT instruction can handle the shuffle mask when the // vector sources of the shuffle are the same. static bool isSingletonVEXTMask(ArrayRef M, EVT VT, unsigned &Imm) { unsigned NumElts = VT.getVectorNumElements(); // Assume that the first shuffle index is not UNDEF. Fail if it is. if (M[0] < 0) return false; Imm = M[0]; // If this is a VEXT shuffle, the immediate value is the index of the first // element. The other shuffle indices must be the successive elements after // the first one. unsigned ExpectedElt = Imm; for (unsigned i = 1; i < NumElts; ++i) { // Increment the expected index. If it wraps around, just follow it // back to index zero and keep going. ++ExpectedElt; if (ExpectedElt == NumElts) ExpectedElt = 0; if (M[i] < 0) continue; // ignore UNDEF indices if (ExpectedElt != static_cast(M[i])) return false; } return true; } static bool isVEXTMask(ArrayRef M, EVT VT, bool &ReverseVEXT, unsigned &Imm) { unsigned NumElts = VT.getVectorNumElements(); ReverseVEXT = false; // Assume that the first shuffle index is not UNDEF. Fail if it is. if (M[0] < 0) return false; Imm = M[0]; // If this is a VEXT shuffle, the immediate value is the index of the first // element. The other shuffle indices must be the successive elements after // the first one. unsigned ExpectedElt = Imm; for (unsigned i = 1; i < NumElts; ++i) { // Increment the expected index. If it wraps around, it may still be // a VEXT but the source vectors must be swapped. 
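// Concrete example (illustrative): for v4i32 and mask <6, 7, 0, 1>, Imm starts at 6 and the expected indices 6, 7 wrap around to 0, 1. The mask is still a VEXT, but with the sources swapped and, after the adjustment below, Imm = 6 - NumElts = 2, i.e. VEXT(V2, V1, #2).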
ExpectedElt += 1; if (ExpectedElt == NumElts * 2) { ExpectedElt = 0; ReverseVEXT = true; } if (M[i] < 0) continue; // ignore UNDEF indices if (ExpectedElt != static_cast(M[i])) return false; } // Adjust the index value if the source operands will be swapped. if (ReverseVEXT) Imm -= NumElts; return true; } /// isVREVMask - Check if a vector shuffle corresponds to a VREV /// instruction with the specified blocksize. (The order of the elements /// within each block of the vector is reversed.) static bool isVREVMask(ArrayRef M, EVT VT, unsigned BlockSize) { assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && "Only possible block sizes for VREV are: 16, 32, 64"); unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); unsigned BlockElts = M[0] + 1; // If the first shuffle index is UNDEF, be optimistic. if (M[0] < 0) BlockElts = BlockSize / EltSz; if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) return false; for (unsigned i = 0; i < NumElts; ++i) { if (M[i] < 0) continue; // ignore UNDEF indices if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) return false; } return true; } static bool isVTBLMask(ArrayRef M, EVT VT) { // We can handle <8 x i8> vector shuffles. If the index in the mask is out of // range, then 0 is placed into the resulting vector. So pretty much any mask // of 8 elements can work here. return VT == MVT::v8i8 && M.size() == 8; } // Checks whether the shuffle mask represents a vector transpose (VTRN) by // checking that pairs of elements in the shuffle mask represent the same index // in each vector, incrementing the expected index by 2 at each step. // e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6] // v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g} // v2={e,f,g,h} // WhichResult gives the offset for each element in the mask based on which // of the two results it belongs to. // // The transpose can be represented either as: // result1 = shufflevector v1, v2, result1_shuffle_mask // result2 = shufflevector v1, v2, result2_shuffle_mask // where v1/v2 and the shuffle masks have the same number of elements // (here WhichResult (see below) indicates which result is being checked) // // or as: // results = shufflevector v1, v2, shuffle_mask // where both results are returned in one vector and the shuffle mask has twice // as many elements as v1/v2 (here WhichResult will always be 0 if true) here we // want to check the low half and high half of the shuffle mask as if it were // the other case static bool isVTRNMask(ArrayRef M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); if (M.size() != NumElts && M.size() != NumElts*2) return false; // If the mask is twice as long as the input vector then we need to check the // upper and lower parts of the mask with a matching value for WhichResult // FIXME: A mask with only even values will be rejected in case the first // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only // M[0] is used to determine WhichResult for (unsigned i = 0; i < M.size(); i += NumElts) { if (M.size() == NumElts * 2) WhichResult = i / NumElts; else WhichResult = M[i] == 0 ? 
0 : 1; for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) || (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult)) return false; } } if (M.size() == NumElts*2) WhichResult = 0; return true; } /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. static bool isVTRN_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult){ unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); if (M.size() != NumElts && M.size() != NumElts*2) return false; for (unsigned i = 0; i < M.size(); i += NumElts) { if (M.size() == NumElts * 2) WhichResult = i / NumElts; else WhichResult = M[i] == 0 ? 0 : 1; for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) || (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult)) return false; } } if (M.size() == NumElts*2) WhichResult = 0; return true; } // Checks whether the shuffle mask represents a vector unzip (VUZP) by checking // that the mask elements are either all even and in steps of size 2 or all odd // and in steps of size 2. // e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6] // v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g} // v2={e,f,g,h} // Requires similar checks to that of isVTRNMask with // respect the how results are returned. static bool isVUZPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); if (M.size() != NumElts && M.size() != NumElts*2) return false; for (unsigned i = 0; i < M.size(); i += NumElts) { WhichResult = M[i] == 0 ? 0 : 1; for (unsigned j = 0; j < NumElts; ++j) { if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult) return false; } } if (M.size() == NumElts*2) WhichResult = 0; // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. if (VT.is64BitVector() && EltSz == 32) return false; return true; } /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, static bool isVUZP_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult){ unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); if (M.size() != NumElts && M.size() != NumElts*2) return false; unsigned Half = NumElts / 2; for (unsigned i = 0; i < M.size(); i += NumElts) { WhichResult = M[i] == 0 ? 0 : 1; for (unsigned j = 0; j < NumElts; j += Half) { unsigned Idx = WhichResult; for (unsigned k = 0; k < Half; ++k) { int MIdx = M[i + j + k]; if (MIdx >= 0 && (unsigned) MIdx != Idx) return false; Idx += 2; } } } if (M.size() == NumElts*2) WhichResult = 0; // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. if (VT.is64BitVector() && EltSz == 32) return false; return true; } // Checks whether the shuffle mask represents a vector zip (VZIP) by checking // that pairs of elements of the shufflemask represent the same index in each // vector incrementing sequentially through the vectors. // e.g. 
For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5] // v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f} // v2={e,f,g,h} // Requires similar checks to those of isVTRNMask with respect to how results // are returned. static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); if (M.size() != NumElts && M.size() != NumElts*2) return false; for (unsigned i = 0; i < M.size(); i += NumElts) { WhichResult = M[i] == 0 ? 0 : 1; unsigned Idx = WhichResult * NumElts / 2; for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) || (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts)) return false; Idx += 1; } } if (M.size() == NumElts*2) WhichResult = 0; // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. if (VT.is64BitVector() && EltSz == 32) return false; return true; } /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); if (M.size() != NumElts && M.size() != NumElts*2) return false; for (unsigned i = 0; i < M.size(); i += NumElts) { WhichResult = M[i] == 0 ? 0 : 1; unsigned Idx = WhichResult * NumElts / 2; for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) || (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx)) return false; Idx += 1; } } if (M.size() == NumElts*2) WhichResult = 0; // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. if (VT.is64BitVector() && EltSz == 32) return false; return true; } /// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN), /// and return the corresponding ARMISD opcode if it is, or 0 if it isn't. static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT, unsigned &WhichResult, bool &isV_UNDEF) { isV_UNDEF = false; if (isVTRNMask(ShuffleMask, VT, WhichResult)) return ARMISD::VTRN; if (isVUZPMask(ShuffleMask, VT, WhichResult)) return ARMISD::VUZP; if (isVZIPMask(ShuffleMask, VT, WhichResult)) return ARMISD::VZIP; isV_UNDEF = true; if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) return ARMISD::VTRN; if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) return ARMISD::VUZP; if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) return ARMISD::VZIP; return 0; } /// \return true if this is a reverse operation on a vector. static bool isReverseMask(ArrayRef<int> M, EVT VT) { unsigned NumElts = VT.getVectorNumElements(); // Make sure the mask has the right size. if (NumElts != M.size()) return false; // Look for <15, ..., 3, -1, 1, 0>. for (unsigned i = 0; i != NumElts; ++i) if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i)) return false; return true; } // If N is an integer constant that can be moved into a register in one // instruction, return an SDValue of such a constant (will become a MOV // instruction). Otherwise return null.
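// (Illustrative: on Thumb1 this means an 8-bit immediate or the complement of one, i.e. a single MOVS or MVNS; on ARM it means any rotated so_imm immediate or its bitwise complement, as tested via ARM_AM::getSOImmVal below.)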
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, const ARMSubtarget *ST, const SDLoc &dl) { uint64_t Val; if (!isa<ConstantSDNode>(N)) return SDValue(); Val = cast<ConstantSDNode>(N)->getZExtValue(); if (ST->isThumb1Only()) { if (Val <= 255 || ~Val <= 255) return DAG.getConstant(Val, dl, MVT::i32); } else { if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1) return DAG.getConstant(Val, dl, MVT::i32); } return SDValue(); } // If this is a case we can't handle, return null and let the default // expansion code take care of it. SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const { BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); SDLoc dl(Op); EVT VT = Op.getValueType(); APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { if (SplatUndef.isAllOnesValue()) return DAG.getUNDEF(VT); if (SplatBitSize <= 64) { // Check if an immediate VMOV works. EVT VmovVT; SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT, VT.is128BitVector(), VMOVModImm); if (Val.getNode()) { SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); } // Try an immediate VMVN. uint64_t NegatedImm = (~SplatBits).getZExtValue(); Val = isNEONModifiedImm(NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT, VT.is128BitVector(), VMVNModImm); if (Val.getNode()) { SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); } // Use vmov.f32 to materialize other v2f32 and v4f32 splats. if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) { int ImmVal = ARM_AM::getFP32Imm(SplatBits); if (ImmVal != -1) { SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32); return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val); } } } } // Scan through the operands to see if only one value is used. // // As an optimisation, even if more than one value is used it may be more // profitable to splat with one value then change some lanes. // // Heuristically we decide to do this if the vector has a "dominant" value, // defined as splatted to more than half of the lanes. unsigned NumElts = VT.getVectorNumElements(); bool isOnlyLowElement = true; bool usesOnlyOneValue = true; bool hasDominantValue = false; bool isConstant = true; // Map of the number of times a particular SDValue appears in the // element list. DenseMap<SDValue, unsigned> ValueCounts; SDValue Value; for (unsigned i = 0; i < NumElts; ++i) { SDValue V = Op.getOperand(i); if (V.isUndef()) continue; if (i > 0) isOnlyLowElement = false; if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) isConstant = false; ValueCounts.insert(std::make_pair(V, 0)); unsigned &Count = ValueCounts[V]; // Is this value dominant? (takes up more than half of the lanes) if (++Count > (NumElts / 2)) { hasDominantValue = true; Value = V; } } if (ValueCounts.size() != 1) usesOnlyOneValue = false; if (!Value.getNode() && ValueCounts.size() > 0) Value = ValueCounts.begin()->first; if (ValueCounts.size() == 0) return DAG.getUNDEF(VT); // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR. // Keep going if we are hitting this case. if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode())) return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); unsigned EltSize = VT.getScalarSizeInBits(); // Use VDUP for non-constant splats. For f32 constant splats, reduce to // i32 and try again.
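// Illustrative example: for <a, a, a, b> with a non-constant 'a', the value
// 'a' occupies 3 of 4 lanes, so hasDominantValue is set and the code below
// emits roughly
//   %v = ARMISD::VDUP a
//   %v = insert_vector_elt %v, b, 3
// rather than inserting all four lanes one by one.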
if (hasDominantValue && EltSize <= 32) { if (!isConstant) { SDValue N; // If we are VDUPing a value that comes directly from a vector, that will // cause an unnecessary move to and from a GPR, where instead we could // just use VDUPLANE. We can only do this if the lane being extracted // is at a constant index, as the VDUP from lane instructions only have // constant-index forms. ConstantSDNode *constIndex; if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT && (constIndex = dyn_cast(Value->getOperand(1)))) { // We need to create a new undef vector to use for the VDUPLANE if the // size of the vector from which we get the value is different than the // size of the vector that we need to create. We will insert the element // such that the register coalescer will remove unnecessary copies. if (VT != Value->getOperand(0).getValueType()) { unsigned index = constIndex->getAPIntValue().getLimitedValue() % VT.getVectorNumElements(); N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT), Value, DAG.getConstant(index, dl, MVT::i32)), DAG.getConstant(index, dl, MVT::i32)); } else N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, Value->getOperand(0), Value->getOperand(1)); } else N = DAG.getNode(ARMISD::VDUP, dl, VT, Value); if (!usesOnlyOneValue) { // The dominant value was splatted as 'N', but we now have to insert // all differing elements. for (unsigned I = 0; I < NumElts; ++I) { if (Op.getOperand(I) == Value) continue; SmallVector Ops; Ops.push_back(N); Ops.push_back(Op.getOperand(I)); Ops.push_back(DAG.getConstant(I, dl, MVT::i32)); N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops); } } return N; } if (VT.getVectorElementType().isFloatingPoint()) { SmallVector Ops; for (unsigned i = 0; i < NumElts; ++i) Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(i))); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); SDValue Val = DAG.getBuildVector(VecVT, dl, Ops); Val = LowerBUILD_VECTOR(Val, DAG, ST); if (Val.getNode()) return DAG.getNode(ISD::BITCAST, dl, VT, Val); } if (usesOnlyOneValue) { SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); if (isConstant && Val.getNode()) return DAG.getNode(ARMISD::VDUP, dl, VT, Val); } } // If all elements are constants and the case above didn't get hit, fall back // to the default expansion, which will generate a load from the constant // pool. if (isConstant) return SDValue(); // Empirical tests suggest this is rarely worth it for vectors of length <= 2. if (NumElts >= 4) { SDValue shuffle = ReconstructShuffle(Op, DAG); if (shuffle != SDValue()) return shuffle; } if (VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) { // If we haven't found an efficient lowering, try splitting a 128-bit vector // into two 64-bit vectors; we might discover a better way to lower it. SmallVector Ops(Op->op_begin(), Op->op_begin() + NumElts); EVT ExtVT = VT.getVectorElementType(); EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2); SDValue Lower = DAG.getBuildVector(HVT, dl, makeArrayRef(&Ops[0], NumElts / 2)); if (Lower.getOpcode() == ISD::BUILD_VECTOR) Lower = LowerBUILD_VECTOR(Lower, DAG, ST); SDValue Upper = DAG.getBuildVector( HVT, dl, makeArrayRef(&Ops[NumElts / 2], NumElts / 2)); if (Upper.getOpcode() == ISD::BUILD_VECTOR) Upper = LowerBUILD_VECTOR(Upper, DAG, ST); if (Lower && Upper) return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper); } // Vectors with 32- or 64-bit elements can be built by directly assigning // the subregisters. 
Lower it to an ARMISD::BUILD_VECTOR so the operands // will be legalized. if (EltSize >= 32) { // Do the expansion with floating-point types, since that is what the VFP // registers are defined to use, and since i64 is not legal. EVT EltVT = EVT::getFloatingPointVT(EltSize); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); SmallVector<SDValue, 8> Ops; for (unsigned i = 0; i < NumElts; ++i) Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i))); SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, Val); } // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we // know the default expansion would otherwise fall back on something even // worse. For a vector with one or two non-undef values, that's // scalar_to_vector for the elements followed by a shuffle (provided the // shuffle is valid for the target) and materialization element by element // on the stack followed by a load for everything else. if (!isConstant && !usesOnlyOneValue) { SDValue Vec = DAG.getUNDEF(VT); for (unsigned i = 0 ; i < NumElts; ++i) { SDValue V = Op.getOperand(i); if (V.isUndef()) continue; SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32); Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx); } return Vec; } return SDValue(); } // Gather data to see if the operation can be modelled as a // shuffle in combination with VEXTs. SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!"); SDLoc dl(Op); EVT VT = Op.getValueType(); unsigned NumElts = VT.getVectorNumElements(); struct ShuffleSourceInfo { SDValue Vec; unsigned MinElt; unsigned MaxElt; // We may insert some combination of BITCASTs and VEXT nodes to force Vec to // be compatible with the shuffle we intend to construct. As a result // ShuffleVec will be some sliding window into the original Vec. SDValue ShuffleVec; // Code should guarantee that element i in Vec starts at element "WindowBase // + i * WindowScale" in ShuffleVec. int WindowBase; int WindowScale; bool operator ==(SDValue OtherVec) { return Vec == OtherVec; } ShuffleSourceInfo(SDValue Vec) : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0), WindowScale(1) {} }; // First gather all vectors used as an immediate source for this BUILD_VECTOR // node. SmallVector<ShuffleSourceInfo, 2> Sources; for (unsigned i = 0; i < NumElts; ++i) { SDValue V = Op.getOperand(i); if (V.isUndef()) continue; else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) { // A shuffle can only come from building a vector from various // elements of other vectors. return SDValue(); } else if (!isa<ConstantSDNode>(V.getOperand(1))) { // Furthermore, shuffles require a constant mask, whereas extractelts // accept variable indices. return SDValue(); } // Add this element source to the list if it's not already there. SDValue SourceVec = V.getOperand(0); auto Source = find(Sources, SourceVec); if (Source == Sources.end()) Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec)); // Update the minimum and maximum lane number seen. unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue(); Source->MinElt = std::min(Source->MinElt, EltNo); Source->MaxElt = std::max(Source->MaxElt, EltNo); } // Currently only do something sane when at most two source vectors // are involved. if (Sources.size() > 2) return SDValue(); // Find out the smallest element size among result and two sources, and use // it as element size to build the shuffle_vector.
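// For instance (illustrative): building a v4i16 whose lanes come from a v2i32 source and a v8i16 source picks i16 as the common element type; the v2i32 source is later bitcast to the i16-based shuffle type and its lane indices rescaled by WindowScale = 32 / 16 = 2.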
EVT SmallestEltTy = VT.getVectorElementType(); for (auto &Source : Sources) { EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType(); if (SrcEltTy.bitsLT(SmallestEltTy)) SmallestEltTy = SrcEltTy; } unsigned ResMultiplier = VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits(); NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits(); EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts); // If the source vector is too wide or too narrow, we may nevertheless be able // to construct a compatible shuffle either by concatenating it with UNDEF or // extracting a suitable range of elements. for (auto &Src : Sources) { EVT SrcVT = Src.ShuffleVec.getValueType(); if (SrcVT.getSizeInBits() == VT.getSizeInBits()) continue; // This stage of the search produces a source with the same element type as // the original, but with a total width matching the BUILD_VECTOR output. EVT EltVT = SrcVT.getVectorElementType(); unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits(); EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts); if (SrcVT.getSizeInBits() < VT.getSizeInBits()) { if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits()) return SDValue(); // We can pad out the smaller vector for free, so if it's part of a // shuffle... Src.ShuffleVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec, DAG.getUNDEF(Src.ShuffleVec.getValueType())); continue; } if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits()) return SDValue(); if (Src.MaxElt - Src.MinElt >= NumSrcElts) { // Span too large for a VEXT to cope return SDValue(); } if (Src.MinElt >= NumSrcElts) { // The extraction can just take the second half Src.ShuffleVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, DAG.getConstant(NumSrcElts, dl, MVT::i32)); Src.WindowBase = -NumSrcElts; } else if (Src.MaxElt < NumSrcElts) { // The extraction can just take the first half Src.ShuffleVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, DAG.getConstant(0, dl, MVT::i32)); } else { // An actual VEXT is needed SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, DAG.getConstant(0, dl, MVT::i32)); SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, DAG.getConstant(NumSrcElts, dl, MVT::i32)); Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1, VEXTSrc2, DAG.getConstant(Src.MinElt, dl, MVT::i32)); Src.WindowBase = -Src.MinElt; } } // Another possible incompatibility occurs from the vector element types. We // can fix this by bitcasting the source vectors to the same type we intend // for the shuffle. for (auto &Src : Sources) { EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType(); if (SrcEltTy == SmallestEltTy) continue; assert(ShuffleVT.getVectorElementType() == SmallestEltTy); Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec); Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits(); Src.WindowBase *= Src.WindowScale; } // Final sanity check before we try to actually produce a shuffle. DEBUG( for (auto Src : Sources) assert(Src.ShuffleVec.getValueType() == ShuffleVT); ); // The stars all align, our next step is to produce the mask for the shuffle. 
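// As a reminder (illustrative), the mask uses the usual shufflevector convention for concatenated operands: lane j of source k is encoded as j + k * NumElts; ExtractBase below additionally folds in the WindowBase/WindowScale adjustments computed above.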
SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1); int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits(); for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) { SDValue Entry = Op.getOperand(i); if (Entry.isUndef()) continue; auto Src = find(Sources, Entry.getOperand(0)); int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue(); // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit // trunc. So only std::min(SrcBits, DestBits) actually get defined in this // segment. EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType(); int BitsDefined = std::min(OrigEltTy.getSizeInBits(), VT.getScalarSizeInBits()); int LanesDefined = BitsDefined / BitsPerShuffleLane; // This source is expected to fill ResMultiplier lanes of the final shuffle, // starting at the appropriate offset. int *LaneMask = &Mask[i * ResMultiplier]; int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase; ExtractBase += NumElts * (Src - Sources.begin()); for (int j = 0; j < LanesDefined; ++j) LaneMask[j] = ExtractBase + j; } // Final check before we try to produce nonsense... if (!isShuffleMaskLegal(Mask, ShuffleVT)) return SDValue(); // We can't handle more than two sources. This should have already // been checked before this point. assert(Sources.size() <= 2 && "Too many sources!"); SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) }; for (unsigned i = 0; i < Sources.size(); ++i) ShuffleOps[i] = Sources[i].ShuffleVec; SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0], ShuffleOps[1], Mask); return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle); } /// isShuffleMaskLegal - Targets can use this to indicate that they only /// support *some* VECTOR_SHUFFLE operations, those with specific masks. /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values /// are assumed to be legal. bool ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const { if (VT.getVectorNumElements() == 4 && (VT.is128BitVector() || VT.is64BitVector())) { unsigned PFIndexes[4]; for (unsigned i = 0; i != 4; ++i) { if (M[i] < 0) PFIndexes[i] = 8; else PFIndexes[i] = M[i]; } // Compute the index in the perfect shuffle table. unsigned PFTableIndex = PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; unsigned Cost = (PFEntry >> 30); if (Cost <= 4) return true; } bool ReverseVEXT, isV_UNDEF; unsigned Imm, WhichResult; unsigned EltSize = VT.getScalarSizeInBits(); return (EltSize >= 32 || ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isVREVMask(M, VT, 64) || isVREVMask(M, VT, 32) || isVREVMask(M, VT, 16) || isVEXTMask(M, VT, ReverseVEXT, Imm) || isVTBLMask(M, VT) || isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF) || ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT))); } /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit /// the specified operations to build the shuffle.
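/// (Layout sketch: each 32-bit entry packs the cost into bits [31:30] /// (checked by the callers), the opcode into bits [29:26], and two 13-bit /// operand IDs into bits [25:13] and [12:0], matching the decode below: /// unsigned OpNum = (PFEntry >> 26) & 0x0F; /// unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);)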
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl) { unsigned OpNum = (PFEntry >> 26) & 0x0F; unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); enum { OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> OP_VREV, OP_VDUP0, OP_VDUP1, OP_VDUP2, OP_VDUP3, OP_VEXT1, OP_VEXT2, OP_VEXT3, OP_VUZPL, // VUZP, left result OP_VUZPR, // VUZP, right result OP_VZIPL, // VZIP, left result OP_VZIPR, // VZIP, right result OP_VTRNL, // VTRN, left result OP_VTRNR // VTRN, right result }; if (OpNum == OP_COPY) { if (LHSID == (1*9+2)*9+3) return LHS; assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); return RHS; } SDValue OpLHS, OpRHS; OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); EVT VT = OpLHS.getValueType(); switch (OpNum) { default: llvm_unreachable("Unknown shuffle opcode!"); case OP_VREV: // VREV divides the vector in half and swaps within the half. if (VT.getVectorElementType() == MVT::i32 || VT.getVectorElementType() == MVT::f32) return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS); // vrev <4 x i16> -> VREV32 if (VT.getVectorElementType() == MVT::i16) return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS); // vrev <4 x i8> -> VREV16 assert(VT.getVectorElementType() == MVT::i8); return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS); case OP_VDUP0: case OP_VDUP1: case OP_VDUP2: case OP_VDUP3: return DAG.getNode(ARMISD::VDUPLANE, dl, VT, OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32)); case OP_VEXT1: case OP_VEXT2: case OP_VEXT3: return DAG.getNode(ARMISD::VEXT, dl, VT, OpLHS, OpRHS, DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32)); case OP_VUZPL: case OP_VUZPR: return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS).getValue(OpNum-OP_VUZPL); case OP_VZIPL: case OP_VZIPR: return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS).getValue(OpNum-OP_VZIPL); case OP_VTRNL: case OP_VTRNR: return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS).getValue(OpNum-OP_VTRNL); } } static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, ArrayRef<int> ShuffleMask, SelectionDAG &DAG) { // Check to see if we can use the VTBL instruction. SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); SDLoc DL(Op); SmallVector<SDValue, 8> VTBLMask; for (ArrayRef<int>::iterator I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I) VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32)); if (V2.getNode()->isUndef()) return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1, DAG.getBuildVector(MVT::v8i8, DL, VTBLMask)); return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2, DAG.getBuildVector(MVT::v8i8, DL, VTBLMask)); } static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, SelectionDAG &DAG) { SDLoc DL(Op); SDValue OpLHS = Op.getOperand(0); EVT VT = OpLHS.getValueType(); assert((VT == MVT::v8i16 || VT == MVT::v16i8) && "Expect a v8i16/v16i8 type"); OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS); // For a v16i8 type: After the VREV, we have got <8, ...15, 8, ..., 0>. Now, // extract the first 8 bytes into the top double word and the last 8 bytes // into the bottom double word. The v8i16 case is similar. unsigned ExtractNum = (VT == MVT::v16i8) ?
8 : 4; return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS, DAG.getConstant(ExtractNum, DL, MVT::i32)); } static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); SDLoc dl(Op); EVT VT = Op.getValueType(); ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); // Convert shuffles that are directly supported on NEON to target-specific // DAG nodes, instead of keeping them as shuffles and matching them again // during code selection. This is more efficient and avoids the possibility // of inconsistencies between legalization and selection. // FIXME: floating-point vectors should be canonicalized to integer vectors // of the same type so that they get CSEd properly. ArrayRef<int> ShuffleMask = SVN->getMask(); unsigned EltSize = VT.getScalarSizeInBits(); if (EltSize <= 32) { if (SVN->isSplat()) { int Lane = SVN->getSplatIndex(); // If this is undef splat, generate it via "just" vdup, if possible. if (Lane == -1) Lane = 0; // Test if V1 is a SCALAR_TO_VECTOR. if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); } // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR // (and probably will turn into a SCALAR_TO_VECTOR once legalization // reaches it). if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && !isa<ConstantSDNode>(V1.getOperand(0))) { bool IsScalarToVector = true; for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) if (!V1.getOperand(i).isUndef()) { IsScalarToVector = false; break; } if (IsScalarToVector) return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); } return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, DAG.getConstant(Lane, dl, MVT::i32)); } bool ReverseVEXT; unsigned Imm; if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { if (ReverseVEXT) std::swap(V1, V2); return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, DAG.getConstant(Imm, dl, MVT::i32)); } if (isVREVMask(ShuffleMask, VT, 64)) return DAG.getNode(ARMISD::VREV64, dl, VT, V1); if (isVREVMask(ShuffleMask, VT, 32)) return DAG.getNode(ARMISD::VREV32, dl, VT, V1); if (isVREVMask(ShuffleMask, VT, 16)) return DAG.getNode(ARMISD::VREV16, dl, VT, V1); if (V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) { return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1, DAG.getConstant(Imm, dl, MVT::i32)); } // Check for Neon shuffles that modify both input vectors in place. // If both results are used, i.e., if there are two shuffles with the same // source operands and with masks corresponding to both results of one of // these operations, DAG memoization will ensure that a single node is // used for both shuffles. unsigned WhichResult; bool isV_UNDEF; if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask( ShuffleMask, VT, WhichResult, isV_UNDEF)) { if (isV_UNDEF) V2 = V1; return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2) .getValue(WhichResult); } // Also check for these shuffles through CONCAT_VECTORS: we canonicalize // shuffles that produce a result larger than their operands with: // shuffle(concat(v1, undef), concat(v2, undef)) // -> // shuffle(concat(v1, v2), undef) // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine). // // This is useful in the general case, but there are special cases where // native shuffles produce larger results: the two-result ops.
    //
    // Look through the concat when lowering them:
    //   shuffle(concat(v1, v2), undef)
    // ->
    //   concat(VZIP(v1, v2):0, :1)
    //
    if (V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
      SDValue SubV1 = V1->getOperand(0);
      SDValue SubV2 = V1->getOperand(1);
      EVT SubVT = SubV1.getValueType();

      // We expect these to have been canonicalized to -1.
      assert(all_of(ShuffleMask, [&](int i) {
        return i < (int)VT.getVectorNumElements();
      }) && "Unexpected shuffle index into UNDEF operand!");

      if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
              ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
        if (isV_UNDEF)
          SubV2 = SubV1;
        assert((WhichResult == 0) &&
               "In-place shuffle of concat can only have one result!");
        SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
                                  SubV1, SubV2);
        return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
                           Res.getValue(0), Res.getValue(1));
      }
    }
  }

  // If the shuffle is not directly supported and it has 4 elements, use
  // the PerfectShuffle-generated table to synthesize it from other shuffles.
  unsigned NumElts = VT.getVectorNumElements();
  if (NumElts == 4) {
    unsigned PFIndexes[4];
    for (unsigned i = 0; i != 4; ++i) {
      if (ShuffleMask[i] < 0)
        PFIndexes[i] = 8;
      else
        PFIndexes[i] = ShuffleMask[i];
    }

    // Compute the index in the perfect shuffle table. Each index is a digit
    // in the range 0-8 (8 encodes undef), so the four of them form a base-9
    // number.
    unsigned PFTableIndex =
      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
    unsigned Cost = (PFEntry >> 30);

    if (Cost <= 4)
      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
  }

  // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
  if (EltSize >= 32) {
    // Do the expansion with floating-point types, since that is what the VFP
    // registers are defined to use, and since i64 is not legal.
    EVT EltVT = EVT::getFloatingPointVT(EltSize);
    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
    V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
    V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0; i < NumElts; ++i) {
      if (ShuffleMask[i] < 0)
        Ops.push_back(DAG.getUNDEF(EltVT));
      else
        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
                                  ShuffleMask[i] < (int)NumElts ? V1 : V2,
                                  DAG.getConstant(ShuffleMask[i] & (NumElts-1),
                                                  dl, MVT::i32)));
    }
    SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
    return DAG.getNode(ISD::BITCAST, dl, VT, Val);
  }

  if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
    return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);

  if (VT == MVT::v8i8)
    if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
      return NewOp;

  return SDValue();
}

static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  // INSERT_VECTOR_ELT is legal only for immediate indexes.
  SDValue Lane = Op.getOperand(2);
  if (!isa<ConstantSDNode>(Lane))
    return SDValue();

  return Op;
}

static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
  SDValue Lane = Op.getOperand(1);
  if (!isa<ConstantSDNode>(Lane))
    return SDValue();

  SDValue Vec = Op.getOperand(0);
  if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
    SDLoc dl(Op);
    return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
  }

  return Op;
}

static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
  // The only time a CONCAT_VECTORS operation can have legal types is when
  // two 64-bit vectors are concatenated to a 128-bit vector.
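  // (Illustrative example: concatenating two v8i8 operands yields a v16i8;
  // each 64-bit operand is bitcast to an f64 lane of a v2f64 below, and the
  // final BITCAST reinterprets that pair as the requested 128-bit type.)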
  assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
         "unexpected CONCAT_VECTORS");
  SDLoc dl(Op);
  SDValue Val = DAG.getUNDEF(MVT::v2f64);
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  if (!Op0.isUndef())
    Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
                      DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
                      DAG.getIntPtrConstant(0, dl));
  if (!Op1.isUndef())
    Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
                      DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
                      DAG.getIntPtrConstant(1, dl));
  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
}

/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
/// element has been zero/sign-extended, depending on the isSigned parameter,
/// from an integer type half its size.
static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
                                   bool isSigned) {
  // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
  EVT VT = N->getValueType(0);
  if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
    SDNode *BVN = N->getOperand(0).getNode();
    if (BVN->getValueType(0) != MVT::v4i32 ||
        BVN->getOpcode() != ISD::BUILD_VECTOR)
      return false;
    unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
    unsigned HiElt = 1 - LoElt;
    ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
    ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
    ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
    ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
    if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
      return false;
    if (isSigned) {
      if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
          Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
        return true;
    } else {
      if (Hi0->isNullValue() && Hi1->isNullValue())
        return true;
    }
    return false;
  }

  if (N->getOpcode() != ISD::BUILD_VECTOR)
    return false;

  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDNode *Elt = N->getOperand(i).getNode();
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
      unsigned EltSize = VT.getScalarSizeInBits();
      unsigned HalfSize = EltSize / 2;
      if (isSigned) {
        if (!isIntN(HalfSize, C->getSExtValue()))
          return false;
      } else {
        if (!isUIntN(HalfSize, C->getZExtValue()))
          return false;
      }
      continue;
    }
    return false;
  }

  return true;
}

/// isSignExtended - Check if a node is a vector value that is sign-extended
/// or a constant BUILD_VECTOR with sign-extended elements.
static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
  if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
    return true;
  if (isExtendedBUILD_VECTOR(N, DAG, true))
    return true;
  return false;
}

/// isZeroExtended - Check if a node is a vector value that is zero-extended
/// or a constant BUILD_VECTOR with zero-extended elements.
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
  if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
    return true;
  if (isExtendedBUILD_VECTOR(N, DAG, false))
    return true;
  return false;
}

static EVT getExtensionTo64Bits(const EVT &OrigVT) {
  if (OrigVT.getSizeInBits() >= 64)
    return OrigVT;

  assert(OrigVT.isSimple() && "Expecting a simple value type");

  MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
  switch (OrigSimpleTy) {
  default: llvm_unreachable("Unexpected Vector Type");
  case MVT::v2i8:
  case MVT::v2i16:
    return MVT::v2i32;
  case MVT::v4i8:
    return MVT::v4i16;
  }
}

/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
/// We insert the required extension here to get the vector to fill a D register.
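/// For example (one possible case), a v2i8 value whose use was extended to
/// v2i64 is re-extended here only to v2i32, which is enough to fill the
/// 64-bit D register a VMULL operand occupies.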
static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
                                            const EVT &OrigTy,
                                            const EVT &ExtTy,
                                            unsigned ExtOpcode) {
  // The vector originally had a size of OrigTy. It was then extended to ExtTy.
  // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
  // 64-bits we need to insert a new extension so that it will be 64-bits.
  assert(ExtTy.is128BitVector() && "Unexpected extension size");
  if (OrigTy.getSizeInBits() >= 64)
    return N;

  // Must extend size to at least 64 bits to be used as an operand for VMULL.
  EVT NewVT = getExtensionTo64Bits(OrigTy);

  return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
}

/// SkipLoadExtensionForVMULL - return a load of the original vector size that
/// does not do any sign/zero extension. If the original vector is less
/// than 64 bits, an appropriate extension will be added after the load to
/// reach a total size of 64 bits. We have to add the extension separately
/// because ARM does not have a sign/zero extending load for vectors.
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
  EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
  // The load already has the right type.
  if (ExtendedTy == LD->getMemoryVT())
    return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
                       LD->getBasePtr(), LD->getPointerInfo(),
                       LD->getAlignment(), LD->getMemOperand()->getFlags());

  // We need to create a zextload/sextload. We cannot just create a load
  // followed by a sext/zext node because LowerMUL is also run during normal
  // operation legalization where we can't create illegal types.
  return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
                        LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
                        LD->getMemoryVT(), LD->getAlignment(),
                        LD->getMemOperand()->getFlags());
}

/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
/// extending load, or BUILD_VECTOR with extended elements, return the
/// unextended value. The unextended vector should be 64 bits so that it can
/// be used as an operand to a VMULL instruction. If the original vector size
/// before extension is less than 64 bits we add an extension to resize
/// the vector to 64 bits.
static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
  if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
    return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
                                        N->getOperand(0)->getValueType(0),
                                        N->getValueType(0),
                                        N->getOpcode());

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
    return SkipLoadExtensionForVMULL(LD, DAG);

  // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
  // have been legalized as a BITCAST from v4i32.
  if (N->getOpcode() == ISD::BITCAST) {
    SDNode *BVN = N->getOperand(0).getNode();
    assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
           BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
    unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
    return DAG.getBuildVector(
        MVT::v2i32, SDLoc(N),
        {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
  }
  // Construct a new BUILD_VECTOR with elements truncated to half the size.
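  // (For instance, a v4i32 BUILD_VECTOR of sign- or zero-extended i16
  // immediates becomes a v4i16 BUILD_VECTOR below; the operands stay i32
  // because smaller integer types are not legal at this point.)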
  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
  EVT VT = N->getValueType(0);
  unsigned EltSize = VT.getScalarSizeInBits() / 2;
  unsigned NumElts = VT.getVectorNumElements();
  MVT TruncVT = MVT::getIntegerVT(EltSize);
  SmallVector<SDValue, 8> Ops;
  SDLoc dl(N);
  for (unsigned i = 0; i != NumElts; ++i) {
    ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
    const APInt &CInt = C->getAPIntValue();
    // Element types smaller than 32 bits are not legal, so use i32 elements.
    // The values are implicitly truncated so sext vs. zext doesn't matter.
    Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
  }
  return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
}

static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
  unsigned Opcode = N->getOpcode();
  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
    SDNode *N0 = N->getOperand(0).getNode();
    SDNode *N1 = N->getOperand(1).getNode();
    return N0->hasOneUse() && N1->hasOneUse() &&
      isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
  }
  return false;
}

static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
  unsigned Opcode = N->getOpcode();
  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
    SDNode *N0 = N->getOperand(0).getNode();
    SDNode *N1 = N->getOperand(1).getNode();
    return N0->hasOneUse() && N1->hasOneUse() &&
      isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
  }
  return false;
}

static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
  // Multiplications are only custom-lowered for 128-bit vectors so that
  // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
  EVT VT = Op.getValueType();
  assert(VT.is128BitVector() && VT.isInteger() &&
         "unexpected type for custom-lowering ISD::MUL");
  SDNode *N0 = Op.getOperand(0).getNode();
  SDNode *N1 = Op.getOperand(1).getNode();
  unsigned NewOpc = 0;
  bool isMLA = false;
  bool isN0SExt = isSignExtended(N0, DAG);
  bool isN1SExt = isSignExtended(N1, DAG);
  if (isN0SExt && isN1SExt)
    NewOpc = ARMISD::VMULLs;
  else {
    bool isN0ZExt = isZeroExtended(N0, DAG);
    bool isN1ZExt = isZeroExtended(N1, DAG);
    if (isN0ZExt && isN1ZExt)
      NewOpc = ARMISD::VMULLu;
    else if (isN1SExt || isN1ZExt) {
      // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
      // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
      if (isN1SExt && isAddSubSExt(N0, DAG)) {
        NewOpc = ARMISD::VMULLs;
        isMLA = true;
      } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
        NewOpc = ARMISD::VMULLu;
        isMLA = true;
      } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
        std::swap(N0, N1);
        NewOpc = ARMISD::VMULLu;
        isMLA = true;
      }
    }

    if (!NewOpc) {
      if (VT == MVT::v2i64)
        // Fall through to expand this. It is not legal.
        return SDValue();
      else
        // Other vector multiplications are legal.
        return Op;
    }
  }

  // Legalize to a VMULL instruction.
  SDLoc DL(Op);
  SDValue Op0;
  SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
  if (!isMLA) {
    Op0 = SkipExtensionForVMULL(N0, DAG);
    assert(Op0.getValueType().is64BitVector() &&
           Op1.getValueType().is64BitVector() &&
           "unexpected types for extended operands to VMULL");
    return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
  }

  // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
  // isel lowering to take advantage of no-stall back to back vmul + vmla.
  //  vmull q0, d4, d6
  //  vmlal q0, d5, d6
  // is faster than
  //  vaddl q0, d4, d5
  //  vmovl q1, d6
  //  vmul  q0, q0, q1
  SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
  SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
  EVT Op1VT = Op1.getValueType();
  return DAG.getNode(N0->getOpcode(), DL, VT,
                     DAG.getNode(NewOpc, DL, VT,
                               DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
                     DAG.getNode(NewOpc, DL, VT,
                               DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
}

static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
                              SelectionDAG &DAG) {
  // TODO: Should this propagate fast-math-flags?

  // Convert to float
  // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
  // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
  X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
  Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
  X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
  Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
  // Get reciprocal estimate.
  // float4 recip = vrecpeq_f32(yf);
  Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                  DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
                  Y);
  // Because char has a smaller range than uchar, we can actually get away
  // without any newton steps. This requires that we use a weird bias
  // of 0xb000, however (again, this has been exhaustively tested).
  // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
  X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
  Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
  X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
  // Convert back to short.
  X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
  X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
  return X;
}

static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
                               SelectionDAG &DAG) {
  // TODO: Should this propagate fast-math-flags?

  SDValue N2;
  // Convert to float.
  // float4 yf = vcvt_f32_s32(vmovl_s16(y));
  // float4 xf = vcvt_f32_s32(vmovl_s16(x));
  N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
  N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
  N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);

  // Use reciprocal estimate and one refinement step.
  // float4 recip = vrecpeq_f32(yf);
  // recip *= vrecpsq_f32(yf, recip);
  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
                   N1);
  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                   DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
                   N1, N2);
  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
  // Because short has a smaller range than ushort, we can actually get away
  // with only a single newton step. This requires that we use a weird bias
  // of 0x89, however (again, this has been exhaustively tested).
  // float4 result = as_float4(as_int4(xf*recip) + 0x89);
  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
  N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);

  // Convert back to integer and return.
// return vmovn_s32(vcvt_s32_f32(result)); N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); return N0; } static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); assert((VT == MVT::v4i16 || VT == MVT::v8i8) && "unexpected type for custom-lowering ISD::SDIV"); SDLoc dl(Op); SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2, N3; if (VT == MVT::v8i8) { N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0); N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1); N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, DAG.getIntPtrConstant(4, dl)); N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, DAG.getIntPtrConstant(4, dl)); N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, DAG.getIntPtrConstant(0, dl)); N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, DAG.getIntPtrConstant(0, dl)); N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); N0 = LowerCONCAT_VECTORS(N0, DAG); N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0); return N0; } return LowerSDIV_v4i16(N0, N1, dl, DAG); } static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { // TODO: Should this propagate fast-math-flags? EVT VT = Op.getValueType(); assert((VT == MVT::v4i16 || VT == MVT::v8i8) && "unexpected type for custom-lowering ISD::UDIV"); SDLoc dl(Op); SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2, N3; if (VT == MVT::v8i8) { N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0); N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1); N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, DAG.getIntPtrConstant(4, dl)); N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, DAG.getIntPtrConstant(4, dl)); N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, DAG.getIntPtrConstant(0, dl)); N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, DAG.getIntPtrConstant(0, dl)); N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); N0 = LowerCONCAT_VECTORS(N0, DAG); N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8, DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl, MVT::i32), N0); return N0; } // v4i16 sdiv ... Convert to float. // float4 yf = vcvt_f32_s32(vmovl_u16(y)); // float4 xf = vcvt_f32_s32(vmovl_u16(x)); N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0); N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1); N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); // Use reciprocal estimate and two refinement steps. 
// float4 recip = vrecpeq_f32(yf); // recip *= vrecpsq_f32(yf, recip); // recip *= vrecpsq_f32(yf, recip); N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32), BN1); N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32), BN1, N2); N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32), BN1, N2); N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); // Simply multiplying by the reciprocal estimate can leave us a few ulps // too low, so we add 2 ulps (exhaustive testing shows that this is enough, // and that it will never cause us to return an answer too large). // float4 result = as_float4(as_int4(xf*recip) + 2); N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); N1 = DAG.getConstant(2, dl, MVT::v4i32); N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); // Convert back to integer and return. // return vmovn_u32(vcvt_s32_f32(result)); N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); return N0; } static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getNode()->getValueType(0); SDVTList VTs = DAG.getVTList(VT, MVT::i32); unsigned Opc; bool ExtraOp = false; switch (Op.getOpcode()) { default: llvm_unreachable("Invalid code"); case ISD::ADDC: Opc = ARMISD::ADDC; break; case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break; case ISD::SUBC: Opc = ARMISD::SUBC; break; case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break; } if (!ExtraOp) return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1)); return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1), Op.getOperand(2)); } SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetDarwin()); // For iOS, we want to call an alternative entry point: __sincos_stret, // return values are passed via sret. SDLoc dl(Op); SDValue Arg = Op.getOperand(0); EVT ArgVT = Arg.getValueType(); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); auto PtrVT = getPointerTy(DAG.getDataLayout()); MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Pair of floats / doubles used to pass the result. Type *RetTy = StructType::get(ArgTy, ArgTy, nullptr); auto &DL = DAG.getDataLayout(); ArgListTy Args; bool ShouldUseSRet = Subtarget->isAPCS_ABI(); SDValue SRet; if (ShouldUseSRet) { // Create stack object for sret. const uint64_t ByteSize = DL.getTypeAllocSize(RetTy); const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy); int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false); SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL)); ArgListEntry Entry; Entry.Node = SRet; Entry.Ty = RetTy->getPointerTo(); Entry.isSExt = false; Entry.isZExt = false; Entry.isSRet = true; Args.push_back(Entry); RetTy = Type::getVoidTy(*DAG.getContext()); } ArgListEntry Entry; Entry.Node = Arg; Entry.Ty = ArgTy; Entry.isSExt = false; Entry.isZExt = false; Args.push_back(Entry); const char *LibcallName = (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret"; RTLIB::Libcall LC = (ArgVT == MVT::f64) ? 
RTLIB::SINCOS_F64 : RTLIB::SINCOS_F32;
  CallingConv::ID CC = getLibcallCallingConv(LC);
  SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
      .setChain(DAG.getEntryNode())
      .setCallee(CC, RetTy, Callee, std::move(Args))
      .setDiscardResult(ShouldUseSRet);
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  if (!ShouldUseSRet)
    return CallResult.first;

  SDValue LoadSin =
      DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());

  // Address of cos field.
  SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
                            DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
  SDValue LoadCos =
      DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());

  SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
  return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
                     LoadSin.getValue(0), LoadCos.getValue(0));
}

SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
                                                  bool Signed,
                                                  SDValue &Chain) const {
  EVT VT = Op.getValueType();
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "unexpected type for custom lowering DIV");
  SDLoc dl(Op);

  const auto &DL = DAG.getDataLayout();
  const auto &TLI = DAG.getTargetLoweringInfo();

  const char *Name = nullptr;
  if (Signed)
    Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
  else
    Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";

  SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));

  ARMTargetLowering::ArgListTy Args;

  for (auto AI : {1, 0}) {
    ArgListEntry Arg;
    Arg.Node = Op.getOperand(AI);
    Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Args.push_back(Arg);
  }

  CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
    .setChain(Chain)
    .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
               ES, std::move(Args));

  return LowerCallTo(CLI).first;
}

SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
                                            bool Signed) const {
  assert(Op.getValueType() == MVT::i32 &&
         "unexpected type for custom lowering DIV");
  SDLoc dl(Op);

  SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
                               DAG.getEntryNode(), Op.getOperand(1));

  return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
}

static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N,
                                      SDValue InChain) {
  SDLoc DL(N);
  SDValue Op = N->getOperand(1);
  if (N->getValueType(0) == MVT::i32)
    return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
                           DAG.getConstant(0, DL, MVT::i32));
  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
                           DAG.getConstant(1, DL, MVT::i32));
  return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
                     DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
}

void ARMTargetLowering::ExpandDIV_Windows(
    SDValue Op, SelectionDAG &DAG, bool Signed,
    SmallVectorImpl<SDValue> &Results) const {
  const auto &DL = DAG.getDataLayout();
  const auto &TLI = DAG.getTargetLoweringInfo();

  assert(Op.getValueType() == MVT::i64 &&
         "unexpected type for custom lowering DIV");
  SDLoc dl(Op);

  SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());

  SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);

  SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
  SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
                              DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
  Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);

  Results.push_back(Lower);
  Results.push_back(Upper);
}

static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
  if
 (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering()))
    // Acquire/Release load/store is not legal for targets without a dmb or
    // equivalent available.
    return SDValue();

  // Monotonic load/store is legal for all targets.
  return Op;
}

static void ReplaceREADCYCLECOUNTER(SDNode *N,
                                    SmallVectorImpl<SDValue> &Results,
                                    SelectionDAG &DAG,
                                    const ARMSubtarget *Subtarget) {
  SDLoc DL(N);
  // Under Power Management extensions, the cycle-count is:
  //   mrc p15, #0, <Rt>, c9, c13, #0
  SDValue Ops[] = { N->getOperand(0), // Chain
                    DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
                    DAG.getConstant(15, DL, MVT::i32),
                    DAG.getConstant(0, DL, MVT::i32),
                    DAG.getConstant(9, DL, MVT::i32),
                    DAG.getConstant(13, DL, MVT::i32),
                    DAG.getConstant(0, DL, MVT::i32)
  };

  SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
                                 DAG.getVTList(MVT::i32, MVT::Other), Ops);
  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
                                DAG.getConstant(0, DL, MVT::i32)));
  Results.push_back(Cycles32.getValue(1));
}

static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
  SDLoc dl(V.getNode());
  SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32);
  SDValue VHi = DAG.getAnyExtOrTrunc(
      DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
      dl, MVT::i32);
  SDValue RegClass =
      DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
  SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
  SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
  return SDValue(
      DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
}

static void ReplaceCMP_SWAP_64Results(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) {
  assert(N->getValueType(0) == MVT::i64 &&
         "AtomicCmpSwap on types less than 64 should be legal");
  SDValue Ops[] = {N->getOperand(1),
                   createGPRPairNode(DAG, N->getOperand(2)),
                   createGPRPairNode(DAG, N->getOperand(3)),
                   N->getOperand(0)};
  SDNode *CmpSwap = DAG.getMachineNode(
      ARM::CMP_SWAP_64, SDLoc(N),
      DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);

  MachineFunction &MF = DAG.getMachineFunction();
  MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
  MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);

  Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_0, SDLoc(N), MVT::i32,
                                               SDValue(CmpSwap, 0)));
  Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_1, SDLoc(N), MVT::i32,
                                               SDValue(CmpSwap, 0)));
  Results.push_back(SDValue(CmpSwap, 2));
}

static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget,
                          SelectionDAG &DAG) {
  const auto &TLI = DAG.getTargetLoweringInfo();

  assert(Subtarget.getTargetTriple().isOSMSVCRT() &&
         "Custom lowering is MSVCRT specific!");

  SDLoc dl(Op);
  SDValue Val = Op.getOperand(0);
  MVT Ty = Val->getSimpleValueType(0);
  SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, dl, Ty, Op.getOperand(1));
  SDValue Callee = DAG.getExternalSymbol(Ty == MVT::f32 ?
"powf" : "pow", TLI.getPointerTy(DAG.getDataLayout())); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Val; Entry.Ty = Val.getValueType().getTypeForEVT(*DAG.getContext()); Entry.isZExt = true; Args.push_back(Entry); Entry.Node = Exponent; Entry.Ty = Exponent.getValueType().getTypeForEVT(*DAG.getContext()); Entry.isZExt = true; Args.push_back(Entry); Type *LCRTy = Val.getValueType().getTypeForEVT(*DAG.getContext()); // In the in-chain to the call is the entry node If we are emitting a // tailcall, the chain will be mutated if the node has a non-entry input // chain. SDValue InChain = DAG.getEntryNode(); SDValue TCChain = InChain; const auto *F = DAG.getMachineFunction().getFunction(); bool IsTC = TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) && F->getReturnType() == LCRTy; if (IsTC) InChain = TCChain; TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl) .setChain(InChain) .setCallee(CallingConv::ARM_AAPCS_VFP, LCRTy, Callee, std::move(Args)) .setTailCall(IsTC); std::pair CI = TLI.LowerCallTo(CLI); // Return the chain (the DAG root) if it is a tail call return !CI.second.getNode() ? DAG.getRoot() : CI.first; } SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG); case ISD::ConstantPool: if (Subtarget->genExecuteOnly()) llvm_unreachable("execute-only should not generate constant pools"); return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalAddress: switch (Subtarget->getTargetTriple().getObjectFormat()) { default: llvm_unreachable("unknown object format"); case Triple::COFF: return LowerGlobalAddressWindows(Op, DAG); case Triple::ELF: return LowerGlobalAddressELF(Op, DAG); case Triple::MachO: return LowerGlobalAddressDarwin(Op, DAG); } case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget); case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, Subtarget); case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG); case ISD::SHL: case ISD::SRL: case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); case ISD::SREM: return LowerREM(Op.getNode(), DAG); case ISD::UREM: return LowerREM(Op.getNode(), DAG); case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); case ISD::SRL_PARTS: case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget); case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget); case 
 ISD::SETCC:         return LowerVSETCC(Op, DAG);
  case ISD::SETCCE:        return LowerSETCCE(Op, DAG);
  case ISD::ConstantFP:    return LowerConstantFP(Op, DAG, Subtarget);
  case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG, Subtarget);
  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
  case ISD::FLT_ROUNDS_:   return LowerFLT_ROUNDS_(Op, DAG);
  case ISD::MUL:           return LowerMUL(Op, DAG);
  case ISD::SDIV:
    if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
      return LowerDIV_Windows(Op, DAG, /* Signed */ true);
    return LowerSDIV(Op, DAG);
  case ISD::UDIV:
    if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
      return LowerDIV_Windows(Op, DAG, /* Signed */ false);
    return LowerUDIV(Op, DAG);
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE:          return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
  case ISD::SADDO:
  case ISD::UADDO:
  case ISD::SSUBO:
  case ISD::USUBO:
    return LowerXALUO(Op, DAG);
  case ISD::ATOMIC_LOAD:
  case ISD::ATOMIC_STORE:  return LowerAtomicLoadStore(Op, DAG);
  case ISD::FSINCOS:       return LowerFSINCOS(Op, DAG);
  case ISD::SDIVREM:
  case ISD::UDIVREM:       return LowerDivRem(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
      return LowerDYNAMIC_STACKALLOC(Op, DAG);
    llvm_unreachable("Don't know how to custom lower this!");
  case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
  case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
  case ISD::FPOWI: return LowerFPOWI(Op, *Subtarget, DAG);
  case ARMISD::WIN__DBZCHK: return SDValue();
  }
}

/// ReplaceNodeResults - Replace the results of node with an illegal result
/// type with new values built out of custom code.
void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue> &Results,
                                           SelectionDAG &DAG) const {
  SDValue Res;
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom expand this!");
  case ISD::READ_REGISTER:
    ExpandREAD_REGISTER(N, Results, DAG);
    break;
  case ISD::BITCAST:
    Res = ExpandBITCAST(N, DAG);
    break;
  case ISD::SRL:
  case ISD::SRA:
    Res = Expand64BitShift(N, DAG, Subtarget);
    break;
  case ISD::SREM:
  case ISD::UREM:
    Res = LowerREM(N, DAG);
    break;
  case ISD::SDIVREM:
  case ISD::UDIVREM:
    Res = LowerDivRem(SDValue(N, 0), DAG);
    assert(Res.getNumOperands() == 2 && "DivRem needs two values");
    Results.push_back(Res.getValue(0));
    Results.push_back(Res.getValue(1));
    return;
  case ISD::READCYCLECOUNTER:
    ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
    return;
  case ISD::UDIV:
  case ISD::SDIV:
    assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
    return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
                             Results);
  case ISD::ATOMIC_CMP_SWAP:
    ReplaceCMP_SWAP_64Results(N, Results, DAG);
    return;
  }
  if (Res.getNode())
    Results.push_back(Res);
}

//===----------------------------------------------------------------------===//
//                           ARM Scheduler Hooks
//===----------------------------------------------------------------------===//

/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
/// registers the function context.
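/// The dispatch block's address is materialized PC-relatively from a
/// constant-pool entry and stored at offset 36 of the function context
/// (&jbuf[1], the saved-pc slot the SjLj unwinder later branches through).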
void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
                                               MachineBasicBlock *MBB,
                                               MachineBasicBlock *DispatchBB,
                                               int FI) const {
  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
         "ROPI/RWPI not currently supported with SjLj");
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  DebugLoc dl = MI.getDebugLoc();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  MachineConstantPool *MCP = MF->getConstantPool();
  ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
  const Function *F = MF->getFunction();

  bool isThumb = Subtarget->isThumb();
  bool isThumb2 = Subtarget->isThumb2();

  unsigned PCLabelId = AFI->createPICLabelUId();
  unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
  ARMConstantPoolValue *CPV =
    ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
  unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);

  const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
                                           : &ARM::GPRRegClass;

  // Grab constant pool and fixed stack memory operands.
  MachineMemOperand *CPMMO =
      MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
                               MachineMemOperand::MOLoad, 4, 4);

  MachineMemOperand *FIMMOSt =
      MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
                               MachineMemOperand::MOStore, 4, 4);

  // Load the address of the dispatch MBB into the jump buffer.
  if (isThumb2) {
    // Incoming value: jbuf
    //   ldr.n  r5, LCPI1_1
    //   orr    r5, r5, #1
    //   add    r5, pc
    //   str    r5, [$jbuf, #+4] ; &jbuf[1]
    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
                       .addConstantPoolIndex(CPI)
                       .addMemOperand(CPMMO));
    // Set the low bit because of thumb mode.
    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
    AddDefaultCC(
        AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
                           .addReg(NewVReg1, RegState::Kill)
                           .addImm(0x01)));
    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
    BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
        .addReg(NewVReg2, RegState::Kill)
        .addImm(PCLabelId);
    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
                       .addReg(NewVReg3, RegState::Kill)
                       .addFrameIndex(FI)
                       .addImm(36) // &jbuf[1] :: pc
                       .addMemOperand(FIMMOSt));
  } else if (isThumb) {
    // Incoming value: jbuf
    //   ldr.n  r1, LCPI1_4
    //   add    r1, pc
    //   mov    r2, #1
    //   orrs   r1, r2
    //   add    r2, $jbuf, #+4 ; &jbuf[1]
    //   str    r1, [r2]
    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
                       .addConstantPoolIndex(CPI)
                       .addMemOperand(CPMMO));
    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
    BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
        .addReg(NewVReg1, RegState::Kill)
        .addImm(PCLabelId);
    // Set the low bit because of thumb mode.
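    // (Bit 0 of an indirect branch target selects the instruction set, so it
    // must be 1 for execution to resume in Thumb state when the unwinder
    // branches through jbuf.)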
    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
                       .addReg(ARM::CPSR, RegState::Define)
                       .addImm(1));
    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
                       .addReg(ARM::CPSR, RegState::Define)
                       .addReg(NewVReg2, RegState::Kill)
                       .addReg(NewVReg3, RegState::Kill));
    unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
    BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
            .addFrameIndex(FI)
            .addImm(36); // &jbuf[1] :: pc
    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
                       .addReg(NewVReg4, RegState::Kill)
                       .addReg(NewVReg5, RegState::Kill)
                       .addImm(0)
                       .addMemOperand(FIMMOSt));
  } else {
    // Incoming value: jbuf
    //   ldr  r1, LCPI1_1
    //   add  r1, pc, r1
    //   str  r1, [$jbuf, #+4] ; &jbuf[1]
    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
                       .addConstantPoolIndex(CPI)
                       .addImm(0)
                       .addMemOperand(CPMMO));
    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
                       .addReg(NewVReg1, RegState::Kill)
                       .addImm(PCLabelId));
    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
                       .addReg(NewVReg2, RegState::Kill)
                       .addFrameIndex(FI)
                       .addImm(36) // &jbuf[1] :: pc
                       .addMemOperand(FIMMOSt));
  }
}

void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
                                              MachineBasicBlock *MBB) const {
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  DebugLoc dl = MI.getDebugLoc();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  MachineFrameInfo &MFI = MF->getFrameInfo();
  int FI = MFI.getFunctionContextIndex();

  const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
                                                        : &ARM::GPRnopcRegClass;

  // Get a mapping of the call site numbers to all of the landing pads they're
  // associated with.
  DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2>> CallSiteNumToLPad;
  unsigned MaxCSNum = 0;
  for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
       ++BB) {
    if (!BB->isEHPad()) continue;

    // FIXME: We should assert that the EH_LABEL is the first MI in the landing
    // pad.
    for (MachineBasicBlock::iterator
           II = BB->begin(), IE = BB->end(); II != IE; ++II) {
      if (!II->isEHLabel()) continue;

      MCSymbol *Sym = II->getOperand(0).getMCSymbol();
      if (!MF->hasCallSiteLandingPad(Sym)) continue;

      SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
      for (SmallVectorImpl<unsigned>::iterator
             CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
           CSI != CSE; ++CSI) {
        CallSiteNumToLPad[*CSI].push_back(&*BB);
        MaxCSNum = std::max(MaxCSNum, *CSI);
      }
      break;
    }
  }

  // Get an ordered list of the machine basic blocks for the jump table.
  std::vector<MachineBasicBlock*> LPadList;
  SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs;
  LPadList.reserve(CallSiteNumToLPad.size());
  for (unsigned I = 1; I <= MaxCSNum; ++I) {
    SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
    for (SmallVectorImpl<MachineBasicBlock*>::iterator
           II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
      LPadList.push_back(*II);
      InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
    }
  }

  assert(!LPadList.empty() &&
         "No landing pad destinations for the dispatch jump table!");

  // Create the jump table and associated information.
  MachineJumpTableInfo *JTI =
    MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
  unsigned MJTI = JTI->createJumpTableIndex(LPadList);

  // Create the MBBs for the dispatch code.

  // Shove the dispatch's address into the return slot in the function context.
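  // The dispatch code loads the call-site index from the function context,
  // bounds-checks it in DispatchBB (branching to TrapBB when it is out of
  // range), and indexes the jump table in DispContBB to reach the matching
  // landing pad.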
  MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
  DispatchBB->setIsEHPad();

  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
  unsigned trap_opcode;
  if (Subtarget->isThumb())
    trap_opcode = ARM::tTRAP;
  else
    trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;

  BuildMI(TrapBB, dl, TII->get(trap_opcode));
  DispatchBB->addSuccessor(TrapBB);

  MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
  DispatchBB->addSuccessor(DispContBB);

  // Insert the MBBs.
  MF->insert(MF->end(), DispatchBB);
  MF->insert(MF->end(), DispContBB);
  MF->insert(MF->end(), TrapBB);

  // Insert code into the entry block that creates and registers the function
  // context.
  SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);

  MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(*MF, FI),
      MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, 4);

  MachineInstrBuilder MIB;
  MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));

  const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
  const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();

  // Add a register mask with no preserved registers. This results in all
  // registers being marked as clobbered. This can't work if the dispatch block
  // is in a Thumb1 function and is linked with ARM code which uses the FP
  // registers, as there is no way to preserve the FP registers in Thumb1 mode.
  MIB.addRegMask(RI.getSjLjDispatchPreservedMask(*MF));

  bool IsPositionIndependent = isPositionIndependent();
  unsigned NumLPads = LPadList.size();
  if (Subtarget->isThumb2()) {
    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
                       .addFrameIndex(FI)
                       .addImm(4)
                       .addMemOperand(FIMMOLd));

    if (NumLPads < 256) {
      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
                         .addReg(NewVReg1)
                         .addImm(LPadList.size()));
    } else {
      unsigned VReg1 = MRI->createVirtualRegister(TRC);
      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
                         .addImm(NumLPads & 0xFFFF));

      unsigned VReg2 = VReg1;
      if ((NumLPads & 0xFFFF0000) != 0) {
        VReg2 = MRI->createVirtualRegister(TRC);
        AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
                           .addReg(VReg1)
                           .addImm(NumLPads >> 16));
      }

      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
                         .addReg(NewVReg1)
                         .addReg(VReg2));
    }

    BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
      .addMBB(TrapBB)
      .addImm(ARMCC::HI)
      .addReg(ARM::CPSR);

    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3)
                       .addJumpTableIndex(MJTI));

    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
    AddDefaultCC(
      AddDefaultPred(
        BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
            .addReg(NewVReg3, RegState::Kill)
            .addReg(NewVReg1)
            .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));

    BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
      .addReg(NewVReg4, RegState::Kill)
      .addReg(NewVReg1)
      .addJumpTableIndex(MJTI);
  } else if (Subtarget->isThumb()) {
    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
                       .addFrameIndex(FI)
                       .addImm(1)
                       .addMemOperand(FIMMOLd));

    if (NumLPads < 256) {
      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
                         .addReg(NewVReg1)
                         .addImm(NumLPads));
    } else {
      MachineConstantPool *ConstantPool = MF->getConstantPool();
      Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
      const Constant *C = ConstantInt::get(Int32Ty, NumLPads);

      // MachineConstantPool wants an
explicit alignment. unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty); if (Align == 0) Align = MF->getDataLayout().getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); unsigned VReg1 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci)) .addReg(VReg1, RegState::Define) .addConstantPoolIndex(Idx)); AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr)) .addReg(NewVReg1) .addReg(VReg1)); } BuildMI(DispatchBB, dl, TII->get(ARM::tBcc)) .addMBB(TrapBB) .addImm(ARMCC::HI) .addReg(ARM::CPSR); unsigned NewVReg2 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2) .addReg(ARM::CPSR, RegState::Define) .addReg(NewVReg1) .addImm(2)); unsigned NewVReg3 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3) .addJumpTableIndex(MJTI)); unsigned NewVReg4 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4) .addReg(ARM::CPSR, RegState::Define) .addReg(NewVReg2, RegState::Kill) .addReg(NewVReg3)); MachineMemOperand *JTMMOLd = MF->getMachineMemOperand( MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4); unsigned NewVReg5 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5) .addReg(NewVReg4, RegState::Kill) .addImm(0) .addMemOperand(JTMMOLd)); unsigned NewVReg6 = NewVReg5; if (IsPositionIndependent) { NewVReg6 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6) .addReg(ARM::CPSR, RegState::Define) .addReg(NewVReg5, RegState::Kill) .addReg(NewVReg3)); } BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr)) .addReg(NewVReg6, RegState::Kill) .addJumpTableIndex(MJTI); } else { unsigned NewVReg1 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1) .addFrameIndex(FI) .addImm(4) .addMemOperand(FIMMOLd)); if (NumLPads < 256) { AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri)) .addReg(NewVReg1) .addImm(NumLPads)); } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) { unsigned VReg1 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1) .addImm(NumLPads & 0xFFFF)); unsigned VReg2 = VReg1; if ((NumLPads & 0xFFFF0000) != 0) { VReg2 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2) .addReg(VReg1) .addImm(NumLPads >> 16)); } AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) .addReg(NewVReg1) .addReg(VReg2)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); const Constant *C = ConstantInt::get(Int32Ty, NumLPads); // MachineConstantPool wants an explicit alignment. 
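      // (Same fallback as in the Thumb1 path above: when the target records
      // no preferred alignment for i32, use the constant's allocation size.)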
      unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
      if (Align == 0)
        Align = MF->getDataLayout().getTypeAllocSize(C->getType());
      unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);

      unsigned VReg1 = MRI->createVirtualRegister(TRC);
      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
                         .addReg(VReg1, RegState::Define)
                         .addConstantPoolIndex(Idx)
                         .addImm(0));
      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
                         .addReg(NewVReg1)
                         .addReg(VReg1, RegState::Kill));
    }

    BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
      .addMBB(TrapBB)
      .addImm(ARMCC::HI)
      .addReg(ARM::CPSR);

    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
    AddDefaultCC(
      AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
                         .addReg(NewVReg1)
                         .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
                       .addJumpTableIndex(MJTI));

    MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
        MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
    unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(
        BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
            .addReg(NewVReg3, RegState::Kill)
            .addReg(NewVReg4)
            .addImm(0)
            .addMemOperand(JTMMOLd));

    if (IsPositionIndependent) {
      BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
        .addReg(NewVReg5, RegState::Kill)
        .addReg(NewVReg4)
        .addJumpTableIndex(MJTI);
    } else {
      BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
        .addReg(NewVReg5, RegState::Kill)
        .addJumpTableIndex(MJTI);
    }
  }

  // Add the jump table entries as successors to the MBB.
  SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
  for (std::vector<MachineBasicBlock*>::iterator
         I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
    MachineBasicBlock *CurMBB = *I;
    if (SeenMBBs.insert(CurMBB).second)
      DispContBB->addSuccessor(CurMBB);
  }

  // N.B. the order the invoke BBs are processed in doesn't matter here.
  const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
  SmallVector<MachineBasicBlock*, 64> MBBLPads;
  for (MachineBasicBlock *BB : InvokeBBs) {
    // Remove the landing pad successor from the invoke block and replace it
    // with the new dispatch block.
    SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(),
                                                  BB->succ_end());
    while (!Successors.empty()) {
      MachineBasicBlock *SMBB = Successors.pop_back_val();
      if (SMBB->isEHPad()) {
        BB->removeSuccessor(SMBB);
        MBBLPads.push_back(SMBB);
      }
    }

    BB->addSuccessor(DispatchBB, BranchProbability::getZero());
    BB->normalizeSuccProbs();

    // Find the invoke call and mark all of the callee-saved registers as
    // 'implicit defined' so that they're spilled. This prevents code from
    // moving instructions to before the EH block, where they will never be
    // executed.
    for (MachineBasicBlock::reverse_iterator
           II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
      if (!II->isCall()) continue;

      DenseMap<unsigned, bool> DefRegs;
      for (MachineInstr::mop_iterator
             OI = II->operands_begin(), OE = II->operands_end();
           OI != OE; ++OI) {
        if (!OI->isReg()) continue;
        DefRegs[OI->getReg()] = true;
      }

      MachineInstrBuilder MIB(*MF, &*II);

      for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
        unsigned Reg = SavedRegs[i];
        if (Subtarget->isThumb2() &&
            !ARM::tGPRRegClass.contains(Reg) &&
            !ARM::hGPRRegClass.contains(Reg))
          continue;
        if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
          continue;
        if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
          continue;
        if (!DefRegs[Reg])
          MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
      }

      break;
    }
  }

  // Mark all former landing pads as non-landing pads. The dispatch is the only
  // landing pad now.
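  // (They remain reachable through DispContBB's jump table; only the EH-pad
  // marking has moved to DispatchBB.)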
  for (SmallVectorImpl<MachineBasicBlock*>::iterator
         I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
    (*I)->setIsEHPad(false);

  // The instruction is gone now.
  MI.eraseFromParent();
}

static
MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
       E = MBB->succ_end(); I != E; ++I)
    if (*I != Succ)
      return *I;
  llvm_unreachable("Expecting a BB with two successors!");
}

/// Return the load opcode for a given load size. If load size >= 8,
/// a NEON opcode will be returned.
static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
  if (LdSize >= 8)
    return LdSize == 16 ? ARM::VLD1q32wb_fixed
                        : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
  if (IsThumb1)
    return LdSize == 4 ? ARM::tLDRi
                       : LdSize == 2 ? ARM::tLDRHi
                                     : LdSize == 1 ? ARM::tLDRBi : 0;
  if (IsThumb2)
    return LdSize == 4 ? ARM::t2LDR_POST
                       : LdSize == 2 ? ARM::t2LDRH_POST
                                     : LdSize == 1 ? ARM::t2LDRB_POST : 0;
  return LdSize == 4 ? ARM::LDR_POST_IMM
                     : LdSize == 2 ? ARM::LDRH_POST
                                   : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
}

/// Return the store opcode for a given store size. If store size >= 8,
/// a NEON opcode will be returned.
static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
  if (StSize >= 8)
    return StSize == 16 ? ARM::VST1q32wb_fixed
                        : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
  if (IsThumb1)
    return StSize == 4 ? ARM::tSTRi
                       : StSize == 2 ? ARM::tSTRHi
                                     : StSize == 1 ? ARM::tSTRBi : 0;
  if (IsThumb2)
    return StSize == 4 ? ARM::t2STR_POST
                       : StSize == 2 ? ARM::t2STRH_POST
                                     : StSize == 1 ? ARM::t2STRB_POST : 0;
  return StSize == 4 ? ARM::STR_POST_IMM
                     : StSize == 2 ? ARM::STRH_POST
                                   : StSize == 1 ? ARM::STRB_POST_IMM : 0;
}

/// Emit a post-increment load operation with given size. The instructions
/// will be added to BB at Pos.
static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
                       const TargetInstrInfo *TII, const DebugLoc &dl,
                       unsigned LdSize, unsigned Data, unsigned AddrIn,
                       unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
  unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
  assert(LdOpc != 0 && "Should have a load opcode");
  if (LdSize >= 8) {
    AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
                       .addReg(AddrOut, RegState::Define).addReg(AddrIn)
                       .addImm(0));
  } else if (IsThumb1) {
    // load + update AddrIn
    AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
                       .addReg(AddrIn).addImm(0));
    MachineInstrBuilder MIB =
        BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
    MIB = AddDefaultT1CC(MIB);
    MIB.addReg(AddrIn).addImm(LdSize);
    AddDefaultPred(MIB);
  } else if (IsThumb2) {
    AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
                       .addReg(AddrOut, RegState::Define).addReg(AddrIn)
                       .addImm(LdSize));
  } else { // arm
    AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
                       .addReg(AddrOut, RegState::Define).addReg(AddrIn)
                       .addReg(0).addImm(LdSize));
  }
}

/// Emit a post-increment store operation with given size. The instructions
/// will be added to BB at Pos.
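/// For example, with StSize == 4 in ARM mode this expands to a post-indexed
///   str Data, [AddrIn], #4
/// with AddrOut defined as the incremented address (illustrative encoding;
/// the exact opcode comes from getStOpcode above).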
static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned StSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2) { unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2); assert(StOpc != 0 && "Should have a store opcode"); if (StSize >= 8) { AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) .addReg(AddrIn).addImm(0).addReg(Data)); } else if (IsThumb1) { // store + update AddrIn AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc)).addReg(Data) .addReg(AddrIn).addImm(0)); MachineInstrBuilder MIB = BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut); MIB = AddDefaultT1CC(MIB); MIB.addReg(AddrIn).addImm(StSize); AddDefaultPred(MIB); } else if (IsThumb2) { AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) .addReg(Data).addReg(AddrIn).addImm(StSize)); } else { // arm AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) .addReg(Data).addReg(AddrIn).addReg(0) .addImm(StSize)); } } MachineBasicBlock * ARMTargetLowering::EmitStructByval(MachineInstr &MI, MachineBasicBlock *BB) const { // This pseudo instruction has 3 operands: dst, src, size // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold(). // Otherwise, we will generate unrolled scalar copies. const TargetInstrInfo *TII = Subtarget->getInstrInfo(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = ++BB->getIterator(); unsigned dest = MI.getOperand(0).getReg(); unsigned src = MI.getOperand(1).getReg(); unsigned SizeVal = MI.getOperand(2).getImm(); unsigned Align = MI.getOperand(3).getImm(); DebugLoc dl = MI.getDebugLoc(); MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); unsigned UnitSize = 0; const TargetRegisterClass *TRC = nullptr; const TargetRegisterClass *VecTRC = nullptr; bool IsThumb1 = Subtarget->isThumb1Only(); bool IsThumb2 = Subtarget->isThumb2(); bool IsThumb = Subtarget->isThumb(); if (Align & 1) { UnitSize = 1; } else if (Align & 2) { UnitSize = 2; } else { // Check whether we can use NEON instructions. if (!MF->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) && Subtarget->hasNEON()) { if ((Align % 16 == 0) && SizeVal >= 16) UnitSize = 16; else if ((Align % 8 == 0) && SizeVal >= 8) UnitSize = 8; } // Can't use NEON instructions. if (UnitSize == 0) UnitSize = 4; } // Select the correct opcode and register class for unit size load/store bool IsNeon = UnitSize >= 8; TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass; if (IsNeon) VecTRC = UnitSize == 16 ? &ARM::DPairRegClass : UnitSize == 8 ? &ARM::DPRRegClass : nullptr; unsigned BytesLeft = SizeVal % UnitSize; unsigned LoopSize = SizeVal - BytesLeft; if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) { // Use LDR and STR to copy. // [scratch, srcOut] = LDR_POST(srcIn, UnitSize) // [destOut] = STR_POST(scratch, destIn, UnitSize) unsigned srcIn = src; unsigned destIn = dest; for (unsigned i = 0; i < LoopSize; i+=UnitSize) { unsigned srcOut = MRI.createVirtualRegister(TRC); unsigned destOut = MRI.createVirtualRegister(TRC); unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC); emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut, IsThumb1, IsThumb2); emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut, IsThumb1, IsThumb2); srcIn = srcOut; destIn = destOut; } // Handle the leftover bytes with LDRB and STRB. 
// [scratch, srcOut] = LDRB_POST(srcIn, 1) // [destOut] = STRB_POST(scratch, destIn, 1) for (unsigned i = 0; i < BytesLeft; i++) { unsigned srcOut = MRI.createVirtualRegister(TRC); unsigned destOut = MRI.createVirtualRegister(TRC); unsigned scratch = MRI.createVirtualRegister(TRC); emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut, IsThumb1, IsThumb2); emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut, IsThumb1, IsThumb2); srcIn = srcOut; destIn = destOut; } MI.eraseFromParent(); // The instruction is gone now. return BB; } // Expand the pseudo op to a loop. // thisMBB: // ... // movw varEnd, # --> with thumb2 // movt varEnd, # // ldrcp varEnd, idx --> without thumb2 // fallthrough --> loopMBB // loopMBB: // PHI varPhi, varEnd, varLoop // PHI srcPhi, src, srcLoop // PHI destPhi, dst, destLoop // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize) // [destLoop] = STR_POST(scratch, destPhi, UnitSize) // subs varLoop, varPhi, #UnitSize // bne loopMBB // fallthrough --> exitMBB // exitMBB: // epilogue to handle left-over bytes // [scratch, srcOut] = LDRB_POST(srcLoop, 1) // [destOut] = STRB_POST(scratch, destLoop, 1) MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MF->insert(It, loopMBB); MF->insert(It, exitMBB); // Transfer the remainder of BB and its successor edges to exitMBB. exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); // Load an immediate to varEnd. unsigned varEnd = MRI.createVirtualRegister(TRC); if (Subtarget->useMovt(*MF)) { unsigned Vtmp = varEnd; if ((LoopSize & 0xFFFF0000) != 0) Vtmp = MRI.createVirtualRegister(TRC); AddDefaultPred(BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16), Vtmp).addImm(LoopSize & 0xFFFF)); if ((LoopSize & 0xFFFF0000) != 0) AddDefaultPred(BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16), varEnd) .addReg(Vtmp) .addImm(LoopSize >> 16)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); const Constant *C = ConstantInt::get(Int32Ty, LoopSize); // MachineConstantPool wants an explicit alignment. 
unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty); if (Align == 0) Align = MF->getDataLayout().getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); if (IsThumb) AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg( varEnd, RegState::Define).addConstantPoolIndex(Idx)); else AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg( varEnd, RegState::Define).addConstantPoolIndex(Idx).addImm(0)); } BB->addSuccessor(loopMBB); // Generate the loop body: // varPhi = PHI(varLoop, varEnd) // srcPhi = PHI(srcLoop, src) // destPhi = PHI(destLoop, dst) MachineBasicBlock *entryBB = BB; BB = loopMBB; unsigned varLoop = MRI.createVirtualRegister(TRC); unsigned varPhi = MRI.createVirtualRegister(TRC); unsigned srcLoop = MRI.createVirtualRegister(TRC); unsigned srcPhi = MRI.createVirtualRegister(TRC); unsigned destLoop = MRI.createVirtualRegister(TRC); unsigned destPhi = MRI.createVirtualRegister(TRC); BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi) .addReg(varLoop).addMBB(loopMBB) .addReg(varEnd).addMBB(entryBB); BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi) .addReg(srcLoop).addMBB(loopMBB) .addReg(src).addMBB(entryBB); BuildMI(BB, dl, TII->get(ARM::PHI), destPhi) .addReg(destLoop).addMBB(loopMBB) .addReg(dest).addMBB(entryBB); // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize) // [destLoop] = STR_POST(scratch, destPhi, UnitSize) unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC); emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop, IsThumb1, IsThumb2); emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop, IsThumb1, IsThumb2); // Decrement loop variable by UnitSize. if (IsThumb1) { MachineInstrBuilder MIB = BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop); MIB = AddDefaultT1CC(MIB); MIB.addReg(varPhi).addImm(UnitSize); AddDefaultPred(MIB); } else { MachineInstrBuilder MIB = BuildMI(*BB, BB->end(), dl, TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop); AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize))); MIB->getOperand(5).setReg(ARM::CPSR); MIB->getOperand(5).setIsDef(true); } BuildMI(*BB, BB->end(), dl, TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc)) .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); // loopMBB can loop back to loopMBB or fall through to exitMBB. BB->addSuccessor(loopMBB); BB->addSuccessor(exitMBB); // Add epilogue to handle BytesLeft. BB = exitMBB; auto StartOfExit = exitMBB->begin(); // [scratch, srcOut] = LDRB_POST(srcLoop, 1) // [destOut] = STRB_POST(scratch, destLoop, 1) unsigned srcIn = srcLoop; unsigned destIn = destLoop; for (unsigned i = 0; i < BytesLeft; i++) { unsigned srcOut = MRI.createVirtualRegister(TRC); unsigned destOut = MRI.createVirtualRegister(TRC); unsigned scratch = MRI.createVirtualRegister(TRC); emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut, IsThumb1, IsThumb2); emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut, IsThumb1, IsThumb2); srcIn = srcOut; destIn = destOut; } MI.eraseFromParent(); // The instruction is gone now.
return BB; } MachineBasicBlock * ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI, MachineBasicBlock *MBB) const { const TargetMachine &TM = getTargetMachine(); const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); assert(Subtarget->isTargetWindows() && "__chkstk is only supported on Windows"); assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode"); // __chkstk takes the number of words to allocate on the stack in R4, and // returns the stack adjustment in number of bytes in R4. This will not // clobber any other registers (other than the obvious lr). // // Although, technically, IP should be considered a register which may be // clobbered, the call itself will not touch it. Windows on ARM is a pure // thumb-2 environment, so there is no interworking required. As a result, we // do not expect a veneer to be emitted by the linker, clobbering IP. // // Each module receives its own copy of __chkstk, so no import thunk is // required, again, ensuring that IP is not clobbered. // // Finally, although some linkers may theoretically provide a trampoline for // out of range calls (which is quite common due to a 32M range limitation of // branches for Thumb), we can generate the long-call version via // -mcmodel=large, alleviating the need for the trampoline which may clobber // IP. switch (TM.getCodeModel()) { case CodeModel::Small: case CodeModel::Medium: case CodeModel::Default: case CodeModel::Kernel: BuildMI(*MBB, MI, DL, TII.get(ARM::tBL)) .addImm((unsigned)ARMCC::AL).addReg(0) .addExternalSymbol("__chkstk") .addReg(ARM::R4, RegState::Implicit | RegState::Kill) .addReg(ARM::R4, RegState::Implicit | RegState::Define) .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead); break; case CodeModel::Large: case CodeModel::JITDefault: { MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass); BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg) .addExternalSymbol("__chkstk"); BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr)) .addImm((unsigned)ARMCC::AL).addReg(0) .addReg(Reg, RegState::Kill) .addReg(ARM::R4, RegState::Implicit | RegState::Kill) .addReg(ARM::R4, RegState::Implicit | RegState::Define) .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead); break; } } AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP) .addReg(ARM::SP, RegState::Kill) .addReg(ARM::R4, RegState::Kill) .setMIFlags(MachineInstr::FrameSetup))); MI.eraseFromParent(); return MBB; } MachineBasicBlock * ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI.getDebugLoc(); MachineFunction *MF = MBB->getParent(); const TargetInstrInfo *TII = Subtarget->getInstrInfo(); MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock(); MF->insert(++MBB->getIterator(), ContBB); ContBB->splice(ContBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)), MBB->end()); ContBB->transferSuccessorsAndUpdatePHIs(MBB); MBB->addSuccessor(ContBB); MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0)); MF->push_back(TrapBB); MBB->addSuccessor(TrapBB); AddDefaultPred(BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8)) .addReg(MI.getOperand(0).getReg()) .addImm(0)); BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc)) .addMBB(TrapBB) .addImm(ARMCC::EQ) .addReg(ARM::CPSR); MI.eraseFromParent(); return ContBB; } MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { const TargetInstrInfo *TII = Subtarget->getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); switch (MI.getOpcode()) { default: { MI.dump(); llvm_unreachable("Unexpected instr type to insert"); } // Thumb1 post-indexed loads are really just single-register LDMs. case ARM::tLDR_postidx: { BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD)) .addOperand(MI.getOperand(1)) // Rn_wb .addOperand(MI.getOperand(2)) // Rn .addOperand(MI.getOperand(3)) // PredImm .addOperand(MI.getOperand(4)) // PredReg .addOperand(MI.getOperand(0)); // Rt MI.eraseFromParent(); return BB; } // The Thumb2 pre-indexed stores have the same MI operands, they just // define them differently in the .td files from the isel patterns, so // they need pseudos. case ARM::t2STR_preidx: MI.setDesc(TII->get(ARM::t2STR_PRE)); return BB; case ARM::t2STRB_preidx: MI.setDesc(TII->get(ARM::t2STRB_PRE)); return BB; case ARM::t2STRH_preidx: MI.setDesc(TII->get(ARM::t2STRH_PRE)); return BB; case ARM::STRi_preidx: case ARM::STRBi_preidx: { unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM; // Decode the offset. unsigned Offset = MI.getOperand(4).getImm(); bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub; Offset = ARM_AM::getAM2Offset(Offset); if (isSub) Offset = -Offset; MachineMemOperand *MMO = *MI.memoperands_begin(); BuildMI(*BB, MI, dl, TII->get(NewOpc)) .addOperand(MI.getOperand(0)) // Rn_wb .addOperand(MI.getOperand(1)) // Rt .addOperand(MI.getOperand(2)) // Rn .addImm(Offset) // offset (skip GPR==zero_reg) .addOperand(MI.getOperand(5)) // pred .addOperand(MI.getOperand(6)) .addMemOperand(MMO); MI.eraseFromParent(); return BB; } case ARM::STRr_preidx: case ARM::STRBr_preidx: case ARM::STRH_preidx: { unsigned NewOpc; switch (MI.getOpcode()) { default: llvm_unreachable("unexpected opcode!"); case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break; case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break; case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break; } MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); for (unsigned i = 0; i < MI.getNumOperands(); ++i) MIB.addOperand(MI.getOperand(i)); MI.eraseFromParent(); return BB; } case ARM::tMOVCCr_pseudo: { // To "insert" a SELECT_CC instruction, we actually have to insert the // diamond control-flow pattern. The incoming instruction knows the // destination vreg to set, the condition code register to branch on, the // true/false values to select between, and a branch opcode to use. const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = ++BB->getIterator(); // thisMBB: // ... // TrueVal = ... // cmpTY ccX, r1, r2 // bCC copy1MBB // fallthrough --> copy0MBB MachineBasicBlock *thisMBB = BB; MachineFunction *F = BB->getParent(); MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, copy0MBB); F->insert(It, sinkMBB); // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(BB); BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); BuildMI(BB, dl, TII->get(ARM::tBcc)) .addMBB(sinkMBB) .addImm(MI.getOperand(3).getImm()) .addReg(MI.getOperand(4).getReg()); // copy0MBB: // %FalseValue = ... 
// # fallthrough to sinkMBB BB = copy0MBB; // Update machine-CFG edges BB->addSuccessor(sinkMBB); // sinkMBB: // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = sinkMBB; BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg()) .addReg(MI.getOperand(1).getReg()) .addMBB(copy0MBB) .addReg(MI.getOperand(2).getReg()) .addMBB(thisMBB); MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; } case ARM::BCCi64: case ARM::BCCZi64: { // If there is an unconditional branch to the other successor, remove it. BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end()); // Compare both parts that make up the double comparison separately for // equality. bool RHSisZero = MI.getOpcode() == ARM::BCCZi64; unsigned LHS1 = MI.getOperand(1).getReg(); unsigned LHS2 = MI.getOperand(2).getReg(); if (RHSisZero) { AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) .addReg(LHS1).addImm(0)); BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) .addReg(LHS2).addImm(0) .addImm(ARMCC::EQ).addReg(ARM::CPSR); } else { unsigned RHS1 = MI.getOperand(3).getReg(); unsigned RHS2 = MI.getOperand(4).getReg(); AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) .addReg(LHS1).addReg(RHS1)); BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) .addReg(LHS2).addReg(RHS2) .addImm(ARMCC::EQ).addReg(ARM::CPSR); } MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB(); MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB); if (MI.getOperand(0).getImm() == ARMCC::NE) std::swap(destMBB, exitMBB); BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); if (isThumb2) AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB)); else BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB); MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; } case ARM::Int_eh_sjlj_setjmp: case ARM::Int_eh_sjlj_setjmp_nofp: case ARM::tInt_eh_sjlj_setjmp: case ARM::t2Int_eh_sjlj_setjmp: case ARM::t2Int_eh_sjlj_setjmp_nofp: return BB; case ARM::Int_eh_sjlj_setup_dispatch: EmitSjLjDispatchBlock(MI, BB); return BB; case ARM::ABS: case ARM::t2ABS: { // To insert an ABS instruction, we have to insert the // diamond control-flow pattern. The incoming instruction knows the // source vreg to test against 0, the destination vreg to set, // the condition code register to branch on, the // true/false values to select between, and a branch opcode to use. // It transforms // V1 = ABS V0 // into // V2 = MOVS V0 // BCC (branch to SinkBB if V0 >= 0) // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0) // SinkBB: V1 = PHI(V2, V3) const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator BBI = ++BB->getIterator(); MachineFunction *Fn = BB->getParent(); MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB); Fn->insert(BBI, RSBBB); Fn->insert(BBI, SinkBB); unsigned int ABSSrcReg = MI.getOperand(1).getReg(); unsigned int ABSDstReg = MI.getOperand(0).getReg(); bool ABSSrcKIll = MI.getOperand(1).isKill(); bool isThumb2 = Subtarget->isThumb2(); MachineRegisterInfo &MRI = Fn->getRegInfo(); // In Thumb mode S must not be specified if source register is the SP or // PC and if destination register is the SP, so restrict register class unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ? 
&ARM::rGPRRegClass : &ARM::GPRRegClass); // Transfer the remainder of BB and its successor edges to sinkMBB. SinkBB->splice(SinkBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); SinkBB->transferSuccessorsAndUpdatePHIs(BB); BB->addSuccessor(RSBBB); BB->addSuccessor(SinkBB); // fall through to SinkMBB RSBBB->addSuccessor(SinkBB); // insert a cmp at the end of BB AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) .addReg(ABSSrcReg).addImm(0)); // insert a bcc with opposite CC to ARMCC::MI at the end of BB BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB) .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR); // insert rsbri in RSBBB // Note: BCC and rsbri will be converted into predicated rsbmi // by if-conversion pass BuildMI(*RSBBB, RSBBB->begin(), dl, TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg) .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0) .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); // insert PHI in SinkBB, // reuse ABSDstReg to not change uses of ABS instruction BuildMI(*SinkBB, SinkBB->begin(), dl, TII->get(ARM::PHI), ABSDstReg) .addReg(NewRsbDstReg).addMBB(RSBBB) .addReg(ABSSrcReg).addMBB(BB); // remove ABS instruction MI.eraseFromParent(); // return last added BB return SinkBB; } case ARM::COPY_STRUCT_BYVAL_I32: ++NumLoopByVals; return EmitStructByval(MI, BB); case ARM::WIN__CHKSTK: return EmitLowered__chkstk(MI, BB); case ARM::WIN__DBZCHK: return EmitLowered__dbzchk(MI, BB); } } /// \brief Attaches vregs to MEMCPY that it will use as scratch registers /// when it is expanded into LDM/STM. This is done as a post-isel lowering /// instead of as a custom inserter because we need the use list from the SDNode. static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget, MachineInstr &MI, const SDNode *Node) { bool isThumb1 = Subtarget->isThumb1Only(); DebugLoc DL = MI.getDebugLoc(); MachineFunction *MF = MI.getParent()->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); MachineInstrBuilder MIB(*MF, MI); // If the new dst/src is unused mark it as dead. if (!Node->hasAnyUseOfValue(0)) { MI.getOperand(0).setIsDead(true); } if (!Node->hasAnyUseOfValue(1)) { MI.getOperand(1).setIsDead(true); } // The MEMCPY both defines and kills the scratch registers. for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) { unsigned TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass); MIB.addReg(TmpReg, RegState::Define|RegState::Dead); } } void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const { if (MI.getOpcode() == ARM::MEMCPY) { attachMEMCPYScratchRegs(Subtarget, MI, Node); return; } const MCInstrDesc *MCID = &MI.getDesc(); // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB, // RSC. Coming out of isel, they have an implicit CPSR def, but the optional // operand is still set to noreg. If needed, set the optional operand's // register to CPSR, and remove the redundant implicit def. // // e.g. ADCS (..., CPSR) -> ADC (... opt:CPSR). // Rename pseudo opcodes. 
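// Hypothetical before/after (operand lists abbreviated, not verbatim MIR):
//   SUBSri dst, src, imm, pred, implicit-def %CPSR
// becomes
//   SUBri  dst, src, imm, pred, %CPSR
// once the optional cc_out operand is activated and the redundant implicit
// def is removed below.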
unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode()); if (NewOpc) { const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo(); MCID = &TII->get(NewOpc); assert(MCID->getNumOperands() == MI.getDesc().getNumOperands() + 1 && "converted opcode should be the same except for cc_out"); MI.setDesc(*MCID); // Add the optional cc_out operand MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true)); } unsigned ccOutIdx = MCID->getNumOperands() - 1; // Any ARM instruction that sets the 's' bit should specify an optional // "cc_out" operand in the last operand position. if (!MI.hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) { assert(!NewOpc && "Optional cc_out operand required"); return; } // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it // since we already have an optional CPSR def. bool definesCPSR = false; bool deadCPSR = false; for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) { definesCPSR = true; if (MO.isDead()) deadCPSR = true; MI.RemoveOperand(i); break; } } if (!definesCPSR) { assert(!NewOpc && "Optional cc_out operand required"); return; } assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag"); if (deadCPSR) { assert(!MI.getOperand(ccOutIdx).getReg() && "expect uninitialized optional cc_out operand"); return; } // If this instruction was defined with an optional CPSR def and its dag node // had a live implicit CPSR def, then activate the optional CPSR def. MachineOperand &MO = MI.getOperand(ccOutIdx); MO.setReg(ARM::CPSR); MO.setIsDef(true); } //===----------------------------------------------------------------------===// // ARM Optimization Hooks //===----------------------------------------------------------------------===// // Helper function that checks if N is a null or all ones constant. static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) { return AllOnes ? isAllOnesConstant(N) : isNullConstant(N); } // Return true if N is conditionally 0 or all ones. // Detects these expressions where cc is an i1 value: // // (select cc 0, y) [AllOnes=0] // (select cc y, 0) [AllOnes=0] // (zext cc) [AllOnes=0] // (sext cc) [AllOnes=0/1] // (select cc -1, y) [AllOnes=1] // (select cc y, -1) [AllOnes=1] // // Invert is set when N is the null/all ones constant when CC is false. // OtherOp is set to the alternative value of N. static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, SDValue &CC, bool &Invert, SDValue &OtherOp, SelectionDAG &DAG) { switch (N->getOpcode()) { default: return false; case ISD::SELECT: { CC = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); if (isZeroOrAllOnes(N1, AllOnes)) { Invert = false; OtherOp = N2; return true; } if (isZeroOrAllOnes(N2, AllOnes)) { Invert = true; OtherOp = N1; return true; } return false; } case ISD::ZERO_EXTEND: // (zext cc) can never be the all ones value. if (AllOnes) return false; LLVM_FALLTHROUGH; case ISD::SIGN_EXTEND: { SDLoc dl(N); EVT VT = N->getValueType(0); CC = N->getOperand(0); if (CC.getValueType() != MVT::i1) return false; Invert = !AllOnes; if (AllOnes) // When looking for an AllOnes constant, N is an sext, and the 'other' // value is 0. OtherOp = DAG.getConstant(0, dl, VT); else if (N->getOpcode() == ISD::ZERO_EXTEND) // When looking for a 0 constant, N can be zext or sext. 
OtherOp = DAG.getConstant(1, dl, VT); else OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl, VT); return true; } } } // Combine a constant select operand into its use: // // (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) // (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) // (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1] // (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) // (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) // // The transform is rejected if the select doesn't have a constant operand that // is null, or all ones when AllOnes is set. // // Also recognize sext/zext from i1: // // (add (zext cc), x) -> (select cc (add x, 1), x) // (add (sext cc), x) -> (select cc (add x, -1), x) // // These transformations eventually create predicated instructions. // // @param N The node to transform. // @param Slct The N operand that is a select. // @param OtherOp The other N operand (x above). // @param DCI Context. // @param AllOnes Require the select constant to be all ones instead of null. // @returns The new node, or SDValue() on failure. static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes = false) { SelectionDAG &DAG = DCI.DAG; EVT VT = N->getValueType(0); SDValue NonConstantVal; SDValue CCOp; bool SwapSelectOps; if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps, NonConstantVal, DAG)) return SDValue(); // Slct is now known to be the desired identity constant when CC is true. SDValue TrueVal = OtherOp; SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal); // Unless SwapSelectOps says CC should be false. if (SwapSelectOps) std::swap(TrueVal, FalseVal); return DAG.getNode(ISD::SELECT, SDLoc(N), VT, CCOp, TrueVal, FalseVal); } // Attempt combineSelectAndUse on each operand of a commutative operator N. static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); if (N0.getNode()->hasOneUse()) if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes)) return Result; if (N1.getNode()->hasOneUse()) if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes)) return Result; return SDValue(); } static bool IsVUZPShuffleNode(SDNode *N) { // VUZP shuffle node. if (N->getOpcode() == ARMISD::VUZP) return true; // "VUZP" on i32 is an alias for VTRN. if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32) return true; return false; } static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // Look for ADD(VUZP.0, VUZP.1). if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() || N0 == N1) return SDValue(); // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD. if (!N->getValueType(0).is64BitVector()) return SDValue(); // Generate vpadd.
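// For example (a sketch): with t = VUZP x, y, ADD(t.0, t.1) computes
// <x0+x1, x2+x3, y0+y1, y2+y3>, which is exactly vpadd of the original
// x and y.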
SelectionDAG &DAG = DCI.DAG; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc dl(N); SDNode *Unzip = N0.getNode(); EVT VT = N->getValueType(0); SmallVector<SDValue, 8> Ops; Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl, TLI.getPointerTy(DAG.getDataLayout()))); Ops.push_back(Unzip->getOperand(0)); Ops.push_back(Unzip->getOperand(1)); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops); } static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // Check for two extended operands. if (!(N0.getOpcode() == ISD::SIGN_EXTEND && N1.getOpcode() == ISD::SIGN_EXTEND) && !(N0.getOpcode() == ISD::ZERO_EXTEND && N1.getOpcode() == ISD::ZERO_EXTEND)) return SDValue(); SDValue N00 = N0.getOperand(0); SDValue N10 = N1.getOperand(0); // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1)) if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() || N00 == N10) return SDValue(); // We only recognize Q register paddl here; this can't be reached until // after type legalization. if (!N00.getValueType().is64BitVector() || !N0.getValueType().is128BitVector()) return SDValue(); // Generate vpaddl. SelectionDAG &DAG = DCI.DAG; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc dl(N); EVT VT = N->getValueType(0); SmallVector<SDValue, 8> Ops; // Form vpaddl.sN or vpaddl.uN depending on the kind of extension. unsigned Opcode; if (N0.getOpcode() == ISD::SIGN_EXTEND) Opcode = Intrinsic::arm_neon_vpaddls; else Opcode = Intrinsic::arm_neon_vpaddlu; Ops.push_back(DAG.getConstant(Opcode, dl, TLI.getPointerTy(DAG.getDataLayout()))); EVT ElemTy = N00.getValueType().getVectorElementType(); unsigned NumElts = VT.getVectorNumElements(); EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2); SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT, N00.getOperand(0), N00.getOperand(1)); Ops.push_back(Concat); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops); } // FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in // an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is // much easier to match. static SDValue AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // Only perform optimization if after legalize, and if NEON is available. We // also expect both operands to be BUILD_VECTORs. if (DCI.isBeforeLegalize() || !Subtarget->hasNEON() || N0.getOpcode() != ISD::BUILD_VECTOR || N1.getOpcode() != ISD::BUILD_VECTOR) return SDValue(); // Check output type since VPADDL operand elements can only be 8, 16, or 32. EVT VT = N->getValueType(0); if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64) return SDValue(); // Check that the vector operands are of the right form. // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR_ELT // operands, where N is the size of the formed vector. // Each EXTRACT_VECTOR_ELT should have the same input vector and an odd or // even index such that we have a pairwise add pattern. // Grab the vector that all EXTRACT_VECTOR_ELT nodes should be referencing. if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT) return SDValue(); SDValue Vec = N0->getOperand(0)->getOperand(0); SDNode *V = Vec.getNode(); unsigned nextIndex = 0; // For each operand of the ADD which is a BUILD_VECTOR, // check to see if each of its operands is an EXTRACT_VECTOR_ELT with // the same vector and an appropriate index.
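// Illustrative sketch for v2i32: N0 = BUILD_VECTOR(extelt(V,0), extelt(V,2))
// and N1 = BUILD_VECTOR(extelt(V,1), extelt(V,3)) add up to <V0+V1, V2+V3>,
// i.e. a pairwise add of V, which is what vpaddl computes.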
for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) { if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) { SDValue ExtVec0 = N0->getOperand(i); SDValue ExtVec1 = N1->getOperand(i); // First operand is the vector, verify it's the same. if (V != ExtVec0->getOperand(0).getNode() || V != ExtVec1->getOperand(0).getNode()) return SDValue(); // Second is the constant, verify it's correct. ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1)); ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1)); // For the constant, we want to see all the even or all the odd. if (!C0 || !C1 || C0->getZExtValue() != nextIndex || C1->getZExtValue() != nextIndex+1) return SDValue(); // Increment index. nextIndex+=2; } else return SDValue(); } // Don't generate vpaddl+vmovn; we'll match it to vpadd later. if (Vec.getValueType().getVectorElementType() == VT.getVectorElementType()) return SDValue(); // Create VPADDL node. SelectionDAG &DAG = DCI.DAG; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc dl(N); // Build operand list. SmallVector<SDValue, 8> Ops; Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl, TLI.getPointerTy(DAG.getDataLayout()))); // Input is the vector. Ops.push_back(Vec); // Get widened type and narrowed type. MVT widenType; unsigned numElem = VT.getVectorNumElements(); EVT inputLaneType = Vec.getValueType().getVectorElementType(); switch (inputLaneType.getSimpleVT().SimpleTy) { case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break; case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break; case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break; default: llvm_unreachable("Invalid vector element type for padd optimization."); } SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops); unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE; return DAG.getNode(ExtOp, dl, VT, tmp); } static SDValue findMUL_LOHI(SDValue V) { if (V->getOpcode() == ISD::UMUL_LOHI || V->getOpcode() == ISD::SMUL_LOHI) return V; return SDValue(); } static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // Look for multiply add opportunities. // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where // each add node consumes a value from ISD::UMUL_LOHI and there is // a glue link from the first add to the second add. // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by // an S/UMLAL instruction. // UMUL_LOHI // / :lo \ :hi // / \ [no multiline comment] // loAdd -> ADDE | // \ :glue / // \ / // ADDC <- hiAdd // assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC"); SDValue AddcOp0 = AddcNode->getOperand(0); SDValue AddcOp1 = AddcNode->getOperand(1); // Check if the two operands are from the same mul_lohi node. if (AddcOp0.getNode() == AddcOp1.getNode()) return SDValue(); assert(AddcNode->getNumValues() == 2 && AddcNode->getValueType(0) == MVT::i32 && "Expect ADDC with two result values. First: i32"); // Check that we have a glued ADDC node. if (AddcNode->getValueType(1) != MVT::Glue) return SDValue(); // Check that the ADDC adds the low result of the S/UMUL_LOHI. if (AddcOp0->getOpcode() != ISD::UMUL_LOHI && AddcOp0->getOpcode() != ISD::SMUL_LOHI && AddcOp1->getOpcode() != ISD::UMUL_LOHI && AddcOp1->getOpcode() != ISD::SMUL_LOHI) return SDValue(); // Look for the glued ADDE.
SDNode* AddeNode = AddcNode->getGluedUser(); if (!AddeNode) return SDValue(); // Make sure it is really an ADDE. if (AddeNode->getOpcode() != ISD::ADDE) return SDValue(); assert(AddeNode->getNumOperands() == 3 && AddeNode->getOperand(2).getValueType() == MVT::Glue && "ADDE node has the wrong inputs"); // Check for the triangle shape. SDValue AddeOp0 = AddeNode->getOperand(0); SDValue AddeOp1 = AddeNode->getOperand(1); // Make sure that the ADDE operands are not coming from the same node. if (AddeOp0.getNode() == AddeOp1.getNode()) return SDValue(); // Find the MUL_LOHI node walking up ADDE's operands. bool IsLeftOperandMUL = false; SDValue MULOp = findMUL_LOHI(AddeOp0); if (MULOp == SDValue()) MULOp = findMUL_LOHI(AddeOp1); else IsLeftOperandMUL = true; if (MULOp == SDValue()) return SDValue(); // Figure out the right opcode. unsigned Opc = MULOp->getOpcode(); unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL; // Figure out the high and low input values to the MLAL node. SDValue* HiAdd = nullptr; SDValue* LoMul = nullptr; SDValue* LowAdd = nullptr; // Ensure that ADDE is from high result of ISD::SMUL_LOHI. if ((AddeOp0 != MULOp.getValue(1)) && (AddeOp1 != MULOp.getValue(1))) return SDValue(); if (IsLeftOperandMUL) HiAdd = &AddeOp1; else HiAdd = &AddeOp0; // Ensure that LoMul and LowAdd are taken from correct ISD::SMUL_LOHI node // whose low result is fed to the ADDC we are checking. if (AddcOp0 == MULOp.getValue(0)) { LoMul = &AddcOp0; LowAdd = &AddcOp1; } if (AddcOp1 == MULOp.getValue(0)) { LoMul = &AddcOp1; LowAdd = &AddcOp0; } if (!LoMul) return SDValue(); // Create the merged node. SelectionDAG &DAG = DCI.DAG; // Build operand list. SmallVector Ops; Ops.push_back(LoMul->getOperand(0)); Ops.push_back(LoMul->getOperand(1)); Ops.push_back(*LowAdd); Ops.push_back(*HiAdd); SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode), DAG.getVTList(MVT::i32, MVT::i32), Ops); // Replace the ADDs' nodes uses by the MLA node's values. SDValue HiMLALResult(MLALNode.getNode(), 1); DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult); SDValue LoMLALResult(MLALNode.getNode(), 0); DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult); // Return original node to notify the driver to stop replacing. SDValue resNode(AddcNode, 0); return resNode; } static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // UMAAL is similar to UMLAL except that it adds two unsigned values. // While trying to combine for the other MLAL nodes, first search for the // chance to use UMAAL. Check if Addc uses another addc node which can first // be combined into a UMLAL. The other pattern is AddcNode being combined // into an UMLAL and then using another addc is handled in ISelDAGToDAG. if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() || (Subtarget->isThumb() && !Subtarget->hasThumb2())) return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget); SDNode *PrevAddc = nullptr; if (AddcNode->getOperand(0).getOpcode() == ISD::ADDC) PrevAddc = AddcNode->getOperand(0).getNode(); else if (AddcNode->getOperand(1).getOpcode() == ISD::ADDC) PrevAddc = AddcNode->getOperand(1).getNode(); // If there's no addc chains, just return a search for any MLAL. if (PrevAddc == nullptr) return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget); // Try to convert the addc operand to an MLAL and if that fails try to // combine AddcNode. 
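// Architectural reminder (not taken from this file): UMAAL RdLo, RdHi, Rn, Rm
// computes Rn*Rm + RdLo + RdHi as a 64-bit result, so a zero-seeded UMLAL
// plus one further 32-bit addend can fold into a single UMAAL.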
SDValue MLAL = AddCombineTo64bitMLAL(PrevAddc, DCI, Subtarget); if (MLAL != SDValue(PrevAddc, 0)) return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget); // Find the converted UMAAL or quit if it doesn't exist. SDNode *UmlalNode = nullptr; SDValue AddHi; if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) { UmlalNode = AddcNode->getOperand(0).getNode(); AddHi = AddcNode->getOperand(1); } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) { UmlalNode = AddcNode->getOperand(1).getNode(); AddHi = AddcNode->getOperand(0); } else { return SDValue(); } // The ADDC should be glued to an ADDE node, which uses the same UMLAL as // the ADDC as well as Zero. auto *Zero = dyn_cast(UmlalNode->getOperand(3)); if (!Zero || Zero->getZExtValue() != 0) return SDValue(); // Check that we have a glued ADDC node. if (AddcNode->getValueType(1) != MVT::Glue) return SDValue(); // Look for the glued ADDE. SDNode* AddeNode = AddcNode->getGluedUser(); if (!AddeNode) return SDValue(); if ((AddeNode->getOperand(0).getNode() == Zero && AddeNode->getOperand(1).getNode() == UmlalNode) || (AddeNode->getOperand(0).getNode() == UmlalNode && AddeNode->getOperand(1).getNode() == Zero)) { SelectionDAG &DAG = DCI.DAG; SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1), UmlalNode->getOperand(2), AddHi }; SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode), DAG.getVTList(MVT::i32, MVT::i32), Ops); // Replace the ADDs' nodes uses by the UMAAL node's values. DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1)); DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0)); // Return original node to notify the driver to stop replacing. return SDValue(AddcNode, 0); } return SDValue(); } /// PerformADDCCombine - Target-specific dag combine transform from /// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL or /// ISD::ADDC, ISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL static SDValue PerformADDCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { if (Subtarget->isThumb1Only()) return SDValue(); // Only perform the checks after legalize when the pattern is available. if (DCI.isBeforeLegalize()) return SDValue(); return AddCombineTo64bitUMAAL(N, DCI, Subtarget); } /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with /// operands N0 and N1. This is a helper for PerformADDCombine that is /// called with the default operands, and if that fails, with commuted /// operands. static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget){ // Attempt to create vpadd for this add. if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget)) return Result; // Attempt to create vpaddl for this add. if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget)) return Result; if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI, Subtarget)) return Result; // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) if (N0.getNode()->hasOneUse()) if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI)) return Result; return SDValue(); } /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. /// static SDValue PerformADDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // First try with the default operand order. 
if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget)) return Result; // If that didn't work, try again with the operands commuted. return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget); } /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. /// static SDValue PerformSUBCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) if (N1.getNode()->hasOneUse()) if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI)) return Result; return SDValue(); } /// PerformVMULCombine /// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the /// special multiplier accumulator forwarding. /// vmul d3, d0, d2 /// vmla d3, d1, d2 /// is faster than /// vadd d3, d0, d1 /// vmul d3, d3, d2 // However, for (A + B) * (A + B), // vadd d2, d0, d1 // vmul d3, d0, d2 // vmla d3, d1, d2 // is slower than // vadd d2, d0, d1 // vmul d3, d2, d2 static SDValue PerformVMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { if (!Subtarget->hasVMLxForwarding()) return SDValue(); SelectionDAG &DAG = DCI.DAG; SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); unsigned Opcode = N0.getOpcode(); if (Opcode != ISD::ADD && Opcode != ISD::SUB && Opcode != ISD::FADD && Opcode != ISD::FSUB) { Opcode = N1.getOpcode(); if (Opcode != ISD::ADD && Opcode != ISD::SUB && Opcode != ISD::FADD && Opcode != ISD::FSUB) return SDValue(); std::swap(N0, N1); } if (N0 == N1) return SDValue(); EVT VT = N->getValueType(0); SDLoc DL(N); SDValue N00 = N0->getOperand(0); SDValue N01 = N0->getOperand(1); return DAG.getNode(Opcode, DL, VT, DAG.getNode(ISD::MUL, DL, VT, N00, N1), DAG.getNode(ISD::MUL, DL, VT, N01, N1)); } static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { SelectionDAG &DAG = DCI.DAG; if (Subtarget->isThumb1Only()) return SDValue(); if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) return SDValue(); EVT VT = N->getValueType(0); if (VT.is64BitVector() || VT.is128BitVector()) return PerformVMULCombine(N, DCI, Subtarget); if (VT != MVT::i32) return SDValue(); ConstantSDNode *C = dyn_cast(N->getOperand(1)); if (!C) return SDValue(); int64_t MulAmt = C->getSExtValue(); unsigned ShiftAmt = countTrailingZeros(MulAmt); ShiftAmt = ShiftAmt & (32 - 1); SDValue V = N->getOperand(0); SDLoc DL(N); SDValue Res; MulAmt >>= ShiftAmt; if (MulAmt >= 0) { if (isPowerOf2_32(MulAmt - 1)) { // (mul x, 2^N + 1) => (add (shl x, N), x) Res = DAG.getNode(ISD::ADD, DL, VT, V, DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Log2_32(MulAmt - 1), DL, MVT::i32))); } else if (isPowerOf2_32(MulAmt + 1)) { // (mul x, 2^N - 1) => (sub (shl x, N), x) Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Log2_32(MulAmt + 1), DL, MVT::i32)), V); } else return SDValue(); } else { uint64_t MulAmtAbs = -MulAmt; if (isPowerOf2_32(MulAmtAbs + 1)) { // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) Res = DAG.getNode(ISD::SUB, DL, VT, V, DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Log2_32(MulAmtAbs + 1), DL, MVT::i32))); } else if (isPowerOf2_32(MulAmtAbs - 1)) { // (mul x, -(2^N + 1)) => - (add (shl x, N), x) Res = DAG.getNode(ISD::ADD, DL, VT, V, DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Log2_32(MulAmtAbs - 1), DL, MVT::i32))); Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, MVT::i32), Res); } else return SDValue(); } 
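// Worked example (a sketch): MulAmt == 20 == 0b10100 gives ShiftAmt == 2 and
// a reduced MulAmt of 5 == 2^2 + 1, so (mul x, 20) becomes
// (shl (add (shl x, 2), x), 2) once the deferred shift below is re-applied.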
if (ShiftAmt != 0) Res = DAG.getNode(ISD::SHL, DL, VT, Res, DAG.getConstant(ShiftAmt, DL, MVT::i32)); // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); return SDValue(); } static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // Attempt to use immediate-form VBIC BuildVectorSDNode *BVN = dyn_cast(N->getOperand(1)); SDLoc dl(N); EVT VT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; if (BVN && BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { if (SplatBitSize <= 64) { EVT VbicVT; SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(), SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VbicVT, VT.is128BitVector(), OtherModImm); if (Val.getNode()) { SDValue Input = DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0)); SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val); return DAG.getNode(ISD::BITCAST, dl, VT, Vbic); } } } if (!Subtarget->isThumb1Only()) { // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI)) return Result; } return SDValue(); } /// PerformORCombine - Target-specific dag combine xforms for ISD::OR static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // Attempt to use immediate-form VORR BuildVectorSDNode *BVN = dyn_cast(N->getOperand(1)); SDLoc dl(N); EVT VT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; if (BVN && Subtarget->hasNEON() && BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { if (SplatBitSize <= 64) { EVT VorrVT; SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VorrVT, VT.is128BitVector(), OtherModImm); if (Val.getNode()) { SDValue Input = DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); } } } if (!Subtarget->isThumb1Only()) { // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI)) return Result; } // The code below optimizes (or (and X, Y), Z). // The AND operand needs to have a single user to make these optimizations // profitable. SDValue N0 = N->getOperand(0); if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) return SDValue(); SDValue N1 = N->getOperand(1); // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant. 
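// (VBSL is a bitwise select: each result bit comes from B where the
// corresponding bit of A is set and from C where it is clear, i.e.
// (B & A) | (C & ~A).)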
if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT)) { APInt SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; APInt SplatBits0, SplatBits1; BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1)); BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1)); // Ensure that the second operand of each AND is a constant if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, HasAnyUndefs) && !HasAnyUndefs) { if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, HasAnyUndefs) && !HasAnyUndefs) { // Ensure that the bit widths of the constants are the same and that // the splat arguments are logical inverses as per the pattern we // are trying to simplify. if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() && SplatBits0 == ~SplatBits1) { // Canonicalize the vector type to make instruction selection // simpler. EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, N0->getOperand(1), N0->getOperand(0), N1->getOperand(0)); return DAG.getNode(ISD::BITCAST, dl, VT, Result); } } } } // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when // reasonable. // BFI is only available on V6T2+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) return SDValue(); SDLoc DL(N); // 1) or (and A, mask), val => ARMbfi A, val, mask // iff (val & mask) == val // // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2) // && mask == ~mask2 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2) // && ~mask == mask2 // (i.e., copy a bitfield value into another bitfield of the same width) if (VT != MVT::i32) return SDValue(); SDValue N00 = N0.getOperand(0); // The value and the mask need to be constants so we can verify this is // actually a bitfield set. If the mask is 0xffff, we can do better // via a movt instruction, so don't use BFI in that case. SDValue MaskOp = N0.getOperand(1); ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp); if (!MaskC) return SDValue(); unsigned Mask = MaskC->getZExtValue(); if (Mask == 0xffff) return SDValue(); SDValue Res; // Case (1): or (and A, mask), val => ARMbfi A, val, mask ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (N1C) { unsigned Val = N1C->getZExtValue(); if ((Val & ~Mask) != Val) return SDValue(); if (ARM::isBitFieldInvertedMask(Mask)) { Val >>= countTrailingZeros(~Mask); Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, DAG.getConstant(Val, DL, MVT::i32), DAG.getConstant(Mask, DL, MVT::i32)); // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); return SDValue(); } } else if (N1.getOpcode() == ISD::AND) { // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); if (!N11C) return SDValue(); unsigned Mask2 = N11C->getZExtValue(); // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern // as is to match. if (ARM::isBitFieldInvertedMask(Mask) && (Mask == ~Mask2)) { // The pack halfword instruction works better for masks that fit it, // so use that when it's available.
if (Subtarget->hasT2ExtractPack() && (Mask == 0xffff || Mask == 0xffff0000)) return SDValue(); // 2a unsigned amt = countTrailingZeros(Mask2); Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), DAG.getConstant(amt, DL, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res, DAG.getConstant(Mask, DL, MVT::i32)); // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); return SDValue(); } else if (ARM::isBitFieldInvertedMask(~Mask) && (~Mask == Mask2)) { // The pack halfword instruction works better for masks that fit it, // so use that when it's available. if (Subtarget->hasT2ExtractPack() && (Mask2 == 0xffff || Mask2 == 0xffff0000)) return SDValue(); // 2b unsigned lsb = countTrailingZeros(Mask); Res = DAG.getNode(ISD::SRL, DL, VT, N00, DAG.getConstant(lsb, DL, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, DAG.getConstant(Mask2, DL, MVT::i32)); // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); return SDValue(); } } if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) && N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) && ARM::isBitFieldInvertedMask(~Mask)) { // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask // where lsb(mask) == #shamt and masked bits of B are known zero. SDValue ShAmt = N00.getOperand(1); unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue(); unsigned LSB = countTrailingZeros(Mask); if (ShAmtC != LSB) return SDValue(); Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0), DAG.getConstant(~Mask, DL, MVT::i32)); // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); } return SDValue(); } static SDValue PerformXORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { EVT VT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); if (!Subtarget->isThumb1Only()) { // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI)) return Result; } return SDValue(); } // ParseBFI - Given a BFI instruction in N, extract the "from" value (Rn) and return it, // and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and // their position in "to" (Rd). static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) { assert(N->getOpcode() == ARMISD::BFI); SDValue From = N->getOperand(1); ToMask = ~cast<ConstantSDNode>(N->getOperand(2))->getAPIntValue(); FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.countPopulation()); // If the Base came from a SHR #C, we can deduce that it is really testing bit // #C in the base of the SHR. if (From->getOpcode() == ISD::SRL && isa<ConstantSDNode>(From->getOperand(1))) { APInt Shift = cast<ConstantSDNode>(From->getOperand(1))->getAPIntValue(); assert(Shift.getLimitedValue() < 32 && "Shift too large!"); FromMask <<= Shift.getLimitedValue(31); From = From->getOperand(0); } return From; } // If A and B contain one contiguous set of bits, does A | B == A . B? // // Neither A nor B must be zero. static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) { unsigned LastActiveBitInA = A.countTrailingZeros(); unsigned FirstActiveBitInB = B.getBitWidth() - B.countLeadingZeros() - 1; return LastActiveBitInA - 1 == FirstActiveBitInB; } static SDValue FindBFIToCombineWith(SDNode *N) { // We have a BFI in N. Follow a possible chain of BFIs and find a BFI it can combine with, // if one exists.
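// Sketch of the payoff: (bfi (bfi A, (srl B, #8), 0xFFFF00FF), B, 0xFFFFFF00)
// writes bits 8-15 and bits 0-7 of the result from the same base B, so the
// two contiguous fields can merge into a single (bfi A, B, 0xFFFF0000).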
APInt ToMask, FromMask; SDValue From = ParseBFI(N, ToMask, FromMask); SDValue To = N->getOperand(0); // Now check for a compatible BFI to merge with. We can pass through BFIs that // aren't compatible, but not if they set the same bit in their destination as // we do (or that of any BFI we're going to combine with). SDValue V = To; APInt CombinedToMask = ToMask; while (V.getOpcode() == ARMISD::BFI) { APInt NewToMask, NewFromMask; SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask); if (NewFrom != From) { // This BFI has a different base. Keep going. CombinedToMask |= NewToMask; V = V.getOperand(0); continue; } // Do the written bits conflict with any we've seen so far? if ((NewToMask & CombinedToMask).getBoolValue()) // Conflicting bits - bail out because going further is unsafe. return SDValue(); // Are the new bits contiguous when combined with the old bits? if (BitsProperlyConcatenate(ToMask, NewToMask) && BitsProperlyConcatenate(FromMask, NewFromMask)) return V; if (BitsProperlyConcatenate(NewToMask, ToMask) && BitsProperlyConcatenate(NewFromMask, FromMask)) return V; // We've seen a write to some bits, so track it. CombinedToMask |= NewToMask; // Keep going... V = V.getOperand(0); } return SDValue(); } static SDValue PerformBFICombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SDValue N1 = N->getOperand(1); if (N1.getOpcode() == ISD::AND) { // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff // the bits being cleared by the AND are not demanded by the BFI. ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); if (!N11C) return SDValue(); unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); unsigned LSB = countTrailingZeros(~InvMask); unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB; assert(Width < static_cast<unsigned>(std::numeric_limits<unsigned>::digits) && "undefined behavior"); unsigned Mask = (1u << Width) - 1; unsigned Mask2 = N11C->getZExtValue(); if ((Mask & (~Mask2)) == 0) return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0), N->getOperand(0), N1.getOperand(0), N->getOperand(2)); } else if (N->getOperand(0).getOpcode() == ARMISD::BFI) { // We have a BFI of a BFI. Walk up the BFI chain to see how long it goes. // Keep track of any consecutive bits set that all come from the same base // value. We can combine these together into a single BFI. SDValue CombineBFI = FindBFIToCombineWith(N); if (CombineBFI == SDValue()) return SDValue(); // We've found a BFI. APInt ToMask1, FromMask1; SDValue From1 = ParseBFI(N, ToMask1, FromMask1); APInt ToMask2, FromMask2; SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2); assert(From1 == From2); (void)From2; // First, unlink CombineBFI. DCI.DAG.ReplaceAllUsesWith(CombineBFI, CombineBFI.getOperand(0)); // Then create a new BFI, combining the two together. APInt NewFromMask = FromMask1 | FromMask2; APInt NewToMask = ToMask1 | ToMask2; EVT VT = N->getValueType(0); SDLoc dl(N); if (NewFromMask[0] == 0) From1 = DCI.DAG.getNode( ISD::SRL, dl, VT, From1, DCI.DAG.getConstant(NewFromMask.countTrailingZeros(), dl, VT)); return DCI.DAG.getNode(ARMISD::BFI, dl, VT, N->getOperand(0), From1, DCI.DAG.getConstant(~NewToMask, dl, VT)); } return SDValue(); } /// PerformVMOVRRDCombine - Target-specific dag combine xforms for /// ARMISD::VMOVRRD.
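/// Two cases are handled below (a sketch of the intent; see the code for the
/// exact guards): a VMOVRRD of a VMOVDRR of the same pair folds away
/// entirely, and a VMOVRRD of an f64 frame-index load is rewritten as two
/// i32 loads.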
static SDValue PerformVMOVRRDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // vmovrrd(vmovdrr x, y) -> x,y SDValue InDouble = N->getOperand(0); if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP()) return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); // vmovrrd(load f64) -> (load i32), (load i32) SDNode *InNode = InDouble.getNode(); if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() && InNode->getValueType(0) == MVT::f64 && InNode->getOperand(1).getOpcode() == ISD::FrameIndex && !cast(InNode)->isVolatile()) { // TODO: Should this be done for non-FrameIndex operands? LoadSDNode *LD = cast(InNode); SelectionDAG &DAG = DCI.DAG; SDLoc DL(LD); SDValue BasePtr = LD->getBasePtr(); SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(), LD->getAlignment(), LD->getMemOperand()->getFlags()); SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, DAG.getConstant(4, DL, MVT::i32)); SDValue NewLD2 = DAG.getLoad( MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, LD->getPointerInfo(), std::min(4U, LD->getAlignment() / 2), LD->getMemOperand()->getFlags()); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); if (DCI.DAG.getDataLayout().isBigEndian()) std::swap (NewLD1, NewLD2); SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); return Result; } return SDValue(); } /// PerformVMOVDRRCombine - Target-specific dag combine xforms for /// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands. static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X) SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); if (Op0.getOpcode() == ISD::BITCAST) Op0 = Op0.getOperand(0); if (Op1.getOpcode() == ISD::BITCAST) Op1 = Op1.getOperand(0); if (Op0.getOpcode() == ARMISD::VMOVRRD && Op0.getNode() == Op1.getNode() && Op0.getResNo() == 0 && Op1.getResNo() == 1) return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0.getOperand(0)); return SDValue(); } /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node /// are normal, non-volatile loads. If so, it is profitable to bitcast an /// i64 vector to have f64 elements, since the value can then be loaded /// directly into a VFP register. static bool hasNormalLoadOperand(SDNode *N) { unsigned NumElts = N->getValueType(0).getVectorNumElements(); for (unsigned i = 0; i < NumElts; ++i) { SDNode *Elt = N->getOperand(i).getNode(); if (ISD::isNormalLoad(Elt) && !cast(Elt)->isVolatile()) return true; } return false; } /// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for /// ISD::BUILD_VECTOR. static SDValue PerformBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X): // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value // into a pair of GPRs, which is fine when the value is used as a scalar, // but if the i64 value is converted to a vector, we need to undo the VMOVRRD. SelectionDAG &DAG = DCI.DAG; if (N->getNumOperands() == 2) if (SDValue RV = PerformVMOVDRRCombine(N, DAG)) return RV; // Load i64 elements as f64 values so that type legalization does not split // them up into i32 values. 
  EVT VT = N->getValueType(0);
  if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
    return SDValue();
  SDLoc dl(N);
  SmallVector<SDValue, 8> Ops;
  unsigned NumElts = VT.getVectorNumElements();
  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
    Ops.push_back(V);
    // Make the DAGCombiner fold the bitcast.
    DCI.AddToWorklist(V.getNode());
  }
  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
  SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
}

/// \brief Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
static SDValue
PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
  // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
  // At that time, we may have inserted bitcasts from integer to float.
  // If these bitcasts have survived DAGCombine, change the lowering of this
  // BUILD_VECTOR into something more vector friendly, i.e., that does not
  // force the use of floating point types.

  // Make sure we can change the type of the vector.
  // This is possible iff:
  // 1. The vector is only used in a bitcast to an integer type. I.e.,
  //    1.1. Vector is used only once.
  //    1.2. Use is a bit convert to an integer type.
  // 2. The size of its operands are 32-bits (64-bits are not legal).
  EVT VT = N->getValueType(0);
  EVT EltVT = VT.getVectorElementType();

  // Check 1.1. and 2.
  if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
    return SDValue();

  // By construction, the input type must be float.
  assert(EltVT == MVT::f32 && "Unexpected type!");

  // Check 1.2.
  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() != ISD::BITCAST ||
      Use->getValueType(0).isFloatingPoint())
    return SDValue();

  // Check profitability.
  // Model is, if more than half of the relevant operands are bitcast from
  // i32, turn the build_vector into a sequence of insert_vector_elt.
  // Relevant operands are everything that is not statically
  // (i.e., at compile time) bitcasted.
  unsigned NumOfBitCastedElts = 0;
  unsigned NumElts = VT.getVectorNumElements();
  unsigned NumOfRelevantElts = NumElts;
  for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
    SDValue Elt = N->getOperand(Idx);
    if (Elt->getOpcode() == ISD::BITCAST) {
      // Assume only bit cast to i32 will go away.
      if (Elt->getOperand(0).getValueType() == MVT::i32)
        ++NumOfBitCastedElts;
    } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
      // Constants are statically casted, thus do not count them as
      // relevant operands.
      --NumOfRelevantElts;
  }

  // Check if more than half of the elements require a non-free bitcast.
  if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  // Create the new vector type.
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
  // Check if the type is legal.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isTypeLegal(VecVT))
    return SDValue();

  // Combine:
  // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
  // => BITCAST INSERT_VECTOR_ELT
  //                      (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
  //                      (BITCAST EN), N.
  SDValue Vec = DAG.getUNDEF(VecVT);
  SDLoc dl(N);
  for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
    SDValue V = N->getOperand(Idx);
    if (V.isUndef())
      continue;
    if (V.getOpcode() == ISD::BITCAST &&
        V->getOperand(0).getValueType() == MVT::i32)
      // Fold obvious case.
      V = V.getOperand(0);
    else {
      V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
      // Make the DAGCombiner fold the bitcasts.
      DCI.AddToWorklist(V.getNode());
    }
    SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
    Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
  }
  Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
  // Make the DAGCombiner fold the bitcasts.
  DCI.AddToWorklist(Vec.getNode());
  return Vec;
}

/// PerformInsertEltCombine - Target-specific dag combine xforms for
/// ISD::INSERT_VECTOR_ELT.
static SDValue PerformInsertEltCombine(SDNode *N,
                                       TargetLowering::DAGCombinerInfo &DCI) {
  // Bitcast an i64 load inserted into a vector to f64.
  // Otherwise, the i64 value will be legalized to a pair of i32 values.
  EVT VT = N->getValueType(0);
  SDNode *Elt = N->getOperand(1).getNode();
  if (VT.getVectorElementType() != MVT::i64 ||
      !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
                                 VT.getVectorNumElements());
  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
  // Make the DAGCombiner fold the bitcasts.
  DCI.AddToWorklist(Vec.getNode());
  DCI.AddToWorklist(V.getNode());
  SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
                               Vec, V, N->getOperand(2));
  return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
}

/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
/// ISD::VECTOR_SHUFFLE.
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
  // The LLVM shufflevector instruction does not require the shuffle mask
  // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
  // have that requirement.  When translating to ISD::VECTOR_SHUFFLE, if the
  // operands do not match the mask length, they are extended by concatenating
  // them with undef vectors.  That is probably the right thing for other
  // targets, but for NEON it is better to concatenate two double-register
  // size vector operands into a single quad-register size vector.  Do that
  // transformation here:
  //   shuffle(concat(v1, undef), concat(v2, undef)) ->
  //   shuffle(concat(v1, v2), undef)
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
      Op1.getOpcode() != ISD::CONCAT_VECTORS ||
      Op0.getNumOperands() != 2 ||
      Op1.getNumOperands() != 2)
    return SDValue();
  SDValue Concat0Op1 = Op0.getOperand(1);
  SDValue Concat1Op1 = Op1.getOperand(1);
  if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
    return SDValue();
  // Skip the transformation if any of the types are illegal.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT VT = N->getValueType(0);
  if (!TLI.isTypeLegal(VT) ||
      !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
      !TLI.isTypeLegal(Concat1Op1.getValueType()))
    return SDValue();

  SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
                                  Op0.getOperand(0), Op1.getOperand(0));
  // Translate the shuffle mask.
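  // Worked example (editor's illustration, indices assumed): with v2i32
  // halves, the concats are v4i32, so NumElts == 4 and HalfElts == 2.  A mask
  // of <0, 1, 4, 5> selects v1's two elements and then v2's two elements;
  // against the merged concat(v1, v2) the high indices become
  // HalfElts + (MaskElt - NumElts), giving <0, 1, 2, 3>.  Indices that
  // addressed an undef half map to -1.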
  SmallVector<int, 8> NewMask;
  unsigned NumElts = VT.getVectorNumElements();
  unsigned HalfElts = NumElts/2;
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
  for (unsigned n = 0; n < NumElts; ++n) {
    int MaskElt = SVN->getMaskElt(n);
    int NewElt = -1;
    if (MaskElt < (int)HalfElts)
      NewElt = MaskElt;
    else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
      NewElt = HalfElts + MaskElt - NumElts;
    NewMask.push_back(NewElt);
  }
  return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
                              DAG.getUNDEF(VT), NewMask);
}

/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
/// NEON load/store intrinsics, and generic vector load/stores, to merge
/// base address updates.
/// For generic load/stores, the memory type is assumed to be a vector.
/// The caller is assumed to have checked legality.
static SDValue CombineBaseUpdate(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI) {
  SelectionDAG &DAG = DCI.DAG;
  const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
                            N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
  const bool isStore = N->getOpcode() == ISD::STORE;
  const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
  SDValue Addr = N->getOperand(AddrOpIdx);
  MemSDNode *MemN = cast<MemSDNode>(N);
  SDLoc dl(N);

  // Search for a use of the address operand that is an increment.
  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
         UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User->getOpcode() != ISD::ADD ||
        UI.getUse().getResNo() != Addr.getResNo())
      continue;

    // Check that the add is independent of the load/store.  Otherwise, folding
    // it would create a cycle.
    if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
      continue;

    // Find the new opcode for the updating load/store.
    bool isLoadOp = true;
    bool isLaneOp = false;
    unsigned NewOpc = 0;
    unsigned NumVecs = 0;
    if (isIntrinsic) {
      unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
      switch (IntNo) {
      default: llvm_unreachable("unexpected intrinsic for Neon base update");
      case Intrinsic::arm_neon_vld1:     NewOpc = ARMISD::VLD1_UPD;
        NumVecs = 1; break;
      case Intrinsic::arm_neon_vld2:     NewOpc = ARMISD::VLD2_UPD;
        NumVecs = 2; break;
      case Intrinsic::arm_neon_vld3:     NewOpc = ARMISD::VLD3_UPD;
        NumVecs = 3; break;
      case Intrinsic::arm_neon_vld4:     NewOpc = ARMISD::VLD4_UPD;
        NumVecs = 4; break;
      case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
        NumVecs = 2; isLaneOp = true; break;
      case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
        NumVecs = 3; isLaneOp = true; break;
      case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
        NumVecs = 4; isLaneOp = true; break;
      case Intrinsic::arm_neon_vst1:     NewOpc = ARMISD::VST1_UPD;
        NumVecs = 1; isLoadOp = false; break;
      case Intrinsic::arm_neon_vst2:     NewOpc = ARMISD::VST2_UPD;
        NumVecs = 2; isLoadOp = false; break;
      case Intrinsic::arm_neon_vst3:     NewOpc = ARMISD::VST3_UPD;
        NumVecs = 3; isLoadOp = false; break;
      case Intrinsic::arm_neon_vst4:     NewOpc = ARMISD::VST4_UPD;
        NumVecs = 4; isLoadOp = false; break;
      case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
        NumVecs = 2; isLoadOp = false; isLaneOp = true; break;
      case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
        NumVecs = 3; isLoadOp = false; isLaneOp = true; break;
      case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
        NumVecs = 4; isLoadOp = false; isLaneOp = true; break;
      }
    } else {
      isLaneOp = true;
      switch (N->getOpcode()) {
      default: llvm_unreachable("unexpected opcode for Neon base update");
      case ARMISD::VLD1DUP: NewOpc = ARMISD::VLD1DUP_UPD; NumVecs = 1; break;
      case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
      case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
      case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
      case ISD::LOAD:       NewOpc = ARMISD::VLD1_UPD;
        NumVecs = 1; isLaneOp = false; break;
      case ISD::STORE:      NewOpc = ARMISD::VST1_UPD;
        NumVecs = 1; isLaneOp = false; isLoadOp = false; break;
      }
    }

    // Find the size of memory referenced by the load/store.
    EVT VecTy;
    if (isLoadOp) {
      VecTy = N->getValueType(0);
    } else if (isIntrinsic) {
      VecTy = N->getOperand(AddrOpIdx+1).getValueType();
    } else {
      assert(isStore && "Node has to be a load, a store, or an intrinsic!");
      VecTy = N->getOperand(1).getValueType();
    }

    unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
    if (isLaneOp)
      NumBytes /= VecTy.getVectorNumElements();

    // If the increment is a constant, it must match the memory ref size.
    SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
    if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
      uint64_t IncVal = CInc->getZExtValue();
      if (IncVal != NumBytes)
        continue;
    } else if (NumBytes >= 3 * 16) {
      // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
      // separate instructions that make it harder to use a non-constant
      // update.
      continue;
    }

    // OK, we found an ADD we can fold into the base update.
    // Now, create a _UPD node, taking care of not breaking alignment.

    EVT AlignedVecTy = VecTy;
    unsigned Alignment = MemN->getAlignment();

    // If this is a less-than-standard-aligned load/store, change the type to
    // match the standard alignment.
    // The alignment is overlooked when selecting _UPD variants; and it's
    // easier to introduce bitcasts here than fix that.
    // There are 3 ways to get to this base-update combine:
    // - intrinsics: they are assumed to be properly aligned (to the standard
    //   alignment of the memory type), so we don't need to do anything.
    // - ARMISD::VLDx nodes: they are only generated from the aforementioned
    //   intrinsics, so, likewise, there's nothing to do.
    // - generic load/store instructions: the alignment is specified as an
    //   explicit operand, rather than implicitly as the standard alignment
    //   of the memory type (like the intrinsics).  We need to change the
    //   memory type to match the explicit alignment.  That way, we don't
    //   generate non-standard-aligned ARMISD::VLDx nodes.
    if (isa<LSBaseSDNode>(N)) {
      if (Alignment == 0)
        Alignment = 1;
      if (Alignment < VecTy.getScalarSizeInBits() / 8) {
        MVT EltTy = MVT::getIntegerVT(Alignment * 8);
        assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
        assert(!isLaneOp && "Unexpected generic load/store lane.");
        unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
        AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
      }
      // Don't set an explicit alignment on regular load/stores that we want
      // to transform to VLD/VST 1_UPD nodes.
      // This matches the behavior of regular load/stores, which only get an
      // explicit alignment if the MMO alignment is larger than the standard
      // alignment of the memory type.
      // Intrinsics, however, always get an explicit alignment, set to the
      // alignment of the MMO.
      Alignment = 1;
    }

    // Create the new updating load/store node.
    // First, create an SDVTList for the new updating node's results.
    EVT Tys[6];
    unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
    unsigned n;
    for (n = 0; n < NumResultVecs; ++n)
      Tys[n] = AlignedVecTy;
    Tys[n++] = MVT::i32;
    Tys[n] = MVT::Other;
    SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2));

    // Then, gather the new node's operands.
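    // Operand layout for the new _UPD node, as assembled below: incoming
    // chain, base pointer, increment, then either the stored value (stores)
    // or the remaining original operands (loads and intrinsics), with the
    // alignment constant always last.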
    SmallVector<SDValue, 8> Ops;
    Ops.push_back(N->getOperand(0)); // incoming chain
    Ops.push_back(N->getOperand(AddrOpIdx));
    Ops.push_back(Inc);

    if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
      // Try to match the intrinsic's signature
      Ops.push_back(StN->getValue());
    } else {
      // Loads (and of course intrinsics) match the intrinsics' signature,
      // so just add all but the alignment operand.
      for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands() - 1; ++i)
        Ops.push_back(N->getOperand(i));
    }

    // For all node types, the alignment operand is always the last one.
    Ops.push_back(DAG.getConstant(Alignment, dl, MVT::i32));

    // If this is a non-standard-aligned STORE, the penultimate operand is the
    // stored value.  Bitcast it to the aligned type.
    if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
      SDValue &StVal = Ops[Ops.size()-2];
      StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
    }

    EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
    SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
                                           MemN->getMemOperand());

    // Update the uses.
    SmallVector<SDValue, 5> NewResults;
    for (unsigned i = 0; i < NumResultVecs; ++i)
      NewResults.push_back(SDValue(UpdN.getNode(), i));

    // If this is a non-standard-aligned LOAD, the first result is the loaded
    // value.  Bitcast it to the expected result type.
    if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
      SDValue &LdVal = NewResults[0];
      LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
    }

    NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
    DCI.CombineTo(N, NewResults);
    DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));

    break;
  }
  return SDValue();
}

static SDValue PerformVLDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI) {
  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
    return SDValue();

  return CombineBaseUpdate(N, DCI);
}

/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
/// are also VDUPLANEs.  If so, combine them to a vldN-dup operation and
/// return true.
static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
  SelectionDAG &DAG = DCI.DAG;
  EVT VT = N->getValueType(0);
  // vldN-dup instructions only support 64-bit vectors for N > 1.
  if (!VT.is64BitVector())
    return false;

  // Check if the VDUPLANE operand is a vldN-dup intrinsic.
  SDNode *VLD = N->getOperand(0).getNode();
  if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
    return false;
  unsigned NumVecs = 0;
  unsigned NewOpc = 0;
  unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
  if (IntNo == Intrinsic::arm_neon_vld2lane) {
    NumVecs = 2;
    NewOpc = ARMISD::VLD2DUP;
  } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
    NumVecs = 3;
    NewOpc = ARMISD::VLD3DUP;
  } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
    NumVecs = 4;
    NewOpc = ARMISD::VLD4DUP;
  } else {
    return false;
  }

  // First check that all the vldN-lane uses are VDUPLANEs and that the lane
  // numbers match the load.
  unsigned VLDLaneNo =
    cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
       UI != UE; ++UI) {
    // Ignore uses of the chain result.
    if (UI.getUse().getResNo() == NumVecs)
      continue;
    SDNode *User = *UI;
    if (User->getOpcode() != ARMISD::VDUPLANE ||
        VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
      return false;
  }

  // Create the vldN-dup node.
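  // Its result list is NumVecs copies of the vector type followed by the
  // chain, e.g. { VT, VT, VT, MVT::Other } for a vld3-dup.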
  EVT Tys[5];
  unsigned n;
  for (n = 0; n < NumVecs; ++n)
    Tys[n] = VT;
  Tys[n] = MVT::Other;
  SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumVecs+1));
  SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
  MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
  SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
                                           Ops, VLDMemInt->getMemoryVT(),
                                           VLDMemInt->getMemOperand());

  // Update the uses.
  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
       UI != UE; ++UI) {
    unsigned ResNo = UI.getUse().getResNo();
    // Ignore uses of the chain result.
    if (ResNo == NumVecs)
      continue;
    SDNode *User = *UI;
    DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
  }

  // Now the vldN-lane intrinsic is dead except for its chain result.
  // Update uses of the chain.
  std::vector<SDValue> VLDDupResults;
  for (unsigned n = 0; n < NumVecs; ++n)
    VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
  VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
  DCI.CombineTo(VLD, VLDDupResults);

  return true;
}

/// PerformVDUPLANECombine - Target-specific dag combine xforms for
/// ARMISD::VDUPLANE.
static SDValue PerformVDUPLANECombine(SDNode *N,
                                      TargetLowering::DAGCombinerInfo &DCI) {
  SDValue Op = N->getOperand(0);

  // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
  // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup
  // operation.
  if (CombineVLDDUP(N, DCI))
    return SDValue(N, 0);

  // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
  // redundant.  Ignore bit_converts for now; element sizes are checked below.
  while (Op.getOpcode() == ISD::BITCAST)
    Op = Op.getOperand(0);
  if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
    return SDValue();

  // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
  unsigned EltSize = Op.getScalarValueSizeInBits();
  // The canonical VMOV for a zero vector uses a 32-bit element size.
  unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  unsigned EltBits;
  if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
    EltSize = 8;
  EVT VT = N->getValueType(0);
  if (EltSize > VT.getScalarSizeInBits())
    return SDValue();

  return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
}

/// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
static SDValue PerformVDUPCombine(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI) {
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op = N->getOperand(0);

  // Match VDUP(LOAD) -> VLD1DUP.
  // We match this pattern here rather than waiting for isel because the
  // transform is only legal for unindexed loads.
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());
  if (LD && Op.hasOneUse() && LD->isUnindexed() &&
      LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
    SDValue Ops[] = { LD->getOperand(0), LD->getOperand(1),
                      DAG.getConstant(LD->getAlignment(), SDLoc(N), MVT::i32) };
    SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
    SDValue VLDDup = DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys,
                                             Ops, LD->getMemoryVT(),
                                             LD->getMemOperand());
    DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));
    return VLDDup;
  }

  return SDValue();
}

static SDValue PerformLOADCombine(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI) {
  EVT VT = N->getValueType(0);

  // If this is a legal vector load, try to combine it into a VLD1_UPD.
  if (ISD::isNormalLoad(N) && VT.isVector() &&
      DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return CombineBaseUpdate(N, DCI);

  return SDValue();
}

/// PerformSTORECombine - Target-specific dag combine xforms for
/// ISD::STORE.
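/// (Editorial summary of the body that follows: narrows truncating vector
/// stores via a shuffle, splits a store of a VMOVDRR pair into two i32
/// stores, bitcasts an i64 extract_vector_elt store to f64, and finally
/// tries the base-update combine on legal vector stores.)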
static SDValue PerformSTORECombine(SDNode *N,
                                   TargetLowering::DAGCombinerInfo &DCI) {
  StoreSDNode *St = cast<StoreSDNode>(N);
  if (St->isVolatile())
    return SDValue();

  // Optimize trunc store (of multiple scalars) to shuffle and store.  First,
  // pack all of the elements in one place.  Next, store to memory in fewer
  // chunks.
  SDValue StVal = St->getValue();
  EVT VT = StVal.getValueType();
  if (St->isTruncatingStore() && VT.isVector()) {
    SelectionDAG &DAG = DCI.DAG;
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    EVT StVT = St->getMemoryVT();
    unsigned NumElems = VT.getVectorNumElements();
    assert(StVT != VT && "Cannot truncate to the same type");
    unsigned FromEltSz = VT.getScalarSizeInBits();
    unsigned ToEltSz = StVT.getScalarSizeInBits();

    // From, To sizes and ElemCount must be pow of two
    if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz))
      return SDValue();

    // We are going to use the original vector elt for storing.
    // Accumulated smaller vector elements must be a multiple of the store
    // size.
    if (0 != (NumElems * FromEltSz) % ToEltSz)
      return SDValue();

    unsigned SizeRatio = FromEltSz / ToEltSz;
    assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());

    // Create a type on which we perform the shuffle.
    EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
                                     NumElems*SizeRatio);
    assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());

    SDLoc DL(St);
    SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
    SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
    for (unsigned i = 0; i < NumElems; ++i)
      ShuffleVec[i] = DAG.getDataLayout().isBigEndian()
                          ? (i + 1) * SizeRatio - 1
                          : i * SizeRatio;

    // Can't shuffle using an illegal type.
    if (!TLI.isTypeLegal(WideVecVT))
      return SDValue();

    SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
                                DAG.getUNDEF(WideVec.getValueType()),
                                ShuffleVec);
    // At this point all of the data is stored at the bottom of the
    // register.  We now need to save it to mem.

    // Find the largest store unit
    MVT StoreType = MVT::i8;
    for (MVT Tp : MVT::integer_valuetypes()) {
      if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
        StoreType = Tp;
    }
    // Didn't find a legal store type.
    if (!TLI.isTypeLegal(StoreType))
      return SDValue();

    // Bitcast the original vector into a vector of store-size units
    EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
            StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
    assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
    SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
    SmallVector<SDValue, 8> Chains;
    SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
                                        TLI.getPointerTy(DAG.getDataLayout()));
    SDValue BasePtr = St->getBasePtr();

    // Perform one or more big stores into memory.
    unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
    for (unsigned I = 0; I < E; I++) {
      SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
                                   StoreType, ShuffWide,
                                   DAG.getIntPtrConstant(I, DL));
      SDValue Ch =
          DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
                       St->getPointerInfo(), St->getAlignment(),
                       St->getMemOperand()->getFlags());
      BasePtr =
          DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
                      Increment);
      Chains.push_back(Ch);
    }
    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  }

  if (!ISD::isNormalStore(St))
    return SDValue();

  // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON
  // and ARM stores of arguments in the same cache line.
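  // Roughly (editor's illustration): "vmov d16, r0, r1; vstr d16, [sp]"
  // becomes "str r0, [sp]; str r1, [sp, #4]", with the register pair swapped
  // on big-endian targets.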
  if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
      StVal.getNode()->hasOneUse()) {
    SelectionDAG &DAG = DCI.DAG;
    bool isBigEndian = DAG.getDataLayout().isBigEndian();
    SDLoc DL(St);
    SDValue BasePtr = St->getBasePtr();
    SDValue NewST1 = DAG.getStore(
        St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
        BasePtr, St->getPointerInfo(), St->getAlignment(),
        St->getMemOperand()->getFlags());

    SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
                                    DAG.getConstant(4, DL, MVT::i32));
    return DAG.getStore(NewST1.getValue(0), DL,
                        StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
                        OffsetPtr, St->getPointerInfo(),
                        std::min(4U, St->getAlignment() / 2),
                        St->getMemOperand()->getFlags());
  }

  if (StVal.getValueType() == MVT::i64 &&
      StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {

    // Bitcast an i64 store extracted from a vector to f64.
    // Otherwise, the i64 value will be legalized to a pair of i32 values.
    SelectionDAG &DAG = DCI.DAG;
    SDLoc dl(StVal);
    SDValue IntVec = StVal.getOperand(0);
    EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
                                   IntVec.getValueType().getVectorNumElements());
    SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
    SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
                                 Vec, StVal.getOperand(1));
    dl = SDLoc(N);
    SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
    // Make the DAGCombiner fold the bitcasts.
    DCI.AddToWorklist(Vec.getNode());
    DCI.AddToWorklist(ExtElt.getNode());
    DCI.AddToWorklist(V.getNode());
    return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
                        St->getPointerInfo(), St->getAlignment(),
                        St->getMemOperand()->getFlags(), St->getAAInfo());
  }

  // If this is a legal vector store, try to combine it into a VST1_UPD.
  if (ISD::isNormalStore(N) && VT.isVector() &&
      DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return CombineBaseUpdate(N, DCI);

  return SDValue();
}

/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
/// can replace combinations of VMUL and VCVT (floating-point to integer)
/// when the VMUL has a constant operand that is a power of 2.
///
/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
///  vmul.f32        d16, d17, d16
///  vcvt.s32.f32    d16, d16
/// becomes:
///  vcvt.s32.f32    d16, d16, #3
static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
                                  const ARMSubtarget *Subtarget) {
  if (!Subtarget->hasNEON())
    return SDValue();

  SDValue Op = N->getOperand(0);
  if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
      Op.getOpcode() != ISD::FMUL)
    return SDValue();

  SDValue ConstVec = Op->getOperand(1);
  if (!isa<BuildVectorSDNode>(ConstVec))
    return SDValue();

  MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
  uint32_t FloatBits = FloatTy.getSizeInBits();
  MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
  uint32_t IntBits = IntTy.getSizeInBits();
  unsigned NumLanes = Op.getValueType().getVectorNumElements();
  if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
    // These instructions only exist converting from f32 to i32.  We can handle
    // smaller integers by generating an extra truncate, but larger ones would
    // be lossy.  We also can't handle more than 4 lanes, since these
    // instructions only support v2i32/v4i32 types.
    return SDValue();
  }

  BitVector UndefElements;
  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
  if (C == -1 || C == 0 || C > 32)
    return SDValue();

  SDLoc dl(N);
  bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
    Intrinsic::arm_neon_vcvtfp2fxu;
  SDValue FixConv = DAG.getNode(
      ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
      DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
      DAG.getConstant(C, dl, MVT::i32));

  if (IntBits < FloatBits)
    FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);

  return FixConv;
}

/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
/// can replace combinations of VCVT (integer to floating-point) and VDIV
/// when the VDIV has a constant operand that is a power of 2.
///
/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
///  vcvt.f32.s32    d16, d16
///  vdiv.f32        d16, d17, d16
/// becomes:
///  vcvt.f32.s32    d16, d16, #3
static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
                                  const ARMSubtarget *Subtarget) {
  if (!Subtarget->hasNEON())
    return SDValue();

  SDValue Op = N->getOperand(0);
  unsigned OpOpcode = Op.getNode()->getOpcode();
  if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
      (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
    return SDValue();

  SDValue ConstVec = N->getOperand(1);
  if (!isa<BuildVectorSDNode>(ConstVec))
    return SDValue();

  MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
  uint32_t FloatBits = FloatTy.getSizeInBits();
  MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
  uint32_t IntBits = IntTy.getSizeInBits();
  unsigned NumLanes = Op.getValueType().getVectorNumElements();
  if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
    // These instructions only exist converting from i32 to f32.  We can handle
    // smaller integers by generating an extra extend, but larger ones would
    // be lossy.  We also can't handle more than 4 lanes, since these
    // instructions only support v2i32/v4i32 types.
    return SDValue();
  }

  BitVector UndefElements;
  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
  if (C == -1 || C == 0 || C > 32)
    return SDValue();

  SDLoc dl(N);
  bool isSigned = OpOpcode == ISD::SINT_TO_FP;
  SDValue ConvInput = Op.getOperand(0);
  if (IntBits < FloatBits)
    ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
                            dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
                            ConvInput);

  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
    Intrinsic::arm_neon_vcvtfxu2fp;
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
                     Op.getValueType(),
                     DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
                     ConvInput, DAG.getConstant(C, dl, MVT::i32));
}

/// getVShiftImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift operation, where all the elements of the
/// build_vector must have the same constant integer value.
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
  // Ignore bit_converts.
  while (Op.getOpcode() == ISD::BITCAST)
    Op = Op.getOperand(0);
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  APInt SplatBits, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
                                    HasAnyUndefs, ElementBits) ||
      SplatBitSize > ElementBits)
    return false;
  Cnt = SplatBits.getSExtValue();
  return true;
}

/// isVShiftLImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift left operation.  That value must be in the range:
///   0 <= Value < ElementBits for a left shift; or
///   0 <= Value <= ElementBits for a long left shift.
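/// For example (editor's note), with 16-bit elements a splatted count of 16
/// is rejected for an ordinary left shift (it must be < 16) but accepted for
/// a long shift such as vshll.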
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
  assert(VT.isVector() && "vector shift count is not a vector type");
  int64_t ElementBits = VT.getScalarSizeInBits();
  if (!getVShiftImm(Op, ElementBits, Cnt))
    return false;
  return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
}

/// isVShiftRImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift right operation.  For a shift opcode, the value
/// is positive, but for an intrinsic the value count must be negative.  The
/// absolute value must be in the range:
///   1 <= |Value| <= ElementBits for a right shift; or
///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
                         int64_t &Cnt) {
  assert(VT.isVector() && "vector shift count is not a vector type");
  int64_t ElementBits = VT.getScalarSizeInBits();
  if (!getVShiftImm(Op, ElementBits, Cnt))
    return false;
  if (!isIntrinsic)
    return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
  if (Cnt >= -(isNarrow ? ElementBits/2 : ElementBits) && Cnt <= -1) {
    Cnt = -Cnt;
    return true;
  }
  return false;
}

/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
  switch (IntNo) {
  default:
    // Don't do anything for most intrinsics.
    break;

  // Vector shifts: check for immediate versions and lower them.
  // Note: This is done during DAG combining instead of DAG legalizing because
  // the build_vectors for 64-bit vector element shift counts are generally
  // not legal, and it is hard to see their values after they get legalized to
  // loads from a constant pool.
  case Intrinsic::arm_neon_vshifts:
  case Intrinsic::arm_neon_vshiftu:
  case Intrinsic::arm_neon_vrshifts:
  case Intrinsic::arm_neon_vrshiftu:
  case Intrinsic::arm_neon_vrshiftn:
  case Intrinsic::arm_neon_vqshifts:
  case Intrinsic::arm_neon_vqshiftu:
  case Intrinsic::arm_neon_vqshiftsu:
  case Intrinsic::arm_neon_vqshiftns:
  case Intrinsic::arm_neon_vqshiftnu:
  case Intrinsic::arm_neon_vqshiftnsu:
  case Intrinsic::arm_neon_vqrshiftns:
  case Intrinsic::arm_neon_vqrshiftnu:
  case Intrinsic::arm_neon_vqrshiftnsu: {
    EVT VT = N->getOperand(1).getValueType();
    int64_t Cnt;
    unsigned VShiftOpc = 0;

    switch (IntNo) {
    case Intrinsic::arm_neon_vshifts:
    case Intrinsic::arm_neon_vshiftu:
      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
        VShiftOpc = ARMISD::VSHL;
        break;
      }
      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
                     ARMISD::VSHRs : ARMISD::VSHRu);
        break;
      }
      return SDValue();

    case Intrinsic::arm_neon_vrshifts:
    case Intrinsic::arm_neon_vrshiftu:
      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
        break;
      return SDValue();

    case Intrinsic::arm_neon_vqshifts:
    case Intrinsic::arm_neon_vqshiftu:
      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
        break;
      return SDValue();

    case Intrinsic::arm_neon_vqshiftsu:
      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
        break;
      llvm_unreachable("invalid shift count for vqshlu intrinsic");

    case Intrinsic::arm_neon_vrshiftn:
    case Intrinsic::arm_neon_vqshiftns:
    case Intrinsic::arm_neon_vqshiftnu:
    case Intrinsic::arm_neon_vqshiftnsu:
    case Intrinsic::arm_neon_vqrshiftns:
    case Intrinsic::arm_neon_vqrshiftnu:
    case Intrinsic::arm_neon_vqrshiftnsu:
      // Narrowing shifts require an immediate right shift.
      if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
        break;
      llvm_unreachable("invalid shift count for narrowing vector shift "
                       "intrinsic");

    default:
      llvm_unreachable("unhandled vector shift");
    }

    switch (IntNo) {
    case Intrinsic::arm_neon_vshifts:
    case Intrinsic::arm_neon_vshiftu:
      // Opcode already set above.
      break;
    case Intrinsic::arm_neon_vrshifts: VShiftOpc = ARMISD::VRSHRs; break;
    case Intrinsic::arm_neon_vrshiftu: VShiftOpc = ARMISD::VRSHRu; break;
    case Intrinsic::arm_neon_vrshiftn: VShiftOpc = ARMISD::VRSHRN; break;
    case Intrinsic::arm_neon_vqshifts: VShiftOpc = ARMISD::VQSHLs; break;
    case Intrinsic::arm_neon_vqshiftu: VShiftOpc = ARMISD::VQSHLu; break;
    case Intrinsic::arm_neon_vqshiftsu: VShiftOpc = ARMISD::VQSHLsu; break;
    case Intrinsic::arm_neon_vqshiftns: VShiftOpc = ARMISD::VQSHRNs; break;
    case Intrinsic::arm_neon_vqshiftnu: VShiftOpc = ARMISD::VQSHRNu; break;
    case Intrinsic::arm_neon_vqshiftnsu: VShiftOpc = ARMISD::VQSHRNsu; break;
    case Intrinsic::arm_neon_vqrshiftns: VShiftOpc = ARMISD::VQRSHRNs; break;
    case Intrinsic::arm_neon_vqrshiftnu: VShiftOpc = ARMISD::VQRSHRNu; break;
    case Intrinsic::arm_neon_vqrshiftnsu: VShiftOpc = ARMISD::VQRSHRNsu; break;
    }

    SDLoc dl(N);
    return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
                       N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
  }

  case Intrinsic::arm_neon_vshiftins: {
    EVT VT = N->getOperand(1).getValueType();
    int64_t Cnt;
    unsigned VShiftOpc = 0;

    if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
      VShiftOpc = ARMISD::VSLI;
    else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
      VShiftOpc = ARMISD::VSRI;
    else {
      llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
    }

    SDLoc dl(N);
    return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
                       N->getOperand(1), N->getOperand(2),
                       DAG.getConstant(Cnt, dl, MVT::i32));
  }

  case Intrinsic::arm_neon_vqrshifts:
  case Intrinsic::arm_neon_vqrshiftu:
    // No immediate versions of these to check for.
    break;
  }

  return SDValue();
}

/// PerformShiftCombine - Checks for immediate versions of vector shifts and
/// lowers them.  As with the vector shift intrinsics, this is done during DAG
/// combining instead of DAG legalizing because the build_vectors for 64-bit
/// vector element shift counts are generally not legal, and it is hard to see
/// their values after they get legalized to loads from a constant pool.
static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
                                   const ARMSubtarget *ST) {
  EVT VT = N->getValueType(0);
  if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
    // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
    // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16.
    SDValue N1 = N->getOperand(1);
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
      SDValue N0 = N->getOperand(0);
      if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
          DAG.MaskedValueIsZero(N0.getOperand(0),
                                APInt::getHighBitsSet(32, 16)))
        return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1);
    }
  }

  // Nothing to be done for scalar shifts.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!VT.isVector() || !TLI.isTypeLegal(VT))
    return SDValue();

  assert(ST->hasNEON() && "unexpected vector shift");
  int64_t Cnt;

  switch (N->getOpcode()) {
  default: llvm_unreachable("unexpected shift opcode");

  case ISD::SHL:
    if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
      SDLoc dl(N);
      return DAG.getNode(ARMISD::VSHL, dl, VT, N->getOperand(0),
                         DAG.getConstant(Cnt, dl, MVT::i32));
    }
    break;

  case ISD::SRA:
  case ISD::SRL:
    if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
      unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
                            ARMISD::VSHRs : ARMISD::VSHRu);
      SDLoc dl(N);
      return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
                         DAG.getConstant(Cnt, dl, MVT::i32));
    }
  }
  return SDValue();
}

/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
                                    const ARMSubtarget *ST) {
  SDValue N0 = N->getOperand(0);

  // Check for sign- and zero-extensions of vector extract operations of 8-
  // and 16-bit vector elements.  NEON supports these directly.  They are
  // handled during DAG combining because type legalization will promote them
  // to 32-bit types and it is messy to recognize the operations after that.
  if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    SDValue Vec = N0.getOperand(0);
    SDValue Lane = N0.getOperand(1);
    EVT VT = N->getValueType(0);
    EVT EltVT = N0.getValueType();
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();

    if (VT == MVT::i32 &&
        (EltVT == MVT::i8 || EltVT == MVT::i16) &&
        TLI.isTypeLegal(Vec.getValueType()) &&
        isa<ConstantSDNode>(Lane)) {

      unsigned Opc = 0;
      switch (N->getOpcode()) {
      default: llvm_unreachable("unexpected opcode");
      case ISD::SIGN_EXTEND:
        Opc = ARMISD::VGETLANEs;
        break;
      case ISD::ZERO_EXTEND:
      case ISD::ANY_EXTEND:
        Opc = ARMISD::VGETLANEu;
        break;
      }
      return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
    }
  }

  return SDValue();
}

static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero,
                             APInt &KnownOne) {
  if (Op.getOpcode() == ARMISD::BFI) {
    // Conservatively, we can recurse down the first operand
    // and just mask out all affected bits.
    computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne);

    // The operand to BFI is already a mask suitable for removing the bits it
    // sets.
    ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
    const APInt &Mask = CI->getAPIntValue();
    KnownZero &= Mask;
    KnownOne &= Mask;
    return;
  }
  if (Op.getOpcode() == ARMISD::CMOV) {
    APInt KZ2(KnownZero.getBitWidth(), 0);
    APInt KO2(KnownOne.getBitWidth(), 0);
    computeKnownBits(DAG, Op.getOperand(1), KnownZero, KnownOne);
    computeKnownBits(DAG, Op.getOperand(2), KZ2, KO2);

    KnownZero &= KZ2;
    KnownOne &= KO2;
    return;
  }
  return DAG.computeKnownBits(Op, KnownZero, KnownOne);
}

SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV,
                                                   SelectionDAG &DAG) const {
  // If we have a CMOV, OR and AND combination such as:
  //   if (x & CN)
  //     y |= CM;
  //
  // And:
  //   * CN is a single bit;
  //   * All bits covered by CM are known zero in y
  //
  // Then we can convert this into a sequence of BFI instructions. This will
  // always be a win if CM is a single bit, will always be no worse than the
  // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
  // three bits (due to the extra IT instruction).
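  //
  // Illustrative lowering (editor's sketch, registers assumed): for
  //   if (x & 0x4) y |= 0x30;
  // the loop below emits one BFI per set bit of CM:
  //   lsr r1, r0, #2       @ bring bit 2 of x down to bit 0
  //   bfi r2, r1, #4, #1   @ copy it into bit 4 of y
  //   bfi r2, r1, #5, #1   @ and into bit 5
  // which is safe precisely because bits 4-5 of y are known zero.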
  SDValue Op0 = CMOV->getOperand(0);
  SDValue Op1 = CMOV->getOperand(1);
  auto CCNode = cast<ConstantSDNode>(CMOV->getOperand(2));
  auto CC = CCNode->getAPIntValue().getLimitedValue();
  SDValue CmpZ = CMOV->getOperand(4);

  // The compare must be against zero.
  if (!isNullConstant(CmpZ->getOperand(1)))
    return SDValue();

  assert(CmpZ->getOpcode() == ARMISD::CMPZ);
  SDValue And = CmpZ->getOperand(0);
  if (And->getOpcode() != ISD::AND)
    return SDValue();
  ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(And->getOperand(1));
  if (!AndC || !AndC->getAPIntValue().isPowerOf2())
    return SDValue();
  SDValue X = And->getOperand(0);

  if (CC == ARMCC::EQ) {
    // We're performing an "equal to zero" compare. Swap the operands so we
    // canonicalize on a "not equal to zero" compare.
    std::swap(Op0, Op1);
  } else {
    assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
  }

  if (Op1->getOpcode() != ISD::OR)
    return SDValue();

  ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
  if (!OrC)
    return SDValue();
  SDValue Y = Op1->getOperand(0);

  if (Op0 != Y)
    return SDValue();

  // Now, is it profitable to continue?
  APInt OrCI = OrC->getAPIntValue();
  unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
  if (OrCI.countPopulation() > Heuristic)
    return SDValue();

  // Lastly, can we determine that the bits defined by OrCI
  // are zero in Y?
  APInt KnownZero, KnownOne;
  computeKnownBits(DAG, Y, KnownZero, KnownOne);
  if ((OrCI & KnownZero) != OrCI)
    return SDValue();

  // OK, we can do the combine.
  SDValue V = Y;
  SDLoc dl(X);
  EVT VT = X.getValueType();
  unsigned BitInX = AndC->getAPIntValue().logBase2();

  if (BitInX != 0) {
    // We must shift X first.
    X = DAG.getNode(ISD::SRL, dl, VT, X,
                    DAG.getConstant(BitInX, dl, VT));
  }

  for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
       BitInY < NumActiveBits; ++BitInY) {
    if (OrCI[BitInY] == 0)
      continue;
    APInt Mask(VT.getSizeInBits(), 0);
    Mask.setBit(BitInY);
    V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
                    // Confusingly, the operand is an *inverted* mask.
                    DAG.getConstant(~Mask, dl, VT));
  }

  return V;
}

/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
SDValue
ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
  SDValue Cmp = N->getOperand(4);
  if (Cmp.getOpcode() != ARMISD::CMPZ)
    // Only looking at NE cases.
    return SDValue();

  EVT VT = N->getValueType(0);
  SDLoc dl(N);
  SDValue LHS = Cmp.getOperand(0);
  SDValue RHS = Cmp.getOperand(1);
  SDValue Chain = N->getOperand(0);
  SDValue BB = N->getOperand(1);
  SDValue ARMcc = N->getOperand(2);
  ARMCC::CondCodes CC =
    (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();

  // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0))
  // -> (brcond Chain BB CC CPSR Cmp)
  if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
      LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
      LHS->getOperand(0)->hasOneUse()) {
    auto *LHS00C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(0));
    auto *LHS01C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(1));
    auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
    auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
    if ((LHS00C && LHS00C->getZExtValue() == 0) &&
        (LHS01C && LHS01C->getZExtValue() == 1) &&
        (LHS1C && LHS1C->getZExtValue() == 1) &&
        (RHSC && RHSC->getZExtValue() == 0)) {
      return DAG.getNode(
          ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
          LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
    }
  }

  return SDValue();
}

/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
SDValue
ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
  SDValue Cmp = N->getOperand(4);
  if (Cmp.getOpcode() != ARMISD::CMPZ)
    // Only looking at EQ and NE cases.
    return SDValue();

  EVT VT = N->getValueType(0);
  SDLoc dl(N);
  SDValue LHS = Cmp.getOperand(0);
  SDValue RHS = Cmp.getOperand(1);
  SDValue FalseVal = N->getOperand(0);
  SDValue TrueVal = N->getOperand(1);
  SDValue ARMcc = N->getOperand(2);
  ARMCC::CondCodes CC =
    (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();

  // BFI is only available on V6T2+.
  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
    SDValue R = PerformCMOVToBFICombine(N, DAG);
    if (R)
      return R;
  }

  // Simplify
  //   mov     r1, r0
  //   cmp     r1, x
  //   mov     r0, y
  //   moveq   r0, x
  // to
  //   cmp     r0, x
  //   movne   r0, y
  //
  //   mov     r1, r0
  //   cmp     r1, x
  //   mov     r0, x
  //   movne   r0, y
  // to
  //   cmp     r0, x
  //   movne   r0, y
  /// FIXME: Turn this into a target neutral optimization?
  SDValue Res;
  if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
    Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
                      N->getOperand(3), Cmp);
  } else if (CC == ARMCC::EQ && TrueVal == RHS) {
    SDValue ARMcc;
    SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
    Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
                      N->getOperand(3), NewCmp);
  }

  // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0))
  // -> (cmov F T CC CPSR Cmp)
  if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse()) {
    auto *LHS0C = dyn_cast<ConstantSDNode>(LHS->getOperand(0));
    auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
    auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
    if ((LHS0C && LHS0C->getZExtValue() == 0) &&
        (LHS1C && LHS1C->getZExtValue() == 1) &&
        (RHSC && RHSC->getZExtValue() == 0)) {
      return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
                         LHS->getOperand(2), LHS->getOperand(3),
                         LHS->getOperand(4));
    }
  }

  if (Res.getNode()) {
    APInt KnownZero, KnownOne;
    DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne);
    // Capture demanded bits information that would be otherwise lost.
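    // e.g. when all bits other than the lowest are known zero, re-tag the
    // result as AssertZext of i1 so later combines keep that knowledge.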
    if (KnownZero == 0xfffffffe)
      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
                        DAG.getValueType(MVT::i1));
    else if (KnownZero == 0xffffff00)
      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
                        DAG.getValueType(MVT::i8));
    else if (KnownZero == 0xffff0000)
      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
                        DAG.getValueType(MVT::i16));
  }

  return Res;
}

SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  switch (N->getOpcode()) {
  default: break;
  case ISD::ADDC:       return PerformADDCCombine(N, DCI, Subtarget);
  case ISD::ADD:        return PerformADDCombine(N, DCI, Subtarget);
  case ISD::SUB:        return PerformSUBCombine(N, DCI);
  case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
  case ISD::OR:         return PerformORCombine(N, DCI, Subtarget);
  case ISD::XOR:        return PerformXORCombine(N, DCI, Subtarget);
  case ISD::AND:        return PerformANDCombine(N, DCI, Subtarget);
  case ARMISD::BFI:     return PerformBFICombine(N, DCI);
  case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
  case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
  case ISD::STORE:      return PerformSTORECombine(N, DCI);
  case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
  case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
  case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
  case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
  case ARMISD::VDUP: return PerformVDUPCombine(N, DCI);
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    return PerformVCVTCombine(N, DCI.DAG, Subtarget);
  case ISD::FDIV:
    return PerformVDIVCombine(N, DCI.DAG, Subtarget);
  case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:        return PerformShiftCombine(N, DCI.DAG, Subtarget);
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
  case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
  case ARMISD::BRCOND: return PerformBRCONDCombine(N, DCI.DAG);
  case ISD::LOAD:       return PerformLOADCombine(N, DCI);
  case ARMISD::VLD1DUP:
  case ARMISD::VLD2DUP:
  case ARMISD::VLD3DUP:
  case ARMISD::VLD4DUP:
    return PerformVLDCombine(N, DCI);
  case ARMISD::BUILD_VECTOR:
    return PerformARMBUILD_VECTORCombine(N, DCI);
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN:
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    case Intrinsic::arm_neon_vld1:
    case Intrinsic::arm_neon_vld2:
    case Intrinsic::arm_neon_vld3:
    case Intrinsic::arm_neon_vld4:
    case Intrinsic::arm_neon_vld2lane:
    case Intrinsic::arm_neon_vld3lane:
    case Intrinsic::arm_neon_vld4lane:
    case Intrinsic::arm_neon_vst1:
    case Intrinsic::arm_neon_vst2:
    case Intrinsic::arm_neon_vst3:
    case Intrinsic::arm_neon_vst4:
    case Intrinsic::arm_neon_vst2lane:
    case Intrinsic::arm_neon_vst3lane:
    case Intrinsic::arm_neon_vst4lane:
      return PerformVLDCombine(N, DCI);
    default: break;
    }
    break;
  }
  return SDValue();
}

bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
                                                          EVT VT) const {
  return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
}

bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                       unsigned,
                                                       unsigned,
                                                       bool *Fast) const {
  // The AllowsUnaligned flag models the SCTLR.A setting in ARM cpus
  bool AllowsUnaligned = Subtarget->allowsUnalignedMem();

  switch (VT.getSimpleVT().SimpleTy) {
  default:
    return false;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32: {
    // Unaligned access can use (for example) LDRB, LDRH, LDR
    if (AllowsUnaligned) {
      if (Fast)
        *Fast = Subtarget->hasV7Ops();
      return
true; } return false; } case MVT::f64: case MVT::v2f64: { // For any little-endian targets with neon, we can support unaligned ld/st // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8. // A big-endian target may also explicitly support unaligned accesses if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) { if (Fast) *Fast = true; return true; } return false; } } } static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, unsigned AlignCheck) { return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) && (DstAlign == 0 || DstAlign % AlignCheck == 0)); } EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { const Function *F = MF.getFunction(); // See if we can use NEON instructions for this... if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() && !F->hasFnAttribute(Attribute::NoImplicitFloat)) { bool Fast; if (Size >= 16 && (memOpAlign(SrcAlign, DstAlign, 16) || (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) { return MVT::v2f64; } else if (Size >= 8 && (memOpAlign(SrcAlign, DstAlign, 8) || (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) && Fast))) { return MVT::f64; } } // Lowering to i32/i16 if the size permits. if (Size >= 4) return MVT::i32; else if (Size >= 2) return MVT::i16; // Let the target-independent logic figure it out. return MVT::Other; } bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { if (Val.getOpcode() != ISD::LOAD) return false; EVT VT1 = Val.getValueType(); if (!VT1.isSimple() || !VT1.isInteger() || !VT2.isSimple() || !VT2.isInteger()) return false; switch (VT1.getSimpleVT().SimpleTy) { default: break; case MVT::i1: case MVT::i8: case MVT::i16: // 8-bit and 16-bit loads implicitly zero-extend to 32-bits. return true; } return false; } bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { EVT VT = ExtVal.getValueType(); if (!isTypeLegal(VT)) return false; // Don't create a loadext if we can fold the extension into a wide/long // instruction. // If there's more than one user instruction, the loadext is desirable no // matter what. There can be two uses by the same instruction. if (ExtVal->use_empty() || !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode())) return true; SDNode *U = *ExtVal->use_begin(); if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB || U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHL)) return false; return true; } bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const { if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; if (!isTypeLegal(EVT::getEVT(Ty1))) return false; assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop"); // Assuming the caller doesn't have a zeroext or signext return parameter, // truncation all the way down to i1 is valid. return true; } int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const { if (isLegalAddressingMode(DL, AM, Ty, AS)) { if (Subtarget->hasFPAO()) return AM.Scale < 0 ? 
1 : 0; // positive offsets execute faster return 0; } return -1; } static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { if (V < 0) return false; unsigned Scale = 1; switch (VT.getSimpleVT().SimpleTy) { default: return false; case MVT::i1: case MVT::i8: // Scale == 1; break; case MVT::i16: // Scale == 2; Scale = 2; break; case MVT::i32: // Scale == 4; Scale = 4; break; } if ((V & (Scale - 1)) != 0) return false; V /= Scale; return V == (V & ((1LL << 5) - 1)); } static bool isLegalT2AddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget) { bool isNeg = false; if (V < 0) { isNeg = true; V = - V; } switch (VT.getSimpleVT().SimpleTy) { default: return false; case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: // + imm12 or - imm8 if (isNeg) return V == (V & ((1LL << 8) - 1)); return V == (V & ((1LL << 12) - 1)); case MVT::f32: case MVT::f64: // Same as ARM mode. FIXME: NEON? if (!Subtarget->hasVFP2()) return false; if ((V & 3) != 0) return false; V >>= 2; return V == (V & ((1LL << 8) - 1)); } } /// isLegalAddressImmediate - Return true if the integer value can be used /// as the offset of the target addressing mode for load / store of the /// given type. static bool isLegalAddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget) { if (V == 0) return true; if (!VT.isSimple()) return false; if (Subtarget->isThumb1Only()) return isLegalT1AddressImmediate(V, VT); else if (Subtarget->isThumb2()) return isLegalT2AddressImmediate(V, VT, Subtarget); // ARM mode. if (V < 0) V = - V; switch (VT.getSimpleVT().SimpleTy) { default: return false; case MVT::i1: case MVT::i8: case MVT::i32: // +- imm12 return V == (V & ((1LL << 12) - 1)); case MVT::i16: // +- imm8 return V == (V & ((1LL << 8) - 1)); case MVT::f32: case MVT::f64: if (!Subtarget->hasVFP2()) // FIXME: NEON? return false; if ((V & 3) != 0) return false; V >>= 2; return V == (V & ((1LL << 8) - 1)); } } bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const { int Scale = AM.Scale; if (Scale < 0) return false; switch (VT.getSimpleVT().SimpleTy) { default: return false; case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: if (Scale == 1) return true; // r + r << imm Scale = Scale & ~1; return Scale == 2 || Scale == 4 || Scale == 8; case MVT::i64: // r + r if (((unsigned)AM.HasBaseReg + Scale) <= 2) return true; return false; case MVT::isVoid: // Note, we allow "void" uses (basically, uses that aren't loads or // stores), because arm allows folding a scale into many arithmetic // operations. This should be made more precise and revisited later. // Allow r << imm, but the imm has to be a multiple of two. if (Scale & 1) return false; return isPowerOf2_32(Scale); } } /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const { EVT VT = getValueType(DL, Ty, true); if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) return false; // Can never fold addr of global into load/store. if (AM.BaseGV) return false; switch (AM.Scale) { case 0: // no scale reg, must be "r+i" or "r", or "i". break; case 1: if (Subtarget->isThumb1Only()) return false; LLVM_FALLTHROUGH; default: // ARM doesn't support any R+R*scale+imm addr modes. 
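// (Editor's example: "ldr r0, [r1, r2, lsl #2]" is encodable, but there is no
// ARM-mode form that also folds an immediate displacement on top of the
// scaled register.)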
if (AM.BaseOffs) return false; if (!VT.isSimple()) return false; if (Subtarget->isThumb2()) return isLegalT2ScaledAddressingMode(AM, VT); int Scale = AM.Scale; switch (VT.getSimpleVT().SimpleTy) { default: return false; case MVT::i1: case MVT::i8: case MVT::i32: if (Scale < 0) Scale = -Scale; if (Scale == 1) return true; // r + r << imm return isPowerOf2_32(Scale & ~1); case MVT::i16: case MVT::i64: // r + r if (((unsigned)AM.HasBaseReg + Scale) <= 2) return true; return false; case MVT::isVoid: // Note, we allow "void" uses (basically, uses that aren't loads or // stores), because arm allows folding a scale into many arithmetic // operations. This should be made more precise and revisited later. // Allow r << imm, but the imm has to be a multiple of two. if (Scale & 1) return false; return isPowerOf2_32(Scale); } } return true; } /// isLegalICmpImmediate - Return true if the specified immediate is legal /// icmp immediate, that is the target has icmp instructions which can compare /// a register against the immediate without having to materialize the /// immediate into a register. bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { // Thumb2 and ARM modes can use cmn for negative immediates. if (!Subtarget->isThumb()) return ARM_AM::getSOImmVal(std::abs(Imm)) != -1; if (Subtarget->isThumb2()) return ARM_AM::getT2SOImmVal(std::abs(Imm)) != -1; // Thumb1 doesn't have cmn, and only 8-bit immediates. return Imm >= 0 && Imm <= 255; } /// isLegalAddImmediate - Return true if the specified immediate is a legal add /// *or sub* immediate, that is the target has add or sub instructions which can /// add a register with the immediate without having to materialize the /// immediate into a register. bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const { // Same encoding for add/sub, just flip the sign. int64_t AbsImm = std::abs(Imm); if (!Subtarget->isThumb()) return ARM_AM::getSOImmVal(AbsImm) != -1; if (Subtarget->isThumb2()) return ARM_AM::getT2SOImmVal(AbsImm) != -1; // Thumb1 only has 8-bit unsigned immediate. 
return AbsImm >= 0 && AbsImm <= 255; } static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG) { if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) return false; if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) { // AddressingMode 3 Base = Ptr->getOperand(0); if (ConstantSDNode *RHS = dyn_cast(Ptr->getOperand(1))) { int RHSC = (int)RHS->getZExtValue(); if (RHSC < 0 && RHSC > -256) { assert(Ptr->getOpcode() == ISD::ADD); isInc = false; Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0)); return true; } } isInc = (Ptr->getOpcode() == ISD::ADD); Offset = Ptr->getOperand(1); return true; } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) { // AddressingMode 2 if (ConstantSDNode *RHS = dyn_cast(Ptr->getOperand(1))) { int RHSC = (int)RHS->getZExtValue(); if (RHSC < 0 && RHSC > -0x1000) { assert(Ptr->getOpcode() == ISD::ADD); isInc = false; Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0)); Base = Ptr->getOperand(0); return true; } } if (Ptr->getOpcode() == ISD::ADD) { isInc = true; ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode()); if (ShOpcVal != ARM_AM::no_shift) { Base = Ptr->getOperand(1); Offset = Ptr->getOperand(0); } else { Base = Ptr->getOperand(0); Offset = Ptr->getOperand(1); } return true; } isInc = (Ptr->getOpcode() == ISD::ADD); Base = Ptr->getOperand(0); Offset = Ptr->getOperand(1); return true; } // FIXME: Use VLDM / VSTM to emulate indexed FP load / store. return false; } static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG) { if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) return false; Base = Ptr->getOperand(0); if (ConstantSDNode *RHS = dyn_cast(Ptr->getOperand(1))) { int RHSC = (int)RHS->getZExtValue(); if (RHSC < 0 && RHSC > -0x100) { // 8 bits. assert(Ptr->getOpcode() == ISD::ADD); isInc = false; Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0)); return true; } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero. isInc = Ptr->getOpcode() == ISD::ADD; Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0)); return true; } } return false; } /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. bool ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const { if (Subtarget->isThumb1Only()) return false; EVT VT; SDValue Ptr; bool isSEXTLoad = false; if (LoadSDNode *LD = dyn_cast(N)) { Ptr = LD->getBasePtr(); VT = LD->getMemoryVT(); isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; } else if (StoreSDNode *ST = dyn_cast(N)) { Ptr = ST->getBasePtr(); VT = ST->getMemoryVT(); } else return false; bool isInc; bool isLegal = false; if (Subtarget->isThumb2()) isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, Offset, isInc, DAG); else isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, Offset, isInc, DAG); if (!isLegal) return false; AM = isInc ? 
ISD::PRE_INC : ISD::PRE_DEC; return true; } /// getPostIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if this node can be /// combined with a load / store to form a post-indexed load / store. bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const { EVT VT; SDValue Ptr; bool isSEXTLoad = false, isNonExt; if (LoadSDNode *LD = dyn_cast(N)) { VT = LD->getMemoryVT(); Ptr = LD->getBasePtr(); isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD; } else if (StoreSDNode *ST = dyn_cast(N)) { VT = ST->getMemoryVT(); Ptr = ST->getBasePtr(); isNonExt = !ST->isTruncatingStore(); } else return false; if (Subtarget->isThumb1Only()) { // Thumb-1 can do a limited post-inc load or store as an updating LDM. It // must be non-extending/truncating, i32, with an offset of 4. assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!"); if (Op->getOpcode() != ISD::ADD || !isNonExt) return false; auto *RHS = dyn_cast(Op->getOperand(1)); if (!RHS || RHS->getZExtValue() != 4) return false; Offset = Op->getOperand(1); Base = Op->getOperand(0); AM = ISD::POST_INC; return true; } bool isInc; bool isLegal = false; if (Subtarget->isThumb2()) isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, isInc, DAG); else isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, isInc, DAG); if (!isLegal) return false; if (Ptr != Base) { // Swap base ptr and offset to catch more post-index load / store when // it's legal. In Thumb2 mode, offset must be an immediate. if (Ptr == Offset && Op->getOpcode() == ISD::ADD && !Subtarget->isThumb2()) std::swap(Base, Offset); // Post-indexed load / store update the base pointer. if (Ptr != Base) return false; } AM = isInc ? ISD::POST_INC : ISD::POST_DEC; return true; } void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { unsigned BitWidth = KnownOne.getBitWidth(); KnownZero = KnownOne = APInt(BitWidth, 0); switch (Op.getOpcode()) { default: break; case ARMISD::ADDC: case ARMISD::ADDE: case ARMISD::SUBC: case ARMISD::SUBE: // These nodes' second result is a boolean if (Op.getResNo() == 0) break; KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); if (KnownZero == 0 && KnownOne == 0) return; APInt KnownZeroRHS, KnownOneRHS; DAG.computeKnownBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); KnownZero &= KnownZeroRHS; KnownOne &= KnownOneRHS; return; } case ISD::INTRINSIC_W_CHAIN: { ConstantSDNode *CN = cast(Op->getOperand(1)); Intrinsic::ID IntID = static_cast(CN->getZExtValue()); switch (IntID) { default: return; case Intrinsic::arm_ldaex: case Intrinsic::arm_ldrex: { EVT VT = cast(Op)->getMemoryVT(); unsigned MemBits = VT.getScalarSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); return; } } } } } //===----------------------------------------------------------------------===// // ARM Inline Assembly Support //===----------------------------------------------------------------------===// bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const { // Looking for "rev" which is V6+. 
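  // Illustrative example (not from the original sources) of code this
  // transforms: with a v6+ subtarget,
  //
  //   unsigned swap32(unsigned x) {
  //     unsigned r;
  //     __asm__("rev %0, %1" : "=l"(r) : "l"(x));
  //     return r;
  //   }
  //
  // produces the single-statement "rev $0, $1" / "=l,l" pattern matched
  // below and is rewritten into llvm.bswap.i32, which can then be folded
  // like any other intrinsic.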
if (!Subtarget->hasV6Ops()) return false; InlineAsm *IA = cast(CI->getCalledValue()); std::string AsmStr = IA->getAsmString(); SmallVector AsmPieces; SplitString(AsmStr, AsmPieces, ";\n"); switch (AsmPieces.size()) { default: return false; case 1: AsmStr = AsmPieces[0]; AsmPieces.clear(); SplitString(AsmStr, AsmPieces, " \t,"); // rev $0, $1 if (AsmPieces.size() == 3 && AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" && IA->getConstraintString().compare(0, 4, "=l,l") == 0) { IntegerType *Ty = dyn_cast(CI->getType()); if (Ty && Ty->getBitWidth() == 32) return IntrinsicLowering::LowerToByteSwap(CI); } break; } return false; } const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const { // At this point, we have to lower this constraint to something else, so we // lower it to an "r" or "w". However, by doing this we will force the result // to be in register, while the X constraint is much more permissive. // // Although we are correct (we are free to emit anything, without // constraints), we might break use cases that would expect us to be more // efficient and emit something else. if (!Subtarget->hasVFP2()) return "r"; if (ConstraintVT.isFloatingPoint()) return "w"; if (ConstraintVT.isVector() && Subtarget->hasNEON() && (ConstraintVT.getSizeInBits() == 64 || ConstraintVT.getSizeInBits() == 128)) return "w"; return "r"; } /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. ARMTargetLowering::ConstraintType ARMTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default: break; case 'l': return C_RegisterClass; case 'w': return C_RegisterClass; case 'h': return C_RegisterClass; case 'x': return C_RegisterClass; case 't': return C_RegisterClass; case 'j': return C_Other; // Constant for movw. // An address with a single base register. Due to the way we // currently handle addresses it is the same as an 'r' memory constraint. case 'Q': return C_Memory; } } else if (Constraint.size() == 2) { switch (Constraint[0]) { default: break; // All 'U+' constraints are addresses. case 'U': return C_Memory; } } return TargetLowering::getConstraintType(Constraint); } /// Examine constraint type and operand type and determine a weight value. /// This object must already have been set up with the operand type /// and the current alternative constraint selected. TargetLowering::ConstraintWeight ARMTargetLowering::getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const { ConstraintWeight weight = CW_Invalid; Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); // Look at the constraint type. switch (*constraint) { default: weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); break; case 'l': if (type->isIntegerTy()) { if (Subtarget->isThumb()) weight = CW_SpecificReg; else weight = CW_Register; } break; case 'w': if (type->isFloatingPointTy()) weight = CW_Register; break; } return weight; } typedef std::pair RCPair; RCPair ARMTargetLowering::getRegForInlineAsmConstraint( const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { // GCC ARM Constraint Letters switch (Constraint[0]) { case 'l': // Low regs or general regs. 
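      // Rough summary of the GCC ARM constraint letters handled here
      // (illustrative): 'l' selects r0-r7 on Thumb targets, 'h' the high
      // registers r8-r15, 'w'/'x' VFP registers sized by VT, and 't' the
      // single-precision S registers.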
if (Subtarget->isThumb()) return RCPair(0U, &ARM::tGPRRegClass); return RCPair(0U, &ARM::GPRRegClass); case 'h': // High regs or no regs. if (Subtarget->isThumb()) return RCPair(0U, &ARM::hGPRRegClass); break; case 'r': if (Subtarget->isThumb1Only()) return RCPair(0U, &ARM::tGPRRegClass); return RCPair(0U, &ARM::GPRRegClass); case 'w': if (VT == MVT::Other) break; if (VT == MVT::f32) return RCPair(0U, &ARM::SPRRegClass); if (VT.getSizeInBits() == 64) return RCPair(0U, &ARM::DPRRegClass); if (VT.getSizeInBits() == 128) return RCPair(0U, &ARM::QPRRegClass); break; case 'x': if (VT == MVT::Other) break; if (VT == MVT::f32) return RCPair(0U, &ARM::SPR_8RegClass); if (VT.getSizeInBits() == 64) return RCPair(0U, &ARM::DPR_8RegClass); if (VT.getSizeInBits() == 128) return RCPair(0U, &ARM::QPR_8RegClass); break; case 't': if (VT == MVT::f32) return RCPair(0U, &ARM::SPRRegClass); break; } } if (StringRef("{cc}").equals_lower(Constraint)) return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass); return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector&Ops, SelectionDAG &DAG) const { SDValue Result; // Currently only support length 1 constraints. if (Constraint.length() != 1) return; char ConstraintLetter = Constraint[0]; switch (ConstraintLetter) { default: break; case 'j': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': ConstantSDNode *C = dyn_cast(Op); if (!C) return; int64_t CVal64 = C->getSExtValue(); int CVal = (int) CVal64; // None of these constraints allow values larger than 32 bits. Check // that the value fits in an int. if (CVal != CVal64) return; switch (ConstraintLetter) { case 'j': // Constant suitable for movw, must be between 0 and // 65535. if (Subtarget->hasV6T2Ops()) if (CVal >= 0 && CVal <= 65535) break; return; case 'I': if (Subtarget->isThumb1Only()) { // This must be a constant between 0 and 255, for ADD // immediates. if (CVal >= 0 && CVal <= 255) break; } else if (Subtarget->isThumb2()) { // A constant that can be used as an immediate value in a // data-processing instruction. if (ARM_AM::getT2SOImmVal(CVal) != -1) break; } else { // A constant that can be used as an immediate value in a // data-processing instruction. if (ARM_AM::getSOImmVal(CVal) != -1) break; } return; case 'J': if (Subtarget->isThumb1Only()) { // This must be a constant between -255 and -1, for negated ADD // immediates. This can be used in GCC with an "n" modifier that // prints the negated value, for use with SUB instructions. It is // not useful otherwise but is implemented for compatibility. if (CVal >= -255 && CVal <= -1) break; } else { // This must be a constant between -4095 and 4095. It is not clear // what this constraint is intended for. Implemented for // compatibility with GCC. if (CVal >= -4095 && CVal <= 4095) break; } return; case 'K': if (Subtarget->isThumb1Only()) { // A 32-bit value where only one byte has a nonzero value. Exclude // zero to match GCC. This constraint is used by GCC internally for // constants that can be loaded with a move/shift combination. // It is not useful otherwise but is implemented for compatibility. 
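          // Illustrative values (not from the original sources):
          // 0x00FF0000 is 0xFF << 16, so isThumbImmShiftedVal() accepts it
          // (a mov of 0xFF plus a shift), while 0x01010000 cannot be
          // written as a shifted 8-bit value and is rejected.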
if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal)) break; } else if (Subtarget->isThumb2()) { // A constant whose bitwise inverse can be used as an immediate // value in a data-processing instruction. This can be used in GCC // with a "B" modifier that prints the inverted value, for use with // BIC and MVN instructions. It is not useful otherwise but is // implemented for compatibility. if (ARM_AM::getT2SOImmVal(~CVal) != -1) break; } else { // A constant whose bitwise inverse can be used as an immediate // value in a data-processing instruction. This can be used in GCC // with a "B" modifier that prints the inverted value, for use with // BIC and MVN instructions. It is not useful otherwise but is // implemented for compatibility. if (ARM_AM::getSOImmVal(~CVal) != -1) break; } return; case 'L': if (Subtarget->isThumb1Only()) { // This must be a constant between -7 and 7, // for 3-operand ADD/SUB immediate instructions. if (CVal >= -7 && CVal < 7) break; } else if (Subtarget->isThumb2()) { // A constant whose negation can be used as an immediate value in a // data-processing instruction. This can be used in GCC with an "n" // modifier that prints the negated value, for use with SUB // instructions. It is not useful otherwise but is implemented for // compatibility. if (ARM_AM::getT2SOImmVal(-CVal) != -1) break; } else { // A constant whose negation can be used as an immediate value in a // data-processing instruction. This can be used in GCC with an "n" // modifier that prints the negated value, for use with SUB // instructions. It is not useful otherwise but is implemented for // compatibility. if (ARM_AM::getSOImmVal(-CVal) != -1) break; } return; case 'M': if (Subtarget->isThumb1Only()) { // This must be a multiple of 4 between 0 and 1020, for // ADD sp + immediate. if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0)) break; } else { // A power of two or a constant between 0 and 32. This is used in // GCC for the shift amount on shifted register operands, but it is // useful in general for any shift amounts. if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0)) break; } return; case 'N': if (Subtarget->isThumb()) { // FIXME thumb2 // This must be a constant between 0 and 31, for shift amounts. if (CVal >= 0 && CVal <= 31) break; } return; case 'O': if (Subtarget->isThumb()) { // FIXME thumb2 // This must be a multiple of 4 between -508 and 508, for // ADD/SUB sp = sp + immediate. if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0)) break; } return; } Result = DAG.getTargetConstant(CVal, SDLoc(Op), Op.getValueType()); break; } if (Result.getNode()) { Ops.push_back(Result); return; } return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } static RTLIB::Libcall getDivRemLibcall( const SDNode *N, MVT::SimpleValueType SVT) { assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM || N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) && "Unhandled Opcode in getDivRemLibcall"); bool isSigned = N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::SREM; RTLIB::Libcall LC; switch (SVT) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; case MVT::i64: LC = isSigned ? 
RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; } return LC; } static TargetLowering::ArgListTy getDivRemArgList( const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) { assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM || N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) && "Unhandled Opcode in getDivRemArgList"); bool isSigned = N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::SREM; TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { EVT ArgVT = N->getOperand(i).getValueType(); Type *ArgTy = ArgVT.getTypeForEVT(*Context); Entry.Node = N->getOperand(i); Entry.Ty = ArgTy; Entry.isSExt = isSigned; Entry.isZExt = !isSigned; Args.push_back(Entry); } if (Subtarget->isTargetWindows() && Args.size() >= 2) std::swap(Args[0], Args[1]); return Args; } SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() || Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() || Subtarget->isTargetWindows()) && "Register-based DivRem lowering only"); unsigned Opcode = Op->getOpcode(); assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) && "Invalid opcode for Div/Rem lowering"); bool isSigned = (Opcode == ISD::SDIVREM); EVT VT = Op->getValueType(0); Type *Ty = VT.getTypeForEVT(*DAG.getContext()); SDLoc dl(Op); // If the target has hardware divide, use divide + multiply + subtract: // div = a / b // rem = a - b * div // return {div, rem} // This should be lowered into UDIV/SDIV + MLS later on. if (Subtarget->hasDivide() && Op->getValueType(0).isSimple() && Op->getSimpleValueType(0) == MVT::i32) { unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV; const SDValue Dividend = Op->getOperand(0); const SDValue Divisor = Op->getOperand(1); SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor); SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor); SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul); SDValue Values[2] = {Div, Rem}; return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values); } RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(), VT.getSimpleVT().SimpleTy); SDValue InChain = DAG.getEntryNode(); TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(), DAG.getContext(), Subtarget); SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy(DAG.getDataLayout())); Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr); if (Subtarget->isTargetWindows()) InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(InChain) .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned); std::pair CallInfo = LowerCallTo(CLI); return CallInfo.first; } // Lowers REM using divmod helpers // see RTABI section 4.2/4.3 SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const { // Build return types (div and rem) std::vector RetTyParams; Type *RetTyElement; switch (N->getValueType(0).getSimpleVT().SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break; case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break; case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break; case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break; } 
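  // For reference, the AEABI helpers named by getDivRemLibcall (RTABI
  // sections 4.2/4.3) return the quotient in r0 and the remainder in r1,
  // i.e. conceptually:
  //
  //   ; %r = srem i32 %a, %b  becomes
  //   ;   %qr = call { i32, i32 } @__aeabi_idivmod(i32 %a, i32 %b)
  //   ;   %r  = extractvalue { i32, i32 } %qr, 1
  //
  // which is why a two-element struct return type is built below and only
  // the second result is returned.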
RetTyParams.push_back(RetTyElement); RetTyParams.push_back(RetTyElement); ArrayRef ret = ArrayRef(RetTyParams); Type *RetTy = StructType::get(*DAG.getContext(), ret); RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT(). SimpleTy); SDValue InChain = DAG.getEntryNode(); TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext(), Subtarget); bool isSigned = N->getOpcode() == ISD::SREM; SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy(DAG.getDataLayout())); if (Subtarget->isTargetWindows()) InChain = WinDBZCheckDenominator(DAG, N, InChain); // Lower call CallLoweringInfo CLI(DAG); CLI.setChain(InChain) .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args)) .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N)); std::pair CallResult = LowerCallTo(CLI); // Return second (rem) result operand (first contains div) SDNode *ResNode = CallResult.first.getNode(); assert(ResNode->getNumOperands() == 2 && "divmod should return two operands"); return ResNode->getOperand(1); } SDValue ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetWindows() && "unsupported target platform"); SDLoc DL(Op); // Get the inputs. SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size, DAG.getConstant(2, DL, MVT::i32)); SDValue Flag; Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag); Flag = Chain.getValue(1); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag); SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32); Chain = NewSP.getValue(1); SDValue Ops[2] = { NewSP, Chain }; return DAG.getMergeValues(Ops, DL); } SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() && "Unexpected type for custom-lowering FP_EXTEND"); RTLIB::Libcall LC; LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType()); SDValue SrcVal = Op.getOperand(0); return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false, SDLoc(Op)).first; } SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { assert(Op.getOperand(0).getValueType() == MVT::f64 && Subtarget->isFPOnlySP() && "Unexpected type for custom-lowering FP_ROUND"); RTLIB::Libcall LC; LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType()); SDValue SrcVal = Op.getOperand(0); return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false, SDLoc(Op)).first; } bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The ARM target isn't yet aware of offsets. return false; } bool ARM::isBitFieldInvertedMask(unsigned v) { if (v == 0xffffffff) return false; // there can be 1's on either or both "outsides", all the "inside" // bits must be 0's return isShiftedMask_32(~v); } /// isFPImmLegal - Returns true if the target can instruction select the /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. 
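/// For example (illustrative): the VFP3 immediate form encodes values of
/// the shape (-1)^s * (16 + imm4)/16 * 2^exp with exp in [-3, 4], so 1.0,
/// 0.5 and 31.0 can be materialized with a single vmov, while 0.1 cannot
/// and is loaded from the constant pool instead.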
bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { if (!Subtarget->hasVFP3()) return false; if (VT == MVT::f32) return ARM_AM::getFP32Imm(Imm) != -1; if (VT == MVT::f64 && !Subtarget->isFPOnlySP()) return ARM_AM::getFP64Imm(Imm) != -1; return false; } /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment /// specified in the intrinsic calls. bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { switch (Intrinsic) { case Intrinsic::arm_neon_vld1: case Intrinsic::arm_neon_vld2: case Intrinsic::arm_neon_vld3: case Intrinsic::arm_neon_vld4: case Intrinsic::arm_neon_vld2lane: case Intrinsic::arm_neon_vld3lane: case Intrinsic::arm_neon_vld4lane: { Info.opc = ISD::INTRINSIC_W_CHAIN; // Conservatively set memVT to the entire set of vectors loaded. auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64; Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); Info.align = cast(AlignArg)->getZExtValue(); Info.vol = false; // volatile loads with NEON intrinsics not supported Info.readMem = true; Info.writeMem = false; return true; } case Intrinsic::arm_neon_vst1: case Intrinsic::arm_neon_vst2: case Intrinsic::arm_neon_vst3: case Intrinsic::arm_neon_vst4: case Intrinsic::arm_neon_vst2lane: case Intrinsic::arm_neon_vst3lane: case Intrinsic::arm_neon_vst4lane: { Info.opc = ISD::INTRINSIC_VOID; // Conservatively set memVT to the entire set of vectors stored. auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); unsigned NumElts = 0; for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { Type *ArgTy = I.getArgOperand(ArgI)->getType(); if (!ArgTy->isVectorTy()) break; NumElts += DL.getTypeSizeInBits(ArgTy) / 64; } Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); Info.align = cast(AlignArg)->getZExtValue(); Info.vol = false; // volatile stores with NEON intrinsics not supported Info.readMem = false; Info.writeMem = true; return true; } case Intrinsic::arm_ldaex: case Intrinsic::arm_ldrex: { auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); PointerType *PtrTy = cast(I.getArgOperand(0)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(PtrTy->getElementType()); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); Info.vol = true; Info.readMem = true; Info.writeMem = false; return true; } case Intrinsic::arm_stlex: case Intrinsic::arm_strex: { auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); PointerType *PtrTy = cast(I.getArgOperand(1)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(PtrTy->getElementType()); Info.ptrVal = I.getArgOperand(1); Info.offset = 0; Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); Info.vol = true; Info.readMem = false; Info.writeMem = true; return true; } case Intrinsic::arm_stlexd: case Intrinsic::arm_strexd: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i64; Info.ptrVal = I.getArgOperand(2); Info.offset = 0; Info.align = 8; Info.vol = true; Info.readMem = false; Info.writeMem = true; return 
true; } case Intrinsic::arm_ldaexd: case Intrinsic::arm_ldrexd: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i64; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.align = 8; Info.vol = true; Info.readMem = true; Info.writeMem = false; return true; } default: break; } return false; } /// \brief Returns true if it is beneficial to convert a load of a constant /// to just the constant itself. bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); unsigned Bits = Ty->getPrimitiveSizeInBits(); if (Bits == 0 || Bits > 32) return false; return true; } bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, unsigned Index) const { if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT)) return false; return (Index == 0 || Index == ResVT.getVectorNumElements()); } Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); // First, if the target has no DMB, see what fallback we can use. if (!Subtarget->hasDataBarrier()) { // Some ARMv6 cpus can support data barriers with an mcr instruction. // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get // here. if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) { Function *MCR = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_mcr); Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0), Builder.getInt32(0), Builder.getInt32(7), Builder.getInt32(10), Builder.getInt32(5)}; return Builder.CreateCall(MCR, args); } else { // Instead of using barriers, atomic accesses on these subtargets use // libcalls. llvm_unreachable("makeDMB on a target so old that it has no barriers"); } } else { Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb); // Only a full system barrier exists in the M-class architectures. Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain; Constant *CDomain = Builder.getInt32(Domain); return Builder.CreateCall(DMB, CDomain); } } // Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const { switch (Ord) { case AtomicOrdering::NotAtomic: case AtomicOrdering::Unordered: llvm_unreachable("Invalid fence: unordered/non-atomic"); case AtomicOrdering::Monotonic: case AtomicOrdering::Acquire: return nullptr; // Nothing to do case AtomicOrdering::SequentiallyConsistent: if (!IsStore) return nullptr; // Nothing to do /*FALLTHROUGH*/ case AtomicOrdering::Release: case AtomicOrdering::AcquireRelease: if (Subtarget->preferISHSTBarriers()) return makeDMB(Builder, ARM_MB::ISHST); // FIXME: add a comment with a link to documentation justifying this. 
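      // For reference, the mapping table cited above boils down to
      // (sketch, DMB domain elided):
      //
      //   load acquire   ->  ldr; dmb          (trailing fence only)
      //   store release  ->  dmb; str          (leading fence only)
      //   store seq_cst  ->  dmb; str; dmb
      //
      // so leading fences are only ever needed on the store side, matching
      // the ordering checks above.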
else return makeDMB(Builder, ARM_MB::ISH); } llvm_unreachable("Unknown fence ordering in emitLeadingFence"); } Instruction* ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const { switch (Ord) { case AtomicOrdering::NotAtomic: case AtomicOrdering::Unordered: llvm_unreachable("Invalid fence: unordered/not-atomic"); case AtomicOrdering::Monotonic: case AtomicOrdering::Release: return nullptr; // Nothing to do case AtomicOrdering::Acquire: case AtomicOrdering::AcquireRelease: case AtomicOrdering::SequentiallyConsistent: return makeDMB(Builder, ARM_MB::ISH); } llvm_unreachable("Unknown fence ordering in emitTrailingFence"); } // Loads and stores less than 64-bits are already atomic; ones above that // are doomed anyway, so defer to the default libcall and blame the OS when // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit // anything for those. bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); return (Size == 64) && !Subtarget->isMClass(); } // Loads and stores less than 64-bits are already atomic; ones above that // are doomed anyway, so defer to the default libcall and blame the OS when // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit // anything for those. // FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that // guarantee, see DDI0406C ARM architecture reference manual, // sections A8.8.72-74 LDRD) TargetLowering::AtomicExpansionKind ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { unsigned Size = LI->getType()->getPrimitiveSizeInBits(); return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly : AtomicExpansionKind::None; } // For the real atomic operations, we have ldrex/strex up to 32 bits, // and up to 64 bits on the non-M profiles TargetLowering::AtomicExpansionKind ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned Size = AI->getType()->getPrimitiveSizeInBits(); bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps(); return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None; } bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR( AtomicCmpXchgInst *AI) const { // At -O0, fast-regalloc cannot cope with the live vregs necessary to // implement cmpxchg without spilling. If the address being exchanged is also // on the stack and close enough to the spill slot, this can lead to a // situation where the monitor always gets cleared and the atomic operation // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead. bool hasAtomicCmpXchg = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps(); return getTargetMachine().getOptLevel() != 0 && hasAtomicCmpXchg; } bool ARMTargetLowering::shouldInsertFencesForAtomic( const Instruction *I) const { return InsertFencesForAtomic; } // This has so far only been implemented for MachO. bool ARMTargetLowering::useLoadStackGuardNode() const { return Subtarget->isTargetMachO(); } bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const { // If we do not have NEON, vector types are not natively supported. if (!Subtarget->hasNEON()) return false; // Floating point values and vector values map to the same register file. 
// Therefore, although we could do a store extract of a vector type, this is // better to leave at float as we have more freedom in the addressing mode for // those. if (VectorTy->isFPOrFPVectorTy()) return false; // If the index is unknown at compile time, this is very expensive to lower // and it is not possible to combine the store with the extract. if (!isa(Idx)) return false; assert(VectorTy->isVectorTy() && "VectorTy is not a vector type"); unsigned BitWidth = cast(VectorTy)->getBitWidth(); // We can do a store + vector extract on any vector that fits perfectly in a D // or Q register. if (BitWidth == 64 || BitWidth == 128) { Cost = 0; return true; } return false; } bool ARMTargetLowering::isCheapToSpeculateCttz() const { return Subtarget->hasV6T2Ops(); } bool ARMTargetLowering::isCheapToSpeculateCtlz() const { return Subtarget->hasV6T2Ops(); } Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); Type *ValTy = cast(Addr->getType())->getElementType(); bool IsAcquire = isAcquireOrStronger(Ord); // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd // intrinsic must return {i32, i32} and we have to recombine them into a // single i64 here. if (ValTy->getPrimitiveSizeInBits() == 64) { Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd; Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int); Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi"); Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo"); Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi"); if (!Subtarget->isLittle()) std::swap (Lo, Hi); Lo = Builder.CreateZExt(Lo, ValTy, "lo64"); Hi = Builder.CreateZExt(Hi, ValTy, "hi64"); return Builder.CreateOr( Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64"); } Type *Tys[] = { Addr->getType() }; Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex; Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys); return Builder.CreateTruncOrBitCast( Builder.CreateCall(Ldrex, Addr), cast(Addr->getType())->getElementType()); } void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance( IRBuilder<> &Builder) const { if (!Subtarget->hasV7Ops()) return; Module *M = Builder.GetInsertBlock()->getParent()->getParent(); Builder.CreateCall(llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_clrex)); } Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); bool IsRelease = isReleaseOrStronger(Ord); // Since the intrinsics must have legal type, the i64 intrinsics take two // parameters: "i32, i32". We must marshal Val into the appropriate form // before the call. if (Val->getType()->getPrimitiveSizeInBits() == 64) { Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd; Function *Strex = Intrinsic::getDeclaration(M, Int); Type *Int32Ty = Type::getInt32Ty(M->getContext()); Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo"); Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi"); if (!Subtarget->isLittle()) std::swap (Lo, Hi); Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); return Builder.CreateCall(Strex, {Lo, Hi, Addr}); } Intrinsic::ID Int = IsRelease ? 
Intrinsic::arm_stlex : Intrinsic::arm_strex;
  Type *Tys[] = { Addr->getType() };
  Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);

  return Builder.CreateCall(
      Strex, {Builder.CreateZExtOrBitCast(
                  Val, Strex->getFunctionType()->getParamType(0)),
              Addr});
}

/// \brief Lower an interleaved load into a vldN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
///        %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
///        %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6>  ; Extract even elements
///        %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7>  ; Extract odd elements
///
///      Into:
///        %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
///        %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
///        %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
bool ARMTargetLowering::lowerInterleavedLoad(
    LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
    ArrayRef<unsigned> Indices, unsigned Factor) const {
  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
         "Invalid interleave factor");
  assert(!Shuffles.empty() && "Empty shufflevector input");
  assert(Shuffles.size() == Indices.size() &&
         "Unmatched number of shufflevectors and indices");

  VectorType *VecTy = Shuffles[0]->getType();
  Type *EltTy = VecTy->getVectorElementType();

  const DataLayout &DL = LI->getModule()->getDataLayout();
  unsigned VecSize = DL.getTypeSizeInBits(VecTy);
  bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64;

  // Skip if we do not have NEON and skip illegal vector types and vector types
  // with i64/f64 elements (vldN doesn't support i64/f64 elements).
  if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128) || EltIs64Bits)
    return false;

  // A pointer vector can not be the return type of the ldN intrinsics. Need to
  // load integer vectors first and then convert to pointer vectors.
  if (EltTy->isPointerTy())
    VecTy =
        VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());

  static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
                                            Intrinsic::arm_neon_vld3,
                                            Intrinsic::arm_neon_vld4};

  IRBuilder<> Builder(LI);
  SmallVector<Value *, 2> Ops;

  Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
  Ops.push_back(Builder.CreateBitCast(LI->getPointerOperand(), Int8Ptr));
  Ops.push_back(Builder.getInt32(LI->getAlignment()));

  Type *Tys[] = { VecTy, Int8Ptr };
  Function *VldnFunc =
      Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
  CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN");

  // Replace uses of each shufflevector with the corresponding vector loaded
  // by ldN.
  for (unsigned i = 0; i < Shuffles.size(); i++) {
    ShuffleVectorInst *SV = Shuffles[i];
    unsigned Index = Indices[i];

    Value *SubVec = Builder.CreateExtractValue(VldN, Index);

    // Convert the integer vector to pointer vector if the element is pointer.
    if (EltTy->isPointerTy())
      SubVec = Builder.CreateIntToPtr(SubVec, SV->getType());

    SV->replaceAllUsesWith(SubVec);
  }

  return true;
}

/// \brief Get a mask consisting of sequential integers starting from \p Start.
///
/// I.e. <Start, Start + 1, ..., Start + NumElts - 1>
static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
                                   unsigned NumElts) {
  SmallVector<Constant *, 16> Mask;
  for (unsigned i = 0; i < NumElts; i++)
    Mask.push_back(Builder.getInt32(Start + i));

  return ConstantVector::get(Mask);
}

/// \brief Lower an interleaved store into a vstN intrinsic.
///
/// E.g.
Lower an interleaved store (Factor = 3): /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1, /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> /// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4 /// /// Into: /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3> /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7> /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11> /// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4) /// /// Note that the new shufflevectors will be removed and we'll only generate one /// vst3 instruction in CodeGen. /// /// Example for a more general valid mask (Factor 3). Lower: /// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1, /// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19> /// store <12 x i32> %i.vec, <12 x i32>* %ptr /// /// Into: /// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7> /// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35> /// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19> /// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4) bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const { assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && "Invalid interleave factor"); VectorType *VecTy = SVI->getType(); assert(VecTy->getVectorNumElements() % Factor == 0 && "Invalid interleaved store"); unsigned LaneLen = VecTy->getVectorNumElements() / Factor; Type *EltTy = VecTy->getVectorElementType(); VectorType *SubVecTy = VectorType::get(EltTy, LaneLen); const DataLayout &DL = SI->getModule()->getDataLayout(); unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64; // Skip if we do not have NEON and skip illegal vector types and vector types // with i64/f64 elements (vstN doesn't support i64/f64 elements). if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128) || EltIs64Bits) return false; Value *Op0 = SVI->getOperand(0); Value *Op1 = SVI->getOperand(1); IRBuilder<> Builder(SI); // StN intrinsics don't support pointer vectors as arguments. Convert pointer // vectors to integer vectors. if (EltTy->isPointerTy()) { Type *IntTy = DL.getIntPtrType(EltTy); // Convert to the corresponding integer vector. Type *IntVecTy = VectorType::get(IntTy, Op0->getType()->getVectorNumElements()); Op0 = Builder.CreatePtrToInt(Op0, IntVecTy); Op1 = Builder.CreatePtrToInt(Op1, IntVecTy); SubVecTy = VectorType::get(IntTy, LaneLen); } static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2, Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4}; SmallVector Ops; Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace()); Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), Int8Ptr)); Type *Tys[] = { Int8Ptr, SubVecTy }; Function *VstNFunc = Intrinsic::getDeclaration( SI->getModule(), StoreInts[Factor - 2], Tys); // Split the shufflevector operands into sub vectors for the new vstN call. auto Mask = SVI->getShuffleMask(); for (unsigned i = 0; i < Factor; i++) { if (Mask[i] >= 0) { Ops.push_back(Builder.CreateShuffleVector( Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen))); } else { unsigned StartMask = 0; for (unsigned j = 1; j < LaneLen; j++) { if (Mask[j*Factor + i] >= 0) { StartMask = Mask[j*Factor + i] - j; break; } } // Note: If all elements in a chunk are undefs, StartMask=0! // Note: Filling undef gaps with random elements is ok, since // those elements were being written anyway (with undefs). 
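        // Worked example (illustrative): Factor = 2, LaneLen = 4 and
        // Mask = <u,4, 1,5, 2,6, 3,7>. For chunk i = 0 the lanes read
        // Mask[0,2,4,6] = <u,1,2,3>; the first defined lane is j = 1 with
        // value 1, so StartMask = 1 - 1 = 0 and the chunk is rebuilt with
        // the sequential mask <0,1,2,3>.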
// In the case of all undefs we're defaulting to using elems from 0 // Note: StartMask cannot be negative, it's checked in isReInterleaveMask Ops.push_back(Builder.CreateShuffleVector( Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen))); } } Ops.push_back(Builder.getInt32(SI->getAlignment())); Builder.CreateCall(VstNFunc, Ops); return true; } enum HABaseType { HA_UNKNOWN = 0, HA_FLOAT, HA_DOUBLE, HA_VECT64, HA_VECT128 }; static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, uint64_t &Members) { if (auto *ST = dyn_cast(Ty)) { for (unsigned i = 0; i < ST->getNumElements(); ++i) { uint64_t SubMembers = 0; if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers)) return false; Members += SubMembers; } } else if (auto *AT = dyn_cast(Ty)) { uint64_t SubMembers = 0; if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers)) return false; Members += SubMembers * AT->getNumElements(); } else if (Ty->isFloatTy()) { if (Base != HA_UNKNOWN && Base != HA_FLOAT) return false; Members = 1; Base = HA_FLOAT; } else if (Ty->isDoubleTy()) { if (Base != HA_UNKNOWN && Base != HA_DOUBLE) return false; Members = 1; Base = HA_DOUBLE; } else if (auto *VT = dyn_cast(Ty)) { Members = 1; switch (Base) { case HA_FLOAT: case HA_DOUBLE: return false; case HA_VECT64: return VT->getBitWidth() == 64; case HA_VECT128: return VT->getBitWidth() == 128; case HA_UNKNOWN: switch (VT->getBitWidth()) { case 64: Base = HA_VECT64; return true; case 128: Base = HA_VECT128; return true; default: return false; } } } return (Members > 0 && Members <= 4); } /// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate or one of /// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when /// passing according to AAPCS rules. bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( Type *Ty, CallingConv::ID CallConv, bool isVarArg) const { if (getEffectiveCallingConv(CallConv, isVarArg) != CallingConv::ARM_AAPCS_VFP) return false; HABaseType Base = HA_UNKNOWN; uint64_t Members = 0; bool IsHA = isHomogeneousAggregate(Ty, Base, Members); DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump()); bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy(); return IsHA || IsIntArray; } unsigned ARMTargetLowering::getExceptionPointerRegister( const Constant *PersonalityFn) const { // Platforms which do not use SjLj EH may return values in these registers // via the personality function. return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R0; } unsigned ARMTargetLowering::getExceptionSelectorRegister( const Constant *PersonalityFn) const { // Platforms which do not use SjLj EH may return values in these registers // via the personality function. return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1; } void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { // Update IsSplitCSR in ARMFunctionInfo. 
ARMFunctionInfo *AFI = Entry->getParent()->getInfo(); AFI->setIsSplitCSR(true); } void ARMTargetLowering::insertCopiesSplitCSR( MachineBasicBlock *Entry, const SmallVectorImpl &Exits) const { const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); if (!IStart) return; const TargetInstrInfo *TII = Subtarget->getInstrInfo(); MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); MachineBasicBlock::iterator MBBI = Entry->begin(); for (const MCPhysReg *I = IStart; *I; ++I) { const TargetRegisterClass *RC = nullptr; if (ARM::GPRRegClass.contains(*I)) RC = &ARM::GPRRegClass; else if (ARM::DPRRegClass.contains(*I)) RC = &ARM::DPRRegClass; else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); unsigned NewVR = MRI->createVirtualRegister(RC); // Create copy from CSR to a virtual register. // FIXME: this currently does not emit CFI pseudo-instructions, it works // fine for CXX_FAST_TLS since the C++-style TLS access functions should be // nounwind. If we want to generalize this later, we may need to emit // CFI pseudo-instructions. assert(Entry->getParent()->getFunction()->hasFnAttribute( Attribute::NoUnwind) && "Function should be nounwind in insertCopiesSplitCSR!"); Entry->addLiveIn(*I); BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(*I); // Insert the copy-back instructions right before the terminator. for (auto *Exit : Exits) BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), TII->get(TargetOpcode::COPY), *I) .addReg(NewVR); } } Index: projects/clang400-import/contrib/llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- projects/clang400-import/contrib/llvm/lib/Transforms/Scalar/LICM.cpp (revision 314176) +++ projects/clang400-import/contrib/llvm/lib/Transforms/Scalar/LICM.cpp (revision 314177) @@ -1,1264 +1,1261 @@ //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This pass performs loop invariant code motion, attempting to remove as much // code from the body of a loop as possible. It does this by either hoisting // code into the preheader block, or by sinking code to the exit blocks if it is // safe. This pass also promotes must-aliased memory locations in the loop to // live in registers, thus hoisting and sinking "invariant" loads and stores. // // This pass uses alias analysis for two purposes: // // 1. Moving loop invariant loads and calls out of loops. If we can determine // that a load or call inside of a loop never aliases anything stored to, // we can hoist it or sink it like any other instruction. // 2. Scalar Promotion of Memory - If there is a store instruction inside of // the loop, we try to move the store to happen AFTER the loop instead of // inside of the loop. This can only happen if a few conditions are true: // A. The pointer stored through is loop invariant // B. There are no stores or loads in the loop which _may_ alias the // pointer. There are no calls in the loop which mod/ref the pointer. // If these conditions are true, we can promote the loads and stores in the // loop of the pointer to use a temporary alloca'd variable. We then use // the SSAUpdater to construct the appropriate SSA form for the value. 
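// For example (illustrative), with a loop-invariant pointer %p:
//
//   for (...) { tmp = *p; tmp += i; *p = tmp; }
//
// is promoted to:
//
//   reg = *p; for (...) { reg += i; } *p = reg;
//
// turning a load and a store per iteration into SSA updates in registers,
// with a single load before and a single store after the loop.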
// //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LICM.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/PredIteratorCache.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include #include using namespace llvm; #define DEBUG_TYPE "licm" STATISTIC(NumSunk, "Number of instructions sunk out of loop"); STATISTIC(NumHoisted, "Number of instructions hoisted out of loop"); STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk"); STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk"); STATISTIC(NumPromoted, "Number of memory locations promoted to registers"); static cl::opt DisablePromotion("disable-licm-promotion", cl::Hidden, cl::desc("Disable memory promotion in LICM pass")); static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo); static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE); static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, const Loop *CurLoop, AliasSetTracker *CurAST, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE); static bool isSafeToExecuteUnconditionally(Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE, const Instruction *CtxI = nullptr); static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, const AAMDNodes &AAInfo, AliasSetTracker *CurAST); static Instruction * CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo); namespace { struct LoopInvariantCodeMotion { bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, bool DeleteAST); DenseMap &getLoopToAliasSetMap() { return LoopToAliasSetMap; } private: DenseMap LoopToAliasSetMap; AliasSetTracker *collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA); }; struct LegacyLICMPass : public LoopPass { static char ID; // Pass identification, replacement for typeid LegacyLICMPass() : LoopPass(ID) { 
initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L)) {
      // If we have run LICM on a previous loop but now we are skipping
      // (because we've hit the opt-bisect limit), we need to clear the
      // loop alias information.
      for (auto &LTAS : LICM.getLoopToAliasSetMap())
        delete LTAS.second;
      LICM.getLoopToAliasSetMap().clear();
      return false;
    }

    auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
    // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
    // pass. Function analyses need to be preserved across loop transformations
    // but ORE cannot be preserved (see comment before the pass definition).
    OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
    return LICM.runOnLoop(L,
                          &getAnalysis<AAResultsWrapperPass>().getAAResults(),
                          &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
                          &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
                          &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
                          SE ? &SE->getSE() : nullptr, &ORE, false);
  }

  /// This transformation requires natural loop information & requires that
  /// loop preheaders be inserted into the CFG...
  ///
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    getLoopAnalysisUsage(AU);
  }

  using llvm::Pass::doFinalization;

  bool doFinalization() override {
    assert(LICM.getLoopToAliasSetMap().empty() &&
           "Didn't free loop alias sets");
    return false;
  }

private:
  LoopInvariantCodeMotion LICM;

  /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
  void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
                               Loop *L) override;

  /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
  /// set.
  void deleteAnalysisValue(Value *V, Loop *L) override;

  /// Simple Analysis hook. Delete loop L from alias set map.
  void deleteAnalysisLoop(Loop *L) override;
};
}

PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
                                LoopStandardAnalysisResults &AR, LPMUpdater &) {
  const auto &FAM =
      AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
  Function *F = L.getHeader()->getParent();

  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
  // FIXME: This should probably be optional rather than required.
  if (!ORE)
    report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not "
                       "cached at a higher level");

  LoopInvariantCodeMotion LICM;

  if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.SE, ORE, true))
    return PreservedAnalyses::all();

  // FIXME: There is no setPreservesCFG in the new PM. When that becomes
  // available, it should be used here.
  return getLoopPassPreservedAnalyses();
}

char LegacyLICMPass::ID = 0;
INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
                    false, false)

Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }

/// Hoist expressions out of the specified loop. Note, alias info for inner
/// loop is not preserved so it is not a good idea to run LICM multiple
/// times on one loop.
/// We should delete AST for inner loops in the new pass manager to avoid
/// memory leak.
///
bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA,
                                        LoopInfo *LI, DominatorTree *DT,
                                        TargetLibraryInfo *TLI,
                                        ScalarEvolution *SE,
                                        OptimizationRemarkEmitter *ORE,
                                        bool DeleteAST) {
  bool Changed = false;

  assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");

  AliasSetTracker *CurAST = collectAliasInfoForLoop(L, LI, AA);

  // Get the preheader block to move instructions into...
  BasicBlock *Preheader = L->getLoopPreheader();

  // Compute loop safety information.
  LoopSafetyInfo SafetyInfo;
  computeLoopSafetyInfo(&SafetyInfo, L);

  // We want to visit all of the instructions in this loop... that are not
  // parts of our subloops (they have already had their invariants hoisted out
  // of their loop, into this loop, so there is no need to process the BODIES
  // of the subloops).
  //
  // Traverse the body of the loop in depth first order on the dominator tree
  // so that we are guaranteed to see definitions before we see uses. This
  // allows us to sink instructions in one pass, without iteration. After
  // sinking instructions, we perform another pass to hoist them out of the
  // loop.
  //
  if (L->hasDedicatedExits())
    Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
                          CurAST, &SafetyInfo, ORE);
  if (Preheader)
    Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
                           CurAST, &SafetyInfo, ORE);

  // Now that all loop invariants have been removed from the loop, promote any
  // memory references to scalars that we can.
  // Don't sink stores from loops without dedicated block exits. Exits
  // containing indirect branches are not transformed by loop simplify;
  // make sure we catch that. An additional load may be generated in the
  // preheader for the SSA updater, so also avoid sinking when no preheader
  // is available.
  if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
    // Figure out the loop exits and their insertion points.
    SmallVector<BasicBlock *, 8> ExitBlocks;
    L->getUniqueExitBlocks(ExitBlocks);

    // We can't insert into a catchswitch.
    bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
      return isa<CatchSwitchInst>(Exit->getTerminator());
    });

    if (!HasCatchSwitch) {
      SmallVector<Instruction *, 8> InsertPts;
      InsertPts.reserve(ExitBlocks.size());
      for (BasicBlock *ExitBlock : ExitBlocks)
        InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());

      PredIteratorCache PIC;

      bool Promoted = false;

      // Loop over all of the alias sets in the tracker object.
      for (AliasSet &AS : *CurAST)
        Promoted |=
            promoteLoopAccessesToScalars(AS, ExitBlocks, InsertPts, PIC, LI,
                                         DT, TLI, L, CurAST, &SafetyInfo, ORE);

      // Once we have promoted values across the loop body we have to
      // recursively reform LCSSA as any nested loop may now have values
      // defined within the loop used in the outer loop.
      // FIXME: This is really heavy handed. It would be a bit better to use an
      // SSAUpdater strategy during promotion that was LCSSA aware and
      // reformed it as it went.
      if (Promoted)
        formLCSSARecursively(*L, *DT, LI, SE);

      Changed |= Promoted;
    }
  }

  // Check that neither this loop nor its parent have had LCSSA broken. LICM
  // is specifically moving instructions across the loop boundary and so it is
  // especially in need of sanity checking here.
  assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
  assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) &&
         "Parent loop not left in LCSSA form after LICM!");

  // If this loop is nested inside of another one, save the alias information
  // for when we process the outer loop.
  if (L->getParentLoop() && !DeleteAST)
    LoopToAliasSetMap[L] = CurAST;
  else
    delete CurAST;

  if (Changed && SE)
    SE->forgetLoopDispositions(L);
  return Changed;
}
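[Editor's aside, not part of this revision.] To make the overall effect of the pass concrete, here is a minimal hand-written illustration, in plain C++ rather than LLVM IR, of what hoisting buys: the invariant computation moves to the "preheader", so it executes once instead of once per iteration.

// Illustration only: hand-written "before" and "after" of hoisting.
int sumBefore(const int *v, int n, int a, int b) {
  int s = 0;
  for (int i = 0; i < n; ++i)
    s += v[i] * (a * b); // a * b is loop invariant, yet recomputed every trip
  return s;
}

int sumAfter(const int *v, int n, int a, int b) {
  const int t = a * b; // hoisted into the "preheader"
  int s = 0;
  for (int i = 0; i < n; ++i)
    s += v[i] * t;
  return s;
}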
/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in reverse depth
/// first order w.r.t the DominatorTree. This allows us to visit uses before
/// definitions, allowing us to sink a loop body in one pass without
/// iteration.
///
bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
                      DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
                      AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo,
                      OptimizationRemarkEmitter *ORE) {

  // Verify inputs.
  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
         CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&
         "Unexpected input to sinkRegion");

  BasicBlock *BB = N->getBlock();
  // If this subregion is not in the top level loop at all, exit.
  if (!CurLoop->contains(BB))
    return false;

  // We are processing blocks in reverse dfo, so process children first.
  bool Changed = false;
  const std::vector<DomTreeNode *> &Children = N->getChildren();
  for (DomTreeNode *Child : Children)
    Changed |=
        sinkRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo, ORE);

  // Only need to process the contents of this block if it is not part of a
  // subloop (which would already have been processed).
  if (inSubLoop(BB, CurLoop, LI))
    return Changed;

  for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
    Instruction &I = *--II;

    // If the instruction is dead, we would normally try to sink it because it
    // isn't used in the loop; instead, just delete it.
    if (isInstructionTriviallyDead(&I, TLI)) {
      DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
      ++II;
      CurAST->deleteValue(&I);
      I.eraseFromParent();
      Changed = true;
      continue;
    }

    // Check to see if we can sink this instruction to the exit blocks
    // of the loop. We can do this if all of the users of the instruction are
    // outside of the loop. In this case, it doesn't even matter if the
    // operands of the instruction are loop invariant.
    //
    if (isNotUsedInLoop(I, CurLoop, SafetyInfo) &&
        canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) {
      ++II;
      Changed |= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo, ORE);
    }
  }
  return Changed;
}

/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
/// uses, allowing us to hoist a loop body in one pass without iteration.
///
bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
                       DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
                       AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo,
                       OptimizationRemarkEmitter *ORE) {
  // Verify inputs.
  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
         CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&
         "Unexpected input to hoistRegion");

  BasicBlock *BB = N->getBlock();

  // If this subregion is not in the top level loop at all, exit.
  if (!CurLoop->contains(BB))
    return false;

  // Only need to process the contents of this block if it is not part of a
  // subloop (which would already have been processed).
  bool Changed = false;
  if (!inSubLoop(BB, CurLoop, LI))
    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
      Instruction &I = *II++;
      // Try constant folding this instruction. If all the operands are
      // constants, it is technically hoistable, but it would be better to
      // just fold it.
      if (Constant *C = ConstantFoldInstruction(
              &I, I.getModule()->getDataLayout(), TLI)) {
        DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n');
        CurAST->copyValue(&I, C);
        I.replaceAllUsesWith(C);
        if (isInstructionTriviallyDead(&I, TLI)) {
          CurAST->deleteValue(&I);
          I.eraseFromParent();
        }
        Changed = true;
        continue;
      }

      // Try hoisting the instruction out to the preheader.
      // We can only do this if all of the operands of the instruction are
      // loop invariant and if it is safe to hoist the instruction.
      //
      if (CurLoop->hasLoopInvariantOperands(&I) &&
          canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE) &&
          isSafeToExecuteUnconditionally(
              I, DT, CurLoop, SafetyInfo, ORE,
              CurLoop->getLoopPreheader()->getTerminator()))
        Changed |= hoist(I, DT, CurLoop, SafetyInfo, ORE);
    }

  const std::vector<DomTreeNode *> &Children = N->getChildren();
  for (DomTreeNode *Child : Children)
    Changed |=
        hoistRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo, ORE);
  return Changed;
}

/// Computes loop safety information, checking the loop body and header for
/// instructions that may throw an exception.
///
void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) {
  assert(CurLoop != nullptr && "CurLoop can't be null");
  BasicBlock *Header = CurLoop->getHeader();
  // Setting default safety values.
  SafetyInfo->MayThrow = false;
  SafetyInfo->HeaderMayThrow = false;
  // Iterate over header and compute safety info.
  for (BasicBlock::iterator I = Header->begin(), E = Header->end();
       (I != E) && !SafetyInfo->HeaderMayThrow; ++I)
    SafetyInfo->HeaderMayThrow |=
        !isGuaranteedToTransferExecutionToSuccessor(&*I);

  SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow;
  // Iterate over loop instructions and compute safety info.
  for (Loop::block_iterator BB = CurLoop->block_begin(),
                            BBE = CurLoop->block_end();
       (BB != BBE) && !SafetyInfo->MayThrow; ++BB)
    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
         (I != E) && !SafetyInfo->MayThrow; ++I)
      SafetyInfo->MayThrow |= !isGuaranteedToTransferExecutionToSuccessor(&*I);

  // Compute funclet colors if we might sink/hoist in a function with a
  // funclet personality routine.
  Function *Fn = CurLoop->getHeader()->getParent();
  if (Fn->hasPersonalityFn())
    if (Constant *PersonalityFn = Fn->getPersonalityFn())
      if (isFuncletEHPersonality(classifyEHPersonality(PersonalityFn)))
        SafetyInfo->BlockColors = colorEHFunclets(*Fn);
}
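[Editor's aside, not part of this revision.] canSinkOrHoistInst, defined next, rejects calls that may throw. A hand-written sketch of why (mayThrow() is a hypothetical stand-in): a throw must be observed after exactly the side effects that dynamically preceded it, so the call cannot be moved across them.

// Illustration only: why potentially-throwing calls must stay in place.
void mayThrow() {} // hypothetical: imagine this call can throw

int countBeforeThrow(int n) {
  int acc = 0;
  for (int i = 0; i < n; ++i) {
    ++acc;      // ordered before the call on every iteration
    mayThrow(); // if this threw on iteration i, the caller must observe
                // exactly i + 1 increments; sinking the call below the loop
                // would let all n increments happen first
  }
  return acc;
}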
bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                              Loop *CurLoop, AliasSetTracker *CurAST,
                              LoopSafetyInfo *SafetyInfo,
                              OptimizationRemarkEmitter *ORE) {
  // Loads have extra constraints we have to verify before we can hoist them.
  if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
    if (!LI->isUnordered())
      return false; // Don't hoist volatile/atomic loads!

    // Loads from constant memory are always safe to move, even if they end up
    // in the same alias set as something that ends up being modified.
    if (AA->pointsToConstantMemory(LI->getOperand(0)))
      return true;
    if (LI->getMetadata(LLVMContext::MD_invariant_load))
      return true;

    // Don't hoist loads which have may-aliased stores in the loop.
    uint64_t Size = 0;
    if (LI->getType()->isSized())
      Size = I.getModule()->getDataLayout().getTypeStoreSize(LI->getType());

    AAMDNodes AAInfo;
    LI->getAAMetadata(AAInfo);

    bool Invalidated =
        pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo, CurAST);
    // Check the loop-invariant address case because this may also be a
    // sinkable load whose address is not necessarily loop-invariant.
    if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
      ORE->emit(OptimizationRemarkMissed(
                    DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
                << "failed to move load with loop-invariant address "
                   "because the loop may invalidate its value");

    return !Invalidated;
  } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
    // Don't sink or hoist dbg info; it's legal, but not useful.
    if (isa<DbgInfoIntrinsic>(I))
      return false;

    // Don't sink calls which can throw.
    if (CI->mayThrow())
      return false;

    // Handle simple cases by querying alias analysis.
    FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
    if (Behavior == FMRB_DoesNotAccessMemory)
      return true;
    if (AliasAnalysis::onlyReadsMemory(Behavior)) {
      // A readonly argmemonly function only reads from memory pointed to by
      // its arguments with arbitrary offsets. If we can prove there are no
      // writes to this memory in the loop, we can hoist or sink.
      if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) {
        for (Value *Op : CI->arg_operands())
          if (Op->getType()->isPointerTy() &&
              pointerInvalidatedByLoop(Op, MemoryLocation::UnknownSize,
                                       AAMDNodes(), CurAST))
            return false;
        return true;
      }
      // If this call only reads from memory and there are no writes to memory
      // in the loop, we can hoist or sink the call as appropriate.
      bool FoundMod = false;
      for (AliasSet &AS : *CurAST) {
        if (!AS.isForwardingAliasSet() && AS.isMod()) {
          FoundMod = true;
          break;
        }
      }
      if (!FoundMod)
        return true;
    }

    // FIXME: This should use mod/ref information to see if we can hoist or
    // sink the call.

    return false;
  }

  // Only these instructions are hoistable/sinkable.
  if (!isa<BinaryOperator>(I) && !isa<CastInst>(I) && !isa<SelectInst>(I) &&
      !isa<GetElementPtrInst>(I) && !isa<CmpInst>(I) &&
      !isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) &&
      !isa<ShuffleVectorInst>(I) && !isa<ExtractValueInst>(I) &&
      !isa<InsertValueInst>(I))
    return false;

  // SafetyInfo is nullptr if we are checking for sinking from the preheader
  // to the loop body. That is always safe, as there is no speculative
  // execution.
  if (!SafetyInfo)
    return true;

  // TODO: Plumb the context instruction through to make hoisting and sinking
  // more powerful. Hoisting of loads already works due to the special casing
  // above.
  return isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo, nullptr);
}

/// Returns true if a PHINode is trivially replaceable with an Instruction.
/// This is true when all incoming values are that instruction.
/// This pattern occurs most often with LCSSA PHI nodes.
///
static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I) {
  for (const Value *IncValue : PN.incoming_values())
    if (IncValue != &I)
      return false;

  return true;
}
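[Editor's aside, not part of this revision.] isNotUsedInLoop, defined next, encodes the sinking criterion: if every user is outside the loop (typically an LCSSA PHI in an exit block), the instruction can move to the exits, regardless of whether its operands are invariant. A hand-written analogue in plain C++:

// Illustration only: only the final value escapes the loop, so the multiply
// has no in-loop user and can be computed once at the exit instead of on
// every iteration.
int lastDoubled(const int *v, int n) {
  int last = 0;
  for (int i = 0; i < n; ++i)
    last = v[i] * 2; // not consumed inside the loop body itself
  return last;       // sinkable: the exit-block PHI is its only real user
}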
/// Return true if the only users of this instruction are outside of
/// the loop. If this is true, we can sink the instruction to the exit
/// blocks of the loop.
///
static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
                            const LoopSafetyInfo *SafetyInfo) {
  const auto &BlockColors = SafetyInfo->BlockColors;
  for (const User *U : I.users()) {
    const Instruction *UI = cast<Instruction>(U);
    if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
      const BasicBlock *BB = PN->getParent();
      // We cannot sink uses in catchswitches.
      if (isa<CatchSwitchInst>(BB->getTerminator()))
        return false;

      // We need to sink a callsite to a unique funclet. Avoid sinking if the
      // phi use is too muddled.
      if (isa<CallInst>(I))
        if (!BlockColors.empty() &&
            BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
          return false;

      // A PHI node where all of the incoming values are this instruction is
      // special -- it can just be RAUW'ed with the instruction and thus
      // doesn't require a use in the predecessor. This is a particularly
      // important special case because it is the pattern found in LCSSA form.
      if (isTriviallyReplacablePHI(*PN, I)) {
        if (CurLoop->contains(PN))
          return false;
        else
          continue;
      }

      // Otherwise, PHI node uses occur in the predecessor blocks of their
      // incoming values. Check for such a use being inside the loop.
      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
        if (PN->getIncomingValue(i) == &I)
          if (CurLoop->contains(PN->getIncomingBlock(i)))
            return false;

      continue;
    }

    if (CurLoop->contains(UI))
      return false;
  }
  return true;
}

static Instruction *
CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
                            const LoopInfo *LI,
                            const LoopSafetyInfo *SafetyInfo) {
  Instruction *New;
  if (auto *CI = dyn_cast<CallInst>(&I)) {
    const auto &BlockColors = SafetyInfo->BlockColors;

    // Sinking call-sites need to be handled differently from other
    // instructions. The cloned call-site needs a funclet bundle operand
    // appropriate for its location in the CFG.
    SmallVector<OperandBundleDef, 1> OpBundles;
    for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
         BundleIdx != BundleEnd; ++BundleIdx) {
      OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
      if (Bundle.getTagID() == LLVMContext::OB_funclet)
        continue;

      OpBundles.emplace_back(Bundle);
    }

    if (!BlockColors.empty()) {
      const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
      assert(CV.size() == 1 && "non-unique color for exit block!");
      BasicBlock *BBColor = CV.front();
      Instruction *EHPad = BBColor->getFirstNonPHI();
      if (EHPad->isEHPad())
        OpBundles.emplace_back("funclet", EHPad);
    }

    New = CallInst::Create(CI, OpBundles);
  } else {
    New = I.clone();
  }

  ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
  if (!I.getName().empty())
    New->setName(I.getName() + ".le");

  // Build LCSSA PHI nodes for any in-loop operands. Note that this is
  // particularly cheap because we can rip off the PHI node that we're
  // replacing for the number and blocks of the predecessors.
  // OPT: If this shows up in a profile, we can instead finish sinking all
  // invariant instructions, and then walk their operands to re-establish
  // LCSSA. That will eliminate creating PHI nodes just to nuke them when
  // sinking bottom-up.
  for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE;
       ++OI)
    if (Instruction *OInst = dyn_cast<Instruction>(*OI))
      if (Loop *OLoop = LI->getLoopFor(OInst->getParent()))
        if (!OLoop->contains(&PN)) {
          PHINode *OpPN =
              PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
                              OInst->getName() + ".lcssa", &ExitBlock.front());
          for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
            OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
          *OI = OpPN;
        }
  return New;
}
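[Editor's aside, not part of this revision.] sink(), defined next, creates at most one clone of the instruction per exit block and caches the clones. A self-contained sketch of that caching pattern, with simplified stand-in types and std::map instead of SmallDenseMap:

#include <map>

struct Block { int id; };
struct Inst { int opcode; };

// Hypothetical stand-in for CloneInstructionInExitBlock.
Inst *cloneInto(const Inst &I, Block &) { return new Inst{I.opcode}; }

// One clone per exit block, created lazily and reused for later PHIs that
// live in the same exit block.
Inst *getOrCreateSunkCopy(const Inst &I, Block *Exit,
                          std::map<Block *, Inst *> &SunkCopies) {
  auto It = SunkCopies.find(Exit);
  if (It != SunkCopies.end())
    return It->second;
  return SunkCopies[Exit] = cloneInto(I, *Exit);
}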
/// When an instruction is found to only be used outside of the loop, this
/// function moves it to the exit blocks and patches up SSA form as needed.
/// This method is guaranteed to remove the original instruction from its
/// position, and may either delete it or move it to outside of the loop.
///
static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
                 const Loop *CurLoop, AliasSetTracker *CurAST,
                 const LoopSafetyInfo *SafetyInfo,
                 OptimizationRemarkEmitter *ORE) {
  DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
  ORE->emit(OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
            << "sinking " << ore::NV("Inst", &I));
  bool Changed = false;
  if (isa<LoadInst>(I))
    ++NumMovedLoads;
  else if (isa<CallInst>(I))
    ++NumMovedCalls;
  ++NumSunk;
  Changed = true;

#ifndef NDEBUG
  SmallVector<BasicBlock *, 32> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);
  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
                                             ExitBlocks.end());
#endif

  // Clones of this instruction. Don't create more than one per exit block!
  SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies;

  // If this instruction is only used outside of the loop, then all users are
  // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
  // the instruction.
  while (!I.use_empty()) {
    Value::user_iterator UI = I.user_begin();
    auto *User = cast<Instruction>(*UI);
    if (!DT->isReachableFromEntry(User->getParent())) {
      User->replaceUsesOfWith(&I, UndefValue::get(I.getType()));
      continue;
    }
    // The user must be a PHI node.
    PHINode *PN = cast<PHINode>(User);

    // Surprisingly, instructions can be used outside of loops without any
    // exits. This can only happen in PHI nodes if the incoming block is
    // unreachable.
    Use &U = UI.getUse();
    BasicBlock *BB = PN->getIncomingBlock(U);
    if (!DT->isReachableFromEntry(BB)) {
      U = UndefValue::get(I.getType());
      continue;
    }

    BasicBlock *ExitBlock = PN->getParent();
    assert(ExitBlockSet.count(ExitBlock) &&
           "The LCSSA PHI is not in an exit block!");

    Instruction *New;
    auto It = SunkCopies.find(ExitBlock);
    if (It != SunkCopies.end())
      New = It->second;
    else
      New = SunkCopies[ExitBlock] =
          CloneInstructionInExitBlock(I, *ExitBlock, *PN, LI, SafetyInfo);

    PN->replaceAllUsesWith(New);
    PN->eraseFromParent();
  }

  CurAST->deleteValue(&I);
  I.eraseFromParent();
  return Changed;
}

/// When an instruction is found to only use loop-invariant operands and it
/// is safe to hoist, this function is called to do the dirty work.
///
static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
                  const LoopSafetyInfo *SafetyInfo,
                  OptimizationRemarkEmitter *ORE) {
  auto *Preheader = CurLoop->getLoopPreheader();
  DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I
               << "\n");
  ORE->emit(OptimizationRemark(DEBUG_TYPE, "Hoisted", &I)
            << "hoisting " << ore::NV("Inst", &I));

  // Metadata can be dependent on conditions we are hoisting above.
  // Conservatively strip all metadata on the instruction unless we were
  // guaranteed to execute I if we entered the loop, in which case the
  // metadata is valid in the loop preheader.
  if (I.hasMetadataOtherThanDebugLoc() &&
      // The check on hasMetadataOtherThanDebugLoc is to prevent us from
      // burning time in isGuaranteedToExecute if we don't actually have
      // anything to drop. It is a compile time optimization, not required
      // for correctness.
      !isGuaranteedToExecute(I, DT, CurLoop, SafetyInfo))
    I.dropUnknownNonDebugMetadata();

  // Move the new node to the Preheader, before its terminator.
  I.moveBefore(Preheader->getTerminator());

  // Do not retain debug locations when we are moving instructions to
  // different basic blocks, because we want to avoid jumpy line tables.
  // Calls, however, need to retain their debug locs because they may be
  // inlined.
  // FIXME: How do we retain source locations without causing poor debugging
  // behavior?
  if (!isa<CallInst>(I))
    I.setDebugLoc(DebugLoc());

  if (isa<LoadInst>(I))
    ++NumMovedLoads;
  else if (isa<CallInst>(I))
    ++NumMovedCalls;
  ++NumHoisted;
  return true;
}
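[Editor's aside, not part of this revision.] The function below is the heart of hoisting legality. A hand-written illustration of the trapping case it guards against:

// Illustration only: x / y is conditionally executed. Hoisting it above the
// guard could introduce a divide-by-zero trap on inputs where the original
// program never divided at all.
int guardedDiv(const int *v, int n, int x, int y) {
  int s = 0;
  for (int i = 0; i < n; ++i)
    if (v[i] > 0) // dynamic guard; y may be zero whenever it is false
      s += x / y;
  return s;
}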
/// Only sink or hoist an instruction if it is not a trapping instruction,
/// if it is known not to trap when moved to the preheader, or if it is a
/// trapping instruction that is guaranteed to execute.
static bool isSafeToExecuteUnconditionally(Instruction &Inst,
                                           const DominatorTree *DT,
                                           const Loop *CurLoop,
                                           const LoopSafetyInfo *SafetyInfo,
                                           OptimizationRemarkEmitter *ORE,
                                           const Instruction *CtxI) {
  if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT))
    return true;

  bool GuaranteedToExecute =
      isGuaranteedToExecute(Inst, DT, CurLoop, SafetyInfo);

  if (!GuaranteedToExecute) {
    auto *LI = dyn_cast<LoadInst>(&Inst);
    if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
      ORE->emit(OptimizationRemarkMissed(
                    DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
                << "failed to hoist load with loop-invariant address "
                   "because load is conditionally executed");
  }

  return GuaranteedToExecute;
}

namespace {
class LoopPromoter : public LoadAndStorePromoter {
  Value *SomePtr; // Designated pointer to store to.
  SmallPtrSetImpl<Value *> &PointerMustAliases;
  SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
  SmallVectorImpl<Instruction *> &LoopInsertPts;
  PredIteratorCache &PredCache;
  AliasSetTracker &AST;
  LoopInfo &LI;
  DebugLoc DL;
  int Alignment;
  AAMDNodes AATags;

  Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
    if (Instruction *I = dyn_cast<Instruction>(V))
      if (Loop *L = LI.getLoopFor(I->getParent()))
        if (!L->contains(BB)) {
          // We need to create an LCSSA PHI node for the incoming value and
          // store that.
          PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
                                        I->getName() + ".lcssa", &BB->front());
          for (BasicBlock *Pred : PredCache.get(BB))
            PN->addIncoming(I, Pred);
          return PN;
        }
    return V;
  }

public:
  LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
               SmallPtrSetImpl<Value *> &PMA,
               SmallVectorImpl<BasicBlock *> &LEB,
               SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
               AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
               const AAMDNodes &AATags)
      : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
        LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
        LI(li), DL(std::move(dl)), Alignment(alignment), AATags(AATags) {}

  bool isInstInList(Instruction *I,
                    const SmallVectorImpl<Instruction *> &) const override {
    Value *Ptr;
    if (LoadInst *LI = dyn_cast<LoadInst>(I))
      Ptr = LI->getOperand(0);
    else
      Ptr = cast<StoreInst>(I)->getPointerOperand();
    return PointerMustAliases.count(Ptr);
  }

  void doExtraRewritesBeforeFinalDeletion() const override {
    // Insert stores in the loop exit blocks. Each exit block gets a store of
    // the live-out values that feed it. Since we've already told the SSA
    // updater about the defs in the loop and the preheader definition, it is
    // all set and we can start using it.
    for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
      BasicBlock *ExitBlock = LoopExitBlocks[i];
      Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
      LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
      Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
      Instruction *InsertPos = LoopInsertPts[i];
      StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
      NewSI->setAlignment(Alignment);
      NewSI->setDebugLoc(DL);
      if (AATags)
        NewSI->setAAMetadata(AATags);
    }
  }

  void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
    // Update alias analysis.
    AST.copyValue(LI, V);
  }
  void instructionDeleted(Instruction *I) const override {
    AST.deleteValue(I);
  }
};
} // end anon namespace
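[Editor's aside, not part of this revision.] Before the implementation, a hand-written before/after of the rewrite that promoteLoopAccessesToScalars performs: the promoted location lives in a register across the loop, with one load in the preheader and one store at the exit.

// Illustration only: scalar promotion by hand.
int countBefore(int *p, const int *v, int n) {
  for (int i = 0; i < n; ++i)
    if (v[i] > 0)
      ++*p; // load + store of *p on every matching iteration
  return *p;
}

int countAfter(int *p, const int *v, int n) {
  int tmp = *p; // single load in the "preheader"
  for (int i = 0; i < n; ++i)
    if (v[i] > 0)
      ++tmp;
  *p = tmp; // single store at the exit; note this store now happens even if
            // no v[i] was positive, which is only legal under the (p2)
            // conditions discussed in the function below
  return tmp;
}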
/// Try to promote memory values to scalars by sinking stores out of the
/// loop and moving loads to before the loop. We do this by looping over
/// the stores in the loop, looking for stores to Must pointers which are
/// loop invariant.
///
bool llvm::promoteLoopAccessesToScalars(
    AliasSet &AS, SmallVectorImpl<BasicBlock *> &ExitBlocks,
    SmallVectorImpl<Instruction *> &InsertPts, PredIteratorCache &PIC,
    LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
    Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo,
    OptimizationRemarkEmitter *ORE) {
  // Verify inputs.
  assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
         CurAST != nullptr && SafetyInfo != nullptr &&
         "Unexpected Input to promoteLoopAccessesToScalars");

  // We can promote this alias set if it has a store, if it is a "Must" alias
  // set, if the pointer is loop invariant, and if we are not eliminating any
  // volatile loads or stores.
  if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
      AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
    return false;

  assert(!AS.empty() &&
         "Must alias set should have at least one pointer element in it!");

  Value *SomePtr = AS.begin()->getValue();
  BasicBlock *Preheader = CurLoop->getLoopPreheader();

  // It isn't safe to promote a load/store from the loop if the load/store is
  // conditional. For example, turning:
  //
  //    for () { if (c) *P += 1; }
  //
  // into:
  //
  //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
  //
  // is not safe, because *P may only be valid to access if 'c' is true.
  //
  // The safety property divides into two parts:
  // p1) The memory may not be dereferenceable on entry to the loop. In this
  //     case, we can't insert the required load in the preheader.
  // p2) The memory model does not allow us to insert a store along any
  //     dynamic path which did not originally have one.
  //
  // If at least one store is guaranteed to execute, both properties are
  // satisfied, and promotion is legal.
  //
  // This, however, is not a necessary condition. Even if no store/load is
  // guaranteed to execute, we can still establish these properties.
  // We can establish (p1) by proving that hoisting the load into the
  // preheader is safe (i.e. proving dereferenceability on all paths through
  // the loop). We can use any access within the alias set to prove
  // dereferenceability, since they're all must alias.
  //
  // There are two ways to establish (p2):
  // a) Prove the location is thread-local. In this case the memory model
  //    requirement does not apply, and stores are safe to insert.
  // b) Prove a store dominates every exit block. In this case, if an exit
  //    block is reached, the original dynamic path would have taken us
  //    through the store, so inserting a store into the exit block is safe.
  //    Note that this is different from the store being guaranteed to
  //    execute. For instance, if an exception is thrown on the first
  //    iteration of the loop, the original store is never executed, but the
  //    exit blocks are not executed either.

  bool DereferenceableInPH = false;
  bool SafeToInsertStore = false;

  SmallVector<Instruction *, 64> LoopUses;
  SmallPtrSet<Value *, 4> PointerMustAliases;

  // We start with an alignment of one and try to find instructions that
  // allow us to prove better alignment.
  unsigned Alignment = 1;
  AAMDNodes AATags;

  const DataLayout &MDL = Preheader->getModule()->getDataLayout();

  if (SafetyInfo->MayThrow) {
    // If a loop can throw, we have to insert a store along each unwind edge.
    // That said, we can't actually make the unwind edge explicit. Therefore,
    // we have to prove that the store is dead along the unwind edge.
    //
    // Currently, this code just special-cases alloca instructions.
    if (!isa<AllocaInst>(GetUnderlyingObject(SomePtr, MDL)))
      return false;
  }

  // Check that all of the pointers in the alias set have the same type.
  // We cannot (yet) promote a memory location that is loaded and stored in
  // different sizes. While we are at it, collect alignment and AA info.
  for (const auto &ASI : AS) {
    Value *ASIV = ASI.getValue();
    PointerMustAliases.insert(ASIV);

    // Check that all of the pointers in the alias set have the same type. We
    // cannot (yet) promote a memory location that is loaded and stored in
    // different sizes.
    if (SomePtr->getType() != ASIV->getType())
      return false;

    for (User *U : ASIV->users()) {
      // Ignore instructions that are outside the loop.
      Instruction *UI = dyn_cast<Instruction>(U);
      if (!UI || !CurLoop->contains(UI))
        continue;

      // If there is a non-load/store instruction in the loop, we can't
      // promote it.
      if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
        assert(!Load->isVolatile() && "AST broken");
        if (!Load->isSimple())
          return false;

        if (!DereferenceableInPH)
          DereferenceableInPH = isSafeToExecuteUnconditionally(
              *Load, DT, CurLoop, SafetyInfo, ORE, Preheader->getTerminator());
      } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
        // Stores *of* the pointer are not interesting, only stores *to* the
        // pointer.
        if (UI->getOperand(1) != ASIV)
          continue;
        assert(!Store->isVolatile() && "AST broken");
        if (!Store->isSimple())
          return false;

        // If the store is guaranteed to execute, both properties are
        // satisfied. We may want to check if a store is guaranteed to
        // execute even if we already know that promotion is safe, since it
        // may have higher alignment than any other guaranteed stores, in
        // which case we can raise the alignment on the promoted store.
        unsigned InstAlignment = Store->getAlignment();
        if (!InstAlignment)
          InstAlignment =
              MDL.getABITypeAlignment(Store->getValueOperand()->getType());

        if (!DereferenceableInPH || !SafeToInsertStore ||
            (InstAlignment > Alignment)) {
          if (isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)) {
            DereferenceableInPH = true;
            SafeToInsertStore = true;
            Alignment = std::max(Alignment, InstAlignment);
          }
        }

        // If a store dominates all exit blocks, it is safe to sink.
        // As explained above, if an exit block was executed, a dominating
        // store must have been executed at least once, so we are not
        // introducing stores on paths that did not have them.
        // Note that this only looks at explicit exit blocks. If we ever
        // start sinking stores into unwind edges (see above), this will
        // break.
        if (!SafeToInsertStore)
          SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
            return DT->dominates(Store->getParent(), Exit);
          });

        // If the store is not guaranteed to execute, we may still get
        // deref info through it.
        if (!DereferenceableInPH) {
          DereferenceableInPH = isDereferenceableAndAlignedPointer(
              Store->getPointerOperand(), Store->getAlignment(), MDL,
              Preheader->getTerminator(), DT);
        }
      } else
        return false; // Not a load or store.

      // Merge the AA tags.
      if (LoopUses.empty()) {
        // On the first load/store, just take its AA tags.
        UI->getAAMetadata(AATags);
      } else if (AATags) {
        UI->getAAMetadata(AATags, /* Merge = */ true);
      }

      LoopUses.push_back(UI);
    }
  }

  // If we couldn't prove we can hoist the load, bail.
  if (!DereferenceableInPH)
    return false;
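[Editor's aside, not part of this revision.] A sketch of why thread-locality rescues (p2): inserting a store that the original program might not have executed is only observable if another thread can reach the object. publish() below is a hypothetical escape point for the sketch.

// Illustration only: non-escaping locals are safe to promote; escaped
// objects are not, because an inserted unconditional store could race.
void publish(int *p) { (void)p; } // hypothetical: hands p to another thread

int demo(const bool *c, int n) {
  int local = 0;    // never escapes: an inserted store is unobservable
  int shared = 0;
  publish(&shared); // escapes: an inserted store could be seen elsewhere
  for (int i = 0; i < n; ++i)
    if (c[i]) {
      ++local;
      ++shared;
    }
  return local + shared;
}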
  // We know we can hoist the load, but we don't have a guaranteed store.
  // Check whether the location is thread-local. If it is, then we can insert
  // stores along paths which originally didn't have them without violating
  // the memory model.
  if (!SafeToInsertStore) {
    Value *Object = GetUnderlyingObject(SomePtr, MDL);
    SafeToInsertStore =
        (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) &&
        !PointerMayBeCaptured(Object, true, true);
  }

  // If we've still failed to prove we can sink the store, give up.
  if (!SafeToInsertStore)
    return false;

  // Otherwise, this is safe to promote, let's do it!
  DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
               << '\n');
  ORE->emit(
      OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar", LoopUses[0])
      << "Moving accesses to memory location out of the loop");
  ++NumPromoted;

  // Grab a debug location for the inserted loads/stores; given that the
  // inserted loads/stores have little relation to the original loads/stores,
  // this code just arbitrarily picks a location from one, since any debug
  // location is better than none.
  DebugLoc DL = LoopUses[0]->getDebugLoc();

  // We use the SSAUpdater interface to insert phi nodes as required.
  SmallVector<PHINode *, 16> NewPHIs;
  SSAUpdater SSA(&NewPHIs);
  LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
                        InsertPts, PIC, *CurAST, *LI, DL, Alignment, AATags);

  // Set up the preheader to have a definition of the value. It is the
  // live-out value from the preheader that uses in the loop will use.
  LoadInst *PreheaderLoad = new LoadInst(
      SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator());
  PreheaderLoad->setAlignment(Alignment);
  PreheaderLoad->setDebugLoc(DL);
  if (AATags)
    PreheaderLoad->setAAMetadata(AATags);
  SSA.AddAvailableValue(Preheader, PreheaderLoad);

  // Rewrite all the loads in the loop and remember all the definitions from
  // stores in the loop.
  Promoter.run(LoopUses);

  // If the SSAUpdater didn't use the load in the preheader, just zap it now.
  if (PreheaderLoad->use_empty())
    PreheaderLoad->eraseFromParent();

  return true;
}

/// Returns an owning pointer to an alias set which incorporates aliasing
/// info from L and all subloops of L.
/// FIXME: In the new pass manager, there is no helper function to handle
/// loop analysis such as cloneBasicBlockAnalysis, so the AST needs to be
/// recomputed from scratch for every loop. Hook up with the helper functions
/// when available in the new pass manager to avoid redundant computation.
AliasSetTracker *
LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
                                                 AliasAnalysis *AA) {
  AliasSetTracker *CurAST = nullptr;
  SmallVector<Loop *, 4> RecomputeLoops;
  for (Loop *InnerL : L->getSubLoops()) {
    auto MapI = LoopToAliasSetMap.find(InnerL);
    // If the AST for this inner loop is missing it may have been merged into
    // some other loop's AST and then that loop unrolled, and so we need to
    // recompute it.
    if (MapI == LoopToAliasSetMap.end()) {
      RecomputeLoops.push_back(InnerL);
      continue;
    }
    AliasSetTracker *InnerAST = MapI->second;

    if (CurAST != nullptr) {
      // What if InnerLoop was modified by other passes?
      CurAST->add(*InnerAST);

      // Once we've incorporated the inner loop's AST into ours, we don't
      // need the subloop's anymore.
      delete InnerAST;
    } else {
      CurAST = InnerAST;
    }
    LoopToAliasSetMap.erase(MapI);
  }
  if (CurAST == nullptr)
    CurAST = new AliasSetTracker(*AA);

  auto mergeLoop = [&](Loop *L) {
    // Loop over the body of this loop, looking for calls, invokes, and stores.
-    // Because subloops have already been incorporated into AST, we skip blocks
-    // in subloops.
    for (BasicBlock *BB : L->blocks())
-      if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops.
CurAST->add(*BB); // Incorporate the specified basic block }; // Add everything from the sub loops that are no longer directly available. for (Loop *InnerL : RecomputeLoops) mergeLoop(InnerL); // And merge in this loop. mergeLoop(L); return CurAST; } /// Simple analysis hook. Clone alias set info. /// void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) { AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L); if (!AST) return; AST->copyValue(From, To); } /// Simple Analysis hook. Delete value V from alias set /// void LegacyLICMPass::deleteAnalysisValue(Value *V, Loop *L) { AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L); if (!AST) return; AST->deleteValue(V); } /// Simple Analysis hook. Delete value L from alias set map. /// void LegacyLICMPass::deleteAnalysisLoop(Loop *L) { AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L); if (!AST) return; delete AST; LICM.getLoopToAliasSetMap().erase(L); } /// Return true if the body of this loop may store into the memory /// location pointed to by V. /// static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, const AAMDNodes &AAInfo, AliasSetTracker *CurAST) { // Check to see if any of the basic blocks in CurLoop invalidate *V. return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod(); } /// Little predicate that returns true if the specified basic block is in /// a subloop of the current one, not the current one itself. /// static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) { assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop"); return LI->getLoopFor(BB) != CurLoop; } Index: projects/clang400-import/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- projects/clang400-import/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp (revision 314176) +++ projects/clang400-import/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp (revision 314177) @@ -1,6801 +1,6795 @@ //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This provides a class for OpenMP runtime code generation. // //===----------------------------------------------------------------------===// #include "CGCXXABI.h" #include "CGCleanup.h" #include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" #include "ConstantBuilder.h" #include "clang/AST/Decl.h" #include "clang/AST/StmtOpenMP.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Value.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include using namespace clang; using namespace CodeGen; namespace { /// \brief Base class for handling code generation inside OpenMP regions. class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { public: /// \brief Kinds of OpenMP regions used in codegen. enum CGOpenMPRegionKind { /// \brief Region with outlined function for standalone 'parallel' /// directive. ParallelOutlinedRegion, /// \brief Region with outlined function for standalone 'task' directive. 
TaskOutlinedRegion, /// \brief Region for constructs that do not require function outlining, /// like 'for', 'sections', 'atomic' etc. directives. InlinedRegion, /// \brief Region with outlined function for standalone 'target' directive. TargetRegion, }; CGOpenMPRegionInfo(const CapturedStmt &CS, const CGOpenMPRegionKind RegionKind, const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, bool HasCancel) : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, bool HasCancel) : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} /// \brief Get a variable or parameter for storing global thread id /// inside OpenMP construct. virtual const VarDecl *getThreadIDVariable() const = 0; /// \brief Emit the captured statement body. void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; /// \brief Get an LValue for the current ThreadID variable. /// \return LValue for thread id variable. This LValue always has type int32*. virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} CGOpenMPRegionKind getRegionKind() const { return RegionKind; } OpenMPDirectiveKind getDirectiveKind() const { return Kind; } bool hasCancel() const { return HasCancel; } static bool classof(const CGCapturedStmtInfo *Info) { return Info->getKind() == CR_OpenMP; } ~CGOpenMPRegionInfo() override = default; protected: CGOpenMPRegionKind RegionKind; RegionCodeGenTy CodeGen; OpenMPDirectiveKind Kind; bool HasCancel; }; /// \brief API for captured statement code generation in OpenMP constructs. class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { public: CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, bool HasCancel, StringRef HelperName) : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, HasCancel), ThreadIDVar(ThreadIDVar), HelperName(HelperName) { assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); } /// \brief Get a variable or parameter for storing global thread id /// inside OpenMP construct. const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } /// \brief Get the name of the capture helper. StringRef getHelperName() const override { return HelperName; } static bool classof(const CGCapturedStmtInfo *Info) { return CGOpenMPRegionInfo::classof(Info) && cast(Info)->getRegionKind() == ParallelOutlinedRegion; } private: /// \brief A variable or parameter storing global thread id for OpenMP /// constructs. const VarDecl *ThreadIDVar; StringRef HelperName; }; /// \brief API for captured statement code generation in OpenMP constructs. class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { public: class UntiedTaskActionTy final : public PrePostActionTy { bool Untied; const VarDecl *PartIDVar; const RegionCodeGenTy UntiedCodeGen; llvm::SwitchInst *UntiedSwitch = nullptr; public: UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, const RegionCodeGenTy &UntiedCodeGen) : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} void Enter(CodeGenFunction &CGF) override { if (Untied) { // Emit task switching point. 
auto PartIdLVal = CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(PartIDVar), PartIDVar->getType()->castAs()); auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation()); auto *DoneBB = CGF.createBasicBlock(".untied.done."); UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); CGF.EmitBlock(DoneBB); CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); UntiedSwitch->addCase(CGF.Builder.getInt32(0), CGF.Builder.GetInsertBlock()); emitUntiedSwitch(CGF); } } void emitUntiedSwitch(CodeGenFunction &CGF) const { if (Untied) { auto PartIdLVal = CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(PartIDVar), PartIDVar->getType()->castAs()); CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), PartIdLVal); UntiedCodeGen(CGF); CodeGenFunction::JumpDest CurPoint = CGF.getJumpDestInCurrentScope(".untied.next."); CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), CGF.Builder.GetInsertBlock()); CGF.EmitBranchThroughCleanup(CurPoint); CGF.EmitBlock(CurPoint.getBlock()); } } unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } }; CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, bool HasCancel, const UntiedTaskActionTy &Action) : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), ThreadIDVar(ThreadIDVar), Action(Action) { assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); } /// \brief Get a variable or parameter for storing global thread id /// inside OpenMP construct. const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } /// \brief Get an LValue for the current ThreadID variable. LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; /// \brief Get the name of the capture helper. StringRef getHelperName() const override { return ".omp_outlined."; } void emitUntiedSwitch(CodeGenFunction &CGF) override { Action.emitUntiedSwitch(CGF); } static bool classof(const CGCapturedStmtInfo *Info) { return CGOpenMPRegionInfo::classof(Info) && cast(Info)->getRegionKind() == TaskOutlinedRegion; } private: /// \brief A variable or parameter storing global thread id for OpenMP /// constructs. const VarDecl *ThreadIDVar; /// Action for emitting code for untied tasks. const UntiedTaskActionTy &Action; }; /// \brief API for inlined captured statement code generation in OpenMP /// constructs. class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { public: CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, bool HasCancel) : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), OldCSI(OldCSI), OuterRegionInfo(dyn_cast_or_null(OldCSI)) {} // \brief Retrieve the value of the context parameter. llvm::Value *getContextValue() const override { if (OuterRegionInfo) return OuterRegionInfo->getContextValue(); llvm_unreachable("No context value for inlined OpenMP region"); } void setContextValue(llvm::Value *V) override { if (OuterRegionInfo) { OuterRegionInfo->setContextValue(V); return; } llvm_unreachable("No context value for inlined OpenMP region"); } /// \brief Lookup the captured field decl for a variable. 
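[Editor's aside, not part of this revision.] The UntiedTaskActionTy above resumes an untied task by switching on a persisted part id; each yield records the next resume point before returning to the runtime. A hand-written analogue of the emitted state machine (illustration only; the real code builds this with an llvm::SwitchInst):

// Illustration only: resumable task body driven by a stored part id.
void taskEntry(int &partId) {
  switch (partId) {
  case 0: goto part0;
  case 1: goto part1;
  default: return; // ".untied.done."
  }
part0:
  partId = 1; // record the resume point, then yield back to the runtime
  return;
part1:
  partId = -1; // finished
  return;
}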
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look up in a
    // list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// \brief Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// \brief CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// \brief API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application, so it is provided by the client, because only the
/// client has the information to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen,
                           StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// \brief This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// \brief API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
for (auto &C : CS.captures()) { if (!C.capturesVariable() && !C.capturesVariableByCopy()) continue; const VarDecl *VD = C.getCapturedVar(); if (VD->isLocalVarDeclOrParm()) continue; DeclRefExpr DRE(const_cast(VD), /*RefersToEnclosingVariableOrCapture=*/false, VD->getType().getNonReferenceType(), VK_LValue, SourceLocation()); PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address { return CGF.EmitLValue(&DRE).getAddress(); }); } (void)PrivScope.Privatize(); } /// \brief Lookup the captured field decl for a variable. const FieldDecl *lookup(const VarDecl *VD) const override { if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) return FD; return nullptr; } /// \brief Emit the captured statement body. void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { llvm_unreachable("No body for expressions"); } /// \brief Get a variable or parameter for storing global thread id /// inside OpenMP construct. const VarDecl *getThreadIDVariable() const override { llvm_unreachable("No thread id for expressions"); } /// \brief Get the name of the capture helper. StringRef getHelperName() const override { llvm_unreachable("No helper name for expressions"); } static bool classof(const CGCapturedStmtInfo *Info) { return false; } private: /// Private scope to capture global variables. CodeGenFunction::OMPPrivateScope PrivScope; }; /// \brief RAII for emitting code of OpenMP constructs. class InlinedOpenMPRegionRAII { CodeGenFunction &CGF; llvm::DenseMap LambdaCaptureFields; FieldDecl *LambdaThisCaptureField = nullptr; public: /// \brief Constructs region for combined constructs. /// \param CodeGen Code generation sequence for combined directives. Includes /// a list of functions used for code generation of implicitly inlined /// regions. InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, bool HasCancel) : CGF(CGF) { // Start emission for the construct. CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); LambdaThisCaptureField = CGF.LambdaThisCaptureField; CGF.LambdaThisCaptureField = nullptr; } ~InlinedOpenMPRegionRAII() { // Restore original CapturedStmtInfo only if we're done with code emission. auto *OldCSI = cast(CGF.CapturedStmtInfo)->getOldCSI(); delete CGF.CapturedStmtInfo; CGF.CapturedStmtInfo = OldCSI; std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); CGF.LambdaThisCaptureField = LambdaThisCaptureField; } }; /// \brief Values for bit flags used in the ident_t to describe the fields. /// All enumeric elements are named and described in accordance with the code /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h enum OpenMPLocationFlags { /// \brief Use trampoline for internal microtask. OMP_IDENT_IMD = 0x01, /// \brief Use c-style ident structure. OMP_IDENT_KMPC = 0x02, /// \brief Atomic reduction option for kmpc_reduce. OMP_ATOMIC_REDUCE = 0x10, /// \brief Explicit 'barrier' directive. OMP_IDENT_BARRIER_EXPL = 0x20, /// \brief Implicit barrier in code. OMP_IDENT_BARRIER_IMPL = 0x40, /// \brief Implicit barrier in 'for' directive. OMP_IDENT_BARRIER_IMPL_FOR = 0x40, /// \brief Implicit barrier in 'sections' directive. OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, /// \brief Implicit barrier in 'single' directive. OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140 }; /// \brief Describes ident structure that describes a source location. 
/// All descriptions are taken from /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h /// Original structure: /// typedef struct ident { /// kmp_int32 reserved_1; /**< might be used in Fortran; /// see above */ /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; /// KMP_IDENT_KMPC identifies this union /// member */ /// kmp_int32 reserved_2; /**< not really used in Fortran any more; /// see above */ ///#if USE_ITT_BUILD /// /* but currently used for storing /// region-specific ITT */ /// /* contextual information. */ ///#endif /* USE_ITT_BUILD */ /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for /// C++ */ /// char const *psource; /**< String describing the source location. /// The string is composed of semi-colon separated // fields which describe the source file, /// the function and a pair of line numbers that /// delimit the construct. /// */ /// } ident_t; enum IdentFieldIndex { /// \brief might be used in Fortran IdentField_Reserved_1, /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. IdentField_Flags, /// \brief Not really used in Fortran any more IdentField_Reserved_2, /// \brief Source[4] in Fortran, do not use for C++ IdentField_Reserved_3, /// \brief String describing the source location. The string is composed of /// semi-colon separated fields which describe the source file, the function /// and a pair of line numbers that delimit the construct. IdentField_PSource }; /// \brief Schedule types for 'omp for' loops (these enumerators are taken from /// the enum sched_type in kmp.h). enum OpenMPSchedType { /// \brief Lower bound for default (unordered) versions. OMP_sch_lower = 32, OMP_sch_static_chunked = 33, OMP_sch_static = 34, OMP_sch_dynamic_chunked = 35, OMP_sch_guided_chunked = 36, OMP_sch_runtime = 37, OMP_sch_auto = 38, /// static with chunk adjustment (e.g., simd) OMP_sch_static_balanced_chunked = 45, /// \brief Lower bound for 'ordered' versions. OMP_ord_lower = 64, OMP_ord_static_chunked = 65, OMP_ord_static = 66, OMP_ord_dynamic_chunked = 67, OMP_ord_guided_chunked = 68, OMP_ord_runtime = 69, OMP_ord_auto = 70, OMP_sch_default = OMP_sch_static, /// \brief dist_schedule types OMP_dist_sch_static_chunked = 91, OMP_dist_sch_static = 92, /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. /// Set if the monotonic schedule modifier was present. OMP_sch_modifier_monotonic = (1 << 29), /// Set if the nonmonotonic schedule modifier was present. 
OMP_sch_modifier_nonmonotonic = (1 << 30), }; enum OpenMPRTLFunction { /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, /// kmpc_micro microtask, ...); OMPRTL__kmpc_fork_call, /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc, /// kmp_int32 global_tid, void *data, size_t size, void ***cache); OMPRTL__kmpc_threadprivate_cached, /// \brief Call to void __kmpc_threadprivate_register( ident_t *, /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); OMPRTL__kmpc_threadprivate_register, // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); OMPRTL__kmpc_global_thread_num, // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, // kmp_critical_name *crit); OMPRTL__kmpc_critical, // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 // global_tid, kmp_critical_name *crit, uintptr_t hint); OMPRTL__kmpc_critical_with_hint, // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, // kmp_critical_name *crit); OMPRTL__kmpc_end_critical, // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 // global_tid); OMPRTL__kmpc_cancel_barrier, // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); OMPRTL__kmpc_barrier, // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); OMPRTL__kmpc_for_static_fini, // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 // global_tid); OMPRTL__kmpc_serialized_parallel, // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 // global_tid); OMPRTL__kmpc_end_serialized_parallel, // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, // kmp_int32 num_threads); OMPRTL__kmpc_push_num_threads, // Call to void __kmpc_flush(ident_t *loc); OMPRTL__kmpc_flush, // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid); OMPRTL__kmpc_master, // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid); OMPRTL__kmpc_end_master, // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, // int end_part); OMPRTL__kmpc_omp_taskyield, // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); OMPRTL__kmpc_single, // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); OMPRTL__kmpc_end_single, // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, // kmp_routine_entry_t *task_entry); OMPRTL__kmpc_omp_task_alloc, // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * // new_task); OMPRTL__kmpc_omp_task, // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), // kmp_int32 didit); OMPRTL__kmpc_copyprivate, // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); OMPRTL__kmpc_reduce, // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name // *lck); OMPRTL__kmpc_reduce_nowait, // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, // kmp_critical_name *lck); OMPRTL__kmpc_end_reduce, // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, // kmp_critical_name *lck); OMPRTL__kmpc_end_reduce_nowait, // Call to void 
  // __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,

  //
  // Offloading related calls
  //
  // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
  // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void
  // __tgt_target_data_update(int32_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
  OMPRTL__tgt_target_data_update,
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OffloadEntriesInfoManager(CGM) {
  IdentTy = llvm::StructType::create(
      "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
      CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
      CGM.Int8PtrTy /* psource */, nullptr);
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() { InternalVars.clear(); }

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  auto &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  auto *Fn = llvm::Function::Create(
      FnTy, llvm::GlobalValue::InternalLinkage,
      IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::NoInline);
  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
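  // For illustration (a sketch, not the verbatim output), given
  //   #pragma omp declare reduction(mysum : int : omp_out += omp_in)
  // this helper produces roughly:
  //   void .omp_combiner.(int *restrict omp_out_parm,
  //                       int *restrict omp_in_parm) {
  //     *omp_out_parm += *omp_in_parm;
  //   }
  // with omp_in/omp_out privatized to the pointees of the two parameters.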
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  auto &C = CGM.getContext();
  if (!In || !Out) {
    In = &C.Idents.get("omp_in");
    Out = &C.Idents.get("omp_out");
  }
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(D->lookup(In).front()),
      cast<VarDecl>(D->lookup(Out).front()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (auto *Init = D->getInitializer()) {
    if (!Priv || !Orig) {
      Priv = &C.Idents.get("omp_priv");
      Orig = &C.Idents.get("omp_orig");
    }
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(), Init,
        cast<VarDecl>(D->lookup(Orig).front()),
        cast<VarDecl>(D->lookup(Priv).front()),
        /*IsCombiner=*/false);
  }
  UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

// Layout information for ident_t.
static CharUnits getIdentAlign(CodeGenModule &CGM) {
  return CGM.getPointerAlign();
}
static CharUnits getIdentSize(CodeGenModule &CGM) {
  assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
  return CharUnits::fromQuantity(16) + CGM.getPointerSize();
}
static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
  // All the fields except the last are i32, so this works beautifully.
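  // Concretely: Reserved_1 -> 0, Flags -> 4, Reserved_2 -> 8, Reserved_3 -> 12
  // and PSource -> 16 bytes from the start of ident_t.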
  return unsigned(Field) * CharUnits::fromQuantity(4);
}

static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
                                   IdentFieldIndex Field,
                                   const llvm::Twine &Name = "") {
  auto Offset = getOffsetOfIdentField(Field);
  return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
}

llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, getOutlinedHelperName());
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}

llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    auto *ThreadID = getThreadID(CGF, D.getLocStart());
    auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer()};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
  auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = getIdentAlign(CGM);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
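      // For example, a directive at line 10, column 3 of test.c inside main()
      // yields ";test.c;main;10;3;;" (see emitUpdateLocation below); the
      // string created here is only the no-debug-info placeholder.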
// Taken from // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c DefaultOpenMPPSource = CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); DefaultOpenMPPSource = llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); } ConstantInitBuilder builder(CGM); auto fields = builder.beginStruct(IdentTy); fields.addInt(CGM.Int32Ty, 0); fields.addInt(CGM.Int32Ty, Flags); fields.addInt(CGM.Int32Ty, 0); fields.addInt(CGM.Int32Ty, 0); fields.add(DefaultOpenMPPSource); auto DefaultOpenMPLocation = fields.finishAndCreateGlobal("", Align, /*isConstant*/ true, llvm::GlobalValue::PrivateLinkage); DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation; } return Address(Entry, Align); } llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags) { Flags |= OMP_IDENT_KMPC; // If no debug info is generated - return global default location. if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || Loc.isInvalid()) return getOrCreateDefaultLocation(Flags).getPointer(); assert(CGF.CurFn && "No function in current CodeGenFunction."); Address LocValue = Address::invalid(); auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); if (I != OpenMPLocThreadIDMap.end()) LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM)); // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if // GetOpenMPThreadID was called before this routine. if (!LocValue.isValid()) { // Generate "ident_t .kmpc_loc.addr;" Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM), ".kmpc_loc.addr"); auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); Elem.second.DebugLoc = AI.getPointer(); LocValue = AI; CGBuilderTy::InsertPointGuard IPG(CGF.Builder); CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), CGM.getSize(getIdentSize(CGF.CGM))); } // char **psource = &.kmpc_loc_.addr.psource; Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource); auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); if (OMPDebugLoc == nullptr) { SmallString<128> Buffer2; llvm::raw_svector_ostream OS2(Buffer2); // Build debug location PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); OS2 << ";" << PLoc.getFilename() << ";"; if (const FunctionDecl *FD = dyn_cast_or_null(CGF.CurFuncDecl)) { OS2 << FD->getQualifiedNameAsString(); } OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; } // *psource = ";;;;;;"; CGF.Builder.CreateStore(OMPDebugLoc, PSource); // Our callers always pass this to a runtime function, so for // convenience, go ahead and return a naked pointer. return LocValue.getPointer(); } llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, SourceLocation Loc) { assert(CGF.CurFn && "No function in current CodeGenFunction."); llvm::Value *ThreadID = nullptr; // Check whether we've already cached a load of the thread id in this // function. auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); if (I != OpenMPLocThreadIDMap.end()) { ThreadID = I->second.ThreadID; if (ThreadID != nullptr) return ThreadID; } if (auto *OMPRegionInfo = dyn_cast_or_null(CGF.CapturedStmtInfo)) { if (OMPRegionInfo->getThreadIDVariable()) { // Check if this an outlined function with thread id passed as argument. 
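    // Outlined bodies receive the global thread id by reference as their first
    // parameter (kmpc_micro is void (*)(kmp_int32 *global_tid, kmp_int32
    // *bound_tid, ...)), so the id is simply re-loaded here rather than by
    // calling into the runtime again.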
auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); // If value loaded in entry block, cache it and use it everywhere in // function. if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); Elem.second.ThreadID = ThreadID; } return ThreadID; } } // This is not an outlined function region - need to call __kmpc_int32 // kmpc_global_thread_num(ident_t *loc). // Generate thread id value and cache this value for use across the // function. CGBuilderTy::InsertPointGuard IPG(CGF.Builder); CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); ThreadID = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), emitUpdateLocation(CGF, Loc)); auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); Elem.second.ThreadID = ThreadID; return ThreadID; } void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { assert(CGF.CurFn && "No function in current CodeGenFunction."); if (OpenMPLocThreadIDMap.count(CGF.CurFn)) OpenMPLocThreadIDMap.erase(CGF.CurFn); if (FunctionUDRMap.count(CGF.CurFn) > 0) { for(auto *D : FunctionUDRMap[CGF.CurFn]) { UDRMap.erase(D); } FunctionUDRMap.erase(CGF.CurFn); } } llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { if (!IdentTy) { } return llvm::PointerType::getUnqual(IdentTy); } llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { if (!Kmpc_MicroTy) { // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), llvm::PointerType::getUnqual(CGM.Int32Ty)}; Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); } return llvm::PointerType::getUnqual(Kmpc_MicroTy); } llvm::Constant * CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { llvm::Constant *RTLFn = nullptr; switch (static_cast(Function)) { case OMPRTL__kmpc_fork_call: { // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro // microtask, ...); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, getKmpc_MicroPointerTy()}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); break; } case OMPRTL__kmpc_global_thread_num: { // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); break; } case OMPRTL__kmpc_threadprivate_cached: { // Build void *__kmpc_threadprivate_cached(ident_t *loc, // kmp_int32 global_tid, void *data, size_t size, void ***cache); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.SizeTy, CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); break; } case OMPRTL__kmpc_critical: { // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, // kmp_critical_name *crit); llvm::Type *TypeParams[] = { getIdentTyPointerTy(), CGM.Int32Ty, llvm::PointerType::getUnqual(KmpCriticalNameTy)}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); break; } case OMPRTL__kmpc_critical_with_hint: { 
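    // The extra uintptr_t carries the value of the 'hint' clause on
    // '#pragma omp critical' (an omp_lock_hint_t from omp.h, e.g.
    // omp_lock_hint_speculative); the enumerator values are owned by the
    // runtime's omp.h, not by this file.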
// Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, // kmp_critical_name *crit, uintptr_t hint); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, llvm::PointerType::getUnqual(KmpCriticalNameTy), CGM.IntPtrTy}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint"); break; } case OMPRTL__kmpc_threadprivate_register: { // Build void __kmpc_threadprivate_register(ident_t *, void *data, // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); // typedef void *(*kmpc_ctor)(void *); auto KmpcCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, /*isVarArg*/ false)->getPointerTo(); // typedef void *(*kmpc_cctor)(void *, void *); llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; auto KmpcCopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, /*isVarArg*/ false)->getPointerTo(); // typedef void (*kmpc_dtor)(void *); auto KmpcDtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) ->getPointerTo(); llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, KmpcCopyCtorTy, KmpcDtorTy}; auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); break; } case OMPRTL__kmpc_end_critical: { // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, // kmp_critical_name *crit); llvm::Type *TypeParams[] = { getIdentTyPointerTy(), CGM.Int32Ty, llvm::PointerType::getUnqual(KmpCriticalNameTy)}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); break; } case OMPRTL__kmpc_cancel_barrier: { // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 // global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); break; } case OMPRTL__kmpc_barrier: { // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); break; } case OMPRTL__kmpc_for_static_fini: { // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); break; } case OMPRTL__kmpc_push_num_threads: { // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, // kmp_int32 num_threads) llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); break; } case OMPRTL__kmpc_serialized_parallel: { // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 // global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__kmpc_serialized_parallel"); break; } case OMPRTL__kmpc_end_serialized_parallel: { // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 // global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); break; } case OMPRTL__kmpc_flush: { // Build void __kmpc_flush(ident_t *loc); llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); break; } case OMPRTL__kmpc_master: { // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); break; } case OMPRTL__kmpc_end_master: { // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); break; } case OMPRTL__kmpc_omp_taskyield: { // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, // int end_part); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); break; } case OMPRTL__kmpc_single: { // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); break; } case OMPRTL__kmpc_end_single: { // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); break; } case OMPRTL__kmpc_omp_task_alloc: { // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, // kmp_routine_entry_t *task_entry); assert(KmpRoutineEntryPtrTy != nullptr && "Type kmp_routine_entry_t must be created."); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; // Return void * and then cast to particular kmp_task_t type. 
llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); break; } case OMPRTL__kmpc_omp_task: { // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t // *new_task); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); break; } case OMPRTL__kmpc_copyprivate: { // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), // kmp_int32 didit); llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; auto *CpyFnTy = llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, CGM.VoidPtrTy, CpyFnTy->getPointerTo(), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); break; } case OMPRTL__kmpc_reduce: { // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, /*isVarArg=*/false); llvm::Type *TypeParams[] = { getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), llvm::PointerType::getUnqual(KmpCriticalNameTy)}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); break; } case OMPRTL__kmpc_reduce_nowait: { // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name // *lck); llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, /*isVarArg=*/false); llvm::Type *TypeParams[] = { getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), llvm::PointerType::getUnqual(KmpCriticalNameTy)}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); break; } case OMPRTL__kmpc_end_reduce: { // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, // kmp_critical_name *lck); llvm::Type *TypeParams[] = { getIdentTyPointerTy(), CGM.Int32Ty, llvm::PointerType::getUnqual(KmpCriticalNameTy)}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); break; } case OMPRTL__kmpc_end_reduce_nowait: { // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, // kmp_critical_name *lck); llvm::Type *TypeParams[] = { getIdentTyPointerTy(), CGM.Int32Ty, llvm::PointerType::getUnqual(KmpCriticalNameTy)}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, 
/*Name=*/"__kmpc_end_reduce_nowait"); break; } case OMPRTL__kmpc_omp_task_begin_if0: { // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t // *new_task); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); break; } case OMPRTL__kmpc_omp_task_complete_if0: { // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t // *new_task); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_complete_if0"); break; } case OMPRTL__kmpc_ordered: { // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); break; } case OMPRTL__kmpc_end_ordered: { // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); break; } case OMPRTL__kmpc_omp_taskwait: { // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); break; } case OMPRTL__kmpc_taskgroup: { // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); break; } case OMPRTL__kmpc_end_taskgroup: { // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); break; } case OMPRTL__kmpc_push_proc_bind: { // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, // int proc_bind) llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); break; } case OMPRTL__kmpc_omp_task_with_deps: { // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); llvm::Type *TypeParams[] = { getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); break; } case OMPRTL__kmpc_omp_wait_deps: { // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, // 
kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, // kmp_depend_info_t *noalias_dep_list); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); break; } case OMPRTL__kmpc_cancellationpoint: { // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 // global_tid, kmp_int32 cncl_kind) llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); break; } case OMPRTL__kmpc_cancel: { // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, // kmp_int32 cncl_kind) llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); break; } case OMPRTL__kmpc_push_num_teams: { // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, // kmp_int32 num_teams, kmp_int32 num_threads) llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); break; } case OMPRTL__kmpc_fork_teams: { // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro // microtask, ...); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, getKmpc_MicroPointerTy()}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); break; } case OMPRTL__kmpc_taskloop: { // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int // sched, kmp_uint64 grainsize, void *task_dup); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.IntTy, CGM.VoidPtrTy, CGM.IntTy, CGM.Int64Ty->getPointerTo(), CGM.Int64Ty->getPointerTo(), CGM.Int64Ty, CGM.IntTy, CGM.IntTy, CGM.Int64Ty, CGM.VoidPtrTy}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); break; } case OMPRTL__kmpc_doacross_init: { // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 // num_dims, struct kmp_dim *dims); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.VoidPtrTy}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); break; } case OMPRTL__kmpc_doacross_fini: { // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); break; } case OMPRTL__kmpc_doacross_post: { // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 // *vec); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int64Ty->getPointerTo()}; 
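    // For reference: 'ordered depend(source)' in a loop with ordered(n) lowers
    // to __kmpc_doacross_post, and each 'ordered depend(sink : vec)' to
    // __kmpc_doacross_wait; 'vec' carries one iteration number per dimension.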
llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); break; } case OMPRTL__kmpc_doacross_wait: { // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 // *vec); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int64Ty->getPointerTo()}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); break; } case OMPRTL__tgt_target: { // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t // *arg_types); llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrPtrTy, CGM.VoidPtrPtrTy, CGM.SizeTy->getPointerTo(), CGM.Int32Ty->getPointerTo()}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); break; } case OMPRTL__tgt_target_teams: { // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrPtrTy, CGM.VoidPtrPtrTy, CGM.SizeTy->getPointerTo(), CGM.Int32Ty->getPointerTo(), CGM.Int32Ty, CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); break; } case OMPRTL__tgt_register_lib: { // Build void __tgt_register_lib(__tgt_bin_desc *desc); QualType ParamTy = CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); break; } case OMPRTL__tgt_unregister_lib: { // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); QualType ParamTy = CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); break; } case OMPRTL__tgt_target_data_begin: { // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int32Ty, CGM.VoidPtrPtrTy, CGM.VoidPtrPtrTy, CGM.SizeTy->getPointerTo(), CGM.Int32Ty->getPointerTo()}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); break; } case OMPRTL__tgt_target_data_end: { // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num, // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int32Ty, CGM.VoidPtrPtrTy, CGM.VoidPtrPtrTy, CGM.SizeTy->getPointerTo(), CGM.Int32Ty->getPointerTo()}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); break; } case OMPRTL__tgt_target_data_update: 
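    // This entry point backs '#pragma omp target update'; it reuses the
    // begin/end argument list above and performs only the requested to/from
    // data motion.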
{ // Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num, // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int32Ty, CGM.VoidPtrPtrTy, CGM.VoidPtrPtrTy, CGM.SizeTy->getPointerTo(), CGM.Int32Ty->getPointerTo()}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); break; } } assert(RTLFn && "Unable to find OpenMP runtime function"); return RTLFn; } llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" : "__kmpc_for_static_init_4u") : (IVSigned ? "__kmpc_for_static_init_8" : "__kmpc_for_static_init_8u"); auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; auto PtrTy = llvm::PointerType::getUnqual(ITy); llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc CGM.Int32Ty, // tid CGM.Int32Ty, // schedtype llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter PtrTy, // p_lower PtrTy, // p_upper PtrTy, // p_stride ITy, // incr ITy // chunk }; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); return CGM.CreateRuntimeFunction(FnTy, Name); } llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc CGM.Int32Ty, // tid CGM.Int32Ty, // schedtype ITy, // lower ITy, // upper ITy, // stride ITy // chunk }; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); return CGM.CreateRuntimeFunction(FnTy, Name); } llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc CGM.Int32Ty, // tid }; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); return CGM.CreateRuntimeFunction(FnTy, Name); } llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); auto ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; auto PtrTy = llvm::PointerType::getUnqual(ITy); llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc CGM.Int32Ty, // tid llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter PtrTy, // p_lower PtrTy, // p_upper PtrTy // p_stride }; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); return CGM.CreateRuntimeFunction(FnTy, Name); } llvm::Constant * CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { assert(!CGM.getLangOpts().OpenMPUseTLS || !CGM.getContext().getTargetInfo().isTLSSupported()); // Lookup the entry, lazily creating it if necessary. return getOrCreateInternalVariable(CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)) + ".cache."); } Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) { if (CGM.getLangOpts().OpenMPUseTLS && CGM.getContext().getTargetInfo().isTLSSupported()) return VDAddr; auto VarTy = VDAddr.getElementType(); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy), CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), getOrCreateThreadPrivateCache(VD)}; return Address(CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), VDAddr.getAlignment()); } void CGOpenMPRuntime::emitThreadPrivateVarInit( CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime // library. auto OMPLoc = emitUpdateLocation(CGF, Loc); CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), OMPLoc); // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) // to register constructor/destructor for variable. 
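  // For 'static T x; #pragma omp threadprivate(x)' the emitted sequence is
  // roughly:
  //   gtid = __kmpc_global_thread_num(&loc);
  //   __kmpc_threadprivate_register(&loc, &x, ctor, /*cctor=*/NULL, dtor);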
llvm::Value *Args[] = {OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), Ctor, CopyCtor, Dtor}; CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); } llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF) { if (CGM.getLangOpts().OpenMPUseTLS && CGM.getContext().getTargetInfo().isTLSSupported()) return nullptr; VD = VD->getDefinition(CGM.getContext()); if (VD && ThreadPrivateWithDefinition.count(VD) == 0) { ThreadPrivateWithDefinition.insert(VD); QualType ASTTy = VD->getType(); llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; auto Init = VD->getAnyInitializer(); if (CGM.getLangOpts().CPlusPlus && PerformInit) { // Generate function that re-emits the declaration's initializer into the // threadprivate copy of the variable VD CodeGenFunction CtorCGF(CGM); FunctionArgList Args; ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, CGM.getContext().VoidPtrTy); Args.push_back(&Dst); auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( CGM.getContext().VoidPtrTy, Args); auto FTy = CGM.getTypes().GetFunctionType(FI); auto Fn = CGM.CreateGlobalInitOrDestructFunction( FTy, ".__kmpc_global_ctor_.", FI, Loc); CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, Args, SourceLocation()); auto ArgVal = CtorCGF.EmitLoadOfScalar( CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); Address Arg = Address(ArgVal, VDAddr.getAlignment()); Arg = CtorCGF.Builder.CreateElementBitCast(Arg, CtorCGF.ConvertTypeForMem(ASTTy)); CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), /*IsInitializer=*/true); ArgVal = CtorCGF.EmitLoadOfScalar( CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); CtorCGF.FinishFunction(); Ctor = Fn; } if (VD->getType().isDestructedType() != QualType::DK_none) { // Generate function that emits destructor call for the threadprivate copy // of the variable VD CodeGenFunction DtorCGF(CGM); FunctionArgList Args; ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, CGM.getContext().VoidPtrTy); Args.push_back(&Dst); auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( CGM.getContext().VoidTy, Args); auto FTy = CGM.getTypes().GetFunctionType(FI); auto Fn = CGM.CreateGlobalInitOrDestructFunction( FTy, ".__kmpc_global_dtor_.", FI, Loc); auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, SourceLocation()); // Create a scope with an artificial location for the body of this function. auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); auto ArgVal = DtorCGF.EmitLoadOfScalar( DtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); DtorCGF.FinishFunction(); Dtor = Fn; } // Do not emit init function if it is not required. 
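  // The constructor thunk built above is roughly:
  //   void *.__kmpc_global_ctor_.(void *p) {
  //     new (p) T(<init>); // re-emit the declaration's initializer
  //     return p;
  //   }
  // and .__kmpc_global_dtor_. runs the destructor on its argument.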
if (!Ctor && !Dtor) return nullptr; llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; auto CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, /*isVarArg=*/false)->getPointerTo(); // Copying constructor for the threadprivate variable. // Must be NULL - reserved by runtime, but currently it requires that this // parameter is always NULL. Otherwise it fires assertion. CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); if (Ctor == nullptr) { auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, /*isVarArg=*/false)->getPointerTo(); Ctor = llvm::Constant::getNullValue(CtorTy); } if (Dtor == nullptr) { auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg=*/false)->getPointerTo(); Dtor = llvm::Constant::getNullValue(DtorTy); } if (!CGF) { auto InitFunctionTy = llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); auto InitFunction = CGM.CreateGlobalInitOrDestructFunction( InitFunctionTy, ".__omp_threadprivate_init_.", CGM.getTypes().arrangeNullaryFunction()); CodeGenFunction InitCGF(CGM); FunctionArgList ArgList; InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, CGM.getTypes().arrangeNullaryFunction(), ArgList, Loc); emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); InitCGF.FinishFunction(); return InitFunction; } emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); } return nullptr; } /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen /// function. Here is the logic: /// if (Cond) { /// ThenGen(); /// } else { /// ElseGen(); /// } void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen) { CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); // If the condition constant folds and can be elided, try to avoid emitting // the condition and the dead arm of the if/else. bool CondConstant; if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { if (CondConstant) ThenGen(CGF); else ElseGen(CGF); return; } // Otherwise, the condition did not fold, or we couldn't elide it. Just // emit the conditional branch. auto ThenBlock = CGF.createBasicBlock("omp_if.then"); auto ElseBlock = CGF.createBasicBlock("omp_if.else"); auto ContBlock = CGF.createBasicBlock("omp_if.end"); CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); // Emit the 'then' code. CGF.EmitBlock(ThenBlock); ThenGen(CGF); CGF.EmitBranch(ContBlock); // Emit the 'else' code if present. // There is no need to emit line number for unconditional branch. (void)ApplyDebugLocation::CreateEmpty(CGF); CGF.EmitBlock(ElseBlock); ElseGen(CGF); // There is no need to emit line number for unconditional branch. (void)ApplyDebugLocation::CreateEmpty(CGF); CGF.EmitBranch(ContBlock); // Emit the continuation block for code after the if. 
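  // The emitted CFG is the usual diamond (sketch):
  //   br %cond, label %omp_if.then, label %omp_if.else
  //   omp_if.then: <ThenGen>; br label %omp_if.end
  //   omp_if.else: <ElseGen>; br label %omp_if.end
  //   omp_if.end: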
CGF.EmitBlock(ContBlock, /*IsFinished=*/true); } void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef CapturedVars, const Expr *IfCond) { if (!CGF.HaveInsertPoint()) return; auto *RTLoc = emitUpdateLocation(CGF, Loc); auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, PrePostActionTy &) { // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); auto &RT = CGF.CGM.getOpenMPRuntime(); llvm::Value *Args[] = { RTLoc, CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; llvm::SmallVector RealArgs; RealArgs.append(std::begin(Args), std::end(Args)); RealArgs.append(CapturedVars.begin(), CapturedVars.end()); auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); CGF.EmitRuntimeCall(RTLFn, RealArgs); }; auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, PrePostActionTy &) { auto &RT = CGF.CGM.getOpenMPRuntime(); auto ThreadID = RT.getThreadID(CGF, Loc); // Build calls: // __kmpc_serialized_parallel(&Loc, GTid); llvm::Value *Args[] = {RTLoc, ThreadID}; CGF.EmitRuntimeCall( RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); // OutlinedFn(>id, &zero, CapturedStruct); auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); Address ZeroAddr = CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), /*Name*/ ".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); llvm::SmallVector OutlinedFnArgs; OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); OutlinedFnArgs.push_back(ZeroAddr.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); // __kmpc_end_serialized_parallel(&Loc, GTid); llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; CGF.EmitRuntimeCall( RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs); }; if (IfCond) emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); else { RegionCodeGenTy ThenRCG(ThenGen); ThenRCG(CGF); } } // If we're inside an (outlined) parallel region, use the region info's // thread-ID variable (it is passed in a first argument of the outlined function // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in // regular serial code region, get thread ID by calling kmp_int32 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and // return the address of that temp. 
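// In plain serial code this materializes, roughly:
//   kmp_int32 .threadid_temp. = __kmpc_global_thread_num(&loc);
// and returns the address of that temporary, so callers can uniformly pass a
// kmp_int32 *.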
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc) { if (auto *OMPRegionInfo = dyn_cast_or_null(CGF.CapturedStmtInfo)) if (OMPRegionInfo->getThreadIDVariable()) return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); auto ThreadID = getThreadID(CGF, Loc); auto Int32Ty = CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); CGF.EmitStoreOfScalar(ThreadID, CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); return ThreadIDTemp; } llvm::Constant * CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, const llvm::Twine &Name) { SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); Out << Name; auto RuntimeName = Out.str(); auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first; if (Elem.second) { assert(Elem.second->getType()->getPointerElementType() == Ty && "OMP internal variable has different type than requested"); return &*Elem.second; } return Elem.second = new llvm::GlobalVariable( CGM.getModule(), Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), Elem.first()); } llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { llvm::Twine Name(".gomp_critical_user_", CriticalName); return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var")); } namespace { /// Common pre(post)-action for different OpenMP constructs. class CommonActionTy final : public PrePostActionTy { llvm::Value *EnterCallee; ArrayRef EnterArgs; llvm::Value *ExitCallee; ArrayRef ExitArgs; bool Conditional; llvm::BasicBlock *ContBlock = nullptr; public: CommonActionTy(llvm::Value *EnterCallee, ArrayRef EnterArgs, llvm::Value *ExitCallee, ArrayRef ExitArgs, bool Conditional = false) : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), ExitArgs(ExitArgs), Conditional(Conditional) {} void Enter(CodeGenFunction &CGF) override { llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); if (Conditional) { llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); ContBlock = CGF.createBasicBlock("omp_if.end"); // Generate the branch (If-stmt) CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); CGF.EmitBlock(ThenBlock); } } void Done(CodeGenFunction &CGF) { // Emit the rest of blocks/branches CGF.EmitBranch(ContBlock); CGF.EmitBlock(ContBlock, true); } void Exit(CodeGenFunction &CGF) override { CGF.EmitRuntimeCall(ExitCallee, ExitArgs); } }; } // anonymous namespace void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint) { // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); // CriticalOpGen(); // __kmpc_end_critical(ident_t *, gtid, Lock); // Prepare arguments and build a call to __kmpc_critical if (!CGF.HaveInsertPoint()) return; llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), getCriticalRegionLock(CriticalName)}; llvm::SmallVector EnterArgs(std::begin(Args), std::end(Args)); if (Hint) { EnterArgs.push_back(CGF.Builder.CreateIntCast( CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); } CommonActionTy Action( createRuntimeFunction(Hint ? 
OMPRTL__kmpc_critical_with_hint : OMPRTL__kmpc_critical), EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); CriticalOpGen.setAction(Action); emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); } void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) { if (!CGF.HaveInsertPoint()) return; // if(__kmpc_master(ident_t *, gtid)) { // MasterOpGen(); // __kmpc_end_master(ident_t *, gtid); // } // Prepare arguments and build a call to __kmpc_master llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, createRuntimeFunction(OMPRTL__kmpc_end_master), Args, /*Conditional=*/true); MasterOpGen.setAction(Action); emitInlinedDirective(CGF, OMPD_master, MasterOpGen); Action.Done(CGF); } void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) { if (!CGF.HaveInsertPoint()) return; // Build call __kmpc_omp_taskyield(loc, thread_id, 0); llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); if (auto *Region = dyn_cast_or_null(CGF.CapturedStmtInfo)) Region->emitUntiedSwitch(CGF); } void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) { if (!CGF.HaveInsertPoint()) return; // __kmpc_taskgroup(ident_t *, gtid); // TaskgroupOpGen(); // __kmpc_end_taskgroup(ident_t *, gtid); // Prepare arguments and build a call to __kmpc_taskgroup llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), Args); TaskgroupOpGen.setAction(Action); emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); } /// Given an array of pointers to variables, project the address of a /// given variable. static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var) { // Pull out the pointer to the variable. 
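  // Given the 'void *[n]' block built by the caller, load element Index and
  // rebuild a typed Address from it with the variable's declared alignment
  // and element type.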
  Address PtrAddr =
      CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
  auto &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  auto *Fn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
      ".omp.copyprivate.copy_func", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    auto DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    auto SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  auto &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    auto KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
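// Illustrative sketch: the '.omp.copyprivate.copy_func' emitted above,
// written as plain C++ for a hypothetical 'copyprivate(a, b)' with an int and
// a double. Both parameters are really 'void *[n]' arrays of element
// addresses, exactly as the Dest/Src comments in emitCopyprivateCopyFunction()
// describe.
static void omp_copyprivate_copy_func_sketch(void *lhs_arg, void *rhs_arg) {
  void **dst = static_cast<void **>(lhs_arg); // receiving thread's addresses
  void **src = static_cast<void **>(rhs_arg); // addresses from the 'single' thread
  *static_cast<int *>(dst[0]) = *static_cast<int *>(src[0]);       // a = a'
  *static_cast<double *>(dst[1]) = *static_cast<double *>(src[1]); // b = b'
}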
// call __kmpc_copyprivate(ident_t *, gtid, , , // , did_it); if (DidIt.isValid()) { llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); auto CopyprivateArrayTy = C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); // Create a list of all private variables for copyprivate. Address CopyprivateList = CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { Address Elem = CGF.Builder.CreateConstArrayGEP( CopyprivateList, I, CGF.getPointerSize()); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy), Elem); } // Build function that copies private values from single region to all other // threads in the corresponding parallel region. auto *CpyFn = emitCopyprivateCopyFunction( CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), CopyprivateVars, SrcExprs, DstExprs, AssignmentOps); auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy); Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, CGF.VoidPtrTy); auto *DidItVal = CGF.Builder.CreateLoad(DidIt); llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), // ident_t * getThreadID(CGF, Loc), // i32 BufSize, // size_t CL.getPointer(), // void * CpyFn, // void (*) (void *, void *) DidItVal // i32 did_it }; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); } } void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) { if (!CGF.HaveInsertPoint()) return; // __kmpc_ordered(ident_t *, gtid); // OrderedOpGen(); // __kmpc_end_ordered(ident_t *, gtid); // Prepare arguments and build a call to __kmpc_ordered if (IsThreads) { llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, createRuntimeFunction(OMPRTL__kmpc_end_ordered), Args); OrderedOpGen.setAction(Action); emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); return; } emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); } void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks, bool ForceSimpleCall) { if (!CGF.HaveInsertPoint()) return; // Build call __kmpc_cancel_barrier(loc, thread_id); // Build call __kmpc_barrier(loc, thread_id); unsigned Flags; if (Kind == OMPD_for) Flags = OMP_IDENT_BARRIER_IMPL_FOR; else if (Kind == OMPD_sections) Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; else if (Kind == OMPD_single) Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; else if (Kind == OMPD_barrier) Flags = OMP_IDENT_BARRIER_EXPL; else Flags = OMP_IDENT_BARRIER_IMPL; // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, // thread_id); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), getThreadID(CGF, Loc)}; if (auto *OMPRegionInfo = dyn_cast_or_null(CGF.CapturedStmtInfo)) { if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { auto *Result = CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); if (EmitChecks) { // if (__kmpc_cancel_barrier()) { // exit from construct; // } auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); auto *ContBB = CGF.createBasicBlock(".cancel.continue"); auto *Cmp = CGF.Builder.CreateIsNotNull(Result); CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); CGF.EmitBlock(ExitBB); // exit from construct; auto CancelDestination = 
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}

/// \brief Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// \brief Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  auto Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  return Schedule | Modifier;
}

void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          const OpenMPScheduleTy &ScheduleKind,
                                          unsigned IVSize, bool IVSigned,
                                          bool Ordered, llvm::Value *UB,
                                          llvm::Value *Chunk) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static &&
Schedule != OMP_ord_static_chunked && Schedule != OMP_sch_static_balanced_chunked)); // Call __kmpc_dispatch_init( // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, // kmp_int[32|64] lower, kmp_int[32|64] upper, // kmp_int[32|64] stride, kmp_int[32|64] chunk); // If the Chunk was not specified in the clause - use default value 1. if (Chunk == nullptr) Chunk = CGF.Builder.getIntN(IVSize, 1); llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), CGF.Builder.getInt32(addMonoNonMonoModifier( Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type CGF.Builder.getIntN(IVSize, 0), // Lower UB, // Upper CGF.Builder.getIntN(IVSize, 1), // Stride Chunk // Chunk }; CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); } static void emitForStaticInitCall( CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB, Address ST, llvm::Value *Chunk) { if (!CGF.HaveInsertPoint()) return; assert(!Ordered); assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked); // Call __kmpc_for_static_init( // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, // kmp_int[32|64] incr, kmp_int[32|64] chunk); if (Chunk == nullptr) { assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || Schedule == OMP_dist_sch_static) && "expected static non-chunked schedule"); // If the Chunk was not specified in the clause - use default value 1. 
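// Illustrative sketch: how a thread consumes the __kmpc_for_static_init
// protocol documented in the comment above, for a canonical
// 'for (i = 0; i < n; ++i)' under schedule(static). The declarations mirror
// that comment; the schedule-type constant is an assumed kmp.h value.
extern "C" {
struct ident_t;
void __kmpc_for_static_init_4(ident_t *loc, int gtid, int schedtype,
                              int *plastiter, int *plower, int *pupper,
                              int *pstride, int incr, int chunk);
void __kmpc_for_static_fini(ident_t *loc, int gtid);
}

static void static_loop_sketch(ident_t *loc, int gtid, int n) {
  const int SchedStatic = 34; // assumed value of kmp_sch_static
  int last = 0, lower = 0, upper = n - 1, stride = 1;
  // The runtime clips [lower, upper] to this thread's share of the iterations.
  __kmpc_for_static_init_4(loc, gtid, SchedStatic, &last, &lower, &upper,
                           &stride, /*incr=*/1, /*chunk=*/1);
  for (int i = lower; i <= upper; ++i) {
    // ... loop body ...
  }
  __kmpc_for_static_fini(loc, gtid);
}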
Chunk = CGF.Builder.getIntN(IVSize, 1); } else { assert((Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked) && "expected static chunked schedule"); } llvm::Value *Args[] = { UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier( Schedule, M1, M2)), // Schedule type IL.getPointer(), // &isLastIter LB.getPointer(), // &LB UB.getPointer(), // &UB ST.getPointer(), // &Stride CGF.Builder.getIntN(IVSize, 1), // Incr Chunk // Chunk }; CGF.EmitRuntimeCall(ForStaticInitFunction, Args); } void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, Address IL, Address LB, Address UB, Address ST, llvm::Value *Chunk) { OpenMPSchedType ScheduleNum = getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); auto *ThreadId = getThreadID(CGF, Loc); auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned); emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize, Ordered, IL, LB, UB, ST, Chunk); } void CGOpenMPRuntime::emitDistributeStaticInit( CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned, bool Ordered, Address IL, Address LB, Address UB, Address ST, llvm::Value *Chunk) { OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr); auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); auto *ThreadId = getThreadID(CGF, Loc); auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned); emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB, UB, ST, Chunk); } void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc) { if (!CGF.HaveInsertPoint()) return; // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), Args); } void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) { if (!CGF.HaveInsertPoint()) return; // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); } llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) { // Call __kmpc_dispatch_next( // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, // kmp_int[32|64] *p_stride); llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), IL.getPointer(), // &isLastIter LB.getPointer(), // &Lower UB.getPointer(), // &Upper ST.getPointer() // &Stride }; llvm::Value *Call = CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); return CGF.EmitScalarConversion( Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true), CGF.getContext().BoolTy, Loc); } void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation 
Loc) { if (!CGF.HaveInsertPoint()) return; // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), Args); } void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, OpenMPProcBindClauseKind ProcBind, SourceLocation Loc) { if (!CGF.HaveInsertPoint()) return; // Constants for proc bind value accepted by the runtime. enum ProcBindTy { ProcBindFalse = 0, ProcBindTrue, ProcBindMaster, ProcBindClose, ProcBindSpread, ProcBindIntel, ProcBindDefault } RuntimeProcBind; switch (ProcBind) { case OMPC_PROC_BIND_master: RuntimeProcBind = ProcBindMaster; break; case OMPC_PROC_BIND_close: RuntimeProcBind = ProcBindClose; break; case OMPC_PROC_BIND_spread: RuntimeProcBind = ProcBindSpread; break; case OMPC_PROC_BIND_unknown: llvm_unreachable("Unsupported proc_bind value."); } // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); } void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef, SourceLocation Loc) { if (!CGF.HaveInsertPoint()) return; // Build call void __kmpc_flush(ident_t *loc) CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), emitUpdateLocation(CGF, Loc)); } namespace { /// \brief Indexes of fields for type kmp_task_t. enum KmpTaskTFields { /// \brief List of shared variables. KmpTaskTShareds, /// \brief Task routine. KmpTaskTRoutine, /// \brief Partition id for the untied tasks. KmpTaskTPartId, /// Function with call of destructors for private variables. Data1, /// Task priority. Data2, /// (Taskloops only) Lower bound. KmpTaskTLowerBound, /// (Taskloops only) Upper bound. KmpTaskTUpperBound, /// (Taskloops only) Stride. KmpTaskTStride, /// (Taskloops only) Is last iteration flag. KmpTaskTLastIter, }; } // anonymous namespace bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { // FIXME: Add other entries type when they become supported. return OffloadEntriesTargetRegion.empty(); } /// \brief Initialize target region entry. void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, unsigned Order) { assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " "only required for the device " "code generation."); OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, /*Flags=*/0); ++OffloadingEntriesNum; } void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, llvm::Constant *Addr, llvm::Constant *ID, int32_t Flags) { // If we are emitting code for a target, the entry is already initialized, // only has to be registered. 
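// Illustrative sketch: the clause "push" pattern implemented in
// emitNumThreadsClause()/emitProcBindClause() above. Each clause becomes one
// runtime call made just before the parallel region is forked; the
// declarations mirror the comments in this file, and the proc-bind value
// comes from the ProcBindTy enum above (ProcBindClose == 3).
extern "C" {
struct ident_t;
void __kmpc_push_num_threads(ident_t *loc, int gtid, int num_threads);
void __kmpc_push_proc_bind(ident_t *loc, int gtid, int proc_bind);
}

// '#pragma omp parallel num_threads(4) proc_bind(close)' roughly becomes:
static void parallel_clauses_sketch(ident_t *loc, int gtid) {
  __kmpc_push_num_threads(loc, gtid, 4);
  __kmpc_push_proc_bind(loc, gtid, /*ProcBindClose=*/3);
  // ... followed by the fork of the parallel region itself ...
}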
if (CGM.getLangOpts().OpenMPIsDevice) { assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && "Entry must exist."); auto &Entry = OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; assert(Entry.isValid() && "Entry not initialized!"); Entry.setAddress(Addr); Entry.setID(ID); Entry.setFlags(Flags); return; } else { OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID, Flags); OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; } } bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum) const { auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); if (PerDevice == OffloadEntriesTargetRegion.end()) return false; auto PerFile = PerDevice->second.find(FileID); if (PerFile == PerDevice->second.end()) return false; auto PerParentName = PerFile->second.find(ParentName); if (PerParentName == PerFile->second.end()) return false; auto PerLine = PerParentName->second.find(LineNum); if (PerLine == PerParentName->second.end()) return false; // Fail if this entry is already registered. if (PerLine->second.getAddress() || PerLine->second.getID()) return false; return true; } void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( const OffloadTargetRegionEntryInfoActTy &Action) { // Scan all target region entries and perform the provided action. for (auto &D : OffloadEntriesTargetRegion) for (auto &F : D.second) for (auto &P : F.second) for (auto &L : P.second) Action(D.first, F.first, P.first(), L.first, L.second); } /// \brief Create a Ctor/Dtor-like function whose body is emitted through /// \a Codegen. This is used to emit the two functions that register and /// unregister the descriptor of the current compilation unit. static llvm::Function * createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name, const RegionCodeGenTy &Codegen) { auto &C = CGM.getContext(); FunctionArgList Args; ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, C.VoidPtrTy); Args.push_back(&DummyPtr); CodeGenFunction CGF(CGM); auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); auto FTy = CGM.getTypes().GetFunctionType(FI); auto *Fn = CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation()); CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation()); Codegen(CGF); CGF.FinishFunction(); return Fn; } llvm::Function * CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { // If we don't have entries or if we are emitting code for the device, we // don't need to do anything. if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) return nullptr; auto &M = CGM.getModule(); auto &C = CGM.getContext(); // Get list of devices we care about auto &Devices = CGM.getLangOpts().OMPTargetTriples; // We should be creating an offloading descriptor only if there are devices // specified. assert(!Devices.empty() && "No OpenMP offloading devices??"); // Create the external variables that will point to the begin and end of the // host entries section. These will be defined by the linker. 
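// Illustrative sketch: the offload-entry table walked above is a four-level
// map keyed by device ID, file ID, parent function name, and line number. A
// plain-STL analogue of hasTargetRegionEntryInfo()'s lookup (std::map and
// std::string stand in for the llvm::DenseMap/StringRef used here):
#include <map>
#include <string>

using OffloadIndexSketch =
    std::map<unsigned,                        // device ID
             std::map<unsigned,               // file ID
                      std::map<std::string,   // parent function name
                               std::map<unsigned, int>>>>; // line -> entry

static bool hasEntrySketch(const OffloadIndexSketch &Idx, unsigned Dev,
                           unsigned File, const std::string &Parent,
                           unsigned Line) {
  auto D = Idx.find(Dev);
  if (D == Idx.end()) return false;
  auto F = D->second.find(File);
  if (F == D->second.end()) return false;
  auto P = F->second.find(Parent);
  if (P == F->second.end()) return false;
  return P->second.count(Line) != 0;
}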
auto *OffloadEntryTy = CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable( M, OffloadEntryTy, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, ".omp_offloading.entries_begin"); llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable( M, OffloadEntryTy, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, ".omp_offloading.entries_end"); // Create all device images auto *DeviceImageTy = cast( CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); ConstantInitBuilder DeviceImagesBuilder(CGM); auto DeviceImagesEntries = DeviceImagesBuilder.beginArray(DeviceImageTy); for (unsigned i = 0; i < Devices.size(); ++i) { StringRef T = Devices[i].getTriple(); auto *ImgBegin = new llvm::GlobalVariable( M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, Twine(".omp_offloading.img_start.") + Twine(T)); auto *ImgEnd = new llvm::GlobalVariable( M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T)); auto Dev = DeviceImagesEntries.beginStruct(DeviceImageTy); Dev.add(ImgBegin); Dev.add(ImgEnd); Dev.add(HostEntriesBegin); Dev.add(HostEntriesEnd); Dev.finishAndAddTo(DeviceImagesEntries); } // Create device images global array. llvm::GlobalVariable *DeviceImages = DeviceImagesEntries.finishAndCreateGlobal(".omp_offloading.device_images", CGM.getPointerAlign(), /*isConstant=*/true); DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); // This is a Zero array to be used in the creation of the constant expressions llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), llvm::Constant::getNullValue(CGM.Int32Ty)}; // Create the target region descriptor. auto *BinaryDescriptorTy = cast( CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy())); ConstantInitBuilder DescBuilder(CGM); auto DescInit = DescBuilder.beginStruct(BinaryDescriptorTy); DescInit.addInt(CGM.Int32Ty, Devices.size()); DescInit.add(llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(), DeviceImages, Index)); DescInit.add(HostEntriesBegin); DescInit.add(HostEntriesEnd); auto *Desc = DescInit.finishAndCreateGlobal(".omp_offloading.descriptor", CGM.getPointerAlign(), /*isConstant=*/true); // Emit code to register or unregister the descriptor at execution // startup or closing, respectively. // Create a variable to drive the registration and unregistration of the // descriptor, so we can reuse the logic that emits Ctors and Dtors. 
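// Illustrative sketch: a C view of the descriptor object just assembled,
// following the struct layouts documented later in getTgtDeviceImageQTy() and
// getTgtBinaryDescriptorQTy(); the '_sketch' names are stand-ins.
struct tgt_offload_entry_sketch; // field-by-field layout appears further down

struct tgt_device_image_sketch {
  void *ImageStart;                       // target code begin
  void *ImageEnd;                         // target code end
  tgt_offload_entry_sketch *EntriesBegin; // host entry table begin
  tgt_offload_entry_sketch *EntriesEnd;   // host entry table end (exclusive)
};

struct tgt_bin_desc_sketch {
  int NumDevices;                         // one image per device triple
  tgt_device_image_sketch *DeviceImages;  // the array built just above
  tgt_offload_entry_sketch *EntriesBegin; // '.omp_offloading.entries_begin'
  tgt_offload_entry_sketch *EntriesEnd;   // '.omp_offloading.entries_end'
};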
auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var"); ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(), IdentInfo, C.CharTy); auto *UnRegFn = createOffloadingBinaryDescriptorFunction( CGM, ".omp_offloading.descriptor_unreg", [&](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib), Desc); }); auto *RegFn = createOffloadingBinaryDescriptorFunction( CGM, ".omp_offloading.descriptor_reg", [&](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); }); return RegFn; } void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags) { StringRef Name = Addr->getName(); auto *TgtOffloadEntryType = cast( CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy())); llvm::LLVMContext &C = CGM.getModule().getContext(); llvm::Module &M = CGM.getModule(); // Make sure the address has the right type. llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy); // Create constant string with the name. llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); llvm::GlobalVariable *Str = new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true, llvm::GlobalValue::InternalLinkage, StrPtrInit, ".omp_offloading.entry_name"); Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy); // We can't have any padding between symbols, so we need to have 1-byte // alignment. auto Align = CharUnits::fromQuantity(1); // Create the entry struct. ConstantInitBuilder EntryBuilder(CGM); auto EntryInit = EntryBuilder.beginStruct(TgtOffloadEntryType); EntryInit.add(AddrPtr); EntryInit.add(StrPtr); EntryInit.addInt(CGM.SizeTy, Size); EntryInit.addInt(CGM.Int32Ty, Flags); EntryInit.addInt(CGM.Int32Ty, 0); llvm::GlobalVariable *Entry = EntryInit.finishAndCreateGlobal(".omp_offloading.entry", Align, /*constant*/ true, llvm::GlobalValue::ExternalLinkage); // The entry has to be created in the section the linker expects it to be. Entry->setSection(".omp_offloading.entries"); } void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // Emit the offloading entries and metadata so that the device codegen side // can easily figure out what to emit. The produced metadata looks like // this: // // !omp_offload.info = !{!1, ...} // // Right now we only generate metadata for function that contain target // regions. // If we do not have entries, we dont need to do anything. if (OffloadEntriesInfoManager.empty()) return; llvm::Module &M = CGM.getModule(); llvm::LLVMContext &C = M.getContext(); SmallVector OrderedEntries(OffloadEntriesInfoManager.size()); // Create the offloading info metadata node. llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); // Auxiliar methods to create metadata values and strings. auto getMDInt = [&](unsigned v) { return llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v)); }; auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); }; // Create function that emits metadata for each target region entry; auto &&TargetRegionMetadataEmitter = [&]( unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line, OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { llvm::SmallVector Ops; // Generate metadata for target regions. 
    // Each entry of this metadata contains:
    // - Entry 0 -> Kind of this type of metadata (0).
    // - Entry 1 -> Device ID of the file where the entry was identified.
    // - Entry 2 -> File ID of the file where the entry was identified.
    // - Entry 3 -> Mangled name of the function where the entry was
    //   identified.
    // - Entry 4 -> Line in the file where the entry was identified.
    // - Entry 5 -> Order the entry was created.
    // The first element of the metadata node is the kind.
    Ops.push_back(getMDInt(E.getKind()));
    Ops.push_back(getMDInt(DeviceID));
    Ops.push_back(getMDInt(FileID));
    Ops.push_back(getMDString(ParentName));
    Ops.push_back(getMDInt(Line));
    Ops.push_back(getMDInt(E.getOrder()));

    // Save this entry in the right position of the ordered entries array.
    OrderedEntries[E.getOrder()] = &E;

    // Add metadata to the named metadata node.
    MD->addOperand(llvm::MDNode::get(C, Ops));
  };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  for (auto *E : OrderedEntries) {
    assert(E && "All ordered entries must exist!");
    if (auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                E)) {
      assert(CE->getID() && CE->getAddress() &&
             "Entry ID and Addr are invalid!");
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0);
    } else
      llvm_unreachable("Unsupported entry kind.");
  }
}

/// \brief Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in
  // createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (Buf.getError())
    return;

  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (ME.getError())
    return;

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (auto I : MD->operands()) {
    llvm::MDNode *MN = cast<llvm::MDNode>(I);

    auto getMDInt = [&](unsigned Idx) {
      llvm::ConstantAsMetadata *V =
          cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto getMDString = [&](unsigned Idx) {
      llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    switch (getMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OFFLOAD_ENTRY_INFO_TARGET_REGION:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
          /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
          /*Order=*/getMDInt(5));
      break;
    }
  }
}

void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
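// Illustrative sketch: one operand of '!omp_offload.info' as read back by
// loadOffloadInfoMetadata() above. In textual IR a node would look roughly
// like (all values hypothetical):
//   !omp_offload.info = !{!0}
//   !0 = !{i32 0, i32 52, i32 16777234, !"_Z3foov", i32 12, i32 0}
// and the fields are consumed positionally:
struct OffloadInfoEntrySketch {
  unsigned Kind;      // getMDInt(0): 0 == OFFLOAD_ENTRY_INFO_TARGET_REGION
  unsigned DeviceID;  // getMDInt(1)
  unsigned FileID;    // getMDInt(2)
  const char *Parent; // getMDString(3): mangled name of enclosing function
  unsigned Line;      // getMDInt(4)
  unsigned Order;     // getMDInt(5): creation order on the host
};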
auto &C = CGM.getContext(); QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; FunctionProtoType::ExtProtoInfo EPI; KmpRoutineEntryPtrQTy = C.getPointerType( C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); } } static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy) { auto *Field = FieldDecl::Create( C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); Field->setAccess(AS_public); DC->addDecl(Field); return Field; } QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { // Make sure the type of the entry is already created. This is the type we // have to create: // struct __tgt_offload_entry{ // void *addr; // Pointer to the offload entry info. // // (function or global) // char *name; // Name of the function or global. // size_t size; // Size of the entry info (0 if it a function). // int32_t flags; // Flags associated with the entry, e.g. 'link'. // int32_t reserved; // Reserved, to use by the runtime library. // }; if (TgtOffloadEntryQTy.isNull()) { ASTContext &C = CGM.getContext(); auto *RD = C.buildImplicitRecord("__tgt_offload_entry"); RD->startDefinition(); addFieldToRecordDecl(C, RD, C.VoidPtrTy); addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); addFieldToRecordDecl(C, RD, C.getSizeType()); addFieldToRecordDecl( C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); addFieldToRecordDecl( C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); RD->completeDefinition(); TgtOffloadEntryQTy = C.getRecordType(RD); } return TgtOffloadEntryQTy; } QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { // These are the types we need to build: // struct __tgt_device_image{ // void *ImageStart; // Pointer to the target code start. // void *ImageEnd; // Pointer to the target code end. // // We also add the host entries to the device image, as it may be useful // // for the target runtime to have access to that information. // __tgt_offload_entry *EntriesBegin; // Begin of the table with all // // the entries. // __tgt_offload_entry *EntriesEnd; // End of the table with all the // // entries (non inclusive). // }; if (TgtDeviceImageQTy.isNull()) { ASTContext &C = CGM.getContext(); auto *RD = C.buildImplicitRecord("__tgt_device_image"); RD->startDefinition(); addFieldToRecordDecl(C, RD, C.VoidPtrTy); addFieldToRecordDecl(C, RD, C.VoidPtrTy); addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); RD->completeDefinition(); TgtDeviceImageQTy = C.getRecordType(RD); } return TgtDeviceImageQTy; } QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { // struct __tgt_bin_desc{ // int32_t NumDevices; // Number of devices supported. // __tgt_device_image *DeviceImages; // Arrays of device images // // (one per device). // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the // // entries. // __tgt_offload_entry *EntriesEnd; // End of the table with all the // // entries (non inclusive). 
// }; if (TgtBinaryDescriptorQTy.isNull()) { ASTContext &C = CGM.getContext(); auto *RD = C.buildImplicitRecord("__tgt_bin_desc"); RD->startDefinition(); addFieldToRecordDecl( C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy())); addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); RD->completeDefinition(); TgtBinaryDescriptorQTy = C.getRecordType(RD); } return TgtBinaryDescriptorQTy; } namespace { struct PrivateHelpersTy { PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) : Original(Original), PrivateCopy(PrivateCopy), PrivateElemInit(PrivateElemInit) {} const VarDecl *Original; const VarDecl *PrivateCopy; const VarDecl *PrivateElemInit; }; typedef std::pair PrivateDataTy; } // anonymous namespace static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef Privates) { if (!Privates.empty()) { auto &C = CGM.getContext(); // Build struct .kmp_privates_t. { // /* private vars */ // }; auto *RD = C.buildImplicitRecord(".kmp_privates.t"); RD->startDefinition(); for (auto &&Pair : Privates) { auto *VD = Pair.second.Original; auto Type = VD->getType(); Type = Type.getNonReferenceType(); auto *FD = addFieldToRecordDecl(C, RD, Type); if (VD->hasAttrs()) { for (specific_attr_iterator I(VD->getAttrs().begin()), E(VD->getAttrs().end()); I != E; ++I) FD->addAttr(*I); } } RD->completeDefinition(); return RD; } return nullptr; } static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy) { auto &C = CGM.getContext(); // Build struct kmp_task_t { // void * shareds; // kmp_routine_entry_t routine; // kmp_int32 part_id; // kmp_cmplrdata_t data1; // kmp_cmplrdata_t data2; // For taskloops additional fields: // kmp_uint64 lb; // kmp_uint64 ub; // kmp_int64 st; // kmp_int32 liter; // }; auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); UD->startDefinition(); addFieldToRecordDecl(C, UD, KmpInt32Ty); addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); UD->completeDefinition(); QualType KmpCmplrdataTy = C.getRecordType(UD); auto *RD = C.buildImplicitRecord("kmp_task_t"); RD->startDefinition(); addFieldToRecordDecl(C, RD, C.VoidPtrTy); addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); addFieldToRecordDecl(C, RD, KmpInt32Ty); addFieldToRecordDecl(C, RD, KmpCmplrdataTy); addFieldToRecordDecl(C, RD, KmpCmplrdataTy); if (isOpenMPTaskLoopDirective(Kind)) { QualType KmpUInt64Ty = CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); QualType KmpInt64Ty = CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); addFieldToRecordDecl(C, RD, KmpUInt64Ty); addFieldToRecordDecl(C, RD, KmpUInt64Ty); addFieldToRecordDecl(C, RD, KmpInt64Ty); addFieldToRecordDecl(C, RD, KmpInt32Ty); } RD->completeDefinition(); return RD; } static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef Privates) { auto &C = CGM.getContext(); // Build struct kmp_task_t_with_privates { // kmp_task_t task_data; // .kmp_privates_t. 
privates; // }; auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); RD->startDefinition(); addFieldToRecordDecl(C, RD, KmpTaskTQTy); if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) { addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); } RD->completeDefinition(); return RD; } /// \brief Emit a proxy function which accepts kmp_task_t as the second /// argument. /// \code /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, /// For taskloops: /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, /// tt->shareds); /// return 0; /// } /// \endcode static llvm::Value * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Value *TaskFunction, llvm::Value *TaskPrivatesMap) { auto &C = CGM.getContext(); FunctionArgList Args; ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy.withRestrict()); Args.push_back(&GtidArg); Args.push_back(&TaskTypeArg); auto &TaskEntryFnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo); auto *TaskEntry = llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage, ".omp_task_entry.", &CGM.getModule()); CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo); CodeGenFunction CGF(CGM); CGF.disableDebugInfo(); CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, // tt, // For taskloops: // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, // tt->task_data.shareds); auto *GtidParam = CGF.EmitLoadOfScalar( CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); LValue TDBase = CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy->castAs()); auto *KmpTaskTWithPrivatesQTyRD = cast(KmpTaskTWithPrivatesQTy->getAsTagDecl()); LValue Base = CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); auto *KmpTaskTQTyRD = cast(KmpTaskTQTy->getAsTagDecl()); auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); auto *PartidParam = PartIdLVal.getPointer(); auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(), CGF.ConvertTypeForMem(SharedsPtrTy)); auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); llvm::Value *PrivatesParam; if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( PrivatesLVal.getPointer(), CGF.VoidPtrTy); } else PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap, CGF.Builder .CreatePointerBitCastOrAddrSpaceCast( TDBase.getAddress(), CGF.VoidPtrTy) .getPointer()}; SmallVector 
CallArgs(std::begin(CommonArgs), std::end(CommonArgs)); if (isOpenMPTaskLoopDirective(Kind)) { auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); auto LBLVal = CGF.EmitLValueForField(Base, *LBFI); auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal(); auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); auto UBLVal = CGF.EmitLValueForField(Base, *UBFI); auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal(); auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); auto StLVal = CGF.EmitLValueForField(Base, *StFI); auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal(); auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); auto LILVal = CGF.EmitLValueForField(Base, *LIFI); auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal(); CallArgs.push_back(LBParam); CallArgs.push_back(UBParam); CallArgs.push_back(StParam); CallArgs.push_back(LIParam); } CallArgs.push_back(SharedsParam); CGF.EmitCallOrInvoke(TaskFunction, CallArgs); CGF.EmitStoreThroughLValue( RValue::get(CGF.Builder.getInt32(/*C=*/0)), CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); CGF.FinishFunction(); return TaskEntry; } static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy) { auto &C = CGM.getContext(); FunctionArgList Args; ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy.withRestrict()); Args.push_back(&GtidArg); Args.push_back(&TaskTypeArg); FunctionType::ExtInfo Info; auto &DestructorFnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo); auto *DestructorFn = llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, ".omp_task_destructor.", &CGM.getModule()); CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn, DestructorFnInfo); CodeGenFunction CGF(CGM); CGF.disableDebugInfo(); CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, Args); LValue Base = CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy->castAs()); auto *KmpTaskTWithPrivatesQTyRD = cast(KmpTaskTWithPrivatesQTy->getAsTagDecl()); auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); Base = CGF.EmitLValueForField(Base, *FI); for (auto *Field : cast(FI->getType()->getAsTagDecl())->fields()) { if (auto DtorKind = Field->getType().isDestructedType()) { auto FieldLValue = CGF.EmitLValueForField(Base, Field); CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); } } CGF.FinishFunction(); return DestructorFn; } /// \brief Emit a privates mapping function for correct handling of private and /// firstprivate variables. /// \code /// void .omp_task_privates_map.(const .privates. 
*noalias privs, /// **noalias priv1,..., **noalias privn) { /// *priv1 = &.privates.priv1; /// ...; /// *privn = &.privates.privn; /// } /// \endcode static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, ArrayRef PrivateVars, ArrayRef FirstprivateVars, ArrayRef LastprivateVars, QualType PrivatesQTy, ArrayRef Privates) { auto &C = CGM.getContext(); FunctionArgList Args; ImplicitParamDecl TaskPrivatesArg( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getPointerType(PrivatesQTy).withConst().withRestrict()); Args.push_back(&TaskPrivatesArg); llvm::DenseMap PrivateVarsPos; unsigned Counter = 1; for (auto *E: PrivateVars) { Args.push_back(ImplicitParamDecl::Create( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) .withConst() .withRestrict())); auto *VD = cast(cast(E)->getDecl()); PrivateVarsPos[VD] = Counter; ++Counter; } for (auto *E : FirstprivateVars) { Args.push_back(ImplicitParamDecl::Create( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) .withConst() .withRestrict())); auto *VD = cast(cast(E)->getDecl()); PrivateVarsPos[VD] = Counter; ++Counter; } for (auto *E: LastprivateVars) { Args.push_back(ImplicitParamDecl::Create( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) .withConst() .withRestrict())); auto *VD = cast(cast(E)->getDecl()); PrivateVarsPos[VD] = Counter; ++Counter; } auto &TaskPrivatesMapFnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); auto *TaskPrivatesMapTy = CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); auto *TaskPrivatesMap = llvm::Function::Create( TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, ".omp_task_privates_map.", &CGM.getModule()); CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap, TaskPrivatesMapFnInfo); TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); CodeGenFunction CGF(CGM); CGF.disableDebugInfo(); CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, TaskPrivatesMapFnInfo, Args); // *privi = &.privates.privi; LValue Base = CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(&TaskPrivatesArg), TaskPrivatesArg.getType()->castAs()); auto *PrivatesQTyRD = cast(PrivatesQTy->getAsTagDecl()); Counter = 0; for (auto *Field : PrivatesQTyRD->fields()) { auto FieldLVal = CGF.EmitLValueForField(Base, Field); auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); auto RefLoadLVal = CGF.EmitLoadOfPointerLValue( RefLVal.getAddress(), RefLVal.getType()->castAs()); CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); ++Counter; } CGF.FinishFunction(); return TaskPrivatesMap; } static int array_pod_sort_comparator(const PrivateDataTy *P1, const PrivateDataTy *P2) { return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0); } /// Emit initialization for private variables in task-based directives. 
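// Illustrative sketch: the '.omp_task_privates_map.' helper whose \code block
// appears above, written as plain C++ for a hypothetical task with one
// private int and one firstprivate double. It hands back the address of each
// field of the task's '.kmp_privates.t' record; '__restrict' mirrors the
// 'noalias' qualifiers.
struct KmpPrivatesSketch {
  int priv1;    // private copy
  double priv2; // firstprivate copy
};

static void
omp_task_privates_map_sketch(KmpPrivatesSketch *const __restrict privs,
                             int **__restrict priv1,
                             double **__restrict priv2) {
  *priv1 = &privs->priv1;
  *priv2 = &privs->priv2;
}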
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, Address KmpTaskSharedsPtr, LValue TDBase, const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef Privates, bool ForDup) { auto &C = CGF.getContext(); auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); LValue SrcBase; if (!Data.FirstprivateVars.empty()) { SrcBase = CGF.MakeAddrLValue( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), SharedsTy); } CodeGenFunction::CGCapturedStmtInfo CapturesInfo( cast(*D.getAssociatedStmt())); FI = cast(FI->getType()->getAsTagDecl())->field_begin(); for (auto &&Pair : Privates) { auto *VD = Pair.second.PrivateCopy; auto *Init = VD->getAnyInitializer(); if (Init && (!ForDup || (isa(Init) && !CGF.isTrivialInitializer(Init)))) { LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); if (auto *Elem = Pair.second.PrivateElemInit) { auto *OriginalVD = Pair.second.Original; auto *SharedField = CapturesInfo.lookup(OriginalVD); auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); SharedRefLValue = CGF.MakeAddrLValue( Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), SharedRefLValue.getType(), AlignmentSource::Decl); QualType Type = OriginalVD->getType(); if (Type->isArrayType()) { // Initialize firstprivate array. if (!isa(Init) || CGF.isTrivialInitializer(Init)) { // Perform simple memcpy. CGF.EmitAggregateAssign(PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type); } else { // Initialize firstprivate array using element-by-element // intialization. CGF.EmitOMPAggregateAssign( PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, [&CGF, Elem, Init, &CapturesInfo](Address DestElement, Address SrcElement) { // Clean up any temporaries needed by the initialization. CodeGenFunction::OMPPrivateScope InitScope(CGF); InitScope.addPrivate( Elem, [SrcElement]() -> Address { return SrcElement; }); (void)InitScope.Privatize(); // Emit initialization for single element. CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( CGF, &CapturesInfo); CGF.EmitAnyExprToMem(Init, DestElement, Init->getType().getQualifiers(), /*IsInitializer=*/false); }); } } else { CodeGenFunction::OMPPrivateScope InitScope(CGF); InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { return SharedRefLValue.getAddress(); }); (void)InitScope.Privatize(); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); } } else CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); } ++FI; } } /// Check if duplication function is required for taskloops. static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef Privates) { bool InitRequired = false; for (auto &&Pair : Privates) { auto *VD = Pair.second.PrivateCopy; auto *Init = VD->getAnyInitializer(); InitRequired = InitRequired || (Init && isa(Init) && !CGF.isTrivialInitializer(Init)); } return InitRequired; } /// Emit task_dup function (for initialization of /// private/firstprivate/lastprivate vars and last_iter flag) /// \code /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int /// lastpriv) { /// // setup lastprivate flag /// task_dst->last = lastpriv; /// // could be constructor calls here... 
/// } /// \endcode static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef Privates, bool WithLastIter) { auto &C = CGM.getContext(); FunctionArgList Args; ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy); Args.push_back(&DstArg); Args.push_back(&SrcArg); Args.push_back(&LastprivArg); auto &TaskDupFnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); auto *TaskDup = llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage, ".omp_task_dup.", &CGM.getModule()); CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo); CodeGenFunction CGF(CGM); CGF.disableDebugInfo(); CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args); LValue TDBase = CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(&DstArg), KmpTaskTWithPrivatesPtrQTy->castAs()); // task_dst->liter = lastpriv; if (WithLastIter) { auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); LValue Base = CGF.EmitLValueForField( TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); CGF.EmitStoreOfScalar(Lastpriv, LILVal); } // Emit initial values for private copies (if any). assert(!Privates.empty()); Address KmpTaskSharedsPtr = Address::invalid(); if (!Data.FirstprivateVars.empty()) { LValue TDBase = CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(&SrcArg), KmpTaskTWithPrivatesPtrQTy->castAs()); LValue Base = CGF.EmitLValueForField( TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); KmpTaskSharedsPtr = Address( CGF.EmitLoadOfScalar(CGF.EmitLValueForField( Base, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)), Loc), CGF.getNaturalTypeAlignment(SharedsTy)); } emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); CGF.FinishFunction(); return TaskDup; } /// Checks if destructor function is required to be generated. /// \return true if cleanups are required, false otherwise. static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { bool NeedsCleanup = false; auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); auto *PrivateRD = cast(FI->getType()->getAsTagDecl()); for (auto *FD : PrivateRD->fields()) { NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); if (NeedsCleanup) break; } return NeedsCleanup; } CGOpenMPRuntime::TaskResultTy CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data) { auto &C = CGM.getContext(); llvm::SmallVector Privates; // Aggregate privates and sort them by the alignment. 
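// Illustrative sketch: array_pod_sort_comparator above orders the pairs by
// *descending* alignment, so the '.kmp_privates.t' record lays out the most
// strictly aligned fields first and minimizes padding. A standalone analogue:
#include <algorithm>
#include <utility>
#include <vector>

static void sortPrivatesByAlignmentSketch(
    std::vector<std::pair<unsigned, const char *>> &Privates) {
  // first = alignment in bytes, second = variable name.
  std::stable_sort(Privates.begin(), Privates.end(),
                   [](const std::pair<unsigned, const char *> &A,
                      const std::pair<unsigned, const char *> &B) {
                     return A.first > B.first; // larger alignment first
                   });
}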
auto I = Data.PrivateCopies.begin(); for (auto *E : Data.PrivateVars) { auto *VD = cast(cast(E)->getDecl()); Privates.push_back(std::make_pair( C.getDeclAlign(VD), PrivateHelpersTy(VD, cast(cast(*I)->getDecl()), /*PrivateElemInit=*/nullptr))); ++I; } I = Data.FirstprivateCopies.begin(); auto IElemInitRef = Data.FirstprivateInits.begin(); for (auto *E : Data.FirstprivateVars) { auto *VD = cast(cast(E)->getDecl()); Privates.push_back(std::make_pair( C.getDeclAlign(VD), PrivateHelpersTy( VD, cast(cast(*I)->getDecl()), cast(cast(*IElemInitRef)->getDecl())))); ++I; ++IElemInitRef; } I = Data.LastprivateCopies.begin(); for (auto *E : Data.LastprivateVars) { auto *VD = cast(cast(E)->getDecl()); Privates.push_back(std::make_pair( C.getDeclAlign(VD), PrivateHelpersTy(VD, cast(cast(*I)->getDecl()), /*PrivateElemInit=*/nullptr))); ++I; } llvm::array_pod_sort(Privates.begin(), Privates.end(), array_pod_sort_comparator); auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); // Build type kmp_routine_entry_t (if not built yet). emitKmpRoutineEntryT(KmpInt32Ty); // Build type kmp_task_t (if not built yet). if (KmpTaskTQTy.isNull()) { KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); } auto *KmpTaskTQTyRD = cast(KmpTaskTQTy->getAsTagDecl()); // Build particular struct kmp_task_t for the given task. auto *KmpTaskTWithPrivatesQTyRD = createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); QualType KmpTaskTWithPrivatesPtrQTy = C.getPointerType(KmpTaskTWithPrivatesQTy); auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo(); auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy); QualType SharedsPtrTy = C.getPointerType(SharedsTy); // Emit initial values for private copies (if any). llvm::Value *TaskPrivatesMap = nullptr; auto *TaskPrivatesMapTy = std::next(cast(TaskFunction)->getArgumentList().begin(), 3) ->getType(); if (!Privates.empty()) { auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); TaskPrivatesMap = emitTaskPrivateMappingFunction( CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, FI->getType(), Privates); TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( TaskPrivatesMap, TaskPrivatesMapTy); } else { TaskPrivatesMap = llvm::ConstantPointerNull::get( cast(TaskPrivatesMapTy)); } // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, // kmp_task_t *tt); auto *TaskEntry = emitProxyTaskFunction( CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap); // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, // kmp_routine_entry_t *task_entry); // Task flags. Format is taken from // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h, // description of kmp_tasking_flags struct. enum { TiedFlag = 0x1, FinalFlag = 0x2, DestructorsFlag = 0x8, PriorityFlag = 0x20 }; unsigned Flags = Data.Tied ? 
TiedFlag : 0; bool NeedsCleanup = false; if (!Privates.empty()) { NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); if (NeedsCleanup) Flags = Flags | DestructorsFlag; } if (Data.Priority.getInt()) Flags = Flags | PriorityFlag; auto *TaskFlags = Data.Final.getPointer() ? CGF.Builder.CreateSelect(Data.Final.getPointer(), CGF.Builder.getInt32(FinalFlag), CGF.Builder.getInt32(/*C=*/0)) : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( TaskEntry, KmpRoutineEntryPtrTy)}; auto *NewTask = CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( NewTask, KmpTaskTWithPrivatesPtrTy); LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, KmpTaskTWithPrivatesQTy); LValue TDBase = CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); // Fill the data in the resulting kmp_task_t record. // Copy shareds if there are any. Address KmpTaskSharedsPtr = Address::invalid(); if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { KmpTaskSharedsPtr = Address(CGF.EmitLoadOfScalar( CGF.EmitLValueForField( TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)), Loc), CGF.getNaturalTypeAlignment(SharedsTy)); CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy); } // Emit initial values for private copies (if any). TaskResultTy Result; if (!Privates.empty()) { emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/false); if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { Result.TaskDupFn = emitTaskDupFunction( CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, /*WithLastIter=*/!Data.LastprivateVars.empty()); } } // Fields of union "kmp_cmplrdata_t" for destructors and priority. enum { Priority = 0, Destructors = 1 }; // Provide pointer to function with destructors for privates. auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl(); if (NeedsCleanup) { llvm::Value *DestructorFn = emitDestructorsFunction( CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy); LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); LValue DestructorsLV = CGF.EmitLValueForField( Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( DestructorFn, KmpRoutineEntryPtrTy), DestructorsLV); } // Set priority. 
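  // In C terms (using the kmp.h member names, data2 being a kmp_cmplrdata_t
  // union), the store below amounts roughly to:
  //   if (has-priority-clause)
  //     new_task->data2.priority = <priority-expression>;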
if (Data.Priority.getInt()) { LValue Data2LV = CGF.EmitLValueForField( TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); LValue PriorityLV = CGF.EmitLValueForField( Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); } Result.NewTask = NewTask; Result.TaskEntry = TaskEntry; Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; Result.TDBase = TDBase; Result.KmpTaskTQTyRD = KmpTaskTQTyRD; return Result; } void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) { if (!CGF.HaveInsertPoint()) return; TaskResultTy Result = emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); llvm::Value *NewTask = Result.NewTask; llvm::Value *TaskEntry = Result.TaskEntry; llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; LValue TDBase = Result.TDBase; RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; auto &C = CGM.getContext(); // Process list of dependences. Address DependenciesArray = Address::invalid(); unsigned NumDependencies = Data.Dependences.size(); if (NumDependencies) { // Dependence kind for RTL. enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 }; enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; RecordDecl *KmpDependInfoRD; QualType FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); if (KmpDependInfoTy.isNull()) { KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); KmpDependInfoRD->startDefinition(); addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); KmpDependInfoRD->completeDefinition(); KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); } else KmpDependInfoRD = cast(KmpDependInfoTy->getAsTagDecl()); CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy); // Define type kmp_depend_info[]; QualType KmpDependInfoArrayTy = C.getConstantArrayType( KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), ArrayType::Normal, /*IndexTypeQuals=*/0); // kmp_depend_info[] deps; DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); for (unsigned i = 0; i < NumDependencies; ++i) { const Expr *E = Data.Dependences[i].second; auto Addr = CGF.EmitLValue(E); llvm::Value *Size; QualType Ty = E->getType(); if (auto *ASE = dyn_cast(E->IgnoreParenImpCasts())) { LValue UpAddrLVal = CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false); llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); } else Size = CGF.getTypeSize(Ty); auto Base = CGF.MakeAddrLValue( CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize), KmpDependInfoTy); // deps[i].base_addr = &; auto BaseAddrLVal = CGF.EmitLValueForField( Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); CGF.EmitStoreOfScalar( CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), BaseAddrLVal); // deps[i].len = sizeof(); auto LenLVal = CGF.EmitLValueForField( Base, *std::next(KmpDependInfoRD->field_begin(), Len)); CGF.EmitStoreOfScalar(Size, LenLVal); // deps[i].flags = ; 
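      // The stored flag is the RTL encoding of the depend kind:
      //   depend(in: ...)    -> 0x1 (DepIn)
      //   depend(out: ...)   -> 0x3 (treated the same as inout)
      //   depend(inout: ...) -> 0x3 (DepInOut)
      // source/sink kinds are doacross-specific and cannot appear here.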
RTLDependenceKindTy DepKind; switch (Data.Dependences[i].first) { case OMPC_DEPEND_in: DepKind = DepIn; break; // Out and InOut dependencies must use the same code. case OMPC_DEPEND_out: case OMPC_DEPEND_inout: DepKind = DepInOut; break; case OMPC_DEPEND_source: case OMPC_DEPEND_sink: case OMPC_DEPEND_unknown: llvm_unreachable("Unknown task dependence type"); } auto FlagsLVal = CGF.EmitLValueForField( Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), FlagsLVal); } DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()), CGF.VoidPtrTy); } // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() // libcall. // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence // list is not empty auto *ThreadID = getThreadID(CGF, Loc); auto *UpLoc = emitUpdateLocation(CGF, Loc); llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; llvm::Value *DepTaskArgs[7]; if (NumDependencies) { DepTaskArgs[0] = UpLoc; DepTaskArgs[1] = ThreadID; DepTaskArgs[2] = NewTask; DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies); DepTaskArgs[4] = DependenciesArray.getPointer(); DepTaskArgs[5] = CGF.Builder.getInt32(0); DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } auto &&ThenCodeGen = [this, Loc, &Data, TDBase, KmpTaskTQTyRD, NumDependencies, &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { if (!Data.Tied) { auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); } if (NumDependencies) { CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); } else { CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); } // Check if parent region is untied and build return for untied task; if (auto *Region = dyn_cast_or_null(CGF.CapturedStmtInfo)) Region->emitUntiedSwitch(CGF); }; llvm::Value *DepWaitTaskArgs[6]; if (NumDependencies) { DepWaitTaskArgs[0] = UpLoc; DepWaitTaskArgs[1] = ThreadID; DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies); DepWaitTaskArgs[3] = DependenciesArray.getPointer(); DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { auto &RT = CGF.CGM.getOpenMPRuntime(); CodeGenFunction::RunCleanupsScope LocalScope(CGF); // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info // is specified. 
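    // In C terms the generated else-branch amounts roughly to (argument
    // lists abbreviated):
    //   __kmpc_omp_wait_deps(&loc, gtid, ndeps, dep_list, 0, NULL);
    //   __kmpc_omp_task_begin_if0(&loc, gtid, new_task);
    //   proxy_task_entry(gtid, new_task);
    //   __kmpc_omp_task_complete_if0(&loc, gtid, new_task);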
if (NumDependencies) CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), DepWaitTaskArgs); // Call proxy_task_entry(gtid, new_task); auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy]( CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs); }; // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, // kmp_task_t *new_task); // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, // kmp_task_t *new_task); RegionCodeGenTy RCG(CodeGen); CommonActionTy Action( RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); RCG.setAction(Action); RCG(CGF); }; if (IfCond) emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); else { RegionCodeGenTy ThenRCG(ThenCodeGen); ThenRCG(CGF); } } void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) { if (!CGF.HaveInsertPoint()) return; TaskResultTy Result = emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() // libcall. // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int // sched, kmp_uint64 grainsize, void *task_dup); llvm::Value *ThreadID = getThreadID(CGF, Loc); llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); llvm::Value *IfVal; if (IfCond) { IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, /*isSigned=*/true); } else IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); LValue LBLVal = CGF.EmitLValueForField( Result.TDBase, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); auto *LBVar = cast(cast(D.getLowerBoundVariable())->getDecl()); CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), /*IsInitializer=*/true); LValue UBLVal = CGF.EmitLValueForField( Result.TDBase, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); auto *UBVar = cast(cast(D.getUpperBoundVariable())->getDecl()); CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), /*IsInitializer=*/true); LValue StLVal = CGF.EmitLValueForField( Result.TDBase, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); auto *StVar = cast(cast(D.getStrideVariable())->getDecl()); CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), /*IsInitializer=*/true); enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; llvm::Value *TaskArgs[] = { UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(), UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()), llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0), llvm::ConstantInt::getSigned( CGF.IntTy, Data.Schedule.getPointer() ? Data.Schedule.getInt() ? NumTasks : Grainsize : NoSchedule), Data.Schedule.getPointer() ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, /*isSigned=*/false) : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), Result.TaskDupFn ? 
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn,
                                                          CGF.VoidPtrTy)
                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}

/// \brief Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
  auto RHSBegin = RHSAddr.getPointer();

  auto LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner.
If the combiner is a simple expression emit it as /// is, otherwise consider it as combiner of UDR decl and emit it as a call of /// UDR combiner function. static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp) { if (auto *CE = dyn_cast(ReductionOp)) if (auto *OVE = dyn_cast(CE->getCallee())) if (auto *DRE = dyn_cast(OVE->getSourceExpr()->IgnoreImpCasts())) if (auto *DRD = dyn_cast(DRE->getDecl())) { std::pair Reduction = CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); RValue Func = RValue::get(Reduction.first); CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); CGF.EmitIgnoredExpr(ReductionOp); return; } CGF.EmitIgnoredExpr(ReductionOp); } static llvm::Value *emitReductionFunction(CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef Privates, ArrayRef LHSExprs, ArrayRef RHSExprs, ArrayRef ReductionOps) { auto &C = CGM.getContext(); // void reduction_func(void *LHSArg, void *RHSArg); FunctionArgList Args; ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, C.VoidPtrTy); ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, C.VoidPtrTy); Args.push_back(&LHSArg); Args.push_back(&RHSArg); auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); auto *Fn = llvm::Function::Create( CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, ".omp.reduction.reduction_func", &CGM.getModule()); CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); CodeGenFunction CGF(CGM); CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); // Dst = (void*[n])(LHSArg); // Src = (void*[n])(RHSArg); Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), ArgsType), CGF.getPointerAlign()); Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), ArgsType), CGF.getPointerAlign()); // ... // *(Type*)lhs[i] = RedOp(*(Type*)lhs[i], *(Type*)rhs[i]); // ... CodeGenFunction::OMPPrivateScope Scope(CGF); auto IPriv = Privates.begin(); unsigned Idx = 0; for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { auto RHSVar = cast(cast(RHSExprs[I])->getDecl()); Scope.addPrivate(RHSVar, [&]() -> Address { return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); }); auto LHSVar = cast(cast(LHSExprs[I])->getDecl()); Scope.addPrivate(LHSVar, [&]() -> Address { return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); }); QualType PrivTy = (*IPriv)->getType(); if (PrivTy->isVariablyModifiedType()) { // Get array size and emit VLA type. ++Idx; Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize()); llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy); auto *OVE = cast(VLA->getSizeExpr()); CodeGenFunction::OpaqueValueMapping OpaqueMap( CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); CGF.EmitVariablyModifiedType(PrivTy); } } Scope.Privatize(); IPriv = Privates.begin(); auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); for (auto *E : ReductionOps) { if ((*IPriv)->getType()->isArrayType()) { // Emit reduction for array section. auto *LHSVar = cast(cast(*ILHS)->getDecl()); auto *RHSVar = cast(cast(*IRHS)->getDecl()); EmitOMPAggregateReduction( CGF, (*IPriv)->getType(), LHSVar, RHSVar, [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { emitReductionCombiner(CGF, E); }); } else // Emit reduction for array subscript or single variable. 
emitReductionCombiner(CGF, E); ++IPriv; ++ILHS; ++IRHS; } Scope.ForceCleanup(); CGF.FinishFunction(); return Fn; } static void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS) { if (PrivateRef->getType()->isArrayType()) { // Emit reduction for array section. auto *LHSVar = cast(LHS->getDecl()); auto *RHSVar = cast(RHS->getDecl()); EmitOMPAggregateReduction( CGF, PrivateRef->getType(), LHSVar, RHSVar, [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { emitReductionCombiner(CGF, ReductionOp); }); } else // Emit reduction for array subscript or single variable. emitReductionCombiner(CGF, ReductionOp); } void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates, ArrayRef LHSExprs, ArrayRef RHSExprs, ArrayRef ReductionOps, bool WithNowait, bool SimpleReduction) { if (!CGF.HaveInsertPoint()) return; // Next code should be emitted for reduction: // // static kmp_critical_name lock = { 0 }; // // void reduce_func(void *lhs[], void *rhs[]) { // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); // ... // *(Type-1*)lhs[-1] = ReductionOperation-1(*(Type-1*)lhs[-1], // *(Type-1*)rhs[-1]); // } // // ... // void *RedList[] = {&[0], ..., &[-1]}; // switch (__kmpc_reduce{_nowait}(, , , sizeof(RedList), // RedList, reduce_func, &)) { // case 1: // ... // [i] = RedOp(*[i], *[i]); // ... // __kmpc_end_reduce{_nowait}(, , &); // break; // case 2: // ... // Atomic([i] = RedOp(*[i], *[i])); // ... // [__kmpc_end_reduce(, , &);] // break; // default:; // } // // if SimpleReduction is true, only the next code is generated: // ... // [i] = RedOp(*[i], *[i]); // ... auto &C = CGM.getContext(); if (SimpleReduction) { CodeGenFunction::RunCleanupsScope Scope(CGF); auto IPriv = Privates.begin(); auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); for (auto *E : ReductionOps) { emitSingleReductionCombiner(CGF, E, *IPriv, cast(*ILHS), cast(*IRHS)); ++IPriv; ++ILHS; ++IRHS; } return; } // 1. Build a list of reduction variables. // void *RedList[] = {[0], ..., [-1]}; auto Size = RHSExprs.size(); for (auto *E : Privates) { if (E->getType()->isVariablyModifiedType()) // Reserve place for array size. ++Size; } llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); QualType ReductionArrayTy = C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); Address ReductionList = CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); auto IPriv = Privates.begin(); unsigned Idx = 0; for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize()); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), Elem); if ((*IPriv)->getType()->isVariablyModifiedType()) { // Store array size. ++Idx; Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize()); llvm::Value *Size = CGF.Builder.CreateIntCast( CGF.getVLASize( CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) .first, CGF.SizeTy, /*isSigned=*/false); CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), Elem); } } // 2. Emit reduce_func(). auto *ReductionFn = emitReductionFunction( CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, LHSExprs, RHSExprs, ReductionOps); // 3. 
Create static kmp_critical_name lock = { 0 }; auto *Lock = getCriticalRegionLock(".reduction"); // 4. Build res = __kmpc_reduce{_nowait}(, , , sizeof(RedList), // RedList, reduce_func, &); auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); auto *ThreadId = getThreadID(CGF, Loc); auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( ReductionList.getPointer(), CGF.VoidPtrTy); llvm::Value *Args[] = { IdentTLoc, // ident_t * ThreadId, // i32 CGF.Builder.getInt32(RHSExprs.size()), // i32 ReductionArrayTySize, // size_type sizeof(RedList) RL, // void *RedList ReductionFn, // void (*) (void *, void *) Lock // kmp_critical_name *& }; auto Res = CGF.EmitRuntimeCall( createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait : OMPRTL__kmpc_reduce), Args); // 5. Build switch(res) auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); // 6. Build case 1: // ... // [i] = RedOp(*[i], *[i]); // ... // __kmpc_end_reduce{_nowait}(, , &); // break; auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); CGF.EmitBlock(Case1BB); // Add emission of __kmpc_end_reduce{_nowait}(, , &); llvm::Value *EndArgs[] = { IdentTLoc, // ident_t * ThreadId, // i32 Lock // kmp_critical_name *& }; auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps]( CodeGenFunction &CGF, PrePostActionTy &Action) { auto IPriv = Privates.begin(); auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); for (auto *E : ReductionOps) { emitSingleReductionCombiner(CGF, E, *IPriv, cast(*ILHS), cast(*IRHS)); ++IPriv; ++ILHS; ++IRHS; } }; RegionCodeGenTy RCG(CodeGen); CommonActionTy Action( nullptr, llvm::None, createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait : OMPRTL__kmpc_end_reduce), EndArgs); RCG.setAction(Action); RCG(CGF); CGF.EmitBranch(DefaultBB); // 7. Build case 2: // ... // Atomic([i] = RedOp(*[i], *[i])); // ... // break; auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); CGF.EmitBlock(Case2BB); auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps]( CodeGenFunction &CGF, PrePostActionTy &Action) { auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); auto IPriv = Privates.begin(); for (auto *E : ReductionOps) { const Expr *XExpr = nullptr; const Expr *EExpr = nullptr; const Expr *UpExpr = nullptr; BinaryOperatorKind BO = BO_Comma; if (auto *BO = dyn_cast(E)) { if (BO->getOpcode() == BO_Assign) { XExpr = BO->getLHS(); UpExpr = BO->getRHS(); } } // Try to emit update expression as a simple atomic. auto *RHSExpr = UpExpr; if (RHSExpr) { // Analyze RHS part of the whole expression. if (auto *ACO = dyn_cast( RHSExpr->IgnoreParenImpCasts())) { // If this is a conditional operator, analyze its condition for // min/max reduction operator. 
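        // For example, a min reduction arrives here as the update
        //   x = x < e ? x : e;
        // analyzing the condition of the ConditionalOperator recovers the
        // comparison opcode (BO_LT) and the 'e' operand, so the update can
        // still be emitted as a simple atomic below.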
RHSExpr = ACO->getCond(); } if (auto *BORHS = dyn_cast(RHSExpr->IgnoreParenImpCasts())) { EExpr = BORHS->getRHS(); BO = BORHS->getOpcode(); } } if (XExpr) { auto *VD = cast(cast(*ILHS)->getDecl()); auto &&AtomicRedGen = [BO, VD, IPriv, Loc](CodeGenFunction &CGF, const Expr *XExpr, const Expr *EExpr, const Expr *UpExpr) { LValue X = CGF.EmitLValue(XExpr); RValue E; if (EExpr) E = CGF.EmitAnyExpr(EExpr); CGF.EmitOMPAtomicSimpleUpdateExpr( X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::AtomicOrdering::Monotonic, Loc, [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) { CodeGenFunction::OMPPrivateScope PrivateScope(CGF); PrivateScope.addPrivate( VD, [&CGF, VD, XRValue, Loc]() -> Address { Address LHSTemp = CGF.CreateMemTemp(VD->getType()); CGF.emitOMPSimpleStore( CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, VD->getType().getNonReferenceType(), Loc); return LHSTemp; }); (void)PrivateScope.Privatize(); return CGF.EmitAnyExpr(UpExpr); }); }; if ((*IPriv)->getType()->isArrayType()) { // Emit atomic reduction for array section. auto *RHSVar = cast(cast(*IRHS)->getDecl()); EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, AtomicRedGen, XExpr, EExpr, UpExpr); } else // Emit atomic reduction for array subscript or single variable. AtomicRedGen(CGF, XExpr, EExpr, UpExpr); } else { // Emit as a critical region. auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { auto &RT = CGF.CGM.getOpenMPRuntime(); RT.emitCriticalRegion( CGF, ".atomic_reduction", [=](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); emitReductionCombiner(CGF, E); }, Loc); }; if ((*IPriv)->getType()->isArrayType()) { auto *LHSVar = cast(cast(*ILHS)->getDecl()); auto *RHSVar = cast(cast(*IRHS)->getDecl()); EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, CritRedGen); } else CritRedGen(CGF, nullptr, nullptr, nullptr); } ++ILHS; ++IRHS; ++IPriv; } }; RegionCodeGenTy AtomicRCG(AtomicCodeGen); if (!WithNowait) { // Add emission of __kmpc_end_reduce(, , &); llvm::Value *EndArgs[] = { IdentTLoc, // ident_t * ThreadId, // i32 Lock // kmp_critical_name *& }; CommonActionTy Action(nullptr, llvm::None, createRuntimeFunction(OMPRTL__kmpc_end_reduce), EndArgs); AtomicRCG.setAction(Action); AtomicRCG(CGF); } else AtomicRCG(CGF); CGF.EmitBranch(DefaultBB); CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); } void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc) { if (!CGF.HaveInsertPoint()) return; // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 // global_tid); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; // Ignore return result until untied tasks are supported. 
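  // In C terms this is simply
  //   (void)__kmpc_omp_taskwait(&loc, gtid);
  // with the kmp_int32 result discarded for now.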
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
  if (auto *Region =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      auto *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
-     //   __kmpc_cancel_barrier();
      //   exit from construct;
      // }
      auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
      auto *ContBB = CGF.createBasicBlock(".cancel.continue");
      auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
-     // __kmpc_cancel_barrier();
-     emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      auto CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      auto &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
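      // The emitted control flow is roughly:
      //   if (__kmpc_cancel(&loc, gtid, cncl_kind) != 0)
      //     goto cancel.exit;  // branch through cleanups to the cancel
      //                        // destination of the enclosing region
      //   cancel.continue: ...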
auto *Result = CGF.EmitRuntimeCall( RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); // if (__kmpc_cancel()) { - // __kmpc_cancel_barrier(); // exit from construct; // } auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); auto *ContBB = CGF.createBasicBlock(".cancel.continue"); auto *Cmp = CGF.Builder.CreateIsNotNull(Result); CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); CGF.EmitBlock(ExitBB); - // __kmpc_cancel_barrier(); - RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); // exit from construct; auto CancelDest = CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); CGF.EmitBranchThroughCleanup(CancelDest); CGF.EmitBlock(ContBB, /*IsFinished=*/true); }; if (IfCond) emitOMPIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &, PrePostActionTy &) {}); else { RegionCodeGenTy ThenRCG(ThenGen); ThenRCG(CGF); } } } /// \brief Obtain information that uniquely identifies a target entry. This /// consists of the file and device IDs as well as line number associated with /// the relevant entry source location. static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, unsigned &DeviceID, unsigned &FileID, unsigned &LineNum) { auto &SM = C.getSourceManager(); // The loc should be always valid and have a file ID (the user cannot use // #pragma directives in macros) assert(Loc.isValid() && "Source location is expected to be always valid."); assert(Loc.isFileID() && "Source location is expected to refer to a file."); PresumedLoc PLoc = SM.getPresumedLoc(Loc); assert(PLoc.isValid() && "Source location is expected to be always valid."); llvm::sys::fs::UniqueID ID; if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) llvm_unreachable("Source file with target region no longer exists!"); DeviceID = ID.getDevice(); FileID = ID.getFile(); LineNum = PLoc.getLine(); } void CGOpenMPRuntime::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { assert(!ParentName.empty() && "Invalid target region parent name!"); emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); } void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { // Create a unique name for the entry function using the source location // information of the current target region. The name will be something like: // // __omp_offloading_DD_FFFF_PP_lBB // // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the // mangled name of the function that encloses the target region and BB is the // line number of the target region. unsigned DeviceID; unsigned FileID; unsigned Line; getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID, Line); SmallString<64> EntryFnName; { llvm::raw_svector_ostream OS(EntryFnName); OS << "__omp_offloading" << llvm::format("_%x", DeviceID) << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; } const CapturedStmt &CS = *cast(D.getAssociatedStmt()); CodeGenFunction CGF(CGM, true); CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); // If this target outline function is not an offload entry, we don't need to // register it. 
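  // Registration records the (device ID, file ID, parent name, line) tuple
  // together with the region ID, so the runtime can pair host and device
  // entries when the offload image is loaded. For a hypothetical device ID
  // 0x2b, file ID 0xabcd, enclosing function 'foo' and line 42, the entry
  // function above is named __omp_offloading_2b_abcd_foo_l42.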
if (!IsOffloadEntry) return; // The target region ID is used by the runtime library to identify the current // target region, so it only has to be unique and not necessarily point to // anything. It could be the pointer to the outlined function that implements // the target region, but we aren't using that so that the compiler doesn't // need to keep that, and could therefore inline the host function if proven // worthwhile during optimization. In the other hand, if emitting code for the // device, the ID has to be the function address so that it can retrieved from // the offloading entry and launched by the runtime library. We also mark the // outlined function to have external linkage in case we are emitting code for // the device, because these functions will be entry points to the device. if (CGM.getLangOpts().OpenMPIsDevice) { OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage); } else OutlinedFnID = new llvm::GlobalVariable( CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id"); // Register the information for the entry associated with this target region. OffloadEntriesInfoManager.registerTargetRegionEntryInfo( DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, /*Flags=*/0); } /// discard all CompoundStmts intervening between two constructs static const Stmt *ignoreCompoundStmts(const Stmt *Body) { while (auto *CS = dyn_cast_or_null(Body)) Body = CS->body_front(); return Body; } /// \brief Emit the num_teams clause of an enclosed teams directive at the /// target region scope. If there is no teams directive associated with the /// target directive, or if there is no num_teams clause associated with the /// enclosed teams directive, return nullptr. static llvm::Value * emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, CodeGenFunction &CGF, const OMPExecutableDirective &D) { assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " "teams directive expected to be " "emitted only for the host!"); // FIXME: For the moment we do not support combined directives with target and // teams, so we do not expect to get any num_teams clause in the provided // directive. Once we support that, this assertion can be replaced by the // actual emission of the clause expression. assert(D.getSingleClause() == nullptr && "Not expecting clause in directive."); // If the current target region has a teams region enclosed, we need to get // the number of teams to pass to the runtime function call. This is done // by generating the expression in a inlined region. This is required because // the expression is captured in the enclosing target environment when the // teams directive is not combined with target. const CapturedStmt &CS = *cast(D.getAssociatedStmt()); // FIXME: Accommodate other combined directives with teams when they become // available. if (auto *TeamsDir = dyn_cast_or_null( ignoreCompoundStmts(CS.getCapturedStmt()))) { if (auto *NTE = TeamsDir->getSingleClause()) { CGOpenMPInnerExprInfo CGInfo(CGF, CS); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty, /*IsSigned=*/true); } // If we have an enclosed teams directive but no num_teams clause we use // the default value 0. return CGF.Builder.getInt32(0); } // No teams associated with the directive. 
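  // For example, for
  //   #pragma omp target
  //   #pragma omp teams num_teams(4)
  // the expression '4' is emitted in an inlined region and cast to i32; a
  // teams directive without num_teams yields the constant 0, and the nullptr
  // returned below tells the caller there is no teams construct at all.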
return nullptr; } /// \brief Emit the thread_limit clause of an enclosed teams directive at the /// target region scope. If there is no teams directive associated with the /// target directive, or if there is no thread_limit clause associated with the /// enclosed teams directive, return nullptr. static llvm::Value * emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, CodeGenFunction &CGF, const OMPExecutableDirective &D) { assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " "teams directive expected to be " "emitted only for the host!"); // FIXME: For the moment we do not support combined directives with target and // teams, so we do not expect to get any thread_limit clause in the provided // directive. Once we support that, this assertion can be replaced by the // actual emission of the clause expression. assert(D.getSingleClause() == nullptr && "Not expecting clause in directive."); // If the current target region has a teams region enclosed, we need to get // the thread limit to pass to the runtime function call. This is done // by generating the expression in a inlined region. This is required because // the expression is captured in the enclosing target environment when the // teams directive is not combined with target. const CapturedStmt &CS = *cast(D.getAssociatedStmt()); // FIXME: Accommodate other combined directives with teams when they become // available. if (auto *TeamsDir = dyn_cast_or_null( ignoreCompoundStmts(CS.getCapturedStmt()))) { if (auto *TLE = TeamsDir->getSingleClause()) { CGOpenMPInnerExprInfo CGInfo(CGF, CS); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/true); } // If we have an enclosed teams directive but no thread_limit clause we use // the default value 0. return CGF.Builder.getInt32(0); } // No teams associated with the directive. return nullptr; } namespace { // \brief Utility to handle information from clauses associated with a given // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). // It provides a convenient interface to obtain the information and generate // code for that information. class MappableExprsHandler { public: /// \brief Values for bit flags used to specify the mapping type for /// offloading. enum OpenMPOffloadMappingFlags { /// \brief Allocate memory on the device and move data from host to device. OMP_MAP_TO = 0x01, /// \brief Allocate memory on the device and move data from device to host. OMP_MAP_FROM = 0x02, /// \brief Always perform the requested mapping action on the element, even /// if it was already mapped before. OMP_MAP_ALWAYS = 0x04, /// \brief Delete the element from the device environment, ignoring the /// current reference count associated with the element. OMP_MAP_DELETE = 0x08, /// \brief The element being mapped is a pointer, therefore the pointee /// should be mapped as well. OMP_MAP_IS_PTR = 0x10, /// \brief This flags signals that an argument is the first one relating to /// a map/private clause expression. For some cases a single /// map/privatization results in multiple arguments passed to the runtime /// library. OMP_MAP_FIRST_REF = 0x20, /// \brief Signal that the runtime library has to return the device pointer /// in the current position for the data being mapped. OMP_MAP_RETURN_PTR = 0x40, /// \brief This flag signals that the reference being passed is a pointer to /// private data. 
OMP_MAP_PRIVATE_PTR = 0x80, /// \brief Pass the element to the device by value. OMP_MAP_PRIVATE_VAL = 0x100, }; /// Class that associates information with a base pointer to be passed to the /// runtime library. class BasePointerInfo { /// The base pointer. llvm::Value *Ptr = nullptr; /// The base declaration that refers to this device pointer, or null if /// there is none. const ValueDecl *DevPtrDecl = nullptr; public: BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} llvm::Value *operator*() const { return Ptr; } const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } }; typedef SmallVector MapBaseValuesArrayTy; typedef SmallVector MapValuesArrayTy; typedef SmallVector MapFlagsArrayTy; private: /// \brief Directive from where the map clauses were extracted. const OMPExecutableDirective &CurDir; /// \brief Function the directive is being generated for. CodeGenFunction &CGF; /// \brief Set of all first private variables in the current directive. llvm::SmallPtrSet FirstPrivateDecls; /// Map between device pointer declarations and their expression components. /// The key value for declarations in 'this' is null. llvm::DenseMap< const ValueDecl *, SmallVector> DevPointersMap; llvm::Value *getExprTypeSize(const Expr *E) const { auto ExprTy = E->getType().getCanonicalType(); // Reference types are ignored for mapping purposes. if (auto *RefTy = ExprTy->getAs()) ExprTy = RefTy->getPointeeType().getCanonicalType(); // Given that an array section is considered a built-in type, we need to // do the calculation based on the length of the section instead of relying // on CGF.getTypeSize(E->getType()). if (const auto *OAE = dyn_cast(E)) { QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( OAE->getBase()->IgnoreParenImpCasts()) .getCanonicalType(); // If there is no length associated with the expression, that means we // are using the whole length of the base. if (!OAE->getLength() && OAE->getColonLoc().isValid()) return CGF.getTypeSize(BaseTy); llvm::Value *ElemSize; if (auto *PTy = BaseTy->getAs()) ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); else { auto *ATy = cast(BaseTy.getTypePtr()); assert(ATy && "Expecting array type if not a pointer type."); ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); } // If we don't have a length at this point, that is because we have an // array section with a single element. if (!OAE->getLength()) return ElemSize; auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); LengthVal = CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); } return CGF.getTypeSize(ExprTy); } /// \brief Return the corresponding bits for a given map clause modifier. Add /// a flag marking the map as a pointer if requested. Add a flag marking the /// map as the first one of a series of maps that relate to the same map /// expression. unsigned getMapTypeBits(OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag, bool AddIsFirstFlag) const { unsigned Bits = 0u; switch (MapType) { case OMPC_MAP_alloc: case OMPC_MAP_release: // alloc and release is the default behavior in the runtime library, i.e. // if we don't pass any bits alloc/release that is what the runtime is // going to do. Therefore, we don't need to signal anything for these two // type modifiers. 
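      // (So map(alloc: x) contributes only positional bits such as
      // OMP_MAP_FIRST_REF, while map(tofrom: x) on a first component becomes
      // OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_FIRST_REF = 0x23, and an
      // 'always' modifier additionally ORs in OMP_MAP_ALWAYS.)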
break; case OMPC_MAP_to: Bits = OMP_MAP_TO; break; case OMPC_MAP_from: Bits = OMP_MAP_FROM; break; case OMPC_MAP_tofrom: Bits = OMP_MAP_TO | OMP_MAP_FROM; break; case OMPC_MAP_delete: Bits = OMP_MAP_DELETE; break; default: llvm_unreachable("Unexpected map type!"); break; } if (AddPtrFlag) Bits |= OMP_MAP_IS_PTR; if (AddIsFirstFlag) Bits |= OMP_MAP_FIRST_REF; if (MapTypeModifier == OMPC_MAP_always) Bits |= OMP_MAP_ALWAYS; return Bits; } /// \brief Return true if the provided expression is a final array section. A /// final array section, is one whose length can't be proved to be one. bool isFinalArraySectionExpression(const Expr *E) const { auto *OASE = dyn_cast(E); // It is not an array section and therefore not a unity-size one. if (!OASE) return false; // An array section with no colon always refer to a single element. if (OASE->getColonLoc().isInvalid()) return false; auto *Length = OASE->getLength(); // If we don't have a length we have to check if the array has size 1 // for this dimension. Also, we should always expect a length if the // base type is pointer. if (!Length) { auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType( OASE->getBase()->IgnoreParenImpCasts()) .getCanonicalType(); if (auto *ATy = dyn_cast(BaseQTy.getTypePtr())) return ATy->getSize().getSExtValue() != 1; // If we don't have a constant dimension length, we have to consider // the current section as having any size, so it is not necessarily // unitary. If it happen to be unity size, that's user fault. return true; } // Check if the length evaluates to 1. llvm::APSInt ConstLength; if (!Length->EvaluateAsInt(ConstLength, CGF.getContext())) return true; // Can have more that size 1. return ConstLength.getSExtValue() != 1; } /// \brief Generate the base pointers, section pointers, sizes and map type /// bits for the provided map type, map modifier, and expression components. /// \a IsFirstComponent should be set to true if the provided set of /// components is the first associated with a capture. void generateInfoForComponentList( OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, OMPClauseMappableExprCommon::MappableExprComponentListRef Components, MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, bool IsFirstComponentList) const { // The following summarizes what has to be generated for each map and the // types bellow. The generated information is expressed in this order: // base pointer, section pointer, size, flags // (to add to the ones that come from the map type and modifier). 
// // double d; // int i[100]; // float *p; // // struct S1 { // int i; // float f[50]; // } // struct S2 { // int i; // float f[50]; // S1 s; // double *p; // struct S2 *ps; // } // S2 s; // S2 *ps; // // map(d) // &d, &d, sizeof(double), noflags // // map(i) // &i, &i, 100*sizeof(int), noflags // // map(i[1:23]) // &i(=&i[0]), &i[1], 23*sizeof(int), noflags // // map(p) // &p, &p, sizeof(float*), noflags // // map(p[1:24]) // p, &p[1], 24*sizeof(float), noflags // // map(s) // &s, &s, sizeof(S2), noflags // // map(s.i) // &s, &(s.i), sizeof(int), noflags // // map(s.s.f) // &s, &(s.i.f), 50*sizeof(int), noflags // // map(s.p) // &s, &(s.p), sizeof(double*), noflags // // map(s.p[:22], s.a s.b) // &s, &(s.p), sizeof(double*), noflags // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag // // map(s.ps) // &s, &(s.ps), sizeof(S2*), noflags // // map(s.ps->s.i) // &s, &(s.ps), sizeof(S2*), noflags // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag // // map(s.ps->ps) // &s, &(s.ps), sizeof(S2*), noflags // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag // // map(s.ps->ps->ps) // &s, &(s.ps), sizeof(S2*), noflags // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag // // map(s.ps->ps->s.f[:22]) // &s, &(s.ps), sizeof(S2*), noflags // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag // // map(ps) // &ps, &ps, sizeof(S2*), noflags // // map(ps->i) // ps, &(ps->i), sizeof(int), noflags // // map(ps->s.f) // ps, &(ps->s.f[0]), 50*sizeof(float), noflags // // map(ps->p) // ps, &(ps->p), sizeof(double*), noflags // // map(ps->p[:22]) // ps, &(ps->p), sizeof(double*), noflags // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag // // map(ps->ps) // ps, &(ps->ps), sizeof(S2*), noflags // // map(ps->ps->s.i) // ps, &(ps->ps), sizeof(S2*), noflags // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag // // map(ps->ps->ps) // ps, &(ps->ps), sizeof(S2*), noflags // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag // // map(ps->ps->ps->ps) // ps, &(ps->ps), sizeof(S2*), noflags // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag // // map(ps->ps->ps->s.f[:22]) // ps, &(ps->ps), sizeof(S2*), noflags // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + // extra_flag // Track if the map information being generated is the first for a capture. bool IsCaptureFirstInfo = IsFirstComponentList; // Scan the components from the base to the complete expression. auto CI = Components.rbegin(); auto CE = Components.rend(); auto I = CI; // Track if the map information being generated is the first for a list of // components. bool IsExpressionFirstInfo = true; llvm::Value *BP = nullptr; if (auto *ME = dyn_cast(I->getAssociatedExpression())) { // The base is the 'this' pointer. The content of the pointer is going // to be the base of the field being mapped. BP = CGF.EmitScalarExpr(ME->getBase()); } else { // The base is the reference to the variable. // BP = &Var. BP = CGF.EmitLValue(cast(I->getAssociatedExpression())) .getPointer(); // If the variable is a pointer and is being dereferenced (i.e. is not // the last component), the base has to be the pointer itself, not its // reference. References are ignored for mapping purposes. 
QualType Ty = I->getAssociatedDeclaration()->getType().getNonReferenceType(); if (Ty->isAnyPointerType() && std::next(I) != CE) { auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(BP, Ty); BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(), Ty->castAs()) .getPointer(); // We do not need to generate individual map information for the // pointer, it can be associated with the combined storage. ++I; } } for (; I != CE; ++I) { auto Next = std::next(I); // We need to generate the addresses and sizes if this is the last // component, if the component is a pointer or if it is an array section // whose length can't be proved to be one. If this is a pointer, it // becomes the base address for the following components. // A final array section, is one whose length can't be proved to be one. bool IsFinalArraySection = isFinalArraySectionExpression(I->getAssociatedExpression()); // Get information on whether the element is a pointer. Have to do a // special treatment for array sections given that they are built-in // types. const auto *OASE = dyn_cast(I->getAssociatedExpression()); bool IsPointer = (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) .getCanonicalType() ->isAnyPointerType()) || I->getAssociatedExpression()->getType()->isAnyPointerType(); if (Next == CE || IsPointer || IsFinalArraySection) { // If this is not the last component, we expect the pointer to be // associated with an array expression or member expression. assert((Next == CE || isa(Next->getAssociatedExpression()) || isa(Next->getAssociatedExpression()) || isa(Next->getAssociatedExpression())) && "Unexpected expression"); auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer(); auto *Size = getExprTypeSize(I->getAssociatedExpression()); // If we have a member expression and the current component is a // reference, we have to map the reference too. Whenever we have a // reference, the section that reference refers to is going to be a // load instruction from the storage assigned to the reference. if (isa(I->getAssociatedExpression()) && I->getAssociatedDeclaration()->getType()->isReferenceType()) { auto *LI = cast(LB); auto *RefAddr = LI->getPointerOperand(); BasePointers.push_back(BP); Pointers.push_back(RefAddr); Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); Types.push_back(getMapTypeBits( /*MapType*/ OMPC_MAP_alloc, /*MapTypeModifier=*/OMPC_MAP_unknown, !IsExpressionFirstInfo, IsCaptureFirstInfo)); IsExpressionFirstInfo = false; IsCaptureFirstInfo = false; // The reference will be the next base address. BP = RefAddr; } BasePointers.push_back(BP); Pointers.push_back(LB); Sizes.push_back(Size); // We need to add a pointer flag for each map that comes from the // same expression except for the first one. We also need to signal // this map is the first one that relates with the current capture // (there is a set of entries for each capture). Types.push_back(getMapTypeBits(MapType, MapTypeModifier, !IsExpressionFirstInfo, IsCaptureFirstInfo)); // If we have a final array section, we are done with this expression. if (IsFinalArraySection) break; // The pointer becomes the base for the next element. if (Next != CE) BP = LB; IsExpressionFirstInfo = false; IsCaptureFirstInfo = false; continue; } } } /// \brief Return the adjusted map modifiers if the declaration a capture /// refers to appears in a first-private clause. This is expected to be used /// only with directives that start with 'target'. 
unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap, unsigned CurrentModifiers) { assert(Cap.capturesVariable() && "Expected capture by reference only!"); // A first private variable captured by reference will use only the // 'private ptr' and 'map to' flag. Return the right flags if the captured // declaration is known as first-private in this handler. if (FirstPrivateDecls.count(Cap.getCapturedVar())) return MappableExprsHandler::OMP_MAP_PRIVATE_PTR | MappableExprsHandler::OMP_MAP_TO; // We didn't modify anything. return CurrentModifiers; } public: MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) : CurDir(Dir), CGF(CGF) { // Extract firstprivate clause information. for (const auto *C : Dir.getClausesOfKind()) for (const auto *D : C->varlists()) FirstPrivateDecls.insert( cast(cast(D)->getDecl())->getCanonicalDecl()); // Extract device pointer clause information. for (const auto *C : Dir.getClausesOfKind()) for (auto L : C->component_lists()) DevPointersMap[L.first].push_back(L.second); } /// \brief Generate all the base pointers, section pointers, sizes and map /// types for the extracted mappable expressions. Also, for each item that /// relates with a device pointer, a pair of the relevant declaration and /// index where it occurs is appended to the device pointers info array. void generateAllInfo(MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types) const { BasePointers.clear(); Pointers.clear(); Sizes.clear(); Types.clear(); struct MapInfo { /// Kind that defines how a device pointer has to be returned. enum ReturnPointerKind { // Don't have to return any pointer. RPK_None, // Pointer is the base of the declaration. RPK_Base, // Pointer is a member of the base declaration - 'this' RPK_Member, // Pointer is a reference and a member of the base declaration - 'this' RPK_MemberReference, }; OMPClauseMappableExprCommon::MappableExprComponentListRef Components; OpenMPMapClauseKind MapType; OpenMPMapClauseKind MapTypeModifier; ReturnPointerKind ReturnDevicePointer; MapInfo() : MapType(OMPC_MAP_unknown), MapTypeModifier(OMPC_MAP_unknown), ReturnDevicePointer(RPK_None) {} MapInfo( OMPClauseMappableExprCommon::MappableExprComponentListRef Components, OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, ReturnPointerKind ReturnDevicePointer) : Components(Components), MapType(MapType), MapTypeModifier(MapTypeModifier), ReturnDevicePointer(ReturnDevicePointer) {} }; // We have to process the component lists that relate with the same // declaration in a single chunk so that we can generate the map flags // correctly. Therefore, we organize all lists in a map. llvm::DenseMap> Info; // Helper function to fill the information map for the different supported // clauses. auto &&InfoGen = [&Info]( const ValueDecl *D, OMPClauseMappableExprCommon::MappableExprComponentListRef L, OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier, MapInfo::ReturnPointerKind ReturnDevicePointer) { const ValueDecl *VD = D ? cast(D->getCanonicalDecl()) : nullptr; Info[VD].push_back({L, MapType, MapModifier, ReturnDevicePointer}); }; // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 
for (auto *C : this->CurDir.getClausesOfKind()) for (auto L : C->component_lists()) InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(), MapInfo::RPK_None); for (auto *C : this->CurDir.getClausesOfKind()) for (auto L : C->component_lists()) InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown, MapInfo::RPK_None); for (auto *C : this->CurDir.getClausesOfKind()) for (auto L : C->component_lists()) InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown, MapInfo::RPK_None); // Look at the use_device_ptr clause information and mark the existing map // entries as such. If there is no map information for an entry in the // use_device_ptr list, we create one with map type 'alloc' and zero size // section. It is the user fault if that was not mapped before. // FIXME: MSVC 2013 seems to require this-> to find member CurDir. for (auto *C : this->CurDir.getClausesOfKind()) for (auto L : C->component_lists()) { assert(!L.second.empty() && "Not expecting empty list of components!"); const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); VD = cast(VD->getCanonicalDecl()); auto *IE = L.second.back().getAssociatedExpression(); // If the first component is a member expression, we have to look into // 'this', which maps to null in the map of map information. Otherwise // look directly for the information. auto It = Info.find(isa(IE) ? nullptr : VD); // We potentially have map information for this declaration already. // Look for the first set of components that refer to it. if (It != Info.end()) { auto CI = std::find_if( It->second.begin(), It->second.end(), [VD](const MapInfo &MI) { return MI.Components.back().getAssociatedDeclaration() == VD; }); // If we found a map entry, signal that the pointer has to be returned // and move on to the next declaration. if (CI != It->second.end()) { CI->ReturnDevicePointer = isa(IE) ? (VD->getType()->isReferenceType() ? MapInfo::RPK_MemberReference : MapInfo::RPK_Member) : MapInfo::RPK_Base; continue; } } // We didn't find any match in our map information - generate a zero // size array section. // FIXME: MSVC 2013 seems to require this-> to find member CGF. llvm::Value *Ptr = this->CGF .EmitLoadOfLValue(this->CGF.EmitLValue(IE), SourceLocation()) .getScalarVal(); BasePointers.push_back({Ptr, VD}); Pointers.push_back(Ptr); Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy)); Types.push_back(OMP_MAP_RETURN_PTR | OMP_MAP_FIRST_REF); } for (auto &M : Info) { // We need to know when we generate information for the first component // associated with a capture, because the mapping flags depend on it. bool IsFirstComponentList = true; for (MapInfo &L : M.second) { assert(!L.Components.empty() && "Not expecting declaration with no component lists."); // Remember the current base pointer index. unsigned CurrentBasePointersIdx = BasePointers.size(); // FIXME: MSVC 2013 seems to require this-> to find the member method. this->generateInfoForComponentList(L.MapType, L.MapTypeModifier, L.Components, BasePointers, Pointers, Sizes, Types, IsFirstComponentList); // If this entry relates with a device pointer, set the relevant // declaration and add the 'return pointer' flag. if (IsFirstComponentList && L.ReturnDevicePointer != MapInfo::RPK_None) { // If the pointer is not the base of the map, we need to skip the // base. If it is a reference in a member field, we also need to skip // the map of the reference. 
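          // Concretely: for RPK_Member one extra entry (the base of 'this')
          // precedes the returned pointer, and for RPK_MemberReference two do
          // (that base plus the implicit map of the reference), so the index
          // is advanced accordingly below.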
if (L.ReturnDevicePointer != MapInfo::RPK_Base) { ++CurrentBasePointersIdx; if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference) ++CurrentBasePointersIdx; } assert(BasePointers.size() > CurrentBasePointersIdx && "Unexpected number of mapped base pointers."); auto *RelevantVD = L.Components.back().getAssociatedDeclaration(); assert(RelevantVD && "No relevant declaration related with device pointer??"); BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PTR; } IsFirstComponentList = false; } } } /// \brief Generate the base pointers, section pointers, sizes and map types /// associated to a given capture. void generateInfoForCapture(const CapturedStmt::Capture *Cap, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types) const { assert(!Cap->capturesVariableArrayType() && "Not expecting to generate map info for a variable array type!"); BasePointers.clear(); Pointers.clear(); Sizes.clear(); Types.clear(); // We need to know when we generating information for the first component // associated with a capture, because the mapping flags depend on it. bool IsFirstComponentList = true; const ValueDecl *VD = Cap->capturesThis() ? nullptr : cast(Cap->getCapturedVar()->getCanonicalDecl()); // If this declaration appears in a is_device_ptr clause we just have to // pass the pointer by value. If it is a reference to a declaration, we just // pass its value, otherwise, if it is a member expression, we need to map // 'to' the field. if (!VD) { auto It = DevPointersMap.find(VD); if (It != DevPointersMap.end()) { for (auto L : It->second) { generateInfoForComponentList( /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L, BasePointers, Pointers, Sizes, Types, IsFirstComponentList); IsFirstComponentList = false; } return; } } else if (DevPointersMap.count(VD)) { BasePointers.push_back({Arg, VD}); Pointers.push_back(Arg); Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); Types.push_back(OMP_MAP_PRIVATE_VAL | OMP_MAP_FIRST_REF); return; } // FIXME: MSVC 2013 seems to require this-> to find member CurDir. for (auto *C : this->CurDir.getClausesOfKind()) for (auto L : C->decl_component_lists(VD)) { assert(L.first == VD && "We got information for the wrong declaration??"); assert(!L.second.empty() && "Not expecting declaration with no component lists."); generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers, Pointers, Sizes, Types, IsFirstComponentList); IsFirstComponentList = false; } return; } /// \brief Generate the default map information for a given capture \a CI, /// record field declaration \a RI and captured value \a CV. void generateDefaultMapInfo(const CapturedStmt::Capture &CI, const FieldDecl &RI, llvm::Value *CV, MapBaseValuesArrayTy &CurBasePointers, MapValuesArrayTy &CurPointers, MapValuesArrayTy &CurSizes, MapFlagsArrayTy &CurMapTypes) { // Do the default mapping. if (CI.capturesThis()) { CurBasePointers.push_back(CV); CurPointers.push_back(CV); const PointerType *PtrTy = cast(RI.getType().getTypePtr()); CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType())); // Default map type. CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); } else if (CI.capturesVariableByCopy()) { CurBasePointers.push_back(CV); CurPointers.push_back(CV); if (!RI.getType()->isAnyPointerType()) { // We have to signal to the runtime captures passed by value that are // not pointers. 
CurMapTypes.push_back(OMP_MAP_PRIVATE_VAL); CurSizes.push_back(CGF.getTypeSize(RI.getType())); } else { // Pointers are implicitly mapped with a zero size and no flags // (other than first map that is added for all implicit maps). CurMapTypes.push_back(0u); CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy)); } } else { assert(CI.capturesVariable() && "Expected captured reference."); CurBasePointers.push_back(CV); CurPointers.push_back(CV); const ReferenceType *PtrTy = cast(RI.getType().getTypePtr()); QualType ElementType = PtrTy->getPointeeType(); CurSizes.push_back(CGF.getTypeSize(ElementType)); // The default map type for a scalar/complex type is 'to' because by // default the value doesn't have to be retrieved. For an aggregate // type, the default is 'tofrom'. CurMapTypes.push_back(ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM) : OMP_MAP_TO); // If we have a capture by reference we may need to add the private // pointer flag if the base declaration shows in some first-private // clause. CurMapTypes.back() = adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back()); } // Every default map produces a single argument, so, it is always the // first one. CurMapTypes.back() |= OMP_MAP_FIRST_REF; } }; enum OpenMPOffloadingReservedDeviceIDs { /// \brief Device ID if the device was not defined, runtime should get it /// from environment variables in the spec. OMP_DEVICEID_UNDEF = -1, }; } // anonymous namespace /// \brief Emit the arrays used to pass the captures and map information to the /// offloading runtime library. If there is no map or capture information, /// return nullptr by reference. static void emitOffloadingArrays(CodeGenFunction &CGF, MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, MappableExprsHandler::MapValuesArrayTy &Pointers, MappableExprsHandler::MapValuesArrayTy &Sizes, MappableExprsHandler::MapFlagsArrayTy &MapTypes, CGOpenMPRuntime::TargetDataInfo &Info) { auto &CGM = CGF.CGM; auto &Ctx = CGF.getContext(); // Reset the array information. Info.clearArrayInfo(); Info.NumberOfPtrs = BasePointers.size(); if (Info.NumberOfPtrs) { // Detect if we have any capture size requiring runtime evaluation of the // size so that a constant array could be eventually used. bool hasRuntimeEvaluationCaptureSize = false; for (auto *S : Sizes) if (!isa(S)) { hasRuntimeEvaluationCaptureSize = true; break; } llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); QualType PointerArrayType = Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, /*IndexTypeQuals=*/0); Info.BasePointersArray = CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); Info.PointersArray = CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); // If we don't have any VLA types or other types that require runtime // evaluation, we can use a constant array for the map sizes, otherwise we // need to fill up the arrays as we do for the pointers. if (hasRuntimeEvaluationCaptureSize) { QualType SizeArrayType = Ctx.getConstantArrayType( Ctx.getSizeType(), PointerNumAP, ArrayType::Normal, /*IndexTypeQuals=*/0); Info.SizesArray = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); } else { // We expect all the sizes to be constant, so we collect them to create // a constant array. 
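      // For instance, if only fixed-size objects are mapped (say, an 'int'
      // and a 'double[8]'), the sizes 4 and 64 are compile-time constants and
      // can live in a private constant global; a VLA or a runtime-length
      // array section forces the store-based path above instead.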
SmallVector ConstSizes; for (auto S : Sizes) ConstSizes.push_back(cast(S)); auto *SizesArrayInit = llvm::ConstantArray::get( llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes); auto *SizesArrayGbl = new llvm::GlobalVariable( CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, SizesArrayInit, ".offload_sizes"); SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); Info.SizesArray = SizesArrayGbl; } // The map types are always constant so we don't need to generate code to // fill arrays. Instead, we create an array constant. llvm::Constant *MapTypesArrayInit = llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes); auto *MapTypesArrayGbl = new llvm::GlobalVariable( CGM.getModule(), MapTypesArrayInit->getType(), /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MapTypesArrayInit, ".offload_maptypes"); MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); Info.MapTypesArray = MapTypesArrayGbl; for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) { llvm::Value *BPVal = *BasePointers[i]; if (BPVal->getType()->isPointerTy()) BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy); else { assert(BPVal->getType()->isIntegerTy() && "If not a pointer, the value type must be an integer."); BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy); } llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), Info.BasePointersArray, 0, i); Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); CGF.Builder.CreateStore(BPVal, BPAddr); if (Info.requiresDevicePointerInfo()) if (auto *DevVD = BasePointers[i].getDevicePtrDecl()) Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr)); llvm::Value *PVal = Pointers[i]; if (PVal->getType()->isPointerTy()) PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy); else { assert(PVal->getType()->isIntegerTy() && "If not a pointer, the value type must be an integer."); PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy); } llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), Info.PointersArray, 0, i); Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); CGF.Builder.CreateStore(PVal, PAddr); if (hasRuntimeEvaluationCaptureSize) { llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray, /*Idx0=*/0, /*Idx1=*/i); Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); CGF.Builder.CreateStore( CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true), SAddr); } } } } /// \brief Emit the arguments to be passed to the runtime library based on the /// arrays of pointers, sizes and map types. 
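/// When Info.NumberOfPtrs is zero, suitably typed null pointers are produced
/// instead of GEPs into the (nonexistent) arrays.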
static void emitOffloadingArraysArgument( CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { auto &CGM = CGF.CGM; if (Info.NumberOfPtrs) { BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), Info.BasePointersArray, /*Idx0=*/0, /*Idx1=*/0); PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), Info.PointersArray, /*Idx0=*/0, /*Idx1=*/0); SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray, /*Idx0=*/0, /*Idx1=*/0); MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.Int32Ty, Info.NumberOfPtrs), Info.MapTypesArray, /*Idx0=*/0, /*Idx1=*/0); } else { BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); MapTypesArrayArg = llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()); } } void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Value *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, const Expr *Device, ArrayRef CapturedVars) { if (!CGF.HaveInsertPoint()) return; assert(OutlinedFn && "Invalid outlined function!"); auto &Ctx = CGF.getContext(); // Fill up the arrays with all the captured variables. MappableExprsHandler::MapValuesArrayTy KernelArgs; MappableExprsHandler::MapBaseValuesArrayTy BasePointers; MappableExprsHandler::MapValuesArrayTy Pointers; MappableExprsHandler::MapValuesArrayTy Sizes; MappableExprsHandler::MapFlagsArrayTy MapTypes; MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; MappableExprsHandler::MapValuesArrayTy CurPointers; MappableExprsHandler::MapValuesArrayTy CurSizes; MappableExprsHandler::MapFlagsArrayTy CurMapTypes; // Get mappable expression information. MappableExprsHandler MEHandler(D, CGF); const CapturedStmt &CS = *cast(D.getAssociatedStmt()); auto RI = CS.getCapturedRecordDecl()->field_begin(); auto CV = CapturedVars.begin(); for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), CE = CS.capture_end(); CI != CE; ++CI, ++RI, ++CV) { StringRef Name; QualType Ty; CurBasePointers.clear(); CurPointers.clear(); CurSizes.clear(); CurMapTypes.clear(); // VLA sizes are passed to the outlined region by copy and do not have map // information associated. if (CI->capturesVariableArrayType()) { CurBasePointers.push_back(*CV); CurPointers.push_back(*CV); CurSizes.push_back(CGF.getTypeSize(RI->getType())); // Copy to the device as an argument. No need to retrieve it. CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL | MappableExprsHandler::OMP_MAP_FIRST_REF); } else { // If we have any information in the map clause, we use it, otherwise we // just do a default mapping. MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, CurSizes, CurMapTypes); if (CurBasePointers.empty()) MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, CurPointers, CurSizes, CurMapTypes); } // We expect to have at least an element of information for this capture. 
    assert(!CurBasePointers.empty() &&
           "Non-existing map pointer for capture!");
    assert(CurBasePointers.size() == CurPointers.size() &&
           CurBasePointers.size() == CurSizes.size() &&
           CurBasePointers.size() == CurMapTypes.size() &&
           "Inconsistent map information sizes!");

    // The kernel args are always the first elements of the base pointers
    // associated with a capture.
    KernelArgs.push_back(*CurBasePointers.front());

    // We need to append the results of this capture to what we already have.
    BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
    Pointers.append(CurPointers.begin(), CurPointers.end());
    Sizes.append(CurSizes.begin(), CurSizes.end());
    MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
  }

  // Keep track of whether the host function has to be executed.
  auto OffloadErrorQType =
      Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
  auto OffloadError = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
      OffloadErrorQType);
  CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty),
                        OffloadError);

  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [&Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, Device,
                    OutlinedFnID, OffloadError, OffloadErrorQType,
                    &D](CodeGenFunction &CGF, PrePostActionTy &) {
    auto &RT = CGF.CGM.getOpenMPRuntime();
    // Emit the offloading arrays.
    TargetDataInfo Info;
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using it, so the compiler
    // does not need to keep it alive and can therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int32Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size());

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    auto *NumTeams = emitNumTeamsClauseForTargetDirective(RT, CGF, D);
    auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(RT, CGF, D);

    // If we have NumTeams defined this means that we have an enclosed teams
    // region. Therefore we also expect to have ThreadLimit defined. These two
    // values should be defined in the presence of a teams directive,
    // regardless of whether any clauses are associated with it. If the user
    // is using teams but no clauses, these two values will be the default
    // that should be passed to the runtime library - a 32-bit integer with
    // the value zero.
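    // The runtime call built below has roughly this shape (a sketch; the
    // exact LLVM types follow the arrays created above, e.g. i64 sizes on a
    // 64-bit host):
    //   %ret = call i32 @__tgt_target_teams(i32 %device_id, i8* %region_id,
    //              i32 %num_args, i8** %base_ptrs, i8** %ptrs, i64* %sizes,
    //              i32* %map_types, i32 %num_teams, i32 %thread_limit)
    // Without an enclosed teams region, the same call minus the last two
    // arguments targets @__tgt_target instead.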
    if (NumTeams) {
      assert(ThreadLimit && "Thread limit expression should be available "
                            "along with number of teams.");
      llvm::Value *OffloadingArgs[] = {
          DeviceID,           OutlinedFnID,
          PointerNum,         Info.BasePointersArray,
          Info.PointersArray, Info.SizesArray,
          Info.MapTypesArray, NumTeams,
          ThreadLimit};
      Return = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {
          DeviceID,           OutlinedFnID,
          PointerNum,         Info.BasePointersArray,
          Info.PointersArray, Info.SizesArray,
          Info.MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__tgt_target), OffloadingArgs);
    }

    CGF.EmitStoreOfScalar(Return, OffloadError);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u),
                          OffloadError);
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on
  // the host regardless of the conditional in the if clause if, e.g., the
  // user does not specify target triples.
  if (OutlinedFnID) {
    if (IfCond)
      emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
    else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(ElseGen);
    ElseRCG(CGF);
  }

  // Check the error code and execute the host version if required.
  auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
  auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont");
  auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation());
  auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal);
  CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

  CGF.EmitBlock(OffloadFailedBlock);
  CGF.Builder.CreateCall(OutlinedFn, KernelArgs);
  CGF.EmitBranch(OffloadContBlock);

  CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
}

void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // If we find an OMP target directive, codegen the outlined function and
  // register the result.
  // FIXME: Add other directives with target when they become supported.
  bool isTargetDirective = isa<OMPTargetDirective>(S);

  if (isTargetDirective) {
    auto *E = cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point?
    // If so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    llvm::Function *Fn;
    llvm::Constant *Addr;
    std::tie(Fn, Addr) =
        CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
            CGM, cast<OMPTargetDirective>(*E), ParentName,
            /*isOffloadEntry=*/true);
    assert(Fn && Addr && "Target region emission failed.");
    return;
  }

  if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(),
        ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (auto *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  auto &FD = *cast<FunctionDecl>(GD.getDecl());

  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Try to detect target regions in the function.
  scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD));

  // We should not emit any function other than the ones created during the
  // scanning. Therefore, we signal that this function is completely dealt
  // with.
  return true;
}

bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (auto *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    auto *Dtor = RD->getDestructor();
    if (Dtor) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // If we are in target mode we do not emit any global (declare target is not
  // implemented yet). Therefore we signal that GD was processed in this case.
  return true;
}

bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  auto *VD = GD.getDecl();
  if (isa<FunctionDecl>(VD))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
  // If we have offloading in the current module, we need to emit the entries
  // now and register the offloading descriptor.
  createOffloadEntriesAndInfoMetadata();

  // Create and register the offloading binary descriptors. This is the main
  // entity that captures all the information about offloading in the current
  // compilation unit.
  return createOffloadingBinaryDescriptorRegistration();
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Value *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  auto *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  auto *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      (NumTeams)
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      (ThreadLimit)
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams,
  // thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all
  // the arguments of the runtime call by reference because they are used in
  // the closing of the region.
  auto &&BeginThenGen = [&D, &CGF, Device, &Info, &CodeGen, &NoPrivAction](
      CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int32Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);

    // Emit the number of elements in the offloading arrays.
    auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    auto &RT = CGF.CGM.getOpenMPRuntime();
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the
    // region here. It will have to be duplicated: with and without
    // privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [&CGF, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int32Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);

    // Emit the number of elements in the offloading arrays.
auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); llvm::Value *OffloadingArgs[] = { DeviceID, PointerNum, BasePointersArrayArg, PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; auto &RT = CGF.CGM.getOpenMPRuntime(); CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end), OffloadingArgs); }; // If we need device pointer privatization, we need to emit the body of the // region with no privatization in the 'else' branch of the conditional. // Otherwise, we don't have to do anything. auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, PrePostActionTy &) { if (!Info.CaptureDeviceAddrMap.empty()) { CodeGen.setAction(NoPrivAction); CodeGen(CGF); } }; // We don't have to do anything to close the region if the if clause evaluates // to false. auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; if (IfCond) { emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); } else { RegionCodeGenTy RCG(BeginThenGen); RCG(CGF); } // If we don't require privatization of device pointers, we emit the body in // between the runtime calls. This avoids duplicating the body code. if (Info.CaptureDeviceAddrMap.empty()) { CodeGen.setAction(NoPrivAction); CodeGen(CGF); } if (IfCond) { emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen); } else { RegionCodeGenTy RCG(EndThenGen); RCG(CGF); } } void CGOpenMPRuntime::emitTargetDataStandAloneCall( CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) { if (!CGF.HaveInsertPoint()) return; assert((isa(D) || isa(D) || isa(D)) && "Expecting either target enter, exit data, or update directives."); // Generate the code for the opening of the data environment. auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) { // Fill up the arrays with all the mapped variables. MappableExprsHandler::MapBaseValuesArrayTy BasePointers; MappableExprsHandler::MapValuesArrayTy Pointers; MappableExprsHandler::MapValuesArrayTy Sizes; MappableExprsHandler::MapFlagsArrayTy MapTypes; // Get map clause information. MappableExprsHandler MEHandler(D, CGF); MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); // Fill up the arrays and create the arguments. TargetDataInfo Info; emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); emitOffloadingArraysArgument(CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, Info.MapTypesArray, Info); // Emit device ID if any. llvm::Value *DeviceID = nullptr; if (Device) DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), CGF.Int32Ty, /*isSigned=*/true); else DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); // Emit the number of elements in the offloading arrays. auto *PointerNum = CGF.Builder.getInt32(BasePointers.size()); llvm::Value *OffloadingArgs[] = { DeviceID, PointerNum, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, Info.MapTypesArray}; auto &RT = CGF.CGM.getOpenMPRuntime(); // Select the right runtime function call for each expected standalone // directive. 
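  // For example, '#pragma omp target enter data map(to: a)' lowers to
  // __tgt_target_data_begin, '#pragma omp target exit data map(from: a)' to
  // __tgt_target_data_end, and '#pragma omp target update to(a)' to
  // __tgt_target_data_update.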
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    case OMPD_target_enter_data:
      RTLFn = OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = OMPRTL__tgt_target_data_update;
      break;
    }
    CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  // In the event we get an if clause, we don't have to take any action on the
  // else side.
  auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenGenRCG(ThenGen);
    ThenGenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
};
} // namespace

static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If the OpenMP clause "simdlen" is used, the VLEN is the value of
  // the argument of that clause. The VLEN value must be a power of 2.
  // Otherwise, the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For a non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //      CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is a struct, union, or class
  //      type which is pass-by-value (except for the type that maps to the
  //      built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of the vector
  // register of the ISA for which the current vector version is generated.
  // The VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 "Registers
  // and the Stack Frame" of the original AMD64 ABI document.
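  // For instance, assuming 'double add(double *p, double x)' declared with
  // '#pragma omp declare simd uniform(p)' and no simdlen clause: the CDT is
  // 'double', so for the AVX2 entry ('d', 256-bit registers) VLEN = 256 / 64
  // = 4, and the mangling loop below would produce a name of roughly the
  // form '_ZGVdN4uv_add' (plus an 'M'-masked twin, since the branch state
  // defaults to undefined).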
QualType RetType = FD->getReturnType(); if (RetType.isNull()) return 0; ASTContext &C = FD->getASTContext(); QualType CDT; if (!RetType.isNull() && !RetType->isVoidType()) CDT = RetType; else { unsigned Offset = 0; if (auto *MD = dyn_cast(FD)) { if (ParamAttrs[Offset].Kind == Vector) CDT = C.getPointerType(C.getRecordType(MD->getParent())); ++Offset; } if (CDT.isNull()) { for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { if (ParamAttrs[I + Offset].Kind == Vector) { CDT = FD->getParamDecl(I)->getType(); break; } } } } if (CDT.isNull()) CDT = C.IntTy; CDT = CDT->getCanonicalTypeUnqualified(); if (CDT->isRecordType() || CDT->isUnionType()) CDT = C.IntTy; return C.getTypeSize(CDT); } static void emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, const llvm::APSInt &VLENVal, ArrayRef ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State) { struct ISADataTy { char ISA; unsigned VecRegSize; }; ISADataTy ISAData[] = { { 'b', 128 }, // SSE { 'c', 256 }, // AVX { 'd', 256 }, // AVX2 { 'e', 512 }, // AVX512 }; llvm::SmallVector Masked; switch (State) { case OMPDeclareSimdDeclAttr::BS_Undefined: Masked.push_back('N'); Masked.push_back('M'); break; case OMPDeclareSimdDeclAttr::BS_Notinbranch: Masked.push_back('N'); break; case OMPDeclareSimdDeclAttr::BS_Inbranch: Masked.push_back('M'); break; } for (auto Mask : Masked) { for (auto &Data : ISAData) { SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); Out << "_ZGV" << Data.ISA << Mask; if (!VLENVal) { Out << llvm::APSInt::getUnsigned(Data.VecRegSize / evaluateCDTSize(FD, ParamAttrs)); } else Out << VLENVal; for (auto &ParamAttr : ParamAttrs) { switch (ParamAttr.Kind){ case LinearWithVarStride: Out << 's' << ParamAttr.StrideOrArg; break; case Linear: Out << 'l'; if (!!ParamAttr.StrideOrArg) Out << ParamAttr.StrideOrArg; break; case Uniform: Out << 'u'; break; case Vector: Out << 'v'; break; } if (!!ParamAttr.Alignment) Out << 'a' << ParamAttr.Alignment; } Out << '_' << Fn->getName(); Fn->addFnAttr(Out.str()); } } } void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn) { ASTContext &C = CGM.getContext(); FD = FD->getCanonicalDecl(); // Map params to their positions in function decl. llvm::DenseMap ParamPositions; if (isa(FD)) ParamPositions.insert({FD, 0}); unsigned ParamPos = ParamPositions.size(); for (auto *P : FD->parameters()) { ParamPositions.insert({P->getCanonicalDecl(), ParamPos}); ++ParamPos; } for (auto *Attr : FD->specific_attrs()) { llvm::SmallVector ParamAttrs(ParamPositions.size()); // Mark uniform parameters. for (auto *E : Attr->uniforms()) { E = E->IgnoreParenImpCasts(); unsigned Pos; if (isa(E)) Pos = ParamPositions[FD]; else { auto *PVD = cast(cast(E)->getDecl()) ->getCanonicalDecl(); Pos = ParamPositions[PVD]; } ParamAttrs[Pos].Kind = Uniform; } // Get alignment info. auto NI = Attr->alignments_begin(); for (auto *E : Attr->aligneds()) { E = E->IgnoreParenImpCasts(); unsigned Pos; QualType ParmTy; if (isa(E)) { Pos = ParamPositions[FD]; ParmTy = E->getType(); } else { auto *PVD = cast(cast(E)->getDecl()) ->getCanonicalDecl(); Pos = ParamPositions[PVD]; ParmTy = PVD->getType(); } ParamAttrs[Pos].Alignment = (*NI) ? (*NI)->EvaluateKnownConstInt(C) : llvm::APSInt::getUnsigned( C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) .getQuantity()); ++NI; } // Mark linear parameters. 
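    // A linear parameter with a constant step K was mangled above as 'l' or
    // 'lK'; when the step is itself another parameter, the code below
    // switches the kind to LinearWithVarStride so that 's<position>' is
    // emitted instead.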
auto SI = Attr->steps_begin(); auto MI = Attr->modifiers_begin(); for (auto *E : Attr->linears()) { E = E->IgnoreParenImpCasts(); unsigned Pos; if (isa(E)) Pos = ParamPositions[FD]; else { auto *PVD = cast(cast(E)->getDecl()) ->getCanonicalDecl(); Pos = ParamPositions[PVD]; } auto &ParamAttr = ParamAttrs[Pos]; ParamAttr.Kind = Linear; if (*SI) { if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C, Expr::SE_AllowSideEffects)) { if (auto *DRE = cast((*SI)->IgnoreParenImpCasts())) { if (auto *StridePVD = cast(DRE->getDecl())) { ParamAttr.Kind = LinearWithVarStride; ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( ParamPositions[StridePVD->getCanonicalDecl()]); } } } } ++SI; ++MI; } llvm::APSInt VLENVal; if (const Expr *VLEN = Attr->getSimdlen()) VLENVal = VLEN->EvaluateKnownConstInt(C); OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); if (CGM.getTriple().getArch() == llvm::Triple::x86 || CGM.getTriple().getArch() == llvm::Triple::x86_64) emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); } } namespace { /// Cleanup action for doacross support. class DoacrossCleanupTy final : public EHScopeStack::Cleanup { public: static const int DoacrossFinArgs = 2; private: llvm::Value *RTLFn; llvm::Value *Args[DoacrossFinArgs]; public: DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef CallArgs) : RTLFn(RTLFn) { assert(CallArgs.size() == DoacrossFinArgs); std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); } void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { if (!CGF.HaveInsertPoint()) return; CGF.EmitRuntimeCall(RTLFn, Args); } }; } // namespace void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D) { if (!CGF.HaveInsertPoint()) return; ASTContext &C = CGM.getContext(); QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); RecordDecl *RD; if (KmpDimTy.isNull()) { // Build struct kmp_dim { // loop bounds info casted to kmp_int64 // kmp_int64 lo; // lower // kmp_int64 up; // upper // kmp_int64 st; // stride // }; RD = C.buildImplicitRecord("kmp_dim"); RD->startDefinition(); addFieldToRecordDecl(C, RD, Int64Ty); addFieldToRecordDecl(C, RD, Int64Ty); addFieldToRecordDecl(C, RD, Int64Ty); RD->completeDefinition(); KmpDimTy = C.getRecordType(RD); } else RD = cast(KmpDimTy->getAsTagDecl()); Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims"); CGF.EmitNullInitialization(DimsAddr, KmpDimTy); enum { LowerFD = 0, UpperFD, StrideFD }; // Fill dims with data. 
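  // Only dims.up and dims.st are stored below; num_dims is emitted as the
  // constant 1 and dims.lo keeps the zero written by EmitNullInitialization
  // above.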
LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy); // dims.upper = num_iterations; LValue UpperLVal = CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD)); llvm::Value *NumIterVal = CGF.EmitScalarConversion( CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(), Int64Ty, D.getNumIterations()->getExprLoc()); CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); // dims.stride = 1; LValue StrideLVal = CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD)); CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), StrideLVal); // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, // kmp_int32 num_dims, struct kmp_dim * dims); llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()), getThreadID(CGF, D.getLocStart()), llvm::ConstantInt::getSigned(CGM.Int32Ty, 1), CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( DimsAddr.getPointer(), CGM.VoidPtrTy)}; llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init); CGF.EmitRuntimeCall(RTLFn, Args); llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())}; llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini); CGF.EHStack.pushCleanup(NormalAndEHCleanup, FiniRTLFn, llvm::makeArrayRef(FiniArgs)); } void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) { QualType Int64Ty = CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); const Expr *CounterVal = C->getCounterValue(); assert(CounterVal); llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, CounterVal->getExprLoc()); Address CntAddr = CGF.CreateMemTemp(Int64Ty, ".cnt.addr"); CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty); llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()), getThreadID(CGF, C->getLocStart()), CntAddr.getPointer()}; llvm::Value *RTLFn; if (C->getDependencyKind() == OMPC_DEPEND_source) RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); else { assert(C->getDependencyKind() == OMPC_DEPEND_sink); RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); } CGF.EmitRuntimeCall(RTLFn, Args); } Index: projects/clang400-import/contrib/llvm/tools/clang/lib/Driver/Tools.cpp =================================================================== --- projects/clang400-import/contrib/llvm/tools/clang/lib/Driver/Tools.cpp (revision 314176) +++ projects/clang400-import/contrib/llvm/tools/clang/lib/Driver/Tools.cpp (revision 314177) @@ -1,12222 +1,12226 @@ //===--- Tools.cpp - Tools Implementations ----------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// #include "Tools.h" #include "InputInfo.h" #include "ToolChains.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/ObjCRuntime.h" #include "clang/Basic/Version.h" #include "clang/Config/config.h" #include "clang/Driver/Action.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/Job.h" #include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" #include "clang/Driver/ToolChain.h" #include "clang/Driver/Util.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Compression.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Host.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/Program.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/TargetParser.h" #include "llvm/Support/YAMLParser.h" #ifdef LLVM_ON_UNIX #include // For getuid(). #endif using namespace clang::driver; using namespace clang::driver::tools; using namespace clang; using namespace llvm::opt; static void handleTargetFeaturesGroup(const ArgList &Args, std::vector &Features, OptSpecifier Group) { for (const Arg *A : Args.filtered(Group)) { StringRef Name = A->getOption().getName(); A->claim(); // Skip over "-m". assert(Name.startswith("m") && "Invalid feature name."); Name = Name.substr(1); bool IsNegative = Name.startswith("no-"); if (IsNegative) Name = Name.substr(3); Features.push_back(Args.MakeArgString((IsNegative ? "-" : "+") + Name)); } } static const char *getSparcAsmModeForCPU(StringRef Name, const llvm::Triple &Triple) { if (Triple.getArch() == llvm::Triple::sparcv9) { return llvm::StringSwitch(Name) .Case("niagara", "-Av9b") .Case("niagara2", "-Av9b") .Case("niagara3", "-Av9d") .Case("niagara4", "-Av9d") .Default("-Av9"); } else { return llvm::StringSwitch(Name) .Case("v8", "-Av8") .Case("supersparc", "-Av8") .Case("sparclite", "-Asparclite") .Case("f934", "-Asparclite") .Case("hypersparc", "-Av8") .Case("sparclite86x", "-Asparclite") .Case("sparclet", "-Asparclet") .Case("tsc701", "-Asparclet") .Case("v9", "-Av8plus") .Case("ultrasparc", "-Av8plus") .Case("ultrasparc3", "-Av8plus") .Case("niagara", "-Av8plusb") .Case("niagara2", "-Av8plusb") .Case("niagara3", "-Av8plusd") .Case("niagara4", "-Av8plusd") .Case("leon2", "-Av8") .Case("at697e", "-Av8") .Case("at697f", "-Av8") .Case("leon3", "-Av8") .Case("ut699", "-Av8") .Case("gr712rc", "-Av8") .Case("leon4", "-Av8") .Case("gr740", "-Av8") .Default("-Av8"); } } static void CheckPreprocessingOptions(const Driver &D, const ArgList &Args) { if (Arg *A = Args.getLastArg(options::OPT_C, options::OPT_CC)) { if (!Args.hasArg(options::OPT_E) && !Args.hasArg(options::OPT__SLASH_P) && !Args.hasArg(options::OPT__SLASH_EP) && !D.CCCIsCPP()) { D.Diag(diag::err_drv_argument_only_allowed_with) << A->getBaseArg().getAsString(Args) << (D.IsCLMode() ? "/E, /P or /EP" : "-E"); } } } static void CheckCodeGenerationOptions(const Driver &D, const ArgList &Args) { // In gcc, only ARM checks this, but it seems reasonable to check universally. 
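// For example, 'clang -static -dynamic foo.c' is rejected below with
// err_drv_argument_not_allowed_with.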
if (Args.hasArg(options::OPT_static)) if (const Arg *A = Args.getLastArg(options::OPT_dynamic, options::OPT_mdynamic_no_pic)) D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args) << "-static"; } // Add backslashes to escape spaces and other backslashes. // This is used for the space-separated argument list specified with // the -dwarf-debug-flags option. static void EscapeSpacesAndBackslashes(const char *Arg, SmallVectorImpl &Res) { for (; *Arg; ++Arg) { switch (*Arg) { default: break; case ' ': case '\\': Res.push_back('\\'); break; } Res.push_back(*Arg); } } // Quote target names for inclusion in GNU Make dependency files. // Only the characters '$', '#', ' ', '\t' are quoted. static void QuoteTarget(StringRef Target, SmallVectorImpl &Res) { for (unsigned i = 0, e = Target.size(); i != e; ++i) { switch (Target[i]) { case ' ': case '\t': // Escape the preceding backslashes for (int j = i - 1; j >= 0 && Target[j] == '\\'; --j) Res.push_back('\\'); // Escape the space/tab Res.push_back('\\'); break; case '$': Res.push_back('$'); break; case '#': Res.push_back('\\'); break; default: break; } Res.push_back(Target[i]); } } static void addDirectoryList(const ArgList &Args, ArgStringList &CmdArgs, const char *ArgName, const char *EnvVar) { const char *DirList = ::getenv(EnvVar); bool CombinedArg = false; if (!DirList) return; // Nothing to do. StringRef Name(ArgName); if (Name.equals("-I") || Name.equals("-L")) CombinedArg = true; StringRef Dirs(DirList); if (Dirs.empty()) // Empty string should not add '.'. return; StringRef::size_type Delim; while ((Delim = Dirs.find(llvm::sys::EnvPathSeparator)) != StringRef::npos) { if (Delim == 0) { // Leading colon. if (CombinedArg) { CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + ".")); } else { CmdArgs.push_back(ArgName); CmdArgs.push_back("."); } } else { if (CombinedArg) { CmdArgs.push_back( Args.MakeArgString(std::string(ArgName) + Dirs.substr(0, Delim))); } else { CmdArgs.push_back(ArgName); CmdArgs.push_back(Args.MakeArgString(Dirs.substr(0, Delim))); } } Dirs = Dirs.substr(Delim + 1); } if (Dirs.empty()) { // Trailing colon. if (CombinedArg) { CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + ".")); } else { CmdArgs.push_back(ArgName); CmdArgs.push_back("."); } } else { // Add the last path. if (CombinedArg) { CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + Dirs)); } else { CmdArgs.push_back(ArgName); CmdArgs.push_back(Args.MakeArgString(Dirs)); } } } static void AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs, const ArgList &Args, ArgStringList &CmdArgs, const JobAction &JA) { const Driver &D = TC.getDriver(); // Add extra linker input arguments which are not treated as inputs // (constructed via -Xarch_). Args.AddAllArgValues(CmdArgs, options::OPT_Zlinker_input); for (const auto &II : Inputs) { // If the current tool chain refers to an OpenMP offloading host, we should // ignore inputs that refer to OpenMP offloading devices - they will be // embedded according to a proper linker script. if (auto *IA = II.getAction()) if (JA.isHostOffloading(Action::OFK_OpenMP) && IA->isDeviceOffloading(Action::OFK_OpenMP)) continue; if (!TC.HasNativeLLVMSupport() && types::isLLVMIR(II.getType())) // Don't try to pass LLVM inputs unless we have native support. D.Diag(diag::err_drv_no_linker_llvm_support) << TC.getTripleString(); // Add filenames immediately. if (II.isFilename()) { CmdArgs.push_back(II.getFilename()); continue; } // Otherwise, this is a linker input argument. 
const Arg &A = II.getInputArg(); // Handle reserved library options. if (A.getOption().matches(options::OPT_Z_reserved_lib_stdcxx)) TC.AddCXXStdlibLibArgs(Args, CmdArgs); else if (A.getOption().matches(options::OPT_Z_reserved_lib_cckext)) TC.AddCCKextLibArgs(Args, CmdArgs); else if (A.getOption().matches(options::OPT_z)) { // Pass -z prefix for gcc linker compatibility. A.claim(); A.render(Args, CmdArgs); } else { A.renderAsInput(Args, CmdArgs); } } // LIBRARY_PATH - included following the user specified library paths. // and only supported on native toolchains. if (!TC.isCrossCompiling()) addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH"); } /// Add OpenMP linker script arguments at the end of the argument list so that /// the fat binary is built by embedding each of the device images into the /// host. The linker script also defines a few symbols required by the code /// generation so that the images can be easily retrieved at runtime by the /// offloading library. This should be used only in tool chains that support /// linker scripts. static void AddOpenMPLinkerScript(const ToolChain &TC, Compilation &C, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, ArgStringList &CmdArgs, const JobAction &JA) { // If this is not an OpenMP host toolchain, we don't need to do anything. if (!JA.isHostOffloading(Action::OFK_OpenMP)) return; // Create temporary linker script. Keep it if save-temps is enabled. const char *LKS; SmallString<256> Name = llvm::sys::path::filename(Output.getFilename()); if (C.getDriver().isSaveTempsEnabled()) { llvm::sys::path::replace_extension(Name, "lk"); LKS = C.getArgs().MakeArgString(Name.c_str()); } else { llvm::sys::path::replace_extension(Name, ""); Name = C.getDriver().GetTemporaryPath(Name, "lk"); LKS = C.addTempFile(C.getArgs().MakeArgString(Name.c_str())); } // Add linker script option to the command. CmdArgs.push_back("-T"); CmdArgs.push_back(LKS); // Create a buffer to write the contents of the linker script. std::string LksBuffer; llvm::raw_string_ostream LksStream(LksBuffer); // Get the OpenMP offload tool chains so that we can extract the triple // associated with each device input. auto OpenMPToolChains = C.getOffloadToolChains(); assert(OpenMPToolChains.first != OpenMPToolChains.second && "No OpenMP toolchains??"); // Track the input file name and device triple in order to build the script, // inserting binaries in the designated sections. SmallVector, 8> InputBinaryInfo; // Add commands to embed target binaries. We ensure that each section and // image is 16-byte aligned. This is not mandatory, but increases the // likelihood of data to be aligned with a cache block in several main host // machines. LksStream << "/*\n"; LksStream << " OpenMP Offload Linker Script\n"; LksStream << " *** Automatically generated by Clang ***\n"; LksStream << "*/\n"; LksStream << "TARGET(binary)\n"; auto DTC = OpenMPToolChains.first; for (auto &II : Inputs) { const Action *A = II.getAction(); // Is this a device linking action? 
if (A && isa(A) && A->isDeviceOffloading(Action::OFK_OpenMP)) { assert(DTC != OpenMPToolChains.second && "More device inputs than device toolchains??"); InputBinaryInfo.push_back(std::make_pair( DTC->second->getTriple().normalize(), II.getFilename())); ++DTC; LksStream << "INPUT(" << II.getFilename() << ")\n"; } } assert(DTC == OpenMPToolChains.second && "Less device inputs than device toolchains??"); LksStream << "SECTIONS\n"; LksStream << "{\n"; LksStream << " .omp_offloading :\n"; LksStream << " ALIGN(0x10)\n"; LksStream << " {\n"; for (auto &BI : InputBinaryInfo) { LksStream << " . = ALIGN(0x10);\n"; LksStream << " PROVIDE_HIDDEN(.omp_offloading.img_start." << BI.first << " = .);\n"; LksStream << " " << BI.second << "\n"; LksStream << " PROVIDE_HIDDEN(.omp_offloading.img_end." << BI.first << " = .);\n"; } LksStream << " }\n"; // Add commands to define host entries begin and end. We use 1-byte subalign // so that the linker does not add any padding and the elements in this // section form an array. LksStream << " .omp_offloading.entries :\n"; LksStream << " ALIGN(0x10)\n"; LksStream << " SUBALIGN(0x01)\n"; LksStream << " {\n"; LksStream << " PROVIDE_HIDDEN(.omp_offloading.entries_begin = .);\n"; LksStream << " *(.omp_offloading.entries)\n"; LksStream << " PROVIDE_HIDDEN(.omp_offloading.entries_end = .);\n"; LksStream << " }\n"; LksStream << "}\n"; LksStream << "INSERT BEFORE .data\n"; LksStream.flush(); // Dump the contents of the linker script if the user requested that. We // support this option to enable testing of behavior with -###. if (C.getArgs().hasArg(options::OPT_fopenmp_dump_offload_linker_script)) llvm::errs() << LksBuffer; // If this is a dry run, do not create the linker script file. if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) return; // Open script file and write the contents. std::error_code EC; llvm::raw_fd_ostream Lksf(LKS, EC, llvm::sys::fs::F_None); if (EC) { C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message(); return; } Lksf << LksBuffer; } /// \brief Determine whether Objective-C automated reference counting is /// enabled. static bool isObjCAutoRefCount(const ArgList &Args) { return Args.hasFlag(options::OPT_fobjc_arc, options::OPT_fno_objc_arc, false); } /// \brief Determine whether we are linking the ObjC runtime. static bool isObjCRuntimeLinked(const ArgList &Args) { if (isObjCAutoRefCount(Args)) { Args.ClaimAllArgs(options::OPT_fobjc_link_runtime); return true; } return Args.hasArg(options::OPT_fobjc_link_runtime); } static bool forwardToGCC(const Option &O) { // Don't forward inputs from the original command line. They are added from // InputInfoList. return O.getKind() != Option::InputClass && !O.hasFlag(options::DriverOption) && !O.hasFlag(options::LinkerInput); } /// Apply \a Work on the current tool chain \a RegularToolChain and any other /// offloading tool chain that is associated with the current action \a JA. static void forAllAssociatedToolChains(Compilation &C, const JobAction &JA, const ToolChain &RegularToolChain, llvm::function_ref Work) { // Apply Work on the current/regular tool chain. Work(RegularToolChain); // Apply Work on all the offloading tool chains associated with the current // action. if (JA.isHostOffloading(Action::OFK_Cuda)) Work(*C.getSingleOffloadToolChain()); else if (JA.isDeviceOffloading(Action::OFK_Cuda)) Work(*C.getSingleOffloadToolChain()); // // TODO: Add support for other offloading programming models here. 
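  // (For a CUDA host-side job, for instance, Work runs once for the regular
  // tool chain and once for the single CUDA device tool chain registered
  // with the compilation.)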
// } void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA, const Driver &D, const ArgList &Args, ArgStringList &CmdArgs, const InputInfo &Output, const InputInfoList &Inputs) const { Arg *A; const bool IsIAMCU = getToolChain().getTriple().isOSIAMCU(); CheckPreprocessingOptions(D, Args); Args.AddLastArg(CmdArgs, options::OPT_C); Args.AddLastArg(CmdArgs, options::OPT_CC); // Handle dependency file generation. if ((A = Args.getLastArg(options::OPT_M, options::OPT_MM)) || (A = Args.getLastArg(options::OPT_MD)) || (A = Args.getLastArg(options::OPT_MMD))) { // Determine the output location. const char *DepFile; if (Arg *MF = Args.getLastArg(options::OPT_MF)) { DepFile = MF->getValue(); C.addFailureResultFile(DepFile, &JA); } else if (Output.getType() == types::TY_Dependencies) { DepFile = Output.getFilename(); } else if (A->getOption().matches(options::OPT_M) || A->getOption().matches(options::OPT_MM)) { DepFile = "-"; } else { DepFile = getDependencyFileName(Args, Inputs); C.addFailureResultFile(DepFile, &JA); } CmdArgs.push_back("-dependency-file"); CmdArgs.push_back(DepFile); // Add a default target if one wasn't specified. if (!Args.hasArg(options::OPT_MT) && !Args.hasArg(options::OPT_MQ)) { const char *DepTarget; // If user provided -o, that is the dependency target, except // when we are only generating a dependency file. Arg *OutputOpt = Args.getLastArg(options::OPT_o); if (OutputOpt && Output.getType() != types::TY_Dependencies) { DepTarget = OutputOpt->getValue(); } else { // Otherwise derive from the base input. // // FIXME: This should use the computed output file location. SmallString<128> P(Inputs[0].getBaseInput()); llvm::sys::path::replace_extension(P, "o"); DepTarget = Args.MakeArgString(llvm::sys::path::filename(P)); } CmdArgs.push_back("-MT"); SmallString<128> Quoted; QuoteTarget(DepTarget, Quoted); CmdArgs.push_back(Args.MakeArgString(Quoted)); } if (A->getOption().matches(options::OPT_M) || A->getOption().matches(options::OPT_MD)) CmdArgs.push_back("-sys-header-deps"); if ((isa(JA) && !Args.hasArg(options::OPT_fno_module_file_deps)) || Args.hasArg(options::OPT_fmodule_file_deps)) CmdArgs.push_back("-module-file-deps"); } if (Args.hasArg(options::OPT_MG)) { if (!A || A->getOption().matches(options::OPT_MD) || A->getOption().matches(options::OPT_MMD)) D.Diag(diag::err_drv_mg_requires_m_or_mm); CmdArgs.push_back("-MG"); } Args.AddLastArg(CmdArgs, options::OPT_MP); Args.AddLastArg(CmdArgs, options::OPT_MV); // Convert all -MQ args to -MT for (const Arg *A : Args.filtered(options::OPT_MT, options::OPT_MQ)) { A->claim(); if (A->getOption().matches(options::OPT_MQ)) { CmdArgs.push_back("-MT"); SmallString<128> Quoted; QuoteTarget(A->getValue(), Quoted); CmdArgs.push_back(Args.MakeArgString(Quoted)); // -MT flag - no change } else { A->render(Args, CmdArgs); } } // Add offload include arguments specific for CUDA. This must happen before // we -I or -include anything else, because we must pick up the CUDA headers // from the particular CUDA installation, rather than from e.g. // /usr/local/include. if (JA.isOffloading(Action::OFK_Cuda)) getToolChain().AddCudaIncludeArgs(Args, CmdArgs); // Add -i* options, and automatically translate to // -include-pch/-include-pth for transparent PCH support. It's // wonky, but we include looking for .gch so we can support seamless // replacement into a build system already set up to be generating // .gch files. 
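// ---------------------------------------------------------------------------
// [Illustrative aside] -MT passes the dependency target through QuoteTarget()
// because make treats some characters specially. This is only a simplified
// approximation of that helper (the real one also re-escapes backslashes
// that precede whitespace).
#include <iostream>
#include <string>

static std::string quoteMakeTarget(const std::string &T) {
  std::string Out;
  for (char C : T) {
    if (C == ' ' || C == '\t' || C == '#')
      Out += '\\';                 // backslash-escape whitespace and '#'
    else if (C == '$')
      Out += '$';                  // make wants '$' doubled to "$$"
    Out += C;
  }
  return Out;
}

int main() {
  std::cout << quoteMakeTarget("my dir/foo$bar.o") << "\n";
  // prints: my\ dir/foo$$bar.o
}
// ---------------------------------------------------------------------------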
int YcIndex = -1, YuIndex = -1; { int AI = -1; const Arg *YcArg = Args.getLastArg(options::OPT__SLASH_Yc); const Arg *YuArg = Args.getLastArg(options::OPT__SLASH_Yu); for (const Arg *A : Args.filtered(options::OPT_clang_i_Group)) { // Walk the whole i_Group and skip non "-include" flags so that the index // here matches the index in the next loop below. ++AI; if (!A->getOption().matches(options::OPT_include)) continue; if (YcArg && strcmp(A->getValue(), YcArg->getValue()) == 0) YcIndex = AI; if (YuArg && strcmp(A->getValue(), YuArg->getValue()) == 0) YuIndex = AI; } } if (isa(JA) && YcIndex != -1) { Driver::InputList Inputs; D.BuildInputs(getToolChain(), C.getArgs(), Inputs); assert(Inputs.size() == 1 && "Need one input when building pch"); CmdArgs.push_back(Args.MakeArgString(Twine("-find-pch-source=") + Inputs[0].second->getValue())); } bool RenderedImplicitInclude = false; int AI = -1; for (const Arg *A : Args.filtered(options::OPT_clang_i_Group)) { ++AI; if (getToolChain().getDriver().IsCLMode() && A->getOption().matches(options::OPT_include)) { // In clang-cl mode, /Ycfoo.h means that all code up to a foo.h // include is compiled into foo.h, and everything after goes into // the .obj file. /Yufoo.h means that all includes prior to and including // foo.h are completely skipped and replaced with a use of the pch file // for foo.h. (Each flag can have at most one value, multiple /Yc flags // just mean that the last one wins.) If /Yc and /Yu are both present // and refer to the same file, /Yc wins. // Note that OPT__SLASH_FI gets mapped to OPT_include. // FIXME: The code here assumes that /Yc and /Yu refer to the same file. // cl.exe seems to support both flags with different values, but that // seems strange (which flag does /Fp now refer to?), so don't implement // that until someone needs it. int PchIndex = YcIndex != -1 ? YcIndex : YuIndex; if (PchIndex != -1) { if (isa(JA)) { // When building the pch, skip all includes after the pch. assert(YcIndex != -1 && PchIndex == YcIndex); if (AI >= YcIndex) continue; } else { // When using the pch, skip all includes prior to the pch. if (AI < PchIndex) { A->claim(); continue; } if (AI == PchIndex) { A->claim(); CmdArgs.push_back("-include-pch"); CmdArgs.push_back( Args.MakeArgString(D.GetClPchPath(C, A->getValue()))); continue; } } } } else if (A->getOption().matches(options::OPT_include)) { // Handling of gcc-style gch precompiled headers. bool IsFirstImplicitInclude = !RenderedImplicitInclude; RenderedImplicitInclude = true; // Use PCH if the user requested it. bool UsePCH = D.CCCUsePCH; bool FoundPTH = false; bool FoundPCH = false; SmallString<128> P(A->getValue()); // We want the files to have a name like foo.h.pch. Add a dummy extension // so that replace_extension does the right thing. P += ".dummy"; if (UsePCH) { llvm::sys::path::replace_extension(P, "pch"); if (llvm::sys::fs::exists(P)) FoundPCH = true; } if (!FoundPCH) { llvm::sys::path::replace_extension(P, "pth"); if (llvm::sys::fs::exists(P)) FoundPTH = true; } if (!FoundPCH && !FoundPTH) { llvm::sys::path::replace_extension(P, "gch"); if (llvm::sys::fs::exists(P)) { FoundPCH = UsePCH; FoundPTH = !UsePCH; } } if (FoundPCH || FoundPTH) { if (IsFirstImplicitInclude) { A->claim(); if (UsePCH) CmdArgs.push_back("-include-pch"); else CmdArgs.push_back("-include-pth"); CmdArgs.push_back(Args.MakeArgString(P)); continue; } else { // Ignore the PCH if not first on command line and emit warning. 
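// ---------------------------------------------------------------------------
// [Illustrative aside] The probe order above is PCH (.pch), then PTH (.pth),
// then GCC-style .gch, and appending a throwaway ".dummy" extension first
// makes replace_extension() append rather than clobber the "h" of "foo.h".
// A standalone sketch using std::filesystem (C++17 assumed):
#include <filesystem>
#include <iostream>
#include <string>

static std::string findPrecompiled(const std::string &Header) {
  namespace fs = std::filesystem;
  fs::path P = Header + ".dummy";          // foo.h -> foo.h.dummy
  for (const char *Ext : {".pch", ".pth", ".gch"}) {
    P.replace_extension(Ext);              // -> foo.h.pch, foo.h.pth, ...
    if (fs::exists(P))
      return P.string();
  }
  return "";
}

int main() {
  std::string Hit = findPrecompiled("foo.h");
  std::cout << (Hit.empty() ? "no precompiled header found" : Hit) << "\n";
}
// ---------------------------------------------------------------------------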
D.Diag(diag::warn_drv_pch_not_first_include) << P << A->getAsString(Args); } } } else if (A->getOption().matches(options::OPT_isystem_after)) { // Handling of paths which must come late. These entries are handled by // the toolchain itself after the resource dir is inserted in the right // search order. // Do not claim the argument so that the use of the argument does not // silently go unnoticed on toolchains which do not honour the option. continue; } // Not translated, render as usual. A->claim(); A->render(Args, CmdArgs); } Args.AddAllArgs(CmdArgs, {options::OPT_D, options::OPT_U, options::OPT_I_Group, options::OPT_F, options::OPT_index_header_map}); // Add -Wp, and -Xpreprocessor if using the preprocessor. // FIXME: There is a very unfortunate problem here, some troubled // souls abuse -Wp, to pass preprocessor options in gcc syntax. To // really support that we would have to parse and then translate // those options. :( Args.AddAllArgValues(CmdArgs, options::OPT_Wp_COMMA, options::OPT_Xpreprocessor); // -I- is a deprecated GCC feature, reject it. if (Arg *A = Args.getLastArg(options::OPT_I_)) D.Diag(diag::err_drv_I_dash_not_supported) << A->getAsString(Args); // If we have a --sysroot, and don't have an explicit -isysroot flag, add an // -isysroot to the CC1 invocation. StringRef sysroot = C.getSysRoot(); if (sysroot != "") { if (!Args.hasArg(options::OPT_isysroot)) { CmdArgs.push_back("-isysroot"); CmdArgs.push_back(C.getArgs().MakeArgString(sysroot)); } } // Parse additional include paths from environment variables. // FIXME: We should probably sink the logic for handling these from the // frontend into the driver. It will allow deleting 4 otherwise unused flags. // CPATH - included following the user specified includes (but prior to // builtin and standard includes). addDirectoryList(Args, CmdArgs, "-I", "CPATH"); // C_INCLUDE_PATH - system includes enabled when compiling C. addDirectoryList(Args, CmdArgs, "-c-isystem", "C_INCLUDE_PATH"); // CPLUS_INCLUDE_PATH - system includes enabled when compiling C++. addDirectoryList(Args, CmdArgs, "-cxx-isystem", "CPLUS_INCLUDE_PATH"); // OBJC_INCLUDE_PATH - system includes enabled when compiling ObjC. addDirectoryList(Args, CmdArgs, "-objc-isystem", "OBJC_INCLUDE_PATH"); // OBJCPLUS_INCLUDE_PATH - system includes enabled when compiling ObjC++. addDirectoryList(Args, CmdArgs, "-objcxx-isystem", "OBJCPLUS_INCLUDE_PATH"); // While adding the include arguments, we also attempt to retrieve the // arguments of related offloading toolchains or arguments that are specific // of an offloading programming model. // Add C++ include arguments, if needed. if (types::isCXX(Inputs[0].getType())) forAllAssociatedToolChains(C, JA, getToolChain(), [&Args, &CmdArgs](const ToolChain &TC) { TC.AddClangCXXStdlibIncludeArgs(Args, CmdArgs); }); // Add system include arguments for all targets but IAMCU. if (!IsIAMCU) forAllAssociatedToolChains(C, JA, getToolChain(), [&Args, &CmdArgs](const ToolChain &TC) { TC.AddClangSystemIncludeArgs(Args, CmdArgs); }); else { // For IAMCU add special include arguments. getToolChain().AddIAMCUIncludeArgs(Args, CmdArgs); } } // FIXME: Move to target hook. 
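// ---------------------------------------------------------------------------
// [Illustrative aside] CPATH and friends are path-separated directory lists
// turned into repeated flag/value pairs. A minimal sketch, splitting on ':'
// (';' on Windows) and treating empty elements as the current directory,
// which is the usual convention for these variables:
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>

static void addDirectoryList(std::vector<std::string> &Argv,
                             const std::string &Flag, const char *EnvVar) {
  const char *Raw = std::getenv(EnvVar);
  if (!Raw || !*Raw)
    return;
  std::string S(Raw), Cur;
  for (size_t I = 0; I <= S.size(); ++I) {
    if (I == S.size() || S[I] == ':') {
      Argv.push_back(Flag);
      Argv.push_back(Cur.empty() ? "." : Cur);
      Cur.clear();
    } else
      Cur += S[I];
  }
}

int main() {
  std::vector<std::string> Argv;
  addDirectoryList(Argv, "-I", "CPATH");
  for (const auto &A : Argv)
    std::cout << A << "\n";
}
// ---------------------------------------------------------------------------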
static bool isSignedCharDefault(const llvm::Triple &Triple) { switch (Triple.getArch()) { default: return true; case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: if (Triple.isOSDarwin() || Triple.isOSWindows()) return true; return false; case llvm::Triple::ppc: case llvm::Triple::ppc64: if (Triple.isOSDarwin()) return true; return false; case llvm::Triple::hexagon: case llvm::Triple::ppc64le: case llvm::Triple::systemz: case llvm::Triple::xcore: return false; } } static bool isNoCommonDefault(const llvm::Triple &Triple) { switch (Triple.getArch()) { default: return false; case llvm::Triple::xcore: case llvm::Triple::wasm32: case llvm::Triple::wasm64: return true; } } // ARM tools start. // Get SubArch (vN). static int getARMSubArchVersionNumber(const llvm::Triple &Triple) { llvm::StringRef Arch = Triple.getArchName(); return llvm::ARM::parseArchVersion(Arch); } // True if M-profile. static bool isARMMProfile(const llvm::Triple &Triple) { llvm::StringRef Arch = Triple.getArchName(); unsigned Profile = llvm::ARM::parseArchProfile(Arch); return Profile == llvm::ARM::PK_M; } // Get Arch/CPU from args. static void getARMArchCPUFromArgs(const ArgList &Args, llvm::StringRef &Arch, llvm::StringRef &CPU, bool FromAs = false) { if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) CPU = A->getValue(); if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) Arch = A->getValue(); if (!FromAs) return; for (const Arg *A : Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler)) { StringRef Value = A->getValue(); if (Value.startswith("-mcpu=")) CPU = Value.substr(6); if (Value.startswith("-march=")) Arch = Value.substr(7); } } // Handle -mhwdiv=. // FIXME: Use ARMTargetParser. static void getARMHWDivFeatures(const Driver &D, const Arg *A, const ArgList &Args, StringRef HWDiv, std::vector &Features) { unsigned HWDivID = llvm::ARM::parseHWDiv(HWDiv); if (!llvm::ARM::getHWDivFeatures(HWDivID, Features)) D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args); } // Handle -mfpu=. static void getARMFPUFeatures(const Driver &D, const Arg *A, const ArgList &Args, StringRef FPU, std::vector &Features) { unsigned FPUID = llvm::ARM::parseFPU(FPU); if (!llvm::ARM::getFPUFeatures(FPUID, Features)) D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args); } // Decode ARM features from string like +[no]featureA+[no]featureB+... static bool DecodeARMFeatures(const Driver &D, StringRef text, std::vector &Features) { SmallVector Split; text.split(Split, StringRef("+"), -1, false); for (StringRef Feature : Split) { StringRef FeatureName = llvm::ARM::getArchExtFeature(Feature); if (!FeatureName.empty()) Features.push_back(FeatureName); else return false; } return true; } // Check if -march is valid by checking if it can be canonicalised and parsed. // getARMArch is used here instead of just checking the -march value in order // to handle -march=native correctly. static void checkARMArchName(const Driver &D, const Arg *A, const ArgList &Args, llvm::StringRef ArchName, std::vector &Features, const llvm::Triple &Triple) { std::pair Split = ArchName.split("+"); std::string MArch = arm::getARMArch(ArchName, Triple); if (llvm::ARM::parseArch(MArch) == llvm::ARM::AK_INVALID || (Split.second.size() && !DecodeARMFeatures(D, Split.second, Features))) D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args); } // Check -mcpu=. Needs ArchName to handle -mcpu=generic. 
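// ---------------------------------------------------------------------------
// [Illustrative aside] -march=armv8-a+crypto+nocrc style values are split on
// '+' and each token mapped to a backend feature. A standalone sketch of the
// splitting step only; mapping names to "+feature"/"-feature" strings is
// what llvm::ARM::getArchExtFeature() does in the code above.
#include <iostream>
#include <string>
#include <vector>

static std::vector<std::string> splitArchExts(const std::string &Text) {
  std::vector<std::string> Out;
  size_t Pos = 0;
  while (Pos <= Text.size()) {
    size_t Next = Text.find('+', Pos);
    if (Next == std::string::npos)
      Next = Text.size();
    if (Next > Pos)                                // drop empty pieces, as
      Out.push_back(Text.substr(Pos, Next - Pos)); // split(..., false) does
    Pos = Next + 1;
  }
  return Out;
}

int main() {
  for (const auto &F : splitArchExts("crypto+nocrc"))
    std::cout << F << "\n";   // "nocrc" maps to "-crc" in the real table
}
// ---------------------------------------------------------------------------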
static void checkARMCPUName(const Driver &D, const Arg *A, const ArgList &Args, llvm::StringRef CPUName, llvm::StringRef ArchName, std::vector &Features, const llvm::Triple &Triple) { std::pair Split = CPUName.split("+"); std::string CPU = arm::getARMTargetCPU(CPUName, ArchName, Triple); if (arm::getLLVMArchSuffixForARM(CPU, ArchName, Triple).empty() || (Split.second.size() && !DecodeARMFeatures(D, Split.second, Features))) D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args); } static bool useAAPCSForMachO(const llvm::Triple &T) { // The backend is hardwired to assume AAPCS for M-class processors, ensure // the frontend matches that. return T.getEnvironment() == llvm::Triple::EABI || T.getOS() == llvm::Triple::UnknownOS || isARMMProfile(T); } // Select the float ABI as determined by -msoft-float, -mhard-float, and // -mfloat-abi=. arm::FloatABI arm::getARMFloatABI(const ToolChain &TC, const ArgList &Args) { const Driver &D = TC.getDriver(); const llvm::Triple &Triple = TC.getEffectiveTriple(); auto SubArch = getARMSubArchVersionNumber(Triple); arm::FloatABI ABI = FloatABI::Invalid; if (Arg *A = Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float, options::OPT_mfloat_abi_EQ)) { if (A->getOption().matches(options::OPT_msoft_float)) { ABI = FloatABI::Soft; } else if (A->getOption().matches(options::OPT_mhard_float)) { ABI = FloatABI::Hard; } else { ABI = llvm::StringSwitch(A->getValue()) .Case("soft", FloatABI::Soft) .Case("softfp", FloatABI::SoftFP) .Case("hard", FloatABI::Hard) .Default(FloatABI::Invalid); if (ABI == FloatABI::Invalid && !StringRef(A->getValue()).empty()) { D.Diag(diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args); ABI = FloatABI::Soft; } } // It is incorrect to select hard float ABI on MachO platforms if the ABI is // "apcs-gnu". if (Triple.isOSBinFormatMachO() && !useAAPCSForMachO(Triple) && ABI == FloatABI::Hard) { D.Diag(diag::err_drv_unsupported_opt_for_target) << A->getAsString(Args) << Triple.getArchName(); } } // If unspecified, choose the default based on the platform. if (ABI == FloatABI::Invalid) { switch (Triple.getOS()) { case llvm::Triple::Darwin: case llvm::Triple::MacOSX: case llvm::Triple::IOS: case llvm::Triple::TvOS: { // Darwin defaults to "softfp" for v6 and v7. ABI = (SubArch == 6 || SubArch == 7) ? FloatABI::SoftFP : FloatABI::Soft; ABI = Triple.isWatchABI() ? FloatABI::Hard : ABI; break; } case llvm::Triple::WatchOS: ABI = FloatABI::Hard; break; // FIXME: this is invalid for WindowsCE case llvm::Triple::Win32: ABI = FloatABI::Hard; break; case llvm::Triple::FreeBSD: switch (Triple.getEnvironment()) { case llvm::Triple::GNUEABIHF: ABI = FloatABI::Hard; break; default: // FreeBSD defaults to soft float ABI = FloatABI::Soft; break; } break; default: switch (Triple.getEnvironment()) { case llvm::Triple::GNUEABIHF: case llvm::Triple::MuslEABIHF: case llvm::Triple::EABIHF: ABI = FloatABI::Hard; break; case llvm::Triple::GNUEABI: case llvm::Triple::MuslEABI: case llvm::Triple::EABI: // EABI is always AAPCS, and if it was not marked 'hard', it's softfp ABI = FloatABI::SoftFP; break; case llvm::Triple::Android: ABI = (SubArch == 7) ? FloatABI::SoftFP : FloatABI::Soft; break; default: // Assume "soft", but warn the user we are guessing. 
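// ---------------------------------------------------------------------------
// [Illustrative aside] The float-ABI choice is a cascade: an explicit
// -msoft-float/-mhard-float/-mfloat-abi= wins, then an OS/environment
// default, and as a last resort a guess plus a warning. A compressed
// standalone restatement of the parsing stage:
#include <iostream>
#include <string>

enum class FloatABI { Invalid, Soft, SoftFP, Hard };

static FloatABI parseFloatABI(const std::string &V) {
  if (V == "soft")   return FloatABI::Soft;
  if (V == "softfp") return FloatABI::SoftFP;
  if (V == "hard")   return FloatABI::Hard;
  return FloatABI::Invalid;   // the driver diagnoses this and falls back
}

int main() {
  FloatABI ABI = parseFloatABI("softfp");   // stage 1: explicit flag
  if (ABI == FloatABI::Invalid)
    ABI = FloatABI::Hard;                   // stage 2/3: platform default
  std::cout << static_cast<int>(ABI) << "\n";
}
// ---------------------------------------------------------------------------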
if (Triple.isOSBinFormatMachO() && Triple.getSubArch() == llvm::Triple::ARMSubArch_v7em) ABI = FloatABI::Hard; else ABI = FloatABI::Soft; if (Triple.getOS() != llvm::Triple::UnknownOS || !Triple.isOSBinFormatMachO()) D.Diag(diag::warn_drv_assuming_mfloat_abi_is) << "soft"; break; } } } assert(ABI != FloatABI::Invalid && "must select an ABI"); return ABI; } static void getARMTargetFeatures(const ToolChain &TC, const llvm::Triple &Triple, const ArgList &Args, ArgStringList &CmdArgs, std::vector &Features, bool ForAS) { const Driver &D = TC.getDriver(); bool KernelOrKext = Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext); arm::FloatABI ABI = arm::getARMFloatABI(TC, Args); const Arg *WaCPU = nullptr, *WaFPU = nullptr; const Arg *WaHDiv = nullptr, *WaArch = nullptr; if (!ForAS) { // FIXME: Note, this is a hack, the LLVM backend doesn't actually use these // yet (it uses the -mfloat-abi and -msoft-float options), and it is // stripped out by the ARM target. We should probably pass this a new // -target-option, which is handled by the -cc1/-cc1as invocation. // // FIXME2: For consistency, it would be ideal if we set up the target // machine state the same when using the frontend or the assembler. We don't // currently do that for the assembler, we pass the options directly to the // backend and never even instantiate the frontend TargetInfo. If we did, // and used its handleTargetFeatures hook, then we could ensure the // assembler and the frontend behave the same. // Use software floating point operations? if (ABI == arm::FloatABI::Soft) Features.push_back("+soft-float"); // Use software floating point argument passing? if (ABI != arm::FloatABI::Hard) Features.push_back("+soft-float-abi"); } else { // Here, we make sure that -Wa,-mfpu/cpu/arch/hwdiv will be passed down // to the assembler correctly. for (const Arg *A : Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler)) { StringRef Value = A->getValue(); if (Value.startswith("-mfpu=")) { WaFPU = A; } else if (Value.startswith("-mcpu=")) { WaCPU = A; } else if (Value.startswith("-mhwdiv=")) { WaHDiv = A; } else if (Value.startswith("-march=")) { WaArch = A; } } } // Check -march. ClangAs gives preference to -Wa,-march=. const Arg *ArchArg = Args.getLastArg(options::OPT_march_EQ); StringRef ArchName; if (WaArch) { if (ArchArg) D.Diag(clang::diag::warn_drv_unused_argument) << ArchArg->getAsString(Args); ArchName = StringRef(WaArch->getValue()).substr(7); checkARMArchName(D, WaArch, Args, ArchName, Features, Triple); // FIXME: Set Arch. D.Diag(clang::diag::warn_drv_unused_argument) << WaArch->getAsString(Args); } else if (ArchArg) { ArchName = ArchArg->getValue(); checkARMArchName(D, ArchArg, Args, ArchName, Features, Triple); } // Check -mcpu. ClangAs gives preference to -Wa,-mcpu=. const Arg *CPUArg = Args.getLastArg(options::OPT_mcpu_EQ); StringRef CPUName; if (WaCPU) { if (CPUArg) D.Diag(clang::diag::warn_drv_unused_argument) << CPUArg->getAsString(Args); CPUName = StringRef(WaCPU->getValue()).substr(6); checkARMCPUName(D, WaCPU, Args, CPUName, ArchName, Features, Triple); } else if (CPUArg) { CPUName = CPUArg->getValue(); checkARMCPUName(D, CPUArg, Args, CPUName, ArchName, Features, Triple); } // Add CPU features for generic CPUs if (CPUName == "native") { llvm::StringMap HostFeatures; if (llvm::sys::getHostCPUFeatures(HostFeatures)) for (auto &F : HostFeatures) Features.push_back( Args.MakeArgString((F.second ? "+" : "-") + F.first())); } // Honor -mfpu=. ClangAs gives preference to -Wa,-mfpu=. 
const Arg *FPUArg = Args.getLastArg(options::OPT_mfpu_EQ); if (WaFPU) { if (FPUArg) D.Diag(clang::diag::warn_drv_unused_argument) << FPUArg->getAsString(Args); getARMFPUFeatures(D, WaFPU, Args, StringRef(WaFPU->getValue()).substr(6), Features); } else if (FPUArg) { getARMFPUFeatures(D, FPUArg, Args, FPUArg->getValue(), Features); } // Honor -mhwdiv=. ClangAs gives preference to -Wa,-mhwdiv=. const Arg *HDivArg = Args.getLastArg(options::OPT_mhwdiv_EQ); if (WaHDiv) { if (HDivArg) D.Diag(clang::diag::warn_drv_unused_argument) << HDivArg->getAsString(Args); getARMHWDivFeatures(D, WaHDiv, Args, StringRef(WaHDiv->getValue()).substr(8), Features); } else if (HDivArg) getARMHWDivFeatures(D, HDivArg, Args, HDivArg->getValue(), Features); // Setting -msoft-float effectively disables NEON because of the GCC // implementation, although the same isn't true of VFP or VFP3. if (ABI == arm::FloatABI::Soft) { Features.push_back("-neon"); // Also need to explicitly disable features which imply NEON. Features.push_back("-crypto"); } // En/disable crc code generation. if (Arg *A = Args.getLastArg(options::OPT_mcrc, options::OPT_mnocrc)) { if (A->getOption().matches(options::OPT_mcrc)) Features.push_back("+crc"); else Features.push_back("-crc"); } // Look for the last occurrence of -mlong-calls or -mno-long-calls. If // neither options are specified, see if we are compiling for kernel/kext and // decide whether to pass "+long-calls" based on the OS and its version. if (Arg *A = Args.getLastArg(options::OPT_mlong_calls, options::OPT_mno_long_calls)) { if (A->getOption().matches(options::OPT_mlong_calls)) Features.push_back("+long-calls"); } else if (KernelOrKext && (!Triple.isiOS() || Triple.isOSVersionLT(6)) && !Triple.isWatchOS()) { Features.push_back("+long-calls"); } // Generate execute-only output (no data access to code sections). // Supported only on ARMv6T2 and ARMv7 and above. // Cannot be combined with -mno-movt or -mlong-calls if (Arg *A = Args.getLastArg(options::OPT_mexecute_only, options::OPT_mno_execute_only)) { if (A->getOption().matches(options::OPT_mexecute_only)) { if (getARMSubArchVersionNumber(Triple) < 7 && llvm::ARM::parseArch(Triple.getArchName()) != llvm::ARM::AK_ARMV6T2) D.Diag(diag::err_target_unsupported_execute_only) << Triple.getArchName(); else if (Arg *B = Args.getLastArg(options::OPT_mno_movt)) D.Diag(diag::err_opt_not_valid_with_opt) << A->getAsString(Args) << B->getAsString(Args); // Long calls create constant pool entries and have not yet been fixed up // to play nicely with execute-only. Hence, they cannot be used in // execute-only code for now else if (Arg *B = Args.getLastArg(options::OPT_mlong_calls, options::OPT_mno_long_calls)) { if (B->getOption().matches(options::OPT_mlong_calls)) D.Diag(diag::err_opt_not_valid_with_opt) << A->getAsString(Args) << B->getAsString(Args); } CmdArgs.push_back("-backend-option"); CmdArgs.push_back("-arm-execute-only"); } } // Kernel code has more strict alignment requirements. if (KernelOrKext) Features.push_back("+strict-align"); else if (Arg *A = Args.getLastArg(options::OPT_mno_unaligned_access, options::OPT_munaligned_access)) { if (A->getOption().matches(options::OPT_munaligned_access)) { // No v6M core supports unaligned memory access (v6M ARM ARM A3.2). if (Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v6m) D.Diag(diag::err_target_unsupported_unaligned) << "v6m"; // v8M Baseline follows on from v6M, so doesn't support unaligned memory // access either. 
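// ---------------------------------------------------------------------------
// [Illustrative aside] The -mexecute-only checks above follow a recurring
// pattern: the last flag of each on/off pair wins, and the winners are then
// validated against each other. A standalone sketch of that shape; the flag
// strings here are illustrative, not the driver's real option table.
#include <iostream>
#include <string>
#include <vector>

// Returns the last element of Argv equal to A or B, or "" if neither occurs.
static std::string lastOf(const std::vector<std::string> &Argv,
                          const std::string &A, const std::string &B) {
  std::string Last;
  for (const auto &S : Argv)
    if (S == A || S == B)
      Last = S;
  return Last;
}

int main() {
  std::vector<std::string> Argv = {"-mexecute-only", "-mno-movt"};
  bool NoMovt = false;
  for (const auto &S : Argv)
    if (S == "-mno-movt")
      NoMovt = true;
  if (lastOf(Argv, "-mexecute-only", "-mno-execute-only") ==
          "-mexecute-only" &&
      NoMovt)
    std::cout << "error: -mexecute-only is not valid with -mno-movt\n";
}
// ---------------------------------------------------------------------------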
else if (Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v8m_baseline) D.Diag(diag::err_target_unsupported_unaligned) << "v8m.base"; } else Features.push_back("+strict-align"); } else { // Assume pre-ARMv6 doesn't support unaligned accesses. // // ARMv6 may or may not support unaligned accesses depending on the // SCTLR.U bit, which is architecture-specific. We assume ARMv6 // Darwin and NetBSD targets support unaligned accesses, and others don't. // // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit // which raises an alignment fault on unaligned accesses. Linux // defaults this bit to 0 and handles it as a system-wide (not // per-process) setting. It is therefore safe to assume that ARMv7+ // Linux targets support unaligned accesses. The same goes for NaCl. // // The above behavior is consistent with GCC. int VersionNum = getARMSubArchVersionNumber(Triple); if (Triple.isOSDarwin() || Triple.isOSNetBSD()) { if (VersionNum < 6 || Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v6m) Features.push_back("+strict-align"); } else if (Triple.isOSLinux() || Triple.isOSNaCl()) { if (VersionNum < 7) Features.push_back("+strict-align"); } else Features.push_back("+strict-align"); } // llvm does not support reserving registers in general. There is support // for reserving r9 on ARM though (defined as a platform-specific register // in ARM EABI). if (Args.hasArg(options::OPT_ffixed_r9)) Features.push_back("+reserve-r9"); // The kext linker doesn't know how to deal with movw/movt. if (KernelOrKext || Args.hasArg(options::OPT_mno_movt)) Features.push_back("+no-movt"); } void Clang::AddARMTargetArgs(const llvm::Triple &Triple, const ArgList &Args, ArgStringList &CmdArgs, bool KernelOrKext) const { // Select the ABI to use. // FIXME: Support -meabi. // FIXME: Parts of this are duplicated in the backend, unify this somehow. const char *ABIName = nullptr; if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) { ABIName = A->getValue(); } else if (Triple.isOSBinFormatMachO()) { if (useAAPCSForMachO(Triple)) { ABIName = "aapcs"; } else if (Triple.isWatchABI()) { ABIName = "aapcs16"; } else { ABIName = "apcs-gnu"; } } else if (Triple.isOSWindows()) { // FIXME: this is invalid for WindowsCE ABIName = "aapcs"; } else { // Select the default based on the platform. switch (Triple.getEnvironment()) { case llvm::Triple::Android: case llvm::Triple::GNUEABI: case llvm::Triple::GNUEABIHF: case llvm::Triple::MuslEABI: case llvm::Triple::MuslEABIHF: ABIName = "aapcs-linux"; break; case llvm::Triple::EABIHF: case llvm::Triple::EABI: ABIName = "aapcs"; break; default: if (Triple.getOS() == llvm::Triple::NetBSD) ABIName = "apcs-gnu"; else ABIName = "aapcs"; break; } } CmdArgs.push_back("-target-abi"); CmdArgs.push_back(ABIName); // Determine floating point ABI from the options & target defaults. arm::FloatABI ABI = arm::getARMFloatABI(getToolChain(), Args); if (ABI == arm::FloatABI::Soft) { // Floating point operations and argument passing are soft. // FIXME: This changes CPP defines, we need -target-soft-float. CmdArgs.push_back("-msoft-float"); CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("soft"); } else if (ABI == arm::FloatABI::SoftFP) { // Floating point operations are hard, but argument passing is soft. CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("soft"); } else { // Floating point operations and argument passing are hard. 
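// ---------------------------------------------------------------------------
// [Illustrative aside] The unaligned-access default above boils down to a
// small decision table over OS family and architecture version. A standalone
// restatement with the same outcomes as the comment block:
#include <iostream>
#include <string>

static bool needsStrictAlign(const std::string &OS, int ArmVersion,
                             bool IsV6M) {
  if (OS == "darwin" || OS == "netbsd")   // SCTLR.U assumed set from v6 up,
    return ArmVersion < 6 || IsV6M;       // except on v6-M cores
  if (OS == "linux" || OS == "nacl")      // kernel guarantees from v7 up
    return ArmVersion < 7;
  return true;                            // otherwise be conservative
}

int main() {
  std::cout << needsStrictAlign("linux", 7, false) << "\n"; // 0: unaligned OK
}
// ---------------------------------------------------------------------------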
assert(ABI == arm::FloatABI::Hard && "Invalid float abi!"); CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("hard"); } // Forward the -mglobal-merge option for explicit control over the pass. if (Arg *A = Args.getLastArg(options::OPT_mglobal_merge, options::OPT_mno_global_merge)) { CmdArgs.push_back("-backend-option"); if (A->getOption().matches(options::OPT_mno_global_merge)) CmdArgs.push_back("-arm-global-merge=false"); else CmdArgs.push_back("-arm-global-merge=true"); } if (!Args.hasFlag(options::OPT_mimplicit_float, options::OPT_mno_implicit_float, true)) CmdArgs.push_back("-no-implicit-float"); } // ARM tools end. /// getAArch64TargetCPU - Get the (LLVM) name of the AArch64 cpu we are /// targeting. Set \p A to the Arg corresponding to the -mcpu or -mtune /// arguments if they are provided, or to nullptr otherwise. static std::string getAArch64TargetCPU(const ArgList &Args, Arg *&A) { std::string CPU; // If we have -mtune or -mcpu, use that. if ((A = Args.getLastArg(options::OPT_mtune_EQ))) { CPU = StringRef(A->getValue()).lower(); } else if ((A = Args.getLastArg(options::OPT_mcpu_EQ))) { StringRef Mcpu = A->getValue(); CPU = Mcpu.split("+").first.lower(); } // Handle CPU name is 'native'. if (CPU == "native") return llvm::sys::getHostCPUName(); else if (CPU.size()) return CPU; // Make sure we pick "cyclone" if -arch is used. // FIXME: Should this be picked by checking the target triple instead? if (Args.getLastArg(options::OPT_arch)) return "cyclone"; return "generic"; } void Clang::AddAArch64TargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { const llvm::Triple &Triple = getToolChain().getEffectiveTriple(); if (!Args.hasFlag(options::OPT_mred_zone, options::OPT_mno_red_zone, true) || Args.hasArg(options::OPT_mkernel) || Args.hasArg(options::OPT_fapple_kext)) CmdArgs.push_back("-disable-red-zone"); if (!Args.hasFlag(options::OPT_mimplicit_float, options::OPT_mno_implicit_float, true)) CmdArgs.push_back("-no-implicit-float"); const char *ABIName = nullptr; if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) ABIName = A->getValue(); else if (Triple.isOSDarwin()) ABIName = "darwinpcs"; else ABIName = "aapcs"; CmdArgs.push_back("-target-abi"); CmdArgs.push_back(ABIName); if (Arg *A = Args.getLastArg(options::OPT_mfix_cortex_a53_835769, options::OPT_mno_fix_cortex_a53_835769)) { CmdArgs.push_back("-backend-option"); if (A->getOption().matches(options::OPT_mfix_cortex_a53_835769)) CmdArgs.push_back("-aarch64-fix-cortex-a53-835769=1"); else CmdArgs.push_back("-aarch64-fix-cortex-a53-835769=0"); } else if (Triple.isAndroid()) { // Enabled A53 errata (835769) workaround by default on android CmdArgs.push_back("-backend-option"); CmdArgs.push_back("-aarch64-fix-cortex-a53-835769=1"); } // Forward the -mglobal-merge option for explicit control over the pass. if (Arg *A = Args.getLastArg(options::OPT_mglobal_merge, options::OPT_mno_global_merge)) { CmdArgs.push_back("-backend-option"); if (A->getOption().matches(options::OPT_mno_global_merge)) CmdArgs.push_back("-aarch64-global-merge=false"); else CmdArgs.push_back("-aarch64-global-merge=true"); } } // Get CPU and ABI names. They are not independent // so we have to calculate them together. void mips::getMipsCPUAndABI(const ArgList &Args, const llvm::Triple &Triple, StringRef &CPUName, StringRef &ABIName) { const char *DefMips32CPU = "mips32r2"; const char *DefMips64CPU = "mips64r2"; // MIPS32r6 is the default for mips(el)?-img-linux-gnu and MIPS64r6 is the // default for mips64(el)?-img-linux-gnu. 
if (Triple.getVendor() == llvm::Triple::ImaginationTechnologies && Triple.getEnvironment() == llvm::Triple::GNU) { DefMips32CPU = "mips32r6"; DefMips64CPU = "mips64r6"; } // MIPS64r6 is the default for Android MIPS64 (mips64el-linux-android). if (Triple.isAndroid()) { DefMips32CPU = "mips32"; DefMips64CPU = "mips64r6"; } // MIPS3 is the default for mips64*-unknown-openbsd. if (Triple.getOS() == llvm::Triple::OpenBSD) DefMips64CPU = "mips3"; if (Arg *A = Args.getLastArg(options::OPT_march_EQ, options::OPT_mcpu_EQ)) CPUName = A->getValue(); if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) { ABIName = A->getValue(); // Convert a GNU style Mips ABI name to the name // accepted by LLVM Mips backend. ABIName = llvm::StringSwitch(ABIName) .Case("32", "o32") .Case("64", "n64") .Default(ABIName); } // Setup default CPU and ABI names. if (CPUName.empty() && ABIName.empty()) { switch (Triple.getArch()) { default: llvm_unreachable("Unexpected triple arch name"); case llvm::Triple::mips: case llvm::Triple::mipsel: CPUName = DefMips32CPU; break; case llvm::Triple::mips64: case llvm::Triple::mips64el: CPUName = DefMips64CPU; break; } } if (ABIName.empty() && (Triple.getVendor() == llvm::Triple::MipsTechnologies || Triple.getVendor() == llvm::Triple::ImaginationTechnologies)) { ABIName = llvm::StringSwitch(CPUName) .Case("mips1", "o32") .Case("mips2", "o32") .Case("mips3", "n64") .Case("mips4", "n64") .Case("mips5", "n64") .Case("mips32", "o32") .Case("mips32r2", "o32") .Case("mips32r3", "o32") .Case("mips32r5", "o32") .Case("mips32r6", "o32") .Case("mips64", "n64") .Case("mips64r2", "n64") .Case("mips64r3", "n64") .Case("mips64r5", "n64") .Case("mips64r6", "n64") .Case("octeon", "n64") .Case("p5600", "o32") .Default(""); } if (ABIName.empty()) { // Deduce ABI name from the target triple. if (Triple.getArch() == llvm::Triple::mips || Triple.getArch() == llvm::Triple::mipsel) ABIName = "o32"; else ABIName = "n64"; } if (CPUName.empty()) { // Deduce CPU name from ABI name. CPUName = llvm::StringSwitch(ABIName) .Case("o32", DefMips32CPU) .Cases("n32", "n64", DefMips64CPU) .Default(""); } // FIXME: Warn on inconsistent use of -march and -mabi. } std::string mips::getMipsABILibSuffix(const ArgList &Args, const llvm::Triple &Triple) { StringRef CPUName, ABIName; tools::mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName); return llvm::StringSwitch(ABIName) .Case("o32", "") .Case("n32", "32") .Case("n64", "64"); } // Convert ABI name to the GNU tools acceptable variant. static StringRef getGnuCompatibleMipsABIName(StringRef ABI) { return llvm::StringSwitch(ABI) .Case("o32", "32") .Case("n64", "64") .Default(ABI); } // Select the MIPS float ABI as determined by -msoft-float, -mhard-float, // and -mfloat-abi=. static mips::FloatABI getMipsFloatABI(const Driver &D, const ArgList &Args) { mips::FloatABI ABI = mips::FloatABI::Invalid; if (Arg *A = Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float, options::OPT_mfloat_abi_EQ)) { if (A->getOption().matches(options::OPT_msoft_float)) ABI = mips::FloatABI::Soft; else if (A->getOption().matches(options::OPT_mhard_float)) ABI = mips::FloatABI::Hard; else { ABI = llvm::StringSwitch(A->getValue()) .Case("soft", mips::FloatABI::Soft) .Case("hard", mips::FloatABI::Hard) .Default(mips::FloatABI::Invalid); if (ABI == mips::FloatABI::Invalid && !StringRef(A->getValue()).empty()) { D.Diag(diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args); ABI = mips::FloatABI::Hard; } } } // If unspecified, choose the default based on the platform. 
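// ---------------------------------------------------------------------------
// [Illustrative aside] CPU and ABI default off each other, so the resolution
// order matters: read both flags, derive a missing ABI from the CPU (or the
// triple), then derive a missing CPU from the ABI. A deliberately simplified
// standalone sketch of that two-way defaulting (the real code also honors
// vendor- and OS-specific default CPUs):
#include <iostream>
#include <string>

static void resolveMips(std::string &CPU, std::string &ABI, bool Is64) {
  if (ABI.empty())
    ABI = (CPU.substr(0, 6) == "mips64" || (CPU.empty() && Is64)) ? "n64"
                                                                  : "o32";
  if (CPU.empty())
    CPU = (ABI == "o32") ? "mips32r2" : "mips64r2";
}

int main() {
  std::string CPU, ABI = "n64";
  resolveMips(CPU, ABI, /*Is64=*/true);
  std::cout << CPU << " / " << ABI << "\n";   // mips64r2 / n64
}
// ---------------------------------------------------------------------------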
if (ABI == mips::FloatABI::Invalid) { // Assume "hard", because it's a default value used by gcc. // When we start to recognize specific target MIPS processors, // we will be able to select the default more correctly. ABI = mips::FloatABI::Hard; } assert(ABI != mips::FloatABI::Invalid && "must select an ABI"); return ABI; } static void AddTargetFeature(const ArgList &Args, std::vector &Features, OptSpecifier OnOpt, OptSpecifier OffOpt, StringRef FeatureName) { if (Arg *A = Args.getLastArg(OnOpt, OffOpt)) { if (A->getOption().matches(OnOpt)) Features.push_back(Args.MakeArgString("+" + FeatureName)); else Features.push_back(Args.MakeArgString("-" + FeatureName)); } } static void getMIPSTargetFeatures(const Driver &D, const llvm::Triple &Triple, const ArgList &Args, std::vector &Features) { StringRef CPUName; StringRef ABIName; mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName); ABIName = getGnuCompatibleMipsABIName(ABIName); AddTargetFeature(Args, Features, options::OPT_mno_abicalls, options::OPT_mabicalls, "noabicalls"); mips::FloatABI FloatABI = getMipsFloatABI(D, Args); if (FloatABI == mips::FloatABI::Soft) { // FIXME: Note, this is a hack. We need to pass the selected float // mode to the MipsTargetInfoBase to define appropriate macros there. // Now it is the only method. Features.push_back("+soft-float"); } if (Arg *A = Args.getLastArg(options::OPT_mnan_EQ)) { StringRef Val = StringRef(A->getValue()); if (Val == "2008") { if (mips::getSupportedNanEncoding(CPUName) & mips::Nan2008) Features.push_back("+nan2008"); else { Features.push_back("-nan2008"); D.Diag(diag::warn_target_unsupported_nan2008) << CPUName; } } else if (Val == "legacy") { if (mips::getSupportedNanEncoding(CPUName) & mips::NanLegacy) Features.push_back("-nan2008"); else { Features.push_back("+nan2008"); D.Diag(diag::warn_target_unsupported_nanlegacy) << CPUName; } } else D.Diag(diag::err_drv_unsupported_option_argument) << A->getOption().getName() << Val; } AddTargetFeature(Args, Features, options::OPT_msingle_float, options::OPT_mdouble_float, "single-float"); AddTargetFeature(Args, Features, options::OPT_mips16, options::OPT_mno_mips16, "mips16"); AddTargetFeature(Args, Features, options::OPT_mmicromips, options::OPT_mno_micromips, "micromips"); AddTargetFeature(Args, Features, options::OPT_mdsp, options::OPT_mno_dsp, "dsp"); AddTargetFeature(Args, Features, options::OPT_mdspr2, options::OPT_mno_dspr2, "dspr2"); AddTargetFeature(Args, Features, options::OPT_mmsa, options::OPT_mno_msa, "msa"); // Add the last -mfp32/-mfpxx/-mfp64, if none are given and the ABI is O32 // pass -mfpxx, or if none are given and fp64a is default, pass fp64 and // nooddspreg. 
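// ---------------------------------------------------------------------------
// [Illustrative aside] AddTargetFeature() captures the whole on/off-pair
// idiom in one helper: whichever flag of the pair appears last selects
// "+name" or "-name", and if neither appears nothing is added. A standalone
// sketch:
#include <iostream>
#include <string>
#include <vector>

static void addTargetFeature(const std::vector<std::string> &Argv,
                             const std::string &On, const std::string &Off,
                             const std::string &Name,
                             std::vector<std::string> &Features) {
  std::string Last;
  for (const auto &S : Argv)
    if (S == On || S == Off)
      Last = S;                                   // last occurrence wins
  if (Last == On)
    Features.push_back("+" + Name);
  else if (Last == Off)
    Features.push_back("-" + Name);
}

int main() {
  std::vector<std::string> Features;
  addTargetFeature({"-mmsa", "-mno-msa", "-mmsa"}, "-mmsa", "-mno-msa",
                   "msa", Features);
  std::cout << Features.front() << "\n";          // +msa
}
// ---------------------------------------------------------------------------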
if (Arg *A = Args.getLastArg(options::OPT_mfp32, options::OPT_mfpxx, options::OPT_mfp64)) { if (A->getOption().matches(options::OPT_mfp32)) Features.push_back(Args.MakeArgString("-fp64")); else if (A->getOption().matches(options::OPT_mfpxx)) { Features.push_back(Args.MakeArgString("+fpxx")); Features.push_back(Args.MakeArgString("+nooddspreg")); } else Features.push_back(Args.MakeArgString("+fp64")); } else if (mips::shouldUseFPXX(Args, Triple, CPUName, ABIName, FloatABI)) { Features.push_back(Args.MakeArgString("+fpxx")); Features.push_back(Args.MakeArgString("+nooddspreg")); } else if (mips::isFP64ADefault(Triple, CPUName)) { Features.push_back(Args.MakeArgString("+fp64")); Features.push_back(Args.MakeArgString("+nooddspreg")); } AddTargetFeature(Args, Features, options::OPT_mno_odd_spreg, options::OPT_modd_spreg, "nooddspreg"); } void Clang::AddMIPSTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { const Driver &D = getToolChain().getDriver(); StringRef CPUName; StringRef ABIName; const llvm::Triple &Triple = getToolChain().getTriple(); mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName); CmdArgs.push_back("-target-abi"); CmdArgs.push_back(ABIName.data()); mips::FloatABI ABI = getMipsFloatABI(D, Args); if (ABI == mips::FloatABI::Soft) { // Floating point operations and argument passing are soft. CmdArgs.push_back("-msoft-float"); CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("soft"); } else { // Floating point operations and argument passing are hard. assert(ABI == mips::FloatABI::Hard && "Invalid float abi!"); CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("hard"); } if (Arg *A = Args.getLastArg(options::OPT_mxgot, options::OPT_mno_xgot)) { if (A->getOption().matches(options::OPT_mxgot)) { CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-mxgot"); } } if (Arg *A = Args.getLastArg(options::OPT_mldc1_sdc1, options::OPT_mno_ldc1_sdc1)) { if (A->getOption().matches(options::OPT_mno_ldc1_sdc1)) { CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-mno-ldc1-sdc1"); } } if (Arg *A = Args.getLastArg(options::OPT_mcheck_zero_division, options::OPT_mno_check_zero_division)) { if (A->getOption().matches(options::OPT_mno_check_zero_division)) { CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-mno-check-zero-division"); } } if (Arg *A = Args.getLastArg(options::OPT_G)) { StringRef v = A->getValue(); CmdArgs.push_back("-mllvm"); CmdArgs.push_back(Args.MakeArgString("-mips-ssection-threshold=" + v)); A->claim(); } if (Arg *A = Args.getLastArg(options::OPT_mcompact_branches_EQ)) { StringRef Val = StringRef(A->getValue()); if (mips::hasCompactBranches(CPUName)) { if (Val == "never" || Val == "always" || Val == "optimal") { CmdArgs.push_back("-mllvm"); CmdArgs.push_back(Args.MakeArgString("-mips-compact-branches=" + Val)); } else D.Diag(diag::err_drv_unsupported_option_argument) << A->getOption().getName() << Val; } else D.Diag(diag::warn_target_unsupported_compact_branches) << CPUName; } } /// getPPCTargetCPU - Get the (LLVM) name of the PowerPC cpu we are targeting. 
static std::string getPPCTargetCPU(const ArgList &Args) { if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { StringRef CPUName = A->getValue(); if (CPUName == "native") { std::string CPU = llvm::sys::getHostCPUName(); if (!CPU.empty() && CPU != "generic") return CPU; else return ""; } return llvm::StringSwitch(CPUName) .Case("common", "generic") .Case("440", "440") .Case("440fp", "440") .Case("450", "450") .Case("601", "601") .Case("602", "602") .Case("603", "603") .Case("603e", "603e") .Case("603ev", "603ev") .Case("604", "604") .Case("604e", "604e") .Case("620", "620") .Case("630", "pwr3") .Case("G3", "g3") .Case("7400", "7400") .Case("G4", "g4") .Case("7450", "7450") .Case("G4+", "g4+") .Case("750", "750") .Case("970", "970") .Case("G5", "g5") .Case("a2", "a2") .Case("a2q", "a2q") .Case("e500mc", "e500mc") .Case("e5500", "e5500") .Case("power3", "pwr3") .Case("power4", "pwr4") .Case("power5", "pwr5") .Case("power5x", "pwr5x") .Case("power6", "pwr6") .Case("power6x", "pwr6x") .Case("power7", "pwr7") .Case("power8", "pwr8") .Case("power9", "pwr9") .Case("pwr3", "pwr3") .Case("pwr4", "pwr4") .Case("pwr5", "pwr5") .Case("pwr5x", "pwr5x") .Case("pwr6", "pwr6") .Case("pwr6x", "pwr6x") .Case("pwr7", "pwr7") .Case("pwr8", "pwr8") .Case("pwr9", "pwr9") .Case("powerpc", "ppc") .Case("powerpc64", "ppc64") .Case("powerpc64le", "ppc64le") .Default(""); } return ""; } static void getPPCTargetFeatures(const Driver &D, const llvm::Triple &Triple, const ArgList &Args, std::vector &Features) { handleTargetFeaturesGroup(Args, Features, options::OPT_m_ppc_Features_Group); ppc::FloatABI FloatABI = ppc::getPPCFloatABI(D, Args); if (FloatABI == ppc::FloatABI::Soft) Features.push_back("-hard-float"); // Altivec is a bit weird, allow overriding of the Altivec feature here. AddTargetFeature(Args, Features, options::OPT_faltivec, options::OPT_fno_altivec, "altivec"); } ppc::FloatABI ppc::getPPCFloatABI(const Driver &D, const ArgList &Args) { ppc::FloatABI ABI = ppc::FloatABI::Invalid; if (Arg *A = Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float, options::OPT_mfloat_abi_EQ)) { if (A->getOption().matches(options::OPT_msoft_float)) ABI = ppc::FloatABI::Soft; else if (A->getOption().matches(options::OPT_mhard_float)) ABI = ppc::FloatABI::Hard; else { ABI = llvm::StringSwitch(A->getValue()) .Case("soft", ppc::FloatABI::Soft) .Case("hard", ppc::FloatABI::Hard) .Default(ppc::FloatABI::Invalid); if (ABI == ppc::FloatABI::Invalid && !StringRef(A->getValue()).empty()) { D.Diag(diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args); ABI = ppc::FloatABI::Hard; } } } // If unspecified, choose the default based on the platform. if (ABI == ppc::FloatABI::Invalid) { ABI = ppc::FloatABI::Hard; } return ABI; } void Clang::AddPPCTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { // Select the ABI to use. const char *ABIName = nullptr; if (getToolChain().getTriple().isOSLinux()) switch (getToolChain().getArch()) { case llvm::Triple::ppc64: { // When targeting a processor that supports QPX, or if QPX is // specifically enabled, default to using the ABI that supports QPX (so // long as it is not specifically disabled). 
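// ---------------------------------------------------------------------------
// [Illustrative aside] The long .Case() chain above is llvm::StringSwitch: a
// value-returning chain that latches on the first match and otherwise yields
// the .Default() value. A toy reimplementation of the idiom (a sketch, not
// LLVM's actual class):
#include <iostream>
#include <string>
#include <utility>

template <typename T> class MiniStringSwitch {
  std::string Str;
  bool Done = false;
  T Result{};

public:
  explicit MiniStringSwitch(std::string S) : Str(std::move(S)) {}
  MiniStringSwitch &Case(const char *Key, T Val) {
    if (!Done && Str == Key) {
      Result = Val;
      Done = true;
    }
    return *this;
  }
  T Default(T Val) { return Done ? Result : Val; }
};

int main() {
  std::string CPU = MiniStringSwitch<std::string>("power8")
                        .Case("common", "generic")
                        .Case("power8", "pwr8")
                        .Default("");
  std::cout << CPU << "\n";   // pwr8
}
// ---------------------------------------------------------------------------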
bool HasQPX = false; if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) HasQPX = A->getValue() == StringRef("a2q"); HasQPX = Args.hasFlag(options::OPT_mqpx, options::OPT_mno_qpx, HasQPX); if (HasQPX) { ABIName = "elfv1-qpx"; break; } ABIName = "elfv1"; break; } case llvm::Triple::ppc64le: ABIName = "elfv2"; break; default: break; } if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) // The ppc64 linux abis are all "altivec" abis by default. Accept and ignore // the option if given as we don't have backend support for any targets // that don't use the altivec abi. if (StringRef(A->getValue()) != "altivec") ABIName = A->getValue(); ppc::FloatABI FloatABI = ppc::getPPCFloatABI(getToolChain().getDriver(), Args); if (FloatABI == ppc::FloatABI::Soft) { // Floating point operations and argument passing are soft. CmdArgs.push_back("-msoft-float"); CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("soft"); } else { // Floating point operations and argument passing are hard. assert(FloatABI == ppc::FloatABI::Hard && "Invalid float abi!"); CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("hard"); } if (ABIName) { CmdArgs.push_back("-target-abi"); CmdArgs.push_back(ABIName); } } bool ppc::hasPPCAbiArg(const ArgList &Args, const char *Value) { Arg *A = Args.getLastArg(options::OPT_mabi_EQ); return A && (A->getValue() == StringRef(Value)); } /// Get the (LLVM) name of the R600 gpu we are targeting. static std::string getR600TargetGPU(const ArgList &Args) { if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { const char *GPUName = A->getValue(); return llvm::StringSwitch(GPUName) .Cases("rv630", "rv635", "r600") .Cases("rv610", "rv620", "rs780", "rs880") .Case("rv740", "rv770") .Case("palm", "cedar") .Cases("sumo", "sumo2", "sumo") .Case("hemlock", "cypress") .Case("aruba", "cayman") .Default(GPUName); } return ""; } static std::string getLanaiTargetCPU(const ArgList &Args) { if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { return A->getValue(); } return ""; } sparc::FloatABI sparc::getSparcFloatABI(const Driver &D, const ArgList &Args) { sparc::FloatABI ABI = sparc::FloatABI::Invalid; if (Arg *A = Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float, options::OPT_mfloat_abi_EQ)) { if (A->getOption().matches(options::OPT_msoft_float)) ABI = sparc::FloatABI::Soft; else if (A->getOption().matches(options::OPT_mhard_float)) ABI = sparc::FloatABI::Hard; else { ABI = llvm::StringSwitch(A->getValue()) .Case("soft", sparc::FloatABI::Soft) .Case("hard", sparc::FloatABI::Hard) .Default(sparc::FloatABI::Invalid); if (ABI == sparc::FloatABI::Invalid && !StringRef(A->getValue()).empty()) { D.Diag(diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args); ABI = sparc::FloatABI::Hard; } } } // If unspecified, choose the default based on the platform. // Only the hard-float ABI on Sparc is standardized, and it is the // default. GCC also supports a nonstandard soft-float ABI mode, also // implemented in LLVM. However as this is not standard we set the default // to be hard-float. 
if (ABI == sparc::FloatABI::Invalid) { ABI = sparc::FloatABI::Hard; } return ABI; } static void getSparcTargetFeatures(const Driver &D, const ArgList &Args, std::vector &Features) { sparc::FloatABI FloatABI = sparc::getSparcFloatABI(D, Args); if (FloatABI == sparc::FloatABI::Soft) Features.push_back("+soft-float"); } void Clang::AddSparcTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { sparc::FloatABI FloatABI = sparc::getSparcFloatABI(getToolChain().getDriver(), Args); if (FloatABI == sparc::FloatABI::Soft) { // Floating point operations and argument passing are soft. CmdArgs.push_back("-msoft-float"); CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("soft"); } else { // Floating point operations and argument passing are hard. assert(FloatABI == sparc::FloatABI::Hard && "Invalid float abi!"); CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("hard"); } } void Clang::AddSystemZTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { if (Args.hasFlag(options::OPT_mbackchain, options::OPT_mno_backchain, false)) CmdArgs.push_back("-mbackchain"); } static const char *getSystemZTargetCPU(const ArgList &Args) { if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) return A->getValue(); return "z10"; } static void getSystemZTargetFeatures(const ArgList &Args, std::vector &Features) { // -m(no-)htm overrides use of the transactional-execution facility. if (Arg *A = Args.getLastArg(options::OPT_mhtm, options::OPT_mno_htm)) { if (A->getOption().matches(options::OPT_mhtm)) Features.push_back("+transactional-execution"); else Features.push_back("-transactional-execution"); } // -m(no-)vx overrides use of the vector facility. if (Arg *A = Args.getLastArg(options::OPT_mvx, options::OPT_mno_vx)) { if (A->getOption().matches(options::OPT_mvx)) Features.push_back("+vector"); else Features.push_back("-vector"); } } static const char *getX86TargetCPU(const ArgList &Args, const llvm::Triple &Triple) { if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) { if (StringRef(A->getValue()) != "native") { if (Triple.isOSDarwin() && Triple.getArchName() == "x86_64h") return "core-avx2"; return A->getValue(); } // FIXME: Reject attempts to use -march=native unless the target matches // the host. // // FIXME: We should also incorporate the detected target features for use // with -native. std::string CPU = llvm::sys::getHostCPUName(); if (!CPU.empty() && CPU != "generic") return Args.MakeArgString(CPU); } if (const Arg *A = Args.getLastArg(options::OPT__SLASH_arch)) { // Mapping built by referring to X86TargetInfo::getDefaultFeatures(). StringRef Arch = A->getValue(); const char *CPU; if (Triple.getArch() == llvm::Triple::x86) { CPU = llvm::StringSwitch(Arch) .Case("IA32", "i386") .Case("SSE", "pentium3") .Case("SSE2", "pentium4") .Case("AVX", "sandybridge") .Case("AVX2", "haswell") .Default(nullptr); } else { CPU = llvm::StringSwitch(Arch) .Case("AVX", "sandybridge") .Case("AVX2", "haswell") .Default(nullptr); } if (CPU) return CPU; } // Select the default CPU if none was given (or detection failed). if (Triple.getArch() != llvm::Triple::x86_64 && Triple.getArch() != llvm::Triple::x86) return nullptr; // This routine is only handling x86 targets. bool Is64Bit = Triple.getArch() == llvm::Triple::x86_64; // FIXME: Need target hooks. if (Triple.isOSDarwin()) { if (Triple.getArchName() == "x86_64h") return "core-avx2"; // macosx10.12 drops support for all pre-Penryn Macs. // Simulators can still run on 10.11 though, like Xcode. 
if (Triple.isMacOSX() && !Triple.isOSVersionLT(10, 12)) return "penryn"; // The oldest x86_64 Macs have core2/Merom; the oldest x86 Macs have Yonah. return Is64Bit ? "core2" : "yonah"; } // Set up default CPU name for PS4 compilers. if (Triple.isPS4CPU()) return "btver2"; // On Android use targets compatible with gcc if (Triple.isAndroid()) return Is64Bit ? "x86-64" : "i686"; // Everything else goes to x86-64 in 64-bit mode. if (Is64Bit) return "x86-64"; switch (Triple.getOS()) { case llvm::Triple::FreeBSD: case llvm::Triple::NetBSD: case llvm::Triple::OpenBSD: return "i486"; case llvm::Triple::Haiku: return "i586"; case llvm::Triple::Bitrig: return "i686"; default: // Fallback to p4. return "pentium4"; } } /// Get the (LLVM) name of the WebAssembly cpu we are targeting. static StringRef getWebAssemblyTargetCPU(const ArgList &Args) { // If we have -mcpu=, use that. if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { StringRef CPU = A->getValue(); #ifdef __wasm__ // Handle "native" by examining the host. "native" isn't meaningful when // cross compiling, so only support this when the host is also WebAssembly. if (CPU == "native") return llvm::sys::getHostCPUName(); #endif return CPU; } return "generic"; } static std::string getCPUName(const ArgList &Args, const llvm::Triple &T, bool FromAs = false) { Arg *A; switch (T.getArch()) { default: return ""; case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: return getAArch64TargetCPU(Args, A); case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: { StringRef MArch, MCPU; getARMArchCPUFromArgs(Args, MArch, MCPU, FromAs); return arm::getARMTargetCPU(MCPU, MArch, T); } case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: { StringRef CPUName; StringRef ABIName; mips::getMipsCPUAndABI(Args, T, CPUName, ABIName); return CPUName; } case llvm::Triple::nvptx: case llvm::Triple::nvptx64: if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) return A->getValue(); return ""; case llvm::Triple::ppc: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: { std::string TargetCPUName = getPPCTargetCPU(Args); // LLVM may default to generating code for the native CPU, // but, like gcc, we default to a more generic option for // each architecture. 
// (except on Darwin)
if (TargetCPUName.empty() && !T.isOSDarwin()) { if (T.getArch() == llvm::Triple::ppc64) TargetCPUName = "ppc64"; else if (T.getArch() == llvm::Triple::ppc64le) TargetCPUName = "ppc64le"; else TargetCPUName = "ppc"; } return TargetCPUName; } case llvm::Triple::sparc: case llvm::Triple::sparcel: case llvm::Triple::sparcv9: if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) return A->getValue(); return ""; case llvm::Triple::x86: case llvm::Triple::x86_64: return getX86TargetCPU(Args, T); case llvm::Triple::hexagon: return "hexagon" + toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str(); case llvm::Triple::lanai: return getLanaiTargetCPU(Args); case llvm::Triple::systemz: return getSystemZTargetCPU(Args); case llvm::Triple::r600: case llvm::Triple::amdgcn: return getR600TargetGPU(Args); case llvm::Triple::wasm32: case llvm::Triple::wasm64: return getWebAssemblyTargetCPU(Args); } } static unsigned getLTOParallelism(const ArgList &Args, const Driver &D) { unsigned Parallelism = 0; Arg *LtoJobsArg = Args.getLastArg(options::OPT_flto_jobs_EQ); if (LtoJobsArg && StringRef(LtoJobsArg->getValue()).getAsInteger(10, Parallelism)) D.Diag(diag::err_drv_invalid_int_value) << LtoJobsArg->getAsString(Args) << LtoJobsArg->getValue(); return Parallelism; } // CloudABI and WebAssembly use -ffunction-sections and -fdata-sections by // default. static bool isUseSeparateSections(const llvm::Triple &Triple) { return Triple.getOS() == llvm::Triple::CloudABI || Triple.getArch() == llvm::Triple::wasm32 || Triple.getArch() == llvm::Triple::wasm64; } static void AddGoldPlugin(const ToolChain &ToolChain, const ArgList &Args, ArgStringList &CmdArgs, bool IsThinLTO, const Driver &D) { // Tell the linker to load the plugin. This has to come before AddLinkerInputs // as gold requires -plugin to come before any -plugin-opt that -Wl might // forward. CmdArgs.push_back("-plugin"); std::string Plugin = ToolChain.getDriver().Dir + "/../lib" CLANG_LIBDIR_SUFFIX "/LLVMgold.so"; CmdArgs.push_back(Args.MakeArgString(Plugin)); // Try to pass driver level flags relevant to LTO code generation down to // the plugin. // Handle flags for selecting CPU variants. std::string CPU = getCPUName(Args, ToolChain.getTriple()); if (!CPU.empty()) CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=mcpu=") + CPU)); if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { StringRef OOpt; if (A->getOption().matches(options::OPT_O4) || A->getOption().matches(options::OPT_Ofast)) OOpt = "3"; else if (A->getOption().matches(options::OPT_O)) OOpt = A->getValue(); else if (A->getOption().matches(options::OPT_O0)) OOpt = "0"; if (!OOpt.empty()) CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=O") + OOpt)); } if (IsThinLTO) CmdArgs.push_back("-plugin-opt=thinlto"); if (unsigned Parallelism = getLTOParallelism(Args, D)) CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=jobs=") + llvm::to_string(Parallelism))); // If an explicit debugger tuning argument appeared, pass it along.
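// ---------------------------------------------------------------------------
// [Illustrative aside] getLTOParallelism() relies on getAsInteger(), which
// returns true on a parse failure so the driver can diagnose a bad
// -flto-jobs= value. A standalone equivalent using std::from_chars (C++17):
#include <charconv>
#include <iostream>
#include <string>

static unsigned parseLtoJobs(const std::string &Val, bool &Bad) {
  unsigned N = 0;
  auto [Ptr, Ec] = std::from_chars(Val.data(), Val.data() + Val.size(), N);
  Bad = (Ec != std::errc{} || Ptr != Val.data() + Val.size());
  return Bad ? 0 : N;
}

int main() {
  bool Bad = false;
  unsigned Jobs = parseLtoJobs("8", Bad);
  if (Bad)
    std::cout << "error: invalid integral value in -flto-jobs=\n";
  else
    std::cout << "-plugin-opt=jobs=" << Jobs << "\n";
}
// ---------------------------------------------------------------------------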
if (Arg *A = Args.getLastArg(options::OPT_gTune_Group, options::OPT_ggdbN_Group)) { if (A->getOption().matches(options::OPT_glldb)) CmdArgs.push_back("-plugin-opt=-debugger-tune=lldb"); else if (A->getOption().matches(options::OPT_gsce)) CmdArgs.push_back("-plugin-opt=-debugger-tune=sce"); else CmdArgs.push_back("-plugin-opt=-debugger-tune=gdb"); } bool UseSeparateSections = isUseSeparateSections(ToolChain.getEffectiveTriple()); if (Args.hasFlag(options::OPT_ffunction_sections, options::OPT_fno_function_sections, UseSeparateSections)) { CmdArgs.push_back("-plugin-opt=-function-sections"); } if (Args.hasFlag(options::OPT_fdata_sections, options::OPT_fno_data_sections, UseSeparateSections)) { CmdArgs.push_back("-plugin-opt=-data-sections"); } if (Arg *A = Args.getLastArg(options::OPT_fprofile_sample_use_EQ)) { StringRef FName = A->getValue(); if (!llvm::sys::fs::exists(FName)) D.Diag(diag::err_drv_no_such_file) << FName; else CmdArgs.push_back( Args.MakeArgString(Twine("-plugin-opt=sample-profile=") + FName)); } } /// This is a helper function for validating the optional refinement step /// parameter in reciprocal argument strings. Return false if there is an error /// parsing the refinement step. Otherwise, return true and set the Position /// of the refinement step in the input string. static bool getRefinementStep(StringRef In, const Driver &D, const Arg &A, size_t &Position) { const char RefinementStepToken = ':'; Position = In.find(RefinementStepToken); if (Position != StringRef::npos) { StringRef Option = A.getOption().getName(); StringRef RefStep = In.substr(Position + 1); // Allow exactly one numeric character for the additional refinement // step parameter. This is reasonable for all currently-supported // operations and architectures because we would expect that a larger value // of refinement steps would cause the estimate "optimization" to // under-perform the native operation. Also, if the estimate does not // converge quickly, it probably will not ever converge, so further // refinement steps will not produce a better answer. if (RefStep.size() != 1) { D.Diag(diag::err_drv_invalid_value) << Option << RefStep; return false; } char RefStepChar = RefStep[0]; if (RefStepChar < '0' || RefStepChar > '9') { D.Diag(diag::err_drv_invalid_value) << Option << RefStep; return false; } } return true; } /// The -mrecip flag requires processing of many optional parameters. static void ParseMRecip(const Driver &D, const ArgList &Args, ArgStringList &OutStrings) { StringRef DisabledPrefixIn = "!"; StringRef DisabledPrefixOut = "!"; StringRef EnabledPrefixOut = ""; StringRef Out = "-mrecip="; Arg *A = Args.getLastArg(options::OPT_mrecip, options::OPT_mrecip_EQ); if (!A) return; unsigned NumOptions = A->getNumValues(); if (NumOptions == 0) { // No option is the same as "all". OutStrings.push_back(Args.MakeArgString(Out + "all")); return; } // Pass through "all", "none", or "default" with an optional refinement step. if (NumOptions == 1) { StringRef Val = A->getValue(0); size_t RefStepLoc; if (!getRefinementStep(Val, D, *A, RefStepLoc)) return; StringRef ValBase = Val.slice(0, RefStepLoc); if (ValBase == "all" || ValBase == "none" || ValBase == "default") { OutStrings.push_back(Args.MakeArgString(Out + Val)); return; } } // Each reciprocal type may be enabled or disabled individually. // Check each input value for validity, concatenate them all back together, // and pass through. 
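// ---------------------------------------------------------------------------
// [Illustrative aside] Each -mrecip= element follows the little grammar
// "[!]name[:digit]": a leading '!' disables the estimate, and the optional
// suffix after ':' must be exactly one digit. A standalone sketch of the
// parse that getRefinementStep() and its caller perform:
#include <iostream>
#include <string>

static bool parseRecipElem(std::string V, std::string &Name, bool &Disabled,
                           int &Step) {
  Disabled = !V.empty() && V[0] == '!';
  if (Disabled)
    V.erase(0, 1);
  Step = -1;                             // -1 means no refinement step given
  size_t Colon = V.find(':');
  if (Colon != std::string::npos) {
    std::string S = V.substr(Colon + 1);
    if (S.size() != 1 || S[0] < '0' || S[0] > '9')
      return false;                      // exactly one digit is allowed
    Step = S[0] - '0';
    V.erase(Colon);
  }
  Name = V;
  return true;
}

int main() {
  std::string Name;
  bool Off;
  int Step;
  if (parseRecipElem("!vec-sqrtf:2", Name, Off, Step))
    std::cout << Name << " disabled=" << Off << " step=" << Step << "\n";
}
// ---------------------------------------------------------------------------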
llvm::StringMap OptionStrings; OptionStrings.insert(std::make_pair("divd", false)); OptionStrings.insert(std::make_pair("divf", false)); OptionStrings.insert(std::make_pair("vec-divd", false)); OptionStrings.insert(std::make_pair("vec-divf", false)); OptionStrings.insert(std::make_pair("sqrtd", false)); OptionStrings.insert(std::make_pair("sqrtf", false)); OptionStrings.insert(std::make_pair("vec-sqrtd", false)); OptionStrings.insert(std::make_pair("vec-sqrtf", false)); for (unsigned i = 0; i != NumOptions; ++i) { StringRef Val = A->getValue(i); bool IsDisabled = Val.startswith(DisabledPrefixIn); // Ignore the disablement token for string matching. if (IsDisabled) Val = Val.substr(1); size_t RefStep; if (!getRefinementStep(Val, D, *A, RefStep)) return; StringRef ValBase = Val.slice(0, RefStep); llvm::StringMap::iterator OptionIter = OptionStrings.find(ValBase); if (OptionIter == OptionStrings.end()) { // Try again specifying float suffix. OptionIter = OptionStrings.find(ValBase.str() + 'f'); if (OptionIter == OptionStrings.end()) { // The input name did not match any known option string. D.Diag(diag::err_drv_unknown_argument) << Val; return; } // The option was specified without a float or double suffix. // Make sure that the double entry was not already specified. // The float entry will be checked below. if (OptionStrings[ValBase.str() + 'd']) { D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Val; return; } } if (OptionIter->second == true) { // Duplicate option specified. D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Val; return; } // Mark the matched option as found. Do not allow duplicate specifiers. OptionIter->second = true; // If the precision was not specified, also mark the double entry as found. if (ValBase.back() != 'f' && ValBase.back() != 'd') OptionStrings[ValBase.str() + 'd'] = true; // Build the output string. StringRef Prefix = IsDisabled ? DisabledPrefixOut : EnabledPrefixOut; Out = Args.MakeArgString(Out + Prefix + Val); if (i != NumOptions - 1) Out = Args.MakeArgString(Out + ","); } OutStrings.push_back(Args.MakeArgString(Out)); } static void getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, const ArgList &Args, std::vector &Features) { // If -march=native, autodetect the feature list. if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) { if (StringRef(A->getValue()) == "native") { llvm::StringMap HostFeatures; if (llvm::sys::getHostCPUFeatures(HostFeatures)) for (auto &F : HostFeatures) Features.push_back( Args.MakeArgString((F.second ? "+" : "-") + F.first())); } } if (Triple.getArchName() == "x86_64h") { // x86_64h implies quite a few of the more modern subtarget features // for Haswell class CPUs, but not all of them. Opt-out of a few. Features.push_back("-rdrnd"); Features.push_back("-aes"); Features.push_back("-pclmul"); Features.push_back("-rtm"); Features.push_back("-hle"); Features.push_back("-fsgsbase"); } const llvm::Triple::ArchType ArchType = Triple.getArch(); // Add features to be compatible with gcc for Android. if (Triple.isAndroid()) { if (ArchType == llvm::Triple::x86_64) { Features.push_back("+sse4.2"); Features.push_back("+popcnt"); } else Features.push_back("+ssse3"); } // Set features according to the -arch flag on MSVC. if (Arg *A = Args.getLastArg(options::OPT__SLASH_arch)) { StringRef Arch = A->getValue(); bool ArchUsed = false; // First, look for flags that are shared in x86 and x86-64. 
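// For example, "/arch:AVX2" lowers to the feature "+avx2" on both x86 and
// x86-64, "/arch:SSE2" lowers to "+sse2" on x86 only, and "/arch:IA32"
// selects the baseline without adding a feature; anything else is warned
// about as an unused argument.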
if (ArchType == llvm::Triple::x86_64 || ArchType == llvm::Triple::x86) { if (Arch == "AVX" || Arch == "AVX2") { ArchUsed = true; Features.push_back(Args.MakeArgString("+" + Arch.lower())); } } // Then, look for x86-specific flags. if (ArchType == llvm::Triple::x86) { if (Arch == "IA32") { ArchUsed = true; } else if (Arch == "SSE" || Arch == "SSE2") { ArchUsed = true; Features.push_back(Args.MakeArgString("+" + Arch.lower())); } } if (!ArchUsed) D.Diag(clang::diag::warn_drv_unused_argument) << A->getAsString(Args); } // Now add any that the user explicitly requested on the command line, // which may override the defaults. handleTargetFeaturesGroup(Args, Features, options::OPT_m_x86_Features_Group); } void Clang::AddX86TargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { if (!Args.hasFlag(options::OPT_mred_zone, options::OPT_mno_red_zone, true) || Args.hasArg(options::OPT_mkernel) || Args.hasArg(options::OPT_fapple_kext)) CmdArgs.push_back("-disable-red-zone"); // Default to avoid implicit floating-point for kernel/kext code, but allow // that to be overridden with -mno-soft-float. bool NoImplicitFloat = (Args.hasArg(options::OPT_mkernel) || Args.hasArg(options::OPT_fapple_kext)); if (Arg *A = Args.getLastArg( options::OPT_msoft_float, options::OPT_mno_soft_float, options::OPT_mimplicit_float, options::OPT_mno_implicit_float)) { const Option &O = A->getOption(); NoImplicitFloat = (O.matches(options::OPT_mno_implicit_float) || O.matches(options::OPT_msoft_float)); } if (NoImplicitFloat) CmdArgs.push_back("-no-implicit-float"); if (Arg *A = Args.getLastArg(options::OPT_masm_EQ)) { StringRef Value = A->getValue(); if (Value == "intel" || Value == "att") { CmdArgs.push_back("-mllvm"); CmdArgs.push_back(Args.MakeArgString("-x86-asm-syntax=" + Value)); } else { getToolChain().getDriver().Diag(diag::err_drv_unsupported_option_argument) << A->getOption().getName() << Value; } } // Set flags to support MCU ABI. if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false)) { CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("soft"); CmdArgs.push_back("-mstack-alignment=4"); } } void Clang::AddHexagonTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { CmdArgs.push_back("-mqdsp6-compat"); CmdArgs.push_back("-Wreturn-type"); if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) { std::string N = llvm::utostr(G.getValue()); std::string Opt = std::string("-hexagon-small-data-threshold=") + N; CmdArgs.push_back("-mllvm"); CmdArgs.push_back(Args.MakeArgString(Opt)); } if (!Args.hasArg(options::OPT_fno_short_enums)) CmdArgs.push_back("-fshort-enums"); if (Args.getLastArg(options::OPT_mieee_rnd_near)) { CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-enable-hexagon-ieee-rnd-near"); } CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-machine-sink-split=0"); } void Clang::AddLanaiTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { StringRef CPUName = A->getValue(); CmdArgs.push_back("-target-cpu"); CmdArgs.push_back(Args.MakeArgString(CPUName)); } if (Arg *A = Args.getLastArg(options::OPT_mregparm_EQ)) { StringRef Value = A->getValue(); // Only support mregparm=4 to support old usage. Report error for all other // cases. 
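// For example, "-mregparm=4" is accepted without emitting anything extra,
// while "-mregparm=2" (or a non-numeric value) is diagnosed as an
// unsupported option argument.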
int Mregparm; if (Value.getAsInteger(10, Mregparm) || Mregparm != 4) { getToolChain().getDriver().Diag( diag::err_drv_unsupported_option_argument) << A->getOption().getName() << Value; } } } void Clang::AddWebAssemblyTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { // Default to "hidden" visibility. if (!Args.hasArg(options::OPT_fvisibility_EQ, options::OPT_fvisibility_ms_compat)) { CmdArgs.push_back("-fvisibility"); CmdArgs.push_back("hidden"); } } // Decode AArch64 features from string like +[no]featureA+[no]featureB+... static bool DecodeAArch64Features(const Driver &D, StringRef text, std::vector<StringRef> &Features) { SmallVector<StringRef, 8> Split; text.split(Split, StringRef("+"), -1, false); for (StringRef Feature : Split) { StringRef FeatureName = llvm::AArch64::getArchExtFeature(Feature); if (!FeatureName.empty()) Features.push_back(FeatureName); else if (Feature == "neon" || Feature == "noneon") D.Diag(diag::err_drv_no_neon_modifier); else return false; } return true; } // Check if the CPU name and feature modifiers in -mcpu are legal. If yes, // decode CPU and feature. static bool DecodeAArch64Mcpu(const Driver &D, StringRef Mcpu, StringRef &CPU, std::vector<StringRef> &Features) { std::pair<StringRef, StringRef> Split = Mcpu.split("+"); CPU = Split.first; if (CPU == "generic") { Features.push_back("+neon"); } else { unsigned ArchKind = llvm::AArch64::parseCPUArch(CPU); if (!llvm::AArch64::getArchFeatures(ArchKind, Features)) return false; unsigned Extension = llvm::AArch64::getDefaultExtensions(CPU, ArchKind); if (!llvm::AArch64::getExtensionFeatures(Extension, Features)) return false; } if (Split.second.size() && !DecodeAArch64Features(D, Split.second, Features)) return false; return true; } static bool getAArch64ArchFeaturesFromMarch(const Driver &D, StringRef March, const ArgList &Args, std::vector<StringRef> &Features) { std::string MarchLowerCase = March.lower(); std::pair<StringRef, StringRef> Split = StringRef(MarchLowerCase).split("+"); unsigned ArchKind = llvm::AArch64::parseArch(Split.first); if (ArchKind == static_cast<unsigned>(llvm::AArch64::ArchKind::AK_INVALID) || !llvm::AArch64::getArchFeatures(ArchKind, Features) || (Split.second.size() && !DecodeAArch64Features(D, Split.second, Features))) return false; return true; } static bool getAArch64ArchFeaturesFromMcpu(const Driver &D, StringRef Mcpu, const ArgList &Args, std::vector<StringRef> &Features) { StringRef CPU; std::string McpuLowerCase = Mcpu.lower(); if (!DecodeAArch64Mcpu(D, McpuLowerCase, CPU, Features)) return false; return true; } static bool getAArch64MicroArchFeaturesFromMtune(const Driver &D, StringRef Mtune, const ArgList &Args, std::vector<StringRef> &Features) { std::string MtuneLowerCase = Mtune.lower(); // Handle the CPU name being 'native'. if (MtuneLowerCase == "native") MtuneLowerCase = llvm::sys::getHostCPUName(); if (MtuneLowerCase == "cyclone") { Features.push_back("+zcm"); Features.push_back("+zcz"); } return true; } static bool getAArch64MicroArchFeaturesFromMcpu(const Driver &D, StringRef Mcpu, const ArgList &Args, std::vector<StringRef> &Features) { StringRef CPU; std::vector<StringRef> DecodedFeature; std::string McpuLowerCase = Mcpu.lower(); if (!DecodeAArch64Mcpu(D, McpuLowerCase, CPU, DecodedFeature)) return false; return getAArch64MicroArchFeaturesFromMtune(D, CPU, Args, Features); } static void getAArch64TargetFeatures(const Driver &D, const ArgList &Args, std::vector<StringRef> &Features) { Arg *A; bool success = true; // Enable NEON by default.
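// For example, "-mcpu=cyclone+nocrypto" decodes to CPU "cyclone" with its
// default extensions minus crypto ("-crypto"), and the mtune path below
// additionally enables "+zcm" and "+zcz" for cyclone. When both -march and
// -mcpu are given, -march wins for architecture features.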
Features.push_back("+neon"); if ((A = Args.getLastArg(options::OPT_march_EQ))) success = getAArch64ArchFeaturesFromMarch(D, A->getValue(), Args, Features); else if ((A = Args.getLastArg(options::OPT_mcpu_EQ))) success = getAArch64ArchFeaturesFromMcpu(D, A->getValue(), Args, Features); else if (Args.hasArg(options::OPT_arch)) success = getAArch64ArchFeaturesFromMcpu(D, getAArch64TargetCPU(Args, A), Args, Features); if (success && (A = Args.getLastArg(options::OPT_mtune_EQ))) success = getAArch64MicroArchFeaturesFromMtune(D, A->getValue(), Args, Features); else if (success && (A = Args.getLastArg(options::OPT_mcpu_EQ))) success = getAArch64MicroArchFeaturesFromMcpu(D, A->getValue(), Args, Features); else if (success && Args.hasArg(options::OPT_arch)) success = getAArch64MicroArchFeaturesFromMcpu( D, getAArch64TargetCPU(Args, A), Args, Features); if (!success) D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args); if (Args.getLastArg(options::OPT_mgeneral_regs_only)) { Features.push_back("-fp-armv8"); Features.push_back("-crypto"); Features.push_back("-neon"); } // En/disable crc if (Arg *A = Args.getLastArg(options::OPT_mcrc, options::OPT_mnocrc)) { if (A->getOption().matches(options::OPT_mcrc)) Features.push_back("+crc"); else Features.push_back("-crc"); } if (Arg *A = Args.getLastArg(options::OPT_mno_unaligned_access, options::OPT_munaligned_access)) if (A->getOption().matches(options::OPT_mno_unaligned_access)) Features.push_back("+strict-align"); if (Args.hasArg(options::OPT_ffixed_x18)) Features.push_back("+reserve-x18"); } static void getHexagonTargetFeatures(const ArgList &Args, std::vector &Features) { handleTargetFeaturesGroup(Args, Features, options::OPT_m_hexagon_Features_Group); bool UseLongCalls = false; if (Arg *A = Args.getLastArg(options::OPT_mlong_calls, options::OPT_mno_long_calls)) { if (A->getOption().matches(options::OPT_mlong_calls)) UseLongCalls = true; } Features.push_back(UseLongCalls ? 
"+long-calls" : "-long-calls"); } static void getWebAssemblyTargetFeatures(const ArgList &Args, std::vector &Features) { handleTargetFeaturesGroup(Args, Features, options::OPT_m_wasm_Features_Group); } static void getAMDGPUTargetFeatures(const Driver &D, const ArgList &Args, std::vector &Features) { if (const Arg *dAbi = Args.getLastArg(options::OPT_mamdgpu_debugger_abi)) { StringRef value = dAbi->getValue(); if (value == "1.0") { Features.push_back("+amdgpu-debugger-insert-nops"); Features.push_back("+amdgpu-debugger-reserve-regs"); Features.push_back("+amdgpu-debugger-emit-prologue"); } else { D.Diag(diag::err_drv_clang_unsupported) << dAbi->getAsString(Args); } } handleTargetFeaturesGroup( Args, Features, options::OPT_m_amdgpu_Features_Group); } static void getTargetFeatures(const ToolChain &TC, const llvm::Triple &Triple, const ArgList &Args, ArgStringList &CmdArgs, bool ForAS) { const Driver &D = TC.getDriver(); std::vector Features; switch (Triple.getArch()) { default: break; case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: getMIPSTargetFeatures(D, Triple, Args, Features); break; case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: getARMTargetFeatures(TC, Triple, Args, CmdArgs, Features, ForAS); break; case llvm::Triple::ppc: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: getPPCTargetFeatures(D, Triple, Args, Features); break; case llvm::Triple::systemz: getSystemZTargetFeatures(Args, Features); break; case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: getAArch64TargetFeatures(D, Args, Features); break; case llvm::Triple::x86: case llvm::Triple::x86_64: getX86TargetFeatures(D, Triple, Args, Features); break; case llvm::Triple::hexagon: getHexagonTargetFeatures(Args, Features); break; case llvm::Triple::wasm32: case llvm::Triple::wasm64: getWebAssemblyTargetFeatures(Args, Features); break; case llvm::Triple::sparc: case llvm::Triple::sparcel: case llvm::Triple::sparcv9: getSparcTargetFeatures(D, Args, Features); break; case llvm::Triple::r600: case llvm::Triple::amdgcn: getAMDGPUTargetFeatures(D, Args, Features); break; } // Find the last of each feature. llvm::StringMap LastOpt; for (unsigned I = 0, N = Features.size(); I < N; ++I) { StringRef Name = Features[I]; assert(Name[0] == '-' || Name[0] == '+'); LastOpt[Name.drop_front(1)] = I; } for (unsigned I = 0, N = Features.size(); I < N; ++I) { // If this feature was overridden, ignore it. StringRef Name = Features[I]; llvm::StringMap::iterator LastI = LastOpt.find(Name.drop_front(1)); assert(LastI != LastOpt.end()); unsigned Last = LastI->second; if (Last != I) continue; CmdArgs.push_back("-target-feature"); CmdArgs.push_back(Name.data()); } } static bool shouldUseExceptionTablesForObjCExceptions(const ObjCRuntime &runtime, const llvm::Triple &Triple) { // We use the zero-cost exception tables for Objective-C if the non-fragile // ABI is enabled or when compiling for x86_64 and ARM on Snow Leopard and // later. if (runtime.isNonFragile()) return true; if (!Triple.isMacOSX()) return false; return (!Triple.isMacOSXVersionLT(10, 5) && (Triple.getArch() == llvm::Triple::x86_64 || Triple.getArch() == llvm::Triple::arm)); } /// Adds exception related arguments to the driver command arguments. There's a /// master flag, -fexceptions and also language specific flags to enable/disable /// C++ and Objective-C exceptions. This makes it possible to for example /// disable C++ exceptions but enable Objective-C exceptions. 
static void addExceptionArgs(const ArgList &Args, types::ID InputType, const ToolChain &TC, bool KernelOrKext, const ObjCRuntime &objcRuntime, ArgStringList &CmdArgs) { const Driver &D = TC.getDriver(); const llvm::Triple &Triple = TC.getTriple(); if (KernelOrKext) { // -mkernel and -fapple-kext imply no exceptions, so claim exception related // arguments now to avoid warnings about unused arguments. Args.ClaimAllArgs(options::OPT_fexceptions); Args.ClaimAllArgs(options::OPT_fno_exceptions); Args.ClaimAllArgs(options::OPT_fobjc_exceptions); Args.ClaimAllArgs(options::OPT_fno_objc_exceptions); Args.ClaimAllArgs(options::OPT_fcxx_exceptions); Args.ClaimAllArgs(options::OPT_fno_cxx_exceptions); return; } // See if the user explicitly enabled exceptions. bool EH = Args.hasFlag(options::OPT_fexceptions, options::OPT_fno_exceptions, false); // Obj-C exceptions are enabled by default, regardless of -fexceptions. This // is not necessarily sensible, but follows GCC. if (types::isObjC(InputType) && Args.hasFlag(options::OPT_fobjc_exceptions, options::OPT_fno_objc_exceptions, true)) { CmdArgs.push_back("-fobjc-exceptions"); EH |= shouldUseExceptionTablesForObjCExceptions(objcRuntime, Triple); } if (types::isCXX(InputType)) { // Disable C++ EH by default on XCore and PS4. bool CXXExceptionsEnabled = Triple.getArch() != llvm::Triple::xcore && !Triple.isPS4CPU(); Arg *ExceptionArg = Args.getLastArg( options::OPT_fcxx_exceptions, options::OPT_fno_cxx_exceptions, options::OPT_fexceptions, options::OPT_fno_exceptions); if (ExceptionArg) CXXExceptionsEnabled = ExceptionArg->getOption().matches(options::OPT_fcxx_exceptions) || ExceptionArg->getOption().matches(options::OPT_fexceptions); if (CXXExceptionsEnabled) { if (Triple.isPS4CPU()) { ToolChain::RTTIMode RTTIMode = TC.getRTTIMode(); assert(ExceptionArg && "On the PS4 exceptions should only be enabled if passing " "an argument"); if (RTTIMode == ToolChain::RM_DisabledExplicitly) { const Arg *RTTIArg = TC.getRTTIArg(); assert(RTTIArg && "RTTI disabled explicitly but no RTTIArg!"); D.Diag(diag::err_drv_argument_not_allowed_with) << RTTIArg->getAsString(Args) << ExceptionArg->getAsString(Args); } else if (RTTIMode == ToolChain::RM_EnabledImplicitly) D.Diag(diag::warn_drv_enabling_rtti_with_exceptions); } else assert(TC.getRTTIMode() != ToolChain::RM_DisabledImplicitly); CmdArgs.push_back("-fcxx-exceptions"); EH = true; } } if (EH) CmdArgs.push_back("-fexceptions"); } static bool ShouldDisableAutolink(const ArgList &Args, const ToolChain &TC) { bool Default = true; if (TC.getTriple().isOSDarwin()) { // The native darwin assembler doesn't support the linker_option directives, // so we disable them if we think the .s file will be passed to it. Default = TC.useIntegratedAs(); } return !Args.hasFlag(options::OPT_fautolink, options::OPT_fno_autolink, Default); } static bool ShouldDisableDwarfDirectory(const ArgList &Args, const ToolChain &TC) { bool UseDwarfDirectory = Args.hasFlag(options::OPT_fdwarf_directory_asm, options::OPT_fno_dwarf_directory_asm, TC.useIntegratedAs()); return !UseDwarfDirectory; } /// \brief Check whether the given input tree contains any compilation actions. static bool ContainsCompileAction(const Action *A) { if (isa(A) || isa(A)) return true; for (const auto &AI : A->inputs()) if (ContainsCompileAction(AI)) return true; return false; } /// \brief Check if -relax-all should be passed to the internal assembler. /// This is done by default when compiling non-assembler source with -O0. 
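// For example, "clang -O0 -c foo.c" passes -mrelax-all to the integrated
// assembler (a compile action is present and -O0 relaxes by default),
// whereas "clang -O2 -c foo.c" does not unless -mrelax-all is given
// explicitly.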
static bool UseRelaxAll(Compilation &C, const ArgList &Args) { bool RelaxDefault = true; if (Arg *A = Args.getLastArg(options::OPT_O_Group)) RelaxDefault = A->getOption().matches(options::OPT_O0); if (RelaxDefault) { RelaxDefault = false; for (const auto &Act : C.getActions()) { if (ContainsCompileAction(Act)) { RelaxDefault = true; break; } } } return Args.hasFlag(options::OPT_mrelax_all, options::OPT_mno_relax_all, RelaxDefault); } // Convert an arg of the form "-gN" or "-ggdbN" or one of their aliases // to the corresponding DebugInfoKind. static codegenoptions::DebugInfoKind DebugLevelToInfoKind(const Arg &A) { assert(A.getOption().matches(options::OPT_gN_Group) && "Not a -g option that specifies a debug-info level"); if (A.getOption().matches(options::OPT_g0) || A.getOption().matches(options::OPT_ggdb0)) return codegenoptions::NoDebugInfo; if (A.getOption().matches(options::OPT_gline_tables_only) || A.getOption().matches(options::OPT_ggdb1)) return codegenoptions::DebugLineTablesOnly; return codegenoptions::LimitedDebugInfo; } // Extract the integer N from a string spelled "-dwarf-N", returning 0 // on mismatch. The StringRef input (rather than an Arg) allows // for use by the "-Xassembler" option parser. static unsigned DwarfVersionNum(StringRef ArgValue) { return llvm::StringSwitch(ArgValue) .Case("-gdwarf-2", 2) .Case("-gdwarf-3", 3) .Case("-gdwarf-4", 4) .Case("-gdwarf-5", 5) .Default(0); } static void RenderDebugEnablingArgs(const ArgList &Args, ArgStringList &CmdArgs, codegenoptions::DebugInfoKind DebugInfoKind, unsigned DwarfVersion, llvm::DebuggerKind DebuggerTuning) { switch (DebugInfoKind) { case codegenoptions::DebugLineTablesOnly: CmdArgs.push_back("-debug-info-kind=line-tables-only"); break; case codegenoptions::LimitedDebugInfo: CmdArgs.push_back("-debug-info-kind=limited"); break; case codegenoptions::FullDebugInfo: CmdArgs.push_back("-debug-info-kind=standalone"); break; default: break; } if (DwarfVersion > 0) CmdArgs.push_back( Args.MakeArgString("-dwarf-version=" + Twine(DwarfVersion))); switch (DebuggerTuning) { case llvm::DebuggerKind::GDB: CmdArgs.push_back("-debugger-tuning=gdb"); break; case llvm::DebuggerKind::LLDB: CmdArgs.push_back("-debugger-tuning=lldb"); break; case llvm::DebuggerKind::SCE: CmdArgs.push_back("-debugger-tuning=sce"); break; default: break; } } static void CollectArgsForIntegratedAssembler(Compilation &C, const ArgList &Args, ArgStringList &CmdArgs, const Driver &D) { if (UseRelaxAll(C, Args)) CmdArgs.push_back("-mrelax-all"); // Only default to -mincremental-linker-compatible if we think we are // targeting the MSVC linker. 
bool DefaultIncrementalLinkerCompatible = C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment(); if (Args.hasFlag(options::OPT_mincremental_linker_compatible, options::OPT_mno_incremental_linker_compatible, DefaultIncrementalLinkerCompatible)) CmdArgs.push_back("-mincremental-linker-compatible"); switch (C.getDefaultToolChain().getArch()) { case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: if (Arg *A = Args.getLastArg(options::OPT_mimplicit_it_EQ)) { StringRef Value = A->getValue(); if (Value == "always" || Value == "never" || Value == "arm" || Value == "thumb") { CmdArgs.push_back("-mllvm"); CmdArgs.push_back(Args.MakeArgString("-arm-implicit-it=" + Value)); } else { D.Diag(diag::err_drv_unsupported_option_argument) << A->getOption().getName() << Value; } } break; default: break; } // When passing -I arguments to the assembler we sometimes need to // unconditionally take the next argument. For example, when parsing // '-Wa,-I -Wa,foo' we need to accept the -Wa,foo arg after seeing the // -Wa,-I arg and when parsing '-Wa,-I,foo' we need to accept the 'foo' // arg after parsing the '-I' arg. bool TakeNextArg = false; // When using an integrated assembler, translate -Wa, and -Xassembler // options. bool CompressDebugSections = false; bool UseRelaxRelocations = ENABLE_X86_RELAX_RELOCATIONS; const char *MipsTargetFeature = nullptr; for (const Arg *A : Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler)) { A->claim(); for (StringRef Value : A->getValues()) { if (TakeNextArg) { CmdArgs.push_back(Value.data()); TakeNextArg = false; continue; } if (C.getDefaultToolChain().getTriple().isOSBinFormatCOFF() && Value == "-mbig-obj") continue; // LLVM handles bigobj automatically switch (C.getDefaultToolChain().getArch()) { default: break; case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: if (Value == "--trap") { CmdArgs.push_back("-target-feature"); CmdArgs.push_back("+use-tcc-in-div"); continue; } if (Value == "--break") { CmdArgs.push_back("-target-feature"); CmdArgs.push_back("-use-tcc-in-div"); continue; } if (Value.startswith("-msoft-float")) { CmdArgs.push_back("-target-feature"); CmdArgs.push_back("+soft-float"); continue; } if (Value.startswith("-mhard-float")) { CmdArgs.push_back("-target-feature"); CmdArgs.push_back("-soft-float"); continue; } MipsTargetFeature = llvm::StringSwitch(Value) .Case("-mips1", "+mips1") .Case("-mips2", "+mips2") .Case("-mips3", "+mips3") .Case("-mips4", "+mips4") .Case("-mips5", "+mips5") .Case("-mips32", "+mips32") .Case("-mips32r2", "+mips32r2") .Case("-mips32r3", "+mips32r3") .Case("-mips32r5", "+mips32r5") .Case("-mips32r6", "+mips32r6") .Case("-mips64", "+mips64") .Case("-mips64r2", "+mips64r2") .Case("-mips64r3", "+mips64r3") .Case("-mips64r5", "+mips64r5") .Case("-mips64r6", "+mips64r6") .Default(nullptr); if (MipsTargetFeature) continue; } if (Value == "-force_cpusubtype_ALL") { // Do nothing, this is the default and we don't support anything else. 
} else if (Value == "-L") { CmdArgs.push_back("-msave-temp-labels"); } else if (Value == "--fatal-warnings") { CmdArgs.push_back("-massembler-fatal-warnings"); } else if (Value == "--noexecstack") { CmdArgs.push_back("-mnoexecstack"); } else if (Value == "-compress-debug-sections" || Value == "--compress-debug-sections") { CompressDebugSections = true; } else if (Value == "-nocompress-debug-sections" || Value == "--nocompress-debug-sections") { CompressDebugSections = false; } else if (Value == "-mrelax-relocations=yes" || Value == "--mrelax-relocations=yes") { UseRelaxRelocations = true; } else if (Value == "-mrelax-relocations=no" || Value == "--mrelax-relocations=no") { UseRelaxRelocations = false; } else if (Value.startswith("-I")) { CmdArgs.push_back(Value.data()); // We need to consume the next argument if the current arg is a plain // -I. The next arg will be the include directory. if (Value == "-I") TakeNextArg = true; } else if (Value.startswith("-gdwarf-")) { // "-gdwarf-N" options are not cc1as options. unsigned DwarfVersion = DwarfVersionNum(Value); if (DwarfVersion == 0) { // Send it onward, and let cc1as complain. CmdArgs.push_back(Value.data()); } else { RenderDebugEnablingArgs(Args, CmdArgs, codegenoptions::LimitedDebugInfo, DwarfVersion, llvm::DebuggerKind::Default); } } else if (Value.startswith("-mcpu") || Value.startswith("-mfpu") || Value.startswith("-mhwdiv") || Value.startswith("-march")) { // Do nothing, we'll validate it later. } else if (Value == "-defsym") { if (A->getNumValues() != 2) { D.Diag(diag::err_drv_defsym_invalid_format) << Value; break; } const char *S = A->getValue(1); auto Pair = StringRef(S).split('='); auto Sym = Pair.first; auto SVal = Pair.second; if (Sym.empty() || SVal.empty()) { D.Diag(diag::err_drv_defsym_invalid_format) << S; break; } int64_t IVal; if (SVal.getAsInteger(0, IVal)) { D.Diag(diag::err_drv_defsym_invalid_symval) << SVal; break; } CmdArgs.push_back(Value.data()); TakeNextArg = true; } else { D.Diag(diag::err_drv_unsupported_option_argument) << A->getOption().getName() << Value; } } } if (CompressDebugSections) { if (llvm::zlib::isAvailable()) CmdArgs.push_back("-compress-debug-sections"); else D.Diag(diag::warn_debug_compression_unavailable); } if (UseRelaxRelocations) CmdArgs.push_back("--mrelax-relocations"); if (MipsTargetFeature != nullptr) { CmdArgs.push_back("-target-feature"); CmdArgs.push_back(MipsTargetFeature); } } // This adds the static libclang_rt.builtins-arch.a directly to the command line // FIXME: Make sure we can also emit shared objects if they're requested // and available, check for possible errors, etc. static void addClangRT(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs) { CmdArgs.push_back(TC.getCompilerRTArgString(Args, "builtins")); } static void addOpenMPRuntime(ArgStringList &CmdArgs, const ToolChain &TC, const ArgList &Args) { if (!Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, options::OPT_fno_openmp, false)) return; switch (TC.getDriver().getOpenMPRuntime(Args)) { case Driver::OMPRT_OMP: CmdArgs.push_back("-lomp"); break; case Driver::OMPRT_GOMP: CmdArgs.push_back("-lgomp"); break; case Driver::OMPRT_IOMP5: CmdArgs.push_back("-liomp5"); break; case Driver::OMPRT_Unknown: // Already diagnosed. break; } } static void addSanitizerRuntime(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs, StringRef Sanitizer, bool IsShared, bool IsWhole) { // Wrap any static runtimes that must be forced into executable in // whole-archive. 
if (IsWhole) CmdArgs.push_back("-whole-archive"); CmdArgs.push_back(TC.getCompilerRTArgString(Args, Sanitizer, IsShared)); if (IsWhole) CmdArgs.push_back("-no-whole-archive"); } // Tries to use a file with the list of dynamic symbols that need to be exported // from the runtime library. Returns true if the file was found. static bool addSanitizerDynamicList(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs, StringRef Sanitizer) { SmallString<128> SanRT(TC.getCompilerRT(Args, Sanitizer)); if (llvm::sys::fs::exists(SanRT + ".syms")) { CmdArgs.push_back(Args.MakeArgString("--dynamic-list=" + SanRT + ".syms")); return true; } return false; } static void linkSanitizerRuntimeDeps(const ToolChain &TC, ArgStringList &CmdArgs) { // Force linking against the system libraries the sanitizers depend on // (see PR15823 for why this is necessary). CmdArgs.push_back("--no-as-needed"); // There's no libpthread or librt on RTEMS. if (TC.getTriple().getOS() != llvm::Triple::RTEMS) { CmdArgs.push_back("-lpthread"); CmdArgs.push_back("-lrt"); } CmdArgs.push_back("-lm"); // There's no libdl on FreeBSD or RTEMS. if (TC.getTriple().getOS() != llvm::Triple::FreeBSD && TC.getTriple().getOS() != llvm::Triple::RTEMS) CmdArgs.push_back("-ldl"); } static void collectSanitizerRuntimes(const ToolChain &TC, const ArgList &Args, SmallVectorImpl<StringRef> &SharedRuntimes, SmallVectorImpl<StringRef> &StaticRuntimes, SmallVectorImpl<StringRef> &NonWholeStaticRuntimes, SmallVectorImpl<StringRef> &HelperStaticRuntimes, SmallVectorImpl<StringRef> &RequiredSymbols) { const SanitizerArgs &SanArgs = TC.getSanitizerArgs(); // Collect shared runtimes. if (SanArgs.needsAsanRt() && SanArgs.needsSharedAsanRt()) { SharedRuntimes.push_back("asan"); } // The stats_client library is also statically linked into DSOs. if (SanArgs.needsStatsRt()) StaticRuntimes.push_back("stats_client"); // Collect static runtimes. if (Args.hasArg(options::OPT_shared) || TC.getTriple().isAndroid()) { // Don't link static runtimes into DSOs or if compiling for Android. return; } if (SanArgs.needsAsanRt()) { if (SanArgs.needsSharedAsanRt()) { HelperStaticRuntimes.push_back("asan-preinit"); } else { StaticRuntimes.push_back("asan"); if (SanArgs.linkCXXRuntimes()) StaticRuntimes.push_back("asan_cxx"); } } if (SanArgs.needsDfsanRt()) StaticRuntimes.push_back("dfsan"); if (SanArgs.needsLsanRt()) StaticRuntimes.push_back("lsan"); if (SanArgs.needsMsanRt()) { StaticRuntimes.push_back("msan"); if (SanArgs.linkCXXRuntimes()) StaticRuntimes.push_back("msan_cxx"); } if (SanArgs.needsTsanRt()) { StaticRuntimes.push_back("tsan"); if (SanArgs.linkCXXRuntimes()) StaticRuntimes.push_back("tsan_cxx"); } if (SanArgs.needsUbsanRt()) { StaticRuntimes.push_back("ubsan_standalone"); if (SanArgs.linkCXXRuntimes()) StaticRuntimes.push_back("ubsan_standalone_cxx"); } if (SanArgs.needsSafeStackRt()) StaticRuntimes.push_back("safestack"); if (SanArgs.needsCfiRt()) StaticRuntimes.push_back("cfi"); if (SanArgs.needsCfiDiagRt()) { StaticRuntimes.push_back("cfi_diag"); if (SanArgs.linkCXXRuntimes()) StaticRuntimes.push_back("ubsan_standalone_cxx"); } if (SanArgs.needsStatsRt()) { NonWholeStaticRuntimes.push_back("stats"); RequiredSymbols.push_back("__sanitizer_stats_register"); } if (SanArgs.needsEsanRt()) StaticRuntimes.push_back("esan"); } // Should be called before we add system libraries (C++ ABI, libstdc++/libc++, // C runtime, etc.). Returns true if sanitizer system deps need to be linked in.
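// For example, a plain "-fsanitize=address" executable link statically pulls
// in "asan" (whole-archived, plus "asan_cxx" when the C++ runtimes are
// linked), whereas the shared-ASan configuration moves "asan" to the shared
// list and adds the static "asan-preinit" helper; a -shared link gets no
// static sanitizer runtimes at all.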
static bool addSanitizerRuntimes(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs) { SmallVector SharedRuntimes, StaticRuntimes, NonWholeStaticRuntimes, HelperStaticRuntimes, RequiredSymbols; collectSanitizerRuntimes(TC, Args, SharedRuntimes, StaticRuntimes, NonWholeStaticRuntimes, HelperStaticRuntimes, RequiredSymbols); for (auto RT : SharedRuntimes) addSanitizerRuntime(TC, Args, CmdArgs, RT, true, false); for (auto RT : HelperStaticRuntimes) addSanitizerRuntime(TC, Args, CmdArgs, RT, false, true); bool AddExportDynamic = false; for (auto RT : StaticRuntimes) { addSanitizerRuntime(TC, Args, CmdArgs, RT, false, true); AddExportDynamic |= !addSanitizerDynamicList(TC, Args, CmdArgs, RT); } for (auto RT : NonWholeStaticRuntimes) { addSanitizerRuntime(TC, Args, CmdArgs, RT, false, false); AddExportDynamic |= !addSanitizerDynamicList(TC, Args, CmdArgs, RT); } for (auto S : RequiredSymbols) { CmdArgs.push_back("-u"); CmdArgs.push_back(Args.MakeArgString(S)); } // If there is a static runtime with no dynamic list, force all the symbols // to be dynamic to be sure we export sanitizer interface functions. if (AddExportDynamic) CmdArgs.push_back("-export-dynamic"); const SanitizerArgs &SanArgs = TC.getSanitizerArgs(); if (SanArgs.hasCrossDsoCfi() && !AddExportDynamic) CmdArgs.push_back("-export-dynamic-symbol=__cfi_check"); return !StaticRuntimes.empty(); } static bool addXRayRuntime(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs) { if (Args.hasFlag(options::OPT_fxray_instrument, options::OPT_fnoxray_instrument, false)) { CmdArgs.push_back("-whole-archive"); CmdArgs.push_back(TC.getCompilerRTArgString(Args, "xray", false)); CmdArgs.push_back("-no-whole-archive"); return true; } return false; } static void linkXRayRuntimeDeps(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs) { CmdArgs.push_back("--no-as-needed"); CmdArgs.push_back("-lpthread"); CmdArgs.push_back("-lrt"); CmdArgs.push_back("-lm"); CmdArgs.push_back("-latomic"); if (TC.GetCXXStdlibType(Args) == ToolChain::CST_Libcxx) CmdArgs.push_back("-lc++"); else CmdArgs.push_back("-lstdc++"); if (TC.getTriple().getOS() != llvm::Triple::FreeBSD) CmdArgs.push_back("-ldl"); } static bool areOptimizationsEnabled(const ArgList &Args) { // Find the last -O arg and see if it is non-zero. if (Arg *A = Args.getLastArg(options::OPT_O_Group)) return !A->getOption().matches(options::OPT_O0); // Defaults to -O0. return false; } static bool mustUseFramePointerForTarget(const llvm::Triple &Triple) { switch (Triple.getArch()){ default: return false; case llvm::Triple::arm: case llvm::Triple::thumb: // ARM Darwin targets require a frame pointer to be always present to aid // offline debugging via backtraces. return Triple.isOSDarwin(); } } static bool useFramePointerForTargetByDefault(const ArgList &Args, const llvm::Triple &Triple) { switch (Triple.getArch()) { case llvm::Triple::xcore: case llvm::Triple::wasm32: case llvm::Triple::wasm64: // XCore never wants frame pointers, regardless of OS. // WebAssembly never wants frame pointers. return false; default: break; } if (Triple.isOSLinux() || Triple.getOS() == llvm::Triple::CloudABI) { switch (Triple.getArch()) { // Don't use a frame pointer on linux if optimizing for certain targets. 
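// For example, on Linux x86_64 a "-O2" compile omits the frame pointer by
// default while "-O0" keeps it; "-pg" or "-fno-omit-frame-pointer" forces it
// on regardless (see shouldUseFramePointer below).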
case llvm::Triple::mips64: case llvm::Triple::mips64el: case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::systemz: case llvm::Triple::x86: case llvm::Triple::x86_64: return !areOptimizationsEnabled(Args); default: return true; } } if (Triple.isOSWindows()) { switch (Triple.getArch()) { case llvm::Triple::x86: return !areOptimizationsEnabled(Args); case llvm::Triple::x86_64: return Triple.isOSBinFormatMachO(); case llvm::Triple::arm: case llvm::Triple::thumb: // Windows on ARM builds with FPO disabled to aid fast stack walking. return true; default: // All other supported Windows ISAs use xdata unwind information, so frame // pointers are not generally useful. return false; } } return true; } static bool shouldUseFramePointer(const ArgList &Args, const llvm::Triple &Triple) { if (Arg *A = Args.getLastArg(options::OPT_fno_omit_frame_pointer, options::OPT_fomit_frame_pointer)) return A->getOption().matches(options::OPT_fno_omit_frame_pointer) || mustUseFramePointerForTarget(Triple); if (Args.hasArg(options::OPT_pg)) return true; return useFramePointerForTargetByDefault(Args, Triple); } static bool shouldUseLeafFramePointer(const ArgList &Args, const llvm::Triple &Triple) { if (Arg *A = Args.getLastArg(options::OPT_mno_omit_leaf_frame_pointer, options::OPT_momit_leaf_frame_pointer)) return A->getOption().matches(options::OPT_mno_omit_leaf_frame_pointer) || mustUseFramePointerForTarget(Triple); if (Args.hasArg(options::OPT_pg)) return true; if (Triple.isPS4CPU()) return false; return useFramePointerForTargetByDefault(Args, Triple); } /// Add a CC1 option to specify the debug compilation directory. static void addDebugCompDirArg(const ArgList &Args, ArgStringList &CmdArgs) { SmallString<128> cwd; if (!llvm::sys::fs::current_path(cwd)) { CmdArgs.push_back("-fdebug-compilation-dir"); CmdArgs.push_back(Args.MakeArgString(cwd)); } } static const char *SplitDebugName(const ArgList &Args, const InputInfo &Input) { Arg *FinalOutput = Args.getLastArg(options::OPT_o); if (FinalOutput && Args.hasArg(options::OPT_c)) { SmallString<128> T(FinalOutput->getValue()); llvm::sys::path::replace_extension(T, "dwo"); return Args.MakeArgString(T); } else { // Use the compilation dir. SmallString<128> T( Args.getLastArgValue(options::OPT_fdebug_compilation_dir)); SmallString<128> F(llvm::sys::path::stem(Input.getBaseInput())); llvm::sys::path::replace_extension(F, "dwo"); T += F; return Args.MakeArgString(T); } } static void SplitDebugInfo(const ToolChain &TC, Compilation &C, const Tool &T, const JobAction &JA, const ArgList &Args, const InputInfo &Output, const char *OutFile) { ArgStringList ExtractArgs; ExtractArgs.push_back("--extract-dwo"); ArgStringList StripArgs; StripArgs.push_back("--strip-dwo"); // Grabbing the output of the earlier compile step. StripArgs.push_back(Output.getFilename()); ExtractArgs.push_back(Output.getFilename()); ExtractArgs.push_back(OutFile); const char *Exec = Args.MakeArgString(TC.GetProgramPath("objcopy")); InputInfo II(types::TY_Object, Output.getFilename(), Output.getFilename()); // First extract the dwo sections. C.addCommand(llvm::make_unique<Command>(JA, T, Exec, ExtractArgs, II)); // Then remove them from the original .o file. C.addCommand(llvm::make_unique<Command>(JA, T, Exec, StripArgs, II)); } /// \brief Vectorize at all optimization levels greater than 1 except for -Oz. /// For -Oz the loop vectorizer is disabled, while the SLP vectorizer is enabled.
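// In other words: -O0 and -O1 vectorize nothing; -O2, -O3, -O4, -Ofast and
// -Os enable both vectorizers; -Oz enables only the SLP vectorizer.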
static bool shouldEnableVectorizerAtOLevel(const ArgList &Args, bool isSlpVec) { if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { if (A->getOption().matches(options::OPT_O4) || A->getOption().matches(options::OPT_Ofast)) return true; if (A->getOption().matches(options::OPT_O0)) return false; assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag"); // Vectorize -Os. StringRef S(A->getValue()); if (S == "s") return true; // Don't vectorize -Oz, unless it's the slp vectorizer. if (S == "z") return isSlpVec; unsigned OptLevel = 0; if (S.getAsInteger(10, OptLevel)) return false; return OptLevel > 1; } return false; } /// Add -x lang to \p CmdArgs for \p Input. static void addDashXForInput(const ArgList &Args, const InputInfo &Input, ArgStringList &CmdArgs) { // When using -verify-pch, we don't want to provide the type // 'precompiled-header' if it was inferred from the file extension if (Args.hasArg(options::OPT_verify_pch) && Input.getType() == types::TY_PCH) return; CmdArgs.push_back("-x"); if (Args.hasArg(options::OPT_rewrite_objc)) CmdArgs.push_back(types::getTypeName(types::TY_PP_ObjCXX)); else CmdArgs.push_back(types::getTypeName(Input.getType())); } // Claim options we don't want to warn if they are unused. We do this for // options that build systems might add but are unused when assembling or only // running the preprocessor for example. static void claimNoWarnArgs(const ArgList &Args) { // Don't warn about unused -f(no-)?lto. This can happen when we're // preprocessing, precompiling or assembling. Args.ClaimAllArgs(options::OPT_flto_EQ); Args.ClaimAllArgs(options::OPT_flto); Args.ClaimAllArgs(options::OPT_fno_lto); } static void appendUserToPath(SmallVectorImpl &Result) { #ifdef LLVM_ON_UNIX const char *Username = getenv("LOGNAME"); #else const char *Username = getenv("USERNAME"); #endif if (Username) { // Validate that LoginName can be used in a path, and get its length. size_t Len = 0; for (const char *P = Username; *P; ++P, ++Len) { if (!isAlphanumeric(*P) && *P != '_') { Username = nullptr; break; } } if (Username && Len > 0) { Result.append(Username, Username + Len); return; } } // Fallback to user id. #ifdef LLVM_ON_UNIX std::string UID = llvm::utostr(getuid()); #else // FIXME: Windows seems to have an 'SID' that might work. 
std::string UID = "9999"; #endif Result.append(UID.begin(), UID.end()); } static Arg *getLastProfileUseArg(const ArgList &Args) { auto *ProfileUseArg = Args.getLastArg( options::OPT_fprofile_instr_use, options::OPT_fprofile_instr_use_EQ, options::OPT_fprofile_use, options::OPT_fprofile_use_EQ, options::OPT_fno_profile_instr_use); if (ProfileUseArg && ProfileUseArg->getOption().matches(options::OPT_fno_profile_instr_use)) ProfileUseArg = nullptr; return ProfileUseArg; } static void addPGOAndCoverageFlags(Compilation &C, const Driver &D, const InputInfo &Output, const ArgList &Args, ArgStringList &CmdArgs) { auto *PGOGenerateArg = Args.getLastArg(options::OPT_fprofile_generate, options::OPT_fprofile_generate_EQ, options::OPT_fno_profile_generate); if (PGOGenerateArg && PGOGenerateArg->getOption().matches(options::OPT_fno_profile_generate)) PGOGenerateArg = nullptr; auto *ProfileGenerateArg = Args.getLastArg( options::OPT_fprofile_instr_generate, options::OPT_fprofile_instr_generate_EQ, options::OPT_fno_profile_instr_generate); if (ProfileGenerateArg && ProfileGenerateArg->getOption().matches( options::OPT_fno_profile_instr_generate)) ProfileGenerateArg = nullptr; if (PGOGenerateArg && ProfileGenerateArg) D.Diag(diag::err_drv_argument_not_allowed_with) << PGOGenerateArg->getSpelling() << ProfileGenerateArg->getSpelling(); auto *ProfileUseArg = getLastProfileUseArg(Args); if (PGOGenerateArg && ProfileUseArg) D.Diag(diag::err_drv_argument_not_allowed_with) << ProfileUseArg->getSpelling() << PGOGenerateArg->getSpelling(); if (ProfileGenerateArg && ProfileUseArg) D.Diag(diag::err_drv_argument_not_allowed_with) << ProfileGenerateArg->getSpelling() << ProfileUseArg->getSpelling(); if (ProfileGenerateArg) { if (ProfileGenerateArg->getOption().matches( options::OPT_fprofile_instr_generate_EQ)) CmdArgs.push_back(Args.MakeArgString(Twine("-fprofile-instrument-path=") + ProfileGenerateArg->getValue())); // The default is to use Clang Instrumentation. CmdArgs.push_back("-fprofile-instrument=clang"); } if (PGOGenerateArg) { CmdArgs.push_back("-fprofile-instrument=llvm"); if (PGOGenerateArg->getOption().matches( options::OPT_fprofile_generate_EQ)) { SmallString<128> Path(PGOGenerateArg->getValue()); llvm::sys::path::append(Path, "default_%m.profraw"); CmdArgs.push_back( Args.MakeArgString(Twine("-fprofile-instrument-path=") + Path)); } } if (ProfileUseArg) { if (ProfileUseArg->getOption().matches(options::OPT_fprofile_instr_use_EQ)) CmdArgs.push_back(Args.MakeArgString( Twine("-fprofile-instrument-use-path=") + ProfileUseArg->getValue())); else if ((ProfileUseArg->getOption().matches( options::OPT_fprofile_use_EQ) || ProfileUseArg->getOption().matches( options::OPT_fprofile_instr_use))) { SmallString<128> Path( ProfileUseArg->getNumValues() == 0 ? 
"" : ProfileUseArg->getValue()); if (Path.empty() || llvm::sys::fs::is_directory(Path)) llvm::sys::path::append(Path, "default.profdata"); CmdArgs.push_back( Args.MakeArgString(Twine("-fprofile-instrument-use-path=") + Path)); } } if (Args.hasArg(options::OPT_ftest_coverage) || Args.hasArg(options::OPT_coverage)) CmdArgs.push_back("-femit-coverage-notes"); if (Args.hasFlag(options::OPT_fprofile_arcs, options::OPT_fno_profile_arcs, false) || Args.hasArg(options::OPT_coverage)) CmdArgs.push_back("-femit-coverage-data"); if (Args.hasFlag(options::OPT_fcoverage_mapping, options::OPT_fno_coverage_mapping, false) && !ProfileGenerateArg) D.Diag(diag::err_drv_argument_only_allowed_with) << "-fcoverage-mapping" << "-fprofile-instr-generate"; if (Args.hasFlag(options::OPT_fcoverage_mapping, options::OPT_fno_coverage_mapping, false)) CmdArgs.push_back("-fcoverage-mapping"); if (C.getArgs().hasArg(options::OPT_c) || C.getArgs().hasArg(options::OPT_S)) { if (Output.isFilename()) { CmdArgs.push_back("-coverage-notes-file"); SmallString<128> OutputFilename; if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o)) OutputFilename = FinalOutput->getValue(); else OutputFilename = llvm::sys::path::filename(Output.getBaseInput()); SmallString<128> CoverageFilename = OutputFilename; if (llvm::sys::path::is_relative(CoverageFilename)) { SmallString<128> Pwd; if (!llvm::sys::fs::current_path(Pwd)) { llvm::sys::path::append(Pwd, CoverageFilename); CoverageFilename.swap(Pwd); } } llvm::sys::path::replace_extension(CoverageFilename, "gcno"); CmdArgs.push_back(Args.MakeArgString(CoverageFilename)); // Leave -fprofile-dir= an unused argument unless .gcda emission is // enabled. To be polite, with '-fprofile-arcs -fno-profile-arcs' consider // the flag used. There is no -fno-profile-dir, so the user has no // targeted way to suppress the warning. if (Args.hasArg(options::OPT_fprofile_arcs) || Args.hasArg(options::OPT_coverage)) { CmdArgs.push_back("-coverage-data-file"); if (Arg *FProfileDir = Args.getLastArg(options::OPT_fprofile_dir)) { CoverageFilename = FProfileDir->getValue(); llvm::sys::path::append(CoverageFilename, OutputFilename); } llvm::sys::path::replace_extension(CoverageFilename, "gcda"); CmdArgs.push_back(Args.MakeArgString(CoverageFilename)); } } } } static void addPS4ProfileRTArgs(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs) { if ((Args.hasFlag(options::OPT_fprofile_arcs, options::OPT_fno_profile_arcs, false) || Args.hasFlag(options::OPT_fprofile_generate, options::OPT_fno_profile_instr_generate, false) || Args.hasFlag(options::OPT_fprofile_generate_EQ, options::OPT_fno_profile_instr_generate, false) || Args.hasFlag(options::OPT_fprofile_instr_generate, options::OPT_fno_profile_instr_generate, false) || Args.hasFlag(options::OPT_fprofile_instr_generate_EQ, options::OPT_fno_profile_instr_generate, false) || Args.hasArg(options::OPT_fcreate_profile) || Args.hasArg(options::OPT_coverage))) CmdArgs.push_back("--dependent-lib=libclang_rt.profile-x86_64.a"); } /// Parses the various -fpic/-fPIC/-fpie/-fPIE arguments. Then, /// smooshes them together with platform defaults, to decide whether /// this compile should be using PIC mode or not. Returns a tuple of /// (RelocationModel, PICLevel, IsPIE). 
static std::tuple<llvm::Reloc::Model, unsigned, bool> ParsePICArgs(const ToolChain &ToolChain, const ArgList &Args) { const llvm::Triple &EffectiveTriple = ToolChain.getEffectiveTriple(); const llvm::Triple &Triple = ToolChain.getTriple(); bool PIE = ToolChain.isPIEDefault(); bool PIC = PIE || ToolChain.isPICDefault(); // The Darwin/MachO default to use PIC does not apply when using -static. if (Triple.isOSBinFormatMachO() && Args.hasArg(options::OPT_static)) PIE = PIC = false; bool IsPICLevelTwo = PIC; bool KernelOrKext = Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext); // Android-specific defaults for PIC/PIE if (Triple.isAndroid()) { switch (Triple.getArch()) { case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: case llvm::Triple::aarch64: case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: PIC = true; // "-fpic" break; case llvm::Triple::x86: case llvm::Triple::x86_64: PIC = true; // "-fPIC" IsPICLevelTwo = true; break; default: break; } } // OpenBSD-specific defaults for PIE if (Triple.getOS() == llvm::Triple::OpenBSD) { switch (ToolChain.getArch()) { case llvm::Triple::mips64: case llvm::Triple::mips64el: case llvm::Triple::sparcel: case llvm::Triple::x86: case llvm::Triple::x86_64: IsPICLevelTwo = false; // "-fpie" break; case llvm::Triple::ppc: case llvm::Triple::sparc: case llvm::Triple::sparcv9: IsPICLevelTwo = true; // "-fPIE" break; default: break; } } // The last argument relating to either PIC or PIE wins, and no // other argument is used. If the last argument is any flavor of the // '-fno-...' arguments, both PIC and PIE are disabled. Any PIE // option implicitly enables PIC at the same level. Arg *LastPICArg = Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC, options::OPT_fpic, options::OPT_fno_pic, options::OPT_fPIE, options::OPT_fno_PIE, options::OPT_fpie, options::OPT_fno_pie); if (Triple.isOSWindows() && LastPICArg && LastPICArg == Args.getLastArg(options::OPT_fPIC, options::OPT_fpic, options::OPT_fPIE, options::OPT_fpie)) { ToolChain.getDriver().Diag(diag::err_drv_unsupported_opt_for_target) << LastPICArg->getSpelling() << Triple.str(); if (Triple.getArch() == llvm::Triple::x86_64) return std::make_tuple(llvm::Reloc::PIC_, 2U, false); return std::make_tuple(llvm::Reloc::Static, 0U, false); } // Check whether the tool chain trumps the PIC-ness decision. If the PIC-ness // is forced, then neither PIC nor PIE flags will have any effect. if (!ToolChain.isPICDefaultForced()) { if (LastPICArg) { Option O = LastPICArg->getOption(); if (O.matches(options::OPT_fPIC) || O.matches(options::OPT_fpic) || O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie)) { PIE = O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie); PIC = PIE || O.matches(options::OPT_fPIC) || O.matches(options::OPT_fpic); IsPICLevelTwo = O.matches(options::OPT_fPIE) || O.matches(options::OPT_fPIC); } else { PIE = PIC = false; if (EffectiveTriple.isPS4CPU()) { Arg *ModelArg = Args.getLastArg(options::OPT_mcmodel_EQ); StringRef Model = ModelArg ? ModelArg->getValue() : ""; if (Model != "kernel") { PIC = true; ToolChain.getDriver().Diag(diag::warn_drv_ps4_force_pic) << LastPICArg->getSpelling(); } } } } } // Introduce a Darwin and PS4-specific hack. If the default is PIC, but the // PIC level would've been set to level 1, force it back to level 2 PIC // instead.
if (PIC && (Triple.isOSDarwin() || EffectiveTriple.isPS4CPU())) IsPICLevelTwo |= ToolChain.isPICDefault(); // These kernel flags are a trump card: they will disable PIC/PIE // generation, independent of the argument order. if (KernelOrKext && ((!EffectiveTriple.isiOS() || EffectiveTriple.isOSVersionLT(6)) && !EffectiveTriple.isWatchOS())) PIC = PIE = false; if (Arg *A = Args.getLastArg(options::OPT_mdynamic_no_pic)) { // This is a very special mode. It trumps the other modes, almost no one // uses it, and it isn't even valid on any OS but Darwin. if (!Triple.isOSDarwin()) ToolChain.getDriver().Diag(diag::err_drv_unsupported_opt_for_target) << A->getSpelling() << Triple.str(); // FIXME: Warn when this flag trumps some other PIC or PIE flag. // Only a forced PIC mode can cause the actual compile to have PIC defines // etc., no flags are sufficient. This behavior was selected to closely // match that of llvm-gcc and Apple GCC before that. PIC = ToolChain.isPICDefault() && ToolChain.isPICDefaultForced(); return std::make_tuple(llvm::Reloc::DynamicNoPIC, PIC ? 2U : 0U, false); } bool EmbeddedPISupported; switch (Triple.getArch()) { case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: EmbeddedPISupported = true; break; default: EmbeddedPISupported = false; break; } bool ROPI = false, RWPI = false; Arg *LastROPIArg = Args.getLastArg(options::OPT_fropi, options::OPT_fno_ropi); if (LastROPIArg && LastROPIArg->getOption().matches(options::OPT_fropi)) { if (!EmbeddedPISupported) ToolChain.getDriver().Diag(diag::err_drv_unsupported_opt_for_target) << LastROPIArg->getSpelling() << Triple.str(); ROPI = true; } Arg *LastRWPIArg = Args.getLastArg(options::OPT_frwpi, options::OPT_fno_rwpi); if (LastRWPIArg && LastRWPIArg->getOption().matches(options::OPT_frwpi)) { if (!EmbeddedPISupported) ToolChain.getDriver().Diag(diag::err_drv_unsupported_opt_for_target) << LastRWPIArg->getSpelling() << Triple.str(); RWPI = true; } // ROPI and RWPI are not compatible with PIC or PIE. if ((ROPI || RWPI) && (PIC || PIE)) ToolChain.getDriver().Diag(diag::err_drv_ropi_rwpi_incompatible_with_pic); if (PIC) return std::make_tuple(llvm::Reloc::PIC_, IsPICLevelTwo ? 2U : 1U, PIE); llvm::Reloc::Model RelocM = llvm::Reloc::Static; if (ROPI && RWPI) RelocM = llvm::Reloc::ROPI_RWPI; else if (ROPI) RelocM = llvm::Reloc::ROPI; else if (RWPI) RelocM = llvm::Reloc::RWPI; return std::make_tuple(RelocM, 0U, false); } static const char *RelocationModelName(llvm::Reloc::Model Model) { switch (Model) { case llvm::Reloc::Static: return "static"; case llvm::Reloc::PIC_: return "pic"; case llvm::Reloc::DynamicNoPIC: return "dynamic-no-pic"; case llvm::Reloc::ROPI: return "ropi"; case llvm::Reloc::RWPI: return "rwpi"; case llvm::Reloc::ROPI_RWPI: return "ropi-rwpi"; } llvm_unreachable("Unknown Reloc::Model kind"); } static void AddAssemblerKPIC(const ToolChain &ToolChain, const ArgList &Args, ArgStringList &CmdArgs) { llvm::Reloc::Model RelocationModel; unsigned PICLevel; bool IsPIE; std::tie(RelocationModel, PICLevel, IsPIE) = ParsePICArgs(ToolChain, Args); if (RelocationModel != llvm::Reloc::Static) CmdArgs.push_back("-KPIC"); } void Clang::DumpCompilationDatabase(Compilation &C, StringRef Filename, StringRef Target, const InputInfo &Output, const InputInfo &Input, const ArgList &Args) const { // If this is a dry run, do not create the compilation database file.
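// For example, "clang -MJ compile_commands.json -c foo.c" appends one JSON
// object per input, roughly: { "directory": "<cwd>", "file": "foo.c",
// "output": "foo.o", "arguments": ["clang", "-xc", ..., "--target=<triple>"]},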
if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) return; using llvm::yaml::escape; const Driver &D = getToolChain().getDriver(); if (!CompilationDatabase) { std::error_code EC; auto File = llvm::make_unique(Filename, EC, llvm::sys::fs::F_Text); if (EC) { D.Diag(clang::diag::err_drv_compilationdatabase) << Filename << EC.message(); return; } CompilationDatabase = std::move(File); } auto &CDB = *CompilationDatabase; SmallString<128> Buf; if (llvm::sys::fs::current_path(Buf)) Buf = "."; CDB << "{ \"directory\": \"" << escape(Buf) << "\""; CDB << ", \"file\": \"" << escape(Input.getFilename()) << "\""; CDB << ", \"output\": \"" << escape(Output.getFilename()) << "\""; CDB << ", \"arguments\": [\"" << escape(D.ClangExecutable) << "\""; Buf = "-x"; Buf += types::getTypeName(Input.getType()); CDB << ", \"" << escape(Buf) << "\""; if (!D.SysRoot.empty() && !Args.hasArg(options::OPT__sysroot_EQ)) { Buf = "--sysroot="; Buf += D.SysRoot; CDB << ", \"" << escape(Buf) << "\""; } CDB << ", \"" << escape(Input.getFilename()) << "\""; for (auto &A: Args) { auto &O = A->getOption(); // Skip language selection, which is positional. if (O.getID() == options::OPT_x) continue; // Skip writing dependency output and the compilation database itself. if (O.getGroup().isValid() && O.getGroup().getID() == options::OPT_M_Group) continue; // Skip inputs. if (O.getKind() == Option::InputClass) continue; // All other arguments are quoted and appended. ArgStringList ASL; A->render(Args, ASL); for (auto &it: ASL) CDB << ", \"" << escape(it) << "\""; } Buf = "--target="; Buf += Target; CDB << ", \"" << escape(Buf) << "\"]},\n"; } void Clang::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const llvm::Triple &Triple = getToolChain().getEffectiveTriple(); const std::string &TripleStr = Triple.getTriple(); bool KernelOrKext = Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext); const Driver &D = getToolChain().getDriver(); ArgStringList CmdArgs; // Check number of inputs for sanity. We need at least one input. assert(Inputs.size() >= 1 && "Must have at least one input."); const InputInfo &Input = Inputs[0]; // CUDA compilation may have multiple inputs (source file + results of // device-side compilations). OpenMP device jobs also take the host IR as a // second input. All other jobs are expected to have exactly one // input. bool IsCuda = JA.isOffloading(Action::OFK_Cuda); bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP); assert((IsCuda || (IsOpenMPDevice && Inputs.size() == 2) || Inputs.size() == 1) && "Unable to handle multiple inputs."); bool IsWindowsGNU = getToolChain().getTriple().isWindowsGNUEnvironment(); bool IsWindowsCygnus = getToolChain().getTriple().isWindowsCygwinEnvironment(); bool IsWindowsMSVC = getToolChain().getTriple().isWindowsMSVCEnvironment(); bool IsPS4CPU = getToolChain().getTriple().isPS4CPU(); bool IsIAMCU = getToolChain().getTriple().isOSIAMCU(); // Adjust IsWindowsXYZ for CUDA compilations. Even when compiling in device // mode (i.e., getToolchain().getTriple() is NVPTX, not Windows), we need to // pass Windows-specific flags to cc1. if (IsCuda) { const llvm::Triple *AuxTriple = getToolChain().getAuxTriple(); IsWindowsMSVC |= AuxTriple && AuxTriple->isWindowsMSVCEnvironment(); IsWindowsGNU |= AuxTriple && AuxTriple->isWindowsGNUEnvironment(); IsWindowsCygnus |= AuxTriple && AuxTriple->isWindowsCygwinEnvironment(); } // C++ is not supported for IAMCU. 
  if (IsIAMCU && types::isCXX(Input.getType()))
    D.Diag(diag::err_drv_clang_unsupported) << "C++ for IAMCU";

  // Invoke ourselves in -cc1 mode.
  //
  // FIXME: Implement custom jobs for internal actions.
  CmdArgs.push_back("-cc1");

  // Add the "effective" target triple.
  CmdArgs.push_back("-triple");
  CmdArgs.push_back(Args.MakeArgString(TripleStr));

  if (const Arg *MJ = Args.getLastArg(options::OPT_MJ)) {
    DumpCompilationDatabase(C, MJ->getValue(), TripleStr, Output, Input, Args);
    Args.ClaimAllArgs(options::OPT_MJ);
  }

  if (IsCuda) {
    // We have to pass the triple of the host if compiling for a CUDA device
    // and vice-versa.
    std::string NormalizedTriple;
    if (JA.isDeviceOffloading(Action::OFK_Cuda))
      NormalizedTriple = C.getSingleOffloadToolChain<Action::OFK_Host>()
                             ->getTriple()
                             .normalize();
    else
      NormalizedTriple = C.getSingleOffloadToolChain<Action::OFK_Cuda>()
                             ->getTriple()
                             .normalize();
    CmdArgs.push_back("-aux-triple");
    CmdArgs.push_back(Args.MakeArgString(NormalizedTriple));
  }

  if (Triple.isOSWindows() && (Triple.getArch() == llvm::Triple::arm ||
                               Triple.getArch() == llvm::Triple::thumb)) {
    unsigned Offset = Triple.getArch() == llvm::Triple::arm ? 4 : 6;
    unsigned Version;
    Triple.getArchName().substr(Offset).getAsInteger(10, Version);
    if (Version < 7)
      D.Diag(diag::err_target_unsupported_arch) << Triple.getArchName()
                                                << TripleStr;
  }

  // Push all default warning arguments that are specific to
  // the given target. These come before user provided warning options
  // are provided.
  getToolChain().addClangWarningOptions(CmdArgs);

  // Select the appropriate action.
  RewriteKind rewriteKind = RK_None;

  if (isa<AnalyzeJobAction>(JA)) {
    assert(JA.getType() == types::TY_Plist && "Invalid output type.");
    CmdArgs.push_back("-analyze");
  } else if (isa<MigrateJobAction>(JA)) {
    CmdArgs.push_back("-migrate");
  } else if (isa<PreprocessJobAction>(JA)) {
    if (Output.getType() == types::TY_Dependencies)
      CmdArgs.push_back("-Eonly");
    else {
      CmdArgs.push_back("-E");
      if (Args.hasArg(options::OPT_rewrite_objc) &&
          !Args.hasArg(options::OPT_g_Group))
        CmdArgs.push_back("-P");
    }
  } else if (isa<AssembleJobAction>(JA)) {
    CmdArgs.push_back("-emit-obj");

    CollectArgsForIntegratedAssembler(C, Args, CmdArgs, D);

    // Also ignore explicit -force_cpusubtype_ALL option.
    (void)Args.hasArg(options::OPT_force__cpusubtype__ALL);
  } else if (isa<PrecompileJobAction>(JA)) {
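    // A precompile job produces one of a C++ module interface, a precompiled
    // header (PCH), or a pretokenized header (PTH), chosen below from the
    // output type and the driver's PCH/PTH setting.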
    // Use PCH if the user requested it.
    bool UsePCH = D.CCCUsePCH;

    if (JA.getType() == types::TY_Nothing)
      CmdArgs.push_back("-fsyntax-only");
    else if (JA.getType() == types::TY_ModuleFile)
      CmdArgs.push_back("-emit-module-interface");
    else if (UsePCH)
      CmdArgs.push_back("-emit-pch");
    else
      CmdArgs.push_back("-emit-pth");
  } else if (isa<VerifyPCHJobAction>(JA)) {
    CmdArgs.push_back("-verify-pch");
  } else {
    assert((isa<CompileJobAction>(JA) || isa<BackendJobAction>(JA)) &&
           "Invalid action for clang tool.");
    if (JA.getType() == types::TY_Nothing) {
      CmdArgs.push_back("-fsyntax-only");
    } else if (JA.getType() == types::TY_LLVM_IR ||
               JA.getType() == types::TY_LTO_IR) {
      CmdArgs.push_back("-emit-llvm");
    } else if (JA.getType() == types::TY_LLVM_BC ||
               JA.getType() == types::TY_LTO_BC) {
      CmdArgs.push_back("-emit-llvm-bc");
    } else if (JA.getType() == types::TY_PP_Asm) {
      CmdArgs.push_back("-S");
    } else if (JA.getType() == types::TY_AST) {
      CmdArgs.push_back("-emit-pch");
    } else if (JA.getType() == types::TY_ModuleFile) {
      CmdArgs.push_back("-module-file-info");
    } else if (JA.getType() == types::TY_RewrittenObjC) {
      CmdArgs.push_back("-rewrite-objc");
      rewriteKind = RK_NonFragile;
    } else if (JA.getType() == types::TY_RewrittenLegacyObjC) {
      CmdArgs.push_back("-rewrite-objc");
      rewriteKind = RK_Fragile;
    } else {
      assert(JA.getType() == types::TY_PP_Asm && "Unexpected output type!");
    }

    // Preserve use-list order by default when emitting bitcode, so that
    // loading the bitcode up in 'opt' or 'llc' and running passes gives the
    // same result as running passes here. For LTO, we don't need to preserve
    // the use-list order, since serialization to bitcode is part of the flow.
    if (JA.getType() == types::TY_LLVM_BC)
      CmdArgs.push_back("-emit-llvm-uselists");

    if (D.isUsingLTO())
      Args.AddLastArg(CmdArgs, options::OPT_flto, options::OPT_flto_EQ);
  }

  if (const Arg *A = Args.getLastArg(options::OPT_fthinlto_index_EQ)) {
    if (!types::isLLVMIR(Input.getType()))
      D.Diag(diag::err_drv_argument_only_allowed_with) << A->getAsString(Args)
                                                       << "-x ir";
    Args.AddLastArg(CmdArgs, options::OPT_fthinlto_index_EQ);
  }

  // Embed-bitcode option.
  if (C.getDriver().embedBitcodeInObject() &&
      (isa<BackendJobAction>(JA) || isa<AssembleJobAction>(JA))) {
    // Add flags implied by -fembed-bitcode.
    Args.AddLastArg(CmdArgs, options::OPT_fembed_bitcode_EQ);
    // Disable all llvm IR level optimizations.
    CmdArgs.push_back("-disable-llvm-passes");
  }
  if (C.getDriver().embedBitcodeMarkerOnly())
    CmdArgs.push_back("-fembed-bitcode=marker");

  // We normally speed up the clang process a bit by skipping destructors at
  // exit, but when we're generating diagnostics we can rely on some of the
  // cleanup.
  if (!C.isForDiagnostics())
    CmdArgs.push_back("-disable-free");

  // Disable the verification pass in -asserts builds.
#ifdef NDEBUG
  CmdArgs.push_back("-disable-llvm-verifier");
  // Discard LLVM value names in -asserts builds.
  CmdArgs.push_back("-discard-value-names");
#endif

  // Set the main file name, so that debug info works even with
  // -save-temps.
  CmdArgs.push_back("-main-file-name");
  CmdArgs.push_back(getBaseInputName(Args, Input));

  // Some flags which affect the language (via preprocessor
  // defines).
  if (Args.hasArg(options::OPT_static))
    CmdArgs.push_back("-static-define");

  if (isa<AnalyzeJobAction>(JA)) {
    // Enable region store model by default.
    CmdArgs.push_back("-analyzer-store=region");

    // Treat blocks as analysis entry points.
    CmdArgs.push_back("-analyzer-opt-analyze-nested-blocks");

    CmdArgs.push_back("-analyzer-eagerly-assume");

    // Add default argument set.
if (!Args.hasArg(options::OPT__analyzer_no_default_checks)) { CmdArgs.push_back("-analyzer-checker=core"); CmdArgs.push_back("-analyzer-checker=apiModeling"); if (!IsWindowsMSVC) { CmdArgs.push_back("-analyzer-checker=unix"); } else { // Enable "unix" checkers that also work on Windows. CmdArgs.push_back("-analyzer-checker=unix.API"); CmdArgs.push_back("-analyzer-checker=unix.Malloc"); CmdArgs.push_back("-analyzer-checker=unix.MallocSizeof"); CmdArgs.push_back("-analyzer-checker=unix.MismatchedDeallocator"); CmdArgs.push_back("-analyzer-checker=unix.cstring.BadSizeArg"); CmdArgs.push_back("-analyzer-checker=unix.cstring.NullArg"); } // Disable some unix checkers for PS4. if (IsPS4CPU) { CmdArgs.push_back("-analyzer-disable-checker=unix.API"); CmdArgs.push_back("-analyzer-disable-checker=unix.Vfork"); } if (getToolChain().getTriple().getVendor() == llvm::Triple::Apple) CmdArgs.push_back("-analyzer-checker=osx"); CmdArgs.push_back("-analyzer-checker=deadcode"); if (types::isCXX(Input.getType())) CmdArgs.push_back("-analyzer-checker=cplusplus"); if (!IsPS4CPU) { CmdArgs.push_back( "-analyzer-checker=security.insecureAPI.UncheckedReturn"); CmdArgs.push_back("-analyzer-checker=security.insecureAPI.getpw"); CmdArgs.push_back("-analyzer-checker=security.insecureAPI.gets"); CmdArgs.push_back("-analyzer-checker=security.insecureAPI.mktemp"); CmdArgs.push_back("-analyzer-checker=security.insecureAPI.mkstemp"); CmdArgs.push_back("-analyzer-checker=security.insecureAPI.vfork"); } // Default nullability checks. CmdArgs.push_back("-analyzer-checker=nullability.NullPassedToNonnull"); CmdArgs.push_back( "-analyzer-checker=nullability.NullReturnedFromNonnull"); } // Set the output format. The default is plist, for (lame) historical // reasons. CmdArgs.push_back("-analyzer-output"); if (Arg *A = Args.getLastArg(options::OPT__analyzer_output)) CmdArgs.push_back(A->getValue()); else CmdArgs.push_back("plist"); // Disable the presentation of standard compiler warnings when // using --analyze. We only want to show static analyzer diagnostics // or frontend errors. CmdArgs.push_back("-w"); // Add -Xanalyzer arguments when running as analyzer. Args.AddAllArgValues(CmdArgs, options::OPT_Xanalyzer); } CheckCodeGenerationOptions(D, Args); llvm::Reloc::Model RelocationModel; unsigned PICLevel; bool IsPIE; std::tie(RelocationModel, PICLevel, IsPIE) = ParsePICArgs(getToolChain(), Args); const char *RMName = RelocationModelName(RelocationModel); if ((RelocationModel == llvm::Reloc::ROPI || RelocationModel == llvm::Reloc::ROPI_RWPI) && types::isCXX(Input.getType()) && !Args.hasArg(options::OPT_fallow_unsupported)) D.Diag(diag::err_drv_ropi_incompatible_with_cxx); if (RMName) { CmdArgs.push_back("-mrelocation-model"); CmdArgs.push_back(RMName); } if (PICLevel > 0) { CmdArgs.push_back("-pic-level"); CmdArgs.push_back(PICLevel == 1 ? "1" : "2"); if (IsPIE) CmdArgs.push_back("-pic-is-pie"); } if (Arg *A = Args.getLastArg(options::OPT_meabi)) { CmdArgs.push_back("-meabi"); CmdArgs.push_back(A->getValue()); } CmdArgs.push_back("-mthread-model"); if (Arg *A = Args.getLastArg(options::OPT_mthread_model)) CmdArgs.push_back(A->getValue()); else CmdArgs.push_back(Args.MakeArgString(getToolChain().getThreadModel())); Args.AddLastArg(CmdArgs, options::OPT_fveclib); if (!Args.hasFlag(options::OPT_fmerge_all_constants, options::OPT_fno_merge_all_constants)) CmdArgs.push_back("-fno-merge-all-constants"); // LLVM Code Generator Options. 
if (Args.hasArg(options::OPT_frewrite_map_file) || Args.hasArg(options::OPT_frewrite_map_file_EQ)) { for (const Arg *A : Args.filtered(options::OPT_frewrite_map_file, options::OPT_frewrite_map_file_EQ)) { StringRef Map = A->getValue(); if (!llvm::sys::fs::exists(Map)) { D.Diag(diag::err_drv_no_such_file) << Map; } else { CmdArgs.push_back("-frewrite-map-file"); CmdArgs.push_back(A->getValue()); A->claim(); } } } if (Arg *A = Args.getLastArg(options::OPT_Wframe_larger_than_EQ)) { StringRef v = A->getValue(); CmdArgs.push_back("-mllvm"); CmdArgs.push_back(Args.MakeArgString("-warn-stack-size=" + v)); A->claim(); } if (!Args.hasFlag(options::OPT_fjump_tables, options::OPT_fno_jump_tables, true)) CmdArgs.push_back("-fno-jump-tables"); if (!Args.hasFlag(options::OPT_fpreserve_as_comments, options::OPT_fno_preserve_as_comments, true)) CmdArgs.push_back("-fno-preserve-as-comments"); if (Arg *A = Args.getLastArg(options::OPT_mregparm_EQ)) { CmdArgs.push_back("-mregparm"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_fpcc_struct_return, options::OPT_freg_struct_return)) { if (getToolChain().getArch() != llvm::Triple::x86) { D.Diag(diag::err_drv_unsupported_opt_for_target) << A->getSpelling() << getToolChain().getTriple().str(); } else if (A->getOption().matches(options::OPT_fpcc_struct_return)) { CmdArgs.push_back("-fpcc-struct-return"); } else { assert(A->getOption().matches(options::OPT_freg_struct_return)); CmdArgs.push_back("-freg-struct-return"); } } if (Args.hasFlag(options::OPT_mrtd, options::OPT_mno_rtd, false)) CmdArgs.push_back("-fdefault-calling-conv=stdcall"); if (shouldUseFramePointer(Args, getToolChain().getTriple())) CmdArgs.push_back("-mdisable-fp-elim"); if (!Args.hasFlag(options::OPT_fzero_initialized_in_bss, options::OPT_fno_zero_initialized_in_bss)) CmdArgs.push_back("-mno-zero-initialized-in-bss"); bool OFastEnabled = isOptimizationLevelFast(Args); // If -Ofast is the optimization level, then -fstrict-aliasing should be // enabled. This alias option is being used to simplify the hasFlag logic. OptSpecifier StrictAliasingAliasOption = OFastEnabled ? options::OPT_Ofast : options::OPT_fstrict_aliasing; // We turn strict aliasing off by default if we're in CL mode, since MSVC // doesn't do any TBAA. bool TBAAOnByDefault = !getToolChain().getDriver().IsCLMode(); if (!Args.hasFlag(options::OPT_fstrict_aliasing, StrictAliasingAliasOption, options::OPT_fno_strict_aliasing, TBAAOnByDefault)) CmdArgs.push_back("-relaxed-aliasing"); if (!Args.hasFlag(options::OPT_fstruct_path_tbaa, options::OPT_fno_struct_path_tbaa)) CmdArgs.push_back("-no-struct-path-tbaa"); if (Args.hasFlag(options::OPT_fstrict_enums, options::OPT_fno_strict_enums, false)) CmdArgs.push_back("-fstrict-enums"); if (!Args.hasFlag(options::OPT_fstrict_return, options::OPT_fno_strict_return, true)) CmdArgs.push_back("-fno-strict-return"); if (Args.hasFlag(options::OPT_fstrict_vtable_pointers, options::OPT_fno_strict_vtable_pointers, false)) CmdArgs.push_back("-fstrict-vtable-pointers"); if (!Args.hasFlag(options::OPT_foptimize_sibling_calls, options::OPT_fno_optimize_sibling_calls)) CmdArgs.push_back("-mdisable-tail-calls"); // Handle segmented stacks. if (Args.hasArg(options::OPT_fsplit_stack)) CmdArgs.push_back("-split-stacks"); // If -Ofast is the optimization level, then -ffast-math should be enabled. // This alias option is being used to simplify the getLastArg logic. OptSpecifier FastMathAliasOption = OFastEnabled ? 
      options::OPT_Ofast : options::OPT_ffast_math;

  // Handle various floating point optimization flags, mapping them to the
  // appropriate LLVM code generation flags. The pattern for all of these is
  // to default off the codegen optimizations, and if any flag enables them
  // and no flag disables them after the flag enabling them, enable the
  // codegen optimization. This is complicated by several "umbrella" flags.
  if (Arg *A = Args.getLastArg(
          options::OPT_ffast_math, FastMathAliasOption,
          options::OPT_fno_fast_math, options::OPT_ffinite_math_only,
          options::OPT_fno_finite_math_only, options::OPT_fhonor_infinities,
          options::OPT_fno_honor_infinities))
    if (A->getOption().getID() != options::OPT_fno_fast_math &&
        A->getOption().getID() != options::OPT_fno_finite_math_only &&
        A->getOption().getID() != options::OPT_fhonor_infinities)
      CmdArgs.push_back("-menable-no-infs");
  if (Arg *A = Args.getLastArg(
          options::OPT_ffast_math, FastMathAliasOption,
          options::OPT_fno_fast_math, options::OPT_ffinite_math_only,
          options::OPT_fno_finite_math_only, options::OPT_fhonor_nans,
          options::OPT_fno_honor_nans))
    if (A->getOption().getID() != options::OPT_fno_fast_math &&
        A->getOption().getID() != options::OPT_fno_finite_math_only &&
        A->getOption().getID() != options::OPT_fhonor_nans)
      CmdArgs.push_back("-menable-no-nans");

  // -fmath-errno is the default on some platforms, e.g. BSD-derived OSes.
  bool MathErrno = getToolChain().IsMathErrnoDefault();
  if (Arg *A =
          Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption,
                          options::OPT_fno_fast_math, options::OPT_fmath_errno,
                          options::OPT_fno_math_errno)) {
    // Turning on -ffast-math (with either flag) removes the need for
    // MathErrno. However, turning *off* -ffast-math merely restores the
    // toolchain default (which may be false).
    if (A->getOption().getID() == options::OPT_fno_math_errno ||
        A->getOption().getID() == options::OPT_ffast_math ||
        A->getOption().getID() == options::OPT_Ofast)
      MathErrno = false;
    else if (A->getOption().getID() == options::OPT_fmath_errno)
      MathErrno = true;
  }
  if (MathErrno)
    CmdArgs.push_back("-fmath-errno");

  // There are several flags which require disabling very specific
  // optimizations. Any of these being disabled forces us to turn off the
  // entire set of LLVM optimizations, so collect them through all the flag
  // madness.
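  // For example, "-ffast-math -fno-associative-math" leaves AssociativeMath
  // false because the later, more specific flag wins the getLastArg scan,
  // while "-fno-fast-math -fassociative-math" turns it on for the same
  // reason.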
bool AssociativeMath = false; if (Arg *A = Args.getLastArg( options::OPT_ffast_math, FastMathAliasOption, options::OPT_fno_fast_math, options::OPT_funsafe_math_optimizations, options::OPT_fno_unsafe_math_optimizations, options::OPT_fassociative_math, options::OPT_fno_associative_math)) if (A->getOption().getID() != options::OPT_fno_fast_math && A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations && A->getOption().getID() != options::OPT_fno_associative_math) AssociativeMath = true; bool ReciprocalMath = false; if (Arg *A = Args.getLastArg( options::OPT_ffast_math, FastMathAliasOption, options::OPT_fno_fast_math, options::OPT_funsafe_math_optimizations, options::OPT_fno_unsafe_math_optimizations, options::OPT_freciprocal_math, options::OPT_fno_reciprocal_math)) if (A->getOption().getID() != options::OPT_fno_fast_math && A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations && A->getOption().getID() != options::OPT_fno_reciprocal_math) ReciprocalMath = true; bool SignedZeros = true; if (Arg *A = Args.getLastArg( options::OPT_ffast_math, FastMathAliasOption, options::OPT_fno_fast_math, options::OPT_funsafe_math_optimizations, options::OPT_fno_unsafe_math_optimizations, options::OPT_fsigned_zeros, options::OPT_fno_signed_zeros)) if (A->getOption().getID() != options::OPT_fno_fast_math && A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations && A->getOption().getID() != options::OPT_fsigned_zeros) SignedZeros = false; bool TrappingMath = true; if (Arg *A = Args.getLastArg( options::OPT_ffast_math, FastMathAliasOption, options::OPT_fno_fast_math, options::OPT_funsafe_math_optimizations, options::OPT_fno_unsafe_math_optimizations, options::OPT_ftrapping_math, options::OPT_fno_trapping_math)) if (A->getOption().getID() != options::OPT_fno_fast_math && A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations && A->getOption().getID() != options::OPT_ftrapping_math) TrappingMath = false; if (!MathErrno && AssociativeMath && ReciprocalMath && !SignedZeros && !TrappingMath) CmdArgs.push_back("-menable-unsafe-fp-math"); if (!SignedZeros) CmdArgs.push_back("-fno-signed-zeros"); if (ReciprocalMath) CmdArgs.push_back("-freciprocal-math"); if (!TrappingMath) CmdArgs.push_back("-fno-trapping-math"); if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption, options::OPT_fno_fast_math, options::OPT_funsafe_math_optimizations, options::OPT_fno_unsafe_math_optimizations, options::OPT_fdenormal_fp_math_EQ)) if (A->getOption().getID() != options::OPT_fno_fast_math && A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations) Args.AddLastArg(CmdArgs, options::OPT_fdenormal_fp_math_EQ); // Validate and pass through -fp-contract option. if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption, options::OPT_fno_fast_math, options::OPT_ffp_contract)) { if (A->getOption().getID() == options::OPT_ffp_contract) { StringRef Val = A->getValue(); if (Val == "fast" || Val == "on" || Val == "off") { CmdArgs.push_back(Args.MakeArgString("-ffp-contract=" + Val)); } else { D.Diag(diag::err_drv_unsupported_option_argument) << A->getOption().getName() << Val; } } else if (A->getOption().matches(options::OPT_ffast_math) || (OFastEnabled && A->getOption().matches(options::OPT_Ofast))) { // If fast-math is set then set the fp-contract mode to fast. 
CmdArgs.push_back(Args.MakeArgString("-ffp-contract=fast")); } } ParseMRecip(getToolChain().getDriver(), Args, CmdArgs); // We separately look for the '-ffast-math' and '-ffinite-math-only' flags, // and if we find them, tell the frontend to provide the appropriate // preprocessor macros. This is distinct from enabling any optimizations as // these options induce language changes which must survive serialization // and deserialization, etc. if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption, options::OPT_fno_fast_math)) if (!A->getOption().matches(options::OPT_fno_fast_math)) CmdArgs.push_back("-ffast-math"); if (Arg *A = Args.getLastArg(options::OPT_ffinite_math_only, options::OPT_fno_fast_math)) if (A->getOption().matches(options::OPT_ffinite_math_only)) CmdArgs.push_back("-ffinite-math-only"); // Decide whether to use verbose asm. Verbose assembly is the default on // toolchains which have the integrated assembler on by default. bool IsIntegratedAssemblerDefault = getToolChain().IsIntegratedAssemblerDefault(); if (Args.hasFlag(options::OPT_fverbose_asm, options::OPT_fno_verbose_asm, IsIntegratedAssemblerDefault) || Args.hasArg(options::OPT_dA)) CmdArgs.push_back("-masm-verbose"); if (!Args.hasFlag(options::OPT_fintegrated_as, options::OPT_fno_integrated_as, IsIntegratedAssemblerDefault)) CmdArgs.push_back("-no-integrated-as"); if (Args.hasArg(options::OPT_fdebug_pass_structure)) { CmdArgs.push_back("-mdebug-pass"); CmdArgs.push_back("Structure"); } if (Args.hasArg(options::OPT_fdebug_pass_arguments)) { CmdArgs.push_back("-mdebug-pass"); CmdArgs.push_back("Arguments"); } // Enable -mconstructor-aliases except on darwin, where we have to work around // a linker bug (see ), and CUDA device code, where // aliases aren't supported. if (!getToolChain().getTriple().isOSDarwin() && !getToolChain().getTriple().isNVPTX()) CmdArgs.push_back("-mconstructor-aliases"); // Darwin's kernel doesn't support guard variables; just die if we // try to use them. if (KernelOrKext && getToolChain().getTriple().isOSDarwin()) CmdArgs.push_back("-fforbid-guard-variables"); if (Args.hasFlag(options::OPT_mms_bitfields, options::OPT_mno_ms_bitfields, false)) { CmdArgs.push_back("-mms-bitfields"); } if (Args.hasFlag(options::OPT_mpie_copy_relocations, options::OPT_mno_pie_copy_relocations, false)) { CmdArgs.push_back("-mpie-copy-relocations"); } // This is a coarse approximation of what llvm-gcc actually does, both // -fasynchronous-unwind-tables and -fnon-call-exceptions interact in more // complicated ways. bool AsynchronousUnwindTables = Args.hasFlag(options::OPT_fasynchronous_unwind_tables, options::OPT_fno_asynchronous_unwind_tables, (getToolChain().IsUnwindTablesDefault() || getToolChain().getSanitizerArgs().needsUnwindTables()) && !KernelOrKext); if (Args.hasFlag(options::OPT_funwind_tables, options::OPT_fno_unwind_tables, AsynchronousUnwindTables)) CmdArgs.push_back("-munwind-tables"); getToolChain().addClangTargetOptions(Args, CmdArgs); if (Arg *A = Args.getLastArg(options::OPT_flimited_precision_EQ)) { CmdArgs.push_back("-mlimit-float-precision"); CmdArgs.push_back(A->getValue()); } // FIXME: Handle -mtune=. 
(void)Args.hasArg(options::OPT_mtune_EQ); if (Arg *A = Args.getLastArg(options::OPT_mcmodel_EQ)) { CmdArgs.push_back("-mcode-model"); CmdArgs.push_back(A->getValue()); } // Add the target cpu std::string CPU = getCPUName(Args, Triple, /*FromAs*/ false); if (!CPU.empty()) { CmdArgs.push_back("-target-cpu"); CmdArgs.push_back(Args.MakeArgString(CPU)); } if (const Arg *A = Args.getLastArg(options::OPT_mfpmath_EQ)) { CmdArgs.push_back("-mfpmath"); CmdArgs.push_back(A->getValue()); } // Add the target features getTargetFeatures(getToolChain(), Triple, Args, CmdArgs, false); // Add target specific flags. switch (getToolChain().getArch()) { default: break; case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: // Use the effective triple, which takes into account the deployment target. AddARMTargetArgs(Triple, Args, CmdArgs, KernelOrKext); break; case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: AddAArch64TargetArgs(Args, CmdArgs); break; case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: AddMIPSTargetArgs(Args, CmdArgs); break; case llvm::Triple::ppc: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: AddPPCTargetArgs(Args, CmdArgs); break; case llvm::Triple::sparc: case llvm::Triple::sparcel: case llvm::Triple::sparcv9: AddSparcTargetArgs(Args, CmdArgs); break; case llvm::Triple::systemz: AddSystemZTargetArgs(Args, CmdArgs); break; case llvm::Triple::x86: case llvm::Triple::x86_64: AddX86TargetArgs(Args, CmdArgs); break; case llvm::Triple::lanai: AddLanaiTargetArgs(Args, CmdArgs); break; case llvm::Triple::hexagon: AddHexagonTargetArgs(Args, CmdArgs); break; case llvm::Triple::wasm32: case llvm::Triple::wasm64: AddWebAssemblyTargetArgs(Args, CmdArgs); break; } // The 'g' groups options involve a somewhat intricate sequence of decisions // about what to pass from the driver to the frontend, but by the time they // reach cc1 they've been factored into three well-defined orthogonal choices: // * what level of debug info to generate // * what dwarf version to write // * what debugger tuning to use // This avoids having to monkey around further in cc1 other than to disable // codeview if not running in a Windows environment. Perhaps even that // decision should be made in the driver as well though. unsigned DwarfVersion = 0; llvm::DebuggerKind DebuggerTuning = getToolChain().getDefaultDebuggerTuning(); // These two are potentially updated by AddClangCLArgs. codegenoptions::DebugInfoKind DebugInfoKind = codegenoptions::NoDebugInfo; bool EmitCodeView = false; // Add clang-cl arguments. types::ID InputType = Input.getType(); if (getToolChain().getDriver().IsCLMode()) AddClangCLArgs(Args, InputType, CmdArgs, &DebugInfoKind, &EmitCodeView); // Pass the linker version in use. if (Arg *A = Args.getLastArg(options::OPT_mlinker_version_EQ)) { CmdArgs.push_back("-target-linker-version"); CmdArgs.push_back(A->getValue()); } if (!shouldUseLeafFramePointer(Args, getToolChain().getTriple())) CmdArgs.push_back("-momit-leaf-frame-pointer"); // Explicitly error on some things we know we don't support and can't just // ignore. 
if (!Args.hasArg(options::OPT_fallow_unsupported)) { Arg *Unsupported; if (types::isCXX(InputType) && getToolChain().getTriple().isOSDarwin() && getToolChain().getArch() == llvm::Triple::x86) { if ((Unsupported = Args.getLastArg(options::OPT_fapple_kext)) || (Unsupported = Args.getLastArg(options::OPT_mkernel))) D.Diag(diag::err_drv_clang_unsupported_opt_cxx_darwin_i386) << Unsupported->getOption().getName(); } } Args.AddAllArgs(CmdArgs, options::OPT_v); Args.AddLastArg(CmdArgs, options::OPT_H); if (D.CCPrintHeaders && !D.CCGenDiagnostics) { CmdArgs.push_back("-header-include-file"); CmdArgs.push_back(D.CCPrintHeadersFilename ? D.CCPrintHeadersFilename : "-"); } Args.AddLastArg(CmdArgs, options::OPT_P); Args.AddLastArg(CmdArgs, options::OPT_print_ivar_layout); if (D.CCLogDiagnostics && !D.CCGenDiagnostics) { CmdArgs.push_back("-diagnostic-log-file"); CmdArgs.push_back(D.CCLogDiagnosticsFilename ? D.CCLogDiagnosticsFilename : "-"); } bool splitDwarfInlining = Args.hasFlag(options::OPT_fsplit_dwarf_inlining, options::OPT_fno_split_dwarf_inlining, true); Args.ClaimAllArgs(options::OPT_g_Group); Arg *SplitDwarfArg = Args.getLastArg(options::OPT_gsplit_dwarf); if (Arg *A = Args.getLastArg(options::OPT_g_Group)) { // If the last option explicitly specified a debug-info level, use it. if (A->getOption().matches(options::OPT_gN_Group)) { DebugInfoKind = DebugLevelToInfoKind(*A); // If you say "-gsplit-dwarf -gline-tables-only", -gsplit-dwarf loses. // But -gsplit-dwarf is not a g_group option, hence we have to check the // order explicitly. (If -gsplit-dwarf wins, we fix DebugInfoKind later.) // This gets a bit more complicated if you've disabled inline info in the // skeleton CUs (splitDwarfInlining) - then there's value in composing // split-dwarf and line-tables-only, so let those compose naturally in // that case. // And if you just turned off debug info, (-gsplit-dwarf -g0) - do that. if (SplitDwarfArg) { if (A->getIndex() > SplitDwarfArg->getIndex()) { if (DebugInfoKind == codegenoptions::NoDebugInfo || (DebugInfoKind == codegenoptions::DebugLineTablesOnly && splitDwarfInlining)) SplitDwarfArg = nullptr; } else if (splitDwarfInlining) DebugInfoKind = codegenoptions::NoDebugInfo; } } else // For any other 'g' option, use Limited. DebugInfoKind = codegenoptions::LimitedDebugInfo; } // If a debugger tuning argument appeared, remember it. if (Arg *A = Args.getLastArg(options::OPT_gTune_Group, options::OPT_ggdbN_Group)) { if (A->getOption().matches(options::OPT_glldb)) DebuggerTuning = llvm::DebuggerKind::LLDB; else if (A->getOption().matches(options::OPT_gsce)) DebuggerTuning = llvm::DebuggerKind::SCE; else DebuggerTuning = llvm::DebuggerKind::GDB; } // If a -gdwarf argument appeared, remember it. if (Arg *A = Args.getLastArg(options::OPT_gdwarf_2, options::OPT_gdwarf_3, options::OPT_gdwarf_4, options::OPT_gdwarf_5)) DwarfVersion = DwarfVersionNum(A->getSpelling()); // Forward -gcodeview. EmitCodeView might have been set by CL-compatibility // argument parsing. if (Args.hasArg(options::OPT_gcodeview) || EmitCodeView) { // DwarfVersion remains at 0 if no explicit choice was made. CmdArgs.push_back("-gcodeview"); } else if (DwarfVersion == 0 && DebugInfoKind != codegenoptions::NoDebugInfo) { DwarfVersion = getToolChain().GetDefaultDwarfVersion(); } // We ignore flags -gstrict-dwarf and -grecord-gcc-switches for now. Args.ClaimAllArgs(options::OPT_g_flags_Group); // Column info is included by default for everything except PS4 and CodeView. 
// Clang doesn't track end columns, just starting columns, which, in theory, // is fine for CodeView (and PDB). In practice, however, the Microsoft // debuggers don't handle missing end columns well, so it's better not to // include any column info. if (Args.hasFlag(options::OPT_gcolumn_info, options::OPT_gno_column_info, /*Default=*/ !IsPS4CPU && !(IsWindowsMSVC && EmitCodeView))) CmdArgs.push_back("-dwarf-column-info"); // FIXME: Move backend command line options to the module. // If -gline-tables-only is the last option it wins. if (DebugInfoKind != codegenoptions::DebugLineTablesOnly && Args.hasArg(options::OPT_gmodules)) { DebugInfoKind = codegenoptions::LimitedDebugInfo; CmdArgs.push_back("-dwarf-ext-refs"); CmdArgs.push_back("-fmodule-format=obj"); } // -gsplit-dwarf should turn on -g and enable the backend dwarf // splitting and extraction. // FIXME: Currently only works on Linux. if (getToolChain().getTriple().isOSLinux() && SplitDwarfArg) { if (!splitDwarfInlining) CmdArgs.push_back("-fno-split-dwarf-inlining"); if (DebugInfoKind == codegenoptions::NoDebugInfo) DebugInfoKind = codegenoptions::LimitedDebugInfo; CmdArgs.push_back("-backend-option"); CmdArgs.push_back("-split-dwarf=Enable"); } // After we've dealt with all combinations of things that could // make DebugInfoKind be other than None or DebugLineTablesOnly, // figure out if we need to "upgrade" it to standalone debug info. // We parse these two '-f' options whether or not they will be used, // to claim them even if you wrote "-fstandalone-debug -gline-tables-only" bool NeedFullDebug = Args.hasFlag(options::OPT_fstandalone_debug, options::OPT_fno_standalone_debug, getToolChain().GetDefaultStandaloneDebug()); if (DebugInfoKind == codegenoptions::LimitedDebugInfo && NeedFullDebug) DebugInfoKind = codegenoptions::FullDebugInfo; RenderDebugEnablingArgs(Args, CmdArgs, DebugInfoKind, DwarfVersion, DebuggerTuning); // -ggnu-pubnames turns on gnu style pubnames in the backend. if (Args.hasArg(options::OPT_ggnu_pubnames)) { CmdArgs.push_back("-backend-option"); CmdArgs.push_back("-generate-gnu-dwarf-pub-sections"); } // -gdwarf-aranges turns on the emission of the aranges section in the // backend. // Always enabled on the PS4. if (Args.hasArg(options::OPT_gdwarf_aranges) || IsPS4CPU) { CmdArgs.push_back("-backend-option"); CmdArgs.push_back("-generate-arange-section"); } if (Args.hasFlag(options::OPT_fdebug_types_section, options::OPT_fno_debug_types_section, false)) { CmdArgs.push_back("-backend-option"); CmdArgs.push_back("-generate-type-units"); } bool UseSeparateSections = isUseSeparateSections(Triple); if (Args.hasFlag(options::OPT_ffunction_sections, options::OPT_fno_function_sections, UseSeparateSections)) { CmdArgs.push_back("-ffunction-sections"); } if (Args.hasFlag(options::OPT_fdata_sections, options::OPT_fno_data_sections, UseSeparateSections)) { CmdArgs.push_back("-fdata-sections"); } if (!Args.hasFlag(options::OPT_funique_section_names, options::OPT_fno_unique_section_names, true)) CmdArgs.push_back("-fno-unique-section-names"); Args.AddAllArgs(CmdArgs, options::OPT_finstrument_functions); if (Args.hasFlag(options::OPT_fxray_instrument, options::OPT_fnoxray_instrument, false)) { const char *const XRayInstrumentOption = "-fxray-instrument"; if (Triple.getOS() == llvm::Triple::Linux) switch (Triple.getArch()) { case llvm::Triple::x86_64: case llvm::Triple::arm: case llvm::Triple::aarch64: // Supported. 
break; default: D.Diag(diag::err_drv_clang_unsupported) << (std::string(XRayInstrumentOption) + " on " + Triple.str()); } else D.Diag(diag::err_drv_clang_unsupported) << (std::string(XRayInstrumentOption) + " on non-Linux target OS"); CmdArgs.push_back(XRayInstrumentOption); if (const Arg *A = Args.getLastArg(options::OPT_fxray_instruction_threshold_, options::OPT_fxray_instruction_threshold_EQ)) { CmdArgs.push_back("-fxray-instruction-threshold"); CmdArgs.push_back(A->getValue()); } } addPGOAndCoverageFlags(C, D, Output, Args, CmdArgs); // Add runtime flag for PS4 when PGO or Coverage are enabled. if (getToolChain().getTriple().isPS4CPU()) addPS4ProfileRTArgs(getToolChain(), Args, CmdArgs); // Pass options for controlling the default header search paths. if (Args.hasArg(options::OPT_nostdinc)) { CmdArgs.push_back("-nostdsysteminc"); CmdArgs.push_back("-nobuiltininc"); } else { if (Args.hasArg(options::OPT_nostdlibinc)) CmdArgs.push_back("-nostdsysteminc"); Args.AddLastArg(CmdArgs, options::OPT_nostdincxx); Args.AddLastArg(CmdArgs, options::OPT_nobuiltininc); } // Pass the path to compiler resource files. CmdArgs.push_back("-resource-dir"); CmdArgs.push_back(D.ResourceDir.c_str()); Args.AddLastArg(CmdArgs, options::OPT_working_directory); bool ARCMTEnabled = false; if (!Args.hasArg(options::OPT_fno_objc_arc, options::OPT_fobjc_arc)) { if (const Arg *A = Args.getLastArg(options::OPT_ccc_arcmt_check, options::OPT_ccc_arcmt_modify, options::OPT_ccc_arcmt_migrate)) { ARCMTEnabled = true; switch (A->getOption().getID()) { default: llvm_unreachable("missed a case"); case options::OPT_ccc_arcmt_check: CmdArgs.push_back("-arcmt-check"); break; case options::OPT_ccc_arcmt_modify: CmdArgs.push_back("-arcmt-modify"); break; case options::OPT_ccc_arcmt_migrate: CmdArgs.push_back("-arcmt-migrate"); CmdArgs.push_back("-mt-migrate-directory"); CmdArgs.push_back(A->getValue()); Args.AddLastArg(CmdArgs, options::OPT_arcmt_migrate_report_output); Args.AddLastArg(CmdArgs, options::OPT_arcmt_migrate_emit_arc_errors); break; } } } else { Args.ClaimAllArgs(options::OPT_ccc_arcmt_check); Args.ClaimAllArgs(options::OPT_ccc_arcmt_modify); Args.ClaimAllArgs(options::OPT_ccc_arcmt_migrate); } if (const Arg *A = Args.getLastArg(options::OPT_ccc_objcmt_migrate)) { if (ARCMTEnabled) { D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args) << "-ccc-arcmt-migrate"; } CmdArgs.push_back("-mt-migrate-directory"); CmdArgs.push_back(A->getValue()); if (!Args.hasArg(options::OPT_objcmt_migrate_literals, options::OPT_objcmt_migrate_subscripting, options::OPT_objcmt_migrate_property)) { // None specified, means enable them all. 
CmdArgs.push_back("-objcmt-migrate-literals"); CmdArgs.push_back("-objcmt-migrate-subscripting"); CmdArgs.push_back("-objcmt-migrate-property"); } else { Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_literals); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_subscripting); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property); } } else { Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_literals); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_subscripting); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_all); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_readonly_property); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_readwrite_property); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property_dot_syntax); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_annotation); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_instancetype); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_nsmacros); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_protocol_conformance); Args.AddLastArg(CmdArgs, options::OPT_objcmt_atomic_property); Args.AddLastArg(CmdArgs, options::OPT_objcmt_returns_innerpointer_property); Args.AddLastArg(CmdArgs, options::OPT_objcmt_ns_nonatomic_iosonly); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_designated_init); Args.AddLastArg(CmdArgs, options::OPT_objcmt_whitelist_dir_path); } // Add preprocessing options like -I, -D, etc. if we are using the // preprocessor. // // FIXME: Support -fpreprocessed if (types::getPreprocessedType(InputType) != types::TY_INVALID) AddPreprocessingOptions(C, JA, D, Args, CmdArgs, Output, Inputs); // Don't warn about "clang -c -DPIC -fPIC test.i" because libtool.m4 assumes // that "The compiler can only warn and ignore the option if not recognized". // When building with ccache, it will pass -D options to clang even on // preprocessed inputs and configure concludes that -fPIC is not supported. Args.ClaimAllArgs(options::OPT_D); // Manually translate -O4 to -O3; let clang reject others. if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { if (A->getOption().matches(options::OPT_O4)) { CmdArgs.push_back("-O3"); D.Diag(diag::warn_O4_is_O3); } else { A->render(Args, CmdArgs); } } // Warn about ignored options to clang. for (const Arg *A : Args.filtered(options::OPT_clang_ignored_gcc_optimization_f_Group)) { D.Diag(diag::warn_ignored_gcc_optimization) << A->getAsString(Args); A->claim(); } claimNoWarnArgs(Args); Args.AddAllArgs(CmdArgs, options::OPT_R_Group); Args.AddAllArgs(CmdArgs, options::OPT_W_Group); if (Args.hasFlag(options::OPT_pedantic, options::OPT_no_pedantic, false)) CmdArgs.push_back("-pedantic"); Args.AddLastArg(CmdArgs, options::OPT_pedantic_errors); Args.AddLastArg(CmdArgs, options::OPT_w); // Handle -{std, ansi, trigraphs} -- take the last of -{std, ansi} // (-ansi is equivalent to -std=c89 or -std=c++98). // // If a std is supplied, only add -trigraphs if it follows the // option. bool ImplyVCPPCXXVer = false; if (Arg *Std = Args.getLastArg(options::OPT_std_EQ, options::OPT_ansi)) { if (Std->getOption().matches(options::OPT_ansi)) if (types::isCXX(InputType)) CmdArgs.push_back("-std=c++98"); else CmdArgs.push_back("-std=c89"); else Std->render(Args, CmdArgs); // If -f(no-)trigraphs appears after the language standard flag, honor it. 
if (Arg *A = Args.getLastArg(options::OPT_std_EQ, options::OPT_ansi, options::OPT_ftrigraphs, options::OPT_fno_trigraphs)) if (A != Std) A->render(Args, CmdArgs); } else { // Honor -std-default. // // FIXME: Clang doesn't correctly handle -std= when the input language // doesn't match. For the time being just ignore this for C++ inputs; // eventually we want to do all the standard defaulting here instead of // splitting it between the driver and clang -cc1. if (!types::isCXX(InputType)) Args.AddAllArgsTranslated(CmdArgs, options::OPT_std_default_EQ, "-std=", /*Joined=*/true); else if (IsWindowsMSVC) ImplyVCPPCXXVer = true; Args.AddLastArg(CmdArgs, options::OPT_ftrigraphs, options::OPT_fno_trigraphs); } // GCC's behavior for -Wwrite-strings is a bit strange: // * In C, this "warning flag" changes the types of string literals from // 'char[N]' to 'const char[N]', and thus triggers an unrelated warning // for the discarded qualifier. // * In C++, this is just a normal warning flag. // // Implementing this warning correctly in C is hard, so we follow GCC's // behavior for now. FIXME: Directly diagnose uses of a string literal as // a non-const char* in C, rather than using this crude hack. if (!types::isCXX(InputType)) { // FIXME: This should behave just like a warning flag, and thus should also // respect -Weverything, -Wno-everything, -Werror=write-strings, and so on. Arg *WriteStrings = Args.getLastArg(options::OPT_Wwrite_strings, options::OPT_Wno_write_strings, options::OPT_w); if (WriteStrings && WriteStrings->getOption().matches(options::OPT_Wwrite_strings)) CmdArgs.push_back("-fconst-strings"); } // GCC provides a macro definition '__DEPRECATED' when -Wdeprecated is active // during C++ compilation, which it is by default. GCC keeps this define even // in the presence of '-w', match this behavior bug-for-bug. if (types::isCXX(InputType) && Args.hasFlag(options::OPT_Wdeprecated, options::OPT_Wno_deprecated, true)) { CmdArgs.push_back("-fdeprecated-macro"); } // Translate GCC's misnamer '-fasm' arguments to '-fgnu-keywords'. if (Arg *Asm = Args.getLastArg(options::OPT_fasm, options::OPT_fno_asm)) { if (Asm->getOption().matches(options::OPT_fasm)) CmdArgs.push_back("-fgnu-keywords"); else CmdArgs.push_back("-fno-gnu-keywords"); } if (ShouldDisableDwarfDirectory(Args, getToolChain())) CmdArgs.push_back("-fno-dwarf-directory-asm"); if (ShouldDisableAutolink(Args, getToolChain())) CmdArgs.push_back("-fno-autolink"); // Add in -fdebug-compilation-dir if necessary. 
addDebugCompDirArg(Args, CmdArgs); for (const Arg *A : Args.filtered(options::OPT_fdebug_prefix_map_EQ)) { StringRef Map = A->getValue(); if (Map.find('=') == StringRef::npos) D.Diag(diag::err_drv_invalid_argument_to_fdebug_prefix_map) << Map; else CmdArgs.push_back(Args.MakeArgString("-fdebug-prefix-map=" + Map)); A->claim(); } if (Arg *A = Args.getLastArg(options::OPT_ftemplate_depth_, options::OPT_ftemplate_depth_EQ)) { CmdArgs.push_back("-ftemplate-depth"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_foperator_arrow_depth_EQ)) { CmdArgs.push_back("-foperator-arrow-depth"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_fconstexpr_depth_EQ)) { CmdArgs.push_back("-fconstexpr-depth"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_fconstexpr_steps_EQ)) { CmdArgs.push_back("-fconstexpr-steps"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_fbracket_depth_EQ)) { CmdArgs.push_back("-fbracket-depth"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_Wlarge_by_value_copy_EQ, options::OPT_Wlarge_by_value_copy_def)) { if (A->getNumValues()) { StringRef bytes = A->getValue(); CmdArgs.push_back(Args.MakeArgString("-Wlarge-by-value-copy=" + bytes)); } else CmdArgs.push_back("-Wlarge-by-value-copy=64"); // default value } if (Args.hasArg(options::OPT_relocatable_pch)) CmdArgs.push_back("-relocatable-pch"); if (Arg *A = Args.getLastArg(options::OPT_fconstant_string_class_EQ)) { CmdArgs.push_back("-fconstant-string-class"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_ftabstop_EQ)) { CmdArgs.push_back("-ftabstop"); CmdArgs.push_back(A->getValue()); } CmdArgs.push_back("-ferror-limit"); if (Arg *A = Args.getLastArg(options::OPT_ferror_limit_EQ)) CmdArgs.push_back(A->getValue()); else CmdArgs.push_back("19"); if (Arg *A = Args.getLastArg(options::OPT_fmacro_backtrace_limit_EQ)) { CmdArgs.push_back("-fmacro-backtrace-limit"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_ftemplate_backtrace_limit_EQ)) { CmdArgs.push_back("-ftemplate-backtrace-limit"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_fconstexpr_backtrace_limit_EQ)) { CmdArgs.push_back("-fconstexpr-backtrace-limit"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg(options::OPT_fspell_checking_limit_EQ)) { CmdArgs.push_back("-fspell-checking-limit"); CmdArgs.push_back(A->getValue()); } // Pass -fmessage-length=. CmdArgs.push_back("-fmessage-length"); if (Arg *A = Args.getLastArg(options::OPT_fmessage_length_EQ)) { CmdArgs.push_back(A->getValue()); } else { // If -fmessage-length=N was not specified, determine whether this is a // terminal and, if so, implicitly define -fmessage-length appropriately. unsigned N = llvm::sys::Process::StandardErrColumns(); CmdArgs.push_back(Args.MakeArgString(Twine(N))); } // -fvisibility= and -fvisibility-ms-compat are of a piece. 
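  // (-fvisibility-ms-compat is lowered below as hidden default symbol
  // visibility plus default type visibility, approximating the MSVC model.)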
  if (const Arg *A = Args.getLastArg(options::OPT_fvisibility_EQ,
                                     options::OPT_fvisibility_ms_compat)) {
    if (A->getOption().matches(options::OPT_fvisibility_EQ)) {
      CmdArgs.push_back("-fvisibility");
      CmdArgs.push_back(A->getValue());
    } else {
      assert(A->getOption().matches(options::OPT_fvisibility_ms_compat));
      CmdArgs.push_back("-fvisibility");
      CmdArgs.push_back("hidden");
      CmdArgs.push_back("-ftype-visibility");
      CmdArgs.push_back("default");
    }
  }

  Args.AddLastArg(CmdArgs, options::OPT_fvisibility_inlines_hidden);

  Args.AddLastArg(CmdArgs, options::OPT_ftlsmodel_EQ);

  // -fhosted is default.
  bool IsHosted = true;
  if (Args.hasFlag(options::OPT_ffreestanding, options::OPT_fhosted, false) ||
      KernelOrKext) {
    CmdArgs.push_back("-ffreestanding");
    IsHosted = false;
  }

  // Forward -f (flag) options which we can pass directly.
  Args.AddLastArg(CmdArgs, options::OPT_femit_all_decls);
  Args.AddLastArg(CmdArgs, options::OPT_fheinous_gnu_extensions);
  Args.AddLastArg(CmdArgs, options::OPT_fno_operator_names);

  // Emulated TLS is enabled by default on Android, and can be enabled
  // manually with -femulated-tls.
  bool EmulatedTLSDefault =
      Triple.isAndroid() || Triple.isWindowsCygwinEnvironment();
  if (Args.hasFlag(options::OPT_femulated_tls, options::OPT_fno_emulated_tls,
                   EmulatedTLSDefault))
    CmdArgs.push_back("-femulated-tls");

  // AltiVec-like language extensions aren't relevant for assembling.
  if (!isa<AssembleJobAction>(JA) || Output.getType() != types::TY_PP_Asm) {
    Args.AddLastArg(CmdArgs, options::OPT_faltivec);
    Args.AddLastArg(CmdArgs, options::OPT_fzvector);
  }

  Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_show_template_tree);
  Args.AddLastArg(CmdArgs, options::OPT_fno_elide_type);

  // Forward flags for OpenMP. We don't do this if the current action is a
  // device offloading action other than OpenMP.
  if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
                   options::OPT_fno_openmp, false) &&
      (JA.isDeviceOffloading(Action::OFK_None) ||
       JA.isDeviceOffloading(Action::OFK_OpenMP))) {
    switch (getToolChain().getDriver().getOpenMPRuntime(Args)) {
    case Driver::OMPRT_OMP:
    case Driver::OMPRT_IOMP5:
      // Clang can generate useful OpenMP code for these two runtime
      // libraries.
      CmdArgs.push_back("-fopenmp");

      // If no option regarding the use of TLS in OpenMP code generation is
      // given, decide a default based on the target. Otherwise rely on the
      // options and pass the right information to the frontend.
      if (!Args.hasFlag(options::OPT_fopenmp_use_tls,
                        options::OPT_fnoopenmp_use_tls, /*Default=*/true))
        CmdArgs.push_back("-fnoopenmp-use-tls");
      Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ);
      break;
    default:
      // By default, if Clang doesn't know how to generate useful OpenMP code
      // for a specific runtime library, we just don't pass the '-fopenmp'
      // flag down to the actual compilation.
      // FIXME: It would be better to have a mode which *only* omits IR
      // generation based on the OpenMP support so that we get consistent
      // semantic analysis, etc.
      break;
    }
  }

  const SanitizerArgs &Sanitize = getToolChain().getSanitizerArgs();
  Sanitize.addArgs(getToolChain(), Args, CmdArgs, InputType);

  // Report an error for -faltivec on anything other than PowerPC.
  if (const Arg *A = Args.getLastArg(options::OPT_faltivec)) {
    const llvm::Triple::ArchType Arch = getToolChain().getArch();
    if (!(Arch == llvm::Triple::ppc || Arch == llvm::Triple::ppc64 ||
          Arch == llvm::Triple::ppc64le))
      D.Diag(diag::err_drv_argument_only_allowed_with) << A->getAsString(Args)
                                                       << "ppc/ppc64/ppc64le";
  }

  // -fzvector is incompatible with -faltivec.
if (Arg *A = Args.getLastArg(options::OPT_fzvector)) if (Args.hasArg(options::OPT_faltivec)) D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args) << "-faltivec"; if (getToolChain().SupportsProfiling()) Args.AddLastArg(CmdArgs, options::OPT_pg); // -flax-vector-conversions is default. if (!Args.hasFlag(options::OPT_flax_vector_conversions, options::OPT_fno_lax_vector_conversions)) CmdArgs.push_back("-fno-lax-vector-conversions"); if (Args.getLastArg(options::OPT_fapple_kext) || (Args.hasArg(options::OPT_mkernel) && types::isCXX(InputType))) CmdArgs.push_back("-fapple-kext"); Args.AddLastArg(CmdArgs, options::OPT_fobjc_sender_dependent_dispatch); Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_print_source_range_info); Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_parseable_fixits); Args.AddLastArg(CmdArgs, options::OPT_ftime_report); Args.AddLastArg(CmdArgs, options::OPT_ftrapv); if (Arg *A = Args.getLastArg(options::OPT_ftrapv_handler_EQ)) { CmdArgs.push_back("-ftrapv-handler"); CmdArgs.push_back(A->getValue()); } Args.AddLastArg(CmdArgs, options::OPT_ftrap_function_EQ); // -fno-strict-overflow implies -fwrapv if it isn't disabled, but // -fstrict-overflow won't turn off an explicitly enabled -fwrapv. if (Arg *A = Args.getLastArg(options::OPT_fwrapv, options::OPT_fno_wrapv)) { if (A->getOption().matches(options::OPT_fwrapv)) CmdArgs.push_back("-fwrapv"); } else if (Arg *A = Args.getLastArg(options::OPT_fstrict_overflow, options::OPT_fno_strict_overflow)) { if (A->getOption().matches(options::OPT_fno_strict_overflow)) CmdArgs.push_back("-fwrapv"); } if (Arg *A = Args.getLastArg(options::OPT_freroll_loops, options::OPT_fno_reroll_loops)) if (A->getOption().matches(options::OPT_freroll_loops)) CmdArgs.push_back("-freroll-loops"); Args.AddLastArg(CmdArgs, options::OPT_fwritable_strings); Args.AddLastArg(CmdArgs, options::OPT_funroll_loops, options::OPT_fno_unroll_loops); Args.AddLastArg(CmdArgs, options::OPT_pthread); // -stack-protector=0 is default. unsigned StackProtectorLevel = 0; if (Arg *A = Args.getLastArg(options::OPT_fno_stack_protector, options::OPT_fstack_protector_all, options::OPT_fstack_protector_strong, options::OPT_fstack_protector)) { if (A->getOption().matches(options::OPT_fstack_protector)) { StackProtectorLevel = std::max( LangOptions::SSPOn, getToolChain().GetDefaultStackProtectorLevel(KernelOrKext)); } else if (A->getOption().matches(options::OPT_fstack_protector_strong)) StackProtectorLevel = LangOptions::SSPStrong; else if (A->getOption().matches(options::OPT_fstack_protector_all)) StackProtectorLevel = LangOptions::SSPReq; } else { StackProtectorLevel = getToolChain().GetDefaultStackProtectorLevel(KernelOrKext); // Only use a default stack protector on Darwin in case -ffreestanding // is not specified. if (Triple.isOSDarwin() && !IsHosted) StackProtectorLevel = 0; } if (StackProtectorLevel) { CmdArgs.push_back("-stack-protector"); CmdArgs.push_back(Args.MakeArgString(Twine(StackProtectorLevel))); } // --param ssp-buffer-size= for (const Arg *A : Args.filtered(options::OPT__param)) { StringRef Str(A->getValue()); if (Str.startswith("ssp-buffer-size=")) { if (StackProtectorLevel) { CmdArgs.push_back("-stack-protector-buffer-size"); // FIXME: Verify the argument is a valid integer. 
CmdArgs.push_back(Args.MakeArgString(Str.drop_front(16))); } A->claim(); } } // Translate -mstackrealign if (Args.hasFlag(options::OPT_mstackrealign, options::OPT_mno_stackrealign, false)) CmdArgs.push_back(Args.MakeArgString("-mstackrealign")); if (Args.hasArg(options::OPT_mstack_alignment)) { StringRef alignment = Args.getLastArgValue(options::OPT_mstack_alignment); CmdArgs.push_back(Args.MakeArgString("-mstack-alignment=" + alignment)); } if (Args.hasArg(options::OPT_mstack_probe_size)) { StringRef Size = Args.getLastArgValue(options::OPT_mstack_probe_size); if (!Size.empty()) CmdArgs.push_back(Args.MakeArgString("-mstack-probe-size=" + Size)); else CmdArgs.push_back("-mstack-probe-size=0"); } switch (getToolChain().getArch()) { case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: CmdArgs.push_back("-fallow-half-arguments-and-returns"); break; default: break; } if (Arg *A = Args.getLastArg(options::OPT_mrestrict_it, options::OPT_mno_restrict_it)) { if (A->getOption().matches(options::OPT_mrestrict_it)) { CmdArgs.push_back("-backend-option"); CmdArgs.push_back("-arm-restrict-it"); } else { CmdArgs.push_back("-backend-option"); CmdArgs.push_back("-arm-no-restrict-it"); } } else if (Triple.isOSWindows() && (Triple.getArch() == llvm::Triple::arm || Triple.getArch() == llvm::Triple::thumb)) { // Windows on ARM expects restricted IT blocks CmdArgs.push_back("-backend-option"); CmdArgs.push_back("-arm-restrict-it"); } // Forward -cl options to -cc1 if (Args.getLastArg(options::OPT_cl_opt_disable)) { CmdArgs.push_back("-cl-opt-disable"); } if (Args.getLastArg(options::OPT_cl_strict_aliasing)) { CmdArgs.push_back("-cl-strict-aliasing"); } if (Args.getLastArg(options::OPT_cl_single_precision_constant)) { CmdArgs.push_back("-cl-single-precision-constant"); } if (Args.getLastArg(options::OPT_cl_finite_math_only)) { CmdArgs.push_back("-cl-finite-math-only"); } if (Args.getLastArg(options::OPT_cl_kernel_arg_info)) { CmdArgs.push_back("-cl-kernel-arg-info"); } if (Args.getLastArg(options::OPT_cl_unsafe_math_optimizations)) { CmdArgs.push_back("-cl-unsafe-math-optimizations"); } if (Args.getLastArg(options::OPT_cl_fast_relaxed_math)) { CmdArgs.push_back("-cl-fast-relaxed-math"); } if (Args.getLastArg(options::OPT_cl_mad_enable)) { CmdArgs.push_back("-cl-mad-enable"); } if (Args.getLastArg(options::OPT_cl_no_signed_zeros)) { CmdArgs.push_back("-cl-no-signed-zeros"); } if (Arg *A = Args.getLastArg(options::OPT_cl_std_EQ)) { std::string CLStdStr = "-cl-std="; CLStdStr += A->getValue(); CmdArgs.push_back(Args.MakeArgString(CLStdStr)); } if (Args.getLastArg(options::OPT_cl_denorms_are_zero)) { CmdArgs.push_back("-cl-denorms-are-zero"); } if (Args.getLastArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt)) { CmdArgs.push_back("-cl-fp32-correctly-rounded-divide-sqrt"); } // Forward -f options with positive and negative forms; we translate // these by hand. if (Arg *A = Args.getLastArg(options::OPT_fprofile_sample_use_EQ)) { StringRef fname = A->getValue(); if (!llvm::sys::fs::exists(fname)) D.Diag(diag::err_drv_no_such_file) << fname; else A->render(Args, CmdArgs); } // -fbuiltin is default unless -mkernel is used. bool UseBuiltins = Args.hasFlag(options::OPT_fbuiltin, options::OPT_fno_builtin, !Args.hasArg(options::OPT_mkernel)); if (!UseBuiltins) CmdArgs.push_back("-fno-builtin"); // -ffreestanding implies -fno-builtin. 
if (Args.hasArg(options::OPT_ffreestanding)) UseBuiltins = false; // Process the -fno-builtin-* options. for (const auto &Arg : Args) { const Option &O = Arg->getOption(); if (!O.matches(options::OPT_fno_builtin_)) continue; Arg->claim(); // If -fno-builtin is specified, then there's no need to pass the option to // the frontend. if (!UseBuiltins) continue; StringRef FuncName = Arg->getValue(); CmdArgs.push_back(Args.MakeArgString("-fno-builtin-" + FuncName)); } if (!Args.hasFlag(options::OPT_fassume_sane_operator_new, options::OPT_fno_assume_sane_operator_new)) CmdArgs.push_back("-fno-assume-sane-operator-new"); // -fblocks=0 is default. if (Args.hasFlag(options::OPT_fblocks, options::OPT_fno_blocks, getToolChain().IsBlocksDefault()) || (Args.hasArg(options::OPT_fgnu_runtime) && Args.hasArg(options::OPT_fobjc_nonfragile_abi) && !Args.hasArg(options::OPT_fno_blocks))) { CmdArgs.push_back("-fblocks"); if (!Args.hasArg(options::OPT_fgnu_runtime) && !getToolChain().hasBlocksRuntime()) CmdArgs.push_back("-fblocks-runtime-optional"); } if (Args.hasFlag(options::OPT_fcoroutines_ts, options::OPT_fno_coroutines_ts, false) && types::isCXX(InputType)) { CmdArgs.push_back("-fcoroutines-ts"); } // -fmodules enables the use of precompiled modules (off by default). // Users can pass -fno-cxx-modules to turn off modules support for // C++/Objective-C++ programs. bool HaveClangModules = false; if (Args.hasFlag(options::OPT_fmodules, options::OPT_fno_modules, false)) { bool AllowedInCXX = Args.hasFlag(options::OPT_fcxx_modules, options::OPT_fno_cxx_modules, true); if (AllowedInCXX || !types::isCXX(InputType)) { CmdArgs.push_back("-fmodules"); HaveClangModules = true; } } bool HaveAnyModules = HaveClangModules; if (Args.hasArg(options::OPT_fmodules_ts)) { CmdArgs.push_back("-fmodules-ts"); HaveAnyModules = true; } // -fmodule-maps enables implicit reading of module map files. By default, // this is enabled if we are using Clang's flavor of precompiled modules. if (Args.hasFlag(options::OPT_fimplicit_module_maps, options::OPT_fno_implicit_module_maps, HaveClangModules)) { CmdArgs.push_back("-fimplicit-module-maps"); } // -fmodules-decluse checks that modules used are declared so (off by // default). if (Args.hasFlag(options::OPT_fmodules_decluse, options::OPT_fno_modules_decluse, false)) { CmdArgs.push_back("-fmodules-decluse"); } // -fmodules-strict-decluse is like -fmodule-decluse, but also checks that // all #included headers are part of modules. if (Args.hasFlag(options::OPT_fmodules_strict_decluse, options::OPT_fno_modules_strict_decluse, false)) { CmdArgs.push_back("-fmodules-strict-decluse"); } // -fno-implicit-modules turns off implicitly compiling modules on demand. if (!Args.hasFlag(options::OPT_fimplicit_modules, options::OPT_fno_implicit_modules, HaveClangModules)) { if (HaveAnyModules) CmdArgs.push_back("-fno-implicit-modules"); } else if (HaveAnyModules) { // -fmodule-cache-path specifies where our implicitly-built module files // should be written. SmallString<128> Path; if (Arg *A = Args.getLastArg(options::OPT_fmodules_cache_path)) Path = A->getValue(); if (C.isForDiagnostics()) { // When generating crash reports, we want to emit the modules along with // the reproduction sources, so we ignore any provided module path. Path = Output.getFilename(); llvm::sys::path::replace_extension(Path, ".cache"); llvm::sys::path::append(Path, "modules"); } else if (Path.empty()) { // No module path was provided: use the default. 
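      // (Typically something like /tmp/org.llvm.clang.<user>/ModuleCache on a
      // Unix-style system, given the construction below.)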
llvm::sys::path::system_temp_directory(/*erasedOnReboot=*/false, Path); llvm::sys::path::append(Path, "org.llvm.clang."); appendUserToPath(Path); llvm::sys::path::append(Path, "ModuleCache"); } const char Arg[] = "-fmodules-cache-path="; Path.insert(Path.begin(), Arg, Arg + strlen(Arg)); CmdArgs.push_back(Args.MakeArgString(Path)); } if (HaveAnyModules) { // -fprebuilt-module-path specifies where to load the prebuilt module files. for (const Arg *A : Args.filtered(options::OPT_fprebuilt_module_path)) CmdArgs.push_back(Args.MakeArgString( std::string("-fprebuilt-module-path=") + A->getValue())); } // -fmodule-name specifies the module that is currently being built (or // used for header checking by -fmodule-maps). Args.AddLastArg(CmdArgs, options::OPT_fmodule_name_EQ); // -fmodule-map-file can be used to specify files containing module // definitions. Args.AddAllArgs(CmdArgs, options::OPT_fmodule_map_file); // -fbuiltin-module-map can be used to load the clang // builtin headers modulemap file. if (Args.hasArg(options::OPT_fbuiltin_module_map)) { SmallString<128> BuiltinModuleMap(getToolChain().getDriver().ResourceDir); llvm::sys::path::append(BuiltinModuleMap, "include"); llvm::sys::path::append(BuiltinModuleMap, "module.modulemap"); if (llvm::sys::fs::exists(BuiltinModuleMap)) { CmdArgs.push_back(Args.MakeArgString("-fmodule-map-file=" + BuiltinModuleMap)); } } // -fmodule-file can be used to specify files containing precompiled modules. if (HaveAnyModules) Args.AddAllArgs(CmdArgs, options::OPT_fmodule_file); else Args.ClaimAllArgs(options::OPT_fmodule_file); // When building modules and generating crashdumps, we need to dump a module // dependency VFS alongside the output. if (HaveClangModules && C.isForDiagnostics()) { SmallString<128> VFSDir(Output.getFilename()); llvm::sys::path::replace_extension(VFSDir, ".cache"); // Add the cache directory as a temp so the crash diagnostics pick it up. C.addTempFile(Args.MakeArgString(VFSDir)); llvm::sys::path::append(VFSDir, "vfs"); CmdArgs.push_back("-module-dependency-dir"); CmdArgs.push_back(Args.MakeArgString(VFSDir)); } if (HaveClangModules) Args.AddLastArg(CmdArgs, options::OPT_fmodules_user_build_path); // Pass through all -fmodules-ignore-macro arguments. Args.AddAllArgs(CmdArgs, options::OPT_fmodules_ignore_macro); Args.AddLastArg(CmdArgs, options::OPT_fmodules_prune_interval); Args.AddLastArg(CmdArgs, options::OPT_fmodules_prune_after); Args.AddLastArg(CmdArgs, options::OPT_fbuild_session_timestamp); if (Arg *A = Args.getLastArg(options::OPT_fbuild_session_file)) { if (Args.hasArg(options::OPT_fbuild_session_timestamp)) D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args) << "-fbuild-session-timestamp"; llvm::sys::fs::file_status Status; if (llvm::sys::fs::status(A->getValue(), Status)) D.Diag(diag::err_drv_no_such_file) << A->getValue(); CmdArgs.push_back( Args.MakeArgString("-fbuild-session-timestamp=" + Twine((uint64_t)Status.getLastModificationTime() .time_since_epoch() .count()))); } if (Args.getLastArg(options::OPT_fmodules_validate_once_per_build_session)) { if (!Args.getLastArg(options::OPT_fbuild_session_timestamp, options::OPT_fbuild_session_file)) D.Diag(diag::err_drv_modules_validate_once_requires_timestamp); Args.AddLastArg(CmdArgs, options::OPT_fmodules_validate_once_per_build_session); } Args.AddLastArg(CmdArgs, options::OPT_fmodules_validate_system_headers); Args.AddLastArg(CmdArgs, options::OPT_fmodules_disable_diagnostic_validation); // -faccess-control is default. 
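Editor's aside: with the defaults above, the implicit module cache ends up under the system temporary directory. A small sketch of the resulting flag, assuming a POSIX /tmp and a hypothetical user name "alice" (the real code builds the path with llvm::sys::path on a SmallString):

#include <cstdio>
#include <string>

int main() {
  std::string Path = "/tmp";  // stands in for system_temp_directory(false, ...)
  Path += "/org.llvm.clang."; // fixed component appended above
  Path += "alice";            // appendUserToPath -> current user (hypothetical)
  Path += "/ModuleCache";
  std::printf("-fmodules-cache-path=%s\n", Path.c_str());
  return 0;
}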
  if (Args.hasFlag(options::OPT_fno_access_control,
                   options::OPT_faccess_control, false))
    CmdArgs.push_back("-fno-access-control");

  // -felide-constructors is the default.
  if (Args.hasFlag(options::OPT_fno_elide_constructors,
                   options::OPT_felide_constructors, false))
    CmdArgs.push_back("-fno-elide-constructors");

  ToolChain::RTTIMode RTTIMode = getToolChain().getRTTIMode();

  if (KernelOrKext || (types::isCXX(InputType) &&
                       (RTTIMode == ToolChain::RM_DisabledExplicitly ||
                        RTTIMode == ToolChain::RM_DisabledImplicitly)))
    CmdArgs.push_back("-fno-rtti");

  // -fshort-enums=0 is default for all architectures except Hexagon.
  if (Args.hasFlag(options::OPT_fshort_enums, options::OPT_fno_short_enums,
                   getToolChain().getArch() == llvm::Triple::hexagon))
    CmdArgs.push_back("-fshort-enums");

  // -fsigned-char is default.
  if (Arg *A = Args.getLastArg(
          options::OPT_fsigned_char, options::OPT_fno_signed_char,
          options::OPT_funsigned_char, options::OPT_fno_unsigned_char)) {
    if (A->getOption().matches(options::OPT_funsigned_char) ||
        A->getOption().matches(options::OPT_fno_signed_char)) {
      CmdArgs.push_back("-fno-signed-char");
    }
  } else if (!isSignedCharDefault(getToolChain().getTriple())) {
    CmdArgs.push_back("-fno-signed-char");
  }

  // -fuse-cxa-atexit is default.
  if (!Args.hasFlag(
          options::OPT_fuse_cxa_atexit, options::OPT_fno_use_cxa_atexit,
          !IsWindowsCygnus && !IsWindowsGNU &&
              getToolChain().getTriple().getOS() != llvm::Triple::Solaris &&
              getToolChain().getArch() != llvm::Triple::hexagon &&
              getToolChain().getArch() != llvm::Triple::xcore &&
              ((getToolChain().getTriple().getVendor() !=
                llvm::Triple::MipsTechnologies) ||
               getToolChain().getTriple().hasEnvironment())) ||
      KernelOrKext)
    CmdArgs.push_back("-fno-use-cxa-atexit");

  // -fms-extensions=0 is default.
  if (Args.hasFlag(options::OPT_fms_extensions, options::OPT_fno_ms_extensions,
                   IsWindowsMSVC))
    CmdArgs.push_back("-fms-extensions");

  // -fno-use-line-directives is default.
  if (Args.hasFlag(options::OPT_fuse_line_directives,
                   options::OPT_fno_use_line_directives, false))
    CmdArgs.push_back("-fuse-line-directives");

  // -fms-compatibility=0 is default.
  if (Args.hasFlag(options::OPT_fms_compatibility,
                   options::OPT_fno_ms_compatibility,
                   (IsWindowsMSVC &&
                    Args.hasFlag(options::OPT_fms_extensions,
                                 options::OPT_fno_ms_extensions, true))))
    CmdArgs.push_back("-fms-compatibility");

  VersionTuple MSVT =
      getToolChain().computeMSVCVersion(&getToolChain().getDriver(), Args);
  if (!MSVT.empty())
    CmdArgs.push_back(Args.MakeArgString("-fms-compatibility-version=" +
                                         MSVT.getAsString()));

  bool IsMSVC2015Compatible = MSVT.getMajor() >= 19;
  if (ImplyVCPPCXXVer) {
    StringRef LanguageStandard;
    if (const Arg *StdArg = Args.getLastArg(options::OPT__SLASH_std)) {
      LanguageStandard = llvm::StringSwitch<StringRef>(StdArg->getValue())
                             .Case("c++14", "-std=c++14")
                             .Case("c++latest", "-std=c++1z")
                             .Default("");
      if (LanguageStandard.empty())
        D.Diag(clang::diag::warn_drv_unused_argument)
            << StdArg->getAsString(Args);
    }

    if (LanguageStandard.empty()) {
      if (IsMSVC2015Compatible)
        LanguageStandard = "-std=c++14";
      else
        LanguageStandard = "-std=c++11";
    }

    CmdArgs.push_back(LanguageStandard.data());
  }

  // -fno-borland-extensions is default.
  if (Args.hasFlag(options::OPT_fborland_extensions,
                   options::OPT_fno_borland_extensions, false))
    CmdArgs.push_back("-fborland-extensions");

  // -fno-declspec is default, except for PS4.
  if (Args.hasFlag(options::OPT_fdeclspec, options::OPT_fno_declspec,
                   getToolChain().getTriple().isPS4()))
    CmdArgs.push_back("-fdeclspec");
  else if (Args.hasArg(options::OPT_fno_declspec))
    CmdArgs.push_back("-fno-declspec"); // Explicitly disabling __declspec.

  // -fthreadsafe-statics is default, except for MSVC compatibility versions
  // less than 19.
  if (!Args.hasFlag(options::OPT_fthreadsafe_statics,
                    options::OPT_fno_threadsafe_statics,
                    !IsWindowsMSVC || IsMSVC2015Compatible))
    CmdArgs.push_back("-fno-threadsafe-statics");

  // -fno-delayed-template-parsing is default, except for Windows where MSVC
  // STL needs it.
  if (Args.hasFlag(options::OPT_fdelayed_template_parsing,
                   options::OPT_fno_delayed_template_parsing, IsWindowsMSVC))
    CmdArgs.push_back("-fdelayed-template-parsing");

  // -fgnu-keywords default varies depending on language; only pass if
  // specified.
  if (Arg *A = Args.getLastArg(options::OPT_fgnu_keywords,
                               options::OPT_fno_gnu_keywords))
    A->render(Args, CmdArgs);

  if (Args.hasFlag(options::OPT_fgnu89_inline, options::OPT_fno_gnu89_inline,
                   false))
    CmdArgs.push_back("-fgnu89-inline");

  if (Args.hasArg(options::OPT_fno_inline))
    CmdArgs.push_back("-fno-inline");

  if (Arg *InlineArg = Args.getLastArg(options::OPT_finline_functions,
                                       options::OPT_finline_hint_functions,
                                       options::OPT_fno_inline_functions))
    InlineArg->render(Args, CmdArgs);

  Args.AddLastArg(CmdArgs, options::OPT_fexperimental_new_pass_manager,
                  options::OPT_fno_experimental_new_pass_manager);

  ObjCRuntime objcRuntime = AddObjCRuntimeArgs(Args, CmdArgs, rewriteKind);

  // -fobjc-dispatch-method is only relevant with the nonfragile-abi, and
  // legacy is the default. Except for deployment target of 10.5, next runtime
  // is always legacy dispatch and -fno-objc-legacy-dispatch gets ignored
  // silently.
  if (objcRuntime.isNonFragile()) {
    if (!Args.hasFlag(options::OPT_fobjc_legacy_dispatch,
                      options::OPT_fno_objc_legacy_dispatch,
                      objcRuntime.isLegacyDispatchDefaultForArch(
                          getToolChain().getArch()))) {
      if (getToolChain().UseObjCMixedDispatch())
        CmdArgs.push_back("-fobjc-dispatch-method=mixed");
      else
        CmdArgs.push_back("-fobjc-dispatch-method=non-legacy");
    }
  }

  // When the ObjectiveC legacy runtime is in effect on MacOSX, turn on the
  // option to do Array/Dictionary subscripting by default.
  if (getToolChain().getArch() == llvm::Triple::x86 &&
      getToolChain().getTriple().isMacOSX() &&
      !getToolChain().getTriple().isMacOSXVersionLT(10, 7) &&
      objcRuntime.getKind() == ObjCRuntime::FragileMacOSX &&
      objcRuntime.isNeXTFamily())
    CmdArgs.push_back("-fobjc-subscripting-legacy-runtime");

  // -fencode-extended-block-signature=1 is default.
  if (getToolChain().IsEncodeExtendedBlockSignatureDefault()) {
    CmdArgs.push_back("-fencode-extended-block-signature");
  }

  // Allow -fno-objc-arr to trump -fobjc-arr/-fobjc-arc.
  // NOTE: This logic is duplicated in ToolChains.cpp.
  bool ARC = isObjCAutoRefCount(Args);
  if (ARC) {
    getToolChain().CheckObjCARC();

    CmdArgs.push_back("-fobjc-arc");

    // FIXME: It seems like this entire block, and several around it should be
    // wrapped in isObjC, but for now we just use it here as this is where it
    // was being used previously.
    if (types::isCXX(InputType) && types::isObjC(InputType)) {
      if (getToolChain().GetCXXStdlibType(Args) == ToolChain::CST_Libcxx)
        CmdArgs.push_back("-fobjc-arc-cxxlib=libc++");
      else
        CmdArgs.push_back("-fobjc-arc-cxxlib=libstdc++");
    }

    // Allow the user to enable full exceptions code emission.
    // We define off for Objective-C, on for Objective-C++.
if (Args.hasFlag(options::OPT_fobjc_arc_exceptions, options::OPT_fno_objc_arc_exceptions, /*default*/ types::isCXX(InputType))) CmdArgs.push_back("-fobjc-arc-exceptions"); } // -fobjc-infer-related-result-type is the default, except in the Objective-C // rewriter. if (rewriteKind != RK_None) CmdArgs.push_back("-fno-objc-infer-related-result-type"); // Pass down -fobjc-weak or -fno-objc-weak if present. if (types::isObjC(InputType)) { auto WeakArg = Args.getLastArg(options::OPT_fobjc_weak, options::OPT_fno_objc_weak); if (!WeakArg) { // nothing to do } else if (!objcRuntime.allowsWeak()) { if (WeakArg->getOption().matches(options::OPT_fobjc_weak)) D.Diag(diag::err_objc_weak_unsupported); } else { WeakArg->render(Args, CmdArgs); } } if (Args.hasFlag(options::OPT_fapplication_extension, options::OPT_fno_application_extension, false)) CmdArgs.push_back("-fapplication-extension"); // Handle GCC-style exception args. if (!C.getDriver().IsCLMode()) addExceptionArgs(Args, InputType, getToolChain(), KernelOrKext, objcRuntime, CmdArgs); if (Args.hasArg(options::OPT_fsjlj_exceptions) || getToolChain().UseSjLjExceptions(Args)) CmdArgs.push_back("-fsjlj-exceptions"); // C++ "sane" operator new. if (!Args.hasFlag(options::OPT_fassume_sane_operator_new, options::OPT_fno_assume_sane_operator_new)) CmdArgs.push_back("-fno-assume-sane-operator-new"); // -frelaxed-template-template-args is off by default, as it is a severe // breaking change until a corresponding change to template partial ordering // is provided. if (Args.hasFlag(options::OPT_frelaxed_template_template_args, options::OPT_fno_relaxed_template_template_args, false)) CmdArgs.push_back("-frelaxed-template-template-args"); // -fsized-deallocation is off by default, as it is an ABI-breaking change for // most platforms. if (Args.hasFlag(options::OPT_fsized_deallocation, options::OPT_fno_sized_deallocation, false)) CmdArgs.push_back("-fsized-deallocation"); // -faligned-allocation is on by default in C++17 onwards and otherwise off // by default. if (Arg *A = Args.getLastArg(options::OPT_faligned_allocation, options::OPT_fno_aligned_allocation, options::OPT_faligned_new_EQ)) { if (A->getOption().matches(options::OPT_fno_aligned_allocation)) CmdArgs.push_back("-fno-aligned-allocation"); else CmdArgs.push_back("-faligned-allocation"); } // The default new alignment can be specified using a dedicated option or via // a GCC-compatible option that also turns on aligned allocation. if (Arg *A = Args.getLastArg(options::OPT_fnew_alignment_EQ, options::OPT_faligned_new_EQ)) CmdArgs.push_back( Args.MakeArgString(Twine("-fnew-alignment=") + A->getValue())); // -fconstant-cfstrings is default, and may be subject to argument translation // on Darwin. if (!Args.hasFlag(options::OPT_fconstant_cfstrings, options::OPT_fno_constant_cfstrings) || !Args.hasFlag(options::OPT_mconstant_cfstrings, options::OPT_mno_constant_cfstrings)) CmdArgs.push_back("-fno-constant-cfstrings"); // -fshort-wchar default varies depending on platform; only // pass if specified. if (Arg *A = Args.getLastArg(options::OPT_fshort_wchar, options::OPT_fno_short_wchar)) A->render(Args, CmdArgs); // -fno-pascal-strings is default, only pass non-default. if (Args.hasFlag(options::OPT_fpascal_strings, options::OPT_fno_pascal_strings, false)) CmdArgs.push_back("-fpascal-strings"); // Honor -fpack-struct= and -fpack-struct, if given. Note that // -fno-pack-struct doesn't apply to -fpack-struct=. 
  if (Arg *A = Args.getLastArg(options::OPT_fpack_struct_EQ)) {
    std::string PackStructStr = "-fpack-struct=";
    PackStructStr += A->getValue();
    CmdArgs.push_back(Args.MakeArgString(PackStructStr));
  } else if (Args.hasFlag(options::OPT_fpack_struct,
                          options::OPT_fno_pack_struct, false)) {
    CmdArgs.push_back("-fpack-struct=1");
  }

  // Handle -fmax-type-align=N and -fno-max-type-align.
  bool SkipMaxTypeAlign = Args.hasArg(options::OPT_fno_max_type_align);
  if (Arg *A = Args.getLastArg(options::OPT_fmax_type_align_EQ)) {
    if (!SkipMaxTypeAlign) {
      std::string MaxTypeAlignStr = "-fmax-type-align=";
      MaxTypeAlignStr += A->getValue();
      CmdArgs.push_back(Args.MakeArgString(MaxTypeAlignStr));
    }
  } else if (getToolChain().getTriple().isOSDarwin()) {
    if (!SkipMaxTypeAlign) {
      std::string MaxTypeAlignStr = "-fmax-type-align=16";
      CmdArgs.push_back(Args.MakeArgString(MaxTypeAlignStr));
    }
  }

  // -fcommon is the default unless compiling kernel code or the target says
  // otherwise.
  bool NoCommonDefault =
      KernelOrKext || isNoCommonDefault(getToolChain().getTriple());
  if (!Args.hasFlag(options::OPT_fcommon, options::OPT_fno_common,
                    !NoCommonDefault))
    CmdArgs.push_back("-fno-common");

  // -fsigned-bitfields is default, and clang doesn't yet support
  // -funsigned-bitfields.
  if (!Args.hasFlag(options::OPT_fsigned_bitfields,
                    options::OPT_funsigned_bitfields))
    D.Diag(diag::warn_drv_clang_unsupported)
        << Args.getLastArg(options::OPT_funsigned_bitfields)->getAsString(Args);

  // -ffor-scope is default, and clang doesn't support -fno-for-scope.
  if (!Args.hasFlag(options::OPT_ffor_scope, options::OPT_fno_for_scope))
    D.Diag(diag::err_drv_clang_unsupported)
        << Args.getLastArg(options::OPT_fno_for_scope)->getAsString(Args);

  // -finput-charset=UTF-8 is default. Reject others.
  if (Arg *inputCharset = Args.getLastArg(options::OPT_finput_charset_EQ)) {
    StringRef value = inputCharset->getValue();
    if (!value.equals_lower("utf-8"))
      D.Diag(diag::err_drv_invalid_value) << inputCharset->getAsString(Args)
                                          << value;
  }

  // -fexec-charset=UTF-8 is default. Reject others.
  if (Arg *execCharset = Args.getLastArg(options::OPT_fexec_charset_EQ)) {
    StringRef value = execCharset->getValue();
    if (!value.equals_lower("utf-8"))
      D.Diag(diag::err_drv_invalid_value) << execCharset->getAsString(Args)
                                          << value;
  }

  // -fcaret-diagnostics is default.
  if (!Args.hasFlag(options::OPT_fcaret_diagnostics,
                    options::OPT_fno_caret_diagnostics, true))
    CmdArgs.push_back("-fno-caret-diagnostics");

  // -fdiagnostics-fixit-info is default, only pass non-default.
  if (!Args.hasFlag(options::OPT_fdiagnostics_fixit_info,
                    options::OPT_fno_diagnostics_fixit_info))
    CmdArgs.push_back("-fno-diagnostics-fixit-info");

  // Enable -fdiagnostics-show-option by default.
if (Args.hasFlag(options::OPT_fdiagnostics_show_option, options::OPT_fno_diagnostics_show_option)) CmdArgs.push_back("-fdiagnostics-show-option"); if (const Arg *A = Args.getLastArg(options::OPT_fdiagnostics_show_category_EQ)) { CmdArgs.push_back("-fdiagnostics-show-category"); CmdArgs.push_back(A->getValue()); } if (Args.hasFlag(options::OPT_fdiagnostics_show_hotness, options::OPT_fno_diagnostics_show_hotness, false)) CmdArgs.push_back("-fdiagnostics-show-hotness"); if (const Arg *A = Args.getLastArg(options::OPT_fdiagnostics_format_EQ)) { CmdArgs.push_back("-fdiagnostics-format"); CmdArgs.push_back(A->getValue()); } if (Arg *A = Args.getLastArg( options::OPT_fdiagnostics_show_note_include_stack, options::OPT_fno_diagnostics_show_note_include_stack)) { if (A->getOption().matches( options::OPT_fdiagnostics_show_note_include_stack)) CmdArgs.push_back("-fdiagnostics-show-note-include-stack"); else CmdArgs.push_back("-fno-diagnostics-show-note-include-stack"); } // Color diagnostics are parsed by the driver directly from argv // and later re-parsed to construct this job; claim any possible // color diagnostic here to avoid warn_drv_unused_argument and // diagnose bad OPT_fdiagnostics_color_EQ values. for (Arg *A : Args) { const Option &O = A->getOption(); if (!O.matches(options::OPT_fcolor_diagnostics) && !O.matches(options::OPT_fdiagnostics_color) && !O.matches(options::OPT_fno_color_diagnostics) && !O.matches(options::OPT_fno_diagnostics_color) && !O.matches(options::OPT_fdiagnostics_color_EQ)) continue; if (O.matches(options::OPT_fdiagnostics_color_EQ)) { StringRef Value(A->getValue()); if (Value != "always" && Value != "never" && Value != "auto") getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported) << ("-fdiagnostics-color=" + Value).str(); } A->claim(); } if (D.getDiags().getDiagnosticOptions().ShowColors) CmdArgs.push_back("-fcolor-diagnostics"); if (Args.hasArg(options::OPT_fansi_escape_codes)) CmdArgs.push_back("-fansi-escape-codes"); if (!Args.hasFlag(options::OPT_fshow_source_location, options::OPT_fno_show_source_location)) CmdArgs.push_back("-fno-show-source-location"); if (Args.hasArg(options::OPT_fdiagnostics_absolute_paths)) CmdArgs.push_back("-fdiagnostics-absolute-paths"); if (!Args.hasFlag(options::OPT_fshow_column, options::OPT_fno_show_column, true)) CmdArgs.push_back("-fno-show-column"); if (!Args.hasFlag(options::OPT_fspell_checking, options::OPT_fno_spell_checking)) CmdArgs.push_back("-fno-spell-checking"); // -fno-asm-blocks is default. if (Args.hasFlag(options::OPT_fasm_blocks, options::OPT_fno_asm_blocks, false)) CmdArgs.push_back("-fasm-blocks"); // -fgnu-inline-asm is default. if (!Args.hasFlag(options::OPT_fgnu_inline_asm, options::OPT_fno_gnu_inline_asm, true)) CmdArgs.push_back("-fno-gnu-inline-asm"); // Enable vectorization per default according to the optimization level // selected. For optimization levels that want vectorization we use the alias // option to simplify the hasFlag logic. bool EnableVec = shouldEnableVectorizerAtOLevel(Args, false); OptSpecifier VectorizeAliasOption = EnableVec ? options::OPT_O_Group : options::OPT_fvectorize; if (Args.hasFlag(options::OPT_fvectorize, VectorizeAliasOption, options::OPT_fno_vectorize, EnableVec)) CmdArgs.push_back("-vectorize-loops"); // -fslp-vectorize is enabled based on the optimization level selected. bool EnableSLPVec = shouldEnableVectorizerAtOLevel(Args, true); OptSpecifier SLPVectAliasOption = EnableSLPVec ? 
options::OPT_O_Group : options::OPT_fslp_vectorize; if (Args.hasFlag(options::OPT_fslp_vectorize, SLPVectAliasOption, options::OPT_fno_slp_vectorize, EnableSLPVec)) CmdArgs.push_back("-vectorize-slp"); // -fno-slp-vectorize-aggressive is default. if (Args.hasFlag(options::OPT_fslp_vectorize_aggressive, options::OPT_fno_slp_vectorize_aggressive, false)) CmdArgs.push_back("-vectorize-slp-aggressive"); if (Arg *A = Args.getLastArg(options::OPT_fshow_overloads_EQ)) A->render(Args, CmdArgs); if (Arg *A = Args.getLastArg( options::OPT_fsanitize_undefined_strip_path_components_EQ)) A->render(Args, CmdArgs); // -fdollars-in-identifiers default varies depending on platform and // language; only pass if specified. if (Arg *A = Args.getLastArg(options::OPT_fdollars_in_identifiers, options::OPT_fno_dollars_in_identifiers)) { if (A->getOption().matches(options::OPT_fdollars_in_identifiers)) CmdArgs.push_back("-fdollars-in-identifiers"); else CmdArgs.push_back("-fno-dollars-in-identifiers"); } // -funit-at-a-time is default, and we don't support -fno-unit-at-a-time for // practical purposes. if (Arg *A = Args.getLastArg(options::OPT_funit_at_a_time, options::OPT_fno_unit_at_a_time)) { if (A->getOption().matches(options::OPT_fno_unit_at_a_time)) D.Diag(diag::warn_drv_clang_unsupported) << A->getAsString(Args); } if (Args.hasFlag(options::OPT_fapple_pragma_pack, options::OPT_fno_apple_pragma_pack, false)) CmdArgs.push_back("-fapple-pragma-pack"); // le32-specific flags: // -fno-math-builtin: clang should not convert math builtins to intrinsics // by default. if (getToolChain().getArch() == llvm::Triple::le32) { CmdArgs.push_back("-fno-math-builtin"); } if (Args.hasFlag(options::OPT_fsave_optimization_record, options::OPT_fno_save_optimization_record, false)) { CmdArgs.push_back("-opt-record-file"); const Arg *A = Args.getLastArg(options::OPT_foptimization_record_file_EQ); if (A) { CmdArgs.push_back(A->getValue()); } else { SmallString<128> F; if (Output.isFilename() && (Args.hasArg(options::OPT_c) || Args.hasArg(options::OPT_S))) { F = Output.getFilename(); } else { // Use the input filename. F = llvm::sys::path::stem(Input.getBaseInput()); // If we're compiling for an offload architecture (i.e. a CUDA device), // we need to make the file name for the device compilation different // from the host compilation. if (!JA.isDeviceOffloading(Action::OFK_None) && !JA.isDeviceOffloading(Action::OFK_Host)) { llvm::sys::path::replace_extension(F, ""); F += Action::GetOffloadingFileNamePrefix(JA.getOffloadingDeviceKind(), Triple.normalize()); F += "-"; F += JA.getOffloadingArch(); } } llvm::sys::path::replace_extension(F, "opt.yaml"); CmdArgs.push_back(Args.MakeArgString(F)); } } // Default to -fno-builtin-str{cat,cpy} on Darwin for ARM. // // FIXME: Now that PR4941 has been fixed this can be enabled. #if 0 if (getToolChain().getTriple().isOSDarwin() && (getToolChain().getArch() == llvm::Triple::arm || getToolChain().getArch() == llvm::Triple::thumb)) { if (!Args.hasArg(options::OPT_fbuiltin_strcat)) CmdArgs.push_back("-fno-builtin-strcat"); if (!Args.hasArg(options::OPT_fbuiltin_strcpy)) CmdArgs.push_back("-fno-builtin-strcpy"); } #endif // Enable rewrite includes if the user's asked for it or if we're generating // diagnostics. // TODO: Once -module-dependency-dir works with -frewrite-includes it'd be // nice to enable this when doing a crashdump for modules as well. 
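Editor's aside on the -fsave-optimization-record default file name computed above: the remark file is the input stem with its extension replaced by "opt.yaml". A small illustration using std::filesystem, which behaves like the llvm::sys::path calls here for this case:

#include <filesystem>
#include <iostream>

int main() {
  namespace fs = std::filesystem;
  fs::path F = fs::path("foo.c").stem(); // "foo", as llvm::sys::path::stem
  F.replace_extension("opt.yaml");       // -> "foo.opt.yaml"
  std::cout << F.string() << "\n";
  return 0;
}

The -frewrite-includes handling continues below.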
  if (Args.hasFlag(options::OPT_frewrite_includes,
                   options::OPT_fno_rewrite_includes, false) ||
      (C.isForDiagnostics() && !HaveAnyModules))
    CmdArgs.push_back("-frewrite-includes");

  // Only allow -traditional or -traditional-cpp in preprocessing modes.
  if (Arg *A = Args.getLastArg(options::OPT_traditional,
                               options::OPT_traditional_cpp)) {
    if (isa<PreprocessJobAction>(JA))
      CmdArgs.push_back("-traditional-cpp");
    else
      D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args);
  }

  Args.AddLastArg(CmdArgs, options::OPT_dM);
  Args.AddLastArg(CmdArgs, options::OPT_dD);

  // Handle serialized diagnostics.
  if (Arg *A = Args.getLastArg(options::OPT__serialize_diags)) {
    CmdArgs.push_back("-serialize-diagnostic-file");
    CmdArgs.push_back(Args.MakeArgString(A->getValue()));
  }

  if (Args.hasArg(options::OPT_fretain_comments_from_system_headers))
    CmdArgs.push_back("-fretain-comments-from-system-headers");

  // Forward -fcomment-block-commands to -cc1.
  Args.AddAllArgs(CmdArgs, options::OPT_fcomment_block_commands);
  // Forward -fparse-all-comments to -cc1.
  Args.AddAllArgs(CmdArgs, options::OPT_fparse_all_comments);

  // Turn -fplugin=name.so into -load name.so.
  for (const Arg *A : Args.filtered(options::OPT_fplugin_EQ)) {
    CmdArgs.push_back("-load");
    CmdArgs.push_back(A->getValue());
    A->claim();
  }

  // Set up statistics file output.
  if (const Arg *A = Args.getLastArg(options::OPT_save_stats_EQ)) {
    StringRef SaveStats = A->getValue();
    SmallString<128> StatsFile;
    bool DoSaveStats = false;
    if (SaveStats == "obj") {
      if (Output.isFilename()) {
        StatsFile.assign(Output.getFilename());
        llvm::sys::path::remove_filename(StatsFile);
      }
      DoSaveStats = true;
    } else if (SaveStats == "cwd") {
      DoSaveStats = true;
    } else {
      D.Diag(diag::err_drv_invalid_value) << A->getAsString(Args) << SaveStats;
    }

    if (DoSaveStats) {
      StringRef BaseName = llvm::sys::path::filename(Input.getBaseInput());
      llvm::sys::path::append(StatsFile, BaseName);
      llvm::sys::path::replace_extension(StatsFile, "stats");
      CmdArgs.push_back(Args.MakeArgString(Twine("-stats-file=") + StatsFile));
    }
  }

  // Forward -Xclang arguments to -cc1, and -mllvm arguments to the LLVM option
  // parser.
  Args.AddAllArgValues(CmdArgs, options::OPT_Xclang);
  for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
    A->claim();

    // We translate this by hand to the -cc1 argument, since nightly test uses
    // it and developers have been trained to spell it with -mllvm. Both
    // spellings are now deprecated and should be removed.
    if (StringRef(A->getValue(0)) == "-disable-llvm-optzns") {
      CmdArgs.push_back("-disable-llvm-optzns");
    } else {
      A->render(Args, CmdArgs);
    }
  }

  // With -save-temps, we want to save the unoptimized bitcode output from the
  // CompileJobAction, so we use -disable-llvm-passes to get pristine IR
  // generated by the frontend.
  // When -fembed-bitcode is enabled, optimized bitcode is emitted because it
  // has a slightly different breakdown between stages.
  // FIXME: -fembed-bitcode -save-temps will save optimized bitcode instead of
  // pristine IR generated by the frontend. Ideally, a new compile action
  // should be added so both IR can be captured.
  if (C.getDriver().isSaveTempsEnabled() &&
      !C.getDriver().embedBitcodeInObject() && isa<CompileJobAction>(JA))
    CmdArgs.push_back("-disable-llvm-passes");

  if (Output.getType() == types::TY_Dependencies) {
    // Handled with other dependency code.
  } else if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  } else {
    assert(Output.isNothing() && "Invalid output.");
  }

  addDashXForInput(Args, Input, CmdArgs);

  if (Input.isFilename())
    CmdArgs.push_back(Input.getFilename());
  else
    Input.getInputArg().renderAsInput(Args, CmdArgs);

  Args.AddAllArgs(CmdArgs, options::OPT_undef);

  const char *Exec = getToolChain().getDriver().getClangProgramPath();

  // Optionally embed the -cc1 level arguments into the debug info, for build
  // analysis.
  if (getToolChain().UseDwarfDebugFlags()) {
    ArgStringList OriginalArgs;
    for (const auto &Arg : Args)
      Arg->render(Args, OriginalArgs);

    SmallString<256> Flags;
    Flags += Exec;
    for (const char *OriginalArg : OriginalArgs) {
      SmallString<128> EscapedArg;
      EscapeSpacesAndBackslashes(OriginalArg, EscapedArg);
      Flags += " ";
      Flags += EscapedArg;
    }
    CmdArgs.push_back("-dwarf-debug-flags");
    CmdArgs.push_back(Args.MakeArgString(Flags));
  }

  // Add the split debug info name to the command lines here so we can
  // propagate it to the backend.
  bool SplitDwarf = SplitDwarfArg && getToolChain().getTriple().isOSLinux() &&
                    (isa<AssembleJobAction>(JA) || isa<CompileJobAction>(JA) ||
                     isa<BackendJobAction>(JA));
  const char *SplitDwarfOut;
  if (SplitDwarf) {
    CmdArgs.push_back("-split-dwarf-file");
    SplitDwarfOut = SplitDebugName(Args, Input);
    CmdArgs.push_back(SplitDwarfOut);
  }

  // Host-side CUDA compilation receives device-side outputs as Inputs[1...].
  // Include them with -fcuda-include-gpubinary.
  if (IsCuda && Inputs.size() > 1)
    for (auto I = std::next(Inputs.begin()), E = Inputs.end(); I != E; ++I) {
      CmdArgs.push_back("-fcuda-include-gpubinary");
      CmdArgs.push_back(I->getFilename());
    }

  // OpenMP offloading device jobs take the argument -fopenmp-host-ir-file-path
  // to specify the result of the compile phase on the host, so the meaningful
  // device declarations can be identified. Also, -fopenmp-is-device is passed
  // along to tell the frontend that it is generating code for a device, so
  // that only the relevant declarations are emitted.
  if (IsOpenMPDevice && Inputs.size() == 2) {
    CmdArgs.push_back("-fopenmp-is-device");
    CmdArgs.push_back("-fopenmp-host-ir-file-path");
    CmdArgs.push_back(Args.MakeArgString(Inputs.back().getFilename()));
  }

  // For all the host OpenMP offloading compile jobs we need to pass the
  // targets information using the -fopenmp-targets= option.
  if (isa<CompileJobAction>(JA) && JA.isHostOffloading(Action::OFK_OpenMP)) {
    SmallString<128> TargetInfo("-fopenmp-targets=");

    Arg *Tgts = Args.getLastArg(options::OPT_fopenmp_targets_EQ);
    assert(Tgts && Tgts->getNumValues() &&
           "OpenMP offloading has to have targets specified.");
    for (unsigned i = 0; i < Tgts->getNumValues(); ++i) {
      if (i)
        TargetInfo += ',';
      // We need to get the string from the triple because it may not be
      // exactly the same as the one we get directly from the arguments.
      llvm::Triple T(Tgts->getValue(i));
      TargetInfo += T.getTriple();
    }
    CmdArgs.push_back(Args.MakeArgString(TargetInfo.str()));
  }

  bool WholeProgramVTables =
      Args.hasFlag(options::OPT_fwhole_program_vtables,
                   options::OPT_fno_whole_program_vtables, false);
  if (WholeProgramVTables) {
    if (!D.isUsingLTO())
      D.Diag(diag::err_drv_argument_only_allowed_with)
          << "-fwhole-program-vtables"
          << "-flto";
    CmdArgs.push_back("-fwhole-program-vtables");
  }

  // Finally add the compile command to the compilation.
  if (Args.hasArg(options::OPT__SLASH_fallback) &&
      Output.getType() == types::TY_Object &&
      (InputType == types::TY_C || InputType == types::TY_CXX)) {
    auto CLCommand =
        getCLFallback()->GetCommand(C, JA, Output, Inputs, Args, LinkingOutput);
    C.addCommand(llvm::make_unique<FallbackCommand>(
        JA, *this, Exec, CmdArgs, Inputs, std::move(CLCommand)));
  } else if (Args.hasArg(options::OPT__SLASH_fallback) &&
             isa<PrecompileJobAction>(JA)) {
    // In /fallback builds, run the main compilation even if the pch generation
    // fails, so that the main compilation's fallback to cl.exe runs.
    C.addCommand(llvm::make_unique<ForceSuccessCommand>(JA, *this, Exec,
                                                        CmdArgs, Inputs));
  } else {
    C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
  }

  // Handle the debug info splitting at object creation time if we're
  // creating an object.
  // TODO: Currently only works on linux with newer objcopy.
  if (SplitDwarf && Output.getType() == types::TY_Object)
    SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output, SplitDwarfOut);

  if (Arg *A = Args.getLastArg(options::OPT_pg))
    if (Args.hasArg(options::OPT_fomit_frame_pointer))
      D.Diag(diag::err_drv_argument_not_allowed_with) << "-fomit-frame-pointer"
                                                      << A->getAsString(Args);

  // Claim some arguments which clang supports automatically.

  // -fpch-preprocess is used with gcc to add a special marker in the output to
  // include the PCH file. Clang's PTH solution is completely transparent, so
  // we do not need to deal with it at all.
  Args.ClaimAllArgs(options::OPT_fpch_preprocess);

  // Claim some arguments which clang doesn't support, but we don't
  // care to warn the user about.
  Args.ClaimAllArgs(options::OPT_clang_ignored_f_Group);
  Args.ClaimAllArgs(options::OPT_clang_ignored_m_Group);

  // Disable warnings for clang -E -emit-llvm foo.c
  Args.ClaimAllArgs(options::OPT_emit_llvm);
}

/// Add options related to the Objective-C runtime/ABI.
///
/// Returns the Objective-C runtime that was selected, which is also forwarded
/// to the frontend.
ObjCRuntime Clang::AddObjCRuntimeArgs(const ArgList &args,
                                      ArgStringList &cmdArgs,
                                      RewriteKind rewriteKind) const {
  // Look for the controlling runtime option.
  Arg *runtimeArg =
      args.getLastArg(options::OPT_fnext_runtime, options::OPT_fgnu_runtime,
                      options::OPT_fobjc_runtime_EQ);

  // Just forward -fobjc-runtime= to the frontend. This supersedes options
  // about fragility.
  if (runtimeArg &&
      runtimeArg->getOption().matches(options::OPT_fobjc_runtime_EQ)) {
    ObjCRuntime runtime;
    StringRef value = runtimeArg->getValue();
    if (runtime.tryParse(value)) {
      getToolChain().getDriver().Diag(diag::err_drv_unknown_objc_runtime)
          << value;
    }

    runtimeArg->render(args, cmdArgs);
    return runtime;
  }

  // Otherwise, we'll need the ABI "version". Version numbers are slightly
  // confusing for historical reasons:
  //   1 - Traditional "fragile" ABI
  //   2 - Non-fragile ABI, version 1
  //   3 - Non-fragile ABI, version 2
  unsigned objcABIVersion = 1;
  // If -fobjc-abi-version= is present, use that to set the version.
  if (Arg *abiArg = args.getLastArg(options::OPT_fobjc_abi_version_EQ)) {
    StringRef value = abiArg->getValue();
    if (value == "1")
      objcABIVersion = 1;
    else if (value == "2")
      objcABIVersion = 2;
    else if (value == "3")
      objcABIVersion = 3;
    else
      getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported) << value;
  } else {
    // Otherwise, determine if we are using the non-fragile ABI.
bool nonFragileABIIsDefault = (rewriteKind == RK_NonFragile || (rewriteKind == RK_None && getToolChain().IsObjCNonFragileABIDefault())); if (args.hasFlag(options::OPT_fobjc_nonfragile_abi, options::OPT_fno_objc_nonfragile_abi, nonFragileABIIsDefault)) { // Determine the non-fragile ABI version to use. #ifdef DISABLE_DEFAULT_NONFRAGILEABI_TWO unsigned nonFragileABIVersion = 1; #else unsigned nonFragileABIVersion = 2; #endif if (Arg *abiArg = args.getLastArg(options::OPT_fobjc_nonfragile_abi_version_EQ)) { StringRef value = abiArg->getValue(); if (value == "1") nonFragileABIVersion = 1; else if (value == "2") nonFragileABIVersion = 2; else getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported) << value; } objcABIVersion = 1 + nonFragileABIVersion; } else { objcABIVersion = 1; } } // We don't actually care about the ABI version other than whether // it's non-fragile. bool isNonFragile = objcABIVersion != 1; // If we have no runtime argument, ask the toolchain for its default runtime. // However, the rewriter only really supports the Mac runtime, so assume that. ObjCRuntime runtime; if (!runtimeArg) { switch (rewriteKind) { case RK_None: runtime = getToolChain().getDefaultObjCRuntime(isNonFragile); break; case RK_Fragile: runtime = ObjCRuntime(ObjCRuntime::FragileMacOSX, VersionTuple()); break; case RK_NonFragile: runtime = ObjCRuntime(ObjCRuntime::MacOSX, VersionTuple()); break; } // -fnext-runtime } else if (runtimeArg->getOption().matches(options::OPT_fnext_runtime)) { // On Darwin, make this use the default behavior for the toolchain. if (getToolChain().getTriple().isOSDarwin()) { runtime = getToolChain().getDefaultObjCRuntime(isNonFragile); // Otherwise, build for a generic macosx port. } else { runtime = ObjCRuntime(ObjCRuntime::MacOSX, VersionTuple()); } // -fgnu-runtime } else { assert(runtimeArg->getOption().matches(options::OPT_fgnu_runtime)); // Legacy behaviour is to target the gnustep runtime if we are in // non-fragile mode or the GCC runtime in fragile mode. if (isNonFragile) runtime = ObjCRuntime(ObjCRuntime::GNUstep, VersionTuple(1, 6)); else runtime = ObjCRuntime(ObjCRuntime::GCC, VersionTuple()); } cmdArgs.push_back( args.MakeArgString("-fobjc-runtime=" + runtime.getAsString())); return runtime; } static bool maybeConsumeDash(const std::string &EH, size_t &I) { bool HaveDash = (I + 1 < EH.size() && EH[I + 1] == '-'); I += HaveDash; return !HaveDash; } namespace { struct EHFlags { bool Synch = false; bool Asynch = false; bool NoUnwindC = false; }; } // end anonymous namespace /// /EH controls whether to run destructor cleanups when exceptions are /// thrown. There are three modifiers: /// - s: Cleanup after "synchronous" exceptions, aka C++ exceptions. /// - a: Cleanup after "asynchronous" exceptions, aka structured exceptions. /// The 'a' modifier is unimplemented and fundamentally hard in LLVM IR. /// - c: Assume that extern "C" functions are implicitly nounwind. /// The default is /EHs-c-, meaning cleanups are disabled. 
static EHFlags parseClangCLEHFlags(const Driver &D, const ArgList &Args) {
  EHFlags EH;

  std::vector<std::string> EHArgs =
      Args.getAllArgValues(options::OPT__SLASH_EH);
  for (auto EHVal : EHArgs) {
    for (size_t I = 0, E = EHVal.size(); I != E; ++I) {
      switch (EHVal[I]) {
      case 'a':
        EH.Asynch = maybeConsumeDash(EHVal, I);
        if (EH.Asynch)
          EH.Synch = false;
        continue;
      case 'c':
        EH.NoUnwindC = maybeConsumeDash(EHVal, I);
        continue;
      case 's':
        EH.Synch = maybeConsumeDash(EHVal, I);
        if (EH.Synch)
          EH.Asynch = false;
        continue;
      default:
        break;
      }
      D.Diag(clang::diag::err_drv_invalid_value) << "/EH" << EHVal;
      break;
    }
  }
  // The /GX, /GX- flags are only processed if there are no /EH flags.
  // The default is that /GX is not specified.
  if (EHArgs.empty() &&
      Args.hasFlag(options::OPT__SLASH_GX, options::OPT__SLASH_GX_,
                   /*default=*/false)) {
    EH.Synch = true;
    EH.NoUnwindC = true;
  }

  return EH;
}

void Clang::AddClangCLArgs(const ArgList &Args, types::ID InputType,
                           ArgStringList &CmdArgs,
                           codegenoptions::DebugInfoKind *DebugInfoKind,
                           bool *EmitCodeView) const {
  unsigned RTOptionID = options::OPT__SLASH_MT;

  if (Args.hasArg(options::OPT__SLASH_LDd))
    // The /LDd option implies /MTd. The dependent lib part can be overridden,
    // but defining _DEBUG is sticky.
    RTOptionID = options::OPT__SLASH_MTd;

  if (Arg *A = Args.getLastArg(options::OPT__SLASH_M_Group))
    RTOptionID = A->getOption().getID();

  StringRef FlagForCRT;
  switch (RTOptionID) {
  case options::OPT__SLASH_MD:
    if (Args.hasArg(options::OPT__SLASH_LDd))
      CmdArgs.push_back("-D_DEBUG");
    CmdArgs.push_back("-D_MT");
    CmdArgs.push_back("-D_DLL");
    FlagForCRT = "--dependent-lib=msvcrt";
    break;
  case options::OPT__SLASH_MDd:
    CmdArgs.push_back("-D_DEBUG");
    CmdArgs.push_back("-D_MT");
    CmdArgs.push_back("-D_DLL");
    FlagForCRT = "--dependent-lib=msvcrtd";
    break;
  case options::OPT__SLASH_MT:
    if (Args.hasArg(options::OPT__SLASH_LDd))
      CmdArgs.push_back("-D_DEBUG");
    CmdArgs.push_back("-D_MT");
    CmdArgs.push_back("-flto-visibility-public-std");
    FlagForCRT = "--dependent-lib=libcmt";
    break;
  case options::OPT__SLASH_MTd:
    CmdArgs.push_back("-D_DEBUG");
    CmdArgs.push_back("-D_MT");
    CmdArgs.push_back("-flto-visibility-public-std");
    FlagForCRT = "--dependent-lib=libcmtd";
    break;
  default:
    llvm_unreachable("Unexpected option ID.");
  }

  if (Args.hasArg(options::OPT__SLASH_Zl)) {
    CmdArgs.push_back("-D_VC_NODEFAULTLIB");
  } else {
    CmdArgs.push_back(FlagForCRT.data());

    // This provides POSIX compatibility (maps 'open' to '_open'), which most
    // users want. The /Za flag to cl.exe turns this off, but it's not
    // implemented in clang.
    CmdArgs.push_back("--dependent-lib=oldnames");
  }

  // Both /showIncludes and /E (and /EP) write to stdout. Allowing both
  // would produce interleaved output, so ignore /showIncludes in such cases.
  if (!Args.hasArg(options::OPT_E) && !Args.hasArg(options::OPT__SLASH_EP))
    if (Arg *A = Args.getLastArg(options::OPT_show_includes))
      A->render(Args, CmdArgs);

  // This controls whether or not we emit RTTI data for polymorphic types.
  if (Args.hasFlag(options::OPT__SLASH_GR_, options::OPT__SLASH_GR,
                   /*default=*/false))
    CmdArgs.push_back("-fno-rtti-data");

  // This controls whether or not we emit stack-protector instrumentation.
  // In MSVC, Buffer Security Check (/GS) is on by default.
  if (Args.hasFlag(options::OPT__SLASH_GS, options::OPT__SLASH_GS_,
                   /*default=*/true)) {
    CmdArgs.push_back("-stack-protector");
    CmdArgs.push_back(Args.MakeArgString(Twine(LangOptions::SSPStrong)));
  }

  // Emit CodeView if -Z7, -Zd, or -gline-tables-only are present.
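Editor's aside: a standalone re-implementation of the /EH parsing above (mirroring maybeConsumeDash) that can be compiled and run to check the documented default; unlike the real parser it silently ignores invalid characters instead of diagnosing them. The CodeView handling continues after the sketch.

#include <cassert>
#include <string>

struct EHFlagsModel { bool Synch = false, Asynch = false, NoUnwindC = false; };

static EHFlagsModel parseEH(const std::string &V) {
  EHFlagsModel EH;
  for (size_t I = 0; I < V.size(); ++I) {
    // A trailing '-' negates the modifier and is consumed along with it.
    bool On = !(I + 1 < V.size() && V[I + 1] == '-');
    switch (V[I]) {
    case 'a': EH.Asynch = On; if (On) EH.Synch = false; break;
    case 'c': EH.NoUnwindC = On; break;
    case 's': EH.Synch = On; if (On) EH.Asynch = false; break;
    }
    if (!On)
      ++I; // skip the consumed dash
  }
  return EH;
}

int main() {
  EHFlagsModel A = parseEH("sc"); // /EHsc
  assert(A.Synch && A.NoUnwindC && !A.Asynch);
  EHFlagsModel B = parseEH("s-c-"); // /EHs-c-, the documented default
  assert(!B.Synch && !B.Asynch && !B.NoUnwindC);
  return 0;
}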
if (Arg *DebugInfoArg = Args.getLastArg(options::OPT__SLASH_Z7, options::OPT__SLASH_Zd, options::OPT_gline_tables_only)) { *EmitCodeView = true; if (DebugInfoArg->getOption().matches(options::OPT__SLASH_Z7)) *DebugInfoKind = codegenoptions::LimitedDebugInfo; else *DebugInfoKind = codegenoptions::DebugLineTablesOnly; CmdArgs.push_back("-gcodeview"); } else { *EmitCodeView = false; } const Driver &D = getToolChain().getDriver(); EHFlags EH = parseClangCLEHFlags(D, Args); if (EH.Synch || EH.Asynch) { if (types::isCXX(InputType)) CmdArgs.push_back("-fcxx-exceptions"); CmdArgs.push_back("-fexceptions"); } if (types::isCXX(InputType) && EH.Synch && EH.NoUnwindC) CmdArgs.push_back("-fexternc-nounwind"); // /EP should expand to -E -P. if (Args.hasArg(options::OPT__SLASH_EP)) { CmdArgs.push_back("-E"); CmdArgs.push_back("-P"); } unsigned VolatileOptionID; if (getToolChain().getArch() == llvm::Triple::x86_64 || getToolChain().getArch() == llvm::Triple::x86) VolatileOptionID = options::OPT__SLASH_volatile_ms; else VolatileOptionID = options::OPT__SLASH_volatile_iso; if (Arg *A = Args.getLastArg(options::OPT__SLASH_volatile_Group)) VolatileOptionID = A->getOption().getID(); if (VolatileOptionID == options::OPT__SLASH_volatile_ms) CmdArgs.push_back("-fms-volatile"); Arg *MostGeneralArg = Args.getLastArg(options::OPT__SLASH_vmg); Arg *BestCaseArg = Args.getLastArg(options::OPT__SLASH_vmb); if (MostGeneralArg && BestCaseArg) D.Diag(clang::diag::err_drv_argument_not_allowed_with) << MostGeneralArg->getAsString(Args) << BestCaseArg->getAsString(Args); if (MostGeneralArg) { Arg *SingleArg = Args.getLastArg(options::OPT__SLASH_vms); Arg *MultipleArg = Args.getLastArg(options::OPT__SLASH_vmm); Arg *VirtualArg = Args.getLastArg(options::OPT__SLASH_vmv); Arg *FirstConflict = SingleArg ? SingleArg : MultipleArg; Arg *SecondConflict = VirtualArg ? 
VirtualArg : MultipleArg; if (FirstConflict && SecondConflict && FirstConflict != SecondConflict) D.Diag(clang::diag::err_drv_argument_not_allowed_with) << FirstConflict->getAsString(Args) << SecondConflict->getAsString(Args); if (SingleArg) CmdArgs.push_back("-fms-memptr-rep=single"); else if (MultipleArg) CmdArgs.push_back("-fms-memptr-rep=multiple"); else CmdArgs.push_back("-fms-memptr-rep=virtual"); } if (Args.getLastArg(options::OPT__SLASH_Gd)) CmdArgs.push_back("-fdefault-calling-conv=cdecl"); else if (Args.getLastArg(options::OPT__SLASH_Gr)) CmdArgs.push_back("-fdefault-calling-conv=fastcall"); else if (Args.getLastArg(options::OPT__SLASH_Gz)) CmdArgs.push_back("-fdefault-calling-conv=stdcall"); else if (Args.getLastArg(options::OPT__SLASH_Gv)) CmdArgs.push_back("-fdefault-calling-conv=vectorcall"); if (Arg *A = Args.getLastArg(options::OPT_vtordisp_mode_EQ)) A->render(Args, CmdArgs); if (!Args.hasArg(options::OPT_fdiagnostics_format_EQ)) { CmdArgs.push_back("-fdiagnostics-format"); if (Args.hasArg(options::OPT__SLASH_fallback)) CmdArgs.push_back("msvc-fallback"); else CmdArgs.push_back("msvc"); } } visualstudio::Compiler *Clang::getCLFallback() const { if (!CLFallback) CLFallback.reset(new visualstudio::Compiler(getToolChain())); return CLFallback.get(); } void ClangAs::AddMIPSTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { StringRef CPUName; StringRef ABIName; const llvm::Triple &Triple = getToolChain().getTriple(); mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName); CmdArgs.push_back("-target-abi"); CmdArgs.push_back(ABIName.data()); } void ClangAs::AddX86TargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { if (Arg *A = Args.getLastArg(options::OPT_masm_EQ)) { StringRef Value = A->getValue(); if (Value == "intel" || Value == "att") { CmdArgs.push_back("-mllvm"); CmdArgs.push_back(Args.MakeArgString("-x86-asm-syntax=" + Value)); } else { getToolChain().getDriver().Diag(diag::err_drv_unsupported_option_argument) << A->getOption().getName() << Value; } } } void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { ArgStringList CmdArgs; assert(Inputs.size() == 1 && "Unexpected number of inputs."); const InputInfo &Input = Inputs[0]; const llvm::Triple &Triple = getToolChain().getEffectiveTriple(); const std::string &TripleStr = Triple.getTriple(); // Don't warn about "clang -w -c foo.s" Args.ClaimAllArgs(options::OPT_w); // and "clang -emit-llvm -c foo.s" Args.ClaimAllArgs(options::OPT_emit_llvm); claimNoWarnArgs(Args); // Invoke ourselves in -cc1as mode. // // FIXME: Implement custom jobs for internal actions. CmdArgs.push_back("-cc1as"); // Add the "effective" target triple. CmdArgs.push_back("-triple"); CmdArgs.push_back(Args.MakeArgString(TripleStr)); // Set the output mode, we currently only expect to be used as a real // assembler. CmdArgs.push_back("-filetype"); CmdArgs.push_back("obj"); // Set the main file name, so that debug info works even with // -save-temps or preprocessed assembly. CmdArgs.push_back("-main-file-name"); CmdArgs.push_back(Clang::getBaseInputName(Args, Input)); // Add the target cpu std::string CPU = getCPUName(Args, Triple, /*FromAs*/ true); if (!CPU.empty()) { CmdArgs.push_back("-target-cpu"); CmdArgs.push_back(Args.MakeArgString(CPU)); } // Add the target features getTargetFeatures(getToolChain(), Triple, Args, CmdArgs, true); // Ignore explicit -force_cpusubtype_ALL option. 
(void)Args.hasArg(options::OPT_force__cpusubtype__ALL); // Pass along any -I options so we get proper .include search paths. Args.AddAllArgs(CmdArgs, options::OPT_I_Group); // Determine the original source input. const Action *SourceAction = &JA; while (SourceAction->getKind() != Action::InputClass) { assert(!SourceAction->getInputs().empty() && "unexpected root action!"); SourceAction = SourceAction->getInputs()[0]; } // Forward -g and handle debug info related flags, assuming we are dealing // with an actual assembly file. bool WantDebug = false; unsigned DwarfVersion = 0; Args.ClaimAllArgs(options::OPT_g_Group); if (Arg *A = Args.getLastArg(options::OPT_g_Group)) { WantDebug = !A->getOption().matches(options::OPT_g0) && !A->getOption().matches(options::OPT_ggdb0); if (WantDebug) DwarfVersion = DwarfVersionNum(A->getSpelling()); } if (DwarfVersion == 0) DwarfVersion = getToolChain().GetDefaultDwarfVersion(); codegenoptions::DebugInfoKind DebugInfoKind = codegenoptions::NoDebugInfo; if (SourceAction->getType() == types::TY_Asm || SourceAction->getType() == types::TY_PP_Asm) { // You might think that it would be ok to set DebugInfoKind outside of // the guard for source type, however there is a test which asserts // that some assembler invocation receives no -debug-info-kind, // and it's not clear whether that test is just overly restrictive. DebugInfoKind = (WantDebug ? codegenoptions::LimitedDebugInfo : codegenoptions::NoDebugInfo); // Add the -fdebug-compilation-dir flag if needed. addDebugCompDirArg(Args, CmdArgs); // Set the AT_producer to the clang version when using the integrated // assembler on assembly source files. CmdArgs.push_back("-dwarf-debug-producer"); CmdArgs.push_back(Args.MakeArgString(getClangFullVersion())); // And pass along -I options Args.AddAllArgs(CmdArgs, options::OPT_I); } RenderDebugEnablingArgs(Args, CmdArgs, DebugInfoKind, DwarfVersion, llvm::DebuggerKind::Default); // Handle -fPIC et al -- the relocation-model affects the assembler // for some targets. llvm::Reloc::Model RelocationModel; unsigned PICLevel; bool IsPIE; std::tie(RelocationModel, PICLevel, IsPIE) = ParsePICArgs(getToolChain(), Args); const char *RMName = RelocationModelName(RelocationModel); if (RMName) { CmdArgs.push_back("-mrelocation-model"); CmdArgs.push_back(RMName); } // Optionally embed the -cc1as level arguments into the debug info, for build // analysis. if (getToolChain().UseDwarfDebugFlags()) { ArgStringList OriginalArgs; for (const auto &Arg : Args) Arg->render(Args, OriginalArgs); SmallString<256> Flags; const char *Exec = getToolChain().getDriver().getClangProgramPath(); Flags += Exec; for (const char *OriginalArg : OriginalArgs) { SmallString<128> EscapedArg; EscapeSpacesAndBackslashes(OriginalArg, EscapedArg); Flags += " "; Flags += EscapedArg; } CmdArgs.push_back("-dwarf-debug-flags"); CmdArgs.push_back(Args.MakeArgString(Flags)); } // FIXME: Add -static support, once we have it. // Add target specific flags. switch (getToolChain().getArch()) { default: break; case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: AddMIPSTargetArgs(Args, CmdArgs); break; case llvm::Triple::x86: case llvm::Triple::x86_64: AddX86TargetArgs(Args, CmdArgs); break; } // Consume all the warning flags. Usually this would be handled more // gracefully by -cc1 (warning about unknown warning flags, etc) but -cc1as // doesn't handle that so rather than warning about unused flags that are // actually used, we'll lie by omission instead. 
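Editor's aside: the -dwarf-debug-flags embedding above (like the Clang::ConstructJob case earlier) joins the escaped original arguments into a single string. EscapeSpacesAndBackslashes prefixes exactly those two characters with a backslash; a minimal equivalent, for illustration only:

#include <iostream>
#include <string>

static std::string escapeSpacesAndBackslashes(const std::string &Arg) {
  std::string Out;
  for (char C : Arg) {
    if (C == ' ' || C == '\\')
      Out += '\\'; // escape spaces and backslashes only
    Out += C;
  }
  return Out;
}

int main() {
  std::cout << escapeSpacesAndBackslashes(R"(-DNAME=a b\c)") << "\n";
  // prints: -DNAME=a\ b\\c
  return 0;
}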
  // FIXME: Stop lying and consume only the appropriate driver flags.
  Args.ClaimAllArgs(options::OPT_W_Group);

  CollectArgsForIntegratedAssembler(C, Args, CmdArgs,
                                    getToolChain().getDriver());

  Args.AddAllArgs(CmdArgs, options::OPT_mllvm);

  assert(Output.isFilename() && "Unexpected lipo output.");
  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());

  assert(Input.isFilename() && "Invalid input.");
  CmdArgs.push_back(Input.getFilename());

  const char *Exec = getToolChain().getDriver().getClangProgramPath();
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));

  // Handle the debug info splitting at object creation time if we're
  // creating an object.
  // TODO: Currently only works on linux with newer objcopy.
  if (Args.hasArg(options::OPT_gsplit_dwarf) &&
      getToolChain().getTriple().isOSLinux())
    SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output,
                   SplitDebugName(Args, Input));
}

void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
                                  const InputInfo &Output,
                                  const InputInfoList &Inputs,
                                  const llvm::opt::ArgList &TCArgs,
                                  const char *LinkingOutput) const {
  // The version with only one output is expected to refer to a bundling job.
  assert(isa<OffloadBundlingJobAction>(JA) && "Expecting bundling job!");

  // The bundling command looks like this:
  // clang-offload-bundler -type=bc
  //   -targets=host-triple,openmp-triple1,openmp-triple2
  //   -outputs=input_file
  //   -inputs=unbundle_file_host,unbundle_file_tgt1,unbundle_file_tgt2"

  ArgStringList CmdArgs;

  // Get the type.
  CmdArgs.push_back(TCArgs.MakeArgString(
      Twine("-type=") + types::getTypeTempSuffix(Output.getType())));

  assert(JA.getInputs().size() == Inputs.size() &&
         "Not have inputs for all dependence actions??");

  // Get the targets.
  SmallString<128> Triples;
  Triples += "-targets=";
  for (unsigned I = 0; I < Inputs.size(); ++I) {
    if (I)
      Triples += ',';

    Action::OffloadKind CurKind = Action::OFK_Host;
    const ToolChain *CurTC = &getToolChain();
    const Action *CurDep = JA.getInputs()[I];

    if (const auto *OA = dyn_cast<OffloadAction>(CurDep)) {
      OA->doOnEachDependence([&](Action *A, const ToolChain *TC, const char *) {
        CurKind = A->getOffloadingDeviceKind();
        CurTC = TC;
      });
    }
    Triples += Action::GetOffloadKindName(CurKind);
    Triples += '-';
    Triples += CurTC->getTriple().normalize();
  }
  CmdArgs.push_back(TCArgs.MakeArgString(Triples));

  // Get bundled file command.
  CmdArgs.push_back(
      TCArgs.MakeArgString(Twine("-outputs=") + Output.getFilename()));

  // Get unbundled files command.
  SmallString<128> UB;
  UB += "-inputs=";
  for (unsigned I = 0; I < Inputs.size(); ++I) {
    if (I)
      UB += ',';
    UB += Inputs[I].getFilename();
  }
  CmdArgs.push_back(TCArgs.MakeArgString(UB));

  // All the inputs are encoded as commands.
  C.addCommand(llvm::make_unique<Command>(
      JA, *this,
      TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())),
      CmdArgs, None));
}

void OffloadBundler::ConstructJobMultipleOutputs(
    Compilation &C, const JobAction &JA, const InputInfoList &Outputs,
    const InputInfoList &Inputs, const llvm::opt::ArgList &TCArgs,
    const char *LinkingOutput) const {
  // The version with multiple outputs is expected to refer to an unbundling
  // job.
  auto &UA = cast<OffloadUnbundlingJobAction>(JA);

  // The unbundling command looks like this:
  // clang-offload-bundler -type=bc
  //   -targets=host-triple,openmp-triple1,openmp-triple2
  //   -inputs=input_file
  //   -outputs=unbundle_file_host,unbundle_file_tgt1,unbundle_file_tgt2"
  //   -unbundle

  ArgStringList CmdArgs;

  assert(Inputs.size() == 1 && "Expecting to unbundle a single file!");
  InputInfo Input = Inputs.front();

  // Get the type.
  CmdArgs.push_back(TCArgs.MakeArgString(
      Twine("-type=") + types::getTypeTempSuffix(Input.getType())));

  // Get the targets.
  SmallString<128> Triples;
  Triples += "-targets=";
  auto DepInfo = UA.getDependentActionsInfo();
  for (unsigned I = 0; I < DepInfo.size(); ++I) {
    if (I)
      Triples += ',';

    auto &Dep = DepInfo[I];
    Triples += Action::GetOffloadKindName(Dep.DependentOffloadKind);
    Triples += '-';
    Triples += Dep.DependentToolChain->getTriple().normalize();
  }
  CmdArgs.push_back(TCArgs.MakeArgString(Triples));

  // Get bundled file command.
  CmdArgs.push_back(
      TCArgs.MakeArgString(Twine("-inputs=") + Input.getFilename()));

  // Get unbundled files command.
  SmallString<128> UB;
  UB += "-outputs=";
  for (unsigned I = 0; I < Outputs.size(); ++I) {
    if (I)
      UB += ',';
    UB += Outputs[I].getFilename();
  }
  CmdArgs.push_back(TCArgs.MakeArgString(UB));
  CmdArgs.push_back("-unbundle");

  // All the inputs are encoded as commands.
  C.addCommand(llvm::make_unique<Command>(
      JA, *this,
      TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())),
      CmdArgs, None));
}

void GnuTool::anchor() {}

void gcc::Common::ConstructJob(Compilation &C, const JobAction &JA,
                               const InputInfo &Output,
                               const InputInfoList &Inputs, const ArgList &Args,
                               const char *LinkingOutput) const {
  const Driver &D = getToolChain().getDriver();
  ArgStringList CmdArgs;

  for (const auto &A : Args) {
    if (forwardToGCC(A->getOption())) {
      // It is unfortunate that we have to claim here, as this means
      // we will basically never report anything interesting for
      // platforms using a generic gcc, even if we are just using gcc
      // to get to the assembler.
      A->claim();

      // Don't forward any -g arguments to assembly steps.
      if (isa<AssembleJobAction>(JA) &&
          A->getOption().matches(options::OPT_g_Group))
        continue;

      // Don't forward any -W arguments to assembly and link steps.
      if ((isa<AssembleJobAction>(JA) || isa<LinkJobAction>(JA)) &&
          A->getOption().matches(options::OPT_W_Group))
        continue;

      A->render(Args, CmdArgs);
    }
  }

  RenderExtraToolArgs(JA, CmdArgs);

  // If using a driver driver, force the arch.
  if (getToolChain().getTriple().isOSDarwin()) {
    CmdArgs.push_back("-arch");
    CmdArgs.push_back(
        Args.MakeArgString(getToolChain().getDefaultUniversalArchName()));
  }

  // Try to force gcc to match the tool chain we want, if we recognize
  // the arch.
  //
  // FIXME: The triple class should directly provide the information we want
  // here.
  switch (getToolChain().getArch()) {
  default:
    break;
  case llvm::Triple::x86:
  case llvm::Triple::ppc:
    CmdArgs.push_back("-m32");
    break;
  case llvm::Triple::x86_64:
  case llvm::Triple::ppc64:
  case llvm::Triple::ppc64le:
    CmdArgs.push_back("-m64");
    break;
  case llvm::Triple::sparcel:
    CmdArgs.push_back("-EL");
    break;
  }

  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  } else {
    assert(Output.isNothing() && "Unexpected output");
    CmdArgs.push_back("-fsyntax-only");
  }

  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);

  // Only pass -x if gcc will understand it; otherwise hope gcc
  // understands the suffix correctly. The main use case this would go
  // wrong in is for linker inputs if they happened to have an odd
  // suffix; really the only way to get this to happen is a command
  // like '-x foobar a.c' which will treat a.c like a linker input.
  //
  // FIXME: For the linker case specifically, can we safely convert
  // inputs into '-Wl,' options?
  for (const auto &II : Inputs) {
    // Don't try to pass LLVM or AST inputs to a generic gcc.
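Editor's aside on the clang-offload-bundler jobs constructed above: the -targets= argument is a comma-separated list of offload-kind-prefixed normalized triples. A sketch of its assembly, with hypothetical kinds and triples (the gcc input-forwarding code continues after the sketch):

#include <iostream>
#include <string>
#include <vector>

int main() {
  // Hypothetical "<offload-kind>-<normalized-triple>" entries.
  std::vector<std::string> KindTriples = {
      "host-x86_64-unknown-linux-gnu", "openmp-powerpc64le-ibm-linux-gnu"};
  std::string Triples = "-targets=";
  for (size_t I = 0; I < KindTriples.size(); ++I) {
    if (I)
      Triples += ',';
    Triples += KindTriples[I];
  }
  // e.g.: clang-offload-bundler -type=bc -targets=... -outputs=... -inputs=...
  std::cout << Triples << "\n";
  return 0;
}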
    if (types::isLLVMIR(II.getType()))
      D.Diag(diag::err_drv_no_linker_llvm_support)
          << getToolChain().getTripleString();
    else if (II.getType() == types::TY_AST)
      D.Diag(diag::err_drv_no_ast_support) << getToolChain().getTripleString();
    else if (II.getType() == types::TY_ModuleFile)
      D.Diag(diag::err_drv_no_module_support)
          << getToolChain().getTripleString();

    if (types::canTypeBeUserSpecified(II.getType())) {
      CmdArgs.push_back("-x");
      CmdArgs.push_back(types::getTypeName(II.getType()));
    }

    if (II.isFilename())
      CmdArgs.push_back(II.getFilename());
    else {
      const Arg &A = II.getInputArg();

      // Reverse translate some rewritten options.
      if (A.getOption().matches(options::OPT_Z_reserved_lib_stdcxx)) {
        CmdArgs.push_back("-lstdc++");
        continue;
      }

      // Don't render as input, we need gcc to do the translations.
      A.render(Args, CmdArgs);
    }
  }

  const std::string &customGCCName = D.getCCCGenericGCCName();
  const char *GCCName;
  if (!customGCCName.empty())
    GCCName = customGCCName.c_str();
  else if (D.CCCIsCXX()) {
    GCCName = "g++";
  } else
    GCCName = "gcc";

  const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath(GCCName));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

void gcc::Preprocessor::RenderExtraToolArgs(const JobAction &JA,
                                            ArgStringList &CmdArgs) const {
  CmdArgs.push_back("-E");
}

void gcc::Compiler::RenderExtraToolArgs(const JobAction &JA,
                                        ArgStringList &CmdArgs) const {
  const Driver &D = getToolChain().getDriver();
  switch (JA.getType()) {
  // If -flto, etc. are present then make sure not to force assembly output.
  case types::TY_LLVM_IR:
  case types::TY_LTO_IR:
  case types::TY_LLVM_BC:
  case types::TY_LTO_BC:
    CmdArgs.push_back("-c");
    break;
  // We assume we've got an "integrated" assembler in that gcc will produce an
  // object file itself.
  case types::TY_Object:
    CmdArgs.push_back("-c");
    break;
  case types::TY_PP_Asm:
    CmdArgs.push_back("-S");
    break;
  case types::TY_Nothing:
    CmdArgs.push_back("-fsyntax-only");
    break;
  default:
    D.Diag(diag::err_drv_invalid_gcc_output_type) << getTypeName(JA.getType());
  }
}

void gcc::Linker::RenderExtraToolArgs(const JobAction &JA,
                                      ArgStringList &CmdArgs) const {
  // The types are (hopefully) good enough.
}

// Hexagon tools start.
void hexagon::Assembler::RenderExtraToolArgs(const JobAction &JA,
                                             ArgStringList &CmdArgs) const {}

void hexagon::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                      const InputInfo &Output,
                                      const InputInfoList &Inputs,
                                      const ArgList &Args,
                                      const char *LinkingOutput) const {
  claimNoWarnArgs(Args);

  auto &HTC = static_cast<const toolchains::HexagonToolChain &>(getToolChain());
  const Driver &D = HTC.getDriver();
  ArgStringList CmdArgs;

  std::string MArchString = "-march=hexagon";
  CmdArgs.push_back(Args.MakeArgString(MArchString));

  RenderExtraToolArgs(JA, CmdArgs);

  std::string AsName = "hexagon-llvm-mc";
  std::string MCpuString =
      "-mcpu=hexagon" +
      toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str();
  CmdArgs.push_back("-filetype=obj");
  CmdArgs.push_back(Args.MakeArgString(MCpuString));

  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  } else {
    assert(Output.isNothing() && "Unexpected output");
    CmdArgs.push_back("-fsyntax-only");
  }

  if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) {
    std::string N = llvm::utostr(G.getValue());
    CmdArgs.push_back(Args.MakeArgString(std::string("-gpsize=") + N));
  }

  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);

  // Only pass -x if gcc will understand it; otherwise hope gcc
  // understands the suffix correctly.
The main use case this would go // wrong in is for linker inputs if they happened to have an odd // suffix; really the only way to get this to happen is a command // like '-x foobar a.c' which will treat a.c like a linker input. // // FIXME: For the linker case specifically, can we safely convert // inputs into '-Wl,' options? for (const auto &II : Inputs) { // Don't try to pass LLVM or AST inputs to a generic gcc. if (types::isLLVMIR(II.getType())) D.Diag(clang::diag::err_drv_no_linker_llvm_support) << HTC.getTripleString(); else if (II.getType() == types::TY_AST) D.Diag(clang::diag::err_drv_no_ast_support) << HTC.getTripleString(); else if (II.getType() == types::TY_ModuleFile) D.Diag(diag::err_drv_no_module_support) << HTC.getTripleString(); if (II.isFilename()) CmdArgs.push_back(II.getFilename()); else // Don't render as input, we need gcc to do the translations. // FIXME: What is this? II.getInputArg().render(Args, CmdArgs); } auto *Exec = Args.MakeArgString(HTC.GetProgramPath(AsName.c_str())); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void hexagon::Linker::RenderExtraToolArgs(const JobAction &JA, ArgStringList &CmdArgs) const { } static void constructHexagonLinkArgs(Compilation &C, const JobAction &JA, const toolchains::HexagonToolChain &HTC, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, ArgStringList &CmdArgs, const char *LinkingOutput) { const Driver &D = HTC.getDriver(); //---------------------------------------------------------------------------- // //---------------------------------------------------------------------------- bool IsStatic = Args.hasArg(options::OPT_static); bool IsShared = Args.hasArg(options::OPT_shared); bool IsPIE = Args.hasArg(options::OPT_pie); bool IncStdLib = !Args.hasArg(options::OPT_nostdlib); bool IncStartFiles = !Args.hasArg(options::OPT_nostartfiles); bool IncDefLibs = !Args.hasArg(options::OPT_nodefaultlibs); bool UseG0 = false; bool UseShared = IsShared && !IsStatic; //---------------------------------------------------------------------------- // Silence warnings for various options //---------------------------------------------------------------------------- Args.ClaimAllArgs(options::OPT_g_Group); Args.ClaimAllArgs(options::OPT_emit_llvm); Args.ClaimAllArgs(options::OPT_w); // Other warning options are already // handled somewhere else. Args.ClaimAllArgs(options::OPT_static_libgcc); //---------------------------------------------------------------------------- // //---------------------------------------------------------------------------- if (Args.hasArg(options::OPT_s)) CmdArgs.push_back("-s"); if (Args.hasArg(options::OPT_r)) CmdArgs.push_back("-r"); for (const auto &Opt : HTC.ExtraOpts) CmdArgs.push_back(Opt.c_str()); CmdArgs.push_back("-march=hexagon"); std::string CpuVer = toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str(); std::string MCpuString = "-mcpu=hexagon" + CpuVer; CmdArgs.push_back(Args.MakeArgString(MCpuString)); if (IsShared) { CmdArgs.push_back("-shared"); // The following should be the default, but doing as hexagon-gcc does. 
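  // Illustrative shared-link command line built here (the CPU version is an
  // assumed example):
  //   hexagon-link -march=hexagon -mcpu=hexagonv60 -shared -call_shared ...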
CmdArgs.push_back("-call_shared"); } if (IsStatic) CmdArgs.push_back("-static"); if (IsPIE && !IsShared) CmdArgs.push_back("-pie"); if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) { std::string N = llvm::utostr(G.getValue()); CmdArgs.push_back(Args.MakeArgString(std::string("-G") + N)); UseG0 = G.getValue() == 0; } //---------------------------------------------------------------------------- // //---------------------------------------------------------------------------- CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); //---------------------------------------------------------------------------- // moslib //---------------------------------------------------------------------------- std::vector OsLibs; bool HasStandalone = false; for (const Arg *A : Args.filtered(options::OPT_moslib_EQ)) { A->claim(); OsLibs.emplace_back(A->getValue()); HasStandalone = HasStandalone || (OsLibs.back() == "standalone"); } if (OsLibs.empty()) { OsLibs.push_back("standalone"); HasStandalone = true; } //---------------------------------------------------------------------------- // Start Files //---------------------------------------------------------------------------- const std::string MCpuSuffix = "/" + CpuVer; const std::string MCpuG0Suffix = MCpuSuffix + "/G0"; const std::string RootDir = HTC.getHexagonTargetDir(D.InstalledDir, D.PrefixDirs) + "/"; const std::string StartSubDir = "hexagon/lib" + (UseG0 ? MCpuG0Suffix : MCpuSuffix); auto Find = [&HTC] (const std::string &RootDir, const std::string &SubDir, const char *Name) -> std::string { std::string RelName = SubDir + Name; std::string P = HTC.GetFilePath(RelName.c_str()); if (llvm::sys::fs::exists(P)) return P; return RootDir + RelName; }; if (IncStdLib && IncStartFiles) { if (!IsShared) { if (HasStandalone) { std::string Crt0SA = Find(RootDir, StartSubDir, "/crt0_standalone.o"); CmdArgs.push_back(Args.MakeArgString(Crt0SA)); } std::string Crt0 = Find(RootDir, StartSubDir, "/crt0.o"); CmdArgs.push_back(Args.MakeArgString(Crt0)); } std::string Init = UseShared ? 
Find(RootDir, StartSubDir + "/pic", "/initS.o") : Find(RootDir, StartSubDir, "/init.o"); CmdArgs.push_back(Args.MakeArgString(Init)); } //---------------------------------------------------------------------------- // Library Search Paths //---------------------------------------------------------------------------- const ToolChain::path_list &LibPaths = HTC.getFilePaths(); for (const auto &LibPath : LibPaths) CmdArgs.push_back(Args.MakeArgString(StringRef("-L") + LibPath)); //---------------------------------------------------------------------------- // //---------------------------------------------------------------------------- Args.AddAllArgs(CmdArgs, {options::OPT_T_Group, options::OPT_e, options::OPT_s, options::OPT_t, options::OPT_u_Group}); AddLinkerInputs(HTC, Inputs, Args, CmdArgs, JA); //---------------------------------------------------------------------------- // Libraries //---------------------------------------------------------------------------- if (IncStdLib && IncDefLibs) { if (D.CCCIsCXX()) { HTC.AddCXXStdlibLibArgs(Args, CmdArgs); CmdArgs.push_back("-lm"); } CmdArgs.push_back("--start-group"); if (!IsShared) { for (const std::string &Lib : OsLibs) CmdArgs.push_back(Args.MakeArgString("-l" + Lib)); CmdArgs.push_back("-lc"); } CmdArgs.push_back("-lgcc"); CmdArgs.push_back("--end-group"); } //---------------------------------------------------------------------------- // End files //---------------------------------------------------------------------------- if (IncStdLib && IncStartFiles) { std::string Fini = UseShared ? Find(RootDir, StartSubDir + "/pic", "/finiS.o") : Find(RootDir, StartSubDir, "/fini.o"); CmdArgs.push_back(Args.MakeArgString(Fini)); } } void hexagon::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { auto &HTC = static_cast(getToolChain()); ArgStringList CmdArgs; constructHexagonLinkArgs(C, JA, HTC, Output, Inputs, Args, CmdArgs, LinkingOutput); std::string Linker = HTC.GetProgramPath("hexagon-link"); C.addCommand(llvm::make_unique(JA, *this, Args.MakeArgString(Linker), CmdArgs, Inputs)); } // Hexagon tools end. void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { std::string Linker = getToolChain().GetProgramPath(getShortName()); ArgStringList CmdArgs; AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); CmdArgs.push_back("-shared"); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); C.addCommand(llvm::make_unique(JA, *this, Args.MakeArgString(Linker), CmdArgs, Inputs)); } // AMDGPU tools end. wasm::Linker::Linker(const ToolChain &TC) : GnuTool("wasm::Linker", "lld", TC) {} bool wasm::Linker::isLinkJob() const { return true; } bool wasm::Linker::hasIntegratedCPP() const { return false; } void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const ToolChain &ToolChain = getToolChain(); const Driver &D = ToolChain.getDriver(); const char *Linker = Args.MakeArgString(ToolChain.GetLinkerPath()); ArgStringList CmdArgs; CmdArgs.push_back("-flavor"); CmdArgs.push_back("ld"); // Enable garbage collection of unused input sections by default, since code // size is of particular importance. 
This is significantly facilitated by // the enabling of -ffunction-sections and -fdata-sections in // Clang::ConstructJob. if (areOptimizationsEnabled(Args)) CmdArgs.push_back("--gc-sections"); if (Args.hasArg(options::OPT_rdynamic)) CmdArgs.push_back("-export-dynamic"); if (Args.hasArg(options::OPT_s)) CmdArgs.push_back("--strip-all"); if (Args.hasArg(options::OPT_shared)) CmdArgs.push_back("-shared"); if (Args.hasArg(options::OPT_static)) CmdArgs.push_back("-Bstatic"); Args.AddAllArgs(CmdArgs, options::OPT_L); ToolChain.AddFilePathLibArgs(Args, CmdArgs); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (Args.hasArg(options::OPT_shared)) CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("rcrt1.o"))); else if (Args.hasArg(options::OPT_pie)) CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("Scrt1.o"))); else CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt1.o"))); CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o"))); } AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { if (D.CCCIsCXX()) ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); if (Args.hasArg(options::OPT_pthread)) CmdArgs.push_back("-lpthread"); CmdArgs.push_back("-lc"); CmdArgs.push_back("-lcompiler_rt"); } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o"))); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); C.addCommand(llvm::make_unique(JA, *this, Linker, CmdArgs, Inputs)); } const std::string arm::getARMArch(StringRef Arch, const llvm::Triple &Triple) { std::string MArch; if (!Arch.empty()) MArch = Arch; else MArch = Triple.getArchName(); MArch = StringRef(MArch).split("+").first.lower(); // Handle -march=native. if (MArch == "native") { std::string CPU = llvm::sys::getHostCPUName(); if (CPU != "generic") { // Translate the native cpu into the architecture suffix for that CPU. StringRef Suffix = arm::getLLVMArchSuffixForARM(CPU, MArch, Triple); // If there is no valid architecture suffix for this CPU we don't know how // to handle it, so return no architecture. if (Suffix.empty()) MArch = ""; else MArch = std::string("arm") + Suffix.str(); } } return MArch; } /// Get the (LLVM) name of the minimum ARM CPU for the arch we are targeting. StringRef arm::getARMCPUForMArch(StringRef Arch, const llvm::Triple &Triple) { std::string MArch = getARMArch(Arch, Triple); // getARMCPUForArch defaults to the triple if MArch is empty, but empty MArch // here means an -march=native that we can't handle, so instead return no CPU. if (MArch.empty()) return StringRef(); // We need to return an empty string here on invalid MArch values as the // various places that call this function can't cope with a null result. return Triple.getARMCPUForArch(MArch); } /// getARMTargetCPU - Get the (LLVM) name of the ARM cpu we are targeting. std::string arm::getARMTargetCPU(StringRef CPU, StringRef Arch, const llvm::Triple &Triple) { // FIXME: Warn on inconsistent use of -mcpu and -march. // If we have -mcpu=, use that. if (!CPU.empty()) { std::string MCPU = StringRef(CPU).split("+").first.lower(); // Handle -mcpu=native. if (MCPU == "native") return llvm::sys::getHostCPUName(); else return MCPU; } return getARMCPUForMArch(Arch, Triple); } /// getLLVMArchSuffixForARM - Get the LLVM arch name to use for a particular /// CPU (or Arch, if CPU is generic). // FIXME: This is redundant with -mcpu, why does LLVM use this. 
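// As an illustration (the host CPU is an assumed example): with -march=native
// on a cortex-a8 host, getARMArch() above maps "cortex-a8" through
// getLLVMArchSuffixForARM() to the suffix "v7", yielding MArch == "armv7".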
StringRef arm::getLLVMArchSuffixForARM(StringRef CPU, StringRef Arch,
                                       const llvm::Triple &Triple) {
  unsigned ArchKind;
  if (CPU == "generic") {
    std::string ARMArch = tools::arm::getARMArch(Arch, Triple);
    ArchKind = llvm::ARM::parseArch(ARMArch);
    if (ArchKind == llvm::ARM::AK_INVALID)
      // In case of generic Arch, i.e. "arm",
      // extract arch from default cpu of the Triple
      ArchKind = llvm::ARM::parseCPUArch(Triple.getARMCPUForArch(ARMArch));
  } else {
    // FIXME: horrible hack to get around the fact that Cortex-A7 is only an
    // armv7k triple if it's actually been specified via "-arch armv7k".
    ArchKind = (Arch == "armv7k" || Arch == "thumbv7k")
                   ? (unsigned)llvm::ARM::AK_ARMV7K
                   : llvm::ARM::parseCPUArch(CPU);
  }
  if (ArchKind == llvm::ARM::AK_INVALID)
    return "";
  return llvm::ARM::getSubArch(ArchKind);
}

void arm::appendEBLinkFlags(const ArgList &Args, ArgStringList &CmdArgs,
                            const llvm::Triple &Triple) {
  if (Args.hasArg(options::OPT_r))
    return;

  // ARMv7 (and later) and ARMv6-M do not support BE-32, so instruct the linker
  // to generate BE-8 executables.
  if (getARMSubArchVersionNumber(Triple) >= 7 || isARMMProfile(Triple))
    CmdArgs.push_back("--be8");
}

mips::NanEncoding mips::getSupportedNanEncoding(StringRef &CPU) {
  // Strictly speaking, mips32r2 and mips64r2 are NanLegacy-only since Nan2008
  // was first introduced in Release 3. However, other compilers have
  // traditionally allowed it for Release 2 so we should do the same.
  return (NanEncoding)llvm::StringSwitch<int>(CPU)
      .Case("mips1", NanLegacy)
      .Case("mips2", NanLegacy)
      .Case("mips3", NanLegacy)
      .Case("mips4", NanLegacy)
      .Case("mips5", NanLegacy)
      .Case("mips32", NanLegacy)
      .Case("mips32r2", NanLegacy | Nan2008)
      .Case("mips32r3", NanLegacy | Nan2008)
      .Case("mips32r5", NanLegacy | Nan2008)
      .Case("mips32r6", Nan2008)
      .Case("mips64", NanLegacy)
      .Case("mips64r2", NanLegacy | Nan2008)
      .Case("mips64r3", NanLegacy | Nan2008)
      .Case("mips64r5", NanLegacy | Nan2008)
      .Case("mips64r6", Nan2008)
      .Default(NanLegacy);
}

bool mips::hasCompactBranches(StringRef &CPU) {
  // mips32r6 and mips64r6 have compact branches.
  return llvm::StringSwitch<bool>(CPU)
      .Case("mips32r6", true)
      .Case("mips64r6", true)
      .Default(false);
}

bool mips::hasMipsAbiArg(const ArgList &Args, const char *Value) {
  Arg *A = Args.getLastArg(options::OPT_mabi_EQ);
  return A && (A->getValue() == StringRef(Value));
}

bool mips::isUCLibc(const ArgList &Args) {
  Arg *A = Args.getLastArg(options::OPT_m_libc_Group);
  return A && A->getOption().matches(options::OPT_muclibc);
}

bool mips::isNaN2008(const ArgList &Args, const llvm::Triple &Triple) {
  if (Arg *NaNArg = Args.getLastArg(options::OPT_mnan_EQ))
    return llvm::StringSwitch<bool>(NaNArg->getValue())
        .Case("2008", true)
        .Case("legacy", false)
        .Default(false);

  // NaN2008 is the default for MIPS32r6/MIPS64r6.
  return llvm::StringSwitch<bool>(getCPUName(Args, Triple))
      .Cases("mips32r6", "mips64r6", true)
      .Default(false);
}

bool mips::isFP64ADefault(const llvm::Triple &Triple, StringRef CPUName) {
  if (!Triple.isAndroid())
    return false;

  // Android MIPS32R6 defaults to FP64A.
  return llvm::StringSwitch<bool>(CPUName)
      .Case("mips32r6", true)
      .Default(false);
}

bool mips::isFPXXDefault(const llvm::Triple &Triple, StringRef CPUName,
                         StringRef ABIName, mips::FloatABI FloatABI) {
  if (Triple.getVendor() != llvm::Triple::ImaginationTechnologies &&
      Triple.getVendor() != llvm::Triple::MipsTechnologies &&
      !Triple.isAndroid())
    return false;

  if (ABIName != "32")
    return false;

  // FPXX shouldn't be used if either -msoft-float or -mfloat-abi=soft is
  // present.
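  // e.g. a hypothetical '-target mips-mti-linux-gnu -mabi=32 -mips32r2'
  // compile with hard float lands in the FPXX default below, while adding
  // '-mfloat-abi=soft' bails out here first.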
  if (FloatABI == mips::FloatABI::Soft)
    return false;

  return llvm::StringSwitch<bool>(CPUName)
      .Cases("mips2", "mips3", "mips4", "mips5", true)
      .Cases("mips32", "mips32r2", "mips32r3", "mips32r5", true)
      .Cases("mips64", "mips64r2", "mips64r3", "mips64r5", true)
      .Default(false);
}

bool mips::shouldUseFPXX(const ArgList &Args, const llvm::Triple &Triple,
                         StringRef CPUName, StringRef ABIName,
                         mips::FloatABI FloatABI) {
  bool UseFPXX = isFPXXDefault(Triple, CPUName, ABIName, FloatABI);

  // FPXX shouldn't be used if -msingle-float is present.
  if (Arg *A = Args.getLastArg(options::OPT_msingle_float,
                               options::OPT_mdouble_float))
    if (A->getOption().matches(options::OPT_msingle_float))
      UseFPXX = false;

  return UseFPXX;
}

llvm::Triple::ArchType darwin::getArchTypeForMachOArchName(StringRef Str) {
  // See arch(3) and llvm-gcc's driver-driver.c. We don't implement support for
  // archs which Darwin doesn't use.

  // The matching this routine does is fairly pointless, since it is neither the
  // complete architecture list, nor a reasonable subset. The problem is that
  // historically the driver driver accepts this and also ties its -march=
  // handling to the architecture name, so we need to be careful before removing
  // support for it.

  // This code must be kept in sync with Clang's Darwin specific argument
  // translation.
  return llvm::StringSwitch<llvm::Triple::ArchType>(Str)
      .Cases("ppc", "ppc601", "ppc603", "ppc604", "ppc604e", llvm::Triple::ppc)
      .Cases("ppc750", "ppc7400", "ppc7450", "ppc970", llvm::Triple::ppc)
      .Case("ppc64", llvm::Triple::ppc64)
      .Cases("i386", "i486", "i486SX", "i586", "i686", llvm::Triple::x86)
      .Cases("pentium", "pentpro", "pentIIm3", "pentIIm5", "pentium4",
             llvm::Triple::x86)
      .Cases("x86_64", "x86_64h", llvm::Triple::x86_64)
      // This is derived from the driver driver.
      .Cases("arm", "armv4t", "armv5", "armv6", "armv6m", llvm::Triple::arm)
      .Cases("armv7", "armv7em", "armv7k", "armv7m", llvm::Triple::arm)
      .Cases("armv7s", "xscale", llvm::Triple::arm)
      .Case("arm64", llvm::Triple::aarch64)
      .Case("r600", llvm::Triple::r600)
      .Case("amdgcn", llvm::Triple::amdgcn)
      .Case("nvptx", llvm::Triple::nvptx)
      .Case("nvptx64", llvm::Triple::nvptx64)
      .Case("amdil", llvm::Triple::amdil)
      .Case("spir", llvm::Triple::spir)
      .Default(llvm::Triple::UnknownArch);
}

void darwin::setTripleTypeForMachOArchName(llvm::Triple &T, StringRef Str) {
  const llvm::Triple::ArchType Arch = getArchTypeForMachOArchName(Str);
  unsigned ArchKind = llvm::ARM::parseArch(Str);
  T.setArch(Arch);

  if (Str == "x86_64h")
    T.setArchName(Str);
  else if (ArchKind == llvm::ARM::AK_ARMV6M ||
           ArchKind == llvm::ARM::AK_ARMV7M ||
           ArchKind == llvm::ARM::AK_ARMV7EM) {
    T.setOS(llvm::Triple::UnknownOS);
    T.setObjectFormat(llvm::Triple::MachO);
  }
}

const char *Clang::getBaseInputName(const ArgList &Args,
                                    const InputInfo &Input) {
  return Args.MakeArgString(llvm::sys::path::filename(Input.getBaseInput()));
}

const char *Clang::getBaseInputStem(const ArgList &Args,
                                    const InputInfoList &Inputs) {
  const char *Str = getBaseInputName(Args, Inputs[0]);

  if (const char *End = strrchr(Str, '.'))
    return Args.MakeArgString(std::string(Str, End));

  return Str;
}

const char *Clang::getDependencyFileName(const ArgList &Args,
                                         const InputInfoList &Inputs) {
  // FIXME: Think about this more.
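  // Illustrative behavior (paths are assumed examples): 'clang -MD -c foo.c
  // -o build/foo.o' derives 'build/foo.d' from the -o value; without -o the
  // stem of the first input is used, giving 'foo.d'.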
std::string Res; if (Arg *OutputOpt = Args.getLastArg(options::OPT_o)) { std::string Str(OutputOpt->getValue()); Res = Str.substr(0, Str.rfind('.')); } else { Res = getBaseInputStem(Args, Inputs); } return Args.MakeArgString(Res + ".d"); } void cloudabi::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const ToolChain &ToolChain = getToolChain(); const Driver &D = ToolChain.getDriver(); ArgStringList CmdArgs; // Silence warning for "clang -g foo.o -o foo" Args.ClaimAllArgs(options::OPT_g_Group); // and "clang -emit-llvm foo.o -o foo" Args.ClaimAllArgs(options::OPT_emit_llvm); // and for "clang -w foo.o -o foo". Other warning options are already // handled somewhere else. Args.ClaimAllArgs(options::OPT_w); if (!D.SysRoot.empty()) CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); // CloudABI only supports static linkage. CmdArgs.push_back("-Bstatic"); CmdArgs.push_back("--no-dynamic-linker"); // Provide PIE linker flags in case PIE is default for the architecture. if (ToolChain.isPIEDefault()) { CmdArgs.push_back("-pie"); CmdArgs.push_back("-zrelro"); } CmdArgs.push_back("--eh-frame-hdr"); CmdArgs.push_back("--gc-sections"); if (Output.isFilename()) { CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); } else { assert(Output.isNothing() && "Invalid output."); } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt0.o"))); CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtbegin.o"))); } Args.AddAllArgs(CmdArgs, options::OPT_L); ToolChain.AddFilePathLibArgs(Args, CmdArgs); Args.AddAllArgs(CmdArgs, {options::OPT_T_Group, options::OPT_e, options::OPT_s, options::OPT_t, options::OPT_Z_Flag, options::OPT_r}); if (D.isUsingLTO()) AddGoldPlugin(ToolChain, Args, CmdArgs, D.getLTOMode() == LTOK_Thin, D); AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { if (D.CCCIsCXX()) ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); CmdArgs.push_back("-lc"); CmdArgs.push_back("-lcompiler_rt"); } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtend.o"))); const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void darwin::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { ArgStringList CmdArgs; assert(Inputs.size() == 1 && "Unexpected number of inputs."); const InputInfo &Input = Inputs[0]; // Determine the original source input. const Action *SourceAction = &JA; while (SourceAction->getKind() != Action::InputClass) { assert(!SourceAction->getInputs().empty() && "unexpected root action!"); SourceAction = SourceAction->getInputs()[0]; } // If -fno-integrated-as is used add -Q to the darwin assember driver to make // sure it runs its system assembler not clang's integrated assembler. // Applicable to darwin11+ and Xcode 4+. darwin<10 lacked integrated-as. // FIXME: at run-time detect assembler capabilities or rely on version // information forwarded by -target-assembler-version. 
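// e.g. (illustrative): 'clang -fno-integrated-as -c foo.s' on OS X 10.7 or
// later passes -Q below, steering the 'as' driver to the system assembler.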
if (Args.hasArg(options::OPT_fno_integrated_as)) { const llvm::Triple &T(getToolChain().getTriple()); if (!(T.isMacOSX() && T.isMacOSXVersionLT(10, 7))) CmdArgs.push_back("-Q"); } // Forward -g, assuming we are dealing with an actual assembly file. if (SourceAction->getType() == types::TY_Asm || SourceAction->getType() == types::TY_PP_Asm) { if (Args.hasArg(options::OPT_gstabs)) CmdArgs.push_back("--gstabs"); else if (Args.hasArg(options::OPT_g_Group)) CmdArgs.push_back("-g"); } // Derived from asm spec. AddMachOArch(Args, CmdArgs); // Use -force_cpusubtype_ALL on x86 by default. if (getToolChain().getArch() == llvm::Triple::x86 || getToolChain().getArch() == llvm::Triple::x86_64 || Args.hasArg(options::OPT_force__cpusubtype__ALL)) CmdArgs.push_back("-force_cpusubtype_ALL"); if (getToolChain().getArch() != llvm::Triple::x86_64 && (((Args.hasArg(options::OPT_mkernel) || Args.hasArg(options::OPT_fapple_kext)) && getMachOToolChain().isKernelStatic()) || Args.hasArg(options::OPT_static))) CmdArgs.push_back("-static"); Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); assert(Output.isFilename() && "Unexpected lipo output."); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); assert(Input.isFilename() && "Invalid input."); CmdArgs.push_back(Input.getFilename()); // asm_final spec is empty. const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void darwin::MachOTool::anchor() {} void darwin::MachOTool::AddMachOArch(const ArgList &Args, ArgStringList &CmdArgs) const { StringRef ArchName = getMachOToolChain().getMachOArchName(Args); // Derived from darwin_arch spec. CmdArgs.push_back("-arch"); CmdArgs.push_back(Args.MakeArgString(ArchName)); // FIXME: Is this needed anymore? if (ArchName == "arm") CmdArgs.push_back("-force_cpusubtype_ALL"); } bool darwin::Linker::NeedsTempPath(const InputInfoList &Inputs) const { // We only need to generate a temp path for LTO if we aren't compiling object // files. When compiling source files, we run 'dsymutil' after linking. We // don't run 'dsymutil' when compiling object files. for (const auto &Input : Inputs) if (Input.getType() != types::TY_Object) return true; return false; } /// \brief Pass -no_deduplicate to ld64 under certain conditions: /// /// - Either -O0 or -O1 is explicitly specified /// - No -O option is specified *and* this is a compile+link (implicit -O0) /// /// Also do *not* add -no_deduplicate when no -O option is specified and this /// is just a link (we can't imply -O0) static bool shouldLinkerNotDedup(bool IsLinkerOnlyAction, const ArgList &Args) { if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { if (A->getOption().matches(options::OPT_O0)) return true; if (A->getOption().matches(options::OPT_O)) return llvm::StringSwitch(A->getValue()) .Case("1", true) .Default(false); return false; // OPT_Ofast & OPT_O4 } if (!IsLinkerOnlyAction) // Implicit -O0 for compile+linker only. return true; return false; } void darwin::Linker::AddLinkArgs(Compilation &C, const ArgList &Args, ArgStringList &CmdArgs, const InputInfoList &Inputs) const { const Driver &D = getToolChain().getDriver(); const toolchains::MachO &MachOTC = getMachOToolChain(); unsigned Version[5] = {0, 0, 0, 0, 0}; if (Arg *A = Args.getLastArg(options::OPT_mlinker_version_EQ)) { if (!Driver::GetReleaseVersion(A->getValue(), Version)) D.Diag(diag::err_drv_invalid_version_number) << A->getAsString(Args); } // Newer linkers support -demangle. 
Pass it if supported and not disabled by // the user. if (Version[0] >= 100 && !Args.hasArg(options::OPT_Z_Xlinker__no_demangle)) CmdArgs.push_back("-demangle"); if (Args.hasArg(options::OPT_rdynamic) && Version[0] >= 137) CmdArgs.push_back("-export_dynamic"); // If we are using App Extension restrictions, pass a flag to the linker // telling it that the compiled code has been audited. if (Args.hasFlag(options::OPT_fapplication_extension, options::OPT_fno_application_extension, false)) CmdArgs.push_back("-application_extension"); if (D.isUsingLTO()) { // If we are using LTO, then automatically create a temporary file path for // the linker to use, so that it's lifetime will extend past a possible // dsymutil step. if (Version[0] >= 116 && NeedsTempPath(Inputs)) { const char *TmpPath = C.getArgs().MakeArgString( D.GetTemporaryPath("cc", types::getTypeTempSuffix(types::TY_Object))); C.addTempFile(TmpPath); CmdArgs.push_back("-object_path_lto"); CmdArgs.push_back(TmpPath); } } // Use -lto_library option to specify the libLTO.dylib path. Try to find // it in clang installed libraries. ld64 will only look at this argument // when it actually uses LTO, so libLTO.dylib only needs to exist at link // time if ld64 decides that it needs to use LTO. // Since this is passed unconditionally, ld64 will never look for libLTO.dylib // next to it. That's ok since ld64 using a libLTO.dylib not matching the // clang version won't work anyways. if (Version[0] >= 133) { // Search for libLTO in /../lib/libLTO.dylib StringRef P = llvm::sys::path::parent_path(D.Dir); SmallString<128> LibLTOPath(P); llvm::sys::path::append(LibLTOPath, "lib"); llvm::sys::path::append(LibLTOPath, "libLTO.dylib"); CmdArgs.push_back("-lto_library"); CmdArgs.push_back(C.getArgs().MakeArgString(LibLTOPath)); } // ld64 version 262 and above run the deduplicate pass by default. if (Version[0] >= 262 && shouldLinkerNotDedup(C.getJobs().empty(), Args)) CmdArgs.push_back("-no_deduplicate"); // Derived from the "link" spec. Args.AddAllArgs(CmdArgs, options::OPT_static); if (!Args.hasArg(options::OPT_static)) CmdArgs.push_back("-dynamic"); if (Args.hasArg(options::OPT_fgnu_runtime)) { // FIXME: gcc replaces -lobjc in forward args with -lobjc-gnu // here. How do we wish to handle such things? } if (!Args.hasArg(options::OPT_dynamiclib)) { AddMachOArch(Args, CmdArgs); // FIXME: Why do this only on this path? 
Args.AddLastArg(CmdArgs, options::OPT_force__cpusubtype__ALL); Args.AddLastArg(CmdArgs, options::OPT_bundle); Args.AddAllArgs(CmdArgs, options::OPT_bundle__loader); Args.AddAllArgs(CmdArgs, options::OPT_client__name); Arg *A; if ((A = Args.getLastArg(options::OPT_compatibility__version)) || (A = Args.getLastArg(options::OPT_current__version)) || (A = Args.getLastArg(options::OPT_install__name))) D.Diag(diag::err_drv_argument_only_allowed_with) << A->getAsString(Args) << "-dynamiclib"; Args.AddLastArg(CmdArgs, options::OPT_force__flat__namespace); Args.AddLastArg(CmdArgs, options::OPT_keep__private__externs); Args.AddLastArg(CmdArgs, options::OPT_private__bundle); } else { CmdArgs.push_back("-dylib"); Arg *A; if ((A = Args.getLastArg(options::OPT_bundle)) || (A = Args.getLastArg(options::OPT_bundle__loader)) || (A = Args.getLastArg(options::OPT_client__name)) || (A = Args.getLastArg(options::OPT_force__flat__namespace)) || (A = Args.getLastArg(options::OPT_keep__private__externs)) || (A = Args.getLastArg(options::OPT_private__bundle))) D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args) << "-dynamiclib"; Args.AddAllArgsTranslated(CmdArgs, options::OPT_compatibility__version, "-dylib_compatibility_version"); Args.AddAllArgsTranslated(CmdArgs, options::OPT_current__version, "-dylib_current_version"); AddMachOArch(Args, CmdArgs); Args.AddAllArgsTranslated(CmdArgs, options::OPT_install__name, "-dylib_install_name"); } Args.AddLastArg(CmdArgs, options::OPT_all__load); Args.AddAllArgs(CmdArgs, options::OPT_allowable__client); Args.AddLastArg(CmdArgs, options::OPT_bind__at__load); if (MachOTC.isTargetIOSBased()) Args.AddLastArg(CmdArgs, options::OPT_arch__errors__fatal); Args.AddLastArg(CmdArgs, options::OPT_dead__strip); Args.AddLastArg(CmdArgs, options::OPT_no__dead__strip__inits__and__terms); Args.AddAllArgs(CmdArgs, options::OPT_dylib__file); Args.AddLastArg(CmdArgs, options::OPT_dynamic); Args.AddAllArgs(CmdArgs, options::OPT_exported__symbols__list); Args.AddLastArg(CmdArgs, options::OPT_flat__namespace); Args.AddAllArgs(CmdArgs, options::OPT_force__load); Args.AddAllArgs(CmdArgs, options::OPT_headerpad__max__install__names); Args.AddAllArgs(CmdArgs, options::OPT_image__base); Args.AddAllArgs(CmdArgs, options::OPT_init); // Add the deployment target. MachOTC.addMinVersionArgs(Args, CmdArgs); Args.AddLastArg(CmdArgs, options::OPT_nomultidefs); Args.AddLastArg(CmdArgs, options::OPT_multi__module); Args.AddLastArg(CmdArgs, options::OPT_single__module); Args.AddAllArgs(CmdArgs, options::OPT_multiply__defined); Args.AddAllArgs(CmdArgs, options::OPT_multiply__defined__unused); if (const Arg *A = Args.getLastArg(options::OPT_fpie, options::OPT_fPIE, options::OPT_fno_pie, options::OPT_fno_PIE)) { if (A->getOption().matches(options::OPT_fpie) || A->getOption().matches(options::OPT_fPIE)) CmdArgs.push_back("-pie"); else CmdArgs.push_back("-no_pie"); } // for embed-bitcode, use -bitcode_bundle in linker command if (C.getDriver().embedBitcodeEnabled()) { // Check if the toolchain supports bitcode build flow. 
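  // e.g. (illustrative target): 'clang -fembed-bitcode -target
  // arm64-apple-ios10 foo.c' adds -bitcode_bundle here; a Mach-O toolchain
  // without bitcode support raises err_drv_bitcode_unsupported_on_toolchain.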
if (MachOTC.SupportsEmbeddedBitcode()) CmdArgs.push_back("-bitcode_bundle"); else D.Diag(diag::err_drv_bitcode_unsupported_on_toolchain); } Args.AddLastArg(CmdArgs, options::OPT_prebind); Args.AddLastArg(CmdArgs, options::OPT_noprebind); Args.AddLastArg(CmdArgs, options::OPT_nofixprebinding); Args.AddLastArg(CmdArgs, options::OPT_prebind__all__twolevel__modules); Args.AddLastArg(CmdArgs, options::OPT_read__only__relocs); Args.AddAllArgs(CmdArgs, options::OPT_sectcreate); Args.AddAllArgs(CmdArgs, options::OPT_sectorder); Args.AddAllArgs(CmdArgs, options::OPT_seg1addr); Args.AddAllArgs(CmdArgs, options::OPT_segprot); Args.AddAllArgs(CmdArgs, options::OPT_segaddr); Args.AddAllArgs(CmdArgs, options::OPT_segs__read__only__addr); Args.AddAllArgs(CmdArgs, options::OPT_segs__read__write__addr); Args.AddAllArgs(CmdArgs, options::OPT_seg__addr__table); Args.AddAllArgs(CmdArgs, options::OPT_seg__addr__table__filename); Args.AddAllArgs(CmdArgs, options::OPT_sub__library); Args.AddAllArgs(CmdArgs, options::OPT_sub__umbrella); // Give --sysroot= preference, over the Apple specific behavior to also use // --isysroot as the syslibroot. StringRef sysroot = C.getSysRoot(); if (sysroot != "") { CmdArgs.push_back("-syslibroot"); CmdArgs.push_back(C.getArgs().MakeArgString(sysroot)); } else if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) { CmdArgs.push_back("-syslibroot"); CmdArgs.push_back(A->getValue()); } Args.AddLastArg(CmdArgs, options::OPT_twolevel__namespace); Args.AddLastArg(CmdArgs, options::OPT_twolevel__namespace__hints); Args.AddAllArgs(CmdArgs, options::OPT_umbrella); Args.AddAllArgs(CmdArgs, options::OPT_undefined); Args.AddAllArgs(CmdArgs, options::OPT_unexported__symbols__list); Args.AddAllArgs(CmdArgs, options::OPT_weak__reference__mismatches); Args.AddLastArg(CmdArgs, options::OPT_X_Flag); Args.AddAllArgs(CmdArgs, options::OPT_y); Args.AddLastArg(CmdArgs, options::OPT_w); Args.AddAllArgs(CmdArgs, options::OPT_pagezero__size); Args.AddAllArgs(CmdArgs, options::OPT_segs__read__); Args.AddLastArg(CmdArgs, options::OPT_seglinkedit); Args.AddLastArg(CmdArgs, options::OPT_noseglinkedit); Args.AddAllArgs(CmdArgs, options::OPT_sectalign); Args.AddAllArgs(CmdArgs, options::OPT_sectobjectsymbols); Args.AddAllArgs(CmdArgs, options::OPT_segcreate); Args.AddLastArg(CmdArgs, options::OPT_whyload); Args.AddLastArg(CmdArgs, options::OPT_whatsloaded); Args.AddAllArgs(CmdArgs, options::OPT_dylinker__install__name); Args.AddLastArg(CmdArgs, options::OPT_dylinker); Args.AddLastArg(CmdArgs, options::OPT_Mach); } void darwin::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { assert(Output.getType() == types::TY_Image && "Invalid linker output type."); // If the number of arguments surpasses the system limits, we will encode the // input files in a separate file, shortening the command line. To this end, // build a list of input file names that can be passed via a file with the // -filelist linker option. llvm::opt::ArgStringList InputFileList; // The logic here is derived from gcc's behavior; most of which // comes from specs (starting with link_command). Consult gcc for // more information. ArgStringList CmdArgs; /// Hack(tm) to ignore linking errors when we are doing ARC migration. 
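/// Under -ccc-arcmt-check / -ccc-arcmt-migrate every argument is claimed and
/// the link is replaced by a plain 'touch <output>' so the driver pipeline
/// still produces the expected file.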
if (Args.hasArg(options::OPT_ccc_arcmt_check, options::OPT_ccc_arcmt_migrate)) { for (const auto &Arg : Args) Arg->claim(); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("touch")); CmdArgs.push_back(Output.getFilename()); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, None)); return; } // I'm not sure why this particular decomposition exists in gcc, but // we follow suite for ease of comparison. AddLinkArgs(C, Args, CmdArgs, Inputs); // For LTO, pass the name of the optimization record file. if (Args.hasFlag(options::OPT_fsave_optimization_record, options::OPT_fno_save_optimization_record, false)) { CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-lto-pass-remarks-output"); CmdArgs.push_back("-mllvm"); SmallString<128> F; F = Output.getFilename(); F += ".opt.yaml"; CmdArgs.push_back(Args.MakeArgString(F)); if (getLastProfileUseArg(Args)) { CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-lto-pass-remarks-with-hotness"); } } // It seems that the 'e' option is completely ignored for dynamic executables // (the default), and with static executables, the last one wins, as expected. Args.AddAllArgs(CmdArgs, {options::OPT_d_Flag, options::OPT_s, options::OPT_t, options::OPT_Z_Flag, options::OPT_u_Group, options::OPT_e, options::OPT_r}); // Forward -ObjC when either -ObjC or -ObjC++ is used, to force loading // members of static archive libraries which implement Objective-C classes or // categories. if (Args.hasArg(options::OPT_ObjC) || Args.hasArg(options::OPT_ObjCXX)) CmdArgs.push_back("-ObjC"); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) getMachOToolChain().addStartObjectFileArgs(Args, CmdArgs); // SafeStack requires its own runtime libraries // These libraries should be linked first, to make sure the // __safestack_init constructor executes before everything else if (getToolChain().getSanitizerArgs().needsSafeStackRt()) { getMachOToolChain().AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.safestack_osx.a", /*AlwaysLink=*/true); } Args.AddAllArgs(CmdArgs, options::OPT_L); AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); // Build the input file for -filelist (list of linker input files) in case we // need it later for (const auto &II : Inputs) { if (!II.isFilename()) { // This is a linker input argument. // We cannot mix input arguments and file names in a -filelist input, thus // we prematurely stop our list (remaining files shall be passed as // arguments). if (InputFileList.size() > 0) break; continue; } InputFileList.push_back(II.getFilename()); } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) addOpenMPRuntime(CmdArgs, getToolChain(), Args); if (isObjCRuntimeLinked(Args) && !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { // We use arclite library for both ARC and subscripting support. getMachOToolChain().AddLinkARCArgs(Args, CmdArgs); CmdArgs.push_back("-framework"); CmdArgs.push_back("Foundation"); // Link libobj. 
CmdArgs.push_back("-lobjc"); } if (LinkingOutput) { CmdArgs.push_back("-arch_multiple"); CmdArgs.push_back("-final_output"); CmdArgs.push_back(LinkingOutput); } if (Args.hasArg(options::OPT_fnested_functions)) CmdArgs.push_back("-allow_stack_execute"); getMachOToolChain().addProfileRTLibs(Args, CmdArgs); if (unsigned Parallelism = getLTOParallelism(Args, getToolChain().getDriver())) { CmdArgs.push_back("-mllvm"); CmdArgs.push_back( Args.MakeArgString(Twine("-threads=") + llvm::to_string(Parallelism))); } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { if (getToolChain().getDriver().CCCIsCXX()) getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs); // link_ssp spec is empty. // Let the tool chain choose which runtime library to link. getMachOToolChain().AddLinkRuntimeLibArgs(Args, CmdArgs); } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { // endfile_spec is empty. } Args.AddAllArgs(CmdArgs, options::OPT_T_Group); Args.AddAllArgs(CmdArgs, options::OPT_F); // -iframework should be forwarded as -F. for (const Arg *A : Args.filtered(options::OPT_iframework)) CmdArgs.push_back(Args.MakeArgString(std::string("-F") + A->getValue())); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { if (Arg *A = Args.getLastArg(options::OPT_fveclib)) { if (A->getValue() == StringRef("Accelerate")) { CmdArgs.push_back("-framework"); CmdArgs.push_back("Accelerate"); } } } const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); std::unique_ptr Cmd = llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs); Cmd->setInputFileList(std::move(InputFileList)); C.addCommand(std::move(Cmd)); } void darwin::Lipo::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { ArgStringList CmdArgs; CmdArgs.push_back("-create"); assert(Output.isFilename() && "Unexpected lipo output."); CmdArgs.push_back("-output"); CmdArgs.push_back(Output.getFilename()); for (const auto &II : Inputs) { assert(II.isFilename() && "Unexpected lipo input."); CmdArgs.push_back(II.getFilename()); } const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("lipo")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void darwin::Dsymutil::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { ArgStringList CmdArgs; CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); assert(Inputs.size() == 1 && "Unable to handle multiple inputs."); const InputInfo &Input = Inputs[0]; assert(Input.isFilename() && "Unexpected dsymutil input."); CmdArgs.push_back(Input.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("dsymutil")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void darwin::VerifyDebug::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { ArgStringList CmdArgs; CmdArgs.push_back("--verify"); CmdArgs.push_back("--debug-info"); CmdArgs.push_back("--eh-frame"); CmdArgs.push_back("--quiet"); assert(Inputs.size() == 1 && "Unable to handle multiple inputs."); const InputInfo &Input = Inputs[0]; assert(Input.isFilename() && "Unexpected verify input"); // Grabbing the output of the earlier dsymutil run. 
CmdArgs.push_back(Input.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("dwarfdump")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void solaris::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { claimNoWarnArgs(Args); ArgStringList CmdArgs; Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); for (const auto &II : Inputs) CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { ArgStringList CmdArgs; // Demangle C++ names in errors CmdArgs.push_back("-C"); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_shared)) { CmdArgs.push_back("-e"); CmdArgs.push_back("_start"); } if (Args.hasArg(options::OPT_static)) { CmdArgs.push_back("-Bstatic"); CmdArgs.push_back("-dn"); } else { CmdArgs.push_back("-Bdynamic"); if (Args.hasArg(options::OPT_shared)) { CmdArgs.push_back("-shared"); } else { CmdArgs.push_back("--dynamic-linker"); CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("ld.so.1"))); } } if (Output.isFilename()) { CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); } else { assert(Output.isNothing() && "Invalid output."); } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (!Args.hasArg(options::OPT_shared)) CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crt1.o"))); CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crti.o"))); CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("values-Xa.o"))); CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o"))); } getToolChain().AddFilePathLibArgs(Args, CmdArgs); Args.AddAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, options::OPT_e, options::OPT_r}); AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { if (getToolChain().getDriver().CCCIsCXX()) getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs); CmdArgs.push_back("-lgcc_s"); CmdArgs.push_back("-lc"); if (!Args.hasArg(options::OPT_shared)) { CmdArgs.push_back("-lgcc"); CmdArgs.push_back("-lm"); } } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtend.o"))); } CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o"))); getToolChain().addProfileRTLibs(Args, CmdArgs); const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void openbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { claimNoWarnArgs(Args); ArgStringList CmdArgs; switch (getToolChain().getArch()) { case llvm::Triple::x86: // When building 32-bit code on OpenBSD/amd64, we have to explicitly // instruct as in the base system to assemble 32-bit code. 
CmdArgs.push_back("--32"); break; case llvm::Triple::ppc: CmdArgs.push_back("-mppc"); CmdArgs.push_back("-many"); break; case llvm::Triple::sparc: case llvm::Triple::sparcel: { CmdArgs.push_back("-32"); std::string CPU = getCPUName(Args, getToolChain().getTriple()); CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple())); AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } case llvm::Triple::sparcv9: { CmdArgs.push_back("-64"); std::string CPU = getCPUName(Args, getToolChain().getTriple()); CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple())); AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } case llvm::Triple::mips64: case llvm::Triple::mips64el: { StringRef CPUName; StringRef ABIName; mips::getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName); CmdArgs.push_back("-mabi"); CmdArgs.push_back(getGnuCompatibleMipsABIName(ABIName).data()); if (getToolChain().getArch() == llvm::Triple::mips64) CmdArgs.push_back("-EB"); else CmdArgs.push_back("-EL"); AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } default: break; } Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); for (const auto &II : Inputs) CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const Driver &D = getToolChain().getDriver(); ArgStringList CmdArgs; // Silence warning for "clang -g foo.o -o foo" Args.ClaimAllArgs(options::OPT_g_Group); // and "clang -emit-llvm foo.o -o foo" Args.ClaimAllArgs(options::OPT_emit_llvm); // and for "clang -w foo.o -o foo". Other warning options are already // handled somewhere else. 
Args.ClaimAllArgs(options::OPT_w); if (getToolChain().getArch() == llvm::Triple::mips64) CmdArgs.push_back("-EB"); else if (getToolChain().getArch() == llvm::Triple::mips64el) CmdArgs.push_back("-EL"); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_shared)) { CmdArgs.push_back("-e"); CmdArgs.push_back("__start"); } if (Args.hasArg(options::OPT_static)) { CmdArgs.push_back("-Bstatic"); } else { if (Args.hasArg(options::OPT_rdynamic)) CmdArgs.push_back("-export-dynamic"); CmdArgs.push_back("--eh-frame-hdr"); CmdArgs.push_back("-Bdynamic"); if (Args.hasArg(options::OPT_shared)) { CmdArgs.push_back("-shared"); } else { CmdArgs.push_back("-dynamic-linker"); CmdArgs.push_back("/usr/libexec/ld.so"); } } if (Args.hasArg(options::OPT_nopie)) CmdArgs.push_back("-nopie"); if (Output.isFilename()) { CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); } else { assert(Output.isNothing() && "Invalid output."); } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (!Args.hasArg(options::OPT_shared)) { if (Args.hasArg(options::OPT_pg)) CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("gcrt0.o"))); + else if (Args.hasArg(options::OPT_static) && + !Args.hasArg(options::OPT_nopie)) + CmdArgs.push_back( + Args.MakeArgString(getToolChain().GetFilePath("rcrt0.o"))); else CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crt0.o"))); CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o"))); } else { CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtbeginS.o"))); } } std::string Triple = getToolChain().getTripleString(); if (Triple.substr(0, 6) == "x86_64") Triple.replace(0, 6, "amd64"); CmdArgs.push_back( Args.MakeArgString("-L/usr/lib/gcc-lib/" + Triple + "/4.2.1")); Args.AddAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, options::OPT_e, options::OPT_s, options::OPT_t, options::OPT_Z_Flag, options::OPT_r}); AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { if (D.CCCIsCXX()) { getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs); if (Args.hasArg(options::OPT_pg)) CmdArgs.push_back("-lm_p"); else CmdArgs.push_back("-lm"); } // FIXME: For some reason GCC passes -lgcc before adding // the default system libraries. Just mimic this for now. 
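    // e.g. a default 'clang -pthread foo.o' link on OpenBSD ends with:
    //   -lgcc -lpthread -lc -lgcc   (illustrative; -pg swaps in the _p variants)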
CmdArgs.push_back("-lgcc"); if (Args.hasArg(options::OPT_pthread)) { if (!Args.hasArg(options::OPT_shared) && Args.hasArg(options::OPT_pg)) CmdArgs.push_back("-lpthread_p"); else CmdArgs.push_back("-lpthread"); } if (!Args.hasArg(options::OPT_shared)) { if (Args.hasArg(options::OPT_pg)) CmdArgs.push_back("-lc_p"); else CmdArgs.push_back("-lc"); } CmdArgs.push_back("-lgcc"); } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (!Args.hasArg(options::OPT_shared)) CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtend.o"))); else CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtendS.o"))); } const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void bitrig::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { claimNoWarnArgs(Args); ArgStringList CmdArgs; Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); for (const auto &II : Inputs) CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void bitrig::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const Driver &D = getToolChain().getDriver(); ArgStringList CmdArgs; if (!Args.hasArg(options::OPT_nostdlib, options::OPT_shared)) { CmdArgs.push_back("-e"); CmdArgs.push_back("__start"); } if (Args.hasArg(options::OPT_static)) { CmdArgs.push_back("-Bstatic"); } else { if (Args.hasArg(options::OPT_rdynamic)) CmdArgs.push_back("-export-dynamic"); CmdArgs.push_back("--eh-frame-hdr"); CmdArgs.push_back("-Bdynamic"); if (Args.hasArg(options::OPT_shared)) { CmdArgs.push_back("-shared"); } else { CmdArgs.push_back("-dynamic-linker"); CmdArgs.push_back("/usr/libexec/ld.so"); } } if (Output.isFilename()) { CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); } else { assert(Output.isNothing() && "Invalid output."); } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (!Args.hasArg(options::OPT_shared)) { if (Args.hasArg(options::OPT_pg)) CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("gcrt0.o"))); else CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crt0.o"))); CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o"))); } else { CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtbeginS.o"))); } } Args.AddAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, options::OPT_e}); AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { if (D.CCCIsCXX()) { getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs); if (Args.hasArg(options::OPT_pg)) CmdArgs.push_back("-lm_p"); else CmdArgs.push_back("-lm"); } if (Args.hasArg(options::OPT_pthread)) { if (!Args.hasArg(options::OPT_shared) && Args.hasArg(options::OPT_pg)) CmdArgs.push_back("-lpthread_p"); else CmdArgs.push_back("-lpthread"); } if (!Args.hasArg(options::OPT_shared)) { if (Args.hasArg(options::OPT_pg)) CmdArgs.push_back("-lc_p"); else CmdArgs.push_back("-lc"); } StringRef MyArch; switch (getToolChain().getArch()) { case 
llvm::Triple::arm: MyArch = "arm"; break; case llvm::Triple::x86: MyArch = "i386"; break; case llvm::Triple::x86_64: MyArch = "amd64"; break; default: llvm_unreachable("Unsupported architecture"); } CmdArgs.push_back(Args.MakeArgString("-lclang_rt." + MyArch)); } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (!Args.hasArg(options::OPT_shared)) CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtend.o"))); else CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtendS.o"))); } const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void freebsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { claimNoWarnArgs(Args); ArgStringList CmdArgs; // When building 32-bit code on FreeBSD/amd64, we have to explicitly // instruct as in the base system to assemble 32-bit code. switch (getToolChain().getArch()) { default: break; case llvm::Triple::x86: CmdArgs.push_back("--32"); break; case llvm::Triple::ppc: CmdArgs.push_back("-a32"); break; case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: { StringRef CPUName; StringRef ABIName; mips::getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName); CmdArgs.push_back("-march"); CmdArgs.push_back(CPUName.data()); CmdArgs.push_back("-mabi"); CmdArgs.push_back(getGnuCompatibleMipsABIName(ABIName).data()); if (getToolChain().getArch() == llvm::Triple::mips || getToolChain().getArch() == llvm::Triple::mips64) CmdArgs.push_back("-EB"); else CmdArgs.push_back("-EL"); if (Arg *A = Args.getLastArg(options::OPT_G)) { StringRef v = A->getValue(); CmdArgs.push_back(Args.MakeArgString("-G" + v)); A->claim(); } AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: { arm::FloatABI ABI = arm::getARMFloatABI(getToolChain(), Args); if (ABI == arm::FloatABI::Hard) CmdArgs.push_back("-mfpu=vfp"); else CmdArgs.push_back("-mfpu=softvfp"); switch (getToolChain().getTriple().getEnvironment()) { case llvm::Triple::GNUEABIHF: case llvm::Triple::GNUEABI: case llvm::Triple::EABI: CmdArgs.push_back("-meabi=5"); break; default: CmdArgs.push_back("-matpcs"); } break; } case llvm::Triple::sparc: case llvm::Triple::sparcel: case llvm::Triple::sparcv9: { std::string CPU = getCPUName(Args, getToolChain().getTriple()); CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple())); AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } } Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); for (const auto &II : Inputs) CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void freebsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const toolchains::FreeBSD &ToolChain = static_cast(getToolChain()); const Driver &D = ToolChain.getDriver(); const llvm::Triple::ArchType Arch = ToolChain.getArch(); const bool IsPIE = !Args.hasArg(options::OPT_shared) && (Args.hasArg(options::OPT_pie) || ToolChain.isPIEDefault()); 
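  // e.g. (illustrative): on a PIE-by-default FreeBSD toolchain a plain
  // 'clang foo.o' links with -pie and Scrt1.o below; -shared suppresses both.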
ArgStringList CmdArgs; // Silence warning for "clang -g foo.o -o foo" Args.ClaimAllArgs(options::OPT_g_Group); // and "clang -emit-llvm foo.o -o foo" Args.ClaimAllArgs(options::OPT_emit_llvm); // and for "clang -w foo.o -o foo". Other warning options are already // handled somewhere else. Args.ClaimAllArgs(options::OPT_w); if (!D.SysRoot.empty()) CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); if (IsPIE) CmdArgs.push_back("-pie"); CmdArgs.push_back("--eh-frame-hdr"); if (Args.hasArg(options::OPT_static)) { CmdArgs.push_back("-Bstatic"); } else { if (Args.hasArg(options::OPT_rdynamic)) CmdArgs.push_back("-export-dynamic"); if (Args.hasArg(options::OPT_shared)) { CmdArgs.push_back("-Bshareable"); } else { CmdArgs.push_back("-dynamic-linker"); CmdArgs.push_back("/libexec/ld-elf.so.1"); } if (ToolChain.getTriple().getOSMajorVersion() >= 9) { if (Arch == llvm::Triple::arm || Arch == llvm::Triple::sparc || Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64) { CmdArgs.push_back("--hash-style=both"); } } CmdArgs.push_back("--enable-new-dtags"); } // When building 32-bit code on FreeBSD/amd64, we have to explicitly // instruct ld in the base system to link 32-bit code. if (Arch == llvm::Triple::x86) { CmdArgs.push_back("-m"); CmdArgs.push_back("elf_i386_fbsd"); } if (Arch == llvm::Triple::ppc) { CmdArgs.push_back("-m"); CmdArgs.push_back("elf32ppc_fbsd"); } if (Arg *A = Args.getLastArg(options::OPT_G)) { if (ToolChain.getArch() == llvm::Triple::mips || ToolChain.getArch() == llvm::Triple::mipsel || ToolChain.getArch() == llvm::Triple::mips64 || ToolChain.getArch() == llvm::Triple::mips64el) { StringRef v = A->getValue(); CmdArgs.push_back(Args.MakeArgString("-G" + v)); A->claim(); } } if (Output.isFilename()) { CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); } else { assert(Output.isNothing() && "Invalid output."); } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { const char *crt1 = nullptr; if (!Args.hasArg(options::OPT_shared)) { if (Args.hasArg(options::OPT_pg)) crt1 = "gcrt1.o"; else if (IsPIE) crt1 = "Scrt1.o"; else crt1 = "crt1.o"; } if (crt1) CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crt1))); CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o"))); const char *crtbegin = nullptr; if (Args.hasArg(options::OPT_static)) crtbegin = "crtbeginT.o"; else if (Args.hasArg(options::OPT_shared) || IsPIE) crtbegin = "crtbeginS.o"; else crtbegin = "crtbegin.o"; CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin))); } Args.AddAllArgs(CmdArgs, options::OPT_L); ToolChain.AddFilePathLibArgs(Args, CmdArgs); Args.AddAllArgs(CmdArgs, options::OPT_T_Group); Args.AddAllArgs(CmdArgs, options::OPT_e); Args.AddAllArgs(CmdArgs, options::OPT_s); Args.AddAllArgs(CmdArgs, options::OPT_t); Args.AddAllArgs(CmdArgs, options::OPT_Z_Flag); Args.AddAllArgs(CmdArgs, options::OPT_r); if (D.isUsingLTO()) AddGoldPlugin(ToolChain, Args, CmdArgs, D.getLTOMode() == LTOK_Thin, D); bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs); AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { addOpenMPRuntime(CmdArgs, ToolChain, Args); if (D.CCCIsCXX()) { ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); if (Args.hasArg(options::OPT_pg)) CmdArgs.push_back("-lm_p"); else CmdArgs.push_back("-lm"); } if (NeedsSanitizerDeps) linkSanitizerRuntimeDeps(ToolChain, CmdArgs); // FIXME: For some reason GCC passes -lgcc and -lgcc_s before adding // the 
default system libraries. Just mimic this for now. if (Args.hasArg(options::OPT_pg)) CmdArgs.push_back("-lgcc_p"); else CmdArgs.push_back("-lgcc"); if (Args.hasArg(options::OPT_static)) { CmdArgs.push_back("-lgcc_eh"); } else if (Args.hasArg(options::OPT_pg)) { CmdArgs.push_back("-lgcc_eh_p"); } else { CmdArgs.push_back("--as-needed"); CmdArgs.push_back("-lgcc_s"); CmdArgs.push_back("--no-as-needed"); } if (Args.hasArg(options::OPT_pthread)) { if (Args.hasArg(options::OPT_pg)) CmdArgs.push_back("-lpthread_p"); else CmdArgs.push_back("-lpthread"); } if (Args.hasArg(options::OPT_pg)) { if (Args.hasArg(options::OPT_shared)) CmdArgs.push_back("-lc"); else CmdArgs.push_back("-lc_p"); CmdArgs.push_back("-lgcc_p"); } else { CmdArgs.push_back("-lc"); CmdArgs.push_back("-lgcc"); } if (Args.hasArg(options::OPT_static)) { CmdArgs.push_back("-lgcc_eh"); } else if (Args.hasArg(options::OPT_pg)) { CmdArgs.push_back("-lgcc_eh_p"); } else { CmdArgs.push_back("--as-needed"); CmdArgs.push_back("-lgcc_s"); CmdArgs.push_back("--no-as-needed"); } } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (Args.hasArg(options::OPT_shared) || IsPIE) CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtendS.o"))); else CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtend.o"))); CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o"))); } ToolChain.addProfileRTLibs(Args, CmdArgs); const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void netbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { claimNoWarnArgs(Args); ArgStringList CmdArgs; // GNU as needs different flags for creating the correct output format // on architectures with different ABIs or optional feature sets. 
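// For example, a sparc--netbsd target is assembled with "-32" plus the -A
// mode chosen by getSparcAsmModeForCPU, sparc64 with "-64", and MIPS targets
// get explicit -march/-mabi plus an -EB/-EL endianness flag (see the switch
// below).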
switch (getToolChain().getArch()) { case llvm::Triple::x86: CmdArgs.push_back("--32"); break; case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: { StringRef MArch, MCPU; getARMArchCPUFromArgs(Args, MArch, MCPU, /*FromAs*/ true); std::string Arch = arm::getARMTargetCPU(MCPU, MArch, getToolChain().getTriple()); CmdArgs.push_back(Args.MakeArgString("-mcpu=" + Arch)); break; } case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: { StringRef CPUName; StringRef ABIName; mips::getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName); CmdArgs.push_back("-march"); CmdArgs.push_back(CPUName.data()); CmdArgs.push_back("-mabi"); CmdArgs.push_back(getGnuCompatibleMipsABIName(ABIName).data()); if (getToolChain().getArch() == llvm::Triple::mips || getToolChain().getArch() == llvm::Triple::mips64) CmdArgs.push_back("-EB"); else CmdArgs.push_back("-EL"); AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } case llvm::Triple::sparc: case llvm::Triple::sparcel: { CmdArgs.push_back("-32"); std::string CPU = getCPUName(Args, getToolChain().getTriple()); CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple())); AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } case llvm::Triple::sparcv9: { CmdArgs.push_back("-64"); std::string CPU = getCPUName(Args, getToolChain().getTriple()); CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple())); AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } default: break; } Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); for (const auto &II : Inputs) CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString((getToolChain().GetProgramPath("as"))); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const Driver &D = getToolChain().getDriver(); ArgStringList CmdArgs; if (!D.SysRoot.empty()) CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); CmdArgs.push_back("--eh-frame-hdr"); if (Args.hasArg(options::OPT_static)) { CmdArgs.push_back("-Bstatic"); } else { if (Args.hasArg(options::OPT_rdynamic)) CmdArgs.push_back("-export-dynamic"); if (Args.hasArg(options::OPT_shared)) { CmdArgs.push_back("-Bshareable"); } else { Args.AddAllArgs(CmdArgs, options::OPT_pie); CmdArgs.push_back("-dynamic-linker"); CmdArgs.push_back("/libexec/ld.elf_so"); } } // Many NetBSD architectures support more than one ABI. // Determine the correct emulation for ld. 
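// For instance, a mips64--netbsd link with -mabi=32 uses "-m elf32btsmip",
// while -mabi=64 selects "-m elf64btsmip" (or the *ltsmip variants on
// little-endian targets), as the switch below shows.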
switch (getToolChain().getArch()) { case llvm::Triple::x86: CmdArgs.push_back("-m"); CmdArgs.push_back("elf_i386"); break; case llvm::Triple::arm: case llvm::Triple::thumb: CmdArgs.push_back("-m"); switch (getToolChain().getTriple().getEnvironment()) { case llvm::Triple::EABI: case llvm::Triple::GNUEABI: CmdArgs.push_back("armelf_nbsd_eabi"); break; case llvm::Triple::EABIHF: case llvm::Triple::GNUEABIHF: CmdArgs.push_back("armelf_nbsd_eabihf"); break; default: CmdArgs.push_back("armelf_nbsd"); break; } break; case llvm::Triple::armeb: case llvm::Triple::thumbeb: arm::appendEBLinkFlags(Args, CmdArgs, getToolChain().getEffectiveTriple()); CmdArgs.push_back("-m"); switch (getToolChain().getTriple().getEnvironment()) { case llvm::Triple::EABI: case llvm::Triple::GNUEABI: CmdArgs.push_back("armelfb_nbsd_eabi"); break; case llvm::Triple::EABIHF: case llvm::Triple::GNUEABIHF: CmdArgs.push_back("armelfb_nbsd_eabihf"); break; default: CmdArgs.push_back("armelfb_nbsd"); break; } break; case llvm::Triple::mips64: case llvm::Triple::mips64el: if (mips::hasMipsAbiArg(Args, "32")) { CmdArgs.push_back("-m"); if (getToolChain().getArch() == llvm::Triple::mips64) CmdArgs.push_back("elf32btsmip"); else CmdArgs.push_back("elf32ltsmip"); } else if (mips::hasMipsAbiArg(Args, "64")) { CmdArgs.push_back("-m"); if (getToolChain().getArch() == llvm::Triple::mips64) CmdArgs.push_back("elf64btsmip"); else CmdArgs.push_back("elf64ltsmip"); } break; case llvm::Triple::ppc: CmdArgs.push_back("-m"); CmdArgs.push_back("elf32ppc_nbsd"); break; case llvm::Triple::ppc64: case llvm::Triple::ppc64le: CmdArgs.push_back("-m"); CmdArgs.push_back("elf64ppc"); break; case llvm::Triple::sparc: CmdArgs.push_back("-m"); CmdArgs.push_back("elf32_sparc"); break; case llvm::Triple::sparcv9: CmdArgs.push_back("-m"); CmdArgs.push_back("elf64_sparc"); break; default: break; } if (Output.isFilename()) { CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); } else { assert(Output.isNothing() && "Invalid output."); } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (!Args.hasArg(options::OPT_shared)) { CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crt0.o"))); } CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crti.o"))); if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie)) { CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtbeginS.o"))); } else { CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o"))); } } Args.AddAllArgs(CmdArgs, options::OPT_L); Args.AddAllArgs(CmdArgs, options::OPT_T_Group); Args.AddAllArgs(CmdArgs, options::OPT_e); Args.AddAllArgs(CmdArgs, options::OPT_s); Args.AddAllArgs(CmdArgs, options::OPT_t); Args.AddAllArgs(CmdArgs, options::OPT_Z_Flag); Args.AddAllArgs(CmdArgs, options::OPT_r); AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); unsigned Major, Minor, Micro; getToolChain().getTriple().getOSVersion(Major, Minor, Micro); bool useLibgcc = true; if (Major >= 7 || Major == 0) { switch (getToolChain().getArch()) { case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: case llvm::Triple::ppc: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: case llvm::Triple::sparc: case llvm::Triple::sparcv9: case llvm::Triple::x86: case llvm::Triple::x86_64: useLibgcc = false; break; default: break; } } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { 
addOpenMPRuntime(CmdArgs, getToolChain(), Args); if (D.CCCIsCXX()) { getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs); CmdArgs.push_back("-lm"); } if (Args.hasArg(options::OPT_pthread)) CmdArgs.push_back("-lpthread"); CmdArgs.push_back("-lc"); if (useLibgcc) { if (Args.hasArg(options::OPT_static)) { // libgcc_eh depends on libc, so resolve as much as possible, // pull in any new requirements from libc and then get the rest // of libgcc. CmdArgs.push_back("-lgcc_eh"); CmdArgs.push_back("-lc"); CmdArgs.push_back("-lgcc"); } else { CmdArgs.push_back("-lgcc"); CmdArgs.push_back("--as-needed"); CmdArgs.push_back("-lgcc_s"); CmdArgs.push_back("--no-as-needed"); } } } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie)) CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtendS.o"))); else CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtend.o"))); CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o"))); } getToolChain().addProfileRTLibs(Args, CmdArgs); const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void gnutools::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { claimNoWarnArgs(Args); ArgStringList CmdArgs; llvm::Reloc::Model RelocationModel; unsigned PICLevel; bool IsPIE; std::tie(RelocationModel, PICLevel, IsPIE) = ParsePICArgs(getToolChain(), Args); switch (getToolChain().getArch()) { default: break; // Add --32/--64 to make sure we get the format we want. // This is incomplete case llvm::Triple::x86: CmdArgs.push_back("--32"); break; case llvm::Triple::x86_64: if (getToolChain().getTriple().getEnvironment() == llvm::Triple::GNUX32) CmdArgs.push_back("--x32"); else CmdArgs.push_back("--64"); break; case llvm::Triple::ppc: CmdArgs.push_back("-a32"); CmdArgs.push_back("-mppc"); CmdArgs.push_back("-many"); break; case llvm::Triple::ppc64: CmdArgs.push_back("-a64"); CmdArgs.push_back("-mppc64"); CmdArgs.push_back("-many"); break; case llvm::Triple::ppc64le: CmdArgs.push_back("-a64"); CmdArgs.push_back("-mppc64"); CmdArgs.push_back("-many"); CmdArgs.push_back("-mlittle-endian"); break; case llvm::Triple::sparc: case llvm::Triple::sparcel: { CmdArgs.push_back("-32"); std::string CPU = getCPUName(Args, getToolChain().getTriple()); CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple())); AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } case llvm::Triple::sparcv9: { CmdArgs.push_back("-64"); std::string CPU = getCPUName(Args, getToolChain().getTriple()); CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple())); AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: { const llvm::Triple &Triple2 = getToolChain().getTriple(); switch (Triple2.getSubArch()) { case llvm::Triple::ARMSubArch_v7: CmdArgs.push_back("-mfpu=neon"); break; case llvm::Triple::ARMSubArch_v8: CmdArgs.push_back("-mfpu=crypto-neon-fp-armv8"); break; default: break; } switch (arm::getARMFloatABI(getToolChain(), Args)) { case arm::FloatABI::Invalid: llvm_unreachable("must have an ABI!"); case arm::FloatABI::Soft: CmdArgs.push_back(Args.MakeArgString("-mfloat-abi=soft")); break; case arm::FloatABI::SoftFP: 
CmdArgs.push_back(Args.MakeArgString("-mfloat-abi=softfp")); break; case arm::FloatABI::Hard: CmdArgs.push_back(Args.MakeArgString("-mfloat-abi=hard")); break; } Args.AddLastArg(CmdArgs, options::OPT_march_EQ); // FIXME: remove krait check when GNU tools support krait cpu // for now replace it with -mcpu=cortex-a15 to avoid a lower // march from being picked in the absence of a cpu flag. Arg *A; if ((A = Args.getLastArg(options::OPT_mcpu_EQ)) && StringRef(A->getValue()).equals_lower("krait")) CmdArgs.push_back("-mcpu=cortex-a15"); else Args.AddLastArg(CmdArgs, options::OPT_mcpu_EQ); Args.AddLastArg(CmdArgs, options::OPT_mfpu_EQ); break; } case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: { StringRef CPUName; StringRef ABIName; mips::getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName); ABIName = getGnuCompatibleMipsABIName(ABIName); CmdArgs.push_back("-march"); CmdArgs.push_back(CPUName.data()); CmdArgs.push_back("-mabi"); CmdArgs.push_back(ABIName.data()); // -mno-shared should be emitted unless -fpic, -fpie, -fPIC, -fPIE, // or -mshared (not implemented) is in effect. if (RelocationModel == llvm::Reloc::Static) CmdArgs.push_back("-mno-shared"); // LLVM doesn't support -mplt yet and acts as if it is always given. // However, -mplt has no effect with the N64 ABI. CmdArgs.push_back(ABIName == "64" ? "-KPIC" : "-call_nonpic"); if (getToolChain().getArch() == llvm::Triple::mips || getToolChain().getArch() == llvm::Triple::mips64) CmdArgs.push_back("-EB"); else CmdArgs.push_back("-EL"); if (Arg *A = Args.getLastArg(options::OPT_mnan_EQ)) { if (StringRef(A->getValue()) == "2008") CmdArgs.push_back(Args.MakeArgString("-mnan=2008")); } // Add the last -mfp32/-mfpxx/-mfp64 or -mfpxx if it is enabled by default. if (Arg *A = Args.getLastArg(options::OPT_mfp32, options::OPT_mfpxx, options::OPT_mfp64)) { A->claim(); A->render(Args, CmdArgs); } else if (mips::shouldUseFPXX( Args, getToolChain().getTriple(), CPUName, ABIName, getMipsFloatABI(getToolChain().getDriver(), Args))) CmdArgs.push_back("-mfpxx"); // Pass on -mmips16 or -mno-mips16. However, the assembler equivalent of // -mno-mips16 is actually -no-mips16. if (Arg *A = Args.getLastArg(options::OPT_mips16, options::OPT_mno_mips16)) { if (A->getOption().matches(options::OPT_mips16)) { A->claim(); A->render(Args, CmdArgs); } else { A->claim(); CmdArgs.push_back("-no-mips16"); } } Args.AddLastArg(CmdArgs, options::OPT_mmicromips, options::OPT_mno_micromips); Args.AddLastArg(CmdArgs, options::OPT_mdsp, options::OPT_mno_dsp); Args.AddLastArg(CmdArgs, options::OPT_mdspr2, options::OPT_mno_dspr2); if (Arg *A = Args.getLastArg(options::OPT_mmsa, options::OPT_mno_msa)) { // Do not use AddLastArg because not all versions of MIPS assembler // support -mmsa / -mno-msa options. if (A->getOption().matches(options::OPT_mmsa)) CmdArgs.push_back(Args.MakeArgString("-mmsa")); } Args.AddLastArg(CmdArgs, options::OPT_mhard_float, options::OPT_msoft_float); Args.AddLastArg(CmdArgs, options::OPT_mdouble_float, options::OPT_msingle_float); Args.AddLastArg(CmdArgs, options::OPT_modd_spreg, options::OPT_mno_odd_spreg); AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } case llvm::Triple::systemz: { // Always pass an -march option, since our default of z10 is later // than the GNU assembler's default. 
StringRef CPUName = getSystemZTargetCPU(Args); CmdArgs.push_back(Args.MakeArgString("-march=" + CPUName)); break; } } Args.AddAllArgs(CmdArgs, options::OPT_I); Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); for (const auto &II : Inputs) CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); // Handle the debug info splitting at object creation time if we're // creating an object. // TODO: Currently only works on linux with newer objcopy. if (Args.hasArg(options::OPT_gsplit_dwarf) && getToolChain().getTriple().isOSLinux()) SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output, SplitDebugName(Args, Inputs[0])); } static void AddLibgcc(const llvm::Triple &Triple, const Driver &D, ArgStringList &CmdArgs, const ArgList &Args) { bool isAndroid = Triple.isAndroid(); bool isCygMing = Triple.isOSCygMing(); bool IsIAMCU = Triple.isOSIAMCU(); bool StaticLibgcc = Args.hasArg(options::OPT_static_libgcc) || Args.hasArg(options::OPT_static); if (!D.CCCIsCXX()) CmdArgs.push_back("-lgcc"); if (StaticLibgcc || isAndroid) { if (D.CCCIsCXX()) CmdArgs.push_back("-lgcc"); } else { if (!D.CCCIsCXX() && !isCygMing) CmdArgs.push_back("--as-needed"); CmdArgs.push_back("-lgcc_s"); if (!D.CCCIsCXX() && !isCygMing) CmdArgs.push_back("--no-as-needed"); } if (StaticLibgcc && !isAndroid && !IsIAMCU) CmdArgs.push_back("-lgcc_eh"); else if (!Args.hasArg(options::OPT_shared) && D.CCCIsCXX()) CmdArgs.push_back("-lgcc"); // According to Android ABI, we have to link with libdl if we are // linking with non-static libgcc. // // NOTE: This fixes a link error on Android MIPS as well. The non-static // libgcc for MIPS relies on _Unwind_Find_FDE and dl_iterate_phdr from libdl. if (isAndroid && !StaticLibgcc) CmdArgs.push_back("-ldl"); } static void AddRunTimeLibs(const ToolChain &TC, const Driver &D, ArgStringList &CmdArgs, const ArgList &Args) { // Make use of compiler-rt if --rtlib option is used ToolChain::RuntimeLibType RLT = TC.GetRuntimeLibType(Args); switch (RLT) { case ToolChain::RLT_CompilerRT: switch (TC.getTriple().getOS()) { default: llvm_unreachable("unsupported OS"); case llvm::Triple::Win32: case llvm::Triple::Linux: case llvm::Triple::Fuchsia: addClangRT(TC, Args, CmdArgs); break; } break; case ToolChain::RLT_Libgcc: // Make sure libgcc is not used under MSVC environment by default if (TC.getTriple().isKnownWindowsMSVCEnvironment()) { // Issue error diagnostic if libgcc is explicitly specified // through command line as --rtlib option argument. 
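// (e.g. "clang --rtlib=libgcc" on an *-windows-msvc triple is rejected below
// with err_drv_unsupported_rtlib_for_platform.)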
if (Args.hasArg(options::OPT_rtlib_EQ)) { TC.getDriver().Diag(diag::err_drv_unsupported_rtlib_for_platform) << Args.getLastArg(options::OPT_rtlib_EQ)->getValue() << "MSVC"; } } else AddLibgcc(TC.getTriple(), D, CmdArgs, Args); break; } } static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) { switch (T.getArch()) { case llvm::Triple::x86: if (T.isOSIAMCU()) return "elf_iamcu"; return "elf_i386"; case llvm::Triple::aarch64: return "aarch64linux"; case llvm::Triple::aarch64_be: return "aarch64_be_linux"; case llvm::Triple::arm: case llvm::Triple::thumb: return "armelf_linux_eabi"; case llvm::Triple::armeb: case llvm::Triple::thumbeb: return "armelfb_linux_eabi"; case llvm::Triple::ppc: return "elf32ppclinux"; case llvm::Triple::ppc64: return "elf64ppc"; case llvm::Triple::ppc64le: return "elf64lppc"; case llvm::Triple::sparc: case llvm::Triple::sparcel: return "elf32_sparc"; case llvm::Triple::sparcv9: return "elf64_sparc"; case llvm::Triple::mips: return "elf32btsmip"; case llvm::Triple::mipsel: return "elf32ltsmip"; case llvm::Triple::mips64: if (mips::hasMipsAbiArg(Args, "n32")) return "elf32btsmipn32"; return "elf64btsmip"; case llvm::Triple::mips64el: if (mips::hasMipsAbiArg(Args, "n32")) return "elf32ltsmipn32"; return "elf64ltsmip"; case llvm::Triple::systemz: return "elf64_s390"; case llvm::Triple::x86_64: if (T.getEnvironment() == llvm::Triple::GNUX32) return "elf32_x86_64"; return "elf_x86_64"; default: return nullptr; } } void gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const toolchains::Linux &ToolChain = static_cast(getToolChain()); const Driver &D = ToolChain.getDriver(); const llvm::Triple &Triple = getToolChain().getEffectiveTriple(); const llvm::Triple::ArchType Arch = ToolChain.getArch(); const bool isAndroid = ToolChain.getTriple().isAndroid(); const bool IsIAMCU = ToolChain.getTriple().isOSIAMCU(); const bool IsPIE = !Args.hasArg(options::OPT_shared) && !Args.hasArg(options::OPT_static) && (Args.hasArg(options::OPT_pie) || ToolChain.isPIEDefault()); const bool HasCRTBeginEndFiles = ToolChain.getTriple().hasEnvironment() || (ToolChain.getTriple().getVendor() != llvm::Triple::MipsTechnologies); ArgStringList CmdArgs; // Silence warning for "clang -g foo.o -o foo" Args.ClaimAllArgs(options::OPT_g_Group); // and "clang -emit-llvm foo.o -o foo" Args.ClaimAllArgs(options::OPT_emit_llvm); // and for "clang -w foo.o -o foo". Other warning options are already // handled somewhere else. Args.ClaimAllArgs(options::OPT_w); const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); if (llvm::sys::path::filename(Exec) == "lld") { CmdArgs.push_back("-flavor"); CmdArgs.push_back("old-gnu"); CmdArgs.push_back("-target"); CmdArgs.push_back(Args.MakeArgString(getToolChain().getTripleString())); } if (!D.SysRoot.empty()) CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); if (IsPIE) CmdArgs.push_back("-pie"); if (Args.hasArg(options::OPT_rdynamic)) CmdArgs.push_back("-export-dynamic"); if (Args.hasArg(options::OPT_s)) CmdArgs.push_back("-s"); if (Arch == llvm::Triple::armeb || Arch == llvm::Triple::thumbeb) arm::appendEBLinkFlags(Args, CmdArgs, Triple); // Most Android ARM64 targets should enable the linker fix for erratum // 843419. Only non-Cortex-A53 devices are allowed to skip this flag. 
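// (That is, --fix-cortex-a53-843419 is added unless -mcpu names a specific
// core other than cortex-a53 or generic.)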
if (Arch == llvm::Triple::aarch64 && isAndroid) { std::string CPU = getCPUName(Args, Triple); if (CPU.empty() || CPU == "generic" || CPU == "cortex-a53") CmdArgs.push_back("--fix-cortex-a53-843419"); } for (const auto &Opt : ToolChain.ExtraOpts) CmdArgs.push_back(Opt.c_str()); if (!Args.hasArg(options::OPT_static)) { CmdArgs.push_back("--eh-frame-hdr"); } if (const char *LDMOption = getLDMOption(ToolChain.getTriple(), Args)) { CmdArgs.push_back("-m"); CmdArgs.push_back(LDMOption); } else { D.Diag(diag::err_target_unknown_triple) << Triple.str(); return; } if (Args.hasArg(options::OPT_static)) { if (Arch == llvm::Triple::arm || Arch == llvm::Triple::armeb || Arch == llvm::Triple::thumb || Arch == llvm::Triple::thumbeb) CmdArgs.push_back("-Bstatic"); else CmdArgs.push_back("-static"); } else if (Args.hasArg(options::OPT_shared)) { CmdArgs.push_back("-shared"); } if (!Args.hasArg(options::OPT_static)) { if (Args.hasArg(options::OPT_rdynamic)) CmdArgs.push_back("-export-dynamic"); if (!Args.hasArg(options::OPT_shared)) { const std::string Loader = D.DyldPrefix + ToolChain.getDynamicLinker(Args); CmdArgs.push_back("-dynamic-linker"); CmdArgs.push_back(Args.MakeArgString(Loader)); } } CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (!isAndroid && !IsIAMCU) { const char *crt1 = nullptr; if (!Args.hasArg(options::OPT_shared)) { if (Args.hasArg(options::OPT_pg)) crt1 = "gcrt1.o"; else if (IsPIE) crt1 = "Scrt1.o"; else crt1 = "crt1.o"; } if (crt1) CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crt1))); CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o"))); } if (IsIAMCU) CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt0.o"))); else { const char *crtbegin; if (Args.hasArg(options::OPT_static)) crtbegin = isAndroid ? "crtbegin_static.o" : "crtbeginT.o"; else if (Args.hasArg(options::OPT_shared)) crtbegin = isAndroid ? "crtbegin_so.o" : "crtbeginS.o"; else if (IsPIE) crtbegin = isAndroid ? "crtbegin_dynamic.o" : "crtbeginS.o"; else crtbegin = isAndroid ? "crtbegin_dynamic.o" : "crtbegin.o"; if (HasCRTBeginEndFiles) CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin))); } // Add crtfastmath.o if available and fast math is enabled. ToolChain.AddFastMathRuntimeIfAvailable(Args, CmdArgs); } Args.AddAllArgs(CmdArgs, options::OPT_L); Args.AddAllArgs(CmdArgs, options::OPT_u); ToolChain.AddFilePathLibArgs(Args, CmdArgs); if (D.isUsingLTO()) AddGoldPlugin(ToolChain, Args, CmdArgs, D.getLTOMode() == LTOK_Thin, D); if (Args.hasArg(options::OPT_Z_Xlinker__no_demangle)) CmdArgs.push_back("--no-demangle"); bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs); bool NeedsXRayDeps = addXRayRuntime(ToolChain, Args, CmdArgs); AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); // The profile runtime also needs access to system libraries. getToolChain().addProfileRTLibs(Args, CmdArgs); if (D.CCCIsCXX() && !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) && !Args.hasArg(options::OPT_static); if (OnlyLibstdcxxStatic) CmdArgs.push_back("-Bstatic"); ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); if (OnlyLibstdcxxStatic) CmdArgs.push_back("-Bdynamic"); CmdArgs.push_back("-lm"); } // Silence warnings when linking C code with a C++ '-stdlib' argument. 
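// (e.g. "clang -stdlib=libc++ foo.o -o foo" would otherwise warn that
// -stdlib= went unused.)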
Args.ClaimAllArgs(options::OPT_stdlib_EQ); if (!Args.hasArg(options::OPT_nostdlib)) { if (!Args.hasArg(options::OPT_nodefaultlibs)) { if (Args.hasArg(options::OPT_static)) CmdArgs.push_back("--start-group"); if (NeedsSanitizerDeps) linkSanitizerRuntimeDeps(ToolChain, CmdArgs); if (NeedsXRayDeps) linkXRayRuntimeDeps(ToolChain, Args, CmdArgs); bool WantPthread = Args.hasArg(options::OPT_pthread) || Args.hasArg(options::OPT_pthreads); if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, options::OPT_fno_openmp, false)) { // OpenMP runtimes implies pthreads when using the GNU toolchain. // FIXME: Does this really make sense for all GNU toolchains? WantPthread = true; // Also link the particular OpenMP runtimes. switch (ToolChain.getDriver().getOpenMPRuntime(Args)) { case Driver::OMPRT_OMP: CmdArgs.push_back("-lomp"); break; case Driver::OMPRT_GOMP: CmdArgs.push_back("-lgomp"); // FIXME: Exclude this for platforms with libgomp that don't require // librt. Most modern Linux platforms require it, but some may not. CmdArgs.push_back("-lrt"); break; case Driver::OMPRT_IOMP5: CmdArgs.push_back("-liomp5"); break; case Driver::OMPRT_Unknown: // Already diagnosed. break; } if (JA.isHostOffloading(Action::OFK_OpenMP)) CmdArgs.push_back("-lomptarget"); } AddRunTimeLibs(ToolChain, D, CmdArgs, Args); if (WantPthread && !isAndroid) CmdArgs.push_back("-lpthread"); if (Args.hasArg(options::OPT_fsplit_stack)) CmdArgs.push_back("--wrap=pthread_create"); CmdArgs.push_back("-lc"); // Add IAMCU specific libs, if needed. if (IsIAMCU) CmdArgs.push_back("-lgloss"); if (Args.hasArg(options::OPT_static)) CmdArgs.push_back("--end-group"); else AddRunTimeLibs(ToolChain, D, CmdArgs, Args); // Add IAMCU specific libs (outside the group), if needed. if (IsIAMCU) { CmdArgs.push_back("--as-needed"); CmdArgs.push_back("-lsoftfp"); CmdArgs.push_back("--no-as-needed"); } } if (!Args.hasArg(options::OPT_nostartfiles) && !IsIAMCU) { const char *crtend; if (Args.hasArg(options::OPT_shared)) crtend = isAndroid ? "crtend_so.o" : "crtendS.o"; else if (IsPIE) crtend = isAndroid ? "crtend_android.o" : "crtendS.o"; else crtend = isAndroid ? "crtend_android.o" : "crtend.o"; if (HasCRTBeginEndFiles) CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtend))); if (!isAndroid) CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o"))); } } // Add OpenMP offloading linker script args if required. AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } // NaCl ARM assembly (inline or standalone) can be written with a set of macros // for the various SFI requirements like register masking. The assembly tool // inserts the file containing the macros as an input into all the assembly // jobs. void nacltools::AssemblerARM::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const toolchains::NaClToolChain &ToolChain = static_cast(getToolChain()); InputInfo NaClMacros(types::TY_PP_Asm, ToolChain.GetNaClArmMacrosPath(), "nacl-arm-macros.s"); InputInfoList NewInputs; NewInputs.push_back(NaClMacros); NewInputs.append(Inputs.begin(), Inputs.end()); gnutools::Assembler::ConstructJob(C, JA, Output, NewInputs, Args, LinkingOutput); } // This is quite similar to gnutools::Linker::ConstructJob with changes that // we use static by default, do not yet support sanitizers or LTO, and a few // others. 
Eventually we can support more of that and hopefully migrate back // to gnutools::Linker. void nacltools::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const toolchains::NaClToolChain &ToolChain = static_cast(getToolChain()); const Driver &D = ToolChain.getDriver(); const llvm::Triple::ArchType Arch = ToolChain.getArch(); const bool IsStatic = !Args.hasArg(options::OPT_dynamic) && !Args.hasArg(options::OPT_shared); ArgStringList CmdArgs; // Silence warning for "clang -g foo.o -o foo" Args.ClaimAllArgs(options::OPT_g_Group); // and "clang -emit-llvm foo.o -o foo" Args.ClaimAllArgs(options::OPT_emit_llvm); // and for "clang -w foo.o -o foo". Other warning options are already // handled somewhere else. Args.ClaimAllArgs(options::OPT_w); if (!D.SysRoot.empty()) CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); if (Args.hasArg(options::OPT_rdynamic)) CmdArgs.push_back("-export-dynamic"); if (Args.hasArg(options::OPT_s)) CmdArgs.push_back("-s"); // NaClToolChain doesn't have ExtraOpts like Linux; the only relevant flag // from there is --build-id, which we do want. CmdArgs.push_back("--build-id"); if (!IsStatic) CmdArgs.push_back("--eh-frame-hdr"); CmdArgs.push_back("-m"); if (Arch == llvm::Triple::x86) CmdArgs.push_back("elf_i386_nacl"); else if (Arch == llvm::Triple::arm) CmdArgs.push_back("armelf_nacl"); else if (Arch == llvm::Triple::x86_64) CmdArgs.push_back("elf_x86_64_nacl"); else if (Arch == llvm::Triple::mipsel) CmdArgs.push_back("mipselelf_nacl"); else D.Diag(diag::err_target_unsupported_arch) << ToolChain.getArchName() << "Native Client"; if (IsStatic) CmdArgs.push_back("-static"); else if (Args.hasArg(options::OPT_shared)) CmdArgs.push_back("-shared"); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (!Args.hasArg(options::OPT_shared)) CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt1.o"))); CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o"))); const char *crtbegin; if (IsStatic) crtbegin = "crtbeginT.o"; else if (Args.hasArg(options::OPT_shared)) crtbegin = "crtbeginS.o"; else crtbegin = "crtbegin.o"; CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin))); } Args.AddAllArgs(CmdArgs, options::OPT_L); Args.AddAllArgs(CmdArgs, options::OPT_u); ToolChain.AddFilePathLibArgs(Args, CmdArgs); if (Args.hasArg(options::OPT_Z_Xlinker__no_demangle)) CmdArgs.push_back("--no-demangle"); AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); if (D.CCCIsCXX() && !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) && !IsStatic; if (OnlyLibstdcxxStatic) CmdArgs.push_back("-Bstatic"); ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); if (OnlyLibstdcxxStatic) CmdArgs.push_back("-Bdynamic"); CmdArgs.push_back("-lm"); } if (!Args.hasArg(options::OPT_nostdlib)) { if (!Args.hasArg(options::OPT_nodefaultlibs)) { // Always use groups, since it has no effect on dynamic libraries. CmdArgs.push_back("--start-group"); CmdArgs.push_back("-lc"); // NaCl's libc++ currently requires libpthread, so just always include it // in the group for C++. 
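// (So a plain "clang++ foo.o" link pulls in -lpthread even when -pthread was
// not given on the command line.)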
if (Args.hasArg(options::OPT_pthread) || Args.hasArg(options::OPT_pthreads) || D.CCCIsCXX()) { // Gold, used by Mips, handles nested groups differently than ld, and // without '-lnacl' it prefers symbols from libpthread.a over libnacl.a, // which is not a desired behaviour here. // See https://sourceware.org/ml/binutils/2015-03/msg00034.html if (getToolChain().getArch() == llvm::Triple::mipsel) CmdArgs.push_back("-lnacl"); CmdArgs.push_back("-lpthread"); } CmdArgs.push_back("-lgcc"); CmdArgs.push_back("--as-needed"); if (IsStatic) CmdArgs.push_back("-lgcc_eh"); else CmdArgs.push_back("-lgcc_s"); CmdArgs.push_back("--no-as-needed"); // Mips needs to create and use pnacl_legacy library that contains // definitions from bitcode/pnaclmm.c and definitions for // __nacl_tp_tls_offset() and __nacl_tp_tdb_offset(). if (getToolChain().getArch() == llvm::Triple::mipsel) CmdArgs.push_back("-lpnacl_legacy"); CmdArgs.push_back("--end-group"); } if (!Args.hasArg(options::OPT_nostartfiles)) { const char *crtend; if (Args.hasArg(options::OPT_shared)) crtend = "crtendS.o"; else crtend = "crtend.o"; CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtend))); CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o"))); } } const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void fuchsia::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const toolchains::Fuchsia &ToolChain = static_cast(getToolChain()); const Driver &D = ToolChain.getDriver(); ArgStringList CmdArgs; // Silence warning for "clang -g foo.o -o foo" Args.ClaimAllArgs(options::OPT_g_Group); // and "clang -emit-llvm foo.o -o foo" Args.ClaimAllArgs(options::OPT_emit_llvm); // and for "clang -w foo.o -o foo". Other warning options are already // handled somewhere else. 
Args.ClaimAllArgs(options::OPT_w); const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); if (llvm::sys::path::stem(Exec).equals_lower("lld")) { CmdArgs.push_back("-flavor"); CmdArgs.push_back("gnu"); } if (!D.SysRoot.empty()) CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); if (!Args.hasArg(options::OPT_shared) && !Args.hasArg(options::OPT_r)) CmdArgs.push_back("-pie"); if (Args.hasArg(options::OPT_rdynamic)) CmdArgs.push_back("-export-dynamic"); if (Args.hasArg(options::OPT_s)) CmdArgs.push_back("-s"); if (Args.hasArg(options::OPT_r)) CmdArgs.push_back("-r"); else CmdArgs.push_back("--build-id"); if (!Args.hasArg(options::OPT_static)) CmdArgs.push_back("--eh-frame-hdr"); if (Args.hasArg(options::OPT_static)) CmdArgs.push_back("-Bstatic"); else if (Args.hasArg(options::OPT_shared)) CmdArgs.push_back("-shared"); if (!Args.hasArg(options::OPT_static)) { if (Args.hasArg(options::OPT_rdynamic)) CmdArgs.push_back("-export-dynamic"); if (!Args.hasArg(options::OPT_shared)) { CmdArgs.push_back("-dynamic-linker"); CmdArgs.push_back(Args.MakeArgString(D.DyldPrefix + "ld.so.1")); } } CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (!Args.hasArg(options::OPT_shared)) { CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("Scrt1.o"))); } } Args.AddAllArgs(CmdArgs, options::OPT_L); Args.AddAllArgs(CmdArgs, options::OPT_u); ToolChain.AddFilePathLibArgs(Args, CmdArgs); AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { if (Args.hasArg(options::OPT_static)) CmdArgs.push_back("-Bdynamic"); if (D.CCCIsCXX()) { bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) && !Args.hasArg(options::OPT_static); if (OnlyLibstdcxxStatic) CmdArgs.push_back("-Bstatic"); ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); if (OnlyLibstdcxxStatic) CmdArgs.push_back("-Bdynamic"); CmdArgs.push_back("-lm"); } AddRunTimeLibs(ToolChain, D, CmdArgs, Args); if (Args.hasArg(options::OPT_pthread) || Args.hasArg(options::OPT_pthreads)) CmdArgs.push_back("-lpthread"); if (Args.hasArg(options::OPT_fsplit_stack)) CmdArgs.push_back("--wrap=pthread_create"); CmdArgs.push_back("-lc"); } C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void minix::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { claimNoWarnArgs(Args); ArgStringList CmdArgs; Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); for (const auto &II : Inputs) CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void minix::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const Driver &D = getToolChain().getDriver(); ArgStringList CmdArgs; if (Output.isFilename()) { CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); } else { assert(Output.isNothing() && "Invalid output."); } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crt1.o"))); 
CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crti.o"))); CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o"))); CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o"))); } Args.AddAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, options::OPT_e}); AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); getToolChain().addProfileRTLibs(Args, CmdArgs); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { if (D.CCCIsCXX()) { getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs); CmdArgs.push_back("-lm"); } } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (Args.hasArg(options::OPT_pthread)) CmdArgs.push_back("-lpthread"); CmdArgs.push_back("-lc"); CmdArgs.push_back("-lCompilerRT-Generic"); CmdArgs.push_back("-L/usr/pkg/compiler-rt/lib"); CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtend.o"))); } const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } /// DragonFly Tools // For now, DragonFly Assemble does just about the same as for // FreeBSD, but this may change soon. void dragonfly::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { claimNoWarnArgs(Args); ArgStringList CmdArgs; // When building 32-bit code on DragonFly/pc64, we have to explicitly // instruct as in the base system to assemble 32-bit code. if (getToolChain().getArch() == llvm::Triple::x86) CmdArgs.push_back("--32"); Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); for (const auto &II : Inputs) CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void dragonfly::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const Driver &D = getToolChain().getDriver(); ArgStringList CmdArgs; if (!D.SysRoot.empty()) CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); CmdArgs.push_back("--eh-frame-hdr"); if (Args.hasArg(options::OPT_static)) { CmdArgs.push_back("-Bstatic"); } else { if (Args.hasArg(options::OPT_rdynamic)) CmdArgs.push_back("-export-dynamic"); if (Args.hasArg(options::OPT_shared)) CmdArgs.push_back("-Bshareable"); else { CmdArgs.push_back("-dynamic-linker"); CmdArgs.push_back("/usr/libexec/ld-elf.so.2"); } CmdArgs.push_back("--hash-style=gnu"); CmdArgs.push_back("--enable-new-dtags"); } // When building 32-bit code on DragonFly/pc64, we have to explicitly // instruct ld in the base system to link 32-bit code. 
if (getToolChain().getArch() == llvm::Triple::x86) { CmdArgs.push_back("-m"); CmdArgs.push_back("elf_i386"); } if (Output.isFilename()) { CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); } else { assert(Output.isNothing() && "Invalid output."); } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (!Args.hasArg(options::OPT_shared)) { if (Args.hasArg(options::OPT_pg)) CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("gcrt1.o"))); else { if (Args.hasArg(options::OPT_pie)) CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("Scrt1.o"))); else CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crt1.o"))); } } CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crti.o"))); if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie)) CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtbeginS.o"))); else CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o"))); } Args.AddAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, options::OPT_e}); AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { CmdArgs.push_back("-L/usr/lib/gcc50"); if (!Args.hasArg(options::OPT_static)) { CmdArgs.push_back("-rpath"); CmdArgs.push_back("/usr/lib/gcc50"); } if (D.CCCIsCXX()) { getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs); CmdArgs.push_back("-lm"); } if (Args.hasArg(options::OPT_pthread)) CmdArgs.push_back("-lpthread"); if (!Args.hasArg(options::OPT_nolibc)) { CmdArgs.push_back("-lc"); } if (Args.hasArg(options::OPT_static) || Args.hasArg(options::OPT_static_libgcc)) { CmdArgs.push_back("-lgcc"); CmdArgs.push_back("-lgcc_eh"); } else { if (Args.hasArg(options::OPT_shared_libgcc)) { CmdArgs.push_back("-lgcc_pic"); if (!Args.hasArg(options::OPT_shared)) CmdArgs.push_back("-lgcc"); } else { CmdArgs.push_back("-lgcc"); CmdArgs.push_back("--as-needed"); CmdArgs.push_back("-lgcc_pic"); CmdArgs.push_back("--no-as-needed"); } } } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie)) CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtendS.o"))); else CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtend.o"))); CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o"))); } getToolChain().addProfileRTLibs(Args, CmdArgs); const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } // Try to find Exe from a Visual Studio distribution. This first tries to find // an installed copy of Visual Studio and, failing that, looks in the PATH, // making sure that whatever executable that's found is not a same-named exe // from clang itself to prevent clang from falling back to itself. 
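// If neither location yields an executable, the bare name (e.g. "link.exe")
// is returned unchanged and gets resolved through the normal program search
// when the job is executed.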
static std::string FindVisualStudioExecutable(const ToolChain &TC, const char *Exe, const char *ClangProgramPath) { const auto &MSVC = static_cast(TC); std::string visualStudioBinDir; if (MSVC.getVisualStudioBinariesFolder(ClangProgramPath, visualStudioBinDir)) { SmallString<128> FilePath(visualStudioBinDir); llvm::sys::path::append(FilePath, Exe); if (llvm::sys::fs::can_execute(FilePath.c_str())) return FilePath.str(); } return Exe; } void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { ArgStringList CmdArgs; const ToolChain &TC = getToolChain(); assert((Output.isFilename() || Output.isNothing()) && "invalid output"); if (Output.isFilename()) CmdArgs.push_back( Args.MakeArgString(std::string("-out:") + Output.getFilename())); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles) && !C.getDriver().IsCLMode()) CmdArgs.push_back("-defaultlib:libcmt"); if (!llvm::sys::Process::GetEnv("LIB")) { // If the VC environment hasn't been configured (perhaps because the user // did not run vcvarsall), try to build a consistent link environment. If // the environment variable is set however, assume the user knows what // they're doing. std::string VisualStudioDir; const auto &MSVC = static_cast(TC); if (MSVC.getVisualStudioInstallDir(VisualStudioDir)) { SmallString<128> LibDir(VisualStudioDir); llvm::sys::path::append(LibDir, "VC", "lib"); switch (MSVC.getArch()) { case llvm::Triple::x86: // x86 just puts the libraries directly in lib break; case llvm::Triple::x86_64: llvm::sys::path::append(LibDir, "amd64"); break; case llvm::Triple::arm: llvm::sys::path::append(LibDir, "arm"); break; default: break; } CmdArgs.push_back( Args.MakeArgString(std::string("-libpath:") + LibDir.c_str())); if (MSVC.useUniversalCRT(VisualStudioDir)) { std::string UniversalCRTLibPath; if (MSVC.getUniversalCRTLibraryPath(UniversalCRTLibPath)) CmdArgs.push_back(Args.MakeArgString(std::string("-libpath:") + UniversalCRTLibPath)); } } std::string WindowsSdkLibPath; if (MSVC.getWindowsSDKLibraryPath(WindowsSdkLibPath)) CmdArgs.push_back( Args.MakeArgString(std::string("-libpath:") + WindowsSdkLibPath)); } if (!C.getDriver().IsCLMode() && Args.hasArg(options::OPT_L)) for (const auto &LibPath : Args.getAllArgValues(options::OPT_L)) CmdArgs.push_back(Args.MakeArgString("-libpath:" + LibPath)); CmdArgs.push_back("-nologo"); if (Args.hasArg(options::OPT_g_Group, options::OPT__SLASH_Z7, options::OPT__SLASH_Zd)) CmdArgs.push_back("-debug"); bool DLL = Args.hasArg(options::OPT__SLASH_LD, options::OPT__SLASH_LDd, options::OPT_shared); if (DLL) { CmdArgs.push_back(Args.MakeArgString("-dll")); SmallString<128> ImplibName(Output.getFilename()); llvm::sys::path::replace_extension(ImplibName, "lib"); CmdArgs.push_back(Args.MakeArgString(std::string("-implib:") + ImplibName)); } if (TC.getSanitizerArgs().needsAsanRt()) { CmdArgs.push_back(Args.MakeArgString("-debug")); CmdArgs.push_back(Args.MakeArgString("-incremental:no")); if (TC.getSanitizerArgs().needsSharedAsanRt() || Args.hasArg(options::OPT__SLASH_MD, options::OPT__SLASH_MDd)) { for (const auto &Lib : {"asan_dynamic", "asan_dynamic_runtime_thunk"}) CmdArgs.push_back(TC.getCompilerRTArgString(Args, Lib)); // Make sure the dynamic runtime thunk is not optimized out at link time // to ensure proper SEH handling. CmdArgs.push_back(Args.MakeArgString( TC.getArch() == llvm::Triple::x86 ? 
"-include:___asan_seh_interceptor" : "-include:__asan_seh_interceptor")); } else if (DLL) { CmdArgs.push_back(TC.getCompilerRTArgString(Args, "asan_dll_thunk")); } else { for (const auto &Lib : {"asan", "asan_cxx"}) CmdArgs.push_back(TC.getCompilerRTArgString(Args, Lib)); } } Args.AddAllArgValues(CmdArgs, options::OPT__SLASH_link); if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, options::OPT_fno_openmp, false)) { CmdArgs.push_back("-nodefaultlib:vcomp.lib"); CmdArgs.push_back("-nodefaultlib:vcompd.lib"); CmdArgs.push_back(Args.MakeArgString(std::string("-libpath:") + TC.getDriver().Dir + "/../lib")); switch (TC.getDriver().getOpenMPRuntime(Args)) { case Driver::OMPRT_OMP: CmdArgs.push_back("-defaultlib:libomp.lib"); break; case Driver::OMPRT_IOMP5: CmdArgs.push_back("-defaultlib:libiomp5md.lib"); break; case Driver::OMPRT_GOMP: break; case Driver::OMPRT_Unknown: // Already diagnosed. break; } } // Add compiler-rt lib in case if it was explicitly // specified as an argument for --rtlib option. if (!Args.hasArg(options::OPT_nostdlib)) { AddRunTimeLibs(TC, TC.getDriver(), CmdArgs, Args); } // Add filenames, libraries, and other linker inputs. for (const auto &Input : Inputs) { if (Input.isFilename()) { CmdArgs.push_back(Input.getFilename()); continue; } const Arg &A = Input.getInputArg(); // Render -l options differently for the MSVC linker. if (A.getOption().matches(options::OPT_l)) { StringRef Lib = A.getValue(); const char *LinkLibArg; if (Lib.endswith(".lib")) LinkLibArg = Args.MakeArgString(Lib); else LinkLibArg = Args.MakeArgString(Lib + ".lib"); CmdArgs.push_back(LinkLibArg); continue; } // Otherwise, this is some other kind of linker input option like -Wl, -z, // or -L. Render it, even if MSVC doesn't understand it. A.renderAsInput(Args, CmdArgs); } TC.addProfileRTLibs(Args, CmdArgs); // We need to special case some linker paths. In the case of lld, we need to // translate 'lld' into 'lld-link', and in the case of the regular msvc // linker, we need to use a special search algorithm. llvm::SmallString<128> linkPath; StringRef Linker = Args.getLastArgValue(options::OPT_fuse_ld_EQ, "link"); if (Linker.equals_lower("lld")) Linker = "lld-link"; if (Linker.equals_lower("link")) { // If we're using the MSVC linker, it's not sufficient to just use link // from the program PATH, because other environments like GnuWin32 install // their own link.exe which may come first. linkPath = FindVisualStudioExecutable(TC, "link.exe", C.getDriver().getClangProgramPath()); } else { linkPath = Linker; llvm::sys::path::replace_extension(linkPath, "exe"); linkPath = TC.GetProgramPath(linkPath.c_str()); } const char *Exec = Args.MakeArgString(linkPath); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void visualstudio::Compiler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { C.addCommand(GetCommand(C, JA, Output, Inputs, Args, LinkingOutput)); } std::unique_ptr visualstudio::Compiler::GetCommand( Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { ArgStringList CmdArgs; CmdArgs.push_back("/nologo"); CmdArgs.push_back("/c"); // Compile only. CmdArgs.push_back("/W0"); // No warnings. // The goal is to be able to invoke this tool correctly based on // any flag accepted by clang-cl. // These are spelled the same way in clang and cl.exe,. 
  Args.AddAllArgs(CmdArgs, {options::OPT_D, options::OPT_U, options::OPT_I});

  // Optimization level.
  if (Arg *A = Args.getLastArg(options::OPT_fbuiltin, options::OPT_fno_builtin))
    CmdArgs.push_back(A->getOption().getID() == options::OPT_fbuiltin ? "/Oi"
                                                                      : "/Oi-");
  if (Arg *A = Args.getLastArg(options::OPT_O, options::OPT_O0)) {
    if (A->getOption().getID() == options::OPT_O0) {
      CmdArgs.push_back("/Od");
    } else {
      CmdArgs.push_back("/Og");

      StringRef OptLevel = A->getValue();
      if (OptLevel == "s" || OptLevel == "z")
        CmdArgs.push_back("/Os");
      else
        CmdArgs.push_back("/Ot");

      CmdArgs.push_back("/Ob2");
    }
  }
  if (Arg *A = Args.getLastArg(options::OPT_fomit_frame_pointer,
                               options::OPT_fno_omit_frame_pointer))
    CmdArgs.push_back(A->getOption().getID() == options::OPT_fomit_frame_pointer
                          ? "/Oy"
                          : "/Oy-");
  if (!Args.hasArg(options::OPT_fwritable_strings))
    CmdArgs.push_back("/GF");

  // Flags for which clang-cl has an alias.
  // FIXME: How can we ensure this stays in sync with relevant clang-cl
  // options?

  if (Args.hasFlag(options::OPT__SLASH_GR_, options::OPT__SLASH_GR,
                   /*default=*/false))
    CmdArgs.push_back("/GR-");
  if (Args.hasFlag(options::OPT__SLASH_GS_, options::OPT__SLASH_GS,
                   /*default=*/false))
    CmdArgs.push_back("/GS-");
  if (Arg *A = Args.getLastArg(options::OPT_ffunction_sections,
                               options::OPT_fno_function_sections))
    CmdArgs.push_back(A->getOption().getID() == options::OPT_ffunction_sections
                          ? "/Gy"
                          : "/Gy-");
  if (Arg *A = Args.getLastArg(options::OPT_fdata_sections,
                               options::OPT_fno_data_sections))
    CmdArgs.push_back(
        A->getOption().getID() == options::OPT_fdata_sections ? "/Gw" : "/Gw-");
  if (Args.hasArg(options::OPT_fsyntax_only))
    CmdArgs.push_back("/Zs");
  if (Args.hasArg(options::OPT_g_Flag, options::OPT_gline_tables_only,
                  options::OPT__SLASH_Z7))
    CmdArgs.push_back("/Z7");

  std::vector<std::string> Includes =
      Args.getAllArgValues(options::OPT_include);
  for (const auto &Include : Includes)
    CmdArgs.push_back(Args.MakeArgString(std::string("/FI") + Include));

  // Flags that can simply be passed through.
  Args.AddAllArgs(CmdArgs, options::OPT__SLASH_LD);
  Args.AddAllArgs(CmdArgs, options::OPT__SLASH_LDd);
  Args.AddAllArgs(CmdArgs, options::OPT__SLASH_GX);
  Args.AddAllArgs(CmdArgs, options::OPT__SLASH_GX_);
  Args.AddAllArgs(CmdArgs, options::OPT__SLASH_EH);
  Args.AddAllArgs(CmdArgs, options::OPT__SLASH_Zl);

  // The order of these flags is relevant, so pick the last one.
  if (Arg *A = Args.getLastArg(options::OPT__SLASH_MD, options::OPT__SLASH_MDd,
                               options::OPT__SLASH_MT, options::OPT__SLASH_MTd))
    A->render(Args, CmdArgs);

  // Use MSVC's default threadsafe statics behaviour unless there was a flag.
  if (Arg *A = Args.getLastArg(options::OPT_fthreadsafe_statics,
                               options::OPT_fno_threadsafe_statics)) {
    CmdArgs.push_back(A->getOption().getID() == options::OPT_fthreadsafe_statics
                          ? "/Zc:threadSafeInit"
                          : "/Zc:threadSafeInit-");
  }

  // Pass through all unknown arguments so that the fallback command can see
  // them too.
  Args.AddAllArgs(CmdArgs, options::OPT_UNKNOWN);

  // Input filename.
  assert(Inputs.size() == 1);
  const InputInfo &II = Inputs[0];
  assert(II.getType() == types::TY_C || II.getType() == types::TY_CXX);
  CmdArgs.push_back(II.getType() == types::TY_C ? "/Tc" : "/Tp");
  if (II.isFilename())
    CmdArgs.push_back(II.getFilename());
  else
    II.getInputArg().renderAsInput(Args, CmdArgs);

  // Output filename.
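  // (cl.exe spells this /Fo<file>; see below.)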
assert(Output.getType() == types::TY_Object); const char *Fo = Args.MakeArgString(std::string("/Fo") + Output.getFilename()); CmdArgs.push_back(Fo); const Driver &D = getToolChain().getDriver(); std::string Exec = FindVisualStudioExecutable(getToolChain(), "cl.exe", D.getClangProgramPath()); return llvm::make_unique(JA, *this, Args.MakeArgString(Exec), CmdArgs, Inputs); } /// MinGW Tools void MinGW::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { claimNoWarnArgs(Args); ArgStringList CmdArgs; if (getToolChain().getArch() == llvm::Triple::x86) { CmdArgs.push_back("--32"); } else if (getToolChain().getArch() == llvm::Triple::x86_64) { CmdArgs.push_back("--64"); } Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); for (const auto &II : Inputs) CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); if (Args.hasArg(options::OPT_gsplit_dwarf)) SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output, SplitDebugName(Args, Inputs[0])); } void MinGW::Linker::AddLibGCC(const ArgList &Args, ArgStringList &CmdArgs) const { if (Args.hasArg(options::OPT_mthreads)) CmdArgs.push_back("-lmingwthrd"); CmdArgs.push_back("-lmingw32"); // Make use of compiler-rt if --rtlib option is used ToolChain::RuntimeLibType RLT = getToolChain().GetRuntimeLibType(Args); if (RLT == ToolChain::RLT_Libgcc) { bool Static = Args.hasArg(options::OPT_static_libgcc) || Args.hasArg(options::OPT_static); bool Shared = Args.hasArg(options::OPT_shared); bool CXX = getToolChain().getDriver().CCCIsCXX(); if (Static || (!CXX && !Shared)) { CmdArgs.push_back("-lgcc"); CmdArgs.push_back("-lgcc_eh"); } else { CmdArgs.push_back("-lgcc_s"); CmdArgs.push_back("-lgcc"); } } else { AddRunTimeLibs(getToolChain(), getToolChain().getDriver(), CmdArgs, Args); } CmdArgs.push_back("-lmoldname"); CmdArgs.push_back("-lmingwex"); CmdArgs.push_back("-lmsvcrt"); } void MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const ToolChain &TC = getToolChain(); const Driver &D = TC.getDriver(); // const SanitizerArgs &Sanitize = TC.getSanitizerArgs(); ArgStringList CmdArgs; // Silence warning for "clang -g foo.o -o foo" Args.ClaimAllArgs(options::OPT_g_Group); // and "clang -emit-llvm foo.o -o foo" Args.ClaimAllArgs(options::OPT_emit_llvm); // and for "clang -w foo.o -o foo". Other warning options are already // handled somewhere else. 
Args.ClaimAllArgs(options::OPT_w); StringRef LinkerName = Args.getLastArgValue(options::OPT_fuse_ld_EQ, "ld"); if (LinkerName.equals_lower("lld")) { CmdArgs.push_back("-flavor"); CmdArgs.push_back("gnu"); } else if (!LinkerName.equals_lower("ld")) { D.Diag(diag::err_drv_unsupported_linker) << LinkerName; } if (!D.SysRoot.empty()) CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); if (Args.hasArg(options::OPT_s)) CmdArgs.push_back("-s"); CmdArgs.push_back("-m"); if (TC.getArch() == llvm::Triple::x86) CmdArgs.push_back("i386pe"); if (TC.getArch() == llvm::Triple::x86_64) CmdArgs.push_back("i386pep"); if (TC.getArch() == llvm::Triple::arm) CmdArgs.push_back("thumb2pe"); if (Args.hasArg(options::OPT_mwindows)) { CmdArgs.push_back("--subsystem"); CmdArgs.push_back("windows"); } else if (Args.hasArg(options::OPT_mconsole)) { CmdArgs.push_back("--subsystem"); CmdArgs.push_back("console"); } if (Args.hasArg(options::OPT_static)) CmdArgs.push_back("-Bstatic"); else { if (Args.hasArg(options::OPT_mdll)) CmdArgs.push_back("--dll"); else if (Args.hasArg(options::OPT_shared)) CmdArgs.push_back("--shared"); CmdArgs.push_back("-Bdynamic"); if (Args.hasArg(options::OPT_mdll) || Args.hasArg(options::OPT_shared)) { CmdArgs.push_back("-e"); if (TC.getArch() == llvm::Triple::x86) CmdArgs.push_back("_DllMainCRTStartup@12"); else CmdArgs.push_back("DllMainCRTStartup"); CmdArgs.push_back("--enable-auto-image-base"); } } CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); Args.AddAllArgs(CmdArgs, options::OPT_e); // FIXME: add -N, -n flags Args.AddLastArg(CmdArgs, options::OPT_r); Args.AddLastArg(CmdArgs, options::OPT_s); Args.AddLastArg(CmdArgs, options::OPT_t); Args.AddAllArgs(CmdArgs, options::OPT_u_Group); Args.AddLastArg(CmdArgs, options::OPT_Z_Flag); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_mdll)) { CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("dllcrt2.o"))); } else { if (Args.hasArg(options::OPT_municode)) CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt2u.o"))); else CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt2.o"))); } if (Args.hasArg(options::OPT_pg)) CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("gcrt2.o"))); CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crtbegin.o"))); } Args.AddAllArgs(CmdArgs, options::OPT_L); TC.AddFilePathLibArgs(Args, CmdArgs); AddLinkerInputs(TC, Inputs, Args, CmdArgs, JA); // TODO: Add ASan stuff here // TODO: Add profile stuff here if (D.CCCIsCXX() && !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) && !Args.hasArg(options::OPT_static); if (OnlyLibstdcxxStatic) CmdArgs.push_back("-Bstatic"); TC.AddCXXStdlibLibArgs(Args, CmdArgs); if (OnlyLibstdcxxStatic) CmdArgs.push_back("-Bdynamic"); } if (!Args.hasArg(options::OPT_nostdlib)) { if (!Args.hasArg(options::OPT_nodefaultlibs)) { if (Args.hasArg(options::OPT_static)) CmdArgs.push_back("--start-group"); if (Args.hasArg(options::OPT_fstack_protector) || Args.hasArg(options::OPT_fstack_protector_strong) || Args.hasArg(options::OPT_fstack_protector_all)) { CmdArgs.push_back("-lssp_nonshared"); CmdArgs.push_back("-lssp"); } if (Args.hasArg(options::OPT_fopenmp)) CmdArgs.push_back("-lgomp"); AddLibGCC(Args, CmdArgs); if (Args.hasArg(options::OPT_pg)) CmdArgs.push_back("-lgmon"); if (Args.hasArg(options::OPT_pthread)) CmdArgs.push_back("-lpthread"); // add system libraries if 
(Args.hasArg(options::OPT_mwindows)) { CmdArgs.push_back("-lgdi32"); CmdArgs.push_back("-lcomdlg32"); } CmdArgs.push_back("-ladvapi32"); CmdArgs.push_back("-lshell32"); CmdArgs.push_back("-luser32"); CmdArgs.push_back("-lkernel32"); if (Args.hasArg(options::OPT_static)) CmdArgs.push_back("--end-group"); else if (!LinkerName.equals_lower("lld")) AddLibGCC(Args, CmdArgs); } if (!Args.hasArg(options::OPT_nostartfiles)) { // Add crtfastmath.o if available and fast math is enabled. TC.AddFastMathRuntimeIfAvailable(Args, CmdArgs); CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crtend.o"))); } } const char *Exec = Args.MakeArgString(TC.GetProgramPath(LinkerName.data())); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } /// XCore Tools // We pass assemble and link construction to the xcc tool. void XCore::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { claimNoWarnArgs(Args); ArgStringList CmdArgs; CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); CmdArgs.push_back("-c"); if (Args.hasArg(options::OPT_v)) CmdArgs.push_back("-v"); if (Arg *A = Args.getLastArg(options::OPT_g_Group)) if (!A->getOption().matches(options::OPT_g0)) CmdArgs.push_back("-g"); if (Args.hasFlag(options::OPT_fverbose_asm, options::OPT_fno_verbose_asm, false)) CmdArgs.push_back("-fverbose-asm"); Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); for (const auto &II : Inputs) CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("xcc")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void XCore::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { ArgStringList CmdArgs; if (Output.isFilename()) { CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); } else { assert(Output.isNothing() && "Invalid output."); } if (Args.hasArg(options::OPT_v)) CmdArgs.push_back("-v"); // Pass -fexceptions through to the linker if it was present. 
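  // A sketch of the hasFlag semantics relied on here: the default (false) is
  // used when neither flag appears, and otherwise the later flag wins:
  //
  //   clang ... foo.o                            -> nothing added
  //   clang -fexceptions ... foo.o               -> "-fexceptions" forwarded
  //   clang -fexceptions -fno-exceptions foo.o   -> nothing added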
if (Args.hasFlag(options::OPT_fexceptions, options::OPT_fno_exceptions, false)) CmdArgs.push_back("-fexceptions"); AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("xcc")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void CrossWindows::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { claimNoWarnArgs(Args); const auto &TC = static_cast(getToolChain()); ArgStringList CmdArgs; const char *Exec; switch (TC.getArch()) { default: llvm_unreachable("unsupported architecture"); case llvm::Triple::arm: case llvm::Triple::thumb: break; case llvm::Triple::x86: CmdArgs.push_back("--32"); break; case llvm::Triple::x86_64: CmdArgs.push_back("--64"); break; } Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); for (const auto &Input : Inputs) CmdArgs.push_back(Input.getFilename()); const std::string Assembler = TC.GetProgramPath("as"); Exec = Args.MakeArgString(Assembler); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void CrossWindows::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const auto &TC = static_cast(getToolChain()); const llvm::Triple &T = TC.getTriple(); const Driver &D = TC.getDriver(); SmallString<128> EntryPoint; ArgStringList CmdArgs; const char *Exec; // Silence warning for "clang -g foo.o -o foo" Args.ClaimAllArgs(options::OPT_g_Group); // and "clang -emit-llvm foo.o -o foo" Args.ClaimAllArgs(options::OPT_emit_llvm); // and for "clang -w foo.o -o foo" Args.ClaimAllArgs(options::OPT_w); // Other warning options are already handled somewhere else. if (!D.SysRoot.empty()) CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); if (Args.hasArg(options::OPT_pie)) CmdArgs.push_back("-pie"); if (Args.hasArg(options::OPT_rdynamic)) CmdArgs.push_back("-export-dynamic"); if (Args.hasArg(options::OPT_s)) CmdArgs.push_back("--strip-all"); CmdArgs.push_back("-m"); switch (TC.getArch()) { default: llvm_unreachable("unsupported architecture"); case llvm::Triple::arm: case llvm::Triple::thumb: // FIXME: this is incorrect for WinCE CmdArgs.push_back("thumb2pe"); break; case llvm::Triple::x86: CmdArgs.push_back("i386pe"); EntryPoint.append("_"); break; case llvm::Triple::x86_64: CmdArgs.push_back("i386pep"); break; } if (Args.hasArg(options::OPT_shared)) { switch (T.getArch()) { default: llvm_unreachable("unsupported architecture"); case llvm::Triple::arm: case llvm::Triple::thumb: case llvm::Triple::x86_64: EntryPoint.append("_DllMainCRTStartup"); break; case llvm::Triple::x86: EntryPoint.append("_DllMainCRTStartup@12"); break; } CmdArgs.push_back("-shared"); CmdArgs.push_back("-Bdynamic"); CmdArgs.push_back("--enable-auto-image-base"); CmdArgs.push_back("--entry"); CmdArgs.push_back(Args.MakeArgString(EntryPoint)); } else { EntryPoint.append("mainCRTStartup"); CmdArgs.push_back(Args.hasArg(options::OPT_static) ? "-Bstatic" : "-Bdynamic"); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { CmdArgs.push_back("--entry"); CmdArgs.push_back(Args.MakeArgString(EntryPoint)); } // FIXME: handle subsystem } // NOTE: deal with multiple definitions on Windows (e.g. 
COMDAT) CmdArgs.push_back("--allow-multiple-definition"); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_rdynamic)) { SmallString<261> ImpLib(Output.getFilename()); llvm::sys::path::replace_extension(ImpLib, ".lib"); CmdArgs.push_back("--out-implib"); CmdArgs.push_back(Args.MakeArgString(ImpLib)); } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { const std::string CRTPath(D.SysRoot + "/usr/lib/"); const char *CRTBegin; CRTBegin = Args.hasArg(options::OPT_shared) ? "crtbeginS.obj" : "crtbegin.obj"; CmdArgs.push_back(Args.MakeArgString(CRTPath + CRTBegin)); } Args.AddAllArgs(CmdArgs, options::OPT_L); TC.AddFilePathLibArgs(Args, CmdArgs); AddLinkerInputs(TC, Inputs, Args, CmdArgs, JA); if (D.CCCIsCXX() && !Args.hasArg(options::OPT_nostdlib) && !Args.hasArg(options::OPT_nodefaultlibs)) { bool StaticCXX = Args.hasArg(options::OPT_static_libstdcxx) && !Args.hasArg(options::OPT_static); if (StaticCXX) CmdArgs.push_back("-Bstatic"); TC.AddCXXStdlibLibArgs(Args, CmdArgs); if (StaticCXX) CmdArgs.push_back("-Bdynamic"); } if (!Args.hasArg(options::OPT_nostdlib)) { if (!Args.hasArg(options::OPT_nodefaultlibs)) { // TODO handle /MT[d] /MD[d] CmdArgs.push_back("-lmsvcrt"); AddRunTimeLibs(TC, D, CmdArgs, Args); } } if (TC.getSanitizerArgs().needsAsanRt()) { // TODO handle /MT[d] /MD[d] if (Args.hasArg(options::OPT_shared)) { CmdArgs.push_back(TC.getCompilerRTArgString(Args, "asan_dll_thunk")); } else { for (const auto &Lib : {"asan_dynamic", "asan_dynamic_runtime_thunk"}) CmdArgs.push_back(TC.getCompilerRTArgString(Args, Lib)); // Make sure the dynamic runtime thunk is not optimized out at link time // to ensure proper SEH handling. CmdArgs.push_back(Args.MakeArgString("--undefined")); CmdArgs.push_back(Args.MakeArgString(TC.getArch() == llvm::Triple::x86 ? "___asan_seh_interceptor" : "__asan_seh_interceptor")); } } Exec = Args.MakeArgString(TC.GetLinkerPath()); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void tools::SHAVE::Compiler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { ArgStringList CmdArgs; assert(Inputs.size() == 1); const InputInfo &II = Inputs[0]; assert(II.getType() == types::TY_C || II.getType() == types::TY_CXX || II.getType() == types::TY_PP_CXX); if (JA.getKind() == Action::PreprocessJobClass) { Args.ClaimAllArgs(); CmdArgs.push_back("-E"); } else { assert(Output.getType() == types::TY_PP_Asm); // Require preprocessed asm. CmdArgs.push_back("-S"); CmdArgs.push_back("-fno-exceptions"); // Always do this even if unspecified. } CmdArgs.push_back("-DMYRIAD2"); // Append all -I, -iquote, -isystem paths, defines/undefines, // 'f' flags, optimize flags, and warning options. // These are spelled the same way in clang and moviCompile. Args.AddAllArgsExcept( CmdArgs, {options::OPT_I_Group, options::OPT_clang_i_Group, options::OPT_std_EQ, options::OPT_D, options::OPT_U, options::OPT_f_Group, options::OPT_f_clang_Group, options::OPT_g_Group, options::OPT_M_Group, options::OPT_O_Group, options::OPT_W_Group, options::OPT_mcpu_EQ}, {options::OPT_fno_split_dwarf_inlining}); Args.hasArg(options::OPT_fno_split_dwarf_inlining); // Claim it if present. // If we're producing a dependency file, and assembly is the final action, // then the name of the target in the dependency file should be the '.o' // file, not the '.s' file produced by this step. 
For example, instead of // /tmp/mumble.s: mumble.c .../someheader.h // the filename on the lefthand side should be "mumble.o" if (Args.getLastArg(options::OPT_MF) && !Args.getLastArg(options::OPT_MT) && C.getActions().size() == 1 && C.getActions()[0]->getKind() == Action::AssembleJobClass) { Arg *A = Args.getLastArg(options::OPT_o); if (A) { CmdArgs.push_back("-MT"); CmdArgs.push_back(Args.MakeArgString(A->getValue())); } } CmdArgs.push_back(II.getFilename()); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); std::string Exec = Args.MakeArgString(getToolChain().GetProgramPath("moviCompile")); C.addCommand(llvm::make_unique(JA, *this, Args.MakeArgString(Exec), CmdArgs, Inputs)); } void tools::SHAVE::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { ArgStringList CmdArgs; assert(Inputs.size() == 1); const InputInfo &II = Inputs[0]; assert(II.getType() == types::TY_PP_Asm); // Require preprocessed asm input. assert(Output.getType() == types::TY_Object); CmdArgs.push_back("-no6thSlotCompression"); const Arg *CPUArg = Args.getLastArg(options::OPT_mcpu_EQ); if (CPUArg) CmdArgs.push_back( Args.MakeArgString("-cv:" + StringRef(CPUArg->getValue()))); CmdArgs.push_back("-noSPrefixing"); CmdArgs.push_back("-a"); // Mystery option. Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); for (const Arg *A : Args.filtered(options::OPT_I, options::OPT_isystem)) { A->claim(); CmdArgs.push_back( Args.MakeArgString(std::string("-i:") + A->getValue(0))); } CmdArgs.push_back("-elf"); // Output format. CmdArgs.push_back(II.getFilename()); CmdArgs.push_back( Args.MakeArgString(std::string("-o:") + Output.getFilename())); std::string Exec = Args.MakeArgString(getToolChain().GetProgramPath("moviAsm")); C.addCommand(llvm::make_unique(JA, *this, Args.MakeArgString(Exec), CmdArgs, Inputs)); } void tools::Myriad::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const auto &TC = static_cast(getToolChain()); const llvm::Triple &T = TC.getTriple(); ArgStringList CmdArgs; bool UseStartfiles = !Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles); bool UseDefaultLibs = !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs); // Silence warning if the args contain both -nostdlib and -stdlib=. Args.getLastArg(options::OPT_stdlib_EQ); if (T.getArch() == llvm::Triple::sparc) CmdArgs.push_back("-EB"); else // SHAVE assumes little-endian, and sparcel is expressly so. CmdArgs.push_back("-EL"); // The remaining logic is mostly like gnutools::Linker::ConstructJob, // but we never pass through a --sysroot option and various other bits. // For example, there are no sanitizers (yet) nor gold linker. // Eat some arguments that may be present but have no effect. Args.ClaimAllArgs(options::OPT_g_Group); Args.ClaimAllArgs(options::OPT_w); Args.ClaimAllArgs(options::OPT_static_libgcc); if (Args.hasArg(options::OPT_s)) // Pass the 'strip' option. CmdArgs.push_back("-s"); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); if (UseStartfiles) { // If you want startfiles, it means you want the builtin crti and crtbegin, // but not crt0. Myriad link commands provide their own crt0.o as needed. 
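  // A sketch of where these objects land on the final link line (paths
  // abbreviated; crt0.o, when wanted, comes from the Myriad link command
  // itself rather than from this driver):
  //
  //   sparc-myriad-elf-ld ... crti.o crtbegin.o <inputs> <libs> crtend.o crtn.o
  //
  // crti.o/crtn.o bracket the .init/.fini sections, while crtbegin.o and
  // crtend.o bracket the global constructor/destructor tables.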
CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crti.o"))); CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crtbegin.o"))); } Args.AddAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, options::OPT_e, options::OPT_s, options::OPT_t, options::OPT_Z_Flag, options::OPT_r}); TC.AddFilePathLibArgs(Args, CmdArgs); bool NeedsSanitizerDeps = addSanitizerRuntimes(TC, Args, CmdArgs); AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); if (UseDefaultLibs) { if (NeedsSanitizerDeps) linkSanitizerRuntimeDeps(TC, CmdArgs); if (C.getDriver().CCCIsCXX()) { if (TC.GetCXXStdlibType(Args) == ToolChain::CST_Libcxx) { CmdArgs.push_back("-lc++"); CmdArgs.push_back("-lc++abi"); } else CmdArgs.push_back("-lstdc++"); } if (T.getOS() == llvm::Triple::RTEMS) { CmdArgs.push_back("--start-group"); CmdArgs.push_back("-lc"); CmdArgs.push_back("-lgcc"); // circularly dependent on rtems // You must provide your own "-L" option to enable finding these. CmdArgs.push_back("-lrtemscpu"); CmdArgs.push_back("-lrtemsbsp"); CmdArgs.push_back("--end-group"); } else { CmdArgs.push_back("-lc"); CmdArgs.push_back("-lgcc"); } } if (UseStartfiles) { CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crtend.o"))); CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crtn.o"))); } std::string Exec = Args.MakeArgString(TC.GetProgramPath("sparc-myriad-elf-ld")); C.addCommand(llvm::make_unique(JA, *this, Args.MakeArgString(Exec), CmdArgs, Inputs)); } void PS4cpu::Assemble::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { claimNoWarnArgs(Args); ArgStringList CmdArgs; Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); assert(Inputs.size() == 1 && "Unexpected number of inputs."); const InputInfo &Input = Inputs[0]; assert(Input.isFilename() && "Invalid input."); CmdArgs.push_back(Input.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("orbis-as")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } static void AddPS4SanitizerArgs(const ToolChain &TC, ArgStringList &CmdArgs) { const SanitizerArgs &SanArgs = TC.getSanitizerArgs(); if (SanArgs.needsUbsanRt()) { CmdArgs.push_back("-lSceDbgUBSanitizer_stub_weak"); } if (SanArgs.needsAsanRt()) { CmdArgs.push_back("-lSceDbgAddressSanitizer_stub_weak"); } } static void ConstructPS4LinkJob(const Tool &T, Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) { const toolchains::FreeBSD &ToolChain = static_cast(T.getToolChain()); const Driver &D = ToolChain.getDriver(); ArgStringList CmdArgs; // Silence warning for "clang -g foo.o -o foo" Args.ClaimAllArgs(options::OPT_g_Group); // and "clang -emit-llvm foo.o -o foo" Args.ClaimAllArgs(options::OPT_emit_llvm); // and for "clang -w foo.o -o foo". Other warning options are already // handled somewhere else. 
Args.ClaimAllArgs(options::OPT_w); if (!D.SysRoot.empty()) CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); if (Args.hasArg(options::OPT_pie)) CmdArgs.push_back("-pie"); if (Args.hasArg(options::OPT_rdynamic)) CmdArgs.push_back("-export-dynamic"); if (Args.hasArg(options::OPT_shared)) CmdArgs.push_back("--oformat=so"); if (Output.isFilename()) { CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); } else { assert(Output.isNothing() && "Invalid output."); } AddPS4SanitizerArgs(ToolChain, CmdArgs); Args.AddAllArgs(CmdArgs, options::OPT_L); Args.AddAllArgs(CmdArgs, options::OPT_T_Group); Args.AddAllArgs(CmdArgs, options::OPT_e); Args.AddAllArgs(CmdArgs, options::OPT_s); Args.AddAllArgs(CmdArgs, options::OPT_t); Args.AddAllArgs(CmdArgs, options::OPT_r); if (Args.hasArg(options::OPT_Z_Xlinker__no_demangle)) CmdArgs.push_back("--no-demangle"); AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); if (Args.hasArg(options::OPT_pthread)) { CmdArgs.push_back("-lpthread"); } const char *Exec = Args.MakeArgString(ToolChain.GetProgramPath("orbis-ld")); C.addCommand(llvm::make_unique(JA, T, Exec, CmdArgs, Inputs)); } static void ConstructGoldLinkJob(const Tool &T, Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) { const toolchains::FreeBSD &ToolChain = static_cast(T.getToolChain()); const Driver &D = ToolChain.getDriver(); ArgStringList CmdArgs; // Silence warning for "clang -g foo.o -o foo" Args.ClaimAllArgs(options::OPT_g_Group); // and "clang -emit-llvm foo.o -o foo" Args.ClaimAllArgs(options::OPT_emit_llvm); // and for "clang -w foo.o -o foo". Other warning options are already // handled somewhere else. Args.ClaimAllArgs(options::OPT_w); if (!D.SysRoot.empty()) CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); if (Args.hasArg(options::OPT_pie)) CmdArgs.push_back("-pie"); if (Args.hasArg(options::OPT_static)) { CmdArgs.push_back("-Bstatic"); } else { if (Args.hasArg(options::OPT_rdynamic)) CmdArgs.push_back("-export-dynamic"); CmdArgs.push_back("--eh-frame-hdr"); if (Args.hasArg(options::OPT_shared)) { CmdArgs.push_back("-Bshareable"); } else { CmdArgs.push_back("-dynamic-linker"); CmdArgs.push_back("/libexec/ld-elf.so.1"); } CmdArgs.push_back("--enable-new-dtags"); } if (Output.isFilename()) { CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); } else { assert(Output.isNothing() && "Invalid output."); } AddPS4SanitizerArgs(ToolChain, CmdArgs); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { const char *crt1 = nullptr; if (!Args.hasArg(options::OPT_shared)) { if (Args.hasArg(options::OPT_pg)) crt1 = "gcrt1.o"; else if (Args.hasArg(options::OPT_pie)) crt1 = "Scrt1.o"; else crt1 = "crt1.o"; } if (crt1) CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crt1))); CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o"))); const char *crtbegin = nullptr; if (Args.hasArg(options::OPT_static)) crtbegin = "crtbeginT.o"; else if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie)) crtbegin = "crtbeginS.o"; else crtbegin = "crtbegin.o"; CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin))); } Args.AddAllArgs(CmdArgs, options::OPT_L); ToolChain.AddFilePathLibArgs(Args, CmdArgs); Args.AddAllArgs(CmdArgs, options::OPT_T_Group); Args.AddAllArgs(CmdArgs, options::OPT_e); Args.AddAllArgs(CmdArgs, options::OPT_s); Args.AddAllArgs(CmdArgs, options::OPT_t); Args.AddAllArgs(CmdArgs, options::OPT_r); 
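  // The forwarding above is verbatim; e.g. (a sketch, not an exhaustive
  // list) "clang -T my.lds -e _start foo.o" reaches the gold link line as
  // "... -T my.lds -e _start ...".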
if (Args.hasArg(options::OPT_Z_Xlinker__no_demangle)) CmdArgs.push_back("--no-demangle"); AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { // For PS4, we always want to pass libm, libstdc++ and libkernel // libraries for both C and C++ compilations. CmdArgs.push_back("-lkernel"); if (D.CCCIsCXX()) { ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); if (Args.hasArg(options::OPT_pg)) CmdArgs.push_back("-lm_p"); else CmdArgs.push_back("-lm"); } // FIXME: For some reason GCC passes -lgcc and -lgcc_s before adding // the default system libraries. Just mimic this for now. if (Args.hasArg(options::OPT_pg)) CmdArgs.push_back("-lgcc_p"); else CmdArgs.push_back("-lcompiler_rt"); if (Args.hasArg(options::OPT_static)) { CmdArgs.push_back("-lstdc++"); } else if (Args.hasArg(options::OPT_pg)) { CmdArgs.push_back("-lgcc_eh_p"); } else { CmdArgs.push_back("--as-needed"); CmdArgs.push_back("-lstdc++"); CmdArgs.push_back("--no-as-needed"); } if (Args.hasArg(options::OPT_pthread)) { if (Args.hasArg(options::OPT_pg)) CmdArgs.push_back("-lpthread_p"); else CmdArgs.push_back("-lpthread"); } if (Args.hasArg(options::OPT_pg)) { if (Args.hasArg(options::OPT_shared)) CmdArgs.push_back("-lc"); else { if (Args.hasArg(options::OPT_static)) { CmdArgs.push_back("--start-group"); CmdArgs.push_back("-lc_p"); CmdArgs.push_back("-lpthread_p"); CmdArgs.push_back("--end-group"); } else { CmdArgs.push_back("-lc_p"); } } CmdArgs.push_back("-lgcc_p"); } else { if (Args.hasArg(options::OPT_static)) { CmdArgs.push_back("--start-group"); CmdArgs.push_back("-lc"); CmdArgs.push_back("-lpthread"); CmdArgs.push_back("--end-group"); } else { CmdArgs.push_back("-lc"); } CmdArgs.push_back("-lcompiler_rt"); } if (Args.hasArg(options::OPT_static)) { CmdArgs.push_back("-lstdc++"); } else if (Args.hasArg(options::OPT_pg)) { CmdArgs.push_back("-lgcc_eh_p"); } else { CmdArgs.push_back("--as-needed"); CmdArgs.push_back("-lstdc++"); CmdArgs.push_back("--no-as-needed"); } } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie)) CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtendS.o"))); else CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtend.o"))); CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o"))); } const char *Exec = #ifdef LLVM_ON_WIN32 Args.MakeArgString(ToolChain.GetProgramPath("orbis-ld.gold")); #else Args.MakeArgString(ToolChain.GetProgramPath("orbis-ld")); #endif C.addCommand(llvm::make_unique(JA, T, Exec, CmdArgs, Inputs)); } void PS4cpu::Link::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const toolchains::FreeBSD &ToolChain = static_cast(getToolChain()); const Driver &D = ToolChain.getDriver(); bool PS4Linker; StringRef LinkerOptName; if (const Arg *A = Args.getLastArg(options::OPT_fuse_ld_EQ)) { LinkerOptName = A->getValue(); if (LinkerOptName != "ps4" && LinkerOptName != "gold") D.Diag(diag::err_drv_unsupported_linker) << LinkerOptName; } if (LinkerOptName == "gold") PS4Linker = false; else if (LinkerOptName == "ps4") PS4Linker = true; else PS4Linker = !Args.hasArg(options::OPT_shared); if (PS4Linker) ConstructPS4LinkJob(*this, C, JA, Output, Inputs, Args, LinkingOutput); else ConstructGoldLinkJob(*this, C, JA, Output, Inputs, Args, LinkingOutput); } void NVPTX::Assembler::ConstructJob(Compilation &C, const 
JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const auto &TC = static_cast(getToolChain()); assert(TC.getTriple().isNVPTX() && "Wrong platform"); // Obtain architecture from the action. CudaArch gpu_arch = StringToCudaArch(JA.getOffloadingArch()); assert(gpu_arch != CudaArch::UNKNOWN && "Device action expected to have an architecture."); // Check that our installation's ptxas supports gpu_arch. if (!Args.hasArg(options::OPT_no_cuda_version_check)) { TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch); } ArgStringList CmdArgs; CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32"); if (Args.hasFlag(options::OPT_cuda_noopt_device_debug, options::OPT_no_cuda_noopt_device_debug, false)) { // ptxas does not accept -g option if optimization is enabled, so // we ignore the compiler's -O* options if we want debug info. CmdArgs.push_back("-g"); CmdArgs.push_back("--dont-merge-basicblocks"); CmdArgs.push_back("--return-at-end"); } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { // Map the -O we received to -O{0,1,2,3}. // // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's // default, so it may correspond more closely to the spirit of clang -O2. // -O3 seems like the least-bad option when -Osomething is specified to // clang but it isn't handled below. StringRef OOpt = "3"; if (A->getOption().matches(options::OPT_O4) || A->getOption().matches(options::OPT_Ofast)) OOpt = "3"; else if (A->getOption().matches(options::OPT_O0)) OOpt = "0"; else if (A->getOption().matches(options::OPT_O)) { // -Os, -Oz, and -O(anything else) map to -O2, for lack of better options. OOpt = llvm::StringSwitch(A->getValue()) .Case("1", "1") .Case("2", "2") .Case("3", "3") .Case("s", "2") .Case("z", "2") .Default("2"); } CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt)); } else { // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond // to no optimizations, but ptxas's default is -O3. CmdArgs.push_back("-O0"); } CmdArgs.push_back("--gpu-name"); CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch))); CmdArgs.push_back("--output-file"); CmdArgs.push_back(Args.MakeArgString(Output.getFilename())); for (const auto& II : Inputs) CmdArgs.push_back(Args.MakeArgString(II.getFilename())); for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas)) CmdArgs.push_back(Args.MakeArgString(A)); const char *Exec; if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ)) Exec = A->getValue(); else Exec = Args.MakeArgString(TC.GetProgramPath("ptxas")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } // All inputs to this linker must be from CudaDeviceActions, as we need to look // at the Inputs' Actions in order to figure out which GPU architecture they // correspond to. void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const auto &TC = static_cast(getToolChain()); assert(TC.getTriple().isNVPTX() && "Wrong platform"); ArgStringList CmdArgs; CmdArgs.push_back("--cuda"); CmdArgs.push_back(TC.getTriple().isArch64Bit() ? 
"-64" : "-32"); CmdArgs.push_back(Args.MakeArgString("--create")); CmdArgs.push_back(Args.MakeArgString(Output.getFilename())); for (const auto& II : Inputs) { auto *A = II.getAction(); assert(A->getInputs().size() == 1 && "Device offload action is expected to have a single input"); const char *gpu_arch_str = A->getOffloadingArch(); assert(gpu_arch_str && "Device action expected to have associated a GPU architecture!"); CudaArch gpu_arch = StringToCudaArch(gpu_arch_str); // We need to pass an Arch of the form "sm_XX" for cubin files and // "compute_XX" for ptx. const char *Arch = (II.getType() == types::TY_PP_Asm) ? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch)) : gpu_arch_str; CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") + Arch + ",file=" + II.getFilename())); } for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary)) CmdArgs.push_back(Args.MakeArgString(A)); const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary")); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } void AVR::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { std::string Linker = getToolChain().GetProgramPath(getShortName()); ArgStringList CmdArgs; AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); C.addCommand(llvm::make_unique(JA, *this, Args.MakeArgString(Linker), CmdArgs, Inputs)); } // AVR tools end. Index: projects/clang400-import/contrib/llvm/tools/clang/lib/Sema/SemaType.cpp =================================================================== --- projects/clang400-import/contrib/llvm/tools/clang/lib/Sema/SemaType.cpp (revision 314176) +++ projects/clang400-import/contrib/llvm/tools/clang/lib/Sema/SemaType.cpp (revision 314177) @@ -1,7590 +1,7590 @@ //===--- SemaType.cpp - Semantic Analysis for Types -----------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements type-related semantic analysis. // //===----------------------------------------------------------------------===// #include "TypeLocBuilder.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTMutationListener.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/Expr.h" #include "clang/AST/TypeLoc.h" #include "clang/AST/TypeLocVisitor.h" #include "clang/Basic/PartialDiagnostic.h" #include "clang/Basic/TargetInfo.h" #include "clang/Lex/Preprocessor.h" #include "clang/Sema/DeclSpec.h" #include "clang/Sema/DelayedDiagnostic.h" #include "clang/Sema/Lookup.h" #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/SemaInternal.h" #include "clang/Sema/Template.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorHandling.h" using namespace clang; enum TypeDiagSelector { TDS_Function, TDS_Pointer, TDS_ObjCObjOrBlock }; /// isOmittedBlockReturnType - Return true if this declarator is missing a /// return type because this is a omitted return type on a block literal. 
static bool isOmittedBlockReturnType(const Declarator &D) { if (D.getContext() != Declarator::BlockLiteralContext || D.getDeclSpec().hasTypeSpecifier()) return false; if (D.getNumTypeObjects() == 0) return true; // ^{ ... } if (D.getNumTypeObjects() == 1 && D.getTypeObject(0).Kind == DeclaratorChunk::Function) return true; // ^(int X, float Y) { ... } return false; } /// diagnoseBadTypeAttribute - Diagnoses a type attribute which /// doesn't apply to the given type. static void diagnoseBadTypeAttribute(Sema &S, const AttributeList &attr, QualType type) { TypeDiagSelector WhichType; bool useExpansionLoc = true; switch (attr.getKind()) { case AttributeList::AT_ObjCGC: WhichType = TDS_Pointer; break; case AttributeList::AT_ObjCOwnership: WhichType = TDS_ObjCObjOrBlock; break; default: // Assume everything else was a function attribute. WhichType = TDS_Function; useExpansionLoc = false; break; } SourceLocation loc = attr.getLoc(); StringRef name = attr.getName()->getName(); // The GC attributes are usually written with macros; special-case them. IdentifierInfo *II = attr.isArgIdent(0) ? attr.getArgAsIdent(0)->Ident : nullptr; if (useExpansionLoc && loc.isMacroID() && II) { if (II->isStr("strong")) { if (S.findMacroSpelling(loc, "__strong")) name = "__strong"; } else if (II->isStr("weak")) { if (S.findMacroSpelling(loc, "__weak")) name = "__weak"; } } S.Diag(loc, diag::warn_type_attribute_wrong_type) << name << WhichType << type; } // objc_gc applies to Objective-C pointers or, otherwise, to the // smallest available pointer type (i.e. 'void*' in 'void**'). #define OBJC_POINTER_TYPE_ATTRS_CASELIST \ case AttributeList::AT_ObjCGC: \ case AttributeList::AT_ObjCOwnership // Calling convention attributes. #define CALLING_CONV_ATTRS_CASELIST \ case AttributeList::AT_CDecl: \ case AttributeList::AT_FastCall: \ case AttributeList::AT_StdCall: \ case AttributeList::AT_ThisCall: \ case AttributeList::AT_RegCall: \ case AttributeList::AT_Pascal: \ case AttributeList::AT_SwiftCall: \ case AttributeList::AT_VectorCall: \ case AttributeList::AT_MSABI: \ case AttributeList::AT_SysVABI: \ case AttributeList::AT_Pcs: \ case AttributeList::AT_IntelOclBicc: \ case AttributeList::AT_PreserveMost: \ case AttributeList::AT_PreserveAll // Function type attributes. #define FUNCTION_TYPE_ATTRS_CASELIST \ case AttributeList::AT_NoReturn: \ case AttributeList::AT_Regparm: \ CALLING_CONV_ATTRS_CASELIST // Microsoft-specific type qualifiers. #define MS_TYPE_ATTRS_CASELIST \ case AttributeList::AT_Ptr32: \ case AttributeList::AT_Ptr64: \ case AttributeList::AT_SPtr: \ case AttributeList::AT_UPtr // Nullability qualifiers. #define NULLABILITY_TYPE_ATTRS_CASELIST \ case AttributeList::AT_TypeNonNull: \ case AttributeList::AT_TypeNullable: \ case AttributeList::AT_TypeNullUnspecified namespace { /// An object which stores processing state for the entire /// GetTypeForDeclarator process. class TypeProcessingState { Sema &sema; /// The declarator being processed. Declarator &declarator; /// The index of the declarator chunk we're currently processing. /// May be the total number of valid chunks, indicating the /// DeclSpec. unsigned chunkIndex; /// Whether there are non-trivial modifications to the decl spec. bool trivial; /// Whether we saved the attributes in the decl spec. bool hasSavedAttrs; /// The original set of attributes on the DeclSpec. SmallVector savedAttrs; /// A list of attributes to diagnose the uselessness of when the /// processing is complete. 
SmallVector ignoredTypeAttrs; public: TypeProcessingState(Sema &sema, Declarator &declarator) : sema(sema), declarator(declarator), chunkIndex(declarator.getNumTypeObjects()), trivial(true), hasSavedAttrs(false) {} Sema &getSema() const { return sema; } Declarator &getDeclarator() const { return declarator; } bool isProcessingDeclSpec() const { return chunkIndex == declarator.getNumTypeObjects(); } unsigned getCurrentChunkIndex() const { return chunkIndex; } void setCurrentChunkIndex(unsigned idx) { assert(idx <= declarator.getNumTypeObjects()); chunkIndex = idx; } AttributeList *&getCurrentAttrListRef() const { if (isProcessingDeclSpec()) return getMutableDeclSpec().getAttributes().getListRef(); return declarator.getTypeObject(chunkIndex).getAttrListRef(); } /// Save the current set of attributes on the DeclSpec. void saveDeclSpecAttrs() { // Don't try to save them multiple times. if (hasSavedAttrs) return; DeclSpec &spec = getMutableDeclSpec(); for (AttributeList *attr = spec.getAttributes().getList(); attr; attr = attr->getNext()) savedAttrs.push_back(attr); trivial &= savedAttrs.empty(); hasSavedAttrs = true; } /// Record that we had nowhere to put the given type attribute. /// We will diagnose such attributes later. void addIgnoredTypeAttr(AttributeList &attr) { ignoredTypeAttrs.push_back(&attr); } /// Diagnose all the ignored type attributes, given that the /// declarator worked out to the given type. void diagnoseIgnoredTypeAttrs(QualType type) const { for (auto *Attr : ignoredTypeAttrs) diagnoseBadTypeAttribute(getSema(), *Attr, type); } ~TypeProcessingState() { if (trivial) return; restoreDeclSpecAttrs(); } private: DeclSpec &getMutableDeclSpec() const { return const_cast(declarator.getDeclSpec()); } void restoreDeclSpecAttrs() { assert(hasSavedAttrs); if (savedAttrs.empty()) { getMutableDeclSpec().getAttributes().set(nullptr); return; } getMutableDeclSpec().getAttributes().set(savedAttrs[0]); for (unsigned i = 0, e = savedAttrs.size() - 1; i != e; ++i) savedAttrs[i]->setNext(savedAttrs[i+1]); savedAttrs.back()->setNext(nullptr); } }; } // end anonymous namespace static void spliceAttrIntoList(AttributeList &attr, AttributeList *&head) { attr.setNext(head); head = &attr; } static void spliceAttrOutOfList(AttributeList &attr, AttributeList *&head) { if (head == &attr) { head = attr.getNext(); return; } AttributeList *cur = head; while (true) { assert(cur && cur->getNext() && "ran out of attrs?"); if (cur->getNext() == &attr) { cur->setNext(attr.getNext()); return; } cur = cur->getNext(); } } static void moveAttrFromListToList(AttributeList &attr, AttributeList *&fromList, AttributeList *&toList) { spliceAttrOutOfList(attr, fromList); spliceAttrIntoList(attr, toList); } /// The location of a type attribute. enum TypeAttrLocation { /// The attribute is in the decl-specifier-seq. TAL_DeclSpec, /// The attribute is part of a DeclaratorChunk. TAL_DeclChunk, /// The attribute is immediately after the declaration's name. 
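  /// (Roughly, the ATTR position in "int x ATTR;" -- an illustrative
  /// example, mirroring the ones given for
  /// distributeTypeAttrsFromDeclarator below.)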
TAL_DeclName }; static void processTypeAttrs(TypeProcessingState &state, QualType &type, TypeAttrLocation TAL, AttributeList *attrs); static bool handleFunctionTypeAttr(TypeProcessingState &state, AttributeList &attr, QualType &type); static bool handleMSPointerTypeQualifierAttr(TypeProcessingState &state, AttributeList &attr, QualType &type); static bool handleObjCGCTypeAttr(TypeProcessingState &state, AttributeList &attr, QualType &type); static bool handleObjCOwnershipTypeAttr(TypeProcessingState &state, AttributeList &attr, QualType &type); static bool handleObjCPointerTypeAttr(TypeProcessingState &state, AttributeList &attr, QualType &type) { if (attr.getKind() == AttributeList::AT_ObjCGC) return handleObjCGCTypeAttr(state, attr, type); assert(attr.getKind() == AttributeList::AT_ObjCOwnership); return handleObjCOwnershipTypeAttr(state, attr, type); } /// Given the index of a declarator chunk, check whether that chunk /// directly specifies the return type of a function and, if so, find /// an appropriate place for it. /// /// \param i - a notional index which the search will start /// immediately inside /// /// \param onlyBlockPointers Whether we should only look into block /// pointer types (vs. all pointer types). static DeclaratorChunk *maybeMovePastReturnType(Declarator &declarator, unsigned i, bool onlyBlockPointers) { assert(i <= declarator.getNumTypeObjects()); DeclaratorChunk *result = nullptr; // First, look inwards past parens for a function declarator. for (; i != 0; --i) { DeclaratorChunk &fnChunk = declarator.getTypeObject(i-1); switch (fnChunk.Kind) { case DeclaratorChunk::Paren: continue; // If we find anything except a function, bail out. case DeclaratorChunk::Pointer: case DeclaratorChunk::BlockPointer: case DeclaratorChunk::Array: case DeclaratorChunk::Reference: case DeclaratorChunk::MemberPointer: case DeclaratorChunk::Pipe: return result; // If we do find a function declarator, scan inwards from that, // looking for a (block-)pointer declarator. case DeclaratorChunk::Function: for (--i; i != 0; --i) { DeclaratorChunk &ptrChunk = declarator.getTypeObject(i-1); switch (ptrChunk.Kind) { case DeclaratorChunk::Paren: case DeclaratorChunk::Array: case DeclaratorChunk::Function: case DeclaratorChunk::Reference: case DeclaratorChunk::Pipe: continue; case DeclaratorChunk::MemberPointer: case DeclaratorChunk::Pointer: if (onlyBlockPointers) continue; // fallthrough case DeclaratorChunk::BlockPointer: result = &ptrChunk; goto continue_outer; } llvm_unreachable("bad declarator chunk kind"); } // If we run out of declarators doing that, we're done. return result; } llvm_unreachable("bad declarator chunk kind"); // Okay, reconsider from our new point. continue_outer: ; } // Ran out of chunks, bail out. return result; } /// Given that an objc_gc attribute was written somewhere on a /// declaration *other* than on the declarator itself (for which, use /// distributeObjCPointerTypeAttrFromDeclarator), and given that it /// didn't apply in whatever position it was written in, try to move /// it to a more appropriate position. static void distributeObjCPointerTypeAttr(TypeProcessingState &state, AttributeList &attr, QualType type) { Declarator &declarator = state.getDeclarator(); // Move it to the outermost normal or block pointer declarator. 
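  // A sketch of the intended effect: if an ownership/GC attribute written in
  // the decl spec of something like "__strong id *x;" reaches this point,
  // the walk below re-homes it onto the pointer chunk it really qualifies;
  // references, member pointers and pipes deliberately terminate the search.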
for (unsigned i = state.getCurrentChunkIndex(); i != 0; --i) { DeclaratorChunk &chunk = declarator.getTypeObject(i-1); switch (chunk.Kind) { case DeclaratorChunk::Pointer: case DeclaratorChunk::BlockPointer: { // But don't move an ARC ownership attribute to the return type // of a block. DeclaratorChunk *destChunk = nullptr; if (state.isProcessingDeclSpec() && attr.getKind() == AttributeList::AT_ObjCOwnership) destChunk = maybeMovePastReturnType(declarator, i - 1, /*onlyBlockPointers=*/true); if (!destChunk) destChunk = &chunk; moveAttrFromListToList(attr, state.getCurrentAttrListRef(), destChunk->getAttrListRef()); return; } case DeclaratorChunk::Paren: case DeclaratorChunk::Array: continue; // We may be starting at the return type of a block. case DeclaratorChunk::Function: if (state.isProcessingDeclSpec() && attr.getKind() == AttributeList::AT_ObjCOwnership) { if (DeclaratorChunk *dest = maybeMovePastReturnType( declarator, i, /*onlyBlockPointers=*/true)) { moveAttrFromListToList(attr, state.getCurrentAttrListRef(), dest->getAttrListRef()); return; } } goto error; // Don't walk through these. case DeclaratorChunk::Reference: case DeclaratorChunk::MemberPointer: case DeclaratorChunk::Pipe: goto error; } } error: diagnoseBadTypeAttribute(state.getSema(), attr, type); } /// Distribute an objc_gc type attribute that was written on the /// declarator. static void distributeObjCPointerTypeAttrFromDeclarator(TypeProcessingState &state, AttributeList &attr, QualType &declSpecType) { Declarator &declarator = state.getDeclarator(); // objc_gc goes on the innermost pointer to something that's not a // pointer. unsigned innermost = -1U; bool considerDeclSpec = true; for (unsigned i = 0, e = declarator.getNumTypeObjects(); i != e; ++i) { DeclaratorChunk &chunk = declarator.getTypeObject(i); switch (chunk.Kind) { case DeclaratorChunk::Pointer: case DeclaratorChunk::BlockPointer: innermost = i; continue; case DeclaratorChunk::Reference: case DeclaratorChunk::MemberPointer: case DeclaratorChunk::Paren: case DeclaratorChunk::Array: case DeclaratorChunk::Pipe: continue; case DeclaratorChunk::Function: considerDeclSpec = false; goto done; } } done: // That might actually be the decl spec if we weren't blocked by // anything in the declarator. if (considerDeclSpec) { if (handleObjCPointerTypeAttr(state, attr, declSpecType)) { // Splice the attribute into the decl spec. Prevents the // attribute from being applied multiple times and gives // the source-location-filler something to work with. state.saveDeclSpecAttrs(); moveAttrFromListToList(attr, declarator.getAttrListRef(), declarator.getMutableDeclSpec().getAttributes().getListRef()); return; } } // Otherwise, if we found an appropriate chunk, splice the attribute // into it. if (innermost != -1U) { moveAttrFromListToList(attr, declarator.getAttrListRef(), declarator.getTypeObject(innermost).getAttrListRef()); return; } // Otherwise, diagnose when we're done building the type. spliceAttrOutOfList(attr, declarator.getAttrListRef()); state.addIgnoredTypeAttr(attr); } /// A function type attribute was written somewhere in a declaration /// *other* than on the declarator itself or in the decl spec. Given /// that it didn't apply in whatever position it was written in, try /// to move it to a more appropriate position. static void distributeFunctionTypeAttr(TypeProcessingState &state, AttributeList &attr, QualType type) { Declarator &declarator = state.getDeclarator(); // Try to push the attribute from the return type of a function to // the function itself. 
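  // A minimal sketch of the walk below: step outward from the current chunk
  // and hand the attribute to the first Function chunk found, treating every
  // other chunk kind as transparent:
  //
  //   for (i = current; i != 0; --i)
  //     if (chunk(i - 1).Kind == Function) { move attr there; return; }
  //
  // If no Function chunk exists, fall through to the diagnostic at the end.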
for (unsigned i = state.getCurrentChunkIndex(); i != 0; --i) { DeclaratorChunk &chunk = declarator.getTypeObject(i-1); switch (chunk.Kind) { case DeclaratorChunk::Function: moveAttrFromListToList(attr, state.getCurrentAttrListRef(), chunk.getAttrListRef()); return; case DeclaratorChunk::Paren: case DeclaratorChunk::Pointer: case DeclaratorChunk::BlockPointer: case DeclaratorChunk::Array: case DeclaratorChunk::Reference: case DeclaratorChunk::MemberPointer: case DeclaratorChunk::Pipe: continue; } } diagnoseBadTypeAttribute(state.getSema(), attr, type); } /// Try to distribute a function type attribute to the innermost /// function chunk or type. Returns true if the attribute was /// distributed, false if no location was found. static bool distributeFunctionTypeAttrToInnermost(TypeProcessingState &state, AttributeList &attr, AttributeList *&attrList, QualType &declSpecType) { Declarator &declarator = state.getDeclarator(); // Put it on the innermost function chunk, if there is one. for (unsigned i = 0, e = declarator.getNumTypeObjects(); i != e; ++i) { DeclaratorChunk &chunk = declarator.getTypeObject(i); if (chunk.Kind != DeclaratorChunk::Function) continue; moveAttrFromListToList(attr, attrList, chunk.getAttrListRef()); return true; } return handleFunctionTypeAttr(state, attr, declSpecType); } /// A function type attribute was written in the decl spec. Try to /// apply it somewhere. static void distributeFunctionTypeAttrFromDeclSpec(TypeProcessingState &state, AttributeList &attr, QualType &declSpecType) { state.saveDeclSpecAttrs(); // C++11 attributes before the decl specifiers actually appertain to // the declarators. Move them straight there. We don't support the // 'put them wherever you like' semantics we allow for GNU attributes. if (attr.isCXX11Attribute()) { moveAttrFromListToList(attr, state.getCurrentAttrListRef(), state.getDeclarator().getAttrListRef()); return; } // Try to distribute to the innermost. if (distributeFunctionTypeAttrToInnermost(state, attr, state.getCurrentAttrListRef(), declSpecType)) return; // If that failed, diagnose the bad attribute when the declarator is // fully built. state.addIgnoredTypeAttr(attr); } /// A function type attribute was written on the declarator. Try to /// apply it somewhere. static void distributeFunctionTypeAttrFromDeclarator(TypeProcessingState &state, AttributeList &attr, QualType &declSpecType) { Declarator &declarator = state.getDeclarator(); // Try to distribute to the innermost. if (distributeFunctionTypeAttrToInnermost(state, attr, declarator.getAttrListRef(), declSpecType)) return; // If that failed, diagnose the bad attribute when the declarator is // fully built. spliceAttrOutOfList(attr, declarator.getAttrListRef()); state.addIgnoredTypeAttr(attr); } /// \brief Given that there are attributes written on the declarator /// itself, try to distribute any type attributes to the appropriate /// declarator chunk. /// /// These are attributes like the following: /// int f ATTR; /// int (f ATTR)(); /// but not necessarily this: /// int f() ATTR; static void distributeTypeAttrsFromDeclarator(TypeProcessingState &state, QualType &declSpecType) { // Collect all the type attributes from the declarator itself. assert(state.getDeclarator().getAttributes() && "declarator has no attrs!"); AttributeList *attr = state.getDeclarator().getAttributes(); AttributeList *next; do { next = attr->getNext(); // Do not distribute C++11 attributes. They have strict rules for what // they appertain to. 
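  // For example (a sketch): in "int *p [[foo]];" a C++11 attribute
  // appertains to the declared entity p itself, so unlike a GNU attribute it
  // must not be re-homed to an inner type position by the switch below.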
if (attr->isCXX11Attribute()) continue; switch (attr->getKind()) { OBJC_POINTER_TYPE_ATTRS_CASELIST: distributeObjCPointerTypeAttrFromDeclarator(state, *attr, declSpecType); break; case AttributeList::AT_NSReturnsRetained: if (!state.getSema().getLangOpts().ObjCAutoRefCount) break; // fallthrough FUNCTION_TYPE_ATTRS_CASELIST: distributeFunctionTypeAttrFromDeclarator(state, *attr, declSpecType); break; MS_TYPE_ATTRS_CASELIST: // Microsoft type attributes cannot go after the declarator-id. continue; NULLABILITY_TYPE_ATTRS_CASELIST: // Nullability specifiers cannot go after the declarator-id. // Objective-C __kindof does not get distributed. case AttributeList::AT_ObjCKindOf: continue; default: break; } } while ((attr = next)); } /// Add a synthetic '()' to a block-literal declarator if it is /// required, given the return type. static void maybeSynthesizeBlockSignature(TypeProcessingState &state, QualType declSpecType) { Declarator &declarator = state.getDeclarator(); // First, check whether the declarator would produce a function, // i.e. whether the innermost semantic chunk is a function. if (declarator.isFunctionDeclarator()) { // If so, make that declarator a prototyped declarator. declarator.getFunctionTypeInfo().hasPrototype = true; return; } // If there are any type objects, the type as written won't name a // function, regardless of the decl spec type. This is because a // block signature declarator is always an abstract-declarator, and // abstract-declarators can't just be parentheses chunks. Therefore // we need to build a function chunk unless there are no type // objects and the decl spec type is a function. if (!declarator.getNumTypeObjects() && declSpecType->isFunctionType()) return; // Note that there *are* cases with invalid declarators where // declarators consist solely of parentheses. In general, these // occur only in failed efforts to make function declarators, so // faking up the function chunk is still the right thing to do. // Otherwise, we need to fake up a function declarator. SourceLocation loc = declarator.getLocStart(); // ...and *prepend* it to the declarator. SourceLocation NoLoc; declarator.AddInnermostTypeInfo(DeclaratorChunk::getFunction( /*HasProto=*/true, /*IsAmbiguous=*/false, /*LParenLoc=*/NoLoc, /*ArgInfo=*/nullptr, /*NumArgs=*/0, /*EllipsisLoc=*/NoLoc, /*RParenLoc=*/NoLoc, /*TypeQuals=*/0, /*RefQualifierIsLvalueRef=*/true, /*RefQualifierLoc=*/NoLoc, /*ConstQualifierLoc=*/NoLoc, /*VolatileQualifierLoc=*/NoLoc, /*RestrictQualifierLoc=*/NoLoc, /*MutableLoc=*/NoLoc, EST_None, /*ESpecRange=*/SourceRange(), /*Exceptions=*/nullptr, /*ExceptionRanges=*/nullptr, /*NumExceptions=*/0, /*NoexceptExpr=*/nullptr, /*ExceptionSpecTokens=*/nullptr, /*DeclsInPrototype=*/None, loc, loc, declarator)); // For consistency, make sure the state still has us as processing // the decl spec. assert(state.getCurrentChunkIndex() == declarator.getNumTypeObjects() - 1); state.setCurrentChunkIndex(declarator.getNumTypeObjects()); } static void diagnoseAndRemoveTypeQualifiers(Sema &S, const DeclSpec &DS, unsigned &TypeQuals, QualType TypeSoFar, unsigned RemoveTQs, unsigned DiagID) { // If this occurs outside a template instantiation, warn the user about // it; they probably didn't mean to specify a redundant qualifier. 
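  // A sketch of the contract implemented below: each qualifier listed in
  // RemoveTQs is diagnosed (with a removal fix-it) only outside template
  // instantiations, but is stripped from TypeQuals unconditionally, so the
  // resulting type is the same either way.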
typedef std::pair QualLoc; for (QualLoc Qual : {QualLoc(DeclSpec::TQ_const, DS.getConstSpecLoc()), QualLoc(DeclSpec::TQ_restrict, DS.getRestrictSpecLoc()), QualLoc(DeclSpec::TQ_volatile, DS.getVolatileSpecLoc()), QualLoc(DeclSpec::TQ_atomic, DS.getAtomicSpecLoc())}) { if (!(RemoveTQs & Qual.first)) continue; if (S.ActiveTemplateInstantiations.empty()) { if (TypeQuals & Qual.first) S.Diag(Qual.second, DiagID) << DeclSpec::getSpecifierName(Qual.first) << TypeSoFar << FixItHint::CreateRemoval(Qual.second); } TypeQuals &= ~Qual.first; } } /// Return true if this is omitted block return type. Also check type /// attributes and type qualifiers when returning true. static bool checkOmittedBlockReturnType(Sema &S, Declarator &declarator, QualType Result) { if (!isOmittedBlockReturnType(declarator)) return false; // Warn if we see type attributes for omitted return type on a block literal. AttributeList *&attrs = declarator.getMutableDeclSpec().getAttributes().getListRef(); AttributeList *prev = nullptr; for (AttributeList *cur = attrs; cur; cur = cur->getNext()) { AttributeList &attr = *cur; // Skip attributes that were marked to be invalid or non-type // attributes. if (attr.isInvalid() || !attr.isTypeAttr()) { prev = cur; continue; } S.Diag(attr.getLoc(), diag::warn_block_literal_attributes_on_omitted_return_type) << attr.getName(); // Remove cur from the list. if (prev) { prev->setNext(cur->getNext()); prev = cur; } else { attrs = cur->getNext(); } } // Warn if we see type qualifiers for omitted return type on a block literal. const DeclSpec &DS = declarator.getDeclSpec(); unsigned TypeQuals = DS.getTypeQualifiers(); diagnoseAndRemoveTypeQualifiers(S, DS, TypeQuals, Result, (unsigned)-1, diag::warn_block_literal_qualifiers_on_omitted_return_type); declarator.getMutableDeclSpec().ClearTypeQualifiers(); return true; } /// Apply Objective-C type arguments to the given type. static QualType applyObjCTypeArgs(Sema &S, SourceLocation loc, QualType type, ArrayRef typeArgs, SourceRange typeArgsRange, bool failOnError = false) { // We can only apply type arguments to an Objective-C class type. const auto *objcObjectType = type->getAs(); if (!objcObjectType || !objcObjectType->getInterface()) { S.Diag(loc, diag::err_objc_type_args_non_class) << type << typeArgsRange; if (failOnError) return QualType(); return type; } // The class type must be parameterized. ObjCInterfaceDecl *objcClass = objcObjectType->getInterface(); ObjCTypeParamList *typeParams = objcClass->getTypeParamList(); if (!typeParams) { S.Diag(loc, diag::err_objc_type_args_non_parameterized_class) << objcClass->getDeclName() << FixItHint::CreateRemoval(typeArgsRange); if (failOnError) return QualType(); return type; } // The type must not already be specialized. if (objcObjectType->isSpecialized()) { S.Diag(loc, diag::err_objc_type_args_specialized_class) << type << FixItHint::CreateRemoval(typeArgsRange); if (failOnError) return QualType(); return type; } // Check the type arguments. SmallVector finalTypeArgs; unsigned numTypeParams = typeParams->size(); bool anyPackExpansions = false; for (unsigned i = 0, n = typeArgs.size(); i != n; ++i) { TypeSourceInfo *typeArgInfo = typeArgs[i]; QualType typeArg = typeArgInfo->getType(); // Type arguments cannot have explicit qualifiers or nullability. // We ignore indirect sources of these, e.g. behind typedefs or // template arguments. 
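    // For example (a sketch): both "NSArray<NSString * _Nonnull> *" and
    // "NSArray<const NSString *> *" are diagnosed here, while the same
    // qualifier hidden behind a typedef is tolerated and silently dropped by
    // the getUnqualifiedType() call below.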
if (TypeLoc qual = typeArgInfo->getTypeLoc().findExplicitQualifierLoc()) { bool diagnosed = false; SourceRange rangeToRemove; if (auto attr = qual.getAs()) { rangeToRemove = attr.getLocalSourceRange(); if (attr.getTypePtr()->getImmediateNullability()) { typeArg = attr.getTypePtr()->getModifiedType(); S.Diag(attr.getLocStart(), diag::err_objc_type_arg_explicit_nullability) << typeArg << FixItHint::CreateRemoval(rangeToRemove); diagnosed = true; } } if (!diagnosed) { S.Diag(qual.getLocStart(), diag::err_objc_type_arg_qualified) << typeArg << typeArg.getQualifiers().getAsString() << FixItHint::CreateRemoval(rangeToRemove); } } // Remove qualifiers even if they're non-local. typeArg = typeArg.getUnqualifiedType(); finalTypeArgs.push_back(typeArg); if (typeArg->getAs()) anyPackExpansions = true; // Find the corresponding type parameter, if there is one. ObjCTypeParamDecl *typeParam = nullptr; if (!anyPackExpansions) { if (i < numTypeParams) { typeParam = typeParams->begin()[i]; } else { // Too many arguments. S.Diag(loc, diag::err_objc_type_args_wrong_arity) << false << objcClass->getDeclName() << (unsigned)typeArgs.size() << numTypeParams; S.Diag(objcClass->getLocation(), diag::note_previous_decl) << objcClass; if (failOnError) return QualType(); return type; } } // Objective-C object pointer types must be substitutable for the bounds. if (const auto *typeArgObjC = typeArg->getAs()) { // If we don't have a type parameter to match against, assume // everything is fine. There was a prior pack expansion that // means we won't be able to match anything. if (!typeParam) { assert(anyPackExpansions && "Too many arguments?"); continue; } // Retrieve the bound. QualType bound = typeParam->getUnderlyingType(); const auto *boundObjC = bound->getAs(); // Determine whether the type argument is substitutable for the bound. if (typeArgObjC->isObjCIdType()) { // When the type argument is 'id', the only acceptable type // parameter bound is 'id'. if (boundObjC->isObjCIdType()) continue; } else if (S.Context.canAssignObjCInterfaces(boundObjC, typeArgObjC)) { // Otherwise, we follow the assignability rules. continue; } // Diagnose the mismatch. S.Diag(typeArgInfo->getTypeLoc().getLocStart(), diag::err_objc_type_arg_does_not_match_bound) << typeArg << bound << typeParam->getDeclName(); S.Diag(typeParam->getLocation(), diag::note_objc_type_param_here) << typeParam->getDeclName(); if (failOnError) return QualType(); return type; } // Block pointer types are permitted for unqualified 'id' bounds. if (typeArg->isBlockPointerType()) { // If we don't have a type parameter to match against, assume // everything is fine. There was a prior pack expansion that // means we won't be able to match anything. if (!typeParam) { assert(anyPackExpansions && "Too many arguments?"); continue; } // Retrieve the bound. QualType bound = typeParam->getUnderlyingType(); if (bound->isBlockCompatibleObjCPointerType(S.Context)) continue; // Diagnose the mismatch. S.Diag(typeArgInfo->getTypeLoc().getLocStart(), diag::err_objc_type_arg_does_not_match_bound) << typeArg << bound << typeParam->getDeclName(); S.Diag(typeParam->getLocation(), diag::note_objc_type_param_here) << typeParam->getDeclName(); if (failOnError) return QualType(); return type; } // Dependent types will be checked at instantiation time. if (typeArg->isDependentType()) { continue; } // Diagnose non-id-compatible type arguments. 
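    // For example (a sketch): in "NSArray<int> *", 'int' is neither an
    // Objective-C object pointer, a block pointer, nor dependent, so it
    // falls through to this diagnostic.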
S.Diag(typeArgInfo->getTypeLoc().getLocStart(), diag::err_objc_type_arg_not_id_compatible) << typeArg << typeArgInfo->getTypeLoc().getSourceRange(); if (failOnError) return QualType(); return type; } // Make sure we didn't have the wrong number of arguments. if (!anyPackExpansions && finalTypeArgs.size() != numTypeParams) { S.Diag(loc, diag::err_objc_type_args_wrong_arity) << (typeArgs.size() < typeParams->size()) << objcClass->getDeclName() << (unsigned)finalTypeArgs.size() << (unsigned)numTypeParams; S.Diag(objcClass->getLocation(), diag::note_previous_decl) << objcClass; if (failOnError) return QualType(); return type; } // Success. Form the specialized type. return S.Context.getObjCObjectType(type, finalTypeArgs, { }, false); } QualType Sema::BuildObjCTypeParamType(const ObjCTypeParamDecl *Decl, SourceLocation ProtocolLAngleLoc, ArrayRef Protocols, ArrayRef ProtocolLocs, SourceLocation ProtocolRAngleLoc, bool FailOnError) { QualType Result = QualType(Decl->getTypeForDecl(), 0); if (!Protocols.empty()) { bool HasError; Result = Context.applyObjCProtocolQualifiers(Result, Protocols, HasError); if (HasError) { Diag(SourceLocation(), diag::err_invalid_protocol_qualifiers) << SourceRange(ProtocolLAngleLoc, ProtocolRAngleLoc); if (FailOnError) Result = QualType(); } if (FailOnError && Result.isNull()) return QualType(); } return Result; } QualType Sema::BuildObjCObjectType(QualType BaseType, SourceLocation Loc, SourceLocation TypeArgsLAngleLoc, ArrayRef TypeArgs, SourceLocation TypeArgsRAngleLoc, SourceLocation ProtocolLAngleLoc, ArrayRef Protocols, ArrayRef ProtocolLocs, SourceLocation ProtocolRAngleLoc, bool FailOnError) { QualType Result = BaseType; if (!TypeArgs.empty()) { Result = applyObjCTypeArgs(*this, Loc, Result, TypeArgs, SourceRange(TypeArgsLAngleLoc, TypeArgsRAngleLoc), FailOnError); if (FailOnError && Result.isNull()) return QualType(); } if (!Protocols.empty()) { bool HasError; Result = Context.applyObjCProtocolQualifiers(Result, Protocols, HasError); if (HasError) { Diag(Loc, diag::err_invalid_protocol_qualifiers) << SourceRange(ProtocolLAngleLoc, ProtocolRAngleLoc); if (FailOnError) Result = QualType(); } if (FailOnError && Result.isNull()) return QualType(); } return Result; } TypeResult Sema::actOnObjCProtocolQualifierType( SourceLocation lAngleLoc, ArrayRef protocols, ArrayRef protocolLocs, SourceLocation rAngleLoc) { // Form id. QualType Result = Context.getObjCObjectType( Context.ObjCBuiltinIdTy, { }, llvm::makeArrayRef( (ObjCProtocolDecl * const *)protocols.data(), protocols.size()), false); Result = Context.getObjCObjectPointerType(Result); TypeSourceInfo *ResultTInfo = Context.CreateTypeSourceInfo(Result); TypeLoc ResultTL = ResultTInfo->getTypeLoc(); auto ObjCObjectPointerTL = ResultTL.castAs(); ObjCObjectPointerTL.setStarLoc(SourceLocation()); // implicit auto ObjCObjectTL = ObjCObjectPointerTL.getPointeeLoc() .castAs(); ObjCObjectTL.setHasBaseTypeAsWritten(false); ObjCObjectTL.getBaseLoc().initialize(Context, SourceLocation()); // No type arguments. ObjCObjectTL.setTypeArgsLAngleLoc(SourceLocation()); ObjCObjectTL.setTypeArgsRAngleLoc(SourceLocation()); // Fill in protocol qualifiers. ObjCObjectTL.setProtocolLAngleLoc(lAngleLoc); ObjCObjectTL.setProtocolRAngleLoc(rAngleLoc); for (unsigned i = 0, n = protocols.size(); i != n; ++i) ObjCObjectTL.setProtocolLoc(i, protocolLocs[i]); // We're done. Return the completed type to the parser. 
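  // (A sketch of the result: for "id<NSCopying>" this is an
  // ObjCObjectPointerType with an implicit '*', whose pointee is the builtin
  // 'id' object type carrying one protocol qualifier, as filled in above.)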
  return CreateParsedType(Result, ResultTInfo);
}

TypeResult Sema::actOnObjCTypeArgsAndProtocolQualifiers(
             Scope *S,
             SourceLocation Loc,
             ParsedType BaseType,
             SourceLocation TypeArgsLAngleLoc,
             ArrayRef<ParsedType> TypeArgs,
             SourceLocation TypeArgsRAngleLoc,
             SourceLocation ProtocolLAngleLoc,
             ArrayRef<Decl *> Protocols,
             ArrayRef<SourceLocation> ProtocolLocs,
             SourceLocation ProtocolRAngleLoc) {
  TypeSourceInfo *BaseTypeInfo = nullptr;
  QualType T = GetTypeFromParser(BaseType, &BaseTypeInfo);
  if (T.isNull())
    return true;

  // Handle missing type-source info.
  if (!BaseTypeInfo)
    BaseTypeInfo = Context.getTrivialTypeSourceInfo(T, Loc);

  // Extract type arguments.
  SmallVector<TypeSourceInfo *, 4> ActualTypeArgInfos;
  for (unsigned i = 0, n = TypeArgs.size(); i != n; ++i) {
    TypeSourceInfo *TypeArgInfo = nullptr;
    QualType TypeArg = GetTypeFromParser(TypeArgs[i], &TypeArgInfo);
    if (TypeArg.isNull()) {
      ActualTypeArgInfos.clear();
      break;
    }

    assert(TypeArgInfo && "No type source info?");
    ActualTypeArgInfos.push_back(TypeArgInfo);
  }

  // Build the object type.
  QualType Result = BuildObjCObjectType(
      T, BaseTypeInfo->getTypeLoc().getSourceRange().getBegin(),
      TypeArgsLAngleLoc, ActualTypeArgInfos, TypeArgsRAngleLoc,
      ProtocolLAngleLoc,
      llvm::makeArrayRef((ObjCProtocolDecl * const *)Protocols.data(),
                         Protocols.size()),
      ProtocolLocs, ProtocolRAngleLoc,
      /*FailOnError=*/false);

  if (Result == T)
    return BaseType;

  // Create source information for this type.
  TypeSourceInfo *ResultTInfo = Context.CreateTypeSourceInfo(Result);
  TypeLoc ResultTL = ResultTInfo->getTypeLoc();

  // For id<Proto1, Proto2> or Class<Proto1, Proto2>, we'll have an
  // object pointer type. Fill in source information for it.
  if (auto ObjCObjectPointerTL = ResultTL.getAs<ObjCObjectPointerTypeLoc>()) {
    // The '*' is implicit.
    ObjCObjectPointerTL.setStarLoc(SourceLocation());
    ResultTL = ObjCObjectPointerTL.getPointeeLoc();
  }

  if (auto OTPTL = ResultTL.getAs<ObjCTypeParamTypeLoc>()) {
    // Protocol qualifier information.
    if (OTPTL.getNumProtocols() > 0) {
      assert(OTPTL.getNumProtocols() == Protocols.size());
      OTPTL.setProtocolLAngleLoc(ProtocolLAngleLoc);
      OTPTL.setProtocolRAngleLoc(ProtocolRAngleLoc);
      for (unsigned i = 0, n = Protocols.size(); i != n; ++i)
        OTPTL.setProtocolLoc(i, ProtocolLocs[i]);
    }

    // We're done. Return the completed type to the parser.
    return CreateParsedType(Result, ResultTInfo);
  }

  auto ObjCObjectTL = ResultTL.castAs<ObjCObjectTypeLoc>();

  // Type argument information.
  if (ObjCObjectTL.getNumTypeArgs() > 0) {
    assert(ObjCObjectTL.getNumTypeArgs() == ActualTypeArgInfos.size());
    ObjCObjectTL.setTypeArgsLAngleLoc(TypeArgsLAngleLoc);
    ObjCObjectTL.setTypeArgsRAngleLoc(TypeArgsRAngleLoc);
    for (unsigned i = 0, n = ActualTypeArgInfos.size(); i != n; ++i)
      ObjCObjectTL.setTypeArgTInfo(i, ActualTypeArgInfos[i]);
  } else {
    ObjCObjectTL.setTypeArgsLAngleLoc(SourceLocation());
    ObjCObjectTL.setTypeArgsRAngleLoc(SourceLocation());
  }

  // Protocol qualifier information.
  if (ObjCObjectTL.getNumProtocols() > 0) {
    assert(ObjCObjectTL.getNumProtocols() == Protocols.size());
    ObjCObjectTL.setProtocolLAngleLoc(ProtocolLAngleLoc);
    ObjCObjectTL.setProtocolRAngleLoc(ProtocolRAngleLoc);
    for (unsigned i = 0, n = Protocols.size(); i != n; ++i)
      ObjCObjectTL.setProtocolLoc(i, ProtocolLocs[i]);
  } else {
    ObjCObjectTL.setProtocolLAngleLoc(SourceLocation());
    ObjCObjectTL.setProtocolRAngleLoc(SourceLocation());
  }

  // Base type.
  ObjCObjectTL.setHasBaseTypeAsWritten(true);
  if (ObjCObjectTL.getType() == T)
    ObjCObjectTL.getBaseLoc().initializeFullCopy(BaseTypeInfo->getTypeLoc());
  else
    ObjCObjectTL.getBaseLoc().initialize(Context, Loc);

  // We're done. Return the completed type to the parser.
  return CreateParsedType(Result, ResultTInfo);
}

static OpenCLAccessAttr::Spelling getImageAccess(const AttributeList *Attrs) {
  if (Attrs) {
    const AttributeList *Next = Attrs;
    do {
      const AttributeList &Attr = *Next;
      Next = Attr.getNext();
      if (Attr.getKind() == AttributeList::AT_OpenCLAccess) {
        return static_cast<OpenCLAccessAttr::Spelling>(
            Attr.getSemanticSpelling());
      }
    } while (Next);
  }
  return OpenCLAccessAttr::Keyword_read_only;
}

/// \brief Convert the specified declspec to the appropriate type
/// object.
/// \param state Specifies the declarator containing the declaration specifier
/// to be converted, along with other associated processing state.
/// \returns The type described by the declaration specifiers. This function
/// never returns null.
static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
  // FIXME: Should move the logic from DeclSpec::Finish to here for validity
  // checking.
  Sema &S = state.getSema();
  Declarator &declarator = state.getDeclarator();
  const DeclSpec &DS = declarator.getDeclSpec();
  SourceLocation DeclLoc = declarator.getIdentifierLoc();
  if (DeclLoc.isInvalid())
    DeclLoc = DS.getLocStart();

  ASTContext &Context = S.Context;

  QualType Result;
  switch (DS.getTypeSpecType()) {
  case DeclSpec::TST_void:
    Result = Context.VoidTy;
    break;
  case DeclSpec::TST_char:
    if (DS.getTypeSpecSign() == DeclSpec::TSS_unspecified)
      Result = Context.CharTy;
    else if (DS.getTypeSpecSign() == DeclSpec::TSS_signed)
      Result = Context.SignedCharTy;
    else {
      assert(DS.getTypeSpecSign() == DeclSpec::TSS_unsigned &&
             "Unknown TSS value");
      Result = Context.UnsignedCharTy;
    }
    break;
  case DeclSpec::TST_wchar:
    if (DS.getTypeSpecSign() == DeclSpec::TSS_unspecified)
      Result = Context.WCharTy;
    else if (DS.getTypeSpecSign() == DeclSpec::TSS_signed) {
      S.Diag(DS.getTypeSpecSignLoc(), diag::ext_invalid_sign_spec)
        << DS.getSpecifierName(DS.getTypeSpecType(),
                               Context.getPrintingPolicy());
      Result = Context.getSignedWCharType();
    } else {
      assert(DS.getTypeSpecSign() == DeclSpec::TSS_unsigned &&
             "Unknown TSS value");
      S.Diag(DS.getTypeSpecSignLoc(), diag::ext_invalid_sign_spec)
        << DS.getSpecifierName(DS.getTypeSpecType(),
                               Context.getPrintingPolicy());
      Result = Context.getUnsignedWCharType();
    }
    break;
  case DeclSpec::TST_char16:
    assert(DS.getTypeSpecSign() == DeclSpec::TSS_unspecified &&
           "Unknown TSS value");
    Result = Context.Char16Ty;
    break;
  case DeclSpec::TST_char32:
    assert(DS.getTypeSpecSign() == DeclSpec::TSS_unspecified &&
           "Unknown TSS value");
    Result = Context.Char32Ty;
    break;
  case DeclSpec::TST_unspecified:
    // If this is a missing declspec in a block literal return context, then it
    // is inferred from the return statements inside the block.
    // The declspec is always missing in a lambda expr context; it is either
    // specified with a trailing return type or inferred.
    if (S.getLangOpts().CPlusPlus14 &&
        declarator.getContext() == Declarator::LambdaExprContext) {
      // In C++1y, a lambda's implicit return type is 'auto'.
      Result = Context.getAutoDeductType();
      break;
    } else if (declarator.getContext() == Declarator::LambdaExprContext ||
               checkOmittedBlockReturnType(S, declarator,
                                           Context.DependentTy)) {
      Result = Context.DependentTy;
      break;
    }

    // Unspecified typespec defaults to int in C90. However, the C90 grammar
    // [C90 6.5] only allows a decl-spec if there was *some* type-specifier,
    // type-qualifier, or storage-class-specifier. If not, emit an extwarn.
    // Note that the one exception to this is function definitions, which are
    // allowed to be completely missing a declspec.
    // This is handled in the parser already though by it pretending to have
    // seen an 'int' in this case.
    if (S.getLangOpts().ImplicitInt) {
      // In C89 mode, we only warn if there is a completely missing declspec
      // when one is not allowed.
      if (DS.isEmpty()) {
        S.Diag(DeclLoc, diag::ext_missing_declspec)
          << DS.getSourceRange()
          << FixItHint::CreateInsertion(DS.getLocStart(), "int");
      }
    } else if (!DS.hasTypeSpecifier()) {
      // C99 and C++ require a type specifier. For example, C99 6.7.2p2 says:
      // "At least one type specifier shall be given in the declaration
      // specifiers in each declaration, and in the specifier-qualifier list in
      // each struct declaration and type name."
      if (S.getLangOpts().CPlusPlus) {
        S.Diag(DeclLoc, diag::err_missing_type_specifier)
          << DS.getSourceRange();

        // When this occurs in C++ code, often something is very broken with the
        // value being declared, poison it as invalid so we don't get chains of
        // errors.
        declarator.setInvalidType(true);
      } else if (S.getLangOpts().OpenCLVersion >= 200 &&
                 DS.isTypeSpecPipe()) {
        S.Diag(DeclLoc, diag::err_missing_actual_pipe_type)
          << DS.getSourceRange();
        declarator.setInvalidType(true);
      } else {
        S.Diag(DeclLoc, diag::ext_missing_type_specifier)
          << DS.getSourceRange();
      }
    }

    // FALL THROUGH.
  case DeclSpec::TST_int: {
    if (DS.getTypeSpecSign() != DeclSpec::TSS_unsigned) {
      switch (DS.getTypeSpecWidth()) {
      case DeclSpec::TSW_unspecified: Result = Context.IntTy; break;
      case DeclSpec::TSW_short:       Result = Context.ShortTy; break;
      case DeclSpec::TSW_long:        Result = Context.LongTy; break;
      case DeclSpec::TSW_longlong:
        Result = Context.LongLongTy;

        // 'long long' is a C99 or C++11 feature.
        if (!S.getLangOpts().C99) {
          if (S.getLangOpts().CPlusPlus)
            S.Diag(DS.getTypeSpecWidthLoc(),
                   S.getLangOpts().CPlusPlus11 ?
                   diag::warn_cxx98_compat_longlong : diag::ext_cxx11_longlong);
          else
            S.Diag(DS.getTypeSpecWidthLoc(), diag::ext_c99_longlong);
        }
        break;
      }
    } else {
      switch (DS.getTypeSpecWidth()) {
      case DeclSpec::TSW_unspecified: Result = Context.UnsignedIntTy; break;
      case DeclSpec::TSW_short:       Result = Context.UnsignedShortTy; break;
      case DeclSpec::TSW_long:        Result = Context.UnsignedLongTy; break;
      case DeclSpec::TSW_longlong:
        Result = Context.UnsignedLongLongTy;

        // 'long long' is a C99 or C++11 feature.
        if (!S.getLangOpts().C99) {
          if (S.getLangOpts().CPlusPlus)
            S.Diag(DS.getTypeSpecWidthLoc(),
                   S.getLangOpts().CPlusPlus11 ?
                   diag::warn_cxx98_compat_longlong : diag::ext_cxx11_longlong);
          else
            S.Diag(DS.getTypeSpecWidthLoc(), diag::ext_c99_longlong);
        }
        break;
      }
    }
    break;
  }
  case DeclSpec::TST_int128:
    if (!S.Context.getTargetInfo().hasInt128Type())
      S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported)
        << "__int128";
    if (DS.getTypeSpecSign() == DeclSpec::TSS_unsigned)
      Result = Context.UnsignedInt128Ty;
    else
      Result = Context.Int128Ty;
    break;
  case DeclSpec::TST_half: Result = Context.HalfTy; break;
  case DeclSpec::TST_float: Result = Context.FloatTy; break;
  case DeclSpec::TST_double:
    if (DS.getTypeSpecWidth() == DeclSpec::TSW_long)
      Result = Context.LongDoubleTy;
    else
      Result = Context.DoubleTy;
    break;
  case DeclSpec::TST_float128:
    if (!S.Context.getTargetInfo().hasFloat128Type())
      S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported)
        << "__float128";
    Result = Context.Float128Ty;
    break;
  case DeclSpec::TST_bool: Result = Context.BoolTy; break; // _Bool or bool
  case DeclSpec::TST_decimal32:    // _Decimal32
  case DeclSpec::TST_decimal64:    // _Decimal64
  case DeclSpec::TST_decimal128:   // _Decimal128
    S.Diag(DS.getTypeSpecTypeLoc(), diag::err_decimal_unsupported);
    Result = Context.IntTy;
    declarator.setInvalidType(true);
    break;
  case DeclSpec::TST_class:
  case DeclSpec::TST_enum:
  case DeclSpec::TST_union:
  case DeclSpec::TST_struct:
  case DeclSpec::TST_interface: {
    TypeDecl *D = dyn_cast_or_null<TypeDecl>(DS.getRepAsDecl());
    if (!D) {
      // This can happen in C++ with ambiguous lookups.
      Result = Context.IntTy;
      declarator.setInvalidType(true);
      break;
    }

    // If the type is deprecated or unavailable, diagnose it.
    S.DiagnoseUseOfDecl(D, DS.getTypeSpecTypeNameLoc());

    assert(DS.getTypeSpecWidth() == 0 && DS.getTypeSpecComplex() == 0 &&
           DS.getTypeSpecSign() == 0 && "No qualifiers on tag names!");

    // TypeQuals handled by caller.
    Result = Context.getTypeDeclType(D);

    // In both C and C++, make an ElaboratedType.
    ElaboratedTypeKeyword Keyword
      = ElaboratedType::getKeywordForTypeSpec(DS.getTypeSpecType());
    Result = S.getElaboratedType(Keyword, DS.getTypeSpecScope(), Result);
    break;
  }
  case DeclSpec::TST_typename: {
    assert(DS.getTypeSpecWidth() == 0 && DS.getTypeSpecComplex() == 0 &&
           DS.getTypeSpecSign() == 0 &&
           "Can't handle qualifiers on typedef names yet!");
    Result = S.GetTypeFromParser(DS.getRepAsType());
    if (Result.isNull()) {
      declarator.setInvalidType(true);
    }

    // TypeQuals handled by caller.
    break;
  }
  case DeclSpec::TST_typeofType:
    // FIXME: Preserve type source info.
    Result = S.GetTypeFromParser(DS.getRepAsType());
    assert(!Result.isNull() && "Didn't get a type for typeof?");
    if (!Result->isDependentType())
      if (const TagType *TT = Result->getAs<TagType>())
        S.DiagnoseUseOfDecl(TT->getDecl(), DS.getTypeSpecTypeLoc());
    // TypeQuals handled by caller.
    Result = Context.getTypeOfType(Result);
    break;
  case DeclSpec::TST_typeofExpr: {
    Expr *E = DS.getRepAsExpr();
    assert(E && "Didn't get an expression for typeof?");
    // TypeQuals handled by caller.
    Result = S.BuildTypeofExprType(E, DS.getTypeSpecTypeLoc());
    if (Result.isNull()) {
      Result = Context.IntTy;
      declarator.setInvalidType(true);
    }
    break;
  }
  case DeclSpec::TST_decltype: {
    Expr *E = DS.getRepAsExpr();
    assert(E && "Didn't get an expression for decltype?");
    // TypeQuals handled by caller.
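    // Illustrative only (hypothetical user code): BuildDecltypeType below
    // implements C++11 decltype, e.g.
    //
    //   int i;
    //   decltype(i)   a = i;  // int
    //   decltype((i)) b = i;  // int&, since '(i)' is an lvalue expression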
Result = S.BuildDecltypeType(E, DS.getTypeSpecTypeLoc()); if (Result.isNull()) { Result = Context.IntTy; declarator.setInvalidType(true); } break; } case DeclSpec::TST_underlyingType: Result = S.GetTypeFromParser(DS.getRepAsType()); assert(!Result.isNull() && "Didn't get a type for __underlying_type?"); Result = S.BuildUnaryTransformType(Result, UnaryTransformType::EnumUnderlyingType, DS.getTypeSpecTypeLoc()); if (Result.isNull()) { Result = Context.IntTy; declarator.setInvalidType(true); } break; case DeclSpec::TST_auto: // TypeQuals handled by caller. // If auto is mentioned in a lambda parameter context, convert it to a // template parameter type immediately, with the appropriate depth and // index, and update sema's state (LambdaScopeInfo) for the current lambda // being analyzed (which tracks the invented type template parameter). if (declarator.getContext() == Declarator::LambdaExprParameterContext) { sema::LambdaScopeInfo *LSI = S.getCurLambda(); assert(LSI && "No LambdaScopeInfo on the stack!"); const unsigned TemplateParameterDepth = LSI->AutoTemplateParameterDepth; const unsigned AutoParameterPosition = LSI->AutoTemplateParams.size(); const bool IsParameterPack = declarator.hasEllipsis(); // Turns out we must create the TemplateTypeParmDecl here to // retrieve the corresponding template parameter type. TemplateTypeParmDecl *CorrespondingTemplateParam = TemplateTypeParmDecl::Create(Context, // Temporarily add to the TranslationUnit DeclContext. When the // associated TemplateParameterList is attached to a template // declaration (such as FunctionTemplateDecl), the DeclContext // for each template parameter gets updated appropriately via // a call to AdoptTemplateParameterList. Context.getTranslationUnitDecl(), /*KeyLoc*/ SourceLocation(), /*NameLoc*/ declarator.getLocStart(), TemplateParameterDepth, AutoParameterPosition, // our template param index /* Identifier*/ nullptr, false, IsParameterPack); LSI->AutoTemplateParams.push_back(CorrespondingTemplateParam); // Replace the 'auto' in the function parameter with this invented // template type parameter. Result = QualType(CorrespondingTemplateParam->getTypeForDecl(), 0); } else { Result = Context.getAutoType(QualType(), AutoTypeKeyword::Auto, false); } break; case DeclSpec::TST_auto_type: Result = Context.getAutoType(QualType(), AutoTypeKeyword::GNUAutoType, false); break; case DeclSpec::TST_decltype_auto: Result = Context.getAutoType(QualType(), AutoTypeKeyword::DecltypeAuto, /*IsDependent*/ false); break; case DeclSpec::TST_unknown_anytype: Result = Context.UnknownAnyTy; break; case DeclSpec::TST_atomic: Result = S.GetTypeFromParser(DS.getRepAsType()); assert(!Result.isNull() && "Didn't get a type for _Atomic?"); Result = S.BuildAtomicType(Result, DS.getTypeSpecTypeLoc()); if (Result.isNull()) { Result = Context.IntTy; declarator.setInvalidType(true); } break; #define GENERIC_IMAGE_TYPE(ImgType, Id) \ case DeclSpec::TST_##ImgType##_t: \ switch (getImageAccess(DS.getAttributes().getList())) { \ case OpenCLAccessAttr::Keyword_write_only: \ Result = Context.Id##WOTy; break; \ case OpenCLAccessAttr::Keyword_read_write: \ Result = Context.Id##RWTy; break; \ case OpenCLAccessAttr::Keyword_read_only: \ Result = Context.Id##ROTy; break; \ } \ break; #include "clang/Basic/OpenCLImageTypes.def" case DeclSpec::TST_error: Result = Context.IntTy; declarator.setInvalidType(true); break; } if (S.getLangOpts().OpenCL && S.checkOpenCLDisabledTypeDeclSpec(DS, Result)) declarator.setInvalidType(true); // Handle complex types. 
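  // Illustrative only (hypothetical user code): '_Complex double z;' takes
  // the branch below; on a freestanding implementation it is additionally
  // diagnosed with ext_freestanding_complex, since complex types are optional
  // for freestanding C99 implementations.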
if (DS.getTypeSpecComplex() == DeclSpec::TSC_complex) { if (S.getLangOpts().Freestanding) S.Diag(DS.getTypeSpecComplexLoc(), diag::ext_freestanding_complex); Result = Context.getComplexType(Result); } else if (DS.isTypeAltiVecVector()) { unsigned typeSize = static_cast(Context.getTypeSize(Result)); assert(typeSize > 0 && "type size for vector must be greater than 0 bits"); VectorType::VectorKind VecKind = VectorType::AltiVecVector; if (DS.isTypeAltiVecPixel()) VecKind = VectorType::AltiVecPixel; else if (DS.isTypeAltiVecBool()) VecKind = VectorType::AltiVecBool; Result = Context.getVectorType(Result, 128/typeSize, VecKind); } // FIXME: Imaginary. if (DS.getTypeSpecComplex() == DeclSpec::TSC_imaginary) S.Diag(DS.getTypeSpecComplexLoc(), diag::err_imaginary_not_supported); // Before we process any type attributes, synthesize a block literal // function declarator if necessary. if (declarator.getContext() == Declarator::BlockLiteralContext) maybeSynthesizeBlockSignature(state, Result); // Apply any type attributes from the decl spec. This may cause the // list of type attributes to be temporarily saved while the type // attributes are pushed around. // pipe attributes will be handled later ( at GetFullTypeForDeclarator ) if (!DS.isTypeSpecPipe()) processTypeAttrs(state, Result, TAL_DeclSpec, DS.getAttributes().getList()); // Apply const/volatile/restrict qualifiers to T. if (unsigned TypeQuals = DS.getTypeQualifiers()) { // Warn about CV qualifiers on function types. // C99 6.7.3p8: // If the specification of a function type includes any type qualifiers, // the behavior is undefined. // C++11 [dcl.fct]p7: // The effect of a cv-qualifier-seq in a function declarator is not the // same as adding cv-qualification on top of the function type. In the // latter case, the cv-qualifiers are ignored. if (TypeQuals && Result->isFunctionType()) { diagnoseAndRemoveTypeQualifiers( S, DS, TypeQuals, Result, DeclSpec::TQ_const | DeclSpec::TQ_volatile, S.getLangOpts().CPlusPlus ? diag::warn_typecheck_function_qualifiers_ignored : diag::warn_typecheck_function_qualifiers_unspecified); // No diagnostic for 'restrict' or '_Atomic' applied to a // function type; we'll diagnose those later, in BuildQualifiedType. } // C++11 [dcl.ref]p1: // Cv-qualified references are ill-formed except when the // cv-qualifiers are introduced through the use of a typedef-name // or decltype-specifier, in which case the cv-qualifiers are ignored. // // There don't appear to be any other contexts in which a cv-qualified // reference type could be formed, so the 'ill-formed' clause here appears // to never happen. if (TypeQuals && Result->isReferenceType()) { diagnoseAndRemoveTypeQualifiers( S, DS, TypeQuals, Result, DeclSpec::TQ_const | DeclSpec::TQ_volatile | DeclSpec::TQ_atomic, diag::warn_typecheck_reference_qualifiers); } // C90 6.5.3 constraints: "The same type qualifier shall not appear more // than once in the same specifier-list or qualifier-list, either directly // or via one or more typedefs." if (!S.getLangOpts().C99 && !S.getLangOpts().CPlusPlus && TypeQuals & Result.getCVRQualifiers()) { if (TypeQuals & DeclSpec::TQ_const && Result.isConstQualified()) { S.Diag(DS.getConstSpecLoc(), diag::ext_duplicate_declspec) << "const"; } if (TypeQuals & DeclSpec::TQ_volatile && Result.isVolatileQualified()) { S.Diag(DS.getVolatileSpecLoc(), diag::ext_duplicate_declspec) << "volatile"; } // C90 doesn't have restrict nor _Atomic, so it doesn't force us to // produce a warning in this case. 
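  // Illustrative only (hypothetical user code, C90 mode):
  //
  //   const const int c = 0;  // ext_duplicate_declspec on the second 'const'
  //
  // The check above fires only when neither C99 nor C++ is in effect,
  // matching the C90 constraint quoted in the comment.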
    }

    QualType Qualified = S.BuildQualifiedType(Result, DeclLoc, TypeQuals, &DS);

    // If adding qualifiers fails, just use the unqualified type.
    if (Qualified.isNull())
      declarator.setInvalidType(true);
    else
      Result = Qualified;
  }

  assert(!Result.isNull() && "This function should not return a null type");
  return Result;
}

static std::string getPrintableNameForEntity(DeclarationName Entity) {
  if (Entity)
    return Entity.getAsString();

  return "type name";
}

QualType Sema::BuildQualifiedType(QualType T, SourceLocation Loc,
                                  Qualifiers Qs, const DeclSpec *DS) {
  if (T.isNull())
    return QualType();

  // Ignore any attempt to form a cv-qualified reference.
  if (T->isReferenceType()) {
    Qs.removeConst();
    Qs.removeVolatile();
  }

  // Enforce C99 6.7.3p2: "Types other than pointer types derived from
  // object or incomplete types shall not be restrict-qualified."
  if (Qs.hasRestrict()) {
    unsigned DiagID = 0;
    QualType ProblemTy;

    if (T->isAnyPointerType() || T->isReferenceType() ||
        T->isMemberPointerType()) {
      QualType EltTy;
      if (T->isObjCObjectPointerType())
        EltTy = T;
      else if (const MemberPointerType *PTy = T->getAs<MemberPointerType>())
        EltTy = PTy->getPointeeType();
      else
        EltTy = T->getPointeeType();

      // If we have a pointer or reference, the pointee must have an object
      // or incomplete type.
      if (!EltTy->isIncompleteOrObjectType()) {
        DiagID = diag::err_typecheck_invalid_restrict_invalid_pointee;
        ProblemTy = EltTy;
      }
    } else if (!T->isDependentType()) {
      DiagID = diag::err_typecheck_invalid_restrict_not_pointer;
      ProblemTy = T;
    }

    if (DiagID) {
      Diag(DS ? DS->getRestrictSpecLoc() : Loc, DiagID) << ProblemTy;
      Qs.removeRestrict();
    }
  }

  return Context.getQualifiedType(T, Qs);
}

QualType Sema::BuildQualifiedType(QualType T, SourceLocation Loc,
                                  unsigned CVRAU, const DeclSpec *DS) {
  if (T.isNull())
    return QualType();

  // Ignore any attempt to form a cv-qualified reference.
  if (T->isReferenceType())
    CVRAU &=
        ~(DeclSpec::TQ_const | DeclSpec::TQ_volatile | DeclSpec::TQ_atomic);

  // Convert from DeclSpec::TQ to Qualifiers::TQ by just dropping TQ_atomic and
  // TQ_unaligned;
  unsigned CVR = CVRAU & ~(DeclSpec::TQ_atomic | DeclSpec::TQ_unaligned);

  // C11 6.7.3/5:
  //   If the same qualifier appears more than once in the same
  //   specifier-qualifier-list, either directly or via one or more typedefs,
  //   the behavior is the same as if it appeared only once.
  //
  // It's not specified what happens when the _Atomic qualifier is applied to
  // a type specified with the _Atomic specifier, but we assume that this
  // should be treated as if the _Atomic qualifier appeared multiple times.
  if (CVRAU & DeclSpec::TQ_atomic && !T->isAtomicType()) {
    // C11 6.7.3/5:
    //   If other qualifiers appear along with the _Atomic qualifier in a
    //   specifier-qualifier-list, the resulting type is the so-qualified
    //   atomic type.
    //
    // Don't need to worry about array types here, since _Atomic can't be
    // applied to such types.
    SplitQualType Split = T.getSplitUnqualifiedType();
    T = BuildAtomicType(QualType(Split.Ty, 0),
                        DS ? DS->getAtomicSpecLoc() : Loc);
    if (T.isNull())
      return T;
    Split.Quals.addCVRQualifiers(CVR);
    return BuildQualifiedType(T, Loc, Split.Quals);
  }

  Qualifiers Q = Qualifiers::fromCVRMask(CVR);
  Q.setUnaligned(CVRAU & DeclSpec::TQ_unaligned);
  return BuildQualifiedType(T, Loc, Q, DS);
}

/// \brief Build a paren type including \p T.
QualType Sema::BuildParenType(QualType T) {
  return Context.getParenType(T);
}

/// Given that we're building a pointer or reference to the given
static QualType inferARCLifetimeForPointee(Sema &S, QualType type,
                                           SourceLocation loc,
                                           bool isReference) {
  // Bail out if retention is unrequired or already specified.
  if (!type->isObjCLifetimeType() ||
      type.getObjCLifetime() != Qualifiers::OCL_None)
    return type;

  Qualifiers::ObjCLifetime implicitLifetime = Qualifiers::OCL_None;

  // If the object type is const-qualified, we can safely use
  // __unsafe_unretained.  This is safe (because there are no read
  // barriers), and it'll be safe to coerce anything but __weak* to
  // the resulting type.
  if (type.isConstQualified()) {
    implicitLifetime = Qualifiers::OCL_ExplicitNone;

  // Otherwise, check whether the static type does not require
  // retaining.  This currently only triggers for Class (possibly
  // protocol-qualified, and arrays thereof).
  } else if (type->isObjCARCImplicitlyUnretainedType()) {
    implicitLifetime = Qualifiers::OCL_ExplicitNone;

  // If we are in an unevaluated context, like sizeof, skip adding a
  // qualification.
  } else if (S.isUnevaluatedContext()) {
    return type;

  // If that failed, give an error and recover using __strong.  __strong
  // is the option most likely to prevent spurious second-order diagnostics,
  // like when binding a reference to a field.
  } else {
    // These types can show up in private ivars in system headers, so
    // we need this to not be an error in those cases.  Instead we
    // want to delay.
    if (S.DelayedDiagnostics.shouldDelayDiagnostics()) {
      S.DelayedDiagnostics.add(
          sema::DelayedDiagnostic::makeForbiddenType(loc,
              diag::err_arc_indirect_no_ownership, type, isReference));
    } else {
      S.Diag(loc, diag::err_arc_indirect_no_ownership) << type << isReference;
    }
    implicitLifetime = Qualifiers::OCL_Strong;
  }
  assert(implicitLifetime && "didn't infer any lifetime!");

  Qualifiers qs;
  qs.addObjCLifetime(implicitLifetime);
  return S.Context.getQualifiedType(type, qs);
}

static std::string getFunctionQualifiersAsString(const FunctionProtoType *FnTy){
  std::string Quals =
    Qualifiers::fromCVRMask(FnTy->getTypeQuals()).getAsString();

  switch (FnTy->getRefQualifier()) {
  case RQ_None:
    break;

  case RQ_LValue:
    if (!Quals.empty())
      Quals += ' ';
    Quals += '&';
    break;

  case RQ_RValue:
    if (!Quals.empty())
      Quals += ' ';
    Quals += "&&";
    break;
  }

  return Quals;
}

namespace {
/// Kinds of declarator that cannot contain a qualified function type.
///
/// C++98 [dcl.fct]p4 / C++11 [dcl.fct]p6:
///     a function type with a cv-qualifier or a ref-qualifier can only appear
///     at the topmost level of a type.
///
/// Parens and member pointers are permitted. We don't diagnose array and
/// function declarators, because they don't allow function types at all.
///
/// The values of this enum are used in diagnostics.
enum QualifiedFunctionKind { QFK_BlockPointer, QFK_Pointer, QFK_Reference };
} // end anonymous namespace

/// Check whether the type T is a qualified function type, and if it is,
/// diagnose that it cannot be contained within the given kind of declarator.
static bool checkQualifiedFunction(Sema &S, QualType T, SourceLocation Loc,
                                   QualifiedFunctionKind QFK) {
  // Does T refer to a function type with a cv-qualifier or a ref-qualifier?
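  // Illustrative only (hypothetical user code): the rule enforced below
  // rejects, e.g.,
  //
  //   using QF = void() const;
  //   QF *p;   // error: pointer to a cv-qualified function type
  //
  // while a member pointer such as 'void (C::*)() const' remains legal,
  // since qualified function types may appear behind a member pointer.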
  const FunctionProtoType *FPT = T->getAs<FunctionProtoType>();
  if (!FPT ||
      (FPT->getTypeQuals() == 0 && FPT->getRefQualifier() == RQ_None))
    return false;

  S.Diag(Loc, diag::err_compound_qualified_function_type)
    << QFK << isa<FunctionType>(T.IgnoreParens()) << T
    << getFunctionQualifiersAsString(FPT);
  return true;
}

/// \brief Build a pointer type.
///
/// \param T The type to which we'll be building a pointer.
///
/// \param Loc The location of the entity whose type involves this
/// pointer type or, if there is no such entity, the location of the
/// type that will have pointer type.
///
/// \param Entity The name of the entity that involves the pointer
/// type, if known.
///
/// \returns A suitable pointer type, if there are no
/// errors. Otherwise, returns a NULL type.
QualType Sema::BuildPointerType(QualType T,
                                SourceLocation Loc, DeclarationName Entity) {
  if (T->isReferenceType()) {
    // C++ 8.3.2p4: There shall be no ... pointers to references ...
    Diag(Loc, diag::err_illegal_decl_pointer_to_reference)
      << getPrintableNameForEntity(Entity) << T;
    return QualType();
  }

  if (checkQualifiedFunction(*this, T, Loc, QFK_Pointer))
    return QualType();

  assert(!T->isObjCObjectType() && "Should build ObjCObjectPointerType");

  // In ARC, it is forbidden to build pointers to unqualified pointers.
  if (getLangOpts().ObjCAutoRefCount)
    T = inferARCLifetimeForPointee(*this, T, Loc, /*reference*/ false);

  // Build the pointer type.
  return Context.getPointerType(T);
}

/// \brief Build a reference type.
///
/// \param T The type to which we'll be building a reference.
///
/// \param Loc The location of the entity whose type involves this
/// reference type or, if there is no such entity, the location of the
/// type that will have reference type.
///
/// \param Entity The name of the entity that involves the reference
/// type, if known.
///
/// \returns A suitable reference type, if there are no
/// errors. Otherwise, returns a NULL type.
QualType Sema::BuildReferenceType(QualType T, bool SpelledAsLValue,
                                  SourceLocation Loc,
                                  DeclarationName Entity) {
  assert(Context.getCanonicalType(T) != Context.OverloadTy &&
         "Unresolved overloaded function type");

  // C++0x [dcl.ref]p6:
  //   If a typedef (7.1.3), a type template-parameter (14.3.1), or a
  //   decltype-specifier (7.1.6.2) denotes a type TR that is a reference to a
  //   type T, an attempt to create the type "lvalue reference to cv TR" creates
  //   the type "lvalue reference to T", while an attempt to create the type
  //   "rvalue reference to cv TR" creates the type TR.
  bool LValueRef = SpelledAsLValue || T->getAs<LValueReferenceType>();

  // C++ [dcl.ref]p4: There shall be no references to references.
  //
  // According to C++ DR 106, references to references are only
  // diagnosed when they are written directly (e.g., "int & &"),
  // but not when they happen via a typedef:
  //
  //   typedef int& intref;
  //   typedef intref& intref2;
  //
  // Parser::ParseDeclaratorInternal diagnoses the case where
  // references are written directly; here, we handle the
  // collapsing of references-to-references as described in C++0x.
  // DR 106 and 540 introduce reference-collapsing into C++98/03.

  // C++ [dcl.ref]p1:
  //   A declarator that specifies the type "reference to cv void"
  //   is ill-formed.
  if (T->isVoidType()) {
    Diag(Loc, diag::err_reference_to_void);
    return QualType();
  }

  if (checkQualifiedFunction(*this, T, Loc, QFK_Reference))
    return QualType();

  // In ARC, it is forbidden to build references to unqualified pointers.
  if (getLangOpts().ObjCAutoRefCount)
    T = inferARCLifetimeForPointee(*this, T, Loc, /*reference*/ true);

  // Handle restrict on references.
if (LValueRef) return Context.getLValueReferenceType(T, SpelledAsLValue); return Context.getRValueReferenceType(T); } /// \brief Build a Read-only Pipe type. /// /// \param T The type to which we'll be building a Pipe. /// /// \param Loc We do not use it for now. /// /// \returns A suitable pipe type, if there are no errors. Otherwise, returns a /// NULL type. QualType Sema::BuildReadPipeType(QualType T, SourceLocation Loc) { return Context.getReadPipeType(T); } /// \brief Build a Write-only Pipe type. /// /// \param T The type to which we'll be building a Pipe. /// /// \param Loc We do not use it for now. /// /// \returns A suitable pipe type, if there are no errors. Otherwise, returns a /// NULL type. QualType Sema::BuildWritePipeType(QualType T, SourceLocation Loc) { return Context.getWritePipeType(T); } /// Check whether the specified array size makes the array type a VLA. If so, /// return true, if not, return the size of the array in SizeVal. static bool isArraySizeVLA(Sema &S, Expr *ArraySize, llvm::APSInt &SizeVal) { // If the size is an ICE, it certainly isn't a VLA. If we're in a GNU mode // (like gnu99, but not c99) accept any evaluatable value as an extension. class VLADiagnoser : public Sema::VerifyICEDiagnoser { public: VLADiagnoser() : Sema::VerifyICEDiagnoser(true) {} void diagnoseNotICE(Sema &S, SourceLocation Loc, SourceRange SR) override { } void diagnoseFold(Sema &S, SourceLocation Loc, SourceRange SR) override { S.Diag(Loc, diag::ext_vla_folded_to_constant) << SR; } } Diagnoser; return S.VerifyIntegerConstantExpression(ArraySize, &SizeVal, Diagnoser, S.LangOpts.GNUMode || S.LangOpts.OpenCL).isInvalid(); } /// \brief Build an array type. /// /// \param T The type of each element in the array. /// /// \param ASM C99 array size modifier (e.g., '*', 'static'). /// /// \param ArraySize Expression describing the size of the array. /// /// \param Brackets The range from the opening '[' to the closing ']'. /// /// \param Entity The name of the entity that involves the array /// type, if known. /// /// \returns A suitable array type, if there are no errors. Otherwise, /// returns a NULL type. QualType Sema::BuildArrayType(QualType T, ArrayType::ArraySizeModifier ASM, Expr *ArraySize, unsigned Quals, SourceRange Brackets, DeclarationName Entity) { SourceLocation Loc = Brackets.getBegin(); if (getLangOpts().CPlusPlus) { // C++ [dcl.array]p1: // T is called the array element type; this type shall not be a reference // type, the (possibly cv-qualified) type void, a function type or an // abstract class type. // // C++ [dcl.array]p3: // When several "array of" specifications are adjacent, [...] only the // first of the constant expressions that specify the bounds of the arrays // may be omitted. // // Note: function types are handled in the common path with C. if (T->isReferenceType()) { Diag(Loc, diag::err_illegal_decl_array_of_references) << getPrintableNameForEntity(Entity) << T; return QualType(); } if (T->isVoidType() || T->isIncompleteArrayType()) { Diag(Loc, diag::err_illegal_decl_array_incomplete_type) << T; return QualType(); } if (RequireNonAbstractType(Brackets.getBegin(), T, diag::err_array_of_abstract_type)) return QualType(); // Mentioning a member pointer type for an array type causes us to lock in // an inheritance model, even if it's inside an unused typedef. 
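    // Illustrative only (hypothetical user code, Microsoft C++ ABI): given
    //
    //   struct S;                // not yet defined
    //   typedef int S::*MP;
    //   typedef MP MPArray[4];
    //
    // the isCompleteType() call below is what locks in an inheritance model
    // for S so that MP has a known size.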
    if (Context.getTargetInfo().getCXXABI().isMicrosoft())
      if (const MemberPointerType *MPTy = T->getAs<MemberPointerType>())
        if (!MPTy->getClass()->isDependentType())
          (void)isCompleteType(Loc, T);

  } else {
    // C99 6.7.5.2p1: If the element type is an incomplete or function type,
    // reject it (e.g. void ary[7], struct foo ary[7], void ary[7]())
    if (RequireCompleteType(Loc, T,
                            diag::err_illegal_decl_array_incomplete_type))
      return QualType();
  }

  if (T->isFunctionType()) {
    Diag(Loc, diag::err_illegal_decl_array_of_functions)
      << getPrintableNameForEntity(Entity) << T;
    return QualType();
  }

  if (const RecordType *EltTy = T->getAs<RecordType>()) {
    // If the element type is a struct or union that contains a variadic
    // array, accept it as a GNU extension: C99 6.7.2.1p2.
    if (EltTy->getDecl()->hasFlexibleArrayMember())
      Diag(Loc, diag::ext_flexible_array_in_array) << T;
  } else if (T->isObjCObjectType()) {
    Diag(Loc, diag::err_objc_array_of_interfaces) << T;
    return QualType();
  }

  // Do placeholder conversions on the array size expression.
  if (ArraySize && ArraySize->hasPlaceholderType()) {
    ExprResult Result = CheckPlaceholderExpr(ArraySize);
    if (Result.isInvalid()) return QualType();
    ArraySize = Result.get();
  }

  // Do lvalue-to-rvalue conversions on the array size expression.
  if (ArraySize && !ArraySize->isRValue()) {
    ExprResult Result = DefaultLvalueConversion(ArraySize);
    if (Result.isInvalid())
      return QualType();

    ArraySize = Result.get();
  }

  // C99 6.7.5.2p1: The size expression shall have integer type.
  // C++11 allows contextual conversions to such types.
  if (!getLangOpts().CPlusPlus11 &&
      ArraySize && !ArraySize->isTypeDependent() &&
      !ArraySize->getType()->isIntegralOrUnscopedEnumerationType()) {
    Diag(ArraySize->getLocStart(), diag::err_array_size_non_int)
      << ArraySize->getType() << ArraySize->getSourceRange();
    return QualType();
  }

  llvm::APSInt ConstVal(Context.getTypeSize(Context.getSizeType()));
  if (!ArraySize) {
    if (ASM == ArrayType::Star)
      T = Context.getVariableArrayType(T, nullptr, ASM, Quals, Brackets);
    else
      T = Context.getIncompleteArrayType(T, ASM, Quals);
  } else if (ArraySize->isTypeDependent() || ArraySize->isValueDependent()) {
    T = Context.getDependentSizedArrayType(T, ArraySize, ASM, Quals, Brackets);
  } else if ((!T->isDependentType() && !T->isIncompleteType() &&
              !T->isConstantSizeType()) ||
             isArraySizeVLA(*this, ArraySize, ConstVal)) {
    // Even in C++11, don't allow contextual conversions in the array bound
    // of a VLA.
    if (getLangOpts().CPlusPlus11 &&
        !ArraySize->getType()->isIntegralOrUnscopedEnumerationType()) {
      Diag(ArraySize->getLocStart(), diag::err_array_size_non_int)
        << ArraySize->getType() << ArraySize->getSourceRange();
      return QualType();
    }

    // C99: an array with an element type that has a non-constant-size is a
    // VLA.
    // C99: an array with a non-ICE size is a VLA.  We accept any expression
    // that we can fold to a non-zero positive value as an extension.
    T = Context.getVariableArrayType(T, ArraySize, ASM, Quals, Brackets);
  } else {
    // C99 6.7.5.2p1: If the expression is a constant expression, it shall
    // have a value greater than zero.
    if (ConstVal.isSigned() && ConstVal.isNegative()) {
      if (Entity)
        Diag(ArraySize->getLocStart(), diag::err_decl_negative_array_size)
          << getPrintableNameForEntity(Entity) << ArraySize->getSourceRange();
      else
        Diag(ArraySize->getLocStart(), diag::err_typecheck_negative_array_size)
          << ArraySize->getSourceRange();
      return QualType();
    }
    if (ConstVal == 0) {
      // GCC accepts zero sized static arrays. We allow them when
      // we're not in a SFINAE context.
      Diag(ArraySize->getLocStart(), isSFINAEContext() ?
diag::err_typecheck_zero_array_size : diag::ext_typecheck_zero_array_size) << ArraySize->getSourceRange(); if (ASM == ArrayType::Static) { Diag(ArraySize->getLocStart(), diag::warn_typecheck_zero_static_array_size) << ArraySize->getSourceRange(); ASM = ArrayType::Normal; } } else if (!T->isDependentType() && !T->isVariablyModifiedType() && !T->isIncompleteType() && !T->isUndeducedType()) { // Is the array too large? unsigned ActiveSizeBits = ConstantArrayType::getNumAddressingBits(Context, T, ConstVal); if (ActiveSizeBits > ConstantArrayType::getMaxSizeBits(Context)) { Diag(ArraySize->getLocStart(), diag::err_array_too_large) << ConstVal.toString(10) << ArraySize->getSourceRange(); return QualType(); } } T = Context.getConstantArrayType(T, ConstVal, ASM, Quals); } // OpenCL v1.2 s6.9.d: variable length arrays are not supported. if (getLangOpts().OpenCL && T->isVariableArrayType()) { Diag(Loc, diag::err_opencl_vla); return QualType(); } // CUDA device code doesn't support VLAs. if (getLangOpts().CUDA && T->isVariableArrayType()) CUDADiagIfDeviceCode(Loc, diag::err_cuda_vla) << CurrentCUDATarget(); // If this is not C99, extwarn about VLA's and C99 array size modifiers. if (!getLangOpts().C99) { if (T->isVariableArrayType()) { // Prohibit the use of VLAs during template argument deduction. if (isSFINAEContext()) { Diag(Loc, diag::err_vla_in_sfinae); return QualType(); } // Just extwarn about VLAs. else Diag(Loc, diag::ext_vla); } else if (ASM != ArrayType::Normal || Quals != 0) Diag(Loc, getLangOpts().CPlusPlus? diag::err_c99_array_usage_cxx : diag::ext_c99_array_usage) << ASM; } if (T->isVariableArrayType()) { // Warn about VLAs for -Wvla. Diag(Loc, diag::warn_vla_used); } // OpenCL v2.0 s6.12.5 - Arrays of blocks are not supported. // OpenCL v2.0 s6.16.13.1 - Arrays of pipe type are not supported. // OpenCL v2.0 s6.9.b - Arrays of image/sampler type are not supported. if (getLangOpts().OpenCL) { const QualType ArrType = Context.getBaseElementType(T); if (ArrType->isBlockPointerType() || ArrType->isPipeType() || ArrType->isSamplerT() || ArrType->isImageType()) { Diag(Loc, diag::err_opencl_invalid_type_array) << ArrType; return QualType(); } } return T; } /// \brief Build an ext-vector type. /// /// Run the required checks for the extended vector type. QualType Sema::BuildExtVectorType(QualType T, Expr *ArraySize, SourceLocation AttrLoc) { // Unlike gcc's vector_size attribute, we do not allow vectors to be defined // in conjunction with complex types (pointers, arrays, functions, etc.). // // Additionally, OpenCL prohibits vectors of booleans (they're considered a // reserved data type under OpenCL v2.0 s6.1.4), we don't support selects // on bitvectors, and we have no well-defined ABI for bitvectors, so vectors // of bool aren't allowed. if ((!T->isDependentType() && !T->isIntegerType() && !T->isRealFloatingType()) || T->isBooleanType()) { Diag(AttrLoc, diag::err_attribute_invalid_vector_type) << T; return QualType(); } if (!ArraySize->isTypeDependent() && !ArraySize->isValueDependent()) { llvm::APSInt vecSize(32); if (!ArraySize->isIntegerConstantExpr(vecSize, Context)) { Diag(AttrLoc, diag::err_attribute_argument_type) << "ext_vector_type" << AANT_ArgumentIntegerConstant << ArraySize->getSourceRange(); return QualType(); } // Unlike gcc's vector_size attribute, the size is specified as the // number of elements, not the number of bytes. 
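    // Illustrative only (hypothetical user code): the two attribute forms
    // differ in units, e.g.
    //
    //   typedef float float4 __attribute__((ext_vector_type(4))); // 4 elems
    //   typedef float gccf4  __attribute__((vector_size(16)));    // 16 bytes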
    unsigned vectorSize = static_cast<unsigned>(vecSize.getZExtValue());

    if (vectorSize == 0) {
      Diag(AttrLoc, diag::err_attribute_zero_size)
        << ArraySize->getSourceRange();
      return QualType();
    }

    if (VectorType::isVectorSizeTooLarge(vectorSize)) {
      Diag(AttrLoc, diag::err_attribute_size_too_large)
        << ArraySize->getSourceRange();
      return QualType();
    }

    return Context.getExtVectorType(T, vectorSize);
  }

  return Context.getDependentSizedExtVectorType(T, ArraySize, AttrLoc);
}

bool Sema::CheckFunctionReturnType(QualType T, SourceLocation Loc) {
  if (T->isArrayType() || T->isFunctionType()) {
    Diag(Loc, diag::err_func_returning_array_function)
      << T->isFunctionType() << T;
    return true;
  }

  // Functions cannot return half FP.
  if (T->isHalfType() && !getLangOpts().HalfArgsAndReturns) {
    Diag(Loc, diag::err_parameters_retval_cannot_have_fp16_type) << 1
      << FixItHint::CreateInsertion(Loc, "*");
    return true;
  }

  // Methods cannot return interface types. All ObjC objects are
  // passed by reference.
  if (T->isObjCObjectType()) {
    Diag(Loc, diag::err_object_cannot_be_passed_returned_by_value)
      << 0 << T;
    return true;
  }

  return false;
}

/// Check the extended parameter information.  Most of the necessary
/// checking should occur when applying the parameter attribute; the
/// only other checks required are positional restrictions.
static void checkExtParameterInfos(Sema &S, ArrayRef<QualType> paramTypes,
                    const FunctionProtoType::ExtProtoInfo &EPI,
                    llvm::function_ref<SourceLocation(unsigned)> getParamLoc) {
  assert(EPI.ExtParameterInfos && "shouldn't get here without param infos");

  bool hasCheckedSwiftCall = false;
  auto checkForSwiftCC = [&](unsigned paramIndex) {
    // Only do this once.
    if (hasCheckedSwiftCall) return;
    hasCheckedSwiftCall = true;
    if (EPI.ExtInfo.getCC() == CC_Swift) return;
    S.Diag(getParamLoc(paramIndex), diag::err_swift_param_attr_not_swiftcall)
      << getParameterABISpelling(EPI.ExtParameterInfos[paramIndex].getABI());
  };

  for (size_t paramIndex = 0, numParams = paramTypes.size();
       paramIndex != numParams; ++paramIndex) {
    switch (EPI.ExtParameterInfos[paramIndex].getABI()) {
    // Nothing interesting to check for ordinary-ABI parameters.
    case ParameterABI::Ordinary:
      continue;

    // swift_indirect_result parameters must be a prefix of the function
    // arguments.
    case ParameterABI::SwiftIndirectResult:
      checkForSwiftCC(paramIndex);
      if (paramIndex != 0 &&
          EPI.ExtParameterInfos[paramIndex - 1].getABI()
            != ParameterABI::SwiftIndirectResult) {
        S.Diag(getParamLoc(paramIndex),
               diag::err_swift_indirect_result_not_first);
      }
      continue;

    case ParameterABI::SwiftContext:
      checkForSwiftCC(paramIndex);
      continue;

    // swift_error parameters must be preceded by a swift_context parameter.
    case ParameterABI::SwiftErrorResult:
      checkForSwiftCC(paramIndex);
      if (paramIndex == 0 ||
          EPI.ExtParameterInfos[paramIndex - 1].getABI()
            != ParameterABI::SwiftContext) {
        S.Diag(getParamLoc(paramIndex),
               diag::err_swift_error_result_not_after_swift_context);
      }
      continue;
    }
    llvm_unreachable("bad ABI kind");
  }
}

QualType Sema::BuildFunctionType(QualType T,
                                 MutableArrayRef<QualType> ParamTypes,
                                 SourceLocation Loc, DeclarationName Entity,
                                 const FunctionProtoType::ExtProtoInfo &EPI) {
  bool Invalid = false;

  Invalid |= CheckFunctionReturnType(T, Loc);

  for (unsigned Idx = 0, Cnt = ParamTypes.size(); Idx < Cnt; ++Idx) {
    // FIXME: Loc is too imprecise here, should use proper locations for args.
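    // Illustrative only (hypothetical user code): the per-parameter checks
    // below reject, e.g.,
    //
    //   void f(void, int);  // err_param_with_void_type: 'void' must be the
    //                       // lone parameter, as in 'void f(void)'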
QualType ParamType = Context.getAdjustedParameterType(ParamTypes[Idx]); if (ParamType->isVoidType()) { Diag(Loc, diag::err_param_with_void_type); Invalid = true; } else if (ParamType->isHalfType() && !getLangOpts().HalfArgsAndReturns) { // Disallow half FP arguments. Diag(Loc, diag::err_parameters_retval_cannot_have_fp16_type) << 0 << FixItHint::CreateInsertion(Loc, "*"); Invalid = true; } ParamTypes[Idx] = ParamType; } if (EPI.ExtParameterInfos) { checkExtParameterInfos(*this, ParamTypes, EPI, [=](unsigned i) { return Loc; }); } if (Invalid) return QualType(); return Context.getFunctionType(T, ParamTypes, EPI); } /// \brief Build a member pointer type \c T Class::*. /// /// \param T the type to which the member pointer refers. /// \param Class the class type into which the member pointer points. /// \param Loc the location where this type begins /// \param Entity the name of the entity that will have this member pointer type /// /// \returns a member pointer type, if successful, or a NULL type if there was /// an error. QualType Sema::BuildMemberPointerType(QualType T, QualType Class, SourceLocation Loc, DeclarationName Entity) { // Verify that we're not building a pointer to pointer to function with // exception specification. if (CheckDistantExceptionSpec(T)) { Diag(Loc, diag::err_distant_exception_spec); return QualType(); } // C++ 8.3.3p3: A pointer to member shall not point to ... a member // with reference type, or "cv void." if (T->isReferenceType()) { Diag(Loc, diag::err_illegal_decl_mempointer_to_reference) << getPrintableNameForEntity(Entity) << T; return QualType(); } if (T->isVoidType()) { Diag(Loc, diag::err_illegal_decl_mempointer_to_void) << getPrintableNameForEntity(Entity); return QualType(); } if (!Class->isDependentType() && !Class->isRecordType()) { Diag(Loc, diag::err_mempointer_in_nonclass_type) << Class; return QualType(); } // Adjust the default free function calling convention to the default method // calling convention. bool IsCtorOrDtor = (Entity.getNameKind() == DeclarationName::CXXConstructorName) || (Entity.getNameKind() == DeclarationName::CXXDestructorName); if (T->isFunctionType()) adjustMemberFunctionCC(T, /*IsStatic=*/false, IsCtorOrDtor, Loc); return Context.getMemberPointerType(T, Class.getTypePtr()); } /// \brief Build a block pointer type. /// /// \param T The type to which we'll be building a block pointer. /// /// \param Loc The source location, used for diagnostics. /// /// \param Entity The name of the entity that involves the block pointer /// type, if known. /// /// \returns A suitable block pointer type, if there are no /// errors. Otherwise, returns a NULL type. 
QualType Sema::BuildBlockPointerType(QualType T,
                                     SourceLocation Loc,
                                     DeclarationName Entity) {
  if (!T->isFunctionType()) {
    Diag(Loc, diag::err_nonfunction_block_type);
    return QualType();
  }

  if (checkQualifiedFunction(*this, T, Loc, QFK_BlockPointer))
    return QualType();

  return Context.getBlockPointerType(T);
}

QualType Sema::GetTypeFromParser(ParsedType Ty, TypeSourceInfo **TInfo) {
  QualType QT = Ty.get();
  if (QT.isNull()) {
    if (TInfo) *TInfo = nullptr;
    return QualType();
  }

  TypeSourceInfo *DI = nullptr;
  if (const LocInfoType *LIT = dyn_cast<LocInfoType>(QT)) {
    QT = LIT->getType();
    DI = LIT->getTypeSourceInfo();
  }

  if (TInfo) *TInfo = DI;
  return QT;
}

static void transferARCOwnershipToDeclaratorChunk(TypeProcessingState &state,
                                            Qualifiers::ObjCLifetime ownership,
                                            unsigned chunkIndex);

/// Given that this is the declaration of a parameter under ARC,
/// attempt to infer attributes and such for pointer-to-whatever
/// types.
static void inferARCWriteback(TypeProcessingState &state,
                              QualType &declSpecType) {
  Sema &S = state.getSema();
  Declarator &declarator = state.getDeclarator();

  // TODO: should we care about decl qualifiers?

  // Check whether the declarator has the expected form.  We walk
  // from the inside out in order to make the block logic work.
  unsigned outermostPointerIndex = 0;
  bool isBlockPointer = false;
  unsigned numPointers = 0;
  for (unsigned i = 0, e = declarator.getNumTypeObjects(); i != e; ++i) {
    unsigned chunkIndex = i;
    DeclaratorChunk &chunk = declarator.getTypeObject(chunkIndex);
    switch (chunk.Kind) {
    case DeclaratorChunk::Paren:
      // Ignore parens.
      break;

    case DeclaratorChunk::Reference:
    case DeclaratorChunk::Pointer:
      // Count the number of pointers.  Treat references
      // interchangeably as pointers; if they're mis-ordered, normal
      // type building will discover that.
      outermostPointerIndex = chunkIndex;
      numPointers++;
      break;

    case DeclaratorChunk::BlockPointer:
      // If we have a pointer to block pointer, that's an acceptable
      // indirect reference; anything else is not an application of
      // the rules.
      if (numPointers != 1) return;
      numPointers++;
      outermostPointerIndex = chunkIndex;
      isBlockPointer = true;

      // We don't care about pointer structure in return values here.
      goto done;

    case DeclaratorChunk::Array: // suppress if written (id[])?
    case DeclaratorChunk::Function:
    case DeclaratorChunk::MemberPointer:
    case DeclaratorChunk::Pipe:
      return;
    }
  }
 done:

  // If we have *one* pointer, then we want to throw the qualifier on
  // the declaration-specifiers, which means that it needs to be a
  // retainable object type.
  if (numPointers == 1) {
    // If it's not a retainable object type, the rule doesn't apply.
    if (!declSpecType->isObjCRetainableType()) return;

    // If it already has lifetime, don't do anything.
    if (declSpecType.getObjCLifetime()) return;

    // Otherwise, modify the type in-place.
    Qualifiers qs;

    if (declSpecType->isObjCARCImplicitlyUnretainedType())
      qs.addObjCLifetime(Qualifiers::OCL_ExplicitNone);
    else
      qs.addObjCLifetime(Qualifiers::OCL_Autoreleasing);
    declSpecType = S.Context.getQualifiedType(declSpecType, qs);

  // If we have *two* pointers, then we want to throw the qualifier on
  // the outermost pointer.
  } else if (numPointers == 2) {
    // If we don't have a block pointer, we need to check whether the
    // declaration-specifiers gave us something that will turn into a
    // retainable object pointer after we slap the first pointer on it.
    if (!isBlockPointer && !declSpecType->isObjCObjectType())
      return;

    // Look for an explicit lifetime attribute there.
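    // Illustrative only (hypothetical user code, ARC): for a parameter
    // declared as
    //
    //   - (BOOL)save:(NSError **)error;
    //
    // the inference here produces 'NSError * __autoreleasing *' unless an
    // explicit ownership qualifier was written on the outer pointer.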
DeclaratorChunk &chunk = declarator.getTypeObject(outermostPointerIndex); if (chunk.Kind != DeclaratorChunk::Pointer && chunk.Kind != DeclaratorChunk::BlockPointer) return; for (const AttributeList *attr = chunk.getAttrs(); attr; attr = attr->getNext()) if (attr->getKind() == AttributeList::AT_ObjCOwnership) return; transferARCOwnershipToDeclaratorChunk(state, Qualifiers::OCL_Autoreleasing, outermostPointerIndex); // Any other number of pointers/references does not trigger the rule. } else return; // TODO: mark whether we did this inference? } void Sema::diagnoseIgnoredQualifiers(unsigned DiagID, unsigned Quals, SourceLocation FallbackLoc, SourceLocation ConstQualLoc, SourceLocation VolatileQualLoc, SourceLocation RestrictQualLoc, SourceLocation AtomicQualLoc, SourceLocation UnalignedQualLoc) { if (!Quals) return; struct Qual { const char *Name; unsigned Mask; SourceLocation Loc; } const QualKinds[5] = { { "const", DeclSpec::TQ_const, ConstQualLoc }, { "volatile", DeclSpec::TQ_volatile, VolatileQualLoc }, { "restrict", DeclSpec::TQ_restrict, RestrictQualLoc }, { "__unaligned", DeclSpec::TQ_unaligned, UnalignedQualLoc }, { "_Atomic", DeclSpec::TQ_atomic, AtomicQualLoc } }; SmallString<32> QualStr; unsigned NumQuals = 0; SourceLocation Loc; FixItHint FixIts[5]; // Build a string naming the redundant qualifiers. for (auto &E : QualKinds) { if (Quals & E.Mask) { if (!QualStr.empty()) QualStr += ' '; QualStr += E.Name; // If we have a location for the qualifier, offer a fixit. SourceLocation QualLoc = E.Loc; if (QualLoc.isValid()) { FixIts[NumQuals] = FixItHint::CreateRemoval(QualLoc); if (Loc.isInvalid() || getSourceManager().isBeforeInTranslationUnit(QualLoc, Loc)) Loc = QualLoc; } ++NumQuals; } } Diag(Loc.isInvalid() ? FallbackLoc : Loc, DiagID) << QualStr << NumQuals << FixIts[0] << FixIts[1] << FixIts[2] << FixIts[3]; } // Diagnose pointless type qualifiers on the return type of a function. static void diagnoseRedundantReturnTypeQualifiers(Sema &S, QualType RetTy, Declarator &D, unsigned FunctionChunkIndex) { if (D.getTypeObject(FunctionChunkIndex).Fun.hasTrailingReturnType()) { // FIXME: TypeSourceInfo doesn't preserve location information for // qualifiers. S.diagnoseIgnoredQualifiers(diag::warn_qual_return_type, RetTy.getLocalCVRQualifiers(), D.getIdentifierLoc()); return; } for (unsigned OuterChunkIndex = FunctionChunkIndex + 1, End = D.getNumTypeObjects(); OuterChunkIndex != End; ++OuterChunkIndex) { DeclaratorChunk &OuterChunk = D.getTypeObject(OuterChunkIndex); switch (OuterChunk.Kind) { case DeclaratorChunk::Paren: continue; case DeclaratorChunk::Pointer: { DeclaratorChunk::PointerTypeInfo &PTI = OuterChunk.Ptr; S.diagnoseIgnoredQualifiers( diag::warn_qual_return_type, PTI.TypeQuals, SourceLocation(), SourceLocation::getFromRawEncoding(PTI.ConstQualLoc), SourceLocation::getFromRawEncoding(PTI.VolatileQualLoc), SourceLocation::getFromRawEncoding(PTI.RestrictQualLoc), SourceLocation::getFromRawEncoding(PTI.AtomicQualLoc), SourceLocation::getFromRawEncoding(PTI.UnalignedQualLoc)); return; } case DeclaratorChunk::Function: case DeclaratorChunk::BlockPointer: case DeclaratorChunk::Reference: case DeclaratorChunk::Array: case DeclaratorChunk::MemberPointer: case DeclaratorChunk::Pipe: // FIXME: We can't currently provide an accurate source location and a // fix-it hint for these. unsigned AtomicQual = RetTy->isAtomicType() ? 
                             DeclSpec::TQ_atomic : 0;
      S.diagnoseIgnoredQualifiers(diag::warn_qual_return_type,
          RetTy.getCVRQualifiers() | AtomicQual, D.getIdentifierLoc());
      return;
    }

    llvm_unreachable("unknown declarator chunk kind");
  }

  // If the qualifiers come from a conversion function type, don't diagnose
  // them -- they're not necessarily redundant, since such a conversion
  // operator can be explicitly called as "x.operator const int()".
  if (D.getName().getKind() == UnqualifiedId::IK_ConversionFunctionId)
    return;

  // Just parens all the way out to the decl specifiers. Diagnose any qualifiers
  // which are present there.
  S.diagnoseIgnoredQualifiers(diag::warn_qual_return_type,
                              D.getDeclSpec().getTypeQualifiers(),
                              D.getIdentifierLoc(),
                              D.getDeclSpec().getConstSpecLoc(),
                              D.getDeclSpec().getVolatileSpecLoc(),
                              D.getDeclSpec().getRestrictSpecLoc(),
                              D.getDeclSpec().getAtomicSpecLoc(),
                              D.getDeclSpec().getUnalignedSpecLoc());
}

static QualType GetDeclSpecTypeForDeclarator(TypeProcessingState &state,
                                             TypeSourceInfo *&ReturnTypeInfo) {
  Sema &SemaRef = state.getSema();
  Declarator &D = state.getDeclarator();
  QualType T;
  ReturnTypeInfo = nullptr;

  // The TagDecl owned by the DeclSpec.
  TagDecl *OwnedTagDecl = nullptr;

  switch (D.getName().getKind()) {
  case UnqualifiedId::IK_ImplicitSelfParam:
  case UnqualifiedId::IK_OperatorFunctionId:
  case UnqualifiedId::IK_Identifier:
  case UnqualifiedId::IK_LiteralOperatorId:
  case UnqualifiedId::IK_TemplateId:
    T = ConvertDeclSpecToType(state);

    if (!D.isInvalidType() && D.getDeclSpec().isTypeSpecOwned()) {
      OwnedTagDecl = cast<TagDecl>(D.getDeclSpec().getRepAsDecl());
      // Owned declaration is embedded in declarator.
      OwnedTagDecl->setEmbeddedInDeclarator(true);
    }
    break;

  case UnqualifiedId::IK_ConstructorName:
  case UnqualifiedId::IK_ConstructorTemplateId:
  case UnqualifiedId::IK_DestructorName:
    // Constructors and destructors don't have return types. Use
    // "void" instead.
    T = SemaRef.Context.VoidTy;
    processTypeAttrs(state, T, TAL_DeclSpec,
                     D.getDeclSpec().getAttributes().getList());
    break;

  case UnqualifiedId::IK_ConversionFunctionId:
    // The result type of a conversion function is the type that it
    // converts to.
    T = SemaRef.GetTypeFromParser(D.getName().ConversionFunctionId,
                                  &ReturnTypeInfo);
    break;
  }

  if (D.getAttributes())
    distributeTypeAttrsFromDeclarator(state, T);

  // C++11 [dcl.spec.auto]p5: reject 'auto' if it is not in an allowed context.
  if (D.getDeclSpec().containsPlaceholderType()) {
    int Error = -1;

    switch (D.getContext()) {
    case Declarator::LambdaExprContext:
      llvm_unreachable("Can't specify a type specifier in lambda grammar");
    case Declarator::ObjCParameterContext:
    case Declarator::ObjCResultContext:
    case Declarator::PrototypeContext:
      Error = 0;
      break;
    case Declarator::LambdaExprParameterContext:
      // In C++14, generic lambdas allow 'auto' in their parameters.
      if (!(SemaRef.getLangOpts().CPlusPlus14 &&
            D.getDeclSpec().getTypeSpecType() == DeclSpec::TST_auto))
        Error = 16;
      break;
    case Declarator::MemberContext: {
      if (D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_static ||
          D.isFunctionDeclarator())
        break;
      bool Cxx = SemaRef.getLangOpts().CPlusPlus;
      switch (cast<TagDecl>(SemaRef.CurContext)->getTagKind()) {
      case TTK_Enum: llvm_unreachable("unhandled tag kind");
      case TTK_Struct: Error = Cxx ? 1 : 2; /* Struct member */ break;
      case TTK_Union:  Error = Cxx ?
3 : 4; /* Union member */ break; case TTK_Class: Error = 5; /* Class member */ break; case TTK_Interface: Error = 6; /* Interface member */ break; } break; } case Declarator::CXXCatchContext: case Declarator::ObjCCatchContext: Error = 7; // Exception declaration break; case Declarator::TemplateParamContext: if (!SemaRef.getLangOpts().CPlusPlus1z) Error = 8; // Template parameter break; case Declarator::BlockLiteralContext: Error = 9; // Block literal break; case Declarator::TemplateTypeArgContext: Error = 10; // Template type argument break; case Declarator::AliasDeclContext: case Declarator::AliasTemplateContext: Error = 12; // Type alias break; case Declarator::TrailingReturnContext: if (!SemaRef.getLangOpts().CPlusPlus14 || D.getDeclSpec().getTypeSpecType() == DeclSpec::TST_auto_type) Error = 13; // Function return type break; case Declarator::ConversionIdContext: if (!SemaRef.getLangOpts().CPlusPlus14 || D.getDeclSpec().getTypeSpecType() == DeclSpec::TST_auto_type) Error = 14; // conversion-type-id break; case Declarator::TypeNameContext: Error = 15; // Generic break; case Declarator::FileContext: case Declarator::BlockContext: case Declarator::ForContext: case Declarator::InitStmtContext: case Declarator::ConditionContext: break; case Declarator::CXXNewContext: if (D.getDeclSpec().getTypeSpecType() == DeclSpec::TST_auto_type) Error = 17; // 'new' type break; case Declarator::KNRTypeListContext: Error = 18; // K&R function parameter break; } if (D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_typedef) Error = 11; // In Objective-C it is an error to use 'auto' on a function declarator // (and everywhere for '__auto_type'). if (D.isFunctionDeclarator() && (!SemaRef.getLangOpts().CPlusPlus11 || D.getDeclSpec().getTypeSpecType() == DeclSpec::TST_auto_type)) Error = 13; bool HaveTrailing = false; // C++11 [dcl.spec.auto]p2: 'auto' is always fine if the declarator // contains a trailing return type. That is only legal at the outermost // level. Check all declarator chunks (outermost first) anyway, to give // better diagnostics. // We don't support '__auto_type' with trailing return types. if (SemaRef.getLangOpts().CPlusPlus11 && D.getDeclSpec().getTypeSpecType() != DeclSpec::TST_auto_type) { for (unsigned i = 0, e = D.getNumTypeObjects(); i != e; ++i) { unsigned chunkIndex = e - i - 1; state.setCurrentChunkIndex(chunkIndex); DeclaratorChunk &DeclType = D.getTypeObject(chunkIndex); if (DeclType.Kind == DeclaratorChunk::Function) { const DeclaratorChunk::FunctionTypeInfo &FTI = DeclType.Fun; if (FTI.hasTrailingReturnType()) { HaveTrailing = true; Error = -1; break; } } } } SourceRange AutoRange = D.getDeclSpec().getTypeSpecTypeLoc(); if (D.getName().getKind() == UnqualifiedId::IK_ConversionFunctionId) AutoRange = D.getName().getSourceRange(); if (Error != -1) { unsigned Keyword; switch (D.getDeclSpec().getTypeSpecType()) { case DeclSpec::TST_auto: Keyword = 0; break; case DeclSpec::TST_decltype_auto: Keyword = 1; break; case DeclSpec::TST_auto_type: Keyword = 2; break; default: llvm_unreachable("unknown auto TypeSpecType"); } SemaRef.Diag(AutoRange.getBegin(), diag::err_auto_not_allowed) << Keyword << Error << AutoRange; T = SemaRef.Context.IntTy; D.setInvalidType(true); } else if (!HaveTrailing) { // If there was a trailing return type, we already got // warn_cxx98_compat_trailing_return_type in the parser. 
SemaRef.Diag(AutoRange.getBegin(), diag::warn_cxx98_compat_auto_type_specifier) << AutoRange; } } if (SemaRef.getLangOpts().CPlusPlus && OwnedTagDecl && OwnedTagDecl->isCompleteDefinition()) { // Check the contexts where C++ forbids the declaration of a new class // or enumeration in a type-specifier-seq. unsigned DiagID = 0; switch (D.getContext()) { case Declarator::TrailingReturnContext: // Class and enumeration definitions are syntactically not allowed in // trailing return types. llvm_unreachable("parser should not have allowed this"); break; case Declarator::FileContext: case Declarator::MemberContext: case Declarator::BlockContext: case Declarator::ForContext: case Declarator::InitStmtContext: case Declarator::BlockLiteralContext: case Declarator::LambdaExprContext: // C++11 [dcl.type]p3: // A type-specifier-seq shall not define a class or enumeration unless // it appears in the type-id of an alias-declaration (7.1.3) that is not // the declaration of a template-declaration. case Declarator::AliasDeclContext: break; case Declarator::AliasTemplateContext: DiagID = diag::err_type_defined_in_alias_template; break; case Declarator::TypeNameContext: case Declarator::ConversionIdContext: case Declarator::TemplateParamContext: case Declarator::CXXNewContext: case Declarator::CXXCatchContext: case Declarator::ObjCCatchContext: case Declarator::TemplateTypeArgContext: DiagID = diag::err_type_defined_in_type_specifier; break; case Declarator::PrototypeContext: case Declarator::LambdaExprParameterContext: case Declarator::ObjCParameterContext: case Declarator::ObjCResultContext: case Declarator::KNRTypeListContext: // C++ [dcl.fct]p6: // Types shall not be defined in return or parameter types. DiagID = diag::err_type_defined_in_param_type; break; case Declarator::ConditionContext: // C++ 6.4p2: // The type-specifier-seq shall not contain typedef and shall not declare // a new class or enumeration. DiagID = diag::err_type_defined_in_condition; break; } if (DiagID != 0) { SemaRef.Diag(OwnedTagDecl->getLocation(), DiagID) << SemaRef.Context.getTypeDeclType(OwnedTagDecl); D.setInvalidType(true); } } assert(!T.isNull() && "This function should not return a null type"); return T; } /// Produce an appropriate diagnostic for an ambiguity between a function /// declarator and a C++ direct-initializer. static void warnAboutAmbiguousFunction(Sema &S, Declarator &D, DeclaratorChunk &DeclType, QualType RT) { const DeclaratorChunk::FunctionTypeInfo &FTI = DeclType.Fun; assert(FTI.isAmbiguous && "no direct-initializer / function ambiguity"); // If the return type is void there is no ambiguity. if (RT->isVoidType()) return; // An initializer for a non-class type can have at most one argument. if (!RT->isRecordType() && FTI.NumParams > 1) return; // An initializer for a reference must have exactly one argument. if (RT->isReferenceType() && FTI.NumParams != 1) return; // Only warn if this declarator is declaring a function at block scope, and // doesn't have a storage class (such as 'extern') specified. if (!D.isFunctionDeclarator() || D.getFunctionDefinitionKind() != FDK_Declaration || !S.CurContext->isFunctionOrMethod() || D.getDeclSpec().getStorageClassSpec() != DeclSpec::SCS_unspecified) return; // Inside a condition, a direct initializer is not permitted. We allow one to // be parsed in order to give better diagnostics in condition parsing. if (D.getContext() == Declarator::ConditionContext) return; SourceRange ParenRange(DeclType.Loc, DeclType.EndLoc); S.Diag(DeclType.Loc, FTI.NumParams ? 
diag::warn_parens_disambiguated_as_function_declaration : diag::warn_empty_parens_are_function_decl) << ParenRange; // If the declaration looks like: // T var1, // f(); // and name lookup finds a function named 'f', then the ',' was // probably intended to be a ';'. if (!D.isFirstDeclarator() && D.getIdentifier()) { FullSourceLoc Comma(D.getCommaLoc(), S.SourceMgr); FullSourceLoc Name(D.getIdentifierLoc(), S.SourceMgr); if (Comma.getFileID() != Name.getFileID() || Comma.getSpellingLineNumber() != Name.getSpellingLineNumber()) { LookupResult Result(S, D.getIdentifier(), SourceLocation(), Sema::LookupOrdinaryName); if (S.LookupName(Result, S.getCurScope())) S.Diag(D.getCommaLoc(), diag::note_empty_parens_function_call) << FixItHint::CreateReplacement(D.getCommaLoc(), ";") << D.getIdentifier(); } } if (FTI.NumParams > 0) { // For a declaration with parameters, eg. "T var(T());", suggest adding // parens around the first parameter to turn the declaration into a // variable declaration. SourceRange Range = FTI.Params[0].Param->getSourceRange(); SourceLocation B = Range.getBegin(); SourceLocation E = S.getLocForEndOfToken(Range.getEnd()); // FIXME: Maybe we should suggest adding braces instead of parens // in C++11 for classes that don't have an initializer_list constructor. S.Diag(B, diag::note_additional_parens_for_variable_declaration) << FixItHint::CreateInsertion(B, "(") << FixItHint::CreateInsertion(E, ")"); } else { // For a declaration without parameters, eg. "T var();", suggest replacing // the parens with an initializer to turn the declaration into a variable // declaration. const CXXRecordDecl *RD = RT->getAsCXXRecordDecl(); // Empty parens mean value-initialization, and no parens mean // default initialization. These are equivalent if the default // constructor is user-provided or if zero-initialization is a // no-op. if (RD && RD->hasDefinition() && (RD->isEmpty() || RD->hasUserProvidedDefaultConstructor())) S.Diag(DeclType.Loc, diag::note_empty_parens_default_ctor) << FixItHint::CreateRemoval(ParenRange); else { std::string Init = S.getFixItZeroInitializerForType(RT, ParenRange.getBegin()); if (Init.empty() && S.LangOpts.CPlusPlus11) Init = "{}"; if (!Init.empty()) S.Diag(DeclType.Loc, diag::note_empty_parens_zero_initialize) << FixItHint::CreateReplacement(ParenRange, Init); } } } /// Helper for figuring out the default CC for a function declarator type. If /// this is the outermost chunk, then we can determine the CC from the /// declarator context. If not, then this could be either a member function /// type or normal function type. static CallingConv getCCForDeclaratorChunk(Sema &S, Declarator &D, const DeclaratorChunk::FunctionTypeInfo &FTI, unsigned ChunkIndex) { assert(D.getTypeObject(ChunkIndex).Kind == DeclaratorChunk::Function); // Check for an explicit CC attribute. for (auto Attr = FTI.AttrList; Attr; Attr = Attr->getNext()) { switch (Attr->getKind()) { CALLING_CONV_ATTRS_CASELIST: { // Ignore attributes that don't validate or can't apply to the // function type. We'll diagnose the failure to apply them in // handleFunctionTypeAttr. CallingConv CC; if (!S.CheckCallingConvAttr(*Attr, CC) && (!FTI.isVariadic || supportsVariadicCall(CC))) { return CC; } break; } default: break; } } bool IsCXXInstanceMethod = false; if (S.getLangOpts().CPlusPlus) { // Look inwards through parentheses to see if this chunk will form a // member pointer type or if we're the declarator. Any type attributes // between here and there will override the CC we choose here. 
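// For illustration, the classic "most vexing parse" this warning targets,
// together with the disambiguations the fix-its above suggest (a minimal
// sketch):
//
//   struct T { T(); };
//   void g() {
//     T var();        // declares a function returning T, not a variable
//     T var2{};       // C++11: unambiguously value-initializes a T
//     T var3((T()));  // extra parens force a variable declaration
//   }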
unsigned I = ChunkIndex; bool FoundNonParen = false; while (I && !FoundNonParen) { --I; if (D.getTypeObject(I).Kind != DeclaratorChunk::Paren) FoundNonParen = true; } if (FoundNonParen) { // If we're not the declarator, we're a regular function type unless we're // in a member pointer. IsCXXInstanceMethod = D.getTypeObject(I).Kind == DeclaratorChunk::MemberPointer; } else if (D.getContext() == Declarator::LambdaExprContext) { // This can only be a call operator for a lambda, which is an instance // method. IsCXXInstanceMethod = true; } else { // We're the innermost decl chunk, so must be a function declarator. assert(D.isFunctionDeclarator()); // If we're inside a record, we're declaring a method, but it could be // explicitly or implicitly static. IsCXXInstanceMethod = D.isFirstDeclarationOfMember() && D.getDeclSpec().getStorageClassSpec() != DeclSpec::SCS_typedef && !D.isStaticMember(); } } CallingConv CC = S.Context.getDefaultCallingConvention(FTI.isVariadic, IsCXXInstanceMethod); // Attribute AT_OpenCLKernel affects the calling convention for SPIR // and AMDGPU targets, hence it cannot be treated as a calling // convention attribute. This is the simplest place to infer // calling convention for OpenCL kernels. if (S.getLangOpts().OpenCL) { for (const AttributeList *Attr = D.getDeclSpec().getAttributes().getList(); Attr; Attr = Attr->getNext()) { if (Attr->getKind() == AttributeList::AT_OpenCLKernel) { llvm::Triple::ArchType arch = S.Context.getTargetInfo().getTriple().getArch(); if (arch == llvm::Triple::spir || arch == llvm::Triple::spir64 || - arch == llvm::Triple::amdgcn) { + arch == llvm::Triple::amdgcn || arch == llvm::Triple::r600) { CC = CC_OpenCLKernel; } break; } } } return CC; } namespace { /// A simple notion of pointer kinds, which matches up with the various /// pointer declarators. enum class SimplePointerKind { Pointer, BlockPointer, MemberPointer, Array, }; } // end anonymous namespace IdentifierInfo *Sema::getNullabilityKeyword(NullabilityKind nullability) { switch (nullability) { case NullabilityKind::NonNull: if (!Ident__Nonnull) Ident__Nonnull = PP.getIdentifierInfo("_Nonnull"); return Ident__Nonnull; case NullabilityKind::Nullable: if (!Ident__Nullable) Ident__Nullable = PP.getIdentifierInfo("_Nullable"); return Ident__Nullable; case NullabilityKind::Unspecified: if (!Ident__Null_unspecified) Ident__Null_unspecified = PP.getIdentifierInfo("_Null_unspecified"); return Ident__Null_unspecified; } llvm_unreachable("Unknown nullability kind."); } /// Retrieve the identifier "NSError". IdentifierInfo *Sema::getNSErrorIdent() { if (!Ident_NSError) Ident_NSError = PP.getIdentifierInfo("NSError"); return Ident_NSError; } /// Check whether there is a nullability attribute of any kind in the given /// attribute list. static bool hasNullabilityAttr(const AttributeList *attrs) { for (const AttributeList *attr = attrs; attr; attr = attr->getNext()) { if (attr->getKind() == AttributeList::AT_TypeNonNull || attr->getKind() == AttributeList::AT_TypeNullable || attr->getKind() == AttributeList::AT_TypeNullUnspecified) return true; } return false; } namespace { /// Describes the kind of a pointer a declarator describes. enum class PointerDeclaratorKind { // Not a pointer. NonPointer, // Single-level pointer. SingleLevelPointer, // Multi-level pointer (of any pointer kind). 
  MultiLevelPointer,
  // CFFooRef*
  MaybePointerToCFRef,
  // CFErrorRef*
  CFErrorRefPointer,
  // NSError**
  NSErrorPointerPointer,
};

/// Describes a declarator chunk wrapping a pointer that marks inference as
/// unexpected.
// These values must be kept in sync with diagnostics.
enum class PointerWrappingDeclaratorKind {
  /// Pointer is top-level.
  None = -1,
  /// Pointer is an array element.
  Array = 0,
  /// Pointer is the referent type of a C++ reference.
  Reference = 1
};
} // end anonymous namespace

/// Classify the given declarator, whose specified type is \c type, based on
/// what kind of pointer it refers to.
///
/// This is used to determine the default nullability.
static PointerDeclaratorKind
classifyPointerDeclarator(Sema &S, QualType type, Declarator &declarator,
                          PointerWrappingDeclaratorKind &wrappingKind) {
  unsigned numNormalPointers = 0;

  // For any dependent type, we consider it a non-pointer.
  if (type->isDependentType())
    return PointerDeclaratorKind::NonPointer;

  // Look through the declarator chunks to identify pointers.
  for (unsigned i = 0, n = declarator.getNumTypeObjects(); i != n; ++i) {
    DeclaratorChunk &chunk = declarator.getTypeObject(i);
    switch (chunk.Kind) {
    case DeclaratorChunk::Array:
      if (numNormalPointers == 0)
        wrappingKind = PointerWrappingDeclaratorKind::Array;
      break;

    case DeclaratorChunk::Function:
    case DeclaratorChunk::Pipe:
      break;

    case DeclaratorChunk::BlockPointer:
    case DeclaratorChunk::MemberPointer:
      return numNormalPointers > 0 ? PointerDeclaratorKind::MultiLevelPointer
                                   : PointerDeclaratorKind::SingleLevelPointer;

    case DeclaratorChunk::Paren:
      break;

    case DeclaratorChunk::Reference:
      if (numNormalPointers == 0)
        wrappingKind = PointerWrappingDeclaratorKind::Reference;
      break;

    case DeclaratorChunk::Pointer:
      ++numNormalPointers;
      if (numNormalPointers > 2)
        return PointerDeclaratorKind::MultiLevelPointer;
      break;
    }
  }

  // Then, dig into the type specifier itself.
  unsigned numTypeSpecifierPointers = 0;
  do {
    // Decompose normal pointers.
    if (auto ptrType = type->getAs<PointerType>()) {
      ++numNormalPointers;

      if (numNormalPointers > 2)
        return PointerDeclaratorKind::MultiLevelPointer;

      type = ptrType->getPointeeType();
      ++numTypeSpecifierPointers;
      continue;
    }

    // Decompose block pointers.
    if (type->getAs<BlockPointerType>()) {
      return numNormalPointers > 0 ? PointerDeclaratorKind::MultiLevelPointer
                                   : PointerDeclaratorKind::SingleLevelPointer;
    }

    // Decompose member pointers.
    if (type->getAs<MemberPointerType>()) {
      return numNormalPointers > 0 ? PointerDeclaratorKind::MultiLevelPointer
                                   : PointerDeclaratorKind::SingleLevelPointer;
    }

    // Look at Objective-C object pointers.
    if (auto objcObjectPtr = type->getAs<ObjCObjectPointerType>()) {
      ++numNormalPointers;
      ++numTypeSpecifierPointers;

      // If this is NSError**, report that.
      if (auto objcClassDecl = objcObjectPtr->getInterfaceDecl()) {
        if (objcClassDecl->getIdentifier() == S.getNSErrorIdent() &&
            numNormalPointers == 2 && numTypeSpecifierPointers < 2) {
          return PointerDeclaratorKind::NSErrorPointerPointer;
        }
      }

      break;
    }

    // Look at Objective-C class types.
    if (auto objcClass = type->getAs<ObjCObjectType>()) {
      if (objcClass->getInterface()->getIdentifier() == S.getNSErrorIdent()) {
        if (numNormalPointers == 2 && numTypeSpecifierPointers < 2)
          return PointerDeclaratorKind::NSErrorPointerPointer;
      }

      break;
    }

    // If at this point we haven't seen a pointer, we won't see one.
    if (numNormalPointers == 0)
      return PointerDeclaratorKind::NonPointer;

    if (auto recordType = type->getAs<RecordType>()) {
      RecordDecl *recordDecl = recordType->getDecl();

      bool isCFError = false;
      if (S.CFError) {
        // If we already know about CFError, test it directly.
        isCFError = (S.CFError == recordDecl);
      } else {
        // Check whether this is CFError, which we identify based on its bridge
        // to NSError.
        if (recordDecl->getTagKind() == TTK_Struct && numNormalPointers > 0) {
          if (auto bridgeAttr = recordDecl->getAttr<ObjCBridgeAttr>()) {
            if (bridgeAttr->getBridgedType() == S.getNSErrorIdent()) {
              S.CFError = recordDecl;
              isCFError = true;
            }
          }
        }
      }

      // If this is CFErrorRef*, report it as such.
      if (isCFError && numNormalPointers == 2 && numTypeSpecifierPointers < 2) {
        return PointerDeclaratorKind::CFErrorRefPointer;
      }
      break;
    }

    break;
  } while (true);

  switch (numNormalPointers) {
  case 0:
    return PointerDeclaratorKind::NonPointer;

  case 1:
    return PointerDeclaratorKind::SingleLevelPointer;

  case 2:
    return PointerDeclaratorKind::MaybePointerToCFRef;

  default:
    return PointerDeclaratorKind::MultiLevelPointer;
  }
}

static FileID getNullabilityCompletenessCheckFileID(Sema &S,
                                                    SourceLocation loc) {
  // If we're anywhere in a function, method, or closure context, don't perform
  // completeness checks.
  for (DeclContext *ctx = S.CurContext; ctx; ctx = ctx->getParent()) {
    if (ctx->isFunctionOrMethod())
      return FileID();

    if (ctx->isFileContext())
      break;
  }

  // We only care about the expansion location.
  loc = S.SourceMgr.getExpansionLoc(loc);
  FileID file = S.SourceMgr.getFileID(loc);
  if (file.isInvalid())
    return FileID();

  // Retrieve file information.
  bool invalid = false;
  const SrcMgr::SLocEntry &sloc = S.SourceMgr.getSLocEntry(file, &invalid);
  if (invalid || !sloc.isFile())
    return FileID();

  // We don't want to perform completeness checks on the main file or in
  // system headers.
  const SrcMgr::FileInfo &fileInfo = sloc.getFile();
  if (fileInfo.getIncludeLoc().isInvalid())
    return FileID();
  if (fileInfo.getFileCharacteristic() != SrcMgr::C_User &&
      S.Diags.getSuppressSystemWarnings()) {
    return FileID();
  }

  return file;
}

/// Creates a fix-it to insert a C-style nullability keyword at \p pointerLoc,
/// taking into account whitespace before and after.
static void fixItNullability(Sema &S, DiagnosticBuilder &Diag, SourceLocation PointerLoc, NullabilityKind Nullability) { assert(PointerLoc.isValid()); if (PointerLoc.isMacroID()) return; SourceLocation FixItLoc = S.getLocForEndOfToken(PointerLoc); if (!FixItLoc.isValid() || FixItLoc == PointerLoc) return; const char *NextChar = S.SourceMgr.getCharacterData(FixItLoc); if (!NextChar) return; SmallString<32> InsertionTextBuf{" "}; InsertionTextBuf += getNullabilitySpelling(Nullability); InsertionTextBuf += " "; StringRef InsertionText = InsertionTextBuf.str(); if (isWhitespace(*NextChar)) { InsertionText = InsertionText.drop_back(); } else if (NextChar[-1] == '[') { if (NextChar[0] == ']') InsertionText = InsertionText.drop_back().drop_front(); else InsertionText = InsertionText.drop_front(); } else if (!isIdentifierBody(NextChar[0], /*allow dollar*/true) && !isIdentifierBody(NextChar[-1], /*allow dollar*/true)) { InsertionText = InsertionText.drop_back().drop_front(); } Diag << FixItHint::CreateInsertion(FixItLoc, InsertionText); } static void emitNullabilityConsistencyWarning(Sema &S, SimplePointerKind PointerKind, SourceLocation PointerLoc) { assert(PointerLoc.isValid()); if (PointerKind == SimplePointerKind::Array) { S.Diag(PointerLoc, diag::warn_nullability_missing_array); } else { S.Diag(PointerLoc, diag::warn_nullability_missing) << static_cast(PointerKind); } if (PointerLoc.isMacroID()) return; auto addFixIt = [&](NullabilityKind Nullability) { auto Diag = S.Diag(PointerLoc, diag::note_nullability_fix_it); Diag << static_cast(Nullability); Diag << static_cast(PointerKind); fixItNullability(S, Diag, PointerLoc, Nullability); }; addFixIt(NullabilityKind::Nullable); addFixIt(NullabilityKind::NonNull); } /// Complains about missing nullability if the file containing \p pointerLoc /// has other uses of nullability (either the keywords or the \c assume_nonnull /// pragma). /// /// If the file has \e not seen other uses of nullability, this particular /// pointer is saved for possible later diagnosis. See recordNullabilitySeen(). static void checkNullabilityConsistency(Sema &S, SimplePointerKind pointerKind, SourceLocation pointerLoc) { // Determine which file we're performing consistency checking for. FileID file = getNullabilityCompletenessCheckFileID(S, pointerLoc); if (file.isInvalid()) return; // If we haven't seen any type nullability in this file, we won't warn now // about anything. FileNullability &fileNullability = S.NullabilityMap[file]; if (!fileNullability.SawTypeNullability) { // If this is the first pointer declarator in the file, and the appropriate // warning is on, record it in case we need to diagnose it retroactively. diag::kind diagKind; if (pointerKind == SimplePointerKind::Array) diagKind = diag::warn_nullability_missing_array; else diagKind = diag::warn_nullability_missing; if (fileNullability.PointerLoc.isInvalid() && !S.Context.getDiagnostics().isIgnored(diagKind, pointerLoc)) { fileNullability.PointerLoc = pointerLoc; fileNullability.PointerKind = static_cast(pointerKind); } return; } // Complain about missing nullability. emitNullabilityConsistencyWarning(S, pointerKind, pointerLoc); } /// Marks that a nullability feature has been used in the file containing /// \p loc. /// /// If this file already had pointer types in it that were missing nullability, /// the first such instance is retroactively diagnosed. 
///
/// \sa checkNullabilityConsistency
static void recordNullabilitySeen(Sema &S, SourceLocation loc) {
  FileID file = getNullabilityCompletenessCheckFileID(S, loc);
  if (file.isInvalid())
    return;

  FileNullability &fileNullability = S.NullabilityMap[file];
  if (fileNullability.SawTypeNullability)
    return;
  fileNullability.SawTypeNullability = true;

  // If we haven't seen any type nullability before, now we have. Retroactively
  // diagnose the first unannotated pointer, if there was one.
  if (fileNullability.PointerLoc.isInvalid())
    return;

  auto kind = static_cast<SimplePointerKind>(fileNullability.PointerKind);
  emitNullabilityConsistencyWarning(S, kind, fileNullability.PointerLoc);
}

/// Returns true if any of the declarator chunks before \p endIndex include a
/// level of indirection: array, pointer, reference, or pointer-to-member.
///
/// Because declarator chunks are stored in outer-to-inner order, testing
/// every chunk before \p endIndex is testing all chunks that embed the current
/// chunk as part of their type.
///
/// It is legal to pass the result of Declarator::getNumTypeObjects() as the
/// end index, in which case all chunks are tested.
static bool hasOuterPointerLikeChunk(const Declarator &D, unsigned endIndex) {
  unsigned i = endIndex;
  while (i != 0) {
    // Walk outwards along the declarator chunks.
    --i;
    const DeclaratorChunk &DC = D.getTypeObject(i);
    switch (DC.Kind) {
    case DeclaratorChunk::Paren:
      break;
    case DeclaratorChunk::Array:
    case DeclaratorChunk::Pointer:
    case DeclaratorChunk::Reference:
    case DeclaratorChunk::MemberPointer:
      return true;
    case DeclaratorChunk::Function:
    case DeclaratorChunk::BlockPointer:
    case DeclaratorChunk::Pipe:
      // These are invalid anyway, so just ignore.
      break;
    }
  }
  return false;
}

static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
                                                QualType declSpecType,
                                                TypeSourceInfo *TInfo) {
  // The TypeSourceInfo that this function returns will not be a null type.
  // If there is an error, this function will fill in a dummy type as fallback.
  QualType T = declSpecType;
  Declarator &D = state.getDeclarator();
  Sema &S = state.getSema();
  ASTContext &Context = S.Context;
  const LangOptions &LangOpts = S.getLangOpts();

  // The name we're declaring, if any.
  DeclarationName Name;
  if (D.getIdentifier())
    Name = D.getIdentifier();

  // Does this declaration declare a typedef-name?
  bool IsTypedefName =
    D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_typedef ||
    D.getContext() == Declarator::AliasDeclContext ||
    D.getContext() == Declarator::AliasTemplateContext;

  // Does T refer to a function type with a cv-qualifier or a ref-qualifier?
  bool IsQualifiedFunction = T->isFunctionProtoType() &&
      (T->castAs<FunctionProtoType>()->getTypeQuals() != 0 ||
       T->castAs<FunctionProtoType>()->getRefQualifier() != RQ_None);

  // If T is 'decltype(auto)', the only declarators we can have are parens
  // and at most one function declarator if this is a function declaration.
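// For illustration, what the decltype(auto) restriction below accepts and
// rejects (a minimal sketch, C++14):
//
//   decltype(auto) a = 0;    // OK: no declarator chunks
//   decltype(auto) f();      // OK: the sole function declarator
//   decltype(auto) *p = &a;  // error: pointer chunk (DiagKind 0)
//   decltype(auto) &r = a;   // error: reference chunk (DiagKind 1)
//   decltype(auto) arr[4];   // error: array chunk (DiagKind 2)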
if (const AutoType *AT = T->getAs()) { if (AT->isDecltypeAuto()) { for (unsigned I = 0, E = D.getNumTypeObjects(); I != E; ++I) { unsigned Index = E - I - 1; DeclaratorChunk &DeclChunk = D.getTypeObject(Index); unsigned DiagId = diag::err_decltype_auto_compound_type; unsigned DiagKind = 0; switch (DeclChunk.Kind) { case DeclaratorChunk::Paren: continue; case DeclaratorChunk::Function: { unsigned FnIndex; if (D.isFunctionDeclarationContext() && D.isFunctionDeclarator(FnIndex) && FnIndex == Index) continue; DiagId = diag::err_decltype_auto_function_declarator_not_declaration; break; } case DeclaratorChunk::Pointer: case DeclaratorChunk::BlockPointer: case DeclaratorChunk::MemberPointer: DiagKind = 0; break; case DeclaratorChunk::Reference: DiagKind = 1; break; case DeclaratorChunk::Array: DiagKind = 2; break; case DeclaratorChunk::Pipe: break; } S.Diag(DeclChunk.Loc, DiagId) << DiagKind; D.setInvalidType(true); break; } } } // Determine whether we should infer _Nonnull on pointer types. Optional inferNullability; bool inferNullabilityCS = false; bool inferNullabilityInnerOnly = false; bool inferNullabilityInnerOnlyComplete = false; // Are we in an assume-nonnull region? bool inAssumeNonNullRegion = false; SourceLocation assumeNonNullLoc = S.PP.getPragmaAssumeNonNullLoc(); if (assumeNonNullLoc.isValid()) { inAssumeNonNullRegion = true; recordNullabilitySeen(S, assumeNonNullLoc); } // Whether to complain about missing nullability specifiers or not. enum { /// Never complain. CAMN_No, /// Complain on the inner pointers (but not the outermost /// pointer). CAMN_InnerPointers, /// Complain about any pointers that don't have nullability /// specified or inferred. CAMN_Yes } complainAboutMissingNullability = CAMN_No; unsigned NumPointersRemaining = 0; auto complainAboutInferringWithinChunk = PointerWrappingDeclaratorKind::None; if (IsTypedefName) { // For typedefs, we do not infer any nullability (the default), // and we only complain about missing nullability specifiers on // inner pointers. complainAboutMissingNullability = CAMN_InnerPointers; auto isDependentNonPointerType = [](QualType T) -> bool { // Note: This is intended to be the same check as Type::canHaveNullability // except with all of the ambiguous cases being treated as 'false' rather // than 'true'. return T->isDependentType() && !T->isAnyPointerType() && !T->isBlockPointerType() && !T->isMemberPointerType(); }; if (T->canHaveNullability() && !T->getNullability(S.Context) && !isDependentNonPointerType(T)) { // Note that we allow but don't require nullability on dependent types. 
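// For illustration, roughly how CAMN_InnerPointers plays out for typedefs
// (a minimal sketch; only inner pointer levels are expected to carry
// nullability):
//
//   typedef int *IntPtr;                 // outermost pointer: no complaint
//   typedef int **IntPtrPtr;             // inner 'int *' lacks nullability
//   typedef int *_Nullable *IntPtrPtr2;  // inner level annotated: OK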
++NumPointersRemaining; } for (unsigned i = 0, n = D.getNumTypeObjects(); i != n; ++i) { DeclaratorChunk &chunk = D.getTypeObject(i); switch (chunk.Kind) { case DeclaratorChunk::Array: case DeclaratorChunk::Function: case DeclaratorChunk::Pipe: break; case DeclaratorChunk::BlockPointer: case DeclaratorChunk::MemberPointer: ++NumPointersRemaining; break; case DeclaratorChunk::Paren: case DeclaratorChunk::Reference: continue; case DeclaratorChunk::Pointer: ++NumPointersRemaining; continue; } } } else { bool isFunctionOrMethod = false; switch (auto context = state.getDeclarator().getContext()) { case Declarator::ObjCParameterContext: case Declarator::ObjCResultContext: case Declarator::PrototypeContext: case Declarator::TrailingReturnContext: isFunctionOrMethod = true; // fallthrough case Declarator::MemberContext: if (state.getDeclarator().isObjCIvar() && !isFunctionOrMethod) { complainAboutMissingNullability = CAMN_No; break; } // Weak properties are inferred to be nullable. if (state.getDeclarator().isObjCWeakProperty() && inAssumeNonNullRegion) { inferNullability = NullabilityKind::Nullable; break; } // fallthrough case Declarator::FileContext: case Declarator::KNRTypeListContext: { complainAboutMissingNullability = CAMN_Yes; // Nullability inference depends on the type and declarator. auto wrappingKind = PointerWrappingDeclaratorKind::None; switch (classifyPointerDeclarator(S, T, D, wrappingKind)) { case PointerDeclaratorKind::NonPointer: case PointerDeclaratorKind::MultiLevelPointer: // Cannot infer nullability. break; case PointerDeclaratorKind::SingleLevelPointer: // Infer _Nonnull if we are in an assumes-nonnull region. if (inAssumeNonNullRegion) { complainAboutInferringWithinChunk = wrappingKind; inferNullability = NullabilityKind::NonNull; inferNullabilityCS = (context == Declarator::ObjCParameterContext || context == Declarator::ObjCResultContext); } break; case PointerDeclaratorKind::CFErrorRefPointer: case PointerDeclaratorKind::NSErrorPointerPointer: // Within a function or method signature, infer _Nullable at both // levels. if (isFunctionOrMethod && inAssumeNonNullRegion) inferNullability = NullabilityKind::Nullable; break; case PointerDeclaratorKind::MaybePointerToCFRef: if (isFunctionOrMethod) { // On pointer-to-pointer parameters marked cf_returns_retained or // cf_returns_not_retained, if the outer pointer is explicit then // infer the inner pointer as _Nullable. 
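// For illustration, the out-parameter patterns targeted here (a minimal
// sketch; the declarations are hypothetical):
//
//   void getError(NSError **err);              // inside assume_nonnull:
//                                              // both levels _Nullable
//   void copyName(CFStringRef *out)
//       __attribute__((cf_returns_retained));  // inner level inferred
//                                              // _Nullable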
auto hasCFReturnsAttr = [](const AttributeList *NextAttr) -> bool { while (NextAttr) { if (NextAttr->getKind() == AttributeList::AT_CFReturnsRetained || NextAttr->getKind() == AttributeList::AT_CFReturnsNotRetained) return true; NextAttr = NextAttr->getNext(); } return false; }; if (const auto *InnermostChunk = D.getInnermostNonParenChunk()) { if (hasCFReturnsAttr(D.getAttributes()) || hasCFReturnsAttr(InnermostChunk->getAttrs()) || hasCFReturnsAttr(D.getDeclSpec().getAttributes().getList())) { inferNullability = NullabilityKind::Nullable; inferNullabilityInnerOnly = true; } } } break; } break; } case Declarator::ConversionIdContext: complainAboutMissingNullability = CAMN_Yes; break; case Declarator::AliasDeclContext: case Declarator::AliasTemplateContext: case Declarator::BlockContext: case Declarator::BlockLiteralContext: case Declarator::ConditionContext: case Declarator::CXXCatchContext: case Declarator::CXXNewContext: case Declarator::ForContext: case Declarator::InitStmtContext: case Declarator::LambdaExprContext: case Declarator::LambdaExprParameterContext: case Declarator::ObjCCatchContext: case Declarator::TemplateParamContext: case Declarator::TemplateTypeArgContext: case Declarator::TypeNameContext: // Don't infer in these contexts. break; } } // Local function that returns true if its argument looks like a va_list. auto isVaList = [&S](QualType T) -> bool { auto *typedefTy = T->getAs(); if (!typedefTy) return false; TypedefDecl *vaListTypedef = S.Context.getBuiltinVaListDecl(); do { if (typedefTy->getDecl() == vaListTypedef) return true; if (auto *name = typedefTy->getDecl()->getIdentifier()) if (name->isStr("va_list")) return true; typedefTy = typedefTy->desugar()->getAs(); } while (typedefTy); return false; }; // Local function that checks the nullability for a given pointer declarator. // Returns true if _Nonnull was inferred. auto inferPointerNullability = [&](SimplePointerKind pointerKind, SourceLocation pointerLoc, AttributeList *&attrs) -> AttributeList * { // We've seen a pointer. if (NumPointersRemaining > 0) --NumPointersRemaining; // If a nullability attribute is present, there's nothing to do. if (hasNullabilityAttr(attrs)) return nullptr; // If we're supposed to infer nullability, do so now. if (inferNullability && !inferNullabilityInnerOnlyComplete) { AttributeList::Syntax syntax = inferNullabilityCS ? AttributeList::AS_ContextSensitiveKeyword : AttributeList::AS_Keyword; AttributeList *nullabilityAttr = state.getDeclarator().getAttributePool() .create( S.getNullabilityKeyword( *inferNullability), SourceRange(pointerLoc), nullptr, SourceLocation(), nullptr, 0, syntax); spliceAttrIntoList(*nullabilityAttr, attrs); if (inferNullabilityCS) { state.getDeclarator().getMutableDeclSpec().getObjCQualifiers() ->setObjCDeclQualifier(ObjCDeclSpec::DQ_CSNullability); } if (pointerLoc.isValid() && complainAboutInferringWithinChunk != PointerWrappingDeclaratorKind::None) { auto Diag = S.Diag(pointerLoc, diag::warn_nullability_inferred_on_nested_type); Diag << static_cast(complainAboutInferringWithinChunk); fixItNullability(S, Diag, pointerLoc, NullabilityKind::NonNull); } if (inferNullabilityInnerOnly) inferNullabilityInnerOnlyComplete = true; return nullabilityAttr; } // If we're supposed to complain about missing nullability, do so // now if it's truly missing. switch (complainAboutMissingNullability) { case CAMN_No: break; case CAMN_InnerPointers: if (NumPointersRemaining == 0) break; // Fallthrough. 
case CAMN_Yes: checkNullabilityConsistency(S, pointerKind, pointerLoc); } return nullptr; }; // If the type itself could have nullability but does not, infer pointer // nullability and perform consistency checking. if (S.ActiveTemplateInstantiations.empty()) { if (T->canHaveNullability() && !T->getNullability(S.Context)) { if (isVaList(T)) { // Record that we've seen a pointer, but do nothing else. if (NumPointersRemaining > 0) --NumPointersRemaining; } else { SimplePointerKind pointerKind = SimplePointerKind::Pointer; if (T->isBlockPointerType()) pointerKind = SimplePointerKind::BlockPointer; else if (T->isMemberPointerType()) pointerKind = SimplePointerKind::MemberPointer; if (auto *attr = inferPointerNullability( pointerKind, D.getDeclSpec().getTypeSpecTypeLoc(), D.getMutableDeclSpec().getAttributes().getListRef())) { T = Context.getAttributedType( AttributedType::getNullabilityAttrKind(*inferNullability),T,T); attr->setUsedAsTypeAttr(); } } } if (complainAboutMissingNullability == CAMN_Yes && T->isArrayType() && !T->getNullability(S.Context) && !isVaList(T) && D.isPrototypeContext() && !hasOuterPointerLikeChunk(D, D.getNumTypeObjects())) { checkNullabilityConsistency(S, SimplePointerKind::Array, D.getDeclSpec().getTypeSpecTypeLoc()); } } // Walk the DeclTypeInfo, building the recursive type as we go. // DeclTypeInfos are ordered from the identifier out, which is // opposite of what we want :). for (unsigned i = 0, e = D.getNumTypeObjects(); i != e; ++i) { unsigned chunkIndex = e - i - 1; state.setCurrentChunkIndex(chunkIndex); DeclaratorChunk &DeclType = D.getTypeObject(chunkIndex); IsQualifiedFunction &= DeclType.Kind == DeclaratorChunk::Paren; switch (DeclType.Kind) { case DeclaratorChunk::Paren: T = S.BuildParenType(T); break; case DeclaratorChunk::BlockPointer: // If blocks are disabled, emit an error. if (!LangOpts.Blocks) S.Diag(DeclType.Loc, diag::err_blocks_disable) << LangOpts.OpenCL; // Handle pointer nullability. inferPointerNullability(SimplePointerKind::BlockPointer, DeclType.Loc, DeclType.getAttrListRef()); T = S.BuildBlockPointerType(T, D.getIdentifierLoc(), Name); if (DeclType.Cls.TypeQuals || LangOpts.OpenCL) { // OpenCL v2.0, s6.12.5 - Block variable declarations are implicitly // qualified with const. if (LangOpts.OpenCL) DeclType.Cls.TypeQuals |= DeclSpec::TQ_const; T = S.BuildQualifiedType(T, DeclType.Loc, DeclType.Cls.TypeQuals); } break; case DeclaratorChunk::Pointer: // Verify that we're not building a pointer to pointer to function with // exception specification. if (LangOpts.CPlusPlus && S.CheckDistantExceptionSpec(T)) { S.Diag(D.getIdentifierLoc(), diag::err_distant_exception_spec); D.setInvalidType(true); // Build the type anyway. } // Handle pointer nullability inferPointerNullability(SimplePointerKind::Pointer, DeclType.Loc, DeclType.getAttrListRef()); if (LangOpts.ObjC1 && T->getAs()) { T = Context.getObjCObjectPointerType(T); if (DeclType.Ptr.TypeQuals) T = S.BuildQualifiedType(T, DeclType.Loc, DeclType.Ptr.TypeQuals); break; } // OpenCL v2.0 s6.9b - Pointer to image/sampler cannot be used. // OpenCL v2.0 s6.13.16.1 - Pointer to pipe cannot be used. // OpenCL v2.0 s6.12.5 - Pointers to Blocks are not allowed. 
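// For illustration, OpenCL declarations rejected by the check below (a
// minimal sketch; see the OpenCL v2.0 sections cited above):
//
//   kernel void k(global image2d_t *p);  // error: pointer to image
//   void f(sampler_t *s);                // error: pointer to sampler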
if (LangOpts.OpenCL) { if (T->isImageType() || T->isSamplerT() || T->isPipeType() || T->isBlockPointerType()) { S.Diag(D.getIdentifierLoc(), diag::err_opencl_pointer_to_type) << T; D.setInvalidType(true); } } T = S.BuildPointerType(T, DeclType.Loc, Name); if (DeclType.Ptr.TypeQuals) T = S.BuildQualifiedType(T, DeclType.Loc, DeclType.Ptr.TypeQuals); break; case DeclaratorChunk::Reference: { // Verify that we're not building a reference to pointer to function with // exception specification. if (LangOpts.CPlusPlus && S.CheckDistantExceptionSpec(T)) { S.Diag(D.getIdentifierLoc(), diag::err_distant_exception_spec); D.setInvalidType(true); // Build the type anyway. } T = S.BuildReferenceType(T, DeclType.Ref.LValueRef, DeclType.Loc, Name); if (DeclType.Ref.HasRestrict) T = S.BuildQualifiedType(T, DeclType.Loc, Qualifiers::Restrict); break; } case DeclaratorChunk::Array: { // Verify that we're not building an array of pointers to function with // exception specification. if (LangOpts.CPlusPlus && S.CheckDistantExceptionSpec(T)) { S.Diag(D.getIdentifierLoc(), diag::err_distant_exception_spec); D.setInvalidType(true); // Build the type anyway. } DeclaratorChunk::ArrayTypeInfo &ATI = DeclType.Arr; Expr *ArraySize = static_cast(ATI.NumElts); ArrayType::ArraySizeModifier ASM; if (ATI.isStar) ASM = ArrayType::Star; else if (ATI.hasStatic) ASM = ArrayType::Static; else ASM = ArrayType::Normal; if (ASM == ArrayType::Star && !D.isPrototypeContext()) { // FIXME: This check isn't quite right: it allows star in prototypes // for function definitions, and disallows some edge cases detailed // in http://gcc.gnu.org/ml/gcc-patches/2009-02/msg00133.html S.Diag(DeclType.Loc, diag::err_array_star_outside_prototype); ASM = ArrayType::Normal; D.setInvalidType(true); } // C99 6.7.5.2p1: The optional type qualifiers and the keyword static // shall appear only in a declaration of a function parameter with an // array type, ... if (ASM == ArrayType::Static || ATI.TypeQuals) { if (!(D.isPrototypeContext() || D.getContext() == Declarator::KNRTypeListContext)) { S.Diag(DeclType.Loc, diag::err_array_static_outside_prototype) << (ASM == ArrayType::Static ? "'static'" : "type qualifier"); // Remove the 'static' and the type qualifiers. if (ASM == ArrayType::Static) ASM = ArrayType::Normal; ATI.TypeQuals = 0; D.setInvalidType(true); } // C99 6.7.5.2p1: ... and then only in the outermost array type // derivation. if (hasOuterPointerLikeChunk(D, chunkIndex)) { S.Diag(DeclType.Loc, diag::err_array_static_not_outermost) << (ASM == ArrayType::Static ? "'static'" : "type qualifier"); if (ASM == ArrayType::Static) ASM = ArrayType::Normal; ATI.TypeQuals = 0; D.setInvalidType(true); } } const AutoType *AT = T->getContainedAutoType(); // Allow arrays of auto if we are a generic lambda parameter. // i.e. [](auto (&array)[5]) { return array[0]; }; OK if (AT && D.getContext() != Declarator::LambdaExprParameterContext) { // We've already diagnosed this for decltype(auto). if (!AT->isDecltypeAuto()) S.Diag(DeclType.Loc, diag::err_illegal_decl_array_of_auto) << getPrintableNameForEntity(Name) << T; T = QualType(); break; } // Array parameters can be marked nullable as well, although it's not // necessary if they're marked 'static'. 
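// For illustration (a minimal sketch; array parameters decay to pointers,
// so they take part in nullability checking unless declared 'static'):
//
//   void f(int a[_Nonnull 3]);  // annotated inside the brackets: OK
//   void g(int a[]);            // warns once the file uses nullability
//   void h(int a[static 3]);    // 'static' implies non-null; no warning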
if (complainAboutMissingNullability == CAMN_Yes && !hasNullabilityAttr(DeclType.getAttrs()) && ASM != ArrayType::Static && D.isPrototypeContext() && !hasOuterPointerLikeChunk(D, chunkIndex)) { checkNullabilityConsistency(S, SimplePointerKind::Array, DeclType.Loc); } T = S.BuildArrayType(T, ASM, ArraySize, ATI.TypeQuals, SourceRange(DeclType.Loc, DeclType.EndLoc), Name); break; } case DeclaratorChunk::Function: { // If the function declarator has a prototype (i.e. it is not () and // does not have a K&R-style identifier list), then the arguments are part // of the type, otherwise the argument list is (). const DeclaratorChunk::FunctionTypeInfo &FTI = DeclType.Fun; IsQualifiedFunction = FTI.TypeQuals || FTI.hasRefQualifier(); // Check for auto functions and trailing return type and adjust the // return type accordingly. if (!D.isInvalidType()) { // trailing-return-type is only required if we're declaring a function, // and not, for instance, a pointer to a function. if (D.getDeclSpec().containsPlaceholderType() && !FTI.hasTrailingReturnType() && chunkIndex == 0 && !S.getLangOpts().CPlusPlus14) { S.Diag(D.getDeclSpec().getTypeSpecTypeLoc(), D.getDeclSpec().getTypeSpecType() == DeclSpec::TST_auto ? diag::err_auto_missing_trailing_return : diag::err_deduced_return_type); T = Context.IntTy; D.setInvalidType(true); } else if (FTI.hasTrailingReturnType()) { // T must be exactly 'auto' at this point. See CWG issue 681. if (isa(T)) { S.Diag(D.getDeclSpec().getTypeSpecTypeLoc(), diag::err_trailing_return_in_parens) << T << D.getDeclSpec().getSourceRange(); D.setInvalidType(true); } else if (D.getContext() != Declarator::LambdaExprContext && (T.hasQualifiers() || !isa(T) || cast(T)->getKeyword() != AutoTypeKeyword::Auto)) { S.Diag(D.getDeclSpec().getTypeSpecTypeLoc(), diag::err_trailing_return_without_auto) << T << D.getDeclSpec().getSourceRange(); D.setInvalidType(true); } T = S.GetTypeFromParser(FTI.getTrailingReturnType(), &TInfo); if (T.isNull()) { // An error occurred parsing the trailing return type. T = Context.IntTy; D.setInvalidType(true); } } } // C99 6.7.5.3p1: The return type may not be a function or array type. // For conversion functions, we'll diagnose this particular error later. if ((T->isArrayType() || T->isFunctionType()) && (D.getName().getKind() != UnqualifiedId::IK_ConversionFunctionId)) { unsigned diagID = diag::err_func_returning_array_function; // Last processing chunk in block context means this function chunk // represents the block. if (chunkIndex == 0 && D.getContext() == Declarator::BlockLiteralContext) diagID = diag::err_block_returning_array_function; S.Diag(DeclType.Loc, diagID) << T->isFunctionType() << T; T = Context.IntTy; D.setInvalidType(true); } // Do not allow returning half FP value. // FIXME: This really should be in BuildFunctionType. if (T->isHalfType()) { if (S.getLangOpts().OpenCL) { if (!S.getOpenCLOptions().isEnabled("cl_khr_fp16")) { S.Diag(D.getIdentifierLoc(), diag::err_opencl_invalid_return) << T << 0 /*pointer hint*/; D.setInvalidType(true); } } else if (!S.getLangOpts().HalfArgsAndReturns) { S.Diag(D.getIdentifierLoc(), diag::err_parameters_retval_cannot_have_fp16_type) << 1; D.setInvalidType(true); } } if (LangOpts.OpenCL) { // OpenCL v2.0 s6.12.5 - A block cannot be the return value of a // function. 
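// For illustration, return types rejected for OpenCL by the check below (a
// minimal sketch; 'half' additionally requires the cl_khr_fp16 extension):
//
//   half f(void);                 // error unless cl_khr_fp16 is enabled
//   read_only image2d_t g(void);  // error: image return type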
if (T->isBlockPointerType() || T->isImageType() || T->isSamplerT() || T->isPipeType()) { S.Diag(D.getIdentifierLoc(), diag::err_opencl_invalid_return) << T << 1 /*hint off*/; D.setInvalidType(true); } // OpenCL doesn't support variadic functions and blocks // (s6.9.e and s6.12.5 OpenCL v2.0) except for printf. // We also allow here any toolchain reserved identifiers. if (FTI.isVariadic && !(D.getIdentifier() && ((D.getIdentifier()->getName() == "printf" && LangOpts.OpenCLVersion >= 120) || D.getIdentifier()->getName().startswith("__")))) { S.Diag(D.getIdentifierLoc(), diag::err_opencl_variadic_function); D.setInvalidType(true); } } // Methods cannot return interface types. All ObjC objects are // passed by reference. if (T->isObjCObjectType()) { SourceLocation DiagLoc, FixitLoc; if (TInfo) { DiagLoc = TInfo->getTypeLoc().getLocStart(); FixitLoc = S.getLocForEndOfToken(TInfo->getTypeLoc().getLocEnd()); } else { DiagLoc = D.getDeclSpec().getTypeSpecTypeLoc(); FixitLoc = S.getLocForEndOfToken(D.getDeclSpec().getLocEnd()); } S.Diag(DiagLoc, diag::err_object_cannot_be_passed_returned_by_value) << 0 << T << FixItHint::CreateInsertion(FixitLoc, "*"); T = Context.getObjCObjectPointerType(T); if (TInfo) { TypeLocBuilder TLB; TLB.pushFullCopy(TInfo->getTypeLoc()); ObjCObjectPointerTypeLoc TLoc = TLB.push(T); TLoc.setStarLoc(FixitLoc); TInfo = TLB.getTypeSourceInfo(Context, T); } D.setInvalidType(true); } // cv-qualifiers on return types are pointless except when the type is a // class type in C++. if ((T.getCVRQualifiers() || T->isAtomicType()) && !(S.getLangOpts().CPlusPlus && (T->isDependentType() || T->isRecordType()))) { if (T->isVoidType() && !S.getLangOpts().CPlusPlus && D.getFunctionDefinitionKind() == FDK_Definition) { // [6.9.1/3] qualified void return is invalid on a C // function definition. Apparently ok on declarations and // in C++ though (!) S.Diag(DeclType.Loc, diag::err_func_returning_qualified_void) << T; } else diagnoseRedundantReturnTypeQualifiers(S, T, D, chunkIndex); } // Objective-C ARC ownership qualifiers are ignored on the function // return type (by type canonicalization). Complain if this attribute // was written here. if (T.getQualifiers().hasObjCLifetime()) { SourceLocation AttrLoc; if (chunkIndex + 1 < D.getNumTypeObjects()) { DeclaratorChunk ReturnTypeChunk = D.getTypeObject(chunkIndex + 1); for (const AttributeList *Attr = ReturnTypeChunk.getAttrs(); Attr; Attr = Attr->getNext()) { if (Attr->getKind() == AttributeList::AT_ObjCOwnership) { AttrLoc = Attr->getLoc(); break; } } } if (AttrLoc.isInvalid()) { for (const AttributeList *Attr = D.getDeclSpec().getAttributes().getList(); Attr; Attr = Attr->getNext()) { if (Attr->getKind() == AttributeList::AT_ObjCOwnership) { AttrLoc = Attr->getLoc(); break; } } } if (AttrLoc.isValid()) { // The ownership attributes are almost always written via // the predefined // __strong/__weak/__autoreleasing/__unsafe_unretained. if (AttrLoc.isMacroID()) AttrLoc = S.SourceMgr.getImmediateExpansionRange(AttrLoc).first; S.Diag(AttrLoc, diag::warn_arc_lifetime_result_type) << T.getQualifiers().getObjCLifetime(); } } if (LangOpts.CPlusPlus && D.getDeclSpec().hasTagDefinition()) { // C++ [dcl.fct]p6: // Types shall not be defined in return or parameter types. TagDecl *Tag = cast(D.getDeclSpec().getRepAsDecl()); S.Diag(Tag->getLocation(), diag::err_type_defined_in_result_type) << Context.getTypeDeclType(Tag); } // Exception specs are not allowed in typedefs. Complain, but add it // anyway. 
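// For illustration, the typedef cases diagnosed below (a minimal sketch;
// C++1z makes the exception specification part of the function type, so
// these become valid there):
//
//   typedef void Fn() throw();      // error before C++1z
//   using Alias = void() noexcept;  // likewise diagnosed for alias-decls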
if (IsTypedefName && FTI.getExceptionSpecType() && !LangOpts.CPlusPlus1z) S.Diag(FTI.getExceptionSpecLocBeg(), diag::err_exception_spec_in_typedef) << (D.getContext() == Declarator::AliasDeclContext || D.getContext() == Declarator::AliasTemplateContext); // If we see "T var();" or "T var(T());" at block scope, it is probably // an attempt to initialize a variable, not a function declaration. if (FTI.isAmbiguous) warnAboutAmbiguousFunction(S, D, DeclType, T); // GNU warning -Wstrict-prototypes // Warn if a function declaration is without a prototype. // This warning is issued for all kinds of unprototyped function // declarations (i.e. function type typedef, function pointer etc.) // C99 6.7.5.3p14: // The empty list in a function declarator that is not part of a // definition of that function specifies that no information // about the number or types of the parameters is supplied. if (D.getFunctionDefinitionKind() == FDK_Declaration && FTI.NumParams == 0 && !LangOpts.CPlusPlus) S.Diag(DeclType.Loc, diag::warn_strict_prototypes) << 0 << FixItHint::CreateInsertion(FTI.getRParenLoc(), "void"); FunctionType::ExtInfo EI(getCCForDeclaratorChunk(S, D, FTI, chunkIndex)); if (!FTI.NumParams && !FTI.isVariadic && !LangOpts.CPlusPlus) { // Simple void foo(), where the incoming T is the result type. T = Context.getFunctionNoProtoType(T, EI); } else { // We allow a zero-parameter variadic function in C if the // function is marked with the "overloadable" attribute. Scan // for this attribute now. if (!FTI.NumParams && FTI.isVariadic && !LangOpts.CPlusPlus) { bool Overloadable = false; for (const AttributeList *Attrs = D.getAttributes(); Attrs; Attrs = Attrs->getNext()) { if (Attrs->getKind() == AttributeList::AT_Overloadable) { Overloadable = true; break; } } if (!Overloadable) S.Diag(FTI.getEllipsisLoc(), diag::err_ellipsis_first_param); } if (FTI.NumParams && FTI.Params[0].Param == nullptr) { // C99 6.7.5.3p3: Reject int(x,y,z) when it's not a function // definition. S.Diag(FTI.Params[0].IdentLoc, diag::err_ident_list_in_fn_declaration); D.setInvalidType(true); // Recover by creating a K&R-style function type. T = Context.getFunctionNoProtoType(T, EI); break; } FunctionProtoType::ExtProtoInfo EPI; EPI.ExtInfo = EI; EPI.Variadic = FTI.isVariadic; EPI.HasTrailingReturn = FTI.hasTrailingReturnType(); EPI.TypeQuals = FTI.TypeQuals; EPI.RefQualifier = !FTI.hasRefQualifier()? RQ_None : FTI.RefQualifierIsLValueRef? RQ_LValue : RQ_RValue; // Otherwise, we have a function with a parameter list that is // potentially variadic. SmallVector ParamTys; ParamTys.reserve(FTI.NumParams); SmallVector ExtParameterInfos(FTI.NumParams); bool HasAnyInterestingExtParameterInfos = false; for (unsigned i = 0, e = FTI.NumParams; i != e; ++i) { ParmVarDecl *Param = cast(FTI.Params[i].Param); QualType ParamTy = Param->getType(); assert(!ParamTy.isNull() && "Couldn't parse type?"); // Look for 'void'. void is allowed only as a single parameter to a // function with no other parameters (C99 6.7.5.3p10). We record // int(void) as a FunctionProtoType with an empty parameter list. if (ParamTy->isVoidType()) { // If this is something like 'float(int, void)', reject it. 'void' // is an incomplete type (C99 6.2.5p19) and function decls cannot // have parameters of incomplete type. if (FTI.NumParams != 1 || FTI.isVariadic) { S.Diag(DeclType.Loc, diag::err_void_only_param); ParamTy = Context.IntTy; Param->setType(ParamTy); } else if (FTI.Params[i].Ident) { // Reject, but continue to parse 'int(void abc)'. 
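// For illustration, the C 'void' parameter rules enforced here (a minimal
// sketch, C99 6.7.5.3p10):
//
//   int f(void);       // OK: explicit empty parameter list
//   int g(void, int);  // error: 'void' must be the only parameter
//   int h(const void); // error: 'void' may not be qualified
//   int i(void abc);   // error: a 'void' parameter cannot be named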
S.Diag(FTI.Params[i].IdentLoc, diag::err_param_with_void_type); ParamTy = Context.IntTy; Param->setType(ParamTy); } else { // Reject, but continue to parse 'float(const void)'. if (ParamTy.hasQualifiers()) S.Diag(DeclType.Loc, diag::err_void_param_qualified); // Do not add 'void' to the list. break; } } else if (ParamTy->isHalfType()) { // Disallow half FP parameters. // FIXME: This really should be in BuildFunctionType. if (S.getLangOpts().OpenCL) { if (!S.getOpenCLOptions().isEnabled("cl_khr_fp16")) { S.Diag(Param->getLocation(), diag::err_opencl_half_param) << ParamTy; D.setInvalidType(); Param->setInvalidDecl(); } } else if (!S.getLangOpts().HalfArgsAndReturns) { S.Diag(Param->getLocation(), diag::err_parameters_retval_cannot_have_fp16_type) << 0; D.setInvalidType(); } } else if (!FTI.hasPrototype) { if (ParamTy->isPromotableIntegerType()) { ParamTy = Context.getPromotedIntegerType(ParamTy); Param->setKNRPromoted(true); } else if (const BuiltinType* BTy = ParamTy->getAs()) { if (BTy->getKind() == BuiltinType::Float) { ParamTy = Context.DoubleTy; Param->setKNRPromoted(true); } } } if (LangOpts.ObjCAutoRefCount && Param->hasAttr()) { ExtParameterInfos[i] = ExtParameterInfos[i].withIsConsumed(true); HasAnyInterestingExtParameterInfos = true; } if (auto attr = Param->getAttr()) { ExtParameterInfos[i] = ExtParameterInfos[i].withABI(attr->getABI()); HasAnyInterestingExtParameterInfos = true; } ParamTys.push_back(ParamTy); } if (HasAnyInterestingExtParameterInfos) { EPI.ExtParameterInfos = ExtParameterInfos.data(); checkExtParameterInfos(S, ParamTys, EPI, [&](unsigned i) { return FTI.Params[i].Param->getLocation(); }); } SmallVector Exceptions; SmallVector DynamicExceptions; SmallVector DynamicExceptionRanges; Expr *NoexceptExpr = nullptr; if (FTI.getExceptionSpecType() == EST_Dynamic) { // FIXME: It's rather inefficient to have to split into two vectors // here. unsigned N = FTI.getNumExceptions(); DynamicExceptions.reserve(N); DynamicExceptionRanges.reserve(N); for (unsigned I = 0; I != N; ++I) { DynamicExceptions.push_back(FTI.Exceptions[I].Ty); DynamicExceptionRanges.push_back(FTI.Exceptions[I].Range); } } else if (FTI.getExceptionSpecType() == EST_ComputedNoexcept) { NoexceptExpr = FTI.NoexceptExpr; } S.checkExceptionSpecification(D.isFunctionDeclarationContext(), FTI.getExceptionSpecType(), DynamicExceptions, DynamicExceptionRanges, NoexceptExpr, Exceptions, EPI.ExceptionSpec); T = Context.getFunctionType(T, ParamTys, EPI); } break; } case DeclaratorChunk::MemberPointer: { // The scope spec must refer to a class, or be dependent. CXXScopeSpec &SS = DeclType.Mem.Scope(); QualType ClsType; // Handle pointer nullability. inferPointerNullability(SimplePointerKind::MemberPointer, DeclType.Loc, DeclType.getAttrListRef()); if (SS.isInvalid()) { // Avoid emitting extra errors if we already errored on the scope. 
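// For illustration, the scope requirements checked for member pointers (a
// minimal sketch):
//
//   struct S { int m; };
//   namespace N {}
//   int S::*pm = &S::m;  // OK: nested-name-specifier names a class
//   int N::*bad;         // error: names a namespace, not a class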
D.setInvalidType(true); } else if (S.isDependentScopeSpecifier(SS) || dyn_cast_or_null(S.computeDeclContext(SS))) { NestedNameSpecifier *NNS = SS.getScopeRep(); NestedNameSpecifier *NNSPrefix = NNS->getPrefix(); switch (NNS->getKind()) { case NestedNameSpecifier::Identifier: ClsType = Context.getDependentNameType(ETK_None, NNSPrefix, NNS->getAsIdentifier()); break; case NestedNameSpecifier::Namespace: case NestedNameSpecifier::NamespaceAlias: case NestedNameSpecifier::Global: case NestedNameSpecifier::Super: llvm_unreachable("Nested-name-specifier must name a type"); case NestedNameSpecifier::TypeSpec: case NestedNameSpecifier::TypeSpecWithTemplate: ClsType = QualType(NNS->getAsType(), 0); // Note: if the NNS has a prefix and ClsType is a nondependent // TemplateSpecializationType, then the NNS prefix is NOT included // in ClsType; hence we wrap ClsType into an ElaboratedType. // NOTE: in particular, no wrap occurs if ClsType already is an // Elaborated, DependentName, or DependentTemplateSpecialization. if (NNSPrefix && isa(NNS->getAsType())) ClsType = Context.getElaboratedType(ETK_None, NNSPrefix, ClsType); break; } } else { S.Diag(DeclType.Mem.Scope().getBeginLoc(), diag::err_illegal_decl_mempointer_in_nonclass) << (D.getIdentifier() ? D.getIdentifier()->getName() : "type name") << DeclType.Mem.Scope().getRange(); D.setInvalidType(true); } if (!ClsType.isNull()) T = S.BuildMemberPointerType(T, ClsType, DeclType.Loc, D.getIdentifier()); if (T.isNull()) { T = Context.IntTy; D.setInvalidType(true); } else if (DeclType.Mem.TypeQuals) { T = S.BuildQualifiedType(T, DeclType.Loc, DeclType.Mem.TypeQuals); } break; } case DeclaratorChunk::Pipe: { T = S.BuildReadPipeType(T, DeclType.Loc); processTypeAttrs(state, T, TAL_DeclSpec, D.getDeclSpec().getAttributes().getList()); break; } } if (T.isNull()) { D.setInvalidType(true); T = Context.IntTy; } // See if there are any attributes on this declarator chunk. processTypeAttrs(state, T, TAL_DeclChunk, const_cast(DeclType.getAttrs())); } assert(!T.isNull() && "T must not be null after this point"); if (LangOpts.CPlusPlus && T->isFunctionType()) { const FunctionProtoType *FnTy = T->getAs(); assert(FnTy && "Why oh why is there not a FunctionProtoType here?"); // C++ 8.3.5p4: // A cv-qualifier-seq shall only be part of the function type // for a nonstatic member function, the function type to which a pointer // to member refers, or the top-level function type of a function typedef // declaration. // // Core issue 547 also allows cv-qualifiers on function types that are // top-level template type arguments. bool FreeFunction; if (!D.getCXXScopeSpec().isSet()) { FreeFunction = ((D.getContext() != Declarator::MemberContext && D.getContext() != Declarator::LambdaExprContext) || D.getDeclSpec().isFriendSpecified()); } else { DeclContext *DC = S.computeDeclContext(D.getCXXScopeSpec()); FreeFunction = (DC && !DC->isRecord()); } // C++11 [dcl.fct]p6 (w/DR1417): // An attempt to specify a function type with a cv-qualifier-seq or a // ref-qualifier (including by typedef-name) is ill-formed unless it is: // - the function type for a non-static member function, // - the function type to which a pointer to member refers, // - the top-level function type of a function typedef declaration or // alias-declaration, // - the type-id in the default argument of a type-parameter, or // - the type-id of a template-argument for a type-parameter // // FIXME: Checking this here is insufficient. We accept-invalid on: // // template struct S { void f(T); }; // S s; // // ... 
for instance. if (IsQualifiedFunction && !(!FreeFunction && D.getDeclSpec().getStorageClassSpec() != DeclSpec::SCS_static) && !IsTypedefName && D.getContext() != Declarator::TemplateTypeArgContext) { SourceLocation Loc = D.getLocStart(); SourceRange RemovalRange; unsigned I; if (D.isFunctionDeclarator(I)) { SmallVector RemovalLocs; const DeclaratorChunk &Chunk = D.getTypeObject(I); assert(Chunk.Kind == DeclaratorChunk::Function); if (Chunk.Fun.hasRefQualifier()) RemovalLocs.push_back(Chunk.Fun.getRefQualifierLoc()); if (Chunk.Fun.TypeQuals & Qualifiers::Const) RemovalLocs.push_back(Chunk.Fun.getConstQualifierLoc()); if (Chunk.Fun.TypeQuals & Qualifiers::Volatile) RemovalLocs.push_back(Chunk.Fun.getVolatileQualifierLoc()); if (Chunk.Fun.TypeQuals & Qualifiers::Restrict) RemovalLocs.push_back(Chunk.Fun.getRestrictQualifierLoc()); if (!RemovalLocs.empty()) { std::sort(RemovalLocs.begin(), RemovalLocs.end(), BeforeThanCompare(S.getSourceManager())); RemovalRange = SourceRange(RemovalLocs.front(), RemovalLocs.back()); Loc = RemovalLocs.front(); } } S.Diag(Loc, diag::err_invalid_qualified_function_type) << FreeFunction << D.isFunctionDeclarator() << T << getFunctionQualifiersAsString(FnTy) << FixItHint::CreateRemoval(RemovalRange); // Strip the cv-qualifiers and ref-qualifiers from the type. FunctionProtoType::ExtProtoInfo EPI = FnTy->getExtProtoInfo(); EPI.TypeQuals = 0; EPI.RefQualifier = RQ_None; T = Context.getFunctionType(FnTy->getReturnType(), FnTy->getParamTypes(), EPI); // Rebuild any parens around the identifier in the function type. for (unsigned i = 0, e = D.getNumTypeObjects(); i != e; ++i) { if (D.getTypeObject(i).Kind != DeclaratorChunk::Paren) break; T = S.BuildParenType(T); } } } // Apply any undistributed attributes from the declarator. processTypeAttrs(state, T, TAL_DeclName, D.getAttributes()); // Diagnose any ignored type attributes. state.diagnoseIgnoredTypeAttrs(T); // C++0x [dcl.constexpr]p9: // A constexpr specifier used in an object declaration declares the object // as const. if (D.getDeclSpec().isConstexprSpecified() && T->isObjectType()) { T.addConst(); } // If there was an ellipsis in the declarator, the declaration declares a // parameter pack whose type may be a pack expansion type. if (D.hasEllipsis()) { // C++0x [dcl.fct]p13: // A declarator-id or abstract-declarator containing an ellipsis shall // only be used in a parameter-declaration. Such a parameter-declaration // is a parameter pack (14.5.3). [...] switch (D.getContext()) { case Declarator::PrototypeContext: case Declarator::LambdaExprParameterContext: // C++0x [dcl.fct]p13: // [...] When it is part of a parameter-declaration-clause, the // parameter pack is a function parameter pack (14.5.3). The type T // of the declarator-id of the function parameter pack shall contain // a template parameter pack; each template parameter pack in T is // expanded by the function parameter pack. // // We represent function parameter packs as function parameters whose // type is a pack expansion. if (!T->containsUnexpandedParameterPack()) { S.Diag(D.getEllipsisLoc(), diag::err_function_parameter_pack_without_parameter_packs) << T << D.getSourceRange(); D.setEllipsisLoc(SourceLocation()); } else { T = Context.getPackExpansionType(T, None); } break; case Declarator::TemplateParamContext: // C++0x [temp.param]p15: // If a template-parameter is a [...] is a parameter-declaration that // declares a parameter pack (8.3.5), then the template-parameter is a // template parameter pack (14.5.3). 
// // Note: core issue 778 clarifies that, if there are any unexpanded // parameter packs in the type of the non-type template parameter, then // it expands those parameter packs. if (T->containsUnexpandedParameterPack()) T = Context.getPackExpansionType(T, None); else S.Diag(D.getEllipsisLoc(), LangOpts.CPlusPlus11 ? diag::warn_cxx98_compat_variadic_templates : diag::ext_variadic_templates); break; case Declarator::FileContext: case Declarator::KNRTypeListContext: case Declarator::ObjCParameterContext: // FIXME: special diagnostic here? case Declarator::ObjCResultContext: // FIXME: special diagnostic here? case Declarator::TypeNameContext: case Declarator::CXXNewContext: case Declarator::AliasDeclContext: case Declarator::AliasTemplateContext: case Declarator::MemberContext: case Declarator::BlockContext: case Declarator::ForContext: case Declarator::InitStmtContext: case Declarator::ConditionContext: case Declarator::CXXCatchContext: case Declarator::ObjCCatchContext: case Declarator::BlockLiteralContext: case Declarator::LambdaExprContext: case Declarator::ConversionIdContext: case Declarator::TrailingReturnContext: case Declarator::TemplateTypeArgContext: // FIXME: We may want to allow parameter packs in block-literal contexts // in the future. S.Diag(D.getEllipsisLoc(), diag::err_ellipsis_in_declarator_not_parameter); D.setEllipsisLoc(SourceLocation()); break; } } assert(!T.isNull() && "T must not be null at the end of this function"); if (D.isInvalidType()) return Context.getTrivialTypeSourceInfo(T); return S.GetTypeSourceInfoForDeclarator(D, T, TInfo); } /// GetTypeForDeclarator - Convert the type for the specified /// declarator to Type instances. /// /// The result of this call will never be null, but the associated /// type may be a null type if there's an unrecoverable error. TypeSourceInfo *Sema::GetTypeForDeclarator(Declarator &D, Scope *S) { // Determine the type of the declarator. Not all forms of declarator // have a type. TypeProcessingState state(*this, D); TypeSourceInfo *ReturnTypeInfo = nullptr; QualType T = GetDeclSpecTypeForDeclarator(state, ReturnTypeInfo); if (D.isPrototypeContext() && getLangOpts().ObjCAutoRefCount) inferARCWriteback(state, T); return GetFullTypeForDeclarator(state, T, ReturnTypeInfo); } static void transferARCOwnershipToDeclSpec(Sema &S, QualType &declSpecTy, Qualifiers::ObjCLifetime ownership) { if (declSpecTy->isObjCRetainableType() && declSpecTy.getObjCLifetime() == Qualifiers::OCL_None) { Qualifiers qs; qs.addObjCLifetime(ownership); declSpecTy = S.Context.getQualifiedType(declSpecTy, qs); } } static void transferARCOwnershipToDeclaratorChunk(TypeProcessingState &state, Qualifiers::ObjCLifetime ownership, unsigned chunkIndex) { Sema &S = state.getSema(); Declarator &D = state.getDeclarator(); // Look for an explicit lifetime attribute. 
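  // For illustration (ARC): a cast such as `(id *)ptr` picks up the
  // operand's inferred inner ownership, behaving roughly as if
  // `(__strong id *)ptr` had been written when that ownership is __strong;
  // the synthesized objc_ownership attribute below is how the inference
  // is recorded on the declarator chunk.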
DeclaratorChunk &chunk = D.getTypeObject(chunkIndex); for (const AttributeList *attr = chunk.getAttrs(); attr; attr = attr->getNext()) if (attr->getKind() == AttributeList::AT_ObjCOwnership) return; const char *attrStr = nullptr; switch (ownership) { case Qualifiers::OCL_None: llvm_unreachable("no ownership!"); case Qualifiers::OCL_ExplicitNone: attrStr = "none"; break; case Qualifiers::OCL_Strong: attrStr = "strong"; break; case Qualifiers::OCL_Weak: attrStr = "weak"; break; case Qualifiers::OCL_Autoreleasing: attrStr = "autoreleasing"; break; } IdentifierLoc *Arg = new (S.Context) IdentifierLoc; Arg->Ident = &S.Context.Idents.get(attrStr); Arg->Loc = SourceLocation(); ArgsUnion Args(Arg); // If there wasn't one, add one (with an invalid source location // so that we don't make an AttributedType for it). AttributeList *attr = D.getAttributePool() .create(&S.Context.Idents.get("objc_ownership"), SourceLocation(), /*scope*/ nullptr, SourceLocation(), /*args*/ &Args, 1, AttributeList::AS_GNU); spliceAttrIntoList(*attr, chunk.getAttrListRef()); // TODO: mark whether we did this inference? } /// \brief Used for transferring ownership in casts resulting in l-values. static void transferARCOwnership(TypeProcessingState &state, QualType &declSpecTy, Qualifiers::ObjCLifetime ownership) { Sema &S = state.getSema(); Declarator &D = state.getDeclarator(); int inner = -1; bool hasIndirection = false; for (unsigned i = 0, e = D.getNumTypeObjects(); i != e; ++i) { DeclaratorChunk &chunk = D.getTypeObject(i); switch (chunk.Kind) { case DeclaratorChunk::Paren: // Ignore parens. break; case DeclaratorChunk::Array: case DeclaratorChunk::Reference: case DeclaratorChunk::Pointer: if (inner != -1) hasIndirection = true; inner = i; break; case DeclaratorChunk::BlockPointer: if (inner != -1) transferARCOwnershipToDeclaratorChunk(state, ownership, i); return; case DeclaratorChunk::Function: case DeclaratorChunk::MemberPointer: case DeclaratorChunk::Pipe: return; } } if (inner == -1) return; DeclaratorChunk &chunk = D.getTypeObject(inner); if (chunk.Kind == DeclaratorChunk::Pointer) { if (declSpecTy->isObjCRetainableType()) return transferARCOwnershipToDeclSpec(S, declSpecTy, ownership); if (declSpecTy->isObjCObjectType() && hasIndirection) return transferARCOwnershipToDeclaratorChunk(state, ownership, inner); } else { assert(chunk.Kind == DeclaratorChunk::Array || chunk.Kind == DeclaratorChunk::Reference); return transferARCOwnershipToDeclSpec(S, declSpecTy, ownership); } } TypeSourceInfo *Sema::GetTypeForDeclaratorCast(Declarator &D, QualType FromTy) { TypeProcessingState state(*this, D); TypeSourceInfo *ReturnTypeInfo = nullptr; QualType declSpecTy = GetDeclSpecTypeForDeclarator(state, ReturnTypeInfo); if (getLangOpts().ObjC1) { Qualifiers::ObjCLifetime ownership = Context.getInnerObjCOwnership(FromTy); if (ownership != Qualifiers::OCL_None) transferARCOwnership(state, declSpecTy, ownership); } return GetFullTypeForDeclarator(state, declSpecTy, ReturnTypeInfo); } /// Map an AttributedType::Kind to an AttributeList::Kind. 
static AttributeList::Kind getAttrListKind(AttributedType::Kind kind) { switch (kind) { case AttributedType::attr_address_space: return AttributeList::AT_AddressSpace; case AttributedType::attr_regparm: return AttributeList::AT_Regparm; case AttributedType::attr_vector_size: return AttributeList::AT_VectorSize; case AttributedType::attr_neon_vector_type: return AttributeList::AT_NeonVectorType; case AttributedType::attr_neon_polyvector_type: return AttributeList::AT_NeonPolyVectorType; case AttributedType::attr_objc_gc: return AttributeList::AT_ObjCGC; case AttributedType::attr_objc_ownership: case AttributedType::attr_objc_inert_unsafe_unretained: return AttributeList::AT_ObjCOwnership; case AttributedType::attr_noreturn: return AttributeList::AT_NoReturn; case AttributedType::attr_cdecl: return AttributeList::AT_CDecl; case AttributedType::attr_fastcall: return AttributeList::AT_FastCall; case AttributedType::attr_stdcall: return AttributeList::AT_StdCall; case AttributedType::attr_thiscall: return AttributeList::AT_ThisCall; case AttributedType::attr_regcall: return AttributeList::AT_RegCall; case AttributedType::attr_pascal: return AttributeList::AT_Pascal; case AttributedType::attr_swiftcall: return AttributeList::AT_SwiftCall; case AttributedType::attr_vectorcall: return AttributeList::AT_VectorCall; case AttributedType::attr_pcs: case AttributedType::attr_pcs_vfp: return AttributeList::AT_Pcs; case AttributedType::attr_inteloclbicc: return AttributeList::AT_IntelOclBicc; case AttributedType::attr_ms_abi: return AttributeList::AT_MSABI; case AttributedType::attr_sysv_abi: return AttributeList::AT_SysVABI; case AttributedType::attr_preserve_most: return AttributeList::AT_PreserveMost; case AttributedType::attr_preserve_all: return AttributeList::AT_PreserveAll; case AttributedType::attr_ptr32: return AttributeList::AT_Ptr32; case AttributedType::attr_ptr64: return AttributeList::AT_Ptr64; case AttributedType::attr_sptr: return AttributeList::AT_SPtr; case AttributedType::attr_uptr: return AttributeList::AT_UPtr; case AttributedType::attr_nonnull: return AttributeList::AT_TypeNonNull; case AttributedType::attr_nullable: return AttributeList::AT_TypeNullable; case AttributedType::attr_null_unspecified: return AttributeList::AT_TypeNullUnspecified; case AttributedType::attr_objc_kindof: return AttributeList::AT_ObjCKindOf; } llvm_unreachable("unexpected attribute kind!"); } static void fillAttributedTypeLoc(AttributedTypeLoc TL, const AttributeList *attrs, const AttributeList *DeclAttrs = nullptr) { // DeclAttrs and attrs cannot be both empty. assert((attrs || DeclAttrs) && "no type attributes in the expected location!"); AttributeList::Kind parsedKind = getAttrListKind(TL.getAttrKind()); // Try to search for an attribute of matching kind in attrs list. while (attrs && attrs->getKind() != parsedKind) attrs = attrs->getNext(); if (!attrs) { // No matching type attribute in attrs list found. // Try searching through C++11 attributes in the declarator attribute list. 
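  // (A GNU attribute spelled with the C++11 [[gnu::...]] syntax is treated
  // as a declaration attribute, so it lives on the declarator itself rather
  // than on the chunk; that is why the declarator's list is searched as a
  // fallback here.)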
while (DeclAttrs && (!DeclAttrs->isCXX11Attribute() || DeclAttrs->getKind() != parsedKind)) DeclAttrs = DeclAttrs->getNext(); attrs = DeclAttrs; } assert(attrs && "no matching type attribute in expected location!"); TL.setAttrNameLoc(attrs->getLoc()); if (TL.hasAttrExprOperand()) { assert(attrs->isArgExpr(0) && "mismatched attribute operand kind"); TL.setAttrExprOperand(attrs->getArgAsExpr(0)); } else if (TL.hasAttrEnumOperand()) { assert((attrs->isArgIdent(0) || attrs->isArgExpr(0)) && "unexpected attribute operand kind"); if (attrs->isArgIdent(0)) TL.setAttrEnumOperandLoc(attrs->getArgAsIdent(0)->Loc); else TL.setAttrEnumOperandLoc(attrs->getArgAsExpr(0)->getExprLoc()); } // FIXME: preserve this information to here. if (TL.hasAttrOperand()) TL.setAttrOperandParensRange(SourceRange()); } namespace { class TypeSpecLocFiller : public TypeLocVisitor { ASTContext &Context; const DeclSpec &DS; public: TypeSpecLocFiller(ASTContext &Context, const DeclSpec &DS) : Context(Context), DS(DS) {} void VisitAttributedTypeLoc(AttributedTypeLoc TL) { fillAttributedTypeLoc(TL, DS.getAttributes().getList()); Visit(TL.getModifiedLoc()); } void VisitQualifiedTypeLoc(QualifiedTypeLoc TL) { Visit(TL.getUnqualifiedLoc()); } void VisitTypedefTypeLoc(TypedefTypeLoc TL) { TL.setNameLoc(DS.getTypeSpecTypeLoc()); } void VisitObjCInterfaceTypeLoc(ObjCInterfaceTypeLoc TL) { TL.setNameLoc(DS.getTypeSpecTypeLoc()); // FIXME. We should have DS.getTypeSpecTypeEndLoc(). But, it requires // addition field. What we have is good enough for dispay of location // of 'fixit' on interface name. TL.setNameEndLoc(DS.getLocEnd()); } void VisitObjCObjectTypeLoc(ObjCObjectTypeLoc TL) { TypeSourceInfo *RepTInfo = nullptr; Sema::GetTypeFromParser(DS.getRepAsType(), &RepTInfo); TL.copy(RepTInfo->getTypeLoc()); } void VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc TL) { TypeSourceInfo *RepTInfo = nullptr; Sema::GetTypeFromParser(DS.getRepAsType(), &RepTInfo); TL.copy(RepTInfo->getTypeLoc()); } void VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc TL) { TypeSourceInfo *TInfo = nullptr; Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo); // If we got no declarator info from previous Sema routines, // just fill with the typespec loc. if (!TInfo) { TL.initialize(Context, DS.getTypeSpecTypeNameLoc()); return; } TypeLoc OldTL = TInfo->getTypeLoc(); if (TInfo->getType()->getAs()) { ElaboratedTypeLoc ElabTL = OldTL.castAs(); TemplateSpecializationTypeLoc NamedTL = ElabTL.getNamedTypeLoc() .castAs(); TL.copy(NamedTL); } else { TL.copy(OldTL.castAs()); assert(TL.getRAngleLoc() == OldTL.castAs().getRAngleLoc()); } } void VisitTypeOfExprTypeLoc(TypeOfExprTypeLoc TL) { assert(DS.getTypeSpecType() == DeclSpec::TST_typeofExpr); TL.setTypeofLoc(DS.getTypeSpecTypeLoc()); TL.setParensRange(DS.getTypeofParensRange()); } void VisitTypeOfTypeLoc(TypeOfTypeLoc TL) { assert(DS.getTypeSpecType() == DeclSpec::TST_typeofType); TL.setTypeofLoc(DS.getTypeSpecTypeLoc()); TL.setParensRange(DS.getTypeofParensRange()); assert(DS.getRepAsType()); TypeSourceInfo *TInfo = nullptr; Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo); TL.setUnderlyingTInfo(TInfo); } void VisitUnaryTransformTypeLoc(UnaryTransformTypeLoc TL) { // FIXME: This holds only because we only have one unary transform. 
assert(DS.getTypeSpecType() == DeclSpec::TST_underlyingType); TL.setKWLoc(DS.getTypeSpecTypeLoc()); TL.setParensRange(DS.getTypeofParensRange()); assert(DS.getRepAsType()); TypeSourceInfo *TInfo = nullptr; Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo); TL.setUnderlyingTInfo(TInfo); } void VisitBuiltinTypeLoc(BuiltinTypeLoc TL) { // By default, use the source location of the type specifier. TL.setBuiltinLoc(DS.getTypeSpecTypeLoc()); if (TL.needsExtraLocalData()) { // Set info for the written builtin specifiers. TL.getWrittenBuiltinSpecs() = DS.getWrittenBuiltinSpecs(); // Try to have a meaningful source location. if (TL.getWrittenSignSpec() != TSS_unspecified) TL.expandBuiltinRange(DS.getTypeSpecSignLoc()); if (TL.getWrittenWidthSpec() != TSW_unspecified) TL.expandBuiltinRange(DS.getTypeSpecWidthRange()); } } void VisitElaboratedTypeLoc(ElaboratedTypeLoc TL) { ElaboratedTypeKeyword Keyword = TypeWithKeyword::getKeywordForTypeSpec(DS.getTypeSpecType()); if (DS.getTypeSpecType() == TST_typename) { TypeSourceInfo *TInfo = nullptr; Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo); if (TInfo) { TL.copy(TInfo->getTypeLoc().castAs()); return; } } TL.setElaboratedKeywordLoc(Keyword != ETK_None ? DS.getTypeSpecTypeLoc() : SourceLocation()); const CXXScopeSpec& SS = DS.getTypeSpecScope(); TL.setQualifierLoc(SS.getWithLocInContext(Context)); Visit(TL.getNextTypeLoc().getUnqualifiedLoc()); } void VisitDependentNameTypeLoc(DependentNameTypeLoc TL) { assert(DS.getTypeSpecType() == TST_typename); TypeSourceInfo *TInfo = nullptr; Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo); assert(TInfo); TL.copy(TInfo->getTypeLoc().castAs()); } void VisitDependentTemplateSpecializationTypeLoc( DependentTemplateSpecializationTypeLoc TL) { assert(DS.getTypeSpecType() == TST_typename); TypeSourceInfo *TInfo = nullptr; Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo); assert(TInfo); TL.copy( TInfo->getTypeLoc().castAs()); } void VisitTagTypeLoc(TagTypeLoc TL) { TL.setNameLoc(DS.getTypeSpecTypeNameLoc()); } void VisitAtomicTypeLoc(AtomicTypeLoc TL) { // An AtomicTypeLoc can come from either an _Atomic(...) type specifier // or an _Atomic qualifier. if (DS.getTypeSpecType() == DeclSpec::TST_atomic) { TL.setKWLoc(DS.getTypeSpecTypeLoc()); TL.setParensRange(DS.getTypeofParensRange()); TypeSourceInfo *TInfo = nullptr; Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo); assert(TInfo); TL.getValueLoc().initializeFullCopy(TInfo->getTypeLoc()); } else { TL.setKWLoc(DS.getAtomicSpecLoc()); // No parens, to indicate this was spelled as an _Atomic qualifier. TL.setParensRange(SourceRange()); Visit(TL.getValueLoc()); } } void VisitPipeTypeLoc(PipeTypeLoc TL) { TL.setKWLoc(DS.getTypeSpecTypeLoc()); TypeSourceInfo *TInfo = nullptr; Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo); TL.getValueLoc().initializeFullCopy(TInfo->getTypeLoc()); } void VisitTypeLoc(TypeLoc TL) { // FIXME: add other typespec types and change this to an assert. 
TL.initialize(Context, DS.getTypeSpecTypeLoc()); } }; class DeclaratorLocFiller : public TypeLocVisitor { ASTContext &Context; const DeclaratorChunk &Chunk; public: DeclaratorLocFiller(ASTContext &Context, const DeclaratorChunk &Chunk) : Context(Context), Chunk(Chunk) {} void VisitQualifiedTypeLoc(QualifiedTypeLoc TL) { llvm_unreachable("qualified type locs not expected here!"); } void VisitDecayedTypeLoc(DecayedTypeLoc TL) { llvm_unreachable("decayed type locs not expected here!"); } void VisitAttributedTypeLoc(AttributedTypeLoc TL) { fillAttributedTypeLoc(TL, Chunk.getAttrs()); } void VisitAdjustedTypeLoc(AdjustedTypeLoc TL) { // nothing } void VisitBlockPointerTypeLoc(BlockPointerTypeLoc TL) { assert(Chunk.Kind == DeclaratorChunk::BlockPointer); TL.setCaretLoc(Chunk.Loc); } void VisitPointerTypeLoc(PointerTypeLoc TL) { assert(Chunk.Kind == DeclaratorChunk::Pointer); TL.setStarLoc(Chunk.Loc); } void VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc TL) { assert(Chunk.Kind == DeclaratorChunk::Pointer); TL.setStarLoc(Chunk.Loc); } void VisitMemberPointerTypeLoc(MemberPointerTypeLoc TL) { assert(Chunk.Kind == DeclaratorChunk::MemberPointer); const CXXScopeSpec& SS = Chunk.Mem.Scope(); NestedNameSpecifierLoc NNSLoc = SS.getWithLocInContext(Context); const Type* ClsTy = TL.getClass(); QualType ClsQT = QualType(ClsTy, 0); TypeSourceInfo *ClsTInfo = Context.CreateTypeSourceInfo(ClsQT, 0); // Now copy source location info into the type loc component. TypeLoc ClsTL = ClsTInfo->getTypeLoc(); switch (NNSLoc.getNestedNameSpecifier()->getKind()) { case NestedNameSpecifier::Identifier: assert(isa(ClsTy) && "Unexpected TypeLoc"); { DependentNameTypeLoc DNTLoc = ClsTL.castAs(); DNTLoc.setElaboratedKeywordLoc(SourceLocation()); DNTLoc.setQualifierLoc(NNSLoc.getPrefix()); DNTLoc.setNameLoc(NNSLoc.getLocalBeginLoc()); } break; case NestedNameSpecifier::TypeSpec: case NestedNameSpecifier::TypeSpecWithTemplate: if (isa(ClsTy)) { ElaboratedTypeLoc ETLoc = ClsTL.castAs(); ETLoc.setElaboratedKeywordLoc(SourceLocation()); ETLoc.setQualifierLoc(NNSLoc.getPrefix()); TypeLoc NamedTL = ETLoc.getNamedTypeLoc(); NamedTL.initializeFullCopy(NNSLoc.getTypeLoc()); } else { ClsTL.initializeFullCopy(NNSLoc.getTypeLoc()); } break; case NestedNameSpecifier::Namespace: case NestedNameSpecifier::NamespaceAlias: case NestedNameSpecifier::Global: case NestedNameSpecifier::Super: llvm_unreachable("Nested-name-specifier must name a type"); } // Finally fill in MemberPointerLocInfo fields. TL.setStarLoc(Chunk.Loc); TL.setClassTInfo(ClsTInfo); } void VisitLValueReferenceTypeLoc(LValueReferenceTypeLoc TL) { assert(Chunk.Kind == DeclaratorChunk::Reference); // 'Amp' is misleading: this might have been originally /// spelled with AmpAmp. 
TL.setAmpLoc(Chunk.Loc); } void VisitRValueReferenceTypeLoc(RValueReferenceTypeLoc TL) { assert(Chunk.Kind == DeclaratorChunk::Reference); assert(!Chunk.Ref.LValueRef); TL.setAmpAmpLoc(Chunk.Loc); } void VisitArrayTypeLoc(ArrayTypeLoc TL) { assert(Chunk.Kind == DeclaratorChunk::Array); TL.setLBracketLoc(Chunk.Loc); TL.setRBracketLoc(Chunk.EndLoc); TL.setSizeExpr(static_cast(Chunk.Arr.NumElts)); } void VisitFunctionTypeLoc(FunctionTypeLoc TL) { assert(Chunk.Kind == DeclaratorChunk::Function); TL.setLocalRangeBegin(Chunk.Loc); TL.setLocalRangeEnd(Chunk.EndLoc); const DeclaratorChunk::FunctionTypeInfo &FTI = Chunk.Fun; TL.setLParenLoc(FTI.getLParenLoc()); TL.setRParenLoc(FTI.getRParenLoc()); for (unsigned i = 0, e = TL.getNumParams(), tpi = 0; i != e; ++i) { ParmVarDecl *Param = cast(FTI.Params[i].Param); TL.setParam(tpi++, Param); } TL.setExceptionSpecRange(FTI.getExceptionSpecRange()); } void VisitParenTypeLoc(ParenTypeLoc TL) { assert(Chunk.Kind == DeclaratorChunk::Paren); TL.setLParenLoc(Chunk.Loc); TL.setRParenLoc(Chunk.EndLoc); } void VisitPipeTypeLoc(PipeTypeLoc TL) { assert(Chunk.Kind == DeclaratorChunk::Pipe); TL.setKWLoc(Chunk.Loc); } void VisitTypeLoc(TypeLoc TL) { llvm_unreachable("unsupported TypeLoc kind in declarator!"); } }; } // end anonymous namespace static void fillAtomicQualLoc(AtomicTypeLoc ATL, const DeclaratorChunk &Chunk) { SourceLocation Loc; switch (Chunk.Kind) { case DeclaratorChunk::Function: case DeclaratorChunk::Array: case DeclaratorChunk::Paren: case DeclaratorChunk::Pipe: llvm_unreachable("cannot be _Atomic qualified"); case DeclaratorChunk::Pointer: Loc = SourceLocation::getFromRawEncoding(Chunk.Ptr.AtomicQualLoc); break; case DeclaratorChunk::BlockPointer: case DeclaratorChunk::Reference: case DeclaratorChunk::MemberPointer: // FIXME: Provide a source location for the _Atomic keyword. break; } ATL.setKWLoc(Loc); ATL.setParensRange(SourceRange()); } /// \brief Create and instantiate a TypeSourceInfo with type source information. /// /// \param T QualType referring to the type as written in source code. /// /// \param ReturnTypeInfo For declarators whose return type does not show /// up in the normal place in the declaration specifiers (such as a C++ /// conversion function), this pointer will refer to a type source information /// for that return type. TypeSourceInfo * Sema::GetTypeSourceInfoForDeclarator(Declarator &D, QualType T, TypeSourceInfo *ReturnTypeInfo) { TypeSourceInfo *TInfo = Context.CreateTypeSourceInfo(T); UnqualTypeLoc CurrTL = TInfo->getTypeLoc().getUnqualifiedLoc(); const AttributeList *DeclAttrs = D.getAttributes(); // Handle parameter packs whose type is a pack expansion. if (isa(T)) { CurrTL.castAs().setEllipsisLoc(D.getEllipsisLoc()); CurrTL = CurrTL.getNextTypeLoc().getUnqualifiedLoc(); } for (unsigned i = 0, e = D.getNumTypeObjects(); i != e; ++i) { // An AtomicTypeLoc might be produced by an atomic qualifier in this // declarator chunk. if (AtomicTypeLoc ATL = CurrTL.getAs()) { fillAtomicQualLoc(ATL, D.getTypeObject(i)); CurrTL = ATL.getValueLoc().getUnqualifiedLoc(); } while (AttributedTypeLoc TL = CurrTL.getAs()) { fillAttributedTypeLoc(TL, D.getTypeObject(i).getAttrs(), DeclAttrs); CurrTL = TL.getNextTypeLoc().getUnqualifiedLoc(); } // FIXME: Ordering here? 
while (AdjustedTypeLoc TL = CurrTL.getAs()) CurrTL = TL.getNextTypeLoc().getUnqualifiedLoc(); DeclaratorLocFiller(Context, D.getTypeObject(i)).Visit(CurrTL); CurrTL = CurrTL.getNextTypeLoc().getUnqualifiedLoc(); } // If we have different source information for the return type, use // that. This really only applies to C++ conversion functions. if (ReturnTypeInfo) { TypeLoc TL = ReturnTypeInfo->getTypeLoc(); assert(TL.getFullDataSize() == CurrTL.getFullDataSize()); memcpy(CurrTL.getOpaqueData(), TL.getOpaqueData(), TL.getFullDataSize()); } else { TypeSpecLocFiller(Context, D.getDeclSpec()).Visit(CurrTL); } return TInfo; } /// \brief Create a LocInfoType to hold the given QualType and TypeSourceInfo. ParsedType Sema::CreateParsedType(QualType T, TypeSourceInfo *TInfo) { // FIXME: LocInfoTypes are "transient", only needed for passing to/from Parser // and Sema during declaration parsing. Try deallocating/caching them when // it's appropriate, instead of allocating them and keeping them around. LocInfoType *LocT = (LocInfoType*)BumpAlloc.Allocate(sizeof(LocInfoType), TypeAlignment); new (LocT) LocInfoType(T, TInfo); assert(LocT->getTypeClass() != T->getTypeClass() && "LocInfoType's TypeClass conflicts with an existing Type class"); return ParsedType::make(QualType(LocT, 0)); } void LocInfoType::getAsStringInternal(std::string &Str, const PrintingPolicy &Policy) const { llvm_unreachable("LocInfoType leaked into the type system; an opaque TypeTy*" " was used directly instead of getting the QualType through" " GetTypeFromParser"); } TypeResult Sema::ActOnTypeName(Scope *S, Declarator &D) { // C99 6.7.6: Type names have no identifier. This is already validated by // the parser. assert(D.getIdentifier() == nullptr && "Type name should have no identifier!"); TypeSourceInfo *TInfo = GetTypeForDeclarator(D, S); QualType T = TInfo->getType(); if (D.isInvalidType()) return true; // Make sure there are no unused decl attributes on the declarator. // We don't want to do this for ObjC parameters because we're going // to apply them to the actual parameter declaration. // Likewise, we don't want to do this for alias declarations, because // we are actually going to build a declaration from this eventually. if (D.getContext() != Declarator::ObjCParameterContext && D.getContext() != Declarator::AliasDeclContext && D.getContext() != Declarator::AliasTemplateContext) checkUnusedDeclAttributes(D); if (getLangOpts().CPlusPlus) { // Check that there are no default arguments (C++ only). CheckExtraCXXDefaultArguments(D); } return CreateParsedType(T, TInfo); } ParsedType Sema::ActOnObjCInstanceType(SourceLocation Loc) { QualType T = Context.getObjCInstanceType(); TypeSourceInfo *TInfo = Context.getTrivialTypeSourceInfo(T, Loc); return CreateParsedType(T, TInfo); } //===----------------------------------------------------------------------===// // Type Attribute Processing //===----------------------------------------------------------------------===// /// HandleAddressSpaceTypeAttribute - Process an address_space attribute on the /// specified type. The attribute contains 1 argument, the id of the address /// space for the type. static void HandleAddressSpaceTypeAttribute(QualType &Type, const AttributeList &Attr, Sema &S){ // If this type is already address space qualified, reject it. // ISO/IEC TR 18037 S5.3 (amending C99 6.7.3): "No type shall be qualified by // qualifiers for two or more different address spaces." 
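  // e.g. `__attribute__((address_space(1))) int X;` is accepted, while
  // stacking a second, different address space onto the same type is
  // rejected immediately below.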
  if (Type.getAddressSpace()) {
    S.Diag(Attr.getLoc(), diag::err_attribute_address_multiple_qualifiers);
    Attr.setInvalid();
    return;
  }

  // ISO/IEC TR 18037 S5.3 (amending C99 6.7.3): "A function type shall not be
  // qualified by an address-space qualifier."
  if (Type->isFunctionType()) {
    S.Diag(Attr.getLoc(), diag::err_attribute_address_function_type);
    Attr.setInvalid();
    return;
  }

  unsigned ASIdx;
  if (Attr.getKind() == AttributeList::AT_AddressSpace) {
    // Check the attribute arguments.
    if (Attr.getNumArgs() != 1) {
      S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments)
        << Attr.getName() << 1;
      Attr.setInvalid();
      return;
    }
    Expr *ASArgExpr = static_cast<Expr *>(Attr.getArgAsExpr(0));
    llvm::APSInt addrSpace(32);
    if (ASArgExpr->isTypeDependent() || ASArgExpr->isValueDependent() ||
        !ASArgExpr->isIntegerConstantExpr(addrSpace, S.Context)) {
      S.Diag(Attr.getLoc(), diag::err_attribute_argument_type)
        << Attr.getName() << AANT_ArgumentIntegerConstant
        << ASArgExpr->getSourceRange();
      Attr.setInvalid();
      return;
    }

    // Bounds checking.
    if (addrSpace.isSigned()) {
      if (addrSpace.isNegative()) {
        S.Diag(Attr.getLoc(), diag::err_attribute_address_space_negative)
          << ASArgExpr->getSourceRange();
        Attr.setInvalid();
        return;
      }
      addrSpace.setIsSigned(false);
    }
    llvm::APSInt max(addrSpace.getBitWidth());
    max = Qualifiers::MaxAddressSpace;
    if (addrSpace > max) {
      S.Diag(Attr.getLoc(), diag::err_attribute_address_space_too_high)
        << int(Qualifiers::MaxAddressSpace) << ASArgExpr->getSourceRange();
      Attr.setInvalid();
      return;
    }
    ASIdx = static_cast<unsigned>(addrSpace.getZExtValue());
  } else {
    // The keyword-based type attributes imply which address space to use.
    switch (Attr.getKind()) {
    case AttributeList::AT_OpenCLGlobalAddressSpace:
      ASIdx = LangAS::opencl_global; break;
    case AttributeList::AT_OpenCLLocalAddressSpace:
      ASIdx = LangAS::opencl_local; break;
    case AttributeList::AT_OpenCLConstantAddressSpace:
      ASIdx = LangAS::opencl_constant; break;
    case AttributeList::AT_OpenCLGenericAddressSpace:
      ASIdx = LangAS::opencl_generic; break;
    default:
      assert(Attr.getKind() == AttributeList::AT_OpenCLPrivateAddressSpace);
      ASIdx = 0; break;
    }
  }

  Type = S.Context.getAddrSpaceQualType(Type, ASIdx);
}

/// Does this type have a "direct" ownership qualifier?  That is,
/// is it written like "__strong id", as opposed to something like
/// "typeof(foo)", where that happens to be strong?
static bool hasDirectOwnershipQualifier(QualType type) {
  // Fast path: no qualifier at all.
  assert(type.getQualifiers().hasObjCLifetime());

  while (true) {
    // __strong id
    if (const AttributedType *attr = dyn_cast<AttributedType>(type)) {
      if (attr->getAttrKind() == AttributedType::attr_objc_ownership)
        return true;

      type = attr->getModifiedType();

    // X *__strong (...)
    } else if (const ParenType *paren = dyn_cast<ParenType>(type)) {
      type = paren->getInnerType();

    // That's it for things we want to complain about.  In particular,
    // we do not want to look through typedefs, typeof(expr),
    // typeof(type), or any other way that the type is somehow
    // abstracted.
    } else {
      return false;
    }
  }
}

/// handleObjCOwnershipTypeAttr - Process an objc_ownership
/// attribute on the specified type.
///
/// Returns 'true' if the attribute was handled.
static bool handleObjCOwnershipTypeAttr(TypeProcessingState &state, AttributeList &attr, QualType &type) { bool NonObjCPointer = false; if (!type->isDependentType() && !type->isUndeducedType()) { if (const PointerType *ptr = type->getAs()) { QualType pointee = ptr->getPointeeType(); if (pointee->isObjCRetainableType() || pointee->isPointerType()) return false; // It is important not to lose the source info that there was an attribute // applied to non-objc pointer. We will create an attributed type but // its type will be the same as the original type. NonObjCPointer = true; } else if (!type->isObjCRetainableType()) { return false; } // Don't accept an ownership attribute in the declspec if it would // just be the return type of a block pointer. if (state.isProcessingDeclSpec()) { Declarator &D = state.getDeclarator(); if (maybeMovePastReturnType(D, D.getNumTypeObjects(), /*onlyBlockPointers=*/true)) return false; } } Sema &S = state.getSema(); SourceLocation AttrLoc = attr.getLoc(); if (AttrLoc.isMacroID()) AttrLoc = S.getSourceManager().getImmediateExpansionRange(AttrLoc).first; if (!attr.isArgIdent(0)) { S.Diag(AttrLoc, diag::err_attribute_argument_type) << attr.getName() << AANT_ArgumentString; attr.setInvalid(); return true; } IdentifierInfo *II = attr.getArgAsIdent(0)->Ident; Qualifiers::ObjCLifetime lifetime; if (II->isStr("none")) lifetime = Qualifiers::OCL_ExplicitNone; else if (II->isStr("strong")) lifetime = Qualifiers::OCL_Strong; else if (II->isStr("weak")) lifetime = Qualifiers::OCL_Weak; else if (II->isStr("autoreleasing")) lifetime = Qualifiers::OCL_Autoreleasing; else { S.Diag(AttrLoc, diag::warn_attribute_type_not_supported) << attr.getName() << II; attr.setInvalid(); return true; } // Just ignore lifetime attributes other than __weak and __unsafe_unretained // outside of ARC mode. if (!S.getLangOpts().ObjCAutoRefCount && lifetime != Qualifiers::OCL_Weak && lifetime != Qualifiers::OCL_ExplicitNone) { return true; } SplitQualType underlyingType = type.split(); // Check for redundant/conflicting ownership qualifiers. if (Qualifiers::ObjCLifetime previousLifetime = type.getQualifiers().getObjCLifetime()) { // If it's written directly, that's an error. if (hasDirectOwnershipQualifier(type)) { S.Diag(AttrLoc, diag::err_attr_objc_ownership_redundant) << type; return true; } // Otherwise, if the qualifiers actually conflict, pull sugar off // and remove the ObjCLifetime qualifiers. if (previousLifetime != lifetime) { // It's possible to have multiple local ObjCLifetime qualifiers. We // can't stop after we reach a type that is directly qualified. const Type *prevTy = nullptr; while (!prevTy || prevTy != underlyingType.Ty) { prevTy = underlyingType.Ty; underlyingType = underlyingType.getSingleStepDesugaredType(); } underlyingType.Quals.removeObjCLifetime(); } } underlyingType.Quals.addObjCLifetime(lifetime); if (NonObjCPointer) { StringRef name = attr.getName()->getName(); switch (lifetime) { case Qualifiers::OCL_None: case Qualifiers::OCL_ExplicitNone: break; case Qualifiers::OCL_Strong: name = "__strong"; break; case Qualifiers::OCL_Weak: name = "__weak"; break; case Qualifiers::OCL_Autoreleasing: name = "__autoreleasing"; break; } S.Diag(AttrLoc, diag::warn_type_attribute_wrong_type) << name << TDS_ObjCObjOrBlock << type; } // Don't actually add the __unsafe_unretained qualifier in non-ARC files, // because having both 'T' and '__unsafe_unretained T' exist in the type // system causes unfortunate widespread consistency problems. 
(For example, // they're not considered compatible types, and we mangle them identicially // as template arguments.) These problems are all individually fixable, // but it's easier to just not add the qualifier and instead sniff it out // in specific places using isObjCInertUnsafeUnretainedType(). // // Doing this does means we miss some trivial consistency checks that // would've triggered in ARC, but that's better than trying to solve all // the coexistence problems with __unsafe_unretained. if (!S.getLangOpts().ObjCAutoRefCount && lifetime == Qualifiers::OCL_ExplicitNone) { type = S.Context.getAttributedType( AttributedType::attr_objc_inert_unsafe_unretained, type, type); return true; } QualType origType = type; if (!NonObjCPointer) type = S.Context.getQualifiedType(underlyingType); // If we have a valid source location for the attribute, use an // AttributedType instead. if (AttrLoc.isValid()) type = S.Context.getAttributedType(AttributedType::attr_objc_ownership, origType, type); auto diagnoseOrDelay = [](Sema &S, SourceLocation loc, unsigned diagnostic, QualType type) { if (S.DelayedDiagnostics.shouldDelayDiagnostics()) { S.DelayedDiagnostics.add( sema::DelayedDiagnostic::makeForbiddenType( S.getSourceManager().getExpansionLoc(loc), diagnostic, type, /*ignored*/ 0)); } else { S.Diag(loc, diagnostic); } }; // Sometimes, __weak isn't allowed. if (lifetime == Qualifiers::OCL_Weak && !S.getLangOpts().ObjCWeak && !NonObjCPointer) { // Use a specialized diagnostic if the runtime just doesn't support them. unsigned diagnostic = (S.getLangOpts().ObjCWeakRuntime ? diag::err_arc_weak_disabled : diag::err_arc_weak_no_runtime); // In any case, delay the diagnostic until we know what we're parsing. diagnoseOrDelay(S, AttrLoc, diagnostic, type); attr.setInvalid(); return true; } // Forbid __weak for class objects marked as // objc_arc_weak_reference_unavailable if (lifetime == Qualifiers::OCL_Weak) { if (const ObjCObjectPointerType *ObjT = type->getAs()) { if (ObjCInterfaceDecl *Class = ObjT->getInterfaceDecl()) { if (Class->isArcWeakrefUnavailable()) { S.Diag(AttrLoc, diag::err_arc_unsupported_weak_class); S.Diag(ObjT->getInterfaceDecl()->getLocation(), diag::note_class_declared); } } } } return true; } /// handleObjCGCTypeAttr - Process the __attribute__((objc_gc)) type /// attribute on the specified type. Returns true to indicate that /// the attribute was handled, false to indicate that the type does /// not permit the attribute. static bool handleObjCGCTypeAttr(TypeProcessingState &state, AttributeList &attr, QualType &type) { Sema &S = state.getSema(); // Delay if this isn't some kind of pointer. if (!type->isPointerType() && !type->isObjCObjectPointerType() && !type->isBlockPointerType()) return false; if (type.getObjCGCAttr() != Qualifiers::GCNone) { S.Diag(attr.getLoc(), diag::err_attribute_multiple_objc_gc); attr.setInvalid(); return true; } // Check the attribute arguments. 
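  // e.g. `id __attribute__((objc_gc(weak))) Obj;` under -fobjc-gc; any
  // argument other than 'weak' or 'strong' is diagnosed below.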
  if (!attr.isArgIdent(0)) {
    S.Diag(attr.getLoc(), diag::err_attribute_argument_type)
      << attr.getName() << AANT_ArgumentString;
    attr.setInvalid();
    return true;
  }
  Qualifiers::GC GCAttr;
  if (attr.getNumArgs() > 1) {
    S.Diag(attr.getLoc(), diag::err_attribute_wrong_number_arguments)
      << attr.getName() << 1;
    attr.setInvalid();
    return true;
  }

  IdentifierInfo *II = attr.getArgAsIdent(0)->Ident;
  if (II->isStr("weak"))
    GCAttr = Qualifiers::Weak;
  else if (II->isStr("strong"))
    GCAttr = Qualifiers::Strong;
  else {
    S.Diag(attr.getLoc(), diag::warn_attribute_type_not_supported)
      << attr.getName() << II;
    attr.setInvalid();
    return true;
  }

  QualType origType = type;
  type = S.Context.getObjCGCQualType(origType, GCAttr);

  // Make an attributed type to preserve the source information.
  if (attr.getLoc().isValid())
    type = S.Context.getAttributedType(AttributedType::attr_objc_gc,
                                       origType, type);

  return true;
}

namespace {
  /// A helper class to unwrap a type down to a function for the
  /// purposes of applying attributes there.
  ///
  /// Use:
  ///   FunctionTypeUnwrapper unwrapped(SemaRef, T);
  ///   if (unwrapped.isFunctionType()) {
  ///     const FunctionType *fn = unwrapped.get();
  ///     // change fn somehow
  ///     T = unwrapped.wrap(fn);
  ///   }
  struct FunctionTypeUnwrapper {
    enum WrapKind {
      Desugar,
      Attributed,
      Parens,
      Pointer,
      BlockPointer,
      Reference,
      MemberPointer
    };

    QualType Original;
    const FunctionType *Fn;
    SmallVector<unsigned char /*WrapKind*/, 8> Stack;

    FunctionTypeUnwrapper(Sema &S, QualType T) : Original(T) {
      while (true) {
        const Type *Ty = T.getTypePtr();
        if (isa<FunctionType>(Ty)) {
          Fn = cast<FunctionType>(Ty);
          return;
        } else if (isa<ParenType>(Ty)) {
          T = cast<ParenType>(Ty)->getInnerType();
          Stack.push_back(Parens);
        } else if (isa<PointerType>(Ty)) {
          T = cast<PointerType>(Ty)->getPointeeType();
          Stack.push_back(Pointer);
        } else if (isa<BlockPointerType>(Ty)) {
          T = cast<BlockPointerType>(Ty)->getPointeeType();
          Stack.push_back(BlockPointer);
        } else if (isa<MemberPointerType>(Ty)) {
          T = cast<MemberPointerType>(Ty)->getPointeeType();
          Stack.push_back(MemberPointer);
        } else if (isa<ReferenceType>(Ty)) {
          T = cast<ReferenceType>(Ty)->getPointeeType();
          Stack.push_back(Reference);
        } else if (isa<AttributedType>(Ty)) {
          T = cast<AttributedType>(Ty)->getEquivalentType();
          Stack.push_back(Attributed);
        } else {
          const Type *DTy = Ty->getUnqualifiedDesugaredType();
          if (Ty == DTy) {
            Fn = nullptr;
            return;
          }

          T = QualType(DTy, 0);
          Stack.push_back(Desugar);
        }
      }
    }

    bool isFunctionType() const { return (Fn != nullptr); }
    const FunctionType *get() const { return Fn; }

    QualType wrap(Sema &S, const FunctionType *New) {
      // If T wasn't modified from the unwrapped type, do nothing.
      if (New == get()) return Original;

      Fn = New;
      return wrap(S.Context, Original, 0);
    }

  private:
    QualType wrap(ASTContext &C, QualType Old, unsigned I) {
      if (I == Stack.size())
        return C.getQualifiedType(Fn, Old.getQualifiers());

      // Build up the inner type, applying the qualifiers from the old
      // type to the new type.
      SplitQualType SplitOld = Old.split();

      // As a special case, tail-recurse if there are no qualifiers.
      if (SplitOld.Quals.empty())
        return wrap(C, SplitOld.Ty, I);
      return C.getQualifiedType(wrap(C, SplitOld.Ty, I), SplitOld.Quals);
    }

    QualType wrap(ASTContext &C, const Type *Old, unsigned I) {
      if (I == Stack.size()) return QualType(Fn, 0);

      switch (static_cast<WrapKind>(Stack[I++])) {
      case Desugar:
        // This is the point at which we potentially lose source
        // information.
return wrap(C, Old->getUnqualifiedDesugaredType(), I); case Attributed: return wrap(C, cast(Old)->getEquivalentType(), I); case Parens: { QualType New = wrap(C, cast(Old)->getInnerType(), I); return C.getParenType(New); } case Pointer: { QualType New = wrap(C, cast(Old)->getPointeeType(), I); return C.getPointerType(New); } case BlockPointer: { QualType New = wrap(C, cast(Old)->getPointeeType(),I); return C.getBlockPointerType(New); } case MemberPointer: { const MemberPointerType *OldMPT = cast(Old); QualType New = wrap(C, OldMPT->getPointeeType(), I); return C.getMemberPointerType(New, OldMPT->getClass()); } case Reference: { const ReferenceType *OldRef = cast(Old); QualType New = wrap(C, OldRef->getPointeeType(), I); if (isa(OldRef)) return C.getLValueReferenceType(New, OldRef->isSpelledAsLValue()); else return C.getRValueReferenceType(New); } } llvm_unreachable("unknown wrapping kind"); } }; } // end anonymous namespace static bool handleMSPointerTypeQualifierAttr(TypeProcessingState &State, AttributeList &Attr, QualType &Type) { Sema &S = State.getSema(); AttributeList::Kind Kind = Attr.getKind(); QualType Desugared = Type; const AttributedType *AT = dyn_cast(Type); while (AT) { AttributedType::Kind CurAttrKind = AT->getAttrKind(); // You cannot specify duplicate type attributes, so if the attribute has // already been applied, flag it. if (getAttrListKind(CurAttrKind) == Kind) { S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute_exact) << Attr.getName(); return true; } // You cannot have both __sptr and __uptr on the same type, nor can you // have __ptr32 and __ptr64. if ((CurAttrKind == AttributedType::attr_ptr32 && Kind == AttributeList::AT_Ptr64) || (CurAttrKind == AttributedType::attr_ptr64 && Kind == AttributeList::AT_Ptr32)) { S.Diag(Attr.getLoc(), diag::err_attributes_are_not_compatible) << "'__ptr32'" << "'__ptr64'"; return true; } else if ((CurAttrKind == AttributedType::attr_sptr && Kind == AttributeList::AT_UPtr) || (CurAttrKind == AttributedType::attr_uptr && Kind == AttributeList::AT_SPtr)) { S.Diag(Attr.getLoc(), diag::err_attributes_are_not_compatible) << "'__sptr'" << "'__uptr'"; return true; } Desugared = AT->getEquivalentType(); AT = dyn_cast(Desugared); } // Pointer type qualifiers can only operate on pointer types, but not // pointer-to-member types. if (!isa(Desugared)) { if (Type->isMemberPointerType()) S.Diag(Attr.getLoc(), diag::err_attribute_no_member_pointers) << Attr.getName(); else S.Diag(Attr.getLoc(), diag::err_attribute_pointers_only) << Attr.getName() << 0; return true; } AttributedType::Kind TAK; switch (Kind) { default: llvm_unreachable("Unknown attribute kind"); case AttributeList::AT_Ptr32: TAK = AttributedType::attr_ptr32; break; case AttributeList::AT_Ptr64: TAK = AttributedType::attr_ptr64; break; case AttributeList::AT_SPtr: TAK = AttributedType::attr_sptr; break; case AttributeList::AT_UPtr: TAK = AttributedType::attr_uptr; break; } Type = S.Context.getAttributedType(TAK, Type, Type); return false; } bool Sema::checkNullabilityTypeSpecifier(QualType &type, NullabilityKind nullability, SourceLocation nullabilityLoc, bool isContextSensitive, bool allowOnArrayType) { recordNullabilitySeen(*this, nullabilityLoc); // Check for existing nullability attributes on the type. QualType desugared = type; while (auto attributed = dyn_cast(desugared.getTypePtr())) { // Check whether there is already a null if (auto existingNullability = attributed->getImmediateNullability()) { // Duplicated nullability. 
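      // e.g. `id _Nullable _Nullable X;` only warns, with a fix-it removing
      // the repetition, whereas `id _Nullable _Nonnull X;` is a hard error
      // just below.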
if (nullability == *existingNullability) { Diag(nullabilityLoc, diag::warn_nullability_duplicate) << DiagNullabilityKind(nullability, isContextSensitive) << FixItHint::CreateRemoval(nullabilityLoc); break; } // Conflicting nullability. Diag(nullabilityLoc, diag::err_nullability_conflicting) << DiagNullabilityKind(nullability, isContextSensitive) << DiagNullabilityKind(*existingNullability, false); return true; } desugared = attributed->getModifiedType(); } // If there is already a different nullability specifier, complain. // This (unlike the code above) looks through typedefs that might // have nullability specifiers on them, which means we cannot // provide a useful Fix-It. if (auto existingNullability = desugared->getNullability(Context)) { if (nullability != *existingNullability) { Diag(nullabilityLoc, diag::err_nullability_conflicting) << DiagNullabilityKind(nullability, isContextSensitive) << DiagNullabilityKind(*existingNullability, false); // Try to find the typedef with the existing nullability specifier. if (auto typedefType = desugared->getAs()) { TypedefNameDecl *typedefDecl = typedefType->getDecl(); QualType underlyingType = typedefDecl->getUnderlyingType(); if (auto typedefNullability = AttributedType::stripOuterNullability(underlyingType)) { if (*typedefNullability == *existingNullability) { Diag(typedefDecl->getLocation(), diag::note_nullability_here) << DiagNullabilityKind(*existingNullability, false); } } } return true; } } // If this definitely isn't a pointer type, reject the specifier. if (!desugared->canHaveNullability() && !(allowOnArrayType && desugared->isArrayType())) { Diag(nullabilityLoc, diag::err_nullability_nonpointer) << DiagNullabilityKind(nullability, isContextSensitive) << type; return true; } // For the context-sensitive keywords/Objective-C property // attributes, require that the type be a single-level pointer. if (isContextSensitive) { // Make sure that the pointee isn't itself a pointer type. const Type *pointeeType; if (desugared->isArrayType()) pointeeType = desugared->getArrayElementTypeNoTypeQual(); else pointeeType = desugared->getPointeeType().getTypePtr(); if (pointeeType->isAnyPointerType() || pointeeType->isObjCObjectPointerType() || pointeeType->isMemberPointerType()) { Diag(nullabilityLoc, diag::err_nullability_cs_multilevel) << DiagNullabilityKind(nullability, true) << type; Diag(nullabilityLoc, diag::note_nullability_type_specifier) << DiagNullabilityKind(nullability, false) << type << FixItHint::CreateReplacement(nullabilityLoc, getNullabilitySpelling(nullability)); return true; } } // Form the attributed type. type = Context.getAttributedType( AttributedType::getNullabilityAttrKind(nullability), type, type); return false; } bool Sema::checkObjCKindOfType(QualType &type, SourceLocation loc) { if (isa(type)) { // Build the attributed type to record where __kindof occurred. type = Context.getAttributedType(AttributedType::attr_objc_kindof, type, type); return false; } // Find out if it's an Objective-C object or object pointer type; const ObjCObjectPointerType *ptrType = type->getAs(); const ObjCObjectType *objType = ptrType ? ptrType->getObjectType() : type->getAs(); // If not, we can't apply __kindof. if (!objType) { // FIXME: Handle dependent types that aren't yet object types. Diag(loc, diag::err_objc_kindof_nonobject) << type; return true; } // Rebuild the "equivalent" type, which pushes __kindof down into // the object type. // There is no need to apply kindof on an unqualified id type. 
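  // e.g. `__kindof NSObject *` pushes __kindof down onto the object type;
  // for plain `__kindof id` the isKindOf bit is left off, since unqualified
  // 'id' already designates an arbitrary object.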
QualType equivType = Context.getObjCObjectType( objType->getBaseType(), objType->getTypeArgsAsWritten(), objType->getProtocols(), /*isKindOf=*/objType->isObjCUnqualifiedId() ? false : true); // If we started with an object pointer type, rebuild it. if (ptrType) { equivType = Context.getObjCObjectPointerType(equivType); if (auto nullability = type->getNullability(Context)) { auto attrKind = AttributedType::getNullabilityAttrKind(*nullability); equivType = Context.getAttributedType(attrKind, equivType, equivType); } } // Build the attributed type to record where __kindof occurred. type = Context.getAttributedType(AttributedType::attr_objc_kindof, type, equivType); return false; } /// Map a nullability attribute kind to a nullability kind. static NullabilityKind mapNullabilityAttrKind(AttributeList::Kind kind) { switch (kind) { case AttributeList::AT_TypeNonNull: return NullabilityKind::NonNull; case AttributeList::AT_TypeNullable: return NullabilityKind::Nullable; case AttributeList::AT_TypeNullUnspecified: return NullabilityKind::Unspecified; default: llvm_unreachable("not a nullability attribute kind"); } } /// Distribute a nullability type attribute that cannot be applied to /// the type specifier to a pointer, block pointer, or member pointer /// declarator, complaining if necessary. /// /// \returns true if the nullability annotation was distributed, false /// otherwise. static bool distributeNullabilityTypeAttr(TypeProcessingState &state, QualType type, AttributeList &attr) { Declarator &declarator = state.getDeclarator(); /// Attempt to move the attribute to the specified chunk. auto moveToChunk = [&](DeclaratorChunk &chunk, bool inFunction) -> bool { // If there is already a nullability attribute there, don't add // one. if (hasNullabilityAttr(chunk.getAttrListRef())) return false; // Complain about the nullability qualifier being in the wrong // place. enum { PK_Pointer, PK_BlockPointer, PK_MemberPointer, PK_FunctionPointer, PK_MemberFunctionPointer, } pointerKind = chunk.Kind == DeclaratorChunk::Pointer ? (inFunction ? PK_FunctionPointer : PK_Pointer) : chunk.Kind == DeclaratorChunk::BlockPointer ? PK_BlockPointer : inFunction? PK_MemberFunctionPointer : PK_MemberPointer; auto diag = state.getSema().Diag(attr.getLoc(), diag::warn_nullability_declspec) << DiagNullabilityKind(mapNullabilityAttrKind(attr.getKind()), attr.isContextSensitiveKeywordAttribute()) << type << static_cast(pointerKind); // FIXME: MemberPointer chunks don't carry the location of the *. if (chunk.Kind != DeclaratorChunk::MemberPointer) { diag << FixItHint::CreateRemoval(attr.getLoc()) << FixItHint::CreateInsertion( state.getSema().getPreprocessor() .getLocForEndOfToken(chunk.Loc), " " + attr.getName()->getName().str() + " "); } moveAttrFromListToList(attr, state.getCurrentAttrListRef(), chunk.getAttrListRef()); return true; }; // Move it to the outermost pointer, member pointer, or block // pointer declarator. for (unsigned i = state.getCurrentChunkIndex(); i != 0; --i) { DeclaratorChunk &chunk = declarator.getTypeObject(i-1); switch (chunk.Kind) { case DeclaratorChunk::Pointer: case DeclaratorChunk::BlockPointer: case DeclaratorChunk::MemberPointer: return moveToChunk(chunk, false); case DeclaratorChunk::Paren: case DeclaratorChunk::Array: continue; case DeclaratorChunk::Function: // Try to move past the return type to a function/block/member // function pointer. 
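      // e.g. for `_Nonnull void (*fp)(void)` the qualifier cannot stay on
      // the 'void' return type, so it is moved onto the '*' of the function
      // pointer, roughly as if `void (* _Nonnull fp)(void)` had been
      // written (with a warning and a fix-it).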
if (DeclaratorChunk *dest = maybeMovePastReturnType( declarator, i, /*onlyBlockPointers=*/false)) { return moveToChunk(*dest, true); } return false; // Don't walk through these. case DeclaratorChunk::Reference: case DeclaratorChunk::Pipe: return false; } } return false; } static AttributedType::Kind getCCTypeAttrKind(AttributeList &Attr) { assert(!Attr.isInvalid()); switch (Attr.getKind()) { default: llvm_unreachable("not a calling convention attribute"); case AttributeList::AT_CDecl: return AttributedType::attr_cdecl; case AttributeList::AT_FastCall: return AttributedType::attr_fastcall; case AttributeList::AT_StdCall: return AttributedType::attr_stdcall; case AttributeList::AT_ThisCall: return AttributedType::attr_thiscall; case AttributeList::AT_RegCall: return AttributedType::attr_regcall; case AttributeList::AT_Pascal: return AttributedType::attr_pascal; case AttributeList::AT_SwiftCall: return AttributedType::attr_swiftcall; case AttributeList::AT_VectorCall: return AttributedType::attr_vectorcall; case AttributeList::AT_Pcs: { // The attribute may have had a fixit applied where we treated an // identifier as a string literal. The contents of the string are valid, // but the form may not be. StringRef Str; if (Attr.isArgExpr(0)) Str = cast(Attr.getArgAsExpr(0))->getString(); else Str = Attr.getArgAsIdent(0)->Ident->getName(); return llvm::StringSwitch(Str) .Case("aapcs", AttributedType::attr_pcs) .Case("aapcs-vfp", AttributedType::attr_pcs_vfp); } case AttributeList::AT_IntelOclBicc: return AttributedType::attr_inteloclbicc; case AttributeList::AT_MSABI: return AttributedType::attr_ms_abi; case AttributeList::AT_SysVABI: return AttributedType::attr_sysv_abi; case AttributeList::AT_PreserveMost: return AttributedType::attr_preserve_most; case AttributeList::AT_PreserveAll: return AttributedType::attr_preserve_all; } llvm_unreachable("unexpected attribute kind!"); } /// Process an individual function attribute. Returns true to /// indicate that the attribute was handled, false if it wasn't. static bool handleFunctionTypeAttr(TypeProcessingState &state, AttributeList &attr, QualType &type) { Sema &S = state.getSema(); FunctionTypeUnwrapper unwrapped(S, type); if (attr.getKind() == AttributeList::AT_NoReturn) { if (S.CheckNoReturnAttr(attr)) return true; // Delay if this is not a function type. if (!unwrapped.isFunctionType()) return false; // Otherwise we can process right away. FunctionType::ExtInfo EI = unwrapped.get()->getExtInfo().withNoReturn(true); type = unwrapped.wrap(S, S.Context.adjustFunctionType(unwrapped.get(), EI)); return true; } // ns_returns_retained is not always a type attribute, but if we got // here, we're treating it as one right now. if (attr.getKind() == AttributeList::AT_NSReturnsRetained) { assert(S.getLangOpts().ObjCAutoRefCount && "ns_returns_retained treated as type attribute in non-ARC"); if (attr.getNumArgs()) return true; // Delay if this is not a function type. if (!unwrapped.isFunctionType()) return false; FunctionType::ExtInfo EI = unwrapped.get()->getExtInfo().withProducesResult(true); type = unwrapped.wrap(S, S.Context.adjustFunctionType(unwrapped.get(), EI)); return true; } if (attr.getKind() == AttributeList::AT_Regparm) { unsigned value; if (S.CheckRegparmAttr(attr, value)) return true; // Delay if this is not a function type. if (!unwrapped.isFunctionType()) return false; // Diagnose regparm with fastcall. 
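  // e.g. `__attribute__((regparm(2), fastcall)) void f();` is rejected,
  // since fastcall already dictates which arguments go in registers.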
const FunctionType *fn = unwrapped.get(); CallingConv CC = fn->getCallConv(); if (CC == CC_X86FastCall) { S.Diag(attr.getLoc(), diag::err_attributes_are_not_compatible) << FunctionType::getNameForCallConv(CC) << "regparm"; attr.setInvalid(); return true; } FunctionType::ExtInfo EI = unwrapped.get()->getExtInfo().withRegParm(value); type = unwrapped.wrap(S, S.Context.adjustFunctionType(unwrapped.get(), EI)); return true; } // Delay if the type didn't work out to a function. if (!unwrapped.isFunctionType()) return false; // Otherwise, a calling convention. CallingConv CC; if (S.CheckCallingConvAttr(attr, CC)) return true; const FunctionType *fn = unwrapped.get(); CallingConv CCOld = fn->getCallConv(); AttributedType::Kind CCAttrKind = getCCTypeAttrKind(attr); if (CCOld != CC) { // Error out on when there's already an attribute on the type // and the CCs don't match. const AttributedType *AT = S.getCallingConvAttributedType(type); if (AT && AT->getAttrKind() != CCAttrKind) { S.Diag(attr.getLoc(), diag::err_attributes_are_not_compatible) << FunctionType::getNameForCallConv(CC) << FunctionType::getNameForCallConv(CCOld); attr.setInvalid(); return true; } } // Diagnose use of variadic functions with calling conventions that // don't support them (e.g. because they're callee-cleanup). // We delay warning about this on unprototyped function declarations // until after redeclaration checking, just in case we pick up a // prototype that way. And apparently we also "delay" warning about // unprototyped function types in general, despite not necessarily having // much ability to diagnose it later. if (!supportsVariadicCall(CC)) { const FunctionProtoType *FnP = dyn_cast(fn); if (FnP && FnP->isVariadic()) { unsigned DiagID = diag::err_cconv_varargs; // stdcall and fastcall are ignored with a warning for GCC and MS // compatibility. bool IsInvalid = true; if (CC == CC_X86StdCall || CC == CC_X86FastCall) { DiagID = diag::warn_cconv_varargs; IsInvalid = false; } S.Diag(attr.getLoc(), DiagID) << FunctionType::getNameForCallConv(CC); if (IsInvalid) attr.setInvalid(); return true; } } // Also diagnose fastcall with regparm. if (CC == CC_X86FastCall && fn->getHasRegParm()) { S.Diag(attr.getLoc(), diag::err_attributes_are_not_compatible) << "regparm" << FunctionType::getNameForCallConv(CC_X86FastCall); attr.setInvalid(); return true; } // Modify the CC from the wrapped function type, wrap it all back, and then // wrap the whole thing in an AttributedType as written. The modified type // might have a different CC if we ignored the attribute. QualType Equivalent; if (CCOld == CC) { Equivalent = type; } else { auto EI = unwrapped.get()->getExtInfo().withCallingConv(CC); Equivalent = unwrapped.wrap(S, S.Context.adjustFunctionType(unwrapped.get(), EI)); } type = S.Context.getAttributedType(CCAttrKind, type, Equivalent); return true; } bool Sema::hasExplicitCallingConv(QualType &T) { QualType R = T.IgnoreParens(); while (const AttributedType *AT = dyn_cast(R)) { if (AT->isCallingConv()) return true; R = AT->getModifiedType().IgnoreParens(); } return false; } void Sema::adjustMemberFunctionCC(QualType &T, bool IsStatic, bool IsCtorOrDtor, SourceLocation Loc) { FunctionTypeUnwrapper Unwrapped(*this, T); const FunctionType *FT = Unwrapped.get(); bool IsVariadic = (isa(FT) && cast(FT)->isVariadic()); CallingConv CurCC = FT->getCallConv(); CallingConv ToCC = Context.getDefaultCallingConvention(IsVariadic, !IsStatic); if (CurCC == ToCC) return; // MS compiler ignores explicit calling convention attributes on structors. 
  // We should do the same.
  if (Context.getTargetInfo().getCXXABI().isMicrosoft() && IsCtorOrDtor) {
    // Issue a warning on ignored calling convention -- except of __stdcall.
    // Again, this is what MS compiler does.
    if (CurCC != CC_X86StdCall)
      Diag(Loc, diag::warn_cconv_structors)
        << FunctionType::getNameForCallConv(CurCC);
  // Default adjustment.
  } else {
    // Only adjust types with the default convention.  For example, on Windows
    // we should adjust a __cdecl type to __thiscall for instance methods, and
    // a __thiscall type to __cdecl for static methods.
    CallingConv DefaultCC =
        Context.getDefaultCallingConvention(IsVariadic, IsStatic);

    if (CurCC != DefaultCC || DefaultCC == ToCC)
      return;

    if (hasExplicitCallingConv(T))
      return;
  }

  FT = Context.adjustFunctionType(FT, FT->getExtInfo().withCallingConv(ToCC));
  QualType Wrapped = Unwrapped.wrap(*this, FT);
  T = Context.getAdjustedType(T, Wrapped);
}

/// HandleVectorSizeAttribute - this attribute is only applicable to integral
/// and float scalars, although arrays, pointers, and function return values
/// are allowed in conjunction with this construct. Aggregates with this
/// attribute are invalid, even if they are of the same size as a
/// corresponding scalar. The raw attribute should contain precisely one
/// argument, the vector size for the variable, measured in bytes. If curType
/// and rawAttr are well formed, this routine will return a new vector type.
static void HandleVectorSizeAttr(QualType &CurType, const AttributeList &Attr,
                                 Sema &S) {
  // Check the attribute arguments.
  if (Attr.getNumArgs() != 1) {
    S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments)
      << Attr.getName() << 1;
    Attr.setInvalid();
    return;
  }
  Expr *sizeExpr = static_cast<Expr *>(Attr.getArgAsExpr(0));
  llvm::APSInt vecSize(32);
  if (sizeExpr->isTypeDependent() || sizeExpr->isValueDependent() ||
      !sizeExpr->isIntegerConstantExpr(vecSize, S.Context)) {
    S.Diag(Attr.getLoc(), diag::err_attribute_argument_type)
      << Attr.getName() << AANT_ArgumentIntegerConstant
      << sizeExpr->getSourceRange();
    Attr.setInvalid();
    return;
  }
  // The base type must be integer (not Boolean or enumeration) or float, and
  // can't already be a vector.
  if (!CurType->isBuiltinType() || CurType->isBooleanType() ||
      (!CurType->isIntegerType() && !CurType->isRealFloatingType())) {
    S.Diag(Attr.getLoc(), diag::err_attribute_invalid_vector_type) << CurType;
    Attr.setInvalid();
    return;
  }
  unsigned typeSize = static_cast<unsigned>(S.Context.getTypeSize(CurType));
  // vecSize is specified in bytes - convert to bits.
  unsigned vectorSize = static_cast<unsigned>(vecSize.getZExtValue() * 8);

  // The vector size needs to be an integral multiple of the type size.
  if (vectorSize % typeSize) {
    S.Diag(Attr.getLoc(), diag::err_attribute_invalid_size)
      << sizeExpr->getSourceRange();
    Attr.setInvalid();
    return;
  }
  if (VectorType::isVectorSizeTooLarge(vectorSize / typeSize)) {
    S.Diag(Attr.getLoc(), diag::err_attribute_size_too_large)
      << sizeExpr->getSourceRange();
    Attr.setInvalid();
    return;
  }
  if (vectorSize == 0) {
    S.Diag(Attr.getLoc(), diag::err_attribute_zero_size)
      << sizeExpr->getSourceRange();
    Attr.setInvalid();
    return;
  }

  // Success! Instantiate the vector type; the number of elements is > 0 and
  // not required to be a power of 2, unlike GCC.
  CurType = S.Context.getVectorType(CurType, vectorSize/typeSize,
                                    VectorType::GenericVector);
}

/// \brief Process the OpenCL-like ext_vector_type attribute when it occurs on
/// a type.
static void HandleExtVectorTypeAttr(QualType &CurType,
                                    const AttributeList &Attr,
                                    Sema &S) {
  // Check the attribute arguments.
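  // Typical usage, e.g.:
  //   typedef float float4 __attribute__((ext_vector_type(4)));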
/// \brief Process the OpenCL-like ext_vector_type attribute when it occurs on
/// a type.
static void HandleExtVectorTypeAttr(QualType &CurType,
                                    const AttributeList &Attr,
                                    Sema &S) {
  // Check the attribute arguments.
  if (Attr.getNumArgs() != 1) {
    S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments)
      << Attr.getName() << 1;
    return;
  }

  Expr *sizeExpr;

  // Special case where the argument is a template id.
  if (Attr.isArgIdent(0)) {
    CXXScopeSpec SS;
    SourceLocation TemplateKWLoc;
    UnqualifiedId id;
    id.setIdentifier(Attr.getArgAsIdent(0)->Ident, Attr.getLoc());

    ExprResult Size = S.ActOnIdExpression(S.getCurScope(), SS, TemplateKWLoc,
                                          id, false, false);
    if (Size.isInvalid())
      return;

    sizeExpr = Size.get();
  } else {
    sizeExpr = Attr.getArgAsExpr(0);
  }

  // Create the vector type.
  QualType T = S.BuildExtVectorType(CurType, sizeExpr, Attr.getLoc());
  if (!T.isNull())
    CurType = T;
}

static bool isPermittedNeonBaseType(QualType &Ty,
                                    VectorType::VectorKind VecKind, Sema &S) {
  const BuiltinType *BTy = Ty->getAs<BuiltinType>();
  if (!BTy)
    return false;

  llvm::Triple Triple = S.Context.getTargetInfo().getTriple();

  // Signed poly is mathematically wrong, but has been baked into some ABIs by
  // now.
  bool IsPolyUnsigned = Triple.getArch() == llvm::Triple::aarch64 ||
                        Triple.getArch() == llvm::Triple::aarch64_be;
  if (VecKind == VectorType::NeonPolyVector) {
    if (IsPolyUnsigned) {
      // AArch64 polynomial vectors are unsigned and support poly64.
      return BTy->getKind() == BuiltinType::UChar ||
             BTy->getKind() == BuiltinType::UShort ||
             BTy->getKind() == BuiltinType::ULong ||
             BTy->getKind() == BuiltinType::ULongLong;
    } else {
      // AArch32 polynomial vectors are signed.
      return BTy->getKind() == BuiltinType::SChar ||
             BTy->getKind() == BuiltinType::Short;
    }
  }

  // Non-polynomial vector types: the usual suspects are allowed, as well as
  // float64_t on AArch64.
  bool Is64Bit = Triple.getArch() == llvm::Triple::aarch64 ||
                 Triple.getArch() == llvm::Triple::aarch64_be;
  if (Is64Bit && BTy->getKind() == BuiltinType::Double)
    return true;

  return BTy->getKind() == BuiltinType::SChar ||
         BTy->getKind() == BuiltinType::UChar ||
         BTy->getKind() == BuiltinType::Short ||
         BTy->getKind() == BuiltinType::UShort ||
         BTy->getKind() == BuiltinType::Int ||
         BTy->getKind() == BuiltinType::UInt ||
         BTy->getKind() == BuiltinType::Long ||
         BTy->getKind() == BuiltinType::ULong ||
         BTy->getKind() == BuiltinType::LongLong ||
         BTy->getKind() == BuiltinType::ULongLong ||
         BTy->getKind() == BuiltinType::Float ||
         BTy->getKind() == BuiltinType::Half;
}
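// Editor's sketch of the Neon attribute handled below (illustrative user
// code, not part of the upstream source; requires a target with the "neon"
// feature).  The argument is an element count, and element type times count
// must fill exactly one 64- or 128-bit Neon register:
//
//   typedef __attribute__((neon_vector_type(2))) int int32x2_t;  // 64 bits
//   typedef __attribute__((neon_vector_type(4))) int int32x4_t;  // 128 bits
//   typedef __attribute__((neon_vector_type(3))) int bad_t;
//       // error: 96 bits (err_attribute_bad_neon_vector_size)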
/// HandleNeonVectorTypeAttr - The "neon_vector_type" and
/// "neon_polyvector_type" attributes are used to create vector types that
/// are mangled according to ARM's ABI.  Otherwise, these types are identical
/// to those created with the "vector_size" attribute.  Unlike "vector_size",
/// the argument to these Neon attributes is the number of vector elements,
/// not the vector size in bytes.  The vector width and element type must
/// match one of the standard Neon vector types.
static void HandleNeonVectorTypeAttr(QualType &CurType,
                                     const AttributeList &Attr, Sema &S,
                                     VectorType::VectorKind VecKind) {
  // Target must have NEON.
  if (!S.Context.getTargetInfo().hasFeature("neon")) {
    S.Diag(Attr.getLoc(), diag::err_attribute_unsupported) << Attr.getName();
    Attr.setInvalid();
    return;
  }
  // Check the attribute arguments.
  if (Attr.getNumArgs() != 1) {
    S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments)
      << Attr.getName() << 1;
    Attr.setInvalid();
    return;
  }
  // The number of elements must be an ICE.
  Expr *numEltsExpr = static_cast<Expr *>(Attr.getArgAsExpr(0));
  llvm::APSInt numEltsInt(32);
  if (numEltsExpr->isTypeDependent() || numEltsExpr->isValueDependent() ||
      !numEltsExpr->isIntegerConstantExpr(numEltsInt, S.Context)) {
    S.Diag(Attr.getLoc(), diag::err_attribute_argument_type)
      << Attr.getName() << AANT_ArgumentIntegerConstant
      << numEltsExpr->getSourceRange();
    Attr.setInvalid();
    return;
  }
  // Only certain element types are supported for Neon vectors.
  if (!isPermittedNeonBaseType(CurType, VecKind, S)) {
    S.Diag(Attr.getLoc(), diag::err_attribute_invalid_vector_type) << CurType;
    Attr.setInvalid();
    return;
  }

  // The total size of the vector must be 64 or 128 bits.
  unsigned typeSize = static_cast<unsigned>(S.Context.getTypeSize(CurType));
  unsigned numElts = static_cast<unsigned>(numEltsInt.getZExtValue());
  unsigned vecSize = typeSize * numElts;
  if (vecSize != 64 && vecSize != 128) {
    S.Diag(Attr.getLoc(), diag::err_attribute_bad_neon_vector_size) << CurType;
    Attr.setInvalid();
    return;
  }

  CurType = S.Context.getVectorType(CurType, numElts, VecKind);
}

/// Handle the OpenCL access qualifier attribute.
static void HandleOpenCLAccessAttr(QualType &CurType,
                                   const AttributeList &Attr, Sema &S) {
  // OpenCL v2.0 s6.6 - Access qualifiers can only be used for image and pipe
  // types.
  if (!(CurType->isImageType() || CurType->isPipeType())) {
    S.Diag(Attr.getLoc(), diag::err_opencl_invalid_access_qualifier);
    Attr.setInvalid();
    return;
  }

  if (const TypedefType *TypedefTy = CurType->getAs<TypedefType>()) {
    QualType PointeeTy = TypedefTy->desugar();
    S.Diag(Attr.getLoc(), diag::err_opencl_multiple_access_qualifiers);

    std::string PrevAccessQual;
    switch (cast<BuiltinType>(PointeeTy.getTypePtr())->getKind()) {
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
    case BuiltinType::Id:                                    \
      PrevAccessQual = #Access;                              \
      break;
#include "clang/Basic/OpenCLImageTypes.def"
    default:
      assert(0 && "Unable to find corresponding image type.");
    }

    S.Diag(TypedefTy->getDecl()->getLocStart(),
           diag::note_opencl_typedef_access_qualifier) << PrevAccessQual;
  } else if (CurType->isPipeType()) {
    if (Attr.getSemanticSpelling() == OpenCLAccessAttr::Keyword_write_only) {
      QualType ElemType = CurType->getAs<PipeType>()->getElementType();
      CurType = S.Context.getWritePipeType(ElemType);
    }
  }
}
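// Editor's sketch of the access-qualifier rules above (illustrative OpenCL
// code, an assumption rather than upstream source):
//
//   kernel void k1(read_only image2d_t img,   // OK
//                  write_only pipe int out);  // OK: pipe is rewritten to a
//                                             // write-only pipe type
//
//   typedef read_only image2d_t ro_img;
//   kernel void k2(write_only ro_img img);
//       // err_opencl_multiple_access_qualifiers, with a note pointing at
//       // the typedef that already carries 'read_only'
//
//   kernel void k3(read_only int x);
//       // err_opencl_invalid_access_qualifier: only images and pipes may
//       // carry access qualifiers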
static void processTypeAttrs(TypeProcessingState &state, QualType &type,
                             TypeAttrLocation TAL, AttributeList *attrs) {
  // Scan through and apply attributes to this type where it makes sense.
  // Some attributes (such as __address_space__, __vector_size__, etc) apply
  // to the type, but others can be present in the type specifiers even though
  // they apply to the decl.  Here we apply type attributes and ignore the
  // rest.

  bool hasOpenCLAddressSpace = false;
  while (attrs) {
    AttributeList &attr = *attrs;
    attrs = attr.getNext(); // reset to the next here due to early loop
                            // continue stmts

    // Skip attributes that were marked to be invalid.
    if (attr.isInvalid())
      continue;

    if (attr.isCXX11Attribute()) {
      // [[gnu::...]] attributes are treated as declaration attributes, so may
      // not appertain to a DeclaratorChunk, even if we handle them as type
      // attributes.
      if (attr.getScopeName() && attr.getScopeName()->isStr("gnu")) {
        if (TAL == TAL_DeclChunk) {
          state.getSema().Diag(attr.getLoc(),
                               diag::warn_cxx11_gnu_attribute_on_type)
              << attr.getName();
          continue;
        }
      } else if (TAL != TAL_DeclChunk) {
        // Otherwise, only consider type processing for a C++11 attribute if
        // it's actually been applied to a type.
        continue;
      }
    }

    // If this is an attribute we can handle, do so now;
    // otherwise, add it to the FnAttrs list for rechaining.
    switch (attr.getKind()) {
    default:
      // A C++11 attribute on a declarator chunk must appertain to a type.
      if (attr.isCXX11Attribute() && TAL == TAL_DeclChunk) {
        state.getSema().Diag(attr.getLoc(), diag::err_attribute_not_type_attr)
            << attr.getName();
        attr.setUsedAsTypeAttr();
      }
      break;

    case AttributeList::UnknownAttribute:
      if (attr.isCXX11Attribute() && TAL == TAL_DeclChunk)
        state.getSema().Diag(attr.getLoc(),
                             diag::warn_unknown_attribute_ignored)
            << attr.getName();
      break;

    case AttributeList::IgnoredAttribute:
      break;

    case AttributeList::AT_MayAlias:
      // FIXME: This attribute needs to actually be handled, but if we ignore
      // it, it breaks large amounts of Linux software.
      attr.setUsedAsTypeAttr();
      break;
    case AttributeList::AT_OpenCLPrivateAddressSpace:
    case AttributeList::AT_OpenCLGlobalAddressSpace:
    case AttributeList::AT_OpenCLLocalAddressSpace:
    case AttributeList::AT_OpenCLConstantAddressSpace:
    case AttributeList::AT_OpenCLGenericAddressSpace:
    case AttributeList::AT_AddressSpace:
      HandleAddressSpaceTypeAttribute(type, attr, state.getSema());
      attr.setUsedAsTypeAttr();
      hasOpenCLAddressSpace = true;
      break;
    OBJC_POINTER_TYPE_ATTRS_CASELIST:
      if (!handleObjCPointerTypeAttr(state, attr, type))
        distributeObjCPointerTypeAttr(state, attr, type);
      attr.setUsedAsTypeAttr();
      break;
    case AttributeList::AT_VectorSize:
      HandleVectorSizeAttr(type, attr, state.getSema());
      attr.setUsedAsTypeAttr();
      break;
    case AttributeList::AT_ExtVectorType:
      HandleExtVectorTypeAttr(type, attr, state.getSema());
      attr.setUsedAsTypeAttr();
      break;
    case AttributeList::AT_NeonVectorType:
      HandleNeonVectorTypeAttr(type, attr, state.getSema(),
                               VectorType::NeonVector);
      attr.setUsedAsTypeAttr();
      break;
    case AttributeList::AT_NeonPolyVectorType:
      HandleNeonVectorTypeAttr(type, attr, state.getSema(),
                               VectorType::NeonPolyVector);
      attr.setUsedAsTypeAttr();
      break;
    case AttributeList::AT_OpenCLAccess:
      HandleOpenCLAccessAttr(type, attr, state.getSema());
      attr.setUsedAsTypeAttr();
      break;

    MS_TYPE_ATTRS_CASELIST:
      if (!handleMSPointerTypeQualifierAttr(state, attr, type))
        attr.setUsedAsTypeAttr();
      break;

    NULLABILITY_TYPE_ATTRS_CASELIST:
      // Either add nullability here or try to distribute it.  We
      // don't want to distribute the nullability specifier past any
      // dependent type, because that complicates the user model.
      if (type->canHaveNullability() || type->isDependentType() ||
          type->isArrayType() ||
          !distributeNullabilityTypeAttr(state, type, attr)) {
        unsigned endIndex;
        if (TAL == TAL_DeclChunk)
          endIndex = state.getCurrentChunkIndex();
        else
          endIndex = state.getDeclarator().getNumTypeObjects();
        bool allowOnArrayType =
            state.getDeclarator().isPrototypeContext() &&
            !hasOuterPointerLikeChunk(state.getDeclarator(), endIndex);
        if (state.getSema().checkNullabilityTypeSpecifier(
              type,
              mapNullabilityAttrKind(attr.getKind()),
              attr.getLoc(),
              attr.isContextSensitiveKeywordAttribute(),
              allowOnArrayType)) {
          attr.setInvalid();
        }

        attr.setUsedAsTypeAttr();
      }
      break;

    case AttributeList::AT_ObjCKindOf:
      // '__kindof' must be part of the decl-specifiers.
      switch (TAL) {
      case TAL_DeclSpec:
        break;

      case TAL_DeclChunk:
      case TAL_DeclName:
        state.getSema().Diag(attr.getLoc(),
                             diag::err_objc_kindof_wrong_position)
            << FixItHint::CreateRemoval(attr.getLoc())
            << FixItHint::CreateInsertion(
                 state.getDeclarator().getDeclSpec().getLocStart(),
                 "__kindof ");
        break;
      }

      // Apply it regardless.
      if (state.getSema().checkObjCKindOfType(type, attr.getLoc()))
        attr.setInvalid();
      attr.setUsedAsTypeAttr();
      break;

    case AttributeList::AT_NSReturnsRetained:
      if (!state.getSema().getLangOpts().ObjCAutoRefCount)
        break;
      // fallthrough into the function attrs

    FUNCTION_TYPE_ATTRS_CASELIST:
      attr.setUsedAsTypeAttr();

      // Never process function type attributes as part of the
      // declaration-specifiers.
      if (TAL == TAL_DeclSpec)
        distributeFunctionTypeAttrFromDeclSpec(state, attr, type);

      // Otherwise, handle the possible delays.
      else if (!handleFunctionTypeAttr(state, attr, type))
        distributeFunctionTypeAttr(state, attr, type);
      break;
    }
  }

  // If the address space is not set, OpenCL 2.0 defines non-private default
  // address spaces for some cases:
  // OpenCL 2.0, section 6.5:
  //   The address space for a variable at program scope or a static variable
  //   inside a function can either be __global or __constant, but defaults to
  //   __global if not specified.
  //   (...)
  //   Pointers that are declared without pointing to a named address space
  //   point to the generic address space.
  if (state.getSema().getLangOpts().OpenCLVersion >= 200 &&
      !hasOpenCLAddressSpace && type.getAddressSpace() == 0 &&
      (TAL == TAL_DeclSpec || TAL == TAL_DeclChunk)) {
    Declarator &D = state.getDeclarator();
    if (state.getCurrentChunkIndex() > 0 &&
        D.getTypeObject(state.getCurrentChunkIndex() - 1).Kind ==
            DeclaratorChunk::Pointer) {
      type = state.getSema().Context.getAddrSpaceQualType(
          type, LangAS::opencl_generic);
    } else if (state.getCurrentChunkIndex() == 0 &&
               D.getContext() == Declarator::FileContext &&
               !D.isFunctionDeclarator() && !D.isFunctionDefinition() &&
               D.getDeclSpec().getStorageClassSpec() != DeclSpec::SCS_typedef &&
               !type->isSamplerT())
      type = state.getSema().Context.getAddrSpaceQualType(
          type, LangAS::opencl_global);
    else if (state.getCurrentChunkIndex() == 0 &&
             D.getContext() == Declarator::BlockContext &&
             D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_static)
      type = state.getSema().Context.getAddrSpaceQualType(
          type, LangAS::opencl_global);
  }
}
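// Editor's sketch of the OpenCL 2.0 address-space defaulting above
// (illustrative OpenCL code, an assumption rather than upstream source):
//
//   int g;                    // program scope: implicitly __global
//   kernel void k(int *p) {   // unqualified pointee: generic address space
//     static int s;           // function-scope static: implicitly __global
//   }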
void Sema::completeExprArrayBound(Expr *E) {
  if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E->IgnoreParens())) {
    if (VarDecl *Var = dyn_cast<VarDecl>(DRE->getDecl())) {
      if (isTemplateInstantiation(Var->getTemplateSpecializationKind())) {
        SourceLocation PointOfInstantiation = E->getExprLoc();

        if (MemberSpecializationInfo *MSInfo =
                Var->getMemberSpecializationInfo()) {
          // If we don't already have a point of instantiation, this is it.
          if (MSInfo->getPointOfInstantiation().isInvalid()) {
            MSInfo->setPointOfInstantiation(PointOfInstantiation);

            // This is a modification of an existing AST node.  Notify
            // listeners.
            if (ASTMutationListener *L = getASTMutationListener())
              L->StaticDataMemberInstantiated(Var);
          }
        } else {
          VarTemplateSpecializationDecl *VarSpec =
              cast<VarTemplateSpecializationDecl>(Var);
          if (VarSpec->getPointOfInstantiation().isInvalid())
            VarSpec->setPointOfInstantiation(PointOfInstantiation);
        }

        InstantiateVariableDefinition(PointOfInstantiation, Var);

        // Update the type to the newly instantiated definition's type both
        // here and within the expression.
        if (VarDecl *Def = Var->getDefinition()) {
          DRE->setDecl(Def);
          QualType T = Def->getType();
          DRE->setType(T);
          // FIXME: Update the type on all intervening expressions.
          E->setType(T);
        }

        // We still go on to try to complete the type independently, as it
        // may also require instantiations or diagnostics if it remains
        // incomplete.
      }
    }
  }
}

/// \brief Ensure that the type of the given expression is complete.
///
/// This routine checks whether the expression \p E has a complete type.  If
/// the expression refers to an instantiable construct, that instantiation is
/// performed as needed to complete its type.  Furthermore
/// Sema::RequireCompleteType is called for the expression's type (or in the
/// case of a reference type, the referred-to type).
///
/// \param E The expression whose type is required to be complete.
/// \param Diagnoser The object that will emit a diagnostic if the type is
/// incomplete.
///
/// \returns \c true if the type of \p E is incomplete and diagnosed, \c false
/// otherwise.
bool Sema::RequireCompleteExprType(Expr *E, TypeDiagnoser &Diagnoser) {
  QualType T = E->getType();

  // Incomplete array types may be completed by the initializer attached to
  // their definitions.  For static data members of class templates and for
  // variable templates, we need to instantiate the definition to get this
  // initializer and complete the type.
  if (T->isIncompleteArrayType()) {
    completeExprArrayBound(E);
    T = E->getType();
  }

  // FIXME: Are there other cases which require instantiating something other
  // than the type to complete the type of an expression?

  return RequireCompleteType(E->getExprLoc(), T, Diagnoser);
}

bool Sema::RequireCompleteExprType(Expr *E, unsigned DiagID) {
  BoundTypeDiagnoser<> Diagnoser(DiagID);
  return RequireCompleteExprType(E, Diagnoser);
}
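// Editor's sketch of the array-bound completion above (illustrative user
// code, an assumption rather than upstream source):
//
//   template <typename T> struct A { static const int xs[]; };
//   template <typename T> const int A<T>::xs[] = {1, 2, 3};
//
//   int n = sizeof(A<int>::xs);   // instantiating the definition completes
//                                 // the member's type to 'const int [3]'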
/// @brief Ensure that the type T is a complete type.
///
/// This routine checks whether the type @p T is complete in any
/// context where a complete type is required.  If @p T is a complete
/// type, returns false.  If @p T is a class template specialization,
/// this routine then attempts to perform class template
/// instantiation.  If instantiation fails, or if @p T is incomplete
/// and cannot be completed, issues the diagnostic @p diag (giving it
/// the type @p T) and returns true.
///
/// @param Loc The location in the source that the incomplete type
/// diagnostic should refer to.
///
/// @param T The type that this routine is examining for completeness.
///
/// @returns @c true if @p T is incomplete and a diagnostic was emitted,
/// @c false otherwise.
bool Sema::RequireCompleteType(SourceLocation Loc, QualType T,
                               TypeDiagnoser &Diagnoser) {
  if (RequireCompleteTypeImpl(Loc, T, &Diagnoser))
    return true;
  if (const TagType *Tag = T->getAs<TagType>()) {
    if (!Tag->getDecl()->isCompleteDefinitionRequired()) {
      Tag->getDecl()->setCompleteDefinitionRequired();
      Consumer.HandleTagDeclRequiredDefinition(Tag->getDecl());
    }
  }
  return false;
}

/// \brief Determine whether there is any declaration of \p D that was ever a
/// definition (perhaps before module merging) and is currently visible.
/// \param D The definition of the entity.
/// \param Suggested Filled in with the declaration that should be made
/// visible in order to provide a definition of this entity.
/// \param OnlyNeedComplete If \c true, we only need the type to be complete,
/// not defined.  This only matters for enums with a fixed underlying type,
/// since in all other cases, a type is complete if and only if it is defined.
bool Sema::hasVisibleDefinition(NamedDecl *D, NamedDecl **Suggested,
                                bool OnlyNeedComplete) {
  // Easy case: if we don't have modules, all declarations are visible.
  if (!getLangOpts().Modules && !getLangOpts().ModulesLocalVisibility)
    return true;

  // If this definition was instantiated from a template, map back to the
  // pattern from which it was instantiated.
  if (isa<TagDecl>(D) && cast<TagDecl>(D)->isBeingDefined()) {
    // We're in the middle of defining it; this definition should be treated
    // as visible.
    return true;
  } else if (auto *RD = dyn_cast<CXXRecordDecl>(D)) {
    if (auto *Pattern = RD->getTemplateInstantiationPattern())
      RD = Pattern;
    D = RD->getDefinition();
  } else if (auto *ED = dyn_cast<EnumDecl>(D)) {
    if (auto *Pattern = ED->getTemplateInstantiationPattern())
      ED = Pattern;
    if (OnlyNeedComplete && ED->isFixed()) {
      // If the enum has a fixed underlying type, and we're only looking for a
      // complete type (not a definition), any visible declaration of it will
      // do.
      *Suggested = nullptr;
      for (auto *Redecl : ED->redecls()) {
        if (isVisible(Redecl))
          return true;
        if (Redecl->isThisDeclarationADefinition() ||
            (Redecl->isCanonicalDecl() && !*Suggested))
          *Suggested = Redecl;
      }
      return false;
    }
    D = ED->getDefinition();
  } else if (auto *FD = dyn_cast<FunctionDecl>(D)) {
    if (auto *Pattern = FD->getTemplateInstantiationPattern())
      FD = Pattern;
    D = FD->getDefinition();
  } else if (auto *VD = dyn_cast<VarDecl>(D)) {
    if (auto *Pattern = VD->getTemplateInstantiationPattern())
      VD = Pattern;
    D = VD->getDefinition();
  }
  assert(D && "missing definition for pattern of instantiated definition");

  *Suggested = D;
  if (isVisible(D))
    return true;

  // The external source may have additional definitions of this entity that
  // are visible, so complete the redeclaration chain now and ask again.
  if (auto *Source = Context.getExternalSource()) {
    Source->CompleteRedeclChain(D);
    return isVisible(D);
  }

  return false;
}

/// Locks in the inheritance model for the given class and all of its bases.
static void assignInheritanceModel(Sema &S, CXXRecordDecl *RD) {
  RD = RD->getMostRecentDecl();
  if (!RD->hasAttr<MSInheritanceAttr>()) {
    MSInheritanceAttr::Spelling IM;

    switch (S.MSPointerToMemberRepresentationMethod) {
    case LangOptions::PPTMK_BestCase:
      IM = RD->calculateInheritanceModel();
      break;
    case LangOptions::PPTMK_FullGeneralitySingleInheritance:
      IM = MSInheritanceAttr::Keyword_single_inheritance;
      break;
    case LangOptions::PPTMK_FullGeneralityMultipleInheritance:
      IM = MSInheritanceAttr::Keyword_multiple_inheritance;
      break;
    case LangOptions::PPTMK_FullGeneralityVirtualInheritance:
      IM = MSInheritanceAttr::Keyword_unspecified_inheritance;
      break;
    }

    RD->addAttr(MSInheritanceAttr::CreateImplicit(
        S.getASTContext(), IM,
        /*BestCase=*/S.MSPointerToMemberRepresentationMethod ==
            LangOptions::PPTMK_BestCase,
        S.ImplicitMSInheritanceAttrLoc.isValid()
            ? S.ImplicitMSInheritanceAttrLoc
            : RD->getSourceRange()));
    S.Consumer.AssignInheritanceModel(RD);
  }
}
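// Editor's note, roughly: under the Microsoft ABI the size of a
// pointer-to-member depends on the class's inheritance model, so the model
// is locked in as soon as a pointer-to-member type must be complete.  An
// illustrative assumption, not upstream source:
//
//   struct S;
//   int S::*pm = nullptr;   // forces a model for S; for an incomplete class
//                           // the most general representation is used unless
//                           // overridden (e.g. by /vmg-style options)
//   struct __single_inheritance T;  // or the model can be stated explicitly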
/// \brief The implementation of RequireCompleteType.
bool Sema::RequireCompleteTypeImpl(SourceLocation Loc, QualType T,
                                   TypeDiagnoser *Diagnoser) {
  // FIXME: Add this assertion to make sure we always get instantiation points.
  //  assert(!Loc.isInvalid() && "Invalid location in RequireCompleteType");
  // FIXME: Add this assertion to help us flush out problems with
  // checking for dependent types and type-dependent expressions.
  //
  //  assert(!T->isDependentType() &&
  //         "Can't ask whether a dependent type is complete");

  // We lock in the inheritance model once somebody has asked us to ensure
  // that a pointer-to-member type is complete.
  if (Context.getTargetInfo().getCXXABI().isMicrosoft()) {
    if (const MemberPointerType *MPTy = T->getAs<MemberPointerType>()) {
      if (!MPTy->getClass()->isDependentType()) {
        (void)isCompleteType(Loc, QualType(MPTy->getClass(), 0));
        assignInheritanceModel(*this, MPTy->getMostRecentCXXRecordDecl());
      }
    }
  }

  NamedDecl *Def = nullptr;
  bool Incomplete = T->isIncompleteType(&Def);

  // Check that any necessary explicit specializations are visible.  For an
  // enum, we just need the declaration, so don't check this.
  if (Def && !isa<EnumDecl>(Def))
    checkSpecializationVisibility(Loc, Def);

  // If we have a complete type, we're done.
  if (!Incomplete) {
    // If we know about the definition but it is not visible, complain.
    NamedDecl *SuggestedDef = nullptr;
    if (Def &&
        !hasVisibleDefinition(Def, &SuggestedDef, /*OnlyNeedComplete*/true)) {
      // If the user is going to see an error here, recover by making the
      // definition visible.
      bool TreatAsComplete = Diagnoser && !isSFINAEContext();
      if (Diagnoser)
        diagnoseMissingImport(Loc, SuggestedDef, MissingImportKind::Definition,
                              /*Recover*/TreatAsComplete);
      return !TreatAsComplete;
    }

    return false;
  }

  const TagType *Tag = T->getAs<TagType>();
  const ObjCInterfaceType *IFace = T->getAs<ObjCInterfaceType>();

  // If there's an unimported definition of this type in a module (for
  // instance, because we forward declared it, then imported the definition),
  // import that definition now.
  //
  // FIXME: What about other cases where an import extends a redeclaration
  // chain for a declaration that can be accessed through a mechanism other
  // than name lookup (eg, referenced in a template, or a variable whose type
  // could be completed by the module)?
  //
  // FIXME: Should we map through to the base array element type before
  // checking for a tag type?
  if (Tag || IFace) {
    NamedDecl *D =
        Tag ? static_cast<NamedDecl *>(Tag->getDecl()) : IFace->getDecl();

    // Avoid diagnosing invalid decls as incomplete.
    if (D->isInvalidDecl())
      return true;

    // Give the external AST source a chance to complete the type.
    if (auto *Source = Context.getExternalSource()) {
      if (Tag)
        Source->CompleteType(Tag->getDecl());
      else
        Source->CompleteType(IFace->getDecl());

      // If the external source completed the type, go through the motions
      // again to ensure we're allowed to use the completed type.
      if (!T->isIncompleteType())
        return RequireCompleteTypeImpl(Loc, T, Diagnoser);
    }
  }

  // If we have a class template specialization or a class member of a
  // class template specialization, or an array with known size of such,
  // try to instantiate it.
  QualType MaybeTemplate = T;
  while (const ConstantArrayType *Array =
             Context.getAsConstantArrayType(MaybeTemplate))
    MaybeTemplate = Array->getElementType();
  if (const RecordType *Record = MaybeTemplate->getAs<RecordType>()) {
    bool Instantiated = false;
    bool Diagnosed = false;
    if (ClassTemplateSpecializationDecl *ClassTemplateSpec =
            dyn_cast<ClassTemplateSpecializationDecl>(Record->getDecl())) {
      if (ClassTemplateSpec->getSpecializationKind() == TSK_Undeclared) {
        Diagnosed = InstantiateClassTemplateSpecialization(
            Loc, ClassTemplateSpec, TSK_ImplicitInstantiation,
            /*Complain=*/Diagnoser);
        Instantiated = true;
      }
    } else if (CXXRecordDecl *Rec =
                   dyn_cast<CXXRecordDecl>(Record->getDecl())) {
      CXXRecordDecl *Pattern = Rec->getInstantiatedFromMemberClass();
      if (!Rec->isBeingDefined() && Pattern) {
        MemberSpecializationInfo *MSI = Rec->getMemberSpecializationInfo();
        assert(MSI && "Missing member specialization information?");
        // This record was instantiated from a class within a template.
        if (MSI->getTemplateSpecializationKind() !=
            TSK_ExplicitSpecialization) {
          Diagnosed = InstantiateClass(Loc, Rec, Pattern,
                                       getTemplateInstantiationArgs(Rec),
                                       TSK_ImplicitInstantiation,
                                       /*Complain=*/Diagnoser);
          Instantiated = true;
        }
      }
    }

    if (Instantiated) {
      // Instantiate* might have already complained that the template is not
      // defined, if we asked it to.
      if (Diagnoser && Diagnosed)
        return true;
      // If we instantiated a definition, check that it's usable, even if
      // instantiation produced an error, so that repeated calls to this
      // function give consistent answers.
      if (!T->isIncompleteType())
        return RequireCompleteTypeImpl(Loc, T, Diagnoser);
    }
  }

  // FIXME: If we didn't instantiate a definition because of an explicit
  // specialization declaration, check that it's visible.

  if (!Diagnoser)
    return true;

  Diagnoser->diagnose(*this, Loc, T);

  // If the type was a forward declaration of a class/struct/union
  // type, produce a note.
  if (Tag && !Tag->getDecl()->isInvalidDecl())
    Diag(Tag->getDecl()->getLocation(),
         Tag->isBeingDefined() ? diag::note_type_being_defined
                               : diag::note_forward_declaration)
        << QualType(Tag, 0);

  // If the Objective-C class was a forward declaration, produce a note.
  if (IFace && !IFace->getDecl()->isInvalidDecl())
    Diag(IFace->getDecl()->getLocation(), diag::note_forward_class);

  // If we have external information that we can use to suggest a fix,
  // produce a note.
  if (ExternalSource)
    ExternalSource->MaybeDiagnoseMissingCompleteType(Loc, T);

  return true;
}

bool Sema::RequireCompleteType(SourceLocation Loc, QualType T,
                               unsigned DiagID) {
  BoundTypeDiagnoser<> Diagnoser(DiagID);
  return RequireCompleteType(Loc, T, Diagnoser);
}
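// Editor's sketch (illustrative user code, not upstream source): requiring
// completeness is what drives implicit class template instantiation.
//
//   template <typename T> struct Box { T value; };
//
//   Box<int> *p;   // OK: a pointer to an incomplete type needs no definition
//   Box<int> b;    // RequireCompleteType instantiates Box<int> here; if the
//                  // template had no visible definition, an incomplete-type
//                  // error plus a forward-declaration note would be emitted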
/// \brief Get the diagnostic %select index for a tag kind, for use in a
/// literal type diagnostic message.
/// WARNING: Indexes apply to particular diagnostics only!
///
/// \returns diagnostic %select index.
static unsigned getLiteralDiagFromTagKind(TagTypeKind Tag) {
  switch (Tag) {
  case TTK_Struct: return 0;
  case TTK_Interface: return 1;
  case TTK_Class: return 2;
  default: llvm_unreachable("Invalid tag kind for literal type diagnostic!");
  }
}

/// @brief Ensure that the type T is a literal type.
///
/// This routine checks whether the type @p T is a literal type.  If @p T is
/// an incomplete type, an attempt is made to complete it.  If @p T is a
/// literal type, or @p AllowIncompleteType is true and @p T is an incomplete
/// type, returns false.  Otherwise, this routine issues the diagnostic @p PD
/// (giving it the type @p T), along with notes explaining why the type is not
/// a literal type, and returns true.
///
/// @param Loc The location in the source that the non-literal type
/// diagnostic should refer to.
///
/// @param T The type that this routine is examining for literalness.
///
/// @param Diagnoser Emits a diagnostic if T is not a literal type.
///
/// @returns @c true if @p T is not a literal type and a diagnostic was
/// emitted, @c false otherwise.
bool Sema::RequireLiteralType(SourceLocation Loc, QualType T,
                              TypeDiagnoser &Diagnoser) {
  assert(!T->isDependentType() && "type should not be dependent");

  QualType ElemType = Context.getBaseElementType(T);
  if ((isCompleteType(Loc, ElemType) || ElemType->isVoidType()) &&
      T->isLiteralType(Context))
    return false;

  Diagnoser.diagnose(*this, Loc, T);

  if (T->isVariableArrayType())
    return true;

  const RecordType *RT = ElemType->getAs<RecordType>();
  if (!RT)
    return true;

  const CXXRecordDecl *RD = cast<CXXRecordDecl>(RT->getDecl());

  // A partially-defined class type can't be a literal type, because a literal
  // class type must have a trivial destructor (which can't be checked until
  // the class definition is complete).
  if (RequireCompleteType(Loc, ElemType, diag::note_non_literal_incomplete, T))
    return true;

  // If the class has virtual base classes, then it's not an aggregate, and
  // cannot have any constexpr constructors or a trivial default constructor,
  // so is non-literal.  This is better to diagnose than the resulting absence
  // of constexpr constructors.
  if (RD->getNumVBases()) {
    Diag(RD->getLocation(), diag::note_non_literal_virtual_base)
      << getLiteralDiagFromTagKind(RD->getTagKind()) << RD->getNumVBases();
    for (const auto &I : RD->vbases())
      Diag(I.getLocStart(), diag::note_constexpr_virtual_base_here)
          << I.getSourceRange();
  } else if (!RD->isAggregate() && !RD->hasConstexprNonCopyMoveConstructor() &&
             !RD->hasTrivialDefaultConstructor()) {
    Diag(RD->getLocation(), diag::note_non_literal_no_constexpr_ctors) << RD;
  } else if (RD->hasNonLiteralTypeFieldsOrBases()) {
    for (const auto &I : RD->bases()) {
      if (!I.getType()->isLiteralType(Context)) {
        Diag(I.getLocStart(), diag::note_non_literal_base_class)
          << RD << I.getType() << I.getSourceRange();
        return true;
      }
    }
    for (const auto *I : RD->fields()) {
      if (!I->getType()->isLiteralType(Context) ||
          I->getType().isVolatileQualified()) {
        Diag(I->getLocation(), diag::note_non_literal_field)
          << RD << I << I->getType()
          << I->getType().isVolatileQualified();
        return true;
      }
    }
  } else if (!RD->hasTrivialDestructor()) {
    // All fields and bases are of literal types, so have trivial destructors.
    // If this class's destructor is non-trivial it must be user-declared.
    CXXDestructorDecl *Dtor = RD->getDestructor();
    assert(Dtor && "class has literal fields and bases but no dtor?");
    if (!Dtor)
      return true;

    Diag(Dtor->getLocation(), Dtor->isUserProvided()
                                  ? diag::note_non_literal_user_provided_dtor
                                  : diag::note_non_literal_nontrivial_dtor)
        << RD;
    if (!Dtor->isUserProvided())
      SpecialMemberIsTrivial(Dtor, CXXDestructor, /*Diagnose*/true);
  }

  return true;
}

bool Sema::RequireLiteralType(SourceLocation Loc, QualType T,
                              unsigned DiagID) {
  BoundTypeDiagnoser<> Diagnoser(DiagID);
  return RequireLiteralType(Loc, T, Diagnoser);
}
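// Editor's sketch of the notes emitted above (illustrative user code, not
// upstream source):
//
//   struct Lit  { constexpr Lit() {} };  // literal type
//   struct VB : virtual Lit {};          // virtual base => never literal
//   struct Dt  { ~Dt() {} };             // user-provided dtor => not literal
//
//   constexpr VB v{};   // error: non-literal type; the
//                       // note_non_literal_virtual_base note points at the
//                       // 'virtual Lit' base specifier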
/// \brief Retrieve a version of the type 'T' that is elaborated by Keyword
/// and qualified by the nested-name-specifier contained in SS.
QualType Sema::getElaboratedType(ElaboratedTypeKeyword Keyword,
                                 const CXXScopeSpec &SS, QualType T) {
  if (T.isNull())
    return T;
  NestedNameSpecifier *NNS;
  if (SS.isValid())
    NNS = SS.getScopeRep();
  else {
    if (Keyword == ETK_None)
      return T;
    NNS = nullptr;
  }
  return Context.getElaboratedType(Keyword, NNS, T);
}

QualType Sema::BuildTypeofExprType(Expr *E, SourceLocation Loc) {
  ExprResult ER = CheckPlaceholderExpr(E);
  if (ER.isInvalid())
    return QualType();
  E = ER.get();

  if (!getLangOpts().CPlusPlus && E->refersToBitField())
    Diag(E->getExprLoc(), diag::err_sizeof_alignof_typeof_bitfield) << 2;

  if (!E->isTypeDependent()) {
    QualType T = E->getType();
    if (const TagType *TT = T->getAs<TagType>())
      DiagnoseUseOfDecl(TT->getDecl(), E->getExprLoc());
  }
  return Context.getTypeOfExprType(E);
}

/// getDecltypeForExpr - Given an expr, will return the decltype for
/// that expression, according to the rules in C++11
/// [dcl.type.simple]p4 and C++11 [expr.lambda.prim]p18.
static QualType getDecltypeForExpr(Sema &S, Expr *E) {
  if (E->isTypeDependent())
    return S.Context.DependentTy;

  // C++11 [dcl.type.simple]p4:
  //   The type denoted by decltype(e) is defined as follows:
  //
  //     - if e is an unparenthesized id-expression or an unparenthesized
  //       class member access (5.2.5), decltype(e) is the type of the entity
  //       named by e.  If there is no such entity, or if e names a set of
  //       overloaded functions, the program is ill-formed;
  //
  // We apply the same rules for Objective-C ivar and property references.
  if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) {
    if (const ValueDecl *VD = dyn_cast<ValueDecl>(DRE->getDecl()))
      return VD->getType();
  } else if (const MemberExpr *ME = dyn_cast<MemberExpr>(E)) {
    if (const FieldDecl *FD = dyn_cast<FieldDecl>(ME->getMemberDecl()))
      return FD->getType();
  } else if (const ObjCIvarRefExpr *IR = dyn_cast<ObjCIvarRefExpr>(E)) {
    return IR->getDecl()->getType();
  } else if (const ObjCPropertyRefExpr *PR =
                 dyn_cast<ObjCPropertyRefExpr>(E)) {
    if (PR->isExplicitProperty())
      return PR->getExplicitProperty()->getType();
  } else if (auto *PE = dyn_cast<PredefinedExpr>(E)) {
    return PE->getType();
  }

  // C++11 [expr.lambda.prim]p18:
  //   Every occurrence of decltype((x)) where x is a possibly
  //   parenthesized id-expression that names an entity of automatic
  //   storage duration is treated as if x were transformed into an
  //   access to a corresponding data member of the closure type that
  //   would have been declared if x were an odr-use of the denoted
  //   entity.
  using namespace sema;
  if (S.getCurLambda()) {
    if (isa<ParenExpr>(E)) {
      if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E->IgnoreParens())) {
        if (VarDecl *Var = dyn_cast<VarDecl>(DRE->getDecl())) {
          QualType T = S.getCapturedDeclRefType(Var, DRE->getLocation());
          if (!T.isNull())
            return S.Context.getLValueReferenceType(T);
        }
      }
    }
  }

  // C++11 [dcl.type.simple]p4:
  //   [...]
  QualType T = E->getType();
  switch (E->getValueKind()) {
  //     - otherwise, if e is an xvalue, decltype(e) is T&&, where T is the
  //       type of e;
  case VK_XValue: T = S.Context.getRValueReferenceType(T); break;
  //     - otherwise, if e is an lvalue, decltype(e) is T&, where T is the
  //       type of e;
  case VK_LValue: T = S.Context.getLValueReferenceType(T); break;
  //     - otherwise, decltype(e) is the type of e.
  case VK_RValue: break;
  }

  return T;
}

QualType Sema::BuildDecltypeType(Expr *E, SourceLocation Loc,
                                 bool AsUnevaluated) {
  ExprResult ER = CheckPlaceholderExpr(E);
  if (ER.isInvalid())
    return QualType();
  E = ER.get();

  if (AsUnevaluated && ActiveTemplateInstantiations.empty() &&
      E->HasSideEffects(Context, false)) {
    // The expression operand for decltype is in an unevaluated expression
    // context, so side effects could result in unintended consequences.
    Diag(E->getExprLoc(), diag::warn_side_effects_unevaluated_context);
  }

  return Context.getDecltypeType(E, getDecltypeForExpr(*this, E));
}
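// Editor's sketch of the decltype rules above (illustrative user code, not
// upstream source):
//
//   int x = 0;
//   decltype(x)   a = x;   // int:   unparenthesized id-expression, so the
//                          //        declared type is used directly
//   decltype((x)) b = x;   // int&:  '(x)' is an ordinary lvalue expression
//   decltype(static_cast<int&&>(x)) c = static_cast<int&&>(x);  // int&&:
//                          //        the operand is an xvalue
//   decltype(x + 1) d = 0; // int:   a prvalue keeps the plain type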
QualType Sema::BuildUnaryTransformType(QualType BaseType,
                                       UnaryTransformType::UTTKind UKind,
                                       SourceLocation Loc) {
  switch (UKind) {
  case UnaryTransformType::EnumUnderlyingType:
    if (!BaseType->isDependentType() && !BaseType->isEnumeralType()) {
      Diag(Loc, diag::err_only_enums_have_underlying_types);
      return QualType();
    } else {
      QualType Underlying = BaseType;
      if (!BaseType->isDependentType()) {
        // The enum could be incomplete if we're parsing its definition or
        // recovering from an error.
        NamedDecl *FwdDecl = nullptr;
        if (BaseType->isIncompleteType(&FwdDecl)) {
          Diag(Loc, diag::err_underlying_type_of_incomplete_enum) << BaseType;
          Diag(FwdDecl->getLocation(), diag::note_forward_declaration)
            << FwdDecl;
          return QualType();
        }

        EnumDecl *ED = BaseType->getAs<EnumType>()->getDecl();
        assert(ED && "EnumType has no EnumDecl");

        DiagnoseUseOfDecl(ED, Loc);

        Underlying = ED->getIntegerType();
        assert(!Underlying.isNull());
      }
      return Context.getUnaryTransformType(
          BaseType, Underlying, UnaryTransformType::EnumUnderlyingType);
    }
  }
  llvm_unreachable("unknown unary transform type");
}

QualType Sema::BuildAtomicType(QualType T, SourceLocation Loc) {
  if (!T->isDependentType()) {
    // FIXME: It isn't entirely clear whether incomplete atomic types
    // are allowed or not; for simplicity, ban them for the moment.
    if (RequireCompleteType(Loc, T, diag::err_atomic_specifier_bad_type, 0))
      return QualType();

    int DisallowedKind = -1;
    if (T->isArrayType())
      DisallowedKind = 1;
    else if (T->isFunctionType())
      DisallowedKind = 2;
    else if (T->isReferenceType())
      DisallowedKind = 3;
    else if (T->isAtomicType())
      DisallowedKind = 4;
    else if (T.hasQualifiers())
      DisallowedKind = 5;
    else if (!T.isTriviallyCopyableType(Context))
      // Some other non-trivially-copyable type (probably a C++ class).
      DisallowedKind = 6;

    if (DisallowedKind != -1) {
      Diag(Loc, diag::err_atomic_specifier_bad_type) << DisallowedKind << T;
      return QualType();
    }

    // FIXME: Do we need any handling for ARC here?
  }

  // Build the atomic type.
  return Context.getAtomicType(T);
}
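// Editor's sketch of the _Atomic specifier checks above (illustrative user
// code, not upstream source; DisallowedKind indexes the %select in
// err_atomic_specifier_bad_type):
//
//   _Atomic(int) a;        // OK
//   _Atomic(int[4]) b;     // error: array type (kind 1)
//   _Atomic(int &) c;      // error: reference type, C++ only (kind 3)
//   _Atomic(const int) d;  // error: qualified type (kind 5)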
Index: projects/clang400-import/contrib/llvm/tools/clang
===================================================================
--- projects/clang400-import/contrib/llvm/tools/clang	(revision 314176)
+++ projects/clang400-import/contrib/llvm/tools/clang	(revision 314177)

Property changes on: projects/clang400-import/contrib/llvm/tools/clang
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /vendor/clang/dist:r313892-314175
Index: projects/clang400-import/contrib/llvm/tools/lld
===================================================================
--- projects/clang400-import/contrib/llvm/tools/lld	(revision 314176)
+++ projects/clang400-import/contrib/llvm/tools/lld	(revision 314177)

Property changes on: projects/clang400-import/contrib/llvm/tools/lld
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /vendor/lld/dist:r313892-314175
Index: projects/clang400-import/contrib/llvm/tools/lldb
===================================================================
--- projects/clang400-import/contrib/llvm/tools/lldb	(revision 314176)
+++ projects/clang400-import/contrib/llvm/tools/lldb	(revision 314177)

Property changes on: projects/clang400-import/contrib/llvm/tools/lldb
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /vendor/lldb/dist:r313892-314175
Index: projects/clang400-import/contrib/llvm
===================================================================
--- projects/clang400-import/contrib/llvm	(revision 314176)
+++ projects/clang400-import/contrib/llvm	(revision 314177)

Property changes on: projects/clang400-import/contrib/llvm
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /vendor/llvm/dist:r313892-314175
Index: projects/clang400-import/lib/clang/include/clang/Basic/Version.inc
===================================================================
--- projects/clang400-import/lib/clang/include/clang/Basic/Version.inc	(revision 314176)
+++ projects/clang400-import/lib/clang/include/clang/Basic/Version.inc	(revision 314177)
@@ -1,11 +1,11 @@
 /* $FreeBSD$ */
 #define CLANG_VERSION 4.0.0
 #define CLANG_VERSION_STRING "4.0.0"
 #define CLANG_VERSION_MAJOR 4
 #define CLANG_VERSION_MINOR 0
 #define CLANG_VERSION_PATCHLEVEL 0
 #define CLANG_VENDOR "FreeBSD "
-#define SVN_REVISION "295380"
+#define SVN_REVISION "296002"
Index: projects/clang400-import/lib/clang/include/lld/Config/Version.inc
===================================================================
--- projects/clang400-import/lib/clang/include/lld/Config/Version.inc	(revision 314176)
+++ projects/clang400-import/lib/clang/include/lld/Config/Version.inc	(revision 314177)
@@ -1,8 +1,8 @@
 // $FreeBSD$
 #define LLD_VERSION 4.0.0
 #define LLD_VERSION_STRING "4.0.0"
 #define LLD_VERSION_MAJOR 4
 #define LLD_VERSION_MINOR 0
-#define LLD_REVISION_STRING "295380"
+#define LLD_REVISION_STRING "296002"
 #define LLD_REPOSITORY_STRING "FreeBSD"